A from-scratch atproto PDS implementation in Python (mirrors https://github.com/DavidBuchanan314/millipds)

wip account creation

+186 -14
+3
millipds_dev.dockerfile
··· 20 # init the db with dev presets 21 RUN python3 -m millipds init millipds.test --dev 22 23 # do the thing 24 CMD python3 -m millipds run --listen_host=0.0.0.0 --listen_port=8123 25
# init the db with dev presets
RUN python3 -m millipds init millipds.test --dev

# create a test user
# NOTE: the CLI signature is `millipds account create <did> <handle>`, so the
# DID must come first and the handle second.
# --unsafe_password is only accepted when stdin is not a TTY, which is the case
# for a RUN step during an image build.
RUN python3 -m millipds account create did:web:bob.test bob.test --unsafe_password=hunter2

# do the thing
CMD python3 -m millipds run --listen_host=0.0.0.0 --listen_port=8123
+41 -2
src/millipds/__main__.py
··· 3 Usage: 4 millipds init <hostname> [--dev|--sandbox] 5 millipds config [--pds_pfx=URL] [--pds_did=DID] [--bsky_appview_pfx=URL] [--bsky_appview_did=DID] 6 millipds run [--sock_path=PATH] [--listen_host=HOST] [--listen_port=PORT] 7 millipds (-h | --help) 8 millipds --version ··· 19 Any options not specified will be left at their previous values. Once changes 20 have been made (or even if they haven't), the new config will be printed. 21 22 --pds_pfx=URL The HTTP URL prefix that this PDS is publicly accessible at (e.g. mypds.example) 23 --pds_did=DID This PDS's DID (e.g. did:web:mypds.example) 24 --bsky_appview_pfx=URL AppView URL prefix e.g. "https://api.bsky-sandbox.dev" 25 --bsky_appview_did=DID AppView DID e.g. did:web:api.bsky-sandbox.dev 26 27 Run: 28 Launch the service (in the foreground) 29 ··· 36 --version Show version. 37 """ 38 39 - from docopt import docopt 40 import importlib.metadata 41 import asyncio 42 43 from . import service 44 from . import database 45 46 """ 47 This is the entrypoint for the `millipds` command (declared in project.scripts) ··· 75 bsky_appview_pfx="https://api.bsky-sandbox.dev", 76 bsky_appview_did="did:web:api.bsky-sandbox.dev", 77 ) 78 - else: 79 db.update_config( 80 pds_pfx=f'https://{args["<hostname>"]}', 81 pds_did=f'did:web:{args["<hostname>"]}', ··· 98 bsky_appview_did=args["--bsky_appview_did"], 99 ) 100 db.print_config() 101 elif args["run"]: 102 asyncio.run(service.run( 103 sock_path=args["--sock_path"],
··· 3 Usage: 4 millipds init <hostname> [--dev|--sandbox] 5 millipds config [--pds_pfx=URL] [--pds_did=DID] [--bsky_appview_pfx=URL] [--bsky_appview_did=DID] 6 + millipds account create <did> <handle> [--unsafe_password=PW] 7 millipds run [--sock_path=PATH] [--listen_host=HOST] [--listen_port=PORT] 8 millipds (-h | --help) 9 millipds --version ··· 20 Any options not specified will be left at their previous values. Once changes 21 have been made (or even if they haven't), the new config will be printed. 22 23 + Do not change the config while the PDS is running (TODO: enforce this in code (or make sure it's harmless?)) 24 + 25 --pds_pfx=URL The HTTP URL prefix that this PDS is publicly accessible at (e.g. mypds.example) 26 --pds_did=DID This PDS's DID (e.g. did:web:mypds.example) 27 --bsky_appview_pfx=URL AppView URL prefix e.g. "https://api.bsky-sandbox.dev" 28 --bsky_appview_did=DID AppView DID e.g. did:web:api.bsky-sandbox.dev 29 30 + Account create: 31 + Create a new user account on the PDS. Bring your own DID and corresponding 32 + handle - millipds will not (yet?) attempt to validate either. 33 + You'll be prompted for a password interactively. 34 + 35 + TODO: consider bring-your-own signing key? 36 + 37 + --unsafe_password=PW Specify password non-iteractively, for use in test scripts etc. 38 + 39 Run: 40 Launch the service (in the foreground) 41 ··· 48 --version Show version. 49 """ 50 51 import importlib.metadata 52 import asyncio 53 + import sys 54 + import logging 55 + from getpass import getpass 56 + 57 + from docopt import docopt 58 59 from . import service 60 from . import database 61 + from . import crypto 62 + 63 + 64 + logging.basicConfig(level=logging.DEBUG) # TODO: make this configurable? 
65 + 66 67 """ 68 This is the entrypoint for the `millipds` command (declared in project.scripts) ··· 96 bsky_appview_pfx="https://api.bsky-sandbox.dev", 97 bsky_appview_did="did:web:api.bsky-sandbox.dev", 98 ) 99 + else: # "prod" presets 100 db.update_config( 101 pds_pfx=f'https://{args["<hostname>"]}', 102 pds_did=f'did:web:{args["<hostname>"]}', ··· 119 bsky_appview_did=args["--bsky_appview_did"], 120 ) 121 db.print_config() 122 + elif args["account"]: 123 + if args["create"]: 124 + pw = args["--unsafe_password"] 125 + if pw: 126 + # rationale: only allow non-iteractive password input from scripts etc. 127 + if sys.stdin.buffer.isatty(): 128 + print("error: --unsafe_password can't be used from an interactive shell") 129 + return 130 + else: 131 + pw = getpass(f"Password for new account: ") 132 + db.account_create( 133 + did=args["<did>"], 134 + handle=args["<handle>"], 135 + password=pw, 136 + privkey=crypto.keygen_p256() # TODO: supply from arg 137 + ) 138 + else: 139 + print("CLI arg parse error?!") 140 elif args["run"]: 141 asyncio.run(service.run( 142 sock_path=args["--sock_path"],
+22
src/millipds/crypto.py
··· 1 from cryptography.hazmat.primitives.asymmetric import ec 2 from cryptography.hazmat.primitives import hashes 3 from cryptography.hazmat.primitives.asymmetric.utils import decode_dss_signature, encode_dss_signature 4 from cryptography.exceptions import InvalidSignature 5 ··· 47 ) 48 signature = r.to_bytes(32, "big") + s.to_bytes(32, "big") 49 return signature
··· 1 from cryptography.hazmat.primitives.asymmetric import ec 2 from cryptography.hazmat.primitives import hashes 3 + from cryptography.hazmat.primitives import serialization 4 from cryptography.hazmat.primitives.asymmetric.utils import decode_dss_signature, encode_dss_signature 5 from cryptography.exceptions import InvalidSignature 6 ··· 48 ) 49 signature = r.to_bytes(32, "big") + s.to_bytes(32, "big") 50 return signature 51 + 52 + 53 + def keygen_p256() -> ec.EllipticCurvePrivateKey: 54 + return ec.generate_private_key(ec.SECP256R1()) 55 + 56 + 57 + def privkey_to_pem(privkey: ec.EllipticCurvePrivateKey) -> str: 58 + return privkey.private_bytes( 59 + encoding=serialization.Encoding.PEM, 60 + format=serialization.PrivateFormat.PKCS8, 61 + encryption_algorithm=serialization.NoEncryption() 62 + ).decode() 63 + 64 + 65 + def privkey_from_pem(pem: str) -> ec.EllipticCurvePrivateKey: 66 + privkey = serialization.load_pem_private_key(pem.encode(), password=None) 67 + if not isinstance(privkey, ec.EllipticCurvePrivateKey): 68 + raise TypeError("unsupported key type") 69 + if not isinstance(privkey.curve, (ec.SECP256R1, ec.SECP256K1)): 70 + raise TypeError("unsupported key type") 71 + return privkey
+99 -5
src/millipds/database.py
··· 1 """ 2 - Ideally, all SQL statements are contained within this file 3 """ 4 5 from typing import Optional, Dict 6 from functools import cached_property 7 import secrets 8 9 import apsw 10 11 from . import static_config 12 from . import util 13 14 class Database: 15 def __init__(self, path: str=static_config.MAIN_DB_PATH) -> None: 16 util.mkdirs_for_file(path) 17 self.con = apsw.Connection(path) 18 19 try: 20 if self.config["db_version"] != static_config.MILLIPDS_DB_VERSION: ··· 27 self._init_central_tables() 28 29 def _init_central_tables(self): 30 self.con.execute( 31 """ 32 CREATE TABLE config( ··· 42 43 self.con.execute( 44 """ 45 - INSERT INTO config ( 46 db_version, 47 jwt_access_secret 48 ) VALUES (?, ?) ··· 54 """ 55 CREATE TABLE user( 56 did TEXT PRIMARY KEY NOT NULL, 57 prefs BLOB NOT NULL, 58 pw_hash TEXT NOT NULL, 59 repo_path TEXT NOT NULL, ··· 61 ) 62 """ 63 ) 64 65 self.con.execute( 66 """ ··· 84 if pds_did is not None: 85 self.con.execute("UPDATE config SET pds_did=?", (pds_did,)) 86 if bsky_appview_pfx is not None: 87 - self.con.execute("UPDATE config SET bsky_appview_pfx=?", (bsky_appview_pfx,)) 88 if bsky_appview_did is not None: 89 - self.con.execute("UPDATE config SET bsky_appview_did=?", (bsky_appview_did,)) 90 91 - del self.config # invalidate the cached value 92 93 @cached_property 94 def config(self) -> Dict[str, object]: ··· 117 if redact_secrets and "secret" in k: 118 v = "[REDACTED]" 119 print(f"{k:<{maxlen}} : {v!r}")
··· 1 """ 2 + Ideally, all SQL statements are contained within this file. 3 + 4 + Password hashing also happens in here, because it doesn't make much sense to do 5 + it anywhere else. 6 """ 7 8 from typing import Optional, Dict 9 from functools import cached_property 10 import secrets 11 + import os 12 + import logging 13 14 + from argon2 import PasswordHasher # maybe this should come from .crypto? 15 import apsw 16 + import apsw.bestpractice 17 + 18 + from atmst.blockstore import BlockStore 19 20 from . import static_config 21 from . import util 22 + from . import crypto 23 + 24 + logger = logging.getLogger(__name__) 25 + 26 + # https://rogerbinns.github.io/apsw/bestpractice.html 27 + apsw.bestpractice.apply(apsw.bestpractice.recommended) 28 29 class Database: 30 def __init__(self, path: str=static_config.MAIN_DB_PATH) -> None: 31 util.mkdirs_for_file(path) 32 self.con = apsw.Connection(path) 33 + self.pw_hasher = PasswordHasher() 34 35 try: 36 if self.config["db_version"] != static_config.MILLIPDS_DB_VERSION: ··· 43 self._init_central_tables() 44 45 def _init_central_tables(self): 46 + logger.info("initing central tables") 47 self.con.execute( 48 """ 49 CREATE TABLE config( ··· 59 60 self.con.execute( 61 """ 62 + INSERT INTO config( 63 db_version, 64 jwt_access_secret 65 ) VALUES (?, ?) 
··· 71 """ 72 CREATE TABLE user( 73 did TEXT PRIMARY KEY NOT NULL, 74 + handle TEXT NOT NULL, 75 prefs BLOB NOT NULL, 76 pw_hash TEXT NOT NULL, 77 repo_path TEXT NOT NULL, ··· 79 ) 80 """ 81 ) 82 + 83 + self.con.execute("CREATE UNIQUE INDEX user_by_handle ON user(handle)") 84 85 self.con.execute( 86 """ ··· 104 if pds_did is not None: 105 self.con.execute("UPDATE config SET pds_did=?", (pds_did,)) 106 if bsky_appview_pfx is not None: 107 + self.con.execute( 108 + "UPDATE config SET bsky_appview_pfx=?", 109 + (bsky_appview_pfx,) 110 + ) 111 if bsky_appview_did is not None: 112 + self.con.execute( 113 + "UPDATE config SET bsky_appview_did=?", 114 + (bsky_appview_did,) 115 + ) 116 117 + try: 118 + del self.config # invalidate the cached value 119 + except AttributeError: 120 + pass 121 122 @cached_property 123 def config(self) -> Dict[str, object]: ··· 146 if redact_secrets and "secret" in k: 147 v = "[REDACTED]" 148 print(f"{k:<{maxlen}} : {v!r}") 149 + 150 + def account_create(self, 151 + did: str, 152 + handle: str, 153 + password: str, 154 + privkey: crypto.ec.EllipticCurvePrivateKey 155 + ) -> None: 156 + pw_hash = self.pw_hasher.hash(password) 157 + privkey_pem = crypto.privkey_to_pem(privkey) 158 + repo_path = f"{static_config.REPOS_DIR}/{util.did_to_safe_filename(did)}.sqlite3" 159 + logger.info( 160 + f"creating account for did={did}, handle={handle} at {repo_path}" 161 + ) 162 + with self.con: 163 + self.con.execute( 164 + """ 165 + INSERT INTO user( 166 + did, 167 + handle, 168 + prefs, 169 + pw_hash, 170 + repo_path, 171 + signing_key 172 + ) VALUES (?, ?, ?, ?, ?, ?) 
173 + """, 174 + (did, handle, b"{}", pw_hash, repo_path, privkey_pem) 175 + ) 176 + UserDatabase.init_tables(self.con, did, repo_path) 177 + self.con.execute("DETACH spoke") 178 + 179 + 180 + class UserDBBlockStore(BlockStore): 181 + pass # TODO 182 + 183 + 184 + class UserDatabase: 185 + def __init__(self, wcon: apsw.Connection, did: str, path: str) -> None: 186 + self.wcon = wcon # writes go via the hub database connection, using ATTACH 187 + self.rcon = apsw.Connection(path, flags=apsw.SQLITE_OPEN_READONLY) 188 + 189 + # TODO: check db version and did match 190 + 191 + @staticmethod 192 + def init_tables(wcon: apsw.Connection, did: str, path: str) -> None: 193 + util.mkdirs_for_file(path) 194 + wcon.execute("ATTACH ? AS spoke", (path,)) 195 + 196 + wcon.execute( 197 + """ 198 + CREATE TABLE spoke.repo( 199 + db_version INTEGER NOT NULL, 200 + did TEXT NOT NULL 201 + ) 202 + """ 203 + ) 204 + 205 + wcon.execute( 206 + "INSERT INTO spoke.repo(db_version, did) VALUES (?, ?)", 207 + (static_config.MILLIPDS_DB_VERSION, did) 208 + ) 209 + 210 + # TODO: the other tables 211 + 212 + # nb: caller is responsible for running "DETACH spoke", after the end 213 + # of the transaction
+1 -3
src/millipds/service.py
··· 9 10 from . import static_config 11 12 - logging.basicConfig(level=logging.DEBUG) # TODO: make this configurable? 13 - 14 15 async def hello(request: web.Request): 16 version = importlib.metadata.version("millipds") ··· 46 This gets invoked via millipds.__main__.py 47 """ 48 async def run(sock_path: Optional[str], host: str, port: int): 49 - runner = web.AppRunner(app, access_log_format=static_config.LOG_FMT) 50 await runner.setup() 51 52 if sock_path is None:
··· 9 10 from . import static_config 11 12 13 async def hello(request: web.Request): 14 version = importlib.metadata.version("millipds") ··· 44 This gets invoked via millipds.__main__.py 45 """ 46 async def run(sock_path: Optional[str], host: str, port: int): 47 + runner = web.AppRunner(app, access_log_format=static_config.HTTP_LOG_FMT) 48 await runner.setup() 49 50 if sock_path is None:
+4 -4
src/millipds/static_config.py
··· 4 (some of this stuff might want to be broken out into a proper config file, eventually) 5 """ 6 7 - LOG_FMT = '%{X-Forwarded-For}i %t (%Tf) "%r" %s %b "%{Referer}i" "%{User-Agent}i"' 8 9 GROUPNAME = "millipds-sock" 10 11 MILLIPDS_DB_VERSION = 1 # this gets bumped if we make breaking changes to the db schema 12 13 - DATA_DIR = "./data/" 14 - MAIN_DB_PATH = DATA_DIR + "millipds.sqlite3" 15 - REPOS_DIR = DATA_DIR + "repos/"
··· 4 (some of this stuff might want to be broken out into a proper config file, eventually) 5 """ 6 7 + HTTP_LOG_FMT = '%{X-Forwarded-For}i %t (%Tf) "%r" %s %b "%{Referer}i" "%{User-Agent}i"' 8 9 GROUPNAME = "millipds-sock" 10 11 MILLIPDS_DB_VERSION = 1 # this gets bumped if we make breaking changes to the db schema 12 13 + DATA_DIR = "./data" 14 + MAIN_DB_PATH = DATA_DIR + "/millipds.sqlite3" 15 + REPOS_DIR = DATA_DIR + "/repos"
+16
src/millipds/util.py
··· 1 import os 2 3 def mkdirs_for_file(path: str) -> None: 4 os.makedirs(os.path.dirname(path), exist_ok=True)
"""
Small filesystem-related helpers shared by the rest of millipds.
"""

import os
import hashlib


def mkdirs_for_file(path: str) -> None:
    """
    Create the parent directories of the file at `path`, if they don't exist yet.

    The file itself is not created. If `path` has no directory component
    (e.g. "db.sqlite3"), the file lives in the current working directory and
    there is nothing to create.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so skip bare filenames.
    if parent:
        os.makedirs(parent, exist_ok=True)


FILENAME_SAFE_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

# Backwards-compatible alias for the original (misspelled) constant name.
FILANEME_SAFE_CHARS = FILENAME_SAFE_CHARS


def did_to_safe_filename(did: str) -> str:
    """
    Map a DID to a string that is safe to use as a filename.

    The format is <sha256(did)>_<filtered_did>
    The former guarantees uniqueness, and the latter makes it
    human-recognizable (ish).

    The result is at most 200 characters long. The hex digest is always 64
    characters, so only the filtered (human-readable) part is ever truncated.
    """

    hexdigest = hashlib.sha256(did.encode()).hexdigest()
    filtered = "".join(char for char in did if char in FILENAME_SAFE_CHARS)

    # Truncate to stay within typical filename-length limits (NAME_MAX is
    # commonly 255 bytes), with room to spare in case the caller appends a
    # file extension. Every character in the result is ASCII, so the character
    # count equals the byte count.
    return f"{hexdigest}_{filtered}"[:200]