atproto utils for zig zat.dev
atproto sdk zig

feat: MST, crypto signing, did:key construction, multibase encoding

merkle search tree with put/get/delete/rootCid, verified against
interop commit-proof fixtures (6) and common-prefix vectors (13).

ECDSA signing (signSecp256k1/signP256) with RFC 6979 deterministic
nonces and low-S normalization. did:key formatting via multicodec
encode + base58btc. base32lower encode/decode for CID strings.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+1327 -71
+8
CHANGELOG.md
··· 1 # changelog 2 3 ## 0.1.8 4 5 - **fix**: NSID parser rejects TLD starting with digit (e.g. `1.0.0.127.record`)
··· 1 # changelog 2 3 + ## 0.1.9 4 + 5 + - **feat**: merkle search tree (MST) — `mst.Mst` with `put`, `get`, `delete`, `rootCid` 6 + - **feat**: ECDSA signing — `signSecp256k1`, `signP256` with RFC 6979 deterministic nonces and low-S normalization 7 + - **feat**: `did:key` construction — `multicodec.formatDidKey`, `multicodec.encodePublicKey` 8 + - **feat**: multibase encoding — base58btc encode, base32lower encode/decode 9 + - interop tests: MST common prefix (13 vectors), commit proofs (6 fixtures) 10 + 11 ## 0.1.8 12 13 - **fix**: NSID parser rejects TLD starting with digit (e.g. `1.0.0.127.record`)
+2
build.zig
··· 40 .{ "signature_fixtures", "crypto/signature-fixtures.json" }, 41 // mst fixtures 42 .{ "mst_key_heights", "mst/key_heights.json" }, 43 }; 44 inline for (interop_files) |entry| { 45 tests.root_module.addAnonymousImport(entry[0], .{
··· 40 .{ "signature_fixtures", "crypto/signature-fixtures.json" }, 41 // mst fixtures 42 .{ "mst_key_heights", "mst/key_heights.json" }, 43 + .{ "common_prefix", "mst/common_prefix.json" }, 44 + .{ "commit_proofs", "firehose/commit-proof-fixtures.json" }, 45 }; 46 inline for (interop_files) |entry| { 47 tests.root_module.addAnonymousImport(entry[0], .{
+1 -1
build.zig.zon
··· 1 .{ 2 .name = .zat, 3 - .version = "0.1.8", 4 .fingerprint = 0x8da9db57ee82fbe4, 5 .minimum_zig_version = "0.15.0", 6 .dependencies = .{
··· 1 .{ 2 .name = .zat, 3 + .version = "0.1.9", 4 .fingerprint = 0x8da9db57ee82fbe4, 5 .minimum_zig_version = "0.15.0", 6 .dependencies = .{
+116 -19
src/internal/interop_tests.zig
··· 17 const multibase = @import("multibase.zig"); 18 const multicodec = @import("multicodec.zig"); 19 20 // === helpers === 21 22 fn LineIterator(comptime sentinel: ?u8) type { ··· 229 try std.testing.expect(tested == fixtures.len); 230 } 231 232 - // === tier 3: MST key heights === 233 - 234 - /// compute MST tree depth for a record key. 235 - /// depth = count leading zero bits in SHA-256(key), divided by 2, rounded down. 236 - fn mstKeyHeight(key: []const u8) u32 { 237 - var digest: [32]u8 = undefined; 238 - std.crypto.hash.sha2.Sha256.hash(key, &digest, .{}); 239 - var leading_zeros: u32 = 0; 240 - for (digest) |byte| { 241 - if (byte == 0) { 242 - leading_zeros += 8; 243 - } else { 244 - leading_zeros += @clz(byte); 245 - break; 246 - } 247 - } 248 - return leading_zeros / 2; 249 - } 250 251 test "interop: mst key heights" { 252 const allocator = std.testing.allocator; ··· 263 const key = obj.get("key").?.string; 264 const expected_height: u32 = @intCast(obj.get("height").?.integer); 265 266 - const actual = mstKeyHeight(key); 267 if (actual != expected_height) { 268 std.debug.print("FAIL: key '{s}': expected height {d}, got {d}\n", .{ key, expected_height, actual }); 269 return error.WrongHeight; ··· 273 274 try std.testing.expect(tested > 0); 275 }
··· 17 const multibase = @import("multibase.zig"); 18 const multicodec = @import("multicodec.zig"); 19 20 + // mst 21 + const mst = @import("mst.zig"); 22 + const cbor = @import("cbor.zig"); 23 + 24 // === helpers === 25 26 fn LineIterator(comptime sentinel: ?u8) type { ··· 233 try std.testing.expect(tested == fixtures.len); 234 } 235 236 + // === tier 3: MST === 237 238 test "interop: mst key heights" { 239 const allocator = std.testing.allocator; ··· 250 const key = obj.get("key").?.string; 251 const expected_height: u32 = @intCast(obj.get("height").?.integer); 252 253 + const actual = mst.keyHeight(key); 254 if (actual != expected_height) { 255 std.debug.print("FAIL: key '{s}': expected height {d}, got {d}\n", .{ key, expected_height, actual }); 256 return error.WrongHeight; ··· 260 261 try std.testing.expect(tested > 0); 262 } 263 + 264 + test "interop: mst common prefix" { 265 + const allocator = std.testing.allocator; 266 + 267 + const fixture_json = @embedFile("common_prefix"); 268 + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, fixture_json, .{}); 269 + defer parsed.deinit(); 270 + 271 + const fixtures = parsed.value.array.items; 272 + var tested: usize = 0; 273 + 274 + for (fixtures) |fixture| { 275 + const obj = fixture.object; 276 + const left = obj.get("left").?.string; 277 + const right = obj.get("right").?.string; 278 + const expected_len: usize = @intCast(obj.get("len").?.integer); 279 + 280 + const actual = mst.commonPrefixLen(left, right); 281 + if (actual != expected_len) { 282 + std.debug.print("FAIL: commonPrefixLen('{s}', '{s}'): expected {d}, got {d}\n", .{ left, right, expected_len, actual }); 283 + return error.WrongPrefixLen; 284 + } 285 + tested += 1; 286 + } 287 + 288 + try std.testing.expect(tested == 13); 289 + } 290 + 291 + test "interop: mst commit proofs" { 292 + const allocator = std.testing.allocator; 293 + 294 + const fixture_json = @embedFile("commit_proofs"); 295 + const parsed = try 
std.json.parseFromSlice(std.json.Value, allocator, fixture_json, .{}); 296 + defer parsed.deinit(); 297 + 298 + const fixtures = parsed.value.array.items; 299 + var tested: usize = 0; 300 + 301 + for (fixtures) |fixture| { 302 + var arena = std.heap.ArenaAllocator.init(allocator); 303 + defer arena.deinit(); 304 + const a = arena.allocator(); 305 + 306 + const obj = fixture.object; 307 + const comment = if (obj.get("comment")) |v| switch (v) { 308 + .string => |s| s, 309 + else => "?", 310 + } else "?"; 311 + 312 + // parse leaf value CID 313 + const leaf_value_str = obj.get("leafValue").?.string; 314 + const leaf_cid = try mst.parseCidString(a, leaf_value_str); 315 + 316 + // build initial tree from keys 317 + var tree = mst.Mst.init(a); 318 + const keys = obj.get("keys").?.array.items; 319 + for (keys) |key_val| { 320 + try tree.put(key_val.string, leaf_cid); 321 + } 322 + 323 + // verify root before commit 324 + const root_before_str = obj.get("rootBeforeCommit").?.string; 325 + const expected_before = try mst.parseCidString(a, root_before_str); 326 + 327 + const actual_before = try tree.rootCid(); 328 + if (!std.mem.eql(u8, actual_before.raw, expected_before.raw)) { 329 + std.debug.print("FAIL [{s}]: rootBeforeCommit mismatch\n", .{comment}); 330 + std.debug.print(" expected: {s}\n", .{root_before_str}); 331 + // print hex for debugging 332 + std.debug.print(" expected raw ({d}): ", .{expected_before.raw.len}); 333 + for (expected_before.raw) |b| std.debug.print("{x:0>2}", .{b}); 334 + std.debug.print("\n actual raw ({d}): ", .{actual_before.raw.len}); 335 + for (actual_before.raw) |b| std.debug.print("{x:0>2}", .{b}); 336 + std.debug.print("\n", .{}); 337 + return error.RootBeforeMismatch; 338 + } 339 + 340 + // apply adds 341 + const adds = obj.get("adds").?.array.items; 342 + for (adds) |add_val| { 343 + try tree.put(add_val.string, leaf_cid); 344 + } 345 + 346 + // apply dels 347 + const dels = obj.get("dels").?.array.items; 348 + for (dels) |del_val| { 349 
+ try tree.delete(del_val.string); 350 + } 351 + 352 + // verify root after commit 353 + const root_after_str = obj.get("rootAfterCommit").?.string; 354 + const expected_after = try mst.parseCidString(a, root_after_str); 355 + 356 + const actual_after = try tree.rootCid(); 357 + if (!std.mem.eql(u8, actual_after.raw, expected_after.raw)) { 358 + std.debug.print("FAIL [{s}]: rootAfterCommit mismatch\n", .{comment}); 359 + std.debug.print(" expected: {s}\n", .{root_after_str}); 360 + std.debug.print(" expected raw ({d}): ", .{expected_after.raw.len}); 361 + for (expected_after.raw) |b| std.debug.print("{x:0>2}", .{b}); 362 + std.debug.print("\n actual raw ({d}): ", .{actual_after.raw.len}); 363 + for (actual_after.raw) |b| std.debug.print("{x:0>2}", .{b}); 364 + std.debug.print("\n", .{}); 365 + return error.RootAfterMismatch; 366 + } 367 + 368 + tested += 1; 369 + } 370 + 371 + try std.testing.expect(tested == 6); 372 + }
+96 -20
src/internal/jwt.zig
··· 252 0x79, 0xDC, 0xE5, 0x61, 0x7E, 0x31, 0x92, 0xA8, 253 }; 254 255 - pub fn verifySecp256k1(message: []const u8, sig_bytes: []const u8, public_key_raw: []const u8) !void { 256 - const Scheme = crypto.sign.ecdsa.EcdsaSecp256k1Sha256; 257 258 - // parse signature (r || s, 64 bytes) 259 if (sig_bytes.len != 64) return error.InvalidSignature; 260 const sig = Scheme.Signature.fromBytes(sig_bytes[0..64].*); 261 262 - // reject high-S signatures (atproto requires low-S) 263 - rejectHighS(secp256k1_half_order, sig.s) catch return error.SignatureVerificationFailed; 264 265 - // parse public key from SEC1 compressed format 266 if (public_key_raw.len != 33) return error.InvalidPublicKey; 267 const public_key = Scheme.PublicKey.fromSec1(public_key_raw) catch return error.InvalidPublicKey; 268 269 - // verify 270 sig.verify(message, public_key) catch return error.SignatureVerificationFailed; 271 } 272 273 - pub fn verifyP256(message: []const u8, sig_bytes: []const u8, public_key_raw: []const u8) !void { 274 - const Scheme = crypto.sign.ecdsa.EcdsaP256Sha256; 275 276 - // parse signature (r || s, 64 bytes) 277 - if (sig_bytes.len != 64) return error.InvalidSignature; 278 - const sig = Scheme.Signature.fromBytes(sig_bytes[0..64].*); 279 - 280 - // reject high-S signatures (atproto requires low-S) 281 - rejectHighS(p256_half_order, sig.s) catch return error.SignatureVerificationFailed; 282 283 - // parse public key from SEC1 compressed format 284 - if (public_key_raw.len != 33) return error.InvalidPublicKey; 285 - const public_key = Scheme.PublicKey.fromSec1(public_key_raw) catch return error.InvalidPublicKey; 286 287 - // verify 288 - sig.verify(message, public_key) catch return error.SignatureVerificationFailed; 289 } 290 291 // === tests === ··· 369 // should fail verification with wrong key 370 try std.testing.expectError(error.SignatureVerificationFailed, jwt.verify(wrong_key)); 371 }
··· 252 0x79, 0xDC, 0xE5, 0x61, 0x7E, 0x31, 0x92, 0xA8, 253 }; 254 255 + /// ECDSA signature (r || s, 64 bytes) 256 + pub const Signature = struct { 257 + bytes: [64]u8, 258 + }; 259 + 260 + /// sign a message with deterministic RFC 6979 ECDSA and low-S normalization 261 + fn signEcdsa(comptime Scheme: type, comptime Curve: type, comptime half_order: [32]u8, message: []const u8, secret_key_bytes: []const u8) !Signature { 262 + if (secret_key_bytes.len != 32) return error.InvalidSecretKey; 263 + const sk = Scheme.SecretKey.fromBytes(secret_key_bytes[0..32].*) catch return error.InvalidSecretKey; 264 + const kp = Scheme.KeyPair.fromSecretKey(sk) catch return error.InvalidSecretKey; 265 + 266 + var sig = kp.sign(message, null) catch return error.SigningFailed; 267 + 268 + if (bigEndianGt(sig.s, half_order)) { 269 + sig.s = Curve.scalar.neg(sig.s, .big) catch return error.SigningFailed; 270 + } 271 + 272 + return .{ .bytes = sig.toBytes() }; 273 + } 274 275 + /// verify an ECDSA signature, rejecting high-S 276 + fn verifyEcdsa(comptime Scheme: type, comptime half_order: [32]u8, message: []const u8, sig_bytes: []const u8, public_key_raw: []const u8) !void { 277 if (sig_bytes.len != 64) return error.InvalidSignature; 278 const sig = Scheme.Signature.fromBytes(sig_bytes[0..64].*); 279 280 + rejectHighS(half_order, sig.s) catch return error.SignatureVerificationFailed; 281 282 if (public_key_raw.len != 33) return error.InvalidPublicKey; 283 const public_key = Scheme.PublicKey.fromSec1(public_key_raw) catch return error.InvalidPublicKey; 284 285 sig.verify(message, public_key) catch return error.SignatureVerificationFailed; 286 } 287 288 + pub fn signSecp256k1(message: []const u8, secret_key_bytes: []const u8) !Signature { 289 + return signEcdsa(crypto.sign.ecdsa.EcdsaSecp256k1Sha256, crypto.ecc.Secp256k1, secp256k1_half_order, message, secret_key_bytes); 290 + } 291 292 + pub fn signP256(message: []const u8, secret_key_bytes: []const u8) !Signature { 293 + return 
signEcdsa(crypto.sign.ecdsa.EcdsaP256Sha256, crypto.ecc.P256, p256_half_order, message, secret_key_bytes); 294 + } 295 296 + pub fn verifySecp256k1(message: []const u8, sig_bytes: []const u8, public_key_raw: []const u8) !void { 297 + return verifyEcdsa(crypto.sign.ecdsa.EcdsaSecp256k1Sha256, secp256k1_half_order, message, sig_bytes, public_key_raw); 298 + } 299 300 + pub fn verifyP256(message: []const u8, sig_bytes: []const u8, public_key_raw: []const u8) !void { 301 + return verifyEcdsa(crypto.sign.ecdsa.EcdsaP256Sha256, p256_half_order, message, sig_bytes, public_key_raw); 302 } 303 304 // === tests === ··· 382 // should fail verification with wrong key 383 try std.testing.expectError(error.SignatureVerificationFailed, jwt.verify(wrong_key)); 384 } 385 + 386 + test "sign and verify round-trip - secp256k1" { 387 + // generate a deterministic keypair using a fixed seed 388 + const Scheme = crypto.sign.ecdsa.EcdsaSecp256k1Sha256; 389 + const sk_bytes = [_]u8{ 390 + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 391 + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 392 + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 393 + 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 394 + }; 395 + 396 + const message = "hello atproto"; 397 + const sig = try signSecp256k1(message, &sk_bytes); 398 + 399 + // verify low-S: s must be <= half_order 400 + const s = sig.bytes[32..64].*; 401 + try std.testing.expect(!bigEndianGt(s, secp256k1_half_order)); 402 + 403 + // verify with the corresponding public key 404 + const sk = try Scheme.SecretKey.fromBytes(sk_bytes); 405 + const kp = try Scheme.KeyPair.fromSecretKey(sk); 406 + const pk_bytes = kp.public_key.toCompressedSec1(); 407 + 408 + try verifySecp256k1(message, &sig.bytes, &pk_bytes); 409 + } 410 + 411 + test "sign and verify round-trip - P-256" { 412 + const Scheme = crypto.sign.ecdsa.EcdsaP256Sha256; 413 + const sk_bytes = [_]u8{ 414 + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 415 + 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 
0x2f, 0x30, 416 + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 417 + 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 418 + }; 419 + 420 + const message = "hello atproto p256"; 421 + const sig = try signP256(message, &sk_bytes); 422 + 423 + // verify low-S 424 + const s = sig.bytes[32..64].*; 425 + try std.testing.expect(!bigEndianGt(s, p256_half_order)); 426 + 427 + // verify with the corresponding public key 428 + const sk = try Scheme.SecretKey.fromBytes(sk_bytes); 429 + const kp = try Scheme.KeyPair.fromSecretKey(sk); 430 + const pk_bytes = kp.public_key.toCompressedSec1(); 431 + 432 + try verifyP256(message, &sig.bytes, &pk_bytes); 433 + } 434 + 435 + test "sign produces deterministic signatures" { 436 + const sk_bytes = [_]u8{ 437 + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 438 + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 439 + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 440 + 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 441 + }; 442 + const message = "deterministic test"; 443 + 444 + const sig1 = try signSecp256k1(message, &sk_bytes); 445 + const sig2 = try signSecp256k1(message, &sk_bytes); 446 + try std.testing.expectEqualSlices(u8, &sig1.bytes, &sig2.bytes); 447 + }
+781
src/internal/mst.zig
···
//! merkle search tree (MST)
//!
//! the AT Protocol repository data structure. a deterministic search tree
//! where each key's tree layer is derived from the leading zero bits of
//! SHA-256(key). keys are stored sorted within each node, with subtree
//! pointers interleaved between entries.
//!
//! see: https://atproto.com/specs/repository#mst-structure

const std = @import("std");
const cbor = @import("cbor.zig");
const multibase = @import("multibase.zig");
const Allocator = std.mem.Allocator;

/// MST layer of `key`: the number of leading zero bits of SHA-256(key),
/// halved and rounded down.
pub fn keyHeight(key: []const u8) u32 {
    var digest: [32]u8 = undefined;
    std.crypto.hash.sha2.Sha256.hash(key, &digest, .{});

    // whole zero bytes contribute 8 bits each; the first non-zero byte
    // contributes its own leading zeros and ends the count.
    var zero_bits: u32 = 0;
    for (digest) |byte| {
        if (byte != 0) return (zero_bits + @clz(byte)) / 2;
        zero_bits += 8;
    }
    return zero_bits / 2;
}

/// number of leading bytes that `a` and `b` have in common.
pub fn commonPrefixLen(a: []const u8, b: []const u8) usize {
    const shared = @min(a.len, b.len);
    for (a[0..shared], b[0..shared], 0..) |x, y, idx| {
        if (x != y) return idx;
    }
    // one string is a prefix of the other (or they are equal)
    return shared;
}

/// parse a CID string in base32lower multibase form (e.g. "bafyrei...").
/// returns `error.InvalidCid` for an empty string and
/// `error.UnsupportedEncoding` when the multibase prefix is not 'b'.
/// the decoded bytes are produced by `multibase.base32lower.decode` with
/// `allocator` — presumably owned by the caller; confirm against multibase.zig.
pub fn parseCidString(allocator: Allocator, s: []const u8) !cbor.Cid {
    if (s.len == 0) return error.InvalidCid;

    const prefix = s[0];
    const payload = s[1..];
    // only the 'b' (base32lower) multibase prefix is handled
    if (prefix != 'b') return error.UnsupportedEncoding;

    return .{ .raw = try multibase.base32lower.decode(allocator, payload) };
}

/// MST node. stores a left subtree pointer and a list of entries.
/// each entry has a key, CID value, and optional right subtree.
53 + const Node = struct { 54 + left: ?*Node, 55 + entries: std.ArrayList(Entry), 56 + 57 + const Entry = struct { 58 + key: []const u8, 59 + value: cbor.Cid, 60 + right: ?*Node, 61 + }; 62 + 63 + fn init() Node { 64 + return .{ 65 + .left = null, 66 + .entries = .{}, 67 + }; 68 + } 69 + }; 70 + 71 + /// merkle search tree 72 + pub const Mst = struct { 73 + allocator: Allocator, 74 + root: ?*Node, 75 + root_layer: ?u32, 76 + 77 + pub fn init(allocator: Allocator) Mst { 78 + return .{ 79 + .allocator = allocator, 80 + .root = null, 81 + .root_layer = null, 82 + }; 83 + } 84 + 85 + /// insert or update a key-value pair 86 + pub fn put(self: *Mst, key: []const u8, value: cbor.Cid) !void { 87 + const height = keyHeight(key); 88 + 89 + if (self.root == null) { 90 + // empty tree: create root at key's height 91 + const node = try self.createNode(); 92 + try node.entries.append(self.allocator, .{ 93 + .key = try self.allocator.dupe(u8, key), 94 + .value = value, 95 + .right = null, 96 + }); 97 + self.root = node; 98 + self.root_layer = height; 99 + return; 100 + } 101 + 102 + const root_layer = self.root_layer.?; 103 + 104 + if (height > root_layer) { 105 + // key belongs above the current root — lift 106 + self.root = try self.insertAbove(self.root.?, root_layer, key, value, height); 107 + self.root_layer = height; 108 + } else if (height == root_layer) { 109 + // key belongs at root layer 110 + self.root = try self.insertAtLayer(self.root.?, key, value, height); 111 + } else { 112 + // key belongs below — recurse into subtree 113 + try self.insertBelow(self.root.?, root_layer, key, value, height); 114 + } 115 + } 116 + 117 + /// look up a key, returning its CID value if present 118 + pub fn get(self: *const Mst, key: []const u8) ?cbor.Cid { 119 + return findKey(self.root, self.root_layer orelse return null, key, keyHeight(key)); 120 + } 121 + 122 + fn findKey(maybe_node: ?*Node, layer: u32, key: []const u8, height: u32) ?cbor.Cid { 123 + const node = maybe_node orelse 
return null; 124 + 125 + if (height == layer) { 126 + for (node.entries.items) |entry| { 127 + const cmp = std.mem.order(u8, key, entry.key); 128 + if (cmp == .eq) return entry.value; 129 + if (cmp == .lt) return null; 130 + } 131 + return null; 132 + } 133 + 134 + // height < layer: recurse into the subtree gap containing key 135 + for (node.entries.items, 0..) |entry, i| { 136 + if (std.mem.order(u8, key, entry.key) == .lt) { 137 + const subtree = if (i == 0) node.left else node.entries.items[i - 1].right; 138 + return findKey(subtree, layer - 1, key, height); 139 + } 140 + } 141 + // after all entries 142 + const last_right = if (node.entries.items.len > 0) 143 + node.entries.items[node.entries.items.len - 1].right 144 + else 145 + node.left; 146 + return findKey(last_right, layer - 1, key, height); 147 + } 148 + 149 + /// delete a key from the tree 150 + pub fn delete(self: *Mst, key: []const u8) !void { 151 + if (self.root == null) return; 152 + try self.deleteFromNode(self.root.?, self.root_layer.?, key); 153 + // trim: if root has no entries and only left subtree, collapse 154 + while (self.root) |root| { 155 + if (root.entries.items.len == 0) { 156 + if (root.left) |left| { 157 + self.root = left; 158 + if (self.root_layer.? > 0) { 159 + self.root_layer = self.root_layer.? - 1; 160 + } else { 161 + self.root = null; 162 + self.root_layer = null; 163 + break; 164 + } 165 + } else { 166 + self.root = null; 167 + self.root_layer = null; 168 + break; 169 + } 170 + } else break; 171 + } 172 + } 173 + 174 + fn deleteFromNode(self: *Mst, node: *Node, layer: u32, key: []const u8) !void { 175 + const height = keyHeight(key); 176 + 177 + if (height == layer) { 178 + // find and remove the entry 179 + for (node.entries.items, 0..) 
|entry, i| { 180 + if (std.mem.eql(u8, entry.key, key)) { 181 + // merge left and right subtrees around the deleted entry 182 + const left_sub = if (i == 0) node.left else node.entries.items[i - 1].right; 183 + const right_sub = entry.right; 184 + const merged = try self.mergeSubtrees(left_sub, right_sub); 185 + 186 + if (i == 0) { 187 + node.left = merged; 188 + } else { 189 + node.entries.items[i - 1].right = merged; 190 + } 191 + 192 + self.allocator.free(entry.key); 193 + _ = node.entries.orderedRemove(i); 194 + return; 195 + } 196 + } 197 + return; // key not found 198 + } 199 + 200 + // height < layer: recurse into the appropriate gap 201 + if (node.entries.items.len == 0) { 202 + if (node.left) |left| { 203 + try self.deleteFromNode(left, layer - 1, key); 204 + } 205 + return; 206 + } 207 + 208 + for (node.entries.items, 0..) |entry, i| { 209 + if (std.mem.order(u8, key, entry.key) == .lt) { 210 + const subtree = if (i == 0) &node.left else &node.entries.items[i - 1].right; 211 + if (subtree.*) |sub| { 212 + try self.deleteFromNode(sub, layer - 1, key); 213 + } 214 + return; 215 + } 216 + } 217 + // after all entries 218 + const last = &node.entries.items[node.entries.items.len - 1].right; 219 + if (last.*) |sub| { 220 + try self.deleteFromNode(sub, layer - 1, key); 221 + } 222 + } 223 + 224 + /// merge two subtrees that were separated by a deleted entry. 225 + /// both nodes are at the same layer. concatenate their entries 226 + /// and recursively merge if the junction creates adjacent children. 227 + /// follows the Go reference `appendMerge` / `mergeNodes` algorithm. 
fn mergeSubtrees(self: *Mst, left: ?*Node, right: ?*Node) MstError!?*Node {
    // an absent side contributes nothing: the other side is the result as-is
    const l = left orelse return right;
    const r = right orelse return left;

    // the merged node starts out as a copy of the left node:
    // its `left` pointer followed by all of its entries
    const merged = try self.createNode();
    merged.left = l.left;
    try merged.entries.appendSlice(self.allocator, l.entries.items);

    // junction: the rightmost pointer of the left half meets `r.left`.
    // when both are present they are merged recursively; when only `r.left`
    // is present it is adopted; otherwise the existing pointer is kept.
    if (merged.entries.items.len > 0) {
        const last = &merged.entries.items[merged.entries.items.len - 1];
        if (r.left != null) {
            last.right = if (last.right != null)
                try self.mergeSubtrees(last.right, r.left)
            else
                r.left;
        }
    } else if (r.left != null) {
        // left half has no entries: the junction is merged.left vs r.left
        merged.left = if (merged.left != null)
            try self.mergeSubtrees(merged.left, r.left)
        else
            r.left;
    }

    // right entries follow the junction unchanged
    try merged.entries.appendSlice(self.allocator, r.entries.items);

    return merged;
}

/// error set shared by the allocation-only tree operations below
const MstError = Allocator.Error;

/// compute the root CID of the tree.
/// an empty tree (null root) yields the CID of the empty node encoding.
pub fn rootCid(self: *Mst) MstError!cbor.Cid {
    return self.nodeCid(self.root);
}

/// CID of a (possibly absent) node: serialize it, then derive the CID from
/// the encoded bytes via `cbor.Cid.forDagCbor`. the temporary encoding is
/// freed before returning.
fn nodeCid(self: *Mst, maybe_node: ?*Node) MstError!cbor.Cid {
    const encoded = try self.serializeNode(maybe_node);
    defer self.allocator.free(encoded);
    return cbor.Cid.forDagCbor(self.allocator, encoded);
}

/// encode a node as a map with keys "e" (entry array) and "l" (left subtree
/// CID or null). each entry is a map with "k" (key suffix after the prefix
/// shared with the previous key), "p" (shared prefix length), "t" (right
/// subtree CID or null) and "v" (value CID).
/// caller owns the returned bytes (allocated with `self.allocator`).
fn serializeNode(self: *Mst, maybe_node: ?*Node) MstError![]u8 {
    const node = maybe_node orelse {
        // empty node: { "l": null, "e": [] }
        return cbor.encodeAlloc(self.allocator, .{ .map = &.{
            .{ .key = "e", .value = .{ .array = &.{} } },
            .{ .key = "l", .value = .null },
        } });
    };

    // NOTE(review): the child CIDs returned by nodeCid are not released in
    // this function — presumably fine under an arena; confirm against cbor.Cid.

    // compute left subtree CID
    const left_value: cbor.Value = if (node.left) |left| blk: {
        const left_cid = try self.nodeCid(left);
        break :blk .{ .cid = left_cid };
    } else .null;

    // build entry array with prefix compression.
    // every element appended below is a `.map` whose slice was allocated
    // here, so the cleanup releases each of them along with the list itself.
    var entry_values: std.ArrayList(cbor.Value) = .{};
    defer {
        for (entry_values.items) |entry_value| self.allocator.free(entry_value.map);
        entry_values.deinit(self.allocator);
    }

    var prev_key: []const u8 = "";
    for (node.entries.items) |entry| {
        const prefix_len = commonPrefixLen(prev_key, entry.key);
        const suffix = entry.key[prefix_len..];

        // right subtree CID
        const tree_val: cbor.Value = if (entry.right) |right| blk: {
            const right_cid = try self.nodeCid(right);
            break :blk .{ .cid = right_cid };
        } else .null;

        // allocate map entries on heap (stack-local &.{...} would alias across iterations)
        const map_entries = try self.allocator.alloc(cbor.Value.MapEntry, 4);
        // not yet tracked by entry_values: release it if the append fails
        errdefer self.allocator.free(map_entries);
        map_entries[0] = .{ .key = "k", .value = .{ .bytes = suffix } };
        map_entries[1] = .{ .key = "p", .value = .{ .unsigned = prefix_len } };
        map_entries[2] = .{ .key = "t", .value = tree_val };
        map_entries[3] = .{ .key = "v", .value = .{ .cid = entry.value } };

        try entry_values.append(self.allocator, .{ .map = map_entries });

        prev_key = entry.key;
    }

    // the list's own items are encoded directly; no extra copy is needed
    return cbor.encodeAlloc(self.allocator, .{ .map = &.{
        .{ .key = "e", .value = .{ .array = entry_values.items } },
        .{ .key = "l", .value = left_value },
    } });
}

// === internal helpers ===

337 + fn createNode(self: *Mst) !*Node { 338 + const node = try self.allocator.create(Node); 339 + node.* = Node.init(); 340 + return node; 341 + } 342 + 343 + /// insert a key that belongs above the current root. 344 + /// splits the tree at its own layer, wraps each half in parent nodes 345 + /// to bridge the layer gap, then assembles the new root. 346 + fn insertAbove(self: *Mst, node: *Node, node_layer: u32, key: []const u8, value: cbor.Cid, target_layer: u32) !*Node { 347 + // 1. split the tree at its current layer around the key 348 + const splits = try self.splitNode(node, key); 349 + var left = splits.left; 350 + var right = splits.right; 351 + 352 + // 2. wrap each half in parent layers (bridge the gap) 353 + // "extraLayersToAdd = keyZeros - layer" 354 + // "intentionally starting at 1, since first layer is taken care of by split" 355 + const extra_layers = target_layer - node_layer; 356 + var i: u32 = 1; 357 + while (i < extra_layers) : (i += 1) { 358 + if (left) |l| { 359 + const parent = try self.createNode(); 360 + parent.left = l; 361 + left = parent; 362 + } 363 + if (right) |r| { 364 + const parent = try self.createNode(); 365 + parent.left = r; 366 + right = parent; 367 + } 368 + } 369 + 370 + // 3. assemble new root: [left_tree, key_leaf, right_tree] 371 + const new_root = try self.createNode(); 372 + new_root.left = left; 373 + try new_root.entries.append(self.allocator, .{ 374 + .key = try self.allocator.dupe(u8, key), 375 + .value = value, 376 + .right = right, 377 + }); 378 + return new_root; 379 + } 380 + 381 + /// insert a key at the same layer as the node 382 + fn insertAtLayer(self: *Mst, node: *Node, key: []const u8, value: cbor.Cid, layer: u32) !*Node { 383 + _ = layer; 384 + // find insertion position 385 + var insert_idx: usize = node.entries.items.len; 386 + for (node.entries.items, 0..) 
|entry, i| { 387 + const cmp = std.mem.order(u8, key, entry.key); 388 + if (cmp == .eq) { 389 + // update existing 390 + node.entries.items[i].value = value; 391 + return node; 392 + } 393 + if (cmp == .lt) { 394 + insert_idx = i; 395 + break; 396 + } 397 + } 398 + 399 + // split the subtree that spans the insertion gap 400 + const gap_subtree = if (insert_idx == 0) node.left else node.entries.items[insert_idx - 1].right; 401 + 402 + var left_split: ?*Node = null; 403 + var right_split: ?*Node = null; 404 + 405 + if (gap_subtree) |subtree| { 406 + const splits = try self.splitNode(subtree, key); 407 + left_split = splits.left; 408 + right_split = splits.right; 409 + } 410 + 411 + // update the pointer before the gap 412 + if (insert_idx == 0) { 413 + node.left = left_split; 414 + } else { 415 + node.entries.items[insert_idx - 1].right = left_split; 416 + } 417 + 418 + // insert the new entry 419 + try node.entries.insert(self.allocator, insert_idx, .{ 420 + .key = try self.allocator.dupe(u8, key), 421 + .value = value, 422 + .right = right_split, 423 + }); 424 + 425 + return node; 426 + } 427 + 428 + /// insert a key below the current node's layer 429 + fn insertBelow(self: *Mst, node: *Node, node_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) !void { 430 + // find which gap the key falls into 431 + for (node.entries.items, 0..) 
|entry, i| { 432 + const cmp = std.mem.order(u8, key, entry.key); 433 + if (cmp == .eq) { 434 + // update existing 435 + node.entries.items[i].value = value; 436 + return; 437 + } 438 + if (cmp == .lt) { 439 + // key goes in the gap before this entry 440 + const subtree_ptr = if (i == 0) &node.left else &node.entries.items[i - 1].right; 441 + try self.insertIntoGap(subtree_ptr, node_layer - 1, key, value, target_height); 442 + return; 443 + } 444 + } 445 + // key goes after all entries 446 + const last_ptr = if (node.entries.items.len > 0) 447 + &node.entries.items[node.entries.items.len - 1].right 448 + else 449 + &node.left; 450 + try self.insertIntoGap(last_ptr, node_layer - 1, key, value, target_height); 451 + } 452 + 453 + fn insertIntoGap(self: *Mst, subtree_ptr: *?*Node, gap_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) MstError!void { 454 + if (target_height == gap_layer) { 455 + // insert at this layer 456 + if (subtree_ptr.*) |existing| { 457 + subtree_ptr.* = try self.insertAtLayer(existing, key, value, gap_layer); 458 + } else { 459 + const new_node = try self.createNode(); 460 + try new_node.entries.append(self.allocator, .{ 461 + .key = try self.allocator.dupe(u8, key), 462 + .value = value, 463 + .right = null, 464 + }); 465 + subtree_ptr.* = new_node; 466 + } 467 + } else if (target_height > gap_layer) { 468 + // need to lift — split and wrap 469 + if (subtree_ptr.*) |existing| { 470 + subtree_ptr.* = try self.insertAbove(existing, gap_layer, key, value, target_height); 471 + } else { 472 + const new_node = try self.createNode(); 473 + try new_node.entries.append(self.allocator, .{ 474 + .key = try self.allocator.dupe(u8, key), 475 + .value = value, 476 + .right = null, 477 + }); 478 + subtree_ptr.* = new_node; 479 + } 480 + } else { 481 + // target_height < gap_layer: recurse deeper 482 + if (subtree_ptr.*) |existing| { 483 + try self.insertBelow(existing, gap_layer, key, value, target_height); 484 + } else { 485 + // create 
node at gap_layer and recurse 486 + const new_node = try self.createNode(); 487 + subtree_ptr.* = new_node; 488 + try self.insertBelow(new_node, gap_layer, key, value, target_height); 489 + } 490 + } 491 + } 492 + 493 + /// split a subtree around a key: everything < key goes left, everything >= key goes right. 494 + /// follows the Go reference: find split point among leaf entries, then recursively 495 + /// split the subtree in the gap if needed. 496 + fn splitNode(self: *Mst, node: *Node, key: []const u8) !struct { left: ?*Node, right: ?*Node } { 497 + // find the first entry >= key 498 + var split_idx: usize = node.entries.items.len; 499 + for (node.entries.items, 0..) |entry, i| { 500 + if (std.mem.order(u8, key, entry.key) != .gt) { 501 + split_idx = i; 502 + break; 503 + } 504 + } 505 + 506 + // left gets entries [0..split_idx), right gets entries [split_idx..] 507 + var left_node = try self.createNode(); 508 + var right_node = try self.createNode(); 509 + 510 + // left node takes the original node's left subtree 511 + left_node.left = node.left; 512 + 513 + // copy entries to left 514 + for (node.entries.items[0..split_idx]) |entry| { 515 + try left_node.entries.append(self.allocator, entry); 516 + } 517 + 518 + // copy entries to right 519 + for (node.entries.items[split_idx..]) |entry| { 520 + try right_node.entries.append(self.allocator, entry); 521 + } 522 + 523 + // the subtree between the last left entry and first right entry may need recursive splitting. 524 + // in our representation: this is the right pointer of the last left entry (or left's left if no entries) 525 + // for the right node, its "left" is initially null — we need to set it from the gap. 
526 + 527 + // split the gap subtree between the two halves 528 + if (left_node.entries.items.len > 0) { 529 + const last_left = &left_node.entries.items[left_node.entries.items.len - 1]; 530 + if (last_left.right) |gap_subtree| { 531 + const sub_split = try self.splitNode(gap_subtree, key); 532 + last_left.right = sub_split.left; 533 + right_node.left = sub_split.right; 534 + } 535 + } else if (left_node.left != null and split_idx == 0) { 536 + // all entries went right — the gap is the original node's left subtree 537 + const sub_split = try self.splitNode(left_node.left.?, key); 538 + left_node.left = sub_split.left; 539 + right_node.left = sub_split.right; 540 + } 541 + 542 + const left_result: ?*Node = if (left_node.entries.items.len > 0 or left_node.left != null) left_node else null; 543 + const right_result: ?*Node = if (right_node.entries.items.len > 0 or right_node.left != null) right_node else null; 544 + 545 + return .{ .left = left_result, .right = right_result }; 546 + } 547 + }; 548 + 549 + // === tests === 550 + 551 + test "keyHeight" { 552 + // values from interop test fixtures 553 + try std.testing.expectEqual(@as(u32, 0), keyHeight("")); 554 + try std.testing.expectEqual(@as(u32, 0), keyHeight("asdf")); 555 + try std.testing.expectEqual(@as(u32, 1), keyHeight("blue")); 556 + try std.testing.expectEqual(@as(u32, 0), keyHeight("2653ae71")); 557 + try std.testing.expectEqual(@as(u32, 2), keyHeight("88bfafc7")); 558 + try std.testing.expectEqual(@as(u32, 4), keyHeight("2a92d355")); 559 + try std.testing.expectEqual(@as(u32, 6), keyHeight("884976f5")); 560 + try std.testing.expectEqual(@as(u32, 4), keyHeight("app.bsky.feed.post/454397e440ec")); 561 + try std.testing.expectEqual(@as(u32, 8), keyHeight("app.bsky.feed.post/9adeb165882c")); 562 + } 563 + 564 + test "commonPrefixLen" { 565 + try std.testing.expectEqual(@as(usize, 0), commonPrefixLen("", "")); 566 + try std.testing.expectEqual(@as(usize, 3), commonPrefixLen("abc", "abc")); 567 + try 
std.testing.expectEqual(@as(usize, 0), commonPrefixLen("", "abc")); 568 + try std.testing.expectEqual(@as(usize, 2), commonPrefixLen("ab", "abc")); 569 + try std.testing.expectEqual(@as(usize, 3), commonPrefixLen("abcde", "abc")); 570 + try std.testing.expectEqual(@as(usize, 0), commonPrefixLen("abcde", "qbb")); 571 + } 572 + 573 + test "put and get" { 574 + const alloc = std.testing.allocator; 575 + var arena = std.heap.ArenaAllocator.init(alloc); 576 + defer arena.deinit(); 577 + const a = arena.allocator(); 578 + 579 + var tree = Mst.init(a); 580 + 581 + const cid1 = try cbor.Cid.forDagCbor(a, "value1"); 582 + const cid2 = try cbor.Cid.forDagCbor(a, "value2"); 583 + 584 + try tree.put("key1", cid1); 585 + try tree.put("key2", cid2); 586 + 587 + const got1 = tree.get("key1") orelse return error.NotFound; 588 + try std.testing.expectEqualSlices(u8, cid1.raw, got1.raw); 589 + 590 + const got2 = tree.get("key2") orelse return error.NotFound; 591 + try std.testing.expectEqualSlices(u8, cid2.raw, got2.raw); 592 + 593 + try std.testing.expect(tree.get("nonexistent") == null); 594 + } 595 + 596 + test "put and delete" { 597 + const alloc = std.testing.allocator; 598 + var arena = std.heap.ArenaAllocator.init(alloc); 599 + defer arena.deinit(); 600 + const a = arena.allocator(); 601 + 602 + var tree = Mst.init(a); 603 + const cid = try cbor.Cid.forDagCbor(a, "value"); 604 + 605 + try tree.put("key1", cid); 606 + try tree.put("key2", cid); 607 + 608 + try std.testing.expect(tree.get("key1") != null); 609 + try tree.delete("key1"); 610 + try std.testing.expect(tree.get("key1") == null); 611 + try std.testing.expect(tree.get("key2") != null); 612 + } 613 + 614 + test "rootCid is deterministic" { 615 + const alloc = std.testing.allocator; 616 + var arena = std.heap.ArenaAllocator.init(alloc); 617 + defer arena.deinit(); 618 + const a = arena.allocator(); 619 + 620 + const cid_val = try cbor.Cid.forDagCbor(a, "leaf"); 621 + 622 + // build tree 1 623 + var tree1 = Mst.init(a); 
624 + try tree1.put("a", cid_val); 625 + try tree1.put("b", cid_val); 626 + const root1 = try tree1.rootCid(); 627 + 628 + // build tree 2 (same keys, same order) 629 + var tree2 = Mst.init(a); 630 + try tree2.put("a", cid_val); 631 + try tree2.put("b", cid_val); 632 + const root2 = try tree2.rootCid(); 633 + 634 + try std.testing.expectEqualSlices(u8, root1.raw, root2.raw); 635 + } 636 + 637 + test "empty tree rootCid matches reference" { 638 + const alloc = std.testing.allocator; 639 + var arena = std.heap.ArenaAllocator.init(alloc); 640 + defer arena.deinit(); 641 + const a = arena.allocator(); 642 + 643 + var tree = Mst.init(a); 644 + const root = try tree.rootCid(); 645 + try std.testing.expectEqual(@as(u64, 1), root.version().?); 646 + 647 + // known empty tree CID from Go reference implementation 648 + const expected = try parseCidString(a, "bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm"); 649 + try std.testing.expectEqualSlices(u8, expected.raw, root.raw); 650 + } 651 + 652 + test "single key rootCid matches reference" { 653 + const alloc = std.testing.allocator; 654 + var arena = std.heap.ArenaAllocator.init(alloc); 655 + defer arena.deinit(); 656 + const a = arena.allocator(); 657 + 658 + var tree = Mst.init(a); 659 + // use a known CID value (the leaf CID from commit-proof fixtures) 660 + const leaf_cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 661 + 662 + // single layer-0 key 663 + try tree.put("com.example.record/3jqfcqzm3fo2j", leaf_cid); 664 + 665 + const root = try tree.rootCid(); 666 + const expected = try parseCidString(a, "bafyreibj4lsc3aqnrvphp5xmrnfoorvru4wynt6lwidqbm2623a6tatzdu"); 667 + try std.testing.expectEqualSlices(u8, expected.raw, root.raw); 668 + } 669 + 670 + test "single layer-2 key rootCid matches reference" { 671 + const alloc = std.testing.allocator; 672 + var arena = std.heap.ArenaAllocator.init(alloc); 673 + defer arena.deinit(); 674 + const a = arena.allocator(); 675 
+ 676 + var tree = Mst.init(a); 677 + const leaf_cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 678 + 679 + // single layer-2 key 680 + try tree.put("com.example.record/3jqfcqzm3fx2j", leaf_cid); 681 + 682 + const root = try tree.rootCid(); 683 + const expected = try parseCidString(a, "bafyreih7wfei65pxzhauoibu3ls7jgmkju4bspy4t2ha2qdjnzqvoy33ai"); 684 + try std.testing.expectEqualSlices(u8, expected.raw, root.raw); 685 + } 686 + 687 + test "5 key tree matches reference" { 688 + const alloc = std.testing.allocator; 689 + var arena = std.heap.ArenaAllocator.init(alloc); 690 + defer arena.deinit(); 691 + const a = arena.allocator(); 692 + 693 + var tree = Mst.init(a); 694 + const leaf_cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 695 + 696 + // 5 keys from Go test (note: last key has 4fc not 3ft) 697 + const keys = [_][]const u8{ 698 + "com.example.record/3jqfcqzm3fp2j", 699 + "com.example.record/3jqfcqzm3fr2j", 700 + "com.example.record/3jqfcqzm3fs2j", 701 + "com.example.record/3jqfcqzm3ft2j", 702 + "com.example.record/3jqfcqzm4fc2j", 703 + }; 704 + 705 + for (keys) |key| { 706 + try tree.put(key, leaf_cid); 707 + } 708 + 709 + const root = try tree.rootCid(); 710 + const expected = try parseCidString(a, "bafyreicmahysq4n6wfuxo522m6dpiy7z7qzym3dzs756t5n7nfdgccwq7m"); 711 + try std.testing.expectEqualSlices(u8, expected.raw, root.raw); 712 + } 713 + 714 + test "two deep split fixture" { 715 + const alloc = std.testing.allocator; 716 + var arena = std.heap.ArenaAllocator.init(alloc); 717 + defer arena.deinit(); 718 + const a = arena.allocator(); 719 + 720 + const leaf_cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 721 + 722 + var tree = Mst.init(a); 723 + const initial_keys = [_][]const u8{ 724 + "A0/374913", "B1/986427", "C0/451630", 725 + "E0/670489", "F1/085263", "G0/765327", 726 + }; 727 + for (initial_keys) |key| { 728 + try 
tree.put(key, leaf_cid); 729 + } 730 + 731 + const expected_before = try parseCidString(a, "bafyreicraprx2xwnico4tuqir3ozsxpz46qkcpox3obf5bagicqwurghpy"); 732 + try std.testing.expectEqualSlices(u8, expected_before.raw, (try tree.rootCid()).raw); 733 + 734 + try tree.put("D2/269196", leaf_cid); 735 + 736 + const expected_after = try parseCidString(a, "bafyreihvay6pazw3dfa47u5d2tn3rd6pa57sr37bo5bqyvjuqc73ib65my"); 737 + try std.testing.expectEqualSlices(u8, expected_after.raw, (try tree.rootCid()).raw); 738 + } 739 + 740 + test "complex multi-op commit" { 741 + const alloc = std.testing.allocator; 742 + var arena = std.heap.ArenaAllocator.init(alloc); 743 + defer arena.deinit(); 744 + const a = arena.allocator(); 745 + 746 + const leaf_cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 747 + 748 + var tree = Mst.init(a); 749 + const initial_keys = [_][]const u8{ 750 + "B0/601692", "C2/014073", "D0/952776", 751 + "E2/819540", "F0/697858", "H0/131238", 752 + }; 753 + for (initial_keys) |key| { 754 + try tree.put(key, leaf_cid); 755 + } 756 + 757 + const expected_before = try parseCidString(a, "bafyreigr3plnts7dax6yokvinbhcqpyicdfgg6npvvyx6okc5jo55slfqi"); 758 + try std.testing.expectEqualSlices(u8, expected_before.raw, (try tree.rootCid()).raw); 759 + 760 + // adds 761 + try tree.put("A2/827942", leaf_cid); 762 + try tree.put("G2/611528", leaf_cid); 763 + // del 764 + try tree.delete("C2/014073"); 765 + 766 + const expected_after = try parseCidString(a, "bafyreiftrcrbhrwmi37u4egedlg56gk3jeh3tvmqvwgowoifuklfysyx54"); 767 + try std.testing.expectEqualSlices(u8, expected_after.raw, (try tree.rootCid()).raw); 768 + } 769 + 770 + test "parseCidString" { 771 + const alloc = std.testing.allocator; 772 + var arena = std.heap.ArenaAllocator.init(alloc); 773 + defer arena.deinit(); 774 + const a = arena.allocator(); 775 + 776 + const cid = try parseCidString(a, "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"); 777 + try 
std.testing.expectEqual(@as(u64, 1), cid.version().?); 778 + try std.testing.expectEqual(@as(u64, 0x71), cid.codec().?); 779 + try std.testing.expectEqual(@as(u64, 0x12), cid.hashFn().?); 780 + try std.testing.expectEqual(@as(usize, 32), cid.digest().?.len); 781 + }
+225 -31
src/internal/multibase.zig
··· 1 - //! multibase decoder 2 //! 3 - //! decodes multibase-encoded strings (prefix + encoded data). 4 - //! currently supports base58btc (z prefix) for DID document public keys. 5 //! 6 //! see: https://github.com/multiformats/multibase 7 ··· 10 /// multibase encoding types 11 pub const Encoding = enum { 12 base58btc, // z prefix 13 14 pub fn fromPrefix(prefix: u8) ?Encoding { 15 return switch (prefix) { 16 'z' => .base58btc, 17 else => null, 18 }; 19 } ··· 28 29 return switch (encoding) { 30 .base58btc => try base58btc.decode(allocator, input[1..]), 31 }; 32 } 33 34 - /// base58btc decoder (bitcoin alphabet) 35 pub const base58btc = struct { 36 /// bitcoin base58 alphabet 37 const alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; ··· 45 break :blk table; 46 }; 47 48 /// decode base58btc string to bytes 49 pub fn decode(allocator: std.mem.Allocator, input: []const u8) ![]u8 { 50 if (input.len == 0) return allocator.alloc(u8, 0); ··· 56 leading_zeros += 1; 57 } 58 59 - // estimate output size: each base58 char represents ~5.86 bits 60 - // use a simple overestimate: input.len bytes is more than enough 61 - const max_output = input.len; 62 - const result = try allocator.alloc(u8, max_output); 63 - errdefer allocator.free(result); 64 - 65 // decode using big integer arithmetic 66 - // accumulator = accumulator * 58 + digit 67 var acc = try std.math.big.int.Managed.init(allocator); 68 defer acc.deinit(); 69 ··· 75 76 for (input) |c| { 77 const digit = decode_table[c]; 78 - if (digit < 0) { 79 - allocator.free(result); 80 - return error.InvalidCharacter; 81 - } 82 83 - // acc = acc * 58 + digit 84 try temp.mul(&acc, &multiplier); 85 try acc.copy(temp.toConst()); 86 try acc.addScalar(&acc, @as(u8, @intCast(digit))); 87 } 88 89 - // convert big int to bytes (big-endian for base58) 90 const limbs = acc.toConst().limbs; 91 const limb_count = acc.len(); 92 93 - // calculate byte size from limbs 94 var byte_count: usize = 0; 95 if (limb_count > 0 and 
!acc.toConst().eqlZero()) { 96 - const bit_count = acc.toConst().bitCountAbs(); 97 - byte_count = (bit_count + 7) / 8; 98 } 99 100 - // write bytes in big-endian order 101 - var output_bytes = try allocator.alloc(u8, leading_zeros + byte_count); 102 - errdefer allocator.free(output_bytes); 103 - 104 - // leading zeros 105 - @memset(output_bytes[0..leading_zeros], 0); 106 107 // convert limbs to big-endian bytes 108 if (byte_count > 0) { 109 - const output_slice = output_bytes[leading_zeros..]; 110 - 111 - // limbs are in little-endian order, we need big-endian output 112 var pos: usize = byte_count; 113 for (limbs[0..limb_count]) |limb| { 114 const limb_bytes = @sizeOf(@TypeOf(limb)); ··· 120 } 121 } 122 123 - allocator.free(result); 124 - return output_bytes; 125 } 126 }; 127 ··· 188 // compressed point prefix should be 0x02 or 0x03 189 try std.testing.expect(parsed.raw[0] == 0x02 or parsed.raw[0] == 0x03); 190 }
··· 1 + //! multibase codec 2 //! 3 + //! encodes and decodes multibase-encoded strings (prefix + encoded data). 4 + //! supports base58btc (z prefix) and base32lower (b prefix). 5 //! 6 //! see: https://github.com/multiformats/multibase 7 ··· 10 /// multibase encoding types 11 pub const Encoding = enum { 12 base58btc, // z prefix 13 + base32lower, // b prefix 14 15 pub fn fromPrefix(prefix: u8) ?Encoding { 16 return switch (prefix) { 17 'z' => .base58btc, 18 + 'b' => .base32lower, 19 else => null, 20 }; 21 } ··· 30 31 return switch (encoding) { 32 .base58btc => try base58btc.decode(allocator, input[1..]), 33 + .base32lower => try base32lower.decode(allocator, input[1..]), 34 }; 35 } 36 37 + /// encode raw bytes to a multibase string with the given encoding 38 + pub fn encode(allocator: std.mem.Allocator, encoding: Encoding, data: []const u8) ![]u8 { 39 + return switch (encoding) { 40 + .base58btc => try base58btc.encode(allocator, data), 41 + .base32lower => try base32lower.encode(allocator, data), 42 + }; 43 + } 44 + 45 + /// base58btc codec (bitcoin alphabet) 46 pub const base58btc = struct { 47 /// bitcoin base58 alphabet 48 const alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; ··· 56 break :blk table; 57 }; 58 59 + /// encode bytes to base58btc string with 'z' multibase prefix 60 + pub fn encode(allocator: std.mem.Allocator, input: []const u8) ![]u8 { 61 + // count leading zero bytes → leading '1's 62 + var leading_zeros: usize = 0; 63 + for (input) |b| { 64 + if (b != 0) break; 65 + leading_zeros += 1; 66 + } 67 + 68 + if (input.len == 0 or leading_zeros == input.len) { 69 + // all zeros (or empty) 70 + const result = try allocator.alloc(u8, 1 + leading_zeros); 71 + result[0] = 'z'; // multibase prefix 72 + @memset(result[1..], '1'); 73 + return result; 74 + } 75 + 76 + // load bytes into big integer (big-endian) 77 + var acc = try std.math.big.int.Managed.init(allocator); 78 + defer acc.deinit(); 79 + 80 + for (input) |b| { 81 + try 
acc.shiftLeft(&acc, 8); 82 + try acc.addScalar(&acc, b); 83 + } 84 + 85 + // repeatedly divide by 58 to extract base58 digits 86 + var digits: std.ArrayList(u8) = .{}; 87 + defer digits.deinit(allocator); 88 + 89 + var divisor = try std.math.big.int.Managed.initSet(allocator, @as(u64, 58)); 90 + defer divisor.deinit(); 91 + 92 + var quotient = try std.math.big.int.Managed.init(allocator); 93 + defer quotient.deinit(); 94 + 95 + var remainder = try std.math.big.int.Managed.init(allocator); 96 + defer remainder.deinit(); 97 + 98 + while (!acc.toConst().eqlZero()) { 99 + try quotient.divFloor(&remainder, &acc, &divisor); 100 + const digit: usize = @intCast(remainder.toConst().toInt(u64) catch 0); 101 + try digits.append(allocator, alphabet[digit]); 102 + try acc.copy(quotient.toConst()); 103 + } 104 + 105 + // result: 'z' prefix + leading '1's + reversed digits 106 + const total_len = 1 + leading_zeros + digits.items.len; 107 + const result = try allocator.alloc(u8, total_len); 108 + result[0] = 'z'; // multibase prefix 109 + @memset(result[1 .. 1 + leading_zeros], '1'); 110 + 111 + // digits were accumulated LSB-first, reverse into result 112 + const digit_slice = result[1 + leading_zeros ..]; 113 + for (digits.items, 0..) 
|d, i| { 114 + digit_slice[digits.items.len - 1 - i] = d; 115 + } 116 + 117 + return result; 118 + } 119 + 120 /// decode base58btc string to bytes 121 pub fn decode(allocator: std.mem.Allocator, input: []const u8) ![]u8 { 122 if (input.len == 0) return allocator.alloc(u8, 0); ··· 128 leading_zeros += 1; 129 } 130 131 // decode using big integer arithmetic 132 var acc = try std.math.big.int.Managed.init(allocator); 133 defer acc.deinit(); 134 ··· 140 141 for (input) |c| { 142 const digit = decode_table[c]; 143 + if (digit < 0) return error.InvalidCharacter; 144 145 try temp.mul(&acc, &multiplier); 146 try acc.copy(temp.toConst()); 147 try acc.addScalar(&acc, @as(u8, @intCast(digit))); 148 } 149 150 + // convert big int to bytes (big-endian) 151 const limbs = acc.toConst().limbs; 152 const limb_count = acc.len(); 153 154 var byte_count: usize = 0; 155 if (limb_count > 0 and !acc.toConst().eqlZero()) { 156 + byte_count = (acc.toConst().bitCountAbs() + 7) / 8; 157 } 158 159 + const result = try allocator.alloc(u8, leading_zeros + byte_count); 160 + @memset(result[0..leading_zeros], 0); 161 162 // convert limbs to big-endian bytes 163 if (byte_count > 0) { 164 + const output_slice = result[leading_zeros..]; 165 var pos: usize = byte_count; 166 for (limbs[0..limb_count]) |limb| { 167 const limb_bytes = @sizeOf(@TypeOf(limb)); ··· 173 } 174 } 175 176 + return result; 177 + } 178 + }; 179 + 180 + /// base32lower codec (RFC 4648, lowercase, no padding) 181 + pub const base32lower = struct { 182 + const alphabet = "abcdefghijklmnopqrstuvwxyz234567"; 183 + 184 + const decode_table: [256]i8 = blk: { 185 + var table: [256]i8 = .{-1} ** 256; 186 + for (alphabet, 0..) 
|c, i| { 187 + table[c] = @intCast(i); 188 + } 189 + break :blk table; 190 + }; 191 + 192 + /// encode bytes to base32lower string with 'b' multibase prefix 193 + pub fn encode(allocator: std.mem.Allocator, input: []const u8) ![]u8 { 194 + if (input.len == 0) { 195 + const result = try allocator.alloc(u8, 1); 196 + result[0] = 'b'; 197 + return result; 198 + } 199 + 200 + // base32: 5 bytes → 8 chars 201 + const out_len = (input.len * 8 + 4) / 5; // ceil(bits / 5) 202 + const result = try allocator.alloc(u8, 1 + out_len); 203 + result[0] = 'b'; // multibase prefix 204 + 205 + var bit_buf: u32 = 0; 206 + var bits: u5 = 0; 207 + var pos: usize = 1; 208 + 209 + for (input) |byte| { 210 + bit_buf = (bit_buf << 8) | byte; 211 + bits += 8; 212 + while (bits >= 5) { 213 + bits -= 5; 214 + const idx: u5 = @truncate(bit_buf >> bits); 215 + result[pos] = alphabet[idx]; 216 + pos += 1; 217 + } 218 + } 219 + 220 + // remaining bits (left-aligned) 221 + if (bits > 0) { 222 + const idx: u5 = @truncate(bit_buf << (@as(u5, 5) - bits)); 223 + result[pos] = alphabet[idx]; 224 + pos += 1; 225 + } 226 + 227 + return result[0..pos]; 228 + } 229 + 230 + /// decode base32lower string (no multibase prefix) to bytes 231 + pub fn decode(allocator: std.mem.Allocator, input: []const u8) ![]u8 { 232 + if (input.len == 0) return allocator.alloc(u8, 0); 233 + 234 + const out_len = input.len * 5 / 8; 235 + const result = try allocator.alloc(u8, out_len); 236 + errdefer allocator.free(result); 237 + 238 + var bit_buf: u32 = 0; 239 + var bits: u4 = 0; 240 + var pos: usize = 0; 241 + 242 + for (input) |c| { 243 + if (c == '=') break; // stop at padding 244 + const digit = decode_table[c]; 245 + if (digit < 0) return error.InvalidCharacter; 246 + 247 + bit_buf = (bit_buf << 5) | @as(u32, @intCast(digit)); 248 + bits += 5; 249 + if (bits >= 8) { 250 + bits -= 8; 251 + result[pos] = @truncate(bit_buf >> bits); 252 + pos += 1; 253 + } 254 + } 255 + 256 + return allocator.realloc(result, pos); 257 } 258 
}; 259 ··· 320 // compressed point prefix should be 0x02 or 0x03 321 try std.testing.expect(parsed.raw[0] == 0x02 or parsed.raw[0] == 0x03); 322 } 323 + 324 + test "base58btc encode-decode round-trip" { 325 + const alloc = std.testing.allocator; 326 + 327 + { 328 + const original = "abc"; 329 + const encoded = try base58btc.encode(alloc, original); 330 + defer alloc.free(encoded); 331 + // should have 'z' prefix 332 + try std.testing.expectEqual(@as(u8, 'z'), encoded[0]); 333 + 334 + const decoded = try decode(alloc, encoded); 335 + defer alloc.free(decoded); 336 + try std.testing.expectEqualSlices(u8, original, decoded); 337 + } 338 + 339 + // round-trip with leading zeros 340 + { 341 + const original = &[_]u8{ 0, 0, 0x01 }; 342 + const encoded = try base58btc.encode(alloc, original); 343 + defer alloc.free(encoded); 344 + const decoded = try decode(alloc, encoded); 345 + defer alloc.free(decoded); 346 + try std.testing.expectEqualSlices(u8, original, decoded); 347 + } 348 + } 349 + 350 + test "base32lower encode-decode round-trip" { 351 + const alloc = std.testing.allocator; 352 + 353 + { 354 + const original = "hello"; 355 + const encoded = try base32lower.encode(alloc, original); 356 + defer alloc.free(encoded); 357 + try std.testing.expectEqual(@as(u8, 'b'), encoded[0]); 358 + 359 + const decoded = try decode(alloc, encoded); 360 + defer alloc.free(decoded); 361 + try std.testing.expectEqualSlices(u8, original, decoded); 362 + } 363 + 364 + // empty 365 + { 366 + const encoded = try base32lower.encode(alloc, ""); 367 + defer alloc.free(encoded); 368 + try std.testing.expectEqualStrings("b", encoded); 369 + } 370 + } 371 + 372 + test "base32lower decode bafyrei prefix" { 373 + const alloc = std.testing.allocator; 374 + // CIDv1 dag-cbor sha2-256 always starts with "bafyrei" in base32lower 375 + // "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454" 376 + const input = "afyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"; 377 + const decoded = 
try base32lower.decode(alloc, input); 378 + defer alloc.free(decoded); 379 + // CIDv1: version=1(0x01), codec=dag-cbor(0x71), hash=sha2-256(0x12), len=32(0x20) 380 + try std.testing.expectEqual(@as(u8, 0x01), decoded[0]); 381 + try std.testing.expectEqual(@as(u8, 0x71), decoded[1]); 382 + try std.testing.expectEqual(@as(u8, 0x12), decoded[2]); 383 + try std.testing.expectEqual(@as(u8, 0x20), decoded[3]); 384 + }
+95
src/internal/multicodec.zig
··· 51 return error.UnsupportedKeyType; 52 } 53 54 // === tests === 55 56 test "parse secp256k1 key" { ··· 88 const data = [_]u8{0xe7}; 89 try std.testing.expectError(error.TooShort, parsePublicKey(&data)); 90 }
··· 51 return error.UnsupportedKeyType; 52 } 53 54 + /// encode a raw public key with multicodec prefix 55 + pub fn encodePublicKey(allocator: std.mem.Allocator, key_type: KeyType, raw: []const u8) ![]u8 { 56 + if (raw.len != 33) return error.InvalidKeyLength; 57 + 58 + const result = try allocator.alloc(u8, 2 + raw.len); 59 + switch (key_type) { 60 + .secp256k1 => { 61 + result[0] = 0xe7; 62 + result[1] = 0x01; 63 + }, 64 + .p256 => { 65 + result[0] = 0x80; 66 + result[1] = 0x24; 67 + }, 68 + } 69 + @memcpy(result[2..], raw); 70 + return result; 71 + } 72 + 73 + /// format a raw public key as a did:key string 74 + pub fn formatDidKey(allocator: std.mem.Allocator, key_type: KeyType, raw: []const u8) ![]u8 { 75 + const multibase = @import("multibase.zig"); 76 + 77 + const mc_bytes = try encodePublicKey(allocator, key_type, raw); 78 + defer allocator.free(mc_bytes); 79 + 80 + const multibase_str = try multibase.encode(allocator, .base58btc, mc_bytes); 81 + defer allocator.free(multibase_str); 82 + 83 + // "did:key:" + multibase string (which already has 'z' prefix) 84 + const prefix = "did:key:"; 85 + const result = try allocator.alloc(u8, prefix.len + multibase_str.len); 86 + @memcpy(result[0..prefix.len], prefix); 87 + @memcpy(result[prefix.len..], multibase_str); 88 + return result; 89 + } 90 + 91 // === tests === 92 93 test "parse secp256k1 key" { ··· 125 const data = [_]u8{0xe7}; 126 try std.testing.expectError(error.TooShort, parsePublicKey(&data)); 127 } 128 + 129 + test "encode-decode round-trip secp256k1" { 130 + const alloc = std.testing.allocator; 131 + var raw: [33]u8 = undefined; 132 + raw[0] = 0x02; 133 + @memset(raw[1..], 0xaa); 134 + 135 + const encoded = try encodePublicKey(alloc, .secp256k1, &raw); 136 + defer alloc.free(encoded); 137 + 138 + const parsed = try parsePublicKey(encoded); 139 + try std.testing.expectEqual(KeyType.secp256k1, parsed.key_type); 140 + try std.testing.expectEqualSlices(u8, &raw, parsed.raw); 141 + } 142 + 143 + test 
"did:key round-trip secp256k1" { 144 + const alloc = std.testing.allocator; 145 + const multibase = @import("multibase.zig"); 146 + 147 + var raw: [33]u8 = undefined; 148 + raw[0] = 0x02; 149 + @memset(raw[1..], 0xcc); 150 + 151 + const did_key_str = try formatDidKey(alloc, .secp256k1, &raw); 152 + defer alloc.free(did_key_str); 153 + 154 + // should start with "did:key:z" 155 + try std.testing.expect(std.mem.startsWith(u8, did_key_str, "did:key:z")); 156 + 157 + // parse back: strip "did:key:" prefix, decode multibase, parse multicodec 158 + const multibase_str = did_key_str["did:key:".len..]; 159 + const mc_bytes = try multibase.decode(alloc, multibase_str); 160 + defer alloc.free(mc_bytes); 161 + 162 + const parsed = try parsePublicKey(mc_bytes); 163 + try std.testing.expectEqual(KeyType.secp256k1, parsed.key_type); 164 + try std.testing.expectEqualSlices(u8, &raw, parsed.raw); 165 + } 166 + 167 + test "did:key round-trip p256" { 168 + const alloc = std.testing.allocator; 169 + const multibase = @import("multibase.zig"); 170 + 171 + var raw: [33]u8 = undefined; 172 + raw[0] = 0x03; 173 + @memset(raw[1..], 0xdd); 174 + 175 + const did_key_str = try formatDidKey(alloc, .p256, &raw); 176 + defer alloc.free(did_key_str); 177 + 178 + const multibase_str = did_key_str["did:key:".len..]; 179 + const mc_bytes = try multibase.decode(alloc, multibase_str); 180 + defer alloc.free(mc_bytes); 181 + 182 + const parsed = try parsePublicKey(mc_bytes); 183 + try std.testing.expectEqual(KeyType.p256, parsed.key_type); 184 + try std.testing.expectEqualSlices(u8, &raw, parsed.raw); 185 + }
+3
src/root.zig
··· 27 pub const multibase = @import("internal/multibase.zig"); 28 pub const multicodec = @import("internal/multicodec.zig"); 29 30 // sync / firehose 31 const sync = @import("internal/sync.zig"); 32 pub const CommitAction = sync.CommitAction;
··· 27 pub const multibase = @import("internal/multibase.zig"); 28 pub const multicodec = @import("internal/multicodec.zig"); 29 30 + // mst 31 + pub const mst = @import("internal/mst.zig"); 32 + 33 // sync / firehose 34 const sync = @import("internal/sync.zig"); 35 pub const CommitAction = sync.CommitAction;