zat.dev — atproto SDK for Zig
atproto utilities: repository MST, CAR, commit verification

feat: sync 1.1 — partial MST, commit diff verification via inversion

ChildRef union (none/node/stub) for partial trees loaded from CAR blocks.
putReturn/deleteReturn return displaced CIDs, copy() for deep clone,
loadFromBlocks() deserializes commit CARs into partial MSTs.

Operation/normalizeOps/invertOp implement inductive firehose validation:
inverting a commit's ops against the post-commit MST root must recover the prevData CID.

verifyCommitDiff() is the top-level pipeline: parse CAR, verify signature,
load partial MST, copy, normalize, invert, compare root to prevData.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+983 -178
+600 -138
src/internal/repo/mst.zig
··· 5 5 //! SHA-256(key). keys are stored sorted within each node, with subtree 6 6 //! pointers interleaved between entries. 7 7 //! 8 + //! supports partial trees for sync 1.1: nodes not present in a CAR are 9 + //! represented as stubs (known CID, no block data). operations that need 10 + //! to descend into a stub return error.PartialTree. 11 + //! 8 12 //! see: https://atproto.com/specs/repository#mst-structure 9 13 10 14 const std = @import("std"); 11 15 const cbor = @import("cbor.zig"); 16 + const car = @import("car.zig"); 12 17 const multibase = @import("../crypto/multibase.zig"); 13 18 const Allocator = std.mem.Allocator; 14 19 ··· 48 53 return .{ .raw = raw }; 49 54 } 50 55 56 + /// reference to a child subtree — either a loaded node, a stub (CID only), or absent. 57 + pub const ChildRef = union(enum) { 58 + none, 59 + node: *Node, 60 + stub: cbor.Cid, // known CID, block not in CAR 61 + 62 + fn toNode(self: ChildRef) ?*Node { 63 + return switch (self) { 64 + .node => |n| n, 65 + else => null, 66 + }; 67 + } 68 + 69 + fn isPresent(self: ChildRef) bool { 70 + return self != .none; 71 + } 72 + }; 73 + 51 74 /// MST node. stores a left subtree pointer and a list of entries. 52 75 /// each entry has a key, CID value, and optional right subtree. 
53 - const Node = struct { 54 - left: ?*Node, 76 + pub const Node = struct { 77 + left: ChildRef, 55 78 entries: std.ArrayList(Entry), 56 79 57 - const Entry = struct { 80 + pub const Entry = struct { 58 81 key: []const u8, 59 82 value: cbor.Cid, 60 - right: ?*Node, 83 + right: ChildRef, 61 84 }; 62 85 63 86 fn init() Node { 64 87 return .{ 65 - .left = null, 88 + .left = .none, 66 89 .entries = .{}, 67 90 }; 68 91 } 69 92 }; 70 93 94 + /// an operation on the MST (create, update, or delete a record) 95 + pub const Operation = struct { 96 + path: []const u8, // "collection/rkey" 97 + value: ?[]const u8, // raw CID bytes — non-null for create/update 98 + prev: ?[]const u8, // raw CID bytes — non-null for update/delete 99 + 100 + fn isCreate(self: Operation) bool { 101 + return self.value != null and self.prev == null; 102 + } 103 + 104 + fn isDelete(self: Operation) bool { 105 + return self.value == null and self.prev != null; 106 + } 107 + 108 + fn isUpdate(self: Operation) bool { 109 + return self.value != null and self.prev != null; 110 + } 111 + }; 112 + 71 113 /// merkle search tree 72 114 pub const Mst = struct { 73 115 allocator: Allocator, ··· 84 126 85 127 /// insert or update a key-value pair 86 128 pub fn put(self: *Mst, key: []const u8, value: cbor.Cid) !void { 129 + _ = try self.putReturn(key, value); 130 + } 131 + 132 + /// insert or update a key-value pair, returning the previous value CID if it existed 133 + pub fn putReturn(self: *Mst, key: []const u8, value: cbor.Cid) !?cbor.Cid { 87 134 const height = keyHeight(key); 88 135 89 136 if (self.root == null) { ··· 92 139 try node.entries.append(self.allocator, .{ 93 140 .key = try self.allocator.dupe(u8, key), 94 141 .value = value, 95 - .right = null, 142 + .right = .none, 96 143 }); 97 144 self.root = node; 98 145 self.root_layer = height; 99 - return; 146 + return null; 100 147 } 101 148 102 149 const root_layer = self.root_layer.?; 103 150 104 151 if (height > root_layer) { 105 - // key belongs 
above the current root — lift 152 + // key belongs above the current root — lift (new key, never an update) 106 153 self.root = try self.insertAbove(self.root.?, root_layer, key, value, height); 107 154 self.root_layer = height; 155 + return null; 108 156 } else if (height == root_layer) { 109 157 // key belongs at root layer 110 - self.root = try self.insertAtLayer(self.root.?, key, value, height); 158 + var prev: ?cbor.Cid = null; 159 + self.root = try self.insertAtLayer(self.root.?, key, value, height, &prev); 160 + return prev; 111 161 } else { 112 162 // key belongs below — recurse into subtree 113 - try self.insertBelow(self.root.?, root_layer, key, value, height); 163 + return try self.insertBelow(self.root.?, root_layer, key, value, height); 114 164 } 115 165 } 116 166 ··· 134 184 // height < layer: recurse into the subtree gap containing key 135 185 for (node.entries.items, 0..) |entry, i| { 136 186 if (std.mem.order(u8, key, entry.key) == .lt) { 137 - const subtree = if (i == 0) node.left else node.entries.items[i - 1].right; 138 - return findKey(subtree, layer - 1, key, height); 187 + const child = if (i == 0) node.left else node.entries.items[i - 1].right; 188 + return findKey(child.toNode(), layer - 1, key, height); 139 189 } 140 190 } 141 191 // after all entries ··· 143 193 node.entries.items[node.entries.items.len - 1].right 144 194 else 145 195 node.left; 146 - return findKey(last_right, layer - 1, key, height); 196 + return findKey(last_right.toNode(), layer - 1, key, height); 147 197 } 148 198 149 199 /// delete a key from the tree 150 200 pub fn delete(self: *Mst, key: []const u8) !void { 151 - if (self.root == null) return; 152 - try self.deleteFromNode(self.root.?, self.root_layer.?, key); 201 + _ = try self.deleteReturn(key); 202 + } 203 + 204 + /// delete a key from the tree, returning the removed value CID if it existed 205 + pub fn deleteReturn(self: *Mst, key: []const u8) !?cbor.Cid { 206 + if (self.root == null) return null; 207 + const 
prev = try self.deleteFromNode(self.root.?, self.root_layer.?, key); 153 208 // trim: if root has no entries and only left subtree, collapse 154 209 while (self.root) |root| { 155 210 if (root.entries.items.len == 0) { 156 - if (root.left) |left| { 157 - self.root = left; 158 - if (self.root_layer.? > 0) { 159 - self.root_layer = self.root_layer.? - 1; 160 - } else { 211 + switch (root.left) { 212 + .node => |left| { 213 + self.root = left; 214 + if (self.root_layer.? > 0) { 215 + self.root_layer = self.root_layer.? - 1; 216 + } else { 217 + self.root = null; 218 + self.root_layer = null; 219 + break; 220 + } 221 + }, 222 + .stub => return error.PartialTree, 223 + .none => { 161 224 self.root = null; 162 225 self.root_layer = null; 163 226 break; 164 - } 165 - } else { 166 - self.root = null; 167 - self.root_layer = null; 168 - break; 227 + }, 169 228 } 170 229 } else break; 171 230 } 231 + return prev; 172 232 } 173 233 174 - fn deleteFromNode(self: *Mst, node: *Node, layer: u32, key: []const u8) !void { 234 + fn deleteFromNode(self: *Mst, node: *Node, layer: u32, key: []const u8) !?cbor.Cid { 175 235 const height = keyHeight(key); 176 236 177 237 if (height == layer) { 178 238 // find and remove the entry 179 239 for (node.entries.items, 0..) 
|entry, i| { 180 240 if (std.mem.eql(u8, entry.key, key)) { 241 + const prev_value = entry.value; 181 242 // merge left and right subtrees around the deleted entry 182 243 const left_sub = if (i == 0) node.left else node.entries.items[i - 1].right; 183 244 const right_sub = entry.right; ··· 191 252 192 253 self.allocator.free(entry.key); 193 254 _ = node.entries.orderedRemove(i); 194 - return; 255 + return prev_value; 195 256 } 196 257 } 197 - return; // key not found 258 + return null; // key not found 198 259 } 199 260 200 261 // height < layer: recurse into the appropriate gap 201 262 if (node.entries.items.len == 0) { 202 - if (node.left) |left| { 203 - try self.deleteFromNode(left, layer - 1, key); 263 + switch (node.left) { 264 + .node => |left| return try self.deleteFromNode(left, layer - 1, key), 265 + .stub => return error.PartialTree, 266 + .none => return null, 204 267 } 205 - return; 206 268 } 207 269 208 270 for (node.entries.items, 0..) |entry, i| { 209 271 if (std.mem.order(u8, key, entry.key) == .lt) { 210 - const subtree = if (i == 0) &node.left else &node.entries.items[i - 1].right; 211 - if (subtree.*) |sub| { 212 - try self.deleteFromNode(sub, layer - 1, key); 272 + const child_ref = if (i == 0) &node.left else &node.entries.items[i - 1].right; 273 + switch (child_ref.*) { 274 + .node => |sub| return try self.deleteFromNode(sub, layer - 1, key), 275 + .stub => return error.PartialTree, 276 + .none => return null, 213 277 } 214 - return; 215 278 } 216 279 } 217 280 // after all entries 218 281 const last = &node.entries.items[node.entries.items.len - 1].right; 219 - if (last.*) |sub| { 220 - try self.deleteFromNode(sub, layer - 1, key); 282 + switch (last.*) { 283 + .node => |sub| return try self.deleteFromNode(sub, layer - 1, key), 284 + .stub => return error.PartialTree, 285 + .none => return null, 221 286 } 222 287 } 223 288 ··· 225 290 /// both nodes are at the same layer. 
concatenate their entries 226 291 /// and recursively merge if the junction creates adjacent children. 227 292 /// follows the Go reference `appendMerge` / `mergeNodes` algorithm. 228 - fn mergeSubtrees(self: *Mst, left: ?*Node, right: ?*Node) !?*Node { 229 - if (left == null) return right; 230 - if (right == null) return left; 293 + fn mergeSubtrees(self: *Mst, left: ChildRef, right: ChildRef) !ChildRef { 294 + if (left == .none) return right; 295 + if (right == .none) return left; 231 296 232 - const l = left.?; 233 - const r = right.?; 297 + const l = switch (left) { 298 + .node => |n| n, 299 + .stub => return error.PartialTree, 300 + .none => unreachable, 301 + }; 302 + const r = switch (right) { 303 + .node => |n| n, 304 + .stub => return error.PartialTree, 305 + .none => unreachable, 306 + }; 234 307 235 308 // create merged node: takes left's `left` pointer and all entries from both 236 309 const merged = try self.createNode(); ··· 244 317 // check junction: last entry of left's `right` vs right's `left` 245 318 if (merged.entries.items.len > 0) { 246 319 const last = &merged.entries.items[merged.entries.items.len - 1]; 247 - if (last.right != null and r.left != null) { 320 + if (last.right.isPresent() and r.left.isPresent()) { 248 321 // both sides of the junction are subtrees — recursively merge 249 322 last.right = try self.mergeSubtrees(last.right, r.left); 250 - } else if (last.right == null and r.left != null) { 323 + } else if (!last.right.isPresent() and r.left.isPresent()) { 251 324 last.right = r.left; 252 325 } 253 - // if last.right != null and r.left == null, keep last.right as-is 326 + // if last.right is present and r.left is not, keep last.right as-is 254 327 } else { 255 328 // left has no entries: junction is merged.left vs r.left 256 - if (merged.left != null and r.left != null) { 329 + if (merged.left.isPresent() and r.left.isPresent()) { 257 330 merged.left = try self.mergeSubtrees(merged.left, r.left); 258 - } else if (merged.left == 
null) { 331 + } else if (!merged.left.isPresent()) { 259 332 merged.left = r.left; 260 333 } 261 334 } ··· 265 338 try merged.entries.append(self.allocator, entry); 266 339 } 267 340 268 - return merged; 341 + return .{ .node = merged }; 269 342 } 270 343 271 - const MstError = Allocator.Error; 344 + pub const MstError = error{PartialTree} || Allocator.Error; 272 345 273 346 /// compute the root CID of the tree 274 347 pub fn rootCid(self: *Mst) MstError!cbor.Cid { 275 - return self.nodeCid(self.root); 348 + if (self.root) |root| { 349 + return self.nodeCid(.{ .node = root }); 350 + } 351 + return self.nodeCid(.none); 276 352 } 277 353 278 - fn nodeCid(self: *Mst, maybe_node: ?*Node) MstError!cbor.Cid { 279 - const encoded = try self.serializeNode(maybe_node); 280 - defer self.allocator.free(encoded); 281 - return cbor.Cid.forDagCbor(self.allocator, encoded); 354 + fn nodeCid(self: *Mst, child: ChildRef) MstError!cbor.Cid { 355 + switch (child) { 356 + .stub => |cid| return cid, 357 + .none => { 358 + // empty node: { "l": null, "e": [] } 359 + const encoded = try cbor.encodeAlloc(self.allocator, .{ .map = &.{ 360 + .{ .key = "e", .value = .{ .array = &.{} } }, 361 + .{ .key = "l", .value = .null }, 362 + } }); 363 + defer self.allocator.free(encoded); 364 + return cbor.Cid.forDagCbor(self.allocator, encoded); 365 + }, 366 + .node => |node| { 367 + const encoded = try self.serializeNode(node); 368 + defer self.allocator.free(encoded); 369 + return cbor.Cid.forDagCbor(self.allocator, encoded); 370 + }, 371 + } 282 372 } 283 373 284 - fn serializeNode(self: *Mst, maybe_node: ?*Node) MstError![]u8 { 285 - const node = maybe_node orelse { 286 - // empty node: { "l": null, "e": [] } 287 - return cbor.encodeAlloc(self.allocator, .{ .map = &.{ 288 - .{ .key = "e", .value = .{ .array = &.{} } }, 289 - .{ .key = "l", .value = .null }, 290 - } }); 291 - }; 292 - 374 + fn serializeNode(self: *Mst, node: *Node) MstError![]u8 { 293 375 // compute left subtree CID 294 - const 
left_value: cbor.Value = if (node.left) |left| blk: { 295 - const left_cid = try self.nodeCid(left); 296 - break :blk .{ .cid = left_cid }; 297 - } else .null; 376 + const left_value: cbor.Value = switch (node.left) { 377 + .node => |left| blk: { 378 + const left_cid = try self.nodeCid(.{ .node = left }); 379 + break :blk .{ .cid = left_cid }; 380 + }, 381 + .stub => |cid| .{ .cid = cid }, 382 + .none => .null, 383 + }; 298 384 299 385 // build entry array with prefix compression 300 386 var entry_values: std.ArrayList(cbor.Value) = .{}; ··· 306 392 const suffix = entry.key[prefix_len..]; 307 393 308 394 // right subtree CID 309 - const tree_val: cbor.Value = if (entry.right) |right| blk: { 310 - const right_cid = try self.nodeCid(right); 311 - break :blk .{ .cid = right_cid }; 312 - } else .null; 395 + const tree_val: cbor.Value = switch (entry.right) { 396 + .node => |right| blk: { 397 + const right_cid = try self.nodeCid(.{ .node = right }); 398 + break :blk .{ .cid = right_cid }; 399 + }, 400 + .stub => |cid| .{ .cid = cid }, 401 + .none => .null, 402 + }; 313 403 314 404 // allocate map entries on heap (stack-local &.{...} would alias across iterations) 315 405 const map_entries = try self.allocator.alloc(cbor.Value.MapEntry, 4); ··· 332 422 } }); 333 423 } 334 424 425 + /// deep copy the tree. shares key slices and CID raw slices (immutable). 426 + /// stubs stay as stubs. 
427 + pub fn copy(self: *Mst) !Mst { 428 + var new = Mst.init(self.allocator); 429 + if (self.root) |root| { 430 + new.root = try self.copyNode(root); 431 + } 432 + new.root_layer = self.root_layer; 433 + return new; 434 + } 435 + 436 + fn copyNode(self: *Mst, node: *Node) !*Node { 437 + const new_node = try self.createNode(); 438 + new_node.left = try self.copyChild(node.left); 439 + for (node.entries.items) |entry| { 440 + try new_node.entries.append(self.allocator, .{ 441 + .key = try self.allocator.dupe(u8, entry.key), 442 + .value = entry.value, 443 + .right = try self.copyChild(entry.right), 444 + }); 445 + } 446 + return new_node; 447 + } 448 + 449 + fn copyChild(self: *Mst, child: ChildRef) Allocator.Error!ChildRef { 450 + return switch (child) { 451 + .none => .none, 452 + .stub => |cid| .{ .stub = cid }, 453 + .node => |n| .{ .node = try self.copyNode(n) }, 454 + }; 455 + } 456 + 457 + /// load a partial MST from CAR blocks. nodes present in the CAR are 458 + /// fully loaded; child CIDs not present become stubs. 
459 + pub fn loadFromBlocks(allocator: Allocator, repo_car: car.Car, root_cid_raw: []const u8) !Mst { 460 + const root_data = car.findBlock(repo_car, root_cid_raw) orelse return error.CommitBlockNotFound; 461 + const root_node_data = try decodeMstNode(allocator, root_data); 462 + 463 + if (root_node_data.entries.len == 0 and root_node_data.left == null) { 464 + return Mst.init(allocator); 465 + } 466 + 467 + const root_node = try loadNodeFromData(allocator, repo_car, root_node_data); 468 + 469 + // root layer = key height of first entry 470 + var key_buf: [512]u8 = undefined; 471 + const first = root_node_data.entries[0]; 472 + @memcpy(key_buf[0..first.key_suffix.len], first.key_suffix); 473 + const root_layer = keyHeight(key_buf[0..first.key_suffix.len]); 474 + 475 + return .{ 476 + .allocator = allocator, 477 + .root = root_node, 478 + .root_layer = root_layer, 479 + }; 480 + } 481 + 482 + fn loadNodeFromData(allocator: Allocator, repo_car: car.Car, data: MstNodeData) !*Node { 483 + const node = try allocator.create(Node); 484 + node.* = Node.init(); 485 + 486 + // load left child 487 + node.left = if (data.left) |left_cid_raw| 488 + try loadChild(allocator, repo_car, left_cid_raw) 489 + else 490 + .none; 491 + 492 + // load entries, reconstructing full keys from prefix compression 493 + var prev_key: []const u8 = ""; 494 + for (data.entries) |entry_data| { 495 + // reconstruct full key 496 + const full_key = try allocator.alloc(u8, entry_data.prefix_len + entry_data.key_suffix.len); 497 + if (entry_data.prefix_len > 0) { 498 + @memcpy(full_key[0..entry_data.prefix_len], prev_key[0..entry_data.prefix_len]); 499 + } 500 + @memcpy(full_key[entry_data.prefix_len..], entry_data.key_suffix); 501 + 502 + const right_child = if (entry_data.tree) |tree_cid_raw| 503 + try loadChild(allocator, repo_car, tree_cid_raw) 504 + else 505 + ChildRef.none; 506 + 507 + try node.entries.append(allocator, .{ 508 + .key = full_key, 509 + .value = .{ .raw = entry_data.value }, 510 + 
.right = right_child, 511 + }); 512 + 513 + prev_key = full_key; 514 + } 515 + 516 + return node; 517 + } 518 + 519 + fn loadChild(allocator: Allocator, repo_car: car.Car, cid_raw: []const u8) (MstDecodeError || error{CommitBlockNotFound})!ChildRef { 520 + if (car.findBlock(repo_car, cid_raw)) |block_data| { 521 + const child_data = try decodeMstNode(allocator, block_data); 522 + return .{ .node = try loadNodeFromData(allocator, repo_car, child_data) }; 523 + } 524 + // block not in CAR — stub 525 + return .{ .stub = .{ .raw = cid_raw } }; 526 + } 527 + 335 528 // === internal helpers === 336 529 337 530 fn createNode(self: *Mst) !*Node { ··· 350 543 var right = splits.right; 351 544 352 545 // 2. wrap each half in parent layers (bridge the gap) 353 - // "extraLayersToAdd = keyZeros - layer" 354 - // "intentionally starting at 1, since first layer is taken care of by split" 355 546 const extra_layers = target_layer - node_layer; 356 547 var i: u32 = 1; 357 548 while (i < extra_layers) : (i += 1) { 358 - if (left) |l| { 549 + if (left.isPresent()) { 359 550 const parent = try self.createNode(); 360 - parent.left = l; 361 - left = parent; 551 + parent.left = left; 552 + left = .{ .node = parent }; 362 553 } 363 - if (right) |r| { 554 + if (right.isPresent()) { 364 555 const parent = try self.createNode(); 365 - parent.left = r; 366 - right = parent; 556 + parent.left = right; 557 + right = .{ .node = parent }; 367 558 } 368 559 } 369 560 ··· 379 570 } 380 571 381 572 /// insert a key at the same layer as the node 382 - fn insertAtLayer(self: *Mst, node: *Node, key: []const u8, value: cbor.Cid, layer: u32) !*Node { 573 + fn insertAtLayer(self: *Mst, node: *Node, key: []const u8, value: cbor.Cid, layer: u32, prev_out: *?cbor.Cid) !*Node { 383 574 _ = layer; 384 575 // find insertion position 385 576 var insert_idx: usize = node.entries.items.len; 386 577 for (node.entries.items, 0..) 
|entry, i| { 387 578 const cmp = std.mem.order(u8, key, entry.key); 388 579 if (cmp == .eq) { 389 - // update existing 580 + // update existing — return previous value 581 + prev_out.* = node.entries.items[i].value; 390 582 node.entries.items[i].value = value; 391 583 return node; 392 584 } ··· 397 589 } 398 590 399 591 // split the subtree that spans the insertion gap 400 - const gap_subtree = if (insert_idx == 0) node.left else node.entries.items[insert_idx - 1].right; 592 + const gap_child = if (insert_idx == 0) node.left else node.entries.items[insert_idx - 1].right; 401 593 402 - var left_split: ?*Node = null; 403 - var right_split: ?*Node = null; 594 + var left_split: ChildRef = .none; 595 + var right_split: ChildRef = .none; 404 596 405 - if (gap_subtree) |subtree| { 406 - const splits = try self.splitNode(subtree, key); 407 - left_split = splits.left; 408 - right_split = splits.right; 597 + switch (gap_child) { 598 + .node => |subtree| { 599 + const splits = try self.splitNode(subtree, key); 600 + left_split = splits.left; 601 + right_split = splits.right; 602 + }, 603 + .stub => return error.PartialTree, 604 + .none => {}, 409 605 } 410 606 411 607 // update the pointer before the gap ··· 426 622 } 427 623 428 624 /// insert a key below the current node's layer 429 - fn insertBelow(self: *Mst, node: *Node, node_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) !void { 625 + fn insertBelow(self: *Mst, node: *Node, node_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) !?cbor.Cid { 430 626 // find which gap the key falls into 431 627 for (node.entries.items, 0..) 
|entry, i| { 432 628 const cmp = std.mem.order(u8, key, entry.key); 433 629 if (cmp == .eq) { 434 630 // update existing 631 + const prev = node.entries.items[i].value; 435 632 node.entries.items[i].value = value; 436 - return; 633 + return prev; 437 634 } 438 635 if (cmp == .lt) { 439 636 // key goes in the gap before this entry 440 637 const subtree_ptr = if (i == 0) &node.left else &node.entries.items[i - 1].right; 441 - try self.insertIntoGap(subtree_ptr, node_layer - 1, key, value, target_height); 442 - return; 638 + return try self.insertIntoGap(subtree_ptr, node_layer - 1, key, value, target_height); 443 639 } 444 640 } 445 641 // key goes after all entries ··· 447 643 &node.entries.items[node.entries.items.len - 1].right 448 644 else 449 645 &node.left; 450 - try self.insertIntoGap(last_ptr, node_layer - 1, key, value, target_height); 646 + return try self.insertIntoGap(last_ptr, node_layer - 1, key, value, target_height); 451 647 } 452 648 453 - fn insertIntoGap(self: *Mst, subtree_ptr: *?*Node, gap_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) MstError!void { 649 + fn insertIntoGap(self: *Mst, subtree_ptr: *ChildRef, gap_layer: u32, key: []const u8, value: cbor.Cid, target_height: u32) MstError!?cbor.Cid { 454 650 if (target_height == gap_layer) { 455 651 // insert at this layer 456 - if (subtree_ptr.*) |existing| { 457 - subtree_ptr.* = try self.insertAtLayer(existing, key, value, gap_layer); 458 - } else { 459 - const new_node = try self.createNode(); 460 - try new_node.entries.append(self.allocator, .{ 461 - .key = try self.allocator.dupe(u8, key), 462 - .value = value, 463 - .right = null, 464 - }); 465 - subtree_ptr.* = new_node; 652 + switch (subtree_ptr.*) { 653 + .node => |existing| { 654 + var prev: ?cbor.Cid = null; 655 + subtree_ptr.* = .{ .node = try self.insertAtLayer(existing, key, value, gap_layer, &prev) }; 656 + return prev; 657 + }, 658 + .stub => return error.PartialTree, 659 + .none => { 660 + const new_node = try 
self.createNode(); 661 + try new_node.entries.append(self.allocator, .{ 662 + .key = try self.allocator.dupe(u8, key), 663 + .value = value, 664 + .right = .none, 665 + }); 666 + subtree_ptr.* = .{ .node = new_node }; 667 + return null; 668 + }, 466 669 } 467 670 } else if (target_height > gap_layer) { 468 671 // need to lift — split and wrap 469 - if (subtree_ptr.*) |existing| { 470 - subtree_ptr.* = try self.insertAbove(existing, gap_layer, key, value, target_height); 471 - } else { 472 - const new_node = try self.createNode(); 473 - try new_node.entries.append(self.allocator, .{ 474 - .key = try self.allocator.dupe(u8, key), 475 - .value = value, 476 - .right = null, 477 - }); 478 - subtree_ptr.* = new_node; 672 + switch (subtree_ptr.*) { 673 + .node => |existing| { 674 + subtree_ptr.* = .{ .node = try self.insertAbove(existing, gap_layer, key, value, target_height) }; 675 + return null; 676 + }, 677 + .stub => return error.PartialTree, 678 + .none => { 679 + const new_node = try self.createNode(); 680 + try new_node.entries.append(self.allocator, .{ 681 + .key = try self.allocator.dupe(u8, key), 682 + .value = value, 683 + .right = .none, 684 + }); 685 + subtree_ptr.* = .{ .node = new_node }; 686 + return null; 687 + }, 479 688 } 480 689 } else { 481 690 // target_height < gap_layer: recurse deeper 482 - if (subtree_ptr.*) |existing| { 483 - try self.insertBelow(existing, gap_layer, key, value, target_height); 484 - } else { 485 - // create node at gap_layer and recurse 486 - const new_node = try self.createNode(); 487 - subtree_ptr.* = new_node; 488 - try self.insertBelow(new_node, gap_layer, key, value, target_height); 691 + switch (subtree_ptr.*) { 692 + .node => |existing| return try self.insertBelow(existing, gap_layer, key, value, target_height), 693 + .stub => return error.PartialTree, 694 + .none => { 695 + // create node at gap_layer and recurse 696 + const new_node = try self.createNode(); 697 + subtree_ptr.* = .{ .node = new_node }; 698 + return try 
self.insertBelow(new_node, gap_layer, key, value, target_height); 699 + }, 489 700 } 490 701 } 491 702 } 492 703 493 704 /// split a subtree around a key: everything < key goes left, everything >= key goes right. 494 - /// follows the Go reference: find split point among leaf entries, then recursively 495 - /// split the subtree in the gap if needed. 496 - fn splitNode(self: *Mst, node: *Node, key: []const u8) !struct { left: ?*Node, right: ?*Node } { 705 + fn splitNode(self: *Mst, node: *Node, key: []const u8) !struct { left: ChildRef, right: ChildRef } { 497 706 // find the first entry >= key 498 707 var split_idx: usize = node.entries.items.len; 499 708 for (node.entries.items, 0..) |entry, i| { ··· 520 729 try right_node.entries.append(self.allocator, entry); 521 730 } 522 731 523 - // the subtree between the last left entry and first right entry may need recursive splitting. 524 - // in our representation: this is the right pointer of the last left entry (or left's left if no entries) 525 - // for the right node, its "left" is initially null — we need to set it from the gap. 
526 - 527 732 // split the gap subtree between the two halves 528 733 if (left_node.entries.items.len > 0) { 529 734 const last_left = &left_node.entries.items[left_node.entries.items.len - 1]; 530 - if (last_left.right) |gap_subtree| { 531 - const sub_split = try self.splitNode(gap_subtree, key); 532 - last_left.right = sub_split.left; 533 - right_node.left = sub_split.right; 735 + switch (last_left.right) { 736 + .node => |gap_subtree| { 737 + const sub_split = try self.splitNode(gap_subtree, key); 738 + last_left.right = sub_split.left; 739 + right_node.left = sub_split.right; 740 + }, 741 + .stub => return error.PartialTree, 742 + .none => {}, 534 743 } 535 - } else if (left_node.left != null and split_idx == 0) { 744 + } else if (left_node.left.isPresent() and split_idx == 0) { 536 745 // all entries went right — the gap is the original node's left subtree 537 - const sub_split = try self.splitNode(left_node.left.?, key); 538 - left_node.left = sub_split.left; 539 - right_node.left = sub_split.right; 746 + switch (left_node.left) { 747 + .node => |gap_subtree| { 748 + const sub_split = try self.splitNode(gap_subtree, key); 749 + left_node.left = sub_split.left; 750 + right_node.left = sub_split.right; 751 + }, 752 + .stub => return error.PartialTree, 753 + .none => {}, 754 + } 540 755 } 541 756 542 - const left_result: ?*Node = if (left_node.entries.items.len > 0 or left_node.left != null) left_node else null; 543 - const right_result: ?*Node = if (right_node.entries.items.len > 0 or right_node.left != null) right_node else null; 757 + const left_result: ChildRef = if (left_node.entries.items.len > 0 or left_node.left.isPresent()) 758 + .{ .node = left_node } 759 + else 760 + .none; 761 + const right_result: ChildRef = if (right_node.entries.items.len > 0 or right_node.left.isPresent()) 762 + .{ .node = right_node } 763 + else 764 + .none; 544 765 545 766 return .{ .left = left_result, .right = right_result }; 546 767 } 547 768 }; 769 + 770 + // === inversion 
primitives === 771 + 772 + /// normalize operations: check for duplicate paths, sort deletions first then by path 773 + pub fn normalizeOps(allocator: Allocator, ops: []const Operation) ![]Operation { 774 + if (ops.len == 0) return try allocator.alloc(Operation, 0); 775 + 776 + const sorted = try allocator.dupe(Operation, ops); 777 + errdefer allocator.free(sorted); 778 + 779 + // sort: deletions first, then by path 780 + std.mem.sort(Operation, sorted, {}, struct { 781 + fn lessThan(_: void, a: Operation, b: Operation) bool { 782 + // deletions before creates/updates 783 + const a_del: u1 = if (a.isDelete()) 0 else 1; 784 + const b_del: u1 = if (b.isDelete()) 0 else 1; 785 + if (a_del != b_del) return a_del < b_del; 786 + return std.mem.order(u8, a.path, b.path) == .lt; 787 + } 788 + }.lessThan); 789 + 790 + // check for duplicate paths 791 + var i: usize = 1; 792 + while (i < sorted.len) : (i += 1) { 793 + if (std.mem.eql(u8, sorted[i].path, sorted[i - 1].path)) { 794 + allocator.free(sorted); 795 + return error.DuplicatePath; 796 + } 797 + } 798 + 799 + return sorted; 800 + } 801 + 802 + /// invert a single operation against the tree. 803 + /// create → delete, update → reverse update, delete → put back 804 + pub fn invertOp(tree: *Mst, op: Operation) !void { 805 + if (op.isCreate()) { 806 + // create → delete: remove the path, verify removed CID matches op.value 807 + const removed = try tree.deleteReturn(op.path) orelse return error.InversionMismatch; 808 + if (!std.mem.eql(u8, removed.raw, op.value.?)) return error.InversionMismatch; 809 + } else if (op.isUpdate()) { 810 + // update → reverse: put op.prev back, verify displaced CID matches op.value 811 + const displaced = try tree.putReturn(op.path, .{ .raw = op.prev.? 
// --- src/internal/repo/mst.zig (continued patch view, cleaned) ---
// NOTE(review): the head of invertOp() lies above this hunk; only the
// update/delete arms of the inversion logic are visible here.

        }) orelse return error.InversionMismatch;
        if (!std.mem.eql(u8, displaced.raw, op.value.?)) return error.InversionMismatch;
    } else if (op.isDelete()) {
        // inverting a delete: re-insert op.prev. the path must have been
        // absent in the post-commit tree, so putReturn must displace nothing.
        const displaced = try tree.putReturn(op.path, .{ .raw = op.prev.? });
        if (displaced != null) return error.InversionMismatch;
    } else {
        // an op with neither value nor prev is malformed
        return error.InversionMismatch;
    }
}

// ··· unchanged in this patch: "=== specialized MST node decoder ===" section elided ···

test "putReturn and deleteReturn" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    var tree = Mst.init(a);
    const cid1 = try cbor.Cid.forDagCbor(a, "v1");
    const cid2 = try cbor.Cid.forDagCbor(a, "v2");

    // fresh key: nothing is displaced
    const prev1 = try tree.putReturn("key1", cid1);
    try std.testing.expect(prev1 == null);

    // overwrite: the displaced CID is the old value
    const prev2 = try tree.putReturn("key1", cid2);
    try std.testing.expect(prev2 != null);
    try std.testing.expectEqualSlices(u8, cid1.raw, prev2.?.raw);

    // delete: returns the removed value
    const removed = try tree.deleteReturn("key1");
    try std.testing.expect(removed != null);
    try std.testing.expectEqualSlices(u8, cid2.raw, removed.?.raw);

    // deleting a missing key yields null
    const removed2 = try tree.deleteReturn("key1");
    try std.testing.expect(removed2 == null);
}

test "copy produces independent tree" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    var tree = Mst.init(a);
    const cid1 = try cbor.Cid.forDagCbor(a, "v1");
    const cid2 = try cbor.Cid.forDagCbor(a, "v2");

    try tree.put("key1", cid1);
    try tree.put("key2", cid1);

    var tree2 = try tree.copy();

    // mutate only the copy
    try tree2.put("key1", cid2);
    try tree2.delete("key2");

    // the original must be untouched
    const got1 = tree.get("key1") orelse return error.NotFound;
    try std.testing.expectEqualSlices(u8, cid1.raw, got1.raw);
    try std.testing.expect(tree.get("key2") != null);

    // the copy must reflect both changes
    const got1_copy = tree2.get("key1") orelse return error.NotFound;
    try std.testing.expectEqualSlices(u8, cid2.raw, got1_copy.raw);
    try std.testing.expect(tree2.get("key2") == null);
}

// ··· unchanged hunk context elided: test "rootCid is deterministic" body and
//     the "expected_after" fixture assertions ···

test "inversion: create then invert" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    const cid1 = try cbor.Cid.forDagCbor(a, "record1");

    var tree = Mst.init(a);
    const root_before = try tree.rootCid();

    // forward: create the record
    try tree.put("col/rkey1", cid1);

    // inverting a create removes it again
    try invertOp(&tree, .{
        .path = "col/rkey1",
        .value = cid1.raw,
        .prev = null,
    });

    const root_after = try tree.rootCid();
    try std.testing.expectEqualSlices(u8, root_before.raw, root_after.raw);
}

test "inversion: update then invert" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    const cid1 = try cbor.Cid.forDagCbor(a, "v1");
    const cid2 = try cbor.Cid.forDagCbor(a, "v2");

    var tree = Mst.init(a);
    try tree.put("col/rkey1", cid1);
    const root_before = try tree.rootCid();

    // forward: update cid1 → cid2
    try tree.put("col/rkey1", cid2);

    // inverting the update restores cid1
    try invertOp(&tree, .{
        .path = "col/rkey1",
        .value = cid2.raw,
        .prev = cid1.raw,
    });

    const root_after = try tree.rootCid();
    try std.testing.expectEqualSlices(u8, root_before.raw, root_after.raw);
}

test "inversion: delete then invert" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    const cid1 = try cbor.Cid.forDagCbor(a, "v1");

    var tree = Mst.init(a);
    try tree.put("col/rkey1", cid1);
    const root_before = try tree.rootCid();

    // forward: delete the record
    try tree.delete("col/rkey1");

    // inverting the delete re-inserts it
    try invertOp(&tree, .{
        .path = "col/rkey1",
        .value = null,
        .prev = cid1.raw,
    });

    const root_after = try tree.rootCid();
    try std.testing.expectEqualSlices(u8, root_before.raw, root_after.raw);
}

test "inversion: multi-op commit round-trip" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    const cid1 = try cbor.Cid.forDagCbor(a, "v1");
    const cid2 = try cbor.Cid.forDagCbor(a, "v2");
    const cid3 = try cbor.Cid.forDagCbor(a, "v3");

    // seed the pre-commit tree
    var tree = Mst.init(a);
    try tree.put("col/existing", cid1);
    try tree.put("col/to_update", cid1);
    try tree.put("col/to_delete", cid2);
    const root_before = try tree.rootCid();

    // apply one commit's worth of forward ops
    try tree.put("col/new_record", cid3); // create
    try tree.put("col/to_update", cid2); // update
    try tree.delete("col/to_delete"); // delete

    // normalize, then undo every op against the post-commit tree
    const ops = [_]Operation{
        .{ .path = "col/new_record", .value = cid3.raw, .prev = null }, // create
        .{ .path = "col/to_update", .value = cid2.raw, .prev = cid1.raw }, // update
        .{ .path = "col/to_delete", .value = null, .prev = cid2.raw }, // delete
    };
    const sorted = try normalizeOps(a, &ops);

    for (sorted) |op| {
        try invertOp(&tree, op);
    }

    // inversion must recover the pre-commit root exactly
    const root_after = try tree.rootCid();
    try std.testing.expectEqualSlices(u8, root_before.raw, root_after.raw);
}

test "normalizeOps rejects duplicates" {
    const alloc = std.testing.allocator;
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    // two ops targeting the same path must be rejected up front
    const ops = [_]Operation{
        .{ .path = "col/same", .value = "cid1", .prev = null },
        .{ .path = "col/same", .value = "cid2", .prev = null },
    };

    try std.testing.expectError(error.DuplicatePath, normalizeOps(a, &ops));
}

// ··· unchanged hunk context elided: test "parseCidString" continues below ···
+374 -40
src/internal/repo/repo_verifier.zig
// --- src/internal/repo/repo_verifier.zig (patch view, cleaned) ---
// ··· unchanged above: VerifyResult struct ends here (record_count: usize) ···

/// result of verifying a commit's CAR data against a signing key.
/// used by the relay to verify firehose frames without identity resolution.
/// all slices are views into arena-owned memory; callers must dupe anything
/// they keep past the allocator's lifetime.
pub const CommitVerifyResult = struct {
    commit_did: []const u8,
    commit_rev: []const u8,
    commit_version: i64,
    record_count: usize,
    commit_cid: []const u8,
};

// ··· unchanged: VerifyError set (InvalidIdentifier … FetchFailed || Allocator.Error) ···

/// verify a commit's CAR bytes against a pre-resolved signing key.
/// this is the inner loop of verifyRepo() without identity resolution or PDS fetch.
/// used by the relay to verify firehose commit frames directly.
///
/// `car_bytes` is the raw CAR data (from the firehose frame's `blocks` field).
/// `public_key` is the pre-resolved signing key for the commit's DID.
///
/// options:
///   `verify_mst` — walk the MST and verify key heights (default true).
///   `expected_did` — if set, verify the commit DID matches.
pub fn verifyCommitCar(
    allocator: Allocator,
    car_bytes: []const u8,
    public_key: multicodec.PublicKey,
    options: VerifyCommitCarOptions,
) VerifyCommitCarError!CommitVerifyResult {
    // 1. parse CAR (caller-supplied limits are honored by the reader here)
    const repo_car = car.readWithOptions(allocator, car_bytes, .{
        .max_size = options.max_car_size,
        .max_blocks = options.max_blocks,
    }) catch return error.InvalidCommit;
    if (repo_car.roots.len == 0) return error.NoRootsInCar;

    // 2. the first CAR root is the commit block
    const commit_cid_raw = repo_car.roots[0].raw;
    const commit_data = car.findBlock(repo_car, commit_cid_raw) orelse return error.CommitBlockNotFound;

    // 3. decode commit CBOR and pull the required fields
    const commit = cbor.decodeAll(allocator, commit_data) catch return error.InvalidCommit;
    const commit_did = commit.getString("did") orelse return error.InvalidCommit;
    const commit_version = commit.getInt("version") orelse return error.InvalidCommit;
    const commit_rev = commit.getString("rev") orelse return error.InvalidCommit;
    const sig_bytes = commit.getBytes("sig") orelse return error.SignatureNotFound;

    // 4. validate commit structure (repo format v3 only)
    if (commit_version != 3) return error.InvalidCommit;
    if (Did.parse(commit_did) == null) return error.InvalidCommit;

    // 5. check DID matches expected (if provided)
    if (options.expected_did) |expected| {
        if (!std.mem.eql(u8, commit_did, expected)) return error.InvalidCommit;
    }

    // 6. verify the signature over the unsigned commit encoding
    const unsigned_commit_bytes = try encodeUnsignedCommit(allocator, commit);
    switch (public_key.key_type) {
        .p256 => jwt.verifyP256(unsigned_commit_bytes, sig_bytes, public_key.raw) catch return error.SignatureVerificationFailed,
        .secp256k1 => jwt.verifySecp256k1(unsigned_commit_bytes, sig_bytes, public_key.raw) catch return error.SignatureVerificationFailed,
    }

    // 7. optionally walk the MST, verifying structure and counting records
    var record_count: usize = 0;
    if (options.verify_mst) {
        const data_cid_value = commit.get("data") orelse return error.InvalidCommit;
        const data_cid = switch (data_cid_value) {
            .cid => |c| c,
            else => return error.InvalidCommit,
        };
        record_count = walkAndVerifyMst(allocator, repo_car, data_cid.raw) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            else => return error.MstRootMismatch,
        };
    }

    return .{
        .commit_did = commit_did,
        .commit_rev = commit_rev,
        .commit_version = commit_version,
        .record_count = record_count,
        .commit_cid = commit_cid_raw,
    };
}

pub const VerifyCommitCarOptions = struct {
    verify_mst: bool = true,
    expected_did: ?[]const u8 = null,
    max_car_size: ?usize = null, // null = reader default (2MB)
    max_blocks: ?usize = null, // null = reader default (10,000)
};

pub const VerifyCommitCarError = error{
    NoRootsInCar,
    CommitBlockNotFound,
    InvalidCommit,
    SignatureNotFound,
    SignatureVerificationFailed,
    MstRootMismatch,
    OutOfMemory,
};

/// verify a repo end-to-end: resolve identity, fetch repo, verify commit signature, walk and rebuild MST.
pub fn verifyRepo(caller_alloc: Allocator, identifier: []const u8) !VerifyResult {
    var arena = std.heap.ArenaAllocator.init(caller_alloc);
    // ··· unchanged in this patch: arena setup, identity resolution,
    //     signing-key lookup, PDS endpoint discovery (steps 1-4) ···

    // 5. fetch repo CAR
    const car_bytes = try fetchRepo(allocator, pds_endpoint, did_str);

    // 6-10. verify CAR: signature, commit structure, MST — now delegated to
    // verifyCommitCar so the relay and the full verifier share one code path.
    const commit_result = verifyCommitCar(allocator, car_bytes, public_key, .{
        .expected_did = did_str,
        .max_car_size = car_bytes.len, // no size limits — we fetched this ourselves
        .max_blocks = car_bytes.len, // effectively unlimited
    }) catch |err| switch (err) {
        // NOTE(review): the pre-refactor code propagated the raw jwt error on
        // signature failure; it is now folded into InvalidCommit — confirm no
        // caller distinguished the two.
        error.SignatureVerificationFailed => return error.InvalidCommit,
        error.OutOfMemory => return error.OutOfMemory,
        inline else => |e| return e,
    };

    // build result — dupe strings to caller's allocator so they survive arena cleanup
    return VerifyResult{
        .did = try caller_alloc.dupe(u8, did_str),
        .handle = try caller_alloc.dupe(u8, did_doc.handle() orelse identifier),
        .signing_key_type = public_key.key_type,
        .commit_rev = try caller_alloc.dupe(u8, commit_result.commit_rev),
        .commit_version = commit_result.commit_version,
        .record_count = commit_result.record_count,
    };
}

// ··· unchanged hunk context elided ···

/// encode a commit value without the "sig" field (for signature verification)
pub fn encodeUnsignedCommit(allocator: Allocator, commit: cbor.Value) ![]u8 {
    const entries = switch (commit) {
        .map => |m| m,
        else => return error.InvalidCommit,
    };
    // ··· unchanged remainder of encodeUnsignedCommit elided in patch view ···

// ··· unchanged hunk context: walkAndVerifyMst tail ···
    return walkVerifyNode(allocator, repo_car, node, expected_layer);
}

// === sync 1.1: commit diff verification ===

/// decoded commit fields from a CAR file.
/// all slices alias the decoded CBOR / CAR memory owned by the allocator
/// passed to loadCommitFromCAR.
pub const Commit = struct {
    did: []const u8,
    rev: []const u8,
    version: i64,
    sig: []const u8,
    data_cid: []const u8, // raw CID bytes — MST root
    prev: ?[]const u8, // raw CID bytes — previous commit CID (null for first commit)
};

/// lightweight: parse CAR, find root block, decode commit CBOR.
/// no MST loading. reusable for both #commit and #sync frames.
/// pre-computes unsigned commit bytes for signature verification (avoids re-decode).
/// reads with the CAR reader's default limits (2MB / 10,000 blocks).
/// NOTE(review): internal failures (including allocation failure inside
/// encodeUnsignedCommit) are folded into error.InvalidCommit here — confirm
/// that masking OOM this way is acceptable for this entry point.
pub fn loadCommitFromCAR(allocator: Allocator, car_bytes: []const u8) !struct {
    commit: Commit,
    commit_cid: []const u8,
    unsigned_commit_bytes: []const u8,
    repo_car: car.Car,
} {
    const repo_car = car.readWithOptions(allocator, car_bytes, .{}) catch return error.InvalidCommit;
    if (repo_car.roots.len == 0) return error.NoRootsInCar;

    const commit_cid_raw = repo_car.roots[0].raw;
    const commit_data = car.findBlock(repo_car, commit_cid_raw) orelse return error.CommitBlockNotFound;

    const commit_value = cbor.decodeAll(allocator, commit_data) catch return error.InvalidCommit;
    const commit_did = commit_value.getString("did") orelse return error.InvalidCommit;
    const commit_version = commit_value.getInt("version") orelse return error.InvalidCommit;
    const commit_rev = commit_value.getString("rev") orelse return error.InvalidCommit;
    const sig_bytes = commit_value.getBytes("sig") orelse return error.SignatureNotFound;

    // pre-compute unsigned commit bytes while we have the cbor.Value
    const unsigned_commit_bytes = encodeUnsignedCommit(allocator, commit_value) catch return error.InvalidCommit;

    // extract data CID (MST root)
    const data_cid_value = commit_value.get("data") orelse return error.InvalidCommit;
    const data_cid_raw = switch (data_cid_value) {
        .cid => |c| c.raw,
        else => return error.InvalidCommit,
    };

    // extract prev commit CID (optional; explicit CBOR null is valid)
    const prev_cid_raw: ?[]const u8 = if (commit_value.get("prev")) |prev_value| switch (prev_value) {
        .cid => |c| c.raw,
        .null => null,
        else => return error.InvalidCommit,
    } else null;

    return .{
        .commit = .{
            .did = commit_did,
            .rev = commit_rev,
            .version = commit_version,
            .sig = sig_bytes,
            .data_cid = data_cid_raw,
            .prev = prev_cid_raw,
        },
        .commit_cid = commit_cid_raw,
        .unsigned_commit_bytes = unsigned_commit_bytes,
        .repo_car = repo_car,
    };
}

pub const VerifyCommitDiffOptions = struct {
    expected_did: ?[]const u8 = null,
    skip_inversion: bool = false,
    max_car_size: ?usize = null,
    max_blocks: ?usize = null,
};

pub const VerifyCommitDiffError = error{
    NoRootsInCar,
    CommitBlockNotFound,
    InvalidCommit,
    SignatureNotFound,
    SignatureVerificationFailed,
    MstRootMismatch,
    PrevDataMismatch,
    InversionMismatch,
    PartialTree,
    DuplicatePath,
    OutOfMemory,
    InvalidMstNode,
};

/// result of commit diff verification
pub const CommitDiffResult = struct {
    commit_did: []const u8,
    commit_rev: []const u8,
    commit_version: i64,
    commit_cid: []const u8,
    data_cid: []const u8,
};

/// verify a commit diff: parse CAR, verify signature, load partial MST,
/// invert operations, and verify the resulting root matches prev_data.
///
/// FIX: max_car_size/max_blocks were previously accepted but silently
/// ignored, because loadCommitFromCAR reads with the CAR reader's default
/// limits. limits *tighter* than those defaults are now enforced here;
/// loosening beyond the defaults would need a loadCommitFromCAR variant
/// that accepts read options.
pub fn verifyCommitDiff(
    allocator: Allocator,
    blocks: []const u8,
    msg_ops: []const mst.Operation,
    prev_data: ?[]const u8,
    public_key: multicodec.PublicKey,
    options: VerifyCommitDiffOptions,
) VerifyCommitDiffError!CommitDiffResult {
    // 0. enforce the caller's byte-size limit before parsing anything
    if (options.max_car_size) |max| {
        if (blocks.len > max) return error.InvalidCommit;
    }

    // 1. parse CAR + extract commit
    const loaded = loadCommitFromCAR(allocator, blocks) catch return error.InvalidCommit;
    const commit = loaded.commit;
    const repo_car = loaded.repo_car;

    // enforce the caller's block-count limit on the parsed CAR
    if (options.max_blocks) |max| {
        if (repo_car.blocks.len > max) return error.InvalidCommit;
    }

    // 2. verify commit structure (repo format v3 only)
    if (commit.version != 3) return error.InvalidCommit;
    if (Did.parse(commit.did) == null) return error.InvalidCommit;

    // 3. check expected_did
    if (options.expected_did) |expected| {
        if (!std.mem.eql(u8, commit.did, expected)) return error.InvalidCommit;
    }

    // 4. verify signature (unsigned bytes pre-computed by loadCommitFromCAR)
    switch (public_key.key_type) {
        .p256 => jwt.verifyP256(loaded.unsigned_commit_bytes, commit.sig, public_key.raw) catch return error.SignatureVerificationFailed,
        .secp256k1 => jwt.verifySecp256k1(loaded.unsigned_commit_bytes, commit.sig, public_key.raw) catch return error.SignatureVerificationFailed,
    }

    // 5. if no prev_data or skip_inversion, we're done (first commit or lenient mode)
    if (prev_data == null or options.skip_inversion) {
        return .{
            .commit_did = commit.did,
            .commit_rev = commit.rev,
            .commit_version = commit.version,
            .commit_cid = loaded.commit_cid,
            .data_cid = commit.data_cid,
        };
    }

    // 6. load partial MST from CAR blocks. the tree is local and never used
    //    again after inversion, so we invert it in place — the previous deep
    //    copy() here was redundant work.
    var tree = mst.Mst.loadFromBlocks(allocator, repo_car, commit.data_cid) catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        error.InvalidMstNode => return error.InvalidMstNode,
        else => return error.MstRootMismatch,
    };

    // 7. normalize ops (sorted, duplicate paths rejected)
    const sorted_ops = mst.normalizeOps(allocator, msg_ops) catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        error.DuplicatePath => return error.DuplicatePath,
    };

    // 8. invert each operation against the post-commit tree
    for (sorted_ops) |op| {
        mst.invertOp(&tree, op) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            error.InversionMismatch => return error.InversionMismatch,
            error.PartialTree => return error.PartialTree,
        };
    }

    // 9. compute the inverted root and compare against prev_data
    const inverted_root = tree.rootCid() catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        error.PartialTree => return error.PartialTree,
    };
    if (!std.mem.eql(u8, inverted_root.raw, prev_data.?)) {
        return error.PrevDataMismatch;
    }

    return .{
        .commit_did = commit.did,
        .commit_rev = commit.rev,
        .commit_version = commit.version,
        .commit_cid = loaded.commit_cid,
        .data_cid = commit.data_cid,
    };
}

// === tests ===

// ··· unchanged hunk context: test "verify repo - zzstoatzz.io" head elided ···
    try std.testing.expectEqualStrings("did:plc:xbtmt2zjwlrfegqvch7fboei", result.did);
    try std.testing.expect(result.record_count > 0);
    std.debug.print("verified zzstoatzz.io: {d} records, rev={s}\n", .{ result.record_count, result.commit_rev });
}

test "verifyCommitDiff: build tree, serialize partial CAR, verify inversion" {
    // NOTE(review): despite the name, this test exercises only the MST-level
    // inversion pipeline (copy → normalizeOps → invertOp → rootCid). it never
    // serializes a CAR nor calls verifyCommitDiff; an end-to-end test would
    // additionally need MST-node CAR serialization and a real signing key.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const cid1 = try cbor.Cid.forDagCbor(a, "record1");
    const cid2 = try cbor.Cid.forDagCbor(a, "record2");
    const cid3 = try cbor.Cid.forDagCbor(a, "record3");

    // build "before" tree
    var before_tree = mst.Mst.init(a);
    try before_tree.put("col/existing", cid1);
    try before_tree.put("col/to_update", cid1);
    try before_tree.put("col/to_delete", cid2);
    const prev_data_cid = try before_tree.rootCid();

    // build "after" tree (apply ops forward)
    var after_tree = try before_tree.copy();
    try after_tree.put("col/new_record", cid3); // create
    try after_tree.put("col/to_update", cid2); // update
    try after_tree.delete("col/to_delete"); // delete
    const new_data_cid = try after_tree.rootCid();

    // verify inversion works at the MST level
    var inverted = try after_tree.copy();
    const ops = [_]mst.Operation{
        .{ .path = "col/new_record", .value = cid3.raw, .prev = null },
        .{ .path = "col/to_update", .value = cid2.raw, .prev = cid1.raw },
        .{ .path = "col/to_delete", .value = null, .prev = cid2.raw },
    };
    const sorted = try mst.normalizeOps(a, &ops);
    for (sorted) |op| {
        try mst.invertOp(&inverted, op);
    }
    const inverted_root = try inverted.rootCid();
    try std.testing.expectEqualSlices(u8, prev_data_cid.raw, inverted_root.raw);

    // also verify the after tree has the expected new root
    try std.testing.expect(!std.mem.eql(u8, prev_data_cid.raw, new_data_cid.raw));
}

test "loadCommitFromCAR extracts commit fields" {
    // build a minimal valid commit CAR and round-trip it through the loader
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const data_cid = try cbor.Cid.forDagCbor(a, "mst-root-placeholder");

    // build commit CBOR (map keys in canonical order)
    const commit_value: cbor.Value = .{ .map = &.{
        .{ .key = "data", .value = .{ .cid = data_cid } },
        .{ .key = "did", .value = .{ .text = "did:plc:test123" } },
        .{ .key = "rev", .value = .{ .text = "3k2abc000000" } },
        .{ .key = "sig", .value = .{ .bytes = "fakesig" } },
        .{ .key = "version", .value = .{ .unsigned = 3 } },
    } };
    const commit_bytes = try cbor.encodeAlloc(a, commit_value);
    const commit_cid = try cbor.Cid.forDagCbor(a, commit_bytes);

    // build CAR with the commit as its single root block
    const car_data = car.Car{
        .roots = &.{commit_cid},
        .blocks = &.{
            .{ .cid_raw = commit_cid.raw, .data = commit_bytes },
        },
    };
    const car_bytes = try car.writeAlloc(a, car_data);

    // parse it back and check every extracted field
    const loaded = try loadCommitFromCAR(a, car_bytes);
    try std.testing.expectEqualStrings("did:plc:test123", loaded.commit.did);
    try std.testing.expectEqualStrings("3k2abc000000", loaded.commit.rev);
    try std.testing.expectEqual(@as(i64, 3), loaded.commit.version);
    try std.testing.expectEqualSlices(u8, data_cid.raw, loaded.commit.data_cid);
    try std.testing.expect(loaded.commit.prev == null);
}

// ··· unchanged below: stress test pfrazee.com (~192k records on bsky.network) ···
+9
src/root.zig
// --- src/root.zig (patch view, cleaned) ---
// repo verification
pub const repo_verifier = @import("internal/repo/repo_verifier.zig");
pub const verifyRepo = repo_verifier.verifyRepo;
pub const VerifyResult = repo_verifier.VerifyResult;
pub const verifyCommitCar = repo_verifier.verifyCommitCar;
pub const CommitVerifyResult = repo_verifier.CommitVerifyResult;

// sync 1.1: commit diff verification
// NOTE(review): Zig naming convention treats acronyms as one word, so
// `loadCommitFromCar` would be conventional; keeping the existing public
// name to preserve the API.
pub const MstOperation = mst.Operation;
pub const Commit = repo_verifier.Commit;
pub const loadCommitFromCAR = repo_verifier.loadCommitFromCAR;
pub const verifyCommitDiff = repo_verifier.verifyCommitDiff;
pub const CommitDiffResult = repo_verifier.CommitDiffResult;

// sync / streaming
const sync = @import("internal/streaming/sync.zig");