atproto utils for zig zat.dev
atproto sdk zig

feat: verify CID hashes in CAR parser

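car.read() now SHA-256 hashes each block's content and compares the result
against the digest in the block's CID, returning error.BadBlockHash on a
mismatch. this is the correct behavior for untrusted data from the network.
blocks whose CID uses the identity hash are accepted without hashing; any
other non-SHA-256 hash function is rejected with error.BadBlockHash.
car.readWithOptions() accepts a verify_block_hashes flag (default true) that
can be set to false to skip verification for trusted local data.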

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+74 -3
+74 -3
src/internal/repo/car.zig
··· 31 31 InvalidCid, 32 32 UnexpectedEof, 33 33 OutOfMemory, 34 + BadBlockHash, 35 + }; 36 + 37 + pub const ReadOptions = struct { 38 + /// verify that each block's content hashes to its CID. 39 + /// this is the correct behavior for untrusted data (e.g. from the network). 40 + /// set to false only for trusted local data where you want raw decode speed. 41 + verify_block_hashes: bool = true, 34 42 }; 35 43 36 44 /// parse a CAR v1 file from raw bytes 37 45 pub fn read(allocator: Allocator, data: []const u8) CarError!Car { 46 + return readWithOptions(allocator, data, .{}); 47 + } 48 + 49 + /// parse a CAR v1 file from raw bytes with options 50 + pub fn readWithOptions(allocator: Allocator, data: []const u8, options: ReadOptions) CarError!Car { 38 51 var pos: usize = 0; 39 52 40 53 // read header length (unsigned varint) ··· 76 89 // parse CID to determine its length, then the rest is block content 77 90 const cid_len = cidLength(block_data) orelse return error.InvalidCid; 78 91 if (cid_len > block_data.len) return error.InvalidCid; 92 + 93 + const cid_bytes = block_data[0..cid_len]; 94 + const content = block_data[cid_len..]; 95 + 96 + if (options.verify_block_hashes) { 97 + try verifyBlockHash(cid_bytes, content); 98 + } 79 99 80 100 try blocks.append(allocator, .{ 81 - .cid_raw = block_data[0..cid_len], 82 - .data = block_data[cid_len..], 101 + .cid_raw = cid_bytes, 102 + .data = content, 83 103 }); 84 104 85 105 pos = block_end; ··· 91 111 }; 92 112 } 93 113 114 + /// verify that block content hashes to the digest in its CID 115 + fn verifyBlockHash(cid_bytes: []const u8, content: []const u8) CarError!void { 116 + const cid = cbor.Cid{ .raw = cid_bytes }; 117 + const hash_fn = cid.hashFn() orelse return error.InvalidCid; 118 + 119 + // identity hash (0x00) — digest IS the content, no hashing needed 120 + if (hash_fn == cbor.HashFn.identity) return; 121 + 122 + // only SHA-256 supported 123 + if (hash_fn != cbor.HashFn.sha2_256) return error.BadBlockHash; 124 + 
125 + const expected = cid.digest() orelse return error.InvalidCid; 126 + if (expected.len != 32) return error.BadBlockHash; 127 + 128 + const Sha256 = std.crypto.hash.sha2.Sha256; 129 + var computed: [Sha256.digest_length]u8 = undefined; 130 + Sha256.hash(content, &computed, .{}); 131 + 132 + if (!std.mem.eql(u8, &computed, expected)) return error.BadBlockHash; 133 + } 134 + 94 135 /// determine the byte length of a CID at the start of data 95 136 fn cidLength(data: []const u8) ?usize { 96 137 if (data.len < 2) return null; ··· 240 281 @memcpy(car_buf[car_pos..][0..block_content.len], &block_content); 241 282 car_pos += block_content.len; 242 283 243 - const car_file = try read(alloc, car_buf[0..car_pos]); 284 + // this test uses a fake digest, so skip verification 285 + const car_file = try readWithOptions(alloc, car_buf[0..car_pos], .{ .verify_block_hashes = false }); 244 286 245 287 try std.testing.expectEqual(@as(usize, 1), car_file.blocks.len); 246 288 try std.testing.expectEqual(@as(usize, block_content.len), car_file.blocks[0].data.len); ··· 343 385 const decoded = try cbor.decodeAll(alloc, found); 344 386 try std.testing.expectEqualStrings("hello from CAR writer", decoded.getString("text").?); 345 387 try std.testing.expectEqualStrings("app.bsky.feed.post", decoded.getString("$type").?); 388 + } 389 + 390 + test "read rejects block with bad hash" { 391 + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); 392 + defer arena.deinit(); 393 + const alloc = arena.allocator(); 394 + 395 + // create a valid CAR, then corrupt the block content 396 + const record_bytes = try cbor.encodeAlloc(alloc, .{ .map = &.{ 397 + .{ .key = "text", .value = .{ .text = "original" } }, 398 + } }); 399 + const cid = try cbor.Cid.forDagCbor(alloc, record_bytes); 400 + 401 + // write a CAR with the correct CID but wrong data 402 + const tampered_data = "tampered"; 403 + const tampered_car = Car{ 404 + .roots = &.{cid}, 405 + .blocks = &.{ 406 + .{ .cid_raw = cid.raw, 
.data = tampered_data }, 407 + }, 408 + }; 409 + const car_bytes = try writeAlloc(alloc, tampered_car); 410 + 411 + // should fail with verification on (default) 412 + try std.testing.expectError(error.BadBlockHash, read(alloc, car_bytes)); 413 + 414 + // should succeed with verification off 415 + const parsed = try readWithOptions(alloc, car_bytes, .{ .verify_block_hashes = false }); 416 + try std.testing.expectEqual(@as(usize, 1), parsed.blocks.len); 346 417 } 347 418 348 419 test "findBlock returns null for missing CID" {