a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky

refactor(car): remove sync reader indirection

mary.my.id ab76ddaa 441b28ac

verified
+280 -97
+5
.changeset/old-phones-send.md
··· 1 + --- 2 + '@atcute/car': patch 3 + --- 4 + 5 + remove sync reader indirection
+193
packages/utilities/car/lib/reader.bench.ts
··· 1 + import { readdirSync, readFileSync } from 'node:fs'; 2 + import { dirname, resolve } from 'node:path'; 3 + import { fileURLToPath } from 'node:url'; 4 + 5 + import * as CID from '@atcute/cid'; 6 + 7 + import { bench, do_not_optimize, run, summary } from 'mitata'; 8 + 9 + import { fromUint8Array } from './reader.ts'; 10 + import { serializeCarEntry, serializeCarHeader } from './writer.ts'; 11 + 12 + interface CarBlock { 13 + cid: Uint8Array; 14 + data: Uint8Array; 15 + } 16 + 17 + const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)); 18 + 19 + const seeded = (seed: number) => { 20 + let x = seed | 0; 21 + 22 + return () => { 23 + x ^= x << 13; 24 + x ^= x >>> 17; 25 + x ^= x << 5; 26 + return x >>> 0; 27 + }; 28 + }; 29 + 30 + const collectCarFiles = (dir: string): string[] => { 31 + const root = resolve(SCRIPT_DIR, dir); 32 + const out: string[] = []; 33 + const stack = [root]; 34 + 35 + while (stack.length > 0) { 36 + const current = stack.pop()!; 37 + const entries = readdirSync(current, { withFileTypes: true }); 38 + 39 + for (let i = 0; i < entries.length; i++) { 40 + const entry = entries[i]; 41 + const path = resolve(current, entry.name); 42 + 43 + if (entry.isDirectory()) { 44 + stack.push(path); 45 + } else if (entry.isFile() && path.endsWith('.car')) { 46 + out.push(path); 47 + } 48 + } 49 + } 50 + 51 + out.sort(); 52 + return out; 53 + }; 54 + 55 + const joinChunks = (chunks: Uint8Array[]): Uint8Array => { 56 + let total = 0; 57 + for (let i = 0; i < chunks.length; i++) { 58 + total += chunks[i].length; 59 + } 60 + 61 + const out = new Uint8Array(total); 62 + let offset = 0; 63 + for (let i = 0; i < chunks.length; i++) { 64 + const chunk = chunks[i]; 65 + out.set(chunk, offset); 66 + offset += chunk.length; 67 + } 68 + 69 + return out; 70 + }; 71 + 72 + const makeSynthetic = (entries: number, payloadSize: number): Uint8Array => { 73 + const rand = seeded(0x5eed1234 ^ entries ^ payloadSize); 74 + const blocks: CarBlock[] = []; 75 + 76 + for (let i = 0; i < entries; i++) { 77 + const digest = new Uint8Array(32); 78 + for (let j = 0; j < 32; j++) { 79 + digest[j] = (rand() + i + j * 13) & 0xff; 80 + } 81 + 82 + const cid = CID.fromDigest(CID.CODEC_DCBOR, digest); 83 + const payload = new Uint8Array(payloadSize + (i & 31)); 84 + for (let j = 0; j < payload.length; j++) { 85 + payload[j] = (rand() + i + j) & 0xff; 86 + } 87 + 88 + blocks.push({ cid: cid.bytes, data: payload }); 89 + } 90 + 91 + const root = CID.toCidLink(CID.fromDigest(CID.CODEC_DCBOR, new Uint8Array(32).fill(9))); 92 + const chunks: Uint8Array[] = [serializeCarHeader([root])]; 93 + 94 + for (let i = 0; i < blocks.length; i++) { 95 + chunks.push(serializeCarEntry(blocks[i].cid, blocks[i].data)); 96 + } 97 + 98 + return joinChunks(chunks); 99 + }; 100 + 101 + const checksumForOf = (bytes: Uint8Array): number => { 102 + const car = fromUint8Array(bytes); 103 + let sum = car.roots.length; 104 + 105 + for (const entry of car) { 106 + sum += entry.bytes.length; 107 + sum += entry.cid.bytes[35] ?? 0; 108 + } 109 + 110 + return sum; 111 + }; 112 + 113 + const checksumNextLoop = (bytes: Uint8Array): number => { 114 + const car = fromUint8Array(bytes); 115 + let sum = car.roots.length; 116 + 117 + const iterator = car.iterate(); 118 + while (true) { 119 + const next = iterator.next(); 120 + if (next.done) { 121 + break; 122 + } 123 + 124 + const entry = next.value; 125 + sum += entry.bytes.length; 126 + sum += entry.cid.bytes[35] ?? 0; 127 + } 128 + 129 + return sum; 130 + }; 131 + 132 + const fixtures = collectCarFiles('../../mst/mst-test-suite/cars').map( 133 + (file) => new Uint8Array(readFileSync(file)), 134 + ); 135 + const synthetic = makeSynthetic(20_000, 128); 136 + 137 + summary(() => { 138 + bench('fromUint8Array fixtures (for..of)', function* () { 139 + yield { 140 + [0]() { 141 + return fixtures; 142 + }, 143 + bench(dataset: Uint8Array[]) { 144 + let sum = 0; 145 + for (let i = 0; i < dataset.length; i++) { 146 + sum += checksumForOf(dataset[i]); 147 + } 148 + return do_not_optimize(sum); 149 + }, 150 + }; 151 + }); 152 + 153 + bench('fromUint8Array fixtures (next loop)', function* () { 154 + yield { 155 + [0]() { 156 + return fixtures; 157 + }, 158 + bench(dataset: Uint8Array[]) { 159 + let sum = 0; 160 + for (let i = 0; i < dataset.length; i++) { 161 + sum += checksumNextLoop(dataset[i]); 162 + } 163 + return do_not_optimize(sum); 164 + }, 165 + }; 166 + }); 167 + }); 168 + 169 + summary(() => { 170 + bench('fromUint8Array synthetic 20k (for..of)', function* () { 171 + yield { 172 + [0]() { 173 + return synthetic; 174 + }, 175 + bench(bytes: Uint8Array) { 176 + return do_not_optimize(checksumForOf(bytes)); 177 + }, 178 + }; 179 + }); 180 + 181 + bench('fromUint8Array synthetic 20k (next loop)', function* () { 182 + yield { 183 + [0]() { 184 + return synthetic; 185 + }, 186 + bench(bytes: Uint8Array) { 187 + return do_not_optimize(checksumNextLoop(bytes)); 188 + }, 189 + }; 190 + }); 191 + }); 192 + 193 + await run();
+77 -96
packages/utilities/car/lib/reader.ts
··· 5 5 6 6 import { isCarV1Header, type CarEntry, type CarHeader } from './types.ts'; 7 7 8 - interface SyncByteReader { 9 - readonly pos: number; 10 - readonly source: Uint8Array; 11 - upto(size: number): Uint8Array; 12 - exactly(size: number, seek: boolean): Uint8Array; 13 - seek(size: number): void; 14 - } 15 - 16 8 export interface SyncCarReader { 17 9 readonly header: CarHeader; 18 10 readonly roots: CidLink[]; 19 11 20 12 /** @deprecated do for..of on the reader directly */ 21 - iterate(): Generator<CarEntry>; 13 + iterate(): IterableIterator<CarEntry>; 22 14 [Symbol.iterator](): Iterator<CarEntry>; 23 15 } 24 16 25 17 export const fromUint8Array = (buffer: Uint8Array): SyncCarReader => { 26 - const reader = createUint8Reader(buffer); 27 - const header = readHeader(reader); 18 + const { header, nextOffset: headerOffset } = readHeader(buffer, 0); 19 + let pos = headerOffset; 28 20 29 21 return { 30 22 header, 31 23 roots: header.data.roots, 32 24 33 - *iterate() { 34 - while (reader.upto(8 + 36).length > 0) { 35 - const entryStart = reader.pos; 36 - const entrySize = readVarint(reader, 8); 25 + iterate(): IterableIterator<CarEntry> { 26 + return { 27 + next(): IteratorResult<CarEntry> { 28 + if (pos >= buffer.length) { 29 + return { 30 + done: true, 31 + value: undefined, 32 + }; 33 + } 37 34 38 - const cidStart = reader.pos; 39 - const cid = readCid(reader); 35 + const entryStart = pos; 36 + const { value: entryLength, nextOffset: lengthOffset } = varint.decode(buffer, pos, 8); 37 + pos = lengthOffset; 40 38 41 - const bytesStart = reader.pos; 42 - const bytesSize = entrySize - (bytesStart - cidStart); 43 - const bytes = reader.exactly(bytesSize, true); 39 + const cidStart = pos; 40 + const { cid, nextOffset: cidOffset } = readCid(buffer, pos); 41 + pos = cidOffset; 44 42 45 - const cidEnd = bytesStart; 46 - const bytesEnd = reader.pos; 47 - const entryEnd = bytesEnd; 43 + const bytesStart = pos; 44 + const bytesSize = entryLength - (bytesStart - cidStart); 45 + if (bytesSize < 0 || bytesStart + bytesSize > buffer.length) { 46 + throw new RangeError('unexpected end of data'); 47 + } 48 48 49 - yield { 50 - cid, 51 - bytes, 49 + const bytesEnd = bytesStart + bytesSize; 50 + const bytes = buffer.subarray(bytesStart, bytesEnd); 51 + pos = bytesEnd; 52 52 53 - entryStart, 54 - entryEnd, 55 - cidStart, 56 - cidEnd, 57 - bytesStart, 58 - bytesEnd, 59 - }; 60 - } 61 - }, 53 + const cidEnd = bytesStart; 54 + const entryEnd = bytesEnd; 62 55 63 - [Symbol.iterator](): Iterator<CarEntry> { 64 - return this.iterate(); 65 - }, 66 - }; 67 - }; 56 + return { 57 + done: false, 58 + value: { 59 + cid, 60 + bytes, 68 61 69 - const createUint8Reader = (buf: Uint8Array): SyncByteReader => { 70 - let pos = 0; 62 + entryStart, 63 + entryEnd, 64 + cidStart, 65 + cidEnd, 66 + bytesStart, 67 + bytesEnd, 68 + }, 69 + }; 70 + }, 71 71 72 - return { 73 - get pos() { 74 - return pos; 75 - }, 76 - get source() { 77 - return buf; 72 + [Symbol.iterator]() { 73 + return this; 74 + }, 75 + }; 78 76 }, 79 77 80 - seek(size) { 81 - if (size > buf.length - pos) { 82 - throw new RangeError('unexpected end of data'); 83 - } 84 - 85 - pos += size; 86 - }, 87 - upto(size) { 88 - return buf.subarray(pos, pos + size); 89 - }, 90 - exactly(size, seek) { 91 - if (size > buf.length - pos) { 92 - throw new RangeError('unexpected end of data'); 93 - } 94 - 95 - const slice = buf.subarray(pos, pos + size); 96 - if (seek) { 97 - pos += size; 98 - } 99 - 100 - return slice; 78 + [Symbol.iterator](): Iterator<CarEntry> { 79 + return this.iterate(); 101 80 }, 102 81 }; 103 82 }; 104 83 105 - const readVarint = (reader: SyncByteReader, size: number): number => { 106 - if (reader.pos >= reader.source.length) { 107 - throw new RangeError(`unexpected end of data`); 108 - } 109 - 110 - const { value, nextOffset } = varint.decode(reader.source, reader.pos, size); 111 - reader.seek(nextOffset - reader.pos); 112 - 113 - return value; 114 - }; 115 - 116 - const readHeader = (reader: SyncByteReader): CarHeader => { 117 - const headerStart = reader.pos; 118 - const length = readVarint(reader, 8); 84 + const readHeader = (source: Uint8Array, offset: number): { header: CarHeader; nextOffset: number } => { 85 + const headerStart = offset; 86 + const { value: length, nextOffset: lengthOffset } = varint.decode(source, offset, 8); 119 87 if (length === 0) { 120 88 throw new RangeError(`invalid car header; length=0`); 121 89 } 122 90 123 - const dataStart = reader.pos; 124 - const rawHeader = reader.exactly(length, true); 91 + const dataStart = lengthOffset; 92 + const dataEnd = dataStart + length; 93 + if (dataEnd > source.length) { 94 + throw new RangeError('unexpected end of data'); 95 + } 125 96 126 - const data = CBOR.decode(rawHeader); 97 + const data = CBOR.decode(source.subarray(dataStart, dataEnd)); 127 98 if (!isCarV1Header(data)) { 128 99 throw new TypeError(`expected a car v1 archive`); 129 100 } 130 101 131 - const dataEnd = reader.pos; 132 102 const headerEnd = dataEnd; 133 103 134 - return { data, headerStart, headerEnd, dataStart, dataEnd }; 104 + return { 105 + header: { data, headerStart, headerEnd, dataStart, dataEnd }, 106 + nextOffset: dataEnd, 107 + }; 135 108 }; 136 109 137 - const readCid = (reader: SyncByteReader): CID.Cid => { 138 - const bytes = reader.exactly(36, true); 110 + const readCid = (source: Uint8Array, offset: number): { cid: CID.Cid; nextOffset: number } => { 111 + const cidEnd = offset + 36; 112 + if (cidEnd > source.length) { 113 + throw new RangeError('unexpected end of data'); 114 + } 115 + 116 + const bytes = source.subarray(offset, cidEnd); 139 117 140 118 const version = bytes[0]; 141 119 const codec = bytes[1]; ··· 159 137 } 160 138 161 139 return { 162 - version: version, 163 - codec: codec, 164 - digest: { 165 - codec: digestType, 166 - contents: bytes.subarray(4, 36), 140 + cid: { 141 + version: version, 142 + codec: codec, 143 + digest: { 144 + codec: digestType, 145 + contents: bytes.subarray(4, 36), 146 + }, 147 + bytes: bytes, 167 148 }, 168 - bytes: bytes, 149 + nextOffset: cidEnd, 169 150 }; 170 151 };
+1
packages/utilities/car/package.json
··· 42 42 }, 43 43 "devDependencies": { 44 44 "@atcute/multibase": "workspace:^", 45 + "@types/node": "^25.2.1", 45 46 "@vitest/coverage-v8": "^4.0.18", 46 47 "vitest": "^4.0.18" 47 48 }
+1 -1
packages/utilities/car/tsconfig.json
··· 1 1 { 2 2 "compilerOptions": { 3 - "types": [], 3 + "types": ["node"], 4 4 "rootDir": "lib/", 5 5 "outDir": "dist/", 6 6 "esModuleInterop": true,
+3
pnpm-lock.yaml
··· 1042 1042 '@atcute/multibase': 1043 1043 specifier: workspace:^ 1044 1044 version: link:../multibase 1045 + '@types/node': 1046 + specifier: ^25.2.1 1047 + version: 25.2.1 1045 1048 '@vitest/coverage-v8': 1046 1049 specifier: ^4.0.18 1047 1050 version: 4.0.18(@vitest/browser@4.0.18(vite@7.3.1(@types/node@25.2.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.0))(vitest@4.0.18))(vitest@4.0.18)