tangled
alpha
login
or
join now
sajidanwar.com
/
atcute
forked from
mary.my.id/atcute
0
fork
atom
a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky
0
fork
atom
overview
issues
pulls
pipelines
refactor(car): remove sync reader indirection
mary.my.id
1 month ago
ab76ddaa
441b28ac
verified
This commit was signed with the committer's known signature.
mary.my.id
SSH Key Fingerprint:
SHA256:ZlTP/auFSGpGnaoDg4mCTG1g9OZvXp62jWR4c6H4O3c=
+280
-97
6 changed files
expand all
collapse all
unified
split
.changeset
old-phones-send.md
packages
utilities
car
lib
reader.bench.ts
reader.ts
package.json
tsconfig.json
pnpm-lock.yaml
+5
.changeset/old-phones-send.md
···
1
1
+
---
2
2
+
'@atcute/car': patch
3
3
+
---
4
4
+
5
5
+
remove sync reader indirection
+193
packages/utilities/car/lib/reader.bench.ts
···
1
1
+
import { readdirSync, readFileSync } from 'node:fs';
2
2
+
import { dirname, resolve } from 'node:path';
3
3
+
import { fileURLToPath } from 'node:url';
4
4
+
5
5
+
import * as CID from '@atcute/cid';
6
6
+
7
7
+
import { bench, do_not_optimize, run, summary } from 'mitata';
8
8
+
9
9
+
import { fromUint8Array } from './reader.ts';
10
10
+
import { serializeCarEntry, serializeCarHeader } from './writer.ts';
11
11
+
12
12
+
/** a single synthetic block queued for serialization into a CAR archive. */
interface CarBlock {
	/** binary form of the block's CID, as passed to `serializeCarEntry` */
	cid: Uint8Array;
	/** raw payload bytes stored alongside the CID */
	data: Uint8Array;
}
16
16
+
17
17
+
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
18
18
+
19
19
+
/**
 * creates a deterministic xorshift32 pseudo-random number generator.
 *
 * @param seed initial state; coerced to a signed 32-bit integer
 * @returns a function producing the next unsigned 32-bit value on each call
 */
const seeded = (seed: number): (() => number) => {
	let state = seed | 0;

	// xorshift maps the all-zero state back to zero, so a seed that coerces to 0
	// (0, NaN, multiples of 2^32, ...) would produce an endless stream of zeros.
	// fall back to a fixed non-zero state; non-zero seeds are unaffected.
	if (state === 0) {
		state = 0x9e3779b9 | 0;
	}

	return () => {
		state ^= state << 13;
		state ^= state >>> 17;
		state ^= state << 5;
		// expose the state as an unsigned 32-bit integer
		return state >>> 0;
	};
};
29
29
+
30
30
+
/**
 * walks a directory tree and gathers every `.car` file found beneath it.
 *
 * @param dir directory to scan, resolved relative to `SCRIPT_DIR`
 * @returns absolute paths of the matching files, sorted
 */
const collectCarFiles = (dir: string): string[] => {
	const found: string[] = [];
	// directories still waiting to be scanned; processed last-in, first-out
	const pending: string[] = [resolve(SCRIPT_DIR, dir)];

	for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
		for (const dirent of readdirSync(current, { withFileTypes: true })) {
			const full = resolve(current, dirent.name);

			if (dirent.isDirectory()) {
				pending.push(full);
				continue;
			}

			if (dirent.isFile() && full.endsWith('.car')) {
				found.push(full);
			}
		}
	}

	// sorting makes the result independent of directory enumeration order
	found.sort();
	return found;
};
54
54
+
55
55
+
/**
 * concatenates byte chunks into one contiguous buffer.
 *
 * @param chunks buffers to join, in output order
 * @returns a newly allocated buffer holding every chunk back to back
 */
const joinChunks = (chunks: Uint8Array[]): Uint8Array => {
	// size the destination up front so each chunk is copied exactly once
	const size = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
	const joined = new Uint8Array(size);

	let cursor = 0;
	for (const chunk of chunks) {
		joined.set(chunk, cursor);
		cursor += chunk.length;
	}

	return joined;
};
71
71
+
72
72
+
/**
 * builds a deterministic in-memory CAR archive for benchmarking.
 *
 * @param entries number of blocks to generate
 * @param payloadSize base payload length; each block adds `index & 31` extra bytes
 * @returns the serialized archive: one header followed by every entry
 */
const makeSynthetic = (entries: number, payloadSize: number): Uint8Array => {
	// the seed is derived from both arguments, so the same arguments give the same bytes
	const next = seeded(0x5eed1234 ^ entries ^ payloadSize);

	// fills `target` with pseudo-random bytes, offset by block index and position
	const fill = (target: Uint8Array, base: number, stride: number): Uint8Array => {
		for (let k = 0; k < target.length; k++) {
			target[k] = (next() + base + k * stride) & 0xff;
		}
		return target;
	};

	const blocks: CarBlock[] = [];
	for (let index = 0; index < entries; index++) {
		const digest = fill(new Uint8Array(32), index, 13);
		const cid = CID.fromDigest(CID.CODEC_DCBOR, digest);
		const data = fill(new Uint8Array(payloadSize + (index & 31)), index, 1);

		blocks.push({ cid: cid.bytes, data });
	}

	// a fixed root (digest of all 9s) stands in for the archive's root link
	const rootDigest = new Uint8Array(32).fill(9);
	const root = CID.toCidLink(CID.fromDigest(CID.CODEC_DCBOR, rootDigest));

	const chunks: Uint8Array[] = [serializeCarHeader([root])];
	for (const block of blocks) {
		chunks.push(serializeCarEntry(block.cid, block.data));
	}

	return joinChunks(chunks);
};
100
100
+
101
101
+
/**
 * reads a CAR archive with `for..of` and folds it into a number, so the
 * benchmark has a result that depends on every entry.
 *
 * @param bytes serialized CAR archive
 * @returns root count plus, per entry, its payload length and CID byte 35
 */
const checksumForOf = (bytes: Uint8Array): number => {
	const reader = fromUint8Array(bytes);
	let total = reader.roots.length;

	for (const { bytes: payload, cid } of reader) {
		// index 35 is the last byte of the 36-byte CID; fall back to 0 if absent
		total += payload.length;
		total += cid.bytes[35] ?? 0;
	}

	return total;
};
112
112
+
113
113
+
/**
 * reads a CAR archive by calling `next()` on the `iterate()` iterator by hand
 * and folds it into a number, mirroring the `for..of` checksum.
 *
 * @param bytes serialized CAR archive
 * @returns root count plus, per entry, its payload length and CID byte 35
 */
const checksumNextLoop = (bytes: Uint8Array): number => {
	const reader = fromUint8Array(bytes);
	let total = reader.roots.length;

	const cursor = reader.iterate();
	for (let step = cursor.next(); !step.done; step = cursor.next()) {
		const current = step.value;
		// index 35 is the last byte of the 36-byte CID; fall back to 0 if absent
		total += current.bytes.length;
		total += current.cid.bytes[35] ?? 0;
	}

	return total;
};
131
131
+
132
132
+
const fixtures = collectCarFiles('../../mst/mst-test-suite/cars').map(
133
133
+
(file) => new Uint8Array(readFileSync(file)),
134
134
+
);
135
135
+
const synthetic = makeSynthetic(20_000, 128);
136
136
+
137
137
+
summary(() => {
138
138
+
bench('fromUint8Array fixtures (for..of)', function* () {
139
139
+
yield {
140
140
+
[0]() {
141
141
+
return fixtures;
142
142
+
},
143
143
+
bench(dataset: Uint8Array[]) {
144
144
+
let sum = 0;
145
145
+
for (let i = 0; i < dataset.length; i++) {
146
146
+
sum += checksumForOf(dataset[i]);
147
147
+
}
148
148
+
return do_not_optimize(sum);
149
149
+
},
150
150
+
};
151
151
+
});
152
152
+
153
153
+
bench('fromUint8Array fixtures (next loop)', function* () {
154
154
+
yield {
155
155
+
[0]() {
156
156
+
return fixtures;
157
157
+
},
158
158
+
bench(dataset: Uint8Array[]) {
159
159
+
let sum = 0;
160
160
+
for (let i = 0; i < dataset.length; i++) {
161
161
+
sum += checksumNextLoop(dataset[i]);
162
162
+
}
163
163
+
return do_not_optimize(sum);
164
164
+
},
165
165
+
};
166
166
+
});
167
167
+
});
168
168
+
169
169
+
summary(() => {
170
170
+
bench('fromUint8Array synthetic 20k (for..of)', function* () {
171
171
+
yield {
172
172
+
[0]() {
173
173
+
return synthetic;
174
174
+
},
175
175
+
bench(bytes: Uint8Array) {
176
176
+
return do_not_optimize(checksumForOf(bytes));
177
177
+
},
178
178
+
};
179
179
+
});
180
180
+
181
181
+
bench('fromUint8Array synthetic 20k (next loop)', function* () {
182
182
+
yield {
183
183
+
[0]() {
184
184
+
return synthetic;
185
185
+
},
186
186
+
bench(bytes: Uint8Array) {
187
187
+
return do_not_optimize(checksumNextLoop(bytes));
188
188
+
},
189
189
+
};
190
190
+
});
191
191
+
});
192
192
+
193
193
+
await run();
+77
-96
packages/utilities/car/lib/reader.ts
···
5
5
6
6
import { isCarV1Header, type CarEntry, type CarHeader } from './types.ts';
7
7
8
8
-
interface SyncByteReader {
9
9
-
readonly pos: number;
10
10
-
readonly source: Uint8Array;
11
11
-
upto(size: number): Uint8Array;
12
12
-
exactly(size: number, seek: boolean): Uint8Array;
13
13
-
seek(size: number): void;
14
14
-
}
15
15
-
16
8
export interface SyncCarReader {
17
9
readonly header: CarHeader;
18
10
readonly roots: CidLink[];
19
11
20
12
/** @deprecated do for..of on the reader directly */
21
21
-
iterate(): Generator<CarEntry>;
13
13
+
iterate(): IterableIterator<CarEntry>;
22
14
[Symbol.iterator](): Iterator<CarEntry>;
23
15
}
24
16
25
17
export const fromUint8Array = (buffer: Uint8Array): SyncCarReader => {
26
26
-
const reader = createUint8Reader(buffer);
27
27
-
const header = readHeader(reader);
18
18
+
const { header, nextOffset: headerOffset } = readHeader(buffer, 0);
19
19
+
let pos = headerOffset;
28
20
29
21
return {
30
22
header,
31
23
roots: header.data.roots,
32
24
33
33
-
*iterate() {
34
34
-
while (reader.upto(8 + 36).length > 0) {
35
35
-
const entryStart = reader.pos;
36
36
-
const entrySize = readVarint(reader, 8);
25
25
+
iterate(): IterableIterator<CarEntry> {
26
26
+
return {
27
27
+
next(): IteratorResult<CarEntry> {
28
28
+
if (pos >= buffer.length) {
29
29
+
return {
30
30
+
done: true,
31
31
+
value: undefined,
32
32
+
};
33
33
+
}
37
34
38
38
-
const cidStart = reader.pos;
39
39
-
const cid = readCid(reader);
35
35
+
const entryStart = pos;
36
36
+
const { value: entryLength, nextOffset: lengthOffset } = varint.decode(buffer, pos, 8);
37
37
+
pos = lengthOffset;
40
38
41
41
-
const bytesStart = reader.pos;
42
42
-
const bytesSize = entrySize - (bytesStart - cidStart);
43
43
-
const bytes = reader.exactly(bytesSize, true);
39
39
+
const cidStart = pos;
40
40
+
const { cid, nextOffset: cidOffset } = readCid(buffer, pos);
41
41
+
pos = cidOffset;
44
42
45
45
-
const cidEnd = bytesStart;
46
46
-
const bytesEnd = reader.pos;
47
47
-
const entryEnd = bytesEnd;
43
43
+
const bytesStart = pos;
44
44
+
const bytesSize = entryLength - (bytesStart - cidStart);
45
45
+
if (bytesSize < 0 || bytesStart + bytesSize > buffer.length) {
46
46
+
throw new RangeError('unexpected end of data');
47
47
+
}
48
48
49
49
-
yield {
50
50
-
cid,
51
51
-
bytes,
49
49
+
const bytesEnd = bytesStart + bytesSize;
50
50
+
const bytes = buffer.subarray(bytesStart, bytesEnd);
51
51
+
pos = bytesEnd;
52
52
53
53
-
entryStart,
54
54
-
entryEnd,
55
55
-
cidStart,
56
56
-
cidEnd,
57
57
-
bytesStart,
58
58
-
bytesEnd,
59
59
-
};
60
60
-
}
61
61
-
},
53
53
+
const cidEnd = bytesStart;
54
54
+
const entryEnd = bytesEnd;
62
55
63
63
-
[Symbol.iterator](): Iterator<CarEntry> {
64
64
-
return this.iterate();
65
65
-
},
66
66
-
};
67
67
-
};
56
56
+
return {
57
57
+
done: false,
58
58
+
value: {
59
59
+
cid,
60
60
+
bytes,
68
61
69
69
-
const createUint8Reader = (buf: Uint8Array): SyncByteReader => {
70
70
-
let pos = 0;
62
62
+
entryStart,
63
63
+
entryEnd,
64
64
+
cidStart,
65
65
+
cidEnd,
66
66
+
bytesStart,
67
67
+
bytesEnd,
68
68
+
},
69
69
+
};
70
70
+
},
71
71
72
72
-
return {
73
73
-
get pos() {
74
74
-
return pos;
75
75
-
},
76
76
-
get source() {
77
77
-
return buf;
72
72
+
[Symbol.iterator]() {
73
73
+
return this;
74
74
+
},
75
75
+
};
78
76
},
79
77
80
80
-
seek(size) {
81
81
-
if (size > buf.length - pos) {
82
82
-
throw new RangeError('unexpected end of data');
83
83
-
}
84
84
-
85
85
-
pos += size;
86
86
-
},
87
87
-
upto(size) {
88
88
-
return buf.subarray(pos, pos + size);
89
89
-
},
90
90
-
exactly(size, seek) {
91
91
-
if (size > buf.length - pos) {
92
92
-
throw new RangeError('unexpected end of data');
93
93
-
}
94
94
-
95
95
-
const slice = buf.subarray(pos, pos + size);
96
96
-
if (seek) {
97
97
-
pos += size;
98
98
-
}
99
99
-
100
100
-
return slice;
78
78
+
[Symbol.iterator](): Iterator<CarEntry> {
79
79
+
return this.iterate();
101
80
},
102
81
};
103
82
};
104
83
105
105
-
const readVarint = (reader: SyncByteReader, size: number): number => {
106
106
-
if (reader.pos >= reader.source.length) {
107
107
-
throw new RangeError(`unexpected end of data`);
108
108
-
}
109
109
-
110
110
-
const { value, nextOffset } = varint.decode(reader.source, reader.pos, size);
111
111
-
reader.seek(nextOffset - reader.pos);
112
112
-
113
113
-
return value;
114
114
-
};
115
115
-
116
116
-
const readHeader = (reader: SyncByteReader): CarHeader => {
117
117
-
const headerStart = reader.pos;
118
118
-
const length = readVarint(reader, 8);
84
84
+
const readHeader = (source: Uint8Array, offset: number): { header: CarHeader; nextOffset: number } => {
85
85
+
const headerStart = offset;
86
86
+
const { value: length, nextOffset: lengthOffset } = varint.decode(source, offset, 8);
119
87
if (length === 0) {
120
88
throw new RangeError(`invalid car header; length=0`);
121
89
}
122
90
123
123
-
const dataStart = reader.pos;
124
124
-
const rawHeader = reader.exactly(length, true);
91
91
+
const dataStart = lengthOffset;
92
92
+
const dataEnd = dataStart + length;
93
93
+
if (dataEnd > source.length) {
94
94
+
throw new RangeError('unexpected end of data');
95
95
+
}
125
96
126
126
-
const data = CBOR.decode(rawHeader);
97
97
+
const data = CBOR.decode(source.subarray(dataStart, dataEnd));
127
98
if (!isCarV1Header(data)) {
128
99
throw new TypeError(`expected a car v1 archive`);
129
100
}
130
101
131
131
-
const dataEnd = reader.pos;
132
102
const headerEnd = dataEnd;
133
103
134
134
-
return { data, headerStart, headerEnd, dataStart, dataEnd };
104
104
+
return {
105
105
+
header: { data, headerStart, headerEnd, dataStart, dataEnd },
106
106
+
nextOffset: dataEnd,
107
107
+
};
135
108
};
136
109
137
137
-
const readCid = (reader: SyncByteReader): CID.Cid => {
138
138
-
const bytes = reader.exactly(36, true);
110
110
+
const readCid = (source: Uint8Array, offset: number): { cid: CID.Cid; nextOffset: number } => {
111
111
+
const cidEnd = offset + 36;
112
112
+
if (cidEnd > source.length) {
113
113
+
throw new RangeError('unexpected end of data');
114
114
+
}
115
115
+
116
116
+
const bytes = source.subarray(offset, cidEnd);
139
117
140
118
const version = bytes[0];
141
119
const codec = bytes[1];
···
159
137
}
160
138
161
139
return {
162
162
-
version: version,
163
163
-
codec: codec,
164
164
-
digest: {
165
165
-
codec: digestType,
166
166
-
contents: bytes.subarray(4, 36),
140
140
+
cid: {
141
141
+
version: version,
142
142
+
codec: codec,
143
143
+
digest: {
144
144
+
codec: digestType,
145
145
+
contents: bytes.subarray(4, 36),
146
146
+
},
147
147
+
bytes: bytes,
167
148
},
168
168
-
bytes: bytes,
149
149
+
nextOffset: cidEnd,
169
150
};
170
151
};
+1
packages/utilities/car/package.json
···
42
42
},
43
43
"devDependencies": {
44
44
"@atcute/multibase": "workspace:^",
45
45
+
"@types/node": "^25.2.1",
45
46
"@vitest/coverage-v8": "^4.0.18",
46
47
"vitest": "^4.0.18"
47
48
}
+1
-1
packages/utilities/car/tsconfig.json
···
1
1
{
2
2
"compilerOptions": {
3
3
-
"types": [],
3
3
+
"types": ["node"],
4
4
"rootDir": "lib/",
5
5
"outDir": "dist/",
6
6
"esModuleInterop": true,
+3
pnpm-lock.yaml
···
1042
1042
'@atcute/multibase':
1043
1043
specifier: workspace:^
1044
1044
version: link:../multibase
1045
1045
+
'@types/node':
1046
1046
+
specifier: ^25.2.1
1047
1047
+
version: 25.2.1
1045
1048
'@vitest/coverage-v8':
1046
1049
specifier: ^4.0.18
1047
1050
version: 4.0.18(@vitest/browser@4.0.18(vite@7.3.1(@types/node@25.2.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.0))(vitest@4.0.18))(vitest@4.0.18)