a collection of lightweight TypeScript packages for AT Protocol, the protocol powering Bluesky

refactor(cbor): separate POJO/non-POJO handling for cid-link and bytes

mary.my.id 9b2db27d ece2cea2

verified
+379 -30
+44 -22
packages/utilities/cbor/lib/encode.ts
··· 290 290 return; 291 291 } 292 292 293 - // case: cid-link 294 - if ('$link' in val) { 295 - if (val instanceof CidLinkWrapper || typeof val.$link === 'string') { 296 - writeCid(state, val); 297 - return; 298 - } 299 - 300 - throw new TypeError(`unexpected cid-link value`); 301 - } 302 - 303 - // case: bytes 304 - if ('$bytes' in val) { 305 - if (val instanceof BytesWrapper || typeof val.$bytes === 'string') { 306 - writeBytes(state, val); 307 - return; 308 - } 309 - 310 - throw new TypeError(`unexpected bytes value`); 311 - } 312 - 313 293 // case: POJO 314 294 if (val.constructor === Object) { 315 295 const keys = getOrderedObjectKeys(val); 316 296 const len = keys.length; 317 297 298 + if (len === 1) { 299 + const key = keys[0]!; 300 + 301 + if (key === '$link') { 302 + if (typeof val.$link === 'string') { 303 + writeCid(state, val); 304 + return; 305 + } 306 + 307 + throw new TypeError(`unexpected cid-link value`); 308 + } 309 + 310 + if (key === '$bytes') { 311 + if (typeof val.$bytes === 'string') { 312 + writeBytes(state, val); 313 + return; 314 + } 315 + 316 + throw new TypeError(`unexpected bytes value`); 317 + } 318 + } 319 + 318 320 resizeIfNeeded(state, MAX_TYPE_ARG_LEN); 319 321 writeTypeAndArgument(state, 5, len); 320 322 ··· 327 329 328 330 return; 329 331 } 332 + 333 + // case: cid-link wrappers / odd objects 334 + if ('$link' in val) { 335 + if (val instanceof CidLinkWrapper || typeof val.$link === 'string') { 336 + writeCid(state, val); 337 + return; 338 + } 339 + 340 + throw new TypeError(`unexpected cid-link value`); 341 + } 342 + 343 + // case: bytes wrappers / odd objects 344 + if ('$bytes' in val) { 345 + if (val instanceof BytesWrapper || typeof val.$bytes === 'string') { 346 + writeBytes(state, val); 347 + return; 348 + } 349 + 350 + throw new TypeError(`unexpected bytes value`); 351 + } 330 352 } 331 353 } 332 354 ··· 366 388 continue; 367 389 } 368 390 391 + const lenA = valA.length; 369 392 let j = len - 1; 370 393 for (; j >= 0; j--) 
{ 371 394 const valB = keys[j]; 372 395 373 396 // Note: Don't need to check for equality, keys are always distinct. 374 - const cmp = valA.length - valB.length || +(valA > valB); 375 - if (cmp > 0) { 397 + if (lenA > valB.length || (lenA === valB.length && valA > valB)) { 376 398 break; 377 399 } 378 400
+333
packages/utilities/cbor/lib/firehose.bench.ts
import * as ipld from '@ipld/dag-cbor';
import * as cborx from 'cbor-x/index-no-eval';
import { bench, do_not_optimize, run, summary } from 'mitata';

import ozoneDefs from '../../../definitions/ozone/lexicons/tools/ozone/moderation/defs.json' with { type: 'json' };

import * as atcute from './index.ts';

/** Mutable state of the linear congruential generator that drives fixture generation. */
interface Lcg {
	s: number;
}

/** CID string reused for every `$link` value in the generated fixtures. */
const VALID_CID = 'bafyreihffx5a2e7k5uwrmmgofbvzujc5cmw5h4espouwuxt3liqoflx3ee';

/** Word pool that `makeText` draws from. */
const WORDS = [
	'atproto',
	'bluesky',
	'firehose',
	'post',
	'record',
	'stream',
	'facet',
	'link',
	'ops',
	'seq',
	'commit',
	'repo',
	'mst',
	'car',
	'block',
	'json',
	'cbor',
	'event',
	'codec',
	'lexicon',
] as const;

/** Domain suffixes used when building the `actor` host name. */
const TLDS = ['bsky.social', 'example.com', 'test.dev'] as const;

/** Language tags a generated post can carry. */
const LANGS = ['en', 'ja', 'pt', 'de'] as const;

/** Collection NSIDs a generated `putRecord` fixture can name. */
const COLLECTIONS = [
	'app.bsky.feed.post',
	'app.bsky.feed.like',
	'app.bsky.feed.repost',
	'app.bsky.graph.follow',
] as const;

/** Symbols emitted by `toBase32`, indexed by a 5-bit value. */
const BASE32_ALPHABET = 'abcdefghijklmnopqrstuvwxyz234567';

/** Advances the generator by one step and returns the new unsigned 32-bit state. */
function rand(lcg: Lcg): number {
	const next = (1664525 * lcg.s + 1013904223) >>> 0;
	lcg.s = next;
	return next;
}

/** Draws one value from the generator and reduces it modulo `max`. */
function randInt(lcg: Lcg, max: number): number {
	return rand(lcg) % max;
}

/** Returns one element of `values`, selected by a single generator draw. */
function pick<T>(lcg: Lcg, values: readonly T[]): T {
	return values[randInt(lcg, values.length)]!;
}

/**
 * Expands `num` into `len` characters of the base32 alphabet.
 *
 * Each character comes from the low five bits of a running value, which is then re-scrambled.
 * The scramble step is ordinary double-precision arithmetic and its product can exceed 2^53;
 * it is kept exactly as written so that the generated strings stay the same.
 */
function toBase32(num: number, len: number): string {
	let state = num >>> 0;
	let encoded = '';

	for (let i = 0; i < len; i++) {
		encoded += BASE32_ALPHABET[state & 31];
		state = (state * 1103515245 + 12345) >>> 0;
	}

	return encoded;
}

/** Builds a `did:plc:` identifier with a 24-character suffix derived from `n`. */
function makeDid(n: number): string {
	return `did:plc:${toBase32(n, 24)}`;
}

/** Builds an `at://` URI whose record key is 13 characters derived from `seed`. */
function makeAtUri(did: string, collection: string, seed: number): string {
	return `at://${did}/${collection}/${toBase32(seed, 13)}`;
}

/** Joins between `minWords` and `maxWords` (inclusive) words from `WORDS` with single spaces. */
function makeText(lcg: Lcg, minWords: number, maxWords: number): string {
	const count = minWords + randInt(lcg, maxWords - minWords + 1);
	const words: string[] = [];

	for (let i = 0; i < count; i++) {
		words.push(pick(lcg, WORDS));
	}

	return words.join(' ');
}

/**
 * Builds an `app.bsky.feed.post` fixture.
 *
 * Generator draws happen in the order text, language, facet check, facet URI. Changing that
 * order changes every fixture generated afterwards.
 */
function makePost(lcg: Lcg, actor: string, createdAt: string): unknown {
	const text = makeText(lcg, 20, 60);
	const lang = pick(lcg, LANGS);
	const hasFacet = randInt(lcg, 4) === 0;

	// The `facets` key is always present; it holds `undefined` when no facet was generated.
	const facets = hasFacet
		? [
				{
					features: [
						{
							$type: 'app.bsky.richtext.facet#link',
							uri: `https://${actor}/${toBase32(rand(lcg), 8)}`,
						},
					],
					index: {
						byteStart: 0,
						byteEnd: 20,
					},
				},
			]
		: undefined;

	return {
		$type: 'app.bsky.feed.post',
		text,
		createdAt,
		langs: [lang],
		facets,
	};
}

/** Builds a like or repost fixture, i.e. a record whose `subject` is a `{ uri, cid }` pair. */
function makeSubjectRecord(
	lcg: Lcg,
	type: 'app.bsky.feed.like' | 'app.bsky.feed.repost',
	createdAt: string,
): unknown {
	// The subject DID is drawn before the record-key seed.
	const subjectDid = makeDid(rand(lcg));
	const uri = makeAtUri(subjectDid, 'app.bsky.feed.post', rand(lcg));

	return {
		$type: type,
		subject: {
			uri,
			cid: { $link: VALID_CID },
		},
		createdAt,
	};
}

/** Builds an `app.bsky.graph.follow` fixture pointing at a freshly generated DID. */
function makeFollow(lcg: Lcg, createdAt: string): unknown {
	return {
		$type: 'app.bsky.graph.follow',
		subject: makeDid(rand(lcg)),
		createdAt,
	};
}

/**
 * Builds a `com.atproto.repo.putRecord` fixture wrapping a profile record.
 *
 * Generator draws happen in the order collection, rkey, both display-name halves, description,
 * swapRecord check.
 */
function makePutRecord(lcg: Lcg, did: string): unknown {
	const collection = pick(lcg, COLLECTIONS);
	const rkey = toBase32(rand(lcg), 13);
	const nameHead = toBase32(rand(lcg), 8);
	const nameTail = toBase32(rand(lcg), 8);
	const description = makeText(lcg, 20, 40);
	// The `swapRecord` key is always present; it holds `undefined` when the draw is 0.
	const swapRecord = randInt(lcg, 2) ? { $link: VALID_CID } : undefined;

	return {
		$type: 'com.atproto.repo.putRecord',
		repo: did,
		collection,
		rkey,
		record: {
			$type: 'app.bsky.actor.profile',
			displayName: `${nameHead} ${nameTail}`,
			description,
			avatar: {
				ref: { $link: VALID_CID },
				mimeType: 'image/jpeg',
				size: 12345,
			},
		},
		swapRecord,
	};
}

/**
 * Generates `count` synthetic records from a fixed generator seed, cycling through five record
 * kinds, then appends `max(1, count >> 6)` clones of the ozone moderation lexicon document.
 */
function makeFirehoseFixtures(count: number): unknown[] {
	const lcg: Lcg = { s: 0xdecafbad };
	const fixtures: unknown[] = [];

	for (let i = 0; i < count; i++) {
		// `did` and `actor` are drawn on every iteration, including for the kinds that do not
		// use them.
		const did = makeDid(rand(lcg));
		const actor = `${toBase32(rand(lcg), 10)}.${pick(lcg, TLDS)}`;
		const createdAt = `2025-0${(i % 9) + 1}-1${i % 9}T1${i % 10}:2${i % 6}:3${i % 10}.000Z`;

		const kind = i % 5;
		if (kind === 0) {
			fixtures.push(makePost(lcg, actor, createdAt));
		} else if (kind === 1) {
			fixtures.push(makeSubjectRecord(lcg, 'app.bsky.feed.like', createdAt));
		} else if (kind === 2) {
			fixtures.push(makeSubjectRecord(lcg, 'app.bsky.feed.repost', createdAt));
		} else if (kind === 3) {
			fixtures.push(makeFollow(lcg, createdAt));
		} else {
			fixtures.push(makePutRecord(lcg, did));
		}
	}

	const lexiconCount = Math.max(1, count >> 6);
	for (let i = 0; i < lexiconCount; i++) {
		fixtures.push(structuredClone(ozoneDefs));
	}

	return fixtures;
}

/**
 * Returns a deep copy of `value` in which object properties holding `undefined` are omitted.
 * Arrays keep their length, and non-object values are returned as-is.
 */
function stripUndefined(value: unknown): unknown {
	if (Array.isArray(value)) {
		return Array.from(value, (item) => stripUndefined(item));
	}

	if (typeof value === 'object' && value !== null) {
		const source = value as Record<string, unknown>;
		const cleaned: Record<string, unknown> = {};

		for (const key of Object.keys(source)) {
			const prop = source[key];
			if (prop !== undefined) {
				cleaned[key] = stripUndefined(prop);
			}
		}

		return cleaned;
	}

	return value;
}

const FIXTURES = makeFirehoseFixtures(2000);
// @ipld/dag-cbor gets a copy without `undefined` properties.
// NOTE(review): presumably because it rejects `undefined` - confirm.
const IPLD_FIXTURES = FIXTURES.map((value) => stripUndefined(value));
// Every decode benchmark reads the buffers produced by the atcute encoder.
const ATCUTE_BUFFERS = FIXTURES.map((value) => atcute.encode(value));

// Encode comparison: each benchmark sums the encoded byte lengths of all fixtures.
summary(() => {
	bench('cbor-x encode (firehose mix)', function* () {
		yield {
			[0]() {
				return FIXTURES;
			},
			bench(records: unknown[]) {
				// The encoder is constructed inside the bench callback.
				const encoder = new cborx.Encoder({ useRecords: false });
				let bytes = 0;

				for (let idx = 0; idx < records.length; idx++) {
					bytes += encoder.encode(records[idx]!).byteLength;
				}

				return do_not_optimize(bytes);
			},
		};
	});

	bench('@ipld/dag-cbor encode (firehose mix)', function* () {
		yield {
			[0]() {
				return IPLD_FIXTURES;
			},
			bench(records: unknown[]) {
				let bytes = 0;

				for (let idx = 0; idx < records.length; idx++) {
					bytes += ipld.encode(records[idx]!).byteLength;
				}

				return do_not_optimize(bytes);
			},
		};
	});

	bench('@atcute/cbor encode (firehose mix)', function* () {
		yield {
			[0]() {
				return FIXTURES;
			},
			bench(records: unknown[]) {
				let bytes = 0;

				for (let idx = 0; idx < records.length; idx++) {
					bytes += atcute.encode(records[idx]!).byteLength;
				}

				return do_not_optimize(bytes);
			},
		};
	});
});

// Decode comparison: each benchmark counts how many buffers decode to a non-null object.
summary(() => {
	bench('cbor-x decode (firehose mix)', function* () {
		yield {
			[0]() {
				return ATCUTE_BUFFERS;
			},
			bench(buffers: Uint8Array[]) {
				// The decoder is constructed inside the bench callback.
				const decoder = new cborx.Decoder({ useRecords: false });
				let objects = 0;

				for (let idx = 0; idx < buffers.length; idx++) {
					const decoded = decoder.decode(buffers[idx]!);
					if (typeof decoded === 'object' && decoded) {
						objects += 1;
					}
				}

				return do_not_optimize(objects);
			},
		};
	});

	bench('@ipld/dag-cbor decode (firehose mix)', function* () {
		yield {
			[0]() {
				return ATCUTE_BUFFERS;
			},
			bench(buffers: Uint8Array[]) {
				let objects = 0;

				for (let idx = 0; idx < buffers.length; idx++) {
					const decoded = ipld.decode(buffers[idx]!);
					if (typeof decoded === 'object' && decoded) {
						objects += 1;
					}
				}

				return do_not_optimize(objects);
			},
		};
	});

	bench('@atcute/cbor decode (firehose mix)', function* () {
		yield {
			[0]() {
				return ATCUTE_BUFFERS;
			},
			bench(buffers: Uint8Array[]) {
				let objects = 0;

				for (let idx = 0; idx < buffers.length; idx++) {
					const decoded = atcute.decode(buffers[idx]!);
					if (typeof decoded === 'object' && decoded) {
						objects += 1;
					}
				}

				return do_not_optimize(objects);
			},
		};
	});
});

await run();
+2 -8
packages/utilities/cbor/lib/large.bench.ts
··· 2 2 import * as cborx from 'cbor-x'; 3 3 import { bench, do_not_optimize, run, summary } from 'mitata'; 4 4 5 - import * as atcute from './index.ts'; 6 - 7 - const OBJECT = await fetch( 8 - 'https://tangled.org/@mary.my.id/atcute/raw/trunk/packages/definitions/ozone/lexicons/tools/ozone/moderation/defs.json', 9 - ).then((r) => r.json()); 5 + import OBJECT from '../../../definitions/ozone/lexicons/tools/ozone/moderation/defs.json' with { type: 'json' }; 10 6 11 - if (OBJECT.id !== 'tools.ozone.moderation.defs') { 12 - throw new Error(`invalid`); 13 - } 7 + import * as atcute from './index.ts'; 14 8 15 9 const BUFFER = atcute.encode(OBJECT); 16 10