the statusphere demo reworked into a vite/react app in a monorepo

refactor firehose

dholms a4ee1607 19149b18

+535 -76
+4 -8
package.json
··· 18 18 }, 19 19 "dependencies": { 20 20 "@atproto/repo": "^0.4.1", 21 + "@atproto/syntax": "^0.3.0", 21 22 "@atproto/xrpc-server": "^0.5.3", 22 23 "better-sqlite3": "^11.1.2", 23 24 "cors": "^2.8.5", ··· 28 29 "helmet": "^7.1.0", 29 30 "http-status-codes": "^2.3.0", 30 31 "kysely": "^0.27.4", 32 + "multiformats": "^9.9.0", 31 33 "pino-http": "^10.0.0" 32 34 }, 33 35 "devDependencies": { ··· 46 48 "vitest": "^2.0.0" 47 49 }, 48 50 "lint-staged": { 49 - "*.{js,ts,cjs,mjs,d.cts,d.mts,json,jsonc}": [ 50 - "biome check --apply --no-errors-on-unmatched" 51 - ] 51 + "*.{js,ts,cjs,mjs,d.cts,d.mts,json,jsonc}": ["biome check --apply --no-errors-on-unmatched"] 52 52 }, 53 53 "tsup": { 54 - "entry": [ 55 - "src", 56 - "!src/**/__tests__/**", 57 - "!src/**/*.test.*" 58 - ], 54 + "entry": ["src", "!src/**/__tests__/**", "!src/**/*.test.*"], 59 55 "splitting": false, 60 56 "sourcemap": true, 61 57 "clean": true
+2 -2
src/config.ts
··· 1 1 import type pino from "pino"; 2 2 import type { Database } from "#/db"; 3 - import type { Firehose } from "#/firehose"; 3 + import type { Ingester } from "#/firehose/ingester"; 4 4 5 5 export type AppContext = { 6 6 db: Database; 7 - firehose: Firehose; 7 + ingester: Ingester; 8 8 logger: pino.Logger; 9 9 };
-62
src/firehose.ts
··· 1 - import { cborToLexRecord, readCar } from "@atproto/repo"; 2 - import { Subscription } from "@atproto/xrpc-server"; 3 - import type { Database } from "#/db"; 4 - 5 - export class Firehose { 6 - public sub: Subscription<unknown>; 7 - 8 - constructor(public service: string, public db: Database) { 9 - this.sub = new Subscription({ 10 - service: service, 11 - method: "com.atproto.sync.subscribeRepos", 12 - getParams: () => ({}), 13 - validate: (value: unknown) => value, 14 - }); 15 - } 16 - 17 - async handleEvent(evt: any): Promise<void> { 18 - if (evt.$type !== "com.atproto.sync.subscribeRepos#commit") { 19 - return; 20 - } 21 - 22 - const car = await readCar(evt.blocks); 23 - 24 - for (const op of evt.ops) { 25 - if (op.action !== "create") continue; 26 - const uri = `at://${evt.repo}/${op.path}`; 27 - const [collection] = op.path.split("/"); 28 - if (collection !== "app.bsky.feed.post") continue; 29 - 30 - if (!op.cid) continue; 31 - const recordBytes = car.blocks.get(op.cid); 32 - if (!recordBytes) continue; 33 - const record = cborToLexRecord(recordBytes); 34 - await this.db 35 - .insertInto("post") 36 - .values({ 37 - uri, 38 - text: record.text as string, 39 - indexedAt: new Date().toISOString(), 40 - }) 41 - .execute(); 42 - } 43 - } 44 - 45 - async run(subscriptionReconnectDelay: number) { 46 - try { 47 - for await (const evt of this.sub) { 48 - try { 49 - await this.handleEvent(evt); 50 - } catch (err) { 51 - console.error("repo subscription could not handle message", err); 52 - } 53 - } 54 - } catch (err) { 55 - console.error("repo subscription errored", err); 56 - setTimeout( 57 - () => this.run(subscriptionReconnectDelay), 58 - subscriptionReconnectDelay 59 - ); 60 - } 61 - } 62 - }
+139
src/firehose/firehose.ts
import type { RepoRecord } from "@atproto/lexicon";
import { cborToLexRecord, readCar } from "@atproto/repo";
import { AtUri } from "@atproto/syntax";
import { Subscription } from "@atproto/xrpc-server";
import type { CID } from "multiformats/cid";
import {
  type Commit,
  type RepoEvent,
  isCommit,
  isValidRepoEvent,
} from "./lexicons";

/** Reconnect delay (ms) used when `subscriptionReconnectDelay` is not supplied. */
const DEFAULT_RECONNECT_DELAY_MS = 3000;

type Opts = {
  /** Service to subscribe to; defaults to "https://bsky.network". */
  service?: string;
  /** Returns the cursor to resume from; called whenever connection params are built. */
  getCursor?: () => Promise<number | undefined>;
  /** Called with the `seq` of every event after it has been handled. */
  setCursor?: (cursor: number) => Promise<void>;
  /** Milliseconds to wait before re-subscribing after the subscription errors. */
  subscriptionReconnectDelay?: number;
};

/**
 * Thin wrapper around the `com.atproto.sync.subscribeRepos` subscription that
 * turns raw repo events into flat per-record create/update/delete events.
 */
export class Firehose {
  public sub: Subscription<RepoEvent>;
  private abortController: AbortController;

  constructor(public opts: Opts) {
    this.abortController = new AbortController();
    this.sub = new Subscription({
      service: opts.service ?? "https://bsky.network",
      method: "com.atproto.sync.subscribeRepos",
      signal: this.abortController.signal,
      getParams: async () => {
        if (!opts.getCursor) return undefined;
        const cursor = await opts.getCursor();
        return { cursor };
      },
      validate: (value: unknown) => {
        try {
          return isValidRepoEvent(value);
        } catch (err) {
          console.error("repo subscription skipped invalid message", err);
          // Explicitly yield nothing for a message that failed validation.
          return undefined;
        }
      },
    });
  }

  /**
   * Streams parsed record events from the subscription.
   *
   * When the subscription throws, the generator waits
   * `subscriptionReconnectDelay` ms and then iterates `this.sub` again, so the
   * consumer's `for await` keeps receiving events. (Scheduling `this.run()`
   * from a timer would only create a generator object that nobody iterates.)
   * The generator finishes when the subscription ends normally or after
   * `destroy()` has been called.
   */
  async *run(): AsyncGenerator<Event> {
    const { signal } = this.abortController;
    const reconnectDelay =
      this.opts.subscriptionReconnectDelay ?? DEFAULT_RECONNECT_DELAY_MS;

    while (!signal.aborted) {
      try {
        for await (const evt of this.sub) {
          try {
            const parsed = await parseEvent(evt);
            for (const op of parsed) {
              yield op;
            }
          } catch (err) {
            // A single bad message must not tear down the whole stream.
            console.error("repo subscription could not handle message", err);
          }
          if (this.opts.setCursor && typeof evt.seq === "number") {
            await this.opts.setCursor(evt.seq);
          }
        }
        // The subscription completed without an error: nothing left to read.
        return;
      } catch (err) {
        // An error caused by our own abort is an expected shutdown.
        if (signal.aborted) return;
        console.error("repo subscription errored", err);
      }

      await new Promise<void>((resolve) => setTimeout(resolve, reconnectDelay));
    }
  }

  /** Aborts the underlying subscription; `run()` stops instead of reconnecting. */
  destroy() {
    this.abortController.abort();
  }
}

/** Parses any repo event; only commits produce record events, everything else yields `[]`. */
export const parseEvent = async (evt: RepoEvent): Promise<Event[]> => {
  if (!isCommit(evt)) return [];
  return parseCommit(evt);
};

/**
 * Expands a commit into one event per repo op.
 *
 * Creates and updates carry the decoded record read from the commit's CAR
 * blocks; ops whose `cid` is missing or whose block is absent from the CAR are
 * skipped. Ops with an unrecognised action are ignored.
 */
export const parseCommit = async (evt: Commit): Promise<Event[]> => {
  const car = await readCar(evt.blocks);

  const evts: Event[] = [];

  for (const op of evt.ops) {
    const uri = new AtUri(`at://${evt.repo}/${op.path}`);

    const meta: CommitMeta = {
      uri,
      author: uri.host,
      collection: uri.collection,
      rkey: uri.rkey,
    };

    if (op.action === "create" || op.action === "update") {
      if (!op.cid) continue;
      const recordBytes = car.blocks.get(op.cid);
      if (!recordBytes) continue;
      const record = cborToLexRecord(recordBytes);
      // `action` is an open string union, so rebuild the literal discriminant
      // explicitly instead of asserting it.
      const event: "create" | "update" =
        op.action === "create" ? "create" : "update";
      evts.push({
        ...meta,
        event,
        cid: op.cid,
        record,
      });
    } else if (op.action === "delete") {
      evts.push({
        ...meta,
        event: "delete",
      });
    }
  }

  return evts;
};

/** A single record-level change extracted from a commit. */
export type Event = Create | Update | Delete;

/** Fields shared by every record event, all derived from the record's AT URI. */
export type CommitMeta = {
  uri: AtUri;
  author: string;
  collection: string;
  rkey: string;
};

export type Create = CommitMeta & {
  event: "create";
  record: RepoRecord;
  cid: CID;
};

/** Updates carry the new record and its CID, exactly as `parseCommit` emits them. */
export type Update = CommitMeta & {
  event: "update";
  record: RepoRecord;
  cid: CID;
};

export type Delete = CommitMeta & {
  event: "delete";
};
+30
src/firehose/ingester.ts
import type { Database } from "#/db";
import { Firehose } from "#/firehose/firehose";

/**
 * Consumes the firehose and stores every newly created `app.bsky.feed.post`
 * record in the `post` table.
 */
export class Ingester {
  firehose: Firehose | undefined;
  constructor(public db: Database) {}

  /**
   * Opens a firehose subscription and ingests events until the stream ends.
   * The returned promise settles only when the firehose generator finishes.
   */
  async start() {
    const firehose = new Firehose({});
    // Keep a handle on the instance so destroy() can actually abort it.
    this.firehose = firehose;

    for await (const evt of firehose.run()) {
      if (evt.event !== "create") continue;
      if (evt.collection !== "app.bsky.feed.post") continue;

      // Record fields are untyped; only index posts that really carry text.
      const text = evt.record.text;
      if (typeof text !== "string") continue;

      await this.db
        .insertInto("post")
        .values({
          uri: evt.uri.toString(),
          text,
          indexedAt: new Date().toISOString(),
        })
        .execute();
    }
  }

  /** Aborts the running firehose subscription, if one was started. */
  destroy() {
    this.firehose?.destroy();
    this.firehose = undefined;
  }
}
+355
src/firehose/lexicons.ts
import type { IncomingMessage } from "node:http";

import { type LexiconDoc, Lexicons } from "@atproto/lexicon";
import type { ErrorFrame, HandlerAuth } from "@atproto/xrpc-server";
import type { CID } from "multiformats/cid";

// @NOTE: this file is an ugly copy job of codegen output. I'd like to clean this whole thing up

/** True for any non-null value whose `typeof` is "object". */
export function isObj(v: unknown): v is Record<string, unknown> {
  return v !== null && typeof v === "object";
}

/** Narrows `data` to a record that has the key `prop` (own or inherited). */
export function hasProp<K extends PropertyKey>(
  data: object,
  prop: K
): data is Record<K, unknown> {
  return prop in data;
}

/** Shared check behind the `is*` guards: an object whose `$type` equals `type`. */
function hasType(v: unknown, type: string): boolean {
  return isObj(v) && hasProp(v, "$type") && v.$type === type;
}

export interface QueryParams {
  /** The last known event to backfill from. */
  cursor?: number;
}

export type RepoEvent =
  | Commit
  | Handle
  | Migrate
  | Tombstone
  | Info
  | { $type: string; [k: string]: unknown };
export type HandlerError = ErrorFrame<"FutureCursor" | "ConsumerTooSlow">;
export type HandlerOutput = HandlerError | RepoEvent;
export type HandlerReqCtx<HA extends HandlerAuth = never> = {
  auth: HA;
  params: QueryParams;
  req: IncomingMessage;
  signal: AbortSignal;
};
export type Handler<HA extends HandlerAuth = never> = (
  ctx: HandlerReqCtx<HA>
) => AsyncIterable<HandlerOutput>;

export interface Commit {
  seq: number;
  rebase: boolean;
  tooBig: boolean;
  repo: string;
  commit: CID;
  prev?: CID | null;
  /** The rev of the emitted commit */
  rev: string;
  /** The rev of the last emitted commit from this repo */
  since: string | null;
  /** CAR file containing relevant blocks */
  blocks: Uint8Array;
  ops: RepoOp[];
  blobs: CID[];
  time: string;
  [k: string]: unknown;
}

/** Matches messages tagged `com.atproto.sync.subscribeRepos#commit`. */
export function isCommit(v: unknown): v is Commit {
  return hasType(v, "com.atproto.sync.subscribeRepos#commit");
}

export interface Handle {
  seq: number;
  did: string;
  handle: string;
  time: string;
  [k: string]: unknown;
}

/** Matches messages tagged `com.atproto.sync.subscribeRepos#handle`. */
export function isHandle(v: unknown): v is Handle {
  return hasType(v, "com.atproto.sync.subscribeRepos#handle");
}

export interface Migrate {
  seq: number;
  did: string;
  migrateTo: string | null;
  time: string;
  [k: string]: unknown;
}

/** Matches messages tagged `com.atproto.sync.subscribeRepos#migrate`. */
export function isMigrate(v: unknown): v is Migrate {
  return hasType(v, "com.atproto.sync.subscribeRepos#migrate");
}

export interface Tombstone {
  seq: number;
  did: string;
  time: string;
  [k: string]: unknown;
}

/** Matches messages tagged `com.atproto.sync.subscribeRepos#tombstone`. */
export function isTombstone(v: unknown): v is Tombstone {
  return hasType(v, "com.atproto.sync.subscribeRepos#tombstone");
}

export interface Info {
  name: "OutdatedCursor" | (string & {});
  message?: string;
  [k: string]: unknown;
}

/** Matches messages tagged `com.atproto.sync.subscribeRepos#info`. */
export function isInfo(v: unknown): v is Info {
  return hasType(v, "com.atproto.sync.subscribeRepos#info");
}

/** A repo operation, ie a write of a single record. For creates and updates, cid is the record's CID as of this operation. For deletes, it's null. */
export interface RepoOp {
  action: "create" | "update" | "delete" | (string & {});
  path: string;
  cid: CID | null;
  [k: string]: unknown;
}

/** Matches values tagged `com.atproto.sync.subscribeRepos#repoOp`. */
export function isRepoOp(v: unknown): v is RepoOp {
  return hasType(v, "com.atproto.sync.subscribeRepos#repoOp");
}

/** Lexicon document describing the subscription and each of its message shapes. */
export const ComAtprotoSyncSubscribeRepos: LexiconDoc = {
  lexicon: 1,
  id: "com.atproto.sync.subscribeRepos",
  defs: {
    main: {
      type: "subscription",
      description: "Subscribe to repo updates",
      parameters: {
        type: "params",
        properties: {
          cursor: {
            type: "integer",
            description: "The last known event to backfill from.",
          },
        },
      },
      message: {
        schema: {
          type: "union",
          refs: [
            "lex:com.atproto.sync.subscribeRepos#commit",
            "lex:com.atproto.sync.subscribeRepos#handle",
            "lex:com.atproto.sync.subscribeRepos#migrate",
            "lex:com.atproto.sync.subscribeRepos#tombstone",
            "lex:com.atproto.sync.subscribeRepos#info",
          ],
        },
      },
      errors: [{ name: "FutureCursor" }, { name: "ConsumerTooSlow" }],
    },
    commit: {
      type: "object",
      required: [
        "seq",
        "rebase",
        "tooBig",
        "repo",
        "commit",
        "rev",
        "since",
        "blocks",
        "ops",
        "blobs",
        "time",
      ],
      nullable: ["prev", "since"],
      properties: {
        seq: { type: "integer" },
        rebase: { type: "boolean" },
        tooBig: { type: "boolean" },
        repo: { type: "string", format: "did" },
        commit: { type: "cid-link" },
        prev: { type: "cid-link" },
        rev: {
          type: "string",
          description: "The rev of the emitted commit",
        },
        since: {
          type: "string",
          description: "The rev of the last emitted commit from this repo",
        },
        blocks: {
          type: "bytes",
          description: "CAR file containing relevant blocks",
          maxLength: 1000000,
        },
        ops: {
          type: "array",
          items: {
            type: "ref",
            ref: "lex:com.atproto.sync.subscribeRepos#repoOp",
          },
          maxLength: 200,
        },
        blobs: {
          type: "array",
          items: { type: "cid-link" },
        },
        time: { type: "string", format: "datetime" },
      },
    },
    handle: {
      type: "object",
      required: ["seq", "did", "handle", "time"],
      properties: {
        seq: { type: "integer" },
        did: { type: "string", format: "did" },
        handle: { type: "string", format: "handle" },
        time: { type: "string", format: "datetime" },
      },
    },
    migrate: {
      type: "object",
      required: ["seq", "did", "migrateTo", "time"],
      nullable: ["migrateTo"],
      properties: {
        seq: { type: "integer" },
        did: { type: "string", format: "did" },
        migrateTo: { type: "string" },
        time: { type: "string", format: "datetime" },
      },
    },
    tombstone: {
      type: "object",
      required: ["seq", "did", "time"],
      properties: {
        seq: { type: "integer" },
        did: { type: "string", format: "did" },
        time: { type: "string", format: "datetime" },
      },
    },
    info: {
      type: "object",
      required: ["name"],
      properties: {
        name: { type: "string", knownValues: ["OutdatedCursor"] },
        message: { type: "string" },
      },
    },
    repoOp: {
      type: "object",
      description:
        "A repo operation, ie a write of a single record. For creates and updates, cid is the record's CID as of this operation. For deletes, it's null.",
      required: ["action", "path", "cid"],
      nullable: ["cid"],
      properties: {
        action: {
          type: "string",
          knownValues: ["create", "update", "delete"],
        },
        path: { type: "string" },
        cid: { type: "cid-link" },
      },
    },
  },
};

const lexicons = new Lexicons([ComAtprotoSyncSubscribeRepos]);

/**
 * Checks `evt` against the subscribeRepos message schema and returns whatever
 * `assertValidXrpcMessage` returns; a failure from that call propagates to the caller.
 */
export const isValidRepoEvent = (evt: unknown) =>
  lexicons.assertValidXrpcMessage<RepoEvent>(
    "com.atproto.sync.subscribeRepos",
    evt
  );
+5 -4
src/server.ts
··· 9 9 import requestLogger from "#/common/middleware/requestLogger"; 10 10 import { env } from "#/common/utils/envConfig"; 11 11 import { createDb, migrateToLatest } from "#/db"; 12 - import { Firehose } from "#/firehose"; 12 + import { Ingester } from "#/firehose/ingester"; 13 13 import { createRouter } from "#/routes"; 14 14 import type { AppContext } from "./config"; 15 15 ··· 26 26 const logger = pino({ name: "server start" }); 27 27 const db = createDb(":memory:"); 28 28 await migrateToLatest(db); 29 - const firehose = new Firehose("https://bsky.network", db); 30 - firehose.run(10); 29 + const ingester = new Ingester(db); 30 + ingester.start(); 31 31 const ctx = { 32 32 db, 33 - firehose, 33 + ingester, 34 34 logger, 35 35 }; 36 36 ··· 67 67 68 68 async close() { 69 69 this.ctx.logger.info("sigint received, shutting down"); 70 + this.ctx.ingester.destroy(); 70 71 return new Promise<void>((resolve) => { 71 72 this.server.close(() => { 72 73 this.ctx.logger.info("server closed");