···6PDS=bsky.social
7WSS_URL=wss://your-labeler-service.com/xrpc/com.atproto.label.subscribeLabels
80009# Blob & Image Handling
10HYDRATE_BLOBS=false # Set to true to download images/videos
11BLOB_STORAGE_TYPE=local # 'local' or 's3'
···6PDS=bsky.social
7WSS_URL=wss://your-labeler-service.com/xrpc/com.atproto.label.subscribeLabels
89+# PLC Directory (for DID resolution)
10+PLC_ENDPOINT=https://plc.wtf
11+12# Blob & Image Handling
13HYDRATE_BLOBS=false # Set to true to download images/videos
14BLOB_STORAGE_TYPE=local # 'local' or 's3'
···34 did TEXT PRIMARY KEY,
35 handle TEXT,
36 display_name TEXT,
37- description TEXT
0038);
3940-- Blobs table: stores information about image blobs found in posts
···49 FOREIGN KEY (post_uri) REFERENCES posts(uri)
50);
510000000000000052-- Indexes for performance
53CREATE INDEX IF NOT EXISTS idx_labels_uri ON labels(uri);
54CREATE INDEX IF NOT EXISTS idx_labels_val ON labels(val);
55CREATE INDEX IF NOT EXISTS idx_labels_cts ON labels(cts);
56CREATE INDEX IF NOT EXISTS idx_posts_did ON posts(did);
057CREATE INDEX IF NOT EXISTS idx_blobs_sha256 ON blobs(sha256);
58CREATE INDEX IF NOT EXISTS idx_blobs_phash ON blobs(phash);
0059`;
/**
 * Runs the statements in `SCHEMA_SQL` against the shared database handle.
 *
 * @returns A promise that resolves once the schema SQL has executed, or
 *   rejects with the error reported by the database.
 */
export async function initializeSchema(): Promise<void> {
  const database = getDatabase();

  return new Promise<void>((done, fail) => {
    database.exec(SCHEMA_SQL, (execError) => {
      if (!execError) {
        logger.info("Database schema initialized");
        done();
      } else {
        logger.error({ err: execError }, "Failed to initialize schema");
        fail(execError);
      }
    });
  });
}
···34 did TEXT PRIMARY KEY,
35 handle TEXT,
36 display_name TEXT,
37+ description TEXT,
38+ avatar_cid TEXT,
39+ banner_cid TEXT
40);
4142-- Blobs table: stores information about image blobs found in posts
···51 FOREIGN KEY (post_uri) REFERENCES posts(uri)
52);
5354+-- Profile blobs table: stores avatar and banner blobs for profiles
55+CREATE TABLE IF NOT EXISTS profile_blobs (
56+ did TEXT NOT NULL,
57+ blob_type TEXT NOT NULL CHECK (blob_type IN ('avatar', 'banner')),
58+ blob_cid TEXT NOT NULL,
59+ sha256 TEXT NOT NULL,
60+ phash TEXT,
61+ storage_path TEXT,
62+ mimetype TEXT,
63+ captured_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
64+ PRIMARY KEY (did, blob_type, captured_at),
65+ FOREIGN KEY (did) REFERENCES profiles(did)
66+);
67+68-- Indexes for performance
69CREATE INDEX IF NOT EXISTS idx_labels_uri ON labels(uri);
70CREATE INDEX IF NOT EXISTS idx_labels_val ON labels(val);
71CREATE INDEX IF NOT EXISTS idx_labels_cts ON labels(cts);
72CREATE INDEX IF NOT EXISTS idx_posts_did ON posts(did);
73+CREATE INDEX IF NOT EXISTS idx_blobs_cid ON blobs(blob_cid);
74CREATE INDEX IF NOT EXISTS idx_blobs_sha256 ON blobs(sha256);
75CREATE INDEX IF NOT EXISTS idx_blobs_phash ON blobs(phash);
76+CREATE INDEX IF NOT EXISTS idx_profile_blobs_sha256 ON profile_blobs(sha256);
77+CREATE INDEX IF NOT EXISTS idx_profile_blobs_phash ON profile_blobs(phash);
78`;
/**
 * Adds the `avatar_cid` and `banner_cid` columns to an existing `profiles`
 * table when they are missing.
 *
 * The current column list is read from `information_schema.columns`; only the
 * columns that are absent are added, so running this repeatedly is a no-op
 * once both columns exist.
 *
 * @returns A promise that resolves when no migration is needed or the
 *   `ALTER TABLE` statements have completed.
 * @throws Rejects with the database error if either the column lookup or the
 *   `ALTER TABLE` execution fails (the error is logged first).
 */
async function migrateProfilesTable(): Promise<void> {
  const db = getDatabase();

  // The driver callbacks below only settle the promise; logging and branching
  // happen in the async body so that a thrown error always rejects this
  // function instead of escaping from inside a driver callback.
  let rows: Array<Record<string, unknown>>;
  try {
    rows = await new Promise<Array<Record<string, unknown>>>((resolve, reject) => {
      db.all(
        "SELECT column_name FROM information_schema.columns WHERE table_name = 'profiles'",
        (err, result: Array<Record<string, unknown>>) => {
          if (err) {
            reject(err);
          } else {
            resolve(result);
          }
        }
      );
    });
  } catch (err) {
    logger.error({ err }, "Failed to check profiles table columns");
    throw err;
  }

  const existingColumns = new Set<unknown>(rows.map((row) => row.column_name));

  // One ALTER TABLE statement per missing column, in a fixed order.
  const migrations = ["avatar_cid", "banner_cid"]
    .filter((column) => !existingColumns.has(column))
    .map((column) => `ALTER TABLE profiles ADD COLUMN ${column} TEXT`);

  if (migrations.length === 0) {
    logger.debug("Profiles table already has avatar_cid and banner_cid columns");
    return;
  }

  logger.info("Migrating profiles table to add avatar_cid and banner_cid columns");

  try {
    await new Promise<void>((resolve, reject) => {
      db.exec(migrations.join("; "), (err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
  } catch (err) {
    logger.error({ err }, "Failed to migrate profiles table");
    throw err;
  }

  logger.info("Profiles table migration completed");
}

/**
 * Initializes the database schema and then applies the profiles-table
 * migration.
 *
 * Executes `SCHEMA_SQL` first and, once that has succeeded, awaits
 * `migrateProfilesTable()`.
 *
 * @returns A promise that resolves after both the schema SQL and the
 *   migration have completed.
 * @throws Rejects with the database error if executing `SCHEMA_SQL` fails
 *   (logged first), or with whatever `migrateProfilesTable()` rejects with.
 */
export async function initializeSchema(): Promise<void> {
  const db = getDatabase();

  // The exec callback only settles the promise. Passing an async function as
  // the callback would discard its returned promise, so the follow-up work is
  // awaited here in the async body instead.
  try {
    await new Promise<void>((resolve, reject) => {
      db.exec(SCHEMA_SQL, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
  } catch (err) {
    logger.error({ err }, "Failed to initialize schema");
    throw err;
  }

  logger.info("Database schema initialized");

  // NOTE(review): SCHEMA_SQL has already run at this point. If it creates
  // tables that reference `profiles` via a foreign key, confirm the database
  // engine still permits ALTER TABLE on `profiles` afterwards.
  await migrateProfilesTable();
}
+138-3
src/hydration/profiles.service.ts
···1import { AtpAgent } from "@atproto/api";
2import { Database } from "duckdb";
3import { ProfilesRepository } from "../database/profiles.repository.js";
000004import { pRateLimit } from "p-ratelimit";
5import { withRetry, isRateLimitError, isNetworkError, isServerError, isRecordNotFoundError } from "../utils/retry.js";
6import { logger } from "../logger/index.js";
···9export class ProfileHydrationService {
10 private agent: AtpAgent;
11 private profilesRepo: ProfilesRepository;
0012 private limit: ReturnType<typeof pRateLimit>;
1314 constructor(db: Database) {
15 this.agent = new AtpAgent({ service: `https://${config.bsky.pds}` });
16 this.profilesRepo = new ProfilesRepository(db);
00000000000000017 this.limit = pRateLimit({
18 interval: 300000,
19 rate: 3000,
···38 async hydrateProfile(did: string): Promise<void> {
39 try {
40 const existingProfile = await this.profilesRepo.findByDid(did);
41- if (existingProfile) {
42- logger.debug({ did }, "Profile already hydrated, skipping");
0043 return;
000044 }
4546 const profileResponse = await this.limit(() =>
···6869 let displayName: string | undefined;
70 let description: string | undefined;
007172 if (profileResponse.success && profileResponse.data.value) {
73 const record = profileResponse.data.value as any;
74 displayName = record.displayName;
75 description = record.description;
0000000000000076 }
7778 const profileLookup = await this.limit(() =>
···104 handle,
105 display_name: displayName,
106 description,
00107 });
108109- logger.info({ did, handle }, "Profile hydrated successfully");
0000000000000000110 } catch (error) {
111 if (isRecordNotFoundError(error)) {
112 logger.warn({ did }, "Profile record not found, skipping");
···115 logger.error({ error, did }, "Failed to hydrate profile");
116 throw error;
117 }
0000000000000000000000000000000000000000000000000000000000000000000000000118 }
119}