···1+import { getAllSites } from './db';
2+import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils';
3+import { logger } from './observability';
/**
 * Tuning knobs accepted by `backfillCache`. Every field is optional.
 */
export interface BackfillOptions {
  /** Skip sites that are already in the cache. `backfillCache` defaults this to `true`. */
  skipExisting?: boolean;
  /** Number of sites to cache concurrently. `backfillCache` defaults this to `3`. */
  concurrency?: number;
  /** Maximum number of sites to backfill (for testing). Left unset, every site is processed. */
  maxSites?: number;
}
/**
 * Counters reported by `backfillCache` once a run ends.
 */
export interface BackfillStats {
  /** Number of sites returned by the database, counted before any `maxSites` limit is applied. */
  total: number;
  /** Sites that were downloaded and written to the cache during this run. */
  cached: number;
  /** Sites left untouched because they were already cached. */
  skipped: number;
  /** Sites whose record or PDS could not be found, or whose download threw. */
  failed: number;
  /** Wall-clock time of the run in milliseconds. */
  duration: number;
}
/**
 * Backfill all sites from the database into the local cache.
 *
 * Sites are loaded with `getAllSites()`, optionally truncated to `maxSites`,
 * and then processed in consecutive batches of `concurrency` sites. All sites
 * of a batch are handled in parallel; the next batch starts only once every
 * site of the current batch has finished.
 *
 * A problem with a single site (missing record, missing PDS, failed download)
 * is logged and counted in `failed` without stopping the run. An error outside
 * the per-site work is logged as well, and the statistics collected up to that
 * point are returned.
 *
 * @param options - Optional tuning. `skipExisting` defaults to `true`,
 *   `concurrency` to `3`. A `concurrency` below 1 (or `NaN`) is treated as 1 and
 *   fractional values are rounded down. `maxSites` only applies when positive.
 * @returns Counters for the run. `total` is the number of sites found in the
 *   database before `maxSites` is applied; `duration` is in milliseconds.
 */
export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> {
  const { skipExisting = true, concurrency = 3, maxSites } = options;

  // A step of 0, a negative number or NaN would stop the batching loop below
  // from ever advancing (or from picking up any site), so fall back to one
  // site at a time. Infinity is kept: it yields one batch with every site.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;

  const startTime = Date.now();
  const stats: BackfillStats = { total: 0, cached: 0, skipped: 0, failed: 0, duration: 0 };

  logger.info('Starting cache backfill', { skipExisting, concurrency: batchSize, maxSites });
  console.log(`
╔══════════════════════════════════════════╗
║ CACHE BACKFILL STARTING ║
╚══════════════════════════════════════════╝
 `);

  try {
    // Get all sites from database
    let sites = await getAllSites();
    stats.total = sites.length;

    logger.info(`Found ${sites.length} sites in database`);
    console.log(`📊 Found ${sites.length} sites in database`);

    // Limit if specified
    if (maxSites && maxSites > 0) {
      sites = sites.slice(0, maxSites);
      console.log(`⚙️ Limited to ${maxSites} sites for backfill`);
    }

    let processed = 0;
    // Bumps the counter for `outcome` and returns the "[done/total]" progress tag.
    const tally = (outcome: 'cached' | 'skipped' | 'failed'): string => {
      stats[outcome]++;
      processed++;
      return `[${processed}/${sites.length}]`;
    };

    // Process sites in batches
    for (let start = 0; start < sites.length; start += batchSize) {
      const batch = sites.slice(start, start + batchSize);

      await Promise.all(
        batch.map(async (site) => {
          // `||` on purpose: an empty display name should fall back to the rkey too.
          const label = site.display_name || site.rkey;

          try {
            // Check if already cached
            // NOTE(review): isCached is used as a synchronous check here; if it
            // returns a Promise this condition is always truthy — confirm.
            if (skipExisting && isCached(site.did, site.rkey)) {
              const tag = tally('skipped');
              logger.debug(`Skipping already cached site`, { did: site.did, rkey: site.rkey });
              console.log(`⏭️ ${tag} Skipped (cached): ${label}`);
              return;
            }

            // Fetch site record
            const siteData = await fetchSiteRecord(site.did, site.rkey);
            if (!siteData) {
              const tag = tally('failed');
              logger.error('Site record not found during backfill', null, { did: site.did, rkey: site.rkey });
              console.log(`❌ ${tag} Failed (not found): ${label}`);
              return;
            }

            // Get PDS endpoint
            const pdsEndpoint = await getPdsForDid(site.did);
            if (!pdsEndpoint) {
              const tag = tally('failed');
              logger.error('PDS not found during backfill', null, { did: site.did });
              console.log(`❌ ${tag} Failed (no PDS): ${label}`);
              return;
            }

            // Download and cache site
            await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid);
            const tag = tally('cached');
            logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
            console.log(`✅ ${tag} Cached: ${label}`);
          } catch (err) {
            // One broken site must not abort the rest of the batch.
            const tag = tally('failed');
            logger.error('Failed to cache site during backfill', err, { did: site.did, rkey: site.rkey });
            console.log(`❌ ${tag} Failed: ${label}`);
          }
        })
      );
    }

    stats.duration = Date.now() - startTime;

    console.log(`
╔══════════════════════════════════════════╗
║ CACHE BACKFILL COMPLETED ║
╚══════════════════════════════════════════╝

📊 Total Sites: ${stats.total}
✅ Cached: ${stats.cached}
⏭️ Skipped: ${stats.skipped}
❌ Failed: ${stats.failed}
⏱️ Duration: ${(stats.duration / 1000).toFixed(2)}s
 `);

    logger.info('Cache backfill completed', stats);
  } catch (err) {
    // Report the elapsed time even when the run is cut short.
    stats.duration = Date.now() - startTime;
    logger.error('Cache backfill failed', err);
    console.error('❌ Cache backfill failed:', err);
  }

  return stats;
}
+19
hosting-service/src/lib/db.ts
···81 }
82}
83000000000000000000084/**
85 * Generate a numeric lock ID from a string key
86 * PostgreSQL advisory locks use bigint (64-bit signed integer)
···81 }
82}
/**
 * Columns selected from the `sites` table by `getAllSites`.
 */
export interface SiteRecord {
  /** Value of the `did` column. */
  did: string;
  /** Value of the `rkey` column. */
  rkey: string;
  /**
   * Value of the `display_name` column, when present.
   * NOTE(review): if the column is nullable the driver presumably yields
   * `null` rather than `undefined` here — confirm before relying on the type.
   */
  display_name?: string;
}
/**
 * Load `did`, `rkey` and `display_name` for every row of the `sites` table,
 * ordered by `created_at` descending.
 *
 * This function does not reject when the query fails: the error is written to
 * `console.error` and an empty array is returned, so callers cannot tell a
 * failed query from an empty table.
 *
 * @returns The selected rows, or `[]` when the query fails.
 */
export async function getAllSites(): Promise<SiteRecord[]> {
  try {
    // `await` inside the try block so a rejected query lands in the catch below.
    return await sql<SiteRecord[]>`
      SELECT did, rkey, display_name FROM sites
      ORDER BY created_at DESC
    `;
  } catch (err) {
    console.error('Failed to get all sites', err);
    return [];
  }
}
102+103/**
104 * Generate a numeric lock ID from a string key
105 * PostgreSQL advisory locks use bigint (64-bit signed integer)