···11+# Database
22+DATABASE_URL=postgres://postgres:postgres@localhost:5432/wisp
33+44+# Firehose
55+FIREHOSE_SERVICE=wss://bsky.network
66+FIREHOSE_MAX_CONCURRENCY=5
77+88+# Redis (cache invalidation + revalidation queue)
99+REDIS_URL=redis://localhost:6379
1010+1111+# S3 Storage (leave empty for local disk fallback)
1212+S3_BUCKET=
1313+S3_METADATA_BUCKET=
1414+S3_REGION=auto
1515+S3_ENDPOINT=
1616+S3_PREFIX=sites/
1717+S3_FORCE_PATH_STYLE=true
1818+1919+# AWS Credentials (required if using S3)
2020+AWS_ACCESS_KEY_ID=
2121+AWS_SECRET_ACCESS_KEY=
2222+2323+# Health check server
2424+HEALTH_PORT=3001
2525+2626+# For local disk fallback (when S3_BUCKET is empty)
2727+CACHE_DIR=./cache/sites
+2
apps/firehose-service/src/index.ts
···1414import { storage } from './lib/storage';
1515import { handleSiteCreateOrUpdate, fetchSiteRecord } from './lib/cache-writer';
1616import { startRevalidateWorker, stopRevalidateWorker } from './lib/revalidate-worker';
1717+import { closeCacheInvalidationPublisher } from './lib/cache-invalidation';
17181819const app = new Hono();
1920···41424243 stopFirehose();
4344 await stopRevalidateWorker();
4545+ await closeCacheInvalidationPublisher();
4446 await closeDatabase();
45474648 console.log('[Service] Shutdown complete');
···44# Server
55PORT=3001
66BASE_HOST=wisp.place
77+88+# Redis (cache invalidation + revalidation queue)
99+REDIS_URL=redis://localhost:6379
1010+1111+# S3 Storage (leave empty for local disk fallback)
1212+S3_BUCKET=
1313+S3_METADATA_BUCKET=
1414+S3_REGION=auto
1515+S3_ENDPOINT=
1616+S3_PREFIX=sites/
1717+S3_FORCE_PATH_STYLE=true
1818+1919+# AWS Credentials (required if using S3)
2020+AWS_ACCESS_KEY_ID=
2121+AWS_SECRET_ACCESS_KEY=
2222+2323+# For local disk fallback (when S3_BUCKET is empty)
2424+CACHE_DIR=./cache/sites
···1818import { enqueueRevalidate } from './revalidate-queue';
1919import { recordStorageMiss } from './revalidate-metrics';
2020import { normalizeFileCids } from '@wispplace/fs-utils';
2121+import { fetchAndCacheSite } from './on-demand-cache';
2222+import type { StorageResult } from '@wispplace/tiered-storage';
2323+2424+type FileStorageResult = StorageResult<Uint8Array>;
21252226/**
2327 * Helper to retrieve a file with metadata from tiered storage
···9195 rkey: string,
9296 filePath: string,
9397 preferRewrittenHtml: boolean
9494-): Promise<{ result: Awaited<ReturnType<typeof storage.getWithMetadata>>; filePath: string } | null> {
9898+): Promise<{ result: FileStorageResult; filePath: string } | null> {
9599 const mimeTypeGuess = lookup(filePath) || 'application/octet-stream';
96100 if (preferRewrittenHtml && isHtmlContent(filePath, mimeTypeGuess)) {
97101 const rewrittenPath = `.rewritten/${filePath}`;
···107111}
108112109113function buildResponseFromStorageResult(
110110- result: Awaited<ReturnType<typeof storage.getWithMetadata>>,
114114+ result: FileStorageResult,
111115 filePath: string,
112116 settings: WispSettings | null,
113117 requestHeaders?: Record<string, string>
···149153}
150154151155/**
156156+ * Ensure a site is cached locally. If the site has no DB entry (completely unknown),
157157+ * attempt to fetch and cache it on-demand from the PDS.
158158+ */
159159+async function ensureSiteCached(did: string, rkey: string): Promise<void> {
160160+ const existing = await getSiteCache(did, rkey);
161161+ if (existing) return; // Site is known, proceed normally
162162+163163+ // Site is completely unknown — try on-demand fetch
164164+ console.log(`[FileServing] Site ${did}/${rkey} not in DB, attempting on-demand cache`);
165165+ await fetchAndCacheSite(did, rkey);
166166+}
167167+168168+/**
152169 * Helper to serve files from cache (for custom domains and subdomains)
153170 */
154171export async function serveFromCache(
···158175 fullUrl?: string,
159176 headers?: Record<string, string>
160177): Promise<Response> {
178178+ // Check if this site is completely unknown (not in DB, no files in storage)
179179+ // If so, attempt to fetch and cache it on-demand from the PDS
180180+ await ensureSiteCached(did, rkey);
181181+161182 // Load settings for this site
162183 const settings = await getCachedSettings(did, rkey);
163184 const indexFiles = getIndexFiles(settings);
···445466 fullUrl?: string,
446467 headers?: Record<string, string>
447468): Promise<Response> {
469469+ // Check if this site is completely unknown (not in DB, no files in storage)
470470+ // If so, attempt to fetch and cache it on-demand from the PDS
471471+ await ensureSiteCached(did, rkey);
472472+448473 // Load settings for this site
449474 const settings = await getCachedSettings(did, rkey);
450475 const indexFiles = getIndexFiles(settings);
+259
apps/hosting-service/src/lib/on-demand-cache.ts
···11+/**
22+ * On-demand site caching for the hosting service
33+ *
44+ * When a request hits a site that is completely missing (no DB entry, no files),
55+ * this module fetches the site record from the PDS, downloads all blobs,
66+ * writes them to local storage (hot + warm tiers), and updates the DB.
77+ *
88+ * This gives immediate serving capability. A revalidate is also enqueued
99+ * so the firehose-service backfills S3 (cold tier).
1010+ */
1111+1212+import type { Record as WispFsRecord, Directory, Entry, File } from '@wispplace/lexicons/types/place/wisp/fs';
1313+import { safeFetchJson, safeFetchBlob } from '@wispplace/safe-fetch';
1414+import { extractBlobCid, getPdsForDid } from '@wispplace/atproto-utils';
1515+import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression';
1616+import { collectFileCidsFromEntries, countFilesInDirectory } from '@wispplace/fs-utils';
1717+import { MAX_BLOB_SIZE, MAX_FILE_COUNT, MAX_SITE_SIZE } from '@wispplace/constants';
1818+import { expandSubfsNodes } from './utils';
1919+import { storage } from './storage';
2020+import { upsertSiteCache, tryAcquireLock, releaseLock } from './db';
2121+import { enqueueRevalidate } from './revalidate-queue';
2222+import { gunzipSync } from 'zlib';
2323+2424+// Track in-flight fetches to avoid duplicate work
2525+const inFlightFetches = new Map<string, Promise<boolean>>();
2626+2727+interface FileInfo {
2828+ path: string;
2929+ cid: string;
3030+ blob: any;
3131+ encoding?: 'gzip';
3232+ mimeType?: string;
3333+ base64?: boolean;
3434+}
3535+3636+/**
3737+ * Attempt to fetch and cache a completely missing site on-demand.
3838+ * Returns true if the site was successfully cached, false otherwise.
3939+ *
4040+ * Uses a distributed lock (pg advisory lock) to prevent multiple
4141+ * hosting-service instances from fetching the same site simultaneously.
4242+ */
4343+export async function fetchAndCacheSite(did: string, rkey: string): Promise<boolean> {
4444+ const key = `${did}:${rkey}`;
4545+4646+ // Check if there's already an in-flight fetch for this site
4747+ const existing = inFlightFetches.get(key);
4848+ if (existing) {
4949+ return existing;
5050+ }
5151+5252+ const fetchPromise = doFetchAndCache(did, rkey);
5353+ inFlightFetches.set(key, fetchPromise);
5454+5555+ try {
5656+ return await fetchPromise;
5757+ } finally {
5858+ inFlightFetches.delete(key);
5959+ }
6060+}
6161+6262+async function doFetchAndCache(did: string, rkey: string): Promise<boolean> {
6363+ const lockKey = `on-demand-cache:${did}:${rkey}`;
6464+6565+ // Try to acquire a distributed lock
6666+ const acquired = await tryAcquireLock(lockKey);
6767+ if (!acquired) {
6868+ console.log(`[OnDemandCache] Lock not acquired for ${did}/${rkey}, another instance is handling it`);
6969+ return false;
7070+ }
7171+7272+ try {
7373+ console.log(`[OnDemandCache] Fetching missing site ${did}/${rkey}`);
7474+7575+ // Fetch site record from PDS
7676+ const pdsEndpoint = await getPdsForDid(did);
7777+ if (!pdsEndpoint) {
7878+ console.error(`[OnDemandCache] Could not resolve PDS for ${did}`);
7979+ return false;
8080+ }
8181+8282+ const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
8383+8484+ let data: any;
8585+ try {
8686+ data = await safeFetchJson(recordUrl);
8787+ } catch (err) {
8888+ const msg = err instanceof Error ? err.message : String(err);
8989+ if (msg.includes('HTTP 404') || msg.includes('Not Found')) {
9090+ console.log(`[OnDemandCache] Site record not found on PDS: ${did}/${rkey}`);
9191+ } else {
9292+ console.error(`[OnDemandCache] Failed to fetch site record: ${did}/${rkey}`, msg);
9393+ }
9494+ return false;
9595+ }
9696+9797+ const record = data.value as WispFsRecord;
9898+ const recordCid = data.cid || '';
9999+100100+ if (!record?.root?.entries) {
101101+ console.error(`[OnDemandCache] Invalid record structure for ${did}/${rkey}`);
102102+ return false;
103103+ }
104104+105105+ // Expand subfs nodes
106106+ const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);
107107+108108+ // Validate limits
109109+ const fileCount = countFilesInDirectory(expandedRoot);
110110+ if (fileCount > MAX_FILE_COUNT) {
111111+ console.error(`[OnDemandCache] Site exceeds file limit: ${fileCount} > ${MAX_FILE_COUNT}`);
112112+ return false;
113113+ }
114114+115115+ // Collect files
116116+ const files = collectFileInfo(expandedRoot.entries);
117117+118118+ // Collect file CIDs for DB
119119+ const fileCids: Record<string, string> = {};
120120+ collectFileCidsFromEntries(expandedRoot.entries, '', fileCids);
121121+122122+ // Download and write all files to local storage (hot + warm tiers)
123123+ const CONCURRENCY = 10;
124124+ let downloaded = 0;
125125+ let failed = 0;
126126+127127+ for (let i = 0; i < files.length; i += CONCURRENCY) {
128128+ const batch = files.slice(i, i + CONCURRENCY);
129129+ const results = await Promise.allSettled(
130130+ batch.map(file => downloadAndWriteBlob(did, rkey, file, pdsEndpoint))
131131+ );
132132+133133+ for (const result of results) {
134134+ if (result.status === 'fulfilled') {
135135+ downloaded++;
136136+ } else {
137137+ failed++;
138138+ console.error(`[OnDemandCache] Failed to download blob:`, result.reason);
139139+ }
140140+ }
141141+ }
142142+143143+ console.log(`[OnDemandCache] Downloaded ${downloaded} files (${failed} failed) for ${did}/${rkey}`);
144144+145145+ // Update DB with file CIDs so future storage misses can be detected
146146+ await upsertSiteCache(did, rkey, recordCid, fileCids);
147147+148148+ // Enqueue revalidate so firehose-service backfills S3 (cold tier)
149149+ await enqueueRevalidate(did, rkey, `on-demand-cache`);
150150+151151+ console.log(`[OnDemandCache] Successfully cached site ${did}/${rkey}`);
152152+ return downloaded > 0;
153153+ } catch (err) {
154154+ console.error(`[OnDemandCache] Error caching site ${did}/${rkey}:`, err);
155155+ return false;
156156+ } finally {
157157+ await releaseLock(lockKey);
158158+ }
159159+}
160160+161161+function collectFileInfo(entries: Entry[], pathPrefix: string = ''): FileInfo[] {
162162+ const files: FileInfo[] = [];
163163+164164+ for (const entry of entries) {
165165+ const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
166166+ const node = entry.node;
167167+168168+ if ('type' in node && node.type === 'directory' && 'entries' in node) {
169169+ files.push(...collectFileInfo(node.entries, currentPath));
170170+ } else if ('type' in node && node.type === 'file' && 'blob' in node) {
171171+ const fileNode = node as File;
172172+ const cid = extractBlobCid(fileNode.blob);
173173+ if (cid) {
174174+ files.push({
175175+ path: currentPath,
176176+ cid,
177177+ blob: fileNode.blob,
178178+ encoding: fileNode.encoding,
179179+ mimeType: fileNode.mimeType,
180180+ base64: fileNode.base64,
181181+ });
182182+ }
183183+ }
184184+ }
185185+186186+ return files;
187187+}
/**
 * Download a single blob from the PDS and write it into tiered storage
 * under `<did>/<rkey>/<path>`.
 *
 * Normalizes content before writing:
 *  - base64-flagged blobs are decoded back to raw bytes,
 *  - gzip-encoded blobs are decompressed when their mime type should not
 *    stay compressed (falling back to the gzipped bytes on failure),
 *  - text-like blobs that look gzipped but carry no encoding flag are
 *    tagged as gzip in metadata so they are served correctly.
 *
 * @param did  Repo DID the blob belongs to.
 * @param rkey Site record key, used in the storage key.
 * @param file File descriptor (path, CID, encoding hints) from the record.
 * @param pdsEndpoint Base URL of the user's PDS.
 * @throws Propagates fetch/storage errors; the caller aggregates them via
 *   Promise.allSettled.
 */
async function downloadAndWriteBlob(
  did: string,
  rkey: string,
  file: FileInfo,
  pdsEndpoint: string
): Promise<void> {
  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(file.cid)}`;

  // Generous 5-minute timeout: blobs can be up to MAX_BLOB_SIZE.
  let content = await safeFetchBlob(blobUrl, { maxSize: MAX_BLOB_SIZE, timeout: 300000 });
  let encoding = file.encoding;

  // Decode base64 if flagged
  if (file.base64) {
    const base64String = new TextDecoder().decode(content);
    content = Buffer.from(base64String, 'base64');
  }

  // Decompress if needed and shouldn't stay compressed
  const shouldStayCompressed = shouldCompressMimeType(file.mimeType);

  // 0x1f 0x8b is the gzip magic number.
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    try {
      content = gunzipSync(content);
      encoding = undefined;
    } catch {
      // Keep gzipped if decompression fails
    }
  }

  // If encoding is missing but data looks gzipped for a text-like file, mark it
  if (!encoding && isTextLikeMime(file.mimeType, file.path) && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    encoding = 'gzip';
  }

  // Build storage key and metadata
  const key = `${did}/${rkey}/${file.path}`;
  const metadata: Record<string, string> = {};
  if (encoding) metadata.encoding = encoding;
  if (file.mimeType) metadata.mimeType = file.mimeType;

  // Write to hot + warm tiers only (cold/S3 is read-only in hosting-service,
  // firehose-service will backfill via revalidate)
  // NOTE(review): skipTiers is empty, which reads as "write all tiers" — confirm
  // the cold tier really ignores/rejects writes as the comment above implies.
  await storage.set(key as any, content as any, {
    metadata,
    skipTiers: [],
  });
}
238238+239239+function isTextLikeMime(mimeType?: string, path?: string): boolean {
240240+ if (mimeType) {
241241+ if (mimeType === 'text/html') return true;
242242+ if (mimeType === 'text/css') return true;
243243+ if (mimeType === 'text/javascript') return true;
244244+ if (mimeType === 'application/javascript') return true;
245245+ if (mimeType === 'application/json') return true;
246246+ if (mimeType === 'application/xml') return true;
247247+ if (mimeType === 'image/svg+xml') return true;
248248+ }
249249+250250+ if (!path) return false;
251251+ const lower = path.toLowerCase();
252252+ return lower.endsWith('.html') ||
253253+ lower.endsWith('.htm') ||
254254+ lower.endsWith('.css') ||
255255+ lower.endsWith('.js') ||
256256+ lower.endsWith('.json') ||
257257+ lower.endsWith('.xml') ||
258258+ lower.endsWith('.svg');
259259+}