Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

Speed up cache backfilling and fix inconsistent redirect handling

+86 -22
+4 -1
hosting-service/src/lib/backfill.ts
··· 2 import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 3 import { logger } from './observability'; 4 import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'; 5 6 export interface BackfillOptions { 7 skipExisting?: boolean; // Skip sites already in cache ··· 23 export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> { 24 const { 25 skipExisting = true, 26 - concurrency = 3, 27 maxSites, 28 } = options; 29 ··· 103 try { 104 // Download and cache site 105 await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid); 106 stats.cached++; 107 processed++; 108 logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
··· 2 import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 3 import { logger } from './observability'; 4 import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'; 5 + import { clearRedirectRulesCache } from '../server'; 6 7 export interface BackfillOptions { 8 skipExisting?: boolean; // Skip sites already in cache ··· 24 export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> { 25 const { 26 skipExisting = true, 27 + concurrency = 10, // Increased from 3 to 10 for better parallelization 28 maxSites, 29 } = options; 30 ··· 104 try { 105 // Download and cache site 106 await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid); 107 + // Clear redirect rules cache since the site was updated 108 + clearRedirectRulesCache(site.did, site.rkey); 109 stats.cached++; 110 processed++; 111 logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
+4
hosting-service/src/lib/firehose.ts
··· 11 import { Firehose } from '@atproto/sync' 12 import { IdResolver } from '@atproto/identity' 13 import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache' 14 15 const CACHE_DIR = './cache/sites' 16 ··· 201 pdsEndpoint, 202 verifiedCid 203 ) 204 205 // Acquire distributed lock only for database write to prevent duplicate writes 206 // Note: upsertSite will check cache-only mode internally and skip if needed
··· 11 import { Firehose } from '@atproto/sync' 12 import { IdResolver } from '@atproto/identity' 13 import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache' 14 + import { clearRedirectRulesCache } from '../server' 15 16 const CACHE_DIR = './cache/sites' 17 ··· 202 pdsEndpoint, 203 verifiedCid 204 ) 205 + 206 + // Clear redirect rules cache since the site was updated 207 + clearRedirectRulesCache(did, site) 208 209 // Acquire distributed lock only for database write to prevent duplicate writes 210 // Note: upsertSite will check cache-only mode internally and skip if needed
+63 -17
hosting-service/src/lib/redirects.ts
··· 24 status: number; 25 } 26 27 /** 28 * Parse a _redirects file into an array of redirect rules 29 */ ··· 34 for (let lineNum = 0; lineNum < lines.length; lineNum++) { 35 const lineRaw = lines[lineNum]; 36 if (!lineRaw) continue; 37 - 38 const line = lineRaw.trim(); 39 - 40 // Skip empty lines and comments 41 if (!line || line.startsWith('#')) { 42 continue; 43 } 44 45 try { ··· 218 } 219 220 /** 221 - * Match a request path against redirect rules 222 */ 223 export function matchRedirectRule( 224 requestPath: string, ··· 227 queryParams?: Record<string, string>; 228 headers?: Record<string, string>; 229 cookies?: Record<string, string>; 230 - } 231 ): RedirectMatch | null { 232 // Normalize path: ensure leading slash, remove trailing slash (except for root) 233 let normalizedPath = requestPath.startsWith('/') ? requestPath : `/${requestPath}`; 234 - 235 for (const rule of rules) { 236 // Check query parameter conditions first (if any) 237 if (rule.queryParams) { ··· 239 if (!context?.queryParams) { 240 continue; 241 } 242 - 243 - const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => { 244 const actualValue = context.queryParams?.[key]; 245 - return actualValue !== undefined; 246 }); 247 - 248 if (!queryMatches) { 249 continue; 250 } ··· 302 303 // Build the target path by replacing placeholders 304 let targetPath = rule.to; 305 - 306 - // Replace captured parameters 307 if (rule.fromParams && match.length > 1) { 308 for (let i = 0; i < rule.fromParams.length; i++) { 309 const paramName = rule.fromParams[i]; 310 const paramValue = match[i + 1]; 311 - 312 if (!paramName || !paramValue) continue; 313 - 314 if (paramName === 'splat') { 315 - targetPath = targetPath.replace(':splat', paramValue); 316 } else { 317 - targetPath = targetPath.replace(`:${paramName}`, paramValue); 318 } 319 } 320 } 321 322 - // Handle query parameter replacements 323 if (rule.queryParams && context?.queryParams) { 324 for (const [key, placeholder] of 
Object.entries(rule.queryParams)) { 325 const actualValue = context.queryParams[key]; 326 if (actualValue && placeholder && placeholder.startsWith(':')) { 327 const paramName = placeholder.slice(1); 328 if (paramName) { 329 - targetPath = targetPath.replace(`:${paramName}`, actualValue); 330 } 331 } 332 }
··· 24 status: number; 25 } 26 27 + // Maximum number of redirect rules to prevent DoS attacks 28 + const MAX_REDIRECT_RULES = 1000; 29 + 30 /** 31 * Parse a _redirects file into an array of redirect rules 32 */ ··· 37 for (let lineNum = 0; lineNum < lines.length; lineNum++) { 38 const lineRaw = lines[lineNum]; 39 if (!lineRaw) continue; 40 + 41 const line = lineRaw.trim(); 42 + 43 // Skip empty lines and comments 44 if (!line || line.startsWith('#')) { 45 continue; 46 + } 47 + 48 + // Enforce max rules limit 49 + if (rules.length >= MAX_REDIRECT_RULES) { 50 + console.warn(`Redirect rules limit reached (${MAX_REDIRECT_RULES}), ignoring remaining rules`); 51 + break; 52 } 53 54 try { ··· 227 } 228 229 /** 230 + * Match a request path against redirect rules with loop detection 231 */ 232 export function matchRedirectRule( 233 requestPath: string, ··· 236 queryParams?: Record<string, string>; 237 headers?: Record<string, string>; 238 cookies?: Record<string, string>; 239 + }, 240 + visitedPaths: Set<string> = new Set() 241 ): RedirectMatch | null { 242 // Normalize path: ensure leading slash, remove trailing slash (except for root) 243 let normalizedPath = requestPath.startsWith('/') ? 
requestPath : `/${requestPath}`; 244 + 245 + // Detect redirect loops 246 + if (visitedPaths.has(normalizedPath)) { 247 + console.warn(`Redirect loop detected for path: ${normalizedPath}`); 248 + return null; 249 + } 250 + 251 + // Track this path to detect loops 252 + visitedPaths.add(normalizedPath); 253 + 254 + // Limit redirect chain depth to 10 255 + if (visitedPaths.size > 10) { 256 + console.warn(`Redirect chain too deep (>10) for path: ${normalizedPath}`); 257 + return null; 258 + } 259 + 260 for (const rule of rules) { 261 // Check query parameter conditions first (if any) 262 if (rule.queryParams) { ··· 264 if (!context?.queryParams) { 265 continue; 266 } 267 + 268 + // Check that all required query params are present 269 + // The value in rule.queryParams is either a literal or a placeholder (:name) 270 + const queryMatches = Object.entries(rule.queryParams).every(([key, expectedValue]) => { 271 const actualValue = context.queryParams?.[key]; 272 + 273 + // Query param must exist 274 + if (actualValue === undefined) { 275 + return false; 276 + } 277 + 278 + // If expected value is a placeholder (:name), any value is acceptable 279 + // If it's a literal, it must match exactly 280 + if (expectedValue && !expectedValue.startsWith(':')) { 281 + return actualValue === expectedValue; 282 + } 283 + 284 + return true; 285 }); 286 + 287 if (!queryMatches) { 288 continue; 289 } ··· 341 342 // Build the target path by replacing placeholders 343 let targetPath = rule.to; 344 + 345 + // Replace captured parameters (with URL encoding) 346 if (rule.fromParams && match.length > 1) { 347 for (let i = 0; i < rule.fromParams.length; i++) { 348 const paramName = rule.fromParams[i]; 349 const paramValue = match[i + 1]; 350 + 351 if (!paramName || !paramValue) continue; 352 + 353 + // URL encode captured values to prevent invalid URLs 354 + const encodedValue = encodeURIComponent(paramValue); 355 + 356 if (paramName === 'splat') { 357 + // For splats, preserve slashes by 
re-decoding them 358 + const splatValue = encodedValue.replace(/%2F/g, '/'); 359 + targetPath = targetPath.replace(':splat', splatValue); 360 } else { 361 + targetPath = targetPath.replace(`:${paramName}`, encodedValue); 362 } 363 } 364 } 365 366 + // Handle query parameter replacements (with URL encoding) 367 if (rule.queryParams && context?.queryParams) { 368 for (const [key, placeholder] of Object.entries(rule.queryParams)) { 369 const actualValue = context.queryParams[key]; 370 if (actualValue && placeholder && placeholder.startsWith(':')) { 371 const paramName = placeholder.slice(1); 372 if (paramName) { 373 + // URL encode query parameter values 374 + const encodedValue = encodeURIComponent(actualValue); 375 + targetPath = targetPath.replace(`:${paramName}`, encodedValue); 376 } 377 } 378 }
+6
src/lib/wisp-utils.test.ts
··· 58 expect(shouldCompressFile('text/plain')).toBe(true) 59 }) 60 61 test('should NOT compress images', () => { 62 expect(shouldCompressFile('image/png')).toBe(false) 63 expect(shouldCompressFile('image/jpeg')).toBe(false)
··· 58 expect(shouldCompressFile('text/plain')).toBe(true) 59 }) 60 61 + test('should NOT compress _redirects file', () => { 62 + expect(shouldCompressFile('text/plain', '_redirects')).toBe(false) 63 + expect(shouldCompressFile('text/plain', 'folder/_redirects')).toBe(false) 64 + expect(shouldCompressFile('application/octet-stream', '_redirects')).toBe(false) 65 + }) 66 + 67 test('should NOT compress images', () => { 68 expect(shouldCompressFile('image/png')).toBe(false) 69 expect(shouldCompressFile('image/jpeg')).toBe(false)
+7 -2
src/lib/wisp-utils.ts
··· 32 } 33 34 /** 35 - * Determine if a file should be gzip compressed based on its MIME type 36 */ 37 - export function shouldCompressFile(mimeType: string): boolean { 38 // Compress text-based files and uncompressed audio formats 39 const compressibleTypes = [ 40 'text/html',
··· 32 } 33 34 /** 35 + * Determine if a file should be gzip compressed based on its MIME type and filename 36 */ 37 + export function shouldCompressFile(mimeType: string, fileName?: string): boolean { 38 + // Never compress _redirects file - it needs to be plain text for the hosting service 39 + if (fileName && (fileName.endsWith('/_redirects') || fileName === '_redirects')) { 40 + return false; 41 + } 42 + 43 // Compress text-based files and uncompressed audio formats 44 const compressibleTypes = [ 45 'text/html',
+2 -2
src/routes/wisp.ts
··· 191 const originalContent = Buffer.from(arrayBuffer); 192 const originalMimeType = file.type || 'application/octet-stream'; 193 194 - // Determine if file should be compressed 195 - const shouldCompress = shouldCompressFile(originalMimeType); 196 197 // Text files (HTML/CSS/JS) need base64 encoding to prevent PDS content sniffing 198 // Audio files just need compression without base64
··· 191 const originalContent = Buffer.from(arrayBuffer); 192 const originalMimeType = file.type || 'application/octet-stream'; 193 194 + // Determine if file should be compressed (pass filename to exclude _redirects) 195 + const shouldCompress = shouldCompressFile(originalMimeType, normalizedPath); 196 197 // Text files (HTML/CSS/JS) need base64 encoding to prevent PDS content sniffing 198 // Audio files just need compression without base64