Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

fix htmlrewrite

+82 -11
+2
apps/firehose-service/src/lib/revalidate-worker.ts
··· 51 51 52 52 // For storage-miss events, force re-download all files since storage is empty 53 53 const forceDownload = reason.startsWith('storage-miss'); 54 + const forceRewriteHtml = reason.startsWith('rewrite-miss'); 54 55 55 56 await handleSiteCreateOrUpdate(did, rkey, record.record, record.cid, { 56 57 skipInvalidation: true, 57 58 forceDownload, 59 + forceRewriteHtml, 58 60 }); 59 61 60 62 logger.info(`[Revalidate] Completed ${id}: ${did}/${rkey}`);
+80 -11
apps/hosting-service/src/lib/file-serving.ts
··· 4 4 */ 5 5 6 6 import { lookup } from 'mime-types'; 7 - import { gunzipSync } from 'zlib'; 7 + import { gunzipSync, gzipSync } from 'zlib'; 8 8 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings'; 9 9 import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression'; 10 10 import { getCachedSettings } from './utils'; 11 11 import { loadRedirectRules, matchRedirectRule, parseCookies, parseQueryString } from './redirects'; 12 - import { isHtmlContent } from './html-rewriter'; 12 + import { isHtmlContent, rewriteHtmlPaths } from './html-rewriter'; 13 13 import { generate404Page, generateDirectoryListing } from './page-generators'; 14 14 import { getIndexFiles, applyCustomHeaders } from './request-utils'; 15 15 import { cache } from './cache-manager'; ··· 26 26 const logger = createLogger('file-serving'); 27 27 28 28 type FileStorageResult = StorageResult<Uint8Array>; 29 + type FileForRequestResult = { result: FileStorageResult; filePath: string; wasRewritten: boolean }; 29 30 30 31 /** 31 32 * Check if the last segment of a path looks like it has a file extension. 
··· 105 106 rkey: string, 106 107 filePath: string, 107 108 preferRewrittenHtml: boolean 108 - ): Promise<{ result: FileStorageResult; filePath: string } | null> { 109 + ): Promise<FileForRequestResult | null> { 109 110 const mimeTypeGuess = lookup(filePath) || 'application/octet-stream'; 110 111 if (preferRewrittenHtml && isHtmlContent(filePath, mimeTypeGuess)) { 111 112 const rewrittenPath = `.rewritten/${filePath}`; 112 113 const rewritten = await getFileWithMetadata(did, rkey, rewrittenPath); 113 114 if (rewritten) { 114 - return { result: rewritten, filePath }; 115 + return { result: rewritten, filePath, wasRewritten: true }; 115 116 } 116 117 } 117 118 118 119 const result = await getFileWithMetadata(did, rkey, filePath); 119 120 if (!result) return null; 120 - return { result, filePath }; 121 + return { result, filePath, wasRewritten: false }; 121 122 } 122 123 123 124 function buildResponseFromStorageResult( ··· 178 179 179 180 applyCustomHeaders(headers, filePath, settings); 180 181 return new Response(content, { headers }); 182 + } 183 + 184 + function buildRewrittenHtmlResponse( 185 + result: FileStorageResult, 186 + filePath: string, 187 + basePath: string, 188 + settings: WispSettings | null, 189 + requestHeaders?: Record<string, string> 190 + ): Response { 191 + try { 192 + const content = Buffer.from(result.data); 193 + const meta = result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 194 + const mimeType = meta?.mimeType || lookup(filePath) || 'application/octet-stream'; 195 + const cacheControl = mimeType.startsWith('text/html') 196 + ? 
'public, max-age=300' 197 + : 'public, max-age=31536000, immutable'; 198 + 199 + const headers: Record<string, string> = { 200 + 'Content-Type': mimeType, 201 + 'Cache-Control': cacheControl, 202 + 'X-Cache-Tier': result.source, 203 + }; 204 + 205 + const hasGzipMagic = content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b; 206 + let decoded = content; 207 + if (meta?.encoding === 'gzip') { 208 + if (hasGzipMagic) { 209 + decoded = gunzipSync(content); 210 + } else { 211 + logger.warn(`File marked as gzipped but lacks magic bytes, serving original`, { filePath }); 212 + applyCustomHeaders(headers, filePath, settings); 213 + return new Response(content, { headers }); 214 + } 215 + } else if (hasGzipMagic && shouldCompressMimeType(mimeType)) { 216 + // Heuristic: treat as gzipped text content even if encoding metadata is missing 217 + decoded = gunzipSync(content); 218 + } 219 + 220 + const htmlString = new TextDecoder().decode(decoded); 221 + const rewritten = rewriteHtmlPaths(htmlString, basePath, filePath); 222 + let output = new TextEncoder().encode(rewritten); 223 + 224 + const shouldServeCompressed = shouldCompressMimeType(mimeType); 225 + const acceptEncoding = requestHeaders?.['accept-encoding'] ?? 
''; 226 + const clientAcceptsGzip = acceptEncoding.includes('gzip'); 227 + if (clientAcceptsGzip && shouldServeCompressed) { 228 + output = gzipSync(output); 229 + headers['Content-Encoding'] = 'gzip'; 230 + } 231 + 232 + applyCustomHeaders(headers, filePath, settings); 233 + return new Response(output, { headers }); 234 + } catch (err) { 235 + logger.warn('Failed to rewrite HTML on demand, serving original', { filePath, error: err }); 236 + return buildResponseFromStorageResult(result, filePath, settings, requestHeaders); 237 + } 181 238 } 182 239 183 240 /** ··· 626 683 }; 627 684 628 685 const indexFiles = getIndexFiles(settings); 686 + const buildResponse = (fileResult: FileForRequestResult): Response => { 687 + const meta = fileResult.result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 688 + const mimeType = meta?.mimeType || lookup(fileResult.filePath) || 'application/octet-stream'; 689 + const needsRewrite = !fileResult.wasRewritten && isHtmlContent(fileResult.filePath, mimeType); 690 + 691 + if (needsRewrite) { 692 + void enqueueRevalidate(did, rkey, `rewrite-miss:${fileResult.filePath}`); 693 + return buildRewrittenHtmlResponse(fileResult.result, fileResult.filePath, basePath, settings, requestHeaders); 694 + } 695 + 696 + return buildResponseFromStorageResult(fileResult.result, fileResult.filePath, settings, requestHeaders); 697 + }; 629 698 630 699 // Normalize the request path (keep empty for root, remove trailing slash for others) 631 700 let requestPath = filePath || ''; ··· 639 708 if (requestPath) { 640 709 const directResult = await span(trace, `storage:${requestPath}`, () => getFileForRequest(did, rkey, requestPath, true)); 641 710 if (directResult) { 642 - return buildResponseFromStorageResult(directResult.result, requestPath, settings, requestHeaders); 711 + return buildResponse(directResult); 643 712 } 644 713 await markExpectedMiss(requestPath); 645 714 } ··· 648 717 const indexPath = requestPath ? 
`${requestPath}/${indexFile}` : indexFile; 649 718 const fileResult = await span(trace, `storage:${indexPath}`, () => getFileForRequest(did, rkey, indexPath, true)); 650 719 if (fileResult) { 651 - return buildResponseFromStorageResult(fileResult.result, indexPath, settings, requestHeaders); 720 + return buildResponse(fileResult); 652 721 } 653 722 await markExpectedMiss(indexPath); 654 723 } ··· 676 745 677 746 const fileResult = await span(trace, `storage:${fileRequestPath}`, () => getFileForRequest(did, rkey, fileRequestPath, true)); 678 747 if (fileResult) { 679 - return buildResponseFromStorageResult(fileResult.result, fileRequestPath, settings, requestHeaders); 748 + return buildResponse(fileResult); 680 749 } 681 750 await markExpectedMiss(fileRequestPath); 682 751 ··· 686 755 const indexPath = fileRequestPath ? `${fileRequestPath}/${indexFileName}` : indexFileName; 687 756 const indexResult = await span(trace, `storage:${indexPath}`, () => getFileForRequest(did, rkey, indexPath, true)); 688 757 if (indexResult) { 689 - return buildResponseFromStorageResult(indexResult.result, indexPath, settings, requestHeaders); 758 + return buildResponse(indexResult); 690 759 } 691 760 await markExpectedMiss(indexPath); 692 761 } ··· 775 844 if (success) { 776 845 // Retry serving the originally requested file 777 846 const retryPath = filePath || indexFiles[0] || 'index.html'; 778 - const retryResult = await span(trace, `storage:${retryPath}`, () => getFileWithMetadata(did, rkey, retryPath)); 847 + const retryResult = await span(trace, `storage:${retryPath}`, () => getFileForRequest(did, rkey, retryPath, true)); 779 848 if (retryResult) { 780 - return buildResponseFromStorageResult(retryResult, retryPath, settings, requestHeaders); 849 + return buildResponse(retryResult); 781 850 } 782 851 } 783 852 }