tangled
alpha
login
or
join now
ptr.pet
/
wisp.place-monorepo
forked from
nekomimi.pet/wisp.place-monorepo
0
fork
atom
Monorepo for Wisp.place. A static site hosting service built on top of the AT Protocol.
0
fork
atom
overview
issues
pulls
pipelines
update dockerfiles, solidify html path rewriting
@nekomimi.pet
4 months ago
25d89b8d
fbbc9a83
+915
-326
6 changed files
expand all
collapse all
unified
split
.dockerignore
Dockerfile
hosting-service
src
lib
firehose.ts
html-rewriter.test.ts
html-rewriter.ts
server.ts
+8
.dockerignore
···
9
9
*.log
10
10
.vscode
11
11
.idea
12
12
+
server
13
13
+
.prettierrc
14
14
+
testDeploy
15
15
+
.tangled
16
16
+
.crush
17
17
+
.claude
18
18
+
server
19
19
+
hosting-service
+10
-6
Dockerfile
···
15
15
COPY public ./public
16
16
17
17
# Build the application (if needed)
18
18
-
# RUN bun run build
18
18
+
RUN bun build \
19
19
+
--compile \
20
20
+
--minify \
21
21
+
--outfile server \
22
22
+
src/index.ts
23
23
+
24
24
+
FROM scratch AS runtime
25
25
+
WORKDIR /app
26
26
+
COPY --from=base /app/server /app/server
19
27
20
28
# Set environment variables (can be overridden at runtime)
21
29
ENV PORT=3000
···
24
32
# Expose the application port
25
33
EXPOSE 3000
26
34
27
27
-
# Health check
28
28
-
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
29
29
-
CMD bun -e "fetch('http://localhost:3000/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"
30
30
-
31
35
# Start the application
32
32
-
CMD ["bun", "src/index.ts"]
36
36
+
CMD ["./server"]
+259
-219
hosting-service/src/lib/firehose.ts
···
1
1
-
import { existsSync, rmSync } from 'fs';
2
2
-
import { getPdsForDid, downloadAndCacheSite, extractBlobCid, fetchSiteRecord } from './utils';
3
3
-
import { upsertSite, tryAcquireLock, releaseLock } from './db';
4
4
-
import { safeFetch } from './safe-fetch';
5
5
-
import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs';
6
6
-
import { Firehose } from '@atproto/sync';
7
7
-
import { IdResolver } from '@atproto/identity';
1
1
+
import { existsSync, rmSync } from 'fs'
2
2
+
import {
3
3
+
getPdsForDid,
4
4
+
downloadAndCacheSite,
5
5
+
extractBlobCid,
6
6
+
fetchSiteRecord
7
7
+
} from './utils'
8
8
+
import { upsertSite, tryAcquireLock, releaseLock } from './db'
9
9
+
import { safeFetch } from './safe-fetch'
10
10
+
import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs'
11
11
+
import { Firehose } from '@atproto/sync'
12
12
+
import { IdResolver } from '@atproto/identity'
8
13
9
9
-
const CACHE_DIR = './cache/sites';
14
14
+
const CACHE_DIR = './cache/sites'
10
15
11
16
export class FirehoseWorker {
12
12
-
private firehose: Firehose | null = null;
13
13
-
private idResolver: IdResolver;
14
14
-
private isShuttingDown = false;
15
15
-
private lastEventTime = Date.now();
17
17
+
private firehose: Firehose | null = null
18
18
+
private idResolver: IdResolver
19
19
+
private isShuttingDown = false
20
20
+
private lastEventTime = Date.now()
16
21
17
17
-
constructor(
18
18
-
private logger?: (msg: string, data?: Record<string, unknown>) => void,
19
19
-
) {
20
20
-
this.idResolver = new IdResolver();
21
21
-
}
22
22
+
constructor(
23
23
+
private logger?: (msg: string, data?: Record<string, unknown>) => void
24
24
+
) {
25
25
+
this.idResolver = new IdResolver()
26
26
+
}
22
27
23
23
-
private log(msg: string, data?: Record<string, unknown>) {
24
24
-
const log = this.logger || console.log;
25
25
-
log(`[FirehoseWorker] ${msg}`, data || {});
26
26
-
}
28
28
+
private log(msg: string, data?: Record<string, unknown>) {
29
29
+
const log = this.logger || console.log
30
30
+
log(`[FirehoseWorker] ${msg}`, data || {})
31
31
+
}
27
32
28
28
-
start() {
29
29
-
this.log('Starting firehose worker');
30
30
-
this.connect();
31
31
-
}
33
33
+
start() {
34
34
+
this.log('Starting firehose worker')
35
35
+
this.connect()
36
36
+
}
32
37
33
33
-
stop() {
34
34
-
this.log('Stopping firehose worker');
35
35
-
this.isShuttingDown = true;
38
38
+
stop() {
39
39
+
this.log('Stopping firehose worker')
40
40
+
this.isShuttingDown = true
36
41
37
37
-
if (this.firehose) {
38
38
-
this.firehose.destroy();
39
39
-
this.firehose = null;
40
40
-
}
41
41
-
}
42
42
+
if (this.firehose) {
43
43
+
this.firehose.destroy()
44
44
+
this.firehose = null
45
45
+
}
46
46
+
}
42
47
43
43
-
private connect() {
44
44
-
if (this.isShuttingDown) return;
48
48
+
private connect() {
49
49
+
if (this.isShuttingDown) return
45
50
46
46
-
this.log('Connecting to AT Protocol firehose');
51
51
+
this.log('Connecting to AT Protocol firehose')
47
52
48
48
-
this.firehose = new Firehose({
49
49
-
idResolver: this.idResolver,
50
50
-
service: 'wss://bsky.network',
51
51
-
filterCollections: ['place.wisp.fs'],
52
52
-
handleEvent: async (evt: any) => {
53
53
-
this.lastEventTime = Date.now();
53
53
+
this.firehose = new Firehose({
54
54
+
idResolver: this.idResolver,
55
55
+
service: 'wss://bsky.network',
56
56
+
filterCollections: ['place.wisp.fs'],
57
57
+
handleEvent: async (evt: any) => {
58
58
+
this.lastEventTime = Date.now()
54
59
55
55
-
// Watch for write events
56
56
-
if (evt.event === 'create' || evt.event === 'update') {
57
57
-
const record = evt.record;
60
60
+
// Watch for write events
61
61
+
if (evt.event === 'create' || evt.event === 'update') {
62
62
+
const record = evt.record
58
63
59
59
-
// If the write is a valid place.wisp.fs record
60
60
-
if (
61
61
-
evt.collection === 'place.wisp.fs' &&
62
62
-
isRecord(record) &&
63
63
-
validateRecord(record).success
64
64
-
) {
65
65
-
this.log('Received place.wisp.fs event', {
66
66
-
did: evt.did,
67
67
-
event: evt.event,
68
68
-
rkey: evt.rkey,
69
69
-
});
64
64
+
// If the write is a valid place.wisp.fs record
65
65
+
if (
66
66
+
evt.collection === 'place.wisp.fs' &&
67
67
+
isRecord(record) &&
68
68
+
validateRecord(record).success
69
69
+
) {
70
70
+
this.log('Received place.wisp.fs event', {
71
71
+
did: evt.did,
72
72
+
event: evt.event,
73
73
+
rkey: evt.rkey
74
74
+
})
70
75
71
71
-
try {
72
72
-
await this.handleCreateOrUpdate(evt.did, evt.rkey, record, evt.cid?.toString());
73
73
-
} catch (err) {
74
74
-
this.log('Error handling event', {
75
75
-
did: evt.did,
76
76
-
event: evt.event,
77
77
-
rkey: evt.rkey,
78
78
-
error: err instanceof Error ? err.message : String(err),
79
79
-
});
80
80
-
}
81
81
-
}
82
82
-
} else if (evt.event === 'delete' && evt.collection === 'place.wisp.fs') {
83
83
-
this.log('Received delete event', {
84
84
-
did: evt.did,
85
85
-
rkey: evt.rkey,
86
86
-
});
76
76
+
try {
77
77
+
await this.handleCreateOrUpdate(
78
78
+
evt.did,
79
79
+
evt.rkey,
80
80
+
record,
81
81
+
evt.cid?.toString()
82
82
+
)
83
83
+
} catch (err) {
84
84
+
this.log('Error handling event', {
85
85
+
did: evt.did,
86
86
+
event: evt.event,
87
87
+
rkey: evt.rkey,
88
88
+
error:
89
89
+
err instanceof Error
90
90
+
? err.message
91
91
+
: String(err)
92
92
+
})
93
93
+
}
94
94
+
}
95
95
+
} else if (
96
96
+
evt.event === 'delete' &&
97
97
+
evt.collection === 'place.wisp.fs'
98
98
+
) {
99
99
+
this.log('Received delete event', {
100
100
+
did: evt.did,
101
101
+
rkey: evt.rkey
102
102
+
})
87
103
88
88
-
try {
89
89
-
await this.handleDelete(evt.did, evt.rkey);
90
90
-
} catch (err) {
91
91
-
this.log('Error handling delete', {
92
92
-
did: evt.did,
93
93
-
rkey: evt.rkey,
94
94
-
error: err instanceof Error ? err.message : String(err),
95
95
-
});
96
96
-
}
97
97
-
}
98
98
-
},
99
99
-
onError: (err: any) => {
100
100
-
this.log('Firehose error', {
101
101
-
error: err instanceof Error ? err.message : String(err),
102
102
-
stack: err instanceof Error ? err.stack : undefined,
103
103
-
fullError: err,
104
104
-
});
105
105
-
console.error('Full firehose error:', err);
106
106
-
},
107
107
-
});
104
104
+
try {
105
105
+
await this.handleDelete(evt.did, evt.rkey)
106
106
+
} catch (err) {
107
107
+
this.log('Error handling delete', {
108
108
+
did: evt.did,
109
109
+
rkey: evt.rkey,
110
110
+
error:
111
111
+
err instanceof Error ? err.message : String(err)
112
112
+
})
113
113
+
}
114
114
+
}
115
115
+
},
116
116
+
onError: (err: any) => {
117
117
+
this.log('Firehose error', {
118
118
+
error: err instanceof Error ? err.message : String(err),
119
119
+
stack: err instanceof Error ? err.stack : undefined,
120
120
+
fullError: err
121
121
+
})
122
122
+
console.error('Full firehose error:', err)
123
123
+
}
124
124
+
})
108
125
109
109
-
this.firehose.start();
110
110
-
this.log('Firehose started');
111
111
-
}
126
126
+
this.firehose.start()
127
127
+
this.log('Firehose started')
128
128
+
}
112
129
113
113
-
private async handleCreateOrUpdate(did: string, site: string, record: any, eventCid?: string) {
114
114
-
this.log('Processing create/update', { did, site });
130
130
+
private async handleCreateOrUpdate(
131
131
+
did: string,
132
132
+
site: string,
133
133
+
record: any,
134
134
+
eventCid?: string
135
135
+
) {
136
136
+
this.log('Processing create/update', { did, site })
115
137
116
116
-
// Record is already validated in handleEvent
117
117
-
const fsRecord = record;
138
138
+
// Record is already validated in handleEvent
139
139
+
const fsRecord = record
118
140
119
119
-
const pdsEndpoint = await getPdsForDid(did);
120
120
-
if (!pdsEndpoint) {
121
121
-
this.log('Could not resolve PDS for DID', { did });
122
122
-
return;
123
123
-
}
141
141
+
const pdsEndpoint = await getPdsForDid(did)
142
142
+
if (!pdsEndpoint) {
143
143
+
this.log('Could not resolve PDS for DID', { did })
144
144
+
return
145
145
+
}
124
146
125
125
-
this.log('Resolved PDS', { did, pdsEndpoint });
147
147
+
this.log('Resolved PDS', { did, pdsEndpoint })
126
148
127
127
-
// Verify record exists on PDS and fetch its CID
128
128
-
let verifiedCid: string;
129
129
-
try {
130
130
-
const result = await fetchSiteRecord(did, site);
149
149
+
// Verify record exists on PDS and fetch its CID
150
150
+
let verifiedCid: string
151
151
+
try {
152
152
+
const result = await fetchSiteRecord(did, site)
131
153
132
132
-
if (!result) {
133
133
-
this.log('Record not found on PDS, skipping cache', { did, site });
134
134
-
return;
135
135
-
}
154
154
+
if (!result) {
155
155
+
this.log('Record not found on PDS, skipping cache', {
156
156
+
did,
157
157
+
site
158
158
+
})
159
159
+
return
160
160
+
}
136
161
137
137
-
verifiedCid = result.cid;
162
162
+
verifiedCid = result.cid
138
163
139
139
-
// Verify event CID matches PDS CID (prevent cache poisoning)
140
140
-
if (eventCid && eventCid !== verifiedCid) {
141
141
-
this.log('CID mismatch detected - potential spoofed event', {
142
142
-
did,
143
143
-
site,
144
144
-
eventCid,
145
145
-
verifiedCid
146
146
-
});
147
147
-
return;
148
148
-
}
164
164
+
// Verify event CID matches PDS CID (prevent cache poisoning)
165
165
+
if (eventCid && eventCid !== verifiedCid) {
166
166
+
this.log('CID mismatch detected - potential spoofed event', {
167
167
+
did,
168
168
+
site,
169
169
+
eventCid,
170
170
+
verifiedCid
171
171
+
})
172
172
+
return
173
173
+
}
149
174
150
150
-
this.log('Record verified on PDS', { did, site, cid: verifiedCid });
151
151
-
} catch (err) {
152
152
-
this.log('Failed to verify record on PDS', {
153
153
-
did,
154
154
-
site,
155
155
-
error: err instanceof Error ? err.message : String(err),
156
156
-
});
157
157
-
return;
158
158
-
}
175
175
+
this.log('Record verified on PDS', { did, site, cid: verifiedCid })
176
176
+
} catch (err) {
177
177
+
this.log('Failed to verify record on PDS', {
178
178
+
did,
179
179
+
site,
180
180
+
error: err instanceof Error ? err.message : String(err)
181
181
+
})
182
182
+
return
183
183
+
}
159
184
160
160
-
// Cache the record with verified CID (uses atomic swap internally)
161
161
-
// All instances cache locally for edge serving
162
162
-
await downloadAndCacheSite(did, site, fsRecord, pdsEndpoint, verifiedCid);
185
185
+
// Cache the record with verified CID (uses atomic swap internally)
186
186
+
// All instances cache locally for edge serving
187
187
+
await downloadAndCacheSite(
188
188
+
did,
189
189
+
site,
190
190
+
fsRecord,
191
191
+
pdsEndpoint,
192
192
+
verifiedCid
193
193
+
)
163
194
164
164
-
// Acquire distributed lock only for database write to prevent duplicate writes
165
165
-
const lockKey = `db:upsert:${did}:${site}`;
166
166
-
const lockAcquired = await tryAcquireLock(lockKey);
195
195
+
// Acquire distributed lock only for database write to prevent duplicate writes
196
196
+
const lockKey = `db:upsert:${did}:${site}`
197
197
+
const lockAcquired = await tryAcquireLock(lockKey)
167
198
168
168
-
if (!lockAcquired) {
169
169
-
this.log('Another instance is writing to DB, skipping upsert', { did, site });
170
170
-
this.log('Successfully processed create/update (cached locally)', { did, site });
171
171
-
return;
172
172
-
}
199
199
+
if (!lockAcquired) {
200
200
+
this.log('Another instance is writing to DB, skipping upsert', {
201
201
+
did,
202
202
+
site
203
203
+
})
204
204
+
this.log('Successfully processed create/update (cached locally)', {
205
205
+
did,
206
206
+
site
207
207
+
})
208
208
+
return
209
209
+
}
173
210
174
174
-
try {
175
175
-
// Upsert site to database (only one instance does this)
176
176
-
await upsertSite(did, site, fsRecord.site);
177
177
-
this.log('Successfully processed create/update (cached + DB updated)', { did, site });
178
178
-
} finally {
179
179
-
// Always release lock, even if DB write fails
180
180
-
await releaseLock(lockKey);
181
181
-
}
182
182
-
}
211
211
+
try {
212
212
+
// Upsert site to database (only one instance does this)
213
213
+
await upsertSite(did, site, fsRecord.site)
214
214
+
this.log(
215
215
+
'Successfully processed create/update (cached + DB updated)',
216
216
+
{ did, site }
217
217
+
)
218
218
+
} finally {
219
219
+
// Always release lock, even if DB write fails
220
220
+
await releaseLock(lockKey)
221
221
+
}
222
222
+
}
183
223
184
184
-
private async handleDelete(did: string, site: string) {
185
185
-
this.log('Processing delete', { did, site });
224
224
+
private async handleDelete(did: string, site: string) {
225
225
+
this.log('Processing delete', { did, site })
186
226
187
187
-
// All instances should delete their local cache (no lock needed)
188
188
-
const pdsEndpoint = await getPdsForDid(did);
189
189
-
if (!pdsEndpoint) {
190
190
-
this.log('Could not resolve PDS for DID', { did });
191
191
-
return;
192
192
-
}
227
227
+
// All instances should delete their local cache (no lock needed)
228
228
+
const pdsEndpoint = await getPdsForDid(did)
229
229
+
if (!pdsEndpoint) {
230
230
+
this.log('Could not resolve PDS for DID', { did })
231
231
+
return
232
232
+
}
193
233
194
194
-
// Verify record is actually deleted from PDS
195
195
-
try {
196
196
-
const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}`;
197
197
-
const recordRes = await safeFetch(recordUrl);
234
234
+
// Verify record is actually deleted from PDS
235
235
+
try {
236
236
+
const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}`
237
237
+
const recordRes = await safeFetch(recordUrl)
198
238
199
199
-
if (recordRes.ok) {
200
200
-
this.log('Record still exists on PDS, not deleting cache', {
201
201
-
did,
202
202
-
site,
203
203
-
});
204
204
-
return;
205
205
-
}
239
239
+
if (recordRes.ok) {
240
240
+
this.log('Record still exists on PDS, not deleting cache', {
241
241
+
did,
242
242
+
site
243
243
+
})
244
244
+
return
245
245
+
}
206
246
207
207
-
this.log('Verified record is deleted from PDS', {
208
208
-
did,
209
209
-
site,
210
210
-
status: recordRes.status,
211
211
-
});
212
212
-
} catch (err) {
213
213
-
this.log('Error verifying deletion on PDS', {
214
214
-
did,
215
215
-
site,
216
216
-
error: err instanceof Error ? err.message : String(err),
217
217
-
});
218
218
-
}
247
247
+
this.log('Verified record is deleted from PDS', {
248
248
+
did,
249
249
+
site,
250
250
+
status: recordRes.status
251
251
+
})
252
252
+
} catch (err) {
253
253
+
this.log('Error verifying deletion on PDS', {
254
254
+
did,
255
255
+
site,
256
256
+
error: err instanceof Error ? err.message : String(err)
257
257
+
})
258
258
+
}
219
259
220
220
-
// Delete cache
221
221
-
this.deleteCache(did, site);
260
260
+
// Delete cache
261
261
+
this.deleteCache(did, site)
222
262
223
223
-
this.log('Successfully processed delete', { did, site });
224
224
-
}
263
263
+
this.log('Successfully processed delete', { did, site })
264
264
+
}
225
265
226
226
-
private deleteCache(did: string, site: string) {
227
227
-
const cacheDir = `${CACHE_DIR}/${did}/${site}`;
266
266
+
private deleteCache(did: string, site: string) {
267
267
+
const cacheDir = `${CACHE_DIR}/${did}/${site}`
228
268
229
229
-
if (!existsSync(cacheDir)) {
230
230
-
this.log('Cache directory does not exist, nothing to delete', {
231
231
-
did,
232
232
-
site,
233
233
-
});
234
234
-
return;
235
235
-
}
269
269
+
if (!existsSync(cacheDir)) {
270
270
+
this.log('Cache directory does not exist, nothing to delete', {
271
271
+
did,
272
272
+
site
273
273
+
})
274
274
+
return
275
275
+
}
236
276
237
237
-
try {
238
238
-
rmSync(cacheDir, { recursive: true, force: true });
239
239
-
this.log('Cache deleted', { did, site, path: cacheDir });
240
240
-
} catch (err) {
241
241
-
this.log('Failed to delete cache', {
242
242
-
did,
243
243
-
site,
244
244
-
path: cacheDir,
245
245
-
error: err instanceof Error ? err.message : String(err),
246
246
-
});
247
247
-
}
248
248
-
}
277
277
+
try {
278
278
+
rmSync(cacheDir, { recursive: true, force: true })
279
279
+
this.log('Cache deleted', { did, site, path: cacheDir })
280
280
+
} catch (err) {
281
281
+
this.log('Failed to delete cache', {
282
282
+
did,
283
283
+
site,
284
284
+
path: cacheDir,
285
285
+
error: err instanceof Error ? err.message : String(err)
286
286
+
})
287
287
+
}
288
288
+
}
249
289
250
250
-
getHealth() {
251
251
-
const isConnected = this.firehose !== null;
252
252
-
const timeSinceLastEvent = Date.now() - this.lastEventTime;
290
290
+
getHealth() {
291
291
+
const isConnected = this.firehose !== null
292
292
+
const timeSinceLastEvent = Date.now() - this.lastEventTime
253
293
254
254
-
return {
255
255
-
connected: isConnected,
256
256
-
lastEventTime: this.lastEventTime,
257
257
-
timeSinceLastEvent,
258
258
-
healthy: isConnected && timeSinceLastEvent < 300000, // 5 minutes
259
259
-
};
260
260
-
}
294
294
+
return {
295
295
+
connected: isConnected,
296
296
+
lastEventTime: this.lastEventTime,
297
297
+
timeSinceLastEvent,
298
298
+
healthy: isConnected && timeSinceLastEvent < 300000 // 5 minutes
299
299
+
}
300
300
+
}
261
301
}
+457
hosting-service/src/lib/html-rewriter.test.ts
···
1
1
+
import { describe, test, expect } from 'bun:test'
2
2
+
import { rewriteHtmlPaths, isHtmlContent } from './html-rewriter'
3
3
+
4
4
+
describe('rewriteHtmlPaths', () => {
5
5
+
const basePath = '/identifier/site/'
6
6
+
7
7
+
describe('absolute paths', () => {
8
8
+
test('rewrites absolute paths with leading slash', () => {
9
9
+
const html = '<img src="/image.png">'
10
10
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
11
11
+
expect(result).toBe('<img src="/identifier/site/image.png">')
12
12
+
})
13
13
+
14
14
+
test('rewrites nested absolute paths', () => {
15
15
+
const html = '<link href="/css/style.css">'
16
16
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
17
17
+
expect(result).toBe('<link href="/identifier/site/css/style.css">')
18
18
+
})
19
19
+
})
20
20
+
21
21
+
describe('relative paths from root document', () => {
22
22
+
test('rewrites relative paths with ./ prefix', () => {
23
23
+
const html = '<img src="./image.png">'
24
24
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
25
25
+
expect(result).toBe('<img src="/identifier/site/image.png">')
26
26
+
})
27
27
+
28
28
+
test('rewrites relative paths without prefix', () => {
29
29
+
const html = '<img src="image.png">'
30
30
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
31
31
+
expect(result).toBe('<img src="/identifier/site/image.png">')
32
32
+
})
33
33
+
34
34
+
test('rewrites relative paths with ../ (should stay at root)', () => {
35
35
+
const html = '<img src="../image.png">'
36
36
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
37
37
+
expect(result).toBe('<img src="/identifier/site/image.png">')
38
38
+
})
39
39
+
})
40
40
+
41
41
+
describe('relative paths from nested documents', () => {
42
42
+
test('rewrites relative path from nested document', () => {
43
43
+
const html = '<img src="./photo.jpg">'
44
44
+
const result = rewriteHtmlPaths(
45
45
+
html,
46
46
+
basePath,
47
47
+
'folder1/folder2/index.html'
48
48
+
)
49
49
+
expect(result).toBe(
50
50
+
'<img src="/identifier/site/folder1/folder2/photo.jpg">'
51
51
+
)
52
52
+
})
53
53
+
54
54
+
test('rewrites plain filename from nested document', () => {
55
55
+
const html = '<script src="app.js"></script>'
56
56
+
const result = rewriteHtmlPaths(
57
57
+
html,
58
58
+
basePath,
59
59
+
'folder1/folder2/index.html'
60
60
+
)
61
61
+
expect(result).toBe(
62
62
+
'<script src="/identifier/site/folder1/folder2/app.js"></script>'
63
63
+
)
64
64
+
})
65
65
+
66
66
+
test('rewrites ../ to go up one level', () => {
67
67
+
const html = '<img src="../image.png">'
68
68
+
const result = rewriteHtmlPaths(
69
69
+
html,
70
70
+
basePath,
71
71
+
'folder1/folder2/folder3/index.html'
72
72
+
)
73
73
+
expect(result).toBe(
74
74
+
'<img src="/identifier/site/folder1/folder2/image.png">'
75
75
+
)
76
76
+
})
77
77
+
78
78
+
test('rewrites multiple ../ to go up multiple levels', () => {
79
79
+
const html = '<link href="../../css/style.css">'
80
80
+
const result = rewriteHtmlPaths(
81
81
+
html,
82
82
+
basePath,
83
83
+
'folder1/folder2/folder3/index.html'
84
84
+
)
85
85
+
expect(result).toBe(
86
86
+
'<link href="/identifier/site/folder1/css/style.css">'
87
87
+
)
88
88
+
})
89
89
+
90
90
+
test('rewrites ../ with additional path segments', () => {
91
91
+
const html = '<img src="../assets/logo.png">'
92
92
+
const result = rewriteHtmlPaths(
93
93
+
html,
94
94
+
basePath,
95
95
+
'pages/about/index.html'
96
96
+
)
97
97
+
expect(result).toBe(
98
98
+
'<img src="/identifier/site/pages/assets/logo.png">'
99
99
+
)
100
100
+
})
101
101
+
102
102
+
test('handles complex nested relative paths', () => {
103
103
+
const html = '<script src="../../lib/vendor/jquery.js"></script>'
104
104
+
const result = rewriteHtmlPaths(
105
105
+
html,
106
106
+
basePath,
107
107
+
'pages/blog/post/index.html'
108
108
+
)
109
109
+
expect(result).toBe(
110
110
+
'<script src="/identifier/site/pages/lib/vendor/jquery.js"></script>'
111
111
+
)
112
112
+
})
113
113
+
114
114
+
test('handles ../ going past root (stays at root)', () => {
115
115
+
const html = '<img src="../../../image.png">'
116
116
+
const result = rewriteHtmlPaths(html, basePath, 'folder1/index.html')
117
117
+
expect(result).toBe('<img src="/identifier/site/image.png">')
118
118
+
})
119
119
+
})
120
120
+
121
121
+
describe('external URLs and special schemes', () => {
122
122
+
test('does not rewrite http URLs', () => {
123
123
+
const html = '<img src="http://example.com/image.png">'
124
124
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
125
125
+
expect(result).toBe('<img src="http://example.com/image.png">')
126
126
+
})
127
127
+
128
128
+
test('does not rewrite https URLs', () => {
129
129
+
const html = '<link href="https://cdn.example.com/style.css">'
130
130
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
131
131
+
expect(result).toBe(
132
132
+
'<link href="https://cdn.example.com/style.css">'
133
133
+
)
134
134
+
})
135
135
+
136
136
+
test('does not rewrite protocol-relative URLs', () => {
137
137
+
const html = '<script src="//cdn.example.com/script.js"></script>'
138
138
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
139
139
+
expect(result).toBe(
140
140
+
'<script src="//cdn.example.com/script.js"></script>'
141
141
+
)
142
142
+
})
143
143
+
144
144
+
test('does not rewrite data URIs', () => {
145
145
+
const html =
146
146
+
'<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA">'
147
147
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
148
148
+
expect(result).toBe(
149
149
+
'<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA">'
150
150
+
)
151
151
+
})
152
152
+
153
153
+
test('does not rewrite mailto links', () => {
154
154
+
const html = '<a href="mailto:test@example.com">Email</a>'
155
155
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
156
156
+
expect(result).toBe('<a href="mailto:test@example.com">Email</a>')
157
157
+
})
158
158
+
159
159
+
test('does not rewrite tel links', () => {
160
160
+
const html = '<a href="tel:+1234567890">Call</a>'
161
161
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
162
162
+
expect(result).toBe('<a href="tel:+1234567890">Call</a>')
163
163
+
})
164
164
+
})
165
165
+
166
166
+
describe('different HTML attributes', () => {
167
167
+
test('rewrites src attribute', () => {
168
168
+
const html = '<img src="/image.png">'
169
169
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
170
170
+
expect(result).toBe('<img src="/identifier/site/image.png">')
171
171
+
})
172
172
+
173
173
+
test('rewrites href attribute', () => {
174
174
+
const html = '<a href="/page.html">Link</a>'
175
175
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
176
176
+
expect(result).toBe('<a href="/identifier/site/page.html">Link</a>')
177
177
+
})
178
178
+
179
179
+
test('rewrites action attribute', () => {
180
180
+
const html = '<form action="/submit"></form>'
181
181
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
182
182
+
expect(result).toBe('<form action="/identifier/site/submit"></form>')
183
183
+
})
184
184
+
185
185
+
test('rewrites data attribute', () => {
186
186
+
const html = '<object data="/document.pdf"></object>'
187
187
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
188
188
+
expect(result).toBe(
189
189
+
'<object data="/identifier/site/document.pdf"></object>'
190
190
+
)
191
191
+
})
192
192
+
193
193
+
test('rewrites poster attribute', () => {
194
194
+
const html = '<video poster="/thumbnail.jpg"></video>'
195
195
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
196
196
+
expect(result).toBe(
197
197
+
'<video poster="/identifier/site/thumbnail.jpg"></video>'
198
198
+
)
199
199
+
})
200
200
+
201
201
+
test('rewrites srcset attribute with single URL', () => {
202
202
+
const html = '<img srcset="/image.png 1x">'
203
203
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
204
204
+
expect(result).toBe(
205
205
+
'<img srcset="/identifier/site/image.png 1x">'
206
206
+
)
207
207
+
})
208
208
+
209
209
+
test('rewrites srcset attribute with multiple URLs', () => {
210
210
+
const html = '<img srcset="/image-1x.png 1x, /image-2x.png 2x">'
211
211
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
212
212
+
expect(result).toBe(
213
213
+
'<img srcset="/identifier/site/image-1x.png 1x, /identifier/site/image-2x.png 2x">'
214
214
+
)
215
215
+
})
216
216
+
217
217
+
test('rewrites srcset with width descriptors', () => {
218
218
+
const html = '<img srcset="/small.jpg 320w, /large.jpg 1024w">'
219
219
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
220
220
+
expect(result).toBe(
221
221
+
'<img srcset="/identifier/site/small.jpg 320w, /identifier/site/large.jpg 1024w">'
222
222
+
)
223
223
+
})
224
224
+
225
225
+
test('rewrites srcset with relative paths from nested document', () => {
226
226
+
const html = '<img srcset="../img1.png 1x, ../img2.png 2x">'
227
227
+
const result = rewriteHtmlPaths(
228
228
+
html,
229
229
+
basePath,
230
230
+
'folder1/folder2/index.html'
231
231
+
)
232
232
+
expect(result).toBe(
233
233
+
'<img srcset="/identifier/site/folder1/img1.png 1x, /identifier/site/folder1/img2.png 2x">'
234
234
+
)
235
235
+
})
236
236
+
})
237
237
+
238
238
+
describe('quote handling', () => {
239
239
+
test('handles double quotes', () => {
240
240
+
const html = '<img src="/image.png">'
241
241
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
242
242
+
expect(result).toBe('<img src="/identifier/site/image.png">')
243
243
+
})
244
244
+
245
245
+
test('handles single quotes', () => {
246
246
+
const html = "<img src='/image.png'>"
247
247
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
248
248
+
expect(result).toBe("<img src='/identifier/site/image.png'>")
249
249
+
})
250
250
+
251
251
+
test('handles mixed quotes in same document', () => {
252
252
+
const html = '<img src="/img1.png"><link href=\'/style.css\'>'
253
253
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
254
254
+
expect(result).toBe(
255
255
+
'<img src="/identifier/site/img1.png"><link href=\'/identifier/site/style.css\'>'
256
256
+
)
257
257
+
})
258
258
+
})
259
259
+
260
260
+
describe('multiple rewrites in same document', () => {
261
261
+
test('rewrites multiple attributes in complex HTML', () => {
262
262
+
const html = `
263
263
+
<!DOCTYPE html>
264
264
+
<html>
265
265
+
<head>
266
266
+
<link href="/css/style.css" rel="stylesheet">
267
267
+
<script src="/js/app.js"></script>
268
268
+
</head>
269
269
+
<body>
270
270
+
<img src="/images/logo.png" alt="Logo">
271
271
+
<a href="/about.html">About</a>
272
272
+
<form action="/submit">
273
273
+
<button type="submit">Submit</button>
274
274
+
</form>
275
275
+
</body>
276
276
+
</html>
277
277
+
`
278
278
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
279
279
+
expect(result).toContain('href="/identifier/site/css/style.css"')
280
280
+
expect(result).toContain('src="/identifier/site/js/app.js"')
281
281
+
expect(result).toContain('src="/identifier/site/images/logo.png"')
282
282
+
expect(result).toContain('href="/identifier/site/about.html"')
283
283
+
expect(result).toContain('action="/identifier/site/submit"')
284
284
+
})
285
285
+
286
286
+
test('handles mix of relative and absolute paths', () => {
287
287
+
const html = `
288
288
+
<img src="/abs/image.png">
289
289
+
<img src="./rel/image.png">
290
290
+
<img src="../parent/image.png">
291
291
+
<img src="https://external.com/image.png">
292
292
+
`
293
293
+
const result = rewriteHtmlPaths(
294
294
+
html,
295
295
+
basePath,
296
296
+
'folder1/folder2/page.html'
297
297
+
)
298
298
+
expect(result).toContain('src="/identifier/site/abs/image.png"')
299
299
+
expect(result).toContain(
300
300
+
'src="/identifier/site/folder1/folder2/rel/image.png"'
301
301
+
)
302
302
+
expect(result).toContain(
303
303
+
'src="/identifier/site/folder1/parent/image.png"'
304
304
+
)
305
305
+
expect(result).toContain('src="https://external.com/image.png"')
306
306
+
})
307
307
+
})
308
308
+
309
309
+
describe('edge cases', () => {
310
310
+
test('handles empty src attribute', () => {
311
311
+
const html = '<img src="">'
312
312
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
313
313
+
expect(result).toBe('<img src="">')
314
314
+
})
315
315
+
316
316
+
test('handles basePath without trailing slash', () => {
317
317
+
const html = '<img src="/image.png">'
318
318
+
const result = rewriteHtmlPaths(html, '/identifier/site', 'index.html')
319
319
+
expect(result).toBe('<img src="/identifier/site/image.png">')
320
320
+
})
321
321
+
322
322
+
test('handles basePath with trailing slash', () => {
323
323
+
const html = '<img src="/image.png">'
324
324
+
const result = rewriteHtmlPaths(
325
325
+
html,
326
326
+
'/identifier/site/',
327
327
+
'index.html'
328
328
+
)
329
329
+
expect(result).toBe('<img src="/identifier/site/image.png">')
330
330
+
})
331
331
+
332
332
+
test('handles whitespace around equals sign', () => {
333
333
+
const html = '<img src = "/image.png">'
334
334
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
335
335
+
expect(result).toBe('<img src="/identifier/site/image.png">')
336
336
+
})
337
337
+
338
338
+
test('preserves query strings in URLs', () => {
339
339
+
const html = '<img src="/image.png?v=123">'
340
340
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
341
341
+
expect(result).toBe('<img src="/identifier/site/image.png?v=123">')
342
342
+
})
343
343
+
344
344
+
test('preserves hash fragments in URLs', () => {
345
345
+
const html = '<a href="/page.html#section">Link</a>'
346
346
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
347
347
+
expect(result).toBe(
348
348
+
'<a href="/identifier/site/page.html#section">Link</a>'
349
349
+
)
350
350
+
})
351
351
+
352
352
+
test('handles paths with special characters', () => {
353
353
+
const html = '<img src="/folder-name/file_name.png">'
354
354
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
355
355
+
expect(result).toBe(
356
356
+
'<img src="/identifier/site/folder-name/file_name.png">'
357
357
+
)
358
358
+
})
359
359
+
})
360
360
+
361
361
+
describe('real-world scenario', () => {
362
362
+
test('handles the example from the bug report', () => {
363
363
+
// HTML file at: /folder1/folder2/folder3/index.html
364
364
+
// Image at: /folder1/folder2/img.png
365
365
+
// Reference: src="../img.png"
366
366
+
const html = '<img src="../img.png">'
367
367
+
const result = rewriteHtmlPaths(
368
368
+
html,
369
369
+
basePath,
370
370
+
'folder1/folder2/folder3/index.html'
371
371
+
)
372
372
+
expect(result).toBe(
373
373
+
'<img src="/identifier/site/folder1/folder2/img.png">'
374
374
+
)
375
375
+
})
376
376
+
377
377
+
test('handles deeply nested static site structure', () => {
378
378
+
// A typical static site with nested pages and shared assets
379
379
+
const html = `
380
380
+
<!DOCTYPE html>
381
381
+
<html>
382
382
+
<head>
383
383
+
<link href="../../css/style.css" rel="stylesheet">
384
384
+
<link href="../../css/theme.css" rel="stylesheet">
385
385
+
<script src="../../js/main.js"></script>
386
386
+
</head>
387
387
+
<body>
388
388
+
<img src="../../images/logo.png" alt="Logo">
389
389
+
<img src="./post-image.jpg" alt="Post">
390
390
+
<a href="../index.html">Back to Blog</a>
391
391
+
<a href="../../index.html">Home</a>
392
392
+
</body>
393
393
+
</html>
394
394
+
`
395
395
+
const result = rewriteHtmlPaths(
396
396
+
html,
397
397
+
basePath,
398
398
+
'blog/posts/my-post.html'
399
399
+
)
400
400
+
401
401
+
// Assets two levels up
402
402
+
expect(result).toContain('href="/identifier/site/css/style.css"')
403
403
+
expect(result).toContain('href="/identifier/site/css/theme.css"')
404
404
+
expect(result).toContain('src="/identifier/site/js/main.js"')
405
405
+
expect(result).toContain('src="/identifier/site/images/logo.png"')
406
406
+
407
407
+
// Same directory
408
408
+
expect(result).toContain(
409
409
+
'src="/identifier/site/blog/posts/post-image.jpg"'
410
410
+
)
411
411
+
412
412
+
// One level up
413
413
+
expect(result).toContain('href="/identifier/site/blog/index.html"')
414
414
+
415
415
+
// Two levels up
416
416
+
expect(result).toContain('href="/identifier/site/index.html"')
417
417
+
})
418
418
+
})
419
419
+
})
420
420
+
421
421
+
// Test suite for isHtmlContent(filepath, contentType?): a file counts as HTML
// when either the content type or the file extension says so.
describe('isHtmlContent', () => {
	// The content type wins even when the extension is not an HTML one.
	test('identifies HTML by content type', () => {
		expect(isHtmlContent('file.txt', 'text/html')).toBe(true)
		expect(isHtmlContent('file.txt', 'text/html; charset=utf-8')).toBe(
			true
		)
	})

	test('identifies HTML by .html extension', () => {
		expect(isHtmlContent('index.html')).toBe(true)
		expect(isHtmlContent('page.html', undefined)).toBe(true)
		expect(isHtmlContent('/path/to/file.html')).toBe(true)
	})

	test('identifies HTML by .htm extension', () => {
		expect(isHtmlContent('index.htm')).toBe(true)
		expect(isHtmlContent('page.htm', undefined)).toBe(true)
	})

	test('handles case-insensitive extensions', () => {
		expect(isHtmlContent('INDEX.HTML')).toBe(true)
		expect(isHtmlContent('page.HTM')).toBe(true)
		expect(isHtmlContent('File.HtMl')).toBe(true)
	})

	test('returns false for non-HTML files', () => {
		expect(isHtmlContent('script.js')).toBe(false)
		expect(isHtmlContent('style.css')).toBe(false)
		expect(isHtmlContent('image.png')).toBe(false)
		expect(isHtmlContent('data.json')).toBe(false)
	})

	test('returns false for files with no extension', () => {
		expect(isHtmlContent('README')).toBe(false)
		expect(isHtmlContent('Makefile')).toBe(false)
	})
})
+178
-99
hosting-service/src/lib/html-rewriter.ts
···
4
4
*/
5
5
6
6
// Attribute names whose quoted values are scanned for paths to rewrite.
// 'srcset' is listed last and is handled separately because its value can
// hold several comma-separated URLs.
const REWRITABLE_ATTRIBUTES = [
	'src',
	'href',
	'action',
	'data',
	'poster',
	'srcset'
] as const
14
14
15
15
/**
 * Check if a path should be rewritten
 *
 * Returns false for values that must be left exactly as written:
 * - empty strings
 * - pure fragment (`#section`) or query (`?page=2`) references, which the
 *   browser resolves against the current document
 * - protocol-relative URLs (`//host/...`)
 * - anything that starts with a URL scheme (`https:`, `data:`, `mailto:`, ...)
 *
 * Everything else (absolute `/...` paths, `./`, `../` and bare relative
 * paths) is reported as rewritable.
 *
 * @param path - Raw attribute value taken from the HTML
 * @returns true when the value is a site-local path that needs rewriting
 */
function shouldRewritePath(path: string): boolean {
	// Don't rewrite empty paths
	if (!path) return false

	// Don't rewrite same-document references; treating "#top" as a relative
	// file name would turn an in-page anchor into a link to another URL
	if (path.startsWith('#') || path.startsWith('?')) return false

	// Don't rewrite protocol-relative URLs (//cdn.example.com/...)
	if (path.startsWith('//')) return false

	// Don't rewrite anything carrying a URL scheme (http:, https:, data:, ...).
	// Only a scheme at the very start counts, so a colon that appears later
	// (e.g. inside a query string) does not block rewriting.
	if (/^[a-z][a-z0-9+.-]*:/i.test(path)) return false

	// Rewrite absolute paths (/) and relative paths (./ or ../ or plain filenames)
	return true
}
43
43
+
44
44
+
/**
 * Normalize a path by resolving . and .. segments
 *
 * - `.` segments and empty segments (from repeated slashes) are dropped
 * - `..` removes the previous segment; at the top it is ignored, so the
 *   result never climbs above the root and an absolute path stays absolute
 * - a leading slash is preserved
 * - a trailing slash is preserved when segments remain, so directory links
 *   such as `docs/` keep pointing at the directory
 *
 * @param path - Slash-separated path, absolute or relative
 * @returns The path with `.`/`..`/empty segments resolved
 */
function normalizePath(path: string): string {
	const isAbsolute = path.startsWith('/')
	const hasTrailingSlash = path.length > 1 && path.endsWith('/')
	const segments: string[] = []

	for (const part of path.split('/')) {
		// Skip current-directory markers and empty parts
		if (part === '' || part === '.') continue

		if (part === '..') {
			// Go up one directory; pop() on an empty list is a no-op, which
			// is what keeps us from going past the root
			segments.pop()
			continue
		}

		segments.push(part)
	}

	let normalized = segments.join('/')
	if (isAbsolute) normalized = '/' + normalized
	if (hasTrailingSlash && segments.length > 0) normalized += '/'

	return normalized
}
37
71
38
38
-
// Rewrite absolute paths (/)
39
39
-
return true;
72
72
+
/**
 * Get the directory path from a file path
 * e.g., "folder1/folder2/file.html" -> "folder1/folder2/"
 *
 * The result keeps its trailing slash so a file name can be appended
 * directly. A path without any slash has no directory part and yields ''.
 *
 * @param filepath - Slash-separated path to a file
 * @returns Everything up to and including the last '/', or '' if none
 */
function getDirectory(filepath: string): string {
	const lastSlash = filepath.lastIndexOf('/')
	return lastSlash === -1 ? '' : filepath.slice(0, lastSlash + 1)
}
41
83
42
84
/**
 * Rewrite a single path
 *
 * Absolute paths (`/file.js`) are re-rooted under `basePath`. Relative paths
 * (`./x`, `../x`, `x`) are resolved against the directory of `documentPath`
 * and then placed under `basePath`. Values rejected by shouldRewritePath are
 * returned unchanged.
 *
 * @param path - Raw attribute value
 * @param basePath - Prefix to prepend; concatenated as-is, so the caller is
 *   expected to pass it with a trailing slash
 * @param documentPath - Path of the HTML document that contains the value
 * @returns The rewritten path, or `path` itself when it must not be touched
 */
function rewritePath(
	path: string,
	basePath: string,
	documentPath: string
): string {
	if (!shouldRewritePath(path)) {
		return path
	}

	// Handle absolute paths: /file.js -> /base/file.js
	if (path.startsWith('/')) {
		return basePath + path.slice(1)
	}

	// Keep ?query / #fragment out of path resolution so that slashes or dots
	// inside them are never treated as path segments
	const suffixStart = path.search(/[?#]/)
	const pathname = suffixStart === -1 ? path : path.slice(0, suffixStart)
	const suffix = suffixStart === -1 ? '' : path.slice(suffixStart)

	// A bare "#anchor" or "?query" refers to the current document; leave it
	if (!pathname) {
		return path
	}

	// Resolve against the document directory; normalizePath takes care of
	// "./" and "../" segments, so every relative form goes through one path.
	// Leading slashes are stripped because basePath already supplies the
	// separator (documentPath may arrive with a leading slash).
	const resolvedPath = normalizePath(
		getDirectory(documentPath) + pathname
	).replace(/^\/+/, '')

	return basePath + resolvedPath + suffix
}
59
121
60
122
/**
 * Rewrite srcset attribute (can contain multiple URLs)
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w"
 *
 * Candidates are tokenised the way the HTML srcset grammar describes: a URL
 * is a run of non-whitespace characters, and a comma only ends a candidate
 * when it trails the URL or follows the descriptor. This keeps URLs that
 * contain commas (for example data URIs) in one piece.
 *
 * @param srcset - Raw srcset attribute value
 * @param basePath - Prefix passed through to rewritePath
 * @param documentPath - Path of the containing HTML document
 * @returns The srcset with every URL rewritten, candidates joined by ", "
 */
function rewriteSrcset(
	srcset: string,
	basePath: string,
	documentPath: string
): string {
	const isWhitespace = (ch: string): boolean => /\s/.test(ch)
	const candidates: string[] = []
	const length = srcset.length
	let pos = 0

	while (pos < length) {
		// Skip separators before the next candidate
		while (
			pos < length &&
			(isWhitespace(srcset.charAt(pos)) || srcset.charAt(pos) === ',')
		) {
			pos++
		}
		if (pos >= length) break

		// URL: everything up to the next whitespace
		const urlStart = pos
		while (pos < length && !isWhitespace(srcset.charAt(pos))) {
			pos++
		}
		let url = srcset.slice(urlStart, pos)
		let descriptor = ''

		if (url.endsWith(',')) {
			// "a.png, b.png": the comma terminated this candidate
			url = url.replace(/,+$/, '')
		} else {
			// Descriptor (e.g. "2x", "200w") runs up to the next comma
			const descriptorStart = pos
			while (pos < length && srcset.charAt(pos) !== ',') {
				pos++
			}
			descriptor = srcset.slice(descriptorStart, pos).trim()
		}

		if (!url) continue

		const rewrittenUrl = rewritePath(url, basePath, documentPath)
		candidates.push(
			descriptor ? `${rewrittenUrl} ${descriptor}` : rewrittenUrl
		)
	}

	return candidates.join(', ')
}
82
148
83
149
/**
 * Rewrite absolute and relative paths in HTML content
 * Uses simple regex matching for safety (no full HTML parsing)
 *
 * For every attribute in REWRITABLE_ATTRIBUTES, each double- or single-quoted
 * value is passed through rewritePath (or rewriteSrcset for `srcset`).
 * Because matching is purely textual:
 * - unquoted attribute values are left untouched
 * - a match is re-emitted as `attr="value"` / `attr='value'`, i.e. with the
 *   lower-case attribute name and no whitespace around `=`
 * - attribute-looking text outside of tags is rewritten too
 *
 * @param html - HTML source text
 * @param basePath - Prefix under which the site is served; a trailing slash
 *   is added when missing
 * @param documentPath - Path of this document within the site, used to
 *   resolve relative references
 * @returns The HTML with rewritable paths replaced
 */
export function rewriteHtmlPaths(
	html: string,
	basePath: string,
	documentPath: string
): string {
	// Ensure base path ends with /
	const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/'

	let rewritten = html

	// Rewrite each attribute type
	for (const attr of REWRITABLE_ATTRIBUTES) {
		// srcset holds a list of URLs, every other attribute a single one
		const rewriteValue = (value: string): string =>
			attr === 'srcset'
				? rewriteSrcset(value, normalizedBase, documentPath)
				: rewritePath(value, normalizedBase, documentPath)

		// Handle both quoting styles for every attribute, srcset included
		for (const quote of ['"', "'"] as const) {
			// Whitespace around '=' is capped at 5 characters and the value is
			// a negated character class, which keeps matching cheap on
			// hostile input
			const pattern = new RegExp(
				`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}${quote}([^${quote}]*)${quote}`,
				'gi'
			)
			rewritten = rewritten.replace(
				pattern,
				(_match: string, value: string) =>
					`${attr}=${quote}${rewriteValue(value)}${quote}`
			)
		}
	}

	return rewritten
}
133
215
134
216
/**
 * Check if content is HTML based on content type or filename
 *
 * @param filepath - File name or path; only a trailing `.html` / `.htm`
 *   extension (any letter case) is considered
 * @param contentType - Optional MIME type header value; matched
 *   case-insensitively and may carry parameters such as `; charset=utf-8`
 * @returns true when either the content type or the extension indicates HTML
 */
export function isHtmlContent(filepath: string, contentType?: string): boolean {
	// MIME type names are case-insensitive, so compare in lower case
	if (contentType && contentType.toLowerCase().includes('text/html')) {
		return true
	}

	// Require a real ".html"/".htm" suffix; a file that is merely named
	// "html" has no extension and must not match
	return /\.html?$/i.test(filepath)
}
+3
-2
hosting-service/src/server.ts
···
156
156
} else {
157
157
content = readFileSync(cachedFile, 'utf-8');
158
158
}
159
159
-
const rewritten = rewriteHtmlPaths(content, basePath);
159
159
+
const rewritten = rewriteHtmlPaths(content, basePath, requestPath);
160
160
161
161
// Recompress the HTML for efficient delivery
162
162
const { gzipSync } = await import('zlib');
···
224
224
} else {
225
225
content = readFileSync(indexFile, 'utf-8');
226
226
}
227
227
-
const rewritten = rewriteHtmlPaths(content, basePath);
227
227
+
const indexPath = `${requestPath}/index.html`;
228
228
+
const rewritten = rewriteHtmlPaths(content, basePath, indexPath);
228
229
229
230
// Recompress the HTML for efficient delivery
230
231
const { gzipSync } = await import('zlib');