tangled
alpha
login
or
join now
evan.jarrett.net
/
at-container-registry
66
fork
atom
A container registry that uses the AT Protocol for manifest storage and S3 for blob storage.
atcr.io
docker
container
atproto
go
66
fork
atom
overview
issues
1
pulls
pipelines
add a verify check on the relay-compare
evan.jarrett.net
2 weeks ago
e6c2099a
5249c9ea
verified
This commit was signed with the committer's
known signature
.
evan.jarrett.net
SSH Key Fingerprint:
SHA256:bznk0uVPp7XFOl67P0uTM1pCjf2A4ojeP/lsUE7uauQ=
0/2
lint.yaml
failed
5min 2s
tests.yml
failed
5min 2s
+193
-23
1 changed file
expand all
collapse all
unified
split
cmd
relay-compare
main.go
+193
-23
cmd/relay-compare/main.go
···
56
56
counts []int
57
57
status string // "sync", "diff", "error"
58
58
diffCount int
59
59
+
realGaps int // verified: record exists on PDS but relay is missing it
60
60
+
ghosts int // verified: record doesn't exist on PDS, relay has stale entry
61
61
+
}
62
62
+
63
63
+
// verifyResult holds the PDS verification result for a (DID, collection) pair.
64
64
+
type verifyResult struct {
65
65
+
exists bool
66
66
+
err error
67
67
+
}
68
68
+
69
69
+
// key identifies a (collection, relay-or-DID) pair for result lookups.
70
70
+
type key struct{ col, relay string }
71
71
+
72
72
+
// diffEntry represents a DID missing from a specific relay for a collection.
73
73
+
type diffEntry struct {
74
74
+
did string
75
75
+
collection string
76
76
+
relayIdx int
59
77
}
60
78
61
79
func main() {
62
80
noColor := flag.Bool("no-color", false, "disable colored output")
81
81
+
verify := flag.Bool("verify", false, "verify diffs against PDS to distinguish real gaps from ghost entries")
63
82
collection := flag.String("collection", "", "compare only this collection")
64
83
timeout := flag.Duration("timeout", 2*time.Minute, "timeout for all relay queries")
65
84
flag.Usage = func() {
···
107
126
fmt.Printf("%sFetching %d collections from %d relays...%s\n", cDim, len(cols), len(relays), cReset)
108
127
109
128
// Fetch all data in parallel: every (collection, relay) pair concurrently
110
110
-
type key struct{ col, relay string }
111
129
type fetchResult struct {
112
130
dids map[string]struct{}
113
131
err error
···
130
148
}
131
149
wg.Wait()
132
150
133
133
-
// Display per-collection diffs and collect summary
134
134
-
var summary []summaryRow
135
135
-
totalMissing := 0
151
151
+
// Collect all diffs across collections (for optional verification)
152
152
+
var allDiffs []diffEntry
136
153
137
137
-
for _, col := range cols {
138
138
-
fmt.Printf("\n%s%s━━━ %s ━━━%s\n", cBold, cCyan, col, cReset)
154
154
+
// First pass: compute diffs per collection
155
155
+
type colDiffs struct {
156
156
+
hasError bool
157
157
+
counts []int
158
158
+
// per-relay missing DIDs (sorted)
159
159
+
missing [][]string
160
160
+
}
161
161
+
colResults := make(map[string]*colDiffs)
139
162
140
140
-
row := summaryRow{collection: col, counts: make([]int, len(relays))}
141
141
-
hasError := false
163
163
+
for _, col := range cols {
164
164
+
cd := &colDiffs{counts: make([]int, len(relays)), missing: make([][]string, len(relays))}
165
165
+
colResults[col] = cd
142
166
143
143
-
// Show counts per relay
144
167
for ri, relay := range relays {
145
168
r := allResults[key{col, relay}]
146
169
if r.err != nil {
147
147
-
hasError = true
148
148
-
fmt.Printf(" %-*s %s%serror%s: %v\n", maxNameLen, names[ri], cBold, cRed, cReset, r.err)
170
170
+
cd.hasError = true
149
171
} else {
150
150
-
row.counts[ri] = len(r.dids)
151
151
-
fmt.Printf(" %-*s %s%d%s DIDs\n", maxNameLen, names[ri], cBold, len(r.dids), cReset)
172
172
+
cd.counts[ri] = len(r.dids)
152
173
}
153
174
}
154
175
155
155
-
if hasError {
156
156
-
row.status = "error"
157
157
-
summary = append(summary, row)
176
176
+
if cd.hasError {
158
177
continue
159
178
}
160
179
···
166
185
}
167
186
}
168
187
169
169
-
// For each relay, show what it's missing
170
170
-
inSync := true
171
188
for ri, relay := range relays {
172
189
var missing []string
173
190
for did := range union {
···
175
192
missing = append(missing, did)
176
193
}
177
194
}
195
195
+
sort.Strings(missing)
196
196
+
cd.missing[ri] = missing
197
197
+
for _, did := range missing {
198
198
+
allDiffs = append(allDiffs, diffEntry{did: did, collection: col, relayIdx: ri})
199
199
+
}
200
200
+
}
201
201
+
}
202
202
+
203
203
+
// Optionally verify diffs against PDS
204
204
+
verified := make(map[key]verifyResult)
205
205
+
if *verify && len(allDiffs) > 0 {
206
206
+
verified = verifyDiffs(ctx, allDiffs)
207
207
+
}
208
208
+
209
209
+
// Display per-collection diffs and collect summary
210
210
+
var summary []summaryRow
211
211
+
totalMissing := 0
212
212
+
totalRealGaps := 0
213
213
+
totalGhosts := 0
214
214
+
215
215
+
for _, col := range cols {
216
216
+
fmt.Printf("\n%s%s━━━ %s ━━━%s\n", cBold, cCyan, col, cReset)
217
217
+
218
218
+
cd := colResults[col]
219
219
+
row := summaryRow{collection: col, counts: cd.counts}
220
220
+
221
221
+
if cd.hasError {
222
222
+
for ri, relay := range relays {
223
223
+
r := allResults[key{col, relay}]
224
224
+
if r.err != nil {
225
225
+
fmt.Printf(" %-*s %s%serror%s: %v\n", maxNameLen, names[ri], cBold, cRed, cReset, r.err)
226
226
+
} else {
227
227
+
fmt.Printf(" %-*s %s%d%s DIDs\n", maxNameLen, names[ri], cBold, len(r.dids), cReset)
228
228
+
}
229
229
+
}
230
230
+
row.status = "error"
231
231
+
summary = append(summary, row)
232
232
+
continue
233
233
+
}
234
234
+
235
235
+
// Show counts per relay
236
236
+
for ri := range relays {
237
237
+
fmt.Printf(" %-*s %s%d%s DIDs\n", maxNameLen, names[ri], cBold, cd.counts[ri], cReset)
238
238
+
}
239
239
+
240
240
+
// Show missing DIDs per relay
241
241
+
inSync := true
242
242
+
for ri := range relays {
243
243
+
missing := cd.missing[ri]
178
244
if len(missing) == 0 {
179
245
continue
180
246
}
···
182
248
inSync = false
183
249
totalMissing += len(missing)
184
250
row.diffCount += len(missing)
185
185
-
sort.Strings(missing)
186
251
187
252
fmt.Printf("\n %sMissing from %s (%d):%s\n", cRed, names[ri], len(missing), cReset)
188
253
for _, did := range missing {
189
189
-
fmt.Printf(" %s- %s%s\n", cRed, did, cReset)
254
254
+
suffix := ""
255
255
+
if *verify {
256
256
+
vr, ok := verified[key{col, did}]
257
257
+
if !ok {
258
258
+
suffix = fmt.Sprintf(" %s(verify: unknown)%s", cDim, cReset)
259
259
+
} else if vr.err != nil {
260
260
+
suffix = fmt.Sprintf(" %s(verify: %s)%s", cDim, vr.err, cReset)
261
261
+
} else if vr.exists {
262
262
+
suffix = fmt.Sprintf(" %s← real gap%s", cRed, cReset)
263
263
+
row.realGaps++
264
264
+
totalRealGaps++
265
265
+
} else {
266
266
+
suffix = fmt.Sprintf(" %s← ghost (not on PDS)%s", cDim, cReset)
267
267
+
row.ghosts++
268
268
+
totalGhosts++
269
269
+
}
270
270
+
}
271
271
+
fmt.Printf(" %s- %s%s%s\n", cRed, did, cReset, suffix)
190
272
}
191
273
}
192
274
···
200
282
}
201
283
202
284
// Summary table
203
203
-
printSummary(summary, names, maxNameLen, totalMissing)
285
285
+
printSummary(summary, names, maxNameLen, totalMissing, *verify, totalRealGaps, totalGhosts)
204
286
}
205
287
206
206
-
func printSummary(rows []summaryRow, names []string, maxNameLen, totalMissing int) {
288
288
+
func printSummary(rows []summaryRow, names []string, maxNameLen, totalMissing int, showVerify bool, totalRealGaps, totalGhosts int) {
207
289
fmt.Printf("\n%s%s━━━ Summary ━━━%s\n\n", cBold, cCyan, cReset)
208
290
209
291
colW := 28
···
241
323
case "sync":
242
324
fmt.Printf(" %s✓ in sync%s", cGreen, cReset)
243
325
case "diff":
244
244
-
fmt.Printf(" %s≠ %d missing%s", cYellow, row.diffCount, cReset)
326
326
+
if showVerify {
327
327
+
fmt.Printf(" %s≠ %d missing%s %s(%d real, %d ghost)%s",
328
328
+
cYellow, row.diffCount, cReset, cDim, row.realGaps, row.ghosts, cReset)
329
329
+
} else {
330
330
+
fmt.Printf(" %s≠ %d missing%s", cYellow, row.diffCount, cReset)
331
331
+
}
245
332
case "error":
246
333
fmt.Printf(" %s✗ error%s", cRed, cReset)
247
334
}
···
252
339
fmt.Println()
253
340
if totalMissing > 0 {
254
341
fmt.Printf("%s%d total missing DID-collection pairs across relays%s\n", cYellow, totalMissing, cReset)
342
342
+
if showVerify {
343
343
+
fmt.Printf(" %s%d real gaps%s (record exists on PDS), %s%d ghosts%s (record deleted from PDS)\n",
344
344
+
cRed, totalRealGaps, cReset, cDim, totalGhosts, cReset)
345
345
+
}
255
346
} else {
256
347
fmt.Printf("%s✓ All relays fully in sync%s\n", cGreen, cReset)
257
348
}
349
349
+
}
350
350
+
351
351
+
// verifyDiffs resolves each diff DID to its PDS and checks if records actually exist.
352
352
+
func verifyDiffs(ctx context.Context, diffs []diffEntry) map[key]verifyResult {
353
353
+
// Collect unique (DID, collection) pairs to verify
354
354
+
type didCol struct{ did, col string }
355
355
+
unique := make(map[didCol]struct{})
356
356
+
for _, d := range diffs {
357
357
+
unique[didCol{d.did, d.collection}] = struct{}{}
358
358
+
}
359
359
+
360
360
+
// Resolve unique DIDs to PDS endpoints (deduplicate across collections)
361
361
+
uniqueDIDs := make(map[string]struct{})
362
362
+
for dc := range unique {
363
363
+
uniqueDIDs[dc.did] = struct{}{}
364
364
+
}
365
365
+
366
366
+
fmt.Printf("\n%sVerifying %d DID-collection pairs (%d unique DIDs)...%s\n", cDim, len(unique), len(uniqueDIDs), cReset)
367
367
+
368
368
+
pdsEndpoints := make(map[string]string) // DID → PDS URL
369
369
+
pdsErrors := make(map[string]error) // DID → resolution error
370
370
+
var mu sync.Mutex
371
371
+
var wg sync.WaitGroup
372
372
+
sem := make(chan struct{}, 10) // concurrency limit
373
373
+
374
374
+
for did := range uniqueDIDs {
375
375
+
wg.Add(1)
376
376
+
go func(did string) {
377
377
+
defer wg.Done()
378
378
+
sem <- struct{}{}
379
379
+
defer func() { <-sem }()
380
380
+
381
381
+
pds, err := atproto.ResolveDIDToPDS(ctx, did)
382
382
+
mu.Lock()
383
383
+
if err != nil {
384
384
+
pdsErrors[did] = err
385
385
+
} else {
386
386
+
pdsEndpoints[did] = pds
387
387
+
}
388
388
+
mu.Unlock()
389
389
+
}(did)
390
390
+
}
391
391
+
wg.Wait()
392
392
+
393
393
+
// Check each (DID, collection) pair against the resolved PDS
394
394
+
results := make(map[key]verifyResult)
395
395
+
396
396
+
for dc := range unique {
397
397
+
wg.Add(1)
398
398
+
go func(dc didCol) {
399
399
+
defer wg.Done()
400
400
+
sem <- struct{}{}
401
401
+
defer func() { <-sem }()
402
402
+
403
403
+
k := key{dc.col, dc.did}
404
404
+
405
405
+
// Check if DID resolution failed
406
406
+
if err, ok := pdsErrors[dc.did]; ok {
407
407
+
mu.Lock()
408
408
+
results[k] = verifyResult{err: fmt.Errorf("DID resolution failed: %w", err)}
409
409
+
mu.Unlock()
410
410
+
return
411
411
+
}
412
412
+
413
413
+
pds := pdsEndpoints[dc.did]
414
414
+
client := atproto.NewClient(pds, "", "")
415
415
+
records, _, err := client.ListRecordsForRepo(ctx, dc.did, dc.col, 1, "")
416
416
+
mu.Lock()
417
417
+
if err != nil {
418
418
+
results[k] = verifyResult{err: err}
419
419
+
} else {
420
420
+
results[k] = verifyResult{exists: len(records) > 0}
421
421
+
}
422
422
+
mu.Unlock()
423
423
+
}(dc)
424
424
+
}
425
425
+
wg.Wait()
426
426
+
427
427
+
return results
258
428
}
259
429
260
430
// fetchAllDIDs paginates through listReposByCollection to collect all DIDs.