A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

update GC options, minor fix to scanners

evan.jarrett.net 5615dd41 27cf7815

verified
+550 -235
-1
pkg/appview/public/icons.svg
··· 49 49 <symbol id="user" viewBox="0 0 24 24"><path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2"/><circle cx="12" cy="7" r="4"/></symbol> 50 50 <symbol id="user-plus" viewBox="0 0 24 24"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"/><circle cx="9" cy="7" r="4"/><line x1="19" x2="19" y1="8" y2="14"/><line x1="22" x2="16" y1="11" y2="11"/></symbol> 51 51 <symbol id="x-circle" viewBox="0 0 24 24"><circle cx="12" cy="12" r="10"/><path d="m15 9-6 6"/><path d="m9 9 6 6"/></symbol> 52 - <symbol id="zap" viewBox="0 0 24 24"><path d="M4 14a1 1 0 0 1-.78-1.63l9.9-10.2a.5.5 0 0 1 .86.46l-1.92 6.02A1 1 0 0 0 13 10h7a1 1 0 0 1 .78 1.63l-9.9 10.2a.5.5 0 0 1-.86-.46l1.92-6.02A1 1 0 0 0 11 14z"/></symbol> 53 52 <symbol id="helm" viewBox="0 0 24 24"><path d="M12.337 0c-.475 0-.861 1.016-.861 2.269 0 .527.069 1.011.183 1.396a8.514 8.514 0 0 0-3.961 1.22 5.229 5.229 0 0 0-.595-1.093c-.606-.866-1.34-1.436-1.79-1.43a.381.381 0 0 0-.217.066c-.39.273-.123 1.326.596 2.353.267.381.559.705.84.948a8.683 8.683 0 0 0-1.528 1.716h1.734a7.179 7.179 0 0 1 5.381-2.421 7.18 7.18 0 0 1 5.382 2.42h1.733a8.687 8.687 0 0 0-1.32-1.53c.35-.249.735-.643 1.078-1.133.719-1.027.986-2.08.596-2.353a.382.382 0 0 0-.217-.065c-.45-.007-1.184.563-1.79 1.43a4.897 4.897 0 0 0-.676 1.325 8.52 8.52 0 0 0-3.899-1.42c.12-.39.193-.887.193-1.429 0-1.253-.386-2.269-.862-2.269zM1.624 9.443v5.162h1.358v-1.968h1.64v1.968h1.357V9.443H4.62v1.838H2.98V9.443zm5.912 0v5.162h3.21v-1.108H8.893v-.95h1.64v-1.142h-1.64v-.84h1.853V9.443zm4.698 0v5.162h3.218v-1.362h-1.86v-3.8zm4.706 0v5.162h1.364v-2.643l1.357 1.225 1.35-1.232v2.65h1.365V9.443h-.614l-2.1 1.914-2.109-1.914zm-11.82 7.28a8.688 8.688 0 0 0 1.412 1.548 5.206 5.206 0 0 0-.841.948c-.719 1.027-.985 2.08-.596 2.353.39.273 1.289-.338 2.007-1.364a5.23 5.23 0 0 0 .595-1.092 8.514 8.514 0 0 0 3.961 1.219 5.01 5.01 0 0 0-.183 1.396c0 1.253.386 2.269.861 2.269.476 0 .862-1.016.862-2.269 0-.542-.072-1.04-.193-1.43a8.52 8.52 0 0 0 3.9-1.42c.121.4.352.865.675 1.327.719 1.026 1.617 1.637 2.007 1.364.39-.273.123-1.326-.596-2.353-.343-.49-.727-.885-1.077-1.135a8.69 8.69 0 0 0 1.202-1.36h-1.771a7.174 7.174 0 0 1-5.227 2.252 7.174 7.174 0 0 1-5.226-2.252z" fill="currentColor" stroke="none"/></symbol> 54 53 </svg>
+3
pkg/hold/admin/admin.go
··· 414 414 // GC POSTs 415 415 r.Post("/admin/api/gc/preview", ui.handleGCPreview) 416 416 r.Post("/admin/api/gc/run", ui.handleGCRun) 417 + r.Post("/admin/api/gc/reconcile", ui.handleGCReconcile) 418 + r.Post("/admin/api/gc/delete-records", ui.handleGCDeleteRecords) 419 + r.Post("/admin/api/gc/delete-blobs", ui.handleGCDeleteBlobs) 417 420 418 421 // API endpoints (for HTMX) 419 422 r.Get("/admin/api/stats", ui.handleStatsAPI)
+77
pkg/hold/admin/handlers_gc.go
··· 99 99 }{Result: result}) 100 100 } 101 101 102 + // handleGCReconcile creates missing layer records without deleting anything 103 + func (ui *AdminUI) handleGCReconcile(w http.ResponseWriter, r *http.Request) { 104 + if ui.gc == nil { 105 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{"GC not available"}) 106 + return 107 + } 108 + 109 + result, err := ui.gc.Reconcile(r.Context()) 110 + if err != nil { 111 + slog.Error("GC reconcile failed", "error", err) 112 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{err.Error()}) 113 + return 114 + } 115 + 116 + session := getSessionFromContext(r.Context()) 117 + slog.Info("GC reconcile completed via admin panel", 118 + "recordsReconciled", result.RecordsReconciled, 119 + "duration", result.Duration, 120 + "by", session.DID) 121 + 122 + ui.renderTemplate(w, "partials/gc_result.html", struct { 123 + Result *gc.GCResult 124 + }{Result: result}) 125 + } 126 + 127 + // handleGCDeleteRecords deletes orphaned layer records (no blob deletion) 128 + func (ui *AdminUI) handleGCDeleteRecords(w http.ResponseWriter, r *http.Request) { 129 + if ui.gc == nil { 130 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{"GC not available"}) 131 + return 132 + } 133 + 134 + result, err := ui.gc.DeleteOrphanedRecords(r.Context()) 135 + if err != nil { 136 + slog.Error("GC delete records failed", "error", err) 137 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{err.Error()}) 138 + return 139 + } 140 + 141 + session := getSessionFromContext(r.Context()) 142 + slog.Info("GC delete orphaned records completed via admin panel", 143 + "recordsDeleted", result.RecordsDeleted, 144 + "orphanedRecords", result.OrphanedRecords, 145 + "duration", result.Duration, 146 + "by", session.DID) 147 + 148 + ui.renderTemplate(w, "partials/gc_result.html", struct { 149 + Result *gc.GCResult 150 + }{Result: result}) 151 + } 152 + 153 + // handleGCDeleteBlobs walks S3 and deletes unreferenced blobs 154 + func (ui *AdminUI) handleGCDeleteBlobs(w http.ResponseWriter, r *http.Request) { 155 + if ui.gc == nil { 156 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{"GC not available"}) 157 + return 158 + } 159 + 160 + result, err := ui.gc.DeleteOrphanedBlobs(r.Context()) 161 + if err != nil { 162 + slog.Error("GC delete blobs failed", "error", err) 163 + ui.renderTemplate(w, "partials/gc_error.html", struct{ Error string }{err.Error()}) 164 + return 165 + } 166 + 167 + session := getSessionFromContext(r.Context()) 168 + slog.Info("GC delete orphaned blobs completed via admin panel", 169 + "blobsDeleted", result.BlobsDeleted, 170 + "bytesReclaimed", result.BytesReclaimed, 171 + "duration", result.Duration, 172 + "by", session.DID) 173 + 174 + ui.renderTemplate(w, "partials/gc_result.html", struct { 175 + Result *gc.GCResult 176 + }{Result: result}) 177 + } 178 + 102 179 // timeAgo returns a human-readable relative time string 103 180 func timeAgo(t time.Time) string { 104 181 if t.IsZero() {
-1
pkg/hold/admin/public/icons.svg
··· 49 49 <symbol id="user" viewBox="0 0 24 24"><path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2"/><circle cx="12" cy="7" r="4"/></symbol> 50 50 <symbol id="user-plus" viewBox="0 0 24 24"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"/><circle cx="9" cy="7" r="4"/><line x1="19" x2="19" y1="8" y2="14"/><line x1="22" x2="16" y1="11" y2="11"/></symbol> 51 51 <symbol id="x-circle" viewBox="0 0 24 24"><circle cx="12" cy="12" r="10"/><path d="m15 9-6 6"/><path d="m9 9 6 6"/></symbol> 52 - <symbol id="zap" viewBox="0 0 24 24"><path d="M4 14a1 1 0 0 1-.78-1.63l9.9-10.2a.5.5 0 0 1 .86.46l-1.92 6.02A1 1 0 0 0 13 10h7a1 1 0 0 1 .78 1.63l-9.9 10.2a.5.5 0 0 1-.86-.46l1.92-6.02A1 1 0 0 0 11 14z"/></symbol> 53 52 <symbol id="helm" viewBox="0 0 24 24"><path d="M12.337 0c-.475 0-.861 1.016-.861 2.269 0 .527.069 1.011.183 1.396a8.514 8.514 0 0 0-3.961 1.22 5.229 5.229 0 0 0-.595-1.093c-.606-.866-1.34-1.436-1.79-1.43a.381.381 0 0 0-.217.066c-.39.273-.123 1.326.596 2.353.267.381.559.705.84.948a8.683 8.683 0 0 0-1.528 1.716h1.734a7.179 7.179 0 0 1 5.381-2.421 7.18 7.18 0 0 1 5.382 2.42h1.733a8.687 8.687 0 0 0-1.32-1.53c.35-.249.735-.643 1.078-1.133.719-1.027.986-2.08.596-2.353a.382.382 0 0 0-.217-.065c-.45-.007-1.184.563-1.79 1.43a4.897 4.897 0 0 0-.676 1.325 8.52 8.52 0 0 0-3.899-1.42c.12-.39.193-.887.193-1.429 0-1.253-.386-2.269-.862-2.269zM1.624 9.443v5.162h1.358v-1.968h1.64v1.968h1.357V9.443H4.62v1.838H2.98V9.443zm5.912 0v5.162h3.21v-1.108H8.893v-.95h1.64v-1.142h-1.64v-.84h1.853V9.443zm4.698 0v5.162h3.218v-1.362h-1.86v-3.8zm4.706 0v5.162h1.364v-2.643l1.357 1.225 1.35-1.232v2.65h1.365V9.443h-.614l-2.1 1.914-2.109-1.914zm-11.82 7.28a8.688 8.688 0 0 0 1.412 1.548 5.206 5.206 0 0 0-.841.948c-.719 1.027-.985 2.08-.596 2.353.39.273 1.289-.338 2.007-1.364a5.23 5.23 0 0 0 .595-1.092 8.514 8.514 0 0 0 3.961 1.219 5.01 5.01 0 0 0-.183 1.396c0 1.253.386 2.269.861 2.269.476 0 .862-1.016.862-2.269 0-.542-.072-1.04-.193-1.43a8.52 8.52 0 0 0 3.9-1.42c.121.4.352.865.675 1.327.719 1.026 1.617 1.637 2.007 1.364.39-.273.123-1.326-.596-2.353-.343-.49-.727-.885-1.077-1.135a8.69 8.69 0 0 0 1.202-1.36h-1.771a7.174 7.174 0 0 1-5.227 2.252 7.174 7.174 0 0 1-5.226-2.252z" fill="currentColor" stroke="none"/></symbol> 54 53 </svg>
+30 -6
pkg/hold/admin/templates/partials/gc_preview.html
··· 135 135 </div> 136 136 {{end}} 137 137 138 - <!-- Run GC button (only if there are actions to take) --> 138 + <!-- Individual action buttons --> 139 139 {{if or .Preview.OrphanedRecords .Preview.OrphanedBlobs .Preview.MissingRecords}} 140 - <div class="flex items-center gap-3 mt-6"> 140 + <div class="flex flex-wrap items-center gap-3 mt-6"> 141 + {{if .Preview.MissingRecords}} 142 + <button class="btn btn-warning gap-2" 143 + hx-post="/admin/api/gc/reconcile" 144 + hx-target="#gc-results" 145 + hx-swap="innerHTML" 146 + hx-indicator="#gc-loading"> 147 + {{ icon "file-plus" "size-4" }} 148 + Reconcile {{len .Preview.MissingRecords}} Records 149 + </button> 150 + {{end}} 151 + {{if .Preview.OrphanedRecords}} 141 152 <button class="btn btn-error gap-2" 142 - hx-post="/admin/api/gc/run" 153 + hx-post="/admin/api/gc/delete-records" 143 154 hx-target="#gc-results" 144 155 hx-swap="innerHTML" 145 - hx-confirm="Are you sure you want to run garbage collection?" 156 + hx-confirm="Delete {{len .Preview.OrphanedRecords}} orphaned layer records?" 146 157 hx-indicator="#gc-loading"> 147 - {{ icon "zap" "size-4" }} 148 - Run GC 158 + {{ icon "file-x" "size-4" }} 159 + Delete {{len .Preview.OrphanedRecords}} Orphaned Records 149 160 </button> 161 + {{end}} 162 + {{if .Preview.OrphanedBlobs}} 163 + <button class="btn btn-error gap-2" 164 + hx-post="/admin/api/gc/delete-blobs" 165 + hx-target="#gc-results" 166 + hx-swap="innerHTML" 167 + hx-confirm="Delete {{len .Preview.OrphanedBlobs}} orphaned blobs from S3? This cannot be undone." 168 + hx-indicator="#gc-loading"> 169 + {{ icon "trash-2" "size-4" }} 170 + Delete {{len .Preview.OrphanedBlobs}} Orphaned Blobs 171 + </button> 172 + {{end}} 150 173 </div> 174 + <p class="text-sm text-base-content/50 mt-2">Run Scan again after each operation to see updated counts.</p> 151 175 {{end}} 152 176 153 177 <!-- Nothing to clean -->
+2 -2
pkg/hold/config.go
··· 141 141 Secret string `yaml:"secret" comment:"Shared secret for scanner WebSocket auth. Empty disables scanning."` 142 142 143 143 // Minimum interval between re-scans of the same manifest. 0 disables proactive scanning. 144 - RescanInterval time.Duration `yaml:"rescan_interval" comment:"Minimum interval between re-scans of the same manifest. When set, the hold proactively scans manifests when the scanner is idle. Default: 24h. Set to 0 to disable."` 144 + RescanInterval time.Duration `yaml:"rescan_interval" comment:"Minimum interval between re-scans of the same manifest. When set, the hold proactively scans manifests when the scanner is idle. Default: 168h (7 days). Set to 0 to disable."` 145 145 } 146 146 147 147 // DatabaseConfig defines embedded PDS database settings ··· 223 223 v.SetDefault("gc.enabled", false) 224 224 // Scanner defaults 225 225 v.SetDefault("scanner.secret", "") 226 - v.SetDefault("scanner.rescan_interval", "24h") 226 + v.SetDefault("scanner.rescan_interval", "168h") // 7 days 227 227 228 228 // Log shipper defaults 229 229 v.SetDefault("log_shipper.batch_size", 100)
+112
pkg/hold/gc/gc.go
··· 296 296 return preview, nil 297 297 } 298 298 299 + // Reconcile creates missing layer records without deleting anything. 300 + // Requires a prior Preview() to identify missing records. 301 + func (gc *GarbageCollector) Reconcile(ctx context.Context) (*GCResult, error) { 302 + if !gc.tryStart() { 303 + return nil, fmt.Errorf("GC operation already in progress") 304 + } 305 + defer gc.finish() 306 + 307 + gc.mu.Lock() 308 + preview := gc.lastPreview 309 + gc.mu.Unlock() 310 + 311 + if preview == nil { 312 + return nil, fmt.Errorf("no preview available — run Scan first") 313 + } 314 + if len(preview.MissingRecords) == 0 { 315 + return &GCResult{}, nil 316 + } 317 + 318 + start := time.Now() 319 + result := &GCResult{} 320 + 321 + gc.logger.Info("Starting reconciliation", "missingRecords", len(preview.MissingRecords)) 322 + gc.reconcileMissingRecords(ctx, preview.MissingRecords, result) 323 + result.Duration = time.Since(start) 324 + 325 + gc.mu.Lock() 326 + gc.lastResult = result 327 + gc.lastResultAt = time.Now() 328 + gc.mu.Unlock() 329 + 330 + return result, nil 331 + } 332 + 333 + // DeleteOrphanedRecords deletes layer records whose manifests no longer exist. 334 + // Requires a prior Preview() to identify orphaned records. 335 + func (gc *GarbageCollector) DeleteOrphanedRecords(ctx context.Context) (*GCResult, error) { 336 + if !gc.tryStart() { 337 + return nil, fmt.Errorf("GC operation already in progress") 338 + } 339 + defer gc.finish() 340 + 341 + gc.mu.Lock() 342 + preview := gc.lastPreview 343 + gc.mu.Unlock() 344 + 345 + if preview == nil { 346 + return nil, fmt.Errorf("no preview available — run Scan first") 347 + } 348 + if len(preview.OrphanedRecords) == 0 { 349 + return &GCResult{}, nil 350 + } 351 + 352 + start := time.Now() 353 + result := &GCResult{ 354 + OrphanedRecords: int64(len(preview.OrphanedRecords)), 355 + } 356 + 357 + rkeys := make([]string, len(preview.OrphanedRecords)) 358 + for i, r := range preview.OrphanedRecords { 359 + rkeys[i] = r.Rkey 360 + } 361 + 362 + gc.logger.Info("Deleting orphaned records", "count", len(rkeys)) 363 + if err := gc.deleteOrphanedRecords(ctx, rkeys, result); err != nil { 364 + return nil, fmt.Errorf("delete orphaned records: %w", err) 365 + } 366 + result.Duration = time.Since(start) 367 + 368 + gc.mu.Lock() 369 + gc.lastResult = result 370 + gc.lastResultAt = time.Now() 371 + gc.mu.Unlock() 372 + 373 + return result, nil 374 + } 375 + 376 + // DeleteOrphanedBlobs walks S3 and deletes blobs not referenced by any manifest. 377 + // Runs a fresh analysis to build the current referenced set (reflects any reconciliation 378 + // done since the last preview). 379 + func (gc *GarbageCollector) DeleteOrphanedBlobs(ctx context.Context) (*GCResult, error) { 380 + if !gc.tryStart() { 381 + return nil, fmt.Errorf("GC operation already in progress") 382 + } 383 + defer gc.finish() 384 + 385 + start := time.Now() 386 + result := &GCResult{} 387 + 388 + gc.logger.Info("Starting orphaned blob deletion (fresh analysis)") 389 + 390 + // Fresh analysis so the referenced set includes any records reconciled since preview 391 + analysis, err := gc.analyzeRecords(ctx) 392 + if err != nil { 393 + return nil, fmt.Errorf("analyze records: %w", err) 394 + } 395 + 396 + result.ReferencedBlobs = int64(len(analysis.referenced)) 397 + 398 + if err := gc.deleteOrphanedBlobs(ctx, analysis.referenced, result); err != nil { 399 + return nil, fmt.Errorf("delete orphaned blobs: %w", err) 400 + } 401 + result.Duration = time.Since(start) 402 + 403 + gc.mu.Lock() 404 + gc.lastResult = result 405 + gc.lastResultAt = time.Now() 406 + gc.mu.Unlock() 407 + 408 + return result, nil 409 + } 410 + 299 411 // analyzeRecords performs Phase 1 analysis: builds referenced set, finds orphaned records, 300 412 // and identifies missing layer records. Pure analysis — no mutations. 301 413 // Discovers users, fetches manifests, scans records, identifies missing records.
+2 -2
pkg/hold/oci/xrpc.go
··· 380 380 } 381 381 } 382 382 383 - // Enqueue scan job if scanner is connected 384 - if h.scanBroadcaster != nil { 383 + // Enqueue scan job if scanner is connected (skip manifest lists — children get their own jobs) 384 + if h.scanBroadcaster != nil && !isMultiArch { 385 385 tier := "deckhand" 386 386 if stats != nil && stats.Tier != "" { 387 387 tier = stats.Tier
+146 -48
pkg/hold/pds/scan_broadcaster.go
··· 551 551 "total", msg.Summary.Total) 552 552 } 553 553 554 - // handleError marks a job as failed 554 + // handleError marks a job as failed and creates a scan record so the proactive 555 + // scanner treats it as "stale" rather than "never scanned" (avoids retry loops). 555 556 func (sb *ScanBroadcaster) handleError(sub *ScanSubscriber, msg ScannerMessage) { 556 - _, err := sb.db.Exec(` 557 + ctx := context.Background() 558 + 559 + // Get job details to create failure scan record 560 + var manifestDigest, repository, userDID string 561 + err := sb.db.QueryRow(` 562 + SELECT manifest_digest, repository, user_did 563 + FROM scan_jobs WHERE seq = ? 564 + `, msg.Seq).Scan(&manifestDigest, &repository, &userDID) 565 + if err != nil { 566 + slog.Error("Failed to get job details for failure record", 567 + "seq", msg.Seq, "error", err) 568 + } else { 569 + // Create a scan record with zero counts and nil blobs — marks it as 570 + // "scanned" so the proactive scheduler won't retry until rescan interval 571 + scanRecord := atproto.NewScanRecord( 572 + manifestDigest, repository, userDID, 573 + nil, nil, // no SBOM or vuln report 574 + 0, 0, 0, 0, 0, 575 + "failed: "+truncateError(msg.Error, 200), 576 + ) 577 + if _, _, err := sb.pds.CreateScanRecord(ctx, scanRecord); err != nil { 578 + slog.Error("Failed to store failure scan record", 579 + "seq", msg.Seq, "error", err) 580 + } 581 + } 582 + 583 + // Mark job as failed 584 + _, err = sb.db.Exec(` 557 585 UPDATE scan_jobs SET status = 'failed', completed_at = ? 558 586 WHERE seq = ? 559 587 `, time.Now(), msg.Seq) ··· 567 595 "seq", msg.Seq, 568 596 "subscriberId", sub.id, 569 597 "error", msg.Error) 598 + } 599 + 600 + func truncateError(s string, maxLen int) string { 601 + if len(s) <= maxLen { 602 + return s 603 + } 604 + return s[:maxLen] 570 605 } 571 606 572 607 // drainPendingJobs sends pending/timed-out jobs to a newly connected scanner. ··· 650 685 } 651 686 } 652 687 653 - // reDispatchTimedOut finds jobs that were assigned but not acked/completed within timeout. 688 + // reDispatchTimedOut finds jobs that were assigned but not acked/completed within timeout, 689 + // and also marks stuck processing jobs as failed. 654 690 // Collects timed-out rows first, closes cursor, then resets and re-dispatches 655 691 // to avoid holding a SELECT cursor open during UPDATEs (prevents SQLite BUSY). 656 692 func (sb *ScanBroadcaster) reDispatchTimedOut() { 657 693 timeout := time.Now().Add(-sb.ackTimeout) 658 694 695 + // Fail processing jobs stuck for >10 minutes (scanner likely crashed mid-scan) 696 + processingTimeout := time.Now().Add(-10 * time.Minute) 697 + res, err := sb.db.Exec(` 698 + UPDATE scan_jobs SET status = 'failed', completed_at = ? 699 + WHERE status = 'processing' AND assigned_at < ? 700 + `, time.Now(), processingTimeout) 701 + if err != nil { 702 + slog.Error("Failed to clean up stuck processing jobs", "error", err) 703 + } else if n, _ := res.RowsAffected(); n > 0 { 704 + slog.Warn("Cleaned up stuck processing jobs", "count", n) 705 + } 706 + 659 707 rows, err := sb.db.Query(` 660 708 SELECT seq, manifest_digest, repository, tag, user_did, user_handle, hold_did, hold_endpoint, tier, config_json, layers_json 661 709 FROM scan_jobs ··· 798 846 func (sb *ScanBroadcaster) proactiveScanLoop() { 799 847 defer sb.wg.Done() 800 848 801 - // Wait a bit before starting to let the system settle 849 + // Wait for the system to settle and DID list to populate 802 850 select { 803 851 case <-sb.stopCh: 804 852 return 805 - case <-time.After(30 * time.Second): 853 + case <-time.After(45 * time.Second): 806 854 } 807 855 856 + // Run immediately on startup, then every 60s 857 + slog.Info("Proactive scan loop started") 858 + sb.tryEnqueueProactiveScan() 859 + 808 860 ticker := time.NewTicker(60 * time.Second) 809 861 defer ticker.Stop() 810 862 ··· 824 876 // Uses the cached DID list from the relay (refreshed by refreshManifestDIDsLoop). 825 877 func (sb *ScanBroadcaster) tryEnqueueProactiveScan() { 826 878 if !sb.hasConnectedScanners() { 879 + slog.Debug("Proactive scan: no scanners connected, skipping") 827 880 return 828 881 } 829 882 if sb.hasActiveJobs() { 883 + slog.Debug("Proactive scan: active jobs in queue, skipping") 830 884 return 831 885 } 832 886 ··· 839 893 sb.manifestDIDsMu.RUnlock() 840 894 841 895 if len(userDIDs) == 0 { 896 + slog.Debug("Proactive scan: no manifest DIDs cached from relay, skipping") 842 897 return 843 898 } 844 899 ··· 854 909 } 855 910 } 856 911 912 + // scanCandidate is a manifest that needs scanning, with its scan freshness. 913 + type scanCandidate struct { 914 + manifest atproto.ManifestRecord 915 + userDID string 916 + userHandle string 917 + scannedAt time.Time // zero value = never scanned 918 + } 919 + 857 920 // tryEnqueueForUser fetches manifests from a user's PDS and enqueues a scan for the 858 - // first one that needs scanning. Returns true if a job was enqueued. 921 + // one that most needs it: never-scanned manifests first, then the stalest scan. 922 + // Returns true if a job was enqueued. 859 923 func (sb *ScanBroadcaster) tryEnqueueForUser(ctx context.Context, userDID string) bool { 860 924 // Resolve user DID to PDS endpoint and handle 861 925 did, userHandle, pdsEndpoint, err := atproto.ResolveIdentity(ctx, userDID) ··· 865 929 return false 866 930 } 867 931 868 - // Fetch manifest records from user's PDS 932 + // Collect all scannable manifests with their scan age 933 + var unscanned []scanCandidate 934 + var oldest *scanCandidate 935 + 869 936 client := atproto.NewClient(pdsEndpoint, did, "") 870 937 var cursor string 871 938 for { ··· 879 946 for _, record := range records { 880 947 var manifest atproto.ManifestRecord 881 948 if err := json.Unmarshal(record.Value, &manifest); err != nil { 882 - slog.Debug("Proactive scan: failed to unmarshal manifest record", 883 - "uri", record.URI, "error", err) 884 949 continue 885 950 } 886 951 ··· 898 963 continue 899 964 } 900 965 901 - // Skip if config is nil (shouldn't happen for image manifests, but be safe) 966 + // Skip if config is nil 902 967 if manifest.Config == nil { 903 968 continue 904 969 } 905 970 906 - // Check if already scanned recently 907 - if sb.isRecentlyScanned(ctx, manifest.Digest) { 971 + // Check scan status 972 + _, scanRecord, err := sb.pds.GetScanRecord(ctx, manifest.Digest) 973 + if err != nil { 974 + // No scan record — never scanned 975 + unscanned = append(unscanned, scanCandidate{ 976 + manifest: manifest, 977 + userDID: did, 978 + userHandle: userHandle, 979 + }) 908 980 continue 909 981 } 910 982 911 - // Construct and enqueue scan job 912 - configJSON, _ := json.Marshal(manifest.Config) 913 - layersJSON, _ := json.Marshal(manifest.Layers) 983 + scannedAt, err := time.Parse(time.RFC3339, scanRecord.ScannedAt) 984 + if err != nil { 985 + // Can't parse timestamp — treat as never scanned 986 + unscanned = append(unscanned, scanCandidate{ 987 + manifest: manifest, 988 + userDID: did, 989 + userHandle: userHandle, 990 + }) 991 + continue 992 + } 914 993 915 - slog.Info("Enqueuing proactive scan", 916 - "manifestDigest", manifest.Digest, 917 - "repository", manifest.Repository, 918 - "userDID", did) 994 + // Skip if scanned recently 995 + if time.Since(scannedAt) < sb.rescanInterval { 996 + continue 997 + } 919 998 920 - if err := sb.Enqueue(&ScanJobEvent{ 921 - ManifestDigest: manifest.Digest, 922 - Repository: manifest.Repository, 923 - UserDID: did, 924 - UserHandle: userHandle, 925 - Tier: "deckhand", 926 - Config: configJSON, 927 - Layers: layersJSON, 928 - }); err != nil { 929 - slog.Error("Proactive scan: failed to enqueue", 930 - "manifest", manifest.Digest, "error", err) 931 - return false 999 + // Stale scan — track the oldest 1000 + if oldest == nil || scannedAt.Before(oldest.scannedAt) { 1001 + oldest = &scanCandidate{ 1002 + manifest: manifest, 1003 + userDID: did, 1004 + userHandle: userHandle, 1005 + scannedAt: scannedAt, 1006 + } 932 1007 } 933 - return true 934 1008 } 935 1009 936 1010 if nextCursor == "" || len(records) == 0 { ··· 939 1013 cursor = nextCursor 940 1014 } 941 1015 942 - return false 1016 + // Prefer never-scanned, then oldest stale scan 1017 + var pick *scanCandidate 1018 + if len(unscanned) > 0 { 1019 + pick = &unscanned[0] 1020 + } else if oldest != nil { 1021 + pick = oldest 1022 + } 1023 + 1024 + if pick == nil { 1025 + return false 1026 + } 1027 + 1028 + configJSON, _ := json.Marshal(pick.manifest.Config) 1029 + layersJSON, _ := json.Marshal(pick.manifest.Layers) 1030 + 1031 + reason := "never scanned" 1032 + if !pick.scannedAt.IsZero() { 1033 + reason = fmt.Sprintf("last scanned %s ago", time.Since(pick.scannedAt).Truncate(time.Minute)) 1034 + } 1035 + 1036 + slog.Info("Enqueuing proactive scan", 1037 + "manifestDigest", pick.manifest.Digest, 1038 + "repository", pick.manifest.Repository, 1039 + "userDID", pick.userDID, 1040 + "reason", reason) 1041 + 1042 + if err := sb.Enqueue(&ScanJobEvent{ 1043 + ManifestDigest: pick.manifest.Digest, 1044 + Repository: pick.manifest.Repository, 1045 + UserDID: pick.userDID, 1046 + UserHandle: pick.userHandle, 1047 + Tier: "deckhand", 1048 + Config: configJSON, 1049 + Layers: layersJSON, 1050 + }); err != nil { 1051 + slog.Error("Proactive scan: failed to enqueue", 1052 + "manifest", pick.manifest.Digest, "error", err) 1053 + return false 1054 + } 1055 + return true 943 1056 } 944 1057 945 1058 // isOurManifest checks if a manifest's holdDID matches this hold directly, ··· 1026 1139 } 1027 1140 1028 1141 return false 1029 - } 1030 - 1031 - // isRecentlyScanned checks if a manifest has been scanned within the rescan interval. 1032 - func (sb *ScanBroadcaster) isRecentlyScanned(ctx context.Context, manifestDigest string) bool { 1033 - _, scanRecord, err := sb.pds.GetScanRecord(ctx, manifestDigest) 1034 - if err != nil { 1035 - return false // Not scanned or error reading → needs scanning 1036 - } 1037 - 1038 - scannedAt, err := time.Parse(time.RFC3339, scanRecord.ScannedAt) 1039 - if err != nil { 1040 - return false // Can't parse timestamp → treat as needing scan 1041 - } 1042 - 1043 - return time.Since(scannedAt) < sb.rescanInterval 1044 1142 } 1045 1143 1046 1144 // hasConnectedScanners returns true if at least one scanner is connected.
+2 -2
scanner/go.mod
··· 56 56 github.com/anchore/go-sync v0.0.0-20260122203928-582959aeb913 // indirect 57 57 github.com/anchore/go-version v1.2.2-0.20210903204242-51efa5b487c4 // indirect 58 58 github.com/anchore/packageurl-go v0.1.1-0.20250220190351-d62adb6e1115 // indirect 59 - github.com/anchore/stereoscope v0.1.20 // indirect 59 + github.com/anchore/stereoscope v0.1.20 60 60 github.com/andybalholm/brotli v1.2.0 // indirect 61 61 github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect 62 62 github.com/aquasecurity/go-pep440-version v0.0.1 // indirect ··· 181 181 github.com/json-iterator/go v1.1.12 // indirect 182 182 github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 // indirect 183 183 github.com/kevinburke/ssh_config v1.4.0 // indirect 184 - github.com/klauspost/compress v1.18.4 184 + github.com/klauspost/compress v1.18.4 // indirect 185 185 github.com/klauspost/cpuid/v2 v2.3.0 // indirect 186 186 github.com/klauspost/pgzip v1.2.6 // indirect 187 187 github.com/knqyf263/go-apk-version v0.0.0-20200609155635-041fdbb8563f // indirect
+4
scanner/internal/config/config.go
··· 54 54 55 55 // Directory for temporary layer extraction. 56 56 TmpDir string `yaml:"tmp_dir" comment:"Directory for temporary layer extraction."` 57 + 58 + // Maximum total compressed image size in bytes. Images exceeding this are skipped. 0 = no limit. 59 + MaxImageSize int64 `yaml:"max_image_size" comment:"Maximum total compressed image size in bytes. 0 = no limit. Default: 2 GiB."` 57 60 } 58 61 59 62 // setScannerDefaults registers all default values on the given Viper instance. ··· 76 79 v.SetDefault("vuln.enabled", true) 77 80 v.SetDefault("vuln.db_path", "/var/lib/atcr-scanner/vulndb") 78 81 v.SetDefault("vuln.tmp_dir", "/var/lib/atcr-scanner/tmp") 82 + v.SetDefault("vuln.max_image_size", 2*1024*1024*1024) // 2 GiB 79 83 80 84 // Log shipper defaults 81 85 v.SetDefault("log_shipper.batch_size", 100)
+128 -155
scanner/internal/scan/extractor.go
··· 1 1 package scan 2 2 3 3 import ( 4 - "archive/tar" 5 - "compress/gzip" 4 + "crypto/sha256" 6 5 "encoding/json" 7 6 "fmt" 8 - "io" 9 7 "log/slog" 10 8 "os" 11 9 "path/filepath" ··· 13 11 14 12 scanner "atcr.io/scanner" 15 13 "atcr.io/scanner/internal/client" 16 - "github.com/klauspost/compress/zstd" 17 14 ) 18 15 19 - // extractLayers downloads and extracts all image layers via presigned URLs 20 - // Returns the rootfs directory path and a cleanup function 21 - func extractLayers(job *scanner.ScanJob, tmpDir, secret string) (string, func(), error) { 16 + // OCI image layout types for constructing the layout on disk. 17 + type ociDescriptor struct { 18 + MediaType string `json:"mediaType"` 19 + Digest string `json:"digest"` 20 + Size int64 `json:"size"` 21 + } 22 + 23 + type ociManifest struct { 24 + SchemaVersion int `json:"schemaVersion"` 25 + MediaType string `json:"mediaType,omitempty"` 26 + Config ociDescriptor `json:"config"` 27 + Layers []ociDescriptor `json:"layers"` 28 + } 29 + 30 + type ociIndex struct { 31 + SchemaVersion int `json:"schemaVersion"` 32 + Manifests []ociDescriptor `json:"manifests"` 33 + } 34 + 35 + // buildOCILayout downloads image blobs and constructs an OCI image layout directory. 36 + // Instead of extracting layers to a rootfs (which requires decompression and causes 37 + // permission/security issues), this writes compressed blobs directly and lets Syft's 38 + // stereoscope handle layer processing internally. 39 + // 40 + // Layout structure: 41 + // 42 + // scan-*/ 43 + // ├── oci-layout 44 + // ├── index.json 45 + // └── blobs/sha256/ 46 + // ├── <manifest-hex> 47 + // ├── <config-hex> 48 + // └── <layer-hex>... 49 + func buildOCILayout(job *scanner.ScanJob, tmpDir, secret string) (string, func(), error) { 22 50 scanDir, err := os.MkdirTemp(tmpDir, "scan-*") 23 51 if err != nil { 24 52 return "", nil, fmt.Errorf("failed to create temp directory: %w", err) ··· 30 58 } 31 59 } 32 60 33 - imageDir := filepath.Join(scanDir, "image") 34 - rootfsDir := filepath.Join(imageDir, "rootfs") 35 - layersDir := filepath.Join(imageDir, "layers") 36 - 37 - for _, dir := range []string{rootfsDir, layersDir} { 38 - if err := os.MkdirAll(dir, 0755); err != nil { 39 - cleanup() 40 - return "", nil, fmt.Errorf("failed to create directory %s: %w", dir, err) 41 - } 61 + blobsDir := filepath.Join(scanDir, "blobs", "sha256") 62 + if err := os.MkdirAll(blobsDir, 0755); err != nil { 63 + cleanup() 64 + return "", nil, fmt.Errorf("failed to create blobs directory: %w", err) 42 65 } 43 66 44 - // Download and validate config blob 67 + // Download config blob 45 68 if job.Config.Digest == "" { 46 69 cleanup() 47 70 return "", nil, fmt.Errorf("config blob has empty digest, cannot download") 48 71 } 49 72 slog.Info("Downloading config blob", "digest", job.Config.Digest) 50 - configPath := filepath.Join(imageDir, "config.json") 51 - if err := downloadBlobViaPresignedURL(job.HoldEndpoint, job.HoldDID, job.Config.Digest, configPath, secret); err != nil { 73 + if err := downloadBlob(job, job.Config.Digest, blobsDir, secret); err != nil { 52 74 cleanup() 53 75 return "", nil, fmt.Errorf("failed to download config blob: %w", err) 54 76 } 55 77 56 - configData, err := os.ReadFile(configPath) 57 - if err != nil { 58 - cleanup() 59 - return "", nil, fmt.Errorf("failed to read config: %w", err) 60 - } 61 - var configObj map[string]interface{} 62 - if err := json.Unmarshal(configData, &configObj); err != nil { 63 - cleanup() 64 - return "", nil, fmt.Errorf("invalid config JSON: %w", err) 65 - } 66 - 67 - // Download and extract each layer 78 + // Download layer blobs (no extraction — kept compressed) 68 79 for i, layer := range job.Layers { 69 80 if layer.Digest == "" { 70 81 slog.Warn("Skipping layer with empty digest", "index", i) 71 82 continue 72 83 } 73 - // Skip non-tar layers (cosign signatures, attestations, etc.) 84 + // Skip non-tar layers (cosign signatures, in-toto attestations, etc.) 74 85 if layer.MediaType != "" && !strings.Contains(layer.MediaType, "tar") { 75 86 slog.Info("Skipping non-tar layer", "index", i, "digest", layer.Digest, "mediaType", layer.MediaType) 76 87 continue 77 88 } 78 - slog.Info("Extracting layer", "index", i, "digest", layer.Digest, "size", layer.Size, "mediaType", layer.MediaType) 79 - 80 - layerPath := filepath.Join(layersDir, fmt.Sprintf("layer-%d", i)) 81 - if err := downloadBlobViaPresignedURL(job.HoldEndpoint, job.HoldDID, layer.Digest, layerPath, secret); err != nil { 89 + slog.Info("Downloading layer", "index", i, "digest", layer.Digest, "size", layer.Size, "mediaType", layer.MediaType) 90 + if err := downloadBlob(job, layer.Digest, blobsDir, secret); err != nil { 82 91 cleanup() 83 92 return "", nil, fmt.Errorf("failed to download layer %d: %w", i, err) 84 93 } 94 + } 85 95 86 - if err := extractLayer(layerPath, rootfsDir, layer.MediaType); err != nil { 87 - cleanup() 88 - return "", nil, fmt.Errorf("failed to extract layer %d: %w", i, err) 96 + // Build OCI manifest from job descriptors 97 + manifest := ociManifest{ 98 + SchemaVersion: 2, 99 + MediaType: "application/vnd.oci.image.manifest.v1+json", 100 + Config: ociDescriptor{ 101 + MediaType: defaultMediaType(job.Config.MediaType, "application/vnd.oci.image.config.v1+json"), 102 + Digest: job.Config.Digest, 103 + Size: job.Config.Size, 104 + }, 105 + Layers: make([]ociDescriptor, 0, len(job.Layers)), 106 + } 107 + for _, layer := range job.Layers { 108 + if layer.Digest == "" { 109 + continue 89 110 } 111 + if layer.MediaType != "" && !strings.Contains(layer.MediaType, "tar") { 112 + continue 113 + } 114 + manifest.Layers = append(manifest.Layers, ociDescriptor{ 115 + MediaType: defaultMediaType(layer.MediaType, "application/vnd.oci.image.layer.v1.tar+gzip"), 116 + Digest: layer.Digest, 117 + Size: layer.Size, 118 + }) 119 + } 90 120 91 - // Remove layer file to save space 92 - os.Remove(layerPath) 121 + // Write manifest blob 122 + manifestJSON, err := json.Marshal(manifest) 123 + if err != nil { 124 + cleanup() 125 + return "", nil, fmt.Errorf("failed to marshal manifest: %w", err) 126 + } 127 + manifestHash := sha256.Sum256(manifestJSON) 128 + manifestDigest := fmt.Sprintf("sha256:%x", manifestHash) 129 + manifestPath := filepath.Join(blobsDir, fmt.Sprintf("%x", manifestHash)) 130 + if err := os.WriteFile(manifestPath, manifestJSON, 0644); err != nil { 131 + cleanup() 132 + return "", nil, fmt.Errorf("failed to write manifest blob: %w", err) 93 133 } 94 134 95 - entries, err := os.ReadDir(rootfsDir) 135 + // Write index.json 136 + index := ociIndex{ 137 + SchemaVersion: 2, 138 + Manifests: []ociDescriptor{ 139 + { 140 + MediaType: "application/vnd.oci.image.manifest.v1+json", 141 + Digest: manifestDigest, 142 + Size: int64(len(manifestJSON)), 143 + }, 144 + }, 145 + } 146 + indexJSON, err := json.Marshal(index) 96 147 if err != nil { 97 - slog.Warn("Failed to read rootfs directory", "error", err) 98 - } else { 99 - slog.Info("Successfully extracted image", 100 - "layers", len(job.Layers), 101 - "topLevelEntries", len(entries)) 148 + cleanup() 149 + return "", nil, fmt.Errorf("failed to marshal index: %w", err) 150 + } 151 + if err := os.WriteFile(filepath.Join(scanDir, "index.json"), indexJSON, 0644); err != nil { 152 + cleanup() 153 + return "", nil, fmt.Errorf("failed to write index.json: %w", err) 154 + } 155 + 156 + // Write oci-layout file 157 + ociLayout := []byte(`{"imageLayoutVersion":"1.0.0"}`) 158 + if err := os.WriteFile(filepath.Join(scanDir, "oci-layout"), ociLayout, 0644); err != nil { 159 + cleanup() 160 + return "", nil, fmt.Errorf("failed to write oci-layout: %w", err) 102 161 } 103 162 104 - return rootfsDir, cleanup, nil 163 + slog.Info("OCI layout built", 164 + "dir", scanDir, 165 + "layers", len(manifest.Layers), 166 + "manifestDigest", manifestDigest) 167 + 168 + return scanDir, cleanup, nil 105 169 } 106 170 107 - // downloadBlobViaPresignedURL gets a presigned URL from the hold and downloads the blob 108 - func downloadBlobViaPresignedURL(holdEndpoint, holdDID, digest, destPath, secret string) error { 109 - presignedURL, err := client.GetBlobPresignedURL(holdEndpoint, holdDID, digest, secret) 171 + // downloadBlob downloads a blob by digest to the blobs directory using presigned URLs. 172 + func downloadBlob(job *scanner.ScanJob, digest, blobsDir, secret string) error { 173 + hex := digestHex(digest) 174 + destPath := filepath.Join(blobsDir, hex) 175 + 176 + presignedURL, err := client.GetBlobPresignedURL(job.HoldEndpoint, job.HoldDID, digest, secret) 110 177 if err != nil { 111 178 return fmt.Errorf("failed to get presigned URL for %s: %w", digest, err) 112 179 } 113 180 return client.DownloadBlob(presignedURL, destPath) 114 181 } 115 182 116 - // extractLayer extracts a layer tar archive to a destination directory (overlayfs style). 117 - // Supports gzip, zstd, and uncompressed tar based on the OCI media type. 118 - // Falls back to header sniffing if the media type is unrecognized. 119 - func extractLayer(layerPath, destDir, mediaType string) error { 120 - file, err := os.Open(layerPath) 121 - if err != nil { 122 - return fmt.Errorf("failed to open layer: %w", err) 183 + // digestHex extracts the hex portion from a digest string (e.g., "sha256:abc123" → "abc123"). 184 + func digestHex(digest string) string { 185 + if _, hex, ok := strings.Cut(digest, ":"); ok { 186 + return hex 123 187 } 124 - defer file.Close() 188 + return digest 189 + } 125 190 126 - var tarReader io.Reader 127 - 128 - switch { 129 - case strings.Contains(mediaType, "zstd"): 130 - decoder, err := zstd.NewReader(file) 131 - if err != nil { 132 - return fmt.Errorf("failed to create zstd reader: %w", err) 133 - } 134 - defer decoder.Close() 135 - tarReader = decoder 136 - 137 - case strings.Contains(mediaType, "gzip") || mediaType == "": 138 - // Default to gzip for unspecified media types (most common) 139 - gzr, err := gzip.NewReader(file) 140 - if err != nil { 141 - // If gzip fails, try plain tar (header sniff fallback) 142 - if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil { 143 - return fmt.Errorf("failed to create gzip reader: %w", err) 144 - } 145 - slog.Debug("Gzip header invalid, falling back to plain tar", "mediaType", mediaType) 146 - tarReader = file 147 - } else { 148 - defer gzr.Close() 149 - tarReader = gzr 150 - } 151 - 152 - default: 153 - // Uncompressed tar or unknown — try plain tar 154 - tarReader = file 191 + func defaultMediaType(mediaType, fallback string) string { 192 + if mediaType == "" { 193 + return fallback 155 194 } 156 - 157 - tr := tar.NewReader(tarReader) 158 - 159 - for { 160 - header, err := tr.Next() 161 - if err == io.EOF { 162 - break 163 - } 164 - if err != nil { 165 - return fmt.Errorf("failed to read tar header: %w", err) 166 - } 167 - 168 - target := filepath.Join(destDir, filepath.Clean(header.Name)) 169 - 170 - // Security: ensure target is within destDir 171 - if !strings.HasPrefix(target, filepath.Clean(destDir)+string(os.PathSeparator)) { 172 - slog.Warn("Skipping path outside destination", "path", header.Name) 173 - continue 174 - } 175 - 176 - switch header.Typeflag { 177 - case tar.TypeDir: 178 - // Always set owner write bit so we can create files inside (e.g. Go module 179 - // cache dirs are 0555 in images, which would block subsequent writes) 180 - if err := os.MkdirAll(target, os.FileMode(header.Mode)|0200); err != nil { 181 - return fmt.Errorf("failed to create directory %s: %w", target, err) 182 - } 183 - 184 - case tar.TypeReg: 185 - if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil { 186 - return fmt.Errorf("failed to create parent directory: %w", err) 187 - } 188 - outFile, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR|os.O_TRUNC, os.FileMode(header.Mode)) 189 - if err != nil { 190 - return fmt.Errorf("failed to create file %s: %w", target, err) 191 - } 192 - if _, err := io.Copy(outFile, tr); err != nil { 193 - outFile.Close() 194 - return fmt.Errorf("failed to write file %s: %w", target, err) 195 - } 196 - outFile.Close() 197 - 198 - case tar.TypeSymlink: 199 - if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil { 200 - return fmt.Errorf("failed to create parent directory for symlink: %w", err) 201 - } 202 - os.Remove(target) 203 - if err := os.Symlink(header.Linkname, target); err != nil { 204 - slog.Warn("Failed to create symlink", "target", target, "link", header.Linkname, "error", err) 205 - } 206 - 207 - case tar.TypeLink: 208 - linkTarget := filepath.Join(destDir, filepath.Clean(header.Linkname)) 209 - if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil { 210 - return fmt.Errorf("failed to create parent directory for hardlink: %w", err) 211 - } 212 - os.Remove(target) 213 - if err := os.Link(linkTarget, target); err != nil { 214 - slog.Warn("Failed to create hardlink", "target", target, "link", linkTarget, "error", err) 215 - } 216 - 217 - default: 218 - slog.Debug("Skipping unsupported tar entry type", "type", header.Typeflag, "name", header.Name) 219 - } 220 - } 221 - 222 - return nil 195 + return mediaType 223 196 }
+22 -12
scanner/internal/scan/syft.go
··· 5 5 "crypto/sha256" 6 6 "fmt" 7 7 "log/slog" 8 - "os" 9 8 9 + "github.com/anchore/stereoscope/pkg/file" 10 + "github.com/anchore/stereoscope/pkg/image/oci" 10 11 "github.com/anchore/syft/syft" 11 12 "github.com/anchore/syft/syft/format" 12 13 "github.com/anchore/syft/syft/format/spdxjson" 13 14 "github.com/anchore/syft/syft/sbom" 14 - "github.com/anchore/syft/syft/source/directorysource" 15 + "github.com/anchore/syft/syft/source/stereoscopesource" 15 16 ) 16 17 17 - // generateSBOM generates an SBOM using Syft from an extracted image directory 18 - // Returns the SBOM object, SBOM JSON bytes, and its digest 19 - func generateSBOM(ctx context.Context, imageDir string) (*sbom.SBOM, []byte, string, error) { 20 - slog.Info("Generating SBOM with Syft", "imageDir", imageDir) 18 + // generateSBOM generates an SBOM using Syft from an OCI image layout directory. 19 + // Returns the SBOM object, SBOM JSON bytes, and its digest. 20 + func generateSBOM(ctx context.Context, ociLayoutDir string) (*sbom.SBOM, []byte, string, error) { 21 + slog.Info("Generating SBOM with Syft", "ociLayout", ociLayoutDir) 21 22 22 - entries, err := os.ReadDir(imageDir) 23 + // Create stereoscope OCI directory provider 24 + tmpGen := file.NewTempDirGenerator("syft-scan") 25 + defer tmpGen.Cleanup() 26 + 27 + provider := oci.NewDirectoryProvider(tmpGen, ociLayoutDir) 28 + img, err := provider.Provide(ctx) 23 29 if err != nil { 24 - return nil, nil, "", fmt.Errorf("failed to read image directory: %w", err) 30 + return nil, nil, "", fmt.Errorf("failed to load OCI image: %w", err) 25 31 } 26 - slog.Info("Image directory contents", "path", imageDir, "entries", len(entries)) 32 + defer img.Cleanup() 27 33 28 - src, err := directorysource.NewFromPath(imageDir) 29 - if err != nil { 30 - return nil, nil, "", fmt.Errorf("failed to create Syft source: %w", err) 34 + if err := img.Read(); err != nil { 35 + return nil, nil, "", fmt.Errorf("failed to read OCI image: %w", err) 31 36 } 37 + 38 + // Wrap in Syft source 39 + src := stereoscopesource.New(img, stereoscopesource.ImageConfig{ 40 + Reference: ociLayoutDir, 41 + }) 32 42 defer src.Close() 33 43 34 44 slog.Info("Running Syft cataloging")
+22 -6
scanner/internal/scan/worker.go
··· 45 45 }() 46 46 } 47 47 48 + // Point TMPDIR at the configured tmp dir so stereoscope's internal 49 + // layer extraction uses the same partition (not /tmp which may be small) 50 + os.Setenv("TMPDIR", wp.cfg.Vuln.TmpDir) 51 + 48 52 for i := 0; i < wp.cfg.Scanner.Workers; i++ { 49 53 wp.wg.Add(1) 50 54 go wp.worker(ctx, i) ··· 104 108 return nil, fmt.Errorf("failed to create tmp dir: %w", err) 105 109 } 106 110 107 - // Step 1: Extract image layers from hold via presigned URLs 108 - slog.Info("Extracting image layers", "repository", job.Repository) 109 - imageDir, cleanup, err := extractLayers(job, wp.cfg.Vuln.TmpDir, wp.cfg.Hold.Secret) 111 + // Check total compressed image size before downloading 112 + if wp.cfg.Vuln.MaxImageSize > 0 { 113 + var totalSize int64 114 + for _, layer := range job.Layers { 115 + totalSize += layer.Size 116 + } 117 + totalSize += job.Config.Size 118 + if totalSize > wp.cfg.Vuln.MaxImageSize { 119 + return nil, fmt.Errorf("image too large: %d bytes compressed (limit %d bytes)", totalSize, wp.cfg.Vuln.MaxImageSize) 120 + } 121 + } 122 + 123 + // Step 1: Build OCI image layout from hold via presigned URLs 124 + slog.Info("Building OCI layout", "repository", job.Repository) 125 + ociLayoutDir, cleanup, err := buildOCILayout(job, wp.cfg.Vuln.TmpDir, wp.cfg.Hold.Secret) 110 126 if err != nil { 111 - return nil, fmt.Errorf("failed to extract layers: %w", err) 127 + return nil, fmt.Errorf("failed to build OCI layout: %w", err) 112 128 } 113 129 defer cleanup() 114 130 115 131 // Step 2: Generate SBOM with Syft 116 132 slog.Info("Generating SBOM", "repository", job.Repository) 117 - sbomResult, sbomJSON, sbomDigest, err := generateSBOM(ctx, imageDir) 133 + sbomResult, sbomJSON, sbomDigest, err := generateSBOM(ctx, ociLayoutDir) 118 134 if err != nil { 119 135 return nil, fmt.Errorf("failed to generate SBOM: %w", err) 120 136 } ··· 127 143 128 144 // Step 3: Scan SBOM with Grype (if enabled) 129 145 if wp.cfg.Vuln.Enabled { 130 - slog.Info("Scanning for vulnerabilities", "repository", job.Repository) 146 + slog.Info("Scanning for vulnerabilities", "repository", job.Repository, "handle", job.UserHandle) 131 147 vulnJSON, vulnDigest, summary, err := scanVulnerabilities(ctx, sbomResult, wp.cfg.Vuln.DBPath) 132 148 if err != nil { 133 149 return nil, fmt.Errorf("failed to scan vulnerabilities: %w", err)