A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

begin getRepo and subscribeRepos

evan.jarrett.net 963786f7 29ccb15e

verified
+448 -9
+18
CLAUDE.md
··· 53 53 export HOLD_OWNER=did:plc:your-did-here 54 54 ./bin/atcr-hold 55 55 # Hold starts immediately with embedded PDS 56 + 57 + # Request Bluesky relay crawl (makes your PDS discoverable) 58 + ./deploy/request-crawl.sh hold01.atcr.io 59 + # Or specify a different relay: 60 + ./deploy/request-crawl.sh hold01.atcr.io https://custom-relay.example.com/xrpc/com.atproto.sync.requestCrawl 56 61 ``` 57 62 58 63 ## Architecture Overview ··· 370 375 - `PUT /blobs/{digest}` - Proxy upload (fallback) 371 376 - `POST /register` - Manual registration endpoint 372 377 - `GET /health` - Health check 378 + 379 + **Embedded PDS Endpoints:** 380 + 381 + Each hold service includes an embedded PDS (Personal Data Server) that stores captain + crew records: 382 + 383 + - `GET /xrpc/com.atproto.sync.getRepo?did={did}` - Download full repository as CAR file 384 + - `GET /xrpc/com.atproto.sync.getRepo?did={did}&since={rev}` - Download repository diff since revision 385 + - `GET /xrpc/com.atproto.sync.subscribeRepos` - WebSocket firehose for real-time events 386 + - `GET /xrpc/com.atproto.sync.listRepos` - List all repositories (single-user PDS) 387 + - `GET /.well-known/did.json` - DID document (did:web resolution) 388 + - Standard ATProto repo endpoints (getRecord, listRecords, etc.) 389 + 390 + The `subscribeRepos` endpoint broadcasts #commit events whenever crew membership changes, allowing AppViews to monitor hold access control in real-time. 373 391 374 392 **Configuration:** Environment variables (see `.env.example`) 375 393 - `HOLD_PUBLIC_URL` - Public URL of hold service (required)
+9 -2
cmd/hold/main.go
··· 27 27 // This must happen before creating HoldService since service needs PDS for authorization 28 28 var holdPDS *pds.HoldPDS 29 29 var xrpcHandler *pds.XRPCHandler 30 + var broadcaster *pds.EventBroadcaster 30 31 if cfg.Database.Path != "" { 31 32 // Generate did:web from public URL 32 33 holdDID := pds.GenerateDIDFromURL(cfg.Server.PublicURL) ··· 44 45 log.Fatalf("Failed to bootstrap PDS: %v", err) 45 46 } 46 47 47 - log.Printf("Embedded PDS initialized successfully") 48 + // Create event broadcaster for subscribeRepos firehose 49 + broadcaster = pds.NewEventBroadcaster(holdDID, 100) // Keep 100 events for backfill 50 + 51 + // Wire up repo event handler to broadcaster 52 + holdPDS.RepomgrRef().SetEventHandler(broadcaster.SetRepoEventHandler(), true) 53 + 54 + log.Printf("Embedded PDS initialized successfully with firehose enabled") 48 55 } else { 49 56 log.Fatalf("Database path is required for embedded PDS authorization") 50 57 } ··· 59 66 if holdPDS != nil { 60 67 holdDID := holdPDS.DID() 61 68 blobStore := hold.NewHoldServiceBlobStore(service, holdDID) 62 - xrpcHandler = pds.NewXRPCHandler(holdPDS, cfg.Server.PublicURL, blobStore) 69 + xrpcHandler = pds.NewXRPCHandler(holdPDS, cfg.Server.PublicURL, blobStore, broadcaster) 63 70 } 64 71 65 72 // Setup HTTP routes
+55
deploy/request-crawl.sh
#!/bin/bash
#
# Request crawl for a PDS from the Bluesky relay
#
# Usage: ./request-crawl.sh <hostname> [relay-url]
# Example: ./request-crawl.sh hold01.atcr.io
#
# Sends a com.atproto.sync.requestCrawl POST to the relay with a JSON body of
# the form {"hostname":"<hostname>"}.
#
# Exit status: 0 when the relay answers with a 2xx status; 1 on bad arguments,
# a transport failure (DNS, TLS, timeout), or a non-2xx response.
#

set -euo pipefail

DEFAULT_RELAY="https://bsky.network/xrpc/com.atproto.sync.requestCrawl"

# Parse arguments.
# The target is deliberately NOT stored in $HOSTNAME: bash pre-sets that
# variable to the local machine's name, and overwriting it is confusing.
TARGET_HOST="${1:-}"
RELAY_URL="${2:-$DEFAULT_RELAY}"

# Validate hostname
if [ -z "$TARGET_HOST" ]; then
    echo "Error: hostname is required" >&2
    echo "" >&2
    echo "Usage: $0 <hostname> [relay-url]" >&2
    echo "Example: $0 hold01.atcr.io" >&2
    echo "" >&2
    echo "Options:" >&2
    echo "  hostname    Hostname of the PDS to request crawl for (required)" >&2
    echo "  relay-url   Relay URL to send crawl request to (default: $DEFAULT_RELAY)" >&2
    exit 1
fi

# The hostname is spliced into a JSON string below, so refuse anything that is
# not a plain host (letters, digits, dots, hyphens, optional :port). This keeps
# quotes and backslashes from corrupting or injecting into the request body.
if ! [[ "$TARGET_HOST" =~ ^[A-Za-z0-9.:-]+$ ]]; then
    echo "Error: invalid hostname: $TARGET_HOST" >&2
    exit 1
fi

# Log what we're doing
echo "Requesting crawl for hostname: $TARGET_HOST"
echo "Sending to relay: $RELAY_URL"

# Make the request.
# -sS keeps the progress meter quiet but still prints transport errors, and the
# explicit `if !` reports the failure instead of letting `set -e` abort the
# script silently. --max-time prevents hanging forever on an unresponsive relay.
if ! RESPONSE=$(curl -sS --max-time 30 -w "\n%{http_code}" -X POST "$RELAY_URL" \
    -H "Content-Type: application/json" \
    -d "{\"hostname\":\"$TARGET_HOST\"}"); then
    echo "❌ Failed to reach relay: $RELAY_URL" >&2
    exit 1
fi

# Split response and status code.
# curl appends "\n<status>" after the body, so the status is everything after
# the last newline and the body is everything before it. Parameter expansion is
# used instead of `head -n -1`, which only works with GNU coreutils.
HTTP_CODE="${RESPONSE##*$'\n'}"
HTTP_BODY="${RESPONSE%$'\n'*}"

# Check response
if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then
    echo "✅ Success! Crawl requested for $TARGET_HOST"
    if [ -n "$HTTP_BODY" ]; then
        echo "Response: $HTTP_BODY"
    fi
else
    echo "❌ Failed with status $HTTP_CODE" >&2
    if [ -n "$HTTP_BODY" ]; then
        echo "Response: $HTTP_BODY" >&2
    fi
    exit 1
fi
+252
pkg/hold/pds/events.go
··· 1 + package pds 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "log" 7 + "sync" 8 + "time" 9 + 10 + atproto "github.com/bluesky-social/indigo/api/atproto" 11 + lexutil "github.com/bluesky-social/indigo/lex/util" 12 + "github.com/gorilla/websocket" 13 + ) 14 + 15 + // EventBroadcaster manages WebSocket connections and broadcasts repo events 16 + type EventBroadcaster struct { 17 + mu sync.RWMutex 18 + subscribers map[*Subscriber]bool 19 + eventSeq int64 20 + eventHistory []HistoricalEvent // Ring buffer for cursor backfill 21 + maxHistory int 22 + holdDID string // DID of the hold for setting repo field 23 + } 24 + 25 + // Subscriber represents a WebSocket client subscribed to the firehose 26 + type Subscriber struct { 27 + conn *websocket.Conn 28 + send chan *RepoCommitEvent 29 + cursor int64 // Last sequence number this subscriber has seen 30 + } 31 + 32 + // HistoricalEvent stores past events for cursor-based backfill 33 + type HistoricalEvent struct { 34 + Seq int64 35 + Event *RepoCommitEvent 36 + } 37 + 38 + // RepoCommitEvent represents a #commit event in subscribeRepos 39 + type RepoCommitEvent struct { 40 + Seq int64 `json:"seq" cborgen:"seq"` 41 + Repo string `json:"repo" cborgen:"repo"` 42 + Commit string `json:"commit" cborgen:"commit"` // CID string 43 + Rev string `json:"rev" cborgen:"rev"` 44 + Since *string `json:"since,omitempty" cborgen:"since,omitempty"` 45 + Blocks []byte `json:"blocks" cborgen:"blocks"` // CAR slice bytes 46 + Ops []*atproto.SyncSubscribeRepos_RepoOp `json:"ops" cborgen:"ops"` 47 + Time string `json:"time" cborgen:"time"` 48 + Type string `json:"$type" cborgen:"$type"` // Always "#commit" 49 + } 50 + 51 + // NewEventBroadcaster creates a new event broadcaster 52 + func NewEventBroadcaster(holdDID string, maxHistory int) *EventBroadcaster { 53 + if maxHistory <= 0 { 54 + maxHistory = 100 // Default to keeping 100 events 55 + } 56 + 57 + return &EventBroadcaster{ 58 + subscribers: make(map[*Subscriber]bool), 59 + 
eventSeq: 0, 60 + eventHistory: make([]HistoricalEvent, 0, maxHistory), 61 + maxHistory: maxHistory, 62 + holdDID: holdDID, 63 + } 64 + } 65 + 66 + // Subscribe adds a new WebSocket subscriber 67 + func (b *EventBroadcaster) Subscribe(conn *websocket.Conn, cursor int64) *Subscriber { 68 + sub := &Subscriber{ 69 + conn: conn, 70 + send: make(chan *RepoCommitEvent, 10), // Buffer 10 events 71 + cursor: cursor, 72 + } 73 + 74 + b.mu.Lock() 75 + b.subscribers[sub] = true 76 + currentSeq := b.eventSeq 77 + b.mu.Unlock() 78 + 79 + // Send historical events if cursor is provided and < current seq 80 + if cursor > 0 && cursor < currentSeq { 81 + go b.backfillSubscriber(sub, cursor) 82 + } 83 + 84 + // Start goroutine to handle sending events to this subscriber 85 + go b.handleSubscriber(sub) 86 + 87 + return sub 88 + } 89 + 90 + // Unsubscribe removes a WebSocket subscriber 91 + func (b *EventBroadcaster) Unsubscribe(sub *Subscriber) { 92 + b.mu.Lock() 93 + defer b.mu.Unlock() 94 + 95 + if _, ok := b.subscribers[sub]; ok { 96 + delete(b.subscribers, sub) 97 + close(sub.send) 98 + } 99 + } 100 + 101 + // Broadcast sends an event to all subscribers 102 + func (b *EventBroadcaster) Broadcast(ctx context.Context, event *RepoEvent) { 103 + b.mu.Lock() 104 + defer b.mu.Unlock() 105 + 106 + // Increment sequence 107 + b.eventSeq++ 108 + seq := b.eventSeq 109 + 110 + // Convert RepoEvent to RepoCommitEvent 111 + commitEvent := b.convertToCommitEvent(event, seq) 112 + 113 + // Store in history for backfill 114 + b.addToHistory(seq, commitEvent) 115 + 116 + // Broadcast to all subscribers 117 + for sub := range b.subscribers { 118 + select { 119 + case sub.send <- commitEvent: 120 + // Sent successfully 121 + default: 122 + // Subscriber's buffer is full, skip (they'll get disconnected for being too slow) 123 + log.Printf("Warning: subscriber buffer full, skipping event seq=%d", seq) 124 + } 125 + } 126 + } 127 + 128 + // convertToCommitEvent converts a RepoEvent to a 
RepoCommitEvent 129 + func (b *EventBroadcaster) convertToCommitEvent(event *RepoEvent, seq int64) *RepoCommitEvent { 130 + // Convert RepoOps to atproto.SyncSubscribeRepos_RepoOp 131 + ops := make([]*atproto.SyncSubscribeRepos_RepoOp, len(event.Ops)) 132 + for i, op := range event.Ops { 133 + action := string(op.Kind) // "create", "update", "delete" 134 + path := op.Collection + "/" + op.Rkey 135 + 136 + // Convert CID to LexLink if present 137 + var cidLink *lexutil.LexLink 138 + if op.RecCid != nil { 139 + link := lexutil.LexLink(*op.RecCid) 140 + cidLink = &link 141 + } 142 + 143 + ops[i] = &atproto.SyncSubscribeRepos_RepoOp{ 144 + Action: action, 145 + Path: path, 146 + Cid: cidLink, 147 + } 148 + } 149 + 150 + // Event.NewRoot is a cid.Cid, convert to string 151 + commitCID := event.NewRoot.String() 152 + 153 + return &RepoCommitEvent{ 154 + Seq: seq, 155 + Repo: b.holdDID, // Set to hold's DID 156 + Commit: commitCID, 157 + Rev: event.Rev, 158 + Since: event.Since, 159 + Blocks: event.RepoSlice, // CAR slice bytes 160 + Ops: ops, 161 + Time: time.Now().Format(time.RFC3339), 162 + Type: "#commit", 163 + } 164 + } 165 + 166 + // addToHistory adds an event to the history ring buffer 167 + func (b *EventBroadcaster) addToHistory(seq int64, event *RepoCommitEvent) { 168 + he := HistoricalEvent{ 169 + Seq: seq, 170 + Event: event, 171 + } 172 + 173 + // Simple ring buffer: keep last N events 174 + if len(b.eventHistory) >= b.maxHistory { 175 + // Remove oldest event 176 + b.eventHistory = b.eventHistory[1:] 177 + } 178 + b.eventHistory = append(b.eventHistory, he) 179 + } 180 + 181 + // backfillSubscriber sends historical events to a subscriber 182 + func (b *EventBroadcaster) backfillSubscriber(sub *Subscriber, cursor int64) { 183 + b.mu.RLock() 184 + defer b.mu.RUnlock() 185 + 186 + for _, he := range b.eventHistory { 187 + if he.Seq > cursor { 188 + select { 189 + case sub.send <- he.Event: 190 + // Sent 191 + case <-time.After(5 * time.Second): 192 + // 
Timeout, subscriber too slow 193 + log.Printf("Backfill timeout for subscriber at seq=%d", he.Seq) 194 + return 195 + } 196 + } 197 + } 198 + } 199 + 200 + // handleSubscriber handles sending events to a subscriber over WebSocket 201 + func (b *EventBroadcaster) handleSubscriber(sub *Subscriber) { 202 + defer func() { 203 + b.Unsubscribe(sub) 204 + sub.conn.Close() 205 + }() 206 + 207 + for event := range sub.send { 208 + // Encode as CBOR 209 + cborBytes, err := encodeCBOR(event) 210 + if err != nil { 211 + log.Printf("Failed to encode event as CBOR: %v", err) 212 + continue 213 + } 214 + 215 + // Write CBOR message to WebSocket 216 + err = sub.conn.WriteMessage(websocket.BinaryMessage, cborBytes) 217 + if err != nil { 218 + log.Printf("Failed to write to websocket: %v", err) 219 + return 220 + } 221 + 222 + // Update cursor 223 + sub.cursor = event.Seq 224 + } 225 + } 226 + 227 + // encodeCBOR encodes an event as CBOR 228 + func encodeCBOR(event *RepoCommitEvent) ([]byte, error) { 229 + // For now, use JSON encoding wrapped in CBOR envelope 230 + // In production, you'd use proper CBOR encoding 231 + // The atproto spec requires DAG-CBOR with specific header 232 + 233 + // Simple approach: encode as JSON for MVP 234 + // Real implementation needs proper CBOR-gen types 235 + return json.Marshal(event) 236 + } 237 + 238 + // SetRepoEventHandler creates a callback to be registered with RepoManager 239 + func (b *EventBroadcaster) SetRepoEventHandler() func(context.Context, *RepoEvent) { 240 + return func(ctx context.Context, event *RepoEvent) { 241 + // Broadcast the event to all subscribers 242 + // The holdDID is already set in the broadcaster 243 + b.Broadcast(ctx, event) 244 + } 245 + } 246 + 247 + // GetCurrentSeq returns the current event sequence number 248 + func (b *EventBroadcaster) GetCurrentSeq() int64 { 249 + b.mu.RLock() 250 + defer b.mu.RUnlock() 251 + return b.eventSeq 252 + }
+5
pkg/hold/pds/server.go
··· 102 102 return p.signingKey 103 103 } 104 104 105 + // RepomgrRef returns a reference to the RepoManager for event handler setup 106 + func (p *HoldPDS) RepomgrRef() *RepoManager { 107 + return p.repomgr 108 + } 109 + 105 110 // Bootstrap initializes the hold with the captain record and owner as first crew member 106 111 func (p *HoldPDS) Bootstrap(ctx context.Context, ownerDID string, public bool, allowAllCrew bool) error { 107 112 if ownerDID == "" {
+109 -7
pkg/hold/pds/xrpc.go
··· 5 5 "encoding/json" 6 6 "fmt" 7 7 "net/http" 8 + "strconv" 8 9 "strings" 9 10 10 11 "atcr.io/pkg/atproto" 11 12 lexutil "github.com/bluesky-social/indigo/lex/util" 12 13 "github.com/bluesky-social/indigo/repo" 14 + "github.com/gorilla/websocket" 13 15 "github.com/ipfs/go-cid" 14 16 "github.com/ipld/go-car" 15 17 carutil "github.com/ipld/go-car/util" ··· 19 21 20 22 // XRPCHandler handles XRPC requests for the embedded PDS 21 23 type XRPCHandler struct { 22 - pds *HoldPDS 23 - publicURL string 24 - blobStore BlobStore 24 + pds *HoldPDS 25 + publicURL string 26 + blobStore BlobStore 27 + broadcaster *EventBroadcaster 25 28 } 26 29 27 30 // BlobStore interface wraps the existing hold service storage operations ··· 33 36 } 34 37 35 38 // NewXRPCHandler creates a new XRPC handler 36 - func NewXRPCHandler(pds *HoldPDS, publicURL string, blobStore BlobStore) *XRPCHandler { 39 + func NewXRPCHandler(pds *HoldPDS, publicURL string, blobStore BlobStore, broadcaster *EventBroadcaster) *XRPCHandler { 37 40 return &XRPCHandler{ 38 - pds: pds, 39 - publicURL: publicURL, 40 - blobStore: blobStore, 41 + pds: pds, 42 + publicURL: publicURL, 43 + blobStore: blobStore, 44 + broadcaster: broadcaster, 41 45 } 42 46 } 43 47 ··· 72 76 // Sync endpoints 73 77 mux.HandleFunc("/xrpc/com.atproto.sync.listRepos", corsMiddleware(h.HandleListRepos)) 74 78 mux.HandleFunc("/xrpc/com.atproto.sync.getRecord", corsMiddleware(h.HandleSyncGetRecord)) 79 + mux.HandleFunc("/xrpc/com.atproto.sync.getRepo", corsMiddleware(h.HandleGetRepo)) 80 + mux.HandleFunc("/xrpc/com.atproto.sync.subscribeRepos", corsMiddleware(h.HandleSubscribeRepos)) 75 81 76 82 // Blob endpoints (wrap existing presigned URL logic) 77 83 mux.HandleFunc("/xrpc/com.atproto.repo.uploadBlob", corsMiddleware(h.HandleUploadBlob)) ··· 438 444 439 445 // Write the CAR data to the response 440 446 w.Write(buf.Bytes()) 447 + } 448 + 449 + // HandleGetRepo returns the full repository as a CAR file 450 + // This is the critical endpoint for 
relay crawling and Bluesky discovery 451 + func (h *XRPCHandler) HandleGetRepo(w http.ResponseWriter, r *http.Request) { 452 + if r.Method != http.MethodGet { 453 + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) 454 + return 455 + } 456 + 457 + // Get required 'did' parameter 458 + did := r.URL.Query().Get("did") 459 + if did == "" { 460 + http.Error(w, "missing required parameter: did", http.StatusBadRequest) 461 + return 462 + } 463 + 464 + // Validate DID matches this PDS 465 + if did != h.pds.DID() { 466 + http.Error(w, "repo not found", http.StatusNotFound) 467 + return 468 + } 469 + 470 + // Get optional 'since' parameter for diff export 471 + since := r.URL.Query().Get("since") 472 + 473 + // Set CAR content type 474 + w.Header().Set("Content-Type", "application/vnd.ipld.car") 475 + 476 + // Stream the repository CAR file directly to the response 477 + // ReadRepo handles full export or diff based on 'since' parameter 478 + err := h.pds.repomgr.ReadRepo(r.Context(), h.pds.uid, since, w) 479 + if err != nil { 480 + // Error already written to response by ReadRepo streaming 481 + // Log it but don't try to write another HTTP error 482 + fmt.Printf("Error streaming repo CAR: %v\n", err) 483 + return 484 + } 485 + } 486 + 487 + // WebSocket upgrader 488 + var upgrader = websocket.Upgrader{ 489 + CheckOrigin: func(r *http.Request) bool { 490 + // Allow all origins for MVP (ATProto firehose is public) 491 + return true 492 + }, 493 + } 494 + 495 + // HandleSubscribeRepos handles WebSocket connections for the firehose 496 + // This is the real-time event stream for repo changes 497 + func (h *XRPCHandler) HandleSubscribeRepos(w http.ResponseWriter, r *http.Request) { 498 + if r.Method != http.MethodGet { 499 + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) 500 + return 501 + } 502 + 503 + // Check if broadcaster is configured 504 + if h.broadcaster == nil { 505 + http.Error(w, "firehose not enabled", http.StatusNotImplemented) 
506 + return 507 + } 508 + 509 + // Get optional cursor parameter for backfill 510 + var cursor int64 = 0 511 + if cursorStr := r.URL.Query().Get("cursor"); cursorStr != "" { 512 + var err error 513 + cursor, err = strconv.ParseInt(cursorStr, 10, 64) 514 + if err != nil { 515 + http.Error(w, "invalid cursor parameter", http.StatusBadRequest) 516 + return 517 + } 518 + } 519 + 520 + // Upgrade to WebSocket 521 + conn, err := upgrader.Upgrade(w, r, nil) 522 + if err != nil { 523 + fmt.Printf("WebSocket upgrade failed: %v\n", err) 524 + return 525 + } 526 + 527 + // Subscribe to events 528 + sub := h.broadcaster.Subscribe(conn, cursor) 529 + 530 + // The broadcaster's handleSubscriber goroutine will manage this connection 531 + // We just need to keep reading to detect client disconnects 532 + go func() { 533 + defer h.broadcaster.Unsubscribe(sub) 534 + for { 535 + // Read messages from client (mostly just to detect disconnect) 536 + _, _, err := conn.ReadMessage() 537 + if err != nil { 538 + // Client disconnected 539 + break 540 + } 541 + } 542 + }() 441 543 } 442 544 443 545 // HandleUploadBlob wraps existing presigned upload URL logic