A community-based topic aggregation platform built on atproto

feat(jetstream): add aggregator firehose consumer

Implement Jetstream consumer to index aggregator service declarations
and authorization records from the firehose in real-time.

aggregator_consumer.go:
- Handles social.coves.aggregator.service records (create/update/delete)
- Handles social.coves.aggregator.authorization records (create/update/delete)
- Upsert logic for both create and update operations
- Delete by URI for authorization cleanup
- Validation:
* Service rkey must be "self" (canonical location)
* communityDid in authorization must match repo DID (prevents forgery)
* did in service must match repo DID (prevents DID spoofing)
* Required fields validation
- Avatar blob extraction from atProto blob ref
- createdAt parsing from RFC3339, falling back to the current time when the value is missing or invalid

aggregator_jetstream_connector.go:
- WebSocket connection management with auto-reconnect
- Ping/pong keepalive
- Graceful error handling (continues on parsing errors)
- Filters for wanted collections

Jetstream URL:
ws://localhost:6008/subscribe?wantedCollections=social.coves.aggregator.service&wantedCollections=social.coves.aggregator.authorization

Indexed to database:
- aggregators table (stats auto-updated via triggers)
- aggregator_authorizations table (unique constraint on aggregator+community)

Security:
- DID validation prevents impersonation
- communityDid validation prevents authorization forgery
- Graceful error handling prevents consumer crashes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

+483
+347
internal/atproto/jetstream/aggregator_consumer.go
··· 1 + package jetstream 2 + 3 + import ( 4 + "Coves/internal/core/aggregators" 5 + "context" 6 + "encoding/json" 7 + "fmt" 8 + "log" 9 + "time" 10 + ) 11 + 12 + // AggregatorEventConsumer consumes aggregator-related events from Jetstream 13 + // Following Bluesky's pattern: feed generators (app.bsky.feed.generator) and labelers (app.bsky.labeler.service) 14 + type AggregatorEventConsumer struct { 15 + repo aggregators.Repository // Repository for aggregator operations 16 + } 17 + 18 + // NewAggregatorEventConsumer creates a new Jetstream consumer for aggregator events 19 + func NewAggregatorEventConsumer(repo aggregators.Repository) *AggregatorEventConsumer { 20 + return &AggregatorEventConsumer{ 21 + repo: repo, 22 + } 23 + } 24 + 25 + // HandleEvent processes a Jetstream event for aggregator records 26 + // This is called by the main Jetstream consumer when it receives commit events 27 + func (c *AggregatorEventConsumer) HandleEvent(ctx context.Context, event *JetstreamEvent) error { 28 + // We only care about commit events for aggregator records 29 + if event.Kind != "commit" || event.Commit == nil { 30 + return nil 31 + } 32 + 33 + commit := event.Commit 34 + 35 + // Route to appropriate handler based on collection 36 + // IMPORTANT: Collection names refer to RECORD TYPES in repositories 37 + // - social.coves.aggregator.service: Service declaration (in aggregator's own repo, rkey="self") 38 + // - social.coves.aggregator.authorization: Authorization (in community's repo, any rkey) 39 + switch commit.Collection { 40 + case "social.coves.aggregator.service": 41 + return c.handleServiceDeclaration(ctx, event.Did, commit) 42 + case "social.coves.aggregator.authorization": 43 + return c.handleAuthorization(ctx, event.Did, commit) 44 + default: 45 + // Not an aggregator-related collection 46 + return nil 47 + } 48 + } 49 + 50 + // handleServiceDeclaration processes aggregator service declaration events 51 + // Service declarations are stored at: 
at://aggregator_did/social.coves.aggregator.service/self 52 + func (c *AggregatorEventConsumer) handleServiceDeclaration(ctx context.Context, did string, commit *CommitEvent) error { 53 + switch commit.Operation { 54 + case "create", "update": 55 + // Both create and update are handled the same way (upsert) 56 + return c.upsertAggregator(ctx, did, commit) 57 + case "delete": 58 + return c.deleteAggregator(ctx, did) 59 + default: 60 + log.Printf("Unknown operation for aggregator service: %s", commit.Operation) 61 + return nil 62 + } 63 + } 64 + 65 + // handleAuthorization processes authorization record events 66 + // Authorizations are stored at: at://community_did/social.coves.aggregator.authorization/{rkey} 67 + func (c *AggregatorEventConsumer) handleAuthorization(ctx context.Context, communityDID string, commit *CommitEvent) error { 68 + switch commit.Operation { 69 + case "create", "update": 70 + // Both create and update are handled the same way (upsert) 71 + return c.upsertAuthorization(ctx, communityDID, commit) 72 + case "delete": 73 + return c.deleteAuthorization(ctx, communityDID, commit) 74 + default: 75 + log.Printf("Unknown operation for aggregator authorization: %s", commit.Operation) 76 + return nil 77 + } 78 + } 79 + 80 + // upsertAggregator indexes or updates an aggregator service declaration 81 + func (c *AggregatorEventConsumer) upsertAggregator(ctx context.Context, did string, commit *CommitEvent) error { 82 + if commit.Record == nil { 83 + return fmt.Errorf("aggregator service event missing record data") 84 + } 85 + 86 + // Verify rkey is "self" (canonical location for service declaration) 87 + // Following Bluesky's pattern: app.bsky.feed.generator and app.bsky.labeler.service use /self 88 + if commit.RKey != "self" { 89 + return fmt.Errorf("invalid aggregator service rkey: expected 'self', got '%s'", commit.RKey) 90 + } 91 + 92 + // Parse the service declaration record 93 + service, err := parseAggregatorService(commit.Record) 94 + if err != 
nil { 95 + return fmt.Errorf("failed to parse aggregator service: %w", err) 96 + } 97 + 98 + // Validate DID matches repo DID (security check) 99 + if service.DID != "" && service.DID != did { 100 + return fmt.Errorf("service record DID (%s) does not match repo DID (%s)", service.DID, did) 101 + } 102 + 103 + // Build AT-URI for this record 104 + uri := fmt.Sprintf("at://%s/social.coves.aggregator.service/self", did) 105 + 106 + // Parse createdAt from service record 107 + var createdAt time.Time 108 + if service.CreatedAt != "" { 109 + createdAt, err = time.Parse(time.RFC3339, service.CreatedAt) 110 + if err != nil { 111 + createdAt = time.Now() // Fallback 112 + log.Printf("Warning: invalid createdAt format for aggregator %s: %v", did, err) 113 + } 114 + } else { 115 + createdAt = time.Now() 116 + } 117 + 118 + // Extract avatar CID from blob if present 119 + var avatarCID string 120 + if service.Avatar != nil { 121 + if cid, ok := extractBlobCID(service.Avatar); ok { 122 + avatarCID = cid 123 + } 124 + } 125 + 126 + // Build aggregator domain model 127 + agg := &aggregators.Aggregator{ 128 + DID: did, 129 + DisplayName: service.DisplayName, 130 + Description: service.Description, 131 + AvatarURL: avatarCID, // Now contains the CID from blob 132 + MaintainerDID: service.MaintainerDID, 133 + SourceURL: service.SourceURL, 134 + CreatedAt: createdAt, 135 + IndexedAt: time.Now(), 136 + RecordURI: uri, 137 + RecordCID: commit.CID, 138 + } 139 + 140 + // Handle config schema (JSONB) 141 + if service.ConfigSchema != nil { 142 + schemaBytes, err := json.Marshal(service.ConfigSchema) 143 + if err != nil { 144 + return fmt.Errorf("failed to marshal config schema: %w", err) 145 + } 146 + agg.ConfigSchema = schemaBytes 147 + } 148 + 149 + // Create or update in database 150 + if err := c.repo.CreateAggregator(ctx, agg); err != nil { 151 + return fmt.Errorf("failed to index aggregator: %w", err) 152 + } 153 + 154 + log.Printf("[AGGREGATOR-CONSUMER] Indexed service: %s (%s)", 
agg.DisplayName, did) 155 + return nil 156 + } 157 + 158 + // deleteAggregator removes an aggregator from the index 159 + func (c *AggregatorEventConsumer) deleteAggregator(ctx context.Context, did string) error { 160 + // Delete from database (cascade deletes authorizations and posts via FK) 161 + if err := c.repo.DeleteAggregator(ctx, did); err != nil { 162 + // Log but don't fail if not found (idempotent delete) 163 + if aggregators.IsNotFound(err) { 164 + log.Printf("[AGGREGATOR-CONSUMER] Aggregator not found for deletion: %s (already deleted?)", did) 165 + return nil 166 + } 167 + return fmt.Errorf("failed to delete aggregator: %w", err) 168 + } 169 + 170 + log.Printf("[AGGREGATOR-CONSUMER] Deleted aggregator: %s", did) 171 + return nil 172 + } 173 + 174 + // upsertAuthorization indexes or updates an authorization record 175 + func (c *AggregatorEventConsumer) upsertAuthorization(ctx context.Context, communityDID string, commit *CommitEvent) error { 176 + if commit.Record == nil { 177 + return fmt.Errorf("authorization event missing record data") 178 + } 179 + 180 + // Parse the authorization record 181 + authRecord, err := parseAggregatorAuthorization(commit.Record) 182 + if err != nil { 183 + return fmt.Errorf("failed to parse authorization: %w", err) 184 + } 185 + 186 + // Validate communityDid matches repo DID (security check) 187 + if authRecord.CommunityDid != "" && authRecord.CommunityDid != communityDID { 188 + return fmt.Errorf("authorization record communityDid (%s) does not match repo DID (%s)", 189 + authRecord.CommunityDid, communityDID) 190 + } 191 + 192 + // Build AT-URI for this record 193 + uri := fmt.Sprintf("at://%s/social.coves.aggregator.authorization/%s", communityDID, commit.RKey) 194 + 195 + // Parse createdAt from authorization record 196 + var createdAt time.Time 197 + if authRecord.CreatedAt != "" { 198 + createdAt, err = time.Parse(time.RFC3339, authRecord.CreatedAt) 199 + if err != nil { 200 + createdAt = time.Now() // Fallback 201 
+ log.Printf("Warning: invalid createdAt format for authorization %s: %v", uri, err) 202 + } 203 + } else { 204 + createdAt = time.Now() 205 + } 206 + 207 + // Parse disabledAt from authorization record (optional, for modlog/audit) 208 + var disabledAt *time.Time 209 + if authRecord.DisabledAt != "" { 210 + parsed, err := time.Parse(time.RFC3339, authRecord.DisabledAt) 211 + if err != nil { 212 + log.Printf("Warning: invalid disabledAt format for authorization %s: %v", uri, err) 213 + } else { 214 + disabledAt = &parsed 215 + } 216 + } 217 + 218 + // Build authorization domain model 219 + auth := &aggregators.Authorization{ 220 + AggregatorDID: authRecord.Aggregator, 221 + CommunityDID: communityDID, 222 + Enabled: authRecord.Enabled, 223 + CreatedBy: authRecord.CreatedBy, 224 + DisabledBy: authRecord.DisabledBy, 225 + DisabledAt: disabledAt, 226 + CreatedAt: createdAt, 227 + IndexedAt: time.Now(), 228 + RecordURI: uri, 229 + RecordCID: commit.CID, 230 + } 231 + 232 + // Handle config (JSONB) 233 + if authRecord.Config != nil { 234 + configBytes, err := json.Marshal(authRecord.Config) 235 + if err != nil { 236 + return fmt.Errorf("failed to marshal config: %w", err) 237 + } 238 + auth.Config = configBytes 239 + } 240 + 241 + // Create or update in database 242 + if err := c.repo.CreateAuthorization(ctx, auth); err != nil { 243 + return fmt.Errorf("failed to index authorization: %w", err) 244 + } 245 + 246 + log.Printf("[AGGREGATOR-CONSUMER] Indexed authorization: community=%s, aggregator=%s, enabled=%v", 247 + communityDID, authRecord.Aggregator, authRecord.Enabled) 248 + return nil 249 + } 250 + 251 + // deleteAuthorization removes an authorization from the index 252 + func (c *AggregatorEventConsumer) deleteAuthorization(ctx context.Context, communityDID string, commit *CommitEvent) error { 253 + // Build AT-URI to find the authorization 254 + uri := fmt.Sprintf("at://%s/social.coves.aggregator.authorization/%s", communityDID, commit.RKey) 255 + 256 + // Delete 
from database 257 + if err := c.repo.DeleteAuthorizationByURI(ctx, uri); err != nil { 258 + // Log but don't fail if not found (idempotent delete) 259 + if aggregators.IsNotFound(err) { 260 + log.Printf("[AGGREGATOR-CONSUMER] Authorization not found for deletion: %s (already deleted?)", uri) 261 + return nil 262 + } 263 + return fmt.Errorf("failed to delete authorization: %w", err) 264 + } 265 + 266 + log.Printf("[AGGREGATOR-CONSUMER] Deleted authorization: %s", uri) 267 + return nil 268 + } 269 + 270 + // ===== Record Parsing Functions ===== 271 + 272 + // AggregatorServiceRecord represents the service declaration record structure 273 + type AggregatorServiceRecord struct { 274 + Type string `json:"$type"` 275 + DID string `json:"did"` // DID of aggregator (must match repo DID) 276 + DisplayName string `json:"displayName"` 277 + Description string `json:"description,omitempty"` 278 + Avatar map[string]interface{} `json:"avatar,omitempty"` // Blob reference (CID will be extracted) 279 + ConfigSchema map[string]interface{} `json:"configSchema,omitempty"` // JSON Schema 280 + MaintainerDID string `json:"maintainer,omitempty"` // Fixed: was maintainerDid 281 + SourceURL string `json:"sourceUrl,omitempty"` // Fixed: was homepageUrl 282 + CreatedAt string `json:"createdAt"` 283 + } 284 + 285 + // parseAggregatorService parses an aggregator service record 286 + func parseAggregatorService(record interface{}) (*AggregatorServiceRecord, error) { 287 + recordBytes, err := json.Marshal(record) 288 + if err != nil { 289 + return nil, fmt.Errorf("failed to marshal record: %w", err) 290 + } 291 + 292 + var service AggregatorServiceRecord 293 + if err := json.Unmarshal(recordBytes, &service); err != nil { 294 + return nil, fmt.Errorf("failed to unmarshal service record: %w", err) 295 + } 296 + 297 + // Validate required fields 298 + if service.DisplayName == "" { 299 + return nil, fmt.Errorf("displayName is required") 300 + } 301 + 302 + return &service, nil 303 + } 304 + 305 + 
// Note: extractBlobCID is defined in community_consumer.go and shared across consumers

// AggregatorAuthorizationRecord mirrors the JSON layout of a
// social.coves.aggregator.authorization record.
type AggregatorAuthorizationRecord struct {
	Type         string                 `json:"$type"`
	Aggregator   string                 `json:"aggregatorDid"`        // DID of the aggregator being authorized
	CommunityDid string                 `json:"communityDid"`         // DID of the community (must match repo DID)
	Enabled      bool                   `json:"enabled"`              // Whether the authorization is currently active
	Config       map[string]interface{} `json:"config,omitempty"`     // Aggregator-specific config
	CreatedBy    string                 `json:"createdBy"`            // Required: DID of moderator who authorized
	DisabledBy   string                 `json:"disabledBy,omitempty"` // Optional: who disabled the authorization
	DisabledAt   string                 `json:"disabledAt,omitempty"` // Optional: when it was disabled (for modlog/audit)
	CreatedAt    string                 `json:"createdAt"`            // Required: creation timestamp as a string
}

// parseAggregatorAuthorization converts a decoded authorization record into
// an AggregatorAuthorizationRecord by round-tripping it through JSON.
//
// It fails when record cannot be encoded, when the encoded form does not
// decode into the struct, or when one of the required fields (aggregatorDid,
// communityDid, createdAt, createdBy — checked in that order) is empty.
func parseAggregatorAuthorization(record interface{}) (*AggregatorAuthorizationRecord, error) {
	raw, marshalErr := json.Marshal(record)
	if marshalErr != nil {
		return nil, fmt.Errorf("failed to marshal record: %w", marshalErr)
	}

	parsed := new(AggregatorAuthorizationRecord)
	if decodeErr := json.Unmarshal(raw, parsed); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal authorization record: %w", decodeErr)
	}

	// Required fields per lexicon; the first empty one decides the error.
	switch {
	case parsed.Aggregator == "":
		return nil, fmt.Errorf("aggregatorDid is required")
	case parsed.CommunityDid == "":
		return nil, fmt.Errorf("communityDid is required")
	case parsed.CreatedAt == "":
		return nil, fmt.Errorf("createdAt is required")
	case parsed.CreatedBy == "":
		return nil, fmt.Errorf("createdBy is required")
	}

	return parsed, nil
}
+136
internal/atproto/jetstream/aggregator_jetstream_connector.go
··· 1 + package jetstream 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "log" 8 + "sync" 9 + "time" 10 + 11 + "github.com/gorilla/websocket" 12 + ) 13 + 14 + // AggregatorJetstreamConnector handles WebSocket connection to Jetstream for aggregator events 15 + type AggregatorJetstreamConnector struct { 16 + consumer *AggregatorEventConsumer 17 + wsURL string 18 + } 19 + 20 + // NewAggregatorJetstreamConnector creates a new Jetstream WebSocket connector for aggregator events 21 + func NewAggregatorJetstreamConnector(consumer *AggregatorEventConsumer, wsURL string) *AggregatorJetstreamConnector { 22 + return &AggregatorJetstreamConnector{ 23 + consumer: consumer, 24 + wsURL: wsURL, 25 + } 26 + } 27 + 28 + // Start begins consuming events from Jetstream 29 + // Runs indefinitely, reconnecting on errors 30 + func (c *AggregatorJetstreamConnector) Start(ctx context.Context) error { 31 + log.Printf("Starting Jetstream aggregator consumer: %s", c.wsURL) 32 + 33 + for { 34 + select { 35 + case <-ctx.Done(): 36 + log.Println("Jetstream aggregator consumer shutting down") 37 + return ctx.Err() 38 + default: 39 + if err := c.connect(ctx); err != nil { 40 + log.Printf("Jetstream aggregator connection error: %v. 
Retrying in 5s...", err) 41 + time.Sleep(5 * time.Second) 42 + continue 43 + } 44 + } 45 + } 46 + } 47 + 48 + // connect establishes WebSocket connection and processes events 49 + func (c *AggregatorJetstreamConnector) connect(ctx context.Context) error { 50 + conn, _, err := websocket.DefaultDialer.DialContext(ctx, c.wsURL, nil) 51 + if err != nil { 52 + return fmt.Errorf("failed to connect to Jetstream: %w", err) 53 + } 54 + defer func() { 55 + if closeErr := conn.Close(); closeErr != nil { 56 + log.Printf("Failed to close WebSocket connection: %v", closeErr) 57 + } 58 + }() 59 + 60 + log.Println("Connected to Jetstream (aggregator consumer)") 61 + 62 + // Set read deadline to detect connection issues 63 + if err := conn.SetReadDeadline(time.Now().Add(60 * time.Second)); err != nil { 64 + log.Printf("Failed to set read deadline: %v", err) 65 + } 66 + 67 + // Set pong handler to keep connection alive 68 + conn.SetPongHandler(func(string) error { 69 + if err := conn.SetReadDeadline(time.Now().Add(60 * time.Second)); err != nil { 70 + log.Printf("Failed to set read deadline in pong handler: %v", err) 71 + } 72 + return nil 73 + }) 74 + 75 + // Start ping ticker 76 + ticker := time.NewTicker(30 * time.Second) 77 + defer ticker.Stop() 78 + 79 + done := make(chan struct{}) 80 + var closeOnce sync.Once // Ensure done channel is only closed once 81 + 82 + // Goroutine to send pings 83 + go func() { 84 + for { 85 + select { 86 + case <-ticker.C: 87 + if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil { 88 + log.Printf("Ping error: %v", err) 89 + closeOnce.Do(func() { close(done) }) 90 + return 91 + } 92 + case <-done: 93 + return 94 + case <-ctx.Done(): 95 + return 96 + } 97 + } 98 + }() 99 + 100 + // Read messages 101 + for { 102 + select { 103 + case <-ctx.Done(): 104 + return ctx.Err() 105 + case <-done: 106 + return fmt.Errorf("connection closed") 107 + default: 108 + _, message, err := conn.ReadMessage() 109 + if err != nil { 110 + 
closeOnce.Do(func() { close(done) }) 111 + return fmt.Errorf("read error: %w", err) 112 + } 113 + 114 + // Reset read deadline on successful read 115 + if err := conn.SetReadDeadline(time.Now().Add(60 * time.Second)); err != nil { 116 + log.Printf("Failed to set read deadline: %v", err) 117 + } 118 + 119 + if err := c.handleEvent(ctx, message); err != nil { 120 + log.Printf("Error handling aggregator event: %v", err) 121 + // Continue processing other events 122 + } 123 + } 124 + } 125 + } 126 + 127 + // handleEvent processes a single Jetstream event 128 + func (c *AggregatorJetstreamConnector) handleEvent(ctx context.Context, data []byte) error { 129 + var event JetstreamEvent 130 + if err := json.Unmarshal(data, &event); err != nil { 131 + return fmt.Errorf("failed to parse event: %w", err) 132 + } 133 + 134 + // Pass to consumer's HandleEvent method 135 + return c.consumer.HandleEvent(ctx, &event) 136 + }