···1111use serde::Deserialize;
12121313use crate::AppState;
1414-use crate::auth::{extract_bearer_token, get_atproto_auth_for_user_cached, verify_oauth_token_cached};
1414+use crate::auth::{
1515+ extract_bearer_token, get_atproto_auth_for_user_cached, verify_oauth_token_cached,
1616+};
1517use crate::models::{
1618 IndexedRecord, Record, SliceRecordsOutput, SliceRecordsParams, SortField, WhereCondition,
1719};
···526528) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
527529 // Extract and verify OAuth token
528530 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
529529- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
530530- .await
531531- .map_err(status_to_error_response)?;
531531+ let user_info = verify_oauth_token_cached(
532532+ &token,
533533+ &state.config.auth_base_url,
534534+ Some(state.auth_cache.clone()),
535535+ )
536536+ .await
537537+ .map_err(status_to_error_response)?;
532538533539 // Get AT Protocol DPoP auth and PDS URL (with caching)
534534- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
535535- .await
536536- .map_err(status_to_error_response)?;
540540+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
541541+ &token,
542542+ &state.config.auth_base_url,
543543+ Some(state.auth_cache.clone()),
544544+ )
545545+ .await
546546+ .map_err(status_to_error_response)?;
537547538548 // Extract the repo DID from user info
539549 let repo = user_info.did.unwrap_or(user_info.sub);
···603613 validate: false,
604614 };
605615606606- let result = create_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, create_request)
607607- .await
608608- .map_err(|_e| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
616616+ let result = create_record(
617617+ &http_client,
618618+ &atproto_client::client::Auth::DPoP(dpop_auth),
619619+ &pds_url,
620620+ create_request,
621621+ )
622622+ .await
623623+ .map_err(|_e| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
609624610625 // Extract URI and CID from the response enum
611626 let (uri, cid) = match result {
···644659) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
645660 // Extract and verify OAuth token
646661 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
647647- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
648648- .await
649649- .map_err(status_to_error_response)?;
662662+ let user_info = verify_oauth_token_cached(
663663+ &token,
664664+ &state.config.auth_base_url,
665665+ Some(state.auth_cache.clone()),
666666+ )
667667+ .await
668668+ .map_err(status_to_error_response)?;
650669651670 // Get AT Protocol DPoP auth and PDS URL (with caching)
652652- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
653653- .await
654654- .map_err(status_to_error_response)?;
671671+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
672672+ &token,
673673+ &state.config.auth_base_url,
674674+ Some(state.auth_cache.clone()),
675675+ )
676676+ .await
677677+ .map_err(status_to_error_response)?;
655678656679 // Extract slice URI, rkey, and record value from structured body
657680 let slice_uri = body
···721744 validate: false,
722745 };
723746724724- let result = put_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, put_request)
725725- .await
726726- .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
747747+ let result = put_record(
748748+ &http_client,
749749+ &atproto_client::client::Auth::DPoP(dpop_auth),
750750+ &pds_url,
751751+ put_request,
752752+ )
753753+ .await
754754+ .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
727755728756 // Extract URI and CID from the response enum
729757 let (uri, cid) = match result {
···762790) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
763791 // Extract and verify OAuth token
764792 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
765765- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
766766- .await
767767- .map_err(status_to_error_response)?;
793793+ let user_info = verify_oauth_token_cached(
794794+ &token,
795795+ &state.config.auth_base_url,
796796+ Some(state.auth_cache.clone()),
797797+ )
798798+ .await
799799+ .map_err(status_to_error_response)?;
768800769801 // Get AT Protocol DPoP auth and PDS URL (with caching)
770770- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
771771- .await
772772- .map_err(status_to_error_response)?;
802802+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
803803+ &token,
804804+ &state.config.auth_base_url,
805805+ Some(state.auth_cache.clone()),
806806+ )
807807+ .await
808808+ .map_err(status_to_error_response)?;
773809774810 // Extract repo and rkey from body
775811 let repo = user_info.did.unwrap_or(user_info.sub);
···790826 swap_commit: None,
791827 };
792828793793- delete_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, delete_request)
794794- .await
795795- .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
829829+ delete_record(
830830+ &http_client,
831831+ &atproto_client::client::Auth::DPoP(dpop_auth),
832832+ &pds_url,
833833+ delete_request,
834834+ )
835835+ .await
836836+ .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
796837797838 // Also delete from local database (from all slices)
798839 let uri = format!("at://{}/{}/{}", repo, collection, rkey);
+18-24
api/src/atproto_extensions.rs
···11// Extensions to atproto-client for functionality not yet available
22// This module provides additional AT Protocol functions following the same patterns
3344-use serde::{Deserialize, Serialize};
44+use crate::errors::BlobUploadError;
55use atproto_client::client::DPoPAuth;
66-use thiserror::Error;
76use atproto_oauth::dpop::{DpopRetry, request_dpop};
88-use reqwest_middleware::ClientBuilder;
97use reqwest_chain::ChainMiddleware;
1010-1111-#[derive(Error, Debug)]
1212-pub enum BlobUploadError {
1313- #[error("error-slice-blob-1 HTTP request failed: {0}")]
1414- HttpRequest(#[from] reqwest_middleware::Error),
1515-1616- #[error("error-slice-blob-2 JSON parsing failed: {0}")]
1717- JsonParse(#[from] serde_json::Error),
1818-1919- #[error("error-slice-blob-3 DPoP proof creation failed: {0}")]
2020- DPoPProof(String),
2121-2222- #[error("error-slice-blob-4 Upload request failed: {status} - {message}")]
2323- UploadFailed { status: u16, message: String },
2424-}
2525-88+use reqwest_middleware::ClientBuilder;
99+use serde::{Deserialize, Serialize};
26102711/// Response from blob upload
2812#[cfg_attr(debug_assertions, derive(Debug))]
···6246 mime_type: &str,
6347) -> Result<UploadBlobResponse, BlobUploadError> {
6448 // Build the URL using standard string formatting
6565- let url = format!("{}/xrpc/com.atproto.repo.uploadBlob", base_url.trim_end_matches('/'));
4949+ let url = format!(
5050+ "{}/xrpc/com.atproto.repo.uploadBlob",
5151+ base_url.trim_end_matches('/')
5252+ );
66536754 // For blob uploads, we need to use a different approach than post_dpop_json
6855 // since we're sending binary data, not JSON
···8976 "POST",
9077 url,
9178 &dpop_auth.oauth_access_token,
9292- ).map_err(|e| BlobUploadError::DPoPProof(e.to_string()))?;
7979+ )
8080+ .map_err(|e| BlobUploadError::DPoPProof(e.to_string()))?;
93819482 // Create DpopRetry middleware (same as atproto-client)
9583 let dpop_retry = DpopRetry::new(
···10795 // Make the request with automatic nonce retry handling
10896 let http_response = dpop_retry_client
10997 .post(url)
110110- .header("Authorization", format!("DPoP {}", dpop_auth.oauth_access_token))
9898+ .header(
9999+ "Authorization",
100100+ format!("DPoP {}", dpop_auth.oauth_access_token),
101101+ )
111102 .header("DPoP", &dpop_proof_token)
112103 .header("Content-Type", content_type)
113104 .body(data)
···117108118109 if !http_response.status().is_success() {
119110 let status = http_response.status();
120120- let error_text = http_response.text().await.unwrap_or_else(|_| "unknown".to_string());
111111+ let error_text = http_response
112112+ .text()
113113+ .await
114114+ .unwrap_or_else(|_| "unknown".to_string());
121115 return Err(BlobUploadError::UploadFailed {
122116 status: status.as_u16(),
123123- message: error_text
117117+ message: error_text,
124118 });
125119 }
126120
+99-31
api/src/auth.rs
···11-use axum::http::{HeaderMap, StatusCode};
22-use serde::{Deserialize, Serialize};
11+//! Authentication and authorization utilities for OAuth and AT Protocol.
22+//!
33+//! This module provides functions for:
44+//! - Extracting and validating OAuth bearer tokens
55+//! - Verifying tokens with the authorization server
66+//! - Managing AT Protocol DPoP (Demonstrating Proof-of-Possession) authentication
77+//! - Caching authentication state for performance (5-minute TTL)
88+99+use crate::cache::SliceCache;
310use atproto_client::client::DPoPAuth;
411use atproto_identity::key::KeyData;
512use atproto_oauth::jwk::WrappedJsonWebKey;
1313+use axum::http::{HeaderMap, StatusCode};
1414+use serde::{Deserialize, Serialize};
615use std::sync::Arc;
716use tokio::sync::Mutex;
88-use crate::cache::SliceCache;
9171818+/// OAuth userinfo response containing the authenticated user's identity.
1019#[derive(Serialize, Deserialize, Debug)]
1120pub struct UserInfoResponse {
2121+ /// Subject identifier (user ID) from the OAuth provider
1222 pub sub: String,
2323+ /// Decentralized identifier for the user in AT Protocol
1324 pub did: Option<String>,
1425}
15262727+/// Cached AT Protocol session data to avoid repeated auth server requests.
1628#[derive(Serialize, Deserialize, Debug, Clone)]
1729struct CachedSession {
3030+ /// Personal Data Server endpoint URL for the user
1831 pds_url: String,
3232+ /// AT Protocol access token for PDS operations
1933 atproto_access_token: String,
3434+ /// DPoP JSON Web Key for proof-of-possession
2035 dpop_jwk: serde_json::Value,
2136}
22372323-// Extract bearer token from Authorization header
3838+/// Extracts the bearer token from the Authorization header.
3939+///
4040+/// # Arguments
4141+/// * `headers` - HTTP request headers
4242+///
4343+/// # Returns
4444+/// * `Ok(String)` - The extracted bearer token
4545+/// * `Err(StatusCode::UNAUTHORIZED)` - If the header is missing, malformed, or not a Bearer token
4646+///
4747+/// # Example
4848+/// ```ignore
4949+/// let token = extract_bearer_token(&headers)?;
5050+/// ```
2451pub fn extract_bearer_token(headers: &HeaderMap) -> Result<String, StatusCode> {
2552 let auth_header = headers
2653 .get("authorization")
···3158 return Err(StatusCode::UNAUTHORIZED);
3259 }
33606161+ // Safe to unwrap since we just verified the prefix exists
3462 let token = auth_header.strip_prefix("Bearer ").unwrap().to_string();
3563 Ok(token)
3664}
37653838-// Verify OAuth token with auth server
3939-4040-// Verify OAuth token with auth server with optional caching
6666+/// Verifies an OAuth bearer token with the authorization server.
6767+///
6868+/// This function first checks the cache for a previously validated token to avoid
6969+/// unnecessary network calls. If not found in cache, it validates with the auth server
7070+/// and caches the result for 5 minutes.
7171+///
7272+/// # Arguments
7373+/// * `token` - The OAuth bearer token to verify
7474+/// * `auth_base_url` - Base URL of the authorization server
7575+/// * `cache` - Optional cache instance (falls back to direct verification if None)
7676+///
7777+/// # Returns
7878+/// * `Ok(UserInfoResponse)` - User information if the token is valid
7979+/// * `Err(StatusCode)` - HTTP status code indicating the failure reason
8080+/// - `UNAUTHORIZED` - Invalid or expired token
8181+/// - `INTERNAL_SERVER_ERROR` - Network or parsing errors
8282+///
8383+/// # Cache Behavior
8484+/// - Cache key format: `oauth_userinfo:{token}`
8585+/// - TTL: 300 seconds (5 minutes)
8686+/// - Cache miss triggers verification with auth server
4187pub async fn verify_oauth_token_cached(
4288 token: &str,
4389 auth_base_url: &str,
4490 cache: Option<Arc<Mutex<SliceCache>>>,
4591) -> Result<UserInfoResponse, StatusCode> {
4646-4747- // Try cache first if provided
9292+ // Try cache first if provided to avoid network round-trip
4893 if let Some(cache) = &cache {
4994 let cached_result = {
5095 let mut cache_lock = cache.lock().await;
···58103 }
59104 }
601056161- // Cache miss - verify with auth server
106106+ // Cache miss - verify token by calling the OAuth userinfo endpoint
62107 let client = reqwest::Client::new();
63108 let userinfo_url = format!("{}/oauth/userinfo", auth_base_url);
64109···78123 .await
79124 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
801258181- // Cache the userinfo if cache is provided (5 minute TTL)
126126+ // Cache the validated userinfo for 5 minutes to improve performance
82127 if let Some(cache) = &cache {
8383- let user_info_value = serde_json::to_value(&user_info)
8484- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
128128+ let user_info_value =
129129+ serde_json::to_value(&user_info).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
85130 let mut cache_lock = cache.lock().await;
8686- let _ = cache_lock.cache_oauth_userinfo(token, &user_info_value, 300).await;
131131+ let _ = cache_lock
132132+ .cache_oauth_userinfo(token, &user_info_value, 300)
133133+ .await;
87134 }
8813589136 Ok(user_info)
90137}
911389292-// Get AT Protocol DPoP auth and PDS URL for the user
9393-9494-// Get AT Protocol DPoP auth and PDS URL for the user with optional caching
139139+/// Retrieves AT Protocol DPoP authentication credentials and PDS URL for a user.
140140+///
141141+/// DPoP (Demonstrating Proof-of-Possession) is a security mechanism that binds tokens
142142+/// to specific cryptographic keys, preventing token theft and replay attacks.
143143+///
144144+/// This function first checks the cache for existing credentials, then falls back to
145145+/// fetching from the auth server if needed. Results are cached for 5 minutes.
146146+///
147147+/// # Arguments
148148+/// * `token` - OAuth bearer token identifying the user
149149+/// * `auth_base_url` - Base URL of the authorization server
150150+/// * `cache` - Optional cache instance (falls back to direct fetch if None)
151151+///
152152+/// # Returns
153153+/// * `Ok((DPoPAuth, String))` - Tuple of (DPoP authentication object, PDS endpoint URL)
154154+/// * `Err(StatusCode)` - HTTP status code indicating the failure reason
155155+/// - `UNAUTHORIZED` - Invalid token or session expired
156156+/// - `INTERNAL_SERVER_ERROR` - Network, parsing, or key conversion errors
157157+///
158158+/// # Cache Behavior
159159+/// - Cache key format: `atproto_session:{token}`
160160+/// - TTL: 300 seconds (5 minutes)
161161+/// - Stores serialized CachedSession with PDS URL, access token, and DPoP JWK
95162pub async fn get_atproto_auth_for_user_cached(
96163 token: &str,
97164 auth_base_url: &str,
98165 cache: Option<Arc<Mutex<SliceCache>>>,
99166) -> Result<(DPoPAuth, String), StatusCode> {
100100-101101- // Try cache first if provided
167167+ // Try cache first if provided to avoid expensive auth server call
102168 if let Some(cache) = &cache {
103169 let cached_result = {
104170 let mut cache_lock = cache.lock().await;
···109175 let cached_session: CachedSession = serde_json::from_value(session_value)
110176 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
111177112112- // Convert cached data back to DPoP auth
178178+ // Reconstruct DPoP auth from cached session data
113179 let dpop_jwk: WrappedJsonWebKey = serde_json::from_value(cached_session.dpop_jwk)
114180 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
115181116116- let dpop_private_key_data = KeyData::try_from(dpop_jwk)
117117- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
182182+ let dpop_private_key_data =
183183+ KeyData::try_from(dpop_jwk).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
118184119185 let dpop_auth = DPoPAuth {
120186 dpop_private_key_data,
···125191 }
126192 }
127193128128- // Cache miss - fetch from auth server
194194+ // Cache miss - fetch fresh session data from auth server
129195 let client = reqwest::Client::new();
130196 let session_url = format!("{}/api/atprotocol/session", auth_base_url);
131197···145211 .await
146212 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
147213148148- // Extract PDS URL from session
214214+ // Extract the user's Personal Data Server endpoint URL
149215 let pds_url = session_data["pds_endpoint"]
150216 .as_str()
151217 .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
152218 .to_string();
153219154154- // Extract AT Protocol access token from session data
220220+ // Extract the access token used for authenticating with the PDS
155221 let atproto_access_token = session_data["access_token"]
156222 .as_str()
157223 .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
158224 .to_string();
159225160160- // Extract DPoP private key from session data - convert JWK to KeyData
226226+ // Extract and convert the DPoP JSON Web Key to internal key representation
161227 let dpop_jwk_value = session_data["dpop_jwk"].clone();
162228 let dpop_jwk: WrappedJsonWebKey = serde_json::from_value(dpop_jwk_value.clone())
163229 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
164230165165- let dpop_private_key_data = KeyData::try_from(dpop_jwk)
166166- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
231231+ let dpop_private_key_data =
232232+ KeyData::try_from(dpop_jwk).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
167233168234 let dpop_auth = DPoPAuth {
169235 dpop_private_key_data,
170236 oauth_access_token: atproto_access_token.clone(),
171237 };
172238173173- // Cache the session data if cache is provided (5 minute TTL)
239239+ // Cache the complete session for 5 minutes to avoid repeated auth server calls
174240 if let Some(cache) = &cache {
175241 let cached_session = CachedSession {
176242 pds_url: pds_url.clone(),
···180246 let session_value = serde_json::to_value(&cached_session)
181247 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
182248 let mut cache_lock = cache.lock().await;
183183- let _ = cache_lock.cache_atproto_session(token, &session_value, 300).await;
249249+ let _ = cache_lock
250250+ .cache_atproto_session(token, &session_value, 300)
251251+ .await;
184252 }
185253186254 Ok((dpop_auth, pds_url))
187187-}255255+}
···11-use base64::{Engine as _, engine::general_purpose};
22-use sqlx::PgPool;
33-44-use crate::errors::DatabaseError;
55-use crate::models::{
66- Actor, CollectionStats, IndexedRecord, OAuthClient, Record, SortField, WhereClause,
77- WhereCondition,
88-};
99-use std::collections::HashMap;
1010-1111-// Helper function to build ORDER BY clause from sortBy array
1212-fn build_order_by_clause(sort_by: Option<&Vec<SortField>>) -> String {
1313- match sort_by {
1414- Some(sort_fields) if !sort_fields.is_empty() => {
1515- let mut order_clauses = Vec::new();
1616- for sort_field in sort_fields {
1717- let field = &sort_field.field;
1818- let direction = match sort_field.direction.to_lowercase().as_str() {
1919- "desc" => "DESC",
2020- _ => "ASC", // Default to ASC
2121- };
2222-2323- // Validate field name to prevent SQL injection
2424- if field
2525- .chars()
2626- .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
2727- {
2828- if field == "indexed_at"
2929- || field == "uri"
3030- || field == "cid"
3131- || field == "did"
3232- || field == "collection"
3333- {
3434- order_clauses.push(format!("{field} {direction}"));
3535- } else {
3636- // For JSON fields, handle nested paths and NULLs properly
3737- if field.contains('.') {
3838- let parts: Vec<&str> = field.split('.').collect();
3939- let mut path = String::from("json");
4040- for (i, part) in parts.iter().enumerate() {
4141- if i == parts.len() - 1 {
4242- path.push_str(&format!("->>'{}'", part));
4343- } else {
4444- path.push_str(&format!("->'{}'", part));
4545- }
4646- }
4747- order_clauses.push(format!("{path} {direction} NULLS LAST"));
4848- } else {
4949- order_clauses.push(format!("json->>'{field}' {direction} NULLS LAST"));
5050- }
5151- }
5252- }
5353- }
5454- if !order_clauses.is_empty() {
5555- // Always add indexed_at as tie-breaker if not already included
5656- let has_indexed_at = order_clauses
5757- .iter()
5858- .any(|clause| clause.contains("indexed_at"));
5959- if !has_indexed_at {
6060- order_clauses.push("indexed_at DESC".to_string());
6161- }
6262- order_clauses.join(", ")
6363- } else {
6464- "indexed_at DESC".to_string()
6565- }
6666- }
6767- _ => "indexed_at DESC".to_string(),
6868- }
6969-}
7070-7171-fn generate_cursor(
7272- sort_value: &str,
7373- indexed_at: chrono::DateTime<chrono::Utc>,
7474- cid: &str,
7575-) -> String {
7676- let cursor_content = format!("{}::{}::{}", sort_value, indexed_at.to_rfc3339(), cid);
7777- general_purpose::URL_SAFE_NO_PAD.encode(cursor_content)
7878-}
7979-8080-// Extract the primary sort field from sortBy array for cursor generation
8181-fn get_primary_sort_field(sort_by: Option<&Vec<SortField>>) -> String {
8282- match sort_by {
8383- Some(sort_fields) if !sort_fields.is_empty() => sort_fields[0].field.clone(),
8484- _ => "indexed_at".to_string(),
8585- }
8686-}
8787-8888-// Generate cursor from record and sortBy array
8989-fn generate_cursor_from_record(record: &Record, sort_by: Option<&Vec<SortField>>) -> String {
9090- let primary_sort_field = get_primary_sort_field(sort_by);
9191-9292- // Extract sort value from the record based on the sort field
9393- let sort_value = match primary_sort_field.as_str() {
9494- "indexed_at" => record.indexed_at.to_rfc3339(),
9595- field => {
9696- // Extract field value from JSON
9797- record
9898- .json
9999- .get(field)
100100- .and_then(|v| match v {
101101- serde_json::Value::String(s) if !s.is_empty() => Some(s.clone()),
102102- serde_json::Value::Number(n) => Some(n.to_string()),
103103- serde_json::Value::Bool(b) => Some(b.to_string()),
104104- serde_json::Value::Null => None, // Explicitly handle null
105105- _ => None,
106106- })
107107- .unwrap_or_else(|| "NULL".to_string()) // Use "NULL" string for null values to match SQL NULLS LAST behavior
108108- }
109109- };
110110-111111- generate_cursor(&sort_value, record.indexed_at, &record.cid)
112112-}
113113-114114-// Helper function to build WHERE conditions from WhereClause
115115-fn build_where_conditions(
116116- where_clause: Option<&WhereClause>,
117117- param_count: &mut usize,
118118-) -> (Vec<String>, Vec<String>) {
119119- let mut where_clauses = Vec::new();
120120- let mut or_clauses = Vec::new();
121121-122122- if let Some(clause) = where_clause {
123123- // Process regular AND conditions
124124- for (field, condition) in &clause.conditions {
125125- let field_clause = build_single_condition(field, condition, param_count);
126126- where_clauses.push(field_clause);
127127- }
128128-129129- // Process OR conditions
130130- if let Some(or_conditions) = &clause.or_conditions {
131131- for (field, condition) in or_conditions {
132132- let field_clause = build_single_condition(field, condition, param_count);
133133- or_clauses.push(field_clause);
134134- }
135135- }
136136- }
137137-138138- (where_clauses, or_clauses)
139139-}
140140-141141-// Helper function to bind parameters from WhereClause
142142-fn bind_where_parameters<'q>(
143143- mut query_builder: sqlx::query::QueryAs<
144144- 'q,
145145- sqlx::Postgres,
146146- Record,
147147- sqlx::postgres::PgArguments,
148148- >,
149149- where_clause: Option<&'q WhereClause>,
150150-) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
151151- if let Some(clause) = where_clause {
152152- // Bind AND condition parameters
153153- for condition in clause.conditions.values() {
154154- query_builder = bind_single_condition(query_builder, condition);
155155- }
156156-157157- // Bind OR condition parameters
158158- if let Some(or_conditions) = &clause.or_conditions {
159159- for condition in or_conditions.values() {
160160- query_builder = bind_single_condition(query_builder, condition);
161161- }
162162- }
163163- }
164164- query_builder
165165-}
166166-167167-// Helper function to bind parameters for a single condition
168168-fn bind_single_condition<'q>(
169169- mut query_builder: sqlx::query::QueryAs<
170170- 'q,
171171- sqlx::Postgres,
172172- Record,
173173- sqlx::postgres::PgArguments,
174174- >,
175175- condition: &'q WhereCondition,
176176-) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
177177- if let Some(eq_value) = &condition.eq {
178178- if let Some(str_val) = eq_value.as_str() {
179179- query_builder = query_builder.bind(str_val);
180180- } else {
181181- query_builder = query_builder.bind(eq_value);
182182- }
183183- }
184184-185185- if let Some(in_values) = &condition.in_values {
186186- let str_values: Vec<String> = in_values
187187- .iter()
188188- .filter_map(|v| v.as_str().map(|s| s.to_string()))
189189- .collect();
190190- query_builder = query_builder.bind(str_values);
191191- }
192192-193193- if let Some(contains_value) = &condition.contains {
194194- query_builder = query_builder.bind(contains_value);
195195- }
196196-197197- query_builder
198198-}
199199-200200-// Helper function to build a single condition clause
201201-fn build_single_condition(
202202- field: &str,
203203- condition: &WhereCondition,
204204- param_count: &mut usize,
205205-) -> String {
206206- if let Some(_eq_value) = &condition.eq {
207207- let clause = match field {
208208- "did" | "collection" | "uri" | "cid" => {
209209- format!("{} = ${}", field, param_count)
210210- }
211211- _ => {
212212- let json_path = if field.contains('.') {
213213- let parts: Vec<&str> = field.split('.').collect();
214214- let mut path = String::from("json");
215215- for (i, part) in parts.iter().enumerate() {
216216- if i == parts.len() - 1 {
217217- path.push_str(&format!("->>'{}'", part));
218218- } else {
219219- path.push_str(&format!("->'{}'", part));
220220- }
221221- }
222222- path
223223- } else {
224224- format!("json->>'{}'", field)
225225- };
226226- format!("{} = ${}", json_path, param_count)
227227- }
228228- };
229229- *param_count += 1;
230230- clause
231231- } else if let Some(_in_values) = &condition.in_values {
232232- let clause = match field {
233233- "did" | "collection" | "uri" | "cid" => {
234234- format!("{} = ANY(${})", field, param_count)
235235- }
236236- _ => {
237237- let json_path = if field.contains('.') {
238238- let parts: Vec<&str> = field.split('.').collect();
239239- let mut path = String::from("json");
240240- for (i, part) in parts.iter().enumerate() {
241241- if i == parts.len() - 1 {
242242- path.push_str(&format!("->>'{}'", part));
243243- } else {
244244- path.push_str(&format!("->'{}'", part));
245245- }
246246- }
247247- path
248248- } else {
249249- format!("json->>'{}'", field)
250250- };
251251- format!("{} = ANY(${})", json_path, param_count)
252252- }
253253- };
254254- *param_count += 1;
255255- clause
256256- } else if let Some(_contains_value) = &condition.contains {
257257- let clause = if field == "json" {
258258- format!("json::text ILIKE '%' || ${} || '%'", param_count)
259259- } else {
260260- let json_path = if field.contains('.') {
261261- let parts: Vec<&str> = field.split('.').collect();
262262- let mut path = String::from("json");
263263- for (i, part) in parts.iter().enumerate() {
264264- if i == parts.len() - 1 {
265265- path.push_str(&format!("->>'{}'", part));
266266- } else {
267267- path.push_str(&format!("->'{}'", part));
268268- }
269269- }
270270- path
271271- } else {
272272- format!("json->>'{}'", field)
273273- };
274274- format!("{} ILIKE '%' || ${} || '%'", json_path, param_count)
275275- };
276276- *param_count += 1;
277277- clause
278278- } else {
279279- String::new() // Return empty if no conditions match
280280- }
281281-}
/// Thin, cloneable handle over the PostgreSQL connection pool used by the
/// record-indexing queries in this module.
#[derive(Clone)]
pub struct Database {
    /// Shared sqlx connection pool; cloning `Database` clones the pool handle,
    /// not the underlying connections.
    pool: PgPool,
}
287287-288288-impl Database {
289289- pub fn new(pool: PgPool) -> Self {
290290- Self { pool }
291291- }
292292-293293- pub fn from_pool(pool: PgPool) -> Self {
294294- Self::new(pool)
295295- }
296296-297297- #[allow(dead_code)]
298298- pub async fn insert_record(&self, record: &Record) -> Result<(), DatabaseError> {
299299- sqlx::query!(
300300- r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri")
301301- VALUES ($1, $2, $3, $4, $5, $6, $7)
302302- ON CONFLICT ON CONSTRAINT record_pkey
303303- DO UPDATE SET
304304- "cid" = EXCLUDED."cid",
305305- "json" = EXCLUDED."json",
306306- "indexed_at" = EXCLUDED."indexed_at""#,
307307- record.uri,
308308- record.cid,
309309- record.did,
310310- record.collection,
311311- record.json,
312312- record.indexed_at,
313313- record.slice_uri
314314- )
315315- .execute(&self.pool)
316316- .await?;
317317-318318- Ok(())
319319- }
320320-321321- pub async fn batch_insert_records(&self, records: &[Record]) -> Result<(), DatabaseError> {
322322- if records.is_empty() {
323323- return Ok(());
324324- }
325325-326326- // PostgreSQL has a limit on the number of parameters (65536 by default)
327327- // With 7 fields per record, we can safely batch up to ~9000 records at once
328328- const BATCH_SIZE: usize = 8000;
329329-330330- for chunk in records.chunks(BATCH_SIZE) {
331331- self.batch_insert_records_chunk(chunk).await?;
332332- }
333333-334334- Ok(())
335335- }
336336-337337- async fn batch_insert_records_chunk(&self, records: &[Record]) -> Result<(), DatabaseError> {
338338- let mut tx = self.pool.begin().await?;
339339-340340- // Build bulk INSERT with multiple VALUES
341341- let mut query = String::from(
342342- r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri") VALUES "#,
343343- );
344344-345345- // Add placeholders for each record
346346- for (i, _) in records.iter().enumerate() {
347347- if i > 0 {
348348- query.push_str(", ");
349349- }
350350- let base = i * 7 + 1; // 7 fields per record
351351- query.push_str(&format!(
352352- "(${}, ${}, ${}, ${}, ${}, ${}, ${})",
353353- base,
354354- base + 1,
355355- base + 2,
356356- base + 3,
357357- base + 4,
358358- base + 5,
359359- base + 6
360360- ));
361361- }
362362-363363- query.push_str(
364364- r#"
365365- ON CONFLICT ON CONSTRAINT record_pkey
366366- DO UPDATE SET
367367- "cid" = EXCLUDED."cid",
368368- "json" = EXCLUDED."json",
369369- "indexed_at" = EXCLUDED."indexed_at"
370370- "#,
371371- );
372372-373373- // Bind all parameters
374374- let mut sqlx_query = sqlx::query(&query);
375375- for record in records {
376376- sqlx_query = sqlx_query
377377- .bind(&record.uri)
378378- .bind(&record.cid)
379379- .bind(&record.did)
380380- .bind(&record.collection)
381381- .bind(&record.json)
382382- .bind(record.indexed_at)
383383- .bind(&record.slice_uri);
384384- }
385385-386386- sqlx_query.execute(&mut *tx).await?;
387387- tx.commit().await?;
388388-389389- Ok(())
390390- }
    /// Returns a map of record URI -> CID for every record a DID has in one
    /// collection of a slice.
    ///
    /// Intended for sync/diffing: callers can compare CIDs to decide which
    /// records are new or have changed content.
    pub async fn get_existing_record_cids_for_slice(
        &self,
        did: &str,
        collection: &str,
        slice_uri: &str,
    ) -> Result<std::collections::HashMap<String, String>, DatabaseError> {
        let records = sqlx::query!(
            r#"SELECT "uri", "cid"
            FROM "record"
            WHERE "did" = $1 AND "collection" = $2 AND "slice_uri" = $3"#,
            did,
            collection,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // Fold rows into a uri -> cid lookup table.
        let mut cid_map = std::collections::HashMap::new();
        for record in records {
            cid_map.insert(record.uri, record.cid);
        }
        Ok(cid_map)
    }
415415-416416- pub async fn get_record(&self, uri: &str) -> Result<Option<IndexedRecord>, DatabaseError> {
417417- let record = sqlx::query_as::<_, Record>(
418418- r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
419419- FROM "record"
420420- WHERE "uri" = $1"#,
421421- )
422422- .bind(uri)
423423- .fetch_optional(&self.pool)
424424- .await?;
425425-426426- let indexed_record = record.map(|record| IndexedRecord {
427427- uri: record.uri,
428428- cid: record.cid,
429429- did: record.did,
430430- collection: record.collection,
431431- value: record.json,
432432- indexed_at: record.indexed_at.to_rfc3339(),
433433- });
434434-435435- Ok(indexed_record)
436436- }
437437-438438- pub async fn get_lexicons_by_slice(
439439- &self,
440440- slice_uri: &str,
441441- ) -> Result<Vec<serde_json::Value>, DatabaseError> {
442442- let records = sqlx::query_as::<_, Record>(
443443- r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
444444- FROM "record"
445445- WHERE "collection" = 'network.slices.lexicon'
446446- AND "json"->>'slice' = $1
447447- ORDER BY "indexed_at" DESC"#,
448448- )
449449- .bind(slice_uri)
450450- .fetch_all(&self.pool)
451451- .await?;
452452-453453- let lexicon_definitions: Vec<serde_json::Value> = records
454454- .into_iter()
455455- .filter_map(|record| {
456456- let nsid = record.json.get("nsid")?.as_str()?;
457457- let definitions_str = record.json.get("definitions")?.as_str()?;
458458- let definitions: serde_json::Value = serde_json::from_str(definitions_str).ok()?;
459459-460460- Some(serde_json::json!({
461461- "lexicon": 1,
462462- "id": nsid,
463463- "defs": definitions
464464- }))
465465- })
466466- .collect();
467467-468468- Ok(lexicon_definitions)
469469- }
    /// Updates an existing record's mutable content (cid, json, indexed_at).
    ///
    /// The record is addressed by (uri, slice_uri); the identity columns
    /// themselves are never modified.
    ///
    /// # Errors
    /// Returns `DatabaseError::RecordNotFound` when no row matched.
    pub async fn update_record(&self, record: &Record) -> Result<(), DatabaseError> {
        let result = sqlx::query!(
            r#"UPDATE "record"
            SET "cid" = $1, "json" = $2, "indexed_at" = $3
            WHERE "uri" = $4 AND "slice_uri" = $5"#,
            record.cid,
            record.json,
            record.indexed_at,
            record.uri,
            record.slice_uri
        )
        .execute(&self.pool)
        .await?;

        // Zero affected rows means the (uri, slice_uri) pair does not exist.
        if result.rows_affected() == 0 {
            return Err(DatabaseError::RecordNotFound {
                uri: record.uri.clone(),
            });
        }

        Ok(())
    }
    /// Inserts actors in a single transaction, upserting on (did, slice_uri).
    ///
    /// Existing actors get their handle and indexed_at refreshed from the
    /// incoming data. A no-op for an empty input slice.
    pub async fn batch_insert_actors(&self, actors: &[Actor]) -> Result<(), DatabaseError> {
        if actors.is_empty() {
            return Ok(());
        }

        // All rows are written atomically: either every actor lands or none.
        let mut tx = self.pool.begin().await?;

        // Process actors in chunks to avoid hitting parameter limits
        // NOTE(review): each actor below is executed as its own statement, so
        // chunking has no effect on parameter counts — confirm whether a
        // multi-row VALUES insert was the original intent.
        const CHUNK_SIZE: usize = 1000;

        for chunk in actors.chunks(CHUNK_SIZE) {
            for actor in chunk {
                sqlx::query!(
                    r#"INSERT INTO "actor" ("did", "handle", "slice_uri", "indexed_at")
                    VALUES ($1, $2, $3, $4)
                    ON CONFLICT ("did", "slice_uri")
                    DO UPDATE SET
                        "handle" = EXCLUDED."handle",
                        "indexed_at" = EXCLUDED."indexed_at""#,
                    actor.did,
                    actor.handle,
                    actor.slice_uri,
                    actor.indexed_at
                )
                .execute(&mut *tx)
                .await?;
            }
        }

        tx.commit().await?;
        Ok(())
    }
    /// Per-collection record statistics for a slice.
    ///
    /// Only collections registered for the slice via a
    /// 'network.slices.lexicon' record whose main definition is of type
    /// 'record' and which is not excluded from sync are counted.
    ///
    /// # Returns
    /// One `CollectionStats` per collection: total records and distinct DIDs.
    pub async fn get_slice_collection_stats(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<CollectionStats>, DatabaseError> {
        let stats = sqlx::query!(
            r#"
            WITH slice_collections AS (
                SELECT DISTINCT
                    json->>'nsid' as collection_nsid
                FROM record
                WHERE collection = 'network.slices.lexicon'
                AND json->>'slice' = $1
                AND json->>'nsid' IS NOT NULL
                AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
                AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            )
            SELECT
                r.collection,
                COUNT(*) as record_count,
                COUNT(DISTINCT r.did) as unique_actors
            FROM record r
            INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
            WHERE r.slice_uri = $1
            GROUP BY r.collection
            ORDER BY r.collection
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // COUNT(*) columns come back as Option from sqlx; default to 0.
        Ok(stats
            .into_iter()
            .map(|row| CollectionStats {
                collection: row.collection,
                record_count: row.record_count.unwrap_or(0),
                unique_actors: row.unique_actors.unwrap_or(0),
            })
            .collect())
    }
    /// Lists the collection NSIDs registered for a slice.
    ///
    /// Mirrors the filter used by `get_slice_collection_stats`: record-type
    /// lexicons for the slice that are not excluded from sync, ordered by
    /// NSID.
    pub async fn get_slice_collections_list(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<String>, DatabaseError> {
        let rows = sqlx::query!(
            r#"
            SELECT DISTINCT json->>'nsid' as collection_nsid
            FROM record
            WHERE collection = 'network.slices.lexicon'
            AND json->>'slice' = $1
            AND json->>'nsid' IS NOT NULL
            AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
            AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            ORDER BY json->>'nsid'
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // collection_nsid is an expression column, hence Option — drop NULLs.
        Ok(rows
            .into_iter()
            .filter_map(|row| row.collection_nsid)
            .collect())
    }
    /// Counts all records in a slice across its registered collections.
    ///
    /// Uses the same record-type / not-excluded lexicon filter as
    /// `get_slice_collection_stats`, so the total matches the sum of that
    /// method's per-collection counts.
    pub async fn get_slice_total_records(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            WITH slice_collections AS (
                SELECT DISTINCT
                    json->>'nsid' as collection_nsid
                FROM record
                WHERE collection = 'network.slices.lexicon'
                AND json->>'slice' = $1
                AND json->>'nsid' IS NOT NULL
                AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
                AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            )
            SELECT COUNT(*) as count
            FROM record r
            INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
            WHERE r.slice_uri = $1
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Counts the actors tracked for a slice.
    pub async fn get_slice_total_actors(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM actor
            WHERE slice_uri = $1
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Queries actors for a slice with optional filtering and keyset
    /// (did-based) pagination.
    ///
    /// Supported filters (first match wins — `handle` takes precedence over
    /// `did`):
    /// - `handle.contains` → case-insensitive substring match (ILIKE)
    /// - `handle.eq`       → exact handle match
    /// - `did.in_values`   → membership test (NOTE: this branch ignores the
    ///                        cursor, so IN queries are not paginated)
    /// - `did.eq`          → exact DID match
    ///
    /// Any other condition shape falls back to plain slice + cursor listing.
    ///
    /// # Returns
    /// `(actors, next_cursor)` — the cursor is the last returned DID.
    /// NOTE(review): a cursor is emitted even when the page is shorter than
    /// `limit`, so callers always make one extra (empty) request to detect
    /// the end.
    pub async fn get_slice_actors(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        where_conditions: Option<&HashMap<String, WhereCondition>>,
    ) -> Result<(Vec<Actor>, Option<String>), DatabaseError> {
        let limit = limit.unwrap_or(50).min(100); // Cap at 100

        // Handle where conditions with specific cases
        let records = if let Some(conditions) = where_conditions {
            // Check for handle contains filter
            if let Some(handle_condition) = conditions.get("handle") {
                if let Some(contains_value) = &handle_condition.contains {
                    // Substring search; wildcards wrap the raw value.
                    let pattern = format!("%{}%", contains_value);
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            pattern,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            pattern,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else if let Some(eq_value) = &handle_condition.eq {
                    // Non-string eq values degrade to "" (matches nothing useful).
                    let handle_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            handle_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            handle_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    // Default case with basic filtering
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else if let Some(did_condition) = conditions.get("did") {
                if let Some(in_values) = &did_condition.in_values {
                    // Keep only string entries; other JSON types are dropped.
                    let string_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str())
                        .map(|s| s.to_string())
                        .collect();

                    sqlx::query_as!(
                        Actor,
                        r#"
                        SELECT did, handle, slice_uri, indexed_at
                        FROM actor
                        WHERE slice_uri = $1 AND did = ANY($2)
                        ORDER BY did ASC
                        LIMIT $3
                        "#,
                        slice_uri,
                        &string_values,
                        limit as i64
                    )
                    .fetch_all(&self.pool)
                    .await?
                } else if let Some(eq_value) = &did_condition.eq {
                    let did_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            did_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            did_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    // Default case with basic filtering
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else {
                // Default case with basic filtering
                self.query_actors_with_cursor(slice_uri, cursor, limit)
                    .await?
            }
        } else {
            // No where conditions, just basic slice + cursor filtering
            self.query_actors_with_cursor(slice_uri, cursor, limit)
                .await?
        };

        // Generate cursor from the last record if there are any records
        let cursor = if records.is_empty() {
            None
        } else {
            records.last().map(|actor| actor.did.clone())
        };

        Ok((records, cursor))
    }
    /// Internal helper: plain actor listing for a slice with keyset
    /// pagination.
    ///
    /// Orders by `did` ascending; when a cursor is given, only actors with a
    /// strictly greater DID are returned.
    async fn query_actors_with_cursor(
        &self,
        slice_uri: &str,
        cursor: Option<&str>,
        limit: i32,
    ) -> Result<Vec<Actor>, DatabaseError> {
        match cursor {
            Some(cursor_did) => sqlx::query_as!(
                Actor,
                r#"
                SELECT did, handle, slice_uri, indexed_at
                FROM actor
                WHERE slice_uri = $1 AND did > $2
                ORDER BY did ASC
                LIMIT $3
                "#,
                slice_uri,
                cursor_did,
                limit as i64
            )
            .fetch_all(&self.pool)
            .await
            .map_err(DatabaseError::from),
            None => sqlx::query_as!(
                Actor,
                r#"
                SELECT did, handle, slice_uri, indexed_at
                FROM actor
                WHERE slice_uri = $1
                ORDER BY did ASC
                LIMIT $2
                "#,
                slice_uri,
                limit as i64
            )
            .fetch_all(&self.pool)
            .await
            .map_err(DatabaseError::from),
        }
    }
    /// Counts the record-type lexicons registered for a slice.
    ///
    /// Unlike the stats/list queries, this does NOT filter out lexicons
    /// marked `excludedFromSync`.
    pub async fn get_slice_lexicon_count(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM record
            WHERE collection = 'network.slices.lexicon'
            AND json->>'slice' = $1
            AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Pages records for a slice with dynamic filtering and sorting.
    ///
    /// `cursor` is an RFC 3339 timestamp; pagination moves backwards through
    /// `indexed_at` (rows strictly older than the cursor). When the filter
    /// targets the 'network.slices.lexicon' collection, the slice is matched
    /// via `json->>'slice'` instead of the `slice_uri` column.
    ///
    /// # Returns
    /// `(records, next_cursor)` — the cursor is derived from the last record
    /// via `generate_cursor_from_record`.
    pub async fn get_slice_collections_records(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        sort_by: Option<&Vec<SortField>>,
        where_clause: Option<&WhereClause>,
    ) -> Result<(Vec<Record>, Option<String>), DatabaseError> {
        let limit = limit.unwrap_or(50).min(100); // Cap at 100
        let order_by = build_order_by_clause(sort_by);

        // Build WHERE clause dynamically; param_count tracks the next
        // placeholder number and must stay in lockstep with the bind order
        // further down.
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Always filter by slice_uri, except for network.slices.lexicon which uses json->>'slice'
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str()) == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // Add cursor condition if present
        if cursor.is_some() {
            where_clauses.push(format!("indexed_at < ${}", param_count));
            param_count += 1;
        }

        // Use helper function to build where conditions
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        // Add OR conditions with proper parentheses if present
        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // Build the final query; only the placeholders vary — all fragments
        // are generated locally, never taken from user input.
        let where_sql = where_clauses.join(" AND ");
        let query = format!(
            "SELECT uri, cid, did, collection, json, indexed_at, slice_uri
            FROM record
            WHERE {}
            ORDER BY {}
            LIMIT ${}",
            where_sql, order_by, param_count
        );

        // Build query and bind parameters
        let mut query_builder = sqlx::query_as::<_, Record>(&query);

        // Bind slice_uri
        query_builder = query_builder.bind(slice_uri);

        // Bind cursor if present
        if let Some(cursor_time) = cursor {
            // NOTE(review): an unparseable cursor silently falls back to
            // now(), which re-serves the first page — consider surfacing an
            // error to the caller instead.
            let cursor_dt = cursor_time
                .parse::<chrono::DateTime<chrono::Utc>>()
                .unwrap_or_else(|_| chrono::Utc::now());
            query_builder = query_builder.bind(cursor_dt);
        }

        // Bind where condition values (must mirror build_where_conditions'
        // placeholder order).
        query_builder = bind_where_parameters(query_builder, where_clause);

        // Bind limit
        query_builder = query_builder.bind(limit as i64);

        // Execute query
        let records = query_builder.fetch_all(&self.pool).await?;

        // Generate cursor from the last record
        let cursor = if records.is_empty() {
            None
        } else {
            records
                .last()
                .map(|record| generate_cursor_from_record(record, sort_by))
        };

        Ok((records, cursor))
    }
    /// Counts records matching the same dynamic filter used by
    /// `get_slice_collections_records` (without cursor/limit).
    ///
    /// As there, a filter on collection = 'network.slices.lexicon' switches
    /// the slice match from the `slice_uri` column to `json->>'slice'`.
    pub async fn count_slice_collections_records(
        &self,
        slice_uri: &str,
        where_clause: Option<&WhereClause>,
    ) -> Result<i64, DatabaseError> {
        // Build WHERE clause dynamically
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Always filter by slice_uri, except for network.slices.lexicon which uses json->>'slice'
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str()) == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // Use helper function to build where conditions
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        // Add OR conditions with proper parentheses if present
        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // Build the final query
        let where_sql = if where_clauses.is_empty() {
            String::new()
        } else {
            format!(" WHERE {}", where_clauses.join(" AND "))
        };

        let query = format!("SELECT COUNT(*) as count FROM record{}", where_sql);

        // Execute query with parameters
        let mut query_builder = sqlx::query_scalar::<_, i64>(&query);
        query_builder = query_builder.bind(slice_uri);

        // Bind where condition values using helper
        // NOTE(review): binds iterate `conditions.values()` — this must visit
        // the map in the same order build_where_conditions numbered the
        // placeholders; confirm both share one iteration order.
        if let Some(clause) = where_clause {
            // Bind AND condition parameters
            for condition in clause.conditions.values() {
                if let Some(eq_value) = &condition.eq {
                    if let Some(str_val) = eq_value.as_str() {
                        query_builder = query_builder.bind(str_val);
                    } else {
                        query_builder = query_builder.bind(eq_value);
                    }
                }
                if let Some(in_values) = &condition.in_values {
                    let str_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect();
                    query_builder = query_builder.bind(str_values);
                }
                if let Some(contains_value) = &condition.contains {
                    // NOTE(review): bound without %...% wildcards here —
                    // confirm build_where_conditions adds them in the SQL.
                    query_builder = query_builder.bind(contains_value);
                }
            }

            // Bind OR condition parameters
            if let Some(or_conditions) = &clause.or_conditions {
                for condition in or_conditions.values() {
                    if let Some(eq_value) = &condition.eq {
                        if let Some(str_val) = eq_value.as_str() {
                            query_builder = query_builder.bind(str_val);
                        } else {
                            query_builder = query_builder.bind(eq_value);
                        }
                    }
                    if let Some(in_values) = &condition.in_values {
                        let str_values: Vec<String> = in_values
                            .iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect();
                        query_builder = query_builder.bind(str_values);
                    }
                    if let Some(contains_value) = &condition.contains {
                        query_builder = query_builder.bind(contains_value);
                    }
                }
            }
        }

        let count = query_builder.fetch_one(&self.pool).await?;
        Ok(count)
    }
10571057-10581058- pub async fn delete_record_by_uri(
10591059- &self,
10601060- uri: &str,
10611061- slice_uri: Option<&str>,
10621062- ) -> Result<u64, DatabaseError> {
10631063- let result = if let Some(slice_uri) = slice_uri {
10641064- sqlx::query("DELETE FROM record WHERE uri = $1 AND slice_uri = $2")
10651065- .bind(uri)
10661066- .bind(slice_uri)
10671067- .execute(&self.pool)
10681068- .await?
10691069- } else {
10701070- // Delete from all slices if no specific slice provided
10711071- sqlx::query("DELETE FROM record WHERE uri = $1")
10721072- .bind(uri)
10731073- .execute(&self.pool)
10741074- .await?
10751075- };
10761076- Ok(result.rows_affected())
10771077- }
    /// Inserts a record, or refreshes cid/json/indexed_at when it exists.
    ///
    /// # Returns
    /// `true` if a new row was inserted, `false` if an existing row was
    /// updated. Detection uses the Postgres `xmax` system column: it is 0 on
    /// a freshly inserted row and non-zero on a row rewritten by
    /// ON CONFLICT DO UPDATE.
    pub async fn upsert_record(&self, record: &Record) -> Result<bool, DatabaseError> {
        // Returns true if inserted, false if updated
        let result = sqlx::query_scalar::<_, bool>(
            r#"
            INSERT INTO record (uri, cid, did, collection, json, indexed_at, slice_uri)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT ON CONSTRAINT record_pkey DO UPDATE
            SET cid = EXCLUDED.cid,
                json = EXCLUDED.json,
                indexed_at = EXCLUDED.indexed_at
            RETURNING (xmax = 0)
            "#,
        )
        .bind(&record.uri)
        .bind(&record.cid)
        .bind(&record.did)
        .bind(&record.collection)
        .bind(&record.json)
        .bind(record.indexed_at)
        .bind(&record.slice_uri)
        .fetch_one(&self.pool)
        .await?;
        Ok(result)
    }
11031103-11041104- pub async fn get_all_slices(&self) -> Result<Vec<String>, DatabaseError> {
11051105- let rows: Vec<(String,)> = sqlx::query_as(
11061106- r#"
11071107- SELECT DISTINCT json->>'slice' as slice_uri
11081108- FROM record
11091109- WHERE collection = 'network.slices.lexicon'
11101110- AND json->>'slice' IS NOT NULL
11111111- "#,
11121112- )
11131113- .fetch_all(&self.pool)
11141114- .await?;
11151115-11161116- Ok(rows.into_iter().map(|(uri,)| uri).collect())
11171117- }
11181118-11191119- pub async fn get_all_actors(&self) -> Result<Vec<(String, String)>, DatabaseError> {
11201120- let rows = sqlx::query!(
11211121- r#"
11221122- SELECT did, slice_uri
11231123- FROM actor
11241124- "#
11251125- )
11261126- .fetch_all(&self.pool)
11271127- .await?;
11281128-11291129- Ok(rows
11301130- .into_iter()
11311131- .map(|row| (row.did, row.slice_uri))
11321132- .collect())
11331133- }
    /// Returns whether a DID still has any records indexed under a slice.
    ///
    /// Used e.g. to decide if an actor row can be garbage-collected.
    pub async fn actor_has_records(&self, did: &str, slice_uri: &str) -> Result<bool, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM record
            WHERE did = $1 AND slice_uri = $2
            "#,
            did,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;
        Ok(count.count.unwrap_or(0) > 0)
    }
    /// Removes an actor row for a (did, slice_uri) pair.
    ///
    /// # Returns
    /// Number of rows deleted (0 when the actor was not tracked).
    pub async fn delete_actor(&self, did: &str, slice_uri: &str) -> Result<u64, DatabaseError> {
        let result = sqlx::query!(
            r#"
            DELETE FROM actor
            WHERE did = $1 AND slice_uri = $2
            "#,
            did,
            slice_uri
        )
        .execute(&self.pool)
        .await?;
        Ok(result.rows_affected())
    }
11631163-11641164- pub async fn get_slice_domain(&self, slice_uri: &str) -> Result<Option<String>, DatabaseError> {
11651165- let row = sqlx::query!(
11661166- r#"
11671167- SELECT json->>'domain' as domain
11681168- FROM record
11691169- WHERE collection = 'network.slices.slice'
11701170- AND uri = $1
11711171- "#,
11721172- slice_uri
11731173- )
11741174- .fetch_optional(&self.pool)
11751175- .await?;
11761176-11771177- Ok(row.and_then(|r| r.domain))
11781178- }
    /// Registers a new OAuth client for a slice and returns the stored row.
    ///
    /// `registration_access_token` is optional and stored as-is;
    /// `created_at` is populated by the database.
    pub async fn create_oauth_client(
        &self,
        slice_uri: &str,
        client_id: &str,
        registration_access_token: Option<&str>,
        created_by_did: &str,
    ) -> Result<OAuthClient, DatabaseError> {
        let client = sqlx::query_as!(
            OAuthClient,
            r#"
            INSERT INTO oauth_clients (slice_uri, client_id, registration_access_token, created_by_did)
            VALUES ($1, $2, $3, $4)
            RETURNING id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            "#,
            slice_uri,
            client_id,
            registration_access_token,
            created_by_did
        )
        .fetch_one(&self.pool)
        .await?;

        Ok(client)
    }
    /// Lists the OAuth clients registered for a slice, newest first.
    pub async fn get_oauth_clients_for_slice(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<OAuthClient>, DatabaseError> {
        let clients = sqlx::query_as!(
            OAuthClient,
            r#"
            SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            FROM oauth_clients
            WHERE slice_uri = $1
            ORDER BY created_at DESC
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        Ok(clients)
    }
    /// Fetches a single OAuth client by its client_id.
    ///
    /// Returns `Ok(None)` when no such client is registered.
    pub async fn get_oauth_client_by_id(
        &self,
        client_id: &str,
    ) -> Result<Option<OAuthClient>, DatabaseError> {
        let client = sqlx::query_as!(
            OAuthClient,
            r#"
            SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            FROM oauth_clients
            WHERE client_id = $1
            "#,
            client_id
        )
        .fetch_optional(&self.pool)
        .await?;

        Ok(client)
    }
12431243-12441244- pub async fn delete_oauth_client(&self, client_id: &str) -> Result<(), DatabaseError> {
12451245- let result = sqlx::query!(
12461246- r#"
12471247- DELETE FROM oauth_clients
12481248- WHERE client_id = $1
12491249- "#,
12501250- client_id
12511251- )
12521252- .execute(&self.pool)
12531253- .await?;
12541254-12551255- if result.rows_affected() == 0 {
12561256- return Err(DatabaseError::RecordNotFound {
12571257- uri: client_id.to_string(),
12581258- });
12591259- }
12601260-12611261- Ok(())
12621262- }
12631263-12641264-12651265- pub async fn get_batch_sparkline_data(
12661266- &self,
12671267- slice_uris: &[String],
12681268- interval: &str,
12691269- duration_hours: i32,
12701270- ) -> Result<std::collections::HashMap<String, Vec<crate::models::SparklinePoint>>, DatabaseError> {
12711271- use chrono::{Duration, Utc};
12721272- let cutoff_time = Utc::now() - Duration::hours(duration_hours as i64);
12731273-12741274- let mut sparklines = std::collections::HashMap::new();
12751275-12761276- for slice_uri in slice_uris {
12771277- // Validate interval to prevent SQL injection
12781278- let interval_validated = match interval {
12791279- "minute" => "minute",
12801280- "day" => "day",
12811281- _ => "hour",
12821282- };
12831283-12841284- let query = format!(
12851285- r#"
12861286- SELECT
12871287- date_trunc('{}', indexed_at) as bucket,
12881288- COUNT(*) as count
12891289- FROM record
12901290- WHERE indexed_at >= $1
12911291- AND slice_uri = $2
12921292- GROUP BY bucket
12931293- ORDER BY bucket
12941294- "#,
12951295- interval_validated
12961296- );
12971297-12981298- let rows = sqlx::query_as::<_, (Option<chrono::DateTime<chrono::Utc>>, Option<i64>)>(&query)
12991299- .bind(cutoff_time)
13001300- .bind(slice_uri)
13011301- .fetch_all(&self.pool)
13021302- .await?;
13031303-13041304- let data_points = rows
13051305- .into_iter()
13061306- .map(|(bucket, count)| crate::models::SparklinePoint {
13071307- timestamp: bucket.unwrap().to_rfc3339(),
13081308- count: count.unwrap_or(0),
13091309- })
13101310- .collect();
13111311-13121312- sparklines.insert(slice_uri.clone(), data_points);
13131313- }
13141314-13151315- Ok(sparklines)
13161316- }
13171317-}
+326
api/src/database/actors.rs
···11+//! Actor management operations.
22+//!
33+//! This module handles database operations for ATProto actors (users/DIDs)
44+//! tracked within slices, including batch insertion, querying, and filtering.
55+66+use super::client::Database;
77+use super::types::WhereCondition;
88+use crate::errors::DatabaseError;
99+use crate::models::Actor;
1010+use std::collections::HashMap;
1111+1212+impl Database {
    /// Inserts multiple actors in batches with conflict resolution.
    ///
    /// Updates handle and indexed_at if an actor already exists for the
    /// (did, slice_uri) pair. A no-op for an empty input slice.
    pub async fn batch_insert_actors(&self, actors: &[Actor]) -> Result<(), DatabaseError> {
        if actors.is_empty() {
            return Ok(());
        }

        // All rows are written atomically: either every actor lands or none.
        let mut tx = self.pool.begin().await?;

        // NOTE(review): each actor below runs as its own statement, so this
        // chunking has no effect on parameter limits — confirm whether a
        // multi-row VALUES insert was the original intent.
        const CHUNK_SIZE: usize = 1000;

        for chunk in actors.chunks(CHUNK_SIZE) {
            for actor in chunk {
                sqlx::query!(
                    r#"INSERT INTO "actor" ("did", "handle", "slice_uri", "indexed_at")
                    VALUES ($1, $2, $3, $4)
                    ON CONFLICT ("did", "slice_uri")
                    DO UPDATE SET
                        "handle" = EXCLUDED."handle",
                        "indexed_at" = EXCLUDED."indexed_at""#,
                    actor.did,
                    actor.handle,
                    actor.slice_uri,
                    actor.indexed_at
                )
                .execute(&mut *tx)
                .await?;
            }
        }

        tx.commit().await?;
        Ok(())
    }
    /// Queries actors for a slice with optional filtering and cursor-based pagination.
    ///
    /// Supports filtering by:
    /// - handle (exact match or contains)
    /// - did (exact match or IN clause)
    ///
    /// Filter precedence: a `handle` condition wins over a `did` condition;
    /// unrecognized shapes fall back to plain slice + cursor listing.
    ///
    /// # Returns
    /// Tuple of (actors, next_cursor) where cursor is the last DID.
    /// NOTE(review): a cursor is returned even when the page is shorter than
    /// `limit`, and the `did IN (...)` branch ignores the cursor entirely —
    /// confirm both are intended.
    pub async fn get_slice_actors(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        where_conditions: Option<&HashMap<String, WhereCondition>>,
    ) -> Result<(Vec<Actor>, Option<String>), DatabaseError> {
        // Page size defaults to 50 and is capped at 100.
        let limit = limit.unwrap_or(50).min(100);

        let records = if let Some(conditions) = where_conditions {
            if let Some(handle_condition) = conditions.get("handle") {
                if let Some(contains_value) = &handle_condition.contains {
                    // Case-insensitive substring search via ILIKE.
                    let pattern = format!("%{}%", contains_value);
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            pattern,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            pattern,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else if let Some(eq_value) = &handle_condition.eq {
                    // Non-string eq values degrade to the empty string.
                    let handle_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            handle_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            handle_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else if let Some(did_condition) = conditions.get("did") {
                if let Some(in_values) = &did_condition.in_values {
                    // Keep only string entries; other JSON types are dropped.
                    let string_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str())
                        .map(|s| s.to_string())
                        .collect();

                    sqlx::query_as!(
                        Actor,
                        r#"
                        SELECT did, handle, slice_uri, indexed_at
                        FROM actor
                        WHERE slice_uri = $1 AND did = ANY($2)
                        ORDER BY did ASC
                        LIMIT $3
                        "#,
                        slice_uri,
                        &string_values,
                        limit as i64
                    )
                    .fetch_all(&self.pool)
                    .await?
                } else if let Some(eq_value) = &did_condition.eq {
                    let did_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            did_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            did_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else {
                self.query_actors_with_cursor(slice_uri, cursor, limit)
                    .await?
            }
        } else {
            self.query_actors_with_cursor(slice_uri, cursor, limit)
                .await?
        };

        // Next-page cursor is the DID of the last returned actor.
        let cursor = if records.is_empty() {
            None
        } else {
            records.last().map(|actor| actor.did.clone())
        };

        Ok((records, cursor))
    }
224224+225225+ /// Internal helper for basic actor queries with cursor pagination.
226226+ async fn query_actors_with_cursor(
227227+ &self,
228228+ slice_uri: &str,
229229+ cursor: Option<&str>,
230230+ limit: i32,
231231+ ) -> Result<Vec<Actor>, DatabaseError> {
232232+ match cursor {
233233+ Some(cursor_did) => sqlx::query_as!(
234234+ Actor,
235235+ r#"
236236+ SELECT did, handle, slice_uri, indexed_at
237237+ FROM actor
238238+ WHERE slice_uri = $1 AND did > $2
239239+ ORDER BY did ASC
240240+ LIMIT $3
241241+ "#,
242242+ slice_uri,
243243+ cursor_did,
244244+ limit as i64
245245+ )
246246+ .fetch_all(&self.pool)
247247+ .await
248248+ .map_err(DatabaseError::from),
249249+ None => sqlx::query_as!(
250250+ Actor,
251251+ r#"
252252+ SELECT did, handle, slice_uri, indexed_at
253253+ FROM actor
254254+ WHERE slice_uri = $1
255255+ ORDER BY did ASC
256256+ LIMIT $2
257257+ "#,
258258+ slice_uri,
259259+ limit as i64
260260+ )
261261+ .fetch_all(&self.pool)
262262+ .await
263263+ .map_err(DatabaseError::from),
264264+ }
265265+ }
266266+267267+ /// Gets all actors across all slices.
268268+ ///
269269+ /// # Returns
270270+ /// Vector of (did, slice_uri) tuples
271271+ pub async fn get_all_actors(&self) -> Result<Vec<(String, String)>, DatabaseError> {
272272+ let rows = sqlx::query!(
273273+ r#"
274274+ SELECT did, slice_uri
275275+ FROM actor
276276+ "#
277277+ )
278278+ .fetch_all(&self.pool)
279279+ .await?;
280280+281281+ Ok(rows
282282+ .into_iter()
283283+ .map(|row| (row.did, row.slice_uri))
284284+ .collect())
285285+ }
286286+287287+ /// Checks if an actor has any records in a slice.
288288+ ///
289289+ /// Used before actor deletion to maintain referential integrity.
290290+ pub async fn actor_has_records(
291291+ &self,
292292+ did: &str,
293293+ slice_uri: &str,
294294+ ) -> Result<bool, DatabaseError> {
295295+ let count = sqlx::query!(
296296+ r#"
297297+ SELECT COUNT(*) as count
298298+ FROM record
299299+ WHERE did = $1 AND slice_uri = $2
300300+ "#,
301301+ did,
302302+ slice_uri
303303+ )
304304+ .fetch_one(&self.pool)
305305+ .await?;
306306+ Ok(count.count.unwrap_or(0) > 0)
307307+ }
308308+309309+ /// Deletes an actor from a specific slice.
310310+ ///
311311+ /// # Returns
312312+ /// Number of rows affected
313313+ pub async fn delete_actor(&self, did: &str, slice_uri: &str) -> Result<u64, DatabaseError> {
314314+ let result = sqlx::query!(
315315+ r#"
316316+ DELETE FROM actor
317317+ WHERE did = $1 AND slice_uri = $2
318318+ "#,
319319+ did,
320320+ slice_uri
321321+ )
322322+ .execute(&self.pool)
323323+ .await?;
324324+ Ok(result.rows_affected())
325325+ }
326326+}
+75
api/src/database/analytics.rs
···11+//! Analytics and time-series data queries.
22+//!
33+//! This module handles database operations for generating analytics data,
44+//! including sparkline time-series data for record indexing activity.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::SparklinePoint;
99+use std::collections::HashMap;
1010+1111+impl Database {
1212+ /// Gets sparkline data for multiple slices in a single query batch.
1313+ ///
1414+ /// Generates time-bucketed counts of indexed records for visualization.
1515+ ///
1616+ /// # Arguments
1717+ /// * `slice_uris` - Array of slice URIs to get data for
1818+ /// * `interval` - Time bucket size: "minute", "hour", or "day"
1919+ /// * `duration_hours` - How many hours of history to include
2020+ ///
2121+ /// # Returns
2222+ /// HashMap mapping slice_uri -> array of (timestamp, count) data points
2323+ pub async fn get_batch_sparkline_data(
2424+ &self,
2525+ slice_uris: &[String],
2626+ interval: &str,
2727+ duration_hours: i32,
2828+ ) -> Result<HashMap<String, Vec<SparklinePoint>>, DatabaseError> {
2929+ use chrono::{Duration, Utc};
3030+ let cutoff_time = Utc::now() - Duration::hours(duration_hours as i64);
3131+3232+ let mut sparklines = HashMap::new();
3333+3434+ for slice_uri in slice_uris {
3535+ let interval_validated = match interval {
3636+ "minute" => "minute",
3737+ "day" => "day",
3838+ _ => "hour",
3939+ };
4040+4141+ let query = format!(
4242+ r#"
4343+ SELECT
4444+ date_trunc('{}', indexed_at) as bucket,
4545+ COUNT(*) as count
4646+ FROM record
4747+ WHERE indexed_at >= $1
4848+ AND slice_uri = $2
4949+ GROUP BY bucket
5050+ ORDER BY bucket
5151+ "#,
5252+ interval_validated
5353+ );
5454+5555+ let rows =
5656+ sqlx::query_as::<_, (Option<chrono::DateTime<chrono::Utc>>, Option<i64>)>(&query)
5757+ .bind(cutoff_time)
5858+ .bind(slice_uri)
5959+ .fetch_all(&self.pool)
6060+ .await?;
6161+6262+ let data_points = rows
6363+ .into_iter()
6464+ .map(|(bucket, count)| SparklinePoint {
6565+ timestamp: bucket.unwrap().to_rfc3339(),
6666+ count: count.unwrap_or(0),
6767+ })
6868+ .collect();
6969+7070+ sparklines.insert(slice_uri.clone(), data_points);
7171+ }
7272+7373+ Ok(sparklines)
7474+ }
7575+}
+23
api/src/database/client.rs
···11+use sqlx::PgPool;
22+33+/// Core database client for interacting with PostgreSQL.
44+///
55+/// The Database struct wraps a connection pool and provides methods for
66+/// all database operations across records, actors, slices, OAuth, and analytics.
77+#[derive(Clone)]
88+pub struct Database {
99+ pub(super) pool: PgPool,
1010+}
1111+1212+impl Database {
1313+ /// Creates a new Database instance from a connection pool.
1414+ pub fn new(pool: PgPool) -> Self {
1515+ Self { pool }
1616+ }
1717+1818+ /// Creates a new Database instance from a connection pool.
1919+ /// Alias for `new()` for clarity in some contexts.
2020+ pub fn from_pool(pool: PgPool) -> Self {
2121+ Self::new(pool)
2222+ }
2323+}
+77
api/src/database/cursor.rs
···11+//! Cursor-based pagination utilities.
22+//!
33+//! Cursors encode the position in a result set as base64(sort_value::indexed_at::cid)
44+//! to enable stable pagination even when new records are inserted.
55+66+use super::types::SortField;
77+use crate::models::Record;
88+use base64::{Engine as _, engine::general_purpose};
99+1010+/// Generates a base64-encoded cursor from sort value, timestamp, and CID.
1111+///
1212+/// The cursor format is: `base64(sort_value::indexed_at::cid)`
1313+///
1414+/// # Arguments
1515+/// * `sort_value` - The value of the primary sort field
1616+/// * `indexed_at` - The timestamp when the record was indexed
1717+/// * `cid` - The content identifier (CID) of the record
1818+///
1919+/// # Returns
2020+/// Base64-encoded cursor string
2121+pub fn generate_cursor(
2222+ sort_value: &str,
2323+ indexed_at: chrono::DateTime<chrono::Utc>,
2424+ cid: &str,
2525+) -> String {
2626+ let cursor_content = format!("{}::{}::{}", sort_value, indexed_at.to_rfc3339(), cid);
2727+ general_purpose::URL_SAFE_NO_PAD.encode(cursor_content)
2828+}
2929+3030+/// Extracts the primary sort field name from a sort array.
3131+///
3232+/// Returns "indexed_at" if no sort fields are provided.
3333+///
3434+/// # Arguments
3535+/// * `sort_by` - Optional array of sort fields
3636+///
3737+/// # Returns
3838+/// The name of the primary sort field
3939+pub fn get_primary_sort_field(sort_by: Option<&Vec<SortField>>) -> String {
4040+ match sort_by {
4141+ Some(sort_fields) if !sort_fields.is_empty() => sort_fields[0].field.clone(),
4242+ _ => "indexed_at".to_string(),
4343+ }
4444+}
4545+4646+/// Generates a cursor from a record based on the sort configuration.
4747+///
4848+/// Extracts the sort value from the record (either from a table column
4949+/// or from the JSON field), then creates a cursor encoding that value
5050+/// along with indexed_at and cid.
5151+///
5252+/// # Arguments
5353+/// * `record` - The record to generate a cursor for
5454+/// * `sort_by` - Optional array defining sort fields
5555+///
5656+/// # Returns
5757+/// Base64-encoded cursor string
5858+pub fn generate_cursor_from_record(record: &Record, sort_by: Option<&Vec<SortField>>) -> String {
5959+ let primary_sort_field = get_primary_sort_field(sort_by);
6060+6161+ let sort_value = match primary_sort_field.as_str() {
6262+ "indexed_at" => record.indexed_at.to_rfc3339(),
6363+ field => record
6464+ .json
6565+ .get(field)
6666+ .and_then(|v| match v {
6767+ serde_json::Value::String(s) if !s.is_empty() => Some(s.clone()),
6868+ serde_json::Value::Number(n) => Some(n.to_string()),
6969+ serde_json::Value::Bool(b) => Some(b.to_string()),
7070+ serde_json::Value::Null => None,
7171+ _ => None,
7272+ })
7373+ .unwrap_or_else(|| "NULL".to_string()),
7474+ };
7575+7676+ generate_cursor(&sort_value, record.indexed_at, &record.cid)
7777+}
+12
api/src/database/mod.rs
···11+mod actors;
22+mod analytics;
33+mod client;
44+mod cursor;
55+mod oauth;
66+mod query_builder;
77+mod records;
88+mod slices;
99+pub mod types;
1010+1111+pub use client::Database;
1212+pub use types::{SortField, WhereClause, WhereCondition};
+115
api/src/database/oauth.rs
···11+//! OAuth client management operations.
22+//!
33+//! This module handles database operations for OAuth client registrations
44+//! associated with slices, including creation, retrieval, and deletion.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::OAuthClient;
99+1010+impl Database {
1111+ /// Creates a new OAuth client registration for a slice.
1212+ ///
1313+ /// # Arguments
1414+ /// * `slice_uri` - The slice this client is registered for
1515+ /// * `client_id` - The OAuth client ID from the authorization server
1616+ /// * `registration_access_token` - Optional token for client management
1717+ /// * `created_by_did` - The DID of the user who created this client
1818+ ///
1919+ /// # Returns
2020+ /// The created OAuthClient with generated ID and timestamp
2121+ pub async fn create_oauth_client(
2222+ &self,
2323+ slice_uri: &str,
2424+ client_id: &str,
2525+ registration_access_token: Option<&str>,
2626+ created_by_did: &str,
2727+ ) -> Result<OAuthClient, DatabaseError> {
2828+ let client = sqlx::query_as!(
2929+ OAuthClient,
3030+ r#"
3131+ INSERT INTO oauth_clients (slice_uri, client_id, registration_access_token, created_by_did)
3232+ VALUES ($1, $2, $3, $4)
3333+ RETURNING id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
3434+ "#,
3535+ slice_uri,
3636+ client_id,
3737+ registration_access_token,
3838+ created_by_did
3939+ )
4040+ .fetch_one(&self.pool)
4141+ .await?;
4242+4343+ Ok(client)
4444+ }
4545+4646+ /// Gets all OAuth clients registered for a specific slice.
4747+ ///
4848+ /// Results are ordered by creation time, most recent first.
4949+ pub async fn get_oauth_clients_for_slice(
5050+ &self,
5151+ slice_uri: &str,
5252+ ) -> Result<Vec<OAuthClient>, DatabaseError> {
5353+ let clients = sqlx::query_as!(
5454+ OAuthClient,
5555+ r#"
5656+ SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
5757+ FROM oauth_clients
5858+ WHERE slice_uri = $1
5959+ ORDER BY created_at DESC
6060+ "#,
6161+ slice_uri
6262+ )
6363+ .fetch_all(&self.pool)
6464+ .await?;
6565+6666+ Ok(clients)
6767+ }
6868+6969+ /// Gets a single OAuth client by its client_id.
7070+ ///
7171+ /// # Returns
7272+ /// Some(OAuthClient) if found, None otherwise
7373+ pub async fn get_oauth_client_by_id(
7474+ &self,
7575+ client_id: &str,
7676+ ) -> Result<Option<OAuthClient>, DatabaseError> {
7777+ let client = sqlx::query_as!(
7878+ OAuthClient,
7979+ r#"
8080+ SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
8181+ FROM oauth_clients
8282+ WHERE client_id = $1
8383+ "#,
8484+ client_id
8585+ )
8686+ .fetch_optional(&self.pool)
8787+ .await?;
8888+8989+ Ok(client)
9090+ }
9191+9292+ /// Deletes an OAuth client by its client_id.
9393+ ///
9494+ /// # Returns
9595+ /// Error if no client with the given client_id exists
9696+ pub async fn delete_oauth_client(&self, client_id: &str) -> Result<(), DatabaseError> {
9797+ let result = sqlx::query!(
9898+ r#"
9999+ DELETE FROM oauth_clients
100100+ WHERE client_id = $1
101101+ "#,
102102+ client_id
103103+ )
104104+ .execute(&self.pool)
105105+ .await?;
106106+107107+ if result.rows_affected() == 0 {
108108+ return Err(DatabaseError::RecordNotFound {
109109+ uri: client_id.to_string(),
110110+ });
111111+ }
112112+113113+ Ok(())
114114+ }
115115+}
+253
api/src/database/query_builder.rs
···11+//! SQL query building utilities for dynamic WHERE and ORDER BY clauses.
22+//!
33+//! This module provides helpers for constructing SQL queries dynamically
44+//! based on user input while preventing SQL injection attacks.
55+66+use super::types::{SortField, WhereClause, WhereCondition};
77+use crate::models::Record;
88+99+/// Builds an ORDER BY clause from an optional array of sort fields.
1010+///
1111+/// Handles both table columns (indexed_at, uri, cid, did, collection)
1212+/// and JSON fields with nested paths. Always adds indexed_at as a
1313+/// tie-breaker if not already present.
1414+///
1515+/// # Arguments
1616+/// * `sort_by` - Optional array of fields to sort by
1717+///
1818+/// # Returns
1919+/// SQL ORDER BY clause string (without "ORDER BY" prefix)
2020+pub fn build_order_by_clause(sort_by: Option<&Vec<SortField>>) -> String {
2121+ match sort_by {
2222+ Some(sort_fields) if !sort_fields.is_empty() => {
2323+ let mut order_clauses = Vec::new();
2424+ for sort_field in sort_fields {
2525+ let field = &sort_field.field;
2626+ let direction = match sort_field.direction.to_lowercase().as_str() {
2727+ "desc" => "DESC",
2828+ _ => "ASC",
2929+ };
3030+3131+ if field
3232+ .chars()
3333+ .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
3434+ {
3535+ if field == "indexed_at"
3636+ || field == "uri"
3737+ || field == "cid"
3838+ || field == "did"
3939+ || field == "collection"
4040+ {
4141+ order_clauses.push(format!("{field} {direction}"));
4242+ } else {
4343+ if field.contains('.') {
4444+ let parts: Vec<&str> = field.split('.').collect();
4545+ let mut path = String::from("json");
4646+ for (i, part) in parts.iter().enumerate() {
4747+ if i == parts.len() - 1 {
4848+ path.push_str(&format!("->>'{}'", part));
4949+ } else {
5050+ path.push_str(&format!("->'{}'", part));
5151+ }
5252+ }
5353+ order_clauses.push(format!("{path} {direction} NULLS LAST"));
5454+ } else {
5555+ order_clauses.push(format!("json->>'{field}' {direction} NULLS LAST"));
5656+ }
5757+ }
5858+ }
5959+ }
6060+ if !order_clauses.is_empty() {
6161+ let has_indexed_at = order_clauses
6262+ .iter()
6363+ .any(|clause| clause.contains("indexed_at"));
6464+ if !has_indexed_at {
6565+ order_clauses.push("indexed_at DESC".to_string());
6666+ }
6767+ order_clauses.join(", ")
6868+ } else {
6969+ "indexed_at DESC".to_string()
7070+ }
7171+ }
7272+ _ => "indexed_at DESC".to_string(),
7373+ }
7474+}
7575+7676+/// Builds WHERE conditions from a WhereClause structure.
7777+///
7878+/// Returns separate arrays for AND conditions and OR conditions
7979+/// to be combined in the final query.
8080+///
8181+/// # Arguments
8282+/// * `where_clause` - Optional where clause with AND/OR conditions
8383+/// * `param_count` - Mutable counter for parameter numbering ($1, $2, etc)
8484+///
8585+/// # Returns
8686+/// Tuple of (and_conditions, or_conditions) as SQL strings
8787+pub fn build_where_conditions(
8888+ where_clause: Option<&WhereClause>,
8989+ param_count: &mut usize,
9090+) -> (Vec<String>, Vec<String>) {
9191+ let mut where_clauses = Vec::new();
9292+ let mut or_clauses = Vec::new();
9393+9494+ if let Some(clause) = where_clause {
9595+ for (field, condition) in &clause.conditions {
9696+ let field_clause = build_single_condition(field, condition, param_count);
9797+ where_clauses.push(field_clause);
9898+ }
9999+100100+ if let Some(or_conditions) = &clause.or_conditions {
101101+ for (field, condition) in or_conditions {
102102+ let field_clause = build_single_condition(field, condition, param_count);
103103+ or_clauses.push(field_clause);
104104+ }
105105+ }
106106+ }
107107+108108+ (where_clauses, or_clauses)
109109+}
110110+111111+/// Builds a single SQL condition clause for a field.
112112+///
113113+/// Supports equality (eq), array membership (in_values), and pattern matching (contains)
114114+/// for both table columns and JSON fields with nested paths.
115115+///
116116+/// # Arguments
117117+/// * `field` - Field name (table column or JSON path)
118118+/// * `condition` - The condition to apply (eq, in_values, or contains)
119119+/// * `param_count` - Mutable counter for parameter numbering
120120+///
121121+/// # Returns
122122+/// SQL condition string with parameter placeholder
123123+pub fn build_single_condition(
124124+ field: &str,
125125+ condition: &WhereCondition,
126126+ param_count: &mut usize,
127127+) -> String {
128128+ if let Some(_eq_value) = &condition.eq {
129129+ let clause = match field {
130130+ "did" | "collection" | "uri" | "cid" => {
131131+ format!("{} = ${}", field, param_count)
132132+ }
133133+ _ => {
134134+ let json_path = build_json_path(field);
135135+ format!("{} = ${}", json_path, param_count)
136136+ }
137137+ };
138138+ *param_count += 1;
139139+ clause
140140+ } else if let Some(_in_values) = &condition.in_values {
141141+ let clause = match field {
142142+ "did" | "collection" | "uri" | "cid" => {
143143+ format!("{} = ANY(${})", field, param_count)
144144+ }
145145+ _ => {
146146+ let json_path = build_json_path(field);
147147+ format!("{} = ANY(${})", json_path, param_count)
148148+ }
149149+ };
150150+ *param_count += 1;
151151+ clause
152152+ } else if let Some(_contains_value) = &condition.contains {
153153+ let clause = if field == "json" {
154154+ format!("json::text ILIKE '%' || ${} || '%'", param_count)
155155+ } else {
156156+ let json_path = build_json_path(field);
157157+ format!("{} ILIKE '%' || ${} || '%'", json_path, param_count)
158158+ };
159159+ *param_count += 1;
160160+ clause
161161+ } else {
162162+ String::new()
163163+ }
164164+}
/// Builds a PostgreSQL JSON path accessor string.
///
/// Converts dot notation (e.g., "user.name") into PostgreSQL JSON
/// operators (e.g., "json->'user'->>'name'"); the final segment uses ->>
/// so the value is extracted as text.
fn build_json_path(field: &str) -> String {
    match field.rsplit_once('.') {
        // No dots: single top-level text extraction.
        None => format!("json->>'{field}'"),
        Some((prefix, last)) => {
            let mut path = String::from("json");
            for part in prefix.split('.') {
                path.push_str(&format!("->'{part}'"));
            }
            path.push_str(&format!("->>'{last}'"));
            path
        }
    }
}
186186+187187+/// Binds WHERE clause parameters to a sqlx query.
188188+///
189189+/// Iterates through all conditions and binds their values in the correct order.
190190+///
191191+/// # Arguments
192192+/// * `query_builder` - The sqlx query to bind parameters to
193193+/// * `where_clause` - Optional where clause with parameter values
194194+///
195195+/// # Returns
196196+/// Query builder with all parameters bound
197197+pub fn bind_where_parameters<'q>(
198198+ mut query_builder: sqlx::query::QueryAs<
199199+ 'q,
200200+ sqlx::Postgres,
201201+ Record,
202202+ sqlx::postgres::PgArguments,
203203+ >,
204204+ where_clause: Option<&'q WhereClause>,
205205+) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
206206+ if let Some(clause) = where_clause {
207207+ for condition in clause.conditions.values() {
208208+ query_builder = bind_single_condition(query_builder, condition);
209209+ }
210210+211211+ if let Some(or_conditions) = &clause.or_conditions {
212212+ for condition in or_conditions.values() {
213213+ query_builder = bind_single_condition(query_builder, condition);
214214+ }
215215+ }
216216+ }
217217+ query_builder
218218+}
219219+220220+/// Binds parameters for a single condition to a sqlx query.
221221+///
222222+/// Handles eq (single value), in_values (array), and contains (pattern) conditions.
223223+fn bind_single_condition<'q>(
224224+ mut query_builder: sqlx::query::QueryAs<
225225+ 'q,
226226+ sqlx::Postgres,
227227+ Record,
228228+ sqlx::postgres::PgArguments,
229229+ >,
230230+ condition: &'q WhereCondition,
231231+) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
232232+ if let Some(eq_value) = &condition.eq {
233233+ if let Some(str_val) = eq_value.as_str() {
234234+ query_builder = query_builder.bind(str_val);
235235+ } else {
236236+ query_builder = query_builder.bind(eq_value);
237237+ }
238238+ }
239239+240240+ if let Some(in_values) = &condition.in_values {
241241+ let str_values: Vec<String> = in_values
242242+ .iter()
243243+ .filter_map(|v| v.as_str().map(|s| s.to_string()))
244244+ .collect();
245245+ query_builder = query_builder.bind(str_values);
246246+ }
247247+248248+ if let Some(contains_value) = &condition.contains {
249249+ query_builder = query_builder.bind(contains_value);
250250+ }
251251+252252+ query_builder
253253+}
+468
api/src/database/records.rs
···11+//! Record CRUD operations and queries.
22+//!
33+//! This module handles all database operations related to ATProto records,
44+//! including insertion, updates, deletion, and complex queries with filtering,
55+//! sorting, and pagination.
66+77+use super::client::Database;
88+use super::cursor::generate_cursor_from_record;
99+use super::query_builder::{bind_where_parameters, build_order_by_clause, build_where_conditions};
1010+use super::types::{SortField, WhereClause};
1111+use crate::errors::DatabaseError;
1212+use crate::models::{IndexedRecord, Record};
1313+1414+impl Database {
1515+ /// Inserts a single record into the database.
1616+ ///
1717+ /// Uses ON CONFLICT to update existing records with matching URI and slice_uri.
1818+ #[allow(dead_code)]
1919+ pub async fn insert_record(&self, record: &Record) -> Result<(), DatabaseError> {
2020+ sqlx::query!(
2121+ r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri")
2222+ VALUES ($1, $2, $3, $4, $5, $6, $7)
2323+ ON CONFLICT ON CONSTRAINT record_pkey
2424+ DO UPDATE SET
2525+ "cid" = EXCLUDED."cid",
2626+ "json" = EXCLUDED."json",
2727+ "indexed_at" = EXCLUDED."indexed_at""#,
2828+ record.uri,
2929+ record.cid,
3030+ record.did,
3131+ record.collection,
3232+ record.json,
3333+ record.indexed_at,
3434+ record.slice_uri
3535+ )
3636+ .execute(&self.pool)
3737+ .await?;
3838+3939+ Ok(())
4040+ }
4141+4242+ /// Inserts multiple records in optimized batches.
4343+ ///
4444+ /// Automatically chunks records to stay within PostgreSQL parameter limits
4545+ /// (65536 parameters, ~8000 records per batch with 7 fields each).
4646+ pub async fn batch_insert_records(&self, records: &[Record]) -> Result<(), DatabaseError> {
4747+ if records.is_empty() {
4848+ return Ok(());
4949+ }
5050+5151+ const BATCH_SIZE: usize = 8000;
5252+5353+ for chunk in records.chunks(BATCH_SIZE) {
5454+ self.batch_insert_records_chunk(chunk).await?;
5555+ }
5656+5757+ Ok(())
5858+ }
5959+6060+ /// Internal helper to insert a single chunk of records.
6161+ async fn batch_insert_records_chunk(&self, records: &[Record]) -> Result<(), DatabaseError> {
6262+ let mut tx = self.pool.begin().await?;
6363+6464+ let mut query = String::from(
6565+ r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri") VALUES "#,
6666+ );
6767+6868+ for (i, _) in records.iter().enumerate() {
6969+ if i > 0 {
7070+ query.push_str(", ");
7171+ }
7272+ let base = i * 7 + 1;
7373+ query.push_str(&format!(
7474+ "(${}, ${}, ${}, ${}, ${}, ${}, ${})",
7575+ base,
7676+ base + 1,
7777+ base + 2,
7878+ base + 3,
7979+ base + 4,
8080+ base + 5,
8181+ base + 6
8282+ ));
8383+ }
8484+8585+ query.push_str(
8686+ r#"
8787+ ON CONFLICT ON CONSTRAINT record_pkey
8888+ DO UPDATE SET
8989+ "cid" = EXCLUDED."cid",
9090+ "json" = EXCLUDED."json",
9191+ "indexed_at" = EXCLUDED."indexed_at"
9292+ "#,
9393+ );
9494+9595+ let mut sqlx_query = sqlx::query(&query);
9696+ for record in records {
9797+ sqlx_query = sqlx_query
9898+ .bind(&record.uri)
9999+ .bind(&record.cid)
100100+ .bind(&record.did)
101101+ .bind(&record.collection)
102102+ .bind(&record.json)
103103+ .bind(record.indexed_at)
104104+ .bind(&record.slice_uri);
105105+ }
106106+107107+ sqlx_query.execute(&mut *tx).await?;
108108+ tx.commit().await?;
109109+110110+ Ok(())
111111+ }
112112+113113+ /// Gets a map of existing record CIDs for a specific actor, collection, and slice.
114114+ ///
115115+ /// Used during sync to determine which records need updating vs inserting.
116116+ ///
117117+ /// # Returns
118118+ /// HashMap mapping URI -> CID
119119+ pub async fn get_existing_record_cids_for_slice(
120120+ &self,
121121+ did: &str,
122122+ collection: &str,
123123+ slice_uri: &str,
124124+ ) -> Result<std::collections::HashMap<String, String>, DatabaseError> {
125125+ let records = sqlx::query!(
126126+ r#"SELECT "uri", "cid"
127127+ FROM "record"
128128+ WHERE "did" = $1 AND "collection" = $2 AND "slice_uri" = $3"#,
129129+ did,
130130+ collection,
131131+ slice_uri
132132+ )
133133+ .fetch_all(&self.pool)
134134+ .await?;
135135+136136+ let mut cid_map = std::collections::HashMap::new();
137137+ for record in records {
138138+ cid_map.insert(record.uri, record.cid);
139139+ }
140140+ Ok(cid_map)
141141+ }
142142+143143+ /// Retrieves a single record by URI.
144144+ ///
145145+ /// # Returns
146146+ /// Some(IndexedRecord) if found, None otherwise
147147+ pub async fn get_record(&self, uri: &str) -> Result<Option<IndexedRecord>, DatabaseError> {
148148+ let record = sqlx::query_as::<_, Record>(
149149+ r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
150150+ FROM "record"
151151+ WHERE "uri" = $1"#,
152152+ )
153153+ .bind(uri)
154154+ .fetch_optional(&self.pool)
155155+ .await?;
156156+157157+ let indexed_record = record.map(|record| IndexedRecord {
158158+ uri: record.uri,
159159+ cid: record.cid,
160160+ did: record.did,
161161+ collection: record.collection,
162162+ value: record.json,
163163+ indexed_at: record.indexed_at.to_rfc3339(),
164164+ });
165165+166166+ Ok(indexed_record)
167167+ }
168168+169169+ /// Updates an existing record.
170170+ ///
171171+ /// Returns error if no record with matching URI and slice_uri exists.
172172+ pub async fn update_record(&self, record: &Record) -> Result<(), DatabaseError> {
173173+ let result = sqlx::query!(
174174+ r#"UPDATE "record"
175175+ SET "cid" = $1, "json" = $2, "indexed_at" = $3
176176+ WHERE "uri" = $4 AND "slice_uri" = $5"#,
177177+ record.cid,
178178+ record.json,
179179+ record.indexed_at,
180180+ record.uri,
181181+ record.slice_uri
182182+ )
183183+ .execute(&self.pool)
184184+ .await?;
185185+186186+ if result.rows_affected() == 0 {
187187+ return Err(DatabaseError::RecordNotFound {
188188+ uri: record.uri.clone(),
189189+ });
190190+ }
191191+192192+ Ok(())
193193+ }
194194+195195+ /// Queries records for a slice with advanced filtering, sorting, and pagination.
196196+ ///
197197+ /// Supports:
198198+ /// - Cursor-based pagination
199199+ /// - Multi-field sorting (with JSON path support)
200200+ /// - Complex WHERE conditions (AND/OR, eq/in/contains operators)
201201+ /// - Automatic handling of lexicon records vs regular records
202202+ ///
203203+ /// # Returns
204204+ /// Tuple of (records, next_cursor)
205205+ pub async fn get_slice_collections_records(
206206+ &self,
207207+ slice_uri: &str,
208208+ limit: Option<i32>,
209209+ cursor: Option<&str>,
210210+ sort_by: Option<&Vec<SortField>>,
211211+ where_clause: Option<&WhereClause>,
212212+ ) -> Result<(Vec<Record>, Option<String>), DatabaseError> {
213213+ let limit = limit.unwrap_or(50).min(100);
214214+ let order_by = build_order_by_clause(sort_by);
215215+216216+ let mut where_clauses = Vec::new();
217217+ let mut param_count = 1;
218218+219219+ let is_lexicon = where_clause
220220+ .as_ref()
221221+ .and_then(|wc| wc.conditions.get("collection"))
222222+ .and_then(|c| c.eq.as_ref())
223223+ .and_then(|v| v.as_str())
224224+ == Some("network.slices.lexicon");
225225+226226+ if is_lexicon {
227227+ where_clauses.push(format!("json->>'slice' = ${}", param_count));
228228+ } else {
229229+ where_clauses.push(format!("slice_uri = ${}", param_count));
230230+ }
231231+ param_count += 1;
232232+233233+ if cursor.is_some() {
234234+ where_clauses.push(format!("indexed_at < ${}", param_count));
235235+ param_count += 1;
236236+ }
237237+238238+ let (and_conditions, or_conditions) =
239239+ build_where_conditions(where_clause, &mut param_count);
240240+ where_clauses.extend(and_conditions);
241241+242242+ if !or_conditions.is_empty() {
243243+ let or_clause = format!("({})", or_conditions.join(" OR "));
244244+ where_clauses.push(or_clause);
245245+ }
246246+247247+ let where_sql = where_clauses.join(" AND ");
248248+ let query = format!(
249249+ "SELECT uri, cid, did, collection, json, indexed_at, slice_uri
250250+ FROM record
251251+ WHERE {}
252252+ ORDER BY {}
253253+ LIMIT ${}",
254254+ where_sql, order_by, param_count
255255+ );
256256+257257+ let mut query_builder = sqlx::query_as::<_, Record>(&query);
258258+259259+ query_builder = query_builder.bind(slice_uri);
260260+261261+ if let Some(cursor_time) = cursor {
262262+ let cursor_dt = cursor_time
263263+ .parse::<chrono::DateTime<chrono::Utc>>()
264264+ .unwrap_or_else(|_| chrono::Utc::now());
265265+ query_builder = query_builder.bind(cursor_dt);
266266+ }
267267+268268+ query_builder = bind_where_parameters(query_builder, where_clause);
269269+ query_builder = query_builder.bind(limit as i64);
270270+271271+ let records = query_builder.fetch_all(&self.pool).await?;
272272+273273+ let cursor = if records.is_empty() {
274274+ None
275275+ } else {
276276+ records
277277+ .last()
278278+ .map(|record| generate_cursor_from_record(record, sort_by))
279279+ };
280280+281281+ Ok((records, cursor))
282282+ }
    /// Counts records matching the given criteria.
    ///
    /// Used for pagination metadata and statistics.
    ///
    /// # Arguments
    /// * `slice_uri` - AT-URI of the slice whose records are counted
    /// * `where_clause` - Optional AND/OR filter conditions
    ///
    /// # Errors
    /// Returns `DatabaseError` if the count query fails.
    ///
    /// NOTE: the `$n` placeholder numbering and the `.bind()` calls below are
    /// matched positionally by sqlx — their relative order must not change.
    pub async fn count_slice_collections_records(
        &self,
        slice_uri: &str,
        where_clause: Option<&WhereClause>,
    ) -> Result<i64, DatabaseError> {
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Lexicon records are attached to a slice through json->>'slice'
        // rather than the slice_uri column, so the first predicate differs.
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str())
            == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // AND conditions are appended individually; the OR group (if any) is
        // parenthesized and ANDed on as a single clause.
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // where_clauses always contains at least the slice predicate, so the
        // empty branch is defensive.
        let where_sql = if where_clauses.is_empty() {
            String::new()
        } else {
            format!(" WHERE {}", where_clauses.join(" AND "))
        };

        let query = format!("SELECT COUNT(*) as count FROM record{}", where_sql);

        let mut query_builder = sqlx::query_scalar::<_, i64>(&query);
        // $1 is always the slice identifier (slice_uri column or json slice).
        query_builder = query_builder.bind(slice_uri);

        // Bind values in the same order build_where_conditions generated
        // placeholders: all AND conditions first, then the OR group. Iterating
        // the same HashMap instance twice within one process run yields a
        // stable order, keeping binds aligned with placeholders.
        if let Some(clause) = where_clause {
            for condition in clause.conditions.values() {
                if let Some(eq_value) = &condition.eq {
                    if let Some(str_val) = eq_value.as_str() {
                        query_builder = query_builder.bind(str_val);
                    } else {
                        query_builder = query_builder.bind(eq_value);
                    }
                }
                if let Some(in_values) = &condition.in_values {
                    // Non-string members are silently dropped from IN lists.
                    let str_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect();
                    query_builder = query_builder.bind(str_values);
                }
                if let Some(contains_value) = &condition.contains {
                    // NOTE(review): binds the raw value — assumes
                    // build_where_conditions adds the '%' wildcards in SQL
                    // (e.g. ILIKE '%' || $n || '%'); confirm in that helper.
                    query_builder = query_builder.bind(contains_value);
                }
            }

            if let Some(or_conditions) = &clause.or_conditions {
                for condition in or_conditions.values() {
                    if let Some(eq_value) = &condition.eq {
                        if let Some(str_val) = eq_value.as_str() {
                            query_builder = query_builder.bind(str_val);
                        } else {
                            query_builder = query_builder.bind(eq_value);
                        }
                    }
                    if let Some(in_values) = &condition.in_values {
                        let str_values: Vec<String> = in_values
                            .iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect();
                        query_builder = query_builder.bind(str_values);
                    }
                    if let Some(contains_value) = &condition.contains {
                        query_builder = query_builder.bind(contains_value);
                    }
                }
            }
        }

        let count = query_builder.fetch_one(&self.pool).await?;
        Ok(count)
    }
376376+377377+ /// Deletes a record by URI.
378378+ ///
379379+ /// If slice_uri is provided, only deletes from that slice.
380380+ /// Otherwise deletes from all slices.
381381+ ///
382382+ /// # Returns
383383+ /// Number of rows affected
384384+ pub async fn delete_record_by_uri(
385385+ &self,
386386+ uri: &str,
387387+ slice_uri: Option<&str>,
388388+ ) -> Result<u64, DatabaseError> {
389389+ let result = if let Some(slice_uri) = slice_uri {
390390+ sqlx::query("DELETE FROM record WHERE uri = $1 AND slice_uri = $2")
391391+ .bind(uri)
392392+ .bind(slice_uri)
393393+ .execute(&self.pool)
394394+ .await?
395395+ } else {
396396+ sqlx::query("DELETE FROM record WHERE uri = $1")
397397+ .bind(uri)
398398+ .execute(&self.pool)
399399+ .await?
400400+ };
401401+ Ok(result.rows_affected())
402402+ }
    /// Inserts or updates a record atomically.
    ///
    /// Conflicts are detected on the table's primary-key constraint
    /// (`record_pkey`); on conflict only `cid`, `json`, and `indexed_at` are
    /// refreshed — the identity columns are left untouched.
    ///
    /// # Returns
    /// true if inserted (new record), false if updated (existing record)
    pub async fn upsert_record(&self, record: &Record) -> Result<bool, DatabaseError> {
        // `RETURNING (xmax = 0)` is the standard PostgreSQL idiom for telling
        // a fresh insert apart from an ON CONFLICT update: an inserted row
        // version has no updating transaction id, so xmax is 0.
        let result = sqlx::query_scalar::<_, bool>(
            r#"
            INSERT INTO record (uri, cid, did, collection, json, indexed_at, slice_uri)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT ON CONSTRAINT record_pkey DO UPDATE
            SET cid = EXCLUDED.cid,
                json = EXCLUDED.json,
                indexed_at = EXCLUDED.indexed_at
            RETURNING (xmax = 0)
            "#,
        )
        // Bind order must match the $1..$7 placeholders above.
        .bind(&record.uri)
        .bind(&record.cid)
        .bind(&record.did)
        .bind(&record.collection)
        .bind(&record.json)
        .bind(record.indexed_at)
        .bind(&record.slice_uri)
        .fetch_one(&self.pool)
        .await?;
        Ok(result)
    }
431431+432432+ /// Gets lexicon definitions for a specific slice.
433433+ ///
434434+ /// Filters for network.slices.lexicon records and transforms them
435435+ /// into the lexicon JSON format expected by the lexicon parser.
436436+ pub async fn get_lexicons_by_slice(
437437+ &self,
438438+ slice_uri: &str,
439439+ ) -> Result<Vec<serde_json::Value>, DatabaseError> {
440440+ let records = sqlx::query_as::<_, Record>(
441441+ r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
442442+ FROM "record"
443443+ WHERE "collection" = 'network.slices.lexicon'
444444+ AND "json"->>'slice' = $1
445445+ ORDER BY "indexed_at" DESC"#,
446446+ )
447447+ .bind(slice_uri)
448448+ .fetch_all(&self.pool)
449449+ .await?;
450450+451451+ let lexicon_definitions: Vec<serde_json::Value> = records
452452+ .into_iter()
453453+ .filter_map(|record| {
454454+ let nsid = record.json.get("nsid")?.as_str()?;
455455+ let definitions_str = record.json.get("definitions")?.as_str()?;
456456+ let definitions: serde_json::Value = serde_json::from_str(definitions_str).ok()?;
457457+458458+ Some(serde_json::json!({
459459+ "lexicon": 1,
460460+ "id": nsid,
461461+ "defs": definitions
462462+ }))
463463+ })
464464+ .collect();
465465+466466+ Ok(lexicon_definitions)
467467+ }
468468+}
+189
api/src/database/slices.rs
···11+//! Slice-related queries and statistics.
22+//!
33+//! This module handles database operations for slice metadata, including
44+//! collection statistics, actor counts, lexicon counts, and slice discovery.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::CollectionStats;
99+1010+impl Database {
1111+ /// Gets collection statistics for a slice.
1212+ ///
1313+ /// Returns record counts and unique actor counts per collection
1414+ /// (excluding lexicons marked as excludedFromSync).
1515+ pub async fn get_slice_collection_stats(
1616+ &self,
1717+ slice_uri: &str,
1818+ ) -> Result<Vec<CollectionStats>, DatabaseError> {
1919+ let stats = sqlx::query!(
2020+ r#"
2121+ WITH slice_collections AS (
2222+ SELECT DISTINCT
2323+ json->>'nsid' as collection_nsid
2424+ FROM record
2525+ WHERE collection = 'network.slices.lexicon'
2626+ AND json->>'slice' = $1
2727+ AND json->>'nsid' IS NOT NULL
2828+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
2929+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
3030+ )
3131+ SELECT
3232+ r.collection,
3333+ COUNT(*) as record_count,
3434+ COUNT(DISTINCT r.did) as unique_actors
3535+ FROM record r
3636+ INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
3737+ WHERE r.slice_uri = $1
3838+ GROUP BY r.collection
3939+ ORDER BY r.collection
4040+ "#,
4141+ slice_uri
4242+ )
4343+ .fetch_all(&self.pool)
4444+ .await?;
4545+4646+ Ok(stats
4747+ .into_iter()
4848+ .map(|row| CollectionStats {
4949+ collection: row.collection,
5050+ record_count: row.record_count.unwrap_or(0),
5151+ unique_actors: row.unique_actors.unwrap_or(0),
5252+ })
5353+ .collect())
5454+ }
5555+5656+ /// Gets the list of collection NSIDs defined for a slice.
5757+ ///
5858+ /// Only includes lexicons with type 'record' that are not excluded from sync.
5959+ pub async fn get_slice_collections_list(
6060+ &self,
6161+ slice_uri: &str,
6262+ ) -> Result<Vec<String>, DatabaseError> {
6363+ let rows = sqlx::query!(
6464+ r#"
6565+ SELECT DISTINCT json->>'nsid' as collection_nsid
6666+ FROM record
6767+ WHERE collection = 'network.slices.lexicon'
6868+ AND json->>'slice' = $1
6969+ AND json->>'nsid' IS NOT NULL
7070+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
7171+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
7272+ ORDER BY json->>'nsid'
7373+ "#,
7474+ slice_uri
7575+ )
7676+ .fetch_all(&self.pool)
7777+ .await?;
7878+7979+ Ok(rows
8080+ .into_iter()
8181+ .filter_map(|row| row.collection_nsid)
8282+ .collect())
8383+ }
8484+8585+ /// Counts total records across all collections in a slice.
8686+ ///
8787+ /// Excludes lexicons marked as excludedFromSync.
8888+ pub async fn get_slice_total_records(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
8989+ let count = sqlx::query!(
9090+ r#"
9191+ WITH slice_collections AS (
9292+ SELECT DISTINCT
9393+ json->>'nsid' as collection_nsid
9494+ FROM record
9595+ WHERE collection = 'network.slices.lexicon'
9696+ AND json->>'slice' = $1
9797+ AND json->>'nsid' IS NOT NULL
9898+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
9999+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
100100+ )
101101+ SELECT COUNT(*) as count
102102+ FROM record r
103103+ INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
104104+ WHERE r.slice_uri = $1
105105+ "#,
106106+ slice_uri
107107+ )
108108+ .fetch_one(&self.pool)
109109+ .await?;
110110+111111+ Ok(count.count.unwrap_or(0))
112112+ }
113113+114114+ /// Counts total actors tracked in a slice.
115115+ pub async fn get_slice_total_actors(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
116116+ let count = sqlx::query!(
117117+ r#"
118118+ SELECT COUNT(*) as count
119119+ FROM actor
120120+ WHERE slice_uri = $1
121121+ "#,
122122+ slice_uri
123123+ )
124124+ .fetch_one(&self.pool)
125125+ .await?;
126126+127127+ Ok(count.count.unwrap_or(0))
128128+ }
129129+130130+ /// Counts lexicon definitions for a slice.
131131+ ///
132132+ /// Only includes record-type lexicons.
133133+ pub async fn get_slice_lexicon_count(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
134134+ let count = sqlx::query!(
135135+ r#"
136136+ SELECT COUNT(*) as count
137137+ FROM record
138138+ WHERE collection = 'network.slices.lexicon'
139139+ AND json->>'slice' = $1
140140+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
141141+ "#,
142142+ slice_uri
143143+ )
144144+ .fetch_one(&self.pool)
145145+ .await?;
146146+147147+ Ok(count.count.unwrap_or(0))
148148+ }
149149+150150+ /// Gets all slice URIs that have lexicons defined.
151151+ ///
152152+ /// Useful for discovering all active slices in the system.
153153+ pub async fn get_all_slices(&self) -> Result<Vec<String>, DatabaseError> {
154154+ let rows: Vec<(String,)> = sqlx::query_as(
155155+ r#"
156156+ SELECT DISTINCT json->>'slice' as slice_uri
157157+ FROM record
158158+ WHERE collection = 'network.slices.lexicon'
159159+ AND json->>'slice' IS NOT NULL
160160+ "#,
161161+ )
162162+ .fetch_all(&self.pool)
163163+ .await?;
164164+165165+ Ok(rows.into_iter().map(|(uri,)| uri).collect())
166166+ }
167167+168168+ /// Gets the domain associated with a slice.
169169+ ///
170170+ /// Looks up the network.slices.slice record and extracts the domain field.
171171+ ///
172172+ /// # Returns
173173+ /// Some(domain) if the slice exists and has a domain, None otherwise
174174+ pub async fn get_slice_domain(&self, slice_uri: &str) -> Result<Option<String>, DatabaseError> {
175175+ let row = sqlx::query!(
176176+ r#"
177177+ SELECT json->>'domain' as domain
178178+ FROM record
179179+ WHERE collection = 'network.slices.slice'
180180+ AND uri = $1
181181+ "#,
182182+ slice_uri
183183+ )
184184+ .fetch_optional(&self.pool)
185185+ .await?;
186186+187187+ Ok(row.and_then(|r| r.domain))
188188+ }
189189+}
+60
api/src/database/types.rs
···11+//! Database query parameter types.
22+//!
33+//! This module contains types used for building dynamic SQL queries,
44+//! including WHERE conditions, sorting, and filtering.
55+66+use serde::{Deserialize, Serialize};
77+use serde_json::Value;
88+use std::collections::HashMap;
/// Represents a single condition in a WHERE clause.
///
/// Supports three types of operations:
/// - `eq`: Exact match (field = value)
/// - `in_values` (serialized as `in`): Array membership (field IN (...))
/// - `contains`: Pattern matching (field ILIKE '%value%')
///
/// All fields are optional and more than one may be set on the same
/// condition; how combined operators are joined is decided by the query
/// builders, not by this type.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WhereCondition {
    // Exact-match value (`field = value`).
    pub eq: Option<Value>,
    // Set membership; exposed as `"in"` in JSON since `in` is a Rust keyword.
    #[serde(rename = "in")]
    pub in_values: Option<Vec<Value>>,
    // Substring pattern to match against the field.
    pub contains: Option<String>,
}
/// Represents a complete WHERE clause with AND/OR conditions.
///
/// The main conditions map is combined with AND logic.
/// The or_conditions map (if present) is combined with OR logic
/// and the entire OR group is ANDed with the main conditions.
///
/// Because both maps are keyed by field name, at most one condition per
/// field can appear in each group — JSON objects cannot repeat keys, and a
/// duplicate key would silently overwrite the earlier entry in the HashMap.
///
/// Example JSON:
/// ```json
/// {
///   "collection": {"eq": "app.bsky.feed.post"},
///   "author": {"eq": "did:plc:123"},
///   "$or": {
///     "lang": {"eq": "en"},
///     "text": {"contains": "hello"}
///   }
/// }
/// ```
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WhereClause {
    // Field-name → condition map; all entries are ANDed together.
    // `flatten` makes these appear as top-level JSON keys.
    #[serde(flatten)]
    pub conditions: HashMap<String, WhereCondition>,

    // Optional field-name → condition map whose entries are ORed together,
    // then ANDed with `conditions`. Serialized under the "$or" key.
    #[serde(rename = "$or")]
    pub or_conditions: Option<HashMap<String, WhereCondition>>,
}
/// Represents a field to sort by with direction.
///
/// Used for multi-field sorting in queries.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SortField {
    // Name of the field to order by.
    pub field: String,
    // Sort direction — presumably "asc"/"desc"; the accepted values are
    // interpreted by the query builder, not validated here. TODO(review):
    // confirm and consider an enum if callers allow.
    pub direction: String,
}
···11-use serde::{Deserialize, Serialize};
22-use sqlxmq::{job, CurrentJob, JobRegistry};
33-use sqlx::PgPool;
44-use uuid::Uuid;
55-use crate::sync::SyncService;
66-use crate::models::BulkSyncParams;
77-use crate::logging::LogLevel;
11+//! Background job system for asynchronous collection synchronization.
22+//!
33+//! This module uses sqlxmq (a PostgreSQL-backed message queue) to handle
44+//! background sync jobs. Jobs are:
55+//! - Enqueued with deduplication checks (one active job per user+slice)
66+//! - Executed asynchronously in background workers
77+//! - Retried up to 5 times on failure
88+//! - Tracked with detailed logging and result persistence
99+//!
1010+//! The sync process fetches records from AT Protocol relays and validates them
1111+//! against Lexicon schemas before persisting to the database.
1212+813use crate::cache;
1414+use crate::logging::LogLevel;
1515+use crate::models::BulkSyncParams;
1616+use crate::sync::SyncService;
1717+use serde::{Deserialize, Serialize};
918use serde_json::json;
1010-use tracing::{info, error};
1919+use sqlx::PgPool;
2020+use sqlxmq::{CurrentJob, JobRegistry, job};
1121use std::sync::Arc;
1222use tokio::sync::Mutex;
2323+use tracing::{error, info};
2424+use uuid::Uuid;
13251414-/// Payload for sync jobs
/// Job payload containing all parameters needed to execute a sync job.
///
/// Serialized to JSON when enqueued via sqlxmq and deserialized by the
/// worker in `sync_job`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncJobPayload {
    /// Unique identifier for tracking this specific job execution
    pub job_id: Uuid,
    /// Decentralized identifier of the user requesting the sync
    pub user_did: String,
    /// AT-URI of the slice being synchronized
    pub slice_uri: String,
    /// Synchronization parameters (collections, repos, validation settings)
    pub params: BulkSyncParams,
}
22382323-/// Result stored for completed sync jobs
3939+/// Result data persisted after job completion or failure.
4040+///
4141+/// This is stored in the `job_results` table for historical tracking and
4242+/// status queries. Field names are camelCased for JSON API responses.
2443#[derive(Debug, Clone, Serialize, Deserialize)]
2544#[serde(rename_all = "camelCase")]
2645pub struct SyncJobResult {
···3150 pub message: String,
3251}
/// Initializes the sqlxmq job registry with all job handlers.
///
/// This must be called once at application startup to register job handlers
/// before workers can process jobs from the queue.
///
/// # Returns
/// A configured JobRegistry containing all registered job handlers
pub fn registry() -> JobRegistry {
    JobRegistry::new(&[sync_job])
}
38633939-/// The sync job handler
6464+/// Background job handler for collection synchronization.
6565+///
6666+/// This is the main worker function that executes sync jobs from the queue.
6767+/// It performs the following steps:
6868+/// 1. Extracts job payload and validates parameters
6969+/// 2. Initializes sync service with logging and caching
7070+/// 3. Fetches and validates records from AT Protocol relays
7171+/// 4. Persists results to the database
7272+/// 5. Logs detailed progress and completion status
7373+///
7474+/// # Job Behavior
7575+/// - Channel: `sync_queue`
7676+/// - Retries: Up to 5 attempts on failure
7777+/// - Concurrency: Multiple jobs can run in parallel
7878+/// - Deduplication: Enforced at enqueue time (one active job per user+slice)
7979+///
8080+/// # Arguments
8181+/// * `current_job` - The sqlxmq job context containing payload and database access
8282+///
8383+/// # Returns
8484+/// * `Ok(())` - Job completed successfully and marked complete
8585+/// * `Err(...)` - Job failed and will be retried (up to max retry limit)
4086#[job(channel_name = "sync_queue")]
4141-async fn sync_job(mut current_job: CurrentJob) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
4242- let payload: SyncJobPayload = current_job.json()?.expect("Invalid job payload");
4343-4444- info!(
4545- "Starting sync job {} for user {} on slice {}",
4646- payload.job_id, payload.user_did, payload.slice_uri
4747- );
8787+async fn sync_job(
8888+ mut current_job: CurrentJob,
8989+) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
9090+ let payload: SyncJobPayload = current_job.json()?.expect("Invalid job payload");
9191+9292+ info!(
9393+ "Starting sync job {} for user {} on slice {}",
9494+ payload.job_id, payload.user_did, payload.slice_uri
9595+ );
9696+9797+ // Access database pool and global logger for this job execution
9898+ let pool = current_job.pool();
9999+ let logger = crate::logging::Logger::global();
100100+101101+ // Log job start
102102+ logger.log_sync_job(
103103+ payload.job_id,
104104+ &payload.user_did,
105105+ &payload.slice_uri,
106106+ LogLevel::Info,
107107+ &format!(
108108+ "Starting sync job for {} collections",
109109+ payload
110110+ .params
111111+ .collections
112112+ .as_ref()
113113+ .map(|c| c.len())
114114+ .unwrap_or(0)
115115+ + payload
116116+ .params
117117+ .external_collections
118118+ .as_ref()
119119+ .map(|c| c.len())
120120+ .unwrap_or(0)
121121+ ),
122122+ Some(json!({
123123+ "collections": payload.params.collections,
124124+ "external_collections": payload.params.external_collections,
125125+ "repos": payload.params.repos,
126126+ "skip_validation": payload.params.skip_validation
127127+ })),
128128+ );
129129+130130+ // Initialize sync service with database, relay endpoint, and caching
131131+ let database = crate::database::Database::from_pool(pool.clone());
132132+ let relay_endpoint = std::env::var("RELAY_ENDPOINT")
133133+ .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
134134+135135+ // Create in-memory cache for DID resolution with 24-hour TTL to reduce identity lookups
136136+ let cache = Arc::new(Mutex::new(
137137+ cache::CacheFactory::create_slice_cache(cache::CacheBackend::InMemory {
138138+ ttl_seconds: Some(24 * 60 * 60),
139139+ })
140140+ .await?,
141141+ ));
142142+143143+ let sync_service = SyncService::with_logging_and_cache(
144144+ database.clone(),
145145+ relay_endpoint,
146146+ logger.clone(),
147147+ payload.job_id,
148148+ payload.user_did.clone(),
149149+ cache,
150150+ );
151151+152152+ // Track execution time for performance monitoring
153153+ let start_time = std::time::Instant::now();
481544949- // Get database pool and logger
5050- let pool = current_job.pool();
5151- let logger = crate::logging::Logger::global();
5252-5353- // Log job start
5454- logger.log_sync_job(
5555- payload.job_id,
5656- &payload.user_did,
155155+ // Execute the synchronization process
156156+ match sync_service
157157+ .backfill_collections(
57158 &payload.slice_uri,
5858- LogLevel::Info,
5959- &format!("Starting sync job for {} collections",
6060- payload.params.collections.as_ref().map(|c| c.len()).unwrap_or(0) +
6161- payload.params.external_collections.as_ref().map(|c| c.len()).unwrap_or(0)
6262- ),
6363- Some(json!({
6464- "collections": payload.params.collections,
6565- "external_collections": payload.params.external_collections,
6666- "repos": payload.params.repos,
6767- "skip_validation": payload.params.skip_validation
6868- }))
6969- );
7070-7171- // Create sync service with logging and cache
7272- let database = crate::database::Database::from_pool(pool.clone());
7373- let relay_endpoint = std::env::var("RELAY_ENDPOINT")
7474- .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
159159+ payload.params.collections.as_deref(),
160160+ payload.params.external_collections.as_deref(),
161161+ payload.params.repos.as_deref(),
162162+ payload.params.skip_validation.unwrap_or(false),
163163+ )
164164+ .await
165165+ {
166166+ Ok((repos_processed, records_synced)) => {
167167+ let elapsed = start_time.elapsed();
168168+ let result = SyncJobResult {
169169+ success: true,
170170+ total_records: records_synced,
171171+ collections_synced: [
172172+ payload.params.collections.unwrap_or_default(),
173173+ payload.params.external_collections.unwrap_or_default(),
174174+ ]
175175+ .concat(),
176176+ repos_processed,
177177+ message: format!("Sync completed successfully in {:?}", elapsed),
178178+ };
751797676- // Create cache for DID resolution (24 hour TTL)
7777- let cache = Arc::new(Mutex::new(
7878- cache::CacheFactory::create_slice_cache(
7979- cache::CacheBackend::InMemory { ttl_seconds: Some(24 * 60 * 60) }
8080- ).await?
8181- ));
180180+ // Log completion with detailed metrics for monitoring
181181+ logger.log_sync_job(
182182+ payload.job_id,
183183+ &payload.user_did,
184184+ &payload.slice_uri,
185185+ LogLevel::Info,
186186+ &format!(
187187+ "Sync completed successfully: {} repos, {} records in {:?}",
188188+ repos_processed, records_synced, elapsed
189189+ ),
190190+ Some(json!({
191191+ "repos_processed": repos_processed,
192192+ "records_synced": records_synced,
193193+ "duration_secs": elapsed.as_secs_f64(),
194194+ "collections_synced": result.collections_synced
195195+ })),
196196+ );
821978383- let sync_service = SyncService::with_logging_and_cache(
8484- database.clone(),
8585- relay_endpoint,
8686- logger.clone(),
8787- payload.job_id,
8888- payload.user_did.clone(),
8989- cache
9090- );
9191-9292- // Track progress
9393- let start_time = std::time::Instant::now();
9494-9595- // Perform the sync
9696- match sync_service
9797- .backfill_collections(
198198+ // Persist job result before marking complete (ensures result is queryable)
199199+ store_job_result(
200200+ pool,
201201+ payload.job_id,
202202+ &payload.user_did,
98203 &payload.slice_uri,
9999- payload.params.collections.as_deref(),
100100- payload.params.external_collections.as_deref(),
101101- payload.params.repos.as_deref(),
102102- payload.params.skip_validation.unwrap_or(false),
204204+ &result,
205205+ None,
103206 )
104104- .await
105105- {
106106- Ok((repos_processed, records_synced)) => {
107107- let elapsed = start_time.elapsed();
108108- let result = SyncJobResult {
109109- success: true,
110110- total_records: records_synced,
111111- collections_synced: [
112112- payload.params.collections.unwrap_or_default(),
113113- payload.params.external_collections.unwrap_or_default(),
114114- ].concat(),
115115- repos_processed,
116116- message: format!(
117117- "Sync completed successfully in {:?}",
118118- elapsed
119119- ),
120120- };
207207+ .await?;
121208122122- // Log successful completion
123123- logger.log_sync_job(
124124- payload.job_id,
125125- &payload.user_did,
126126- &payload.slice_uri,
127127- LogLevel::Info,
128128- &format!("Sync completed successfully: {} repos, {} records in {:?}",
129129- repos_processed, records_synced, elapsed),
130130- Some(json!({
131131- "repos_processed": repos_processed,
132132- "records_synced": records_synced,
133133- "duration_secs": elapsed.as_secs_f64(),
134134- "collections_synced": result.collections_synced
135135- }))
136136- );
209209+ info!(
210210+ "Sync job {} completed successfully: {} repos, {} records",
211211+ payload.job_id, repos_processed, records_synced
212212+ );
137213138138- // Store result in database before completing the job
139139- store_job_result(
140140- pool,
141141- payload.job_id,
142142- &payload.user_did,
143143- &payload.slice_uri,
144144- &result,
145145- None,
146146- ).await?;
214214+ // CRITICAL: Explicitly mark job as complete to prevent automatic retry
215215+ // Without this, sqlxmq will treat the job as failed and retry it
216216+ current_job.complete().await?;
147217148148- info!(
149149- "Sync job {} completed successfully: {} repos, {} records",
150150- payload.job_id, repos_processed, records_synced
151151- );
218218+ info!(
219219+ "Sync job {} marked as complete and will be cleaned up",
220220+ payload.job_id
221221+ );
152222153153- // CRITICAL: Must explicitly complete the job to prevent it from being retried
154154- current_job.complete().await?;
155155-156156- info!(
157157- "Sync job {} marked as complete and will be cleaned up",
158158- payload.job_id
159159- );
223223+ Ok(())
224224+ }
225225+ Err(e) => {
226226+ error!("Sync job {} failed: {}", payload.job_id, e);
160227161161- Ok(())
162162- }
163163- Err(e) => {
164164- error!("Sync job {} failed: {}", payload.job_id, e);
165165-166166- // Log error
167167- logger.log_sync_job(
168168- payload.job_id,
169169- &payload.user_did,
170170- &payload.slice_uri,
171171- LogLevel::Error,
172172- &format!("Sync job failed: {}", e),
173173- Some(json!({
174174- "error": e.to_string(),
175175- "duration_secs": start_time.elapsed().as_secs_f64()
176176- }))
177177- );
178178-179179- let result = SyncJobResult {
180180- success: false,
181181- total_records: 0,
182182- collections_synced: vec![],
183183- repos_processed: 0,
184184- message: format!("Sync failed: {}", e),
185185- };
228228+ // Log error details for debugging and user visibility
229229+ logger.log_sync_job(
230230+ payload.job_id,
231231+ &payload.user_did,
232232+ &payload.slice_uri,
233233+ LogLevel::Error,
234234+ &format!("Sync job failed: {}", e),
235235+ Some(json!({
236236+ "error": e.to_string(),
237237+ "duration_secs": start_time.elapsed().as_secs_f64()
238238+ })),
239239+ );
186240187187- // Store error result before returning error
188188- if let Err(db_err) = store_job_result(
189189- pool,
190190- payload.job_id,
191191- &payload.user_did,
192192- &payload.slice_uri,
193193- &result,
194194- Some(&format!("{}", e)),
195195- ).await {
196196- error!("Failed to store job result: {}", db_err);
197197- }
241241+ let result = SyncJobResult {
242242+ success: false,
243243+ total_records: 0,
244244+ collections_synced: vec![],
245245+ repos_processed: 0,
246246+ message: format!("Sync failed: {}", e),
247247+ };
198248199199- // Return error to trigger retry
200200- Err(Box::new(e))
249249+ // Persist failure result even if job will retry (for status tracking)
250250+ if let Err(db_err) = store_job_result(
251251+ pool,
252252+ payload.job_id,
253253+ &payload.user_did,
254254+ &payload.slice_uri,
255255+ &result,
256256+ Some(&format!("{}", e)),
257257+ )
258258+ .await
259259+ {
260260+ error!("Failed to store job result: {}", db_err);
201261 }
262262+263263+ // Return error to trigger sqlxmq's automatic retry mechanism (up to 5 attempts)
264264+ Err(Box::new(e))
202265 }
266266+ }
203267}
204268205205-/// Store job result in the database for later retrieval
269269+/// Persists job result to the database for status queries and historical tracking.
270270+///
271271+/// This is called both on success and failure to ensure result data is available
272272+/// via the job status API. Uses UPSERT to handle retries (updates existing result).
273273+///
274274+/// # Arguments
275275+/// * `pool` - PostgreSQL connection pool
276276+/// * `job_id` - Unique identifier for the job
277277+/// * `user_did` - User who initiated the job
278278+/// * `slice_uri` - Slice being synchronized
279279+/// * `result` - Job result data (success/failure, metrics)
280280+/// * `error_message` - Optional error details for failed jobs
281281+///
282282+/// # Returns
283283+/// * `Ok(())` - Result stored successfully
284284+/// * `Err(sqlx::Error)` - Database error during insert/update
206285async fn store_job_result(
207286 pool: &PgPool,
208287 job_id: Uuid,
···215294 "Storing job result: job_id={}, user_did={}, slice_uri={}, success={}",
216295 job_id, user_did, slice_uri, result.success
217296 );
218218-297297+298298+ // Convert collections list to JSONB for storage
219299 let collections_json = serde_json::to_value(&result.collections_synced)
220300 .map_err(|e| sqlx::Error::Protocol(format!("Failed to serialize collections: {}", e)))?;
221301302302+ // UPSERT: insert new result or update existing on retry
222303 sqlx::query!(
223304 r#"
224305 INSERT INTO job_results (
225306 job_id, user_did, slice_uri, status, success, total_records,
226307 collections_synced, repos_processed, message, error_message
227308 ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
228228- ON CONFLICT (job_id)
309309+ ON CONFLICT (job_id)
229310 DO UPDATE SET
230311 status = EXCLUDED.status,
231312 success = EXCLUDED.success,
···239320 job_id,
240321 user_did,
241322 slice_uri,
242242- if result.success { "completed" } else { "failed" },
323323+ if result.success {
324324+ "completed"
325325+ } else {
326326+ "failed"
327327+ },
243328 result.success,
244329 result.total_records,
245330 collections_json,
···253338 Ok(())
254339}
255340256256-/// Enqueue a new sync job
341341+/// Enqueues a new sync job with deduplication checks.
342342+///
343343+/// This function ensures only one active sync job exists per user+slice combination
344344+/// by checking both the message queue and recent job results. This prevents:
345345+/// - Duplicate jobs competing for the same data
346346+/// - Wasted resources on redundant syncs
347347+/// - Race conditions in record persistence
348348+///
349349+/// # Arguments
350350+/// * `pool` - PostgreSQL connection pool
351351+/// * `user_did` - Decentralized identifier of the user
352352+/// * `slice_uri` - AT-URI of the slice to synchronize
353353+/// * `params` - Sync parameters (collections, repos, validation settings)
354354+///
355355+/// # Returns
356356+/// * `Ok(Uuid)` - Job ID of the newly enqueued job
357357+/// * `Err(...)` - Error if job already running or enqueue fails
358358+///
359359+/// # Deduplication Strategy
360360+/// 1. Check for pending jobs in `mq_msgs` queue
361361+/// 2. Check for recent jobs (< 10 min) without results
362362+/// 3. Reject if either check finds an active job
257363pub async fn enqueue_sync_job(
258364 pool: &PgPool,
259365 user_did: String,
260366 slice_uri: String,
261367 params: BulkSyncParams,
262368) -> Result<Uuid, Box<dyn std::error::Error + Send + Sync>> {
263263- // Check if there's already a running sync job for this user+slice combination
264264- // We do this by checking:
265265- // 1. If there are any jobs in mq_msgs for sync_queue channel that haven't been processed yet
266266- // 2. If there are any recent job_results entries that indicate a job might still be running
369369+ // Deduplication check 1: Look for pending jobs in the message queue
370370+ // This catches jobs that haven't started executing yet
267371 let existing_running_msg = sqlx::query!(
268372 r#"
269269- SELECT m.id
373373+ SELECT m.id
270374 FROM mq_msgs m
271375 JOIN mq_payloads p ON m.id = p.id
272376 WHERE m.channel_name = 'sync_queue'
273377 AND m.id != '00000000-0000-0000-0000-000000000000'
274274- AND p.payload_json->>'user_did' = $1
378378+ AND p.payload_json->>'user_did' = $1
275379 AND p.payload_json->>'slice_uri' = $2
276380 AND m.attempt_at <= NOW()
277381 "#,
···281385 .fetch_optional(pool)
282386 .await?;
283387284284- // Also check if there's a very recent job that might still be running
285285- // (within the last 10 minutes and no completion record)
388388+ // Deduplication check 2: Look for recently started jobs without results
389389+ // This catches jobs that started but haven't written results yet (< 10 min)
286390 let recent_start = sqlx::query!(
287391 r#"
288392 SELECT m.id
···291395 LEFT JOIN job_results jr ON (p.payload_json->>'job_id')::uuid = jr.job_id
292396 WHERE m.channel_name = 'sync_queue'
293397 AND m.id != '00000000-0000-0000-0000-000000000000'
294294- AND p.payload_json->>'user_did' = $1
398398+ AND p.payload_json->>'user_did' = $1
295399 AND p.payload_json->>'slice_uri' = $2
296400 AND m.created_at > NOW() - INTERVAL '10 minutes'
297401 AND jr.job_id IS NULL
···306410 return Err("A sync job is already running for this slice. Please wait for it to complete before starting another.".into());
307411 }
308412413413+ // Generate unique job ID for tracking and result storage
309414 let job_id = Uuid::new_v4();
310310-415415+311416 let payload = SyncJobPayload {
312417 job_id,
313418 user_did: user_did.clone(),
314419 slice_uri: slice_uri.clone(),
315420 params,
316421 };
317317-318318- // Spawn the job using the correct builder pattern
319319- let job_uuid = sync_job.builder()
320320- .set_json(&payload)?
321321- .spawn(pool)
322322- .await?;
323323-422422+423423+ // Enqueue the job using sqlxmq's builder pattern
424424+ let job_uuid = sync_job.builder().set_json(&payload)?.spawn(pool).await?;
425425+324426 info!(
325427 "Enqueued sync job {} (queue id: {}) for user {}",
326428 job_id, job_uuid, user_did
327429 );
328328-430430+329431 Ok(job_id)
330432}
331433332332-/// Check the status of a sync job
434434+/// Status information for a sync job, including progress and results.
435435+///
436436+/// This combines data from both the message queue (for pending jobs) and
437437+/// the job_results table (for completed jobs). Field names are camelCased
438438+/// for JSON API responses.
333439#[derive(Debug, Serialize, Deserialize)]
334440#[serde(rename_all = "camelCase")]
335441pub struct JobStatus {
442442+ /// Unique identifier for the job
336443 pub job_id: Uuid,
444444+ /// Current status: "pending", "running", "completed", or "failed"
337445 pub status: String,
446446+ /// Timestamp when job was enqueued
338447 pub created_at: chrono::DateTime<chrono::Utc>,
448448+ /// Timestamp when job execution started (None if still pending; for jobs still in the queue this is approximated by the enqueue time)
339449 pub started_at: Option<chrono::DateTime<chrono::Utc>>,
450450+ /// Timestamp when job finished (None if still running)
340451 pub completed_at: Option<chrono::DateTime<chrono::Utc>>,
452452+ /// Detailed result data (None if still running)
341453 pub result: Option<SyncJobResult>,
454454+ /// Error message if job failed
342455 pub error: Option<String>,
456456+ /// Number of retry attempts remaining, out of a maximum of 5 (computed as 5 minus attempts used)
343457 pub retry_count: i32,
344458}
345459346346-pub async fn get_job_status(
347347- pool: &PgPool,
348348- job_id: Uuid,
349349-) -> Result<Option<JobStatus>, sqlx::Error> {
350350- // First, check if we have a stored result for this job
460460+/// Retrieves the current status of a sync job.
461461+///
462462+/// This function checks both the job_results table (for completed jobs) and
463463+/// the message queue (for pending/running jobs) to provide comprehensive status.
464464+///
465465+/// # Arguments
466466+/// * `pool` - PostgreSQL connection pool
467467+/// * `job_id` - Unique identifier of the job to query
468468+///
469469+/// # Returns
470470+/// * `Ok(Some(JobStatus))` - Job found with current status
471471+/// * `Ok(None)` - Job not found (may have been cleaned up)
472472+/// * `Err(sqlx::Error)` - Database query error
473473+pub async fn get_job_status(pool: &PgPool, job_id: Uuid) -> Result<Option<JobStatus>, sqlx::Error> {
474474+ // Priority 1: Check for completed job result (most common case)
351475 let result_row = sqlx::query!(
352476 r#"
353353- SELECT
477477+ SELECT
354478 job_id, user_did, slice_uri, status, success, total_records,
355479 collections_synced, repos_processed, message, error_message,
356480 created_at, completed_at
357357- FROM job_results
481481+ FROM job_results
358482 WHERE job_id = $1
359483 "#,
360484 job_id
···363487 .await?;
364488365489 if let Some(result) = result_row {
366366- // We have a stored result, return it
367367- let collections_synced: Vec<String> = serde_json::from_value(result.collections_synced)
368368- .unwrap_or_default();
490490+ // Found completed job, construct status from result data
491491+ let collections_synced: Vec<String> =
492492+ serde_json::from_value(result.collections_synced).unwrap_or_default();
369493370494 return Ok(Some(JobStatus {
371495 job_id,
···385509 }));
386510 }
387511388388- // No stored result, check if job is still in queue
512512+ // Priority 2: Check message queue for pending/running jobs
389513 let queue_row = sqlx::query!(
390514 r#"
391391- SELECT
515515+ SELECT
392516 m.id,
393517 m.created_at,
394518 m.attempt_at,
395519 m.attempts,
396520 p.payload_json
397397- FROM mq_msgs m
521521+ FROM mq_msgs m
398522 LEFT JOIN mq_payloads p ON m.id = p.id
399523 WHERE p.payload_json::jsonb ->> 'job_id' = $1
400524 "#,
···405529406530 match queue_row {
407531 Some(row) => {
532532+ // Determine status based on attempt_at timestamp
408533 let status = if row.attempt_at.is_none() {
409534 "completed".to_string()
410535 } else if let Some(attempt_at) = row.attempt_at {
···421546 job_id,
422547 status: status.clone(),
423548 created_at: row.created_at.unwrap_or_else(chrono::Utc::now),
424424- started_at: if status == "running" || status == "completed" { row.created_at } else { None },
425425- completed_at: if status == "completed" { row.attempt_at } else { None },
549549+ started_at: if status == "running" || status == "completed" {
550550+ row.created_at
551551+ } else {
552552+ None
553553+ },
554554+ completed_at: if status == "completed" {
555555+ row.attempt_at
556556+ } else {
557557+ None
558558+ },
426559 result: None,
427560 error: None,
428561 retry_count: 5 - row.attempts,
429562 }))
430430- },
563563+ }
431564 None => {
432432- // Job not found in queue or results - it might not exist
565565+ // Job not found anywhere - either never existed or was cleaned up
433566 Ok(None)
434567 }
435568 }
436569}
437570438438-/// Get job results for a specific slice, ordered by most recent first
571571+/// Retrieves job history for a specific user and slice combination.
572572+///
573573+/// This returns both completed jobs (from job_results) and pending/running jobs
574574+/// (from the message queue), ordered by creation time (most recent first).
575575+/// Useful for displaying sync history in the UI.
576576+///
577577+/// # Arguments
578578+/// * `pool` - PostgreSQL connection pool
579579+/// * `user_did` - User's decentralized identifier
580580+/// * `slice_uri` - AT-URI of the slice
581581+/// * `limit` - Optional maximum number of results (default: 10)
582582+///
583583+/// # Returns
584584+/// * `Ok(Vec<JobStatus>)` - List of job statuses ordered by recency
585585+/// * `Err(sqlx::Error)` - Database query error
439586pub async fn get_slice_job_history(
440587 pool: &PgPool,
441588 user_did: &str,
···449596 user_did, slice_uri, limit
450597 );
451598452452- // Get both completed jobs and pending jobs
599599+ // Query combines completed jobs (job_results) and pending jobs (mq_msgs) via UNION
453600 let rows = sqlx::query!(
454601 r#"
455602 -- Completed jobs from job_results
···499646 .fetch_all(pool)
500647 .await?;
501648649649+ // Transform database rows into JobStatus structs
502650 let mut results = Vec::new();
503651 for row in rows {
504652 let collections_synced: Vec<String> = serde_json::from_value(
505505- row.collections_synced.unwrap_or_else(|| serde_json::json!([]))
506506- ).unwrap_or_default();
653653+ row.collections_synced
654654+ .unwrap_or_else(|| serde_json::json!([])),
655655+ )
656656+ .unwrap_or_default();
507657508508- // Handle both completed and pending jobs
658658+ // Differentiate between pending jobs (no result data) and completed jobs
509659 let result = if row.job_type.as_deref() == Some("pending") || row.success.is_none() {
510660 // This is a pending job - no result data available
511661 None
···534684535685 Ok(results)
536686}
537537-
+258-75
api/src/logging.rs
···11+//! Batched logging system for high-throughput database log persistence.
22+//!
33+//! This module provides an async, batched logging system that:
44+//! - Queues log entries in memory using an unbounded channel
55+//! - Flushes to PostgreSQL in batches (every 5 seconds or 100 entries)
66+//! - Maintains a global singleton logger instance
77+//! - Supports different log types (sync jobs, Jetstream events, system logs)
88+//! - Provides a periodic cleanup task for old logs, started via `start_log_cleanup_task` (1 day retention for Jetstream, 7 days for jobs)
99+//!
1010+//! The batching approach significantly reduces database load during high-throughput
1111+//! scenarios like Jetstream event processing.
1212+1313+use chrono::Utc;
114use serde_json::Value;
215use sqlx::PgPool;
33-use uuid::Uuid;
44-use tokio::sync::mpsc;
55-use tokio::time::{interval, Duration};
66-use tracing::{info, warn, error};
77-use chrono::Utc;
816use std::sync::OnceLock;
1717+use tokio::sync::mpsc;
1818+use tokio::time::{Duration, interval};
1919+use tracing::{error, info, warn};
2020+use uuid::Uuid;
9212222+/// Log severity levels for structured logging.
1023#[derive(Debug, Clone)]
1124pub enum LogLevel {
1225 Info,
···1528}
16291730impl LogLevel {
3131+ /// Returns the string representation of the log level.
1832 pub fn as_str(&self) -> &'static str {
1933 match self {
2034 LogLevel::Info => "info",
···2438 }
2539}
26404141+/// Categories of log entries for filtering and organization.
2742#[derive(Debug, Clone)]
2843#[allow(dead_code)]
2944pub enum LogType {
4545+ /// Background sync job logs (user-initiated collection sync)
3046 SyncJob,
4747+ /// Real-time Jetstream event processing logs
3148 Jetstream,
4949+ /// System-level operational logs
3250 System,
3351}
34523553impl LogType {
5454+ /// Returns the string representation of the log type.
3655 pub fn as_str(&self) -> &'static str {
3756 match self {
3857 LogType::SyncJob => "sync_job",
···4261 }
4362}
44634545-/// Global logger instance
6464+/// Global singleton logger instance, initialized once at application startup.
4665static GLOBAL_LOGGER: OnceLock<Logger> = OnceLock::new();
47664848-/// Log entry to be queued for batch insertion
6767+/// Internal representation of a log entry pending database insertion.
6868+///
6969+/// These entries are queued in memory and flushed in batches to reduce
7070+/// database round-trips and improve throughput.
4971#[derive(Debug, Clone)]
5072struct QueuedLogEntry {
5173 log_type: String,
···5880 created_at: chrono::DateTime<chrono::Utc>,
5981}
60826161-/// Logger that queues log entries and flushes them periodically
8383+/// Batched logger that queues log entries and flushes them periodically.
8484+///
8585+/// This logger uses an unbounded channel to queue log entries, which are then
8686+/// flushed to the database by a background worker. The worker flushes when:
8787+/// - 100 entries accumulate (batch size threshold)
8888+/// - 5 seconds elapse (time-based threshold)
8989+/// - The channel is closed (graceful shutdown)
9090+///
9191+/// Logs are also immediately written to stdout via the `tracing` crate for
9292+/// real-time visibility during development and debugging.
6293#[derive(Clone)]
6394pub struct Logger {
6495 sender: mpsc::UnboundedSender<QueuedLogEntry>,
6596}
66976798impl Logger {
6868- /// Create a new batched logger and spawn the background worker
9999+ /// Creates a new batched logger and spawns the background worker task.
100100+ ///
101101+ /// The background worker runs for the lifetime of the application, processing
102102+ /// the log queue and flushing to the database.
103103+ ///
104104+ /// # Arguments
105105+ /// * `pool` - PostgreSQL connection pool for database writes
69106 pub fn new(pool: PgPool) -> Self {
70107 let (sender, receiver) = mpsc::unbounded_channel();
7171-7272- // Spawn background worker
108108+109109+ // Spawn background worker that will run for the lifetime of the application
73110 tokio::spawn(Self::background_worker(receiver, pool));
7474-111111+75112 Self { sender }
76113 }
771147878- /// Initialize the global logger (call once at startup)
115115+ /// Initializes the global logger singleton.
116116+ ///
117117+ /// This should be called once at application startup before any logging occurs.
118118+ /// Subsequent calls will be ignored with a warning.
119119+ ///
120120+ /// # Arguments
121121+ /// * `pool` - PostgreSQL connection pool for database writes
122122+ ///
123123+ /// # Example
124124+ /// ```ignore
125125+ /// Logger::init_global(pool.clone());
126126+ /// let logger = Logger::global();
127127+ /// logger.log_jetstream(LogLevel::Info, "Started", None);
128128+ /// ```
79129 pub fn init_global(pool: PgPool) {
80130 let logger = Self::new(pool);
81131 if GLOBAL_LOGGER.set(logger).is_err() {
···83133 }
84134 }
851358686- /// Get the global logger instance
136136+ /// Returns a reference to the global logger instance.
137137+ ///
138138+ /// # Panics
139139+ /// Panics if called before `init_global()`. Ensure the logger is initialized
140140+ /// during application startup.
87141 pub fn global() -> &'static Logger {
8888- GLOBAL_LOGGER.get().expect("Global logger not initialized - call Logger::init_global() first")
142142+ GLOBAL_LOGGER
143143+ .get()
144144+ .expect("Global logger not initialized - call Logger::init_global() first")
89145 }
9090-9191- /// Log a sync job message (queued for batch insertion)
146146+147147+ /// Logs a sync job message, queuing it for batched database insertion.
148148+ ///
149149+ /// Sync job logs track the progress of background synchronization tasks where
150150+ /// users fetch their collection data from their PDS.
151151+ ///
152152+ /// # Arguments
153153+ /// * `job_id` - Unique identifier for the sync job
154154+ /// * `user_did` - Decentralized identifier of the user being synced
155155+ /// * `slice_uri` - AT-URI of the slice being synchronized
156156+ /// * `level` - Log severity level
157157+ /// * `message` - Human-readable log message
158158+ /// * `metadata` - Optional structured metadata (JSON)
159159+ ///
160160+ /// # Behavior
161161+ /// - Immediately writes to stdout via `tracing` for real-time visibility
162162+ /// - Queues the entry for batch insertion to the database
163163+ /// - Send failures are silently ignored (if channel is closed)
92164 pub fn log_sync_job(
93165 &self,
94166 job_id: Uuid,
···108180 metadata,
109181 created_at: Utc::now(),
110182 };
111111-112112- // Also log to tracing for immediate console output
183183+184184+ // Write to stdout immediately for real-time monitoring and debugging
113185 match level {
114186 LogLevel::Info => info!("[sync_job] {}", message),
115187 LogLevel::Warn => warn!("[sync_job] {}", message),
116188 LogLevel::Error => error!("[sync_job] {}", message),
117189 }
118118-119119- // Queue for database insertion (ignore send errors if channel closed)
190190+191191+ // Queue for batch database insertion (ignore send errors if channel closed)
120192 let _ = self.sender.send(entry);
121193 }
122122-123123- /// Log a jetstream message (queued for batch insertion)
124124- pub fn log_jetstream(
125125- &self,
126126- level: LogLevel,
127127- message: &str,
128128- metadata: Option<Value>,
129129- ) {
194194+195195+ /// Logs a Jetstream message without slice context.
196196+ ///
197197+ /// This is a convenience wrapper around `log_jetstream_with_slice` for
198198+ /// global Jetstream events (e.g., connection status, errors).
199199+ ///
200200+ /// # Arguments
201201+ /// * `level` - Log severity level
202202+ /// * `message` - Human-readable log message
203203+ /// * `metadata` - Optional structured metadata (JSON)
204204+ pub fn log_jetstream(&self, level: LogLevel, message: &str, metadata: Option<Value>) {
130205 self.log_jetstream_with_slice(level, message, metadata, None);
131206 }
132207133133- /// Log a jetstream message with slice context (queued for batch insertion)
208208+ /// Logs a Jetstream message with optional slice context.
209209+ ///
210210+ /// Jetstream logs track real-time event processing from the AT Protocol firehose.
211211+ /// Including `slice_uri` associates the log with a specific slice's event processing.
212212+ ///
213213+ /// # Arguments
214214+ /// * `level` - Log severity level
215215+ /// * `message` - Human-readable log message
216216+ /// * `metadata` - Optional structured metadata (JSON)
217217+ /// * `slice_uri` - Optional AT-URI to associate this log with a specific slice
218218+ ///
219219+ /// # Behavior
220220+ /// - Immediately writes to stdout via `tracing` for real-time visibility
221221+ /// - Queues the entry for batch insertion to the database
222222+ /// - Send failures are silently ignored (if channel is closed)
134223 pub fn log_jetstream_with_slice(
135224 &self,
136225 level: LogLevel,
···148237 metadata,
149238 created_at: Utc::now(),
150239 };
151151-152152- // Also log to tracing for immediate console output
240240+241241+ // Write to stdout immediately for real-time monitoring and debugging
153242 match level {
154243 LogLevel::Info => info!("[jetstream] {}", message),
155244 LogLevel::Warn => warn!("[jetstream] {}", message),
156245 LogLevel::Error => error!("[jetstream] {}", message),
157246 }
158158-159159- // Queue for database insertion (ignore send errors if channel closed)
247247+248248+ // Queue for batch database insertion (ignore send errors if channel closed)
160249 let _ = self.sender.send(entry);
161250 }
162162-163163- /// Background worker that processes the log queue
251251+252252+ /// Background worker that processes the log queue and flushes to the database.
253253+ ///
254254+ /// This worker runs in a dedicated tokio task and flushes batches when:
255255+ /// - 100 entries accumulate (to prevent unbounded memory growth)
256256+ /// - 5 seconds elapse (to ensure timely persistence)
257257+ /// - The channel closes (graceful shutdown)
258258+ ///
259259+ /// # Arguments
260260+ /// * `receiver` - Channel receiver for queued log entries
261261+ /// * `pool` - PostgreSQL connection pool for batch inserts
164262 async fn background_worker(
165263 mut receiver: mpsc::UnboundedReceiver<QueuedLogEntry>,
166264 pool: PgPool,
167265 ) {
168266 let mut batch = Vec::new();
169169- let mut flush_interval = interval(Duration::from_secs(5)); // Flush every 5 seconds
170170-267267+ // Periodic flush to ensure logs are persisted even during low-volume periods
268268+ let mut flush_interval = interval(Duration::from_secs(5));
269269+171270 info!("Started batched logging background worker");
172172-271271+173272 loop {
174273 tokio::select! {
175175- // Receive log entries
274274+ // Receive log entries from the queue
176275 Some(entry) = receiver.recv() => {
177276 batch.push(entry);
178178-179179- // Flush if batch is large enough
277277+278278+ // Flush when batch reaches size threshold to prevent memory buildup
180279 if batch.len() >= 100 {
181280 Self::flush_batch(&pool, &mut batch).await;
182281 }
183282 }
184184-185185- // Periodic flush
283283+284284+ // Time-based flush to ensure logs are persisted within 5 seconds
186285 _ = flush_interval.tick() => {
187286 if !batch.is_empty() {
188287 Self::flush_batch(&pool, &mut batch).await;
189288 }
190289 }
191191-192192- // Channel closed, flush remaining and exit
290290+291291+ // Channel closed (shutdown), flush remaining logs and exit gracefully
193292 else => {
194293 if !batch.is_empty() {
195294 Self::flush_batch(&pool, &mut batch).await;
···198297 }
199298 }
200299 }
201201-300300+202301 info!("Batched logging background worker shut down");
203302 }
204204-205205- /// Flush a batch of log entries to the database
303303+304304+ /// Flushes a batch of log entries to the database using a bulk INSERT.
305305+ ///
306306+ /// This method dynamically constructs a multi-value INSERT statement to minimize
307307+ /// database round-trips. Each log entry contributes 8 parameters (fields).
308308+ ///
309309+ /// # Arguments
310310+ /// * `pool` - PostgreSQL connection pool
311311+ /// * `batch` - Mutable vector of queued log entries (cleared after flush)
312312+ ///
313313+ /// # Performance
314314+ /// - Warns if a batch takes >100ms to insert (potential database issue)
315315+ /// - Logs successful flushes with timing information
316316+ /// - On error, logs are lost but the system continues (fail-open)
206317 async fn flush_batch(pool: &PgPool, batch: &mut Vec<QueuedLogEntry>) {
207318 if batch.is_empty() {
208319 return;
209320 }
210210-321321+211322 let batch_size = batch.len();
212323 let start = std::time::Instant::now();
213213-214214- // Build bulk INSERT query
324324+325325+ // Build bulk INSERT query dynamically based on batch size
215326 let mut query = String::from(
216216- "INSERT INTO logs (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at) VALUES "
327327+ "INSERT INTO logs (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at) VALUES ",
217328 );
218218-219219- // Add placeholders for each record
329329+330330+ // Add placeholders for each record (8 parameters per entry)
220331 for i in 0..batch_size {
221332 if i > 0 {
222333 query.push_str(", ");
223334 }
224224- let base = i * 8 + 1; // 8 fields per log entry
335335+ // Calculate base parameter index (8 fields per log entry, 1-indexed)
336336+ let base = i * 8 + 1;
225337 query.push_str(&format!(
226338 "(${}, ${}, ${}, ${}, ${}, ${}, ${}, ${})",
227227- base, base + 1, base + 2, base + 3, base + 4, base + 5, base + 6, base + 7
339339+ base,
340340+ base + 1,
341341+ base + 2,
342342+ base + 3,
343343+ base + 4,
344344+ base + 5,
345345+ base + 6,
346346+ base + 7
228347 ));
229348 }
230230-231231- // Bind parameters
349349+350350+ // Bind all parameters in order (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at)
232351 let mut sqlx_query = sqlx::query(&query);
233352 for entry in batch.iter() {
234353 sqlx_query = sqlx_query
···241360 .bind(&entry.metadata)
242361 .bind(entry.created_at);
243362 }
244244-245245- // Execute batch insert
363363+364364+ // Execute the batch insert and handle errors gracefully
246365 match sqlx_query.execute(pool).await {
247366 Ok(_) => {
248367 let elapsed = start.elapsed();
368368+ // Warn about slow inserts that may indicate database performance issues
249369 if elapsed.as_millis() > 100 {
250250- warn!("Slow log batch insert: {} entries in {:?}", batch_size, elapsed);
370370+ warn!(
371371+ "Slow log batch insert: {} entries in {:?}",
372372+ batch_size, elapsed
373373+ );
251374 } else {
252375 info!("Flushed {} log entries in {:?}", batch_size, elapsed);
253376 }
254377 }
255378 Err(e) => {
256379 error!("Failed to flush log batch of {} entries: {}", batch_size, e);
257257- // Continue processing - logs are lost but system keeps running
380380+ // Fail-open: logs are lost but the system continues to prevent cascading failures
258381 }
259382 }
260260-383383+261384 batch.clear();
262385 }
263386}
264387265265-/// Log entry struct for database queries
388388+/// Represents a log entry retrieved from the database.
389389+///
390390+/// This struct is used for query results and API responses. Field names are
391391+/// converted to camelCase for JSON serialization.
266392#[derive(Debug, serde::Serialize, sqlx::FromRow)]
267393#[serde(rename_all = "camelCase")]
268394pub struct LogEntry {
···277403 pub metadata: Option<serde_json::Value>,
278404}
279405280280-/// Get logs for a specific sync job
406406+/// Retrieves logs for a specific sync job, ordered chronologically.
407407+///
408408+/// # Arguments
409409+/// * `pool` - PostgreSQL connection pool
410410+/// * `job_id` - Unique identifier of the sync job
411411+/// * `limit` - Optional maximum number of logs to return (default: 100)
412412+///
413413+/// # Returns
414414+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (ASC)
415415+/// * `Err(sqlx::Error)` - Database query error
281416pub async fn get_sync_job_logs(
282417 pool: &PgPool,
283418 job_id: Uuid,
···303438 Ok(rows)
304439}
305440306306-/// Get jetstream logs, optionally filtered by slice (still includes global connection logs)
441441+/// Retrieves Jetstream logs, optionally filtered by slice URI.
442442+///
443443+/// When a slice filter is provided, returns both slice-specific logs AND global
444444+/// connection logs (where slice_uri is NULL). This ensures connection status logs
445445+/// are visible when viewing slice-specific logs.
446446+///
447447+/// # Arguments
448448+/// * `pool` - PostgreSQL connection pool
449449+/// * `slice_filter` - Optional slice URI to filter logs
450450+/// * `limit` - Optional maximum number of logs to return (default: 100)
451451+///
452452+/// # Returns
453453+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (DESC)
454454+/// * `Err(sqlx::Error)` - Database query error
307455pub async fn get_jetstream_logs(
308456 pool: &PgPool,
309457 slice_filter: Option<&str>,
···313461314462 let rows = if let Some(slice_uri) = slice_filter {
315463 tracing::info!("Querying jetstream logs with slice filter: {}", slice_uri);
316316- // When filtering by slice, include both slice-specific logs and global connection logs (where slice_uri is NULL)
464464+ // Include both slice-specific logs and global connection logs for context
317465 let results = sqlx::query_as!(
318466 LogEntry,
319467 r#"
···330478 .fetch_all(pool)
331479 .await?;
332480333333- tracing::info!("Found {} jetstream logs for slice {}", results.len(), slice_uri);
481481+ tracing::info!(
482482+ "Found {} jetstream logs for slice {}",
483483+ results.len(),
484484+ slice_uri
485485+ );
334486 results
335487 } else {
336336- // No filter, return all jetstream logs
488488+ // No filter provided, return all Jetstream logs across all slices
337489 sqlx::query_as!(
338490 LogEntry,
339491 r#"
···352504 Ok(rows)
353505}
354506355355-/// Get logs for a specific slice
507507+/// Retrieves all logs associated with a specific slice URI.
508508+///
509509+/// This includes both sync job logs and Jetstream logs for the slice.
510510+///
511511+/// # Arguments
512512+/// * `pool` - PostgreSQL connection pool
513513+/// * `slice_uri` - AT-URI of the slice
514514+/// * `log_type_filter` - Optional log type filter ("sync_job", "jetstream", "system")
515515+/// * `limit` - Optional maximum number of logs to return (default: 100)
516516+///
517517+/// # Returns
518518+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (DESC)
519519+/// * `Err(sqlx::Error)` - Database query error
356520#[allow(dead_code)]
357521pub async fn get_slice_logs(
358522 pool: &PgPool,
···398562 Ok(rows)
399563}
400564401401-/// Clean up old logs (keep last 1 day for jetstream, 7 days for completed sync jobs)
565565+/// Deletes old log entries to prevent unbounded database growth.
566566+///
567567+/// Retention policy:
568568+/// - Jetstream logs: 1 day (high volume, primarily for real-time debugging)
569569+/// - Sync job logs: 7 days (lower volume, useful for historical analysis)
570570+/// - System logs: 7 days
571571+///
572572+/// # Arguments
573573+/// * `pool` - PostgreSQL connection pool
574574+///
575575+/// # Returns
576576+/// * `Ok(u64)` - Number of deleted log entries
577577+/// * `Err(sqlx::Error)` - Database query error
402578pub async fn cleanup_old_logs(pool: &PgPool) -> Result<u64, sqlx::Error> {
403579 let result = sqlx::query!(
404580 r#"
···415591 Ok(result.rows_affected())
416592}
417593418418-/// Start a background task that cleans up old logs every 6 hours
594594+/// Spawns a background task that periodically cleans up old logs.
595595+///
596596+/// The task runs every 6 hours for the lifetime of the application, deleting
597597+/// logs according to the retention policy in `cleanup_old_logs`.
598598+///
599599+/// # Arguments
600600+/// * `pool` - PostgreSQL connection pool (cloned into the spawned task)
419601pub fn start_log_cleanup_task(pool: PgPool) {
420602 tokio::spawn(async move {
421421- let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(6 * 3600)); // Every 6 hours
603603+ // Run cleanup every 6 hours (balances database load with timely cleanup)
604604+ let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(6 * 3600));
422605423606 info!("Started log cleanup background task (runs every 6 hours)");
424607···439622 }
440623 }
441624 });
442442-}625625+}
+23-11
api/src/main.rs
···1010mod jobs;
1111mod logging;
1212mod models;
1313-mod redis_cache;
1413mod sync;
1514mod xrpc;
1615···8382 let relay_endpoint = env::var("RELAY_ENDPOINT")
8483 .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
85848686- let system_slice_uri = env::var("SYSTEM_SLICE_URI")
8787- .unwrap_or_else(|_| "at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z".to_string());
8585+ let system_slice_uri = env::var("SYSTEM_SLICE_URI").unwrap_or_else(|_| {
8686+ "at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z".to_string()
8787+ });
88888989 let config = Config {
9090 auth_base_url,
···161161 let wait_time = RECONNECT_WINDOW - now.duration_since(window_start);
162162 tracing::warn!(
163163 "Rate limit exceeded: {} reconnects in last minute, waiting {:?}",
164164- reconnect_count, wait_time
164164+ reconnect_count,
165165+ wait_time
165166 );
166167 tokio::time::sleep(wait_time).await;
167168 continue;
···170171 reconnect_count += 1;
171172172173 // Read cursor position from database
173173- let initial_cursor = PostgresCursorHandler::read_cursor(&pool_for_jetstream, "default").await;
174174+ let initial_cursor =
175175+ PostgresCursorHandler::read_cursor(&pool_for_jetstream, "default").await;
174176 if let Some(cursor) = initial_cursor {
175177 tracing::info!("Resuming from cursor position: {}", cursor);
176178 } else {
···191193 Some(cursor_handler.clone()),
192194 initial_cursor,
193195 redis_url.clone(),
194194- ).await;
196196+ )
197197+ .await;
195198196199 let consumer_arc = match consumer_result {
197200 Ok(consumer) => {
···206209 arc
207210 }
208211 Err(e) => {
209209- tracing::error!("Failed to create Jetstream consumer: {} - retry in {:?}", e, retry_delay);
212212+ tracing::error!(
213213+ "Failed to create Jetstream consumer: {} - retry in {:?}",
214214+ e,
215215+ retry_delay
216216+ );
210217 jetstream_connected_clone.store(false, std::sync::atomic::Ordering::Relaxed);
211218 tokio::time::sleep(retry_delay).await;
212219 retry_delay = std::cmp::min(retry_delay * 2, MAX_RETRY_DELAY);
···240247 // Create auth cache for token/session caching (5 minute TTL)
241248 let redis_url = env::var("REDIS_URL").ok();
242249 let auth_cache_backend = if let Some(redis_url) = redis_url {
243243- cache::CacheBackend::Redis { url: redis_url, ttl_seconds: Some(300) }
250250+ cache::CacheBackend::Redis {
251251+ url: redis_url,
252252+ ttl_seconds: Some(300),
253253+ }
244254 } else {
245245- cache::CacheBackend::InMemory { ttl_seconds: Some(300) }
255255+ cache::CacheBackend::InMemory {
256256+ ttl_seconds: Some(300),
257257+ }
246258 };
247259 let auth_cache = Arc::new(Mutex::new(
248248- cache::CacheFactory::create_slice_cache(auth_cache_backend).await?
260260+ cache::CacheFactory::create_slice_cache(auth_cache_backend).await?,
249261 ));
250262251263 let state = AppState {
···358370 let addr = format!("0.0.0.0:{}", port);
359371360372 let listener = tokio::net::TcpListener::bind(&addr).await?;
361361- info!("🚀 Server running on http://{}", addr);
373373+ info!("Server running on http://{}", addr);
362374363375 axum::serve(listener, app).await?;
364376 Ok(())