···1111use serde::Deserialize;
12121313use crate::AppState;
1414-use crate::auth::{extract_bearer_token, get_atproto_auth_for_user_cached, verify_oauth_token_cached};
1414+use crate::auth::{
1515+ extract_bearer_token, get_atproto_auth_for_user_cached, verify_oauth_token_cached,
1616+};
1517use crate::models::{
1618 IndexedRecord, Record, SliceRecordsOutput, SliceRecordsParams, SortField, WhereCondition,
1719};
···526528) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
527529 // Extract and verify OAuth token
528530 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
529529- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
530530- .await
531531- .map_err(status_to_error_response)?;
531531+ let user_info = verify_oauth_token_cached(
532532+ &token,
533533+ &state.config.auth_base_url,
534534+ Some(state.auth_cache.clone()),
535535+ )
536536+ .await
537537+ .map_err(status_to_error_response)?;
532538533539 // Get AT Protocol DPoP auth and PDS URL (with caching)
534534- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
535535- .await
536536- .map_err(status_to_error_response)?;
540540+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
541541+ &token,
542542+ &state.config.auth_base_url,
543543+ Some(state.auth_cache.clone()),
544544+ )
545545+ .await
546546+ .map_err(status_to_error_response)?;
537547538548 // Extract the repo DID from user info
539549 let repo = user_info.did.unwrap_or(user_info.sub);
···603613 validate: false,
604614 };
605615606606- let result = create_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, create_request)
607607- .await
608608- .map_err(|_e| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
616616+ let result = create_record(
617617+ &http_client,
618618+ &atproto_client::client::Auth::DPoP(dpop_auth),
619619+ &pds_url,
620620+ create_request,
621621+ )
622622+ .await
623623+ .map_err(|_e| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
609624610625 // Extract URI and CID from the response enum
611626 let (uri, cid) = match result {
···644659) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
645660 // Extract and verify OAuth token
646661 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
647647- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
648648- .await
649649- .map_err(status_to_error_response)?;
662662+ let user_info = verify_oauth_token_cached(
663663+ &token,
664664+ &state.config.auth_base_url,
665665+ Some(state.auth_cache.clone()),
666666+ )
667667+ .await
668668+ .map_err(status_to_error_response)?;
650669651670 // Get AT Protocol DPoP auth and PDS URL (with caching)
652652- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
653653- .await
654654- .map_err(status_to_error_response)?;
671671+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
672672+ &token,
673673+ &state.config.auth_base_url,
674674+ Some(state.auth_cache.clone()),
675675+ )
676676+ .await
677677+ .map_err(status_to_error_response)?;
655678656679 // Extract slice URI, rkey, and record value from structured body
657680 let slice_uri = body
···721744 validate: false,
722745 };
723746724724- let result = put_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, put_request)
725725- .await
726726- .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
747747+ let result = put_record(
748748+ &http_client,
749749+ &atproto_client::client::Auth::DPoP(dpop_auth),
750750+ &pds_url,
751751+ put_request,
752752+ )
753753+ .await
754754+ .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
727755728756 // Extract URI and CID from the response enum
729757 let (uri, cid) = match result {
···762790) -> Result<Json<serde_json::Value>, (StatusCode, Json<serde_json::Value>)> {
763791 // Extract and verify OAuth token
764792 let token = extract_bearer_token(&headers).map_err(status_to_error_response)?;
765765- let user_info = verify_oauth_token_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
766766- .await
767767- .map_err(status_to_error_response)?;
793793+ let user_info = verify_oauth_token_cached(
794794+ &token,
795795+ &state.config.auth_base_url,
796796+ Some(state.auth_cache.clone()),
797797+ )
798798+ .await
799799+ .map_err(status_to_error_response)?;
768800769801 // Get AT Protocol DPoP auth and PDS URL (with caching)
770770- let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(&token, &state.config.auth_base_url, Some(state.auth_cache.clone()))
771771- .await
772772- .map_err(status_to_error_response)?;
802802+ let (dpop_auth, pds_url) = get_atproto_auth_for_user_cached(
803803+ &token,
804804+ &state.config.auth_base_url,
805805+ Some(state.auth_cache.clone()),
806806+ )
807807+ .await
808808+ .map_err(status_to_error_response)?;
773809774810 // Extract repo and rkey from body
775811 let repo = user_info.did.unwrap_or(user_info.sub);
···790826 swap_commit: None,
791827 };
792828793793- delete_record(&http_client, &atproto_client::client::Auth::DPoP(dpop_auth), &pds_url, delete_request)
794794- .await
795795- .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
829829+ delete_record(
830830+ &http_client,
831831+ &atproto_client::client::Auth::DPoP(dpop_auth),
832832+ &pds_url,
833833+ delete_request,
834834+ )
835835+ .await
836836+ .map_err(|_| status_to_error_response(StatusCode::INTERNAL_SERVER_ERROR))?;
796837797838 // Also delete from local database (from all slices)
798839 let uri = format!("at://{}/{}/{}", repo, collection, rkey);
+18-24
api/src/atproto_extensions.rs
···11// Extensions to atproto-client for functionality not yet available
22// This module provides additional AT Protocol functions following the same patterns
3344-use serde::{Deserialize, Serialize};
44+use crate::errors::BlobUploadError;
55use atproto_client::client::DPoPAuth;
66-use thiserror::Error;
76use atproto_oauth::dpop::{DpopRetry, request_dpop};
88-use reqwest_middleware::ClientBuilder;
97use reqwest_chain::ChainMiddleware;
1010-1111-#[derive(Error, Debug)]
1212-pub enum BlobUploadError {
1313- #[error("error-slice-blob-1 HTTP request failed: {0}")]
1414- HttpRequest(#[from] reqwest_middleware::Error),
1515-1616- #[error("error-slice-blob-2 JSON parsing failed: {0}")]
1717- JsonParse(#[from] serde_json::Error),
1818-1919- #[error("error-slice-blob-3 DPoP proof creation failed: {0}")]
2020- DPoPProof(String),
2121-2222- #[error("error-slice-blob-4 Upload request failed: {status} - {message}")]
2323- UploadFailed { status: u16, message: String },
2424-}
2525-88+use reqwest_middleware::ClientBuilder;
99+use serde::{Deserialize, Serialize};
26102711/// Response from blob upload
2812#[cfg_attr(debug_assertions, derive(Debug))]
···6246 mime_type: &str,
6347) -> Result<UploadBlobResponse, BlobUploadError> {
6448 // Build the URL using standard string formatting
6565- let url = format!("{}/xrpc/com.atproto.repo.uploadBlob", base_url.trim_end_matches('/'));
4949+ let url = format!(
5050+ "{}/xrpc/com.atproto.repo.uploadBlob",
5151+ base_url.trim_end_matches('/')
5252+ );
66536754 // For blob uploads, we need to use a different approach than post_dpop_json
6855 // since we're sending binary data, not JSON
···8976 "POST",
9077 url,
9178 &dpop_auth.oauth_access_token,
9292- ).map_err(|e| BlobUploadError::DPoPProof(e.to_string()))?;
7979+ )
8080+ .map_err(|e| BlobUploadError::DPoPProof(e.to_string()))?;
93819482 // Create DpopRetry middleware (same as atproto-client)
9583 let dpop_retry = DpopRetry::new(
···10795 // Make the request with automatic nonce retry handling
10896 let http_response = dpop_retry_client
10997 .post(url)
110110- .header("Authorization", format!("DPoP {}", dpop_auth.oauth_access_token))
9898+ .header(
9999+ "Authorization",
100100+ format!("DPoP {}", dpop_auth.oauth_access_token),
101101+ )
111102 .header("DPoP", &dpop_proof_token)
112103 .header("Content-Type", content_type)
113104 .body(data)
···117108118109 if !http_response.status().is_success() {
119110 let status = http_response.status();
120120- let error_text = http_response.text().await.unwrap_or_else(|_| "unknown".to_string());
111111+ let error_text = http_response
112112+ .text()
113113+ .await
114114+ .unwrap_or_else(|_| "unknown".to_string());
121115 return Err(BlobUploadError::UploadFailed {
122116 status: status.as_u16(),
123123- message: error_text
117117+ message: error_text,
124118 });
125119 }
126120
+99-31
api/src/auth.rs
···11-use axum::http::{HeaderMap, StatusCode};
22-use serde::{Deserialize, Serialize};
11+//! Authentication and authorization utilities for OAuth and AT Protocol.
22+//!
33+//! This module provides functions for:
44+//! - Extracting and validating OAuth bearer tokens
55+//! - Verifying tokens with the authorization server
66+//! - Managing AT Protocol DPoP (Demonstrating Proof-of-Possession) authentication
77+//! - Caching authentication state for performance (5-minute TTL)
88+99+use crate::cache::SliceCache;
310use atproto_client::client::DPoPAuth;
411use atproto_identity::key::KeyData;
512use atproto_oauth::jwk::WrappedJsonWebKey;
1313+use axum::http::{HeaderMap, StatusCode};
1414+use serde::{Deserialize, Serialize};
615use std::sync::Arc;
716use tokio::sync::Mutex;
88-use crate::cache::SliceCache;
9171818+/// OAuth userinfo response containing the authenticated user's identity.
1019#[derive(Serialize, Deserialize, Debug)]
1120pub struct UserInfoResponse {
2121+ /// Subject identifier (user ID) from the OAuth provider
1222 pub sub: String,
2323+ /// Decentralized identifier for the user in AT Protocol
1324 pub did: Option<String>,
1425}
15262727+/// Cached AT Protocol session data to avoid repeated auth server requests.
1628#[derive(Serialize, Deserialize, Debug, Clone)]
1729struct CachedSession {
3030+ /// Personal Data Server endpoint URL for the user
1831 pds_url: String,
3232+ /// AT Protocol access token for PDS operations
1933 atproto_access_token: String,
3434+ /// DPoP JSON Web Key for proof-of-possession
2035 dpop_jwk: serde_json::Value,
2136}
22372323-// Extract bearer token from Authorization header
3838+/// Extracts the bearer token from the Authorization header.
3939+///
4040+/// # Arguments
4141+/// * `headers` - HTTP request headers
4242+///
4343+/// # Returns
4444+/// * `Ok(String)` - The extracted bearer token
4545+/// * `Err(StatusCode::UNAUTHORIZED)` - If the header is missing, malformed, or not a Bearer token
4646+///
4747+/// # Example
4848+/// ```ignore
4949+/// let token = extract_bearer_token(&headers)?;
5050+/// ```
2451pub fn extract_bearer_token(headers: &HeaderMap) -> Result<String, StatusCode> {
2552 let auth_header = headers
2653 .get("authorization")
···3158 return Err(StatusCode::UNAUTHORIZED);
3259 }
33606161+ // Safe to unwrap since we just verified the prefix exists
3462 let token = auth_header.strip_prefix("Bearer ").unwrap().to_string();
3563 Ok(token)
3664}
37653838-// Verify OAuth token with auth server
3939-4040-// Verify OAuth token with auth server with optional caching
6666+/// Verifies an OAuth bearer token with the authorization server.
6767+///
6868+/// This function first checks the cache for a previously validated token to avoid
6969+/// unnecessary network calls. If not found in cache, it validates with the auth server
7070+/// and caches the result for 5 minutes.
7171+///
7272+/// # Arguments
7373+/// * `token` - The OAuth bearer token to verify
7474+/// * `auth_base_url` - Base URL of the authorization server
7575+/// * `cache` - Optional cache instance (falls back to direct verification if None)
7676+///
7777+/// # Returns
7878+/// * `Ok(UserInfoResponse)` - User information if the token is valid
7979+/// * `Err(StatusCode)` - HTTP status code indicating the failure reason
8080+/// - `UNAUTHORIZED` - Invalid or expired token
8181+/// - `INTERNAL_SERVER_ERROR` - Network or parsing errors
8282+///
8383+/// # Cache Behavior
8484+/// - Cache key format: `oauth_userinfo:{token}`
8585+/// - TTL: 300 seconds (5 minutes)
8686+/// - Cache miss triggers verification with auth server
4187pub async fn verify_oauth_token_cached(
4288 token: &str,
4389 auth_base_url: &str,
4490 cache: Option<Arc<Mutex<SliceCache>>>,
4591) -> Result<UserInfoResponse, StatusCode> {
4646-4747- // Try cache first if provided
9292+ // Try cache first if provided to avoid network round-trip
4893 if let Some(cache) = &cache {
4994 let cached_result = {
5095 let mut cache_lock = cache.lock().await;
···58103 }
59104 }
601056161- // Cache miss - verify with auth server
106106+ // Cache miss - verify token by calling the OAuth userinfo endpoint
62107 let client = reqwest::Client::new();
63108 let userinfo_url = format!("{}/oauth/userinfo", auth_base_url);
64109···78123 .await
79124 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
801258181- // Cache the userinfo if cache is provided (5 minute TTL)
126126+ // Cache the validated userinfo for 5 minutes to improve performance
82127 if let Some(cache) = &cache {
8383- let user_info_value = serde_json::to_value(&user_info)
8484- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
128128+ let user_info_value =
129129+ serde_json::to_value(&user_info).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
85130 let mut cache_lock = cache.lock().await;
8686- let _ = cache_lock.cache_oauth_userinfo(token, &user_info_value, 300).await;
131131+ let _ = cache_lock
132132+ .cache_oauth_userinfo(token, &user_info_value, 300)
133133+ .await;
87134 }
8813589136 Ok(user_info)
90137}
911389292-// Get AT Protocol DPoP auth and PDS URL for the user
9393-9494-// Get AT Protocol DPoP auth and PDS URL for the user with optional caching
139139+/// Retrieves AT Protocol DPoP authentication credentials and PDS URL for a user.
140140+///
141141+/// DPoP (Demonstrating Proof-of-Possession) is a security mechanism that binds tokens
142142+/// to specific cryptographic keys, preventing token theft and replay attacks.
143143+///
144144+/// This function first checks the cache for existing credentials, then falls back to
145145+/// fetching from the auth server if needed. Results are cached for 5 minutes.
146146+///
147147+/// # Arguments
148148+/// * `token` - OAuth bearer token identifying the user
149149+/// * `auth_base_url` - Base URL of the authorization server
150150+/// * `cache` - Optional cache instance (falls back to direct fetch if None)
151151+///
152152+/// # Returns
153153+/// * `Ok((DPoPAuth, String))` - Tuple of (DPoP authentication object, PDS endpoint URL)
154154+/// * `Err(StatusCode)` - HTTP status code indicating the failure reason
155155+/// - `UNAUTHORIZED` - Invalid token or session expired
156156+/// - `INTERNAL_SERVER_ERROR` - Network, parsing, or key conversion errors
157157+///
158158+/// # Cache Behavior
159159+/// - Cache key format: `atproto_session:{token}`
160160+/// - TTL: 300 seconds (5 minutes)
161161+/// - Stores serialized CachedSession with PDS URL, access token, and DPoP JWK
95162pub async fn get_atproto_auth_for_user_cached(
96163 token: &str,
97164 auth_base_url: &str,
98165 cache: Option<Arc<Mutex<SliceCache>>>,
99166) -> Result<(DPoPAuth, String), StatusCode> {
100100-101101- // Try cache first if provided
167167+ // Try cache first if provided to avoid expensive auth server call
102168 if let Some(cache) = &cache {
103169 let cached_result = {
104170 let mut cache_lock = cache.lock().await;
···109175 let cached_session: CachedSession = serde_json::from_value(session_value)
110176 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
111177112112- // Convert cached data back to DPoP auth
178178+ // Reconstruct DPoP auth from cached session data
113179 let dpop_jwk: WrappedJsonWebKey = serde_json::from_value(cached_session.dpop_jwk)
114180 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
115181116116- let dpop_private_key_data = KeyData::try_from(dpop_jwk)
117117- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
182182+ let dpop_private_key_data =
183183+ KeyData::try_from(dpop_jwk).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
118184119185 let dpop_auth = DPoPAuth {
120186 dpop_private_key_data,
···125191 }
126192 }
127193128128- // Cache miss - fetch from auth server
194194+ // Cache miss - fetch fresh session data from auth server
129195 let client = reqwest::Client::new();
130196 let session_url = format!("{}/api/atprotocol/session", auth_base_url);
131197···145211 .await
146212 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
147213148148- // Extract PDS URL from session
214214+ // Extract the user's Personal Data Server endpoint URL
149215 let pds_url = session_data["pds_endpoint"]
150216 .as_str()
151217 .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
152218 .to_string();
153219154154- // Extract AT Protocol access token from session data
220220+ // Extract the access token used for authenticating with the PDS
155221 let atproto_access_token = session_data["access_token"]
156222 .as_str()
157223 .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
158224 .to_string();
159225160160- // Extract DPoP private key from session data - convert JWK to KeyData
226226+ // Extract and convert the DPoP JSON Web Key to internal key representation
161227 let dpop_jwk_value = session_data["dpop_jwk"].clone();
162228 let dpop_jwk: WrappedJsonWebKey = serde_json::from_value(dpop_jwk_value.clone())
163229 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
164230165165- let dpop_private_key_data = KeyData::try_from(dpop_jwk)
166166- .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
231231+ let dpop_private_key_data =
232232+ KeyData::try_from(dpop_jwk).map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
167233168234 let dpop_auth = DPoPAuth {
169235 dpop_private_key_data,
170236 oauth_access_token: atproto_access_token.clone(),
171237 };
172238173173- // Cache the session data if cache is provided (5 minute TTL)
239239+ // Cache the complete session for 5 minutes to avoid repeated auth server calls
174240 if let Some(cache) = &cache {
175241 let cached_session = CachedSession {
176242 pds_url: pds_url.clone(),
···180246 let session_value = serde_json::to_value(&cached_session)
181247 .map_err(|_e| StatusCode::INTERNAL_SERVER_ERROR)?;
182248 let mut cache_lock = cache.lock().await;
183183- let _ = cache_lock.cache_atproto_session(token, &session_value, 300).await;
249249+ let _ = cache_lock
250250+ .cache_atproto_session(token, &session_value, 300)
251251+ .await;
184252 }
185253186254 Ok((dpop_auth, pds_url))
187187-}255255+}
···11-use base64::{Engine as _, engine::general_purpose};
22-use sqlx::PgPool;
33-44-use crate::errors::DatabaseError;
55-use crate::models::{
66- Actor, CollectionStats, IndexedRecord, OAuthClient, Record, SortField, WhereClause,
77- WhereCondition,
88-};
99-use std::collections::HashMap;
1010-1111-// Helper function to build ORDER BY clause from sortBy array
1212-fn build_order_by_clause(sort_by: Option<&Vec<SortField>>) -> String {
1313- match sort_by {
1414- Some(sort_fields) if !sort_fields.is_empty() => {
1515- let mut order_clauses = Vec::new();
1616- for sort_field in sort_fields {
1717- let field = &sort_field.field;
1818- let direction = match sort_field.direction.to_lowercase().as_str() {
1919- "desc" => "DESC",
2020- _ => "ASC", // Default to ASC
2121- };
2222-2323- // Validate field name to prevent SQL injection
2424- if field
2525- .chars()
2626- .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
2727- {
2828- if field == "indexed_at"
2929- || field == "uri"
3030- || field == "cid"
3131- || field == "did"
3232- || field == "collection"
3333- {
3434- order_clauses.push(format!("{field} {direction}"));
3535- } else {
3636- // For JSON fields, handle nested paths and NULLs properly
3737- if field.contains('.') {
3838- let parts: Vec<&str> = field.split('.').collect();
3939- let mut path = String::from("json");
4040- for (i, part) in parts.iter().enumerate() {
4141- if i == parts.len() - 1 {
4242- path.push_str(&format!("->>'{}'", part));
4343- } else {
4444- path.push_str(&format!("->'{}'", part));
4545- }
4646- }
4747- order_clauses.push(format!("{path} {direction} NULLS LAST"));
4848- } else {
4949- order_clauses.push(format!("json->>'{field}' {direction} NULLS LAST"));
5050- }
5151- }
5252- }
5353- }
5454- if !order_clauses.is_empty() {
5555- // Always add indexed_at as tie-breaker if not already included
5656- let has_indexed_at = order_clauses
5757- .iter()
5858- .any(|clause| clause.contains("indexed_at"));
5959- if !has_indexed_at {
6060- order_clauses.push("indexed_at DESC".to_string());
6161- }
6262- order_clauses.join(", ")
6363- } else {
6464- "indexed_at DESC".to_string()
6565- }
6666- }
6767- _ => "indexed_at DESC".to_string(),
6868- }
6969-}
7070-7171-fn generate_cursor(
7272- sort_value: &str,
7373- indexed_at: chrono::DateTime<chrono::Utc>,
7474- cid: &str,
7575-) -> String {
7676- let cursor_content = format!("{}::{}::{}", sort_value, indexed_at.to_rfc3339(), cid);
7777- general_purpose::URL_SAFE_NO_PAD.encode(cursor_content)
7878-}
7979-8080-// Extract the primary sort field from sortBy array for cursor generation
8181-fn get_primary_sort_field(sort_by: Option<&Vec<SortField>>) -> String {
8282- match sort_by {
8383- Some(sort_fields) if !sort_fields.is_empty() => sort_fields[0].field.clone(),
8484- _ => "indexed_at".to_string(),
8585- }
8686-}
8787-8888-// Generate cursor from record and sortBy array
8989-fn generate_cursor_from_record(record: &Record, sort_by: Option<&Vec<SortField>>) -> String {
9090- let primary_sort_field = get_primary_sort_field(sort_by);
9191-9292- // Extract sort value from the record based on the sort field
9393- let sort_value = match primary_sort_field.as_str() {
9494- "indexed_at" => record.indexed_at.to_rfc3339(),
9595- field => {
9696- // Extract field value from JSON
9797- record
9898- .json
9999- .get(field)
100100- .and_then(|v| match v {
101101- serde_json::Value::String(s) if !s.is_empty() => Some(s.clone()),
102102- serde_json::Value::Number(n) => Some(n.to_string()),
103103- serde_json::Value::Bool(b) => Some(b.to_string()),
104104- serde_json::Value::Null => None, // Explicitly handle null
105105- _ => None,
106106- })
107107- .unwrap_or_else(|| "NULL".to_string()) // Use "NULL" string for null values to match SQL NULLS LAST behavior
108108- }
109109- };
110110-111111- generate_cursor(&sort_value, record.indexed_at, &record.cid)
112112-}
113113-114114-// Helper function to build WHERE conditions from WhereClause
115115-fn build_where_conditions(
116116- where_clause: Option<&WhereClause>,
117117- param_count: &mut usize,
118118-) -> (Vec<String>, Vec<String>) {
119119- let mut where_clauses = Vec::new();
120120- let mut or_clauses = Vec::new();
121121-122122- if let Some(clause) = where_clause {
123123- // Process regular AND conditions
124124- for (field, condition) in &clause.conditions {
125125- let field_clause = build_single_condition(field, condition, param_count);
126126- where_clauses.push(field_clause);
127127- }
128128-129129- // Process OR conditions
130130- if let Some(or_conditions) = &clause.or_conditions {
131131- for (field, condition) in or_conditions {
132132- let field_clause = build_single_condition(field, condition, param_count);
133133- or_clauses.push(field_clause);
134134- }
135135- }
136136- }
137137-138138- (where_clauses, or_clauses)
139139-}
140140-141141-// Helper function to bind parameters from WhereClause
142142-fn bind_where_parameters<'q>(
143143- mut query_builder: sqlx::query::QueryAs<
144144- 'q,
145145- sqlx::Postgres,
146146- Record,
147147- sqlx::postgres::PgArguments,
148148- >,
149149- where_clause: Option<&'q WhereClause>,
150150-) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
151151- if let Some(clause) = where_clause {
152152- // Bind AND condition parameters
153153- for condition in clause.conditions.values() {
154154- query_builder = bind_single_condition(query_builder, condition);
155155- }
156156-157157- // Bind OR condition parameters
158158- if let Some(or_conditions) = &clause.or_conditions {
159159- for condition in or_conditions.values() {
160160- query_builder = bind_single_condition(query_builder, condition);
161161- }
162162- }
163163- }
164164- query_builder
165165-}
166166-167167-// Helper function to bind parameters for a single condition
168168-fn bind_single_condition<'q>(
169169- mut query_builder: sqlx::query::QueryAs<
170170- 'q,
171171- sqlx::Postgres,
172172- Record,
173173- sqlx::postgres::PgArguments,
174174- >,
175175- condition: &'q WhereCondition,
176176-) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
177177- if let Some(eq_value) = &condition.eq {
178178- if let Some(str_val) = eq_value.as_str() {
179179- query_builder = query_builder.bind(str_val);
180180- } else {
181181- query_builder = query_builder.bind(eq_value);
182182- }
183183- }
184184-185185- if let Some(in_values) = &condition.in_values {
186186- let str_values: Vec<String> = in_values
187187- .iter()
188188- .filter_map(|v| v.as_str().map(|s| s.to_string()))
189189- .collect();
190190- query_builder = query_builder.bind(str_values);
191191- }
192192-193193- if let Some(contains_value) = &condition.contains {
194194- query_builder = query_builder.bind(contains_value);
195195- }
196196-197197- query_builder
198198-}
199199-200200-// Helper function to build a single condition clause
201201-fn build_single_condition(
202202- field: &str,
203203- condition: &WhereCondition,
204204- param_count: &mut usize,
205205-) -> String {
206206- if let Some(_eq_value) = &condition.eq {
207207- let clause = match field {
208208- "did" | "collection" | "uri" | "cid" => {
209209- format!("{} = ${}", field, param_count)
210210- }
211211- _ => {
212212- let json_path = if field.contains('.') {
213213- let parts: Vec<&str> = field.split('.').collect();
214214- let mut path = String::from("json");
215215- for (i, part) in parts.iter().enumerate() {
216216- if i == parts.len() - 1 {
217217- path.push_str(&format!("->>'{}'", part));
218218- } else {
219219- path.push_str(&format!("->'{}'", part));
220220- }
221221- }
222222- path
223223- } else {
224224- format!("json->>'{}'", field)
225225- };
226226- format!("{} = ${}", json_path, param_count)
227227- }
228228- };
229229- *param_count += 1;
230230- clause
231231- } else if let Some(_in_values) = &condition.in_values {
232232- let clause = match field {
233233- "did" | "collection" | "uri" | "cid" => {
234234- format!("{} = ANY(${})", field, param_count)
235235- }
236236- _ => {
237237- let json_path = if field.contains('.') {
238238- let parts: Vec<&str> = field.split('.').collect();
239239- let mut path = String::from("json");
240240- for (i, part) in parts.iter().enumerate() {
241241- if i == parts.len() - 1 {
242242- path.push_str(&format!("->>'{}'", part));
243243- } else {
244244- path.push_str(&format!("->'{}'", part));
245245- }
246246- }
247247- path
248248- } else {
249249- format!("json->>'{}'", field)
250250- };
251251- format!("{} = ANY(${})", json_path, param_count)
252252- }
253253- };
254254- *param_count += 1;
255255- clause
256256- } else if let Some(_contains_value) = &condition.contains {
257257- let clause = if field == "json" {
258258- format!("json::text ILIKE '%' || ${} || '%'", param_count)
259259- } else {
260260- let json_path = if field.contains('.') {
261261- let parts: Vec<&str> = field.split('.').collect();
262262- let mut path = String::from("json");
263263- for (i, part) in parts.iter().enumerate() {
264264- if i == parts.len() - 1 {
265265- path.push_str(&format!("->>'{}'", part));
266266- } else {
267267- path.push_str(&format!("->'{}'", part));
268268- }
269269- }
270270- path
271271- } else {
272272- format!("json->>'{}'", field)
273273- };
274274- format!("{} ILIKE '%' || ${} || '%'", json_path, param_count)
275275- };
276276- *param_count += 1;
277277- clause
278278- } else {
279279- String::new() // Return empty if no conditions match
280280- }
281281-}
/// Thin, cloneable handle over the PostgreSQL connection pool used by the
/// record-indexing queries in this module.
#[derive(Clone)]
pub struct Database {
    /// Shared sqlx connection pool; cloning `Database` clones the pool handle,
    /// not the underlying connections.
    pool: PgPool,
}
287287-288288-impl Database {
289289- pub fn new(pool: PgPool) -> Self {
290290- Self { pool }
291291- }
292292-293293- pub fn from_pool(pool: PgPool) -> Self {
294294- Self::new(pool)
295295- }
296296-297297- #[allow(dead_code)]
298298- pub async fn insert_record(&self, record: &Record) -> Result<(), DatabaseError> {
299299- sqlx::query!(
300300- r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri")
301301- VALUES ($1, $2, $3, $4, $5, $6, $7)
302302- ON CONFLICT ON CONSTRAINT record_pkey
303303- DO UPDATE SET
304304- "cid" = EXCLUDED."cid",
305305- "json" = EXCLUDED."json",
306306- "indexed_at" = EXCLUDED."indexed_at""#,
307307- record.uri,
308308- record.cid,
309309- record.did,
310310- record.collection,
311311- record.json,
312312- record.indexed_at,
313313- record.slice_uri
314314- )
315315- .execute(&self.pool)
316316- .await?;
317317-318318- Ok(())
319319- }
320320-321321- pub async fn batch_insert_records(&self, records: &[Record]) -> Result<(), DatabaseError> {
322322- if records.is_empty() {
323323- return Ok(());
324324- }
325325-326326- // PostgreSQL has a limit on the number of parameters (65536 by default)
327327- // With 7 fields per record, we can safely batch up to ~9000 records at once
328328- const BATCH_SIZE: usize = 8000;
329329-330330- for chunk in records.chunks(BATCH_SIZE) {
331331- self.batch_insert_records_chunk(chunk).await?;
332332- }
333333-334334- Ok(())
335335- }
336336-337337- async fn batch_insert_records_chunk(&self, records: &[Record]) -> Result<(), DatabaseError> {
338338- let mut tx = self.pool.begin().await?;
339339-340340- // Build bulk INSERT with multiple VALUES
341341- let mut query = String::from(
342342- r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri") VALUES "#,
343343- );
344344-345345- // Add placeholders for each record
346346- for (i, _) in records.iter().enumerate() {
347347- if i > 0 {
348348- query.push_str(", ");
349349- }
350350- let base = i * 7 + 1; // 7 fields per record
351351- query.push_str(&format!(
352352- "(${}, ${}, ${}, ${}, ${}, ${}, ${})",
353353- base,
354354- base + 1,
355355- base + 2,
356356- base + 3,
357357- base + 4,
358358- base + 5,
359359- base + 6
360360- ));
361361- }
362362-363363- query.push_str(
364364- r#"
365365- ON CONFLICT ON CONSTRAINT record_pkey
366366- DO UPDATE SET
367367- "cid" = EXCLUDED."cid",
368368- "json" = EXCLUDED."json",
369369- "indexed_at" = EXCLUDED."indexed_at"
370370- "#,
371371- );
372372-373373- // Bind all parameters
374374- let mut sqlx_query = sqlx::query(&query);
375375- for record in records {
376376- sqlx_query = sqlx_query
377377- .bind(&record.uri)
378378- .bind(&record.cid)
379379- .bind(&record.did)
380380- .bind(&record.collection)
381381- .bind(&record.json)
382382- .bind(record.indexed_at)
383383- .bind(&record.slice_uri);
384384- }
385385-386386- sqlx_query.execute(&mut *tx).await?;
387387- tx.commit().await?;
388388-389389- Ok(())
390390- }
    /// Returns a map of record URI -> CID for every record a DID has in one
    /// collection of a slice.
    ///
    /// Intended for sync/diffing: callers can compare CIDs to decide which
    /// records are new or have changed content.
    pub async fn get_existing_record_cids_for_slice(
        &self,
        did: &str,
        collection: &str,
        slice_uri: &str,
    ) -> Result<std::collections::HashMap<String, String>, DatabaseError> {
        let records = sqlx::query!(
            r#"SELECT "uri", "cid"
            FROM "record"
            WHERE "did" = $1 AND "collection" = $2 AND "slice_uri" = $3"#,
            did,
            collection,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // Fold rows into a uri -> cid lookup table.
        let mut cid_map = std::collections::HashMap::new();
        for record in records {
            cid_map.insert(record.uri, record.cid);
        }
        Ok(cid_map)
    }
415415-416416- pub async fn get_record(&self, uri: &str) -> Result<Option<IndexedRecord>, DatabaseError> {
417417- let record = sqlx::query_as::<_, Record>(
418418- r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
419419- FROM "record"
420420- WHERE "uri" = $1"#,
421421- )
422422- .bind(uri)
423423- .fetch_optional(&self.pool)
424424- .await?;
425425-426426- let indexed_record = record.map(|record| IndexedRecord {
427427- uri: record.uri,
428428- cid: record.cid,
429429- did: record.did,
430430- collection: record.collection,
431431- value: record.json,
432432- indexed_at: record.indexed_at.to_rfc3339(),
433433- });
434434-435435- Ok(indexed_record)
436436- }
437437-438438- pub async fn get_lexicons_by_slice(
439439- &self,
440440- slice_uri: &str,
441441- ) -> Result<Vec<serde_json::Value>, DatabaseError> {
442442- let records = sqlx::query_as::<_, Record>(
443443- r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
444444- FROM "record"
445445- WHERE "collection" = 'network.slices.lexicon'
446446- AND "json"->>'slice' = $1
447447- ORDER BY "indexed_at" DESC"#,
448448- )
449449- .bind(slice_uri)
450450- .fetch_all(&self.pool)
451451- .await?;
452452-453453- let lexicon_definitions: Vec<serde_json::Value> = records
454454- .into_iter()
455455- .filter_map(|record| {
456456- let nsid = record.json.get("nsid")?.as_str()?;
457457- let definitions_str = record.json.get("definitions")?.as_str()?;
458458- let definitions: serde_json::Value = serde_json::from_str(definitions_str).ok()?;
459459-460460- Some(serde_json::json!({
461461- "lexicon": 1,
462462- "id": nsid,
463463- "defs": definitions
464464- }))
465465- })
466466- .collect();
467467-468468- Ok(lexicon_definitions)
469469- }
    /// Updates an existing record's mutable content (cid, json, indexed_at).
    ///
    /// The record is addressed by (uri, slice_uri); the identity columns
    /// themselves are never modified.
    ///
    /// # Errors
    /// Returns `DatabaseError::RecordNotFound` when no row matched.
    pub async fn update_record(&self, record: &Record) -> Result<(), DatabaseError> {
        let result = sqlx::query!(
            r#"UPDATE "record"
            SET "cid" = $1, "json" = $2, "indexed_at" = $3
            WHERE "uri" = $4 AND "slice_uri" = $5"#,
            record.cid,
            record.json,
            record.indexed_at,
            record.uri,
            record.slice_uri
        )
        .execute(&self.pool)
        .await?;

        // Zero affected rows means the (uri, slice_uri) pair does not exist.
        if result.rows_affected() == 0 {
            return Err(DatabaseError::RecordNotFound {
                uri: record.uri.clone(),
            });
        }

        Ok(())
    }
    /// Inserts actors in a single transaction, upserting on (did, slice_uri).
    ///
    /// Existing actors get their handle and indexed_at refreshed from the
    /// incoming data. A no-op for an empty input slice.
    pub async fn batch_insert_actors(&self, actors: &[Actor]) -> Result<(), DatabaseError> {
        if actors.is_empty() {
            return Ok(());
        }

        // All rows are written atomically: either every actor lands or none.
        let mut tx = self.pool.begin().await?;

        // Process actors in chunks to avoid hitting parameter limits
        // NOTE(review): each actor below is executed as its own statement, so
        // chunking has no effect on parameter counts — confirm whether a
        // multi-row VALUES insert was the original intent.
        const CHUNK_SIZE: usize = 1000;

        for chunk in actors.chunks(CHUNK_SIZE) {
            for actor in chunk {
                sqlx::query!(
                    r#"INSERT INTO "actor" ("did", "handle", "slice_uri", "indexed_at")
                    VALUES ($1, $2, $3, $4)
                    ON CONFLICT ("did", "slice_uri")
                    DO UPDATE SET
                        "handle" = EXCLUDED."handle",
                        "indexed_at" = EXCLUDED."indexed_at""#,
                    actor.did,
                    actor.handle,
                    actor.slice_uri,
                    actor.indexed_at
                )
                .execute(&mut *tx)
                .await?;
            }
        }

        tx.commit().await?;
        Ok(())
    }
    /// Per-collection record statistics for a slice.
    ///
    /// Only collections registered for the slice via a
    /// 'network.slices.lexicon' record whose main definition is of type
    /// 'record' and which is not excluded from sync are counted.
    ///
    /// # Returns
    /// One `CollectionStats` per collection: total records and distinct DIDs.
    pub async fn get_slice_collection_stats(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<CollectionStats>, DatabaseError> {
        let stats = sqlx::query!(
            r#"
            WITH slice_collections AS (
                SELECT DISTINCT
                    json->>'nsid' as collection_nsid
                FROM record
                WHERE collection = 'network.slices.lexicon'
                AND json->>'slice' = $1
                AND json->>'nsid' IS NOT NULL
                AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
                AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            )
            SELECT
                r.collection,
                COUNT(*) as record_count,
                COUNT(DISTINCT r.did) as unique_actors
            FROM record r
            INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
            WHERE r.slice_uri = $1
            GROUP BY r.collection
            ORDER BY r.collection
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // COUNT(*) columns come back as Option from sqlx; default to 0.
        Ok(stats
            .into_iter()
            .map(|row| CollectionStats {
                collection: row.collection,
                record_count: row.record_count.unwrap_or(0),
                unique_actors: row.unique_actors.unwrap_or(0),
            })
            .collect())
    }
    /// Lists the collection NSIDs registered for a slice.
    ///
    /// Mirrors the filter used by `get_slice_collection_stats`: record-type
    /// lexicons for the slice that are not excluded from sync, ordered by
    /// NSID.
    pub async fn get_slice_collections_list(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<String>, DatabaseError> {
        let rows = sqlx::query!(
            r#"
            SELECT DISTINCT json->>'nsid' as collection_nsid
            FROM record
            WHERE collection = 'network.slices.lexicon'
            AND json->>'slice' = $1
            AND json->>'nsid' IS NOT NULL
            AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
            AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            ORDER BY json->>'nsid'
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        // collection_nsid is an expression column, hence Option — drop NULLs.
        Ok(rows
            .into_iter()
            .filter_map(|row| row.collection_nsid)
            .collect())
    }
    /// Counts all records in a slice across its registered collections.
    ///
    /// Uses the same record-type / not-excluded lexicon filter as
    /// `get_slice_collection_stats`, so the total matches the sum of that
    /// method's per-collection counts.
    pub async fn get_slice_total_records(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            WITH slice_collections AS (
                SELECT DISTINCT
                    json->>'nsid' as collection_nsid
                FROM record
                WHERE collection = 'network.slices.lexicon'
                AND json->>'slice' = $1
                AND json->>'nsid' IS NOT NULL
                AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
                AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
            )
            SELECT COUNT(*) as count
            FROM record r
            INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
            WHERE r.slice_uri = $1
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Counts the actors tracked for a slice.
    pub async fn get_slice_total_actors(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM actor
            WHERE slice_uri = $1
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Queries actors for a slice with optional filtering and keyset
    /// (did-based) pagination.
    ///
    /// Supported filters (first match wins — `handle` takes precedence over
    /// `did`):
    /// - `handle.contains` → case-insensitive substring match (ILIKE)
    /// - `handle.eq`       → exact handle match
    /// - `did.in_values`   → membership test (NOTE: this branch ignores the
    ///                        cursor, so IN queries are not paginated)
    /// - `did.eq`          → exact DID match
    ///
    /// Any other condition shape falls back to plain slice + cursor listing.
    ///
    /// # Returns
    /// `(actors, next_cursor)` — the cursor is the last returned DID.
    /// NOTE(review): a cursor is emitted even when the page is shorter than
    /// `limit`, so callers always make one extra (empty) request to detect
    /// the end.
    pub async fn get_slice_actors(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        where_conditions: Option<&HashMap<String, WhereCondition>>,
    ) -> Result<(Vec<Actor>, Option<String>), DatabaseError> {
        let limit = limit.unwrap_or(50).min(100); // Cap at 100

        // Handle where conditions with specific cases
        let records = if let Some(conditions) = where_conditions {
            // Check for handle contains filter
            if let Some(handle_condition) = conditions.get("handle") {
                if let Some(contains_value) = &handle_condition.contains {
                    // Substring search; wildcards wrap the raw value.
                    let pattern = format!("%{}%", contains_value);
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            pattern,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            pattern,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else if let Some(eq_value) = &handle_condition.eq {
                    // Non-string eq values degrade to "" (matches nothing useful).
                    let handle_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            handle_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            handle_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    // Default case with basic filtering
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else if let Some(did_condition) = conditions.get("did") {
                if let Some(in_values) = &did_condition.in_values {
                    // Keep only string entries; other JSON types are dropped.
                    let string_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str())
                        .map(|s| s.to_string())
                        .collect();

                    sqlx::query_as!(
                        Actor,
                        r#"
                        SELECT did, handle, slice_uri, indexed_at
                        FROM actor
                        WHERE slice_uri = $1 AND did = ANY($2)
                        ORDER BY did ASC
                        LIMIT $3
                        "#,
                        slice_uri,
                        &string_values,
                        limit as i64
                    )
                    .fetch_all(&self.pool)
                    .await?
                } else if let Some(eq_value) = &did_condition.eq {
                    let did_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            did_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            did_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    // Default case with basic filtering
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else {
                // Default case with basic filtering
                self.query_actors_with_cursor(slice_uri, cursor, limit)
                    .await?
            }
        } else {
            // No where conditions, just basic slice + cursor filtering
            self.query_actors_with_cursor(slice_uri, cursor, limit)
                .await?
        };

        // Generate cursor from the last record if there are any records
        let cursor = if records.is_empty() {
            None
        } else {
            records.last().map(|actor| actor.did.clone())
        };

        Ok((records, cursor))
    }
    /// Internal helper: plain actor listing for a slice with keyset
    /// pagination.
    ///
    /// Orders by `did` ascending; when a cursor is given, only actors with a
    /// strictly greater DID are returned.
    async fn query_actors_with_cursor(
        &self,
        slice_uri: &str,
        cursor: Option<&str>,
        limit: i32,
    ) -> Result<Vec<Actor>, DatabaseError> {
        match cursor {
            Some(cursor_did) => sqlx::query_as!(
                Actor,
                r#"
                SELECT did, handle, slice_uri, indexed_at
                FROM actor
                WHERE slice_uri = $1 AND did > $2
                ORDER BY did ASC
                LIMIT $3
                "#,
                slice_uri,
                cursor_did,
                limit as i64
            )
            .fetch_all(&self.pool)
            .await
            .map_err(DatabaseError::from),
            None => sqlx::query_as!(
                Actor,
                r#"
                SELECT did, handle, slice_uri, indexed_at
                FROM actor
                WHERE slice_uri = $1
                ORDER BY did ASC
                LIMIT $2
                "#,
                slice_uri,
                limit as i64
            )
            .fetch_all(&self.pool)
            .await
            .map_err(DatabaseError::from),
        }
    }
    /// Counts the record-type lexicons registered for a slice.
    ///
    /// Unlike the stats/list queries, this does NOT filter out lexicons
    /// marked `excludedFromSync`.
    pub async fn get_slice_lexicon_count(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM record
            WHERE collection = 'network.slices.lexicon'
            AND json->>'slice' = $1
            AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
            "#,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;

        // COUNT(*) is Option in sqlx's view; treat NULL as zero.
        Ok(count.count.unwrap_or(0))
    }
    /// Pages records for a slice with dynamic filtering and sorting.
    ///
    /// `cursor` is an RFC 3339 timestamp; pagination moves backwards through
    /// `indexed_at` (rows strictly older than the cursor). When the filter
    /// targets the 'network.slices.lexicon' collection, the slice is matched
    /// via `json->>'slice'` instead of the `slice_uri` column.
    ///
    /// # Returns
    /// `(records, next_cursor)` — the cursor is derived from the last record
    /// via `generate_cursor_from_record`.
    pub async fn get_slice_collections_records(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        sort_by: Option<&Vec<SortField>>,
        where_clause: Option<&WhereClause>,
    ) -> Result<(Vec<Record>, Option<String>), DatabaseError> {
        let limit = limit.unwrap_or(50).min(100); // Cap at 100
        let order_by = build_order_by_clause(sort_by);

        // Build WHERE clause dynamically; param_count tracks the next
        // placeholder number and must stay in lockstep with the bind order
        // further down.
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Always filter by slice_uri, except for network.slices.lexicon which uses json->>'slice'
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str()) == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // Add cursor condition if present
        if cursor.is_some() {
            where_clauses.push(format!("indexed_at < ${}", param_count));
            param_count += 1;
        }

        // Use helper function to build where conditions
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        // Add OR conditions with proper parentheses if present
        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // Build the final query; only the placeholders vary — all fragments
        // are generated locally, never taken from user input.
        let where_sql = where_clauses.join(" AND ");
        let query = format!(
            "SELECT uri, cid, did, collection, json, indexed_at, slice_uri
            FROM record
            WHERE {}
            ORDER BY {}
            LIMIT ${}",
            where_sql, order_by, param_count
        );

        // Build query and bind parameters
        let mut query_builder = sqlx::query_as::<_, Record>(&query);

        // Bind slice_uri
        query_builder = query_builder.bind(slice_uri);

        // Bind cursor if present
        if let Some(cursor_time) = cursor {
            // NOTE(review): an unparseable cursor silently falls back to
            // now(), which re-serves the first page — consider surfacing an
            // error to the caller instead.
            let cursor_dt = cursor_time
                .parse::<chrono::DateTime<chrono::Utc>>()
                .unwrap_or_else(|_| chrono::Utc::now());
            query_builder = query_builder.bind(cursor_dt);
        }

        // Bind where condition values (must mirror build_where_conditions'
        // placeholder order).
        query_builder = bind_where_parameters(query_builder, where_clause);

        // Bind limit
        query_builder = query_builder.bind(limit as i64);

        // Execute query
        let records = query_builder.fetch_all(&self.pool).await?;

        // Generate cursor from the last record
        let cursor = if records.is_empty() {
            None
        } else {
            records
                .last()
                .map(|record| generate_cursor_from_record(record, sort_by))
        };

        Ok((records, cursor))
    }
    /// Counts records matching the same dynamic filter used by
    /// `get_slice_collections_records` (without cursor/limit).
    ///
    /// As there, a filter on collection = 'network.slices.lexicon' switches
    /// the slice match from the `slice_uri` column to `json->>'slice'`.
    pub async fn count_slice_collections_records(
        &self,
        slice_uri: &str,
        where_clause: Option<&WhereClause>,
    ) -> Result<i64, DatabaseError> {
        // Build WHERE clause dynamically
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Always filter by slice_uri, except for network.slices.lexicon which uses json->>'slice'
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str()) == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // Use helper function to build where conditions
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        // Add OR conditions with proper parentheses if present
        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // Build the final query
        let where_sql = if where_clauses.is_empty() {
            String::new()
        } else {
            format!(" WHERE {}", where_clauses.join(" AND "))
        };

        let query = format!("SELECT COUNT(*) as count FROM record{}", where_sql);

        // Execute query with parameters
        let mut query_builder = sqlx::query_scalar::<_, i64>(&query);
        query_builder = query_builder.bind(slice_uri);

        // Bind where condition values using helper
        // NOTE(review): binds iterate `conditions.values()` — this must visit
        // the map in the same order build_where_conditions numbered the
        // placeholders; confirm both share one iteration order.
        if let Some(clause) = where_clause {
            // Bind AND condition parameters
            for condition in clause.conditions.values() {
                if let Some(eq_value) = &condition.eq {
                    if let Some(str_val) = eq_value.as_str() {
                        query_builder = query_builder.bind(str_val);
                    } else {
                        query_builder = query_builder.bind(eq_value);
                    }
                }
                if let Some(in_values) = &condition.in_values {
                    let str_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect();
                    query_builder = query_builder.bind(str_values);
                }
                if let Some(contains_value) = &condition.contains {
                    // NOTE(review): bound without %...% wildcards here —
                    // confirm build_where_conditions adds them in the SQL.
                    query_builder = query_builder.bind(contains_value);
                }
            }

            // Bind OR condition parameters
            if let Some(or_conditions) = &clause.or_conditions {
                for condition in or_conditions.values() {
                    if let Some(eq_value) = &condition.eq {
                        if let Some(str_val) = eq_value.as_str() {
                            query_builder = query_builder.bind(str_val);
                        } else {
                            query_builder = query_builder.bind(eq_value);
                        }
                    }
                    if let Some(in_values) = &condition.in_values {
                        let str_values: Vec<String> = in_values
                            .iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect();
                        query_builder = query_builder.bind(str_values);
                    }
                    if let Some(contains_value) = &condition.contains {
                        query_builder = query_builder.bind(contains_value);
                    }
                }
            }
        }

        let count = query_builder.fetch_one(&self.pool).await?;
        Ok(count)
    }
10571057-10581058- pub async fn delete_record_by_uri(
10591059- &self,
10601060- uri: &str,
10611061- slice_uri: Option<&str>,
10621062- ) -> Result<u64, DatabaseError> {
10631063- let result = if let Some(slice_uri) = slice_uri {
10641064- sqlx::query("DELETE FROM record WHERE uri = $1 AND slice_uri = $2")
10651065- .bind(uri)
10661066- .bind(slice_uri)
10671067- .execute(&self.pool)
10681068- .await?
10691069- } else {
10701070- // Delete from all slices if no specific slice provided
10711071- sqlx::query("DELETE FROM record WHERE uri = $1")
10721072- .bind(uri)
10731073- .execute(&self.pool)
10741074- .await?
10751075- };
10761076- Ok(result.rows_affected())
10771077- }
    /// Inserts a record, or refreshes cid/json/indexed_at when it exists.
    ///
    /// # Returns
    /// `true` if a new row was inserted, `false` if an existing row was
    /// updated. Detection uses the Postgres `xmax` system column: it is 0 on
    /// a freshly inserted row and non-zero on a row rewritten by
    /// ON CONFLICT DO UPDATE.
    pub async fn upsert_record(&self, record: &Record) -> Result<bool, DatabaseError> {
        // Returns true if inserted, false if updated
        let result = sqlx::query_scalar::<_, bool>(
            r#"
            INSERT INTO record (uri, cid, did, collection, json, indexed_at, slice_uri)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT ON CONSTRAINT record_pkey DO UPDATE
            SET cid = EXCLUDED.cid,
                json = EXCLUDED.json,
                indexed_at = EXCLUDED.indexed_at
            RETURNING (xmax = 0)
            "#,
        )
        .bind(&record.uri)
        .bind(&record.cid)
        .bind(&record.did)
        .bind(&record.collection)
        .bind(&record.json)
        .bind(record.indexed_at)
        .bind(&record.slice_uri)
        .fetch_one(&self.pool)
        .await?;
        Ok(result)
    }
11031103-11041104- pub async fn get_all_slices(&self) -> Result<Vec<String>, DatabaseError> {
11051105- let rows: Vec<(String,)> = sqlx::query_as(
11061106- r#"
11071107- SELECT DISTINCT json->>'slice' as slice_uri
11081108- FROM record
11091109- WHERE collection = 'network.slices.lexicon'
11101110- AND json->>'slice' IS NOT NULL
11111111- "#,
11121112- )
11131113- .fetch_all(&self.pool)
11141114- .await?;
11151115-11161116- Ok(rows.into_iter().map(|(uri,)| uri).collect())
11171117- }
11181118-11191119- pub async fn get_all_actors(&self) -> Result<Vec<(String, String)>, DatabaseError> {
11201120- let rows = sqlx::query!(
11211121- r#"
11221122- SELECT did, slice_uri
11231123- FROM actor
11241124- "#
11251125- )
11261126- .fetch_all(&self.pool)
11271127- .await?;
11281128-11291129- Ok(rows
11301130- .into_iter()
11311131- .map(|row| (row.did, row.slice_uri))
11321132- .collect())
11331133- }
    /// Returns whether a DID still has any records indexed under a slice.
    ///
    /// Used e.g. to decide if an actor row can be garbage-collected.
    pub async fn actor_has_records(&self, did: &str, slice_uri: &str) -> Result<bool, DatabaseError> {
        let count = sqlx::query!(
            r#"
            SELECT COUNT(*) as count
            FROM record
            WHERE did = $1 AND slice_uri = $2
            "#,
            did,
            slice_uri
        )
        .fetch_one(&self.pool)
        .await?;
        Ok(count.count.unwrap_or(0) > 0)
    }
    /// Removes an actor row for a (did, slice_uri) pair.
    ///
    /// # Returns
    /// Number of rows deleted (0 when the actor was not tracked).
    pub async fn delete_actor(&self, did: &str, slice_uri: &str) -> Result<u64, DatabaseError> {
        let result = sqlx::query!(
            r#"
            DELETE FROM actor
            WHERE did = $1 AND slice_uri = $2
            "#,
            did,
            slice_uri
        )
        .execute(&self.pool)
        .await?;
        Ok(result.rows_affected())
    }
11631163-11641164- pub async fn get_slice_domain(&self, slice_uri: &str) -> Result<Option<String>, DatabaseError> {
11651165- let row = sqlx::query!(
11661166- r#"
11671167- SELECT json->>'domain' as domain
11681168- FROM record
11691169- WHERE collection = 'network.slices.slice'
11701170- AND uri = $1
11711171- "#,
11721172- slice_uri
11731173- )
11741174- .fetch_optional(&self.pool)
11751175- .await?;
11761176-11771177- Ok(row.and_then(|r| r.domain))
11781178- }
    /// Registers a new OAuth client for a slice and returns the stored row.
    ///
    /// `registration_access_token` is optional and stored as-is;
    /// `created_at` is populated by the database.
    pub async fn create_oauth_client(
        &self,
        slice_uri: &str,
        client_id: &str,
        registration_access_token: Option<&str>,
        created_by_did: &str,
    ) -> Result<OAuthClient, DatabaseError> {
        let client = sqlx::query_as!(
            OAuthClient,
            r#"
            INSERT INTO oauth_clients (slice_uri, client_id, registration_access_token, created_by_did)
            VALUES ($1, $2, $3, $4)
            RETURNING id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            "#,
            slice_uri,
            client_id,
            registration_access_token,
            created_by_did
        )
        .fetch_one(&self.pool)
        .await?;

        Ok(client)
    }
    /// Lists the OAuth clients registered for a slice, newest first.
    pub async fn get_oauth_clients_for_slice(
        &self,
        slice_uri: &str,
    ) -> Result<Vec<OAuthClient>, DatabaseError> {
        let clients = sqlx::query_as!(
            OAuthClient,
            r#"
            SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            FROM oauth_clients
            WHERE slice_uri = $1
            ORDER BY created_at DESC
            "#,
            slice_uri
        )
        .fetch_all(&self.pool)
        .await?;

        Ok(clients)
    }
    /// Fetches a single OAuth client by its client_id.
    ///
    /// Returns `Ok(None)` when no such client is registered.
    pub async fn get_oauth_client_by_id(
        &self,
        client_id: &str,
    ) -> Result<Option<OAuthClient>, DatabaseError> {
        let client = sqlx::query_as!(
            OAuthClient,
            r#"
            SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
            FROM oauth_clients
            WHERE client_id = $1
            "#,
            client_id
        )
        .fetch_optional(&self.pool)
        .await?;

        Ok(client)
    }
12431243-12441244- pub async fn delete_oauth_client(&self, client_id: &str) -> Result<(), DatabaseError> {
12451245- let result = sqlx::query!(
12461246- r#"
12471247- DELETE FROM oauth_clients
12481248- WHERE client_id = $1
12491249- "#,
12501250- client_id
12511251- )
12521252- .execute(&self.pool)
12531253- .await?;
12541254-12551255- if result.rows_affected() == 0 {
12561256- return Err(DatabaseError::RecordNotFound {
12571257- uri: client_id.to_string(),
12581258- });
12591259- }
12601260-12611261- Ok(())
12621262- }
12631263-12641264-12651265- pub async fn get_batch_sparkline_data(
12661266- &self,
12671267- slice_uris: &[String],
12681268- interval: &str,
12691269- duration_hours: i32,
12701270- ) -> Result<std::collections::HashMap<String, Vec<crate::models::SparklinePoint>>, DatabaseError> {
12711271- use chrono::{Duration, Utc};
12721272- let cutoff_time = Utc::now() - Duration::hours(duration_hours as i64);
12731273-12741274- let mut sparklines = std::collections::HashMap::new();
12751275-12761276- for slice_uri in slice_uris {
12771277- // Validate interval to prevent SQL injection
12781278- let interval_validated = match interval {
12791279- "minute" => "minute",
12801280- "day" => "day",
12811281- _ => "hour",
12821282- };
12831283-12841284- let query = format!(
12851285- r#"
12861286- SELECT
12871287- date_trunc('{}', indexed_at) as bucket,
12881288- COUNT(*) as count
12891289- FROM record
12901290- WHERE indexed_at >= $1
12911291- AND slice_uri = $2
12921292- GROUP BY bucket
12931293- ORDER BY bucket
12941294- "#,
12951295- interval_validated
12961296- );
12971297-12981298- let rows = sqlx::query_as::<_, (Option<chrono::DateTime<chrono::Utc>>, Option<i64>)>(&query)
12991299- .bind(cutoff_time)
13001300- .bind(slice_uri)
13011301- .fetch_all(&self.pool)
13021302- .await?;
13031303-13041304- let data_points = rows
13051305- .into_iter()
13061306- .map(|(bucket, count)| crate::models::SparklinePoint {
13071307- timestamp: bucket.unwrap().to_rfc3339(),
13081308- count: count.unwrap_or(0),
13091309- })
13101310- .collect();
13111311-13121312- sparklines.insert(slice_uri.clone(), data_points);
13131313- }
13141314-13151315- Ok(sparklines)
13161316- }
13171317-}
+326
api/src/database/actors.rs
···11+//! Actor management operations.
22+//!
33+//! This module handles database operations for ATProto actors (users/DIDs)
44+//! tracked within slices, including batch insertion, querying, and filtering.
55+66+use super::client::Database;
77+use super::types::WhereCondition;
88+use crate::errors::DatabaseError;
99+use crate::models::Actor;
1010+use std::collections::HashMap;
1111+1212+impl Database {
    /// Inserts multiple actors in batches with conflict resolution.
    ///
    /// Updates handle and indexed_at if an actor already exists for the
    /// (did, slice_uri) pair. A no-op for an empty input slice.
    pub async fn batch_insert_actors(&self, actors: &[Actor]) -> Result<(), DatabaseError> {
        if actors.is_empty() {
            return Ok(());
        }

        // All rows are written atomically: either every actor lands or none.
        let mut tx = self.pool.begin().await?;

        // NOTE(review): each actor below runs as its own statement, so this
        // chunking has no effect on parameter limits — confirm whether a
        // multi-row VALUES insert was the original intent.
        const CHUNK_SIZE: usize = 1000;

        for chunk in actors.chunks(CHUNK_SIZE) {
            for actor in chunk {
                sqlx::query!(
                    r#"INSERT INTO "actor" ("did", "handle", "slice_uri", "indexed_at")
                    VALUES ($1, $2, $3, $4)
                    ON CONFLICT ("did", "slice_uri")
                    DO UPDATE SET
                        "handle" = EXCLUDED."handle",
                        "indexed_at" = EXCLUDED."indexed_at""#,
                    actor.did,
                    actor.handle,
                    actor.slice_uri,
                    actor.indexed_at
                )
                .execute(&mut *tx)
                .await?;
            }
        }

        tx.commit().await?;
        Ok(())
    }
    /// Queries actors for a slice with optional filtering and cursor-based pagination.
    ///
    /// Supports filtering by:
    /// - handle (exact match or contains)
    /// - did (exact match or IN clause)
    ///
    /// Filter precedence: a `handle` condition wins over a `did` condition;
    /// unrecognized shapes fall back to plain slice + cursor listing.
    ///
    /// # Returns
    /// Tuple of (actors, next_cursor) where cursor is the last DID.
    /// NOTE(review): a cursor is returned even when the page is shorter than
    /// `limit`, and the `did IN (...)` branch ignores the cursor entirely —
    /// confirm both are intended.
    pub async fn get_slice_actors(
        &self,
        slice_uri: &str,
        limit: Option<i32>,
        cursor: Option<&str>,
        where_conditions: Option<&HashMap<String, WhereCondition>>,
    ) -> Result<(Vec<Actor>, Option<String>), DatabaseError> {
        // Page size defaults to 50 and is capped at 100.
        let limit = limit.unwrap_or(50).min(100);

        let records = if let Some(conditions) = where_conditions {
            if let Some(handle_condition) = conditions.get("handle") {
                if let Some(contains_value) = &handle_condition.contains {
                    // Case-insensitive substring search via ILIKE.
                    let pattern = format!("%{}%", contains_value);
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            pattern,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle ILIKE $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            pattern,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else if let Some(eq_value) = &handle_condition.eq {
                    // Non-string eq values degrade to the empty string.
                    let handle_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            handle_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND handle = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            handle_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else if let Some(did_condition) = conditions.get("did") {
                if let Some(in_values) = &did_condition.in_values {
                    // Keep only string entries; other JSON types are dropped.
                    let string_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str())
                        .map(|s| s.to_string())
                        .collect();

                    sqlx::query_as!(
                        Actor,
                        r#"
                        SELECT did, handle, slice_uri, indexed_at
                        FROM actor
                        WHERE slice_uri = $1 AND did = ANY($2)
                        ORDER BY did ASC
                        LIMIT $3
                        "#,
                        slice_uri,
                        &string_values,
                        limit as i64
                    )
                    .fetch_all(&self.pool)
                    .await?
                } else if let Some(eq_value) = &did_condition.eq {
                    let did_str = eq_value.as_str().unwrap_or("");
                    if let Some(cursor_did) = cursor {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2 AND did > $3
                            ORDER BY did ASC
                            LIMIT $4
                            "#,
                            slice_uri,
                            did_str,
                            cursor_did,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    } else {
                        sqlx::query_as!(
                            Actor,
                            r#"
                            SELECT did, handle, slice_uri, indexed_at
                            FROM actor
                            WHERE slice_uri = $1 AND did = $2
                            ORDER BY did ASC
                            LIMIT $3
                            "#,
                            slice_uri,
                            did_str,
                            limit as i64
                        )
                        .fetch_all(&self.pool)
                        .await?
                    }
                } else {
                    self.query_actors_with_cursor(slice_uri, cursor, limit)
                        .await?
                }
            } else {
                self.query_actors_with_cursor(slice_uri, cursor, limit)
                    .await?
            }
        } else {
            self.query_actors_with_cursor(slice_uri, cursor, limit)
                .await?
        };

        // Next-page cursor is the DID of the last returned actor.
        let cursor = if records.is_empty() {
            None
        } else {
            records.last().map(|actor| actor.did.clone())
        };

        Ok((records, cursor))
    }
224224+225225+ /// Internal helper for basic actor queries with cursor pagination.
226226+ async fn query_actors_with_cursor(
227227+ &self,
228228+ slice_uri: &str,
229229+ cursor: Option<&str>,
230230+ limit: i32,
231231+ ) -> Result<Vec<Actor>, DatabaseError> {
232232+ match cursor {
233233+ Some(cursor_did) => sqlx::query_as!(
234234+ Actor,
235235+ r#"
236236+ SELECT did, handle, slice_uri, indexed_at
237237+ FROM actor
238238+ WHERE slice_uri = $1 AND did > $2
239239+ ORDER BY did ASC
240240+ LIMIT $3
241241+ "#,
242242+ slice_uri,
243243+ cursor_did,
244244+ limit as i64
245245+ )
246246+ .fetch_all(&self.pool)
247247+ .await
248248+ .map_err(DatabaseError::from),
249249+ None => sqlx::query_as!(
250250+ Actor,
251251+ r#"
252252+ SELECT did, handle, slice_uri, indexed_at
253253+ FROM actor
254254+ WHERE slice_uri = $1
255255+ ORDER BY did ASC
256256+ LIMIT $2
257257+ "#,
258258+ slice_uri,
259259+ limit as i64
260260+ )
261261+ .fetch_all(&self.pool)
262262+ .await
263263+ .map_err(DatabaseError::from),
264264+ }
265265+ }
266266+267267+ /// Gets all actors across all slices.
268268+ ///
269269+ /// # Returns
270270+ /// Vector of (did, slice_uri) tuples
271271+ pub async fn get_all_actors(&self) -> Result<Vec<(String, String)>, DatabaseError> {
272272+ let rows = sqlx::query!(
273273+ r#"
274274+ SELECT did, slice_uri
275275+ FROM actor
276276+ "#
277277+ )
278278+ .fetch_all(&self.pool)
279279+ .await?;
280280+281281+ Ok(rows
282282+ .into_iter()
283283+ .map(|row| (row.did, row.slice_uri))
284284+ .collect())
285285+ }
286286+287287+ /// Checks if an actor has any records in a slice.
288288+ ///
289289+ /// Used before actor deletion to maintain referential integrity.
290290+ pub async fn actor_has_records(
291291+ &self,
292292+ did: &str,
293293+ slice_uri: &str,
294294+ ) -> Result<bool, DatabaseError> {
295295+ let count = sqlx::query!(
296296+ r#"
297297+ SELECT COUNT(*) as count
298298+ FROM record
299299+ WHERE did = $1 AND slice_uri = $2
300300+ "#,
301301+ did,
302302+ slice_uri
303303+ )
304304+ .fetch_one(&self.pool)
305305+ .await?;
306306+ Ok(count.count.unwrap_or(0) > 0)
307307+ }
308308+309309+ /// Deletes an actor from a specific slice.
310310+ ///
311311+ /// # Returns
312312+ /// Number of rows affected
313313+ pub async fn delete_actor(&self, did: &str, slice_uri: &str) -> Result<u64, DatabaseError> {
314314+ let result = sqlx::query!(
315315+ r#"
316316+ DELETE FROM actor
317317+ WHERE did = $1 AND slice_uri = $2
318318+ "#,
319319+ did,
320320+ slice_uri
321321+ )
322322+ .execute(&self.pool)
323323+ .await?;
324324+ Ok(result.rows_affected())
325325+ }
326326+}
+75
api/src/database/analytics.rs
···11+//! Analytics and time-series data queries.
22+//!
33+//! This module handles database operations for generating analytics data,
44+//! including sparkline time-series data for record indexing activity.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::SparklinePoint;
99+use std::collections::HashMap;
1010+1111+impl Database {
1212+ /// Gets sparkline data for multiple slices in a single query batch.
1313+ ///
1414+ /// Generates time-bucketed counts of indexed records for visualization.
1515+ ///
1616+ /// # Arguments
1717+ /// * `slice_uris` - Array of slice URIs to get data for
1818+ /// * `interval` - Time bucket size: "minute", "hour", or "day"
1919+ /// * `duration_hours` - How many hours of history to include
2020+ ///
2121+ /// # Returns
2222+ /// HashMap mapping slice_uri -> array of (timestamp, count) data points
2323+ pub async fn get_batch_sparkline_data(
2424+ &self,
2525+ slice_uris: &[String],
2626+ interval: &str,
2727+ duration_hours: i32,
2828+ ) -> Result<HashMap<String, Vec<SparklinePoint>>, DatabaseError> {
2929+ use chrono::{Duration, Utc};
3030+ let cutoff_time = Utc::now() - Duration::hours(duration_hours as i64);
3131+3232+ let mut sparklines = HashMap::new();
3333+3434+ for slice_uri in slice_uris {
3535+ let interval_validated = match interval {
3636+ "minute" => "minute",
3737+ "day" => "day",
3838+ _ => "hour",
3939+ };
4040+4141+ let query = format!(
4242+ r#"
4343+ SELECT
4444+ date_trunc('{}', indexed_at) as bucket,
4545+ COUNT(*) as count
4646+ FROM record
4747+ WHERE indexed_at >= $1
4848+ AND slice_uri = $2
4949+ GROUP BY bucket
5050+ ORDER BY bucket
5151+ "#,
5252+ interval_validated
5353+ );
5454+5555+ let rows =
5656+ sqlx::query_as::<_, (Option<chrono::DateTime<chrono::Utc>>, Option<i64>)>(&query)
5757+ .bind(cutoff_time)
5858+ .bind(slice_uri)
5959+ .fetch_all(&self.pool)
6060+ .await?;
6161+6262+ let data_points = rows
6363+ .into_iter()
6464+ .map(|(bucket, count)| SparklinePoint {
6565+ timestamp: bucket.unwrap().to_rfc3339(),
6666+ count: count.unwrap_or(0),
6767+ })
6868+ .collect();
6969+7070+ sparklines.insert(slice_uri.clone(), data_points);
7171+ }
7272+7373+ Ok(sparklines)
7474+ }
7575+}
+23
api/src/database/client.rs
···11+use sqlx::PgPool;
22+33+/// Core database client for interacting with PostgreSQL.
44+///
55+/// The Database struct wraps a connection pool and provides methods for
66+/// all database operations across records, actors, slices, OAuth, and analytics.
77+#[derive(Clone)]
88+pub struct Database {
99+ pub(super) pool: PgPool,
1010+}
1111+1212+impl Database {
1313+ /// Creates a new Database instance from a connection pool.
1414+ pub fn new(pool: PgPool) -> Self {
1515+ Self { pool }
1616+ }
1717+1818+ /// Creates a new Database instance from a connection pool.
1919+ /// Alias for `new()` for clarity in some contexts.
2020+ pub fn from_pool(pool: PgPool) -> Self {
2121+ Self::new(pool)
2222+ }
2323+}
+77
api/src/database/cursor.rs
···11+//! Cursor-based pagination utilities.
22+//!
33+//! Cursors encode the position in a result set as base64(sort_value::indexed_at::cid)
44+//! to enable stable pagination even when new records are inserted.
55+66+use super::types::SortField;
77+use crate::models::Record;
88+use base64::{Engine as _, engine::general_purpose};
99+1010+/// Generates a base64-encoded cursor from sort value, timestamp, and CID.
1111+///
1212+/// The cursor format is: `base64(sort_value::indexed_at::cid)`
1313+///
1414+/// # Arguments
1515+/// * `sort_value` - The value of the primary sort field
1616+/// * `indexed_at` - The timestamp when the record was indexed
1717+/// * `cid` - The content identifier (CID) of the record
1818+///
1919+/// # Returns
2020+/// Base64-encoded cursor string
2121+pub fn generate_cursor(
2222+ sort_value: &str,
2323+ indexed_at: chrono::DateTime<chrono::Utc>,
2424+ cid: &str,
2525+) -> String {
2626+ let cursor_content = format!("{}::{}::{}", sort_value, indexed_at.to_rfc3339(), cid);
2727+ general_purpose::URL_SAFE_NO_PAD.encode(cursor_content)
2828+}
2929+3030+/// Extracts the primary sort field name from a sort array.
3131+///
3232+/// Returns "indexed_at" if no sort fields are provided.
3333+///
3434+/// # Arguments
3535+/// * `sort_by` - Optional array of sort fields
3636+///
3737+/// # Returns
3838+/// The name of the primary sort field
3939+pub fn get_primary_sort_field(sort_by: Option<&Vec<SortField>>) -> String {
4040+ match sort_by {
4141+ Some(sort_fields) if !sort_fields.is_empty() => sort_fields[0].field.clone(),
4242+ _ => "indexed_at".to_string(),
4343+ }
4444+}
4545+4646+/// Generates a cursor from a record based on the sort configuration.
4747+///
4848+/// Extracts the sort value from the record (either from a table column
4949+/// or from the JSON field), then creates a cursor encoding that value
5050+/// along with indexed_at and cid.
5151+///
5252+/// # Arguments
5353+/// * `record` - The record to generate a cursor for
5454+/// * `sort_by` - Optional array defining sort fields
5555+///
5656+/// # Returns
5757+/// Base64-encoded cursor string
5858+pub fn generate_cursor_from_record(record: &Record, sort_by: Option<&Vec<SortField>>) -> String {
5959+ let primary_sort_field = get_primary_sort_field(sort_by);
6060+6161+ let sort_value = match primary_sort_field.as_str() {
6262+ "indexed_at" => record.indexed_at.to_rfc3339(),
6363+ field => record
6464+ .json
6565+ .get(field)
6666+ .and_then(|v| match v {
6767+ serde_json::Value::String(s) if !s.is_empty() => Some(s.clone()),
6868+ serde_json::Value::Number(n) => Some(n.to_string()),
6969+ serde_json::Value::Bool(b) => Some(b.to_string()),
7070+ serde_json::Value::Null => None,
7171+ _ => None,
7272+ })
7373+ .unwrap_or_else(|| "NULL".to_string()),
7474+ };
7575+7676+ generate_cursor(&sort_value, record.indexed_at, &record.cid)
7777+}
+12
api/src/database/mod.rs
···11+mod actors;
22+mod analytics;
33+mod client;
44+mod cursor;
55+mod oauth;
66+mod query_builder;
77+mod records;
88+mod slices;
99+pub mod types;
1010+1111+pub use client::Database;
1212+pub use types::{SortField, WhereClause, WhereCondition};
+115
api/src/database/oauth.rs
···11+//! OAuth client management operations.
22+//!
33+//! This module handles database operations for OAuth client registrations
44+//! associated with slices, including creation, retrieval, and deletion.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::OAuthClient;
99+1010+impl Database {
1111+ /// Creates a new OAuth client registration for a slice.
1212+ ///
1313+ /// # Arguments
1414+ /// * `slice_uri` - The slice this client is registered for
1515+ /// * `client_id` - The OAuth client ID from the authorization server
1616+ /// * `registration_access_token` - Optional token for client management
1717+ /// * `created_by_did` - The DID of the user who created this client
1818+ ///
1919+ /// # Returns
2020+ /// The created OAuthClient with generated ID and timestamp
2121+ pub async fn create_oauth_client(
2222+ &self,
2323+ slice_uri: &str,
2424+ client_id: &str,
2525+ registration_access_token: Option<&str>,
2626+ created_by_did: &str,
2727+ ) -> Result<OAuthClient, DatabaseError> {
2828+ let client = sqlx::query_as!(
2929+ OAuthClient,
3030+ r#"
3131+ INSERT INTO oauth_clients (slice_uri, client_id, registration_access_token, created_by_did)
3232+ VALUES ($1, $2, $3, $4)
3333+ RETURNING id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
3434+ "#,
3535+ slice_uri,
3636+ client_id,
3737+ registration_access_token,
3838+ created_by_did
3939+ )
4040+ .fetch_one(&self.pool)
4141+ .await?;
4242+4343+ Ok(client)
4444+ }
4545+4646+ /// Gets all OAuth clients registered for a specific slice.
4747+ ///
4848+ /// Results are ordered by creation time, most recent first.
4949+ pub async fn get_oauth_clients_for_slice(
5050+ &self,
5151+ slice_uri: &str,
5252+ ) -> Result<Vec<OAuthClient>, DatabaseError> {
5353+ let clients = sqlx::query_as!(
5454+ OAuthClient,
5555+ r#"
5656+ SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
5757+ FROM oauth_clients
5858+ WHERE slice_uri = $1
5959+ ORDER BY created_at DESC
6060+ "#,
6161+ slice_uri
6262+ )
6363+ .fetch_all(&self.pool)
6464+ .await?;
6565+6666+ Ok(clients)
6767+ }
6868+6969+ /// Gets a single OAuth client by its client_id.
7070+ ///
7171+ /// # Returns
7272+ /// Some(OAuthClient) if found, None otherwise
7373+ pub async fn get_oauth_client_by_id(
7474+ &self,
7575+ client_id: &str,
7676+ ) -> Result<Option<OAuthClient>, DatabaseError> {
7777+ let client = sqlx::query_as!(
7878+ OAuthClient,
7979+ r#"
8080+ SELECT id, slice_uri, client_id, registration_access_token, created_at as "created_at!", created_by_did
8181+ FROM oauth_clients
8282+ WHERE client_id = $1
8383+ "#,
8484+ client_id
8585+ )
8686+ .fetch_optional(&self.pool)
8787+ .await?;
8888+8989+ Ok(client)
9090+ }
9191+9292+ /// Deletes an OAuth client by its client_id.
9393+ ///
9494+ /// # Returns
9595+ /// Error if no client with the given client_id exists
9696+ pub async fn delete_oauth_client(&self, client_id: &str) -> Result<(), DatabaseError> {
9797+ let result = sqlx::query!(
9898+ r#"
9999+ DELETE FROM oauth_clients
100100+ WHERE client_id = $1
101101+ "#,
102102+ client_id
103103+ )
104104+ .execute(&self.pool)
105105+ .await?;
106106+107107+ if result.rows_affected() == 0 {
108108+ return Err(DatabaseError::RecordNotFound {
109109+ uri: client_id.to_string(),
110110+ });
111111+ }
112112+113113+ Ok(())
114114+ }
115115+}
+253
api/src/database/query_builder.rs
···11+//! SQL query building utilities for dynamic WHERE and ORDER BY clauses.
22+//!
33+//! This module provides helpers for constructing SQL queries dynamically
44+//! based on user input while preventing SQL injection attacks.
55+66+use super::types::{SortField, WhereClause, WhereCondition};
77+use crate::models::Record;
88+99+/// Builds an ORDER BY clause from an optional array of sort fields.
1010+///
1111+/// Handles both table columns (indexed_at, uri, cid, did, collection)
1212+/// and JSON fields with nested paths. Always adds indexed_at as a
1313+/// tie-breaker if not already present.
1414+///
1515+/// # Arguments
1616+/// * `sort_by` - Optional array of fields to sort by
1717+///
1818+/// # Returns
1919+/// SQL ORDER BY clause string (without "ORDER BY" prefix)
2020+pub fn build_order_by_clause(sort_by: Option<&Vec<SortField>>) -> String {
2121+ match sort_by {
2222+ Some(sort_fields) if !sort_fields.is_empty() => {
2323+ let mut order_clauses = Vec::new();
2424+ for sort_field in sort_fields {
2525+ let field = &sort_field.field;
2626+ let direction = match sort_field.direction.to_lowercase().as_str() {
2727+ "desc" => "DESC",
2828+ _ => "ASC",
2929+ };
3030+3131+ if field
3232+ .chars()
3333+ .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
3434+ {
3535+ if field == "indexed_at"
3636+ || field == "uri"
3737+ || field == "cid"
3838+ || field == "did"
3939+ || field == "collection"
4040+ {
4141+ order_clauses.push(format!("{field} {direction}"));
4242+ } else {
4343+ if field.contains('.') {
4444+ let parts: Vec<&str> = field.split('.').collect();
4545+ let mut path = String::from("json");
4646+ for (i, part) in parts.iter().enumerate() {
4747+ if i == parts.len() - 1 {
4848+ path.push_str(&format!("->>'{}'", part));
4949+ } else {
5050+ path.push_str(&format!("->'{}'", part));
5151+ }
5252+ }
5353+ order_clauses.push(format!("{path} {direction} NULLS LAST"));
5454+ } else {
5555+ order_clauses.push(format!("json->>'{field}' {direction} NULLS LAST"));
5656+ }
5757+ }
5858+ }
5959+ }
6060+ if !order_clauses.is_empty() {
6161+ let has_indexed_at = order_clauses
6262+ .iter()
6363+ .any(|clause| clause.contains("indexed_at"));
6464+ if !has_indexed_at {
6565+ order_clauses.push("indexed_at DESC".to_string());
6666+ }
6767+ order_clauses.join(", ")
6868+ } else {
6969+ "indexed_at DESC".to_string()
7070+ }
7171+ }
7272+ _ => "indexed_at DESC".to_string(),
7373+ }
7474+}
7575+7676+/// Builds WHERE conditions from a WhereClause structure.
7777+///
7878+/// Returns separate arrays for AND conditions and OR conditions
7979+/// to be combined in the final query.
8080+///
8181+/// # Arguments
8282+/// * `where_clause` - Optional where clause with AND/OR conditions
8383+/// * `param_count` - Mutable counter for parameter numbering ($1, $2, etc)
8484+///
8585+/// # Returns
8686+/// Tuple of (and_conditions, or_conditions) as SQL strings
8787+pub fn build_where_conditions(
8888+ where_clause: Option<&WhereClause>,
8989+ param_count: &mut usize,
9090+) -> (Vec<String>, Vec<String>) {
9191+ let mut where_clauses = Vec::new();
9292+ let mut or_clauses = Vec::new();
9393+9494+ if let Some(clause) = where_clause {
9595+ for (field, condition) in &clause.conditions {
9696+ let field_clause = build_single_condition(field, condition, param_count);
9797+ where_clauses.push(field_clause);
9898+ }
9999+100100+ if let Some(or_conditions) = &clause.or_conditions {
101101+ for (field, condition) in or_conditions {
102102+ let field_clause = build_single_condition(field, condition, param_count);
103103+ or_clauses.push(field_clause);
104104+ }
105105+ }
106106+ }
107107+108108+ (where_clauses, or_clauses)
109109+}
110110+111111+/// Builds a single SQL condition clause for a field.
112112+///
113113+/// Supports equality (eq), array membership (in_values), and pattern matching (contains)
114114+/// for both table columns and JSON fields with nested paths.
115115+///
116116+/// # Arguments
117117+/// * `field` - Field name (table column or JSON path)
118118+/// * `condition` - The condition to apply (eq, in_values, or contains)
119119+/// * `param_count` - Mutable counter for parameter numbering
120120+///
121121+/// # Returns
122122+/// SQL condition string with parameter placeholder
123123+pub fn build_single_condition(
124124+ field: &str,
125125+ condition: &WhereCondition,
126126+ param_count: &mut usize,
127127+) -> String {
128128+ if let Some(_eq_value) = &condition.eq {
129129+ let clause = match field {
130130+ "did" | "collection" | "uri" | "cid" => {
131131+ format!("{} = ${}", field, param_count)
132132+ }
133133+ _ => {
134134+ let json_path = build_json_path(field);
135135+ format!("{} = ${}", json_path, param_count)
136136+ }
137137+ };
138138+ *param_count += 1;
139139+ clause
140140+ } else if let Some(_in_values) = &condition.in_values {
141141+ let clause = match field {
142142+ "did" | "collection" | "uri" | "cid" => {
143143+ format!("{} = ANY(${})", field, param_count)
144144+ }
145145+ _ => {
146146+ let json_path = build_json_path(field);
147147+ format!("{} = ANY(${})", json_path, param_count)
148148+ }
149149+ };
150150+ *param_count += 1;
151151+ clause
152152+ } else if let Some(_contains_value) = &condition.contains {
153153+ let clause = if field == "json" {
154154+ format!("json::text ILIKE '%' || ${} || '%'", param_count)
155155+ } else {
156156+ let json_path = build_json_path(field);
157157+ format!("{} ILIKE '%' || ${} || '%'", json_path, param_count)
158158+ };
159159+ *param_count += 1;
160160+ clause
161161+ } else {
162162+ String::new()
163163+ }
164164+}
/// Builds a PostgreSQL JSON path accessor string.
///
/// Converts dot notation (e.g., "user.name") into PostgreSQL JSON
/// operators (e.g., "json->'user'->>'name'"); the final segment uses ->>
/// so the value is extracted as text.
fn build_json_path(field: &str) -> String {
    match field.rsplit_once('.') {
        // No dots: single top-level text extraction.
        None => format!("json->>'{field}'"),
        Some((prefix, last)) => {
            let mut path = String::from("json");
            for part in prefix.split('.') {
                path.push_str(&format!("->'{part}'"));
            }
            path.push_str(&format!("->>'{last}'"));
            path
        }
    }
}
186186+187187+/// Binds WHERE clause parameters to a sqlx query.
188188+///
189189+/// Iterates through all conditions and binds their values in the correct order.
190190+///
191191+/// # Arguments
192192+/// * `query_builder` - The sqlx query to bind parameters to
193193+/// * `where_clause` - Optional where clause with parameter values
194194+///
195195+/// # Returns
196196+/// Query builder with all parameters bound
197197+pub fn bind_where_parameters<'q>(
198198+ mut query_builder: sqlx::query::QueryAs<
199199+ 'q,
200200+ sqlx::Postgres,
201201+ Record,
202202+ sqlx::postgres::PgArguments,
203203+ >,
204204+ where_clause: Option<&'q WhereClause>,
205205+) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
206206+ if let Some(clause) = where_clause {
207207+ for condition in clause.conditions.values() {
208208+ query_builder = bind_single_condition(query_builder, condition);
209209+ }
210210+211211+ if let Some(or_conditions) = &clause.or_conditions {
212212+ for condition in or_conditions.values() {
213213+ query_builder = bind_single_condition(query_builder, condition);
214214+ }
215215+ }
216216+ }
217217+ query_builder
218218+}
219219+220220+/// Binds parameters for a single condition to a sqlx query.
221221+///
222222+/// Handles eq (single value), in_values (array), and contains (pattern) conditions.
223223+fn bind_single_condition<'q>(
224224+ mut query_builder: sqlx::query::QueryAs<
225225+ 'q,
226226+ sqlx::Postgres,
227227+ Record,
228228+ sqlx::postgres::PgArguments,
229229+ >,
230230+ condition: &'q WhereCondition,
231231+) -> sqlx::query::QueryAs<'q, sqlx::Postgres, Record, sqlx::postgres::PgArguments> {
232232+ if let Some(eq_value) = &condition.eq {
233233+ if let Some(str_val) = eq_value.as_str() {
234234+ query_builder = query_builder.bind(str_val);
235235+ } else {
236236+ query_builder = query_builder.bind(eq_value);
237237+ }
238238+ }
239239+240240+ if let Some(in_values) = &condition.in_values {
241241+ let str_values: Vec<String> = in_values
242242+ .iter()
243243+ .filter_map(|v| v.as_str().map(|s| s.to_string()))
244244+ .collect();
245245+ query_builder = query_builder.bind(str_values);
246246+ }
247247+248248+ if let Some(contains_value) = &condition.contains {
249249+ query_builder = query_builder.bind(contains_value);
250250+ }
251251+252252+ query_builder
253253+}
+468
api/src/database/records.rs
···11+//! Record CRUD operations and queries.
22+//!
33+//! This module handles all database operations related to ATProto records,
44+//! including insertion, updates, deletion, and complex queries with filtering,
55+//! sorting, and pagination.
66+77+use super::client::Database;
88+use super::cursor::generate_cursor_from_record;
99+use super::query_builder::{bind_where_parameters, build_order_by_clause, build_where_conditions};
1010+use super::types::{SortField, WhereClause};
1111+use crate::errors::DatabaseError;
1212+use crate::models::{IndexedRecord, Record};
1313+1414+impl Database {
1515+ /// Inserts a single record into the database.
1616+ ///
1717+ /// Uses ON CONFLICT to update existing records with matching URI and slice_uri.
1818+ #[allow(dead_code)]
1919+ pub async fn insert_record(&self, record: &Record) -> Result<(), DatabaseError> {
2020+ sqlx::query!(
2121+ r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri")
2222+ VALUES ($1, $2, $3, $4, $5, $6, $7)
2323+ ON CONFLICT ON CONSTRAINT record_pkey
2424+ DO UPDATE SET
2525+ "cid" = EXCLUDED."cid",
2626+ "json" = EXCLUDED."json",
2727+ "indexed_at" = EXCLUDED."indexed_at""#,
2828+ record.uri,
2929+ record.cid,
3030+ record.did,
3131+ record.collection,
3232+ record.json,
3333+ record.indexed_at,
3434+ record.slice_uri
3535+ )
3636+ .execute(&self.pool)
3737+ .await?;
3838+3939+ Ok(())
4040+ }
4141+4242+ /// Inserts multiple records in optimized batches.
4343+ ///
4444+ /// Automatically chunks records to stay within PostgreSQL parameter limits
4545+ /// (65536 parameters, ~8000 records per batch with 7 fields each).
4646+ pub async fn batch_insert_records(&self, records: &[Record]) -> Result<(), DatabaseError> {
4747+ if records.is_empty() {
4848+ return Ok(());
4949+ }
5050+5151+ const BATCH_SIZE: usize = 8000;
5252+5353+ for chunk in records.chunks(BATCH_SIZE) {
5454+ self.batch_insert_records_chunk(chunk).await?;
5555+ }
5656+5757+ Ok(())
5858+ }
5959+6060+ /// Internal helper to insert a single chunk of records.
6161+ async fn batch_insert_records_chunk(&self, records: &[Record]) -> Result<(), DatabaseError> {
6262+ let mut tx = self.pool.begin().await?;
6363+6464+ let mut query = String::from(
6565+ r#"INSERT INTO "record" ("uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri") VALUES "#,
6666+ );
6767+6868+ for (i, _) in records.iter().enumerate() {
6969+ if i > 0 {
7070+ query.push_str(", ");
7171+ }
7272+ let base = i * 7 + 1;
7373+ query.push_str(&format!(
7474+ "(${}, ${}, ${}, ${}, ${}, ${}, ${})",
7575+ base,
7676+ base + 1,
7777+ base + 2,
7878+ base + 3,
7979+ base + 4,
8080+ base + 5,
8181+ base + 6
8282+ ));
8383+ }
8484+8585+ query.push_str(
8686+ r#"
8787+ ON CONFLICT ON CONSTRAINT record_pkey
8888+ DO UPDATE SET
8989+ "cid" = EXCLUDED."cid",
9090+ "json" = EXCLUDED."json",
9191+ "indexed_at" = EXCLUDED."indexed_at"
9292+ "#,
9393+ );
9494+9595+ let mut sqlx_query = sqlx::query(&query);
9696+ for record in records {
9797+ sqlx_query = sqlx_query
9898+ .bind(&record.uri)
9999+ .bind(&record.cid)
100100+ .bind(&record.did)
101101+ .bind(&record.collection)
102102+ .bind(&record.json)
103103+ .bind(record.indexed_at)
104104+ .bind(&record.slice_uri);
105105+ }
106106+107107+ sqlx_query.execute(&mut *tx).await?;
108108+ tx.commit().await?;
109109+110110+ Ok(())
111111+ }
112112+113113+ /// Gets a map of existing record CIDs for a specific actor, collection, and slice.
114114+ ///
115115+ /// Used during sync to determine which records need updating vs inserting.
116116+ ///
117117+ /// # Returns
118118+ /// HashMap mapping URI -> CID
119119+ pub async fn get_existing_record_cids_for_slice(
120120+ &self,
121121+ did: &str,
122122+ collection: &str,
123123+ slice_uri: &str,
124124+ ) -> Result<std::collections::HashMap<String, String>, DatabaseError> {
125125+ let records = sqlx::query!(
126126+ r#"SELECT "uri", "cid"
127127+ FROM "record"
128128+ WHERE "did" = $1 AND "collection" = $2 AND "slice_uri" = $3"#,
129129+ did,
130130+ collection,
131131+ slice_uri
132132+ )
133133+ .fetch_all(&self.pool)
134134+ .await?;
135135+136136+ let mut cid_map = std::collections::HashMap::new();
137137+ for record in records {
138138+ cid_map.insert(record.uri, record.cid);
139139+ }
140140+ Ok(cid_map)
141141+ }
142142+143143+ /// Retrieves a single record by URI.
144144+ ///
145145+ /// # Returns
146146+ /// Some(IndexedRecord) if found, None otherwise
147147+ pub async fn get_record(&self, uri: &str) -> Result<Option<IndexedRecord>, DatabaseError> {
148148+ let record = sqlx::query_as::<_, Record>(
149149+ r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
150150+ FROM "record"
151151+ WHERE "uri" = $1"#,
152152+ )
153153+ .bind(uri)
154154+ .fetch_optional(&self.pool)
155155+ .await?;
156156+157157+ let indexed_record = record.map(|record| IndexedRecord {
158158+ uri: record.uri,
159159+ cid: record.cid,
160160+ did: record.did,
161161+ collection: record.collection,
162162+ value: record.json,
163163+ indexed_at: record.indexed_at.to_rfc3339(),
164164+ });
165165+166166+ Ok(indexed_record)
167167+ }
168168+169169+ /// Updates an existing record.
170170+ ///
171171+ /// Returns error if no record with matching URI and slice_uri exists.
172172+ pub async fn update_record(&self, record: &Record) -> Result<(), DatabaseError> {
173173+ let result = sqlx::query!(
174174+ r#"UPDATE "record"
175175+ SET "cid" = $1, "json" = $2, "indexed_at" = $3
176176+ WHERE "uri" = $4 AND "slice_uri" = $5"#,
177177+ record.cid,
178178+ record.json,
179179+ record.indexed_at,
180180+ record.uri,
181181+ record.slice_uri
182182+ )
183183+ .execute(&self.pool)
184184+ .await?;
185185+186186+ if result.rows_affected() == 0 {
187187+ return Err(DatabaseError::RecordNotFound {
188188+ uri: record.uri.clone(),
189189+ });
190190+ }
191191+192192+ Ok(())
193193+ }
194194+195195+ /// Queries records for a slice with advanced filtering, sorting, and pagination.
196196+ ///
197197+ /// Supports:
198198+ /// - Cursor-based pagination
199199+ /// - Multi-field sorting (with JSON path support)
200200+ /// - Complex WHERE conditions (AND/OR, eq/in/contains operators)
201201+ /// - Automatic handling of lexicon records vs regular records
202202+ ///
203203+ /// # Returns
204204+ /// Tuple of (records, next_cursor)
205205+ pub async fn get_slice_collections_records(
206206+ &self,
207207+ slice_uri: &str,
208208+ limit: Option<i32>,
209209+ cursor: Option<&str>,
210210+ sort_by: Option<&Vec<SortField>>,
211211+ where_clause: Option<&WhereClause>,
212212+ ) -> Result<(Vec<Record>, Option<String>), DatabaseError> {
213213+ let limit = limit.unwrap_or(50).min(100);
214214+ let order_by = build_order_by_clause(sort_by);
215215+216216+ let mut where_clauses = Vec::new();
217217+ let mut param_count = 1;
218218+219219+ let is_lexicon = where_clause
220220+ .as_ref()
221221+ .and_then(|wc| wc.conditions.get("collection"))
222222+ .and_then(|c| c.eq.as_ref())
223223+ .and_then(|v| v.as_str())
224224+ == Some("network.slices.lexicon");
225225+226226+ if is_lexicon {
227227+ where_clauses.push(format!("json->>'slice' = ${}", param_count));
228228+ } else {
229229+ where_clauses.push(format!("slice_uri = ${}", param_count));
230230+ }
231231+ param_count += 1;
232232+233233+ if cursor.is_some() {
234234+ where_clauses.push(format!("indexed_at < ${}", param_count));
235235+ param_count += 1;
236236+ }
237237+238238+ let (and_conditions, or_conditions) =
239239+ build_where_conditions(where_clause, &mut param_count);
240240+ where_clauses.extend(and_conditions);
241241+242242+ if !or_conditions.is_empty() {
243243+ let or_clause = format!("({})", or_conditions.join(" OR "));
244244+ where_clauses.push(or_clause);
245245+ }
246246+247247+ let where_sql = where_clauses.join(" AND ");
248248+ let query = format!(
249249+ "SELECT uri, cid, did, collection, json, indexed_at, slice_uri
250250+ FROM record
251251+ WHERE {}
252252+ ORDER BY {}
253253+ LIMIT ${}",
254254+ where_sql, order_by, param_count
255255+ );
256256+257257+ let mut query_builder = sqlx::query_as::<_, Record>(&query);
258258+259259+ query_builder = query_builder.bind(slice_uri);
260260+261261+ if let Some(cursor_time) = cursor {
262262+ let cursor_dt = cursor_time
263263+ .parse::<chrono::DateTime<chrono::Utc>>()
264264+ .unwrap_or_else(|_| chrono::Utc::now());
265265+ query_builder = query_builder.bind(cursor_dt);
266266+ }
267267+268268+ query_builder = bind_where_parameters(query_builder, where_clause);
269269+ query_builder = query_builder.bind(limit as i64);
270270+271271+ let records = query_builder.fetch_all(&self.pool).await?;
272272+273273+ let cursor = if records.is_empty() {
274274+ None
275275+ } else {
276276+ records
277277+ .last()
278278+ .map(|record| generate_cursor_from_record(record, sort_by))
279279+ };
280280+281281+ Ok((records, cursor))
282282+ }
    /// Counts records matching the given criteria.
    ///
    /// Used for pagination metadata and statistics.
    ///
    /// # Arguments
    /// * `slice_uri` - AT-URI of the slice whose records are counted
    /// * `where_clause` - Optional AND/OR filter conditions
    ///
    /// # Errors
    /// Returns `DatabaseError` if the count query fails.
    ///
    /// NOTE: the `$n` placeholder numbering and the `.bind()` calls below are
    /// matched positionally by sqlx — their relative order must not change.
    pub async fn count_slice_collections_records(
        &self,
        slice_uri: &str,
        where_clause: Option<&WhereClause>,
    ) -> Result<i64, DatabaseError> {
        let mut where_clauses = Vec::new();
        let mut param_count = 1;

        // Lexicon records are attached to a slice through json->>'slice'
        // rather than the slice_uri column, so the first predicate differs.
        let is_lexicon = where_clause
            .as_ref()
            .and_then(|wc| wc.conditions.get("collection"))
            .and_then(|c| c.eq.as_ref())
            .and_then(|v| v.as_str())
            == Some("network.slices.lexicon");

        if is_lexicon {
            where_clauses.push(format!("json->>'slice' = ${}", param_count));
        } else {
            where_clauses.push(format!("slice_uri = ${}", param_count));
        }
        param_count += 1;

        // AND conditions are appended individually; the OR group (if any) is
        // parenthesized and ANDed on as a single clause.
        let (and_conditions, or_conditions) =
            build_where_conditions(where_clause, &mut param_count);
        where_clauses.extend(and_conditions);

        if !or_conditions.is_empty() {
            let or_clause = format!("({})", or_conditions.join(" OR "));
            where_clauses.push(or_clause);
        }

        // where_clauses always contains at least the slice predicate, so the
        // empty branch is defensive.
        let where_sql = if where_clauses.is_empty() {
            String::new()
        } else {
            format!(" WHERE {}", where_clauses.join(" AND "))
        };

        let query = format!("SELECT COUNT(*) as count FROM record{}", where_sql);

        let mut query_builder = sqlx::query_scalar::<_, i64>(&query);
        // $1 is always the slice identifier (slice_uri column or json slice).
        query_builder = query_builder.bind(slice_uri);

        // Bind values in the same order build_where_conditions generated
        // placeholders: all AND conditions first, then the OR group. Iterating
        // the same HashMap instance twice within one process run yields a
        // stable order, keeping binds aligned with placeholders.
        if let Some(clause) = where_clause {
            for condition in clause.conditions.values() {
                if let Some(eq_value) = &condition.eq {
                    if let Some(str_val) = eq_value.as_str() {
                        query_builder = query_builder.bind(str_val);
                    } else {
                        query_builder = query_builder.bind(eq_value);
                    }
                }
                if let Some(in_values) = &condition.in_values {
                    // Non-string members are silently dropped from IN lists.
                    let str_values: Vec<String> = in_values
                        .iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect();
                    query_builder = query_builder.bind(str_values);
                }
                if let Some(contains_value) = &condition.contains {
                    // NOTE(review): binds the raw value — assumes
                    // build_where_conditions adds the '%' wildcards in SQL
                    // (e.g. ILIKE '%' || $n || '%'); confirm in that helper.
                    query_builder = query_builder.bind(contains_value);
                }
            }

            if let Some(or_conditions) = &clause.or_conditions {
                for condition in or_conditions.values() {
                    if let Some(eq_value) = &condition.eq {
                        if let Some(str_val) = eq_value.as_str() {
                            query_builder = query_builder.bind(str_val);
                        } else {
                            query_builder = query_builder.bind(eq_value);
                        }
                    }
                    if let Some(in_values) = &condition.in_values {
                        let str_values: Vec<String> = in_values
                            .iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect();
                        query_builder = query_builder.bind(str_values);
                    }
                    if let Some(contains_value) = &condition.contains {
                        query_builder = query_builder.bind(contains_value);
                    }
                }
            }
        }

        let count = query_builder.fetch_one(&self.pool).await?;
        Ok(count)
    }
376376+377377+ /// Deletes a record by URI.
378378+ ///
379379+ /// If slice_uri is provided, only deletes from that slice.
380380+ /// Otherwise deletes from all slices.
381381+ ///
382382+ /// # Returns
383383+ /// Number of rows affected
384384+ pub async fn delete_record_by_uri(
385385+ &self,
386386+ uri: &str,
387387+ slice_uri: Option<&str>,
388388+ ) -> Result<u64, DatabaseError> {
389389+ let result = if let Some(slice_uri) = slice_uri {
390390+ sqlx::query("DELETE FROM record WHERE uri = $1 AND slice_uri = $2")
391391+ .bind(uri)
392392+ .bind(slice_uri)
393393+ .execute(&self.pool)
394394+ .await?
395395+ } else {
396396+ sqlx::query("DELETE FROM record WHERE uri = $1")
397397+ .bind(uri)
398398+ .execute(&self.pool)
399399+ .await?
400400+ };
401401+ Ok(result.rows_affected())
402402+ }
    /// Inserts or updates a record atomically.
    ///
    /// Conflicts are detected on the table's primary-key constraint
    /// (`record_pkey`); on conflict only `cid`, `json`, and `indexed_at` are
    /// refreshed — the identity columns are left untouched.
    ///
    /// # Returns
    /// true if inserted (new record), false if updated (existing record)
    pub async fn upsert_record(&self, record: &Record) -> Result<bool, DatabaseError> {
        // `RETURNING (xmax = 0)` is the standard PostgreSQL idiom for telling
        // a fresh insert apart from an ON CONFLICT update: an inserted row
        // version has no updating transaction id, so xmax is 0.
        let result = sqlx::query_scalar::<_, bool>(
            r#"
            INSERT INTO record (uri, cid, did, collection, json, indexed_at, slice_uri)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT ON CONSTRAINT record_pkey DO UPDATE
            SET cid = EXCLUDED.cid,
                json = EXCLUDED.json,
                indexed_at = EXCLUDED.indexed_at
            RETURNING (xmax = 0)
            "#,
        )
        // Bind order must match the $1..$7 placeholders above.
        .bind(&record.uri)
        .bind(&record.cid)
        .bind(&record.did)
        .bind(&record.collection)
        .bind(&record.json)
        .bind(record.indexed_at)
        .bind(&record.slice_uri)
        .fetch_one(&self.pool)
        .await?;
        Ok(result)
    }
431431+432432+ /// Gets lexicon definitions for a specific slice.
433433+ ///
434434+ /// Filters for network.slices.lexicon records and transforms them
435435+ /// into the lexicon JSON format expected by the lexicon parser.
436436+ pub async fn get_lexicons_by_slice(
437437+ &self,
438438+ slice_uri: &str,
439439+ ) -> Result<Vec<serde_json::Value>, DatabaseError> {
440440+ let records = sqlx::query_as::<_, Record>(
441441+ r#"SELECT "uri", "cid", "did", "collection", "json", "indexed_at", "slice_uri"
442442+ FROM "record"
443443+ WHERE "collection" = 'network.slices.lexicon'
444444+ AND "json"->>'slice' = $1
445445+ ORDER BY "indexed_at" DESC"#,
446446+ )
447447+ .bind(slice_uri)
448448+ .fetch_all(&self.pool)
449449+ .await?;
450450+451451+ let lexicon_definitions: Vec<serde_json::Value> = records
452452+ .into_iter()
453453+ .filter_map(|record| {
454454+ let nsid = record.json.get("nsid")?.as_str()?;
455455+ let definitions_str = record.json.get("definitions")?.as_str()?;
456456+ let definitions: serde_json::Value = serde_json::from_str(definitions_str).ok()?;
457457+458458+ Some(serde_json::json!({
459459+ "lexicon": 1,
460460+ "id": nsid,
461461+ "defs": definitions
462462+ }))
463463+ })
464464+ .collect();
465465+466466+ Ok(lexicon_definitions)
467467+ }
468468+}
+189
api/src/database/slices.rs
···11+//! Slice-related queries and statistics.
22+//!
33+//! This module handles database operations for slice metadata, including
44+//! collection statistics, actor counts, lexicon counts, and slice discovery.
55+66+use super::client::Database;
77+use crate::errors::DatabaseError;
88+use crate::models::CollectionStats;
99+1010+impl Database {
1111+ /// Gets collection statistics for a slice.
1212+ ///
1313+ /// Returns record counts and unique actor counts per collection
1414+ /// (excluding lexicons marked as excludedFromSync).
1515+ pub async fn get_slice_collection_stats(
1616+ &self,
1717+ slice_uri: &str,
1818+ ) -> Result<Vec<CollectionStats>, DatabaseError> {
1919+ let stats = sqlx::query!(
2020+ r#"
2121+ WITH slice_collections AS (
2222+ SELECT DISTINCT
2323+ json->>'nsid' as collection_nsid
2424+ FROM record
2525+ WHERE collection = 'network.slices.lexicon'
2626+ AND json->>'slice' = $1
2727+ AND json->>'nsid' IS NOT NULL
2828+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
2929+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
3030+ )
3131+ SELECT
3232+ r.collection,
3333+ COUNT(*) as record_count,
3434+ COUNT(DISTINCT r.did) as unique_actors
3535+ FROM record r
3636+ INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
3737+ WHERE r.slice_uri = $1
3838+ GROUP BY r.collection
3939+ ORDER BY r.collection
4040+ "#,
4141+ slice_uri
4242+ )
4343+ .fetch_all(&self.pool)
4444+ .await?;
4545+4646+ Ok(stats
4747+ .into_iter()
4848+ .map(|row| CollectionStats {
4949+ collection: row.collection,
5050+ record_count: row.record_count.unwrap_or(0),
5151+ unique_actors: row.unique_actors.unwrap_or(0),
5252+ })
5353+ .collect())
5454+ }
5555+5656+ /// Gets the list of collection NSIDs defined for a slice.
5757+ ///
5858+ /// Only includes lexicons with type 'record' that are not excluded from sync.
5959+ pub async fn get_slice_collections_list(
6060+ &self,
6161+ slice_uri: &str,
6262+ ) -> Result<Vec<String>, DatabaseError> {
6363+ let rows = sqlx::query!(
6464+ r#"
6565+ SELECT DISTINCT json->>'nsid' as collection_nsid
6666+ FROM record
6767+ WHERE collection = 'network.slices.lexicon'
6868+ AND json->>'slice' = $1
6969+ AND json->>'nsid' IS NOT NULL
7070+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
7171+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
7272+ ORDER BY json->>'nsid'
7373+ "#,
7474+ slice_uri
7575+ )
7676+ .fetch_all(&self.pool)
7777+ .await?;
7878+7979+ Ok(rows
8080+ .into_iter()
8181+ .filter_map(|row| row.collection_nsid)
8282+ .collect())
8383+ }
8484+8585+ /// Counts total records across all collections in a slice.
8686+ ///
8787+ /// Excludes lexicons marked as excludedFromSync.
8888+ pub async fn get_slice_total_records(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
8989+ let count = sqlx::query!(
9090+ r#"
9191+ WITH slice_collections AS (
9292+ SELECT DISTINCT
9393+ json->>'nsid' as collection_nsid
9494+ FROM record
9595+ WHERE collection = 'network.slices.lexicon'
9696+ AND json->>'slice' = $1
9797+ AND json->>'nsid' IS NOT NULL
9898+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
9999+ AND (json->>'excludedFromSync' IS NULL OR json->>'excludedFromSync' != 'true')
100100+ )
101101+ SELECT COUNT(*) as count
102102+ FROM record r
103103+ INNER JOIN slice_collections sc ON r.collection = sc.collection_nsid
104104+ WHERE r.slice_uri = $1
105105+ "#,
106106+ slice_uri
107107+ )
108108+ .fetch_one(&self.pool)
109109+ .await?;
110110+111111+ Ok(count.count.unwrap_or(0))
112112+ }
113113+114114+ /// Counts total actors tracked in a slice.
115115+ pub async fn get_slice_total_actors(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
116116+ let count = sqlx::query!(
117117+ r#"
118118+ SELECT COUNT(*) as count
119119+ FROM actor
120120+ WHERE slice_uri = $1
121121+ "#,
122122+ slice_uri
123123+ )
124124+ .fetch_one(&self.pool)
125125+ .await?;
126126+127127+ Ok(count.count.unwrap_or(0))
128128+ }
129129+130130+ /// Counts lexicon definitions for a slice.
131131+ ///
132132+ /// Only includes record-type lexicons.
133133+ pub async fn get_slice_lexicon_count(&self, slice_uri: &str) -> Result<i64, DatabaseError> {
134134+ let count = sqlx::query!(
135135+ r#"
136136+ SELECT COUNT(*) as count
137137+ FROM record
138138+ WHERE collection = 'network.slices.lexicon'
139139+ AND json->>'slice' = $1
140140+ AND (json->>'definitions')::jsonb->'main'->>'type' = 'record'
141141+ "#,
142142+ slice_uri
143143+ )
144144+ .fetch_one(&self.pool)
145145+ .await?;
146146+147147+ Ok(count.count.unwrap_or(0))
148148+ }
149149+150150+ /// Gets all slice URIs that have lexicons defined.
151151+ ///
152152+ /// Useful for discovering all active slices in the system.
153153+ pub async fn get_all_slices(&self) -> Result<Vec<String>, DatabaseError> {
154154+ let rows: Vec<(String,)> = sqlx::query_as(
155155+ r#"
156156+ SELECT DISTINCT json->>'slice' as slice_uri
157157+ FROM record
158158+ WHERE collection = 'network.slices.lexicon'
159159+ AND json->>'slice' IS NOT NULL
160160+ "#,
161161+ )
162162+ .fetch_all(&self.pool)
163163+ .await?;
164164+165165+ Ok(rows.into_iter().map(|(uri,)| uri).collect())
166166+ }
167167+168168+ /// Gets the domain associated with a slice.
169169+ ///
170170+ /// Looks up the network.slices.slice record and extracts the domain field.
171171+ ///
172172+ /// # Returns
173173+ /// Some(domain) if the slice exists and has a domain, None otherwise
174174+ pub async fn get_slice_domain(&self, slice_uri: &str) -> Result<Option<String>, DatabaseError> {
175175+ let row = sqlx::query!(
176176+ r#"
177177+ SELECT json->>'domain' as domain
178178+ FROM record
179179+ WHERE collection = 'network.slices.slice'
180180+ AND uri = $1
181181+ "#,
182182+ slice_uri
183183+ )
184184+ .fetch_optional(&self.pool)
185185+ .await?;
186186+187187+ Ok(row.and_then(|r| r.domain))
188188+ }
189189+}
+60
api/src/database/types.rs
···11+//! Database query parameter types.
22+//!
33+//! This module contains types used for building dynamic SQL queries,
44+//! including WHERE conditions, sorting, and filtering.
55+66+use serde::{Deserialize, Serialize};
77+use serde_json::Value;
88+use std::collections::HashMap;
/// Represents a single condition in a WHERE clause.
///
/// Supports three types of operations:
/// - `eq`: Exact match (field = value)
/// - `in_values` (serialized as `in`): Array membership (field IN (...))
/// - `contains`: Pattern matching (field ILIKE '%value%')
///
/// All fields are optional and more than one may be set on the same
/// condition; how combined operators are joined is decided by the query
/// builders, not by this type.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WhereCondition {
    // Exact-match value (`field = value`).
    pub eq: Option<Value>,
    // Set membership; exposed as `"in"` in JSON since `in` is a Rust keyword.
    #[serde(rename = "in")]
    pub in_values: Option<Vec<Value>>,
    // Substring pattern to match against the field.
    pub contains: Option<String>,
}
/// Represents a complete WHERE clause with AND/OR conditions.
///
/// The main conditions map is combined with AND logic.
/// The or_conditions map (if present) is combined with OR logic
/// and the entire OR group is ANDed with the main conditions.
///
/// Because both maps are keyed by field name, at most one condition per
/// field can appear in each group — JSON objects cannot repeat keys, and a
/// duplicate key would silently overwrite the earlier entry in the HashMap.
///
/// Example JSON:
/// ```json
/// {
///   "collection": {"eq": "app.bsky.feed.post"},
///   "author": {"eq": "did:plc:123"},
///   "$or": {
///     "lang": {"eq": "en"},
///     "text": {"contains": "hello"}
///   }
/// }
/// ```
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WhereClause {
    // Field-name → condition map; all entries are ANDed together.
    // `flatten` makes these appear as top-level JSON keys.
    #[serde(flatten)]
    pub conditions: HashMap<String, WhereCondition>,

    // Optional field-name → condition map whose entries are ORed together,
    // then ANDed with `conditions`. Serialized under the "$or" key.
    #[serde(rename = "$or")]
    pub or_conditions: Option<HashMap<String, WhereCondition>>,
}
/// Represents a field to sort by with direction.
///
/// Used for multi-field sorting in queries.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SortField {
    // Name of the field to order by.
    pub field: String,
    // Sort direction — presumably "asc"/"desc"; the accepted values are
    // interpreted by the query builder, not validated here. TODO(review):
    // confirm and consider an enum if callers allow.
    pub direction: String,
}
···11-use serde::{Deserialize, Serialize};
22-use sqlxmq::{job, CurrentJob, JobRegistry};
33-use sqlx::PgPool;
44-use uuid::Uuid;
55-use crate::sync::SyncService;
66-use crate::models::BulkSyncParams;
77-use crate::logging::LogLevel;
11+//! Background job system for asynchronous collection synchronization.
22+//!
33+//! This module uses sqlxmq (a PostgreSQL-backed message queue) to handle
44+//! background sync jobs. Jobs are:
55+//! - Enqueued with deduplication checks (one active job per user+slice)
66+//! - Executed asynchronously in background workers
77+//! - Retried up to 5 times on failure
88+//! - Tracked with detailed logging and result persistence
99+//!
1010+//! The sync process fetches records from AT Protocol relays and validates them
1111+//! against Lexicon schemas before persisting to the database.
1212+813use crate::cache;
1414+use crate::logging::LogLevel;
1515+use crate::models::BulkSyncParams;
1616+use crate::sync::SyncService;
1717+use serde::{Deserialize, Serialize};
918use serde_json::json;
1010-use tracing::{info, error};
1919+use sqlx::PgPool;
2020+use sqlxmq::{CurrentJob, JobRegistry, job};
1121use std::sync::Arc;
1222use tokio::sync::Mutex;
2323+use tracing::{error, info};
2424+use uuid::Uuid;
13251414-/// Payload for sync jobs
/// Job payload containing all parameters needed to execute a sync job.
///
/// Serialized to JSON when enqueued via sqlxmq and deserialized by the
/// worker in `sync_job`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncJobPayload {
    /// Unique identifier for tracking this specific job execution
    pub job_id: Uuid,
    /// Decentralized identifier of the user requesting the sync
    pub user_did: String,
    /// AT-URI of the slice being synchronized
    pub slice_uri: String,
    /// Synchronization parameters (collections, repos, validation settings)
    pub params: BulkSyncParams,
}
22382323-/// Result stored for completed sync jobs
3939+/// Result data persisted after job completion or failure.
4040+///
4141+/// This is stored in the `job_results` table for historical tracking and
4242+/// status queries. Field names are camelCased for JSON API responses.
2443#[derive(Debug, Clone, Serialize, Deserialize)]
2544#[serde(rename_all = "camelCase")]
2645pub struct SyncJobResult {
···3150 pub message: String,
3251}
/// Initializes the sqlxmq job registry with all job handlers.
///
/// This must be called once at application startup to register job handlers
/// before workers can process jobs from the queue.
///
/// # Returns
/// A configured JobRegistry containing all registered job handlers
pub fn registry() -> JobRegistry {
    JobRegistry::new(&[sync_job])
}
38633939-/// The sync job handler
6464+/// Background job handler for collection synchronization.
6565+///
6666+/// This is the main worker function that executes sync jobs from the queue.
6767+/// It performs the following steps:
6868+/// 1. Extracts job payload and validates parameters
6969+/// 2. Initializes sync service with logging and caching
7070+/// 3. Fetches and validates records from AT Protocol relays
7171+/// 4. Persists results to the database
7272+/// 5. Logs detailed progress and completion status
7373+///
7474+/// # Job Behavior
7575+/// - Channel: `sync_queue`
7676+/// - Retries: Up to 5 attempts on failure
7777+/// - Concurrency: Multiple jobs can run in parallel
7878+/// - Deduplication: Enforced at enqueue time (one active job per user+slice)
7979+///
8080+/// # Arguments
8181+/// * `current_job` - The sqlxmq job context containing payload and database access
8282+///
8383+/// # Returns
8484+/// * `Ok(())` - Job completed successfully and marked complete
8585+/// * `Err(...)` - Job failed and will be retried (up to max retry limit)
4086#[job(channel_name = "sync_queue")]
4141-async fn sync_job(mut current_job: CurrentJob) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
4242- let payload: SyncJobPayload = current_job.json()?.expect("Invalid job payload");
4343-4444- info!(
4545- "Starting sync job {} for user {} on slice {}",
4646- payload.job_id, payload.user_did, payload.slice_uri
4747- );
8787+async fn sync_job(
8888+ mut current_job: CurrentJob,
8989+) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
9090+ let payload: SyncJobPayload = current_job.json()?.expect("Invalid job payload");
9191+9292+ info!(
9393+ "Starting sync job {} for user {} on slice {}",
9494+ payload.job_id, payload.user_did, payload.slice_uri
9595+ );
9696+9797+ // Access database pool and global logger for this job execution
9898+ let pool = current_job.pool();
9999+ let logger = crate::logging::Logger::global();
100100+101101+ // Log job start
102102+ logger.log_sync_job(
103103+ payload.job_id,
104104+ &payload.user_did,
105105+ &payload.slice_uri,
106106+ LogLevel::Info,
107107+ &format!(
108108+ "Starting sync job for {} collections",
109109+ payload
110110+ .params
111111+ .collections
112112+ .as_ref()
113113+ .map(|c| c.len())
114114+ .unwrap_or(0)
115115+ + payload
116116+ .params
117117+ .external_collections
118118+ .as_ref()
119119+ .map(|c| c.len())
120120+ .unwrap_or(0)
121121+ ),
122122+ Some(json!({
123123+ "collections": payload.params.collections,
124124+ "external_collections": payload.params.external_collections,
125125+ "repos": payload.params.repos,
126126+ "skip_validation": payload.params.skip_validation
127127+ })),
128128+ );
129129+130130+ // Initialize sync service with database, relay endpoint, and caching
131131+ let database = crate::database::Database::from_pool(pool.clone());
132132+ let relay_endpoint = std::env::var("RELAY_ENDPOINT")
133133+ .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
134134+135135+ // Create in-memory cache for DID resolution with 24-hour TTL to reduce identity lookups
136136+ let cache = Arc::new(Mutex::new(
137137+ cache::CacheFactory::create_slice_cache(cache::CacheBackend::InMemory {
138138+ ttl_seconds: Some(24 * 60 * 60),
139139+ })
140140+ .await?,
141141+ ));
142142+143143+ let sync_service = SyncService::with_logging_and_cache(
144144+ database.clone(),
145145+ relay_endpoint,
146146+ logger.clone(),
147147+ payload.job_id,
148148+ payload.user_did.clone(),
149149+ cache,
150150+ );
151151+152152+ // Track execution time for performance monitoring
153153+ let start_time = std::time::Instant::now();
481544949- // Get database pool and logger
5050- let pool = current_job.pool();
5151- let logger = crate::logging::Logger::global();
5252-5353- // Log job start
5454- logger.log_sync_job(
5555- payload.job_id,
5656- &payload.user_did,
155155+ // Execute the synchronization process
156156+ match sync_service
157157+ .backfill_collections(
57158 &payload.slice_uri,
5858- LogLevel::Info,
5959- &format!("Starting sync job for {} collections",
6060- payload.params.collections.as_ref().map(|c| c.len()).unwrap_or(0) +
6161- payload.params.external_collections.as_ref().map(|c| c.len()).unwrap_or(0)
6262- ),
6363- Some(json!({
6464- "collections": payload.params.collections,
6565- "external_collections": payload.params.external_collections,
6666- "repos": payload.params.repos,
6767- "skip_validation": payload.params.skip_validation
6868- }))
6969- );
7070-7171- // Create sync service with logging and cache
7272- let database = crate::database::Database::from_pool(pool.clone());
7373- let relay_endpoint = std::env::var("RELAY_ENDPOINT")
7474- .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
159159+ payload.params.collections.as_deref(),
160160+ payload.params.external_collections.as_deref(),
161161+ payload.params.repos.as_deref(),
162162+ payload.params.skip_validation.unwrap_or(false),
163163+ )
164164+ .await
165165+ {
166166+ Ok((repos_processed, records_synced)) => {
167167+ let elapsed = start_time.elapsed();
168168+ let result = SyncJobResult {
169169+ success: true,
170170+ total_records: records_synced,
171171+ collections_synced: [
172172+ payload.params.collections.unwrap_or_default(),
173173+ payload.params.external_collections.unwrap_or_default(),
174174+ ]
175175+ .concat(),
176176+ repos_processed,
177177+ message: format!("Sync completed successfully in {:?}", elapsed),
178178+ };
751797676- // Create cache for DID resolution (24 hour TTL)
7777- let cache = Arc::new(Mutex::new(
7878- cache::CacheFactory::create_slice_cache(
7979- cache::CacheBackend::InMemory { ttl_seconds: Some(24 * 60 * 60) }
8080- ).await?
8181- ));
180180+ // Log completion with detailed metrics for monitoring
181181+ logger.log_sync_job(
182182+ payload.job_id,
183183+ &payload.user_did,
184184+ &payload.slice_uri,
185185+ LogLevel::Info,
186186+ &format!(
187187+ "Sync completed successfully: {} repos, {} records in {:?}",
188188+ repos_processed, records_synced, elapsed
189189+ ),
190190+ Some(json!({
191191+ "repos_processed": repos_processed,
192192+ "records_synced": records_synced,
193193+ "duration_secs": elapsed.as_secs_f64(),
194194+ "collections_synced": result.collections_synced
195195+ })),
196196+ );
821978383- let sync_service = SyncService::with_logging_and_cache(
8484- database.clone(),
8585- relay_endpoint,
8686- logger.clone(),
8787- payload.job_id,
8888- payload.user_did.clone(),
8989- cache
9090- );
9191-9292- // Track progress
9393- let start_time = std::time::Instant::now();
9494-9595- // Perform the sync
9696- match sync_service
9797- .backfill_collections(
198198+ // Persist job result before marking complete (ensures result is queryable)
199199+ store_job_result(
200200+ pool,
201201+ payload.job_id,
202202+ &payload.user_did,
98203 &payload.slice_uri,
9999- payload.params.collections.as_deref(),
100100- payload.params.external_collections.as_deref(),
101101- payload.params.repos.as_deref(),
102102- payload.params.skip_validation.unwrap_or(false),
204204+ &result,
205205+ None,
103206 )
104104- .await
105105- {
106106- Ok((repos_processed, records_synced)) => {
107107- let elapsed = start_time.elapsed();
108108- let result = SyncJobResult {
109109- success: true,
110110- total_records: records_synced,
111111- collections_synced: [
112112- payload.params.collections.unwrap_or_default(),
113113- payload.params.external_collections.unwrap_or_default(),
114114- ].concat(),
115115- repos_processed,
116116- message: format!(
117117- "Sync completed successfully in {:?}",
118118- elapsed
119119- ),
120120- };
207207+ .await?;
121208122122- // Log successful completion
123123- logger.log_sync_job(
124124- payload.job_id,
125125- &payload.user_did,
126126- &payload.slice_uri,
127127- LogLevel::Info,
128128- &format!("Sync completed successfully: {} repos, {} records in {:?}",
129129- repos_processed, records_synced, elapsed),
130130- Some(json!({
131131- "repos_processed": repos_processed,
132132- "records_synced": records_synced,
133133- "duration_secs": elapsed.as_secs_f64(),
134134- "collections_synced": result.collections_synced
135135- }))
136136- );
209209+ info!(
210210+ "Sync job {} completed successfully: {} repos, {} records",
211211+ payload.job_id, repos_processed, records_synced
212212+ );
137213138138- // Store result in database before completing the job
139139- store_job_result(
140140- pool,
141141- payload.job_id,
142142- &payload.user_did,
143143- &payload.slice_uri,
144144- &result,
145145- None,
146146- ).await?;
214214+ // CRITICAL: Explicitly mark job as complete to prevent automatic retry
215215+ // Without this, sqlxmq will treat the job as failed and retry it
216216+ current_job.complete().await?;
147217148148- info!(
149149- "Sync job {} completed successfully: {} repos, {} records",
150150- payload.job_id, repos_processed, records_synced
151151- );
218218+ info!(
219219+ "Sync job {} marked as complete and will be cleaned up",
220220+ payload.job_id
221221+ );
152222153153- // CRITICAL: Must explicitly complete the job to prevent it from being retried
154154- current_job.complete().await?;
155155-156156- info!(
157157- "Sync job {} marked as complete and will be cleaned up",
158158- payload.job_id
159159- );
223223+ Ok(())
224224+ }
225225+ Err(e) => {
226226+ error!("Sync job {} failed: {}", payload.job_id, e);
160227161161- Ok(())
162162- }
163163- Err(e) => {
164164- error!("Sync job {} failed: {}", payload.job_id, e);
165165-166166- // Log error
167167- logger.log_sync_job(
168168- payload.job_id,
169169- &payload.user_did,
170170- &payload.slice_uri,
171171- LogLevel::Error,
172172- &format!("Sync job failed: {}", e),
173173- Some(json!({
174174- "error": e.to_string(),
175175- "duration_secs": start_time.elapsed().as_secs_f64()
176176- }))
177177- );
178178-179179- let result = SyncJobResult {
180180- success: false,
181181- total_records: 0,
182182- collections_synced: vec![],
183183- repos_processed: 0,
184184- message: format!("Sync failed: {}", e),
185185- };
228228+ // Log error details for debugging and user visibility
229229+ logger.log_sync_job(
230230+ payload.job_id,
231231+ &payload.user_did,
232232+ &payload.slice_uri,
233233+ LogLevel::Error,
234234+ &format!("Sync job failed: {}", e),
235235+ Some(json!({
236236+ "error": e.to_string(),
237237+ "duration_secs": start_time.elapsed().as_secs_f64()
238238+ })),
239239+ );
186240187187- // Store error result before returning error
188188- if let Err(db_err) = store_job_result(
189189- pool,
190190- payload.job_id,
191191- &payload.user_did,
192192- &payload.slice_uri,
193193- &result,
194194- Some(&format!("{}", e)),
195195- ).await {
196196- error!("Failed to store job result: {}", db_err);
197197- }
241241+ let result = SyncJobResult {
242242+ success: false,
243243+ total_records: 0,
244244+ collections_synced: vec![],
245245+ repos_processed: 0,
246246+ message: format!("Sync failed: {}", e),
247247+ };
198248199199- // Return error to trigger retry
200200- Err(Box::new(e))
249249+ // Persist failure result even if job will retry (for status tracking)
250250+ if let Err(db_err) = store_job_result(
251251+ pool,
252252+ payload.job_id,
253253+ &payload.user_did,
254254+ &payload.slice_uri,
255255+ &result,
256256+ Some(&format!("{}", e)),
257257+ )
258258+ .await
259259+ {
260260+ error!("Failed to store job result: {}", db_err);
201261 }
262262+263263+ // Return error to trigger sqlxmq's automatic retry mechanism (up to 5 attempts)
264264+ Err(Box::new(e))
202265 }
266266+ }
203267}
204268205205-/// Store job result in the database for later retrieval
269269+/// Persists job result to the database for status queries and historical tracking.
270270+///
271271+/// This is called both on success and failure to ensure result data is available
272272+/// via the job status API. Uses UPSERT to handle retries (updates existing result).
273273+///
274274+/// # Arguments
275275+/// * `pool` - PostgreSQL connection pool
276276+/// * `job_id` - Unique identifier for the job
277277+/// * `user_did` - User who initiated the job
278278+/// * `slice_uri` - Slice being synchronized
279279+/// * `result` - Job result data (success/failure, metrics)
280280+/// * `error_message` - Optional error details for failed jobs
281281+///
282282+/// # Returns
283283+/// * `Ok(())` - Result stored successfully
284284+/// * `Err(sqlx::Error)` - Database error during insert/update
206285async fn store_job_result(
207286 pool: &PgPool,
208287 job_id: Uuid,
···215294 "Storing job result: job_id={}, user_did={}, slice_uri={}, success={}",
216295 job_id, user_did, slice_uri, result.success
217296 );
218218-297297+298298+ // Convert collections list to JSONB for storage
219299 let collections_json = serde_json::to_value(&result.collections_synced)
220300 .map_err(|e| sqlx::Error::Protocol(format!("Failed to serialize collections: {}", e)))?;
221301302302+ // UPSERT: insert new result or update existing on retry
222303 sqlx::query!(
223304 r#"
224305 INSERT INTO job_results (
225306 job_id, user_did, slice_uri, status, success, total_records,
226307 collections_synced, repos_processed, message, error_message
227308 ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
228228- ON CONFLICT (job_id)
309309+ ON CONFLICT (job_id)
229310 DO UPDATE SET
230311 status = EXCLUDED.status,
231312 success = EXCLUDED.success,
···239320 job_id,
240321 user_did,
241322 slice_uri,
242242- if result.success { "completed" } else { "failed" },
323323+ if result.success {
324324+ "completed"
325325+ } else {
326326+ "failed"
327327+ },
243328 result.success,
244329 result.total_records,
245330 collections_json,
···253338 Ok(())
254339}
255340256256-/// Enqueue a new sync job
341341+/// Enqueues a new sync job with deduplication checks.
342342+///
343343+/// This function ensures only one active sync job exists per user+slice combination
344344+/// by checking both the message queue and recent job results. This prevents:
345345+/// - Duplicate jobs competing for the same data
346346+/// - Wasted resources on redundant syncs
347347+/// - Race conditions in record persistence
348348+///
349349+/// # Arguments
350350+/// * `pool` - PostgreSQL connection pool
351351+/// * `user_did` - Decentralized identifier of the user
352352+/// * `slice_uri` - AT-URI of the slice to synchronize
353353+/// * `params` - Sync parameters (collections, repos, validation settings)
354354+///
355355+/// # Returns
356356+/// * `Ok(Uuid)` - Job ID of the newly enqueued job
357357+/// * `Err(...)` - Error if job already running or enqueue fails
358358+///
359359+/// # Deduplication Strategy
360360+/// 1. Check for pending jobs in `mq_msgs` queue
361361+/// 2. Check for recent jobs (< 10 min) without results
362362+/// 3. Reject if either check finds an active job
257363pub async fn enqueue_sync_job(
258364 pool: &PgPool,
259365 user_did: String,
260366 slice_uri: String,
261367 params: BulkSyncParams,
262368) -> Result<Uuid, Box<dyn std::error::Error + Send + Sync>> {
263263- // Check if there's already a running sync job for this user+slice combination
264264- // We do this by checking:
265265- // 1. If there are any jobs in mq_msgs for sync_queue channel that haven't been processed yet
266266- // 2. If there are any recent job_results entries that indicate a job might still be running
369369+ // Deduplication check 1: Look for pending jobs in the message queue
370370+ // This catches jobs that haven't started executing yet
267371 let existing_running_msg = sqlx::query!(
268372 r#"
269269- SELECT m.id
373373+ SELECT m.id
270374 FROM mq_msgs m
271375 JOIN mq_payloads p ON m.id = p.id
272376 WHERE m.channel_name = 'sync_queue'
273377 AND m.id != '00000000-0000-0000-0000-000000000000'
274274- AND p.payload_json->>'user_did' = $1
378378+ AND p.payload_json->>'user_did' = $1
275379 AND p.payload_json->>'slice_uri' = $2
276380 AND m.attempt_at <= NOW()
277381 "#,
···281385 .fetch_optional(pool)
282386 .await?;
283387284284- // Also check if there's a very recent job that might still be running
285285- // (within the last 10 minutes and no completion record)
388388+ // Deduplication check 2: Look for recently started jobs without results
389389+ // This catches jobs that started but haven't written results yet (< 10 min)
286390 let recent_start = sqlx::query!(
287391 r#"
288392 SELECT m.id
···291395 LEFT JOIN job_results jr ON (p.payload_json->>'job_id')::uuid = jr.job_id
292396 WHERE m.channel_name = 'sync_queue'
293397 AND m.id != '00000000-0000-0000-0000-000000000000'
294294- AND p.payload_json->>'user_did' = $1
398398+ AND p.payload_json->>'user_did' = $1
295399 AND p.payload_json->>'slice_uri' = $2
296400 AND m.created_at > NOW() - INTERVAL '10 minutes'
297401 AND jr.job_id IS NULL
···306410 return Err("A sync job is already running for this slice. Please wait for it to complete before starting another.".into());
307411 }
308412413413+ // Generate unique job ID for tracking and result storage
309414 let job_id = Uuid::new_v4();
310310-415415+311416 let payload = SyncJobPayload {
312417 job_id,
313418 user_did: user_did.clone(),
314419 slice_uri: slice_uri.clone(),
315420 params,
316421 };
317317-318318- // Spawn the job using the correct builder pattern
319319- let job_uuid = sync_job.builder()
320320- .set_json(&payload)?
321321- .spawn(pool)
322322- .await?;
323323-422422+423423+ // Enqueue the job using sqlxmq's builder pattern
424424+ let job_uuid = sync_job.builder().set_json(&payload)?.spawn(pool).await?;
425425+324426 info!(
325427 "Enqueued sync job {} (queue id: {}) for user {}",
326428 job_id, job_uuid, user_did
327429 );
328328-430430+329431 Ok(job_id)
330432}
331433332332-/// Check the status of a sync job
434434+/// Status information for a sync job, including progress and results.
435435+///
436436+/// This combines data from both the message queue (for pending jobs) and
437437+/// the job_results table (for completed jobs). Field names are camelCased
438438+/// for JSON API responses.
333439#[derive(Debug, Serialize, Deserialize)]
334440#[serde(rename_all = "camelCase")]
335441pub struct JobStatus {
442442+ /// Unique identifier for the job
336443 pub job_id: Uuid,
444444+ /// Current status: "pending", "running", "completed", or "failed"
337445 pub status: String,
446446+ /// Timestamp when job was enqueued
338447 pub created_at: chrono::DateTime<chrono::Utc>,
448448+ /// Timestamp when job execution started (None if still pending; for jobs still in the queue this is approximated by the enqueue time)
339449 pub started_at: Option<chrono::DateTime<chrono::Utc>>,
450450+ /// Timestamp when job finished (None if still running)
340451 pub completed_at: Option<chrono::DateTime<chrono::Utc>>,
452452+ /// Detailed result data (None if still running)
341453 pub result: Option<SyncJobResult>,
454454+ /// Error message if job failed
342455 pub error: Option<String>,
456456+ /// Number of retry attempts remaining, out of a maximum of 5 (computed as 5 minus attempts used)
343457 pub retry_count: i32,
344458}
345459346346-pub async fn get_job_status(
347347- pool: &PgPool,
348348- job_id: Uuid,
349349-) -> Result<Option<JobStatus>, sqlx::Error> {
350350- // First, check if we have a stored result for this job
460460+/// Retrieves the current status of a sync job.
461461+///
462462+/// This function checks both the job_results table (for completed jobs) and
463463+/// the message queue (for pending/running jobs) to provide comprehensive status.
464464+///
465465+/// # Arguments
466466+/// * `pool` - PostgreSQL connection pool
467467+/// * `job_id` - Unique identifier of the job to query
468468+///
469469+/// # Returns
470470+/// * `Ok(Some(JobStatus))` - Job found with current status
471471+/// * `Ok(None)` - Job not found (may have been cleaned up)
472472+/// * `Err(sqlx::Error)` - Database query error
473473+pub async fn get_job_status(pool: &PgPool, job_id: Uuid) -> Result<Option<JobStatus>, sqlx::Error> {
474474+ // Priority 1: Check for completed job result (most common case)
351475 let result_row = sqlx::query!(
352476 r#"
353353- SELECT
477477+ SELECT
354478 job_id, user_did, slice_uri, status, success, total_records,
355479 collections_synced, repos_processed, message, error_message,
356480 created_at, completed_at
357357- FROM job_results
481481+ FROM job_results
358482 WHERE job_id = $1
359483 "#,
360484 job_id
···363487 .await?;
364488365489 if let Some(result) = result_row {
366366- // We have a stored result, return it
367367- let collections_synced: Vec<String> = serde_json::from_value(result.collections_synced)
368368- .unwrap_or_default();
490490+ // Found completed job, construct status from result data
491491+ let collections_synced: Vec<String> =
492492+ serde_json::from_value(result.collections_synced).unwrap_or_default();
369493370494 return Ok(Some(JobStatus {
371495 job_id,
···385509 }));
386510 }
387511388388- // No stored result, check if job is still in queue
512512+ // Priority 2: Check message queue for pending/running jobs
389513 let queue_row = sqlx::query!(
390514 r#"
391391- SELECT
515515+ SELECT
392516 m.id,
393517 m.created_at,
394518 m.attempt_at,
395519 m.attempts,
396520 p.payload_json
397397- FROM mq_msgs m
521521+ FROM mq_msgs m
398522 LEFT JOIN mq_payloads p ON m.id = p.id
399523 WHERE p.payload_json::jsonb ->> 'job_id' = $1
400524 "#,
···405529406530 match queue_row {
407531 Some(row) => {
532532+ // Determine status based on attempt_at timestamp
408533 let status = if row.attempt_at.is_none() {
409534 "completed".to_string()
410535 } else if let Some(attempt_at) = row.attempt_at {
···421546 job_id,
422547 status: status.clone(),
423548 created_at: row.created_at.unwrap_or_else(chrono::Utc::now),
424424- started_at: if status == "running" || status == "completed" { row.created_at } else { None },
425425- completed_at: if status == "completed" { row.attempt_at } else { None },
549549+ started_at: if status == "running" || status == "completed" {
550550+ row.created_at
551551+ } else {
552552+ None
553553+ },
554554+ completed_at: if status == "completed" {
555555+ row.attempt_at
556556+ } else {
557557+ None
558558+ },
426559 result: None,
427560 error: None,
428561 retry_count: 5 - row.attempts,
429562 }))
430430- },
563563+ }
431564 None => {
432432- // Job not found in queue or results - it might not exist
565565+ // Job not found anywhere - either never existed or was cleaned up
433566 Ok(None)
434567 }
435568 }
436569}
437570438438-/// Get job results for a specific slice, ordered by most recent first
571571+/// Retrieves job history for a specific user and slice combination.
572572+///
573573+/// This returns both completed jobs (from job_results) and pending/running jobs
574574+/// (from the message queue), ordered by creation time (most recent first).
575575+/// Useful for displaying sync history in the UI.
576576+///
577577+/// # Arguments
578578+/// * `pool` - PostgreSQL connection pool
579579+/// * `user_did` - User's decentralized identifier
580580+/// * `slice_uri` - AT-URI of the slice
581581+/// * `limit` - Optional maximum number of results (default: 10)
582582+///
583583+/// # Returns
584584+/// * `Ok(Vec<JobStatus>)` - List of job statuses ordered by recency
585585+/// * `Err(sqlx::Error)` - Database query error
439586pub async fn get_slice_job_history(
440587 pool: &PgPool,
441588 user_did: &str,
···449596 user_did, slice_uri, limit
450597 );
451598452452- // Get both completed jobs and pending jobs
599599+ // Query combines completed jobs (job_results) and pending jobs (mq_msgs) via UNION
453600 let rows = sqlx::query!(
454601 r#"
455602 -- Completed jobs from job_results
···499646 .fetch_all(pool)
500647 .await?;
501648649649+ // Transform database rows into JobStatus structs
502650 let mut results = Vec::new();
503651 for row in rows {
504652 let collections_synced: Vec<String> = serde_json::from_value(
505505- row.collections_synced.unwrap_or_else(|| serde_json::json!([]))
506506- ).unwrap_or_default();
653653+ row.collections_synced
654654+ .unwrap_or_else(|| serde_json::json!([])),
655655+ )
656656+ .unwrap_or_default();
507657508508- // Handle both completed and pending jobs
658658+ // Differentiate between pending jobs (no result data) and completed jobs
509659 let result = if row.job_type.as_deref() == Some("pending") || row.success.is_none() {
510660 // This is a pending job - no result data available
511661 None
···534684535685 Ok(results)
536686}
537537-
+258-75
api/src/logging.rs
···11+//! Batched logging system for high-throughput database log persistence.
22+//!
33+//! This module provides an async, batched logging system that:
44+//! - Queues log entries in memory using an unbounded channel
55+//! - Flushes to PostgreSQL in batches (every 5 seconds or 100 entries)
66+//! - Maintains a global singleton logger instance
77+//! - Supports different log types (sync jobs, Jetstream events, system logs)
88+//! - Provides a periodic cleanup task for old logs, started via `start_log_cleanup_task` (1 day retention for Jetstream, 7 days for jobs)
99+//!
1010+//! The batching approach significantly reduces database load during high-throughput
1111+//! scenarios like Jetstream event processing.
1212+1313+use chrono::Utc;
114use serde_json::Value;
215use sqlx::PgPool;
33-use uuid::Uuid;
44-use tokio::sync::mpsc;
55-use tokio::time::{interval, Duration};
66-use tracing::{info, warn, error};
77-use chrono::Utc;
816use std::sync::OnceLock;
1717+use tokio::sync::mpsc;
1818+use tokio::time::{Duration, interval};
1919+use tracing::{error, info, warn};
2020+use uuid::Uuid;
9212222+/// Log severity levels for structured logging.
1023#[derive(Debug, Clone)]
1124pub enum LogLevel {
1225 Info,
···1528}
16291730impl LogLevel {
3131+ /// Returns the string representation of the log level.
1832 pub fn as_str(&self) -> &'static str {
1933 match self {
2034 LogLevel::Info => "info",
···2438 }
2539}
26404141+/// Categories of log entries for filtering and organization.
2742#[derive(Debug, Clone)]
2843#[allow(dead_code)]
2944pub enum LogType {
4545+ /// Background sync job logs (user-initiated collection sync)
3046 SyncJob,
4747+ /// Real-time Jetstream event processing logs
3148 Jetstream,
4949+ /// System-level operational logs
3250 System,
3351}
34523553impl LogType {
5454+ /// Returns the string representation of the log type.
3655 pub fn as_str(&self) -> &'static str {
3756 match self {
3857 LogType::SyncJob => "sync_job",
···4261 }
4362}
44634545-/// Global logger instance
6464+/// Global singleton logger instance, initialized once at application startup.
4665static GLOBAL_LOGGER: OnceLock<Logger> = OnceLock::new();
47664848-/// Log entry to be queued for batch insertion
6767+/// Internal representation of a log entry pending database insertion.
6868+///
6969+/// These entries are queued in memory and flushed in batches to reduce
7070+/// database round-trips and improve throughput.
4971#[derive(Debug, Clone)]
5072struct QueuedLogEntry {
5173 log_type: String,
···5880 created_at: chrono::DateTime<chrono::Utc>,
5981}
60826161-/// Logger that queues log entries and flushes them periodically
8383+/// Batched logger that queues log entries and flushes them periodically.
8484+///
8585+/// This logger uses an unbounded channel to queue log entries, which are then
8686+/// flushed to the database by a background worker. The worker flushes when:
8787+/// - 100 entries accumulate (batch size threshold)
8888+/// - 5 seconds elapse (time-based threshold)
8989+/// - The channel is closed (graceful shutdown)
9090+///
9191+/// Logs are also immediately written to stdout via the `tracing` crate for
9292+/// real-time visibility during development and debugging.
6293#[derive(Clone)]
6394pub struct Logger {
6495 sender: mpsc::UnboundedSender<QueuedLogEntry>,
6596}
66976798impl Logger {
6868- /// Create a new batched logger and spawn the background worker
9999+ /// Creates a new batched logger and spawns the background worker task.
100100+ ///
101101+ /// The background worker runs for the lifetime of the application, processing
102102+ /// the log queue and flushing to the database.
103103+ ///
104104+ /// # Arguments
105105+ /// * `pool` - PostgreSQL connection pool for database writes
69106 pub fn new(pool: PgPool) -> Self {
70107 let (sender, receiver) = mpsc::unbounded_channel();
7171-7272- // Spawn background worker
108108+109109+ // Spawn background worker that will run for the lifetime of the application
73110 tokio::spawn(Self::background_worker(receiver, pool));
7474-111111+75112 Self { sender }
76113 }
771147878- /// Initialize the global logger (call once at startup)
115115+ /// Initializes the global logger singleton.
116116+ ///
117117+ /// This should be called once at application startup before any logging occurs.
118118+ /// Subsequent calls will be ignored with a warning.
119119+ ///
120120+ /// # Arguments
121121+ /// * `pool` - PostgreSQL connection pool for database writes
122122+ ///
123123+ /// # Example
124124+ /// ```ignore
125125+ /// Logger::init_global(pool.clone());
126126+ /// let logger = Logger::global();
127127+ /// logger.log_jetstream(LogLevel::Info, "Started", None);
128128+ /// ```
79129 pub fn init_global(pool: PgPool) {
80130 let logger = Self::new(pool);
81131 if GLOBAL_LOGGER.set(logger).is_err() {
···83133 }
84134 }
851358686- /// Get the global logger instance
136136+ /// Returns a reference to the global logger instance.
137137+ ///
138138+ /// # Panics
139139+ /// Panics if called before `init_global()`. Ensure the logger is initialized
140140+ /// during application startup.
87141 pub fn global() -> &'static Logger {
8888- GLOBAL_LOGGER.get().expect("Global logger not initialized - call Logger::init_global() first")
142142+ GLOBAL_LOGGER
143143+ .get()
144144+ .expect("Global logger not initialized - call Logger::init_global() first")
89145 }
9090-9191- /// Log a sync job message (queued for batch insertion)
146146+147147+ /// Logs a sync job message, queuing it for batched database insertion.
148148+ ///
149149+ /// Sync job logs track the progress of background synchronization tasks where
150150+ /// users fetch their collection data from their PDS.
151151+ ///
152152+ /// # Arguments
153153+ /// * `job_id` - Unique identifier for the sync job
154154+ /// * `user_did` - Decentralized identifier of the user being synced
155155+ /// * `slice_uri` - AT-URI of the slice being synchronized
156156+ /// * `level` - Log severity level
157157+ /// * `message` - Human-readable log message
158158+ /// * `metadata` - Optional structured metadata (JSON)
159159+ ///
160160+ /// # Behavior
161161+ /// - Immediately writes to stdout via `tracing` for real-time visibility
162162+ /// - Queues the entry for batch insertion to the database
163163+ /// - Send failures are silently ignored (if channel is closed)
92164 pub fn log_sync_job(
93165 &self,
94166 job_id: Uuid,
···108180 metadata,
109181 created_at: Utc::now(),
110182 };
111111-112112- // Also log to tracing for immediate console output
183183+184184+ // Write to stdout immediately for real-time monitoring and debugging
113185 match level {
114186 LogLevel::Info => info!("[sync_job] {}", message),
115187 LogLevel::Warn => warn!("[sync_job] {}", message),
116188 LogLevel::Error => error!("[sync_job] {}", message),
117189 }
118118-119119- // Queue for database insertion (ignore send errors if channel closed)
190190+191191+ // Queue for batch database insertion (ignore send errors if channel closed)
120192 let _ = self.sender.send(entry);
121193 }
122122-123123- /// Log a jetstream message (queued for batch insertion)
124124- pub fn log_jetstream(
125125- &self,
126126- level: LogLevel,
127127- message: &str,
128128- metadata: Option<Value>,
129129- ) {
194194+195195+ /// Logs a Jetstream message without slice context.
196196+ ///
197197+ /// This is a convenience wrapper around `log_jetstream_with_slice` for
198198+ /// global Jetstream events (e.g., connection status, errors).
199199+ ///
200200+ /// # Arguments
201201+ /// * `level` - Log severity level
202202+ /// * `message` - Human-readable log message
203203+ /// * `metadata` - Optional structured metadata (JSON)
204204+ pub fn log_jetstream(&self, level: LogLevel, message: &str, metadata: Option<Value>) {
130205 self.log_jetstream_with_slice(level, message, metadata, None);
131206 }
132207133133- /// Log a jetstream message with slice context (queued for batch insertion)
208208+ /// Logs a Jetstream message with optional slice context.
209209+ ///
210210+ /// Jetstream logs track real-time event processing from the AT Protocol firehose.
211211+ /// Including `slice_uri` associates the log with a specific slice's event processing.
212212+ ///
213213+ /// # Arguments
214214+ /// * `level` - Log severity level
215215+ /// * `message` - Human-readable log message
216216+ /// * `metadata` - Optional structured metadata (JSON)
217217+ /// * `slice_uri` - Optional AT-URI to associate this log with a specific slice
218218+ ///
219219+ /// # Behavior
220220+ /// - Immediately writes to stdout via `tracing` for real-time visibility
221221+ /// - Queues the entry for batch insertion to the database
222222+ /// - Send failures are silently ignored (if channel is closed)
134223 pub fn log_jetstream_with_slice(
135224 &self,
136225 level: LogLevel,
···148237 metadata,
149238 created_at: Utc::now(),
150239 };
151151-152152- // Also log to tracing for immediate console output
240240+241241+ // Write to stdout immediately for real-time monitoring and debugging
153242 match level {
154243 LogLevel::Info => info!("[jetstream] {}", message),
155244 LogLevel::Warn => warn!("[jetstream] {}", message),
156245 LogLevel::Error => error!("[jetstream] {}", message),
157246 }
158158-159159- // Queue for database insertion (ignore send errors if channel closed)
247247+248248+ // Queue for batch database insertion (ignore send errors if channel closed)
160249 let _ = self.sender.send(entry);
161250 }
162162-163163- /// Background worker that processes the log queue
251251+252252+ /// Background worker that processes the log queue and flushes to the database.
253253+ ///
254254+ /// This worker runs in a dedicated tokio task and flushes batches when:
255255+ /// - 100 entries accumulate (to prevent unbounded memory growth)
256256+ /// - 5 seconds elapse (to ensure timely persistence)
257257+ /// - The channel closes (graceful shutdown)
258258+ ///
259259+ /// # Arguments
260260+ /// * `receiver` - Channel receiver for queued log entries
261261+ /// * `pool` - PostgreSQL connection pool for batch inserts
164262 async fn background_worker(
165263 mut receiver: mpsc::UnboundedReceiver<QueuedLogEntry>,
166264 pool: PgPool,
167265 ) {
168266 let mut batch = Vec::new();
169169- let mut flush_interval = interval(Duration::from_secs(5)); // Flush every 5 seconds
170170-267267+ // Periodic flush to ensure logs are persisted even during low-volume periods
268268+ let mut flush_interval = interval(Duration::from_secs(5));
269269+171270 info!("Started batched logging background worker");
172172-271271+173272 loop {
174273 tokio::select! {
175175- // Receive log entries
274274+ // Receive log entries from the queue
176275 Some(entry) = receiver.recv() => {
177276 batch.push(entry);
178178-179179- // Flush if batch is large enough
277277+278278+ // Flush when batch reaches size threshold to prevent memory buildup
180279 if batch.len() >= 100 {
181280 Self::flush_batch(&pool, &mut batch).await;
182281 }
183282 }
184184-185185- // Periodic flush
283283+284284+ // Time-based flush to ensure logs are persisted within 5 seconds
186285 _ = flush_interval.tick() => {
187286 if !batch.is_empty() {
188287 Self::flush_batch(&pool, &mut batch).await;
189288 }
190289 }
191191-192192- // Channel closed, flush remaining and exit
290290+291291+ // Channel closed (shutdown), flush remaining logs and exit gracefully
193292 else => {
194293 if !batch.is_empty() {
195294 Self::flush_batch(&pool, &mut batch).await;
···198297 }
199298 }
200299 }
201201-300300+202301 info!("Batched logging background worker shut down");
203302 }
204204-205205- /// Flush a batch of log entries to the database
303303+304304+ /// Flushes a batch of log entries to the database using a bulk INSERT.
305305+ ///
306306+ /// This method dynamically constructs a multi-value INSERT statement to minimize
307307+ /// database round-trips. Each log entry contributes 8 parameters (fields).
308308+ ///
309309+ /// # Arguments
310310+ /// * `pool` - PostgreSQL connection pool
311311+ /// * `batch` - Mutable vector of queued log entries (cleared after flush)
312312+ ///
313313+ /// # Performance
314314+ /// - Warns if a batch takes >100ms to insert (potential database issue)
315315+ /// - Logs successful flushes with timing information
316316+ /// - On error, logs are lost but the system continues (fail-open)
206317 async fn flush_batch(pool: &PgPool, batch: &mut Vec<QueuedLogEntry>) {
207318 if batch.is_empty() {
208319 return;
209320 }
210210-321321+211322 let batch_size = batch.len();
212323 let start = std::time::Instant::now();
213213-214214- // Build bulk INSERT query
324324+325325+ // Build bulk INSERT query dynamically based on batch size
215326 let mut query = String::from(
216216- "INSERT INTO logs (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at) VALUES "
327327+ "INSERT INTO logs (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at) VALUES ",
217328 );
218218-219219- // Add placeholders for each record
329329+330330+ // Add placeholders for each record (8 parameters per entry)
220331 for i in 0..batch_size {
221332 if i > 0 {
222333 query.push_str(", ");
223334 }
224224- let base = i * 8 + 1; // 8 fields per log entry
335335+ // Calculate base parameter index (8 fields per log entry, 1-indexed)
336336+ let base = i * 8 + 1;
225337 query.push_str(&format!(
226338 "(${}, ${}, ${}, ${}, ${}, ${}, ${}, ${})",
227227- base, base + 1, base + 2, base + 3, base + 4, base + 5, base + 6, base + 7
339339+ base,
340340+ base + 1,
341341+ base + 2,
342342+ base + 3,
343343+ base + 4,
344344+ base + 5,
345345+ base + 6,
346346+ base + 7
228347 ));
229348 }
230230-231231- // Bind parameters
349349+350350+ // Bind all parameters in order (log_type, job_id, user_did, slice_uri, level, message, metadata, created_at)
232351 let mut sqlx_query = sqlx::query(&query);
233352 for entry in batch.iter() {
234353 sqlx_query = sqlx_query
···241360 .bind(&entry.metadata)
242361 .bind(entry.created_at);
243362 }
244244-245245- // Execute batch insert
363363+364364+ // Execute the batch insert and handle errors gracefully
246365 match sqlx_query.execute(pool).await {
247366 Ok(_) => {
248367 let elapsed = start.elapsed();
368368+ // Warn about slow inserts that may indicate database performance issues
249369 if elapsed.as_millis() > 100 {
250250- warn!("Slow log batch insert: {} entries in {:?}", batch_size, elapsed);
370370+ warn!(
371371+ "Slow log batch insert: {} entries in {:?}",
372372+ batch_size, elapsed
373373+ );
251374 } else {
252375 info!("Flushed {} log entries in {:?}", batch_size, elapsed);
253376 }
254377 }
255378 Err(e) => {
256379 error!("Failed to flush log batch of {} entries: {}", batch_size, e);
257257- // Continue processing - logs are lost but system keeps running
380380+ // Fail-open: logs are lost but the system continues to prevent cascading failures
258381 }
259382 }
260260-383383+261384 batch.clear();
262385 }
263386}
264387265265-/// Log entry struct for database queries
388388+/// Represents a log entry retrieved from the database.
389389+///
390390+/// This struct is used for query results and API responses. Field names are
391391+/// converted to camelCase for JSON serialization.
266392#[derive(Debug, serde::Serialize, sqlx::FromRow)]
267393#[serde(rename_all = "camelCase")]
268394pub struct LogEntry {
···277403 pub metadata: Option<serde_json::Value>,
278404}
279405280280-/// Get logs for a specific sync job
406406+/// Retrieves logs for a specific sync job, ordered chronologically.
407407+///
408408+/// # Arguments
409409+/// * `pool` - PostgreSQL connection pool
410410+/// * `job_id` - Unique identifier of the sync job
411411+/// * `limit` - Optional maximum number of logs to return (default: 100)
412412+///
413413+/// # Returns
414414+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (ASC)
415415+/// * `Err(sqlx::Error)` - Database query error
281416pub async fn get_sync_job_logs(
282417 pool: &PgPool,
283418 job_id: Uuid,
···303438 Ok(rows)
304439}
305440306306-/// Get jetstream logs, optionally filtered by slice (still includes global connection logs)
441441+/// Retrieves Jetstream logs, optionally filtered by slice URI.
442442+///
443443+/// When a slice filter is provided, returns both slice-specific logs AND global
444444+/// connection logs (where slice_uri is NULL). This ensures connection status logs
445445+/// are visible when viewing slice-specific logs.
446446+///
447447+/// # Arguments
448448+/// * `pool` - PostgreSQL connection pool
449449+/// * `slice_filter` - Optional slice URI to filter logs
450450+/// * `limit` - Optional maximum number of logs to return (default: 100)
451451+///
452452+/// # Returns
453453+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (DESC)
454454+/// * `Err(sqlx::Error)` - Database query error
307455pub async fn get_jetstream_logs(
308456 pool: &PgPool,
309457 slice_filter: Option<&str>,
···313461314462 let rows = if let Some(slice_uri) = slice_filter {
315463 tracing::info!("Querying jetstream logs with slice filter: {}", slice_uri);
316316- // When filtering by slice, include both slice-specific logs and global connection logs (where slice_uri is NULL)
464464+ // Include both slice-specific logs and global connection logs for context
317465 let results = sqlx::query_as!(
318466 LogEntry,
319467 r#"
···330478 .fetch_all(pool)
331479 .await?;
332480333333- tracing::info!("Found {} jetstream logs for slice {}", results.len(), slice_uri);
481481+ tracing::info!(
482482+ "Found {} jetstream logs for slice {}",
483483+ results.len(),
484484+ slice_uri
485485+ );
334486 results
335487 } else {
336336- // No filter, return all jetstream logs
488488+ // No filter provided, return all Jetstream logs across all slices
337489 sqlx::query_as!(
338490 LogEntry,
339491 r#"
···352504 Ok(rows)
353505}
354506355355-/// Get logs for a specific slice
507507+/// Retrieves all logs associated with a specific slice URI.
508508+///
509509+/// This includes both sync job logs and Jetstream logs for the slice.
510510+///
511511+/// # Arguments
512512+/// * `pool` - PostgreSQL connection pool
513513+/// * `slice_uri` - AT-URI of the slice
514514+/// * `log_type_filter` - Optional log type filter ("sync_job", "jetstream", "system")
515515+/// * `limit` - Optional maximum number of logs to return (default: 100)
516516+///
517517+/// # Returns
518518+/// * `Ok(Vec<LogEntry>)` - List of log entries ordered by creation time (DESC)
519519+/// * `Err(sqlx::Error)` - Database query error
356520#[allow(dead_code)]
357521pub async fn get_slice_logs(
358522 pool: &PgPool,
···398562 Ok(rows)
399563}
400564401401-/// Clean up old logs (keep last 1 day for jetstream, 7 days for completed sync jobs)
565565+/// Deletes old log entries to prevent unbounded database growth.
566566+///
567567+/// Retention policy:
568568+/// - Jetstream logs: 1 day (high volume, primarily for real-time debugging)
569569+/// - Sync job logs: 7 days (lower volume, useful for historical analysis)
570570+/// - System logs: 7 days
571571+///
572572+/// # Arguments
573573+/// * `pool` - PostgreSQL connection pool
574574+///
575575+/// # Returns
576576+/// * `Ok(u64)` - Number of deleted log entries
577577+/// * `Err(sqlx::Error)` - Database query error
402578pub async fn cleanup_old_logs(pool: &PgPool) -> Result<u64, sqlx::Error> {
403579 let result = sqlx::query!(
404580 r#"
···415591 Ok(result.rows_affected())
416592}
417593418418-/// Start a background task that cleans up old logs every 6 hours
594594+/// Spawns a background task that periodically cleans up old logs.
595595+///
596596+/// The task runs every 6 hours for the lifetime of the application, deleting
597597+/// logs according to the retention policy in `cleanup_old_logs`.
598598+///
599599+/// # Arguments
600600+/// * `pool` - PostgreSQL connection pool (cloned into the spawned task)
419601pub fn start_log_cleanup_task(pool: PgPool) {
420602 tokio::spawn(async move {
421421- let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(6 * 3600)); // Every 6 hours
603603+ // Run cleanup every 6 hours (balances database load with timely cleanup)
604604+ let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(6 * 3600));
422605423606 info!("Started log cleanup background task (runs every 6 hours)");
424607···439622 }
440623 }
441624 });
442442-}625625+}
+23-11
api/src/main.rs
···1010mod jobs;
1111mod logging;
1212mod models;
1313-mod redis_cache;
1413mod sync;
1514mod xrpc;
1615···8382 let relay_endpoint = env::var("RELAY_ENDPOINT")
8483 .unwrap_or_else(|_| "https://relay1.us-west.bsky.network".to_string());
85848686- let system_slice_uri = env::var("SYSTEM_SLICE_URI")
8787- .unwrap_or_else(|_| "at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z".to_string());
8585+ let system_slice_uri = env::var("SYSTEM_SLICE_URI").unwrap_or_else(|_| {
8686+ "at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z".to_string()
8787+ });
88888989 let config = Config {
9090 auth_base_url,
···161161 let wait_time = RECONNECT_WINDOW - now.duration_since(window_start);
162162 tracing::warn!(
163163 "Rate limit exceeded: {} reconnects in last minute, waiting {:?}",
164164- reconnect_count, wait_time
164164+ reconnect_count,
165165+ wait_time
165166 );
166167 tokio::time::sleep(wait_time).await;
167168 continue;
···170171 reconnect_count += 1;
171172172173 // Read cursor position from database
173173- let initial_cursor = PostgresCursorHandler::read_cursor(&pool_for_jetstream, "default").await;
174174+ let initial_cursor =
175175+ PostgresCursorHandler::read_cursor(&pool_for_jetstream, "default").await;
174176 if let Some(cursor) = initial_cursor {
175177 tracing::info!("Resuming from cursor position: {}", cursor);
176178 } else {
···191193 Some(cursor_handler.clone()),
192194 initial_cursor,
193195 redis_url.clone(),
194194- ).await;
196196+ )
197197+ .await;
195198196199 let consumer_arc = match consumer_result {
197200 Ok(consumer) => {
···206209 arc
207210 }
208211 Err(e) => {
209209- tracing::error!("Failed to create Jetstream consumer: {} - retry in {:?}", e, retry_delay);
212212+ tracing::error!(
213213+ "Failed to create Jetstream consumer: {} - retry in {:?}",
214214+ e,
215215+ retry_delay
216216+ );
210217 jetstream_connected_clone.store(false, std::sync::atomic::Ordering::Relaxed);
211218 tokio::time::sleep(retry_delay).await;
212219 retry_delay = std::cmp::min(retry_delay * 2, MAX_RETRY_DELAY);
···240247 // Create auth cache for token/session caching (5 minute TTL)
241248 let redis_url = env::var("REDIS_URL").ok();
242249 let auth_cache_backend = if let Some(redis_url) = redis_url {
243243- cache::CacheBackend::Redis { url: redis_url, ttl_seconds: Some(300) }
250250+ cache::CacheBackend::Redis {
251251+ url: redis_url,
252252+ ttl_seconds: Some(300),
253253+ }
244254 } else {
245245- cache::CacheBackend::InMemory { ttl_seconds: Some(300) }
255255+ cache::CacheBackend::InMemory {
256256+ ttl_seconds: Some(300),
257257+ }
246258 };
247259 let auth_cache = Arc::new(Mutex::new(
248248- cache::CacheFactory::create_slice_cache(auth_cache_backend).await?
260260+ cache::CacheFactory::create_slice_cache(auth_cache_backend).await?,
249261 ));
250262251263 let state = AppState {
···358370 let addr = format!("0.0.0.0:{}", port);
359371360372 let listener = tokio::net::TcpListener::bind(&addr).await?;
361361- info!("🚀 Server running on http://{}", addr);
373373+ info!("Server running on http://{}", addr);
362374363375 axum::serve(listener, app).await?;
364376 Ok(())