···7# Authentication service base URL
8AUTH_BASE_URL=http://localhost:8081
910-# AT Protocol relay endpoint for syncing data
11RELAY_ENDPOINT=https://relay1.us-west.bsky.network
0001213# System slice URI
14SYSTEM_SLICE_URI=at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z
···7# Authentication service base URL
8AUTH_BASE_URL=http://localhost:8081
910+# AT Protocol relay endpoint for backfill
11RELAY_ENDPOINT=https://relay1.us-west.bsky.network
12+13+# AT Protocol Jetstream hostname
14+JETSTREAM_HOSTNAME=jetstream2.us-west.bsky.network
1516# System slice URI
17SYSTEM_SLICE_URI=at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z
···49async-trait = "0.1"
5051# AT Protocol client
52-atproto-client = "0.11.2"
53-atproto-identity = "0.11.2"
54-atproto-oauth = "0.11.2"
55-atproto-jetstream = "0.11.2"
565758# Middleware for HTTP requests with retry logic
···49async-trait = "0.1"
5051# AT Protocol client
52+atproto-client = "0.13.0"
53+atproto-identity = "0.13.0"
54+atproto-oauth = "0.13.0"
55+atproto-jetstream = "0.13.0"
565758# Middleware for HTTP requests with retry logic
+13
api/migrations/011_jetstream_cursor.sql
···0000000000000
-- Add jetstream cursor table for tracking event processing position.
-- Single-row-per-consumer design: 'default' is the only id used today.
CREATE TABLE IF NOT EXISTS jetstream_cursor (
    id TEXT PRIMARY KEY DEFAULT 'default',
    time_us BIGINT NOT NULL,
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Index for tracking cursor freshness.
-- IF NOT EXISTS keeps this statement idempotent, matching the
-- CREATE TABLE IF NOT EXISTS above — without it, re-running the
-- migration against an existing table fails on this line.
CREATE INDEX IF NOT EXISTS idx_jetstream_cursor_updated_at ON jetstream_cursor(updated_at);

-- Insert default cursor starting at 0 (will be updated when events are processed)
INSERT INTO jetstream_cursor (id, time_us) VALUES ('default', 0)
ON CONFLICT (id) DO NOTHING;
···34use serde::{Deserialize, Serialize};
5use atproto_client::client::DPoPAuth;
6-use atproto_client::url::URLBuilder;
7use thiserror::Error;
8use atproto_oauth::dpop::{DpopRetry, request_dpop};
9use reqwest_middleware::ClientBuilder;
···24 UploadFailed { status: u16, message: String },
25}
2627-/// Request for uploading a blob
28-#[derive(Serialize, Deserialize, Debug)]
29-pub struct UploadBlobRequest {
30- // Note: For blob uploads, the data is sent as the request body, not JSON
31- // So this struct is mainly for reference - we'll handle the actual upload differently
32-}
3334/// Response from blob upload
35#[cfg_attr(debug_assertions, derive(Debug))]
···68 blob_data: Vec<u8>,
69 mime_type: &str,
70) -> Result<UploadBlobResponse, BlobUploadError> {
71- // Build the URL
72- let mut url_builder = URLBuilder::new(base_url);
73- url_builder.path("/xrpc/com.atproto.repo.uploadBlob");
74- let url = url_builder.build();
75-76 // For blob uploads, we need to use a different approach than post_dpop_json
77 // since we're sending binary data, not JSON
78 // We need to use the same DPoP mechanism but with binary body
79-80 // Use the internal post_dpop function but for binary data
81 post_dpop_binary(http_client, dpop_auth, &url, blob_data, mime_type)
82 .await
···127 if !http_response.status().is_success() {
128 let status = http_response.status();
129 let error_text = http_response.text().await.unwrap_or_else(|_| "unknown".to_string());
130- return Err(BlobUploadError::UploadFailed {
131- status: status.as_u16(),
132- message: error_text
133 });
134 }
135···137 .json::<serde_json::Value>()
138 .await
139 .map_err(|e| BlobUploadError::HttpRequest(e.into()))?;
140-141 Ok(value)
142-}
···34use serde::{Deserialize, Serialize};
5use atproto_client::client::DPoPAuth;
06use thiserror::Error;
7use atproto_oauth::dpop::{DpopRetry, request_dpop};
8use reqwest_middleware::ClientBuilder;
···23 UploadFailed { status: u16, message: String },
24}
250000002627/// Response from blob upload
28#[cfg_attr(debug_assertions, derive(Debug))]
···61 blob_data: Vec<u8>,
62 mime_type: &str,
63) -> Result<UploadBlobResponse, BlobUploadError> {
64+ // Build the URL using standard string formatting
65+ let url = format!("{}/xrpc/com.atproto.repo.uploadBlob", base_url.trim_end_matches('/'));
66+0067 // For blob uploads, we need to use a different approach than post_dpop_json
68 // since we're sending binary data, not JSON
69 // We need to use the same DPoP mechanism but with binary body
70+71 // Use the internal post_dpop function but for binary data
72 post_dpop_binary(http_client, dpop_auth, &url, blob_data, mime_type)
73 .await
···118 if !http_response.status().is_success() {
119 let status = http_response.status();
120 let error_text = http_response.text().await.unwrap_or_else(|_| "unknown".to_string());
121+ return Err(BlobUploadError::UploadFailed {
122+ status: status.as_u16(),
123+ message: error_text
124 });
125 }
126···128 .json::<serde_json::Value>()
129 .await
130 .map_err(|e| BlobUploadError::HttpRequest(e.into()))?;
131+132 Ok(value)
133+}
+55-20
api/src/jetstream.rs
···1011use crate::actor_resolver::resolve_actor_data;
12use crate::database::Database;
013use crate::models::{Record, Actor};
14use crate::errors::SliceError;
15use crate::logging::{Logger, LogLevel};
···18 consumer: Consumer,
19 database: Database,
20 http_client: Client,
21- // Track which collections we should index for each slice
22 slice_collections: Arc<RwLock<HashMap<String, HashSet<String>>>>,
23- // Track domains for each slice (slice_uri -> domain)
24 slice_domains: Arc<RwLock<HashMap<String, String>>>,
25- // Cache for actor lookups
26 actor_cache: Arc<RwLock<HashMap<(String, String), bool>>>,
27- // Lexicon cache for each slice
28 slice_lexicons: Arc<RwLock<HashMap<String, Vec<serde_json::Value>>>>,
29- // Event counter for health monitoring
30 pub event_count: Arc<std::sync::atomic::AtomicU64>,
031}
3233// Event handler that implements the EventHandler trait
···37 slice_collections: Arc<RwLock<HashMap<String, HashSet<String>>>>,
38 slice_domains: Arc<RwLock<HashMap<String, String>>>,
39 event_count: Arc<std::sync::atomic::AtomicU64>,
40- // Cache for (did, slice_uri) -> is_actor lookups
41 actor_cache: Arc<RwLock<HashMap<(String, String), bool>>>,
42- // Lexicon cache for each slice
43 slice_lexicons: Arc<RwLock<HashMap<String, Vec<serde_json::Value>>>>,
044}
4546#[async_trait]
47impl EventHandler for SliceEventHandler {
48 async fn handle_event(&self, event: JetstreamEvent) -> Result<()> {
49- // Increment event counter
50 let count = self.event_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
51-52- // Log every 10000 events to show activity (console only, not in DB)
53- if count % 10000 == 0 {
54 info!("Jetstream consumer has processed {} events", count);
55 }
56-0000000000000000057 match event {
58 JetstreamEvent::Commit { did, commit, .. } => {
59 if let Err(e) = self.handle_commit_event(&did, commit).await {
···501}
502503impl JetstreamConsumer {
504- pub async fn new(database: Database, jetstream_hostname: Option<String>) -> Result<Self, SliceError> {
000000000000505 let config = ConsumerTaskConfig {
506 user_agent: "slice-server/1.0".to_string(),
507 compression: false,
508 zstd_dictionary_location: String::new(),
509 jetstream_hostname: jetstream_hostname
510 .unwrap_or_else(|| "jetstream1.us-east.bsky.network".to_string()),
511- collections: Vec::new(), // We'll update this dynamically based on slice configs
512- dids: Vec::new(), // Subscribe to all DIDs
513 max_message_size_bytes: None,
514- cursor: None,
515- require_hello: true, // Match official example - enables proper handshake
516 };
517518 let consumer = Consumer::new(config);
···527 actor_cache: Arc::new(RwLock::new(HashMap::new())),
528 slice_lexicons: Arc::new(RwLock::new(HashMap::new())),
529 event_count: Arc::new(std::sync::atomic::AtomicU64::new(0)),
0530 })
531 }
532···651 event_count: self.event_count.clone(),
652 actor_cache: self.actor_cache.clone(),
653 slice_lexicons: self.slice_lexicons.clone(),
0654 });
655656 self.consumer.register_handler(handler).await
···671672 // Start the consumer
673 info!("Starting Jetstream background consumer...");
674- self.consumer.run_background(cancellation_token).await
675 .map_err(|e| SliceError::JetstreamError {
676 message: format!("Consumer failed: {}", e),
677- })?;
678-0000000000679 Ok(())
680 }
681
···1011use crate::actor_resolver::resolve_actor_data;
12use crate::database::Database;
13+use crate::jetstream_cursor::PostgresCursorHandler;
14use crate::models::{Record, Actor};
15use crate::errors::SliceError;
16use crate::logging::{Logger, LogLevel};
···19 consumer: Consumer,
20 database: Database,
21 http_client: Client,
022 slice_collections: Arc<RwLock<HashMap<String, HashSet<String>>>>,
023 slice_domains: Arc<RwLock<HashMap<String, String>>>,
024 actor_cache: Arc<RwLock<HashMap<(String, String), bool>>>,
025 slice_lexicons: Arc<RwLock<HashMap<String, Vec<serde_json::Value>>>>,
026 pub event_count: Arc<std::sync::atomic::AtomicU64>,
27+ cursor_handler: Option<Arc<PostgresCursorHandler>>,
28}
2930// Event handler that implements the EventHandler trait
···34 slice_collections: Arc<RwLock<HashMap<String, HashSet<String>>>>,
35 slice_domains: Arc<RwLock<HashMap<String, String>>>,
36 event_count: Arc<std::sync::atomic::AtomicU64>,
037 actor_cache: Arc<RwLock<HashMap<(String, String), bool>>>,
038 slice_lexicons: Arc<RwLock<HashMap<String, Vec<serde_json::Value>>>>,
39+ cursor_handler: Option<Arc<PostgresCursorHandler>>,
40}
4142#[async_trait]
43impl EventHandler for SliceEventHandler {
44 async fn handle_event(&self, event: JetstreamEvent) -> Result<()> {
045 let count = self.event_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
46+47+ if count.is_multiple_of(10000) {
048 info!("Jetstream consumer has processed {} events", count);
49 }
50+51+ // Extract and update cursor position from event
52+ let time_us = match &event {
53+ JetstreamEvent::Commit { time_us, .. } => *time_us,
54+ JetstreamEvent::Delete { time_us, .. } => *time_us,
55+ JetstreamEvent::Identity { time_us, .. } => *time_us,
56+ JetstreamEvent::Account { time_us, .. } => *time_us,
57+ };
58+59+ if let Some(cursor_handler) = &self.cursor_handler {
60+ cursor_handler.update_position(time_us);
61+62+ // Periodically write cursor to DB (debounced by handler)
63+ if let Err(e) = cursor_handler.maybe_write_cursor().await {
64+ error!("Failed to write cursor: {}", e);
65+ }
66+ }
67+68 match event {
69 JetstreamEvent::Commit { did, commit, .. } => {
70 if let Err(e) = self.handle_commit_event(&did, commit).await {
···512}
513514impl JetstreamConsumer {
515+ /// Create a new Jetstream consumer with optional cursor support
516+ ///
517+ /// # Arguments
518+ /// * `database` - Database connection for slice configurations and record storage
519+ /// * `jetstream_hostname` - Optional custom jetstream hostname
520+ /// * `cursor_handler` - Optional cursor handler for resumable event processing
521+ /// * `initial_cursor` - Optional starting cursor position (time_us) to resume from
522+ pub async fn new(
523+ database: Database,
524+ jetstream_hostname: Option<String>,
525+ cursor_handler: Option<Arc<PostgresCursorHandler>>,
526+ initial_cursor: Option<i64>,
527+ ) -> Result<Self, SliceError> {
528 let config = ConsumerTaskConfig {
529 user_agent: "slice-server/1.0".to_string(),
530 compression: false,
531 zstd_dictionary_location: String::new(),
532 jetstream_hostname: jetstream_hostname
533 .unwrap_or_else(|| "jetstream1.us-east.bsky.network".to_string()),
534+ collections: Vec::new(),
535+ dids: Vec::new(),
536 max_message_size_bytes: None,
537+ cursor: initial_cursor,
538+ require_hello: true,
539 };
540541 let consumer = Consumer::new(config);
···550 actor_cache: Arc::new(RwLock::new(HashMap::new())),
551 slice_lexicons: Arc::new(RwLock::new(HashMap::new())),
552 event_count: Arc::new(std::sync::atomic::AtomicU64::new(0)),
553+ cursor_handler,
554 })
555 }
556···675 event_count: self.event_count.clone(),
676 actor_cache: self.actor_cache.clone(),
677 slice_lexicons: self.slice_lexicons.clone(),
678+ cursor_handler: self.cursor_handler.clone(),
679 });
680681 self.consumer.register_handler(handler).await
···696697 // Start the consumer
698 info!("Starting Jetstream background consumer...");
699+ let result = self.consumer.run_background(cancellation_token).await
700 .map_err(|e| SliceError::JetstreamError {
701 message: format!("Consumer failed: {}", e),
702+ });
703+704+ // Force write cursor on shutdown to ensure latest position is persisted
705+ if let Some(cursor_handler) = &self.cursor_handler {
706+ if let Err(e) = cursor_handler.force_write_cursor().await {
707+ error!("Failed to write final cursor position: {}", e);
708+ } else {
709+ info!("Final cursor position written to database");
710+ }
711+ }
712+713+ result?;
714 Ok(())
715 }
716