Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

whew gross

Un-fun refactor: jetstream now uses serde_json's RawValue for records. It's gross — the event API has Options everywhere now — but it works. Bleh.

+217 -279
+1 -1
jetstream/Cargo.toml
··· 22 futures-util = "0.3.31" 23 url = "2.5.4" 24 serde = { version = "1.0.215", features = ["derive"] } 25 - serde_json = "1.0.132" 26 chrono = "0.4.38" 27 zstd = "0.13.2" 28 thiserror = "2.0.3"
··· 22 futures-util = "0.3.31" 23 url = "2.5.4" 24 serde = { version = "1.0.215", features = ["derive"] } 25 + serde_json = { version = "1.0.140", features = ["raw_value"] } 26 chrono = "0.4.38" 27 zstd = "0.13.2" 28 thiserror = "2.0.3"
+22 -5
jetstream/examples/arbitrary_record.rs
··· 5 use clap::Parser; 6 use jetstream::{ 7 events::{ 8 - commit::CommitEvent, 9 - JetstreamEvent::Commit, 10 }, 11 DefaultJetstreamEndpoints, 12 JetstreamCompression, ··· 30 let args = Args::parse(); 31 32 let dids = args.did.unwrap_or_default(); 33 - let config: JetstreamConfig<serde_json::Value> = JetstreamConfig { 34 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 35 wanted_collections: vec![args.nsid.clone()], 36 wanted_dids: dids.clone(), ··· 48 ); 49 50 while let Some(event) = receiver.recv().await { 51 - if let Commit(CommitEvent::CreateOrUpdate { commit, .. }) = event { 52 - println!("got record: {:?}", commit.record); 53 } 54 } 55
··· 5 use clap::Parser; 6 use jetstream::{ 7 events::{ 8 + CommitOp, 9 + EventKind, 10 + JetstreamEvent, 11 }, 12 DefaultJetstreamEndpoints, 13 JetstreamCompression, ··· 31 let args = Args::parse(); 32 33 let dids = args.did.unwrap_or_default(); 34 + let config: JetstreamConfig = JetstreamConfig { 35 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 36 wanted_collections: vec![args.nsid.clone()], 37 wanted_dids: dids.clone(), ··· 49 ); 50 51 while let Some(event) = receiver.recv().await { 52 + if let JetstreamEvent { 53 + kind: EventKind::Commit, 54 + commit: Some(commit), 55 + .. 56 + } = event 57 + { 58 + if commit.collection != args.nsid { 59 + continue; 60 + } 61 + if !(commit.operation == CommitOp::Create || commit.operation == CommitOp::Update) { 62 + continue; 63 + } 64 + let Some(rec) = commit.record else { continue }; 65 + println!( 66 + "New or updated record! ({})\n{:?}\n", 67 + commit.rkey.as_str(), 68 + rec.get() 69 + ); 70 } 71 } 72
+20 -31
jetstream/examples/basic.rs
··· 7 use clap::Parser; 8 use jetstream::{ 9 events::{ 10 - commit::{ 11 - CommitEvent, 12 - CommitType, 13 - }, 14 - JetstreamEvent::Commit, 15 }, 16 DefaultJetstreamEndpoints, 17 JetstreamCompression, ··· 25 /// The DIDs to listen for events on, if not provided we will listen for all DIDs. 26 #[arg(short, long)] 27 did: Option<Vec<string::Did>>, 28 - /// The NSID for the collection to listen for (e.g. `app.bsky.feed.post`). 29 - #[arg(short, long)] 30 - nsid: string::Nsid, 31 } 32 33 #[tokio::main] ··· 37 let dids = args.did.unwrap_or_default(); 38 let config = JetstreamConfig { 39 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 40 - wanted_collections: vec![args.nsid.clone()], 41 wanted_dids: dids.clone(), 42 compression: JetstreamCompression::Zstd, 43 ..Default::default() ··· 46 let jetstream = JetstreamConnector::new(config)?; 47 let mut receiver = jetstream.connect().await?; 48 49 - println!( 50 - "Listening for '{}' events on DIDs: {:?}", 51 - args.nsid.as_str(), 52 - dids 53 - ); 54 55 while let Some(event) = receiver.recv().await { 56 - if let Commit(commit) = event { 57 - match commit { 58 - CommitEvent::CreateOrUpdate { info: _, commit } 59 - if commit.info.operation == CommitType::Create => 60 - { 61 - if let AppBskyFeedPost(record) = commit.record { 62 - println!( 63 - "New post created! ({})\n\n'{}'", 64 - commit.info.rkey.as_str(), 65 - record.text 66 - ); 67 - } 68 - } 69 - CommitEvent::Delete { info: _, commit } => { 70 - println!("A post has been deleted. ({})", commit.rkey.as_str()); 71 - } 72 - _ => {} 73 } 74 } 75 }
··· 7 use clap::Parser; 8 use jetstream::{ 9 events::{ 10 + CommitEvent, 11 + CommitOp, 12 + EventKind, 13 + JetstreamEvent, 14 }, 15 DefaultJetstreamEndpoints, 16 JetstreamCompression, ··· 24 /// The DIDs to listen for events on, if not provided we will listen for all DIDs. 25 #[arg(short, long)] 26 did: Option<Vec<string::Did>>, 27 } 28 29 #[tokio::main] ··· 33 let dids = args.did.unwrap_or_default(); 34 let config = JetstreamConfig { 35 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 36 + wanted_collections: vec![string::Nsid::new("app.bsky.feed.post".to_string()).unwrap()], 37 wanted_dids: dids.clone(), 38 compression: JetstreamCompression::Zstd, 39 ..Default::default() ··· 42 let jetstream = JetstreamConnector::new(config)?; 43 let mut receiver = jetstream.connect().await?; 44 45 + println!("Listening for 'app.bsky.feed.post' events on DIDs: {dids:?}"); 46 47 while let Some(event) = receiver.recv().await { 48 + if let JetstreamEvent { 49 + kind: EventKind::Commit, 50 + commit: 51 + Some(CommitEvent { 52 + operation: CommitOp::Create, 53 + rkey, 54 + record: Some(record), 55 + .. 56 + }), 57 + .. 58 + } = event 59 + { 60 + if let Ok(AppBskyFeedPost(rec)) = serde_json::from_str(record.get()) { 61 + println!("New post created! ({})\n{:?}\n", rkey.as_str(), rec.text); 62 } 63 } 64 }
-40
jetstream/src/events/account.rs
··· 1 - use chrono::Utc; 2 - use serde::Deserialize; 3 - 4 - use crate::{ 5 - events::EventInfo, 6 - exports, 7 - }; 8 - 9 - /// An event representing a change to an account. 10 - #[derive(Deserialize, Debug)] 11 - pub struct AccountEvent { 12 - /// Basic metadata included with every event. 13 - #[serde(flatten)] 14 - pub info: EventInfo, 15 - /// Account specific data bundled with this event. 16 - pub account: AccountData, 17 - } 18 - 19 - /// Account specific data bundled with an account event. 20 - #[derive(Deserialize, Debug)] 21 - pub struct AccountData { 22 - /// Whether the account is currently active. 23 - pub active: bool, 24 - /// The DID of the account. 25 - pub did: exports::Did, 26 - pub seq: u64, 27 - pub time: chrono::DateTime<Utc>, 28 - /// If `active` is `false` this will be present to explain why the account is inactive. 29 - pub status: Option<AccountStatus>, 30 - } 31 - 32 - /// The possible reasons an account might be listed as inactive. 33 - #[derive(Deserialize, Debug)] 34 - #[serde(rename_all = "lowercase")] 35 - pub enum AccountStatus { 36 - Deactivated, 37 - Deleted, 38 - Suspended, 39 - TakenDown, 40 - }
···
-55
jetstream/src/events/commit.rs
··· 1 - use serde::Deserialize; 2 - 3 - use crate::{ 4 - events::EventInfo, 5 - exports, 6 - }; 7 - 8 - /// An event representing a repo commit, which can be a `create`, `update`, or `delete` operation. 9 - #[derive(Deserialize, Debug)] 10 - #[serde(untagged, rename_all = "snake_case")] 11 - pub enum CommitEvent<R> { 12 - CreateOrUpdate { 13 - #[serde(flatten)] 14 - info: EventInfo, 15 - commit: CommitData<R>, 16 - }, 17 - Delete { 18 - #[serde(flatten)] 19 - info: EventInfo, 20 - commit: CommitInfo, 21 - }, 22 - } 23 - 24 - /// The type of commit operation that was performed. 25 - #[derive(Deserialize, Debug, PartialEq)] 26 - #[serde(rename_all = "snake_case")] 27 - pub enum CommitType { 28 - Create, 29 - Update, 30 - Delete, 31 - } 32 - 33 - /// Basic commit specific info bundled with every event, also the only data included with a `delete` 34 - /// operation. 35 - #[derive(Deserialize, Debug)] 36 - pub struct CommitInfo { 37 - /// The type of commit operation that was performed. 38 - pub operation: CommitType, 39 - pub rev: String, 40 - pub rkey: exports::RecordKey, 41 - /// The NSID of the record type that this commit is associated with. 42 - pub collection: exports::Nsid, 43 - } 44 - 45 - /// Detailed data bundled with a commit event. This data is only included when the event is 46 - /// `create` or `update`. 47 - #[derive(Deserialize, Debug)] 48 - pub struct CommitData<R> { 49 - #[serde(flatten)] 50 - pub info: CommitInfo, 51 - /// The CID of the record that was operated on. 52 - pub cid: exports::Cid, 53 - /// The record that was operated on. 54 - pub record: R, 55 - }
···
-28
jetstream/src/events/identity.rs
··· 1 - use chrono::Utc; 2 - use serde::Deserialize; 3 - 4 - use crate::{ 5 - events::EventInfo, 6 - exports, 7 - }; 8 - 9 - /// An event representing a change to an identity. 10 - #[derive(Deserialize, Debug)] 11 - pub struct IdentityEvent { 12 - /// Basic metadata included with every event. 13 - #[serde(flatten)] 14 - pub info: EventInfo, 15 - /// Identity specific data bundled with this event. 16 - pub identity: IdentityData, 17 - } 18 - 19 - /// Identity specific data bundled with an identity event. 20 - #[derive(Deserialize, Debug)] 21 - pub struct IdentityData { 22 - /// The DID of the identity. 23 - pub did: exports::Did, 24 - /// The handle associated with the identity. 25 - pub handle: Option<exports::Handle>, 26 - pub seq: u64, 27 - pub time: chrono::DateTime<Utc>, 28 - }
···
+90 -32
jetstream/src/events/mod.rs
··· 1 - pub mod account; 2 - pub mod commit; 3 - pub mod identity; 4 - 5 use std::time::{ 6 Duration, 7 SystemTime, ··· 9 UNIX_EPOCH, 10 }; 11 12 use serde::Deserialize; 13 14 use crate::exports; 15 16 /// Opaque wrapper for the time_us cursor used by jetstream 17 - /// 18 - /// Generally, you should use a cursor 19 #[derive(Deserialize, Debug, Clone, PartialEq, PartialOrd)] 20 pub struct Cursor(u64); 21 22 - /// Basic data that is included with every event. 23 - #[derive(Deserialize, Debug)] 24 - pub struct EventInfo { 25 pub did: exports::Did, 26 - pub time_us: Cursor, 27 pub kind: EventKind, 28 } 29 30 - #[derive(Deserialize, Debug)] 31 - #[serde(untagged)] 32 - pub enum JetstreamEvent<R> { 33 - Commit(commit::CommitEvent<R>), 34 - Identity(identity::IdentityEvent), 35 - Account(account::AccountEvent), 36 - } 37 - 38 - #[derive(Deserialize, Debug)] 39 #[serde(rename_all = "snake_case")] 40 pub enum EventKind { 41 Commit, ··· 43 Account, 44 } 45 46 - impl<R> JetstreamEvent<R> { 47 - pub fn cursor(&self) -> Cursor { 48 - match self { 49 - JetstreamEvent::Commit(commit::CommitEvent::CreateOrUpdate { info, .. }) => { 50 - info.time_us.clone() 51 - } 52 - JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => { 53 - info.time_us.clone() 54 - } 55 - JetstreamEvent::Identity(e) => e.info.time_us.clone(), 56 - JetstreamEvent::Account(e) => e.info.time_us.clone(), 57 - } 58 - } 59 } 60 61 impl Cursor { ··· 136 UNIX_EPOCH + Duration::from_micros(c.0) 137 } 138 }
··· 1 use std::time::{ 2 Duration, 3 SystemTime, ··· 5 UNIX_EPOCH, 6 }; 7 8 + use chrono::Utc; 9 use serde::Deserialize; 10 + use serde_json::value::RawValue; 11 12 use crate::exports; 13 14 /// Opaque wrapper for the time_us cursor used by jetstream 15 #[derive(Deserialize, Debug, Clone, PartialEq, PartialOrd)] 16 pub struct Cursor(u64); 17 18 + #[derive(Debug, Deserialize)] 19 + #[serde(rename_all = "snake_case")] 20 + pub struct JetstreamEvent { 21 + #[serde(rename = "time_us")] 22 + pub cursor: Cursor, 23 pub did: exports::Did, 24 pub kind: EventKind, 25 + pub commit: Option<CommitEvent>, 26 + pub identity: Option<IdentityEvent>, 27 + pub account: Option<AccountEvent>, 28 } 29 30 + #[derive(Debug, Deserialize, PartialEq)] 31 #[serde(rename_all = "snake_case")] 32 pub enum EventKind { 33 Commit, ··· 35 Account, 36 } 37 38 + #[derive(Debug, Deserialize)] 39 + #[serde(rename_all = "snake_case")] 40 + pub struct CommitEvent { 41 + pub collection: exports::Nsid, 42 + pub rkey: exports::RecordKey, 43 + pub rev: String, 44 + pub operation: CommitOp, 45 + pub record: Option<Box<RawValue>>, 46 + pub cid: Option<exports::Cid>, 47 + } 48 + 49 + #[derive(Debug, Deserialize, PartialEq)] 50 + #[serde(rename_all = "snake_case")] 51 + pub enum CommitOp { 52 + Create, 53 + Update, 54 + Delete, 55 + } 56 + 57 + #[derive(Debug, Deserialize, PartialEq)] 58 + pub struct IdentityEvent { 59 + pub did: exports::Did, 60 + pub handle: Option<exports::Handle>, 61 + pub seq: u64, 62 + pub time: chrono::DateTime<Utc>, 63 + } 64 + 65 + #[derive(Debug, Deserialize, PartialEq)] 66 + pub struct AccountEvent { 67 + pub active: bool, 68 + pub did: exports::Did, 69 + pub seq: u64, 70 + pub time: chrono::DateTime<Utc>, 71 + pub status: Option<String>, 72 } 73 74 impl Cursor { ··· 149 UNIX_EPOCH + Duration::from_micros(c.0) 150 } 151 } 152 + 153 + #[cfg(test)] 154 + mod test { 155 + use super::*; 156 + 157 + #[test] 158 + fn test_parse_commit_event() -> anyhow::Result<()> { 159 + let json = r#"{ 
160 + "rev":"3llrdsginou2i", 161 + "operation":"create", 162 + "collection":"app.bsky.feed.post", 163 + "rkey":"3llrdsglqdc2s", 164 + "cid": "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy", 165 + "record": {"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"} 166 + }"#; 167 + let commit: CommitEvent = serde_json::from_str(json)?; 168 + assert_eq!( 169 + commit.cid.unwrap(), 170 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()? 171 + ); 172 + assert_eq!( 173 + commit.record.unwrap().get(), 174 + r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"# 175 + ); 176 + Ok(()) 177 + } 178 + 179 + #[test] 180 + fn test_parse_whole_event() -> anyhow::Result<()> { 181 + let json = r#"{"did":"did:plc:ai3dzf35cth7s3st7n7jsd7r","time_us":1743526687419798,"kind":"commit","commit":{"rev":"3llrdsginou2i","operation":"create","collection":"app.bsky.feed.post","rkey":"3llrdsglqdc2s","record":{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"},"cid":"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"}}"#; 182 + let event: JetstreamEvent = serde_json::from_str(json)?; 183 + assert_eq!(event.kind, EventKind::Commit); 184 + assert!(event.commit.is_some()); 185 + let commit = event.commit.unwrap(); 186 + assert_eq!( 187 + commit.cid.unwrap(), 188 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()? 189 + ); 190 + assert_eq!( 191 + commit.record.unwrap().get(), 192 + r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"# 193 + ); 194 + Ok(()) 195 + } 196 + }
+20 -29
jetstream/src/lib.rs
··· 7 Cursor as IoCursor, 8 Read, 9 }, 10 - marker::PhantomData, 11 time::{ 12 Duration, 13 Instant, 14 }, 15 }; 16 17 - use atrium_api::record::KnownRecord; 18 use futures_util::{ 19 stream::StreamExt, 20 SinkExt, 21 }; 22 - use serde::de::DeserializeOwned; 23 use tokio::{ 24 net::TcpStream, 25 sync::mpsc::{ ··· 124 const JETSTREAM_ZSTD_DICTIONARY: &[u8] = include_bytes!("../zstd/dictionary"); 125 126 /// A receiver channel for consuming Jetstream events. 127 - pub type JetstreamReceiver<R> = Receiver<JetstreamEvent<R>>; 128 129 /// An internal sender channel for sending Jetstream events to [JetstreamReceiver]'s. 130 - type JetstreamSender<R> = Sender<JetstreamEvent<R>>; 131 132 /// A wrapper connector type for working with a WebSocket connection to a Jetstream instance to 133 /// receive and consume events. See [JetstreamConnector::connect] for more info. 134 - pub struct JetstreamConnector<R: DeserializeOwned> { 135 /// The configuration for the Jetstream connection. 136 - config: JetstreamConfig<R>, 137 } 138 139 pub enum JetstreamCompression { ··· 163 } 164 } 165 166 - pub struct JetstreamConfig<R: DeserializeOwned = KnownRecord> { 167 /// A Jetstream endpoint to connect to with a WebSocket Scheme i.e. 168 /// `wss://jetstream1.us-east.bsky.network/subscribe`. 169 pub endpoint: String, ··· 200 /// can help prevent that if your consumer sometimes pauses, at a cost of higher memory 201 /// usage while events are buffered. 202 pub channel_size: usize, 203 - /// Marker for record deserializable type. 204 - /// 205 - /// See examples/arbitrary_record.rs for an example using serde_json::Value 206 - /// 207 - /// You can omit this if you construct `JetstreamConfig { a: b, ..Default::default() }. 208 - /// If you have to specify it, use `std::marker::PhantomData` with no type parameters. 
209 - pub record_type: PhantomData<R>, 210 } 211 212 - impl<R: DeserializeOwned> Default for JetstreamConfig<R> { 213 fn default() -> Self { 214 JetstreamConfig { 215 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), ··· 220 omit_user_agent_jetstream_info: false, 221 replay_on_reconnect: false, 222 channel_size: 4096, // a few seconds of firehose buffer 223 - record_type: PhantomData, 224 } 225 } 226 } 227 228 - impl<R: DeserializeOwned> JetstreamConfig<R> { 229 /// Constructs a new endpoint URL with the given [JetstreamConfig] applied. 230 pub fn get_request_builder( 231 &self, ··· 313 } 314 } 315 316 - impl<R: DeserializeOwned + Send + 'static> JetstreamConnector<R> { 317 /// Create a Jetstream connector with a valid [JetstreamConfig]. 318 /// 319 /// After creation, you can call [connect] to connect to the provided Jetstream instance. 320 - pub fn new(config: JetstreamConfig<R>) -> Result<Self, ConfigValidationError> { 321 // We validate the configuration here so any issues are caught early. 322 config.validate()?; 323 Ok(JetstreamConnector { config }) ··· 327 /// 328 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances 329 /// of this receiver are dropped, the connection and task are automatically closed. 330 - pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> { 331 self.connect_cursor(None).await 332 } 333 ··· 343 pub async fn connect_cursor( 344 &self, 345 cursor: Option<Cursor>, 346 - ) -> Result<JetstreamReceiver<R>, ConnectionError> { 347 // We validate the config again for good measure. Probably not necessary but it can't hurt. 348 self.config 349 .validate() ··· 424 425 /// The main task that handles the WebSocket connection and sends [JetstreamEvent]'s to any 426 /// receivers that are listening for them. 
427 - async fn websocket_task<R: DeserializeOwned>( 428 dictionary: DecoderDictionary<'_>, 429 ws: WebSocketStream<MaybeTlsStream<TcpStream>>, 430 - send_channel: JetstreamSender<R>, 431 last_cursor: &mut Option<Cursor>, 432 ) -> Result<(), JetstreamEventError> { 433 // TODO: Use the write half to allow the user to change configuration settings on the fly. ··· 439 Some(Ok(message)) => { 440 match message { 441 Message::Text(json) => { 442 - let event: JetstreamEvent<R> = serde_json::from_str(&json) 443 .map_err(JetstreamEventError::ReceivedMalformedJSON)?; 444 - let event_cursor = event.cursor(); 445 446 if let Some(last) = last_cursor { 447 if event_cursor <= *last { ··· 475 .read_to_string(&mut json) 476 .map_err(JetstreamEventError::CompressionDecoderError)?; 477 478 - let event: JetstreamEvent<R> = serde_json::from_str(&json) 479 - .map_err(JetstreamEventError::ReceivedMalformedJSON)?; 480 - let event_cursor = event.cursor(); 481 482 if let Some(last) = last_cursor { 483 if event_cursor <= *last {
··· 7 Cursor as IoCursor, 8 Read, 9 }, 10 time::{ 11 Duration, 12 Instant, 13 }, 14 }; 15 16 use futures_util::{ 17 stream::StreamExt, 18 SinkExt, 19 }; 20 use tokio::{ 21 net::TcpStream, 22 sync::mpsc::{ ··· 121 const JETSTREAM_ZSTD_DICTIONARY: &[u8] = include_bytes!("../zstd/dictionary"); 122 123 /// A receiver channel for consuming Jetstream events. 124 + pub type JetstreamReceiver = Receiver<JetstreamEvent>; 125 126 /// An internal sender channel for sending Jetstream events to [JetstreamReceiver]'s. 127 + type JetstreamSender = Sender<JetstreamEvent>; 128 129 /// A wrapper connector type for working with a WebSocket connection to a Jetstream instance to 130 /// receive and consume events. See [JetstreamConnector::connect] for more info. 131 + pub struct JetstreamConnector { 132 /// The configuration for the Jetstream connection. 133 + config: JetstreamConfig, 134 } 135 136 pub enum JetstreamCompression { ··· 160 } 161 } 162 163 + pub struct JetstreamConfig { 164 /// A Jetstream endpoint to connect to with a WebSocket Scheme i.e. 165 /// `wss://jetstream1.us-east.bsky.network/subscribe`. 166 pub endpoint: String, ··· 197 /// can help prevent that if your consumer sometimes pauses, at a cost of higher memory 198 /// usage while events are buffered. 199 pub channel_size: usize, 200 } 201 202 + impl Default for JetstreamConfig { 203 fn default() -> Self { 204 JetstreamConfig { 205 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), ··· 210 omit_user_agent_jetstream_info: false, 211 replay_on_reconnect: false, 212 channel_size: 4096, // a few seconds of firehose buffer 213 } 214 } 215 } 216 217 + impl JetstreamConfig { 218 /// Constructs a new endpoint URL with the given [JetstreamConfig] applied. 219 pub fn get_request_builder( 220 &self, ··· 302 } 303 } 304 305 + impl JetstreamConnector { 306 /// Create a Jetstream connector with a valid [JetstreamConfig]. 307 /// 308 /// After creation, you can call [connect] to connect to the provided Jetstream instance. 
309 + pub fn new(config: JetstreamConfig) -> Result<Self, ConfigValidationError> { 310 // We validate the configuration here so any issues are caught early. 311 config.validate()?; 312 Ok(JetstreamConnector { config }) ··· 316 /// 317 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances 318 /// of this receiver are dropped, the connection and task are automatically closed. 319 + pub async fn connect(&self) -> Result<JetstreamReceiver, ConnectionError> { 320 self.connect_cursor(None).await 321 } 322 ··· 332 pub async fn connect_cursor( 333 &self, 334 cursor: Option<Cursor>, 335 + ) -> Result<JetstreamReceiver, ConnectionError> { 336 // We validate the config again for good measure. Probably not necessary but it can't hurt. 337 self.config 338 .validate() ··· 413 414 /// The main task that handles the WebSocket connection and sends [JetstreamEvent]'s to any 415 /// receivers that are listening for them. 416 + async fn websocket_task( 417 dictionary: DecoderDictionary<'_>, 418 ws: WebSocketStream<MaybeTlsStream<TcpStream>>, 419 + send_channel: JetstreamSender, 420 last_cursor: &mut Option<Cursor>, 421 ) -> Result<(), JetstreamEventError> { 422 // TODO: Use the write half to allow the user to change configuration settings on the fly. 
··· 428 Some(Ok(message)) => { 429 match message { 430 Message::Text(json) => { 431 + let event: JetstreamEvent = serde_json::from_str(&json) 432 .map_err(JetstreamEventError::ReceivedMalformedJSON)?; 433 + let event_cursor = event.cursor.clone(); 434 435 if let Some(last) = last_cursor { 436 if event_cursor <= *last { ··· 464 .read_to_string(&mut json) 465 .map_err(JetstreamEventError::CompressionDecoderError)?; 466 467 + let event: JetstreamEvent = serde_json::from_str(&json).map_err(|e| { 468 + eprintln!("lkasjdflkajsd {e:?} {json}"); 469 + JetstreamEventError::ReceivedMalformedJSON(e) 470 + })?; 471 + let event_cursor = event.cursor.clone(); 472 473 if let Some(last) = last_cursor { 474 if event_cursor <= *last {
+58 -53
ufos/src/consumer.rs
··· 1 use jetstream::{ 2 - events::{ 3 - account::AccountEvent, 4 - commit::{CommitData, CommitEvent, CommitInfo, CommitType}, 5 - Cursor, EventInfo, JetstreamEvent, 6 - }, 7 exports::Did, 8 DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector, 9 JetstreamReceiver, ··· 26 27 #[derive(Debug)] 28 struct Batcher { 29 - jetstream_receiver: JetstreamReceiver<serde_json::Value>, 30 batch_sender: Sender<EventBatch>, 31 current_batch: EventBatch, 32 } ··· 42 } else { 43 eprintln!("connecting to jetstream at {jetstream_endpoint} => {endpoint}"); 44 } 45 - let config: JetstreamConfig<serde_json::Value> = JetstreamConfig { 46 endpoint, 47 compression: if no_compress { 48 JetstreamCompression::None 49 } else { 50 JetstreamCompression::Zstd 51 }, 52 - channel_size: 64, // small because we'd rather buffer events into batches 53 ..Default::default() 54 }; 55 let jetstream_receiver = JetstreamConnector::new(config)? ··· 62 } 63 64 impl Batcher { 65 - fn new( 66 - jetstream_receiver: JetstreamReceiver<serde_json::Value>, 67 - batch_sender: Sender<EventBatch>, 68 - ) -> Self { 69 Self { 70 jetstream_receiver, 71 batch_sender, ··· 83 } 84 } 85 86 - async fn handle_event( 87 - &mut self, 88 - event: JetstreamEvent<serde_json::Value>, 89 - ) -> anyhow::Result<()> { 90 - let event_cursor = event.cursor(); 91 92 if let Some(earliest) = &self.current_batch.first_jetstream_cursor { 93 if event_cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS) ··· 98 self.current_batch.first_jetstream_cursor = Some(event_cursor.clone()); 99 } 100 101 - match event { 102 - JetstreamEvent::Commit(CommitEvent::CreateOrUpdate { commit, info }) => { 103 - match commit.info.operation { 104 - CommitType::Create => self.handle_create_record(commit, info).await?, 105 - CommitType::Update => { 106 - self.handle_modify_record(modify_update(commit, info)) 107 - .await? 
108 } 109 - CommitType::Delete => { 110 - panic!("jetstream Commit::CreateOrUpdate had Delete operation type") 111 } 112 } 113 } 114 - JetstreamEvent::Commit(CommitEvent::Delete { commit, info }) => { 115 - self.handle_modify_record(modify_delete(commit, info)) 116 - .await? 117 - } 118 - JetstreamEvent::Account(AccountEvent { info, account }) if !account.active => { 119 - self.handle_remove_account(info.did, info.time_us).await? 120 } 121 - JetstreamEvent::Account(_) => {} // ignore account *activations* 122 - JetstreamEvent::Identity(_) => {} // identity events are noops for us 123 }; 124 self.current_batch.last_jetstream_cursor = Some(event_cursor.clone()); 125 ··· 159 160 async fn handle_create_record( 161 &mut self, 162 - commit: CommitData<serde_json::Value>, 163 - info: EventInfo, 164 ) -> anyhow::Result<()> { 165 if !self 166 .current_batch 167 .record_creates 168 - .contains_key(&commit.info.collection) 169 && self.current_batch.record_creates.len() >= MAX_BATCHED_COLLECTIONS 170 { 171 self.send_current_batch_now().await?; 172 } 173 let record = CreateRecord { 174 - did: info.did, 175 - rkey: commit.info.rkey, 176 - record: commit.record, 177 - cursor: info.time_us, 178 }; 179 let collection = self 180 .current_batch 181 .record_creates 182 - .entry(commit.info.collection) 183 .or_default(); 184 collection.total_seen += 1; 185 collection.samples.push_front(record); ··· 206 } 207 } 208 209 - fn modify_update(commit: CommitData<serde_json::Value>, info: EventInfo) -> ModifyRecord { 210 ModifyRecord::Update(UpdateRecord { 211 - did: info.did, 212 - collection: commit.info.collection, 213 - rkey: commit.info.rkey, 214 - record: commit.record, 215 - cursor: info.time_us, 216 }) 217 } 218 219 - fn modify_delete(commit_info: CommitInfo, info: EventInfo) -> ModifyRecord { 220 ModifyRecord::Delete(DeleteRecord { 221 - did: info.did, 222 - collection: commit_info.collection, 223 - rkey: commit_info.rkey, 224 - cursor: info.time_us, 225 }) 226 }
··· 1 use jetstream::{ 2 + events::{CommitEvent, CommitOp, Cursor, EventKind, JetstreamEvent}, 3 exports::Did, 4 DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector, 5 JetstreamReceiver, ··· 22 23 #[derive(Debug)] 24 struct Batcher { 25 + jetstream_receiver: JetstreamReceiver, 26 batch_sender: Sender<EventBatch>, 27 current_batch: EventBatch, 28 } ··· 38 } else { 39 eprintln!("connecting to jetstream at {jetstream_endpoint} => {endpoint}"); 40 } 41 + let config: JetstreamConfig = JetstreamConfig { 42 endpoint, 43 compression: if no_compress { 44 JetstreamCompression::None 45 } else { 46 JetstreamCompression::Zstd 47 }, 48 + channel_size: 64, // small because we expect to be fast....? 49 ..Default::default() 50 }; 51 let jetstream_receiver = JetstreamConnector::new(config)? ··· 58 } 59 60 impl Batcher { 61 + fn new(jetstream_receiver: JetstreamReceiver, batch_sender: Sender<EventBatch>) -> Self { 62 Self { 63 jetstream_receiver, 64 batch_sender, ··· 76 } 77 } 78 79 + async fn handle_event(&mut self, event: JetstreamEvent) -> anyhow::Result<()> { 80 + let event_cursor = event.cursor; 81 82 if let Some(earliest) = &self.current_batch.first_jetstream_cursor { 83 if event_cursor.duration_since(earliest)? 
> Duration::from_secs_f64(MAX_BATCH_SPAN_SECS) ··· 88 self.current_batch.first_jetstream_cursor = Some(event_cursor.clone()); 89 } 90 91 + match event.kind { 92 + EventKind::Commit if event.commit.is_some() => { 93 + let commit = event.commit.unwrap(); 94 + match commit.operation { 95 + CommitOp::Create => { 96 + self.handle_create_record(event.did, commit, event_cursor.clone()) 97 + .await?; 98 } 99 + CommitOp::Update => { 100 + self.handle_modify_record(modify_update( 101 + event.did, 102 + commit, 103 + event_cursor.clone(), 104 + )) 105 + .await?; 106 + } 107 + CommitOp::Delete => { 108 + self.handle_modify_record(modify_delete( 109 + event.did, 110 + commit, 111 + event_cursor.clone(), 112 + )) 113 + .await?; 114 } 115 } 116 } 117 + EventKind::Account if event.account.is_some() => { 118 + let account = event.account.unwrap(); 119 + if !account.active { 120 + self.handle_remove_account(account.did, event_cursor.clone()) 121 + .await?; 122 + } 123 } 124 + _ => {} 125 }; 126 self.current_batch.last_jetstream_cursor = Some(event_cursor.clone()); 127 ··· 161 162 async fn handle_create_record( 163 &mut self, 164 + did: Did, 165 + commit: CommitEvent, 166 + cursor: Cursor, 167 ) -> anyhow::Result<()> { 168 if !self 169 .current_batch 170 .record_creates 171 + .contains_key(&commit.collection) 172 && self.current_batch.record_creates.len() >= MAX_BATCHED_COLLECTIONS 173 { 174 self.send_current_batch_now().await?; 175 } 176 + let record = serde_json::from_str(commit.record.unwrap().get())?; 177 let record = CreateRecord { 178 + did, 179 + rkey: commit.rkey, 180 + record, 181 + cursor, 182 }; 183 let collection = self 184 .current_batch 185 .record_creates 186 + .entry(commit.collection) 187 .or_default(); 188 collection.total_seen += 1; 189 collection.samples.push_front(record); ··· 210 } 211 } 212 213 + fn modify_update(did: Did, commit: CommitEvent, cursor: Cursor) -> ModifyRecord { 214 + let record = serde_json::from_str(commit.record.unwrap().get()).unwrap(); 215 
ModifyRecord::Update(UpdateRecord { 216 + did, 217 + collection: commit.collection, 218 + rkey: commit.rkey, 219 + record, 220 + cursor, 221 }) 222 } 223 224 + fn modify_delete(did: Did, commit: CommitEvent, cursor: Cursor) -> ModifyRecord { 225 ModifyRecord::Delete(DeleteRecord { 226 + did, 227 + collection: commit.collection, 228 + rkey: commit.rkey, 229 + cursor, 230 }) 231 }
+2 -1
ufos/src/lib.rs
··· 1 pub mod consumer; 2 pub mod db_types; 3 pub mod server; 4 - pub mod store; 5 pub mod store_types; 6 7 use jetstream::events::Cursor;
··· 1 pub mod consumer; 2 pub mod db_types; 3 pub mod server; 4 + // pub mod storage; 5 + pub mod storage_fjall; 6 pub mod store_types; 7 8 use jetstream::events::Cursor;
+2 -2
ufos/src/main.rs
··· 1 use clap::Parser; 2 use std::path::PathBuf; 3 - use ufos::{consumer, server, store}; 4 5 #[cfg(not(target_env = "msvc"))] 6 use tikv_jemallocator::Jemalloc; ··· 43 44 let args = Args::parse(); 45 let (storage, cursor) = 46 - store::Storage::open(args.data, &args.jetstream, args.jetstream_force).await?; 47 48 println!("starting server with storage..."); 49 let serving = server::serve(storage.clone());
··· 1 use clap::Parser; 2 use std::path::PathBuf; 3 + use ufos::{consumer, server, storage_fjall}; 4 5 #[cfg(not(target_env = "msvc"))] 6 use tikv_jemallocator::Jemalloc; ··· 43 44 let args = Args::parse(); 45 let (storage, cursor) = 46 + storage_fjall::Storage::open(args.data, &args.jetstream, args.jetstream_force).await?; 47 48 println!("starting server with storage..."); 49 let serving = server::serve(storage.clone());
+1 -1
ufos/src/server.rs
··· 1 - use crate::store::{Storage, StorageInfo}; 2 use crate::{CreateRecord, Nsid}; 3 use dropshot::endpoint; 4 use dropshot::ApiDescription;
··· 1 + use crate::storage_fjall::{Storage, StorageInfo}; 2 use crate::{CreateRecord, Nsid}; 3 use dropshot::endpoint; 4 use dropshot::ApiDescription;
+1 -1
ufos/src/store.rs ufos/src/storage_fjall.rs
··· 117 // TODO: see rw_loop: enforce single-thread. 118 loop { 119 let t_sleep = Instant::now(); 120 - sleep(Duration::from_secs_f64(0.8)).await; // TODO: minimize during replay 121 let slept_for = t_sleep.elapsed(); 122 let queue_size = receiver.len(); 123
··· 117 // TODO: see rw_loop: enforce single-thread. 118 loop { 119 let t_sleep = Instant::now(); 120 + sleep(Duration::from_secs_f64(0.08)).await; // TODO: minimize during replay 121 let slept_for = t_sleep.elapsed(); 122 let queue_size = receiver.len(); 123