Tracks lexicons and how many times each has appeared on the Jetstream.

feat(server): enable jetstream compression and use just a single thread to ingest the events

ptr.pet 85f997c4 9bab265d

verified
+87 -34
+1
.gitignore
··· 28 28 events.sqlite-shm 29 29 30 30 result 31 + server/bsky_zstd_dictionary
server/src/bsky_zstd_dictionary

This is a binary file and will not be displayed.

+40 -5
server/src/db.rs
··· 1 + use atproto_jetstream::JetstreamEvent; 1 2 use fjall::{Config, Keyspace, Partition, PartitionCreateOptions}; 2 3 use rkyv::{Archive, Deserialize, Serialize, rancor::Error}; 3 4 use smol_str::SmolStr; ··· 19 20 pub deleted: bool, 20 21 } 21 22 23 + pub struct EventRecord { 24 + nsid: SmolStr, 25 + timestamp: u64, 26 + deleted: bool, 27 + } 28 + 29 + impl EventRecord { 30 + pub fn from_jetstream(event: JetstreamEvent) -> Option<Self> { 31 + match event { 32 + JetstreamEvent::Commit { 33 + time_us, commit, .. 34 + } => Some(Self { 35 + nsid: commit.collection.into(), 36 + timestamp: time_us, 37 + deleted: false, 38 + }), 39 + JetstreamEvent::Delete { 40 + time_us, commit, .. 41 + } => Some(Self { 42 + nsid: commit.collection.into(), 43 + timestamp: time_us, 44 + deleted: true, 45 + }), 46 + _ => None, 47 + } 48 + } 49 + } 50 + 22 51 // counts is nsid -> NsidCounts 23 52 // hits is tree per nsid: timestamp -> NsidHit 24 53 pub struct Db { ··· 57 86 })) 58 87 } 59 88 60 - pub fn record_event(&self, nsid: &str, timestamp: u64, deleted: bool) -> AppResult<()> { 61 - self.insert_event(nsid, timestamp, deleted)?; 89 + pub fn record_event(&self, e: EventRecord) -> AppResult<()> { 90 + let EventRecord { 91 + nsid, 92 + timestamp, 93 + deleted, 94 + } = e; 95 + 96 + self.insert_event(&nsid, timestamp, deleted)?; 62 97 // increment count 63 - let mut counts = self.get_count(nsid)?; 98 + let mut counts = self.get_count(&nsid)?; 64 99 counts.last_seen = timestamp; 65 100 if deleted { 66 101 counts.deleted_count += 1; 67 102 } else { 68 103 counts.count += 1; 69 104 } 70 - self.insert_count(nsid, counts.clone())?; 105 + self.insert_count(&nsid, counts.clone())?; 71 106 if self.event_broadcaster.receiver_count() > 0 { 72 - let _ = self.event_broadcaster.send((SmolStr::new(nsid), counts)); 107 + let _ = self.event_broadcaster.send((SmolStr::new(&nsid), counts)); 73 108 } 74 109 Ok(()) 75 110 }
+46 -29
server/src/main.rs
··· 1 1 use std::sync::Arc; 2 2 3 3 use atproto_jetstream::{CancellationToken, Consumer, EventHandler, JetstreamEvent}; 4 + use tokio::sync::mpsc::{Receiver, Sender}; 4 5 5 - use crate::{api::serve, db::Db}; 6 + use crate::{ 7 + api::serve, 8 + db::{Db, EventRecord}, 9 + }; 6 10 7 11 mod api; 8 12 mod db; 9 13 mod error; 14 + 15 + const BSKY_ZSTD_DICT: &[u8] = include_bytes!("./bsky_zstd_dictionary"); 10 16 11 17 struct JetstreamHandler { 12 - db: Arc<Db>, 18 + tx: Sender<EventRecord>, 19 + } 20 + 21 + impl JetstreamHandler { 22 + fn new() -> (Self, Receiver<EventRecord>) { 23 + let (tx, rx) = tokio::sync::mpsc::channel(1000); 24 + (Self { tx }, rx) 25 + } 13 26 } 14 27 15 28 #[async_trait::async_trait] 16 29 impl EventHandler for JetstreamHandler { 17 30 async fn handle_event(&self, event: JetstreamEvent) -> anyhow::Result<()> { 18 - let db = self.db.clone(); 19 - tokio::task::spawn_blocking(move || { 20 - let result = match event { 21 - JetstreamEvent::Commit { 22 - time_us, commit, .. 23 - } => db.record_event(&commit.collection, time_us, false), 24 - JetstreamEvent::Delete { 25 - time_us, commit, .. 
26 - } => db.record_event(&commit.collection, time_us, true), 27 - _ => Ok(()), 28 - }; 29 - if let Err(err) = result { 30 - tracing::error!("couldn't record event: {err}"); 31 - } 32 - }); 31 + if let Some(e) = EventRecord::from_jetstream(event) { 32 + self.tx.send(e).await?; 33 + } 33 34 Ok(()) 34 35 } 35 36 ··· 40 41 41 42 #[tokio::main] 42 43 async fn main() { 43 - tracing_subscriber::fmt::init(); 44 + tracing_subscriber::fmt::fmt().compact().init(); 44 45 45 46 let db = Arc::new(Db::new().expect("couldnt create db")); 46 47 47 - let consumer = Consumer::new(atproto_jetstream::ConsumerTaskConfig { 48 - compression: false, 48 + tokio::fs::write("./bsky_zstd_dictionary", BSKY_ZSTD_DICT) 49 + .await 50 + .expect("could not write bsky zstd dict"); 51 + 52 + let jetstream = Consumer::new(atproto_jetstream::ConsumerTaskConfig { 53 + compression: true, 49 54 jetstream_hostname: "jetstream2.us-west.bsky.network".into(), 50 55 collections: Vec::new(), 51 56 dids: Vec::new(), 52 57 max_message_size_bytes: None, 53 58 cursor: None, 54 59 require_hello: true, 55 - zstd_dictionary_location: String::new(), 60 + zstd_dictionary_location: "./bsky_zstd_dictionary".into(), 56 61 user_agent: "nsid-tracker/0.0.1".into(), 57 62 }); 58 63 59 - tracing::info!("running jetstream consumer..."); 64 + let (event_handler, mut event_rx) = JetstreamHandler::new(); 65 + 60 66 let cancel_token = CancellationToken::new(); 61 - tokio::spawn({ 67 + tokio::spawn(async move { 68 + jetstream 69 + .register_handler(Arc::new(event_handler)) 70 + .await 71 + .expect("cant register handler"); 72 + jetstream 73 + .run_background(cancel_token.clone()) 74 + .await 75 + .expect("cant run jetstream"); 76 + }); 77 + 78 + std::thread::spawn({ 62 79 let db = db.clone(); 63 - async move { 64 - consumer 65 - .register_handler(Arc::new(JetstreamHandler { db })) 66 - .await 67 - .unwrap(); 68 - consumer.run_background(cancel_token.clone()).await.unwrap(); 80 + move || { 81 + while let Some(e) = 
event_rx.blocking_recv() { 82 + if let Err(e) = db.record_event(e) { 83 + tracing::error!("failed to record event: {}", e); 84 + } 85 + } 69 86 } 70 87 }); 71 88