···11-# jetstream-oxide
11+# fork of the awesome jetstream-oxide
2233-[](https://crates.io/crates/jetstream-oxide)
44-[](https://docs.rs/jetstream-oxide/latest/jetstream_oxide)
33+fork note: this readme is likely a bit out of date! i've been messing around with some apis.
5465A typed Rust library for easily interacting with and consuming the
76Bluesky [Jetstream](https://github.com/bluesky-social/jetstream)
+76-1
jetstream/src/events/mod.rs
···22pub mod commit;
33pub mod identity;
4455+use std::time::{
66+ Duration,
77+ SystemTime,
88+ UNIX_EPOCH,
99+};
1010+511use serde::Deserialize;
612713use crate::exports;
8141515+/// Opaque wrapper for the time_us cursor used by jetstream
1616+///
1717+/// Generally, you should use a cursor
1818+#[derive(Deserialize, Debug, Clone)]
1919+pub struct Cursor(u64);
2020+921/// Basic data that is included with every event.
1022#[derive(Deserialize, Debug)]
1123pub struct EventInfo {
1224 pub did: exports::Did,
1313- pub time_us: u64,
2525+ pub time_us: Cursor,
1426 pub kind: EventKind,
1527}
1628···2941 Identity,
3042 Account,
3143}
4444+4545+impl<R> JetstreamEvent<R> {
4646+ pub fn cursor(&self) -> Cursor {
4747+ match self {
4848+ JetstreamEvent::Commit(commit::CommitEvent::Create { info, .. }) => {
4949+ info.time_us.clone()
5050+ }
5151+ JetstreamEvent::Commit(commit::CommitEvent::Update { info, .. }) => {
5252+ info.time_us.clone()
5353+ }
5454+ JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => {
5555+ info.time_us.clone()
5656+ }
5757+ JetstreamEvent::Identity(e) => e.info.time_us.clone(),
5858+ JetstreamEvent::Account(e) => e.info.time_us.clone(),
5959+ }
6060+ }
6161+}
6262+6363+impl Cursor {
6464+ /// Get a cursor that will consume all available jetstream replay
6565+ ///
6666+ /// This sets the cursor to zero.
6767+ ///
6868+ /// Jetstream instances typically only have a few days of replay.
6969+ pub fn from_start() -> Self {
7070+ Self(0)
7171+ }
7272+ /// Get a cursor for a specific time
7373+ ///
7474+ /// Panics: if t is older than the unix epoch: Jan 1, 1970.
7575+ ///
7676+ /// If you want to receive all available jetstream replay (typically a few days), use
7777+ /// .from_start()
7878+ pub fn at(t: SystemTime) -> Self {
7979+ let unix_dt = t
8080+ .duration_since(UNIX_EPOCH)
8181+ .expect("cannot set jetstream cursor earlier than unix epoch");
8282+ Self(unix_dt.as_micros() as u64)
8383+ }
8484+ /// Get a cursor rewound from now by this amount
8585+ ///
8686+ /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970.
8787+ ///
8888+ /// Jetstream instances typically only have a few days of replay.
8989+ pub fn back_by(d: Duration) -> Self {
9090+ Self::at(SystemTime::now() - d)
9191+ }
9292+ /// Get a Cursor from a raw u64
9393+ ///
9494+ /// For example, from a jetstream event's `time_us` field.
9595+ pub fn from_raw_u64(time_us: u64) -> Self {
9696+ Self(time_us)
9797+ }
9898+ /// Get the raw u64 value from this cursor.
9999+ pub fn to_raw_u64(&self) -> u64 {
100100+ self.0
101101+ }
102102+ /// Format the cursor value for use in a jetstream connection url querystring
103103+ pub fn to_jetstream(&self) -> String {
104104+ self.0.to_string()
105105+ }
106106+}
+78-31
jetstream/src/lib.rs
···4455use std::{
66 io::{
77- Cursor,
77+ Cursor as IoCursor,
88 Read,
99 },
1010 marker::PhantomData,
···1616};
17171818use atrium_api::record::KnownRecord;
1919-use chrono::Utc;
2019use futures_util::{
2120 stream::StreamExt,
2221 SinkExt,
···4948 ConnectionError,
5049 JetstreamEventError,
5150 },
5252- events::JetstreamEvent,
5151+ events::{
5252+ Cursor,
5353+ JetstreamEvent,
5454+ },
5355};
54565557/// The Jetstream endpoints officially provided by Bluesky themselves.
···167169 pub wanted_dids: Vec<exports::Did>,
168170 /// The compression algorithm to request and use for the WebSocket connection (if any).
169171 pub compression: JetstreamCompression,
170170- /// An optional timestamp to begin playback from.
172172+ /// Enable automatic cursor for auto-reconnect
171173 ///
172172- /// An absent cursor or a cursor from the future will result in live-tail operation.
174174+ /// By default, reconnects will never set a cursor for the connection, so a small number of
175175+ /// events will always be dropped.
173176 ///
174174- /// When reconnecting, use the time_us from your most recently processed event and maybe
175175- /// provide a negative buffer (i.e. subtract a few seconds) to ensure gapless playback.
176176- pub cursor: Option<chrono::DateTime<Utc>>,
177177+ /// If you want gapless playback across reconnects, set this to `true`. If you always want
178178+ /// the latest available events and can tolerate missing some: `false`.
179179+ pub replay_on_reconnect: bool,
177180 /// Maximum size of send channel for jetstream events.
178181 ///
179182 /// If your consuming task can't keep up with every new jetstream event in real-time,
···197200 wanted_collections: Vec::new(),
198201 wanted_dids: Vec::new(),
199202 compression: JetstreamCompression::None,
200200- cursor: None,
203203+ replay_on_reconnect: false,
201204 channel_size: 4096, // a few seconds of firehose buffer
202205 record_type: PhantomData,
203206 }
···225228 },
226229 );
227230228228- let cursor = self
229229- .cursor
230230- .map(|c| ("cursor", c.timestamp_micros().to_string()));
231231-232231 let params = did_search_query
233232 .chain(collection_search_query)
234233 .chain(std::iter::once(compression))
235235- .chain(cursor)
236234 .collect::<Vec<(&str, String)>>();
237235238236 Url::parse_with_params(endpoint, params)
···276274 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances
277275 /// of this receiver are dropped, the connection and task are automatically closed.
278276 pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> {
277277+ self.base_connect(None).await
278278+ }
279279+280280+ /// Connects to a Jetstream instance as defined in the [JetstreamConfig] with playback from a
281281+ /// cursor
282282+ ///
283283+ /// A cursor from the future will result in live-tail operation.
284284+ ///
285285+ /// The cursor is only used for the first successful connection -- on auto-reconnect it will
286286+ /// live-tail by default. Set `replay_on_reconnect: true` in the config if you need to
287287+ /// receive every event, which will keep track of the last-seen cursor and reconnect from
288288+ /// there.
289289+ pub async fn connect_cursor(
290290+ &self,
291291+ cursor: Cursor,
292292+ ) -> Result<JetstreamReceiver<R>, ConnectionError> {
293293+ self.base_connect(Some(cursor)).await
294294+ }
295295+296296+ async fn base_connect(
297297+ &self,
298298+ cursor: Option<Cursor>,
299299+ ) -> Result<JetstreamReceiver<R>, ConnectionError> {
279300 // We validate the config again for good measure. Probably not necessary but it can't hurt.
280301 self.config
281302 .validate()
···288309 .construct_endpoint(&self.config.endpoint)
289310 .map_err(ConnectionError::InvalidEndpoint)?;
290311312312+ let replay_on_reconnect = self.config.replay_on_reconnect;
313313+291314 tokio::task::spawn(async move {
292315 let max_retries = 30;
293316 let base_delay_ms = 1_000; // 1 second
···295318 let success_threshold_s = 15; // 15 seconds, retry count is reset if we were connected at least this long
296319297320 let mut retry_attempt = 0;
321321+ let mut connect_cursor = cursor;
298322 loop {
299323 let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY);
300324325325+ let mut configured_endpoint = configured_endpoint.clone();
326326+ if let Some(ref cursor) = connect_cursor {
327327+ configured_endpoint
328328+ .query_pairs_mut()
329329+ .append_pair("cursor", &cursor.to_jetstream());
330330+ }
331331+332332+ let mut last_cursor = connect_cursor.clone();
333333+301334 retry_attempt += 1;
302335 if let Ok((ws_stream, _)) = connect_async(&configured_endpoint).await {
303336 let t_connected = Instant::now();
304304- if let Err(e) = websocket_task(dict, ws_stream, send_channel.clone()).await {
337337+ if let Err(e) =
338338+ websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
339339+ .await
340340+ {
305341 log::error!("Jetstream closed after encountering error: {e:?}");
306342 } else {
307343 log::error!("Jetstream connection closed cleanly");
308344 }
309345 if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
310346 retry_attempt = 0;
311311- continue;
312347 }
313348 }
314349315350 if retry_attempt >= max_retries {
316316- eprintln!("max retries, bye");
351351+ log::error!("hit max retries, bye");
317352 break;
318353 }
319354320320- eprintln!("will try to reconnect");
355355+ connect_cursor = if replay_on_reconnect {
356356+ last_cursor
357357+ } else {
358358+ None
359359+ };
321360322322- // Exponential backoff
323323- let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
324324-325325- log::error!("Connection failed, retrying in {delay_ms}ms...");
326326- tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
327327- log::info!("Attempting to reconnect...")
361361+ if retry_attempt > 0 {
362362+ // Exponential backoff
363363+ let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
364364+ log::error!("Connection failed, retrying in {delay_ms}ms...");
365365+ tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
366366+ log::info!("Attempting to reconnect...");
367367+ }
328368 }
329369 log::error!("Connection retries exhausted. Jetstream is disconnected.");
330370 });
···339379 dictionary: DecoderDictionary<'_>,
340380 ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
341381 send_channel: JetstreamSender<R>,
382382+ last_cursor: &mut Option<Cursor>,
342383) -> Result<(), JetstreamEventError> {
343384 // TODO: Use the write half to allow the user to change configuration settings on the fly.
344385 let (socket_write, mut socket_read) = ws.split();
···373414 Some(Ok(message)) => {
374415 match message {
375416 Message::Text(json) => {
376376- let event = serde_json::from_str(&json)
417417+ let event: JetstreamEvent<R> = serde_json::from_str(&json)
377418 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
419419+ let event_cursor = event.cursor();
378420379421 if send_channel.send(event).await.is_err() {
380422 // We can assume that all receivers have been dropped, so we can close
381423 // the connection and exit the task.
382424 log::info!(
383383- "All receivers for the Jetstream connection have been dropped, closing connection."
384384- );
425425+ "All receivers for the Jetstream connection have been dropped, closing connection."
426426+ );
385427 closing_connection = true;
428428+ } else if let Some(v) = last_cursor.as_mut() {
429429+ *v = event_cursor;
386430 }
387431 }
388432 Message::Binary(zstd_json) => {
389389- let mut cursor = Cursor::new(zstd_json);
433433+ let mut cursor = IoCursor::new(zstd_json);
390434 let mut decoder = zstd::stream::Decoder::with_prepared_dictionary(
391435 &mut cursor,
392436 &dictionary,
···398442 .read_to_string(&mut json)
399443 .map_err(JetstreamEventError::CompressionDecoderError)?;
400444401401- let event = serde_json::from_str(&json)
445445+ let event: JetstreamEvent<R> = serde_json::from_str(&json)
402446 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
447447+ let event_cursor = event.cursor();
403448404449 if send_channel.send(event).await.is_err() {
405450 // We can assume that all receivers have been dropped, so we can close
406451 // the connection and exit the task.
407452 log::info!(
408408- "All receivers for the Jetstream connection have been dropped, closing connection..."
409409- );
453453+ "All receivers for the Jetstream connection have been dropped, closing connection..."
454454+ );
410455 closing_connection = true;
456456+ } else if let Some(v) = last_cursor.as_mut() {
457457+ *v = event_cursor;
411458 }
412459 }
413460 Message::Ping(vec) => {