···1-# jetstream-oxide
23-[](https://crates.io/crates/jetstream-oxide)
4-[](https://docs.rs/jetstream-oxide/latest/jetstream_oxide)
56A typed Rust library for easily interacting with and consuming the
7Bluesky [Jetstream](https://github.com/bluesky-social/jetstream)
···1+# fork of the awesome jetstream-oxide
23+fork note: this readme is likely a bit out of date! i've been messing around with some apis.
045A typed Rust library for easily interacting with and consuming the
6Bluesky [Jetstream](https://github.com/bluesky-social/jetstream)
+76-1
jetstream/src/events/mod.rs
···2pub mod commit;
3pub mod identity;
40000005use serde::Deserialize;
67use crate::exports;
80000009/// Basic data that is included with every event.
10#[derive(Deserialize, Debug)]
11pub struct EventInfo {
12 pub did: exports::Did,
13- pub time_us: u64,
14 pub kind: EventKind,
15}
16···29 Identity,
30 Account,
31}
000000000000000000000000000000000000000000000000000000000000000
···2pub mod commit;
3pub mod identity;
45+use std::time::{
6+ Duration,
7+ SystemTime,
8+ UNIX_EPOCH,
9+};
10+11use serde::Deserialize;
1213use crate::exports;
1415+/// Opaque wrapper for the time_us cursor used by jetstream
16+///
17+/// Generally, you should use a cursor
18+#[derive(Deserialize, Debug, Clone)]
19+pub struct Cursor(u64);
20+21/// Basic data that is included with every event.
22#[derive(Deserialize, Debug)]
23pub struct EventInfo {
24 pub did: exports::Did,
25+ pub time_us: Cursor,
26 pub kind: EventKind,
27}
28···41 Identity,
42 Account,
43}
44+45+impl<R> JetstreamEvent<R> {
46+ pub fn cursor(&self) -> Cursor {
47+ match self {
48+ JetstreamEvent::Commit(commit::CommitEvent::Create { info, .. }) => {
49+ info.time_us.clone()
50+ }
51+ JetstreamEvent::Commit(commit::CommitEvent::Update { info, .. }) => {
52+ info.time_us.clone()
53+ }
54+ JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => {
55+ info.time_us.clone()
56+ }
57+ JetstreamEvent::Identity(e) => e.info.time_us.clone(),
58+ JetstreamEvent::Account(e) => e.info.time_us.clone(),
59+ }
60+ }
61+}
62+63+impl Cursor {
64+ /// Get a cursor that will consume all available jetstream replay
65+ ///
66+ /// This sets the cursor to zero.
67+ ///
68+ /// Jetstream instances typically only have a few days of replay.
69+ pub fn from_start() -> Self {
70+ Self(0)
71+ }
72+ /// Get a cursor for a specific time
73+ ///
74+ /// Panics: if t is older than the unix epoch: Jan 1, 1970.
75+ ///
76+ /// If you want to receive all available jetstream replay (typically a few days), use
77+ /// .from_start()
78+ pub fn at(t: SystemTime) -> Self {
79+ let unix_dt = t
80+ .duration_since(UNIX_EPOCH)
81+ .expect("cannot set jetstream cursor earlier than unix epoch");
82+ Self(unix_dt.as_micros() as u64)
83+ }
84+ /// Get a cursor rewound from now by this amount
85+ ///
86+ /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970.
87+ ///
88+ /// Jetstream instances typically only have a few days of replay.
89+ pub fn back_by(d: Duration) -> Self {
90+ Self::at(SystemTime::now() - d)
91+ }
92+ /// Get a Cursor from a raw u64
93+ ///
94+ /// For example, from a jetstream event's `time_us` field.
95+ pub fn from_raw_u64(time_us: u64) -> Self {
96+ Self(time_us)
97+ }
98+ /// Get the raw u64 value from this cursor.
99+ pub fn to_raw_u64(&self) -> u64 {
100+ self.0
101+ }
102+ /// Format the cursor value for use in a jetstream connection url querystring
103+ pub fn to_jetstream(&self) -> String {
104+ self.0.to_string()
105+ }
106+}
+78-31
jetstream/src/lib.rs
···45use std::{
6 io::{
7- Cursor,
8 Read,
9 },
10 marker::PhantomData,
···16};
1718use atrium_api::record::KnownRecord;
19-use chrono::Utc;
20use futures_util::{
21 stream::StreamExt,
22 SinkExt,
···49 ConnectionError,
50 JetstreamEventError,
51 },
52- events::JetstreamEvent,
00053};
5455/// The Jetstream endpoints officially provided by Bluesky themselves.
···167 pub wanted_dids: Vec<exports::Did>,
168 /// The compression algorithm to request and use for the WebSocket connection (if any).
169 pub compression: JetstreamCompression,
170- /// An optional timestamp to begin playback from.
171 ///
172- /// An absent cursor or a cursor from the future will result in live-tail operation.
0173 ///
174- /// When reconnecting, use the time_us from your most recently processed event and maybe
175- /// provide a negative buffer (i.e. subtract a few seconds) to ensure gapless playback.
176- pub cursor: Option<chrono::DateTime<Utc>>,
177 /// Maximum size of send channel for jetstream events.
178 ///
179 /// If your consuming task can't keep up with every new jetstream event in real-time,
···197 wanted_collections: Vec::new(),
198 wanted_dids: Vec::new(),
199 compression: JetstreamCompression::None,
200- cursor: None,
201 channel_size: 4096, // a few seconds of firehose buffer
202 record_type: PhantomData,
203 }
···225 },
226 );
227228- let cursor = self
229- .cursor
230- .map(|c| ("cursor", c.timestamp_micros().to_string()));
231-232 let params = did_search_query
233 .chain(collection_search_query)
234 .chain(std::iter::once(compression))
235- .chain(cursor)
236 .collect::<Vec<(&str, String)>>();
237238 Url::parse_with_params(endpoint, params)
···276 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances
277 /// of this receiver are dropped, the connection and task are automatically closed.
278 pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> {
00000000000000000000000279 // We validate the config again for good measure. Probably not necessary but it can't hurt.
280 self.config
281 .validate()
···288 .construct_endpoint(&self.config.endpoint)
289 .map_err(ConnectionError::InvalidEndpoint)?;
29000291 tokio::task::spawn(async move {
292 let max_retries = 30;
293 let base_delay_ms = 1_000; // 1 second
···295 let success_threshold_s = 15; // 15 seconds, retry count is reset if we were connected at least this long
296297 let mut retry_attempt = 0;
0298 loop {
299 let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY);
300000000000301 retry_attempt += 1;
302 if let Ok((ws_stream, _)) = connect_async(&configured_endpoint).await {
303 let t_connected = Instant::now();
304- if let Err(e) = websocket_task(dict, ws_stream, send_channel.clone()).await {
000305 log::error!("Jetstream closed after encountering error: {e:?}");
306 } else {
307 log::error!("Jetstream connection closed cleanly");
308 }
309 if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
310 retry_attempt = 0;
311- continue;
312 }
313 }
314315 if retry_attempt >= max_retries {
316- eprintln!("max retries, bye");
317 break;
318 }
319320- eprintln!("will try to reconnect");
0000321322- // Exponential backoff
323- let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
324-325- log::error!("Connection failed, retrying in {delay_ms}ms...");
326- tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
327- log::info!("Attempting to reconnect...")
0328 }
329 log::error!("Connection retries exhausted. Jetstream is disconnected.");
330 });
···339 dictionary: DecoderDictionary<'_>,
340 ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
341 send_channel: JetstreamSender<R>,
0342) -> Result<(), JetstreamEventError> {
343 // TODO: Use the write half to allow the user to change configuration settings on the fly.
344 let (socket_write, mut socket_read) = ws.split();
···373 Some(Ok(message)) => {
374 match message {
375 Message::Text(json) => {
376- let event = serde_json::from_str(&json)
377 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
0378379 if send_channel.send(event).await.is_err() {
380 // We can assume that all receivers have been dropped, so we can close
381 // the connection and exit the task.
382 log::info!(
383- "All receivers for the Jetstream connection have been dropped, closing connection."
384- );
385 closing_connection = true;
00386 }
387 }
388 Message::Binary(zstd_json) => {
389- let mut cursor = Cursor::new(zstd_json);
390 let mut decoder = zstd::stream::Decoder::with_prepared_dictionary(
391 &mut cursor,
392 &dictionary,
···398 .read_to_string(&mut json)
399 .map_err(JetstreamEventError::CompressionDecoderError)?;
400401- let event = serde_json::from_str(&json)
402 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
0403404 if send_channel.send(event).await.is_err() {
405 // We can assume that all receivers have been dropped, so we can close
406 // the connection and exit the task.
407 log::info!(
408- "All receivers for the Jetstream connection have been dropped, closing connection..."
409- );
410 closing_connection = true;
00411 }
412 }
413 Message::Ping(vec) => {
···45use std::{
6 io::{
7+ Cursor as IoCursor,
8 Read,
9 },
10 marker::PhantomData,
···16};
1718use atrium_api::record::KnownRecord;
019use futures_util::{
20 stream::StreamExt,
21 SinkExt,
···48 ConnectionError,
49 JetstreamEventError,
50 },
51+ events::{
52+ Cursor,
53+ JetstreamEvent,
54+ },
55};
5657/// The Jetstream endpoints officially provided by Bluesky themselves.
···169 pub wanted_dids: Vec<exports::Did>,
170 /// The compression algorithm to request and use for the WebSocket connection (if any).
171 pub compression: JetstreamCompression,
172+ /// Enable automatic cursor for auto-reconnect
173 ///
174+ /// By default, reconnects will never set a cursor for the connection, so a small number of
175+ /// events will always be dropped.
176 ///
177+ /// If you want gapless playback across reconnects, set this to `true`. If you always want
178+ /// the latest available events and can tolerate missing some: `false`.
179+ pub replay_on_reconnect: bool,
180 /// Maximum size of send channel for jetstream events.
181 ///
182 /// If your consuming task can't keep up with every new jetstream event in real-time,
···200 wanted_collections: Vec::new(),
201 wanted_dids: Vec::new(),
202 compression: JetstreamCompression::None,
203+ replay_on_reconnect: false,
204 channel_size: 4096, // a few seconds of firehose buffer
205 record_type: PhantomData,
206 }
···228 },
229 );
2300000231 let params = did_search_query
232 .chain(collection_search_query)
233 .chain(std::iter::once(compression))
0234 .collect::<Vec<(&str, String)>>();
235236 Url::parse_with_params(endpoint, params)
···274 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances
275 /// of this receiver are dropped, the connection and task are automatically closed.
276 pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> {
277+ self.base_connect(None).await
278+ }
279+280+ /// Connects to a Jetstream instance as defined in the [JetstreamConfig] with playback from a
281+ /// cursor
282+ ///
283+ /// A cursor from the future will result in live-tail operation.
284+ ///
285+ /// The cursor is only used for first successfull connection -- on auto-reconnect it will
286+ /// live-tail by default. Set `replay_on_reconnect: true` in the config if you need to
287+ /// receive every event, which will keep track of the last-seen cursor and reconnect from
288+ /// there.
289+ pub async fn connect_cursor(
290+ &self,
291+ cursor: Cursor,
292+ ) -> Result<JetstreamReceiver<R>, ConnectionError> {
293+ self.base_connect(Some(cursor)).await
294+ }
295+296+ async fn base_connect(
297+ &self,
298+ cursor: Option<Cursor>,
299+ ) -> Result<JetstreamReceiver<R>, ConnectionError> {
300 // We validate the config again for good measure. Probably not necessary but it can't hurt.
301 self.config
302 .validate()
···309 .construct_endpoint(&self.config.endpoint)
310 .map_err(ConnectionError::InvalidEndpoint)?;
311312+ let replay_on_reconnect = self.config.replay_on_reconnect;
313+314 tokio::task::spawn(async move {
315 let max_retries = 30;
316 let base_delay_ms = 1_000; // 1 second
···318 let success_threshold_s = 15; // 15 seconds, retry count is reset if we were connected at least this long
319320 let mut retry_attempt = 0;
321+ let mut connect_cursor = cursor;
322 loop {
323 let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY);
324325+ let mut configured_endpoint = configured_endpoint.clone();
326+ if let Some(ref cursor) = connect_cursor {
327+ configured_endpoint
328+ .query_pairs_mut()
329+ .append_pair("cursor", &cursor.to_jetstream());
330+ }
331+332+ let mut last_cursor = connect_cursor.clone();
333+334 retry_attempt += 1;
335 if let Ok((ws_stream, _)) = connect_async(&configured_endpoint).await {
336 let t_connected = Instant::now();
337+ if let Err(e) =
338+ websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
339+ .await
340+ {
341 log::error!("Jetstream closed after encountering error: {e:?}");
342 } else {
343 log::error!("Jetstream connection closed cleanly");
344 }
345 if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
346 retry_attempt = 0;
0347 }
348 }
349350 if retry_attempt >= max_retries {
351+ log::error!("hit max retries, bye");
352 break;
353 }
354355+ connect_cursor = if replay_on_reconnect {
356+ last_cursor
357+ } else {
358+ None
359+ };
360361+ if retry_attempt > 0 {
362+ // Exponential backoff
363+ let delay_ms = base_delay_ms * (2_u64.pow(retry_attempt));
364+ log::error!("Connection failed, retrying in {delay_ms}ms...");
365+ tokio::time::sleep(Duration::from_millis(delay_ms.min(max_delay_ms))).await;
366+ log::info!("Attempting to reconnect...");
367+ }
368 }
369 log::error!("Connection retries exhausted. Jetstream is disconnected.");
370 });
···379 dictionary: DecoderDictionary<'_>,
380 ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
381 send_channel: JetstreamSender<R>,
382+ last_cursor: &mut Option<Cursor>,
383) -> Result<(), JetstreamEventError> {
384 // TODO: Use the write half to allow the user to change configuration settings on the fly.
385 let (socket_write, mut socket_read) = ws.split();
···414 Some(Ok(message)) => {
415 match message {
416 Message::Text(json) => {
417+ let event: JetstreamEvent<R> = serde_json::from_str(&json)
418 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
419+ let event_cursor = event.cursor();
420421 if send_channel.send(event).await.is_err() {
422 // We can assume that all receivers have been dropped, so we can close
423 // the connection and exit the task.
424 log::info!(
425+ "All receivers for the Jetstream connection have been dropped, closing connection."
426+ );
427 closing_connection = true;
428+ } else if let Some(v) = last_cursor.as_mut() {
429+ *v = event_cursor;
430 }
431 }
432 Message::Binary(zstd_json) => {
433+ let mut cursor = IoCursor::new(zstd_json);
434 let mut decoder = zstd::stream::Decoder::with_prepared_dictionary(
435 &mut cursor,
436 &dictionary,
···442 .read_to_string(&mut json)
443 .map_err(JetstreamEventError::CompressionDecoderError)?;
444445+ let event: JetstreamEvent<R> = serde_json::from_str(&json)
446 .map_err(JetstreamEventError::ReceivedMalformedJSON)?;
447+ let event_cursor = event.cursor();
448449 if send_channel.send(event).await.is_err() {
450 // We can assume that all receivers have been dropped, so we can close
451 // the connection and exit the task.
452 log::info!(
453+ "All receivers for the Jetstream connection have been dropped, closing connection..."
454+ );
455 closing_connection = true;
456+ } else if let Some(v) = last_cursor.as_mut() {
457+ *v = event_cursor;
458 }
459 }
460 Message::Ping(vec) => {