at protocol indexer with flexible filtering, xrpc queries, and a cursor-backed event stream, built on fjall
at-protocol atproto indexer rust fjall

[api] unnest stream body

ptr.pet 98b08acb 6201caf0

verified
+170 -158
+170 -158
src/api/stream.rs
··· 12 12 use miette::{Context, IntoDiagnostic}; 13 13 use serde::Deserialize; 14 14 use std::sync::Arc; 15 - use tokio::sync::{broadcast, mpsc}; 15 + use tokio::sync::{broadcast, mpsc, oneshot}; 16 16 use tracing::{error, info_span, warn}; 17 17 18 18 #[derive(Deserialize)] ··· 30 30 31 31 async fn handle_socket(mut socket: WebSocket, state: Arc<AppState>, query: StreamQuery) { 32 32 let (tx, mut rx) = mpsc::channel(500); 33 - let (cancel_tx, cancel_rx) = tokio::sync::oneshot::channel::<()>(); 33 + let (cancel_tx, cancel_rx) = oneshot::channel::<()>(); 34 34 35 35 let runtime = tokio::runtime::Handle::current(); 36 36 let id = std::time::SystemTime::UNIX_EPOCH ··· 39 39 .as_secs(); 40 40 41 41 let thread = std::thread::Builder::new() 42 - .name(format!( 43 - "stream-handler-{id}", 44 - 45 - )) 42 + .name(format!("stream-handler-{id}")) 46 43 .spawn(move || { 47 - let mut cancel_rx = cancel_rx; 48 - let db = &state.db; 49 - let mut event_rx = db.event_tx.subscribe(); 50 - let ks = db.events.clone(); 51 - let mut current_id = match query.cursor { 52 - Some(cursor) => cursor.saturating_sub(1), 53 - None => { 54 - let max_id = db.next_event_id.load(std::sync::atomic::Ordering::SeqCst); 55 - max_id.saturating_sub(1) 56 - } 57 - }; 58 44 let _runtime_guard = runtime.enter(); 59 - let span = info_span!("stream", id); 60 - let _entered_span = span.enter(); 61 - 62 - loop { 63 - // 1. catch up from DB 64 - loop { 65 - let mut found = false; 66 - for item in ks.range(keys::event_key(current_id + 1)..) { 67 - let (k, v) = match item.into_inner() { 68 - Ok((k, v)) => (k, v), 69 - Err(e) => { 70 - error!(err = %e, "failed to read event from db"); 71 - break; 72 - } 73 - }; 74 - let id = match k 75 - .as_ref() 76 - .try_into() 77 - .into_diagnostic() 78 - .wrap_err("expected event id to be 8 bytes") 79 - .map(u64::from_be_bytes) 80 - { 81 - Ok(id) => id, 82 - Err(e) => { 83 - error!(err = %e, "failed to parse event id"); 84 - continue; 85 - } 86 - }; 87 - current_id = id; 45 + stream(state, cancel_rx, tx, query, id); 46 + }) 47 + .expect("failed to spawn stream handler thread"); 88 48 89 - let StoredEvent { 90 - live, 91 - did, 92 - rev, 93 - collection, 94 - rkey, 95 - action, 96 - cid, 97 - } = match rmp_serde::from_slice(&v) { 98 - Ok(e) => e, 99 - Err(e) => { 100 - error!(err = %e, "failed to deserialize stored event"); 101 - continue; 102 - } 103 - }; 104 - 105 - let _entered = info_span!("record", cid = ?cid.map(|c| c.to_string())).entered(); 106 - 107 - let marshallable = { 108 - let record_val; 109 - let block_bytes = cid 110 - .map(|cid| db.blocks.get(&cid.to_bytes())) 111 - .transpose() 112 - .map(Option::flatten); 113 - match block_bytes { 114 - Ok(Some(block_bytes)) => match serde_ipld_dagcbor::from_slice::<serde_json::Value>(&block_bytes) { 115 - Ok(val) => record_val = val, 116 - Err(e) => { 117 - error!(err = %e, "cant parse block, must be corrupted?"); 118 - return; 119 - } 120 - } 121 - Ok(None) => { 122 - warn!("block not found, possibly repo deleted but events not evicted yet?"); 123 - continue; 124 - } 125 - Err(e) => { 126 - error!(err = %e, "can't get block"); 127 - crate::db::check_poisoned(&e); 128 - return; 129 - } 130 - } 49 + while let Some(msg) = rx.recv().await { 50 + if let Err(e) = socket.send(msg).await { 51 + error!(err = %e, "failed to send ws message"); 52 + break; 53 + } 54 + } 131 55 132 - MarshallableEvt { 133 - id, 134 - event_type: "record".into(), 135 - record: Some(RecordEvt { 136 - live, 137 - did: did.to_did(), 138 - rev: CowStr::Owned(rev.to_tid().into()), 139 - collection, 140 - rkey: CowStr::Owned(rkey.to_smolstr().into()), 141 - action: CowStr::Borrowed(action.as_str()), 142 - record: Some(record_val), 143 - cid: cid 144 - .map(|c| jacquard_common::types::cid::Cid::ipld(c).into()), 145 - }), 146 - identity: None, 147 - account: None, 148 - } 149 - }; 56 + let _ = cancel_tx.send(()); 57 + if let Err(e) = thread.join() { 58 + error!(err = ?e, "stream handler thread panicked"); 59 + } 60 + } 150 61 151 - let json_str = match serde_json::to_string(&marshallable) { 152 - Ok(s) => s, 153 - Err(e) => { 154 - error!(err = %e, "failed to serialize ws event"); 155 - continue; 156 - } 157 - }; 62 + fn stream( 63 + state: Arc<AppState>, 64 + mut cancel: oneshot::Receiver<()>, 65 + tx: mpsc::Sender<Message>, 66 + query: StreamQuery, 67 + id: u64, 68 + ) { 69 + let db = &state.db; 70 + let mut event_rx = db.event_tx.subscribe(); 71 + let ks = db.events.clone(); 72 + let mut current_id = match query.cursor { 73 + Some(cursor) => cursor.saturating_sub(1), 74 + None => { 75 + let max_id = db.next_event_id.load(std::sync::atomic::Ordering::SeqCst); 76 + max_id.saturating_sub(1) 77 + } 78 + }; 79 + let runtime = tokio::runtime::Handle::current(); 158 80 159 - if let Err(e) = tx.blocking_send(Message::Text(json_str.into())) { 160 - error!(err = %e, "failed to send ws message"); 161 - return; 162 - } 81 + let span = info_span!("stream", id); 82 + let _entered_span = span.enter(); 163 83 164 - found = true; 165 - } 166 - if !found { 84 + loop { 85 + // 1. catch up from DB 86 + loop { 87 + let mut found = false; 88 + for item in ks.range(keys::event_key(current_id + 1)..) { 89 + let (k, v) = match item.into_inner() { 90 + Ok((k, v)) => (k, v), 91 + Err(e) => { 92 + error!(err = %e, "failed to read event from db"); 167 93 break; 168 94 } 169 - } 170 - 171 - // 2. wait for live events 172 - let next_event = runtime.block_on(async { 173 - tokio::select! { 174 - res = event_rx.recv() => Some(res), 175 - _ = &mut cancel_rx => None, 95 + }; 96 + let id = match k 97 + .as_ref() 98 + .try_into() 99 + .into_diagnostic() 100 + .wrap_err("expected event id to be 8 bytes") 101 + .map(u64::from_be_bytes) 102 + { 103 + Ok(id) => id, 104 + Err(e) => { 105 + error!(err = %e, "failed to parse event id"); 106 + continue; 176 107 } 177 - }); 178 - 179 - let Some(next_event) = next_event else { 180 - break; 181 108 }; 109 + current_id = id; 182 110 183 - match next_event { 184 - Ok(BroadcastEvent::Persisted(_)) => { 185 - // just wake up and run catch-up loop again 111 + let StoredEvent { 112 + live, 113 + did, 114 + rev, 115 + collection, 116 + rkey, 117 + action, 118 + cid, 119 + } = match rmp_serde::from_slice(&v) { 120 + Ok(e) => e, 121 + Err(e) => { 122 + error!(err = %e, "failed to deserialize stored event"); 123 + continue; 186 124 } 187 - Ok(BroadcastEvent::Ephemeral(evt)) => { 188 - // send ephemeral event directly 189 - let json_str = match serde_json::to_string(&evt) { 190 - Ok(s) => s, 191 - Err(e) => { 192 - error!(err = %e, "failed to serialize ws event"); 193 - continue; 125 + }; 126 + 127 + let _entered = info_span!("record", cid = ?cid.map(|c| c.to_string())).entered(); 128 + 129 + let marshallable = { 130 + let record_val; 131 + let block_bytes = cid 132 + .map(|cid| db.blocks.get(&cid.to_bytes())) 133 + .transpose() 134 + .map(Option::flatten); 135 + match block_bytes { 136 + Ok(Some(block_bytes)) => { 137 + match serde_ipld_dagcbor::from_slice::<serde_json::Value>(&block_bytes) 138 + { 139 + Ok(val) => record_val = val, 140 + Err(e) => { 141 + error!(err = %e, "cant parse block, must be corrupted?"); 142 + return; 143 + } 194 144 } 195 - }; 196 - if let Err(e) = tx.blocking_send(Message::Text(json_str.into())) { 197 - error!(err = %e, "failed to send ws message"); 145 + } 146 + Ok(None) => { 147 + warn!( 148 + "block not found, possibly repo deleted but events not evicted yet?" 149 + ); 150 + continue; 151 + } 152 + Err(e) => { 153 + error!(err = %e, "can't get block"); 154 + crate::db::check_poisoned(&e); 198 155 return; 199 156 } 200 157 } 201 - Err(broadcast::error::RecvError::Lagged(_)) => { 202 - // continue to catch up 158 + 159 + MarshallableEvt { 160 + id, 161 + event_type: "record".into(), 162 + record: Some(RecordEvt { 163 + live, 164 + did: did.to_did(), 165 + rev: CowStr::Owned(rev.to_tid().into()), 166 + collection, 167 + rkey: CowStr::Owned(rkey.to_smolstr().into()), 168 + action: CowStr::Borrowed(action.as_str()), 169 + record: Some(record_val), 170 + cid: cid.map(|c| jacquard_common::types::cid::Cid::ipld(c).into()), 171 + }), 172 + identity: None, 173 + account: None, 203 174 } 204 - Err(broadcast::error::RecvError::Closed) => { 205 - break; 175 + }; 176 + 177 + let json_str = match serde_json::to_string(&marshallable) { 178 + Ok(s) => s, 179 + Err(e) => { 180 + error!(err = %e, "failed to serialize ws event"); 181 + continue; 206 182 } 183 + }; 184 + 185 + if let Err(e) = tx.blocking_send(Message::Text(json_str.into())) { 186 + error!(err = %e, "failed to send ws message"); 187 + return; 207 188 } 189 + 190 + found = true; 208 191 } 209 - }) 210 - .expect("failed to spawn stream handler thread"); 192 + if !found { 193 + break; 194 + } 195 + } 211 196 212 - while let Some(msg) = rx.recv().await { 213 - if let Err(e) = socket.send(msg).await { 214 - error!(err = %e, "failed to send ws message"); 197 + // 2. wait for live events 198 + let next_event = runtime.block_on(async { 199 + tokio::select! { 200 + res = event_rx.recv() => Some(res), 201 + _ = &mut cancel => None, 202 + } 203 + }); 204 + 205 + let Some(next_event) = next_event else { 215 206 break; 207 + }; 208 + 209 + match next_event { 210 + Ok(BroadcastEvent::Persisted(_)) => { 211 + // just wake up and run catch-up loop again 212 + } 213 + Ok(BroadcastEvent::Ephemeral(evt)) => { 214 + // send ephemeral event directly 215 + let json_str = match serde_json::to_string(&evt) { 216 + Ok(s) => s, 217 + Err(e) => { 218 + error!(err = %e, "failed to serialize ws event"); 219 + continue; 220 + } 221 + }; 222 + if let Err(e) = tx.blocking_send(Message::Text(json_str.into())) { 223 + error!(err = %e, "failed to send ws message"); 224 + return; 225 + } 226 + } 227 + Err(broadcast::error::RecvError::Lagged(_)) => { 228 + // continue to catch up 229 + } 230 + Err(broadcast::error::RecvError::Closed) => { 231 + break; 232 + } 216 233 } 217 - } 218 - 219 - let _ = cancel_tx.send(()); 220 - if let Err(e) = thread.join() { 221 - error!(err = ?e, "stream handler thread panicked"); 222 234 } 223 235 }