···1+use crate::db::{self, keys};
2+use crate::ingest::BufferedMessage;
3+use crate::ops::{self, send_backfill_req};
4+use crate::state::AppState;
5+use crate::types::{AccountEvt, IdentityEvt, RepoState, RepoStatus};
6+use jacquard::api::com_atproto::sync::subscribe_repos::SubscribeReposMessage;
7+8+use fjall::OwnedWriteBatch;
9+use jacquard::cowstr::ToCowStr;
10+use jacquard::types::did::Did;
11+use jacquard_common::IntoStatic;
12+use miette::{IntoDiagnostic, Result};
13+use smol_str::ToSmolStr;
14+use std::collections::HashSet;
15+use std::sync::Arc;
16+use std::time::Duration;
17+use tokio::sync::mpsc;
18+use tracing::{debug, error, trace, warn};
/// Outcome of applying a single buffered firehose message to the write batch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ProcessResult {
    /// The account was reported as deleted and removal of its repo data was staged.
    Deleted,
    /// The message was applied, or was skipped without error.
    Ok,
}
25+26+enum RepoCheckResult {
27+ Syncing,
28+ Ok(RepoState<'static>),
29+}
30+31+pub struct FirehoseWorker {
32+ state: Arc<AppState>,
33+ rx: mpsc::UnboundedReceiver<BufferedMessage>,
34+}
35+36+impl FirehoseWorker {
37+ pub fn new(state: Arc<AppState>, rx: mpsc::UnboundedReceiver<BufferedMessage>) -> Self {
38+ Self { state, rx }
39+ }
40+41+ pub fn run(mut self, handle: tokio::runtime::Handle) -> Result<()> {
42+ const BUF_SIZE: usize = 500;
43+ let mut buf = Vec::<BufferedMessage>::with_capacity(BUF_SIZE);
44+ let mut failed = Vec::<BufferedMessage>::new();
45+46+ loop {
47+ let mut batch = self.state.db.inner.batch();
48+ let mut deleted = HashSet::new();
49+50+ for msg in buf.drain(..) {
51+ let (did, seq) = match &msg {
52+ SubscribeReposMessage::Commit(c) => (&c.repo, c.seq),
53+ SubscribeReposMessage::Identity(i) => (&i.did, i.seq),
54+ SubscribeReposMessage::Account(a) => (&a.did, a.seq),
55+ SubscribeReposMessage::Sync(s) => (&s.did, s.seq),
56+ _ => continue,
57+ };
58+59+ if self.state.blocked_dids.contains_sync(did) {
60+ failed.push(msg);
61+ continue;
62+ }
63+ if deleted.contains(did) {
64+ continue;
65+ }
66+67+ match Self::process_message(&self.state, &mut batch, &msg, did) {
68+ Ok(ProcessResult::Ok) => {}
69+ Ok(ProcessResult::Deleted) => {
70+ deleted.insert(did.clone());
71+ }
72+ Err(e) => {
73+ error!("failed to process buffered message for {did}: {e}");
74+ db::check_poisoned_report(&e);
75+ failed.push(msg);
76+ }
77+ }
78+79+ self.state
80+ .cur_firehose
81+ .store(seq, std::sync::atomic::Ordering::SeqCst);
82+ }
83+84+ // commit all changes to db
85+ batch.commit().into_diagnostic()?;
86+ self.state
87+ .db
88+ .inner
89+ .persist(fjall::PersistMode::Buffer)
90+ .into_diagnostic()?;
91+92+ // add failed back to buf here so the ordering is preserved
93+ if !failed.is_empty() {
94+ buf.append(&mut failed);
95+ }
96+97+ // wait until we receive some messages
98+ // this does mean we will have an up to 1 second delay, before we send events to consumers
99+ // but thats reasonable imo, could also be configured of course
100+ let _ = handle.block_on(tokio::time::timeout(
101+ Duration::from_secs(1),
102+ self.rx.recv_many(&mut buf, BUF_SIZE),
103+ ));
104+ if buf.is_empty() {
105+ if self.rx.is_closed() {
106+ error!("ingestor crashed? shutting down buffer processor");
107+ break;
108+ }
109+ continue;
110+ }
111+ }
112+113+ Ok(())
114+ }
115+116+ fn process_message(
117+ state: &AppState,
118+ batch: &mut OwnedWriteBatch,
119+ msg: &BufferedMessage,
120+ did: &Did,
121+ ) -> Result<ProcessResult> {
122+ let RepoCheckResult::Ok(repo_state) = Self::check_repo_state(batch, state, did)? else {
123+ return Ok(ProcessResult::Ok);
124+ };
125+126+ match msg {
127+ SubscribeReposMessage::Commit(commit) => {
128+ trace!("processing buffered commit for {did}");
129+130+ if matches!(repo_state.rev, Some(ref rev) if commit.rev.as_str() <= rev.as_str()) {
131+ debug!(
132+ "skipping replayed event for {}: {} <= {}",
133+ did,
134+ commit.rev,
135+ repo_state.rev.as_ref().expect("we checked in if")
136+ );
137+ return Ok(ProcessResult::Ok);
138+ }
139+140+ if let (Some(prev_repo), Some(prev_commit)) = (&repo_state.data, &commit.prev_data)
141+ && prev_repo != &prev_commit.0
142+ {
143+ warn!(
144+ "gap detected for {}: prev {} != stored {}. triggering backfill",
145+ did, prev_repo, prev_commit.0
146+ );
147+148+ let mut batch = state.db.inner.batch();
149+ ops::update_repo_status(
150+ &mut batch,
151+ &state.db,
152+ did,
153+ repo_state,
154+ RepoStatus::Backfilling,
155+ )?;
156+ batch.commit().into_diagnostic()?;
157+158+ send_backfill_req(state, did.clone().into_static())?;
159+160+ return Ok(ProcessResult::Ok);
161+ }
162+163+ ops::apply_commit(batch, &state.db, repo_state, &commit)?();
164+ }
165+ SubscribeReposMessage::Identity(identity) => {
166+ debug!("processing buffered identity for {did}");
167+ let handle = identity
168+ .handle
169+ .as_ref()
170+ .map(|h| h.to_cowstr().into_static());
171+172+ let evt = IdentityEvt {
173+ did: did.clone().into_static(),
174+ handle,
175+ };
176+ ops::emit_identity_event(&state.db, evt);
177+ }
178+ SubscribeReposMessage::Account(account) => {
179+ debug!("processing buffered account for {did}");
180+ let evt = AccountEvt {
181+ did: did.clone().into_static(),
182+ active: account.active,
183+ status: account.status.as_ref().map(|s| s.to_cowstr().into_static()),
184+ };
185+186+ if !account.active {
187+ use jacquard::api::com_atproto::sync::subscribe_repos::AccountStatus;
188+ match &account.status {
189+ Some(AccountStatus::Deleted) => {
190+ debug!("account {did} deleted, wiping data");
191+ ops::delete_repo(batch, &state.db, did)?;
192+ return Ok(ProcessResult::Deleted);
193+ }
194+ status => {
195+ let status = match status {
196+ Some(status) => match status {
197+ AccountStatus::Deleted => {
198+ unreachable!("deleted account status is handled before")
199+ }
200+ AccountStatus::Takendown => RepoStatus::Takendown,
201+ AccountStatus::Suspended => RepoStatus::Suspended,
202+ AccountStatus::Deactivated => RepoStatus::Deactivated,
203+ AccountStatus::Throttled => {
204+ RepoStatus::Error("throttled".into())
205+ }
206+ AccountStatus::Desynchronized => {
207+ RepoStatus::Error("desynchronized".into())
208+ }
209+ AccountStatus::Other(s) => {
210+ warn!(
211+ "unknown account status for {did}, will put in error state: {s}"
212+ );
213+ RepoStatus::Error(s.to_smolstr())
214+ }
215+ },
216+ None => {
217+ warn!("account {did} inactive but no status provided");
218+ RepoStatus::Error("unknown".into())
219+ }
220+ };
221+ ops::update_repo_status(batch, &state.db, did, repo_state, status)?;
222+ }
223+ }
224+ } else {
225+ // normally we would initiate backfill here
226+ // but we don't have to do anything because:
227+ // 1. we handle changing repo status to Synced before this (in check repo state)
228+ // 2. initiating backfilling is also handled there
229+ }
230+231+ ops::emit_account_event(&state.db, evt);
232+ }
233+ _ => {
234+ warn!("unknown message type in buffer for {did}");
235+ }
236+ }
237+238+ Ok(ProcessResult::Ok)
239+ }
240+241+ fn check_repo_state(
242+ batch: &mut OwnedWriteBatch,
243+ state: &AppState,
244+ did: &Did<'_>,
245+ ) -> Result<RepoCheckResult> {
246+ // check if we have this repo
247+ let repo_key = keys::repo_key(&did);
248+ let Some(state_bytes) = state.db.repos.get(&repo_key).into_diagnostic()? else {
249+ // we don't know this repo, but we are receiving events for it
250+ // this means we should backfill it before processing its events
251+ debug!("discovered new account {did} from firehose, queueing backfill");
252+253+ let new_state = RepoState::backfilling(did);
254+ // using a separate batch here since we want to make it known its being backfilled
255+ // immediately. we could use the batch for the unit of work we are doing but
256+ // then we wouldn't be able to start backfilling until the unit of work is done
257+ let mut batch = state.db.inner.batch();
258+259+ batch.insert(
260+ &state.db.repos,
261+ &repo_key,
262+ crate::db::ser_repo_state(&new_state)?,
263+ );
264+ batch.insert(&state.db.pending, &repo_key, &[]);
265+ batch.commit().into_diagnostic()?;
266+267+ send_backfill_req(state, did.clone().into_static())?;
268+269+ return Ok(RepoCheckResult::Syncing);
270+ };
271+ let mut repo_state = crate::db::deser_repo_state(&state_bytes)?.into_static();
272+273+ // if we are backfilling or it is new, DON'T mark it as synced yet
274+ // the backfill worker will do that when it finishes
275+ match &repo_state.status {
276+ RepoStatus::Synced => Ok(RepoCheckResult::Ok(repo_state)),
277+ RepoStatus::Backfilling | RepoStatus::Error(_) => {
278+ // repo is being backfilled or is in error state
279+ // we dont touch the state because the backfill worker will do that
280+ // we should not really get here because the backfill worker should have marked it as
281+ // being worked on (blocked repos) meaning we would have returned earlier
282+ debug!(
283+ "ignoring active status for {did} as it is {:?}",
284+ repo_state.status
285+ );
286+ Ok(RepoCheckResult::Syncing)
287+ }
288+ RepoStatus::Deactivated | RepoStatus::Suspended | RepoStatus::Takendown => {
289+ // if it was in deactivated/takendown/suspended state, we can mark it as synced
290+ // because we are receiving live events now
291+ repo_state = ops::update_repo_status(
292+ batch,
293+ &state.db,
294+ &did,
295+ repo_state,
296+ RepoStatus::Synced,
297+ )?;
298+ Ok(RepoCheckResult::Ok(repo_state))
299+ }
300+ }
301+ }
302+}
+37-25
src/main.rs
···1mod api;
2mod backfill;
3-mod buffer;
4mod config;
5mod crawler;
6mod db;
···10mod state;
11mod types;
1213-use crate::backfill::Worker;
14-use crate::buffer::processor::BufferProcessor;
15use crate::config::Config;
16use crate::crawler::Crawler;
17-use crate::db::Db;
18-use crate::ingest::Ingestor;
19use crate::state::AppState;
020use futures::{future::BoxFuture, FutureExt, TryFutureExt};
21use miette::IntoDiagnostic;
22use mimalloc::MiMalloc;
23use std::sync::atomic::Ordering;
24use std::sync::Arc;
25-use tokio::task::spawn_blocking;
26use tracing::{error, info};
2728#[global_allocator]
···3738 info!("{cfg}");
3940- let (state, backfill_rx, buffer_rx) = AppState::new(&cfg)?;
041 let state = Arc::new(state);
4243 tokio::spawn(
···54 tokio::spawn({
55 let state = state.clone();
56 let timeout = cfg.repo_fetch_timeout;
57- Worker::new(state, backfill_rx, timeout, cfg.backfill_concurrency_limit).run()
58 });
5960- let buffer_processor_task = tokio::spawn({
61 let state = state.clone();
62- BufferProcessor::new(state, buffer_rx).run()
063 });
6465 if let Err(e) = spawn_blocking({
···70 .into_diagnostic()?
71 {
72 error!("failed to queue pending backfills: {e}");
73- Db::check_poisoned_report(&e);
74 }
7576 if let Err(e) = spawn_blocking({
···81 .into_diagnostic()?
82 {
83 error!("failed to queue gone backfills: {e}");
84- Db::check_poisoned_report(&e);
85 }
8687 std::thread::spawn({
···127 loop {
128 std::thread::sleep(persist_interval);
1290130 let seq = state.cur_firehose.load(Ordering::SeqCst);
131- const CURSOR_KEY: &[u8] = b"firehose_cursor";
132- if let Err(e) = state
133- .db
134- .cursors
135- .insert(CURSOR_KEY, seq.to_string().into_bytes())
136- {
137 error!("failed to save cursor: {e}");
138- Db::check_poisoned(&e);
139 }
14000000000141 if let Err(e) = state.db.persist() {
142 error!("db persist failed: {e}");
143- Db::check_poisoned_report(&e);
144 }
145 }
146 }
···152 .run()
153 .inspect_err(|e| {
154 error!("crawler died: {e}");
155- Db::check_poisoned_report(&e);
156 }),
157 );
158 }
159160- let ingestor = Ingestor::new(state.clone(), cfg.relay_host, cfg.full_network);
0161162 let res = futures::future::try_join_all::<[BoxFuture<_>; _]>([
163- Box::pin(buffer_processor_task.map(|r| r.into_diagnostic().flatten())),
0000000164 Box::pin(ingestor.run()),
165 ]);
166 if let Err(e) = res.await {
167 error!("ingestor or buffer processor died: {e}");
168- Db::check_poisoned_report(&e);
169 }
170171 if let Err(e) = state.db.persist() {
172- Db::check_poisoned_report(&e);
173 return Err(e);
174 }
175
···1mod api;
2mod backfill;
03mod config;
4mod crawler;
5mod db;
···9mod state;
10mod types;
110012use crate::config::Config;
13use crate::crawler::Crawler;
14+use crate::db::set_firehose_cursor;
15+use crate::ingest::firehose::FirehoseIngestor;
16use crate::state::AppState;
17+use crate::{backfill::BackfillWorker, ingest::worker::FirehoseWorker};
18use futures::{future::BoxFuture, FutureExt, TryFutureExt};
19use miette::IntoDiagnostic;
20use mimalloc::MiMalloc;
21use std::sync::atomic::Ordering;
22use std::sync::Arc;
23+use tokio::{sync::mpsc, task::spawn_blocking};
24use tracing::{error, info};
2526#[global_allocator]
···3536 info!("{cfg}");
3738+ let (state, backfill_rx) = AppState::new(&cfg)?;
39+ let (buffer_tx, buffer_rx) = mpsc::unbounded_channel();
40 let state = Arc::new(state);
4142 tokio::spawn(
···53 tokio::spawn({
54 let state = state.clone();
55 let timeout = cfg.repo_fetch_timeout;
56+ BackfillWorker::new(state, backfill_rx, timeout, cfg.backfill_concurrency_limit).run()
57 });
5859+ let firehose_worker = std::thread::spawn({
60 let state = state.clone();
61+ let handle = tokio::runtime::Handle::current();
62+ move || FirehoseWorker::new(state, buffer_rx).run(handle)
63 });
6465 if let Err(e) = spawn_blocking({
···70 .into_diagnostic()?
71 {
72 error!("failed to queue pending backfills: {e}");
73+ db::check_poisoned_report(&e);
74 }
7576 if let Err(e) = spawn_blocking({
···81 .into_diagnostic()?
82 {
83 error!("failed to queue gone backfills: {e}");
84+ db::check_poisoned_report(&e);
85 }
8687 std::thread::spawn({
···127 loop {
128 std::thread::sleep(persist_interval);
129130+ // persist firehose cursor
131 let seq = state.cur_firehose.load(Ordering::SeqCst);
132+ if let Err(e) = set_firehose_cursor(&state.db, seq) {
00000133 error!("failed to save cursor: {e}");
134+ db::check_poisoned_report(&e);
135 }
136137+ // persist counts
138+ // TODO: make this more durable
139+ if let Err(e) = db::persist_counts(&state.db) {
140+ error!("failed to persist counts: {e}");
141+ db::check_poisoned_report(&e);
142+ }
143+144+ // persist journal
145 if let Err(e) = state.db.persist() {
146 error!("db persist failed: {e}");
147+ db::check_poisoned_report(&e);
148 }
149 }
150 }
···156 .run()
157 .inspect_err(|e| {
158 error!("crawler died: {e}");
159+ db::check_poisoned_report(&e);
160 }),
161 );
162 }
163164+ let ingestor =
165+ FirehoseIngestor::new(state.clone(), buffer_tx, cfg.relay_host, cfg.full_network);
166167 let res = futures::future::try_join_all::<[BoxFuture<_>; _]>([
168+ Box::pin(
169+ tokio::task::spawn_blocking(move || {
170+ firehose_worker
171+ .join()
172+ .map_err(|e| miette::miette!("buffer processor thread died: {e:?}"))
173+ })
174+ .map(|r| r.into_diagnostic().flatten().flatten()),
175+ ),
176 Box::pin(ingestor.run()),
177 ]);
178 if let Err(e) = res.await {
179 error!("ingestor or buffer processor died: {e}");
180+ db::check_poisoned_report(&e);
181 }
182183 if let Err(e) = state.db.persist() {
184+ db::check_poisoned_report(&e);
185 return Err(e);
186 }
187
+126-99
src/ops.rs
···1-use crate::db::{keys, Db};
2-use crate::types::{AccountEvt, BroadcastEvent, IdentityEvt, MarshallableEvt, StoredEvent};
0000003use jacquard::api::com_atproto::sync::subscribe_repos::Commit;
4use jacquard::cowstr::ToCowStr;
5-use jacquard::IntoStatic;
06use jacquard_repo::car::reader::parse_car_bytes;
7-use miette::{IntoDiagnostic, Result};
8-use smol_str::{SmolStr, ToSmolStr};
9use std::collections::HashMap;
10use std::sync::atomic::Ordering;
11use std::time::Instant;
12use tracing::{debug, trace};
0000000001314// emitting identity is ephemeral
15// we dont replay these, consumers can just fetch identity themselves if they need it
···37 let _ = db.event_tx.send(BroadcastEvent::Ephemeral(marshallable));
38}
3940-pub fn delete_repo(db: &Db, did: &jacquard::types::did::Did) -> Result<()> {
000041 debug!("deleting repo {did}");
42- let mut batch = db.inner.batch();
43 let repo_key = keys::repo_key(did);
4445 // 1. delete from repos, pending, resync
46- batch.remove(&db.repos, repo_key);
47- batch.remove(&db.pending, repo_key);
48- batch.remove(&db.resync, repo_key);
49-50- // 2. delete from buffer (prefix: repo_key + SEP)
51- let mut buffer_prefix = repo_key.to_vec();
52- buffer_prefix.push(keys::SEP);
53- for guard in db.buffer.prefix(&buffer_prefix) {
54- let k = guard.key().into_diagnostic()?;
55- batch.remove(&db.buffer, k);
56- }
5758- // 3. delete from records (prefix: repo_key + SEP)
59- let mut records_prefix = repo_key.to_vec();
60 records_prefix.push(keys::SEP);
61- let mut deleted_count = 0;
62-63 for guard in db.records.prefix(&records_prefix) {
64 let k = guard.key().into_diagnostic()?;
65 batch.remove(&db.records, k);
66- deleted_count += 1;
67 }
6869- // 4. reset collection counts
70 let mut count_prefix = Vec::new();
71 count_prefix.push(b'r');
72 count_prefix.push(keys::SEP);
73- count_prefix.extend_from_slice(keys::did_prefix(did).as_bytes());
74 count_prefix.push(keys::SEP);
7576 for guard in db.counts.prefix(&count_prefix) {
···78 batch.remove(&db.counts, k);
79 }
8081- batch.commit().into_diagnostic()?;
82-83- // update global record count
84- if deleted_count > 0 {
85- tokio::spawn(db.increment_count(keys::count_keyspace_key("records"), -deleted_count));
86- }
87-88 Ok(())
89}
9091-pub fn update_repo_status(
092 db: &Db,
93 did: &jacquard::types::did::Did,
94- status: crate::types::RepoStatus,
95-) -> Result<()> {
96- debug!("updating repo status for {did} to {status:?}");
97- let (updated, batch) =
98- Db::update_repo_state(db.inner.batch(), &db.repos, did, |state, _val| {
99- state.status = status.clone();
100- state.last_updated_at = chrono::Utc::now().timestamp();
101- Ok((true, ()))
102- })?;
103104- if updated.is_some() {
105- batch.commit().into_diagnostic()?;
0000000000000000000000000000000000106 }
107- Ok(())
000000108}
109110-pub fn apply_commit(db: &Db, commit: &Commit<'_>, live: bool) -> Result<()> {
00000111 let did = &commit.repo;
112 debug!("applying commit {} for {did}", &commit.commit);
113···121122 trace!("parsed car for {did} in {:?}", start.elapsed());
123124- let (_, mut batch) = Db::update_repo_state(db.inner.batch(), &db.repos, did, |state, _| {
125- state.rev = commit.rev.as_str().into();
126- state.data = parsed.root.to_smolstr();
127- state.last_updated_at = chrono::Utc::now().timestamp();
128- Ok((true, ()))
129- })?;
130131 // store all blocks in the CAS
132 for (cid, bytes) in &parsed.blocks {
···140 // 2. iterate ops and update records index
141 let mut records_delta = 0;
142 let mut events_count = 0;
143- let mut collection_deltas: HashMap<SmolStr, i64> = HashMap::new();
144145 for op in &commit.ops {
146- let parts: Vec<&str> = op.path.splitn(2, '/').collect();
147- if parts.len() != 2 {
148- continue;
149- }
150- let collection = parts[0];
151- let rkey = parts[1];
152-153 let db_key = keys::record_key(did, collection, rkey);
154155 let event_id = db.next_event_id.fetch_add(1, Ordering::SeqCst);
156157- let mut cid_str = None;
158-159 match op.action.as_str() {
160 "create" | "update" => {
161 let Some(cid) = &op.cid else {
···163 };
164 let s = smol_str::SmolStr::from(cid.as_str());
165 batch.insert(&db.records, db_key, s.as_bytes().to_vec());
166- cid_str = Some(s);
167168 // accumulate counts
169 if op.action.as_str() == "create" {
170 records_delta += 1;
171- *collection_deltas
172- .entry(collection.to_smolstr())
173- .or_default() += 1;
174 }
175 }
176 "delete" => {
···178179 // accumulate counts
180 records_delta -= 1;
181- *collection_deltas
182- .entry(collection.to_smolstr())
183- .or_default() -= 1;
184 }
185 _ => {}
186 }
187188- let evt = StoredEvent::Record {
189- live,
190- did: did.clone().into_static(),
191- rev: commit.rev.as_str().into(),
192- collection: collection.into(),
193- rkey: rkey.into(),
194- action: op.action.as_str().into(),
195- cid: cid_str,
196 };
197198 let bytes = rmp_serde::to_vec(&evt).into_diagnostic()?;
199- batch.insert(&db.events, keys::event_key(event_id as i64), bytes);
200 events_count += 1;
201 }
202203 let start = Instant::now();
204205- batch.commit().into_diagnostic()?;
206 trace!("committed sync batch for {did} in {:?}", start.elapsed());
2070208 let blocks_count = parsed.blocks.len() as i64;
209- tokio::spawn({
210- let blocks_fut = (blocks_count > 0)
211- .then(|| db.increment_count(keys::count_keyspace_key("blocks"), blocks_count));
212- let records_fut = (records_delta != 0)
213- .then(|| db.increment_count(keys::count_keyspace_key("records"), records_delta));
214- let events_fut = (events_count > 0)
215- .then(|| db.increment_count(keys::count_keyspace_key("events"), events_count));
216- let collections_fut = collection_deltas
217- .into_iter()
218- .map(|(col, delta)| db.increment_count(keys::count_collection_key(&did, &col), delta))
219- .collect::<Vec<_>>();
220- futures::future::join_all(
221- blocks_fut
222- .into_iter()
223- .chain(records_fut)
224- .chain(events_fut)
225- .chain(collections_fut),
226- )
227- });
228229 let _ = db.event_tx.send(BroadcastEvent::Persisted(
230 db.next_event_id.load(Ordering::SeqCst) - 1,
231 ));
232233- Ok(())
00000000000000000234}
···1+use crate::db::types::TrimmedDid;
2+use crate::db::{self, keys, ser_repo_state, Db};
3+use crate::state::AppState;
4+use crate::types::{
5+ AccountEvt, BroadcastEvent, IdentityEvt, MarshallableEvt, RepoState, RepoStatus, ResyncState,
6+ StoredEvent,
7+};
8+use fjall::OwnedWriteBatch;
9use jacquard::api::com_atproto::sync::subscribe_repos::Commit;
10use jacquard::cowstr::ToCowStr;
11+use jacquard::types::cid::Cid;
12+use jacquard::CowStr;
13use jacquard_repo::car::reader::parse_car_bytes;
14+use miette::{Context, IntoDiagnostic, Result};
015use std::collections::HashMap;
16use std::sync::atomic::Ordering;
17use std::time::Instant;
18use tracing::{debug, trace};
19+20+pub fn send_backfill_req(state: &AppState, did: jacquard::types::did::Did<'static>) -> Result<()> {
21+ state
22+ .backfill_tx
23+ .send(did.clone())
24+ .map_err(|_| miette::miette!("failed to send backfill request for {did}"))?;
25+ let _ = state.blocked_dids.insert_sync(did);
26+ Ok(())
27+}
2829// emitting identity is ephemeral
30// we dont replay these, consumers can just fetch identity themselves if they need it
···52 let _ = db.event_tx.send(BroadcastEvent::Ephemeral(marshallable));
53}
5455+pub fn delete_repo<'batch>(
56+ batch: &'batch mut OwnedWriteBatch,
57+ db: &Db,
58+ did: &jacquard::types::did::Did,
59+) -> Result<()> {
60 debug!("deleting repo {did}");
061 let repo_key = keys::repo_key(did);
6263 // 1. delete from repos, pending, resync
64+ batch.remove(&db.repos, &repo_key);
65+ batch.remove(&db.pending, &repo_key);
66+ batch.remove(&db.resync, &repo_key);
000000006768+ // 2. delete from records (prefix: repo_key + SEP)
69+ let mut records_prefix = repo_key.as_bytes().to_vec();
70 records_prefix.push(keys::SEP);
0071 for guard in db.records.prefix(&records_prefix) {
72 let k = guard.key().into_diagnostic()?;
73 batch.remove(&db.records, k);
074 }
7576+ // 3. reset collection counts
77 let mut count_prefix = Vec::new();
78 count_prefix.push(b'r');
79 count_prefix.push(keys::SEP);
80+ count_prefix.extend_from_slice(TrimmedDid::from(did).as_bytes());
81 count_prefix.push(keys::SEP);
8283 for guard in db.counts.prefix(&count_prefix) {
···85 batch.remove(&db.counts, k);
86 }
87000000088 Ok(())
89}
9091+pub fn update_repo_status<'batch, 's>(
92+ batch: &'batch mut OwnedWriteBatch,
93 db: &Db,
94 did: &jacquard::types::did::Did,
95+ mut repo_state: RepoState<'s>,
96+ new_status: RepoStatus,
97+) -> Result<RepoState<'s>> {
98+ debug!("updating repo status for {did} to {new_status:?}");
0000099100+ let key = keys::repo_key(did);
101+102+ // manage queues
103+ match &new_status {
104+ RepoStatus::Synced => {
105+ batch.remove(&db.pending, &key);
106+ batch.remove(&db.resync, &key);
107+ }
108+ RepoStatus::Backfilling => {
109+ batch.insert(&db.pending, &key, &[]);
110+ batch.remove(&db.resync, &key);
111+ }
112+ RepoStatus::Error(msg) => {
113+ batch.remove(&db.pending, &key);
114+ let resync_state = ResyncState::Error {
115+ message: msg.clone(),
116+ retry_count: 0,
117+ next_retry: chrono::Utc::now().timestamp(),
118+ };
119+ batch.insert(
120+ &db.resync,
121+ &key,
122+ rmp_serde::to_vec(&resync_state).into_diagnostic()?,
123+ );
124+ }
125+ RepoStatus::Deactivated | RepoStatus::Takendown | RepoStatus::Suspended => {
126+ batch.remove(&db.pending, &key);
127+ let resync_state = ResyncState::Gone {
128+ status: new_status.clone(),
129+ };
130+ batch.insert(
131+ &db.resync,
132+ &key,
133+ rmp_serde::to_vec(&resync_state).into_diagnostic()?,
134+ );
135+ }
136 }
137+138+ repo_state.status = new_status;
139+ repo_state.last_updated_at = chrono::Utc::now().timestamp();
140+141+ batch.insert(&db.repos, &key, ser_repo_state(&repo_state)?);
142+143+ Ok(repo_state)
144}
145146+pub fn apply_commit<'batch, 'db>(
147+ batch: &'batch mut OwnedWriteBatch,
148+ db: &'db Db,
149+ mut repo_state: RepoState,
150+ commit: &Commit<'_>,
151+) -> Result<impl FnOnce() + use<'db>> {
152 let did = &commit.repo;
153 debug!("applying commit {} for {did}", &commit.commit);
154···162163 trace!("parsed car for {did} in {:?}", start.elapsed());
164165+ repo_state.rev = Some(commit.rev.clone());
166+ repo_state.data = Some(Cid::ipld(parsed.root));
167+ repo_state.last_updated_at = chrono::Utc::now().timestamp();
168+169+ batch.insert(&db.repos, keys::repo_key(did), ser_repo_state(&repo_state)?);
0170171 // store all blocks in the CAS
172 for (cid, bytes) in &parsed.blocks {
···180 // 2. iterate ops and update records index
181 let mut records_delta = 0;
182 let mut events_count = 0;
183+ let mut collection_deltas: HashMap<&str, i64> = HashMap::new();
184185 for op in &commit.ops {
186+ let (collection, rkey) = parse_path(&op.path)?;
000000187 let db_key = keys::record_key(did, collection, rkey);
188189 let event_id = db.next_event_id.fetch_add(1, Ordering::SeqCst);
19000191 match op.action.as_str() {
192 "create" | "update" => {
193 let Some(cid) = &op.cid else {
···195 };
196 let s = smol_str::SmolStr::from(cid.as_str());
197 batch.insert(&db.records, db_key, s.as_bytes().to_vec());
0198199 // accumulate counts
200 if op.action.as_str() == "create" {
201 records_delta += 1;
202+ *collection_deltas.entry(collection).or_default() += 1;
00203 }
204 }
205 "delete" => {
···207208 // accumulate counts
209 records_delta -= 1;
210+ *collection_deltas.entry(collection).or_default() -= 1;
00211 }
212 _ => {}
213 }
214215+ let evt = StoredEvent {
216+ did: TrimmedDid::from(did),
217+ rev: CowStr::Borrowed(commit.rev.as_str()),
218+ collection: CowStr::Borrowed(collection),
219+ rkey: CowStr::Borrowed(rkey),
220+ action: CowStr::Borrowed(op.action.as_str()),
221+ cid: op.cid.as_ref().map(|c| c.0.clone()),
0222 };
223224 let bytes = rmp_serde::to_vec(&evt).into_diagnostic()?;
225+ batch.insert(&db.events, keys::event_key(event_id), bytes);
226 events_count += 1;
227 }
228229 let start = Instant::now();
2300231 trace!("committed sync batch for {did} in {:?}", start.elapsed());
232233+ // update counts
234 let blocks_count = parsed.blocks.len() as i64;
235+ for (col, delta) in collection_deltas {
236+ db::update_record_count(batch, db, did, col, delta)?;
237+ }
0000000000000000238239 let _ = db.event_tx.send(BroadcastEvent::Persisted(
240 db.next_event_id.load(Ordering::SeqCst) - 1,
241 ));
242243+ Ok(move || {
244+ if blocks_count > 0 {
245+ db.update_count("blocks", blocks_count);
246+ }
247+ if records_delta != 0 {
248+ db.update_count("records", records_delta);
249+ }
250+ if events_count > 0 {
251+ db.update_count("events", events_count);
252+ }
253+ })
254+}
255+256+pub fn parse_path(path: &str) -> Result<(&str, &str)> {
257+ let mut parts = path.splitn(2, '/');
258+ let collection = parts.next().wrap_err("missing collection")?;
259+ let rkey = parts.next().wrap_err("missing rkey")?;
260+ Ok((collection, rkey))
261}