AT Protocol indexer with flexible filtering, XRPC queries, and a cursor-backed event stream, built on fjall
at-protocol atproto indexer rust fjall

[backfill] respect signals, excludes, collections filters properly

ptr.pet 7d3090b1 e2867db2

verified
+49 -7
+49 -7
src/backfill/mod.rs
··· 1 1 use crate::db::types::{DbAction, DbRkey, DbTid, TrimmedDid}; 2 2 use crate::db::{self, Db, keys, ser_repo_state}; 3 + use crate::filter::FilterMode; 3 4 use crate::ops; 4 5 use crate::resolver::ResolverError; 5 6 use crate::state::AppState; ··· 163 164 let db = &state.db; 164 165 165 166 match process_did(&state, &http, &did, verify_signatures).await { 166 - Ok(previous_state) => { 167 + Ok(Some(previous_state)) => { 167 168 let did_key = keys::repo_key(&did); 168 169 169 170 // determine old gauge state ··· 227 228 } 228 229 Ok(()) 229 230 } 231 + Ok(None) => { 232 + // signal mode: repo had no matching records, was cleaned up by process_did 233 + state.db.update_count_async("repos", -1).await; 234 + state.db.update_count_async("pending", -1).await; 235 + Ok(()) 236 + } 230 237 Err(e) => { 231 238 match &e { 232 239 BackfillError::Ratelimited => { ··· 371 378 http: &reqwest::Client, 372 379 did: &Did<'static>, 373 380 verify_signatures: bool, 374 - ) -> Result<RepoState<'static>, BackfillError> { 381 + ) -> Result<Option<RepoState<'static>>, BackfillError> { 375 382 debug!("backfilling {}", did); 376 383 377 384 let db = &app_state.db; ··· 427 434 let mut batch = db.inner.batch(); 428 435 ops::delete_repo(&mut batch, db, did, state)?; 429 436 batch.commit().into_diagnostic()?; 430 - return Ok(previous_state); // stop backfill 437 + return Ok(Some(previous_state)); // stop backfill 431 438 } 432 439 433 440 let inactive_status = match e { ··· 458 465 .await?; 459 466 460 467 // return success so wrapper stops retrying 461 - return Ok(previous_state); 468 + return Ok(Some(previous_state)); 462 469 } 463 470 464 471 Err(e).into_diagnostic()? ··· 520 527 521 528 // 6. 
insert records into db 522 529 let start = Instant::now(); 523 - let (_state, records_cnt_delta, added_blocks, count) = { 530 + let result = { 524 531 let app_state = app_state.clone(); 525 532 let did = did.clone(); 526 533 let rev = root_commit.rev; 527 534 528 535 tokio::task::spawn_blocking(move || { 536 + let filter = app_state.filter.load(); 529 537 let mut count = 0; 530 538 let mut delta = 0; 531 539 let mut added_blocks = 0; ··· 561 569 existing_cids.insert((collection.into(), rkey), cid); 562 570 } 563 571 572 + let mut signal_seen = filter.mode != FilterMode::Signal; 573 + 564 574 for (key, cid) in leaves { 565 575 let val_bytes = tokio::runtime::Handle::current() 566 576 .block_on(store.get(&cid)) ··· 568 578 569 579 if let Some(val) = val_bytes { 570 580 let (collection, rkey) = ops::parse_path(&key)?; 581 + 582 + if !filter.matches_collection(collection) { 583 + continue; 584 + } 585 + 586 + if !signal_seen && filter.matches_signal(collection) { 587 + signal_seen = true; 588 + } 589 + 571 590 let rkey = DbRkey::new(rkey); 572 591 let path = (collection.to_smolstr(), rkey.clone()); 573 592 let cid_obj = Cid::ipld(cid); ··· 639 658 count += 1; 640 659 } 641 660 661 + if !signal_seen { 662 + return Ok::<_, miette::Report>(None); 663 + } 664 + 642 665 // 6. update data, status is updated in worker shard 643 666 state.rev = Some((&rev).into()); 644 667 state.data = Some(root_commit.data); ··· 657 680 658 681 batch.commit().into_diagnostic()?; 659 682 660 - Ok::<_, miette::Report>((state, delta, added_blocks, count)) 683 + Ok::<_, miette::Report>(Some((state, delta, added_blocks, count))) 661 684 }) 662 685 .await 663 686 .into_diagnostic()?? 
664 687 }; 688 + 689 + let Some((_state, records_cnt_delta, added_blocks, count)) = result else { 690 + // signal mode: no signal-matching records found — clean up the optimistically-added repo 691 + let did_key = keys::repo_key(did); 692 + let backfill_pending_key = keys::pending_key(previous_state.index_id); 693 + let repos_ks = app_state.db.repos.clone(); 694 + let pending_ks = app_state.db.pending.clone(); 695 + let db_inner = app_state.db.inner.clone(); 696 + tokio::task::spawn_blocking(move || { 697 + let mut batch = db_inner.batch(); 698 + batch.remove(&repos_ks, &did_key); 699 + batch.remove(&pending_ks, backfill_pending_key); 700 + batch.commit().into_diagnostic() 701 + }) 702 + .await 703 + .into_diagnostic()??; 704 + return Ok(None); 705 + }; 706 + 665 707 trace!("did {count} ops for {did} in {:?}", start.elapsed()); 666 708 667 709 // do the counts ··· 685 727 )); 686 728 687 729 trace!("backfill complete for {did}"); 688 - Ok(previous_state) 730 + Ok(Some(previous_state)) 689 731 }