Parakeet is a Rust-based Bluesky AppServer aiming to implement most of the functionality required to support the Bluesky client.
appview atproto bluesky rust appserver

fix(consumer): use a mutex when checking status to stop a race

mia.omg.lol 885cb966 637cb01d

verified
+23 -1
+23 -1
crates/consumer/src/backfill/mod.rs
··· 13 13 use redis::aio::MultiplexedConnection; 14 14 use redis::AsyncTypedCommands; 15 15 use reqwest::Client; 16 + use std::collections::HashSet; 16 17 use std::path::PathBuf; 17 18 use std::str::FromStr; 18 19 use std::sync::Arc; 19 20 use std::time::Duration; 20 21 use tokio::sync::watch::Receiver as WatchReceiver; 21 - use tokio::sync::Semaphore; 22 + use tokio::sync::{Mutex, Semaphore}; 22 23 use tokio_util::task::TaskTracker; 23 24 use tracing::instrument; 24 25 ··· 32 33 33 34 #[derive(Clone)] 34 35 pub struct BackfillManagerInner { 36 + // we don't need to store anything, just ensure only one thread in the status check at a time 37 + status_lookup_lock: Arc<Mutex<HashSet<String>>>, 35 38 index_client: Option<parakeet_index::Client>, 36 39 tmp_dir: PathBuf, 37 40 resolver: JacquardResolver, ··· 57 60 58 61 let client = Client::builder().brotli(true).build()?; 59 62 63 + let current_tasks = Arc::new(Mutex::new(HashSet::new())); 64 + 60 65 Ok(BackfillManager { 61 66 pool, 62 67 redis, 63 68 semaphore, 64 69 inner: BackfillManagerInner { 70 + status_lookup_lock: current_tasks, 65 71 index_client, 66 72 tmp_dir: PathBuf::from_str(&opts.download_tmp_dir)?, 67 73 resolver, ··· 131 137 mut inner: BackfillManagerInner, 132 138 did: &str, 133 139 ) -> eyre::Result<()> { 140 + let mut l = inner.status_lookup_lock.lock().await; 141 + 134 142 // has the repo already been downloaded? 
143 + if l.contains(did) { 144 + tracing::info!("skipping duplicate repo {did}"); 145 + return Ok(()); 146 + } else { 147 + l.insert(did.to_string()); 148 + } 149 + 135 150 match db::actor_get_statuses(conn, did).await { 136 151 Ok(Some((_, state))) => { 137 152 if state == ActorSyncState::Synced || state == ActorSyncState::Processing { ··· 153 168 &[&did], 154 169 ) 155 170 .await?; 171 + 172 + drop(l); 156 173 157 174 let jd = Did::raw(did); 158 175 let (pds, handle) = match utils::resolve_service(&inner.resolver, &jd).await { ··· 208 225 db::actor_set_sync_status(conn, did, ActorSyncState::Dirty, Utc::now()).await?; 209 226 db::backfill_job_write(conn, did, "failed.write").await?; 210 227 } 228 + } 229 + 230 + { 231 + let mut l = inner.status_lookup_lock.lock().await; 232 + l.remove(did); 211 233 } 212 234 213 235 Ok(())