···1010use rand::RngExt;
1111use rand::rngs::SmallRng;
1212use reqwest::StatusCode;
1313+use serde::{Deserialize, Serialize};
1314use smol_str::SmolStr;
1415use std::future::Future;
1516use std::ops::Mul;
···376377 )
377378}
/// position of the repo listing crawl, loaded from the `crawler_cursor` entry.
///
/// the crawl loop decodes the stored bytes with `rmp_serde::from_slice`; when no
/// entry exists it starts from `Cursor::Next(None)`.
///
/// - `Done`: the last response carried no cursor ("reached end of list."); the
///   loop sleeps for an hour before its next iteration.
/// - `Next(Some(c))`: `c` is appended as the `cursor` query parameter of the next
///   list request.
/// - `Next(None)`: no `cursor` parameter is appended.
//
// NOTE(review): the previous loader read this entry as a raw utf-8 string; such
// bytes are presumably not a valid encoding of this enum, and the decode error is
// propagated with `?` — confirm old databases are migrated or handled.
// NOTE(review): in the visible code only the `Next` branch writes the cursor
// entry — confirm whether `Done` is ever persisted.
378379380380+#[derive(Debug, Serialize, Deserialize)]
381381+enum Cursor {
382382+ Done,
383383+ Next(Option<SmolStr>),
384384+}
385385+379386pub mod throttle;
380380-use throttle::{OrThrottle, Throttler};
387387+use throttle::{OrFailure, Throttler};
381388382389pub struct Crawler {
383390 state: Arc<AppState>,
···505512 let db = &crawler.state.db;
506513507514 let cursor_key = b"crawler_cursor";
508508- let mut cursor: Option<SmolStr> = Db::get(db.cursors.clone(), cursor_key.to_vec())
509509- .await?
510510- .map(|bytes| {
511511- let s = String::from_utf8_lossy(&bytes);
512512- info!(cursor = %s, "resuming");
513513- s.into()
514514- });
515515+ let cursor_bytes = Db::get(db.cursors.clone(), cursor_key.to_vec()).await?;
516516+ let mut cursor: Cursor = cursor_bytes
517517+ .as_deref()
518518+ .map(rmp_serde::from_slice)
519519+ .transpose()
520520+ .into_diagnostic()?
521521+ .unwrap_or(Cursor::Next(None));
515522 let mut was_throttled = false;
516523517524 loop {
···569576 list_repos_url
570577 .query_pairs_mut()
571578 .append_pair("limit", "1000");
572572- if let Some(c) = &cursor {
579579+ if let Cursor::Next(Some(c)) = &cursor {
573580 list_repos_url
574581 .query_pairs_mut()
575582 .append_pair("cursor", c.as_str());
···664671 }
665672666673 if let Some(new_cursor) = output.cursor {
667667- cursor = Some(new_cursor.as_str().into());
674674+ cursor = Cursor::Next(Some(new_cursor.as_str().into()));
668675 batch.insert(
669676 &db.cursors,
670677 cursor_key.to_vec(),
···672679 );
673680 } else {
674681 info!("reached end of list.");
675675- cursor = None;
682682+ cursor = Cursor::Done;
676683 }
677684678685 tokio::task::spawn_blocking(move || batch.commit().into_diagnostic())
···681688682689 crawler.account_new_repos(to_queue.len()).await;
683690684684- if cursor.is_none() {
691691+ if matches!(cursor, Cursor::Done) {
685692 tokio::time::sleep(Duration::from_secs(3600)).await;
686693 }
687694 }
···699706700707 let mut rng: SmallRng = rand::make_rng();
701708709709+ let mut batch = db.inner.batch();
702710 for guard in db.crawler.prefix(keys::CRAWLER_RETRY_PREFIX) {
703711 let (key, val) = guard.into_inner().into_diagnostic()?;
704704- let (retry_after, _) = keys::crawler_retry_parse_value(&val)?;
712712+ let (retry_after, _) = match keys::crawler_retry_parse_value(&val) {
713713+ Ok(x) => x,
714714+ Err(_) => {
715715+ // this handles the old db format
716716+ // todo: remove this later!! its just for testing...
717717+ let retry_after = now + 60 * 5;
718718+ batch.insert(
719719+ &db.crawler,
720720+ key.clone(),
721721+ keys::crawler_retry_value(retry_after, 0),
722722+ );
723723+ (retry_after, 0)
724724+ }
725725+ };
705726 let did = keys::crawler_retry_parse_key(&key)?.to_did();
706727707728 // we check an extra backoff of 1 - 7% just to make it less likely for
···727748 info!(count = ready.len(), "retrying pending repos");
728749729750 let handle = tokio::runtime::Handle::current();
730730- let mut batch = db.inner.batch();
731751 let filter = self.state.filter.load();
732752 let valid_dids = handle.block_on(self.check_signals_batch(&ready, &filter, &mut batch))?;
733753
+3-6
src/crawler/throttle.rs
···148148 }
149149}
150150151151-/// extension trait that adds `.or_throttle()` to any future returning `Result<T, E>`.
152152-///
153153-/// races the future against a hard-failure notification. soft ratelimits (429) do NOT
154154-/// trigger cancellation — those are handled by the background retry loop.
151151+/// adds a method for racing the future against a hard-failure notification.
155152#[allow(async_fn_in_trait)]
156156-pub trait OrThrottle<T, E>: Future<Output = Result<T, E>> {
153153+pub trait OrFailure<T, E>: Future<Output = Result<T, E>> {
157154 async fn or_failure(
158155 self,
159156 handle: &ThrottleHandle,
···169166 }
170167}
171168172172-impl<T, E, F: Future<Output = Result<T, E>>> OrThrottle<T, E> for F {}
// blanket impl: every future whose output is a `Result<T, E>` gets `or_failure`
// from the trait's provided method (the impl body is empty).
169169+impl<T, E, F: Future<Output = Result<T, E>>> OrFailure<T, E> for F {}