tangled
alpha
login
or
join now
ptr.pet
/
hydrant
24
fork
atom
AT Protocol indexer with flexible filtering, XRPC queries, and a cursor-backed event stream, built on fjall
at-protocol
atproto
indexer
rust
fjall
24
fork
atom
overview
issues
6
pulls
pipelines
[crawler] fix cursor resetting to Next(None)
ptr.pet
2 days ago
1e32f8a6
80f5c529
verified
This commit was signed with the committer's known signature.
ptr.pet
SSH Key Fingerprint:
SHA256:Abmvag+juovVufZTxyWY8KcVgrznxvBjQpJesv071Aw=
+22
-15
1 changed file
expand all
collapse all
unified
split
src
crawler
mod.rs
+22
-15
src/crawler/mod.rs
···
14
use rand::rngs::SmallRng;
15
use reqwest::StatusCode;
16
use serde::{Deserialize, Serialize};
17
-
use smol_str::{SmolStr, ToSmolStr};
18
use std::collections::HashMap;
19
use std::ops::{Add, Mul, Sub};
20
use std::sync::Arc;
···
143
144
#[derive(Debug, Serialize, Deserialize)]
145
enum Cursor {
146
-
Done,
147
Next(Option<SmolStr>),
148
}
149
···
235
let cursor = Self::get_cursor(&crawler).await.map_or_else(
236
|e| e.to_smolstr(),
237
|c| match c {
238
-
Cursor::Done => "done".to_smolstr(),
239
Cursor::Next(None) => "none".to_smolstr(),
240
Cursor::Next(Some(c)) => c.to_smolstr(),
241
},
···
299
300
let mut cursor = Self::get_cursor(&crawler).await?;
301
302
-
match cursor {
303
-
Cursor::Next(Some(ref cursor)) => info!(cursor = %cursor, "resuming"),
304
Cursor::Next(None) => info!("starting from scratch"),
305
-
Cursor::Done => info!("was done, resuming"),
306
}
307
308
let mut was_throttled = false;
···
473
miette::miette!("spawn_blocking task for parsing listRepos timed out")
474
})?;
475
476
-
let Ok(Some(ParseResult {
477
unknown_dids,
478
cursor: next_cursor,
479
count,
480
-
})) = parse_result
481
-
else {
482
-
info!("finished enumeration (or empty page)");
483
-
tokio::time::sleep(Duration::from_secs(3600)).await;
484
-
continue;
0
0
0
0
0
0
0
485
};
486
487
debug!(count, "fetched repos");
···
508
509
if let Some(new_cursor) = next_cursor {
510
cursor = Cursor::Next(Some(new_cursor.as_str().into()));
511
-
} else {
512
info!("reached end of list.");
513
-
cursor = Cursor::Done;
514
}
515
batch.insert(
516
&db.cursors,
···
540
541
crawler.account_new_repos(to_queue.len()).await;
542
543
-
if matches!(cursor, Cursor::Done) {
544
tokio::time::sleep(Duration::from_secs(3600)).await;
545
}
546
}
···
14
use rand::rngs::SmallRng;
15
use reqwest::StatusCode;
16
use serde::{Deserialize, Serialize};
17
+
use smol_str::{SmolStr, ToSmolStr, format_smolstr};
18
use std::collections::HashMap;
19
use std::ops::{Add, Mul, Sub};
20
use std::sync::Arc;
···
143
144
#[derive(Debug, Serialize, Deserialize)]
145
enum Cursor {
146
+
Done(SmolStr),
147
Next(Option<SmolStr>),
148
}
149
···
235
let cursor = Self::get_cursor(&crawler).await.map_or_else(
236
|e| e.to_smolstr(),
237
|c| match c {
238
+
Cursor::Done(c) => format_smolstr!("done({c})"),
239
Cursor::Next(None) => "none".to_smolstr(),
240
Cursor::Next(Some(c)) => c.to_smolstr(),
241
},
···
299
300
let mut cursor = Self::get_cursor(&crawler).await?;
301
302
+
match &cursor {
303
+
Cursor::Next(Some(c)) => info!(cursor = %c, "resuming"),
304
Cursor::Next(None) => info!("starting from scratch"),
305
+
Cursor::Done(c) => info!(cursor = %c, "was done, resuming"),
306
}
307
308
let mut was_throttled = false;
···
473
miette::miette!("spawn_blocking task for parsing listRepos timed out")
474
})?;
475
476
+
let ParseResult {
477
unknown_dids,
478
cursor: next_cursor,
479
count,
480
+
} = match parse_result {
481
+
Ok(Some(res)) => res,
482
+
Ok(None) => {
483
+
info!("finished enumeration (or empty page)");
484
+
if let Cursor::Next(Some(c)) = cursor {
485
+
info!("reached end of list.");
486
+
cursor = Cursor::Done(c);
487
+
}
488
+
tokio::time::sleep(Duration::from_secs(3600)).await;
489
+
continue;
490
+
}
491
+
Err(e) => return Err(e).wrap_err("error while crawling"),
492
};
493
494
debug!(count, "fetched repos");
···
515
516
if let Some(new_cursor) = next_cursor {
517
cursor = Cursor::Next(Some(new_cursor.as_str().into()));
518
+
} else if let Cursor::Next(Some(c)) = cursor {
519
info!("reached end of list.");
520
+
cursor = Cursor::Done(c);
521
}
522
batch.insert(
523
&db.cursors,
···
547
548
crawler.account_new_repos(to_queue.len()).await;
549
550
+
if matches!(cursor, Cursor::Done(_)) {
551
tokio::time::sleep(Duration::from_secs(3600)).await;
552
}
553
}