tangled
alpha
login
or
join now
parakeet.at
/
parakeet
63
fork
atom
Parakeet is a Rust-based Bluesky AppServer aiming to implement most of the functionality required to support the Bluesky client
appview
atproto
bluesky
rust
appserver
63
fork
atom
overview
issues
12
pulls
pipelines
feat: backfill spidering
mia.omg.lol
2 weeks ago
8f8bf0f0
924b6f6a
verified
This commit was signed with the committer's
known signature
.
mia.omg.lol
SSH Key Fingerprint:
SHA256:eb+NhC0QEl+XKRuFP/97oH6LEz0TXTKPXGDIAI5y7CQ=
+89
-7
4 changed files
expand all
collapse all
unified
split
crates
consumer
src
backfill
mod.rs
utils.rs
parakeet
src
xrpc
at_parakeet
admin.rs
mod.rs
+12
-7
crates/consumer/src/backfill/mod.rs
···
25
25
mod repo;
26
26
mod utils;
27
27
28
28
+
const BF_QUEUE: &str = "backfill_queue";
28
29
const DL_DUP_KEY: &str = "bf_completed";
29
30
// There's a 4MiB limit on parakeet-index, so break delta batches up if there's loads.
30
31
// this should be plenty low enough to not trigger the size limit. (59k did slightly)
···
80
81
break;
81
82
}
82
83
83
83
-
let did: String = match self.redis.lpop("backfill_queue", None).await {
84
84
+
let did: String = match self.redis.lpop(BF_QUEUE, None).await {
84
85
Ok(Some(did)) => did,
85
86
Ok(None) => {
86
87
tokio::time::sleep(Duration::from_millis(250)).await;
···
96
97
97
98
let inner = self.inner.clone();
98
99
let mut conn = self.pool.get().await?;
99
99
-
let rc = self.redis.clone();
100
100
+
let mut rc = self.redis.clone();
100
101
101
102
tracker.spawn(async move {
102
103
let _p = p;
103
104
tracing::trace!("backfilling {did}");
104
105
105
105
-
if let Err(e) = do_actor_backfill(&mut conn, rc, inner, &did).await {
106
106
+
if let Err(e) = do_actor_backfill(&mut conn, &mut rc, inner, &did).await {
106
107
tracing::error!(did, "backfill failed: {e}");
107
108
counter!("backfill_failure").increment(1);
108
109
} else {
···
111
112
db::backfill_job_write(&mut conn, &did, "successful")
112
113
.await
113
114
.unwrap();
115
115
+
116
116
+
if let Err(e) = utils::handle_spider(&mut conn, &mut rc, &did).await {
117
117
+
tracing::error!("failed to trigger spider for {did}: {e}");
118
118
+
}
114
119
}
115
120
});
116
121
}
···
123
128
124
129
async fn do_actor_backfill(
125
130
conn: &mut Object,
126
126
-
mut rc: MultiplexedConnection,
131
131
+
rc: &mut MultiplexedConnection,
127
132
mut inner: BackfillManagerInner,
128
133
did: &str,
129
134
) -> eyre::Result<()> {
···
191
196
}
192
197
}
193
198
194
194
-
utils::enforce_ratelimit(&mut rc, &pds).await?;
199
199
+
utils::enforce_ratelimit(rc, &pds).await?;
195
200
196
196
-
match backfill_repo(conn, &mut rc, &mut inner, &pds, did).await {
201
201
+
match backfill_repo(conn, rc, &mut inner, &pds, did).await {
197
202
Ok(Some((rem, reset))) => {
198
203
let _ = rc.zadd(utils::BF_REM_KEY, &pds, rem).await;
199
204
let _ = rc.zadd(utils::BF_RESET_KEY, &pds, reset).await;
···
203
208
pds,
204
209
"got response with no ratelimit headers, using defaults."
205
210
);
206
206
-
utils::handle_default_ratelimit(&mut rc, &pds).await?;
211
211
+
utils::handle_default_ratelimit(rc, &pds).await?;
207
212
}
208
213
Err(e) => {
209
214
tracing::error!(did, "backfill failed: {e}");
+41
crates/consumer/src/backfill/utils.rs
···
9
9
use std::time::Duration;
10
10
use tracing::instrument;
11
11
12
12
+
/// Redis hash consulted by `handle_spider`: field = DID, value = remaining spider depth.
const SPIDER_KEY: &str = "bf_spider";
12
13
/// Redis key under which a per-PDS ratelimit reset value is stored.
pub const BF_RESET_KEY: &str = "bf_ratelimit_reset";
13
14
/// Redis key under which a per-PDS remaining-request count is stored.
pub const BF_REM_KEY: &str = "bf_ratelimit_rem";
14
15
// NOTE(review): presumably the remaining-request budget assumed when a PDS sends no
// ratelimit headers; confirm against `handle_default_ratelimit`.
const BF_REM_DEFAULT: i32 = 1000;
16
16
+
17
17
+
pub async fn handle_spider(
18
18
+
conn: &mut Object,
19
19
+
rc: &mut MultiplexedConnection,
20
20
+
did: &str,
21
21
+
) -> eyre::Result<()> {
22
22
+
let Some(spider_count) =
23
23
+
redis::AsyncCommands::hget::<_, _, Option<i32>>(rc, SPIDER_KEY, did).await?
24
24
+
else {
25
25
+
return Ok(());
26
26
+
};
27
27
+
rc.hdel(SPIDER_KEY, did).await?;
28
28
+
29
29
+
let new_count = spider_count - 1;
30
30
+
31
31
+
let follows = conn
32
32
+
.query("SELECT subject FROM follows WHERE did=$1", &[&did])
33
33
+
.await?;
34
34
+
if follows.is_empty() {
35
35
+
return Ok(());
36
36
+
}
37
37
+
38
38
+
let follows = follows.iter().map(|v| v.get::<_, String>(0));
39
39
+
40
40
+
let items = follows
41
41
+
.clone()
42
42
+
.map(|follow| (follow, new_count))
43
43
+
.collect::<Vec<_>>();
44
44
+
45
45
+
if new_count > 0 {
46
46
+
// write all the new accounts
47
47
+
rc.hset_multiple(SPIDER_KEY, &items).await.unwrap();
48
48
+
}
49
49
+
50
50
+
// and then to backfill
51
51
+
let follows = follows.collect::<Vec<_>>();
52
52
+
rc.rpush(super::BF_QUEUE, &follows).await.unwrap();
53
53
+
54
54
+
Ok(())
55
55
+
}
15
56
16
57
#[derive(Debug, Deserialize)]
17
58
pub struct GetRepoStatusRes {
+35
crates/parakeet/src/xrpc/at_parakeet/admin.rs
···
53
53
54
54
Ok(())
55
55
}
56
56
+
57
57
+
#[derive(Debug, Deserialize)]
58
58
+
pub struct RequestSpiderReq {
59
59
+
pub depth: i32,
60
60
+
pub dids: Vec<String>,
61
61
+
}
62
62
+
63
63
+
pub async fn request_spider(
64
64
+
State(mut state): State<GlobalState>,
65
65
+
auth: AtpAuth,
66
66
+
Json(form): Json<RequestSpiderReq>,
67
67
+
) -> XrpcResult<()> {
68
68
+
if !check_admin_did(&state, &auth.0) {
69
69
+
return Err(Error::new(StatusCode::FORBIDDEN, "Forbidden", None));
70
70
+
}
71
71
+
72
72
+
let items = form
73
73
+
.dids
74
74
+
.iter()
75
75
+
.clone()
76
76
+
.map(|did| (did, form.depth))
77
77
+
.collect::<Vec<_>>();
78
78
+
79
79
+
if let Err(e) = state.redis_mp.hset_multiple("bf_spider", &items).await {
80
80
+
tracing::error!("failed to push to spider store: {e}");
81
81
+
return Err(Error::server_error(None));
82
82
+
}
83
83
+
84
84
+
if let Err(e) = state.redis_mp.rpush(BACKFILL_QUEUE, form.dids).await {
85
85
+
tracing::error!("failed to push to backfill queue: {e}");
86
86
+
return Err(Error::server_error(None));
87
87
+
}
88
88
+
89
89
+
Ok(())
90
90
+
}
+1
crates/parakeet/src/xrpc/at_parakeet/mod.rs
···
8
8
Router::new()
9
9
.route("/at.parakeet.admin.backfillQueueSize", get(admin::backfill_queue_size))
10
10
.route("/at.parakeet.admin.requestBackfill", post(admin::request_backfill))
11
11
+
.route("/at.parakeet.admin.requestSpider", post(admin::request_spider))
11
12
}
12
13
13
14
pub fn check_admin_did(state: &crate::GlobalState, did: &String) -> bool {