tangled
alpha
login
or
join now
ptr.pet
/
Allegedly
forked from
microcosm.blue/Allegedly
0
fork
atom
Server tools to backfill, tail, mirror, and verify PLC logs
0
fork
atom
overview
issues
pulls
pipelines
fixup export polling
bad-example.com
6 months ago
a9cd495a
11d75ac7
+164
-55
7 changed files
expand all
collapse all
unified
split
Cargo.lock
Cargo.toml
src
bin
get_backfill_chunk_adsf.rs
main.rs
tail_export.rs
lib.rs
poll.rs
+21
Cargo.lock
···
41
41
"reqwest",
42
42
"serde",
43
43
"serde_json",
44
44
+
"thiserror",
44
45
"tokio",
45
46
"tokio-postgres",
46
47
"url",
···
1758
1759
"once_cell",
1759
1760
"rustix",
1760
1761
"windows-sys 0.61.0",
1762
1762
+
]
1763
1763
+
1764
1764
+
[[package]]
1765
1765
+
name = "thiserror"
1766
1766
+
version = "2.0.16"
1767
1767
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1768
1768
+
checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
1769
1769
+
dependencies = [
1770
1770
+
"thiserror-impl",
1771
1771
+
]
1772
1772
+
1773
1773
+
[[package]]
1774
1774
+
name = "thiserror-impl"
1775
1775
+
version = "2.0.16"
1776
1776
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1777
1777
+
checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
1778
1778
+
dependencies = [
1779
1779
+
"proc-macro2",
1780
1780
+
"quote",
1781
1781
+
"syn",
1761
1782
]
1762
1783
1763
1784
[[package]]
+1
Cargo.toml
···
16
16
reqwest = { version = "0.12.23", features = ["stream"] }
17
17
serde = "1.0.219"
18
18
serde_json = { version = "1.0.143", features = ["raw_value"] }
19
19
+
thiserror = "2.0.16"
19
20
tokio = { version = "1.47.1", features = ["full"] }
20
21
tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
21
22
url = "2.5.7"
src/bin/blah.rs
src/bin/get_backfill_chunk_adsf.rs
+28
-55
src/bin/main.rs
···
4
4
use tokio_postgres::NoTls;
5
5
use url::Url;
6
6
7
7
-
use allegedly::{ExportPage, week_to_pages};
7
7
+
use allegedly::{ExportPage, poll_upstream, week_to_pages};
8
8
9
9
const EXPORT_PAGE_QUEUE_SIZE: usize = 0; // rendezvous for now
10
10
-
const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(500);
11
10
const WEEK_IN_SECONDS: u64 = 7 * 86400;
12
11
13
12
#[derive(Parser)]
···
45
44
46
45
#[derive(Deserialize)]
47
46
#[serde(rename_all = "camelCase")]
48
48
-
struct OpPeek {
49
49
-
pub created_at: chrono::DateTime<chrono::Utc>,
50
50
-
}
51
51
-
52
52
-
#[derive(Deserialize)]
53
53
-
#[serde(rename_all = "camelCase")]
54
47
struct Op<'a> {
55
48
pub did: &'a str,
56
49
pub cid: &'a str,
···
85
78
upstream: Url,
86
79
bulk: (Url, u64),
87
80
tx: flume::Sender<ExportPage>,
88
88
-
latest: Option<chrono::DateTime<chrono::Utc>>,
81
81
+
pg_client: tokio_postgres::Client,
89
82
) {
83
83
+
let latest = get_latest(&pg_client).await;
90
84
let client = reqwest::Client::builder()
91
85
.user_agent(concat!(
92
86
"allegedly v",
···
99
93
if latest.is_none() {
100
94
bulk_backfill(client.clone(), bulk, tx.clone()).await;
101
95
}
102
102
-
103
96
let mut upstream = upstream;
104
97
upstream.set_path("/export");
105
105
-
let mut after = latest;
106
106
-
let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL);
107
107
-
108
108
-
loop {
109
109
-
tick.tick().await;
110
110
-
let mut url = upstream.clone();
111
111
-
if let Some(ref after) = after {
112
112
-
url.query_pairs_mut()
113
113
-
.append_pair("after", &after.to_rfc3339());
114
114
-
}
115
115
-
let ops = client
116
116
-
.get(url)
117
117
-
.send()
118
118
-
.await
119
119
-
.unwrap()
120
120
-
.error_for_status()
121
121
-
.unwrap()
122
122
-
.text()
123
123
-
.await
124
124
-
.unwrap()
125
125
-
.trim()
126
126
-
.to_string();
127
127
-
128
128
-
let Some((_, last_line)) = ops.rsplit_once('\n') else {
129
129
-
log::trace!("no ops in response page, nothing to do");
130
130
-
continue;
131
131
-
};
132
132
-
133
133
-
let op: OpPeek = serde_json::from_str(last_line).unwrap();
134
134
-
after = Some(op.created_at);
135
135
-
136
136
-
log::trace!("got some ops until {after:?}, sending them...");
137
137
-
let ops = ops.split('\n').map(Into::into).collect();
138
138
-
tx.send_async(ExportPage { ops }).await.unwrap();
139
139
-
}
98
98
+
poll_upstream(&client, latest, upstream, tx).await.unwrap();
140
99
}
141
100
142
101
async fn write_pages(
···
221
180
Ok(())
222
181
}
223
182
183
183
+
async fn get_latest(pg_client: &tokio_postgres::Client) -> Option<chrono::DateTime<chrono::Utc>> {
184
184
+
pg_client
185
185
+
.query_opt(
186
186
+
r#"SELECT "createdAt" FROM operations
187
187
+
ORDER BY "createdAt" DESC LIMIT 1"#,
188
188
+
&[],
189
189
+
)
190
190
+
.await
191
191
+
.unwrap()
192
192
+
.map(|r| r.get(0))
193
193
+
}
194
194
+
224
195
#[tokio::main]
225
196
async fn main() {
226
197
env_logger::init();
···
241
212
}
242
213
});
243
214
244
244
-
let latest = pg_client
245
245
-
.query_opt(
246
246
-
r#"SELECT "createdAt" FROM operations
247
247
-
ORDER BY "createdAt" DESC LIMIT 1"#,
248
248
-
&[],
249
249
-
)
215
215
+
log::trace!("connecting postgres 2...");
216
216
+
let (pg_client2, connection2) = tokio_postgres::connect(&args.postgres, NoTls)
250
217
.await
251
251
-
.unwrap()
252
252
-
.map(|r| r.get(0));
218
218
+
.unwrap();
253
219
254
254
-
log::info!("connected! latest: {latest:?}");
220
220
+
// send the connection away to do the actual communication work
221
221
+
// TODO: error and shutdown handling
222
222
+
let conn_task2 = tokio::task::spawn(async move {
223
223
+
if let Err(e) = connection2.await {
224
224
+
eprintln!("connection error: {e}");
225
225
+
}
226
226
+
});
255
227
256
228
let (tx, rx) = flume::bounded(EXPORT_PAGE_QUEUE_SIZE);
257
229
···
259
231
args.upstream,
260
232
(args.upstream_bulk, args.bulk_epoch),
261
233
tx,
262
262
-
latest,
234
234
+
pg_client2,
263
235
));
264
236
let writer_task = tokio::task::spawn(write_pages(rx, pg_client));
265
237
266
238
tokio::select! {
267
239
z = conn_task => log::warn!("connection task ended: {z:?}"),
240
240
+
z = conn_task2 => log::warn!("connection task ended: {z:?}"),
268
241
z = export_task => log::warn!("export task ended: {z:?}"),
269
242
z = writer_task => log::warn!("writer task ended: {z:?}"),
270
243
};
+44
src/bin/tail_export.rs
···
1
1
+
use allegedly::OpPeek;
2
2
+
use url::Url;
3
3
+
4
4
+
async fn get_page(client: &reqwest::Client, url: Url) -> Vec<String> {
5
5
+
client
6
6
+
.get(url)
7
7
+
.send()
8
8
+
.await
9
9
+
.unwrap()
10
10
+
.error_for_status()
11
11
+
.unwrap()
12
12
+
.text()
13
13
+
.await
14
14
+
.unwrap()
15
15
+
.trim()
16
16
+
.split('\n')
17
17
+
.map(Into::into)
18
18
+
.collect()
19
19
+
}
20
20
+
21
21
+
#[tokio::main]
22
22
+
async fn main() {
23
23
+
let client = reqwest::Client::builder()
24
24
+
.user_agent(concat!(
25
25
+
"allegedly (export) v",
26
26
+
env!("CARGO_PKG_VERSION"),
27
27
+
" (from @microcosm.blue; contact @bad-example.com)"
28
28
+
))
29
29
+
.build()
30
30
+
.unwrap();
31
31
+
32
32
+
let mut url = Url::parse("https://plc.directory/export").unwrap();
33
33
+
let ops = get_page(&client, url.clone()).await;
34
34
+
35
35
+
println!("first: {:?}", ops.first());
36
36
+
37
37
+
if let Some(last_line) = ops.last() {
38
38
+
let x: OpPeek = serde_json::from_str(last_line).unwrap();
39
39
+
url.query_pairs_mut()
40
40
+
.append_pair("after", &x.created_at.to_rfc3339());
41
41
+
let ops2 = get_page(&client, url).await;
42
42
+
println!("2nd: {:?}", ops2.first());
43
43
+
}
44
44
+
}
+10
src/lib.rs
···
1
1
+
use serde::Deserialize;
2
2
+
1
3
mod backfill;
4
4
+
mod poll;
2
5
3
6
pub use backfill::week_to_pages;
7
7
+
pub use poll::poll_upstream;
4
8
5
9
/// One page of PLC export
6
10
///
···
8
12
pub struct ExportPage {
9
13
pub ops: Vec<String>,
10
14
}
15
15
+
16
16
+
#[derive(Deserialize)]
17
17
+
#[serde(rename_all = "camelCase")]
18
18
+
pub struct OpPeek {
19
19
+
pub created_at: chrono::DateTime<chrono::Utc>,
20
20
+
}
+60
src/poll.rs
···
1
1
+
use crate::{ExportPage, OpPeek};
2
2
+
use chrono::{DateTime, Utc};
3
3
+
use std::time::Duration;
4
4
+
use thiserror::Error;
5
5
+
use url::Url;
6
6
+
7
7
+
const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(500);
8
8
+
9
9
+
#[derive(Debug, Error)]
10
10
+
pub enum GetPageError {
11
11
+
#[error(transparent)]
12
12
+
ReqwestError(#[from] reqwest::Error),
13
13
+
#[error(transparent)]
14
14
+
SerdeError(#[from] serde_json::Error),
15
15
+
}
16
16
+
17
17
+
pub async fn get_page(
18
18
+
client: &reqwest::Client,
19
19
+
url: Url,
20
20
+
) -> Result<(ExportPage, Option<DateTime<Utc>>), GetPageError> {
21
21
+
let ops: Vec<String> = client
22
22
+
.get(url)
23
23
+
.send()
24
24
+
.await?
25
25
+
.error_for_status()?
26
26
+
.text()
27
27
+
.await?
28
28
+
.trim()
29
29
+
.split('\n')
30
30
+
.map(Into::into)
31
31
+
.collect();
32
32
+
33
33
+
let last_at = ops
34
34
+
.last()
35
35
+
.map(|s| serde_json::from_str::<OpPeek>(s))
36
36
+
.transpose()?
37
37
+
.map(|o| o.created_at);
38
38
+
39
39
+
Ok((ExportPage { ops }, last_at))
40
40
+
}
41
41
+
42
42
+
pub async fn poll_upstream(
43
43
+
client: &reqwest::Client,
44
44
+
after: Option<DateTime<Utc>>,
45
45
+
base: Url,
46
46
+
dest: flume::Sender<ExportPage>,
47
47
+
) -> anyhow::Result<()> {
48
48
+
let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL);
49
49
+
let mut after = after;
50
50
+
loop {
51
51
+
tick.tick().await;
52
52
+
let mut url = base.clone();
53
53
+
if let Some(a) = after {
54
54
+
url.query_pairs_mut().append_pair("after", &a.to_rfc3339());
55
55
+
};
56
56
+
let (page, next_after) = get_page(client, url).await?;
57
57
+
dest.send_async(page).await?;
58
58
+
after = next_after;
59
59
+
}
60
60
+
}