···23thiserror = "2.0.16"
24tokio = { version = "1.47.1", features = ["full"] }
25tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
0026url = "2.5.7"
···23thiserror = "2.0.16"
24tokio = { version = "1.47.1", features = ["full"] }
25tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
26+tokio-stream = { version = "0.1.17", features = ["io-util"] }
27+tokio-util = { version = "0.7.16", features = ["compat"] }
28url = "2.5.7"
+1
readme.md
···67- Tail PLC ops to stdout: `allegedly tail | jq`
8- Export PLC ops to weekly gzipped bundles: `allegedly bundle --dest ./some-folder`
0910(add `--help` to any command for more info about it)
11
···67- Tail PLC ops to stdout: `allegedly tail | jq`
8- Export PLC ops to weekly gzipped bundles: `allegedly bundle --dest ./some-folder`
9+- Dump bundled ops to stdout FAST: `allegedly backfill --source-workers 6 | pv -l > /ops-unordered.jsonl`
1011(add `--help` to any command for more info about it)
12
···1+use allegedly::{HttpSource, Week, week_to_pages};
2+use std::io::Write;
034#[tokio::main]
5async fn main() {
6+ let url: url::Url = "https://plc.t3.storage.dev/plc.directory/".parse().unwrap();
7+ let source = HttpSource(url);
8+ // let source = FolderSource("./weekly/".into());
9+ let week = Week::from_n(1699488000);
00000001011+ let (tx, rx) = flume::bounded(32);
12+13+ tokio::task::spawn(async move {
14+ week_to_pages(source, week, tx).await.unwrap();
15+ });
16+17+ let mut n = 0;
18+19+ print!("receiving");
20+ while let Ok(page) = rx.recv_async().await {
21 print!(".");
22+ std::io::stdout().flush().unwrap();
23+ n += page.ops.len();
24 }
25 println!();
26+27+ println!("bye ({n})");
28+29+ // let reader = CLIENT
30+ // .get("https://plc.t3.storage.dev/plc.directory/1699488000.jsonl.gz")
31+ // // .get("https://plc.t3.storage.dev/plc.directory/1669248000.jsonl.gz")
32+ // .send()
33+ // .await
34+ // .unwrap()
35+ // .error_for_status()
36+ // .unwrap()
37+ // .bytes_stream()
38+ // .map_err(io::Error::other)
39+ // .into_async_read();
40+41+ // let decoder = GzipDecoder::new(io::BufReader::new(reader));
42+ // let mut chunks = io::BufReader::new(decoder).lines().chunks(1000);
43+ // while let Some(ref _chunk) = chunks.next().await {
44+ // print!(".");
45+ // }
46+ // println!();
47}
+3-3
src/lib.rs
···6mod poll;
7mod weekly;
89-pub use backfill::week_to_pages;
10pub use client::CLIENT;
11pub use plc_pg::Db;
12pub use poll::{get_page, poll_upstream};
13-pub use weekly::{Week, pages_to_weeks};
1415pub type Dt = chrono::DateTime<chrono::Utc>;
1617/// One page of PLC export
18///
19-/// Expected to have up to around 1000 lines of raw json ops
20#[derive(Debug)]
21pub struct ExportPage {
22 pub ops: Vec<String>,
···6mod poll;
7mod weekly;
89+pub use backfill::backfill;
10pub use client::CLIENT;
11pub use plc_pg::Db;
12pub use poll::{get_page, poll_upstream};
13+pub use weekly::{BundleSource, FolderSource, HttpSource, Week, pages_to_weeks, week_to_pages};
1415pub type Dt = chrono::DateTime<chrono::Utc>;
1617/// One page of PLC export
18///
19+/// plc.directory caps /export at 1000 ops; backfill tasks may send more in a page.
20#[derive(Debug)]
21pub struct ExportPage {
22 pub ops: Vec<String>,