···45Allegedly can
67-- Tail PLC ops to stdout
0000008- Copy ops to postgres for a mirror running the [reference typescript implementation](https://github.com/did-method-plc/did-method-plc)
···45Allegedly can
67+- Tail PLC ops to stdout: `allegedly tail | jq`
8+- Export PLC ops to weekly gzipped bundles: `allegedly bundle --dest ./some-folder`
9+10+(add `--help` to any command for more info about it)
11+12+also can:
13+14- Copy ops to postgres for a mirror running the [reference typescript implementation](https://github.com/did-method-plc/did-method-plc)
-55
src/bin/bundle-weekly.rs
···1-use allegedly::{Week, bin_init, pages_to_weeks, poll_upstream};
2-use clap::Parser;
3-use std::path::PathBuf;
4-use url::Url;
// How many export pages the poller may buffer ahead of the bundler
// (bounded so a stalled gzip writer applies backpressure to polling).
const PAGE_QUEUE_SIZE: usize = 128;
7-8-#[derive(Parser)]
9-struct Args {
10- /// Upstream PLC server to poll
11- ///
12- /// default: https://plc.directory
13- #[arg(long, env)]
14- #[clap(default_value = "https://plc.directory")]
15- upstream: Url,
16- /// Directory to save gzipped weekly bundles
17- ///
18- /// default: ./weekly/
19- #[arg(long, env)]
20- #[clap(default_value = "./weekly/")]
21- dir: PathBuf,
22- /// The week to start from
23- ///
24- /// Must be a week-truncated unix timestamp
25- #[arg(long, env)]
26- start_at: Option<i64>,
27-}
28-29-#[tokio::main]
30-async fn main() -> anyhow::Result<()> {
31- bin_init("weekly");
32- let args = Args::parse();
33-34- let mut url = args.upstream;
35- url.set_path("/export");
36-37- let after = args.start_at.map(|n| Week::from_n(n).into());
38-39- log::trace!("ensure weekly output directory exists");
40- std::fs::create_dir_all(&args.dir)?;
41-42- let (tx, rx) = flume::bounded(PAGE_QUEUE_SIZE);
43-44- tokio::task::spawn(async move {
45- if let Err(e) = poll_upstream(after, url, tx).await {
46- log::error!("polling failed: {e}");
47- } else {
48- log::warn!("poller finished ok (weird?)");
49- }
50- });
51-52- pages_to_weeks(rx, args.dir).await?;
53-54- Ok(())
55-}
···1-use allegedly::{Dt, bin_init, poll_upstream};
2use clap::{Parser, Subcommand};
03use url::Url;
45#[derive(Debug, Parser)]
···1415#[derive(Debug, Subcommand)]
16enum Commands {
00000000000000000000017 /// Poll an upstream PLC server and log new ops to stdout
18 Tail {
19 /// Begin tailing from a specific timestamp for replay or wait-until
···29 let args = Cli::parse();
3031 match args.command {
000000000000032 Commands::Tail { after } => {
33 let mut url = args.upstream;
34 url.set_path("/export");
35 let start_at = after.or_else(|| Some(chrono::Utc::now()));
36- let (tx, rx) = flume::bounded(0); // rendezvous
37 tokio::task::spawn(async move { poll_upstream(start_at, url, tx).await.unwrap() });
38 loop {
39 for op in rx.recv_async().await.unwrap().ops {
···1+use allegedly::{Dt, bin_init, pages_to_weeks, poll_upstream};
2use clap::{Parser, Subcommand};
3+use std::path::PathBuf;
4use url::Url;
56#[derive(Debug, Parser)]
···1516#[derive(Debug, Subcommand)]
17enum Commands {
18+ /// Scrape a PLC server, collecting ops into weekly bundles
19+ ///
20+ /// Bundles are gzipped files named `<WEEK>.jsonl.gz` where WEEK is a unix
21+ /// timestamp rounded down to a multiple of 604,800 (one week in seconds).
22+ ///
23+ /// Will stop by default at floor((now - 73hrs) / one week) * one week. PLC
24+ /// operations can be invalidated within 72 hrs, so stopping before that
25+ /// time ensures that the bundles are (hopefully) immutable.
26+ Bundle {
27+ /// Where to save the bundled files
28+ #[arg(short, long)]
29+ #[clap(default_value = "./weekly/")]
30+ dest: PathBuf,
31+ /// Start the export from this time. Should be a week boundary.
32+ #[arg(short, long)]
33+ #[clap(default_value = "2022-11-17T00:00:00Z")]
34+ after: Dt,
35+ /// Overwrite existing files, if present
36+ #[arg(long, action)]
37+ clobber: bool,
38+ },
39 /// Poll an upstream PLC server and log new ops to stdout
40 Tail {
41 /// Begin tailing from a specific timestamp for replay or wait-until
···51 let args = Cli::parse();
5253 match args.command {
54+ Commands::Bundle {
55+ dest,
56+ after,
57+ clobber,
58+ } => {
59+ let mut url = args.upstream;
60+ url.set_path("/export");
61+ let (tx, rx) = flume::bounded(32); // read ahead if gzip stalls for some reason
62+ tokio::task::spawn(async move { poll_upstream(Some(after), url, tx).await.unwrap() });
63+ log::trace!("ensuring output directory exists");
64+ std::fs::create_dir_all(&dest).unwrap();
65+ pages_to_weeks(rx, dest, clobber).await.unwrap();
66+ }
67 Commands::Tail { after } => {
68 let mut url = args.upstream;
69 url.set_path("/export");
70 let start_at = after.or_else(|| Some(chrono::Utc::now()));
71+ let (tx, rx) = flume::bounded(0); // rendezvous, don't read ahead
72 tokio::task::spawn(async move { poll_upstream(start_at, url, tx).await.unwrap() });
73 loop {
74 for op in rx.recv_async().await.unwrap().ops {