···4455Allegedly can
6677-- Tail PLC ops to stdout
77+- Tail PLC ops to stdout: `allegedly tail | jq`
88+- Export PLC ops to weekly gzipped bundles: `allegedly bundle --dest ./some-folder`
99+1010+(add `--help` to any command for more info about it)
1111+1212+also can:
1313+814- Copy ops to postgres for a mirror running the [reference typescript implementation](https://github.com/did-method-plc/did-method-plc)
-55
src/bin/bundle-weekly.rs
···11-use allegedly::{Week, bin_init, pages_to_weeks, poll_upstream};
22-use clap::Parser;
33-use std::path::PathBuf;
44-use url::Url;
55-66-const PAGE_QUEUE_SIZE: usize = 128;
77-88-#[derive(Parser)]
99-struct Args {
1010- /// Upstream PLC server to poll
1111- ///
1212- /// default: https://plc.directory
1313- #[arg(long, env)]
1414- #[clap(default_value = "https://plc.directory")]
1515- upstream: Url,
1616- /// Directory to save gzipped weekly bundles
1717- ///
1818- /// default: ./weekly/
1919- #[arg(long, env)]
2020- #[clap(default_value = "./weekly/")]
2121- dir: PathBuf,
2222- /// The week to start from
2323- ///
2424- /// Must be a week-truncated unix timestamp
2525- #[arg(long, env)]
2626- start_at: Option<i64>,
2727-}
2828-2929-#[tokio::main]
3030-async fn main() -> anyhow::Result<()> {
3131- bin_init("weekly");
3232- let args = Args::parse();
3333-3434- let mut url = args.upstream;
3535- url.set_path("/export");
3636-3737- let after = args.start_at.map(|n| Week::from_n(n).into());
3838-3939- log::trace!("ensure weekly output directory exists");
4040- std::fs::create_dir_all(&args.dir)?;
4141-4242- let (tx, rx) = flume::bounded(PAGE_QUEUE_SIZE);
4343-4444- tokio::task::spawn(async move {
4545- if let Err(e) = poll_upstream(after, url, tx).await {
4646- log::error!("polling failed: {e}");
4747- } else {
4848- log::warn!("poller finished ok (weird?)");
4949- }
5050- });
5151-5252- pages_to_weeks(rx, args.dir).await?;
5353-5454- Ok(())
5555-}
+37-2
src/bin/main.rs
···11-use allegedly::{Dt, bin_init, poll_upstream};
11+use allegedly::{Dt, bin_init, pages_to_weeks, poll_upstream};
22use clap::{Parser, Subcommand};
33+use std::path::PathBuf;
34use url::Url;
4556#[derive(Debug, Parser)]
···14151516#[derive(Debug, Subcommand)]
1617enum Commands {
1818+ /// Scrape a PLC server, collecting ops into weekly bundles
1919+ ///
2020+ /// Bundles are gzipped files named `<WEEK>.jsonl.gz` where WEEK is a unix
2121+ /// timestamp rounded down to a multiple of 604,800 (one week in seconds).
2222+ ///
2323+ /// Will stop by default at floor((now - 73hrs) / one week) * one week. PLC
2424+ /// operations can be invalidated within 72 hrs, so stopping before that
2525+ /// time ensures that the bundles are (hopefully) immutable.
2626+ Bundle {
2727+ /// Where to save the bundled files
2828+ #[arg(short, long)]
2929+ #[clap(default_value = "./weekly/")]
3030+ dest: PathBuf,
3131+ /// Start the export from this time. Should be a week boundary.
3232+ #[arg(short, long)]
3333+ #[clap(default_value = "2022-11-17T00:00:00Z")]
3434+ after: Dt,
3535+ /// Overwrite existing files, if present
3636+ #[arg(long, action)]
3737+ clobber: bool,
3838+ },
1739 /// Poll an upstream PLC server and log new ops to stdout
1840 Tail {
1941 /// Begin tailing from a specific timestamp for replay or wait-until
···2951 let args = Cli::parse();
30523153 match args.command {
5454+ Commands::Bundle {
5555+ dest,
5656+ after,
5757+ clobber,
5858+ } => {
5959+ let mut url = args.upstream;
6060+ url.set_path("/export");
6161+ let (tx, rx) = flume::bounded(32); // read ahead if gzip stalls for some reason
6262+ tokio::task::spawn(async move { poll_upstream(Some(after), url, tx).await.unwrap() });
6363+ log::trace!("ensuring output directory exists");
6464+ std::fs::create_dir_all(&dest).unwrap();
6565+ pages_to_weeks(rx, dest, clobber).await.unwrap();
6666+ }
3267 Commands::Tail { after } => {
3368 let mut url = args.upstream;
3469 url.set_path("/export");
3570 let start_at = after.or_else(|| Some(chrono::Utc::now()));
3636- let (tx, rx) = flume::bounded(0); // rendezvous
7171+ let (tx, rx) = flume::bounded(0); // rendezvous, don't read ahead
3772 tokio::task::spawn(async move { poll_upstream(start_at, url, tx).await.unwrap() });
3873 loop {
3974 for op in rx.recv_async().await.unwrap().ops {
+2-1
src/poll.rs
···33use thiserror::Error;
44use url::Url;
5566-const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(500);
66+// plc.directory ratelimit on /export is 500 per 5 mins
77+const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(600);
7889#[derive(Debug, Error)]
910pub enum GetPageError {
+11-3
src/weekly.rs
···3434 }
3535}
36363737-pub async fn pages_to_weeks(rx: flume::Receiver<ExportPage>, dir: PathBuf) -> anyhow::Result<()> {
3737+pub async fn pages_to_weeks(
3838+ rx: flume::Receiver<ExportPage>,
3939+ dir: PathBuf,
4040+ clobber: bool,
4141+) -> anyhow::Result<()> {
3842 pub use std::time::Instant;
39434044 // ...there is certainly a nicer way to write this
···6771 total_ops / 1000,
6872 (total_ops as f64) / (now - total_t0).as_secs_f64(),
6973 );
7070-7171- let file = File::create(dir.join(format!("{}.jsonl.gz", op_week.0))).await?;
7474+ let path = dir.join(format!("{}.jsonl.gz", op_week.0));
7575+ let file = if clobber {
7676+ File::create(path).await?
7777+ } else {
7878+ File::create_new(path).await?
7979+ };
7280 encoder = GzipEncoder::with_quality(file, async_compression::Level::Best);
7381 current_week = Some(op_week);
7482 week_ops = 0;