Server tools to backfill, tail, mirror, and verify PLC logs

drop flume in favor of tokio::sync::mpsc

We don't need multiple consumers after all, so a single-consumer tokio mpsc channel is enough and the flume dependency can go.

+34 -59
-31
Cargo.lock
··· 35 "chrono", 36 "clap", 37 "env_logger", 38 - "flume", 39 "futures", 40 "log", 41 "reqwest", ··· 446 dependencies = [ 447 "crc32fast", 448 "miniz_oxide", 449 - ] 450 - 451 - [[package]] 452 - name = "flume" 453 - version = "0.11.1" 454 - source = "registry+https://github.com/rust-lang/crates.io-index" 455 - checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" 456 - dependencies = [ 457 - "futures-core", 458 - "futures-sink", 459 - "nanorand", 460 - "spin", 461 ] 462 463 [[package]] ··· 1094 ] 1095 1096 [[package]] 1097 - name = "nanorand" 1098 - version = "0.7.0" 1099 - source = "registry+https://github.com/rust-lang/crates.io-index" 1100 - checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" 1101 - dependencies = [ 1102 - "getrandom 0.2.16", 1103 - ] 1104 - 1105 - [[package]] 1106 name = "native-tls" 1107 version = "0.2.14" 1108 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1789 dependencies = [ 1790 "libc", 1791 "windows-sys 0.59.0", 1792 - ] 1793 - 1794 - [[package]] 1795 - name = "spin" 1796 - version = "0.9.8" 1797 - source = "registry+https://github.com/rust-lang/crates.io-index" 1798 - checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1799 - dependencies = [ 1800 - "lock_api", 1801 ] 1802 1803 [[package]]
··· 35 "chrono", 36 "clap", 37 "env_logger", 38 "futures", 39 "log", 40 "reqwest", ··· 445 dependencies = [ 446 "crc32fast", 447 "miniz_oxide", 448 ] 449 450 [[package]] ··· 1081 ] 1082 1083 [[package]] 1084 name = "native-tls" 1085 version = "0.2.14" 1086 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1767 dependencies = [ 1768 "libc", 1769 "windows-sys 0.59.0", 1770 ] 1771 1772 [[package]]
-1
Cargo.toml
··· 12 chrono = { version = "0.4.42", features = ["serde"] } 13 clap = { version = "4.5.47", features = ["derive", "env"] } 14 env_logger = "0.11.8" 15 - flume = "0.11.1" 16 futures = "0.3.31" 17 log = "0.4.28" 18 reqwest = { version = "0.12.23", features = ["stream"] }
··· 12 chrono = { version = "0.4.42", features = ["serde"] } 13 clap = { version = "4.5.47", features = ["derive", "env"] } 14 env_logger = "0.11.8" 15 futures = "0.3.31" 16 log = "0.4.28" 17 reqwest = { version = "0.12.23", features = ["stream"] }
+5 -2
src/backfill.rs
··· 1 use crate::{BundleSource, Dt, ExportPage, Week, week_to_pages}; 2 use std::sync::Arc; 3 use std::time::Instant; 4 - use tokio::{sync::Mutex, task::JoinSet}; 5 6 const FIRST_WEEK: Week = Week::from_n(1668643200); 7 8 pub async fn backfill( 9 source: impl BundleSource + Send + 'static, 10 - dest: flume::Sender<ExportPage>, 11 source_workers: usize, 12 until: Option<Dt>, 13 ) -> anyhow::Result<()> {
··· 1 use crate::{BundleSource, Dt, ExportPage, Week, week_to_pages}; 2 use std::sync::Arc; 3 use std::time::Instant; 4 + use tokio::{ 5 + sync::{Mutex, mpsc}, 6 + task::JoinSet, 7 + }; 8 9 const FIRST_WEEK: Week = Week::from_n(1668643200); 10 11 pub async fn backfill( 12 source: impl BundleSource + Send + 'static, 13 + dest: mpsc::Sender<ExportPage>, 14 source_workers: usize, 15 until: Option<Dt>, 16 ) -> anyhow::Result<()> {
+15 -13
src/bin/allegedly.rs
··· 3 bin_init, pages_to_pg, pages_to_weeks, poll_upstream, 4 }; 5 use clap::{Parser, Subcommand}; 6 - use std::path::PathBuf; 7 - use tokio::sync::oneshot; 8 use url::Url; 9 10 #[derive(Debug, Parser)] ··· 80 } 81 82 async fn pages_to_stdout( 83 - rx: flume::Receiver<ExportPage>, 84 notify_last_at: Option<oneshot::Sender<Option<Dt>>>, 85 - ) -> Result<(), flume::RecvError> { 86 let mut last_at = None; 87 - while let Ok(page) = rx.recv_async().await { 88 for op in &page.ops { 89 println!("{op}"); 90 } ··· 107 /// 108 /// PLC will return up to 1000 ops on a page, and returns full pages until it 109 /// has caught up, so this is a (hacky?) way to stop polling once we're up. 110 - fn full_pages(rx: flume::Receiver<ExportPage>) -> flume::Receiver<ExportPage> { 111 - let (tx, fwd) = flume::bounded(0); 112 tokio::task::spawn(async move { 113 - while let Ok(page) = rx.recv_async().await 114 && page.ops.len() > 900 115 { 116 - tx.send_async(page).await.unwrap(); 117 } 118 }); 119 fwd ··· 125 126 let args = Cli::parse(); 127 128 match args.command { 129 Commands::Backfill { 130 http, ··· 135 until, 136 catch_up, 137 } => { 138 - let (tx, rx) = flume::bounded(32); // these are big pages 139 tokio::task::spawn(async move { 140 if let Some(dir) = dir { 141 log::info!("Reading weekly bundles from local folder {dir:?}"); ··· 177 // wait until the time for `after` is known 178 let last_at = rx_last.await.unwrap(); 179 log::info!("beginning catch-up from {last_at:?} while the writer finalizes stuff"); 180 - let (tx, rx) = flume::bounded(256); 181 tokio::task::spawn( 182 async move { poll_upstream(last_at, upstream, tx).await.unwrap() }, 183 ); ··· 199 } => { 200 let mut url = args.upstream; 201 url.set_path("/export"); 202 - let (tx, rx) = flume::bounded(32); // read ahead if gzip stalls for some reason 203 tokio::task::spawn(async move { poll_upstream(Some(after), url, tx).await.unwrap() }); 204 log::trace!("ensuring output directory exists"); 205 
std::fs::create_dir_all(&dest).unwrap(); ··· 209 let mut url = args.upstream; 210 url.set_path("/export"); 211 let start_at = after.or_else(|| Some(chrono::Utc::now())); 212 - let (tx, rx) = flume::bounded(1); 213 tokio::task::spawn(async move { poll_upstream(start_at, url, tx).await.unwrap() }); 214 pages_to_stdout(rx, None).await.unwrap(); 215 } 216 } 217 }
··· 3 bin_init, pages_to_pg, pages_to_weeks, poll_upstream, 4 }; 5 use clap::{Parser, Subcommand}; 6 + use std::{path::PathBuf, time::Instant}; 7 + use tokio::sync::{mpsc, oneshot}; 8 use url::Url; 9 10 #[derive(Debug, Parser)] ··· 80 } 81 82 async fn pages_to_stdout( 83 + mut rx: mpsc::Receiver<ExportPage>, 84 notify_last_at: Option<oneshot::Sender<Option<Dt>>>, 85 + ) -> anyhow::Result<()> { 86 let mut last_at = None; 87 + while let Some(page) = rx.recv().await { 88 for op in &page.ops { 89 println!("{op}"); 90 } ··· 107 /// 108 /// PLC will return up to 1000 ops on a page, and returns full pages until it 109 /// has caught up, so this is a (hacky?) way to stop polling once we're up. 110 + fn full_pages(mut rx: mpsc::Receiver<ExportPage>) -> mpsc::Receiver<ExportPage> { 111 + let (tx, fwd) = mpsc::channel(1); 112 tokio::task::spawn(async move { 113 + while let Some(page) = rx.recv().await 114 && page.ops.len() > 900 115 { 116 + tx.send(page).await.unwrap(); 117 } 118 }); 119 fwd ··· 125 126 let args = Cli::parse(); 127 128 + let t0 = Instant::now(); 129 match args.command { 130 Commands::Backfill { 131 http, ··· 136 until, 137 catch_up, 138 } => { 139 + let (tx, rx) = mpsc::channel(32); // these are big pages 140 tokio::task::spawn(async move { 141 if let Some(dir) = dir { 142 log::info!("Reading weekly bundles from local folder {dir:?}"); ··· 178 // wait until the time for `after` is known 179 let last_at = rx_last.await.unwrap(); 180 log::info!("beginning catch-up from {last_at:?} while the writer finalizes stuff"); 181 + let (tx, rx) = mpsc::channel(256); // these are small pages 182 tokio::task::spawn( 183 async move { poll_upstream(last_at, upstream, tx).await.unwrap() }, 184 ); ··· 200 } => { 201 let mut url = args.upstream; 202 url.set_path("/export"); 203 + let (tx, rx) = mpsc::channel(32); // read ahead if gzip stalls for some reason 204 tokio::task::spawn(async move { poll_upstream(Some(after), url, tx).await.unwrap() }); 205 log::trace!("ensuring 
output directory exists"); 206 std::fs::create_dir_all(&dest).unwrap(); ··· 210 let mut url = args.upstream; 211 url.set_path("/export"); 212 let start_at = after.or_else(|| Some(chrono::Utc::now())); 213 + let (tx, rx) = mpsc::channel(1); 214 tokio::task::spawn(async move { poll_upstream(start_at, url, tx).await.unwrap() }); 215 pages_to_stdout(rx, None).await.unwrap(); 216 } 217 } 218 + log::info!("whew, {:?}. goodbye!", t0.elapsed()); 219 }
+5 -5
src/plc_pg.rs
··· 1 use crate::{Dt, ExportPage, Op, PageBoundaryState}; 2 use std::pin::pin; 3 use std::time::Instant; 4 - use tokio::sync::oneshot; 5 use tokio_postgres::{ 6 Client, Error as PgError, NoTls, 7 binary_copy::BinaryCopyInWriter, ··· 72 } 73 } 74 75 - pub async fn pages_to_pg(db: Db, pages: flume::Receiver<ExportPage>) -> Result<(), PgError> { 76 let mut client = db.connect().await?; 77 78 let ops_stmt = client ··· 89 let mut ops_inserted = 0; 90 let mut dids_inserted = 0; 91 92 - while let Ok(page) = pages.recv_async().await { 93 log::trace!("writing page with {} ops", page.ops.len()); 94 let tx = client.transaction().await?; 95 for s in page.ops { ··· 137 pub async fn backfill_to_pg( 138 db: Db, 139 reset: bool, 140 - pages: flume::Receiver<ExportPage>, 141 notify_last_at: Option<oneshot::Sender<Option<Dt>>>, 142 ) -> Result<(), PgError> { 143 let mut client = db.connect().await?; ··· 195 .await?; 196 let mut writer = pin!(BinaryCopyInWriter::new(sync, types)); 197 let mut last_at = None; 198 - while let Ok(page) = pages.recv_async().await { 199 for s in &page.ops { 200 let Ok(op) = serde_json::from_str::<Op>(s) else { 201 log::warn!("ignoring unparseable op: {s:?}");
··· 1 use crate::{Dt, ExportPage, Op, PageBoundaryState}; 2 use std::pin::pin; 3 use std::time::Instant; 4 + use tokio::sync::{mpsc, oneshot}; 5 use tokio_postgres::{ 6 Client, Error as PgError, NoTls, 7 binary_copy::BinaryCopyInWriter, ··· 72 } 73 } 74 75 + pub async fn pages_to_pg(db: Db, mut pages: mpsc::Receiver<ExportPage>) -> Result<(), PgError> { 76 let mut client = db.connect().await?; 77 78 let ops_stmt = client ··· 89 let mut ops_inserted = 0; 90 let mut dids_inserted = 0; 91 92 + while let Some(page) = pages.recv().await { 93 log::trace!("writing page with {} ops", page.ops.len()); 94 let tx = client.transaction().await?; 95 for s in page.ops { ··· 137 pub async fn backfill_to_pg( 138 db: Db, 139 reset: bool, 140 + mut pages: mpsc::Receiver<ExportPage>, 141 notify_last_at: Option<oneshot::Sender<Option<Dt>>>, 142 ) -> Result<(), PgError> { 143 let mut client = db.connect().await?; ··· 195 .await?; 196 let mut writer = pin!(BinaryCopyInWriter::new(sync, types)); 197 let mut last_at = None; 198 + while let Some(page) = pages.recv().await { 199 for s in &page.ops { 200 let Ok(op) = serde_json::from_str::<Op>(s) else { 201 log::warn!("ignoring unparseable op: {s:?}");
+4 -3
src/poll.rs
··· 1 use crate::{CLIENT, Dt, ExportPage, Op, OpKey}; 2 use std::time::Duration; 3 use thiserror::Error; 4 use url::Url; 5 6 // plc.directory ratelimit on /export is 500 per 5 mins ··· 209 pub async fn poll_upstream( 210 after: Option<Dt>, 211 base: Url, 212 - dest: flume::Sender<ExportPage>, 213 ) -> anyhow::Result<()> { 214 let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL); 215 let mut prev_last: Option<LastOp> = after.map(Into::into); ··· 232 if !page.is_empty() { 233 match dest.try_send(page) { 234 Ok(()) => {} 235 - Err(flume::TrySendError::Full(page)) => { 236 log::warn!("export: destination channel full, awaiting..."); 237 - dest.send_async(page).await?; 238 } 239 e => e?, 240 };
··· 1 use crate::{CLIENT, Dt, ExportPage, Op, OpKey}; 2 use std::time::Duration; 3 use thiserror::Error; 4 + use tokio::sync::mpsc; 5 use url::Url; 6 7 // plc.directory ratelimit on /export is 500 per 5 mins ··· 210 pub async fn poll_upstream( 211 after: Option<Dt>, 212 base: Url, 213 + dest: mpsc::Sender<ExportPage>, 214 ) -> anyhow::Result<()> { 215 let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL); 216 let mut prev_last: Option<LastOp> = after.map(Into::into); ··· 233 if !page.is_empty() { 234 match dest.try_send(page) { 235 Ok(()) => {} 236 + Err(mpsc::error::TrySendError::Full(page)) => { 237 log::warn!("export: destination channel full, awaiting..."); 238 + dest.send(page).await?; 239 } 240 e => e?, 241 };
+5 -4
src/weekly.rs
··· 8 use tokio::{ 9 fs::File, 10 io::{AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}, 11 }; 12 use tokio_stream::wrappers::LinesStream; 13 use tokio_util::compat::FuturesAsyncReadCompatExt; ··· 120 } 121 122 pub async fn pages_to_weeks( 123 - rx: flume::Receiver<ExportPage>, 124 dir: PathBuf, 125 clobber: bool, 126 ) -> anyhow::Result<()> { ··· 136 let mut week_ops = 0; 137 let mut week_t0 = total_t0; 138 139 - while let Ok(page) = rx.recv_async().await { 140 for mut s in page.ops { 141 let Ok(op) = serde_json::from_str::<Op>(&s) 142 .inspect_err(|e| log::error!("failed to parse plc op, ignoring: {e}")) ··· 193 pub async fn week_to_pages( 194 source: impl BundleSource, 195 week: Week, 196 - dest: flume::Sender<ExportPage>, 197 ) -> anyhow::Result<()> { 198 use futures::TryStreamExt; 199 let decoder = GzipDecoder::new(BufReader::new(source.reader_for(week).await?)); ··· 202 while let Some(chunk) = chunks.try_next().await? { 203 let ops: Vec<String> = chunk.into_iter().collect(); 204 let page = ExportPage { ops }; 205 - dest.send_async(page).await?; 206 } 207 Ok(()) 208 }
··· 8 use tokio::{ 9 fs::File, 10 io::{AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}, 11 + sync::mpsc, 12 }; 13 use tokio_stream::wrappers::LinesStream; 14 use tokio_util::compat::FuturesAsyncReadCompatExt; ··· 121 } 122 123 pub async fn pages_to_weeks( 124 + mut rx: mpsc::Receiver<ExportPage>, 125 dir: PathBuf, 126 clobber: bool, 127 ) -> anyhow::Result<()> { ··· 137 let mut week_ops = 0; 138 let mut week_t0 = total_t0; 139 140 + while let Some(page) = rx.recv().await { 141 for mut s in page.ops { 142 let Ok(op) = serde_json::from_str::<Op>(&s) 143 .inspect_err(|e| log::error!("failed to parse plc op, ignoring: {e}")) ··· 194 pub async fn week_to_pages( 195 source: impl BundleSource, 196 week: Week, 197 + dest: mpsc::Sender<ExportPage>, 198 ) -> anyhow::Result<()> { 199 use futures::TryStreamExt; 200 let decoder = GzipDecoder::new(BufReader::new(source.reader_for(week).await?)); ··· 203 while let Some(chunk) = chunks.try_next().await? { 204 let ops: Vec<String> = chunk.into_iter().collect(); 205 let page = ExportPage { ops }; 206 + dest.send(page).await?; 207 } 208 Ok(()) 209 }