Server tools to backfill, tail, mirror, and verify PLC logs

clean up backfill script

+134 -72
+108 -61
src/bin/backfill.rs
··· 1 1 use allegedly::{ 2 - Db, Dt, FolderSource, HttpSource, backfill, backfill_to_pg, bin::GlobalArgs, bin_init, 3 - full_pages, pages_to_pg, pages_to_stdout, poll_upstream, 2 + Db, Dt, ExportPage, FolderSource, HttpSource, backfill, backfill_to_pg, bin::GlobalArgs, 3 + bin_init, full_pages, pages_to_pg, pages_to_stdout, poll_upstream, 4 4 }; 5 5 use clap::Parser; 6 6 use reqwest::Url; 7 7 use std::path::PathBuf; 8 - use tokio::sync::{mpsc, oneshot}; 8 + use tokio::{ 9 + sync::{mpsc, oneshot}, 10 + task::JoinSet, 11 + }; 12 + 13 + pub const DEFAULT_HTTP: &str = "https://plc.t3.storage.dev/plc.directory/"; 9 14 10 15 #[derive(Debug, clap::Args)] 11 16 pub struct Args { 12 17 /// Remote URL prefix to fetch bundles from 13 18 #[arg(long)] 14 - #[clap(default_value = "https://plc.t3.storage.dev/plc.directory/")] 19 + #[clap(default_value = DEFAULT_HTTP)] 15 20 http: Url, 16 21 /// Local folder to fetch bundles from (overrides `http`) 17 22 #[arg(long)] 18 23 dir: Option<PathBuf>, 24 + /// Don't do weekly bulk-loading at all. 
25 + /// 26 + /// overrides `http` and `dir`, makes catch_up redundant 27 + #[arg(long, action)] 28 + no_bulk: bool, 19 29 /// Parallel bundle fetchers 20 30 /// 21 31 /// Default: 4 for http fetches, 1 for local folder ··· 47 57 Args { 48 58 http, 49 59 dir, 60 + no_bulk, 50 61 source_workers, 51 62 to_postgres, 52 63 postgres_cert, ··· 55 66 catch_up, 56 67 }: Args, 57 68 ) -> anyhow::Result<()> { 58 - let (tx, rx) = mpsc::channel(32); // these are big pages 59 - tokio::task::spawn(async move { 60 - if let Some(dir) = dir { 61 - log::info!("Reading weekly bundles from local folder {dir:?}"); 62 - backfill(FolderSource(dir), tx, source_workers.unwrap_or(1), until) 63 - .await 64 - .inspect_err(|e| log::error!("backfill from folder problem: {e}")) 65 - .expect("to source bundles from a folder"); 66 - } else { 67 - log::info!("Fetching weekly bundles from from {http}"); 68 - backfill(HttpSource(http), tx, source_workers.unwrap_or(4), until) 69 - .await 70 - .expect("to source bundles from http"); 71 - } 72 - }); 69 + let mut tasks = JoinSet::new(); 70 + 71 + let (bulk_tx, bulk_out) = mpsc::channel(32); // bulk uses big pages 73 72 74 - // postgres writer will notify us as soon as the very last op's time is known 75 - // so we can start catching up while pg is restoring indexes and stuff 76 - let (notify_last_at, rx_last) = if catch_up { 73 + // a bulk sink can notify us as soon as the very last op's time is known 74 + // so we can start catching up while the sink might restore indexes and such 75 + let (found_last_tx, found_last_out) = if catch_up { 77 76 let (tx, rx) = oneshot::channel(); 78 77 (Some(tx), Some(rx)) 79 78 } else { 80 79 (None, None) 81 80 }; 82 81 83 - let to_postgres_url_bulk = to_postgres.clone(); 84 - let pg_cert = postgres_cert.clone(); 85 - let bulk_out_write = tokio::task::spawn(async move { 86 - if let Some(ref url) = to_postgres_url_bulk { 87 - let db = Db::new(url.as_str(), pg_cert) 88 - .await 89 - .expect("to get db for bulk out write"); 
90 - backfill_to_pg(db, postgres_reset, rx, notify_last_at) 91 - .await 92 - .expect("to backfill to pg"); 82 + let (poll_tx, poll_out) = mpsc::channel::<ExportPage>(128); // normal/small pages 83 + let (full_tx, full_out) = mpsc::channel(1); // don't need to buffer at this filter 84 + 85 + // set up sources 86 + if no_bulk { 87 + // simple mode, just poll upstream from the beginning 88 + if http != DEFAULT_HTTP.parse()? { 89 + log::warn!("ignoring non-default bulk http setting since --no-bulk was set"); 90 + } 91 + if let Some(d) = dir { 92 + log::warn!("ignoring bulk dir setting ({d:?}) since --no-bulk was set."); 93 + } 94 + if let Some(u) = until { 95 + log::warn!( 96 + "ignoring `until` setting ({u:?}) since --no-bulk was set. (feature request?)" 97 + ); 98 + } 99 + let mut upstream = upstream; 100 + upstream.set_path("/export"); 101 + tasks.spawn(poll_upstream(None, upstream, poll_tx)); 102 + tasks.spawn(full_pages(poll_out, full_tx)); 103 + tasks.spawn(pages_to_stdout(full_out, None)); 104 + } else { 105 + // fun mode 106 + 107 + // set up bulk sources 108 + if let Some(dir) = dir { 109 + if http != DEFAULT_HTTP.parse()?
{ 110 + anyhow::bail!( 111 + "non-default bulk http setting can't be used with bulk dir setting ({dir:?})" 112 + ); 113 + } 114 + tasks.spawn(backfill( 115 + FolderSource(dir), 116 + bulk_tx, 117 + source_workers.unwrap_or(1), 118 + until, 119 + )); 93 120 } else { 94 - pages_to_stdout(rx, notify_last_at) 95 - .await 96 - .expect("to backfill to stdout"); 121 + tasks.spawn(backfill( 122 + HttpSource(http), 123 + bulk_tx, 124 + source_workers.unwrap_or(4), 125 + until, 126 + )); 97 127 } 98 - }); 99 128 100 - if let Some(rx_last) = rx_last { 101 - let mut upstream = upstream; 102 - upstream.set_path("/export"); 103 - // wait until the time for `after` is known 104 - let last_at = rx_last.await.expect("to get the last log's createdAt"); 105 - log::info!("beginning catch-up from {last_at:?} while the writer finalizes stuff"); 106 - let (tx, rx) = mpsc::channel(256); // these are small pages 107 - tokio::task::spawn(async move { 108 - poll_upstream(last_at, upstream, tx) 109 - .await 110 - .expect("polling upstream to work") 111 - }); 112 - bulk_out_write.await.expect("to wait for bulk_out_write"); 113 - log::info!("writing catch-up pages"); 114 - let full_pages = full_pages(rx); 115 - if let Some(url) = to_postgres { 116 - let db = Db::new(url.as_str(), postgres_cert) 117 - .await 118 - .expect("to connect pg for catchup"); 119 - pages_to_pg(db, full_pages) 120 - .await 121 - .expect("to write catch-up pages to pg"); 129 + // and the catch-up source... 
130 + if let Some(last) = found_last_out { 131 + tasks.spawn(async move { 132 + let mut upstream = upstream; 133 + upstream.set_path("/export"); 134 + poll_upstream(last.await?, upstream, poll_tx).await 135 + }); 136 + } 137 + 138 + // set up sinks 139 + if let Some(pg_url) = to_postgres { 140 + log::trace!("connecting to postgres..."); 141 + let db = Db::new(pg_url.as_str(), postgres_cert).await?; 142 + log::trace!("connected to postgres"); 143 + 144 + tasks.spawn(backfill_to_pg( 145 + db.clone(), 146 + postgres_reset, 147 + bulk_out, 148 + found_last_tx, 149 + )); 150 + tasks.spawn(pages_to_pg(db, full_out)); 122 151 } else { 123 - pages_to_stdout(full_pages, None) 124 - .await 125 - .expect("to write catch-up pages to stdout"); 152 + tasks.spawn(pages_to_stdout(bulk_out, found_last_tx)); 153 + tasks.spawn(pages_to_stdout(full_out, None)); 126 154 } 127 155 } 156 + 157 + while let Some(next) = tasks.join_next().await { 158 + match next { 159 + Err(e) if e.is_panic() => { 160 + log::error!("a joinset task panicked: {e}. bailing now. (should we panic?)"); 161 + return Err(e.into()); 162 + } 163 + Err(e) => { 164 + log::error!("a joinset task failed to join: {e}"); 165 + return Err(e.into()); 166 + } 167 + Ok(Err(e)) => { 168 + log::error!("a joinset task completed with error: {e}"); 169 + return Err(e); 170 + } 171 + _ => {} 172 + } 173 + } 174 + 128 175 Ok(()) 129 176 } 130 177
+24 -9
src/lib.rs
··· 80 80 /// 81 81 /// PLC will return up to 1000 ops on a page, and returns full pages until it 82 82 /// has caught up, so this is a (hacky?) way to stop polling once we're up. 83 - pub fn full_pages(mut rx: mpsc::Receiver<ExportPage>) -> mpsc::Receiver<ExportPage> { 84 - let (tx, fwd) = mpsc::channel(1); 85 - tokio::task::spawn(async move { 86 - while let Some(page) = rx.recv().await 87 - && page.ops.len() > 900 88 - { 89 - tx.send(page).await.expect("to be able to forward a page"); 83 + pub async fn full_pages( 84 + mut rx: mpsc::Receiver<ExportPage>, 85 + tx: mpsc::Sender<ExportPage>, 86 + ) -> anyhow::Result<()> { 87 + while let Some(page) = rx.recv().await { 88 + let n = page.ops.len(); 89 + if n < 900 { 90 + let last_age = page.ops.last().map(|op| chrono::Utc::now() - op.created_at); 91 + let Some(age) = last_age else { 92 + log::info!("full_pages done, empty final page"); 93 + return Ok(()); 94 + }; 95 + if age <= chrono::TimeDelta::hours(6) { 96 + log::info!("full_pages done, final page of {n} ops"); 97 + } else { 98 + log::warn!("full_pages finished with small page of {n} ops, but it's {age} old"); 99 + } 100 + return Ok(()); 90 101 } 91 - }); 92 - fwd 102 + log::trace!("full_pages: continuing with page of {n} ops"); 103 + tx.send(page).await?; 104 + } 105 + Err(anyhow::anyhow!( 106 + "full_pages ran out of source material, sender closed" 107 + )) 93 108 } 94 109 95 110 pub async fn pages_to_stdout(
+2 -2
src/plc_pg.rs
··· 133 133 } 134 134 } 135 135 136 - pub async fn pages_to_pg(db: Db, mut pages: mpsc::Receiver<ExportPage>) -> Result<(), PgError> { 136 + pub async fn pages_to_pg(db: Db, mut pages: mpsc::Receiver<ExportPage>) -> anyhow::Result<()> { 137 137 let mut client = db.connect().await?; 138 138 139 139 let ops_stmt = client ··· 197 197 reset: bool, 198 198 mut pages: mpsc::Receiver<ExportPage>, 199 199 notify_last_at: Option<oneshot::Sender<Option<Dt>>>, 200 - ) -> Result<(), PgError> { 200 + ) -> anyhow::Result<()> { 201 201 let mut client = db.connect().await?; 202 202 203 203 let t0 = Instant::now();