Server tools to backfill, tail, mirror, and verify PLC logs

update the from-fjall backfill to use the weekly bundle code path by implementing BundleSource for FjallDb

ptr.pet 567d97e9 94339149

verified
+79 -80
+2 -2
src/bin/backfill.rs
··· 2 2 Db, Dt, ExportPage, FjallDb, FolderSource, HttpSource, backfill, backfill_to_fjall, 3 3 backfill_to_pg, 4 4 bin::{GlobalArgs, bin_init}, 5 - fjall_to_pages, full_pages, logo, pages_to_fjall, pages_to_pg, pages_to_stdout, poll_upstream, 5 + full_pages, logo, pages_to_fjall, pages_to_pg, pages_to_stdout, poll_upstream, 6 6 }; 7 7 use clap::Parser; 8 8 use reqwest::Url; ··· 139 139 log::trace!("opening source fjall db at {fjall_path:?}..."); 140 140 let db = FjallDb::open(&fjall_path)?; 141 141 log::trace!("opened source fjall db"); 142 - tasks.spawn(fjall_to_pages(db, bulk_tx, until)); 142 + tasks.spawn(backfill(db, bulk_tx, source_workers.unwrap_or(4), until)); 143 143 } else if let Some(dir) = dir { 144 144 if http != DEFAULT_HTTP.parse()? { 145 145 anyhow::bail!(
+1 -1
src/lib.rs
··· 19 19 pub use cached_value::{CachedValue, Fetcher}; 20 20 pub use client::{CLIENT, UA}; 21 21 pub use mirror::{ExperimentalConf, ListenConf, serve, serve_fjall}; 22 - pub use plc_fjall::{FjallDb, backfill_to_fjall, fjall_to_pages, pages_to_fjall}; 22 + pub use plc_fjall::{FjallDb, backfill_to_fjall, pages_to_fjall}; 23 23 pub use plc_pg::{Db, backfill_to_pg, pages_to_pg}; 24 24 pub use poll::{PageBoundaryState, get_page, poll_upstream}; 25 25 pub use ratelimit::{CreatePlcOpLimiter, GovernorMiddleware, IpLimiters};
+2 -2
src/mirror/fjall.rs
··· 275 275 let db = fjall.clone(); 276 276 277 277 let ops = tokio::task::spawn_blocking(move || { 278 - let iter = db.export_ops(after, limit)?; 279 - iter.collect::<anyhow::Result<Vec<_>>>() 278 + let iter = db.export_ops(after.unwrap_or(Dt::UNIX_EPOCH)..)?; 279 + iter.take(limit).collect::<anyhow::Result<Vec<_>>>() 280 280 }) 281 281 .await 282 282 .map_err(|e| Error::from_string(e.to_string(), StatusCode::INTERNAL_SERVER_ERROR))?
+67 -70
src/plc_fjall.rs
··· 1 + use crate::{BundleSource, Week}; 1 2 use crate::{Dt, ExportPage, Op as CommonOp, PageBoundaryState}; 2 3 use anyhow::Context; 3 4 use data_encoding::{BASE32_NOPAD, BASE64URL_NOPAD}; ··· 5 6 Database, Keyspace, KeyspaceCreateOptions, OwnedWriteBatch, PersistMode, 6 7 config::BlockSizePolicy, 7 8 }; 9 + use futures::Future; 8 10 use serde::{Deserialize, Serialize}; 9 11 use std::collections::BTreeMap; 10 12 use std::fmt; 11 13 use std::path::Path; 12 14 use std::sync::Arc; 13 15 use std::time::Instant; 16 + use tokio::io::{AsyncRead, AsyncWriteExt}; 14 17 use tokio::sync::{mpsc, oneshot}; 15 18 16 19 const SEP: u8 = 0; ··· 1022 1025 1023 1026 pub fn export_ops( 1024 1027 &self, 1025 - after: Option<Dt>, 1026 - limit: usize, 1028 + range: impl std::ops::RangeBounds<Dt>, 1027 1029 ) -> anyhow::Result<impl Iterator<Item = anyhow::Result<Op>> + '_> { 1028 - let iter = if let Some(after) = after { 1029 - let start = (after.timestamp_micros() as u64).to_be_bytes(); 1030 - self.inner.ops.range(start..) 
1031 - } else { 1032 - self.inner.ops.iter() 1030 + use std::ops::Bound; 1031 + let map_bound = |b: Bound<&Dt>| -> Bound<[u8; 8]> { 1032 + match b { 1033 + Bound::Included(dt) => Bound::Included(dt.timestamp_micros().to_be_bytes()), 1034 + Bound::Excluded(dt) => Bound::Excluded(dt.timestamp_micros().to_be_bytes()), 1035 + Bound::Unbounded => Bound::Unbounded, 1036 + } 1033 1037 }; 1038 + let range = (map_bound(range.start_bound()), map_bound(range.end_bound())); 1034 1039 1035 - Ok(iter.take(limit).map(|item| { 1040 + let iter = self.inner.ops.range(range); 1041 + 1042 + Ok(iter.map(|item| { 1036 1043 let (key, value) = item 1037 1044 .into_inner() 1038 1045 .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?; ··· 1060 1067 }) 1061 1068 })) 1062 1069 } 1070 + 1071 + pub fn export_ops_week( 1072 + &self, 1073 + week: Week, 1074 + ) -> anyhow::Result<impl Iterator<Item = anyhow::Result<Op>> + '_> { 1075 + let after: Dt = week.into(); 1076 + let before: Dt = week.next().into(); 1077 + 1078 + self.export_ops(after..before) 1079 + } 1080 + } 1081 + 1082 + impl BundleSource for FjallDb { 1083 + fn reader_for( 1084 + &self, 1085 + week: Week, 1086 + ) -> impl Future<Output = anyhow::Result<impl AsyncRead + Send>> + Send { 1087 + let db = self.clone(); 1088 + 1089 + async move { 1090 + let (mut tx, rx) = tokio::io::duplex(1024 * 1024 * 64); 1091 + 1092 + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { 1093 + let iter = db.export_ops_week(week)?; 1094 + 1095 + let rt = tokio::runtime::Handle::current(); 1096 + 1097 + for op_res in iter { 1098 + let op = op_res?; 1099 + let operation_str = serde_json::to_string(&op.operation)?; 1100 + let common_op = crate::Op { 1101 + did: op.did, 1102 + cid: op.cid, 1103 + created_at: op.created_at, 1104 + nullified: op.nullified, 1105 + operation: serde_json::value::RawValue::from_string(operation_str)?, 1106 + }; 1107 + 1108 + let mut json_bytes = serde_json::to_vec(&common_op)?; 1109 + json_bytes.push(b'\n'); 1110 + 1111 
+ if rt.block_on(tx.write_all(&json_bytes)).is_err() { 1112 + break; 1113 + } 1114 + } 1115 + 1116 + Ok(()) 1117 + }); 1118 + 1119 + Ok(rx) 1120 + } 1121 + } 1063 1122 } 1064 1123 1065 1124 pub async fn backfill_to_fjall( ··· 1149 1208 t0.elapsed() 1150 1209 ); 1151 1210 Ok("pages_to_fjall") 1152 - } 1153 - 1154 - pub async fn fjall_to_pages( 1155 - db: FjallDb, 1156 - dest: mpsc::Sender<ExportPage>, 1157 - until: Option<Dt>, 1158 - ) -> anyhow::Result<&'static str> { 1159 - log::info!("starting fjall_to_pages backfill source..."); 1160 - 1161 - let t0 = Instant::now(); 1162 - 1163 - let dest_clone = dest.clone(); 1164 - let ops_sent = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> { 1165 - let iter = db.export_ops(None, usize::MAX)?; 1166 - let mut current_page = Vec::with_capacity(1000); 1167 - let mut count = 0; 1168 - 1169 - for op_res in iter { 1170 - let op = op_res?; 1171 - 1172 - if let Some(u) = until { 1173 - if op.created_at >= u { 1174 - break; 1175 - } 1176 - } 1177 - 1178 - let operation_str = serde_json::to_string(&op.operation)?; 1179 - let common_op = crate::Op { 1180 - did: op.did, 1181 - cid: op.cid, 1182 - created_at: op.created_at, 1183 - nullified: op.nullified, 1184 - operation: serde_json::value::RawValue::from_string(operation_str)?, 1185 - }; 1186 - 1187 - current_page.push(common_op); 1188 - count += 1; 1189 - 1190 - if current_page.len() >= 1000 { 1191 - let page = ExportPage { 1192 - ops: std::mem::take(&mut current_page), 1193 - }; 1194 - if dest_clone.blocking_send(page).is_err() { 1195 - break; 1196 - } 1197 - } 1198 - } 1199 - 1200 - if !current_page.is_empty() { 1201 - let page = ExportPage { ops: current_page }; 1202 - let _ = dest_clone.blocking_send(page); 1203 - } 1204 - 1205 - Ok(count) 1206 - }) 1207 - .await??; 1208 - 1209 - log::info!( 1210 - "finished sending {ops_sent} ops from fjall in {:?}", 1211 - t0.elapsed() 1212 - ); 1213 - Ok("fjall_to_pages") 1214 1211 } 1215 1212 1216 1213 #[cfg(test)]
+7 -5
src/weekly.rs
··· 101 101 let file = File::open(path) 102 102 .await 103 103 .inspect_err(|e| log::error!("failed to open file: {e}"))?; 104 - Ok(file) 104 + let decoder = GzipDecoder::new(BufReader::new(file)); 105 + Ok(decoder) 105 106 } 106 107 } 107 108 ··· 112 113 use futures::TryStreamExt; 113 114 let HttpSource(base) = self; 114 115 let url = base.join(&format!("{}.jsonl.gz", week.0))?; 115 - Ok(CLIENT 116 + let stream = CLIENT 116 117 .get(url) 117 118 .send() 118 119 .await? ··· 120 121 .bytes_stream() 121 122 .map_err(futures::io::Error::other) 122 123 .into_async_read() 123 - .compat()) 124 + .compat(); 125 + let decoder = GzipDecoder::new(BufReader::new(stream)); 126 + Ok(decoder) 124 127 } 125 128 } 126 129 ··· 213 216 } 214 217 }; 215 218 216 - let decoder = GzipDecoder::new(BufReader::new(reader)); 217 - let mut chunks = pin!(LinesStream::new(BufReader::new(decoder).lines()).try_chunks(10000)); 219 + let mut chunks = pin!(LinesStream::new(BufReader::new(reader).lines()).try_chunks(10000)); 218 220 let mut success = true; 219 221 220 222 while let Some(chunk) = match chunks.as_mut().try_next().await {