Server tools to backfill, tail, mirror, and verify PLC logs

Extract the export-page machinery into a library crate (`src/lib.rs` + `src/backfill.rs`), make `PageForwarder` generic over page size, and add a unit test for flushing an empty forwarder.

+93 -73
+76
src/backfill.rs
··· 1 + use crate::ExportPage; 2 + use std::io::Write; 3 + 4 + pub struct PageForwarder<const N: usize> { 5 + newlines: usize, 6 + bytes: Vec<u8>, 7 + dest: flume::Sender<ExportPage>, 8 + } 9 + 10 + impl<const N: usize> PageForwarder<N> { 11 + pub fn new(dest: flume::Sender<ExportPage>) -> Self { 12 + Self { 13 + newlines: 0, 14 + bytes: Vec::new(), 15 + dest, 16 + } 17 + } 18 + fn send_page(&mut self) { 19 + log::info!("sending page!"); 20 + let page_bytes = std::mem::take(&mut self.bytes); 21 + if !page_bytes.is_empty() { 22 + let ops = String::from_utf8(page_bytes) 23 + .unwrap() 24 + .trim() 25 + .replace("}{", "}\n{"); // HACK because oops the exports i made are corrupted 26 + self.dest.send(ExportPage { ops }).unwrap(); 27 + self.newlines = 0; 28 + } 29 + } 30 + } 31 + 32 + impl<const N: usize> Write for PageForwarder<N> { 33 + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 34 + let mut buf = buf; 35 + loop { 36 + let newlines_to_next_split = N - 1 - self.newlines; 37 + let Some((i, _)) = buf 38 + .iter() 39 + .enumerate() 40 + .filter(|&(_, &b)| b == b'\n') 41 + .nth(newlines_to_next_split) 42 + else { 43 + // we're left with a partial page 44 + self.bytes.extend_from_slice(buf); 45 + // i guess we need this second pass to update the count 46 + self.newlines += buf.iter().filter(|&&b| b == b'\n').count(); 47 + // could probably do it all in one pass but whatever 48 + break; 49 + }; 50 + // we have one complete page from current bytes + buf[..i] 51 + let (page_rest, rest) = buf.split_at(i); 52 + self.bytes.extend_from_slice(page_rest); 53 + self.send_page(); 54 + buf = rest; 55 + } 56 + 57 + Ok(buf.len()) 58 + } 59 + fn flush(&mut self) -> std::io::Result<()> { 60 + self.send_page(); 61 + Ok(()) 62 + } 63 + } 64 + 65 + #[cfg(test)] 66 + mod test { 67 + use super::*; 68 + 69 + #[test] 70 + fn test_page_forwarder_empty_flush() { 71 + let (tx, rx) = flume::bounded(1); 72 + let mut pf = PageForwarder::<1>::new(tx); 73 + pf.flush().unwrap(); 74 + 
assert!(rx.is_empty()); 75 + } 76 + }
+13
src/lib.rs
mod backfill;

pub use backfill::PageForwarder;

/// One page of PLC export
///
/// should have maximum length of 1000 lines.
/// A bulk export consumer should chunk ops into pages of max 1000 ops.
///
/// leading and trailing whitespace should be trimmed.
pub struct ExportPage {
    /// Newline-delimited op records (one JSON op per line), trimmed of
    /// leading/trailing whitespace by the producer.
    pub ops: String,
}
+4 -73
src/main.rs
··· 5 5 use tokio_postgres::NoTls; 6 6 use url::Url; 7 7 8 + use allegedly::{ExportPage, PageForwarder}; 9 + 8 10 const EXPORT_PAGE_QUEUE_SIZE: usize = 0; // rendezvous for now 9 11 const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(500); 10 12 const WEEK_IN_SECONDS: u64 = 7 * 86400; ··· 42 44 postgres: String, 43 45 } 44 46 45 - /// One page of PLC export 46 - /// 47 - /// should have maximum length of 1000 lines. 48 - /// A bulk export consumer should chunk ops into pages of max 1000 ops. 49 - /// 50 - /// leading and trailing whitespace should be trimmed. 51 - struct ExportPage { 52 - pub ops: String, 53 - } 54 - 55 47 #[derive(Deserialize)] 56 48 #[serde(rename_all = "camelCase")] 57 49 struct OpPeek { ··· 69 61 pub operation: &'a serde_json::value::RawValue, 70 62 } 71 63 72 - struct PageForwarder { 73 - newlines: usize, 74 - bytes: Vec<u8>, 75 - dest: flume::Sender<ExportPage>, 76 - } 77 - 78 - impl PageForwarder { 79 - fn new(dest: flume::Sender<ExportPage>) -> Self { 80 - Self { 81 - newlines: 0, 82 - bytes: Vec::new(), 83 - dest, 84 - } 85 - } 86 - fn send_page(&mut self) { 87 - log::info!("sending page!"); 88 - let page_bytes = std::mem::take(&mut self.bytes); 89 - if !page_bytes.is_empty() { 90 - let ops = String::from_utf8(page_bytes) 91 - .unwrap() 92 - .trim() 93 - .replace("}{", "}\n{"); // HACK because oops the exports i made are corrupted 94 - self.dest.send(ExportPage { ops }).unwrap(); 95 - self.newlines = 0; 96 - } 97 - } 98 - } 99 - 100 - impl Write for PageForwarder { 101 - fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 102 - let mut buf = buf; 103 - loop { 104 - let newlines_to_next_split = 999 - self.newlines; 105 - let Some((i, _)) = buf 106 - .iter() 107 - .enumerate() 108 - .filter(|&(_, &b)| b == b'\n') 109 - .nth(newlines_to_next_split) 110 - else { 111 - // we're left with a partial page 112 - self.bytes.extend_from_slice(buf); 113 - // i guess we need this second pass to update the count 114 - self.newlines 
+= buf.iter().filter(|&&b| b == b'\n').count(); 115 - // could probably do it all in one pass but whatever 116 - break; 117 - }; 118 - // we have one complete page from current bytes + buf[..i] 119 - let (page_rest, rest) = buf.split_at(i); 120 - self.bytes.extend_from_slice(page_rest); 121 - self.send_page(); 122 - buf = rest; 123 - } 124 - 125 - Ok(buf.len()) 126 - } 127 - fn flush(&mut self) -> std::io::Result<()> { 128 - self.send_page(); 129 - Ok(()) 130 - } 131 - } 132 - 133 64 async fn bulk_backfill( 134 65 client: reqwest::Client, 135 66 (upstream, epoch): (Url, u64), ··· 153 84 .error_for_status() 154 85 .unwrap(); 155 86 156 - let mut sink = PageForwarder::new(tx.clone()); 87 + let mut sink = PageForwarder::<1000>::new(tx.clone()); 157 88 let mut decoder = flate2::write::GzDecoder::new(&mut sink); 158 89 159 90 while let Some(chunk) = gzipped_chunks.chunk().await.unwrap() { ··· 343 274 344 275 log::info!("connected! latest: {latest:?}"); 345 276 346 - let (tx, rx) = flume::bounded::<ExportPage>(EXPORT_PAGE_QUEUE_SIZE); 277 + let (tx, rx) = flume::bounded(EXPORT_PAGE_QUEUE_SIZE); 347 278 348 279 let export_task = tokio::task::spawn(export_upstream( 349 280 args.upstream,