···1+use crate::ExportPage;
2+use std::io::Write;
/// Buffers a byte stream and forwards it downstream as [`ExportPage`]s of at
/// most `N` newline-delimited lines each.
///
/// Intended to sit behind a [`std::io::Write`] chain (e.g. a gzip decoder):
/// bytes are accumulated until `N` lines are complete, then shipped to `dest`.
pub struct PageForwarder<const N: usize> {
    // count of b'\n' currently held in `bytes` (always < N between writes)
    newlines: usize,
    // raw bytes of the partially accumulated page
    bytes: Vec<u8>,
    // channel that completed pages are sent on
    dest: flume::Sender<ExportPage>,
}
9+10+impl<const N: usize> PageForwarder<N> {
11+ pub fn new(dest: flume::Sender<ExportPage>) -> Self {
12+ Self {
13+ newlines: 0,
14+ bytes: Vec::new(),
15+ dest,
16+ }
17+ }
18+ fn send_page(&mut self) {
19+ log::info!("sending page!");
20+ let page_bytes = std::mem::take(&mut self.bytes);
21+ if !page_bytes.is_empty() {
22+ let ops = String::from_utf8(page_bytes)
23+ .unwrap()
24+ .trim()
25+ .replace("}{", "}\n{"); // HACK because oops the exports i made are corrupted
26+ self.dest.send(ExportPage { ops }).unwrap();
27+ self.newlines = 0;
28+ }
29+ }
30+}
31+32+impl<const N: usize> Write for PageForwarder<N> {
33+ fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
34+ let mut buf = buf;
35+ loop {
36+ let newlines_to_next_split = N - 1 - self.newlines;
37+ let Some((i, _)) = buf
38+ .iter()
39+ .enumerate()
40+ .filter(|&(_, &b)| b == b'\n')
41+ .nth(newlines_to_next_split)
42+ else {
43+ // we're left with a partial page
44+ self.bytes.extend_from_slice(buf);
45+ // i guess we need this second pass to update the count
46+ self.newlines += buf.iter().filter(|&&b| b == b'\n').count();
47+ // could probably do it all in one pass but whatever
48+ break;
49+ };
50+ // we have one complete page from current bytes + buf[..i]
51+ let (page_rest, rest) = buf.split_at(i);
52+ self.bytes.extend_from_slice(page_rest);
53+ self.send_page();
54+ buf = rest;
55+ }
56+57+ Ok(buf.len())
58+ }
59+ fn flush(&mut self) -> std::io::Result<()> {
60+ self.send_page();
61+ Ok(())
62+ }
63+}
#[cfg(test)]
mod test {
    use super::*;

    /// Flushing a forwarder that never received any bytes must not emit a
    /// page downstream.
    #[test]
    fn test_page_forwarder_empty_flush() {
        let (tx, rx) = flume::bounded(1);
        let mut forwarder = PageForwarder::<1>::new(tx);
        forwarder.flush().unwrap();
        assert!(rx.is_empty(), "an empty flush must not produce a page");
    }
}
+13
src/lib.rs
···0000000000000
···1+mod backfill;
2+3+pub use backfill::PageForwarder;
/// One page of PLC export.
///
/// Should have a maximum length of 1000 lines.
/// A bulk export consumer should chunk ops into pages of max 1000 ops.
///
/// Leading and trailing whitespace should be trimmed.
pub struct ExportPage {
    /// Newline-delimited operation records making up this page.
    pub ops: String,
}
+4-73
src/main.rs
···5use tokio_postgres::NoTls;
6use url::Url;
7008const EXPORT_PAGE_QUEUE_SIZE: usize = 0; // rendezvous for now
9const UPSTREAM_REQUEST_INTERVAL: Duration = Duration::from_millis(500);
10const WEEK_IN_SECONDS: u64 = 7 * 86400;
···42 postgres: String,
43}
4445-/// One page of PLC export
46-///
47-/// should have maximum length of 1000 lines.
48-/// A bulk export consumer should chunk ops into pages of max 1000 ops.
49-///
50-/// leading and trailing whitespace should be trimmed.
51-struct ExportPage {
52- pub ops: String,
53-}
54-55#[derive(Deserialize)]
56#[serde(rename_all = "camelCase")]
57struct OpPeek {
···69 pub operation: &'a serde_json::value::RawValue,
70}
7172-struct PageForwarder {
73- newlines: usize,
74- bytes: Vec<u8>,
75- dest: flume::Sender<ExportPage>,
76-}
77-78-impl PageForwarder {
79- fn new(dest: flume::Sender<ExportPage>) -> Self {
80- Self {
81- newlines: 0,
82- bytes: Vec::new(),
83- dest,
84- }
85- }
86- fn send_page(&mut self) {
87- log::info!("sending page!");
88- let page_bytes = std::mem::take(&mut self.bytes);
89- if !page_bytes.is_empty() {
90- let ops = String::from_utf8(page_bytes)
91- .unwrap()
92- .trim()
93- .replace("}{", "}\n{"); // HACK because oops the exports i made are corrupted
94- self.dest.send(ExportPage { ops }).unwrap();
95- self.newlines = 0;
96- }
97- }
98-}
99-100-impl Write for PageForwarder {
101- fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
102- let mut buf = buf;
103- loop {
104- let newlines_to_next_split = 999 - self.newlines;
105- let Some((i, _)) = buf
106- .iter()
107- .enumerate()
108- .filter(|&(_, &b)| b == b'\n')
109- .nth(newlines_to_next_split)
110- else {
111- // we're left with a partial page
112- self.bytes.extend_from_slice(buf);
113- // i guess we need this second pass to update the count
114- self.newlines += buf.iter().filter(|&&b| b == b'\n').count();
115- // could probably do it all in one pass but whatever
116- break;
117- };
118- // we have one complete page from current bytes + buf[..i]
119- let (page_rest, rest) = buf.split_at(i);
120- self.bytes.extend_from_slice(page_rest);
121- self.send_page();
122- buf = rest;
123- }
124-125- Ok(buf.len())
126- }
127- fn flush(&mut self) -> std::io::Result<()> {
128- self.send_page();
129- Ok(())
130- }
131-}
132-133async fn bulk_backfill(
134 client: reqwest::Client,
135 (upstream, epoch): (Url, u64),
···153 .error_for_status()
154 .unwrap();
155156- let mut sink = PageForwarder::new(tx.clone());
157 let mut decoder = flate2::write::GzDecoder::new(&mut sink);
158159 while let Some(chunk) = gzipped_chunks.chunk().await.unwrap() {
···343344 log::info!("connected! latest: {latest:?}");
345346- let (tx, rx) = flume::bounded::<ExportPage>(EXPORT_PAGE_QUEUE_SIZE);
347348 let export_task = tokio::task::spawn(export_upstream(
349 args.upstream,