Fast and robust atproto CAR file processing in Rust

wonder if this works

+47 -4
+2 -2
src/mst.rs
··· 55 55 pub things: Vec<NodeThing>, 56 56 } 57 57 58 - #[derive(Debug)] 58 + #[derive(Debug, Clone)] 59 59 pub(crate) struct NodeThing { 60 60 pub(crate) cid: Cid, 61 61 pub(crate) kind: ThingKind, 62 62 } 63 63 64 - #[derive(Debug)] 64 + #[derive(Debug, Clone)] 65 65 pub(crate) enum ThingKind { 66 66 Tree, 67 67 Value { rkey: Rkey },
+45 -2
src/walk.rs
··· 1 1 //! Depth-first MST traversal 2 2 3 3 use crate::mst::{Depth, MstNode, NodeThing, ThingKind}; 4 - use crate::{Bytes, HashMap, Rkey, disk::DiskStore, drive::MaybeProcessedBlock}; 4 + use crate::{Bytes, HashMap, Rkey, noop, disk::DiskStore, drive::MaybeProcessedBlock}; 5 5 use cid::Cid; 6 6 use std::convert::Infallible; 7 7 ··· 50 50 /// Traverser of an atproto MST 51 51 /// 52 52 /// Walks the tree from left-to-right in depth-first order 53 - #[derive(Debug)] 53 + #[derive(Debug, Clone)] 54 54 pub struct Walker { 55 55 prev_rkey: Rkey, 56 56 root_depth: Depth, ··· 150 150 } 151 151 } 152 152 Ok(Step::End(None)) 153 + } 154 + 155 + pub fn step_to_slice_edge( 156 + &mut self, 157 + blocks: &mut HashMap<Cid, MaybeProcessedBlock>, 158 + ) -> Result<Option<Rkey>, WalkError> { 159 + let mut ant = self.clone(); 160 + let mut ant_prev; 161 + let mut rkey_prev = None; 162 + 163 + loop { 164 + ant_prev = ant.clone(); 165 + ant = ant.clone(); 166 + 167 + let Some(NodeThing { cid, kind }) = ant.next_todo() else { 168 + return Ok(None); 169 + }; 170 + 171 + let maybe_mpb = blocks.get(&cid); 172 + 173 + match (&kind, maybe_mpb) { 174 + (ThingKind::Value { rkey: _ }, Some(_)) => { 175 + // oops we took a step too far 176 + *self = ant_prev; 177 + return Ok(rkey_prev); 178 + } 179 + (ThingKind::Value { rkey }, None) => { 180 + if let Some(p) = rkey_prev && *rkey <= p { 181 + return Err(WalkError::MstError(MstError::RkeyOutOfOrder { 182 + rkey: rkey.clone(), 183 + prev: p, 184 + })); 185 + } 186 + rkey_prev = Some(rkey.clone()); 187 + } 188 + (ThingKind::Tree, Some(mpb)) => { 189 + ant.mpb_step(kind, cid, mpb, noop)?; 190 + } 191 + (ThingKind::Tree, None) => { 192 + return Err(WalkError::MissingBlock(cid)); 193 + } 194 + } 195 + } 153 196 } 154 197 155 198 /// blocking!!!!!!