Fast and robust atproto CAR file processing in Rust

fix example

+24 -11
+7 -1
benches/huge-car.rs
··· 22 22 }); 23 23 } 24 24 25 + #[inline(always)] 26 + fn ser(block: Vec<u8>) -> Vec<u8> { 27 + let s = block.len(); 28 + usize::to_ne_bytes(s).to_vec() 29 + } 30 + 25 31 async fn drive_car(filename: impl AsRef<Path>) -> usize { 26 32 let reader = tokio::fs::File::open(filename).await.unwrap(); 27 33 let reader = tokio::io::BufReader::new(reader); 28 34 29 35 let mut driver = 30 - match Driver::load_car(reader, |block| block.len().to_le_bytes().to_vec(), 1024) 36 + match Driver::load_car(reader, ser, 1024) 31 37 .await 32 38 .unwrap() 33 39 .unwrap()
+7 -1
benches/non-huge-cars.rs
··· 32 32 }); 33 33 } 34 34 35 + #[inline(always)] 36 + fn ser(block: Vec<u8>) -> Vec<u8> { 37 + let s = block.len(); 38 + usize::to_ne_bytes(s).to_vec() 39 + } 40 + 35 41 async fn drive_car(bytes: &[u8]) -> usize { 36 - let mut driver = match Driver::load_car(bytes, |block| block.len().to_le_bytes().to_vec(), 32) 42 + let mut driver = match Driver::load_car(bytes, ser, 32) 37 43 .await 38 44 .unwrap() 39 45 {
+4 -3
examples/read-file/main.rs
··· 24 24 let reader = tokio::io::BufReader::new(reader); 25 25 26 26 let (commit, mut driver) = match DriverBuilder::new() 27 - .with_block_processor(|block| block.len().to_ne_bytes().to_vec().into()) 27 + .with_block_processor(|block| block.len().to_ne_bytes().to_vec()) 28 28 .load_car(reader) 29 29 .await? 30 30 { 31 - Driver::Memory(commit, mem_driver) => (commit, mem_driver), 32 - Driver::Disk(_) => panic!("this example doesn't handle big CARs"), 31 + None => todo!(), 32 + Some(Driver::Memory(commit, mem_driver)) => (commit, mem_driver), 33 + Some(Driver::Disk(_)) => panic!("this example doesn't handle big CARs"), 33 34 }; 34 35 35 36 log::info!("got commit: {commit:?}");
+6 -6
readme.md
··· 86 86 static GLOBAL: MiMalloc = MiMalloc; 87 87 ``` 88 88 89 - - 450MiB CAR file: `1.1s` (-15%) 90 - - 128MiB: `310ms` (-13%) 91 - - 5.0MiB: `6.1ms` (-10%) 92 - - 279KiB: `160us` (-5%) 93 - - 3.4KiB: `5.7us` (-9%) 94 - - empty: `660ns` (-7%) 89 + - 450MiB CAR file: `1.2s` (-8%) 90 + - 128MiB: `300ms` (-14%) 91 + - 5.0MiB: `6.0ms` (-12%) 92 + - 279KiB: `140us` (-21%) 93 + - 3.4KiB: `4.7us` (-10%) 94 + - empty: `640ns` (-4%) 95 95 96 96 processing CARs requires buffering blocks, so it can consume a lot of memory. repo-stream's in-memory driver has minimal memory overhead, but there are two ways to make it work with less mem (you can do either or both!) 97 97