Fast and robust atproto CAR file processing in Rust

bench it

Actually, the hand-rolled checks can't really beat multihash.

oh well

+50 -13
+4
Cargo.toml
··· 48 48 # [[bench]] 49 49 # name = "leading" 50 50 # harness = false 51 + 52 + [[bench]] 53 + name = "cid-check" 54 + harness = false
+39
benches/cid-check.rs
··· 1 + use criterion::{Criterion, criterion_group, criterion_main}; 2 + use multihash_codetable::{Code, MultihashDigest}; 3 + use cid::Cid; 4 + use sha2::{Digest, Sha256}; 5 + 6 + fn multihash_verify(given: Cid, block: &[u8]) -> bool { 7 + let calculated = Cid::new_v1(0x71, Code::Sha2_256.digest(block)); 8 + calculated == given 9 + } 10 + 11 + fn effortful_verify(given: Cid, block: &[u8]) -> bool { 12 + // we know we're in atproto, so we can make a few assumptions 13 + if given.version() != cid::Version::V1 { 14 + return false; 15 + } 16 + let (codec, given_digest, _) = given.hash().into_inner(); 17 + if codec != 0x12 { 18 + return false; 19 + } 20 + given_digest[..32] == *Sha256::digest(block) 21 + } 22 + 23 + fn fastloose_verify(given: Cid, block: &[u8]) -> bool { 24 + let (_, given_digest, _) = given.hash().into_inner(); 25 + given_digest[..32] == *Sha256::digest(block) 26 + } 27 + 28 + pub fn criterion_benchmark(c: &mut Criterion) { 29 + let some_bytes: Vec<u8> = vec![0x1a, 0x00, 0xAA, 0x39, 0x8C].repeat(100); 30 + let cid = Cid::new_v1(0x71, Code::Sha2_256.digest(&some_bytes)); 31 + 32 + let mut g = c.benchmark_group("CID check"); 33 + g.bench_function("multihash", |b| b.iter(|| multihash_verify(cid, &some_bytes))); 34 + g.bench_function("effortful", |b| b.iter(|| effortful_verify(cid, &some_bytes))); 35 + g.bench_function("fastloose", |b| b.iter(|| fastloose_verify(cid, &some_bytes))); 36 + } 37 + 38 + criterion_group!(benches, criterion_benchmark); 39 + criterion_main!(benches);
+7 -13
src/drive.rs
··· 1 1 //! Consume a CAR from an AsyncRead, producing an ordered stream of records 2 2 3 + use multihash_codetable::{MultihashDigest, Code}; 3 4 use crate::{ 4 5 Bytes, HashMap, 5 6 disk::{DiskError, DiskStore}, ··· 10 11 use iroh_car::CarReader; 11 12 use std::convert::Infallible; 12 13 use tokio::{io::AsyncRead, sync::mpsc}; 13 - use sha2::{Digest, Sha256}; 14 14 15 15 use crate::mst::Commit; 16 16 use crate::walk::{WalkError, Walker}; ··· 122 122 #[inline] 123 123 pub fn noop(block: Bytes) -> Bytes { 124 124 block 125 + } 126 + 127 + // iroh-car doesn't verify CIDs!!!!!! 128 + #[inline(always)] 129 + fn verify_block(given: Cid, block: &[u8]) -> bool { 130 + Cid::new_v1(0x71, Code::Sha2_256.digest(block)) == given 125 131 } 126 132 127 133 /// Builder-style driver setup ··· 302 308 max_size: usize, 303 309 mem_blocks: HashMap<Cid, MaybeProcessedBlock>, 304 310 pub commit: Option<Commit>, 305 - } 306 - 307 - fn verify_block(given: Cid, block: &[u8]) -> bool { 308 - // we know we're in atproto, so we can make a few assumptions 309 - if given.version() != cid::Version::V1 { 310 - return false; 311 - } 312 - let (codec, given_digest, _) = given.hash().into_inner(); 313 - if codec != 0x12 { 314 - return false; 315 - } 316 - given_digest[..32] == *Sha256::digest(block) 317 311 } 318 312 319 313 impl<R: AsyncRead + Unpin> NeedDisk<R> {