Streaming Tree ARchive format
at rust-impl 158 lines 4.5 kB view raw
1use anyhow::{Context, Result}; 2use cid::Cid; 3use iroh_car::CarReader; 4use star::{RepoMstNode, StarCommit, StarMstEntry, StarMstNode, StarSerializer, calculate_height}; 5use std::collections::HashMap; 6use std::env; 7 8#[tokio::main] 9async fn main() -> Result<()> { 10 let args: Vec<String> = env::args().collect(); 11 if args.len() != 3 { 12 eprintln!("Usage: car-to-star <input.car> <output.star>"); 13 std::process::exit(1); 14 } 15 16 let car_path = &args[1]; 17 let star_path = &args[2]; 18 19 let reader = tokio::fs::File::open(car_path).await?; 20 let reader = tokio::io::BufReader::new(reader); 21 22 println!("Reading CAR file..."); 23 let mut car = CarReader::new(reader).await?; 24 let roots = car.header().roots(); 25 assert_eq!(roots.len(), 1); 26 27 let commit_cid = *roots.first().expect("a root to be present"); 28 29 let mut blocks: HashMap<Cid, Vec<u8>> = HashMap::new(); 30 31 while let Some((cid, data)) = car.next_block().await? { 32 blocks.insert(cid, data); 33 } 34 println!("Loaded {} blocks.", blocks.len()); 35 36 let output_file = std::fs::File::create(star_path)?; 37 38 let commit_bytes = blocks.get(&commit_cid).context("Commit block not found")?; 39 40 #[derive(serde::Deserialize)] 41 struct RepoCommit { 42 did: String, 43 version: i64, 44 data: Cid, 45 rev: String, 46 prev: Option<Cid>, 47 sig: Option<serde_bytes::ByteBuf>, 48 } 49 50 let repo_commit: RepoCommit = serde_ipld_dagcbor::from_slice(commit_bytes)?; 51 52 let root_bytes = blocks 53 .get(&repo_commit.data) 54 .context("repo data cannot be null")?; 55 let root_node: RepoMstNode = 56 serde_ipld_dagcbor::from_slice(root_bytes).context("root must be an mst node")?; 57 58 let star_data = if root_node.l.is_none() && root_node.e.is_empty() { 59 None 60 } else { 61 Some(repo_commit.data) 62 }; 63 64 let star_commit = StarCommit { 65 did: repo_commit.did, 66 version: repo_commit.version, 67 data: star_data, 68 rev: repo_commit.rev, 69 prev: repo_commit.prev, 70 sig: repo_commit.sig, 71 }; 72 73 let mut serializer = StarSerializer::new(output_file); 74 75 serializer.write_header(&star_commit)?; 76 println!("wrote header. Root: {}", repo_commit.data); 77 78 if let Some(root_cid) = star_commit.data { 79 println!("writing tree..."); 80 let (nodes, records) = write_tree(root_cid, &blocks, &mut serializer)?; 81 println!("wrote {nodes} nodes and {records} records."); 82 } else { 83 println!("empty MST, no tree written."); 84 } 85 86 serializer.finish()?; 87 println!("Done!"); 88 Ok(()) 89} 90 91fn write_tree( 92 node_cid: Cid, 93 blocks: &HashMap<Cid, Vec<u8>>, 94 serializer: &mut StarSerializer<std::fs::File>, 95) -> Result<(usize, usize)> { 96 // println!("writing tree under {node_cid:?}..."); 97 98 let mut nodes_written = 0; 99 let mut records_written = 0; 100 101 let block_bytes = blocks 102 .get(&node_cid) 103 .with_context(|| format!("Missing block {}", node_cid))?; 104 105 let repo_node: RepoMstNode = serde_ipld_dagcbor::from_slice(block_bytes)?; 106 107 let height = if let Some(first_entry) = repo_node.e.first() { 108 calculate_height(&first_entry.k) 109 } else { 110 0 111 }; 112 113 let star_node = StarMstNode { 114 l: repo_node.l, 115 l_archived: repo_node.l.map(|_| true), 116 e: repo_node 117 .e 118 .iter() 119 .map(|e| { 120 let v = if height == 0 { None } else { Some(e.v) }; 121 StarMstEntry { 122 p: e.p, 123 k: e.k.clone(), 124 v, 125 v_archived: Some(true), 126 t: e.t, 127 t_archived: e.t.map(|_| true), 128 } 129 }) 130 .collect(), 131 }; 132 133 serializer.write_node(&star_node)?; 134 nodes_written += 1; 135 136 if let Some(l_cid) = repo_node.l { 137 let (n, r) = write_tree(l_cid, blocks, serializer)?; 138 nodes_written += n; 139 records_written += r; 140 } 141 142 for e in repo_node.e { 143 let record_bytes = blocks 144 .get(&e.v) 145 .with_context(|| format!("Missing record {}", e.v))?; 146 // eprintln!("writing record {:?} (<= {node_cid:?})", e.v); 147 serializer.write_record(record_bytes)?; 148 records_written += 1; 149 150 if let Some(t_cid) = e.t { 151 let (n, r) = write_tree(t_cid, blocks, serializer)?; 152 nodes_written += n; 153 records_written += r; 154 } 155 } 156 157 Ok((nodes_written, records_written)) 158}