Streaming Tree ARchive format

strict serializer

+276 -14
+1 -1
readme.md
··· 117 117 - `mst node` (DAG-CBOR): object with the following schema 118 118 - `l` (hash link, optional): reference to a subtree at a lower depth containing only keys to the left of this node. if absent, there is no left subtree. 119 119 - `L` (bool, optional): "archived": if `true`, the subtree is contained in this archive. must not be present when `l` is not present. 120 - - `e` (array, required): ordered array of entry objects with length of at least one, each containing: 120 + - `e` (array, required): ordered array of entry objects, each containing: 121 121 - `p` (integer, required): number of bytes shared with the previous entry (TODO [key compression](https://www.ietf.org/archive/id/draft-holmgren-at-repository-00.html#name-mst-node-schema) actually) 122 122 - `k` (byte string, required): key suffix remaining 123 123 - `v` (hash link, optional): reference to the record data for this key.
+3 -1
src/lib.rs
··· 2 2 pub mod parser; 3 3 pub mod ser; 4 4 pub mod types; 5 + pub mod validation; 5 6 6 7 #[cfg(feature = "blocking")] 7 8 pub mod blocking; ··· 11 12 12 13 pub use error::{Result, StarError}; 13 14 pub use parser::StarParser; 14 - pub use ser::StarEncoder; 15 + pub use ser::{StarEncoder, StarSerializer}; 15 16 pub use types::{RepoMstEntry, RepoMstNode, StarCommit, StarItem, StarMstEntry, StarMstNode}; 17 + pub use validation::StarValidator; 16 18 17 19 #[cfg(feature = "blocking")] 18 20 pub use blocking::StarIterator;
-2
src/parser.rs
··· 247 247 let mut key = if e.p as usize <= prev_key_bytes.len() { 248 248 prev_key_bytes[..e.p as usize].to_vec() 249 249 } else { 250 - // If prefix len > prev key len, invalid compression 251 - // Although spec says "shared prefix bytes", usually <= prev.len() 252 250 prev_key_bytes.clone() 253 251 }; 254 252 key.extend_from_slice(&e.k);
+51 -9
src/ser.rs
··· 1 1 use crate::error::Result; 2 2 use crate::types::{StarCommit, StarItem, StarMstNode}; 3 + use crate::validation::StarValidator; 3 4 use std::io::Write; 4 5 5 6 pub struct StarEncoder; ··· 13 14 14 15 pub fn write_header<W: Write>(commit: &StarCommit, dst: &mut W) -> Result<()> { 15 16 dst.write_all(&[0x2A])?; 16 - 17 + 17 18 Self::write_varint(1, dst)?; 18 - 19 + 19 20 let commit_bytes = serde_ipld_dagcbor::to_vec(commit) 20 21 .map_err(|e| crate::error::StarError::Cbor(e.to_string()))?; 21 - 22 + 22 23 Self::write_varint(commit_bytes.len(), dst)?; 23 24 dst.write_all(&commit_bytes)?; 24 - 25 + 25 26 Ok(()) 26 27 } 27 28 28 29 pub fn write_node<W: Write>(node: &StarMstNode, dst: &mut W) -> Result<()> { 29 30 let node_bytes = serde_ipld_dagcbor::to_vec(node) 30 31 .map_err(|e| crate::error::StarError::Cbor(e.to_string()))?; 31 - 32 + 32 33 Self::write_varint(node_bytes.len(), dst)?; 33 34 dst.write_all(&node_bytes)?; 34 - 35 + 35 36 Ok(()) 36 37 } 37 38 38 39 pub fn write_record<W: Write>(record_bytes: &[u8], dst: &mut W) -> Result<()> { 39 40 Self::write_varint(record_bytes.len(), dst)?; 40 41 dst.write_all(record_bytes)?; 41 - 42 + 42 43 Ok(()) 43 44 } 44 45 } ··· 51 52 use bytes::BufMut; 52 53 // BytesMut::writer() returns an impl Write 53 54 let mut writer = dst.writer(); 54 - 55 + 55 56 match item { 56 57 StarItem::Commit(c) => Self::write_header(&c, &mut writer), 57 58 StarItem::Node(n) => Self::write_node(&n, &mut writer), ··· 59 60 if let Some(bytes) = content { 60 61 Self::write_record(&bytes, &mut writer) 61 62 } else { 62 - Err(crate::error::StarError::InvalidState("Cannot serialize record without content".into())) 63 + Err(crate::error::StarError::InvalidState( 64 + "Cannot serialize record without content".into(), 65 + )) 63 66 } 64 67 } 65 68 } 66 69 } 67 70 } 71 + 72 + /// A serializer that enforces strict STAR format compliance. 73 + pub struct StarSerializer<W> { 74 + writer: W, 75 + validator: StarValidator, 76 + } 77 + 78 + impl<W: Write> StarSerializer<W> { 79 + pub fn new(writer: W) -> Self { 80 + Self { 81 + writer, 82 + validator: StarValidator::new(), 83 + } 84 + } 85 + 86 + pub fn write_header(&mut self, commit: &StarCommit) -> Result<()> { 87 + self.validator.accept_header(commit)?; 88 + StarEncoder::write_header(commit, &mut self.writer) 89 + } 90 + 91 + pub fn write_node(&mut self, node: &StarMstNode) -> Result<()> { 92 + self.validator.accept_node(node)?; 93 + StarEncoder::write_node(node, &mut self.writer) 94 + } 95 + 96 + pub fn write_record(&mut self, record_bytes: &[u8]) -> Result<()> { 97 + self.validator.accept_record(record_bytes)?; 98 + StarEncoder::write_record(record_bytes, &mut self.writer) 99 + } 100 + 101 + pub fn finish(self) -> Result<W> { 102 + if !self.validator.is_done() { 103 + return Err(crate::error::StarError::InvalidState( 104 + "Incomplete tree".into(), 105 + )); 106 + } 107 + Ok(self.writer) 108 + } 109 + }
+33 -1
src/tests.rs
··· 1 1 #[cfg(test)] 2 2 mod tests { 3 3 use crate::parser::StarParser; 4 - use crate::ser::StarEncoder; 4 + use crate::ser::{StarEncoder, StarSerializer}; 5 5 use crate::types::{ 6 6 RepoMstEntry, RepoMstNode, StarCommit, StarItem, StarMstEntry, StarMstNode, 7 7 }; ··· 174 174 match result.unwrap_err() { 175 175 crate::error::StarError::VerificationFailed { .. } => {}, 176 176 e => panic!("Expected VerificationFailed, got {:?}", e), 177 + } 178 + } 179 + 180 + #[test] 181 + fn test_strict_serializer() { 182 + // Test that strict serializer enforces constraints 183 + let mut buf = Vec::new(); 184 + let mut serializer = StarSerializer::new(&mut buf); 185 + 186 + // 1. Create invalid node (height 0 but empty) 187 + let invalid_node = StarMstNode { 188 + l: None, 189 + l_archived: None, 190 + e: vec![], 191 + }; 192 + 193 + // 2. Commit pointing to root 194 + let cid = create_test_cid(b"foo"); 195 + let commit = StarCommit { 196 + did: "did".into(), version: 3, data: Some(cid), rev: "1".into(), prev: None, sig: None 197 + }; 198 + 199 + // Header OK 200 + serializer.write_header(&commit).unwrap(); 201 + 202 + // Node Fail 203 + let err = serializer.write_node(&invalid_node).unwrap_err(); 204 + match err { 205 + crate::error::StarError::InvalidState(msg) => { 206 + assert!(msg.contains("Root node must contain entries") || msg.contains("Height 0 cannot be empty")); 207 + }, 208 + e => panic!("Expected InvalidState, got {:?}", e), 177 209 } 178 210 } 179 211 }
+16
src/types.rs
··· 2 2 use cid::Cid; 3 3 use serde::{Deserialize, Serialize}; 4 4 use serde_bytes::ByteBuf; 5 + use sha2::{Digest, Sha256}; 5 6 6 7 // --- STAR Types (Wire Format) --- 7 8 ··· 86 87 e: entries, 87 88 }) 88 89 } 90 + } 91 + 92 + /// Calculates the MST height of a key (number of leading zero bits in SHA256 / 2). 93 + pub fn calculate_height(key: &[u8]) -> u32 { 94 + let digest = Sha256::digest(key); 95 + let mut zeros = 0; 96 + for &byte in digest.iter() { 97 + if byte == 0 { 98 + zeros += 8; 99 + } else { 100 + zeros += byte.leading_zeros(); 101 + break; 102 + } 103 + } 104 + zeros / 2 89 105 } 90 106 91 107 /// A parsed item from the STAR stream
+172
src/validation.rs
··· 1 + use crate::error::{Result, StarError}; 2 + use crate::types::{calculate_height, StarCommit, StarMstNode}; 3 + 4 + #[derive(Debug)] 5 + pub struct StarValidator { 6 + state: State, 7 + } 8 + 9 + #[derive(Debug)] 10 + enum State { 11 + Header, 12 + Body { stack: Vec<Expectation> }, 13 + Done, 14 + } 15 + 16 + #[derive(Debug)] 17 + enum Expectation { 18 + Root, 19 + Node { height: u32 }, 20 + Record, 21 + } 22 + 23 + impl StarValidator { 24 + pub fn new() -> Self { 25 + Self { state: State::Header } 26 + } 27 + 28 + pub fn accept_header(&mut self, commit: &StarCommit) -> Result<()> { 29 + match &self.state { 30 + State::Header => { 31 + let stack = if commit.data.is_some() { 32 + vec![Expectation::Root] 33 + } else { 34 + Vec::new() // Empty tree 35 + }; 36 + self.state = State::Body { stack }; 37 + Ok(()) 38 + }, 39 + _ => Err(StarError::InvalidState("Header already written or invalid state".into())), 40 + } 41 + } 42 + 43 + pub fn accept_node(&mut self, node: &StarMstNode) -> Result<()> { 44 + match &mut self.state { 45 + State::Body { stack } => { 46 + if stack.is_empty() { 47 + return Err(StarError::InvalidState("Unexpected node: tree is complete".into())); 48 + } 49 + 50 + let expectation = stack.pop().unwrap(); 51 + let height = match expectation { 52 + Expectation::Record => { 53 + return Err(StarError::InvalidState("Expected record, got node".into())); 54 + }, 55 + Expectation::Root => { 56 + if node.e.is_empty() { 57 + return Err(StarError::InvalidState("Root node must contain entries".into())); 58 + } 59 + Self::validate_node_height(node, None)? 60 + }, 61 + Expectation::Node { height } => { 62 + Self::validate_node_height(node, Some(height))? 63 + } 64 + }; 65 + 66 + if height == 0 { 67 + if node.l.is_some() || node.l_archived.is_some() { 68 + return Err(StarError::InvalidState("Height 0 node cannot have left child".into())); 69 + } 70 + for e in &node.e { 71 + if e.t.is_some() || e.t_archived.is_some() { 72 + return Err(StarError::InvalidState("Height 0 entries cannot have subtrees".into())); 73 + } 74 + } 75 + } else { 76 + if node.e.is_empty() && node.l.is_none() { 77 + return Err(StarError::InvalidState("Empty intermediate node must have left child".into())); 78 + } 79 + } 80 + 81 + let child_height = if height > 0 { height - 1 } else { 0 }; 82 + 83 + for e in node.e.iter().rev() { 84 + if e.t_archived == Some(true) { 85 + stack.push(Expectation::Node { height: child_height }); 86 + } 87 + 88 + if e.v_archived == Some(true) { 89 + if height == 0 && e.v.is_some() { 90 + return Err(StarError::InvalidState("Height 0 node must omit record CIDs".into())); 91 + } 92 + if height > 0 && e.v.is_none() { 93 + return Err(StarError::InvalidState("Intermediate node must include record CIDs".into())); 94 + } 95 + stack.push(Expectation::Record); 96 + } 97 + } 98 + 99 + if node.l_archived == Some(true) { 100 + stack.push(Expectation::Node { height: child_height }); 101 + } 102 + 103 + Ok(()) 104 + }, 105 + _ => Err(StarError::InvalidState("Invalid state for node".into())), 106 + } 107 + } 108 + 109 + pub fn accept_record(&mut self, _bytes: &[u8]) -> Result<()> { 110 + match &mut self.state { 111 + State::Body { stack } => { 112 + if stack.is_empty() { 113 + return Err(StarError::InvalidState("Unexpected record: tree is complete".into())); 114 + } 115 + match stack.pop().unwrap() { 116 + Expectation::Record => Ok(()), 117 + _ => Err(StarError::InvalidState("Expected node, got record".into())), 118 + } 119 + }, 120 + _ => Err(StarError::InvalidState("Invalid state for record".into())), 121 + } 122 + } 123 + 124 + fn validate_node_height(node: &StarMstNode, expected: Option<u32>) -> Result<u32> { 125 + let mut node_height = None; 126 + let mut prev_key_bytes = Vec::new(); 127 + 128 + for e in &node.e { 129 + let mut key = if e.p as usize <= prev_key_bytes.len() { 130 + prev_key_bytes[..e.p as usize].to_vec() 131 + } else { 132 + prev_key_bytes.clone() 133 + }; 134 + key.extend_from_slice(&e.k); 135 + 136 + let h = calculate_height(&key); 137 + 138 + if let Some(existing) = node_height { 139 + if h != existing { 140 + return Err(StarError::InvalidState(format!("Inconsistent key height in node: {} vs {}", h, existing))); 141 + } 142 + } else { 143 + node_height = Some(h); 144 + } 145 + prev_key_bytes = key; 146 + } 147 + 148 + let height = match (node_height, expected) { 149 + (Some(h), Some(exp)) => { 150 + if h != exp { 151 + return Err(StarError::InvalidState(format!("Height mismatch: found {}, expected {}", h, exp))); 152 + } 153 + h 154 + }, 155 + (Some(h), None) => h, 156 + (None, Some(exp)) => { 157 + if exp == 0 { return Err(StarError::InvalidState("Height 0 cannot be empty".into())); } 158 + exp 159 + }, 160 + (None, None) => return Err(StarError::InvalidState("Root cannot be empty".into())), 161 + }; 162 + Ok(height) 163 + } 164 + 165 + pub fn is_done(&self) -> bool { 166 + match &self.state { 167 + State::Body { stack } => stack.is_empty(), 168 + State::Done => true, // Not reachable with current logic but semantically true 169 + _ => false, 170 + } 171 + } 172 + }