···192192 }
193193}
194194195195+/// Decode unsigned varint from bytes
196196+///
197197+/// Returns `Some((value, bytes_read))` on success, `None` on invalid input.
198198+/// Used for decoding multicodec prefixes in multibase-encoded keys.
195199pub fn decode_uvarint(data: &[u8]) -> Option<(u64, usize)> {
196200 let mut x: u64 = 0;
197201 let mut s: u32 = 0;
···208212 None
209213}
210214215215+/// Encode unsigned varint to bytes
216216+///
217217+/// Encodes a u64 value as a multicodec-style varint.
218218+/// Used for encoding multicodec prefixes in multibase-encoded keys.
211219pub fn encode_uvarint(mut x: u64) -> Vec<u8> {
212220 let mut out = Vec::new();
213221 while x >= 0x80 {
···218226 out
219227}
220228229229+/// Encode public key as multibase multikey string
230230+///
231231+/// Creates a multikey string with the given multicodec code and key bytes.
232232+/// Returns base58btc-encoded string with varint prefix.
221233pub fn multikey(code: u64, key: &[u8]) -> String {
222234 let mut buf = encode_uvarint(code);
223235 buf.extend_from_slice(key);
+25
crates/jacquard-common/src/types/recordkey.rs
···3232#[repr(transparent)]
3333pub struct RecordKey<T: RecordKeyType>(pub T);
34343535+impl<'a> RecordKey<Rkey<'a>> {
3636+ /// Create a new `RecordKey` from a string slice
3737+ pub fn any(str: &'a str) -> Result<Self, AtStrError> {
3838+ Ok(RecordKey(Rkey::new(str)?))
3939+ }
4040+4141+ /// Create a new `RecordKey` from a CowStr
4242+ pub fn any_cow(str: CowStr<'a>) -> Result<Self, AtStrError> {
4343+ Ok(RecordKey(Rkey::new_cow(str)?))
4444+ }
4545+4646+ /// Create a new `RecordKey` from a static string slice
4747+ pub fn any_static(str: &'static str) -> Result<Self, AtStrError> {
4848+ Ok(RecordKey(Rkey::new_static(str)?))
4949+ }
5050+}
5151+3552impl<T> From<T> for RecordKey<Rkey<'_>>
3653where
3754 T: RecordKeyType,
3855{
3956 fn from(value: T) -> Self {
4057 RecordKey(Rkey::from_str(value.as_str()).expect("Invalid rkey"))
5858+ }
5959+}
6060+6161+impl FromStr for RecordKey<Rkey<'_>> {
6262+ type Err = AtStrError;
6363+6464+ fn from_str(s: &str) -> Result<Self, Self::Err> {
6565+ Ok(RecordKey(Rkey::from_str(s)?))
4166 }
4267}
4368
+1-1
crates/jacquard-repo/src/car/writer.rs
···46464747/// Write blocks to CAR bytes (in-memory)
4848///
4949-/// Like `write_car()` but writes to a Vec<u8> instead of a file.
4949+/// Like `write_car()` but writes to a `Vec<u8>` instead of a file.
5050/// Useful for tests and proof generation.
5151pub async fn write_car_bytes(
5252 root: IpldCid,
+320
crates/jacquard-repo/src/mst/cursor.rs
···11+//! MST cursor for efficient tree traversal
22+33+use super::node::NodeEntry;
44+use super::tree::Mst;
55+use crate::error::Result;
66+use crate::storage::BlockStore;
77+use cid::Cid as IpldCid;
88+use smol_str::SmolStr;
99+1010+/// Position within an MST traversal
1111+#[derive(Debug, Clone)]
1212+pub enum CursorPosition<S: BlockStore> {
1313+ /// Pointing at a leaf entry
1414+ Leaf {
1515+ /// Leaf key
1616+ key: SmolStr,
1717+ /// Leaf CID (record value)
1818+ cid: IpldCid,
1919+ },
2020+2121+ /// Pointing at a tree (subtree root)
2222+ Tree {
2323+ /// Subtree MST
2424+ mst: Mst<S>,
2525+ },
2626+2727+ /// Past the end of traversal
2828+ End,
2929+}
3030+3131+/// Cursor for navigating an MST in sorted order
3232+///
3333+/// Maintains a position in the tree and supports efficient navigation:
3434+/// - `advance()`: Move to next item in sorted order
3535+/// - `skip_subtree()`: Skip entire subtree at current position
3636+/// - `current()`: Get current position without moving
3737+///
3838+/// # Example traversal
3939+///
4040+/// ```ignore
4141+/// let mut cursor = MstCursor::new(tree);
4242+///
4343+/// while !cursor.is_end() {
4444+/// match cursor.current() {
4545+/// CursorPosition::Leaf { key, cid } => {
4646+/// println!("Leaf: {} -> {}", key, cid);
4747+/// }
4848+/// CursorPosition::Tree { mst } => {
4949+/// println!("Subtree at layer {}", mst.layer);
5050+/// }
5151+/// CursorPosition::End => break,
5252+/// }
5353+/// cursor.advance().await?;
5454+/// }
5555+/// ```
5656+#[derive(Debug, Clone)]
5757+pub struct MstCursor<S: BlockStore> {
5858+ /// Stack of (node, entries, index) pairs tracking path from root
5959+ ///
6060+ /// Each entry represents a level we've descended into:
6161+ /// - `node`: The MST node at this level
6262+ /// - `entries`: Cached entries for this node
6363+ /// - `index`: Current position within entries
6464+ path: Vec<(Mst<S>, Vec<NodeEntry<S>>, usize)>,
6565+6666+ /// Current position in traversal
6767+ current: CursorPosition<S>,
6868+}
6969+7070+impl<S: BlockStore + Sync + 'static> MstCursor<S> {
7171+ /// Create new cursor at the start of a tree
7272+ ///
7373+ /// Initial position is the root of the tree (which is a Tree position).
7474+ /// Call `advance()` to move to the first leaf.
7575+ pub fn new(root: Mst<S>) -> Self {
7676+ Self {
7777+ path: Vec::new(),
7878+ current: CursorPosition::Tree { mst: root },
7979+ }
8080+ }
8181+8282+ /// Get current position without advancing
8383+ pub fn current(&self) -> &CursorPosition<S> {
8484+ &self.current
8585+ }
8686+8787+ /// Check if cursor is at end
8888+ pub fn is_end(&self) -> bool {
8989+ matches!(self.current, CursorPosition::End)
9090+ }
9191+9292+ /// Get key at current position (if pointing at leaf)
9393+ pub fn key(&self) -> Option<&str> {
9494+ match &self.current {
9595+ CursorPosition::Leaf { key, .. } => Some(key.as_str()),
9696+ _ => None,
9797+ }
9898+ }
9999+100100+ /// Get the layer we're currently walking at
101101+ ///
102102+ /// Returns the layer of the node we're traversing within.
103103+ /// If at the root level (before stepping in), returns root's layer + 1.
104104+ pub async fn layer(&self) -> Result<usize> {
105105+ if let Some((walking_node, _, _)) = self.path.last() {
106106+ // We're inside a node - return its layer
107107+ walking_node.get_layer().await
108108+ } else {
109109+ // At root level (not yet stepped in) - return root's layer + 1
110110+ // This matches rsky's walker behavior: being "at" the root (before entering)
111111+ // is one layer higher than being "inside" the root
112112+ match &self.current {
113113+ CursorPosition::Tree { mst } => {
114114+ let root_layer = mst.get_layer().await?;
115115+ Ok(root_layer + 1)
116116+ }
117117+ CursorPosition::End => Ok(0),
118118+ CursorPosition::Leaf { .. } => {
119119+ // Shouldn't happen - root can't be a leaf
120120+ Ok(0)
121121+ }
122122+ }
123123+ }
124124+ }
125125+126126+ /// Advance to next position in sorted order
127127+ ///
128128+ /// - If at Leaf: move to next sibling or pop up
129129+ /// - If at Tree: descend into it (step into first entry)
130130+ /// - If at End: stay at End
131131+ pub async fn advance(&mut self) -> Result<()> {
132132+ match &self.current {
133133+ CursorPosition::End => Ok(()),
134134+135135+ CursorPosition::Leaf { .. } => {
136136+ // Move to next sibling
137137+ self.step_over().await
138138+ }
139139+140140+ CursorPosition::Tree { mst } => {
141141+ // Descend into tree
142142+ self.step_into(mst.clone()).await
143143+ }
144144+ }
145145+ }
146146+147147+ /// Skip entire subtree at current position
148148+ ///
149149+ /// If pointing at a Tree, skips all its contents.
150150+ /// If pointing at a Leaf, equivalent to `advance()`.
151151+ pub async fn skip_subtree(&mut self) -> Result<()> {
152152+ self.step_over().await
153153+ }
154154+155155+ /// Move to next sibling or pop up
156156+ fn step_over<'a>(
157157+ &'a mut self,
158158+ ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
159159+ Box::pin(async move {
160160+ if let Some((_node, entries, index)) = self.path.last_mut() {
161161+ // Try to move to next entry in current node
162162+ *index += 1;
163163+164164+ if *index < entries.len() {
165165+ // Move to next entry
166166+ self.current = match &entries[*index] {
167167+ NodeEntry::Leaf { key, value } => CursorPosition::Leaf {
168168+ key: key.clone(),
169169+ cid: *value,
170170+ },
171171+ NodeEntry::Tree(tree) => CursorPosition::Tree { mst: tree.clone() },
172172+ };
173173+ Ok(())
174174+ } else {
175175+ // No more entries at this level - pop up
176176+ self.path.pop();
177177+ self.step_over().await
178178+ }
179179+ } else {
180180+ // No parent - we're done
181181+ self.current = CursorPosition::End;
182182+ Ok(())
183183+ }
184184+ })
185185+ }
186186+187187+ /// Descend into a tree node
188188+ async fn step_into(&mut self, mst: Mst<S>) -> Result<()> {
189189+ let entries = mst.get_entries().await?;
190190+191191+ if entries.is_empty() {
192192+ // Empty tree - skip it
193193+ self.step_over().await
194194+ } else {
195195+ // Push current level onto stack and move to first entry
196196+ self.path.push((mst, entries.clone(), 0));
197197+198198+ self.current = match &entries[0] {
199199+ NodeEntry::Leaf { key, value } => CursorPosition::Leaf {
200200+ key: key.clone(),
201201+ cid: *value,
202202+ },
203203+ NodeEntry::Tree(tree) => CursorPosition::Tree { mst: tree.clone() },
204204+ };
205205+206206+ Ok(())
207207+ }
208208+ }
209209+}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DAG_CBOR_CID_CODEC;
    use crate::mst::tree::Mst;
    use crate::storage::memory::MemoryBlockStore;
    use jacquard_common::types::crypto::SHA2_256;
    use std::sync::Arc;

    /// Build a CID whose multihash digest is 32 copies of `n`
    fn test_cid(n: u8) -> IpldCid {
        let digest = vec![n; 32];
        let hash = multihash::Multihash::wrap(SHA2_256, &digest).unwrap();
        IpldCid::new_v1(DAG_CBOR_CID_CODEC, hash)
    }

    #[tokio::test]
    async fn test_cursor_empty_tree() {
        let tree = Mst::new(Arc::new(MemoryBlockStore::new()));
        let mut cursor = MstCursor::new(tree);

        // A fresh cursor rests on the root, which is a Tree position
        assert!(matches!(cursor.current(), CursorPosition::Tree { .. }));

        // Stepping into an empty tree lands directly on End
        cursor.advance().await.unwrap();
        assert!(cursor.is_end());
    }

    #[tokio::test]
    async fn test_cursor_single_leaf() {
        let tree = Mst::new(Arc::new(MemoryBlockStore::new()))
            .add("key1", test_cid(1))
            .await
            .unwrap();
        let mut cursor = MstCursor::new(tree);

        // Root first
        assert!(matches!(cursor.current(), CursorPosition::Tree { .. }));

        // Then the only leaf
        cursor.advance().await.unwrap();
        assert_eq!(cursor.key(), Some("key1"));

        // Then the end of the traversal
        cursor.advance().await.unwrap();
        assert!(cursor.is_end());
    }

    #[tokio::test]
    async fn test_cursor_multiple_leaves() {
        let tree = Mst::new(Arc::new(MemoryBlockStore::new()));
        let tree = tree.add("a", test_cid(1)).await.unwrap();
        let tree = tree.add("b", test_cid(2)).await.unwrap();
        let tree = tree.add("c", test_cid(3)).await.unwrap();

        let mut cursor = MstCursor::new(tree);

        // Step off the root before collecting
        cursor.advance().await.unwrap();

        let mut keys = Vec::new();
        while !cursor.is_end() {
            keys.extend(cursor.key().map(str::to_string));
            cursor.advance().await.unwrap();
        }

        assert_eq!(keys, vec!["a", "b", "c"]);
    }

    #[tokio::test]
    async fn test_cursor_skip_subtree() {
        let tree = Mst::new(Arc::new(MemoryBlockStore::new()));
        let tree = tree.add("a", test_cid(1)).await.unwrap();
        let tree = tree.add("b", test_cid(2)).await.unwrap();
        let tree = tree.add("c", test_cid(3)).await.unwrap();

        let mut cursor = MstCursor::new(tree);

        // Step into the root
        cursor.advance().await.unwrap();

        // Count leaves, skipping over any subtree encountered on the way
        let mut leaf_count = 0;
        loop {
            match cursor.current() {
                CursorPosition::End => break,
                CursorPosition::Leaf { .. } => {
                    leaf_count += 1;
                    cursor.advance().await.unwrap();
                }
                CursorPosition::Tree { .. } => {
                    cursor.skip_subtree().await.unwrap();
                }
            }
        }

        // At least one leaf must have been visited
        assert!(leaf_count > 0);
    }
}
+345-32
crates/jacquard-repo/src/mst/diff.rs
···11//! MST diff calculation
2233+use std::collections::BTreeMap;
44+55+use super::cursor::{CursorPosition, MstCursor};
36use super::tree::Mst;
47use crate::error::Result;
58use crate::storage::BlockStore;
99+use bytes::Bytes;
610use cid::Cid as IpldCid;
711use smol_str::SmolStr;
88-use std::collections::HashMap;
9121013/// Diff between two MST states
1114///
···21242225 /// Records deleted (key, old CID)
2326 pub deletes: Vec<(SmolStr, IpldCid)>,
2727+2828+ /// Record CIDs that are newly referenced (from creates + updates)
2929+ ///
3030+ /// This includes:
3131+ /// - CIDs from created records
3232+ /// - New CIDs from updated records
3333+ ///
3434+ /// These need to be available in storage for the new tree.
3535+ pub new_leaf_cids: Vec<IpldCid>,
3636+3737+ /// Record CIDs that are no longer referenced (from deletes + updates)
3838+ ///
3939+ /// This includes:
4040+ /// - CIDs from deleted records
4141+ /// - Old CIDs from updated records
4242+ ///
4343+ /// These can be garbage collected if not referenced elsewhere.
4444+ pub removed_cids: Vec<IpldCid>,
4545+4646+ /// MST node blocks that are newly created
4747+ ///
4848+ /// When modifying a tree, new MST nodes are created along changed paths.
4949+ /// This tracks those nodes for persistence/commit inclusion.
5050+ pub new_mst_blocks: BTreeMap<IpldCid, Bytes>,
5151+5252+ /// MST node blocks that are no longer needed
5353+ ///
5454+ /// When modifying a tree, old MST nodes along changed paths become unreachable.
5555+ /// This tracks those nodes for garbage collection.
5656+ pub removed_mst_blocks: Vec<IpldCid>,
2457}
25582659use super::tree::VerifiedWriteOp;
···3265 creates: Vec::new(),
3366 updates: Vec::new(),
3467 deletes: Vec::new(),
6868+ new_leaf_cids: Vec::new(),
6969+ removed_cids: Vec::new(),
7070+ new_mst_blocks: BTreeMap::new(),
7171+ removed_mst_blocks: Vec::new(),
3572 }
3673 }
3774···94131 ops
95132 }
96133134134+ /// Fetch new record data blocks from storage
135135+ ///
136136+ /// Returns a map of CID → bytes for all new record data (creates + updates).
137137+ /// This is useful for including record data in commits and firehose messages.
138138+ pub async fn fetch_new_blocks<S: BlockStore>(
139139+ &self,
140140+ storage: &S,
141141+ ) -> Result<std::collections::BTreeMap<IpldCid, bytes::Bytes>> {
142142+ use std::collections::BTreeMap;
143143+144144+ let mut blocks = BTreeMap::new();
145145+146146+ for cid in &self.new_leaf_cids {
147147+ if let Some(block) = storage.get(cid).await? {
148148+ blocks.insert(*cid, block);
149149+ }
150150+ }
151151+152152+ Ok(blocks)
153153+ }
154154+97155 /// Convert diff to firehose repository operations
98156 ///
99157 /// Returns operations in the format used by `com.atproto.sync.subscribeRepos`.
···150208 /// - Creates: keys in `other` but not in `self`
151209 /// - Updates: keys in both but with different CIDs
152210 /// - Deletes: keys in `self` but not in `other`
211211+ ///
212212+ /// Uses an efficient walker-based algorithm that only visits changed subtrees.
213213+ /// When two subtrees have the same CID, the entire subtree is skipped.
153214 pub async fn diff(&self, other: &Mst<S>) -> Result<MstDiff> {
154154- // Collect all leaves from both trees
155155- let self_leaves = self.leaves().await?;
156156- let other_leaves = other.leaves().await?;
215215+ let mut diff = MstDiff::new();
216216+ diff_recursive(self, other, &mut diff).await?;
217217+ Ok(diff)
218218+ }
219219+}
220220+221221+/// Recursively diff two MST nodes using cursors
222222+fn diff_recursive<'a, S: BlockStore + Sync + 'static>(
223223+ old: &'a Mst<S>,
224224+ new: &'a Mst<S>,
225225+ diff: &'a mut MstDiff,
226226+) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
227227+ Box::pin(async move {
228228+ // If CIDs are equal, trees are identical - skip entire subtree
229229+ let old_cid = old.get_pointer().await?;
230230+ let new_cid = new.get_pointer().await?;
231231+ if old_cid == new_cid {
232232+ return Ok(());
233233+ }
234234+235235+ // CIDs differ - use cursors to walk both trees
236236+ let mut old_cursor = MstCursor::new(old.clone());
237237+ let mut new_cursor = MstCursor::new(new.clone());
238238+239239+ // Don't advance yet - let loop handle roots like any other tree comparison
240240+ loop {
241241+ match (old_cursor.current(), new_cursor.current()) {
242242+ (CursorPosition::End, CursorPosition::End) => break,
243243+244244+ // Only new entries remain - all adds
245245+ (CursorPosition::End, CursorPosition::Leaf { key, cid }) => {
246246+ diff.creates.push((key.clone(), *cid));
247247+ diff.new_leaf_cids.push(*cid);
248248+ new_cursor.advance().await?;
249249+ }
250250+ (CursorPosition::End, CursorPosition::Tree { mst }) => {
251251+ track_added_tree(mst, diff).await?;
252252+ new_cursor.skip_subtree().await?;
253253+ }
254254+255255+ // Only old entries remain - all deletes
256256+ (CursorPosition::Leaf { key, cid }, CursorPosition::End) => {
257257+ diff.deletes.push((key.clone(), *cid));
258258+ diff.removed_cids.push(*cid);
259259+ old_cursor.advance().await?;
260260+ }
261261+ (CursorPosition::Tree { mst }, CursorPosition::End) => {
262262+ track_removed_tree(mst, diff).await?;
263263+ old_cursor.skip_subtree().await?;
264264+ }
265265+266266+ // Both have entries - compare them
267267+ (old_pos, new_pos) => {
268268+ // Handle Leaf/Leaf comparison FIRST (before layer checks)
269269+ // This matches rsky's logic - key comparison takes precedence
270270+ if let (
271271+ CursorPosition::Leaf {
272272+ key: old_key,
273273+ cid: old_cid,
274274+ },
275275+ CursorPosition::Leaf {
276276+ key: new_key,
277277+ cid: new_cid,
278278+ },
279279+ ) = (old_pos, new_pos)
280280+ {
281281+ match old_key.as_str().cmp(new_key.as_str()) {
282282+ std::cmp::Ordering::Equal => {
283283+ // Same key - check if value changed
284284+ if old_cid != new_cid {
285285+ diff.updates.push((old_key.clone(), *new_cid, *old_cid));
286286+ diff.new_leaf_cids.push(*new_cid);
287287+ diff.removed_cids.push(*old_cid);
288288+ }
289289+ old_cursor.advance().await?;
290290+ new_cursor.advance().await?;
291291+ }
292292+ std::cmp::Ordering::Less => {
293293+ // Old key < new key - old was deleted
294294+ diff.deletes.push((old_key.clone(), *old_cid));
295295+ diff.removed_cids.push(*old_cid);
296296+ old_cursor.advance().await?;
297297+ }
298298+ std::cmp::Ordering::Greater => {
299299+ // Old key > new key - new was created
300300+ diff.creates.push((new_key.clone(), *new_cid));
301301+ diff.new_leaf_cids.push(*new_cid);
302302+ new_cursor.advance().await?;
303303+ }
304304+ }
305305+ continue;
306306+ }
307307+308308+ // Now check layers for Tree comparisons
309309+ let old_layer = old_cursor.layer().await?;
310310+ let new_layer = new_cursor.layer().await?;
311311+312312+ match (old_pos, new_pos) {
313313+ // Both trees at same layer - check if CIDs match, skip or recurse
314314+ (
315315+ CursorPosition::Tree { mst: old_tree },
316316+ CursorPosition::Tree { mst: new_tree },
317317+ ) if old_layer == new_layer => {
318318+ let old_tree_cid = old_tree.get_pointer().await?;
319319+ let new_tree_cid = new_tree.get_pointer().await?;
320320+321321+ if old_tree_cid == new_tree_cid {
322322+ // Same subtree - skip both
323323+ old_cursor.skip_subtree().await?;
324324+ new_cursor.skip_subtree().await?;
325325+ } else {
326326+ // Different subtrees - serialize and track MST blocks, then step in to find leaf diff
327327+ serialize_and_track_mst(new_tree, diff).await?;
328328+ diff.removed_mst_blocks.push(old_tree_cid);
329329+ // Don't track recursively - step in to compare leaves
330330+ old_cursor.advance().await?;
331331+ new_cursor.advance().await?;
332332+ }
333333+ }
334334+335335+ // Layer mismatch handling (rsky pattern)
336336+ _ if old_layer > new_layer => {
337337+ // Old is at higher layer - need to descend or advance appropriately
338338+ match old_pos {
339339+ CursorPosition::Leaf { .. } => {
340340+ // Higher layer leaf - serialize and track new node, advance new to continue comparing
341341+ if let CursorPosition::Tree { mst } = new_pos {
342342+ serialize_and_track_mst(mst, diff).await?;
343343+ }
344344+ new_cursor.advance().await?; // Don't blindly add - let loop compare
345345+ }
346346+ CursorPosition::Tree { mst } => {
347347+ // Higher layer tree - track MST block removal, then step into to find leaves
348348+ let tree_cid = mst.get_pointer().await?;
349349+ diff.removed_mst_blocks.push(tree_cid);
350350+ old_cursor.advance().await?; // Step into to continue comparing
351351+ }
352352+ _ => {}
353353+ }
354354+ }
157355158158- // Build hashmaps for efficient lookup
159159- let self_map: HashMap<SmolStr, IpldCid> = self_leaves.into_iter().collect();
160160- let other_map: HashMap<SmolStr, IpldCid> = other_leaves.into_iter().collect();
356356+ _ if old_layer < new_layer => {
357357+ // New is at higher layer
358358+ match new_pos {
359359+ CursorPosition::Leaf { .. } => {
360360+ // Higher layer leaf - track old node, advance old to continue comparing
361361+ if let CursorPosition::Tree { mst } = old_pos {
362362+ let tree_cid = mst.get_pointer().await?;
363363+ diff.removed_mst_blocks.push(tree_cid);
364364+ }
365365+ old_cursor.advance().await?; // Don't blindly delete - let loop compare
366366+ }
367367+ CursorPosition::Tree { mst } => {
368368+ // Higher layer tree - serialize and track MST block addition, then step into to find leaves
369369+ serialize_and_track_mst(mst, diff).await?;
370370+ new_cursor.advance().await?; // Step into to continue comparing
371371+ }
372372+ _ => {}
373373+ }
374374+ }
161375162162- let mut diff = MstDiff::new();
376376+ // Same layer, mixed Leaf/Tree - step into tree to compare
377377+ (CursorPosition::Leaf { .. }, CursorPosition::Tree { mst }) => {
378378+ // Old has leaf, new has tree - serialize and track new MST block, step in to compare leaves
379379+ serialize_and_track_mst(mst, diff).await?;
380380+ new_cursor.advance().await?;
381381+ }
163382164164- // Find creates and updates
165165- for (key, new_cid) in &other_map {
166166- match self_map.get(key) {
167167- Some(old_cid) => {
168168- // Key exists in both - check if CID changed
169169- if old_cid != new_cid {
170170- diff.updates.push((key.clone(), *new_cid, *old_cid));
383383+ (CursorPosition::Tree { mst }, CursorPosition::Leaf { .. }) => {
384384+ // Old has tree, new has leaf - track removed MST block, step in to compare leaves
385385+ let tree_cid = mst.get_pointer().await?;
386386+ diff.removed_mst_blocks.push(tree_cid);
387387+ old_cursor.advance().await?;
388388+ }
389389+390390+ _ => {}
171391 }
172392 }
173173- None => {
174174- // Key only in other - create
175175- diff.creates.push((key.clone(), *new_cid));
393393+ }
394394+ }
395395+396396+ Ok(())
397397+ })
398398+}
399399+400400+/// Serialize MST node and add to new_mst_blocks
401401+async fn serialize_and_track_mst<S: BlockStore + Sync + 'static>(
402402+ tree: &Mst<S>,
403403+ diff: &mut MstDiff,
404404+) -> Result<()> {
405405+ let tree_cid = tree.get_pointer().await?;
406406+407407+ // Serialize the MST node
408408+ let entries = tree.get_entries().await?;
409409+ let node_data = super::util::serialize_node_data(&entries).await?;
410410+ let cbor = serde_ipld_dagcbor::to_vec(&node_data)
411411+ .map_err(|e| crate::error::RepoError::serialization(e))?;
412412+413413+ // Track the serialized block
414414+ diff.new_mst_blocks.insert(tree_cid, Bytes::from(cbor));
415415+416416+ Ok(())
417417+}
418418+419419+/// Track entire tree as added (all leaves and nodes)
420420+fn track_added_tree<'a, S: BlockStore + Sync + 'static>(
421421+ tree: &'a Mst<S>,
422422+ diff: &'a mut MstDiff,
423423+) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
424424+ Box::pin(async move {
425425+ use super::node::NodeEntry;
426426+427427+ // Serialize and track this MST node
428428+ serialize_and_track_mst(tree, diff).await?;
429429+430430+ let entries = tree.get_entries().await?;
431431+ for entry in &entries {
432432+ match entry {
433433+ NodeEntry::Leaf { key, value } => {
434434+ diff.creates.push((key.clone(), *value));
435435+ diff.new_leaf_cids.push(*value);
436436+ }
437437+ NodeEntry::Tree(subtree) => {
438438+ track_added_tree(subtree, diff).await?;
176439 }
177440 }
178441 }
179442180180- // Find deletes
181181- for (key, old_cid) in &self_map {
182182- if !other_map.contains_key(key) {
183183- // Key only in self - delete
184184- diff.deletes.push((key.clone(), *old_cid));
443443+ Ok(())
444444+ })
445445+}
446446+447447+/// Track entire tree as removed (all leaves and nodes)
448448+fn track_removed_tree<'a, S: BlockStore + Sync + 'static>(
449449+ tree: &'a Mst<S>,
450450+ diff: &'a mut MstDiff,
451451+) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
452452+ Box::pin(async move {
453453+ use super::node::NodeEntry;
454454+455455+ // Track this MST node as removed
456456+ let tree_cid = tree.get_pointer().await?;
457457+ diff.removed_mst_blocks.push(tree_cid);
458458+459459+ // Recursively remove all leaves and nodes
460460+ let entries = tree.get_entries().await?;
461461+ for entry in &entries {
462462+ match entry {
463463+ NodeEntry::Leaf { key, value } => {
464464+ diff.deletes.push((key.clone(), *value));
465465+ diff.removed_cids.push(*value);
466466+ }
467467+ NodeEntry::Tree(subtree) => {
468468+ track_removed_tree(subtree, diff).await?;
469469+ }
185470 }
186471 }
187472188188- Ok(diff)
189189- }
473473+ Ok(())
474474+ })
475475+}
190476477477+impl<S: BlockStore + Sync + 'static> Mst<S> {
191478 /// Compute diff from this tree to empty (all deletes)
192479 ///
193480 /// Returns diff representing deletion of all records in this tree.
194481 pub async fn diff_to_empty(&self) -> Result<MstDiff> {
195195- let leaves = self.leaves().await?;
482482+ let mut diff = MstDiff::new();
483483+ track_removed_tree_all(self, &mut diff).await?;
484484+ Ok(diff)
485485+ }
486486+}
487487+488488+/// Track entire tree as removed (all nodes and leaves)
489489+fn track_removed_tree_all<'a, S: BlockStore + Sync + 'static>(
490490+ tree: &'a Mst<S>,
491491+ diff: &'a mut MstDiff,
492492+) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
493493+ Box::pin(async move {
494494+ use super::node::NodeEntry;
495495+496496+ // Track this node as removed
497497+ let tree_cid = tree.get_pointer().await?;
498498+ diff.removed_mst_blocks.push(tree_cid);
499499+500500+ // Recurse through entries
501501+ let entries = tree.get_entries().await?;
502502+ for entry in &entries {
503503+ match entry {
504504+ NodeEntry::Leaf { key, value } => {
505505+ diff.deletes.push((key.clone(), *value));
506506+ diff.removed_cids.push(*value);
507507+ }
508508+ NodeEntry::Tree(subtree) => {
509509+ track_removed_tree_all(subtree, diff).await?;
510510+ }
511511+ }
512512+ }
196513197197- Ok(MstDiff {
198198- creates: Vec::new(),
199199- updates: Vec::new(),
200200- deletes: leaves,
201201- })
202202- }
514514+ Ok(())
515515+ })
203516}
204517205518#[cfg(test)]
+3-1
crates/jacquard-repo/src/mst/mod.rs
···44pub mod tree;
55pub mod util;
66pub mod diff;
77+pub mod cursor;
7889pub use node::{NodeData, NodeEntry, TreeEntry};
99-pub use tree::{Mst, WriteOp};
1010+pub use tree::{Mst, WriteOp, RecordWriteOp, VerifiedWriteOp};
1011pub use diff::MstDiff;
1212+pub use cursor::{MstCursor, CursorPosition};
+14-1
crates/jacquard-repo/src/mst/node.rs
···11//! MST node data structures
2233+use std::fmt;
44+35use bytes::Bytes;
46use cid::Cid as IpldCid;
57use smol_str::SmolStr;
···1113/// `[Tree, Leaf, Tree, Leaf, Leaf, Tree]` etc.
1214///
1315/// The wire format (CBOR) is different - see `NodeData` and `TreeEntry`.
1414-#[derive(Debug, Clone)]
1616+#[derive(Clone)]
1517pub enum NodeEntry<S: crate::storage::BlockStore> {
1618 /// Subtree reference
1719 ///
···2527 /// CID of the record value
2628 value: IpldCid,
2729 },
3030+}
3131+3232+impl<S: crate::storage::BlockStore> fmt::Debug for NodeEntry<S> {
3333+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3434+ match self {
3535+ NodeEntry::Tree(t) => write!(f, "{:?}", t),
3636+ NodeEntry::Leaf { key, value } => {
3737+ write!(f, "Leaf {{ key: {}, value: {} }}", key, value)
3838+ }
3939+ }
4040+ }
2841}
29423043impl<S: crate::storage::BlockStore> NodeEntry<S> {
+233-25
crates/jacquard-repo/src/mst/tree.rs
···55use crate::error::{RepoError, Result};
66use crate::storage::BlockStore;
77use cid::Cid as IpldCid;
88+use core::fmt;
99+use jacquard_common::types::recordkey::Rkey;
1010+use jacquard_common::types::string::{Nsid, RecordKey};
1111+use jacquard_common::types::value::RawData;
812use smol_str::SmolStr;
1313+use std::fmt::{Display, Formatter};
1414+use std::pin::Pin;
915use std::sync::Arc;
1016use tokio::sync::RwLock;
1117···4652 },
4753}
48545555+/// Record write operation with inline data
5656+///
5757+/// Used for high-level record operations where the actual record data
5858+/// needs to be serialized and stored. The data is a generic IPLD map
5959+/// (similar to rsky's `RepoRecord = BTreeMap<String, Lex>`).
6060+#[derive(Debug, Clone, PartialEq)]
6161+pub enum RecordWriteOp<'a> {
6262+ /// Create new record with data
6363+ Create {
6464+ /// Collection NSID
6565+ collection: Nsid<'a>,
6666+ /// Record key
6767+ rkey: RecordKey<Rkey<'a>>,
6868+ /// Record data (will be serialized to DAG-CBOR and CID computed)
6969+ record: std::collections::BTreeMap<SmolStr, RawData<'a>>,
7070+ },
7171+7272+ /// Update existing record with new data
7373+ Update {
7474+ /// Collection NSID
7575+ collection: Nsid<'a>,
7676+ /// Record key
7777+ rkey: RecordKey<Rkey<'a>>,
7878+ /// New record data
7979+ record: std::collections::BTreeMap<SmolStr, RawData<'a>>,
8080+ /// Previous CID (optional for validation)
8181+ prev: Option<IpldCid>,
8282+ },
8383+8484+ /// Delete record
8585+ Delete {
8686+ /// Collection NSID
8787+ collection: Nsid<'a>,
8888+ /// Record key
8989+ rkey: RecordKey<Rkey<'a>>,
9090+ /// Previous CID (optional for validation)
9191+ prev: Option<IpldCid>,
9292+ },
9393+}
9494+9595+impl<'a> RecordWriteOp<'a> {
9696+ /// Get the collection NSID for this operation
9797+ pub fn collection(&self) -> &Nsid<'a> {
9898+ match self {
9999+ RecordWriteOp::Create { collection, .. } => collection,
100100+ RecordWriteOp::Update { collection, .. } => collection,
101101+ RecordWriteOp::Delete { collection, .. } => collection,
102102+ }
103103+ }
104104+105105+ /// Get the record key for this operation
106106+ pub fn rkey(&self) -> &RecordKey<Rkey<'a>> {
107107+ match self {
108108+ RecordWriteOp::Create { rkey, .. } => rkey,
109109+ RecordWriteOp::Update { rkey, .. } => rkey,
110110+ RecordWriteOp::Delete { rkey, .. } => rkey,
111111+ }
112112+ }
113113+}
114114+49115/// Verified write operation with required prev fields
50116///
51117/// Used for operations where prev CID has been verified against tree state.
···101167/// - More leading zeros = higher layer (deeper in tree)
102168/// - Layer = floor(leading_zeros / 2) for ~4 fanout
103169/// - Deterministic and insertion-order independent
104104-#[derive(Debug, Clone)]
170170+#[derive(Clone)]
105171pub struct Mst<S: BlockStore> {
106172 /// Block storage for loading/saving nodes (shared via Arc)
107173 storage: Arc<S>,
···186252 Ok(Self {
187253 storage: self.storage.clone(),
188254 entries: Arc::new(RwLock::new(Some(entries))),
189189- pointer: self.pointer.clone(),
255255+ pointer: Arc::new(RwLock::new(self.pointer.read().await.clone())),
190256 outdated_pointer: Arc::new(RwLock::new(true)),
191257 layer: self.layer,
192258 })
193259 }
194260195261 /// Get entries (lazy load if needed)
196196- async fn get_entries(&self) -> Result<Vec<NodeEntry<S>>> {
262262+ pub(crate) async fn get_entries(&self) -> Result<Vec<NodeEntry<S>>> {
197263 {
198264 let entries_guard = self.entries.read().await;
199265 if let Some(ref entries) = *entries_guard {
···227293 ///
228294 /// Computes CID from current entries but doesn't persist to storage.
229295 /// Use `collect_blocks()` to gather blocks for persistence.
230230- pub async fn get_pointer(&self) -> Result<IpldCid> {
231231- let outdated = *self.outdated_pointer.read().await;
232232- if !outdated {
233233- return Ok(*self.pointer.read().await);
234234- }
296296+ pub fn get_pointer<'a>(&'a self) -> Pin<Box<dyn Future<Output = Result<IpldCid>> + Send + 'a>> {
297297+ Box::pin(async move {
298298+ let outdated = *self.outdated_pointer.read().await;
299299+ if !outdated {
300300+ return Ok(*self.pointer.read().await);
301301+ }
235302236236- // Serialize and compute CID (don't persist yet)
237237- let entries = self.get_entries().await?;
238238- let node_data = util::serialize_node_data(&entries).await?;
239239- let cbor =
240240- serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| RepoError::serialization(e))?;
241241- let cid = util::compute_cid(&cbor)?;
303303+ // Check for outdated children and recursively update them first
304304+ let mut entries = self.get_entries().await?;
305305+ let mut outdated_children = Vec::new();
242306243243- // Update pointer and mark as fresh
244244- {
245245- let mut pointer_guard = self.pointer.write().await;
246246- *pointer_guard = cid;
247247- }
248248- {
249249- let mut outdated_guard = self.outdated_pointer.write().await;
250250- *outdated_guard = false;
251251- }
307307+ for entry in &entries {
308308+ if let NodeEntry::Tree(mst) = entry {
309309+ let is_outdated = *mst.outdated_pointer.read().await;
310310+ if is_outdated {
311311+ outdated_children.push(mst.clone());
312312+ }
313313+ }
314314+ }
252315253253- Ok(cid)
316316+ // Recursively update outdated children
317317+ if !outdated_children.is_empty() {
318318+ for child in &outdated_children {
319319+ let _ = child.get_pointer().await?;
320320+ }
321321+ // Re-fetch entries with updated child CIDs
322322+ entries = self.get_entries().await?;
323323+ }
324324+325325+ // Now serialize and compute CID with fresh child CIDs
326326+ let node_data = util::serialize_node_data(&entries).await?;
327327+ let cbor =
328328+ serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| RepoError::serialization(e))?;
329329+ let cid = util::compute_cid(&cbor)?;
330330+331331+ // Update pointer and mark as fresh
332332+ {
333333+ let mut pointer_guard = self.pointer.write().await;
334334+ *pointer_guard = cid;
335335+ }
336336+ {
337337+ let mut outdated_guard = self.outdated_pointer.write().await;
338338+ *outdated_guard = false;
339339+ }
340340+341341+ Ok(cid)
342342+ })
254343 }
255344256345 /// Get root CID (alias for get_pointer)
···269358 ///
270359 /// Layer is the maximum layer of any leaf key in this node.
271360 /// For nodes with no leaves, recursively checks subtrees.
272272- fn get_layer<'a>(
361361+ pub(crate) fn get_layer<'a>(
273362 &'a self,
274363 ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<usize>> + Send + 'a>> {
275364 Box::pin(async move {
···818907 })
819908 }
820909910910+ /// Copy tree with same entries (marking pointer as outdated)
911911+ ///
912912+ /// Internal helper for creating modified tree copies.
913913+ pub async fn copy_tree(&self) -> Result<Self> {
914914+ let entries = self.get_entries().await?;
915915+ self.new_tree(entries).await
916916+ }
917917+821918 /// Apply batch of verified write operations (returns new tree)
822919 ///
823920 /// More efficient than individual operations as it only rebuilds
···9461043 Ok(root_cid)
9471044 }
948104510461046+ /// Collect all MST node CIDs in this tree
10471047+ ///
10481048+ /// Returns all CIDs for MST nodes (internal nodes), not leaves.
10491049+ /// Used for diff calculation to determine which MST blocks are removed.
10501050+ pub async fn collect_node_cids(&self) -> Result<Vec<IpldCid>> {
10511051+ let mut cids = Vec::new();
10521052+ let pointer = self.get_pointer().await?;
10531053+ cids.push(pointer);
10541054+10551055+ let entries = self.get_entries().await?;
10561056+ for entry in &entries {
10571057+ if let NodeEntry::Tree(subtree) = entry {
10581058+ let subtree_cids = subtree.collect_node_cids().await?;
10591059+ cids.extend(subtree_cids);
10601060+ }
10611061+ }
10621062+10631063+ Ok(cids)
10641064+ }
10651065+9491066 /// Get all CIDs in the merkle path to a key
9501067 ///
9511068 /// Returns a list of CIDs representing the proof path from root to the target key:
···1070118710711188 Ok(())
10721189 })
11901190+ }
11911191+}
11921192+11931193+impl<S: BlockStore> std::fmt::Debug for Mst<S> {
11941194+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
11951195+ f.debug_struct("MST")
11961196+ .field("entries", &self.entries.try_read().unwrap())
11971197+ .field("layer", &self.layer)
11981198+ .field("pointer", &self.pointer.try_read().unwrap().to_string())
11991199+ .field(
12001200+ "outdated_pointer",
12011201+ &self.outdated_pointer.try_read().unwrap(),
12021202+ )
12031203+ .finish()
12041204+ }
12051205+}
12061206+12071207+/// Format a CID for display (shortens long CIDs)
12081208+///
12091209+/// Truncates long CIDs to first 7 and last 8 characters with `...` in between.
12101210+pub fn short_cid(cid: &IpldCid) -> String {
12111211+ let cid_string = cid.to_string();
12121212+ let len = cid_string.len();
12131213+ if len > 15 {
12141214+ let first = &cid_string[0..7];
12151215+ let last = &cid_string[len - 8..];
12161216+ format!("{}...{}", first, last)
12171217+ } else {
12181218+ cid_string
12191219+ }
12201220+}
12211221+12221222+impl<S: BlockStore> Display for Mst<S> {
12231223+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
12241224+ fn pointer_str<S: BlockStore>(mst: &Mst<S>) -> String {
12251225+ let cid_guard = mst.pointer.try_read().unwrap();
12261226+ format!("*({})", short_cid(&*cid_guard))
12271227+ }
12281228+12291229+ fn fmt_mst<S: BlockStore>(
12301230+ mst: &Mst<S>,
12311231+ f: &mut Formatter<'_>,
12321232+ prefix: &str,
12331233+ is_last: bool,
12341234+ ) -> fmt::Result {
12351235+ // Print MST pointer using our helper
12361236+ writeln!(
12371237+ f,
12381238+ "{}{}── {}",
12391239+ prefix,
12401240+ if is_last { "└" } else { "├" },
12411241+ pointer_str(mst),
12421242+ )?;
12431243+12441244+ // Prepare the child prefix
12451245+ let child_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " });
12461246+12471247+ let entries_guard = mst.entries.try_read().unwrap();
12481248+ let entries = match &*entries_guard {
12491249+ Some(e) => e,
12501250+ None => {
12511251+ writeln!(f, "{}(virtual node)", child_prefix)?;
12521252+ return Ok(());
12531253+ }
12541254+ };
12551255+12561256+ for (i, entry) in entries.iter().enumerate() {
12571257+ let last_child = i == entries.len() - 1;
12581258+ match entry {
12591259+ NodeEntry::Leaf { key, value } => {
12601260+ // Print leaf key and (short) leaf value
12611261+ writeln!(
12621262+ f,
12631263+ "{}{}── {} -> {}",
12641264+ child_prefix,
12651265+ if last_child { "└" } else { "├" },
12661266+ key,
12671267+ short_cid(&value)
12681268+ )?;
12691269+ }
12701270+ NodeEntry::Tree(child_mst) => {
12711271+ // Recurse
12721272+ fmt_mst(child_mst, f, &child_prefix, last_child)?;
12731273+ }
12741274+ }
12751275+ }
12761276+ Ok(())
12771277+ }
12781278+12791279+ // Start with empty prefix for the root
12801280+ fmt_mst(self, f, "", true)
10731281 }
10741282}
10751283
+466-150
crates/jacquard-repo/src/repo.rs
···33//! Optional convenience layer over MST primitives. Provides type-safe record operations,
44//! batch writes, commit creation, and CAR export.
5566-use crate::MstDiff;
76use crate::commit::Commit;
87use crate::error::Result;
99-use crate::mst::{Mst, WriteOp};
88+use crate::mst::Mst;
109use crate::storage::BlockStore;
1110use cid::Cid as IpldCid;
1211use jacquard_common::IntoStatic;
···4039 /// Previous MST root CID (for sync v1.1)
4140 pub prev_data: Option<IpldCid>,
42414343- /// All blocks to persist (MST nodes + commit block)
4242+ /// All blocks to persist (MST nodes + record data + commit block)
4443 ///
4544 /// Includes:
4646- /// - All new MST node blocks from `mst.collect_blocks()`
4545+ /// - All new MST node blocks from `diff.new_mst_blocks`
4646+ /// - All new record data blocks (from creates + updates)
4747 /// - The commit block itself
4848 pub blocks: BTreeMap<IpldCid, bytes::Bytes>,
4949···5454 /// - MST node blocks along paths for all changed keys
5555 /// - Includes "adjacent" blocks needed for operation inversion
5656 pub relevant_blocks: BTreeMap<IpldCid, bytes::Bytes>,
5757+5858+ /// CIDs of blocks to delete from storage
5959+ ///
6060+ /// Contains CIDs that are no longer referenced by the current tree:
6161+ /// - Record CIDs from deleted records
6262+ /// - Old record CIDs from updated records
6363+ ///
6464+ /// **Note:** Actual deletion should consider whether previous commits still
6565+ /// reference these CIDs. A proper GC strategy might:
6666+ /// - Only delete if previous commits are also being GC'd
6767+ /// - Use reference counting across all retained commits
6868+ /// - Perform periodic reachability analysis
6969+ ///
7070+ /// For simple single-commit repos or when old commits are discarded, direct
7171+ /// deletion is safe.
7272+ pub deleted_cids: Vec<IpldCid>,
5773}
58745975impl CommitData {
···224240 Ok(old_cid)
225241 }
226242227227- /// Apply write operations individually (validates existence/prev)
228228- pub async fn create_writes(&mut self, ops: &[WriteOp]) -> Result<crate::mst::MstDiff> {
229229- let old_mst = self.mst.clone();
243243+ /// Apply record write operations with inline data
244244+ ///
245245+ /// Serializes record data to DAG-CBOR, computes CIDs, stores data blocks,
246246+ /// then applies write operations to the MST. Returns the diff for inspection.
247247+ ///
248248+ /// For creating commits with operations, use `create_commit()` instead.
249249+ pub async fn apply_record_writes(
250250+ &mut self,
251251+ ops: &[crate::mst::RecordWriteOp<'_>],
252252+ ) -> Result<crate::mst::MstDiff> {
253253+ use crate::mst::RecordWriteOp;
254254+ use smol_str::format_smolstr;
255255+256256+ let mut updated_tree = self.mst.clone();
230257231231- // Apply operations individually (add/update/delete verify existence)
232258 for op in ops {
233233- self.mst = match op {
234234- WriteOp::Create { key, cid } => {
235235- // Check doesn't exist
236236- if self.mst.get(key.as_str()).await?.is_some() {
237237- return Err(crate::error::RepoError::already_exists(
238238- "record",
239239- key.as_str(),
240240- ));
241241- }
242242- self.mst.add(key.as_str(), *cid).await?
259259+ updated_tree = match op {
260260+ RecordWriteOp::Create {
261261+ collection,
262262+ rkey,
263263+ record,
264264+ } => {
265265+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
266266+267267+ // Serialize record to DAG-CBOR
268268+ let cbor = serde_ipld_dagcbor::to_vec(record)
269269+ .map_err(|e| crate::error::RepoError::serialization(e))?;
270270+271271+ // Compute CID and store data
272272+ let cid = self.storage.put(&cbor).await?;
273273+274274+ updated_tree.add(key.as_str(), cid).await?
243275 }
244244- WriteOp::Update { key, cid, prev } => {
245245- // Check exists
246246- let current = self.mst.get(key.as_str()).await?.ok_or_else(|| {
247247- crate::error::RepoError::not_found("record", key.as_str())
248248- })?;
276276+ RecordWriteOp::Update {
277277+ collection,
278278+ rkey,
279279+ record,
280280+ prev,
281281+ } => {
282282+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
283283+284284+ // Serialize record to DAG-CBOR
285285+ let cbor = serde_ipld_dagcbor::to_vec(record)
286286+ .map_err(|e| crate::error::RepoError::serialization(e))?;
287287+288288+ // Compute CID and store data
289289+ let cid = self.storage.put(&cbor).await?;
249290250291 // Validate prev if provided
251292 if let Some(prev_cid) = prev {
252252- if ¤t != prev_cid {
293293+ if &cid != prev_cid {
253294 return Err(crate::error::RepoError::invalid(format!(
254295 "Update prev CID mismatch for key {}: expected {}, got {}",
255255- key, prev_cid, current
296296+ key, prev_cid, cid
256297 )));
257298 }
258299 }
259300260260- self.mst.add(key.as_str(), *cid).await?
301301+ updated_tree.add(key.as_str(), cid).await?
261302 }
262262- WriteOp::Delete { key, prev } => {
303303+ RecordWriteOp::Delete {
304304+ collection,
305305+ rkey,
306306+ prev,
307307+ } => {
308308+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
309309+263310 // Check exists
264311 let current = self.mst.get(key.as_str()).await?.ok_or_else(|| {
265312 crate::error::RepoError::not_found("record", key.as_str())
···275322 }
276323 }
277324278278- self.mst.delete(key.as_str()).await?
325325+ updated_tree.delete(key.as_str()).await?
279326 }
280327 };
281328 }
282329283283- old_mst.diff(&self.mst).await
284284- }
330330+ // Compute diff before updating
331331+ let diff = self.mst.diff(&updated_tree).await?;
332332+333333+ // Update mst
334334+ self.mst = updated_tree;
285335286286- /// Apply write operations and create a commit
287287- ///
288288- /// Convenience method that calls `create_writes()` and `commit()`.
289289- pub async fn apply_writes<K>(&mut self, ops: &[WriteOp], signing_key: &K) -> Result<MstDiff>
290290- where
291291- K: crate::commit::SigningKey,
292292- {
293293- let did = &self.commit.did.clone();
294294- let cid = &self.commit_cid.clone();
295295- let diff = self.create_writes(ops).await?;
296296- self.commit(&did, Some(*cid), signing_key).await?;
297336 Ok(diff)
298337 }
299338300300- /// Format a commit (create signed commit + collect blocks)
339339+ /// Create a commit from record write operations
301340 ///
302302- /// Creates signed commit and collects blocks for persistence and firehose:
303303- /// - All MST node blocks from `mst.collect_blocks()`
304304- /// - Commit block itself
305305- /// - Relevant blocks for sync v1.1 (walks paths for all changed keys)
341341+ /// Applies write operations, creates signed commit, and collects blocks:
342342+ /// - Serializes records to DAG-CBOR and stores data blocks
343343+ /// - Applies operations to MST and computes diff
344344+ /// - Uses `diff.new_mst_blocks` for efficient block tracking
345345+ /// - Walks paths for original operations to build relevant_blocks (sync v1.1)
306346 ///
307347 /// Returns `(ops, CommitData)` - ops are needed for `to_firehose_commit()`.
308308- pub async fn format_commit<K>(
309309- &self,
348348+ pub async fn create_commit<K>(
349349+ &mut self,
350350+ ops: &[crate::mst::RecordWriteOp<'_>],
310351 did: &Did<'_>,
311352 prev: Option<IpldCid>,
312353 signing_key: &K,
···314355 where
315356 K: crate::commit::SigningKey,
316357 {
317317- let rev = Ticker::new().next(Some(self.commit.rev.clone()));
318318- let data = self.mst.root().await?;
319319- let prev_data = *self.commit.data();
358358+ use crate::mst::RecordWriteOp;
359359+ use smol_str::format_smolstr;
320360321321- // Create signed commit
322322- let commit = Commit::new_unsigned(did.clone().into_static(), data, rev.clone(), prev)
323323- .sign(signing_key)?;
361361+ // Step 1: Apply all write operations to build new MST
362362+ let mut updated_tree = self.mst.clone();
324363325325- // Load previous MST to compute diff
326326- let prev_mst = Mst::load(self.storage.clone(), prev_data, None);
327327- let diff = prev_mst.diff(&self.mst).await?;
364364+ for op in ops {
365365+ updated_tree = match op {
366366+ RecordWriteOp::Create {
367367+ collection,
368368+ rkey,
369369+ record,
370370+ } => {
371371+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
328372329329- // Collect all MST blocks for persistence
330330- let (_root_cid, mut blocks) = self.mst.collect_blocks().await?;
373373+ // Serialize record to DAG-CBOR
374374+ let cbor = serde_ipld_dagcbor::to_vec(record)
375375+ .map_err(|e| crate::error::RepoError::serialization(e))?;
331376332332- // Collect relevant blocks for firehose (walk paths for all changed keys)
333333- let mut relevant_blocks = BTreeMap::new();
377377+ // Compute CID and store data
378378+ let cid = self.storage.put(&cbor).await?;
334379335335- // Walk paths for creates
336336- for (key, _cid) in &diff.creates {
337337- let path_cids = self.mst.cids_for_path(key.as_str()).await?;
338338- for path_cid in path_cids {
339339- if let Some(block) = blocks.get(&path_cid) {
340340- relevant_blocks.insert(path_cid, block.clone());
341341- } else if let Some(block) = self.storage.get(&path_cid).await? {
342342- relevant_blocks.insert(path_cid, block);
380380+ updated_tree.add(key.as_str(), cid).await?
343381 }
344344- }
345345- }
382382+ RecordWriteOp::Update {
383383+ collection,
384384+ rkey,
385385+ record,
386386+ prev,
387387+ } => {
388388+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
389389+390390+ // Serialize record to DAG-CBOR
391391+ let cbor = serde_ipld_dagcbor::to_vec(record)
392392+ .map_err(|e| crate::error::RepoError::serialization(e))?;
393393+394394+ // Compute CID and store data
395395+ let cid = self.storage.put(&cbor).await?;
396396+397397+ // Validate prev if provided
398398+ if let Some(prev_cid) = prev {
399399+ if &cid != prev_cid {
400400+ return Err(crate::error::RepoError::invalid(format!(
401401+ "Update prev CID mismatch for key {}: expected {}, got {}",
402402+ key, prev_cid, cid
403403+ )));
404404+ }
405405+ }
346406347347- // Walk paths for updates
348348- for (key, _new_cid, _old_cid) in &diff.updates {
349349- let path_cids = self.mst.cids_for_path(key.as_str()).await?;
350350- for path_cid in path_cids {
351351- if let Some(block) = blocks.get(&path_cid) {
352352- relevant_blocks.insert(path_cid, block.clone());
353353- } else if let Some(block) = self.storage.get(&path_cid).await? {
354354- relevant_blocks.insert(path_cid, block);
407407+ updated_tree.add(key.as_str(), cid).await?
355408 }
356356- }
409409+ RecordWriteOp::Delete {
410410+ collection,
411411+ rkey,
412412+ prev,
413413+ } => {
414414+ let key = format_smolstr!("{}/{}", collection.as_ref(), rkey.as_ref());
415415+416416+ // Check exists
417417+ let current = self.mst.get(key.as_str()).await?.ok_or_else(|| {
418418+ crate::error::RepoError::not_found("record", key.as_str())
419419+ })?;
420420+421421+ // Validate prev if provided
422422+ if let Some(prev_cid) = prev {
423423+ if ¤t != prev_cid {
424424+ return Err(crate::error::RepoError::invalid(format!(
425425+ "Delete prev CID mismatch for key {}: expected {}, got {}",
426426+ key, prev_cid, current
427427+ )));
428428+ }
429429+ }
430430+431431+ updated_tree.delete(key.as_str()).await?
432432+ }
433433+ };
357434 }
358435359359- // Walk paths for deletes (path may not exist in new tree, but walk as far as possible)
360360- for (key, _old_cid) in &diff.deletes {
361361- let path_cids = self.mst.cids_for_path(key.as_str()).await?;
436436+ // Step 2: Compute diff and get new MST root
437437+ let data = updated_tree.root().await?;
438438+ let prev_data = *self.commit.data();
439439+ let diff = self.mst.diff(&updated_tree).await?;
440440+441441+ // Step 3: Extract everything we need from diff before moving it
442442+ let new_leaf_blocks = diff.fetch_new_blocks(self.storage.as_ref()).await?;
443443+ let repo_ops = diff
444444+ .to_repo_ops()
445445+ .into_iter()
446446+ .map(|op| op.into_static())
447447+ .collect();
448448+ let deleted_cids = diff.removed_cids;
449449+450450+ // Step 4: Use diff.new_mst_blocks instead of collect_blocks()
451451+ let mut blocks = diff.new_mst_blocks;
452452+453453+ // Step 5: Build relevant_blocks by walking paths for ORIGINAL operations
454454+ let mut relevant_blocks = BTreeMap::new();
455455+ for op in ops {
456456+ let key = format_smolstr!("{}/{}", op.collection().as_ref(), op.rkey().as_ref());
457457+ let path_cids = updated_tree.cids_for_path(key.as_str()).await?;
458458+362459 for path_cid in path_cids {
363460 if let Some(block) = blocks.get(&path_cid) {
364461 relevant_blocks.insert(path_cid, block.clone());
···368465 }
369466 }
370467371371- // Add commit block to both collections
468468+ // Step 6: Add new leaf blocks (record data) to both collections
469469+ for (cid, block) in new_leaf_blocks {
470470+ blocks.insert(cid, block.clone());
471471+ relevant_blocks.insert(cid, block);
472472+ }
473473+474474+ // Step 7: Create and sign commit
475475+ let rev = Ticker::new().next(Some(self.commit.rev.clone()));
476476+ let commit = Commit::new_unsigned(did.clone().into_static(), data, rev.clone(), prev)
477477+ .sign(signing_key)?;
478478+372479 let commit_cbor = commit.to_cbor()?;
373480 let commit_cid = crate::mst::util::compute_cid(&commit_cbor)?;
374481 let commit_bytes = bytes::Bytes::from(commit_cbor);
482482+483483+ // Step 8: Add commit block to both collections
375484 blocks.insert(commit_cid, commit_bytes.clone());
376485 relevant_blocks.insert(commit_cid, commit_bytes);
377486378378- // Convert diff to repository operations
379379- let ops = diff
380380- .to_repo_ops()
381381- .into_iter()
382382- .map(|op| op.into_static())
383383- .collect();
487487+ // Step 9: Update internal MST state
488488+ self.mst = updated_tree;
384489385490 Ok((
386386- ops,
491491+ repo_ops,
387492 CommitData {
388493 cid: commit_cid,
389494 rev,
···393498 prev_data: Some(prev_data),
394499 blocks,
395500 relevant_blocks,
501501+ deleted_cids,
396502 },
397503 ))
398504 }
···400506 /// Apply a commit (persist blocks to storage)
401507 ///
402508 /// Persists all blocks from `CommitData` and updates internal state.
509509+ /// Uses `BlockStore::apply_commit()` to perform atomic write+delete operations.
403510 pub async fn apply_commit(&mut self, commit_data: CommitData) -> Result<IpldCid> {
404511 let commit_cid = commit_data.cid;
405512406406- // Persist all blocks (MST + commit)
407407- self.storage.put_many(commit_data.blocks).await?;
513513+ // Apply commit to storage (writes new blocks, deletes garbage)
514514+ self.storage.apply_commit(commit_data).await?;
408515409516 // Load and update internal state
410517 let commit_bytes = self
···425532426533 /// Create a commit for the current repository state
427534 ///
428428- /// Convenience method that calls `format_commit()` and `apply_commit()`.
535535+ /// Convenience method that calls `create_commit()` with no additional operations
536536+ /// and `apply_commit()`. Use this after manually updating the MST with individual
537537+ /// record operations (e.g., `create_record()`, `update_record()`, `delete_record()`).
429538 pub async fn commit<K>(
430539 &mut self,
431540 did: &Did<'_>,
···435544 where
436545 K: crate::commit::SigningKey,
437546 {
438438- let (ops, commit_data) = self.format_commit(did, prev, signing_key).await?;
547547+ let (ops, commit_data) = self.create_commit(&[], did, prev, signing_key).await?;
439548 Ok((ops, self.apply_commit(commit_data).await?))
440549 }
441550···477586 use super::*;
478587 use crate::storage::MemoryBlockStore;
479588 use jacquard_common::types::recordkey::Rkey;
589589+ use smol_str::SmolStr;
480590481591 fn make_test_cid(value: u8) -> IpldCid {
482592 use crate::DAG_CBOR_CID_CODEC;
···488598 IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
489599 }
490600601601+ fn make_test_record(
602602+ n: u32,
603603+ ) -> std::collections::BTreeMap<SmolStr, jacquard_common::types::value::RawData<'static>> {
604604+ use jacquard_common::types::value::RawData;
605605+ use smol_str::SmolStr;
606606+607607+ let mut record = std::collections::BTreeMap::new();
608608+ record.insert(
609609+ SmolStr::new("$type"),
610610+ RawData::String("app.bsky.feed.post".into()),
611611+ );
612612+ record.insert(
613613+ SmolStr::new("text"),
614614+ RawData::String(format!("Test post #{}", n).into()),
615615+ );
616616+ record.insert(
617617+ SmolStr::new("createdAt"),
618618+ RawData::String("2024-01-01T00:00:00Z".to_string().into()),
619619+ );
620620+ record
621621+ }
622622+491623 async fn create_test_repo(storage: Arc<MemoryBlockStore>) -> Repository<MemoryBlockStore> {
492624 let did = Did::new("did:plc:test").unwrap();
493625 let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
···508640509641 #[tokio::test]
510642 async fn test_create_and_get_record() {
643643+ use crate::mst::RecordWriteOp;
644644+511645 let storage = Arc::new(MemoryBlockStore::new());
512646 let mut repo = create_test_repo(storage.clone()).await;
513647514648 let collection = Nsid::new("app.bsky.feed.post").unwrap();
515649 let rkey = RecordKey(Rkey::new("abc123").unwrap());
516516- let cid = make_test_cid(1);
517650518518- repo.create_record(&collection, &rkey, cid).await.unwrap();
651651+ let ops = vec![RecordWriteOp::Create {
652652+ collection: collection.clone().into_static(),
653653+ rkey: rkey.clone(),
654654+ record: make_test_record(1),
655655+ }];
656656+657657+ let did = Did::new("did:plc:test").unwrap();
658658+ let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
659659+ let (repo_ops, commit_data) = repo
660660+ .create_commit(
661661+ &ops,
662662+ &did,
663663+ Some(repo.current_commit_cid().clone()),
664664+ &signing_key,
665665+ )
666666+ .await
667667+ .unwrap();
668668+669669+ assert_eq!(repo_ops.len(), 1);
670670+ assert_eq!(repo_ops[0].action.as_ref(), "create");
519671520672 let retrieved = repo.get_record(&collection, &rkey).await.unwrap();
521521- assert_eq!(retrieved, Some(cid));
673673+ assert!(retrieved.is_some());
674674+675675+ // Verify data is actually in storage (from commit_data blocks)
676676+ let cid = retrieved.unwrap();
677677+ assert!(commit_data.blocks.contains_key(&cid));
522678 }
523679524680 #[tokio::test]
···598754599755 let result = repo.delete_record(&collection, &rkey).await;
600756 assert!(result.is_err());
601601- }
602602-603603- #[tokio::test]
604604- async fn test_apply_writes() {
605605- let storage = Arc::new(MemoryBlockStore::new());
606606- let mut repo = create_test_repo(storage).await;
607607-608608- let ops = vec![
609609- WriteOp::Create {
610610- key: "app.bsky.feed.post/abc123".into(),
611611- cid: make_test_cid(1),
612612- },
613613- WriteOp::Create {
614614- key: "app.bsky.feed.post/def456".into(),
615615- cid: make_test_cid(2),
616616- },
617617- ];
618618-619619- let diff = repo.create_writes(&ops).await.unwrap();
620620- assert_eq!(diff.creates.len(), 2);
621621- assert_eq!(diff.updates.len(), 0);
622622- assert_eq!(diff.deletes.len(), 0);
623757 }
624758625759 #[tokio::test]
···827961 }
828962829963 #[tokio::test]
964964+ async fn test_commit_tracks_deleted_cids() {
965965+ use crate::mst::RecordWriteOp;
966966+967967+ let storage = Arc::new(MemoryBlockStore::new());
968968+ let mut repo = create_test_repo(storage.clone()).await;
969969+970970+ let collection = Nsid::new("app.bsky.feed.post").unwrap();
971971+ let rkey1 = RecordKey(Rkey::new("test1").unwrap());
972972+ let rkey2 = RecordKey(Rkey::new("test2").unwrap());
973973+974974+ let did = Did::new("did:plc:test").unwrap();
975975+ let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
976976+977977+ // Create records with actual data
978978+ let create_ops = vec![
979979+ RecordWriteOp::Create {
980980+ collection: collection.clone(),
981981+ rkey: rkey1.clone(),
982982+ record: make_test_record(1),
983983+ },
984984+ RecordWriteOp::Create {
985985+ collection: collection.clone(),
986986+ rkey: rkey2.clone(),
987987+ record: make_test_record(2),
988988+ },
989989+ ];
990990+991991+ let (_repo_ops, commit_data) = repo
992992+ .create_commit(
993993+ &create_ops,
994994+ &did,
995995+ Some(repo.current_commit_cid().clone()),
996996+ &signing_key,
997997+ )
998998+ .await
999999+ .unwrap();
10001000+10011001+ let cid1 = repo.get_record(&collection, &rkey1).await.unwrap().unwrap();
10021002+10031003+ repo.apply_commit(commit_data).await.unwrap();
10041004+10051005+ // Delete one record and format commit (don't apply yet)
10061006+ let delete_ops = vec![RecordWriteOp::Delete {
10071007+ collection: collection.clone(),
10081008+ rkey: rkey1.clone(),
10091009+ prev: None,
10101010+ }];
10111011+10121012+ let (_, commit_data) = repo
10131013+ .create_commit(
10141014+ &delete_ops,
10151015+ &did,
10161016+ Some(repo.current_commit_cid().clone()),
10171017+ &signing_key,
10181018+ )
10191019+ .await
10201020+ .unwrap();
10211021+10221022+ // Verify deleted_cids contains the deleted record CID
10231023+ assert_eq!(commit_data.deleted_cids.len(), 1);
10241024+ assert_eq!(commit_data.deleted_cids[0], cid1);
10251025+ }
10261026+10271027+ #[tokio::test]
10281028+ async fn test_record_writes_with_commit_includes_data_blocks() {
10291029+ use crate::mst::RecordWriteOp;
10301030+10311031+ let storage = Arc::new(MemoryBlockStore::new());
10321032+ let mut repo = create_test_repo(storage.clone()).await;
10331033+10341034+ let collection = Nsid::new("app.bsky.feed.post").unwrap();
10351035+ let rkey1 = RecordKey(Rkey::new("post1").unwrap());
10361036+ let rkey2 = RecordKey(Rkey::new("post2").unwrap());
10371037+10381038+ // Create records with actual data
10391039+ let ops = vec![
10401040+ RecordWriteOp::Create {
10411041+ collection: collection.clone(),
10421042+ rkey: rkey1.clone(),
10431043+ record: make_test_record(1),
10441044+ },
10451045+ RecordWriteOp::Create {
10461046+ collection: collection.clone(),
10471047+ rkey: rkey2.clone(),
10481048+ record: make_test_record(2),
10491049+ },
10501050+ ];
10511051+10521052+ // Format commit
10531053+ let did = Did::new("did:plc:test").unwrap();
10541054+ let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
10551055+ let (repo_ops, commit_data) = repo
10561056+ .create_commit(
10571057+ &ops,
10581058+ &did,
10591059+ Some(repo.current_commit_cid().clone()),
10601060+ &signing_key,
10611061+ )
10621062+ .await
10631063+ .unwrap();
10641064+10651065+ let cid1 = repo.get_record(&collection, &rkey1).await.unwrap().unwrap();
10661066+ let cid2 = repo.get_record(&collection, &rkey2).await.unwrap().unwrap();
10671067+10681068+ // Verify commit data includes record data blocks
10691069+ assert!(
10701070+ commit_data.blocks.contains_key(&cid1),
10711071+ "blocks should contain record 1 data"
10721072+ );
10731073+ assert!(
10741074+ commit_data.blocks.contains_key(&cid2),
10751075+ "blocks should contain record 2 data"
10761076+ );
10771077+ assert!(
10781078+ commit_data.relevant_blocks.contains_key(&cid1),
10791079+ "relevant_blocks should contain record 1 data"
10801080+ );
10811081+ assert!(
10821082+ commit_data.relevant_blocks.contains_key(&cid2),
10831083+ "relevant_blocks should contain record 2 data"
10841084+ );
10851085+10861086+ // Verify we can deserialize the record data
10871087+ let record1_bytes = commit_data.blocks.get(&cid1).unwrap();
10881088+ let record1: std::collections::BTreeMap<SmolStr, jacquard_common::types::value::RawData> =
10891089+ serde_ipld_dagcbor::from_slice(record1_bytes).unwrap();
10901090+ assert_eq!(
10911091+ record1.get(&SmolStr::new("text")).unwrap(),
10921092+ &jacquard_common::types::value::RawData::String("Test post #1".to_string().into())
10931093+ );
10941094+10951095+ // Verify firehose ops
10961096+ assert_eq!(repo_ops.len(), 2);
10971097+ assert_eq!(repo_ops[0].action.as_ref(), "create");
10981098+ assert_eq!(repo_ops[1].action.as_ref(), "create");
10991099+ }
11001100+11011101+ #[tokio::test]
8301102 async fn test_batch_mixed_operations() {
11031103+ use crate::mst::RecordWriteOp;
11041104+8311105 let storage = Arc::new(MemoryBlockStore::new());
8321106 let mut repo = create_test_repo(storage.clone()).await;
8331107···8371111 let rkey1 = RecordKey(Rkey::new("existing1").unwrap());
8381112 let rkey2 = RecordKey(Rkey::new("existing2").unwrap());
8391113 let rkey3 = RecordKey(Rkey::new("existing3").unwrap());
840840- repo.create_record(&collection, &rkey1, make_test_cid(1))
841841- .await
842842- .unwrap();
843843- repo.create_record(&collection, &rkey2, make_test_cid(2))
844844- .await
845845- .unwrap();
846846- repo.create_record(&collection, &rkey3, make_test_cid(3))
11141114+11151115+ let did = Did::new("did:plc:test").unwrap();
11161116+ let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
11171117+11181118+ let create_ops = vec![
11191119+ RecordWriteOp::Create {
11201120+ collection: collection.clone(),
11211121+ rkey: rkey1.clone(),
11221122+ record: make_test_record(1),
11231123+ },
11241124+ RecordWriteOp::Create {
11251125+ collection: collection.clone(),
11261126+ rkey: rkey2.clone(),
11271127+ record: make_test_record(2),
11281128+ },
11291129+ RecordWriteOp::Create {
11301130+ collection: collection.clone(),
11311131+ rkey: rkey3.clone(),
11321132+ record: make_test_record(3),
11331133+ },
11341134+ ];
11351135+11361136+ let (_, commit_data) = repo
11371137+ .create_commit(
11381138+ &create_ops,
11391139+ &did,
11401140+ Some(repo.current_commit_cid().clone()),
11411141+ &signing_key,
11421142+ )
8471143 .await
8481144 .unwrap();
849114511461146+ // Get the CID of existing1 so we can verify it changed
11471147+ let old_cid1 = repo.get_record(&collection, &rkey1).await.unwrap().unwrap();
11481148+11491149+ repo.apply_commit(commit_data).await.unwrap();
11501150+8501151 // Batch operation: create new, update existing, delete existing
11521152+ let new_rkey = RecordKey(Rkey::new("new1").unwrap());
8511153 let ops = vec![
852852- WriteOp::Create {
853853- key: format!("{}/{}", collection.as_ref(), "new1").into(),
854854- cid: make_test_cid(10),
11541154+ RecordWriteOp::Create {
11551155+ collection: collection.clone(),
11561156+ rkey: new_rkey.clone(),
11571157+ record: make_test_record(10),
8551158 },
856856- WriteOp::Update {
857857- key: format!("{}/{}", collection.as_ref(), "existing1").into(),
858858- cid: make_test_cid(11),
11591159+ RecordWriteOp::Update {
11601160+ collection: collection.clone(),
11611161+ rkey: rkey1.clone(),
11621162+ record: make_test_record(11),
8591163 prev: None,
8601164 },
861861- WriteOp::Delete {
862862- key: format!("{}/{}", collection.as_ref(), "existing2").into(),
11651165+ RecordWriteOp::Delete {
11661166+ collection: collection.clone(),
11671167+ rkey: rkey2.clone(),
8631168 prev: None,
8641169 },
8651170 ];
8661171867867- let diff = repo.create_writes(&ops).await.unwrap();
868868- assert_eq!(diff.creates.len(), 1);
869869- assert_eq!(diff.updates.len(), 1);
870870- assert_eq!(diff.deletes.len(), 1);
11721172+ let (repo_ops, _commit_data) = repo
11731173+ .create_commit(
11741174+ &ops,
11751175+ &did,
11761176+ Some(repo.current_commit_cid().clone()),
11771177+ &signing_key,
11781178+ )
11791179+ .await
11801180+ .unwrap();
11811181+11821182+ assert_eq!(repo_ops.len(), 3);
87111838721184 // Verify final state
873873- let new_rkey = RecordKey(Rkey::new("new1").unwrap());
874874- assert_eq!(
875875- repo.get_record(&collection, &new_rkey).await.unwrap(),
876876- Some(make_test_cid(10))
877877- );
878878- assert_eq!(
879879- repo.get_record(&collection, &rkey1).await.unwrap(),
880880- Some(make_test_cid(11))
11851185+ let new_cid = repo.get_record(&collection, &new_rkey).await.unwrap();
11861186+ assert!(new_cid.is_some(), "new record should exist");
11871187+11881188+ let updated_cid1 = repo.get_record(&collection, &rkey1).await.unwrap();
11891189+ assert!(updated_cid1.is_some(), "updated record should exist");
11901190+ assert_ne!(
11911191+ updated_cid1.unwrap(),
11921192+ old_cid1,
11931193+ "record should have new CID"
8811194 );
11951195+8821196 assert_eq!(repo.get_record(&collection, &rkey2).await.unwrap(), None);
883883- assert_eq!(
884884- repo.get_record(&collection, &rkey3).await.unwrap(),
885885- Some(make_test_cid(3))
11971197+ assert!(
11981198+ repo.get_record(&collection, &rkey3)
11991199+ .await
12001200+ .unwrap()
12011201+ .is_some()
8861202 );
8871203 }
8881204}
+17
crates/jacquard-repo/src/storage/file.rs
···144144 }
145145 Ok(results)
146146 }
147147+148148+ async fn apply_commit(&self, commit: crate::repo::CommitData) -> Result<()> {
149149+ let mut store = self.blocks.write().unwrap();
150150+151151+ // First, insert all new blocks
152152+ for (cid, data) in commit.blocks {
153153+ store.insert(cid, data);
154154+ }
155155+156156+ // Then, delete all garbage-collected blocks
157157+ for cid in commit.deleted_cids {
158158+ store.remove(&cid);
159159+ }
160160+161161+ *self.dirty.write().unwrap() = true;
162162+ Ok(())
163163+ }
147164}
148165149166#[cfg(test)]
+5
crates/jacquard-repo/src/storage/layered.rs
···110110111111 Ok(results)
112112 }
113113+114114+ async fn apply_commit(&self, commit: crate::repo::CommitData) -> Result<()> {
115115+ // All operations go to writable layer only (base layer is read-only)
116116+ self.writable.apply_commit(commit).await
117117+ }
113118}
114119115120#[cfg(test)]
+16
crates/jacquard-repo/src/storage/memory.rs
···119119 }
120120 Ok(results)
121121 }
122122+123123+ async fn apply_commit(&self, commit: crate::repo::CommitData) -> Result<()> {
124124+ let mut store = self.blocks.write().unwrap();
125125+126126+ // First, insert all new blocks
127127+ for (cid, data) in commit.blocks {
128128+ store.insert(cid, data);
129129+ }
130130+131131+ // Then, delete all garbage-collected blocks
132132+ for cid in commit.deleted_cids {
133133+ store.remove(&cid);
134134+ }
135135+136136+ Ok(())
137137+ }
122138}
123139124140#[cfg(test)]
+13-2
crates/jacquard-repo/src/storage/mod.rs
···88///
99/// Provides CID-keyed block storage for MST nodes, commits, and record data.
1010/// Implementations might use:
1111-/// - In-memory HashMap ([`MemoryBlockStore`](memory::MemoryBlockStore))
1212-/// - CAR file ([`FileBlockStore`](file::FileBlockStore))
1111+/// - In-memory HashMap ([`MemoryBlockStore`])
1212+/// - CAR file ([`FileBlockStore`])
1313/// - SQLite/RocksDB (user-provided)
1414/// - Remote HTTP storage (user-provided)
1515///
···7777 ///
7878 /// Returns a vec of the same length as the input, with `None` for missing blocks.
7979 async fn get_many(&self, cids: &[IpldCid]) -> Result<Vec<Option<Bytes>>>;
8080+8181+ /// Apply a commit (atomic write + delete)
8282+ ///
8383+ /// Performs validated commit operations on the underlying storage:
8484+ /// - Persists all blocks from `commit.blocks`
8585+ /// - Deletes blocks listed in `commit.deleted_cids` (garbage collection)
8686+ ///
8787+ /// This should be atomic where possible - either both operations succeed or both fail.
8888+ /// For implementations that don't support atomic operations, writes should happen first,
8989+ /// then deletes.
9090+ async fn apply_commit(&self, commit: crate::repo::CommitData) -> Result<()>;
8091}
81928293pub mod file;