···11+open Storage
12module StringMap = Dag_cbor.StringMap
2334type node_raw =
···37383839and entry =
3940 {layer: int; key: string; value: Cid.t; right: node option Lwt.t Lazy.t}
4141+4242+type node_or_entry = Node of node | Entry of entry
40434144let ( let*? ) lazy_opt_lwt f =
4245 let%lwt result = Lazy.force lazy_opt_lwt in
···8588 node.entries <- node.entries @ [entry] ;
8689 Lwt.return node )
87908888-(* helper to find the entry with a given key in a hydrated node *)
8989-let find_entry_nonrec node key =
9090- let rec aux entries =
9191+(* returns the index of the first entry in an interspersed list that's gte a given key *)
9292+let find_gte_entry_index entries key : int =
9393+ let rec aux entries index =
9194 match entries with
9295 | [] ->
9393- None
9494- | e :: es ->
9595- if e.key = key then Some e else if e.key > key then None else aux es
9696+ (* will be entries length when not found *)
9797+ index
9898+ | e :: es -> (
9999+ match e with
100100+ | Entry entry when entry.key >= key ->
101101+ index
102102+ | _ ->
103103+ aux es (index + 1) )
96104 in
9797- aux node.entries
105105+ aux entries 0
981069999-(* from a list of raw entries, produces a list of their keys *)
100100-let entries_to_keys entries =
101101- entries
102102- |> List.fold_left
103103- (fun keys entry ->
104104- let prefix =
105105- match keys with [] -> "" | prev :: _ -> String.sub prev 0 entry.p
106106- in
107107- let path = String.concat "" [prefix; Bytes.to_string entry.k] in
108108- Util.ensure_valid_key path ; path :: keys )
109109- []
110110- |> List.rev
107107+(* produces a cid and cbor-encoded bytes for a given tree *)
108108+let serialize node : (Cid.t * bytes) Lwt.t =
109109+ let sorted_entries =
110110+ List.sort (fun a b -> String.compare a.key b.key) node.entries
111111+ in
112112+ let rec aux node =
113113+ let%lwt left =
114114+ node.left
115115+ >>? function
116116+ | Some l ->
117117+ let%lwt cid, _ = aux l in
118118+ Lwt.return_some cid
119119+ | None ->
120120+ Lwt.return_none
121121+ in
122122+ let last_key = ref "" in
123123+ let%lwt mst_entries =
124124+ Lwt_list.map_s
125125+ (fun entry ->
126126+ let%lwt right =
127127+ entry.right
128128+ >>? function
129129+ | Some r ->
130130+ let%lwt cid, _ = aux r in
131131+ Lwt.return (Some cid)
132132+ | None ->
133133+ Lwt.return None
134134+ in
135135+ let prefix_len = Util.shared_prefix_length !last_key entry.key in
136136+ last_key := entry.key ;
137137+ Lwt.return
138138+ { k=
139139+ Bytes.of_string
140140+ (String.sub entry.key prefix_len
141141+ (String.length entry.key - prefix_len) )
142142+ ; p= prefix_len
143143+ ; v= entry.value
144144+ ; t= right } )
145145+ node.entries
146146+ in
147147+ let encoded = Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries}) in
148148+ let cid = Cid.create Dcbor encoded in
149149+ Lwt.return (cid, encoded)
150150+ in
151151+ aux {node with entries= sorted_entries}
111152112112-module Make (Store : Storage.Writable_blockstore) = struct
153153+module Make (Store : Writable_blockstore) = struct
113154 type bs = Store.t
114155115156 type t = {blockstore: bs; root: Cid.t}
···168209 | _ ->
169210 raise (Invalid_argument "invalid block")
170211212212+ (* retrieves a raw node by cid *)
171213 let retrieve_node_raw t cid : node_raw option Lwt.t =
172214 match%lwt Store.get_bytes t.blockstore cid with
173215 | Some bytes ->
···175217 | None ->
176218 Lwt.return_none
177219178178- (* retrieves & decodes a node by cid *)
220220+ (* retrieves & hydrates a node by cid *)
179221 let rec retrieve_node t cid : node option Lwt.t =
180222 match%lwt retrieve_node_raw t cid with
181223 | Some raw ->
···183225 | None ->
184226 Lwt.return_none
185227228228+ (* lazy version of retrieve_node *)
186229 and retrieve_node_lazy t cid = lazy (retrieve_node t cid)
187230188231 (* hydrates a raw node *)
···261304 traverse t (fun path cid -> ignore (StringMap.add path cid map))
262305 in
263306 Lwt.return map
264264-265265- (* produces a cid and cbor-encoded bytes for this mst *)
266266- let serialize t map : (Cid.t * bytes) Lwt.t =
267267- let keys =
268268- map |> StringMap.bindings |> List.map fst |> List.sort String.compare
269269- in
270270- let entry_for_key key =
271271- let value = StringMap.find key map in
272272- let height = Util.leading_zeros_on_hash key in
273273- {layer= height; key; value; right= lazy Lwt.return_none}
274274- in
275275- let root =
276276- { layer= keys |> List.hd |> Util.leading_zeros_on_hash
277277- ; entries= []
278278- ; left= lazy Lwt.return_none }
279279- in
280280- List.iter
281281- (fun key -> ignore (insert_entry root (entry_for_key key)))
282282- (List.tl keys) ;
283283- let rec finalize node : (Cid.t * bytes) Lwt.t =
284284- let%lwt left =
285285- node.left
286286- >>? function
287287- | Some l ->
288288- let%lwt cid, _ = finalize l in
289289- Lwt.return_some cid
290290- | None ->
291291- Lwt.return_none
292292- in
293293- let last_key = ref "" in
294294- let%lwt mst_entries =
295295- Lwt_list.map_s
296296- (fun entry ->
297297- let%lwt right =
298298- entry.right
299299- >>? function
300300- | Some r ->
301301- let%lwt cid, _ = finalize r in
302302- Lwt.return (Some cid)
303303- | None ->
304304- Lwt.return None
305305- in
306306- let prefix_len = Util.shared_prefix_length !last_key entry.key in
307307- last_key := entry.key ;
308308- Lwt.return
309309- { k=
310310- Bytes.of_string
311311- (String.sub entry.key prefix_len
312312- (String.length entry.key - prefix_len) )
313313- ; p= prefix_len
314314- ; v= entry.value
315315- ; t= right } )
316316- node.entries
317317- in
318318- let encoded =
319319- Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries})
320320- in
321321- let cid = Cid.create Dcbor encoded in
322322- let%lwt () = Store.put_block t.blockstore cid encoded in
323323- Lwt.return (cid, encoded)
324324- in
325325- finalize root
326307327308 (* returns cids and blocks that form the path from a given node to a given entry *)
328309 let rec path_to_entry t node key : (Cid.t * bytes) list Lwt.t =
329310 let%lwt root_bytes = Store.get_bytes t.blockstore node in
330330- let%lwt root =
311311+ let%lwt root_raw =
331312 match root_bytes with
332313 | None ->
333314 Lwt.return_none
334315 | Some bytes ->
335316 Lwt.return_some (decode_block_raw bytes)
336317 in
337337- let path_tail = [(node, Option.get root_bytes)] in
318318+ let%lwt root =
319319+ match root_raw with
320320+ | None ->
321321+ Lwt.return_none
322322+ | Some root ->
323323+ hydrate_node t root |> Lwt.map Option.some
324324+ in
338325 (* if there is a left child, try to find a path through the left subtree *)
339326 let%lwt path_through_left =
340340- match root with
327327+ match root_raw with
341328 | None ->
342329 Lwt.return_some []
343343- | Some root -> (
344344- match root.l with
330330+ | Some raw -> (
331331+ match raw.l with
345332 | None ->
346333 Lwt.return_none
347334 | Some left -> (
···350337 Lwt.return_none
351338 | path ->
352339 (* Option.get is safe because root is Some only when root_bytes is Some *)
353353- Lwt.return_some (path @ path_tail) ) )
340340+ Lwt.return_some (path @ [(node, Option.get root_bytes)]) ) )
354341 in
355342 match path_through_left with
356343 | Some path ->
357344 Lwt.return path
358345 | None -> (
359346 (* if a left subtree path couldn't be found, find the entry whose right subtree this key would belong to *)
360360- let entries = (Option.get root).e in
361361- let entries_keys = entries_to_keys entries in
347347+ (* this branch is only reached when root/root_raw/root_bytes are not None;
348348+ if they were, path_through_left would be Some [] *)
349349+ let entries = (Option.get root).entries in
362350 let entries_len = List.length entries in
363351 let entry_index =
364364- match List.find_index (fun e -> e >= key) entries_keys with
352352+ match List.find_index (fun e -> e.key >= key) entries with
365353 | Some index ->
366354 index
367355 | None ->
368356 entries_len
369357 in
358358+ (* path_through_left is None -> root_bytes is Some *)
359359+ let path_tail = [(node, Option.get root_bytes)] in
370360 (* entry_index here is actually the entry to the right of the subtree the key would belong to *)
371361 match entry_index with
372362 | _
373363 (* because entries[entry_index] might turn out to be the entry we're looking for *)
374364 when entry_index < entries_len
375375- && List.nth entries_keys entry_index = key ->
365365+ && (List.nth entries entry_index).key = key ->
376366 Lwt.return path_tail
377367 | _ -> (
378368 (* otherwise, we continue down the right subtree of the entry before entry_index *)
379379- match Util.last entries with
380380- | Some last when last.t != None ->
369369+ (* path_through_left is None -> root_raw is Some *)
370370+ match Util.last (Option.get root_raw).e with
371371+ | Some last when last.t <> None ->
381372 let%lwt path_through_right =
373373+ (* when last.t <> None *)
382374 path_to_entry t (Option.get last.t) key
383375 in
384376 Lwt.return (path_through_right @ path_tail)
···415407 let leaves_list = Cid.Set.to_list leaves in
416408 let%lwt leaves_bm = Store.get_blocks t.blockstore leaves_list in
417409 if leaves_bm.missing <> [] then failwith "missing mst leaf blocks" ;
418418- let leaves_nodes = Storage.Block_map.entries leaves_bm.blocks in
410410+ let leaves_nodes = Block_map.entries leaves_bm.blocks in
419411 match leaves_nodes with
420412 | [] ->
421413 (* with Done, we don't care about the first pair element *)
···432424 (fun (acc, nxt, lvs) cid ->
433425 let bytes =
434426 (* we should be safe to do this since we just got the cids from the blockmap *)
435435- Storage.Block_map.get cid bm.blocks |> Option.get
427427+ Block_map.get cid bm.blocks |> Option.get
436428 in
437429 let node = decode_block_raw bytes in
438430 let nxt' =
···470462 Lwt.return_some (Obj.magic (), Done)
471463 in
472464 Lwt_seq.unfold_lwt step init_state
465465+466466+ (* returns all mst nodes needed to prove the value of a given key *)
467467+ let rec proof_for_key t root key : Block_map.t Lwt.t =
468468+ let e_rev = List.rev root.entries in
469469+ (* iterate in reverse because if the key doesn't exist at this level,
470470+ we need to search the "previous" node's right subtree *)
471471+ let rec find_proof entries_rev =
472472+ match entries_rev with
473473+ | [] ->
474474+ Lwt.return Block_map.empty
475475+ | e :: rest -> (
476476+ if e.key > key then find_proof rest
477477+ else if e.key = key then Lwt.return Block_map.empty
478478+ else
479479+ let*? right = e.right in
480480+ match right with
481481+ | Some r ->
482482+ proof_for_key t r key
483483+ | None ->
484484+ Lwt.return Block_map.empty )
485485+ in
486486+ let%lwt bm = find_proof e_rev in
487487+ let%lwt root_cid, root_bytes = serialize root in
488488+ Lwt.return (Block_map.set root_cid root_bytes bm)
489489+490490+ (* returns all mst nodes needed to prove the value of a given key's left sibling *)
491491+ let rec proof_for_left_sibling t root key : Block_map.t Lwt.t =
492492+ let e_rev = List.rev root.entries in
493493+ (* iterate in reverse for the same reason as proof_for_key *)
494494+ let rec find_proof entries_rev =
495495+ match entries_rev with
496496+ | [] ->
497497+ Lwt.return Block_map.empty
498498+ | e :: rest -> (
499499+ if e.key >= key then find_proof rest
500500+ else
501501+ let*? right = e.right in
502502+ match right with
503503+ | Some r ->
504504+ proof_for_left_sibling t r key
505505+ | None ->
506506+ Lwt.return Block_map.empty )
507507+ in
508508+ let%lwt bm = find_proof e_rev in
509509+ let%lwt root_cid, root_bytes = serialize root in
510510+ Lwt.return (Block_map.set root_cid root_bytes bm)
511511+512512+ (* returns all mst nodes needed to prove the value of a given key's right sibling *)
513513+ let rec proof_for_right_sibling t root key : Block_map.t Lwt.t =
514514+ (* unlike the other two, this doesn't iterate in reverse
515515+ because we can stop as soon as we're past the key *)
516516+ let rec find_proof ?(prev = None) entries =
517517+ match entries with
518518+ | [] -> (
519519+ (* end of entries, right sibling must be in the last entry's right subtree *)
520520+ match prev with
521521+ | Some e -> (
522522+ let*? right = e.right in
523523+ match right with
524524+ | Some r ->
525525+ proof_for_right_sibling t r key
526526+ | None ->
527527+ Lwt.return Block_map.empty )
528528+ | None ->
529529+ Lwt.return Block_map.empty )
530530+ | e :: rest ->
531531+ if e.key > key then
532532+ (* we're past target key; right sibling is in previous entry's right subtree *)
533533+ match prev with
534534+ | Some p -> (
535535+ let*? right = p.right in
536536+ match right with
537537+ | Some r ->
538538+ proof_for_right_sibling t r key
539539+ (* I don't think this should ever happen? *)
540540+ | None ->
541541+ Lwt.return Block_map.empty )
542542+ (* first entry is already greater than key; we're inside the sibling *)
543543+ | None ->
544544+ Lwt.return Block_map.empty
545545+ else if e.key = key then
546546+ (* found the entry, right sibling is in its right subtree *)
547547+ let*? right = e.right in
548548+ match right with
549549+ | Some r ->
550550+ proof_for_right_sibling t r key
551551+ | None ->
552552+ Lwt.return Block_map.empty
553553+ else (* e.key < key, keep searching *)
554554+ find_proof ~prev:(Some e) rest
555555+ in
556556+ let%lwt bm = find_proof root.entries in
557557+ let%lwt root_cid, root_bytes = serialize root in
558558+ Lwt.return (Block_map.set root_cid root_bytes bm)
559559+560560+ (* a covering proof is all mst nodes needed to prove the value of a given leaf
561561+ and its siblings to its immediate right and left (if applicable) *)
562562+ let get_covering_proof t root key : Block_map.t Lwt.t =
563563+ let%lwt proofs =
564564+ Lwt.all
565565+ [ proof_for_key t root key
566566+ ; proof_for_left_sibling t root key
567567+ ; proof_for_right_sibling t root key ]
568568+ in
569569+ Lwt.return
570570+ (List.fold_left
571571+ (fun acc proof -> Block_map.merge acc proof)
572572+ Block_map.empty proofs )
473573end