···3030 [ ("l", match node.l with Some l -> `Link l | None -> `Null)
3131 ; ("e", `Array (Array.of_list (List.map encode_entry_raw node.e))) ] )
32323333-type node_hydrated =
3333+type node =
3434 { layer: int
3535- ; mutable left: node_hydrated option
3636- ; mutable entries: entry_hydrated list }
3535+ ; mutable left: node option Lwt.t Lazy.t
3636+ ; mutable entries: entry list }
37373838-and entry_hydrated =
3939- {layer: int; key: string; value: Cid.t; right: node_hydrated option}
3838+and entry =
3939+ {layer: int; key: string; value: Cid.t; right: node option Lwt.t Lazy.t}
4040+4141+let ( let*? ) lazy_opt_lwt f =
4242+ let%lwt result = Lazy.force lazy_opt_lwt in
4343+ f result
4444+4545+let ( >>? ) lazy_opt_lwt f =
4646+ let%lwt result = Lazy.force lazy_opt_lwt in
4747+ f result
40484149(* figures out where to put an entry in or below a hydrated node, returns new node *)
4242-let rec insert_entry node entry : node_hydrated Lwt.t =
5050+let rec insert_entry node entry : node Lwt.t =
4351 let entry_layer = Util.leading_zeros_on_hash entry.key in
4452 (* as long as node layer <= entry layer, create a new node above node
4553 until we have a node at the correct height for the entry to be inserted *)
···4755 if layer >= entry_layer then node
4856 else
4957 build_insert_node
5050- {layer= layer + 1; left= Some node; entries= []}
5858+ {layer= layer + 1; left= lazy (Lwt.return_some node); entries= []}
5159 (layer + 1)
5260 in
5361 let insert_node = build_insert_node node node.layer in
5462 (* if entry is below node, recursively insert into node's left subtree *)
5563 if entry_layer < insert_node.layer then
5656- match (insert_node.entries, insert_node.left) with
6464+ let*? left = insert_node.left in
6565+ match (insert_node.entries, left) with
5766 | [], None ->
5867 failwith "found totally empty mst node"
5968 | [], Some left ->
6060- node.left <- Some (Lwt_main.run (insert_entry left entry)) ;
6969+ let%lwt left_inserted = insert_entry left entry in
7070+ node.left <- lazy (Lwt.return_some left_inserted) ;
6171 Lwt.return insert_node
6272 | _ ->
6373 Lwt.return insert_node
···8696 in
8797 aux node.entries
88988989-(* hydrates a list of entries with their keys; layer and right value are placeholders *)
9090-let hydrate_entries_keys_only node =
9191- node.e
9999+(* from a list of raw entries, produces a list of their keys *)
100100+let entries_to_keys entries =
101101+ entries
92102 |> List.fold_left
9393- (fun (prev_path, entries) entry ->
9494- let prefix = String.sub prev_path 0 entry.p in
103103+ (fun keys entry ->
104104+ let prefix =
105105+ match keys with [] -> "" | prev :: _ -> String.sub prev 0 entry.p
106106+ in
95107 let path = String.concat "" [prefix; Bytes.to_string entry.k] in
9696- Util.ensure_valid_key path ;
9797- (path, entries @ [{layer= 0; key= path; value= entry.v; right= None}]) )
9898- ("", [])
9999- |> snd
108108+ Util.ensure_valid_key path ; path :: keys )
109109+ []
110110+ |> List.rev
100111101112module Make (Store : Storage.Writable_blockstore) = struct
102113 type bs = Store.t
···106117 let create blockstore root = {blockstore; root}
107118108119 (* decodes a node retrieved from the blockstore *)
109109- let decode_block b : node_raw =
120120+ let decode_block_raw b : node_raw =
110121 match Dag_cbor.decode b with
111122 | `Map node ->
112123 if not (StringMap.mem "e" node) then
···157168 | _ ->
158169 raise (Invalid_argument "invalid block")
159170160160- (* retrieves & decodes a node by cid *)
161161- let retrieve_node t cid : node_raw option Lwt.t =
171171+ let retrieve_node_raw t cid : node_raw option Lwt.t =
162172 match%lwt Store.get_bytes t.blockstore cid with
163173 | Some bytes ->
164164- Lwt.return_some (decode_block bytes)
174174+ bytes |> decode_block_raw |> Lwt.return_some
165175 | None ->
166176 Lwt.return_none
167177178178+ (* retrieves & decodes a node by cid *)
179179+ let rec retrieve_node t cid : node option Lwt.t =
180180+ match%lwt retrieve_node_raw t cid with
181181+ | Some raw ->
182182+ hydrate_node t raw |> Lwt.map Option.some
183183+ | None ->
184184+ Lwt.return_none
185185+186186+ and retrieve_node_lazy t cid = lazy (retrieve_node t cid)
187187+188188+ (* hydrates a raw node *)
189189+ and hydrate_node t node_raw : node Lwt.t =
190190+ let left =
191191+ match node_raw.l with
192192+ | Some l ->
193193+ retrieve_node_lazy t l
194194+ | None ->
195195+ lazy Lwt.return_none
196196+ in
197197+ let%lwt layer = get_node_height t node_raw in
198198+ let entries =
199199+ List.fold_left
200200+ (fun entries entry ->
201201+ let prefix =
202202+ match entries with
203203+ | [] ->
204204+ ""
205205+ | prev :: _ ->
206206+ String.sub prev.key 0 entry.p
207207+ in
208208+ let path = String.concat "" [prefix; Bytes.to_string entry.k] in
209209+ Util.ensure_valid_key path ;
210210+ let right =
211211+ match entry.t with
212212+ | Some r ->
213213+ retrieve_node_lazy t r
214214+ | None ->
215215+ lazy Lwt.return_none
216216+ in
217217+ {layer; key= path; value= entry.v; right} :: entries )
218218+ [] node_raw.e
219219+ in
220220+ Lwt.return {layer; left; entries}
221221+168222 (* returns the layer of a node *)
169169- let rec get_node_height t node : int Lwt.t =
223223+ and get_node_height t node : int Lwt.t =
170224 match (node.l, node.e) with
171225 | None, [] ->
172226 Lwt.return 0
173227 | Some left, [] -> (
174174- match%lwt retrieve_node t left with
228228+ match%lwt retrieve_node_raw t left with
175229 | Some node ->
176230 let%lwt height = get_node_height t node in
177231 Lwt.return (height + 1)
···188242 let traverse t fn : unit Lwt.t =
189243 let rec traverse node =
190244 let%lwt () =
191191- match node.l with
192192- | Some cid -> (
193193- match%lwt retrieve_node t cid with
194194- | Some node ->
195195- traverse node
196196- | None ->
197197- Lwt.return_unit )
198198- | None ->
199199- Lwt.return_unit
245245+ let*? left = node.left in
246246+ match left with Some l -> traverse l | None -> Lwt.return_unit
200247 in
201201- ignore
202202- (List.fold_left
203203- (fun prev_path entry ->
204204- let prefix = String.sub prev_path 0 entry.p in
205205- let path = String.concat "" [prefix; Bytes.to_string entry.k] in
206206- fn path entry.v ; path )
207207- "" node.e ) ;
248248+ List.iter (fun entry -> fn entry.key entry.value) node.entries ;
208249 Lwt.return_unit
209250 in
210251 match%lwt retrieve_node t t.root with
···221262 in
222263 Lwt.return map
223264224224- (* produces a hydrated mst from a map of key -> cid *)
225225- let hydrate_from_map t map : Cid.t Lwt.t =
265265+ (* produces a cid and cbor-encoded bytes for this mst *)
266266+ let serialize t map : (Cid.t * bytes) Lwt.t =
226267 let keys =
227268 map |> StringMap.bindings |> List.map fst |> List.sort String.compare
228269 in
229270 let entry_for_key key =
230271 let value = StringMap.find key map in
231272 let height = Util.leading_zeros_on_hash key in
232232- {layer= height; key; value; right= None}
273273+ {layer= height; key; value; right= lazy Lwt.return_none}
233274 in
234275 let root =
235276 { layer= keys |> List.hd |> Util.leading_zeros_on_hash
236277 ; entries= []
237237- ; left= None }
278278+ ; left= lazy Lwt.return_none }
238279 in
239280 List.iter
240281 (fun key -> ignore (insert_entry root (entry_for_key key)))
241282 (List.tl keys) ;
242242- let rec finalize node : Cid.t Lwt.t =
243243- let left =
244244- match node.left with
283283+ let rec finalize node : (Cid.t * bytes) Lwt.t =
284284+ let%lwt left =
285285+ node.left
286286+ >>? function
245287 | Some l ->
246246- Some (Lwt_main.run (finalize l))
288288+ let%lwt cid, _ = finalize l in
289289+ Lwt.return_some cid
247290 | None ->
248248- None
291291+ Lwt.return_none
249292 in
250293 let last_key = ref "" in
251251- let mst_entries =
252252- List.map
294294+ let%lwt mst_entries =
295295+ Lwt_list.map_s
253296 (fun entry ->
254254- let right =
255255- match entry.right with
297297+ let%lwt right =
298298+ entry.right
299299+ >>? function
256300 | Some r ->
257257- Some (Lwt_main.run (finalize r))
301301+ let%lwt cid, _ = finalize r in
302302+ Lwt.return (Some cid)
258303 | None ->
259259- None
304304+ Lwt.return None
260305 in
261306 let prefix_len = Util.shared_prefix_length !last_key entry.key in
262307 last_key := entry.key ;
263263- { k=
264264- Bytes.of_string
265265- (String.sub entry.key prefix_len
266266- (String.length entry.key - prefix_len) )
267267- ; p= prefix_len
268268- ; v= entry.value
269269- ; t= right } )
308308+ Lwt.return
309309+ { k=
310310+ Bytes.of_string
311311+ (String.sub entry.key prefix_len
312312+ (String.length entry.key - prefix_len) )
313313+ ; p= prefix_len
314314+ ; v= entry.value
315315+ ; t= right } )
270316 node.entries
271317 in
272272- let mst_node = {l= left; e= mst_entries} in
273273- let encoded = Dag_cbor.encode (encode_node_raw mst_node) in
318318+ let encoded =
319319+ Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries})
320320+ in
274321 let cid = Cid.create Dcbor encoded in
275322 let%lwt () = Store.put_block t.blockstore cid encoded in
276276- Lwt.return cid
323323+ Lwt.return (cid, encoded)
277324 in
278325 finalize root
279326280327 (* returns cids and blocks that form the path from a given node to a given entry *)
281328 let rec path_to_entry t node key : (Cid.t * bytes) list Lwt.t =
282282- let%lwt root_bytes = Store.get_bytes t node in
329329+ let%lwt root_bytes = Store.get_bytes t.blockstore node in
283330 let%lwt root =
284331 match root_bytes with
285332 | None ->
286333 Lwt.return_none
287334 | Some bytes ->
288288- Lwt.return_some (decode_block bytes)
335335+ Lwt.return_some (decode_block_raw bytes)
289336 in
290337 let path_tail = [(node, Option.get root_bytes)] in
291338 (* if there is a left child, try to find a path through the left subtree *)
···310357 Lwt.return path
311358 | None -> (
312359 (* if a left subtree path couldn't be found, find the entry whose right subtree this key would belong to *)
313313- let root' = Option.get root in
314314- let entries_keys = hydrate_entries_keys_only root' in
315315- let entries_len = List.length root'.e in
360360+ let entries = (Option.get root).e in
361361+ let entries_keys = entries_to_keys entries in
362362+ let entries_len = List.length entries in
316363 let entry_index =
317317- match List.find_index (fun e -> e.key >= key) entries_keys with
364364+ match List.find_index (fun e -> e >= key) entries_keys with
318365 | Some index ->
319366 index
320367 | None ->
···325372 | _
326373 (* because entries[entry_index] might turn out to be the entry we're looking for *)
327374 when entry_index < entries_len
328328- && (List.nth entries_keys entry_index).key = key ->
375375+ && List.nth entries_keys entry_index = key ->
329376 Lwt.return path_tail
330377 | _ -> (
331378 (* otherwise, we continue down the right subtree of the entry before entry_index *)
332332- match Util.last root'.e with
379379+ match Util.last entries with
333380 | Some last when last.t != None ->
334381 let%lwt path_through_right =
335382 path_to_entry t (Option.get last.t) key
···339386 Lwt.return path_tail ) )
340387341388 (* returns all mst entries in order for a car stream *)
342342- let to_car_stream t : (Cid.t * bytes) Seq.t =
389389+ let to_blocks_seq t : (Cid.t * bytes) Lwt_seq.t =
343390 let module M = struct
344391 type stage =
392392+ (* currently walking nodes *)
345393 | Nodes of
346346- (* currently walking nodes *)
347347-348394 { next: Cid.t list (* next cids to fetch *)
349395 ; fetched: (Cid.t * bytes) list (* fetched cids and their bytes *)
350396 ; leaves: Cid.Set.t (* seen leaf cids *) }
351351- | Leaves of
352352- (* done walking nodes, streaming accumulated leaves *)
353353- (Cid.t * bytes) list
397397+ (* done walking nodes, streaming accumulated leaves *)
398398+ | Leaves of (Cid.t * bytes) list
354399 | Done
355400 end in
356401 let open M in
···359404 in
360405 let rec step = function
361406 | Done ->
362362- None
407407+ Lwt.return_none
363408 (* node has been fetched, can now be yielded *)
364409 | Nodes ({fetched= (cid, bytes) :: rest; _} as s) ->
365365- Some ((cid, bytes), Nodes {s with fetched= rest})
410410+ Lwt.return_some ((cid, bytes), Nodes {s with fetched= rest})
366411 (* need to fetch next nodes *)
367412 | Nodes {next; fetched= []; leaves} ->
368413 if List.is_empty next then (
369414 (* finished traversing nodes, time to switch to leaves *)
370415 let leaves_list = Cid.Set.to_list leaves in
371371- let leaves_bm =
372372- Lwt_main.run (Store.get_blocks t.blockstore leaves_list)
373373- in
416416+ let%lwt leaves_bm = Store.get_blocks t.blockstore leaves_list in
374417 if leaves_bm.missing <> [] then failwith "missing mst leaf blocks" ;
375418 let leaves_nodes = Storage.Block_map.entries leaves_bm.blocks in
376419 match leaves_nodes with
377420 | [] ->
378421 (* with Done, we don't care about the first pair element *)
379379- Some (Obj.magic (), Done)
422422+ Lwt.return_some (Obj.magic (), Done)
380423 | _ ->
381424 (* it's leafin time *)
382425 step (Leaves leaves_nodes) )
383426 else
384427 (* go ahead and fetch the next nodes *)
385385- let bm = Lwt_main.run (Store.get_blocks t.blockstore next) in
428428+ let%lwt bm = Store.get_blocks t.blockstore next in
386429 if bm.missing <> [] then failwith "missing mst nodes" ;
387430 let fetched, next', leaves' =
388431 List.fold_left
···391434 (* we should be safe to do this since we just got the cids from the blockmap *)
392435 Storage.Block_map.get cid bm.blocks |> Option.get
393436 in
394394- let node = decode_block bytes in
437437+ let node = decode_block_raw bytes in
395438 let nxt' =
396439 List.fold_left
397440 (* node.entries.map(e => e.right) *)
398441 (fun n e -> match e.t with Some c -> c :: n | None -> n )
399442 (* start with [node.left, ...nxt] if node has a left subtree *)
443443+ (* next' looks like [..., n_2.r_2, n_2.l, n_1.r_n, ..., n_1.r_1, n_1.l]) *)
400444 ( match node.l with
401445 | Some l ->
402446 l :: nxt
···420464 (* if we're onto yielding leaves, do that *)
421465 | Leaves ((cid, bytes) :: rest) ->
422466 let next = if rest = [] then Done else Leaves rest in
423423- Some ((cid, bytes), next)
467467+ Lwt.return_some ((cid, bytes), next)
424468 (* once we're out of leaves, we're done *)
425469 | Leaves [] ->
426426- Some (Obj.magic (), Done)
470470+ Lwt.return_some (Obj.magic (), Done)
427471 in
428428- Seq.unfold step init_state
472472+ Lwt_seq.unfold_lwt step init_state
429473end