···1module String_map = Map.Make (String)
20000003let ordered_map_keys (m : 'a String_map.t) : string list =
4 let keys = String_map.bindings m |> List.map fst in
5- List.sort
6- (fun a b ->
7- let la = String.length a in
8- let lb = String.length b in
9- if la = lb then String.compare a b else compare la lb )
10- keys
1112let type_info_length len =
13 if len < 24 then 1
···195 | `Map m ->
196 let len = String_map.cardinal m in
197 write_type_and_argument t 5 (Int64.of_int len) ;
198- ordered_map_keys m
199- |> List.iter (fun k ->
200 write_string t k ;
201- write_value t (String_map.find k m) )
202 | `Link cid ->
203 write_cid t cid
204
···1module String_map = Map.Make (String)
23+(* sort map keys by length first, then lexicographically *)
4+let dag_cbor_key_compare a b =
5+ let la = String.length a in
6+ let lb = String.length b in
7+ if la = lb then String.compare a b else compare la lb
8+9let ordered_map_keys (m : 'a String_map.t) : string list =
10 let keys = String_map.bindings m |> List.map fst in
11+ List.sort dag_cbor_key_compare keys
12+13+(* returns bindings sorted in dag-cbor canonical order *)
14+let ordered_map_bindings (m : 'a String_map.t) : (string * 'a) list =
15+ String_map.bindings m |> List.sort (fun (a, _) (b, _) -> dag_cbor_key_compare a b)
01617let type_info_length len =
18 if len < 24 then 1
···200 | `Map m ->
201 let len = String_map.cardinal m in
202 write_type_and_argument t 5 (Int64.of_int len) ;
203+ ordered_map_bindings m
204+ |> List.iter (fun (k, v) ->
205 write_string t k ;
206+ write_value t v )
207 | `Link cid ->
208 write_cid t cid
209
+400-155
mist/lib/mst.ml
···208209 val proof_for_key : t -> Cid.t -> string -> Block_map.t Lwt.t
21000211 val leaf_count : t -> int Lwt.t
212213 val layer : t -> int Lwt.t
···246247 let create blockstore root = {blockstore; root}
2480000000000000000000000000000000000000249 (* retrieves a raw node by cid *)
250 let retrieve_node_raw t cid : node_raw option Lwt.t =
251 match%lwt Store.get_bytes t.blockstore cid with
···288 | None ->
289 lazy Lwt.return_none
290 in
0291 let entries =
292- List.fold_left
293- (fun (entries : entry list) entry ->
294 let prefix =
295- match entries with
296- | [] ->
297- ""
298- | prev :: _ ->
299- String.sub prev.key 0 entry.p
300 in
301 let path = String.concat "" [prefix; Bytes.to_string entry.k] in
302 Util.ensure_valid_key path ;
0303 let right =
304 match entry.t with
305 | Some r ->
···307 | None ->
308 lazy Lwt.return_none
309 in
310- ({layer; key= path; value= entry.v; right} : entry) :: entries )
311- [] node_raw.e
312 in
313 Lwt.return {layer; left; entries}
314···349 (* returns a map of key -> cid *)
350 let build_map t : Cid.t String_map.t Lwt.t =
351 let map = ref String_map.empty in
352- let%lwt () =
353- traverse t (fun path cid -> map := String_map.add path cid !map)
000000000000000000000000000000000000000000000354 in
355- Lwt.return !map
356357 (* returns all non-leaf mst node blocks in order for a car stream
358 leaf cids can be obtained via collect_nodes_and_leaves or leaves_of_root *)
···404 for each node: node block, left subtree, then for each entry: record, right subtree *)
405 let to_ordered_stream t : ordered_item Lwt_seq.t =
406 (* queue items: `Node cid to visit, `Leaf cid to yield *)
407- let rec step queue =
000000000000000000000000000000000408 match queue with
409 | [] ->
410 Lwt.return_none
411- | `Node cid :: rest -> (
412- let%lwt bytes_opt = Store.get_bytes t.blockstore cid in
413- match bytes_opt with
414- | None ->
415- step rest
416- | Some bytes ->
417- let node = decode_block_raw bytes in
418- (* queue items: left subtree, then for each entry: record then right subtree *)
419- let left_queue =
420- match node.l with Some l -> [`Node l] | None -> []
421- in
422- let entries_queue =
423- List.concat_map
424- (fun (e : entry_raw) ->
425- let right_queue =
426- match e.t with Some r -> [`Node r] | None -> []
427- in
428- `Leaf e.v :: right_queue )
429- node.e
430- in
431- let new_queue = left_queue @ entries_queue @ rest in
432- Lwt.return_some ((Node (cid, bytes) : ordered_item), new_queue) )
433 | `Leaf cid :: rest ->
434- Lwt.return_some ((Leaf cid : ordered_item), rest)
00000000000000000000000000000435 in
436- Lwt_seq.unfold_lwt step [`Node t.root]
437438 (* produces a cid and cbor-encoded bytes for a given tree *)
439 let serialize t node : (Cid.t * bytes, exn) Lwt_result.t =
440- let sorted_entries =
441- List.sort (fun (a : entry) b -> String.compare a.key b.key) node.entries
442- in
443 let rec aux node : (Cid.t * bytes) Lwt.t =
000000444 let%lwt left =
445 node.left
446 >>? function
···473 ; p= prefix_len
474 ; v= entry.value
475 ; t= right } )
476- node.entries
477 in
478 let encoded =
479 Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries})
···485 | Error e ->
486 raise e
487 in
488- try%lwt Lwt.map Result.ok (aux {node with entries= sorted_entries})
489 with e -> Lwt.return_error e
490491 (* raw-node helpers for covering proofs: operate on stored bytes, not re-serialization *)
···545 let seq = interleave_raw raw keys in
546 let index = find_gte_leaf_index key seq in
547 let%lwt blocks =
548- match Util.at_index index seq with
549 | Some (Leaf (k, _, _)) when k = key ->
550 Lwt.return Block_map.empty
551 | Some (Leaf (_k, v_right, _)) -> (
552 let prev =
553- if index - 1 >= 0 then Util.at_index (index - 1) seq else None
554 in
555 match prev with
556 | Some (Tree c) ->
···587 proof_for_key t c key
588 | None -> (
589 let prev =
590- if index - 1 >= 0 then Util.at_index (index - 1) seq else None
591 in
592 match prev with
593 | Some (Tree c) ->
···602 None
603 in
604 let right_leaf =
605- match Util.at_index index seq with
606 | Some (Leaf (_, v_right, _)) ->
607 Some v_right
608 | _ ->
···634 in
635 Lwt.return (Block_map.set cid bytes blocks)
63600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000637 (* collects all node blocks (cid, bytes) and all leaf cids reachable from root
638 only traverses nodes; doesn't fetch leaf blocks
639 returns (nodes, visited, leaves) *)
640 let collect_nodes_and_leaves t :
641 ((Cid.t * bytes) list * Cid.Set.t * Cid.Set.t) Lwt.t =
642- let rec bfs (queue : Cid.t list) (visited : Cid.Set.t)
643- (nodes : (Cid.t * bytes) list) (leaves : Cid.Set.t) =
644 match queue with
645 | [] ->
646 Lwt.return (nodes, visited, leaves)
647- | cid :: rest -> (
648- if Cid.Set.mem cid visited then bfs rest visited nodes leaves
649- else
650- let%lwt bytes_opt = Store.get_bytes t.blockstore cid in
651- match bytes_opt with
652- | None ->
653- failwith ("missing mst node block: " ^ Cid.to_string cid)
654- | Some bytes ->
655- let raw = decode_block_raw bytes in
656- (* queue subtrees *)
657- let next_cids =
658- let acc = match raw.l with Some l -> [l] | None -> [] in
659- List.fold_left
660- (fun acc e ->
661- match e.t with Some r -> r :: acc | None -> acc )
662- acc raw.e
663- in
664- (* accumulate leaf cids *)
665- let leaves' =
666- List.fold_left (fun s e -> Cid.Set.add e.v s) leaves raw.e
667- in
668- let visited' = Cid.Set.add cid visited in
669- bfs
670- (List.rev_append next_cids rest)
671- visited' ((cid, bytes) :: nodes) leaves' )
000000672 in
673- bfs [t.root] Cid.Set.empty [] Cid.Set.empty
674675 (* list of all leaves belonging to a node and its children, ordered by key *)
676 let rec leaves_of_node n : (string * Cid.t) list Lwt.t =
677 let%lwt left_leaves =
678 n.left >>? function Some l -> leaves_of_node l | None -> Lwt.return []
679 in
680- let sorted_entries =
681- List.sort
682- (fun (a : entry) (b : entry) -> String.compare a.key b.key)
683- n.entries
00684 in
685- let%lwt leaves =
686- Lwt_list.fold_left_s
687- (fun acc e ->
688 let%lwt right_leaves =
689 e.right
690 >>? function Some r -> leaves_of_node r | None -> Lwt.return []
691 in
692- Lwt.return (acc @ [(e.key, e.value)] @ right_leaves) )
693- left_leaves sorted_entries
694 in
695- Lwt.return leaves
696697 (* list of all leaves in the mst *)
698 let leaves_of_root t : (string * Cid.t) list Lwt.t =
···704705 (* returns a count of all leaves in the mst *)
706 let leaf_count t : int Lwt.t =
707- match%lwt retrieve_node t t.root with
708- | None ->
709- failwith "root cid not found in repo store"
710- | Some root ->
711- let rec count (n : node) : int Lwt.t =
712- let%lwt left_count =
713- n.left >>? function Some l -> count l | None -> Lwt.return 0
0714 in
715- let%lwt right_counts =
716- Lwt_list.map_s
717- (fun (e : entry) ->
718- e.right >>? function Some r -> count r | None -> Lwt.return 0 )
719- n.entries
000000000000000720 in
721- let sum_right = List.fold_left ( + ) 0 right_counts in
722- Lwt.return (left_count + List.length n.entries + sum_right)
723- in
724- count root
725726 (* returns height of mst root *)
727 let layer t : int Lwt.t =
···733734 (* returns all nodes sorted by cid *)
735 let all_nodes t : (Cid.t * bytes) list Lwt.t =
736- let rec bfs (queue : Cid.t list) (visited : Cid.Set.t)
737- (nodes : (Cid.t * bytes) list) =
738 match queue with
739 | [] ->
740 Lwt.return nodes
741- | cid :: rest -> (
742- if Cid.Set.mem cid visited then bfs rest visited nodes
743- else
744- match%lwt Store.get_bytes t.blockstore cid with
745- | None ->
746- failwith ("missing mst node block: " ^ Cid.to_string cid)
747- | Some bytes ->
748- let raw = decode_block_raw bytes in
749- let next_cids =
750- let acc = match raw.l with Some l -> [l] | None -> [] in
751- List.fold_left
752- (fun acc e ->
753- match e.t with Some r -> r :: acc | None -> acc )
754- acc raw.e
755- in
756- let visited' = Cid.Set.add cid visited in
757- bfs
758- (List.rev_append next_cids rest)
759- visited' ((cid, bytes) :: nodes) )
00000000760 in
761- let%lwt nodes = bfs [t.root] Cid.Set.empty [] in
762 let sorted =
763 List.sort
764 (fun (a, _) (b, _) -> String.compare (Cid.to_string a) (Cid.to_string b))
···803 let root_layer =
804 List.fold_left (fun acc (_, _, lz) -> max acc lz) 0 with_layers
805 in
806- let on_layer =
807- List.filter (fun (_, _, lz) -> lz = root_layer) with_layers
808- |> List.map (fun (k, v, _) -> (k, v))
809- in
810- (* left group is keys below first on-layer key *)
811- let left_group =
812- match on_layer with
813- | (k0, _) :: _ ->
814- List.filter
815- (fun (k, _, lz) -> lz < root_layer && k < k0)
816- with_layers
817- |> List.map (fun (k, v, _) -> (k, v))
818- | [] ->
819- []
00000000820 in
821 let%lwt l_cid =
822 match left_group with
···838 let%lwt c = wrap cid child_layer in
839 Lwt.return_some c
840 in
841- (* compute right groups aligned to on-layer entries *)
842- let rec right_groups acc rest =
843- match rest with
844- | [] ->
845- List.rev acc
846- | (k, _) :: tl ->
847- let upper =
848- match tl with (k2, _) :: _ -> Some k2 | [] -> None
849- in
850- let grp =
851- List.filter
852- (fun (k', _, lz) ->
853- lz < root_layer && k' > k
854- && match upper with Some ku -> k' < ku | None -> true )
855- with_layers
856- |> List.map (fun (k', v', _) -> (k', v'))
857- in
858- right_groups ((k, grp) :: acc) tl
859 in
860- let rights = right_groups [] on_layer in
861 let%lwt t_links =
862 Lwt_list.map_s
863 (fun (_k, grp) ->
···208209 val proof_for_key : t -> Cid.t -> string -> Block_map.t Lwt.t
210211+ val proof_for_keys : t -> Cid.t -> string list -> Block_map.t Lwt.t
212+213 val leaf_count : t -> int Lwt.t
214215 val layer : t -> int Lwt.t
···248249 let create blockstore root = {blockstore; root}
250251+ let entries_are_sorted (entries : entry list) : bool =
252+ let rec aux prev_key = function
253+ | [] ->
254+ true
255+ | e :: tl ->
256+ if String.compare prev_key e.key <= 0 then aux e.key tl else false
257+ in
258+ match entries with [] -> true | e :: tl -> aux e.key tl
259+260+ (* we try to batch reads from the blockstore when possible
261+ 200 seems like a sane upper limit for that *)
262+ let batch_size = 200
263+264+ let take_n n lst =
265+ if n <= 0 then ([], lst)
266+ else
267+ let rec loop acc remaining xs =
268+ match (remaining, xs) with
269+ | 0, _ ->
270+ (List.rev acc, xs)
271+ | _, [] ->
272+ (List.rev acc, [])
273+ | _, x :: xs' ->
274+ loop (x :: acc) (remaining - 1) xs'
275+ in
276+ loop [] n lst
277+278+ let get_blocks_exn (t : t) (cids : Cid.t list) : Block_map.t Lwt.t =
279+ if List.is_empty cids then Lwt.return Block_map.empty
280+ else
281+ let%lwt bm = Store.get_blocks t.blockstore cids in
282+ match bm.missing with
283+ | [] ->
284+ Lwt.return bm.blocks
285+ | missing :: _ ->
286+ failwith ("missing mst node block: " ^ Cid.to_string missing)
287+288 (* retrieves a raw node by cid *)
289 let retrieve_node_raw t cid : node_raw option Lwt.t =
290 match%lwt Store.get_bytes t.blockstore cid with
···327 | None ->
328 lazy Lwt.return_none
329 in
330+ let last_key = ref "" in
331 let entries =
332+ List.map
333+ (fun entry ->
334 let prefix =
335+ if entry.p = 0 then ""
336+ else if !last_key = "" then ""
337+ else String.sub !last_key 0 entry.p
00338 in
339 let path = String.concat "" [prefix; Bytes.to_string entry.k] in
340 Util.ensure_valid_key path ;
341+ last_key := path ;
342 let right =
343 match entry.t with
344 | Some r ->
···346 | None ->
347 lazy Lwt.return_none
348 in
349+ ({layer; key= path; value= entry.v; right} : entry) )
350+ node_raw.e
351 in
352 Lwt.return {layer; left; entries}
353···388 (* returns a map of key -> cid *)
389 let build_map t : Cid.t String_map.t Lwt.t =
390 let map = ref String_map.empty in
391+ let rec loop queue visited =
392+ match queue with
393+ | [] ->
394+ Lwt.return !map
395+ | _ ->
396+ let batch, rest = take_n batch_size queue in
397+ let to_fetch =
398+ List.filter (fun (cid, _) -> not (Cid.Set.mem cid visited)) batch
399+ in
400+ let%lwt blocks = get_blocks_exn t (List.map fst to_fetch) in
401+ let visited', next_queue =
402+ List.fold_left
403+ (fun (visited, queue) (cid, prefix) ->
404+ if Cid.Set.mem cid visited then (visited, queue)
405+ else
406+ let bytes = Block_map.get cid blocks |> Option.get in
407+ let raw = decode_block_raw bytes in
408+ let last_key = ref prefix in
409+ let next_pairs =
410+ List.fold_left
411+ (fun acc (e : entry_raw) ->
412+ let key_prefix =
413+ if e.p = 0 then ""
414+ else if e.p <= String.length !last_key then
415+ String.sub !last_key 0 e.p
416+ else !last_key
417+ in
418+ let full_key = key_prefix ^ Bytes.to_string e.k in
419+ Util.ensure_valid_key full_key ;
420+ last_key := full_key ;
421+ map := String_map.add full_key e.v !map ;
422+ match e.t with
423+ | Some r ->
424+ (r, full_key) :: acc
425+ | None ->
426+ acc )
427+ ( match raw.l with
428+ | Some l ->
429+ [(l, prefix)]
430+ | None ->
431+ [] )
432+ raw.e
433+ in
434+ (Cid.Set.add cid visited, List.rev_append next_pairs queue) )
435+ (visited, rest) batch
436+ in
437+ loop next_queue visited'
438 in
439+ loop [(t.root, "")] Cid.Set.empty
440441 (* returns all non-leaf mst node blocks in order for a car stream
442 leaf cids can be obtained via collect_nodes_and_leaves or leaves_of_root *)
···488 for each node: node block, left subtree, then for each entry: record, right subtree *)
489 let to_ordered_stream t : ordered_item Lwt_seq.t =
490 (* queue items: `Node cid to visit, `Leaf cid to yield *)
491+ let prefetch queue cache missing =
492+ let rec collect acc seen remaining = function
493+ | [] ->
494+ (List.rev acc, seen)
495+ | _ when remaining = 0 ->
496+ (List.rev acc, seen)
497+ | `Node cid :: rest ->
498+ if
499+ Cid.Set.mem cid missing
500+ || Block_map.has cid cache
501+ || Cid.Set.mem cid seen
502+ then collect acc seen remaining rest
503+ else
504+ collect (cid :: acc) (Cid.Set.add cid seen) (remaining - 1) rest
505+ | _ :: rest ->
506+ collect acc seen remaining rest
507+ in
508+ let cids, _seen = collect [] Cid.Set.empty batch_size queue in
509+ if List.is_empty cids then Lwt.return (cache, missing)
510+ else
511+ let%lwt bm = Store.get_blocks t.blockstore cids in
512+ let cache' =
513+ List.fold_left
514+ (fun acc (cid, bytes) -> Block_map.set cid bytes acc)
515+ cache (Block_map.entries bm.blocks)
516+ in
517+ let missing' =
518+ List.fold_left
519+ (fun acc cid -> Cid.Set.add cid acc)
520+ missing bm.missing
521+ in
522+ Lwt.return (cache', missing')
523+ in
524+ let rec step (queue, cache, missing) =
525 match queue with
526 | [] ->
527 Lwt.return_none
0000000000000000000000528 | `Leaf cid :: rest ->
529+ Lwt.return_some ((Leaf cid : ordered_item), (rest, cache, missing))
530+ | `Node cid :: rest ->
531+ if Cid.Set.mem cid missing then step (rest, cache, missing)
532+ else
533+ ( match Block_map.get cid cache with
534+ | None ->
535+ let%lwt cache', missing' = prefetch queue cache missing in
536+ if cache' == cache && Cid.Set.mem cid missing' then
537+ step (rest, cache', missing')
538+ else step (queue, cache', missing')
539+ | Some bytes ->
540+ let node = decode_block_raw bytes in
541+ (* queue items: left subtree, then for each entry: record then right subtree *)
542+ let left_queue =
543+ match node.l with Some l -> [`Node l] | None -> []
544+ in
545+ let entries_queue =
546+ List.concat_map
547+ (fun (e : entry_raw) ->
548+ let right_queue =
549+ match e.t with Some r -> [`Node r] | None -> []
550+ in
551+ `Leaf e.v :: right_queue )
552+ node.e
553+ in
554+ let new_queue = left_queue @ entries_queue @ rest in
555+ let cache' = Block_map.remove cid cache in
556+ Lwt.return_some
557+ ((Node (cid, bytes) : ordered_item), (new_queue, cache', missing))
558+ )
559 in
560+ Lwt_seq.unfold_lwt step ([`Node t.root], Block_map.empty, Cid.Set.empty)
561562 (* produces a cid and cbor-encoded bytes for a given tree *)
563 let serialize t node : (Cid.t * bytes, exn) Lwt_result.t =
000564 let rec aux node : (Cid.t * bytes) Lwt.t =
565+ let entries =
566+ if entries_are_sorted node.entries then node.entries
567+ else
568+ List.sort (fun (a : entry) b -> String.compare a.key b.key)
569+ node.entries
570+ in
571 let%lwt left =
572 node.left
573 >>? function
···600 ; p= prefix_len
601 ; v= entry.value
602 ; t= right } )
603+ entries
604 in
605 let encoded =
606 Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries})
···612 | Error e ->
613 raise e
614 in
615+ try%lwt Lwt.map Result.ok (aux node)
616 with e -> Lwt.return_error e
617618 (* raw-node helpers for covering proofs: operate on stored bytes, not re-serialization *)
···672 let seq = interleave_raw raw keys in
673 let index = find_gte_leaf_index key seq in
674 let%lwt blocks =
675+ match List.nth_opt seq index with
676 | Some (Leaf (k, _, _)) when k = key ->
677 Lwt.return Block_map.empty
678 | Some (Leaf (_k, v_right, _)) -> (
679 let prev =
680+ if index - 1 >= 0 then List.nth_opt seq (index - 1) else None
681 in
682 match prev with
683 | Some (Tree c) ->
···714 proof_for_key t c key
715 | None -> (
716 let prev =
717+ if index - 1 >= 0 then List.nth_opt seq (index - 1) else None
718 in
719 match prev with
720 | Some (Tree c) ->
···729 None
730 in
731 let right_leaf =
732+ match List.nth_opt seq index with
733 | Some (Leaf (_, v_right, _)) ->
734 Some v_right
735 | _ ->
···761 in
762 Lwt.return (Block_map.set cid bytes blocks)
763764+ let proof_for_keys t cid keys : Block_map.t Lwt.t =
765+ if List.is_empty keys then Lwt.return Block_map.empty
766+ else
767+ let keys = List.sort_uniq String.compare keys in
768+ let cache = ref Block_map.empty in
769+ let missing = ref Cid.Set.empty in
770+ let acc = ref Block_map.empty in
771+ let add_block cid bytes =
772+ if not (Block_map.has cid !acc) then
773+ acc := Block_map.set cid bytes !acc
774+ in
775+ let get_bytes_cached cid =
776+ match Block_map.get cid !cache with
777+ | Some bytes ->
778+ Lwt.return_some bytes
779+ | None ->
780+ if Cid.Set.mem cid !missing then Lwt.return_none
781+ else
782+ let%lwt bytes_opt = Store.get_bytes t.blockstore cid in
783+ ( match bytes_opt with
784+ | Some bytes ->
785+ cache := Block_map.set cid bytes !cache
786+ | None ->
787+ missing := Cid.Set.add cid !missing ) ;
788+ Lwt.return bytes_opt
789+ in
790+ let add_leaf cid_opt =
791+ match cid_opt with
792+ | None ->
793+ Lwt.return_unit
794+ | Some leaf_cid -> (
795+ match%lwt get_bytes_cached leaf_cid with
796+ | Some bytes ->
797+ add_block leaf_cid bytes ;
798+ Lwt.return_unit
799+ | None ->
800+ Lwt.return_unit )
801+ in
802+ let rec proof_for_key_cached cid key =
803+ match%lwt get_bytes_cached cid with
804+ | None ->
805+ Lwt.return_unit
806+ | Some bytes ->
807+ add_block cid bytes ;
808+ let raw = decode_block_raw bytes in
809+ let keys = node_entry_keys raw in
810+ let seq = interleave_raw raw keys in
811+ let index = find_gte_leaf_index key seq in
812+ ( match List.nth_opt seq index with
813+ | Some (Leaf (k, _, _)) when k = key ->
814+ Lwt.return_unit
815+ | Some (Leaf (_k, v_right, _)) -> (
816+ let prev =
817+ if index - 1 >= 0 then List.nth_opt seq (index - 1) else None
818+ in
819+ match prev with
820+ | Some (Tree c) ->
821+ proof_for_key_cached c key
822+ | _ ->
823+ let left_leaf =
824+ match prev with
825+ | Some (Leaf (_, v_left, _)) ->
826+ Some v_left
827+ | _ ->
828+ None
829+ in
830+ let%lwt () = add_leaf left_leaf in
831+ add_leaf (Some v_right) )
832+ | Some (Tree c) ->
833+ proof_for_key_cached c key
834+ | None -> (
835+ let prev =
836+ if index - 1 >= 0 then List.nth_opt seq (index - 1) else None
837+ in
838+ match prev with
839+ | Some (Tree c) ->
840+ proof_for_key_cached c key
841+ | _ ->
842+ let left_leaf =
843+ match prev with
844+ | Some (Leaf (_, v_left, _)) ->
845+ Some v_left
846+ | _ ->
847+ None
848+ in
849+ let right_leaf =
850+ match List.nth_opt seq index with
851+ | Some (Leaf (_, v_right, _)) ->
852+ Some v_right
853+ | _ ->
854+ None
855+ in
856+ let%lwt () = add_leaf left_leaf in
857+ add_leaf right_leaf ) )
858+ in
859+ let%lwt () = Lwt_list.iter_s (proof_for_key_cached cid) keys in
860+ Lwt.return !acc
861+862 (* collects all node blocks (cid, bytes) and all leaf cids reachable from root
863 only traverses nodes; doesn't fetch leaf blocks
864 returns (nodes, visited, leaves) *)
865 let collect_nodes_and_leaves t :
866 ((Cid.t * bytes) list * Cid.Set.t * Cid.Set.t) Lwt.t =
867+ let rec loop queue visited nodes leaves =
0868 match queue with
869 | [] ->
870 Lwt.return (nodes, visited, leaves)
871+ | _ ->
872+ let batch, rest = take_n batch_size queue in
873+ let to_fetch =
874+ List.filter (fun cid -> not (Cid.Set.mem cid visited)) batch
875+ in
876+ let%lwt blocks = get_blocks_exn t to_fetch in
877+ let visited', nodes', leaves', next_queue =
878+ List.fold_left
879+ (fun (visited, nodes, leaves, queue) cid ->
880+ if Cid.Set.mem cid visited then (visited, nodes, leaves, queue)
881+ else
882+ let bytes = Block_map.get cid blocks |> Option.get in
883+ let raw = decode_block_raw bytes in
884+ let next_cids =
885+ let acc = match raw.l with Some l -> [l] | None -> [] in
886+ List.fold_left
887+ (fun acc e ->
888+ match e.t with Some r -> r :: acc | None -> acc )
889+ acc raw.e
890+ in
891+ let leaves' =
892+ List.fold_left (fun s e -> Cid.Set.add e.v s) leaves raw.e
893+ in
894+ let visited' = Cid.Set.add cid visited in
895+ ( visited'
896+ , (cid, bytes) :: nodes
897+ , leaves'
898+ , List.rev_append next_cids queue ) )
899+ (visited, nodes, leaves, rest) batch
900+ in
901+ loop next_queue visited' nodes' leaves'
902 in
903+ loop [t.root] Cid.Set.empty [] Cid.Set.empty
904905 (* list of all leaves belonging to a node and its children, ordered by key *)
906 let rec leaves_of_node n : (string * Cid.t) list Lwt.t =
907 let%lwt left_leaves =
908 n.left >>? function Some l -> leaves_of_node l | None -> Lwt.return []
909 in
910+ let entries =
911+ if entries_are_sorted n.entries then n.entries
912+ else
913+ List.sort
914+ (fun (a : entry) (b : entry) -> String.compare a.key b.key)
915+ n.entries
916 in
917+ let%lwt entry_sublists =
918+ Lwt_list.map_s
919+ (fun e ->
920 let%lwt right_leaves =
921 e.right
922 >>? function Some r -> leaves_of_node r | None -> Lwt.return []
923 in
924+ Lwt.return ((e.key, e.value) :: right_leaves) )
925+ entries
926 in
927+ Lwt.return (left_leaves @ List.concat entry_sublists)
928929 (* list of all leaves in the mst *)
930 let leaves_of_root t : (string * Cid.t) list Lwt.t =
···936937 (* returns a count of all leaves in the mst *)
938 let leaf_count t : int Lwt.t =
939+ let rec loop queue visited acc =
940+ match queue with
941+ | [] ->
942+ Lwt.return acc
943+ | _ ->
944+ let batch, rest = take_n batch_size queue in
945+ let to_fetch =
946+ List.filter (fun cid -> not (Cid.Set.mem cid visited)) batch
947 in
948+ let%lwt blocks = get_blocks_exn t to_fetch in
949+ let visited', acc', next_queue =
950+ List.fold_left
951+ (fun (visited, acc, queue) cid ->
952+ if Cid.Set.mem cid visited then (visited, acc, queue)
953+ else
954+ let bytes = Block_map.get cid blocks |> Option.get in
955+ let raw = decode_block_raw bytes in
956+ let next_cids =
957+ let acc = match raw.l with Some l -> [l] | None -> [] in
958+ List.fold_left
959+ (fun acc e ->
960+ match e.t with Some r -> r :: acc | None -> acc )
961+ acc raw.e
962+ in
963+ let visited' = Cid.Set.add cid visited in
964+ ( visited'
965+ , acc + List.length raw.e
966+ , List.rev_append next_cids queue ) )
967+ (visited, acc, rest) batch
968 in
969+ loop next_queue visited' acc'
970+ in
971+ loop [t.root] Cid.Set.empty 0
0972973 (* returns height of mst root *)
974 let layer t : int Lwt.t =
···980981 (* returns all nodes sorted by cid *)
982 let all_nodes t : (Cid.t * bytes) list Lwt.t =
983+ let rec loop queue visited nodes =
0984 match queue with
985 | [] ->
986 Lwt.return nodes
987+ | _ ->
988+ let batch, rest = take_n batch_size queue in
989+ let to_fetch =
990+ List.filter (fun cid -> not (Cid.Set.mem cid visited)) batch
991+ in
992+ let%lwt blocks = get_blocks_exn t to_fetch in
993+ let visited', nodes', next_queue =
994+ List.fold_left
995+ (fun (visited, nodes, queue) cid ->
996+ if Cid.Set.mem cid visited then (visited, nodes, queue)
997+ else
998+ let bytes = Block_map.get cid blocks |> Option.get in
999+ let raw = decode_block_raw bytes in
1000+ let next_cids =
1001+ let acc = match raw.l with Some l -> [l] | None -> [] in
1002+ List.fold_left
1003+ (fun acc e ->
1004+ match e.t with Some r -> r :: acc | None -> acc )
1005+ acc raw.e
1006+ in
1007+ let visited' = Cid.Set.add cid visited in
1008+ ( visited'
1009+ , (cid, bytes) :: nodes
1010+ , List.rev_append next_cids queue ) )
1011+ (visited, nodes, rest) batch
1012+ in
1013+ loop next_queue visited' nodes'
1014 in
1015+ let%lwt nodes = loop [t.root] Cid.Set.empty [] in
1016 let sorted =
1017 List.sort
1018 (fun (a, _) (b, _) -> String.compare (Cid.to_string a) (Cid.to_string b))
···1057 let root_layer =
1058 List.fold_left (fun acc (_, _, lz) -> max acc lz) 0 with_layers
1059 in
1060+ let left_group, on_layer, right_groups =
1061+ let left_group = ref [] in
1062+ let current_group = ref [] in
1063+ let on_layer_rev = ref [] in
1064+ let groups_rev = ref [] in
1065+ let seen_on = ref false in
1066+ List.iter
1067+ (fun (k, v, lz) ->
1068+ if lz = root_layer then (
1069+ if not !seen_on then left_group := List.rev !current_group
1070+ else groups_rev := List.rev !current_group :: !groups_rev ;
1071+ current_group := [] ;
1072+ on_layer_rev := (k, v) :: !on_layer_rev ;
1073+ seen_on := true )
1074+ else current_group := (k, v) :: !current_group )
1075+ with_layers ;
1076+ let on_layer = List.rev !on_layer_rev in
1077+ let right_groups =
1078+ if not !seen_on then []
1079+ else List.rev (List.rev !current_group :: !groups_rev)
1080+ in
1081+ (!left_group, on_layer, right_groups)
1082 in
1083 let%lwt l_cid =
1084 match left_group with
···1100 let%lwt c = wrap cid child_layer in
1101 Lwt.return_some c
1102 in
1103+ let rights =
1104+ List.map2 (fun (k, _) grp -> (k, grp)) on_layer right_groups
00000000000000001105 in
01106 let%lwt t_links =
1107 Lwt_list.map_s
1108 (fun (_k, grp) ->
-11
mist/lib/util.ml
···4849let rec last (lst : 'a list) : 'a option =
50 match lst with [] -> None | [x] -> Some x | _ :: xs -> last xs
51-52-let at_index i (lst : 'a list) : 'a option =
53- let rec aux j = function
54- | [] ->
55- None
56- | [x] ->
57- Some x
58- | x :: xs ->
59- if j = 0 then Some x else aux (j - 1) xs
60- in
61- aux i lst
···4849let rec last (lst : 'a list) : 'a option =
50 match lst with [] -> None | [x] -> Some x | _ :: xs -> last xs
00000000000
+1-1
pegasus/bench/bench_repository.ml
···406let bench_db_io_patterns () =
407 print_header "database i/o" ;
408 let%lwt db, path = setup_test_db () in
409- let size = 100000 in
410 let blocks = generate_blocks size in
411 let%lwt () =
412 Util.use_pool db.db (fun conn -> User_store.Bulk.put_blocks blocks conn) >|= fun _ -> ()
···406let bench_db_io_patterns () =
407 print_header "database i/o" ;
408 let%lwt db, path = setup_test_db () in
409+ let size = 20000 in
410 let blocks = generate_blocks size in
411 let%lwt () =
412 Util.use_pool db.db (fun conn -> User_store.Bulk.put_blocks blocks conn) >|= fun _ -> ()
+29-37
pegasus/lib/repository.ml
···221 ref (Cached_mst.create cached_store prev_commit.data)
222 in
223 (* ops to emit, built in loop because prev_data (previous cid) is otherwise inaccessible *)
224- let commit_ops : commit_evt_op list ref = ref [] in
225 let added_leaves = ref Block_map.empty in
226 let%lwt results =
227 Lwt_list.map_s
···250 User_store.put_record t.db (`LexMap record_with_type) path
251 in
252 added_leaves := Block_map.set cid block !added_leaves ;
253- commit_ops :=
254- !commit_ops @ [{action= `Create; path; cid= Some cid; prev= None}] ;
0255 let%lwt new_mst = Cached_mst.add !mst path cid in
256 mst := new_mst ;
257 let refs =
···272 | Update {collection; rkey; value; swap_record; _} ->
273 let path = Format.sprintf "%s/%s" collection rkey in
274 let uri = Format.sprintf "at://%s/%s" t.did path in
275- let%lwt old_cid = User_store.get_record_cid t.db path in
0276 ( if
277 (swap_record <> None && swap_record <> old_cid)
278 || (swap_record = None && old_cid = None)
···288 (Format.sprintf "attempted to update record %s with cid %s"
289 path cid_str ) ) ;
290 let%lwt () =
291- match old_cid with
292- | Some _ -> (
293- match%lwt User_store.get_record t.db path with
294- | Some record ->
295- let refs =
296- Util.find_blob_refs record.value
297- |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref)
00298 in
299- if not (List.is_empty refs) then
300- let%lwt _ =
301- User_store.delete_orphaned_blobs_by_record_path t.db
302- path
303- in
304- Lwt.return_unit
305- else Lwt.return_unit
306- | None ->
307- Lwt.return_unit )
308 | None ->
309 Lwt.return_unit
310 in
···316 User_store.put_record t.db (`LexMap record_with_type) path
317 in
318 added_leaves := Block_map.set new_cid new_block !added_leaves ;
319- commit_ops :=
320- !commit_ops
321- @ [{action= `Update; path; cid= Some new_cid; prev= old_cid}] ;
322 let%lwt new_mst = Cached_mst.add !mst path new_cid in
323 mst := new_mst ;
324 let refs =
···339 ; cid= new_cid } )
340 | Delete {collection; rkey; swap_record; _} ->
341 let path = Format.sprintf "%s/%s" collection rkey in
342- let%lwt cid = User_store.get_record_cid t.db path in
0343 ( if cid = None || (swap_record <> None && swap_record <> cid) then
344 let cid_str =
345 match cid with
···352 (Format.sprintf "attempted to delete record %s with cid %s"
353 path cid_str ) ) ;
354 let%lwt () =
355- match%lwt User_store.get_record t.db path with
356 | Some record ->
357 let refs =
358 Util.find_blob_refs record.value
···368 Lwt.return_unit
369 in
370 let%lwt () = User_store.delete_record t.db path in
371- commit_ops :=
372- !commit_ops @ [{action= `Delete; path; cid= None; prev= cid}] ;
373 let%lwt new_mst = Cached_mst.delete !mst path in
374 mst := new_mst ;
375 Lwt.return
376 (Delete {type'= "com.atproto.repo.applyWrites#deleteResult"}) )
377 writes
378 in
0379 let new_mst = !mst in
380 (* flush all writes, ensuring all blocks are written or none are *)
381 let%lwt () =
···390 let commit_block =
391 new_commit_signed |> signed_commit_to_yojson |> Dag_cbor.encode_yojson
392 in
393- let%lwt proof_blocks =
394- Lwt_list.fold_left_s
395- (fun acc ({path; _} : commit_evt_op) ->
396- let%lwt key_proof =
397- Cached_mst.proof_for_key new_mst new_mst.root path
398- in
399- Lwt.return (Block_map.merge acc key_proof) )
400- Block_map.empty !commit_ops
401- in
402 let proof_blocks = Block_map.merge proof_blocks !added_leaves in
403 let block_stream =
404 proof_blocks |> Block_map.entries |> Lwt_seq.of_list
···410 let%lwt ds = Data_store.connect () in
411 let%lwt _ =
412 Sequencer.sequence_commit ds ~did:t.did ~commit:new_commit_cid
413- ~rev:new_commit_signed.rev ~blocks ~ops:!commit_ops ~since:prev_commit.rev
414 ~prev_data:prev_commit.data ()
415 in
416 Lwt.return {commit= new_commit; results}
···221 ref (Cached_mst.create cached_store prev_commit.data)
222 in
223 (* ops to emit, built in loop because prev_data (previous cid) is otherwise inaccessible *)
224+ let commit_ops_rev : commit_evt_op list ref = ref [] in
225 let added_leaves = ref Block_map.empty in
226 let%lwt results =
227 Lwt_list.map_s
···250 User_store.put_record t.db (`LexMap record_with_type) path
251 in
252 added_leaves := Block_map.set cid block !added_leaves ;
253+ commit_ops_rev :=
254+ {action= `Create; path; cid= Some cid; prev= None}
255+ :: !commit_ops_rev ;
256 let%lwt new_mst = Cached_mst.add !mst path cid in
257 mst := new_mst ;
258 let refs =
···273 | Update {collection; rkey; value; swap_record; _} ->
274 let path = Format.sprintf "%s/%s" collection rkey in
275 let uri = Format.sprintf "at://%s/%s" t.did path in
276+ let%lwt existing_record = User_store.get_record t.db path in
277+ let old_cid = Option.map (fun (r : record) -> r.cid) existing_record in
278 ( if
279 (swap_record <> None && swap_record <> old_cid)
280 || (swap_record = None && old_cid = None)
···290 (Format.sprintf "attempted to update record %s with cid %s"
291 path cid_str ) ) ;
292 let%lwt () =
293+ match existing_record with
294+ | Some record ->
295+ let refs =
296+ Util.find_blob_refs record.value
297+ |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref)
298+ in
299+ if not (List.is_empty refs) then
300+ let%lwt _ =
301+ User_store.delete_orphaned_blobs_by_record_path t.db path
302 in
303+ Lwt.return_unit
304+ else Lwt.return_unit
0000000305 | None ->
306 Lwt.return_unit
307 in
···313 User_store.put_record t.db (`LexMap record_with_type) path
314 in
315 added_leaves := Block_map.set new_cid new_block !added_leaves ;
316+ commit_ops_rev :=
317+ {action= `Update; path; cid= Some new_cid; prev= old_cid}
318+ :: !commit_ops_rev ;
319 let%lwt new_mst = Cached_mst.add !mst path new_cid in
320 mst := new_mst ;
321 let refs =
···336 ; cid= new_cid } )
337 | Delete {collection; rkey; swap_record; _} ->
338 let path = Format.sprintf "%s/%s" collection rkey in
339+ let%lwt existing_record = User_store.get_record t.db path in
340+ let cid = Option.map (fun (r : record) -> r.cid) existing_record in
341 ( if cid = None || (swap_record <> None && swap_record <> cid) then
342 let cid_str =
343 match cid with
···350 (Format.sprintf "attempted to delete record %s with cid %s"
351 path cid_str ) ) ;
352 let%lwt () =
353+ match existing_record with
354 | Some record ->
355 let refs =
356 Util.find_blob_refs record.value
···366 Lwt.return_unit
367 in
368 let%lwt () = User_store.delete_record t.db path in
369+ commit_ops_rev :=
370+ {action= `Delete; path; cid= None; prev= cid} :: !commit_ops_rev ;
371 let%lwt new_mst = Cached_mst.delete !mst path in
372 mst := new_mst ;
373 Lwt.return
374 (Delete {type'= "com.atproto.repo.applyWrites#deleteResult"}) )
375 writes
376 in
377+ let commit_ops = List.rev !commit_ops_rev in
378 let new_mst = !mst in
379 (* flush all writes, ensuring all blocks are written or none are *)
380 let%lwt () =
···389 let commit_block =
390 new_commit_signed |> signed_commit_to_yojson |> Dag_cbor.encode_yojson
391 in
392+ let proof_keys = List.map (fun ({path; _} : commit_evt_op) -> path) commit_ops in
393+ let%lwt proof_blocks = Cached_mst.proof_for_keys new_mst new_mst.root proof_keys in
0000000394 let proof_blocks = Block_map.merge proof_blocks !added_leaves in
395 let block_stream =
396 proof_blocks |> Block_map.entries |> Lwt_seq.of_list
···402 let%lwt ds = Data_store.connect () in
403 let%lwt _ =
404 Sequencer.sequence_commit ds ~did:t.did ~commit:new_commit_cid
405+ ~rev:new_commit_signed.rev ~blocks ~ops:commit_ops ~since:prev_commit.rev
406 ~prev_data:prev_commit.data ()
407 in
408 Lwt.return {commit= new_commit; results}
+156-149
pegasus/lib/user_store.ml
···314 ~path ~cids
315end
31600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000317type t = {did: string; db: Util.caqti_pool}
318319let pool_cache : (string, t) Hashtbl.t = Hashtbl.create 64
···351 Lwt.return ({blocks= Block_map.empty; missing= []} : Block_map.with_missing)
352 else
353 let%lwt blocks = Util.use_pool t.db @@ Queries.get_blocks cids in
00000354 Lwt.return
355 (List.fold_left
356 (fun (acc : Block_map.with_missing) cid ->
357- match List.find_opt (fun (b : block) -> b.cid = cid) blocks with
358- | Some {data; _} ->
359 {acc with blocks= Block_map.set cid data acc.blocks}
360 | None ->
361 {acc with missing= cid :: acc.missing} )
···376 Lwt.return false
377378let put_many t bm : (int, exn) Lwt_result.t =
379- Util.multi_query t.db
380- (List.map
381- (fun (cid, block) -> Queries.put_block cid block)
382- (Block_map.entries bm) )
00383384let delete_block t cid : (bool, exn) Lwt_result.t =
385 Lwt_result.catch
···569 let storage_str = Blob_store.storage_to_string storage in
570 Util.use_pool t.db
571 @@ Queries.list_blobs_by_storage ~storage:storage_str ~limit ~cursor
572-573-module Bulk = struct
574- open struct
575- let escape_sql_string s = Str.global_replace (Str.regexp "'") "''" s
576-577- let bytes_to_hex data =
578- let buf = Buffer.create (Bytes.length data * 2) in
579- Bytes.iter
580- (fun c -> Buffer.add_string buf (Printf.sprintf "%02x" (Char.code c)))
581- data ;
582- Buffer.contents buf
583-584- let chunk_list n lst =
585- if n <= 0 then invalid_arg "negative n passed to chunk_list" ;
586- let rec take_n acc remaining xs =
587- match (remaining, xs) with
588- | _, [] ->
589- (List.rev acc, [])
590- | 0, rest ->
591- (List.rev acc, rest)
592- | _, x :: xs' ->
593- take_n (x :: acc) (remaining - 1) xs'
594- in
595- let rec go xs =
596- match xs with
597- | [] ->
598- []
599- | _ ->
600- let chunk, rest = take_n [] n xs in
601- chunk :: go rest
602- in
603- go lst
604- end
605-606- let put_blocks (blocks : (Cid.t * bytes) list) conn =
607- if List.is_empty blocks then Lwt.return_ok ()
608- else
609- let module C = (val conn : Caqti_lwt.CONNECTION) in
610- let chunks = chunk_list 200 blocks in
611- let rec process_chunks = function
612- | [] ->
613- Lwt.return_ok ()
614- | chunk :: rest -> (
615- let values =
616- List.map
617- (fun (cid, data) ->
618- let cid_str = escape_sql_string (Cid.to_string cid) in
619- let hex_data = bytes_to_hex data in
620- Printf.sprintf "('%s', CAST(X'%s' AS TEXT))" cid_str hex_data )
621- chunk
622- |> String.concat ", "
623- in
624- let sql =
625- Printf.sprintf
626- "INSERT INTO mst (cid, data) VALUES %s ON CONFLICT DO NOTHING"
627- values
628- in
629- let query =
630- Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
631- in
632- let%lwt result = C.exec query () in
633- match result with
634- | Ok () ->
635- process_chunks rest
636- | Error e ->
637- Lwt.return_error e )
638- in
639- process_chunks chunks
640-641- let put_records (records : (string * Cid.t * bytes * string) list) conn =
642- if List.is_empty records then Lwt.return_ok ()
643- else
644- let module C = (val conn : Caqti_lwt.CONNECTION) in
645- let chunks = chunk_list 100 records in
646- let rec process_chunks = function
647- | [] ->
648- Lwt.return_ok ()
649- | chunk :: rest -> (
650- let values =
651- List.map
652- (fun (path, cid, data, since) ->
653- let hex_data = bytes_to_hex data in
654- Printf.sprintf "('%s', '%s', CAST(X'%s' AS TEXT), '%s')"
655- (escape_sql_string path)
656- (escape_sql_string (Cid.to_string cid))
657- hex_data (escape_sql_string since) )
658- chunk
659- |> String.concat ", "
660- in
661- let sql =
662- Printf.sprintf
663- "INSERT INTO records (path, cid, data, since) VALUES %s ON \
664- CONFLICT (path) DO UPDATE SET cid = excluded.cid, data = \
665- excluded.data, since = excluded.since"
666- values
667- in
668- let query =
669- Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
670- in
671- let%lwt result = C.exec query () in
672- match result with
673- | Ok () ->
674- process_chunks rest
675- | Error e ->
676- Lwt.return_error e )
677- in
678- process_chunks chunks
679-680- let put_blob_refs (refs : (string * Cid.t) list) conn =
681- if List.is_empty refs then Lwt.return_ok ()
682- else
683- let module C = (val conn : Caqti_lwt.CONNECTION) in
684- let chunks = chunk_list 200 refs in
685- let rec process_chunks = function
686- | [] ->
687- Lwt.return_ok ()
688- | chunk :: rest -> (
689- let values =
690- List.map
691- (fun (path, cid) ->
692- Printf.sprintf "('%s', '%s')" (escape_sql_string path)
693- (escape_sql_string (Cid.to_string cid)) )
694- chunk
695- |> String.concat ", "
696- in
697- let sql =
698- Printf.sprintf
699- "INSERT INTO blobs_records (record_path, blob_cid) VALUES %s \
700- ON CONFLICT DO NOTHING"
701- values
702- in
703- let query =
704- Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
705- in
706- let%lwt result = C.exec query () in
707- match result with
708- | Ok () ->
709- process_chunks rest
710- | Error e ->
711- Lwt.return_error e )
712- in
713- process_chunks chunks
714-end
···314 ~path ~cids
315end
316317+module Bulk = struct
318+ open struct
319+ let escape_sql_string s = Str.global_replace (Str.regexp "'") "''" s
320+321+ let bytes_to_hex data =
322+ let buf = Buffer.create (Bytes.length data * 2) in
323+ Bytes.iter
324+ (fun c -> Buffer.add_string buf (Printf.sprintf "%02x" (Char.code c)))
325+ data ;
326+ Buffer.contents buf
327+328+ let chunk_list n lst =
329+ if n <= 0 then invalid_arg "negative n passed to chunk_list" ;
330+ let rec take_n acc remaining xs =
331+ match (remaining, xs) with
332+ | _, [] ->
333+ (List.rev acc, [])
334+ | 0, rest ->
335+ (List.rev acc, rest)
336+ | _, x :: xs' ->
337+ take_n (x :: acc) (remaining - 1) xs'
338+ in
339+ let rec go xs =
340+ match xs with
341+ | [] ->
342+ []
343+ | _ ->
344+ let chunk, rest = take_n [] n xs in
345+ chunk :: go rest
346+ in
347+ go lst
348+ end
349+350+ let put_blocks (blocks : (Cid.t * bytes) list) conn =
351+ if List.is_empty blocks then Lwt.return_ok ()
352+ else
353+ let module C = (val conn : Caqti_lwt.CONNECTION) in
354+ let chunks = chunk_list 200 blocks in
355+ let rec process_chunks = function
356+ | [] ->
357+ Lwt.return_ok ()
358+ | chunk :: rest -> (
359+ let values =
360+ List.map
361+ (fun (cid, data) ->
362+ let cid_str = escape_sql_string (Cid.to_string cid) in
363+ let hex_data = bytes_to_hex data in
364+ Printf.sprintf "('%s', CAST(X'%s' AS TEXT))" cid_str hex_data )
365+ chunk
366+ |> String.concat ", "
367+ in
368+ let sql =
369+ Printf.sprintf
370+ "INSERT INTO mst (cid, data) VALUES %s ON CONFLICT DO NOTHING"
371+ values
372+ in
373+ let query =
374+ Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
375+ in
376+ let%lwt result = C.exec query () in
377+ match result with
378+ | Ok () ->
379+ process_chunks rest
380+ | Error e ->
381+ Lwt.return_error e )
382+ in
383+ process_chunks chunks
384+385+ let put_records (records : (string * Cid.t * bytes * string) list) conn =
386+ if List.is_empty records then Lwt.return_ok ()
387+ else
388+ let module C = (val conn : Caqti_lwt.CONNECTION) in
389+ let chunks = chunk_list 100 records in
390+ let rec process_chunks = function
391+ | [] ->
392+ Lwt.return_ok ()
393+ | chunk :: rest -> (
394+ let values =
395+ List.map
396+ (fun (path, cid, data, since) ->
397+ let hex_data = bytes_to_hex data in
398+ Printf.sprintf "('%s', '%s', CAST(X'%s' AS TEXT), '%s')"
399+ (escape_sql_string path)
400+ (escape_sql_string (Cid.to_string cid))
401+ hex_data (escape_sql_string since) )
402+ chunk
403+ |> String.concat ", "
404+ in
405+ let sql =
406+ Printf.sprintf
407+ "INSERT INTO records (path, cid, data, since) VALUES %s ON \
408+ CONFLICT (path) DO UPDATE SET cid = excluded.cid, data = \
409+ excluded.data, since = excluded.since"
410+ values
411+ in
412+ let query =
413+ Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
414+ in
415+ let%lwt result = C.exec query () in
416+ match result with
417+ | Ok () ->
418+ process_chunks rest
419+ | Error e ->
420+ Lwt.return_error e )
421+ in
422+ process_chunks chunks
423+424+ let put_blob_refs (refs : (string * Cid.t) list) conn =
425+ if List.is_empty refs then Lwt.return_ok ()
426+ else
427+ let module C = (val conn : Caqti_lwt.CONNECTION) in
428+ let chunks = chunk_list 200 refs in
429+ let rec process_chunks = function
430+ | [] ->
431+ Lwt.return_ok ()
432+ | chunk :: rest -> (
433+ let values =
434+ List.map
435+ (fun (path, cid) ->
436+ Printf.sprintf "('%s', '%s')" (escape_sql_string path)
437+ (escape_sql_string (Cid.to_string cid)) )
438+ chunk
439+ |> String.concat ", "
440+ in
441+ let sql =
442+ Printf.sprintf
443+ "INSERT INTO blobs_records (record_path, blob_cid) VALUES %s \
444+ ON CONFLICT DO NOTHING"
445+ values
446+ in
447+ let query =
448+ Caqti_request.Infix.( ->. ) Caqti_type.unit Caqti_type.unit sql
449+ in
450+ let%lwt result = C.exec query () in
451+ match result with
452+ | Ok () ->
453+ process_chunks rest
454+ | Error e ->
455+ Lwt.return_error e )
456+ in
457+ process_chunks chunks
458+end
459+460type t = {did: string; db: Util.caqti_pool}
461462let pool_cache : (string, t) Hashtbl.t = Hashtbl.create 64
···494 Lwt.return ({blocks= Block_map.empty; missing= []} : Block_map.with_missing)
495 else
496 let%lwt blocks = Util.use_pool t.db @@ Queries.get_blocks cids in
497+ let found_map =
498+ List.fold_left
499+ (fun acc ({cid; data} : block) -> Block_map.set cid data acc)
500+ Block_map.empty blocks
501+ in
502 Lwt.return
503 (List.fold_left
504 (fun (acc : Block_map.with_missing) cid ->
505+ match Block_map.get cid found_map with
506+ | Some data ->
507 {acc with blocks= Block_map.set cid data acc.blocks}
508 | None ->
509 {acc with missing= cid :: acc.missing} )
···524 Lwt.return false
525526let put_many t bm : (int, exn) Lwt_result.t =
527+ let entries = Block_map.entries bm in
528+ if List.is_empty entries then Lwt.return_ok 0
529+ else
530+ Lwt_result.catch (fun () ->
531+ let%lwt () = Util.use_pool t.db (fun conn -> Bulk.put_blocks entries conn) in
532+ Lwt.return (List.length entries) )
533534let delete_block t cid : (bool, exn) Lwt_result.t =
535 Lwt_result.catch
···719 let storage_str = Blob_store.storage_to_string storage in
720 Util.use_pool t.db
721 @@ Queries.list_blobs_by_storage ~storage:storage_str ~limit ~cursor
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000