objective categorical abstract machine language personal data server

MST/repo benchmarks

futur.blue c93abc7b 7d0bf431

verified
+961
+493
mist/bench/bench_mst.ml
(* Benchmarks for the MST (Merkle Search Tree) implementation, run against an
   in-memory blockstore.  Each bench prints wall-clock timings to stdout. *)

open Mist
open Lwt.Infix
module Mem_mst = Mst.Make (Storage.Memory_blockstore)
module String_map = Dag_cbor.String_map

(* One measurement: [total_s] is wall time over [iterations] runs;
   [per_iter_s] = total_s / iterations. *)
type timing_result = {name: string; iterations: int; total_s: float; per_iter_s: float}

(* Time a single run of [f]. *)
let time_it name f : timing_result Lwt.t =
  let start = Unix.gettimeofday () in
  let%lwt _ = f () in
  let elapsed = Unix.gettimeofday () -. start in
  Lwt.return {name; iterations= 1; total_s= elapsed; per_iter_s= elapsed}

(* Time [n] sequential runs of [f]. *)
let time_it_n name n f : timing_result Lwt.t =
  let start = Unix.gettimeofday () in
  let%lwt () =
    let rec loop i = if i >= n then Lwt.return_unit else f () >>= fun _ -> loop (i + 1) in
    loop 0
  in
  let elapsed = Unix.gettimeofday () -. start in
  Lwt.return {name; iterations= n; total_s= elapsed; per_iter_s= elapsed /. float_of_int n}

let print_result r =
  if r.iterations = 1 then Printf.printf " %-50s %10.4f s\n%!" r.name r.total_s
  else
    Printf.printf " %-50s %10.4f s total, %10.6f s/iter (%d iters)\n%!" r.name r.total_s
      r.per_iter_s r.iterations

let print_header name = Printf.printf "\n=== %s ===\n%!" name

(* Normalise a (baseline, candidate) timing pair into a ratio >= 1.0 plus a
   "faster"/"slower" label.  The previous code printed the raw ratio through
   [abs_float], reporting e.g. "0.80x slower" where "1.25x slower" was meant. *)
let speedup_parts ~baseline ~candidate =
  let s = baseline.total_s /. candidate.total_s in
  if s >= 1.0 then (s, "faster") else (1.0 /. s, "slower")

(* [n] uniformly random bytes. *)
let rand_bytes n =
  let b = Bytes.create n in
  for i = 0 to n - 1 do
    Bytes.set b i (Char.chr (Random.int 256))
  done ;
  b

(* A random 64-byte payload encoded as DAG-CBOR, paired with its CID. *)
let random_block () =
  let b = rand_bytes 64 in
  let bytes = Dag_cbor.encode (`Bytes b) in
  let cid = Cid.create Dcbor bytes in
  (cid, bytes)

(* Store one fresh random block in [store] and return its CID. *)
let put_random_block store =
  let cid, bytes = random_block () in
  Storage.Memory_blockstore.put_block store cid bytes >|= fun _ -> cid

(* Random lowercase-alphanumeric string of length [len]. *)
let random_alnum len =
  let allowed = "abcdefghijklmnopqrstuvwxyz0123456789" in
  let b = Bytes.create len in
  for i = 0 to len - 1 do
    Bytes.set b i allowed.[Random.int (String.length allowed)]
  done ;
  Bytes.to_string b

let valid_rkey () = random_alnum 13

let make_key () = "com.example/" ^ valid_rkey ()

(* [n] fresh keys, all distinct from each other and from [acc], prepended to
   [acc].  Duplicate detection uses a hash table; the previous [List.mem]
   version was O(n^2) and dominated setup time for large benchmark sizes. *)
let unique_keys n acc =
  let seen = Hashtbl.create (max 16 (2 * n)) in
  List.iter (fun k -> Hashtbl.replace seen k ()) acc ;
  let rec go n acc =
    if n <= 0 then acc
    else
      let k = make_key () in
      if Hashtbl.mem seen k then go n acc
      else begin
        Hashtbl.add seen k () ;
        go (n - 1) (k :: acc)
      end
  in
  go n acc

(* [count] unique (key, cid) pairs whose blocks are stored in [store]. *)
let generate_bulk_data store count =
  let keys = unique_keys count [] in
  Lwt_list.map_s (fun k -> put_random_block store >|= fun cid -> (k, cid)) keys

(* Fisher-Yates shuffle; returns a new list. *)
let shuffle lst =
  let arr = Array.of_list lst in
  for i = Array.length arr - 1 downto 1 do
    let j = Random.int (i + 1) in
    let tmp = arr.(i) in
    arr.(i) <- arr.(j) ;
    arr.(j) <- tmp
  done ;
  Array.to_list arr

(* Time [proof_for_key] over [keys] against [mst], labelled [label]. *)
let time_proofs mst label keys =
  time_it label (fun () ->
      Lwt_list.iter_s (fun k -> Mem_mst.proof_for_key mst mst.root k >|= fun _ -> ()) keys )

let bench_of_assoc sizes =
  print_header "bulk creation" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt r =
        time_it (Printf.sprintf "of_assoc %d records" size) (fun () -> Mem_mst.of_assoc store data)
      in
      print_result r ; Lwt.return_unit )
    sizes

let bench_incremental_add sizes =
  print_header "incremental add vs add_rebuild" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let shuffled = shuffle data in
      (* incremental add *)
      let%lwt mst_base =
        match%lwt Mem_mst.create_empty store with Ok mst -> Lwt.return mst | Error e -> raise e
      in
      let%lwt r1 =
        time_it
          (Printf.sprintf "add (incremental) %d records" size)
          (fun () -> Lwt_list.fold_left_s (fun t (k, v) -> Mem_mst.add t k v) mst_base shuffled)
      in
      print_result r1 ;
      (* add_rebuild for comparison *)
      let%lwt mst_base2 =
        match%lwt Mem_mst.create_empty store with Ok mst -> Lwt.return mst | Error e -> raise e
      in
      let%lwt r2 =
        time_it
          (Printf.sprintf "add_rebuild %d records" size)
          (fun () ->
            Lwt_list.fold_left_s (fun t (k, v) -> Mem_mst.add_rebuild t k v) mst_base2 shuffled )
      in
      print_result r2 ;
      let ratio, label = speedup_parts ~baseline:r2 ~candidate:r1 in
      Printf.printf " -> incremental is %.2fx %s\n%!" ratio label ;
      Lwt.return_unit )
    sizes

let bench_incremental_delete sizes =
  print_header "incremental delete vs delete_rebuild" ;
  Lwt_list.iter_s
    (fun (tree_size, delete_count) ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store tree_size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let to_delete = shuffle data |> List.filteri (fun i _ -> i < delete_count) in
      (* incremental delete *)
      let%lwt r1 =
        time_it
          (Printf.sprintf "delete (incr) %d from %d" delete_count tree_size)
          (fun () -> Lwt_list.fold_left_s (fun t (k, _) -> Mem_mst.delete t k) mst to_delete)
      in
      print_result r1 ;
      (* rebuild the tree for delete_rebuild test *)
      let%lwt mst2 = Mem_mst.of_assoc store data in
      let%lwt r2 =
        time_it
          (Printf.sprintf "delete_rebuild %d from %d" delete_count tree_size)
          (fun () ->
            Lwt_list.fold_left_s (fun t (k, _) -> Mem_mst.delete_rebuild t k) mst2 to_delete )
      in
      print_result r2 ;
      let ratio, label = speedup_parts ~baseline:r2 ~candidate:r1 in
      Printf.printf " -> incremental is %.2fx %s\n%!" ratio label ;
      Lwt.return_unit )
    sizes

let bench_single_add_scaling sizes =
  print_header "time to add 1 record to tree of size n" ;
  let iterations = 500 in
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let%lwt extra_data = generate_bulk_data store iterations in
      (* Array for O(1) random access: List.nth inside the timed loop would
         add O(n) list traversal to every measured iteration. *)
      let extra = Array.of_list extra_data in
      let%lwt r =
        time_it_n
          (Printf.sprintf "single add to %d-record tree" size)
          iterations
          (fun () ->
            let k, v = extra.(Random.int iterations) in
            Mem_mst.add mst k v >|= fun _ -> () )
      in
      print_result r ; Lwt.return_unit )
    sizes

let bench_single_delete_scaling sizes =
  print_header "time to delete 1 record from tree of size n" ;
  let iterations = 500 in
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      (* Array for O(1) indexed access inside the timed loop (see above). *)
      let shuffled = Array.of_list (shuffle data) in
      let idx = ref 0 in
      let%lwt r =
        time_it_n
          (Printf.sprintf "single delete from %d-record tree" size)
          (min iterations size)
          (fun () ->
            let k, _ = shuffled.(!idx) in
            idx := !idx + 1 ;
            Mem_mst.delete mst k >|= fun _ -> () )
      in
      print_result r ; Lwt.return_unit )
    sizes

let bench_traversal sizes =
  print_header "traversal" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let%lwt r1 =
        time_it (Printf.sprintf "build_map %d records" size) (fun () -> Mem_mst.build_map mst)
      in
      print_result r1 ;
      let%lwt r2 =
        time_it
          (Printf.sprintf "leaves_of_root %d records" size)
          (fun () -> Mem_mst.leaves_of_root mst)
      in
      print_result r2 ;
      let%lwt r3 =
        time_it (Printf.sprintf "leaf_count %d records" size) (fun () -> Mem_mst.leaf_count mst)
      in
      print_result r3 ;
      let%lwt r4 =
        time_it (Printf.sprintf "all_nodes %d records" size) (fun () -> Mem_mst.all_nodes mst)
      in
      print_result r4 ;
      let%lwt r5 =
        time_it
          (Printf.sprintf "collect_nodes_and_leaves %d records" size)
          (fun () -> Mem_mst.collect_nodes_and_leaves mst)
      in
      print_result r5 ; Lwt.return_unit )
    sizes

let bench_streaming sizes =
  print_header "streaming" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let%lwt r1 =
        time_it
          (Printf.sprintf "to_blocks_stream consume %d" size)
          (fun () ->
            let stream = Mem_mst.to_blocks_stream mst in
            Lwt_seq.fold_left_s (fun count _ -> Lwt.return (count + 1)) 0 stream )
      in
      print_result r1 ;
      let%lwt r2 =
        time_it
          (Printf.sprintf "to_ordered_stream consume %d" size)
          (fun () ->
            let stream = Mem_mst.to_ordered_stream mst in
            Lwt_seq.fold_left_s (fun count _ -> Lwt.return (count + 1)) 0 stream )
      in
      print_result r2 ; Lwt.return_unit )
    sizes

let bench_proof_generation sizes =
  print_header "proof generation" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let num_proofs = min 10 size in
      let test_keys =
        shuffle data |> List.filteri (fun i _ -> i < num_proofs) |> List.map fst
      in
      let%lwt r =
        time_proofs mst
          (Printf.sprintf "proof_for_key %d proofs, %d-record tree" num_proofs size)
          test_keys
      in
      print_result r ;
      Printf.printf " (%.6f s per proof)\n%!" (r.total_s /. float_of_int num_proofs) ;
      Lwt.return_unit )
    sizes

let bench_equality sizes =
  print_header "equality check" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst1 = Mem_mst.of_assoc store data in
      let%lwt mst2 = Mem_mst.of_assoc store (shuffle data) in
      let%lwt r =
        time_it
          (Printf.sprintf "equal (identical trees) %d records" size)
          (fun () -> Mem_mst.equal mst1 mst2)
      in
      print_result r ; Lwt.return_unit )
    sizes

(* Interleaved adds (70%), updates (20%) and deletes (10%) against a live
   tree, driven by pre-generated data so the timed loop does no generation. *)
let bench_mixed_ops () =
  print_header "mixed operations" ;
  let configs = [(10000, 5000); (20000, 10000); (50000, 20000)] in
  Lwt_list.iter_s
    (fun (initial_size, num_ops) ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt initial_data = generate_bulk_data store initial_size in
      let%lwt mst = Mem_mst.of_assoc store initial_data in
      let%lwt extra_data = generate_bulk_data store num_ops in
      let existing = ref (shuffle initial_data) in
      let pending_adds = ref (shuffle extra_data) in
      let%lwt r =
        time_it
          (Printf.sprintf "mixed %d ops on %d-record tree" num_ops initial_size)
          (fun () ->
            let rec loop mst i =
              if i >= num_ops then Lwt.return mst
              else
                let op_type = Random.int 100 in
                if op_type < 70 then (
                  (* add new record *)
                  match !pending_adds with
                  | (k, v) :: rest ->
                      pending_adds := rest ;
                      existing := (k, v) :: !existing ;
                      let%lwt mst' = Mem_mst.add mst k v in
                      loop mst' (i + 1)
                  | [] ->
                      loop mst (i + 1) )
                else if op_type < 90 then (
                  (* update existing record *)
                  match !existing with
                  | (k, _) :: _ ->
                      let%lwt new_cid = put_random_block store in
                      let%lwt mst' = Mem_mst.add mst k new_cid in
                      loop mst' (i + 1)
                  | [] ->
                      loop mst (i + 1) )
                else (
                  (* delete existing record *)
                  match !existing with
                  | (k, _) :: rest ->
                      existing := rest ;
                      let%lwt mst' = Mem_mst.delete mst k in
                      loop mst' (i + 1)
                  | [] ->
                      loop mst (i + 1) )
            in
            loop mst 0 )
      in
      print_result r ;
      Printf.printf " (%.6f s per op avg)\n%!" (r.total_s /. float_of_int num_ops) ;
      Lwt.return_unit )
    configs

let bench_batch_add () =
  print_header "batch add" ;
  let tree_size = 1000 in
  let batch_sizes = [100; 500; 1000; 2000] in
  Lwt_list.iter_s
    (fun batch_size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt initial_data = generate_bulk_data store tree_size in
      let%lwt mst = Mem_mst.of_assoc store initial_data in
      let%lwt batch_data = generate_bulk_data store batch_size in
      let%lwt r1 =
        time_it
          (Printf.sprintf "batch add (incremental) %d to %d tree" batch_size tree_size)
          (fun () -> Lwt_list.fold_left_s (fun t (k, v) -> Mem_mst.add t k v) mst batch_data)
      in
      print_result r1 ;
      let%lwt r2 =
        time_it
          (Printf.sprintf "batch add (rebuild) %d to %d tree" batch_size tree_size)
          (fun () -> Mem_mst.of_assoc store (initial_data @ batch_data))
      in
      print_result r2 ;
      let ratio, label = speedup_parts ~baseline:r2 ~candidate:r1 in
      Printf.printf " -> incremental is %.2fx %s for batch of %d\n%!" ratio label batch_size ;
      Lwt.return_unit )
    batch_sizes

(* testing proof generation with different key distributions *)
let bench_key_lookup_patterns sizes =
  print_header "key lookup patterns" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let sorted_keys = List.sort compare (List.map fst data) in
      let num_lookups = min 20 size in
      (* first n keys *)
      let early_keys = List.filteri (fun i _ -> i < num_lookups) sorted_keys in
      let%lwt r1 =
        time_proofs mst
          (Printf.sprintf "proof early keys (%d from %d tree)" num_lookups size)
          early_keys
      in
      print_result r1 ;
      (* last n keys *)
      let late_keys =
        List.filteri (fun i _ -> i >= List.length sorted_keys - num_lookups) sorted_keys
      in
      let%lwt r2 =
        time_proofs mst
          (Printf.sprintf "proof late keys (%d from %d tree)" num_lookups size)
          late_keys
      in
      print_result r2 ;
      (* random keys *)
      let random_keys = shuffle sorted_keys |> List.filteri (fun i _ -> i < num_lookups) in
      let%lwt r3 =
        time_proofs mst
          (Printf.sprintf "proof random keys (%d from %d tree)" num_lookups size)
          random_keys
      in
      print_result r3 ; Lwt.return_unit )
    sizes

let bench_serialization sizes =
  print_header "serialization" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      (* Retrieve the root node to serialize *)
      let%lwt root_node =
        match%lwt Mem_mst.retrieve_node mst mst.root with
        | Some n ->
            Lwt.return n
        | None ->
            failwith "root not found"
      in
      let iterations = 100 in
      let%lwt r =
        time_it_n
          (Printf.sprintf "serialize root node (%d-record tree)" size)
          iterations
          (fun () -> Mem_mst.serialize mst root_node >|= fun _ -> ())
      in
      print_result r ; Lwt.return_unit )
    sizes

let bench_layer_ops sizes =
  print_header "layer/height operations" ;
  Lwt_list.iter_s
    (fun size ->
      let store = Storage.Memory_blockstore.create () in
      let%lwt data = generate_bulk_data store size in
      let%lwt mst = Mem_mst.of_assoc store data in
      let iterations = 1000 in
      let%lwt r =
        time_it_n (Printf.sprintf "layer query (%d-record tree)" size) iterations (fun () ->
            Mem_mst.layer mst >|= fun _ -> () )
      in
      print_result r ; Lwt.return_unit )
    sizes

(* Run every bench suite in sequence; sizes are grouped so that the slower
   suites (per-op incremental work) use the smaller size sets. *)
let run_all_benchmarks () =
  Printf.printf "mst benchmarks\n" ;
  Printf.printf "==============\n" ;
  let small = [500; 1000; 2500] in
  let medium = [1000; 2500; 5000] in
  let large = [5000; 10000; 20000] in
  let delete_configs = [(500, 1000); (1000, 2500); (2000, 5000)] in
  let%lwt () = bench_of_assoc large in
  let%lwt () = bench_incremental_add small in
  let%lwt () = bench_incremental_delete delete_configs in
  let%lwt () = bench_single_add_scaling medium in
  let%lwt () = bench_single_delete_scaling medium in
  let%lwt () = bench_traversal medium in
  let%lwt () = bench_streaming medium in
  let%lwt () = bench_proof_generation medium in
  let%lwt () = bench_equality small in
  let%lwt () = bench_mixed_ops () in
  let%lwt () = bench_batch_add () in
  let%lwt () = bench_key_lookup_patterns medium in
  let%lwt () = bench_serialization small in
  let%lwt () = bench_layer_ops medium in
  Lwt.return_unit

let () =
  Random.self_init () ;
  Lwt_main.run (run_all_benchmarks ())
+7
mist/bench/dune
; Build rules for the MST benchmark runner (bench_mst.ml).
; lwt_ppx provides the let%lwt / match%lwt syntax used throughout.
(executable
 (name bench_mst)
 (public_name bench_mst)
 (package mist)
 (libraries ipld mist lwt lwt_ppx unix)
 (preprocess
  (pps lwt_ppx)))
+454
pegasus/bench/bench_repository.ml
(* Repository-level benchmarks: block storage against a temporary sqlite
   database (User_store), db-backed MST operations, and CAR export/import. *)

open Lwt.Infix
module Block_map = Mist.Storage.Block_map
module Lex = Mist.Lex
module Tid = Mist.Tid
module User_store = Pegasus.User_store
module Util = Pegasus.Util
module Migrations = Pegasus.Migrations
module Mst = Mist.Mst.Make (User_store)
module Mem_mst = Mist.Mst.Make (Mist.Storage.Memory_blockstore)

(* One measurement: [total_s] is wall time over [iterations] runs. *)
type timing_result = {name: string; iterations: int; total_s: float; per_iter_s: float}

(* Time a single run of [f]. *)
let time_it name f : timing_result Lwt.t =
  let start = Unix.gettimeofday () in
  let%lwt _ = f () in
  let elapsed = Unix.gettimeofday () -. start in
  Lwt.return {name; iterations= 1; total_s= elapsed; per_iter_s= elapsed}

(* Time [n] sequential runs of [f]. *)
let time_it_n name n f : timing_result Lwt.t =
  let start = Unix.gettimeofday () in
  let%lwt () =
    let rec loop i = if i >= n then Lwt.return_unit else f () >>= fun _ -> loop (i + 1) in
    loop 0
  in
  let elapsed = Unix.gettimeofday () -. start in
  Lwt.return {name; iterations= n; total_s= elapsed; per_iter_s= elapsed /. float_of_int n}

let print_result r =
  if r.iterations = 1 then Printf.printf " %-55s %10.4f s\n%!" r.name r.total_s
  else
    Printf.printf " %-55s %10.4f s total, %10.6f s/iter (%d iters)\n%!" r.name r.total_s
      r.per_iter_s r.iterations

let print_header name = Printf.printf "\n=== %s ===\n%!" name

(* Counter so each bench gets its own db file within one process run. *)
let temp_db_counter = ref 0

(* Fresh sqlite db path under /tmp plus the sqlite3:// URI for it. *)
let create_temp_db () =
  incr temp_db_counter ;
  let path =
    Printf.sprintf "/tmp/pegasus_bench_%d_%d.db" (Unix.getpid ()) !temp_db_counter
  in
  let uri = Uri.of_string ("sqlite3://" ^ path) in
  (path, uri)

(* Delete the db file and its -wal/-shm side files, ignoring missing files.
   NOTE: the previous version wrote [try Unix.unlink p with _ -> () ; next],
   and in OCaml the [; next] sequence parses as part of the exception
   handler, so the -wal/-shm files were only removed when the first unlink
   RAISED.  Factoring each attempt through [rm] fixes the precedence bug. *)
let cleanup_temp_db path =
  let rm p = try Unix.unlink p with Unix.Unix_error _ -> () in
  rm path ;
  rm (path ^ "-wal") ;
  rm (path ^ "-shm")

(* Create a fresh migrated sqlite-backed User_store; returns the store and
   the file path so the caller can [cleanup_temp_db] it afterwards. *)
let setup_test_db () : (User_store.t * string) Lwt.t =
  let path, uri = create_temp_db () in
  let%lwt pool = Util.connect_sqlite ~create:true ~write:true uri in
  let%lwt () = Migrations.run_migrations User_store pool in
  let db : User_store.t = {did= "did:plc:bench"; db= pool} in
  Lwt.return (db, path)

(* [n] uniformly random bytes. *)
let rand_bytes n =
  let b = Bytes.create n in
  for i = 0 to n - 1 do
    Bytes.set b i (Char.chr (Random.int 256))
  done ;
  b

(* A random 64-byte payload encoded as DAG-CBOR, paired with its CID. *)
let random_block () =
  let b = rand_bytes 64 in
  let bytes = Dag_cbor.encode (`Bytes b) in
  let cid = Cid.create Dcbor bytes in
  (cid, bytes)

(* Random lowercase-alphanumeric string of length [len]. *)
let random_alnum len =
  let allowed = "abcdefghijklmnopqrstuvwxyz0123456789" in
  let b = Bytes.create len in
  for i = 0 to len - 1 do
    Bytes.set b i allowed.[Random.int (String.length allowed)]
  done ;
  Bytes.to_string b

let valid_rkey () = random_alnum 13

let make_path () = "app.bsky.feed.post/" ^ valid_rkey ()

(* [n] fresh record paths, all distinct from each other and from [acc],
   prepended to [acc].  Hash-table dedupe replaces the previous O(n^2)
   [List.mem] scan, which was significant at the 50k sizes used below. *)
let unique_paths n acc =
  let seen = Hashtbl.create (max 16 (2 * n)) in
  List.iter (fun p -> Hashtbl.replace seen p ()) acc ;
  let rec go n acc =
    if n <= 0 then acc
    else
      let p = make_path () in
      if Hashtbl.mem seen p then go n acc
      else begin
        Hashtbl.add seen p () ;
        go (n - 1) (p :: acc)
      end
  in
  go n acc

let generate_blocks count = List.init count (fun _ -> random_block ())

(* [count] synthetic post records as (path, cid, cbor_bytes, tid) tuples. *)
let generate_record_data count =
  let paths = unique_paths count [] in
  List.map
    (fun path ->
      let record : Lex.repo_record =
        Lex.String_map.empty
        |> Lex.String_map.add "$type" (`String "app.bsky.feed.post")
        |> Lex.String_map.add "text" (`String (random_alnum 100))
        |> Lex.String_map.add "createdAt" (`String (Tid.now ()))
      in
      let cid, data = Lex.to_cbor_block (`LexMap record) in
      (path, cid, data, Tid.now ()) )
    paths

(* Fisher-Yates shuffle; returns a new list. *)
let shuffle lst =
  let arr = Array.of_list lst in
  for i = Array.length arr - 1 downto 1 do
    let j = Random.int (i + 1) in
    let tmp = arr.(i) in
    arr.(i) <- arr.(j) ;
    arr.(j) <- tmp
  done ;
  Array.to_list arr

let bench_single_block_ops () =
  print_header "single block ops" ;
  let%lwt db, path = setup_test_db () in
  let blocks = generate_blocks 1000 in
  (* Array for O(1) random access inside the timed loops; List.nth would add
     O(n) list traversal to every measured iteration. *)
  let block_arr = Array.of_list blocks in
  let%lwt r1 =
    time_it_n "put_block (single)" 1000 (fun () ->
        let cid, data = block_arr.(Random.int 1000) in
        User_store.put_block db cid data >|= fun _ -> () )
  in
  print_result r1 ;
  let%lwt () =
    Lwt_list.iter_s
      (fun (cid, data) -> User_store.put_block db cid data >|= fun _ -> ())
      blocks
  in
  let%lwt r2 =
    time_it_n "get_bytes (single, existing)" 1000 (fun () ->
        let cid, _ = block_arr.(Random.int 1000) in
        User_store.get_bytes db cid >|= fun _ -> () )
  in
  print_result r2 ;
  let missing_cid, _ = random_block () in
  let%lwt r3 =
    time_it_n "get_bytes (single, missing)" 1000 (fun () ->
        User_store.get_bytes db missing_cid >|= fun _ -> () )
  in
  print_result r3 ;
  cleanup_temp_db path ; Lwt.return_unit

let bench_batch_block_ops () =
  print_header "batch block ops" ;
  let batch_sizes = [100; 500; 1000; 2000] in
  Lwt_list.iter_s
    (fun batch_size ->
      let%lwt db, path = setup_test_db () in
      let blocks = generate_blocks batch_size in
      let block_map =
        List.fold_left (fun acc (cid, data) -> Block_map.set cid data acc) Block_map.empty blocks
      in
      let%lwt r1 =
        time_it (Printf.sprintf "put_many (%d blocks)" batch_size) (fun () ->
            User_store.put_many db block_map >|= fun _ -> () )
      in
      print_result r1 ;
      let cids = List.map fst blocks in
      let%lwt r2 =
        time_it (Printf.sprintf "get_blocks (%d blocks)" batch_size) (fun () ->
            User_store.get_blocks db cids >|= fun _ -> () )
      in
      print_result r2 ;
      cleanup_temp_db path ; Lwt.return_unit )
    batch_sizes

let bench_bulk_insert_ops () =
  print_header "bulk insert" ;
  let sizes = [1000; 5000; 10000; 20000] in
  Lwt_list.iter_s
    (fun size ->
      let%lwt db, path = setup_test_db () in
      let blocks = generate_blocks size in
      let%lwt r1 =
        time_it (Printf.sprintf "Bulk.put_blocks (%d blocks)" size) (fun () ->
            Util.use_pool db.db (fun conn -> User_store.Bulk.put_blocks blocks conn)
            >|= fun _ -> () )
      in
      print_result r1 ;
      cleanup_temp_db path ;
      let%lwt db2, path2 = setup_test_db () in
      let records = generate_record_data size in
      let%lwt r2 =
        time_it (Printf.sprintf "Bulk.put_records (%d records)" size) (fun () ->
            Util.use_pool db2.db (fun conn -> User_store.Bulk.put_records records conn)
            >|= fun _ -> () )
      in
      print_result r2 ;
      cleanup_temp_db path2 ; Lwt.return_unit )
    sizes

let bench_db_mst_ops () =
  print_header "mst operations (with db)" ;
  let sizes = [1000; 5000; 10000] in
  Lwt_list.iter_s
    (fun size ->
      let%lwt db, path = setup_test_db () in
      let records = generate_record_data size in
      let kv_pairs = List.map (fun (path, cid, _, _) -> (path, cid)) records in
      let%lwt () =
        Util.use_pool db.db (fun conn -> User_store.Bulk.put_records records conn)
        >|= fun _ -> ()
      in
      let%lwt r1 =
        time_it (Printf.sprintf "Mst.of_assoc (db-backed, %d records)" size) (fun () ->
            Mst.of_assoc db kv_pairs >|= fun _ -> () )
      in
      print_result r1 ;
      let%lwt mst = Mst.of_assoc db kv_pairs in
      let%lwt r2 =
        time_it (Printf.sprintf "Mst.build_map (db-backed, %d records)" size) (fun () ->
            Mst.build_map mst >|= fun _ -> () )
      in
      print_result r2 ;
      let%lwt r3 =
        time_it (Printf.sprintf "Mst.leaf_count (db-backed, %d records)" size) (fun () ->
            Mst.leaf_count mst >|= fun _ -> () )
      in
      print_result r3 ;
      cleanup_temp_db path ; Lwt.return_unit )
    sizes

let bench_db_mst_incremental () =
  print_header "incremental mst add (with db)" ;
  let configs = [(5000, 50); (10000, 100)] in
  Lwt_list.iter_s
    (fun (initial_size, add_count) ->
      let%lwt db, path = setup_test_db () in
      let initial_records = generate_record_data initial_size in
      let initial_kv = List.map (fun (path, cid, _, _) -> (path, cid)) initial_records in
      let%lwt () =
        Util.use_pool db.db (fun conn -> User_store.Bulk.put_records initial_records conn)
        >|= fun _ -> ()
      in
      let%lwt mst = Mst.of_assoc db initial_kv in
      let add_records = generate_record_data add_count in
      let add_kv = List.map (fun (path, cid, _, _) -> (path, cid)) add_records in
      let%lwt () =
        Util.use_pool db.db (fun conn -> User_store.Bulk.put_records add_records conn)
        >|= fun _ -> ()
      in
      let%lwt r1 =
        time_it
          (Printf.sprintf "Mst.add incremental (%d to %d-record tree)" add_count initial_size)
          (fun () ->
            Lwt_list.fold_left_s (fun t (k, v) -> Mst.add t k v) mst add_kv >|= fun _ -> () )
      in
      print_result r1 ;
      Printf.printf " (%.6f s per add avg)\n%!" (r1.total_s /. float_of_int add_count) ;
      cleanup_temp_db path ; Lwt.return_unit )
    configs

let bench_car_export () =
  print_header "car export" ;
  let sizes = [1000; 5000; 10000; 20000] in
  Lwt_list.iter_s
    (fun size ->
      let store = Mist.Storage.Memory_blockstore.create () in
      let records = generate_record_data size in
      let%lwt () =
        Lwt_list.iter_s
          (fun (_, cid, data, _) ->
            Mist.Storage.Memory_blockstore.put_block store cid data >|= fun _ -> () )
          records
      in
      let kv_pairs = List.map (fun (path, cid, _, _) -> (path, cid)) records in
      let%lwt mst = Mem_mst.of_assoc store kv_pairs in
      let%lwt r1 =
        time_it (Printf.sprintf "to_blocks_stream + consume (%d records)" size) (fun () ->
            let stream = Mem_mst.to_blocks_stream mst in
            Lwt_seq.fold_left_s (fun count _ -> Lwt.return (count + 1)) 0 stream >|= fun _ -> () )
      in
      print_result r1 ;
      (* Minimal single-field commit block pointing at the MST root. *)
      let commit_map = Dag_cbor.String_map.(empty |> add "root" (`Link mst.root)) in
      let commit_block = Dag_cbor.encode (`Map commit_map) in
      let commit_cid = Cid.create Dcbor commit_block in
      let%lwt r2 =
        time_it (Printf.sprintf "Car.blocks_to_stream (%d records)" size) (fun () ->
            let blocks = Mem_mst.to_blocks_stream mst in
            let all_blocks = Lwt_seq.cons (commit_cid, commit_block) blocks in
            let car_stream = Car.blocks_to_stream commit_cid all_blocks in
            Lwt_seq.fold_left_s (fun acc chunk -> Lwt.return (acc + Bytes.length chunk)) 0 car_stream
            >|= fun _ -> () )
      in
      print_result r2 ;
      Lwt.return_unit )
    sizes

let bench_car_import () =
  print_header "car import" ;
  let sizes = [10000; 25000; 50000] in
  Lwt_list.iter_s
    (fun size ->
      let store = Mist.Storage.Memory_blockstore.create () in
      let records = generate_record_data size in
      let%lwt () =
        Lwt_list.iter_s
          (fun (_, cid, data, _) ->
            Mist.Storage.Memory_blockstore.put_block store cid data >|= fun _ -> () )
          records
      in
      let kv_pairs = List.map (fun (path, cid, _, _) -> (path, cid)) records in
      let%lwt mst = Mem_mst.of_assoc store kv_pairs in
      let commit_map = Dag_cbor.String_map.(empty |> add "root" (`Link mst.root)) in
      let commit_block = Dag_cbor.encode (`Map commit_map) in
      let commit_cid = Cid.create Dcbor commit_block in
      let blocks = Mem_mst.to_blocks_stream mst in
      let all_blocks = Lwt_seq.cons (commit_cid, commit_block) blocks in
      let car_stream = Car.blocks_to_stream commit_cid all_blocks in
      let%lwt car_bytes = Car.collect_stream car_stream in
      Printf.printf " (car size for %d records: %d bytes)\n%!" size (Bytes.length car_bytes) ;
      let%lwt r1 =
        time_it (Printf.sprintf "Car.read_car_stream parse (%d records)" size) (fun () ->
            let stream = Lwt_seq.return car_bytes in
            let%lwt roots, blocks_seq = Car.read_car_stream stream in
            let%lwt _ =
              Lwt_seq.fold_left_s (fun acc (_, _) -> Lwt.return (acc + 1)) 0 blocks_seq
            in
            Lwt.return roots )
      in
      print_result r1 ;
      Lwt.return_unit )
    sizes

let bench_rebuild_mst () =
  print_header "mst rebuild" ;
  let sizes = [1000; 2500; 5000] in
  Lwt_list.iter_s
    (fun size ->
      let%lwt db, path = setup_test_db () in
      let records = generate_record_data size in
      let%lwt () =
        Util.use_pool db.db (fun conn -> User_store.Bulk.put_records records conn)
        >|= fun _ -> ()
      in
      let%lwt r1 =
        time_it (Printf.sprintf "get_all_record_cids + of_assoc (%d records)" size) (fun () ->
            let%lwt record_cids = User_store.get_all_record_cids db in
            let%lwt _ = Mst.of_assoc db record_cids in
            Lwt.return_unit )
      in
      print_result r1 ;
      cleanup_temp_db path ; Lwt.return_unit )
    sizes

(* Interleaved adds (60%), proof lookups (30%) and deletes (10%) against a
   db-backed tree; all record data is generated before the timed region. *)
let bench_mixed_ops () =
  print_header "mixed operations" ;
  let%lwt db, path = setup_test_db () in
  let initial_size = 1000 in
  let num_ops = 500 in
  let initial_records = generate_record_data initial_size in
  let%lwt () =
    Util.use_pool db.db (fun conn -> User_store.Bulk.put_records initial_records conn)
    >|= fun _ -> ()
  in
  let initial_kv = List.map (fun (path, cid, _, _) -> (path, cid)) initial_records in
  let%lwt mst = Mst.of_assoc db initial_kv in
  let extra_records = generate_record_data num_ops in
  let%lwt () =
    Util.use_pool db.db (fun conn -> User_store.Bulk.put_records extra_records conn)
    >|= fun _ -> ()
  in
  let existing = ref (shuffle initial_records) in
  let pending_adds = ref (shuffle extra_records) in
  let%lwt r1 =
    time_it
      (Printf.sprintf "mixed ops (%d ops)" num_ops)
      (fun () ->
        let rec loop mst i =
          if i >= num_ops then Lwt.return mst
          else
            let op_type = Random.int 100 in
            if op_type < 60 then (
              match !pending_adds with
              | (path, cid, _, _) :: rest ->
                  pending_adds := rest ;
                  existing := (path, cid, Bytes.empty, "") :: !existing ;
                  let%lwt mst' = Mst.add mst path cid in
                  loop mst' (i + 1)
              | [] ->
                  loop mst (i + 1) )
            else if op_type < 90 then (
              match !existing with
              | (path, _, _, _) :: _ ->
                  let%lwt _ = Mst.proof_for_key mst mst.root path in
                  loop mst (i + 1)
              | [] ->
                  loop mst (i + 1) )
            else (
              match !existing with
              | (path, _, _, _) :: rest ->
                  existing := rest ;
                  let%lwt mst' = Mst.delete mst path in
                  loop mst' (i + 1)
              | [] ->
                  loop mst (i + 1) )
        in
        loop mst 0 >|= fun _ -> () )
  in
  print_result r1 ;
  Printf.printf " (%.6f s per op avg)\n%!" (r1.total_s /. float_of_int num_ops) ;
  cleanup_temp_db path ; Lwt.return_unit

let bench_db_io_patterns () =
  print_header "database i/o" ;
  let%lwt db, path = setup_test_db () in
  let size = 100000 in
  let blocks = generate_blocks size in
  let%lwt () =
    Util.use_pool db.db (fun conn -> User_store.Bulk.put_blocks blocks conn) >|= fun _ -> ()
  in
  let cids = List.map fst blocks in
  let shuffled_cids = shuffle cids in
  let%lwt r1 =
    time_it (Printf.sprintf "sequential read (%d blocks)" size) (fun () ->
        Lwt_list.iter_s (fun cid -> User_store.get_bytes db cid >|= fun _ -> ()) cids )
  in
  print_result r1 ;
  let%lwt r2 =
    time_it (Printf.sprintf "random read (%d blocks)" size) (fun () ->
        Lwt_list.iter_s (fun cid -> User_store.get_bytes db cid >|= fun _ -> ()) shuffled_cids )
  in
  print_result r2 ;
  let batch_size = 50 in
  (* Single-pass split of [cids] into consecutive batches of [batch_size]
     ([size] is a multiple of [batch_size], so there is no partial tail).
     The previous List.init + List.filteri construction rescanned the whole
     100k-element list once per batch, i.e. O(n^2 / batch_size) setup work. *)
  let batches =
    let rec go acc cur n = function
      | [] ->
          List.rev (if cur = [] then acc else List.rev cur :: acc)
      | x :: tl ->
          if n = batch_size then go (List.rev cur :: acc) [x] 1 tl
          else go acc (x :: cur) (n + 1) tl
    in
    go [] [] 0 cids
  in
  let%lwt r3 =
    time_it (Printf.sprintf "batched read (%d blocks, batch=%d)" size batch_size) (fun () ->
        Lwt_list.iter_s (fun batch -> User_store.get_blocks db batch >|= fun _ -> ()) batches )
  in
  print_result r3 ;
  cleanup_temp_db path ; Lwt.return_unit

(* Run every repository bench suite in sequence. *)
let run_all_benchmarks () =
  Printf.printf "repository benchmarks\n" ;
  Printf.printf "=====================\n" ;
  let%lwt () = bench_single_block_ops () in
  let%lwt () = bench_batch_block_ops () in
  let%lwt () = bench_bulk_insert_ops () in
  let%lwt () = bench_db_mst_ops () in
  let%lwt () = bench_db_mst_incremental () in
  let%lwt () = bench_car_export () in
  let%lwt () = bench_car_import () in
  let%lwt () = bench_rebuild_mst () in
  let%lwt () = bench_mixed_ops () in
  let%lwt () = bench_db_io_patterns () in
  Lwt.return_unit

let () =
  Random.self_init () ;
  Lwt_main.run (run_all_benchmarks ())
+7
pegasus/bench/dune
; Build rules for the repository benchmark runner (bench_repository.ml).
; lwt_ppx provides the let%lwt / match%lwt syntax used throughout.
(executable
 (name bench_repository)
 (public_name bench_repository)
 (package pegasus)
 (libraries ipld mist pegasus lwt lwt_ppx unix)
 (preprocess
  (pps lwt_ppx)))