objective categorical abstract machine language personal data server

Implement MST covering proof

futur.blue e1cce0f6 57bea8a7

verified
+197 -97
+197 -97
mist/lib/mst.ml
··· 1 + open Storage 1 2 module StringMap = Dag_cbor.StringMap 2 3 3 4 type node_raw = ··· 37 38 38 39 and entry = 39 40 {layer: int; key: string; value: Cid.t; right: node option Lwt.t Lazy.t} 41 + 42 + type node_or_entry = Node of node | Entry of entry 40 43 41 44 let ( let*? ) lazy_opt_lwt f = 42 45 let%lwt result = Lazy.force lazy_opt_lwt in ··· 85 88 node.entries <- node.entries @ [entry] ; 86 89 Lwt.return node ) 87 90 88 - (* helper to find the entry with a given key in a hydrated node *) 89 - let find_entry_nonrec node key = 90 - let rec aux entries = 91 + (* returns the index of the first entry in an interspersed list that's gte a given key *) 92 + let find_gte_entry_index entries key : int = 93 + let rec aux entries index = 91 94 match entries with 92 95 | [] -> 93 - None 94 - | e :: es -> 95 - if e.key = key then Some e else if e.key > key then None else aux es 96 + (* will be entries length when not found *) 97 + index 98 + | e :: es -> ( 99 + match e with 100 + | Entry entry when entry.key >= key -> 101 + index 102 + | _ -> 103 + aux es (index + 1) ) 96 104 in 97 - aux node.entries 105 + aux entries 0 98 106 99 - (* from a list of raw entries, produces a list of their keys *) 100 - let entries_to_keys entries = 101 - entries 102 - |> List.fold_left 103 - (fun keys entry -> 104 - let prefix = 105 - match keys with [] -> "" | prev :: _ -> String.sub prev 0 entry.p 106 - in 107 - let path = String.concat "" [prefix; Bytes.to_string entry.k] in 108 - Util.ensure_valid_key path ; path :: keys ) 109 - [] 110 - |> List.rev 107 + (* produces a cid and cbor-encoded bytes for a given tree *) 108 + let serialize node : (Cid.t * bytes) Lwt.t = 109 + let sorted_entries = 110 + List.sort (fun a b -> String.compare a.key b.key) node.entries 111 + in 112 + let rec aux node = 113 + let%lwt left = 114 + node.left 115 + >>? function 116 + | Some l -> 117 + let%lwt cid, _ = aux l in 118 + Lwt.return_some cid 119 + | None -> 120 + Lwt.return_none 121 + in 122 + let last_key = ref "" in 123 + let%lwt mst_entries = 124 + Lwt_list.map_s 125 + (fun entry -> 126 + let%lwt right = 127 + entry.right 128 + >>? function 129 + | Some r -> 130 + let%lwt cid, _ = aux r in 131 + Lwt.return (Some cid) 132 + | None -> 133 + Lwt.return None 134 + in 135 + let prefix_len = Util.shared_prefix_length !last_key entry.key in 136 + last_key := entry.key ; 137 + Lwt.return 138 + { k= 139 + Bytes.of_string 140 + (String.sub entry.key prefix_len 141 + (String.length entry.key - prefix_len) ) 142 + ; p= prefix_len 143 + ; v= entry.value 144 + ; t= right } ) 145 + node.entries 146 + in 147 + let encoded = Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries}) in 148 + let cid = Cid.create Dcbor encoded in 149 + Lwt.return (cid, encoded) 150 + in 151 + aux {node with entries= sorted_entries} 111 152 112 - module Make (Store : Storage.Writable_blockstore) = struct 153 + module Make (Store : Writable_blockstore) = struct 113 154 type bs = Store.t 114 155 115 156 type t = {blockstore: bs; root: Cid.t} ··· 168 209 | _ -> 169 210 raise (Invalid_argument "invalid block") 170 211 212 + (* retrieves a raw node by cid *) 171 213 let retrieve_node_raw t cid : node_raw option Lwt.t = 172 214 match%lwt Store.get_bytes t.blockstore cid with 173 215 | Some bytes -> ··· 175 217 | None -> 176 218 Lwt.return_none 177 219 178 - (* retrieves & decodes a node by cid *) 220 + (* retrieves & hydrates a node by cid *) 179 221 let rec retrieve_node t cid : node option Lwt.t = 180 222 match%lwt retrieve_node_raw t cid with 181 223 | Some raw -> ··· 183 225 | None -> 184 226 Lwt.return_none 185 227 228 + (* lazy version of retrieve_node *) 186 229 and retrieve_node_lazy t cid = lazy (retrieve_node t cid) 187 230 188 231 (* hydrates a raw node *) ··· 261 304 traverse t (fun path cid -> ignore (StringMap.add path cid map)) 262 305 in 263 306 Lwt.return map 264 - 265 - (* produces a cid and cbor-encoded bytes for this mst *) 266 - let serialize t map : (Cid.t * bytes) Lwt.t = 267 - let keys = 268 - map |> StringMap.bindings |> List.map fst |> List.sort String.compare 269 - in 270 - let entry_for_key key = 271 - let value = StringMap.find key map in 272 - let height = Util.leading_zeros_on_hash key in 273 - {layer= height; key; value; right= lazy Lwt.return_none} 274 - in 275 - let root = 276 - { layer= keys |> List.hd |> Util.leading_zeros_on_hash 277 - ; entries= [] 278 - ; left= lazy Lwt.return_none } 279 - in 280 - List.iter 281 - (fun key -> ignore (insert_entry root (entry_for_key key))) 282 - (List.tl keys) ; 283 - let rec finalize node : (Cid.t * bytes) Lwt.t = 284 - let%lwt left = 285 - node.left 286 - >>? function 287 - | Some l -> 288 - let%lwt cid, _ = finalize l in 289 - Lwt.return_some cid 290 - | None -> 291 - Lwt.return_none 292 - in 293 - let last_key = ref "" in 294 - let%lwt mst_entries = 295 - Lwt_list.map_s 296 - (fun entry -> 297 - let%lwt right = 298 - entry.right 299 - >>? function 300 - | Some r -> 301 - let%lwt cid, _ = finalize r in 302 - Lwt.return (Some cid) 303 - | None -> 304 - Lwt.return None 305 - in 306 - let prefix_len = Util.shared_prefix_length !last_key entry.key in 307 - last_key := entry.key ; 308 - Lwt.return 309 - { k= 310 - Bytes.of_string 311 - (String.sub entry.key prefix_len 312 - (String.length entry.key - prefix_len) ) 313 - ; p= prefix_len 314 - ; v= entry.value 315 - ; t= right } ) 316 - node.entries 317 - in 318 - let encoded = 319 - Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries}) 320 - in 321 - let cid = Cid.create Dcbor encoded in 322 - let%lwt () = Store.put_block t.blockstore cid encoded in 323 - Lwt.return (cid, encoded) 324 - in 325 - finalize root 326 307 327 308 (* returns cids and blocks that form the path from a given node to a given entry *) 328 309 let rec path_to_entry t node key : (Cid.t * bytes) list Lwt.t = 329 310 let%lwt root_bytes = Store.get_bytes t.blockstore node in 330 - let%lwt root = 311 + let%lwt root_raw = 331 312 match root_bytes with 332 313 | None -> 333 314 Lwt.return_none 334 315 | Some bytes -> 335 316 Lwt.return_some (decode_block_raw bytes) 336 317 in 337 - let path_tail = [(node, Option.get root_bytes)] in 318 + let%lwt root = 319 + match root_raw with 320 + | None -> 321 + Lwt.return_none 322 + | Some root -> 323 + hydrate_node t root |> Lwt.map Option.some 324 + in 338 325 (* if there is a left child, try to find a path through the left subtree *) 339 326 let%lwt path_through_left = 340 - match root with 327 + match root_raw with 341 328 | None -> 342 329 Lwt.return_some [] 343 - | Some root -> ( 344 - match root.l with 330 + | Some raw -> ( 331 + match raw.l with 345 332 | None -> 346 333 Lwt.return_none 347 334 | Some left -> ( ··· 350 337 Lwt.return_none 351 338 | path -> 352 339 (* Option.get is safe because root is Some only when root_bytes is Some *) 353 - Lwt.return_some (path @ path_tail) ) ) 340 + Lwt.return_some (path @ [(node, Option.get root_bytes)]) ) ) 354 341 in 355 342 match path_through_left with 356 343 | Some path -> 357 344 Lwt.return path 358 345 | None -> ( 359 346 (* if a left subtree path couldn't be found, find the entry whose right subtree this key would belong to *) 360 - let entries = (Option.get root).e in 361 - let entries_keys = entries_to_keys entries in 347 + (* this branch is only reached when root/root_raw/root_bytes are not None; 348 + if they were, path_through_left would be Some [] *) 349 + let entries = (Option.get root).entries in 362 350 let entries_len = List.length entries in 363 351 let entry_index = 364 - match List.find_index (fun e -> e >= key) entries_keys with 352 + match List.find_index (fun e -> e.key >= key) entries with 365 353 | Some index -> 366 354 index 367 355 | None -> 368 356 entries_len 369 357 in 358 + (* path_through_left is None -> root_bytes is Some *) 359 + let path_tail = [(node, Option.get root_bytes)] in 370 360 (* entry_index here is actually the entry to the right of the subtree the key would belong to *) 371 361 match entry_index with 372 362 | _ 373 363 (* because entries[entry_index] might turn out to be the entry we're looking for *) 374 364 when entry_index < entries_len 375 - && List.nth entries_keys entry_index = key -> 365 + && (List.nth entries entry_index).key = key -> 376 366 Lwt.return path_tail 377 367 | _ -> ( 378 368 (* otherwise, we continue down the right subtree of the entry before entry_index *) 379 - match Util.last entries with 380 - | Some last when last.t != None -> 369 + (* path_through_left is None -> root_raw is Some *) 370 + match Util.last (Option.get root_raw).e with 371 + | Some last when last.t <> None -> 381 372 let%lwt path_through_right = 373 + (* when last.t <> None *) 382 374 path_to_entry t (Option.get last.t) key 383 375 in 384 376 Lwt.return (path_through_right @ path_tail) ··· 415 407 let leaves_list = Cid.Set.to_list leaves in 416 408 let%lwt leaves_bm = Store.get_blocks t.blockstore leaves_list in 417 409 if leaves_bm.missing <> [] then failwith "missing mst leaf blocks" ; 418 - let leaves_nodes = Storage.Block_map.entries leaves_bm.blocks in 410 + let leaves_nodes = Block_map.entries leaves_bm.blocks in 419 411 match leaves_nodes with 420 412 | [] -> 421 413 (* with Done, we don't care about the first pair element *) ··· 432 424 (fun (acc, nxt, lvs) cid -> 433 425 let bytes = 434 426 (* we should be safe to do this since we just got the cids from the blockmap *) 435 - Storage.Block_map.get cid bm.blocks |> Option.get 427 + Block_map.get cid bm.blocks |> Option.get 436 428 in 437 429 let node = decode_block_raw bytes in 438 430 let nxt' = ··· 470 462 Lwt.return_some (Obj.magic (), Done) 471 463 in 472 464 Lwt_seq.unfold_lwt step init_state 465 + 466 + (* returns all mst nodes needed to prove the value of a given key *) 467 + let rec proof_for_key t root key : Block_map.t Lwt.t = 468 + let e_rev = List.rev root.entries in 469 + (* iterate in reverse because if the key doesn't exist at this level, 470 + we need to search the "previous" node's right subtree *) 471 + let rec find_proof entries_rev = 472 + match entries_rev with 473 + | [] -> 474 + Lwt.return Block_map.empty 475 + | e :: rest -> ( 476 + if e.key > key then find_proof rest 477 + else if e.key = key then Lwt.return Block_map.empty 478 + else 479 + let*? right = e.right in 480 + match right with 481 + | Some r -> 482 + proof_for_key t r key 483 + | None -> 484 + Lwt.return Block_map.empty ) 485 + in 486 + let%lwt bm = find_proof e_rev in 487 + let%lwt root_cid, root_bytes = serialize root in 488 + Lwt.return (Block_map.set root_cid root_bytes bm) 489 + 490 + (* returns all mst nodes needed to prove the value of a given key's left sibling *) 491 + let rec proof_for_left_sibling t root key : Block_map.t Lwt.t = 492 + let e_rev = List.rev root.entries in 493 + (* iterate in reverse for the same reason as proof_for_key *) 494 + let rec find_proof entries_rev = 495 + match entries_rev with 496 + | [] -> 497 + Lwt.return Block_map.empty 498 + | e :: rest -> ( 499 + if e.key >= key then find_proof rest 500 + else 501 + let*? right = e.right in 502 + match right with 503 + | Some r -> 504 + proof_for_left_sibling t r key 505 + | None -> 506 + Lwt.return Block_map.empty ) 507 + in 508 + let%lwt bm = find_proof e_rev in 509 + let%lwt root_cid, root_bytes = serialize root in 510 + Lwt.return (Block_map.set root_cid root_bytes bm) 511 + 512 + (* returns all mst nodes needed to prove the value of a given key's right sibling *) 513 + let rec proof_for_right_sibling t root key : Block_map.t Lwt.t = 514 + (* unlike the other two, this doesn't iterate in reverse 515 + because we can stop as soon as we're past the key *) 516 + let rec find_proof ?(prev = None) entries = 517 + match entries with 518 + | [] -> ( 519 + (* end of entries, right sibling must be in the last entry's right subtree *) 520 + match prev with 521 + | Some e -> ( 522 + let*? right = e.right in 523 + match right with 524 + | Some r -> 525 + proof_for_right_sibling t r key 526 + | None -> 527 + Lwt.return Block_map.empty ) 528 + | None -> 529 + Lwt.return Block_map.empty ) 530 + | e :: rest -> 531 + if e.key > key then 532 + (* we're past target key; right sibling is in previous entry's right subtree *) 533 + match prev with 534 + | Some p -> ( 535 + let*? right = p.right in 536 + match right with 537 + | Some r -> 538 + proof_for_right_sibling t r key 539 + (* I don't think this should ever happen? *) 540 + | None -> 541 + Lwt.return Block_map.empty ) 542 + (* first entry is already greater than key; we're inside the sibling *) 543 + | None -> 544 + Lwt.return Block_map.empty 545 + else if e.key = key then 546 + (* found the entry, right sibling is in its right subtree *) 547 + let*? right = e.right in 548 + match right with 549 + | Some r -> 550 + proof_for_right_sibling t r key 551 + | None -> 552 + Lwt.return Block_map.empty 553 + else (* e.key < key, keep searching *) 554 + find_proof ~prev:(Some e) rest 555 + in 556 + let%lwt bm = find_proof root.entries in 557 + let%lwt root_cid, root_bytes = serialize root in 558 + Lwt.return (Block_map.set root_cid root_bytes bm) 559 + 560 + (* a covering proof is all mst nodes needed to prove the value of a given leaf 561 + and its siblings to its immediate right and left (if applicable) *) 562 + let get_covering_proof t root key : Block_map.t Lwt.t = 563 + let%lwt proofs = 564 + Lwt.all 565 + [ proof_for_key t root key 566 + ; proof_for_left_sibling t root key 567 + ; proof_for_right_sibling t root key ] 568 + in 569 + Lwt.return 570 + (List.fold_left 571 + (fun acc proof -> Block_map.merge acc proof) 572 + Block_map.empty proofs ) 473 573 end