objective categorical abstract machine language personal data server

Fix overly aggressive blob ref cleanup on update record

futur.blue 04f0b026 0d72fb31

verified
+76 -46
+22 -30
pegasus/lib/repository.ml
··· 291 291 Errors.invalid_request ~name:"InvalidSwap" 292 292 (Format.sprintf "attempted to update record %s with cid %s" 293 293 path cid_str ) ) ; 294 - let%lwt () = 294 + let old_blob_refs = 295 295 match existing_record with 296 296 | Some record -> 297 - let refs = 298 - Util.find_blob_refs record.value 299 - |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref) 300 - in 301 - if not (List.is_empty refs) then 302 - let%lwt _ = 303 - User_store.delete_orphaned_blobs_by_record_path t.db path 304 - in 305 - Lwt.return_unit 306 - else Lwt.return_unit 297 + Util.find_blob_refs record.value 298 + |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref) 307 299 | None -> 308 - Lwt.return_unit 300 + [] 309 301 in 310 302 let record_with_type : Lex.repo_record = 311 303 if String_map.mem "$type" value then value ··· 320 312 :: !commit_ops_rev ; 321 313 let%lwt new_mst = Cached_mst.add !mst path new_cid in 322 314 mst := new_mst ; 323 - let refs = 315 + let new_blob_refs = 324 316 Util.find_blob_refs value 325 317 |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref) 326 318 in 319 + let%lwt () = User_store.delete_blob_refs_for_path t.db path in 327 320 let%lwt () = 328 - match%lwt User_store.put_blob_refs t.db path refs with 321 + match%lwt User_store.put_blob_refs t.db path new_blob_refs with 329 322 | Ok () -> 330 323 Lwt.return () 331 324 | Error err -> 332 325 raise err 333 326 in 327 + let removed_blob_refs = 328 + List.filter 329 + (* include old refs such that *) 330 + (fun old_ref -> 331 + (* there isn't a new ref such that *) 332 + not 333 + (List.exists 334 + (* the new ref equals the old ref *) 335 + (fun new_ref -> Cid.equal old_ref new_ref ) 336 + new_blob_refs ) ) 337 + old_blob_refs 338 + in 339 + let%lwt () = 340 + User_store.delete_unreferenced_blobs t.db removed_blob_refs 341 + in 334 342 Lwt.return 335 343 (Update 336 344 { type'= "com.atproto.repo.applyWrites#updateResult" ··· 351 359 Errors.invalid_request ~name:"InvalidSwap" 352 360 (Format.sprintf "attempted to delete record %s with cid %s" 353 361 path cid_str ) ) ; 354 - let%lwt () = 355 - match existing_record with 356 - | Some record -> 357 - let refs = 358 - Util.find_blob_refs record.value 359 - |> List.map (fun (r : Mist.Blob_ref.t) -> r.ref) 360 - in 361 - if not (List.is_empty refs) then 362 - let%lwt _ = 363 - User_store.delete_orphaned_blobs_by_record_path t.db path 364 - in 365 - Lwt.return_unit 366 - else Lwt.return_unit 367 - | None -> 368 - Lwt.return_unit 369 - in 370 362 let%lwt () = User_store.delete_record t.db path in 371 363 commit_ops_rev := 372 364 {action= `Delete; path; cid= None; prev= cid} :: !commit_ops_rev ;
+54 -16
pegasus/lib/user_store.ml
··· 270 270 LIMIT %int{limit} 271 271 |sql}] 272 272 273 + let delete_blob_refs_for_path path = 274 + [%rapper 275 + execute 276 + {sql| DELETE FROM blobs_records WHERE record_path = %string{path} |sql}] 277 + ~path 278 + 279 + let delete_unreferenced_blobs cids = 280 + [%rapper 281 + get_many 282 + {sql| DELETE FROM blobs 283 + WHERE cid IN (%list{%CID{cids}}) 284 + AND NOT EXISTS ( 285 + SELECT 1 FROM blobs_records 286 + WHERE blob_cid = blobs.cid 287 + ) 288 + RETURNING @CID{cid}, @string{storage} 289 + |sql}] 290 + ~cids 291 + 273 292 let delete_orphaned_blobs_by_record_path path = 274 293 [%rapper 275 294 get_many ··· 545 564 546 565 let delete_block t cid : (bool, exn) Lwt_result.t = 547 566 Lwt_result.catch 548 - @@ fun () -> Util.Sqlite.use_pool t.db @@ Queries.delete_block cid >|= fun _ -> true 567 + @@ fun () -> 568 + Util.Sqlite.use_pool t.db @@ Queries.delete_block cid >|= fun _ -> true 549 569 550 570 let delete_many t cids : (int, exn) Lwt_result.t = 551 571 Lwt_result.catch 552 - @@ fun () -> Util.Sqlite.use_pool t.db @@ Queries.delete_blocks cids >|= List.length 572 + @@ fun () -> 573 + Util.Sqlite.use_pool t.db @@ Queries.delete_blocks cids >|= List.length 553 574 554 575 let clear_mst t : unit Lwt.t = 555 576 let%lwt () = Util.Sqlite.use_pool t.db Queries.clear_mst in ··· 557 578 558 579 (* mst misc *) 559 580 560 - let count_blocks t : int Lwt.t = Util.Sqlite.use_pool t.db @@ Queries.count_blocks () 581 + let count_blocks t : int Lwt.t = 582 + Util.Sqlite.use_pool t.db @@ Queries.count_blocks () 561 583 562 584 (* repo commit *) 563 585 ··· 606 628 >|= List.map (fun (path, cid, data, since) -> 607 629 {path; cid; value= Lex.of_cbor data; since} ) 608 630 609 - let count_records t : int Lwt.t = Util.Sqlite.use_pool t.db @@ Queries.count_records () 631 + let count_records t : int Lwt.t = 632 + Util.Sqlite.use_pool t.db @@ Queries.count_records () 610 633 611 634 let list_collections t : string list Lwt.t = 612 635 Util.Sqlite.use_pool t.db @@ Queries.list_collections ··· 625 648 let delete_record t path : unit Lwt.t = 626 649 Util.Sqlite.use_pool t.db (fun conn -> 627 650 Util.Sqlite.transact conn (fun () -> 628 - let del = Queries.delete_record path conn in 629 - let$! () = del in 630 651 let$! deleted_blobs = 631 652 Queries.delete_orphaned_blobs_by_record_path path conn 632 653 in 633 - let () = 634 - List.iter 635 - (fun (cid, storage_str) -> 636 - let storage = Blob_store.storage_of_string storage_str in 637 - delete_blob_file ~did:t.did ~cid ~storage ) 638 - deleted_blobs 639 - in 640 - del ) ) 654 + let$! () = Queries.delete_record path conn in 655 + List.iter 656 + (fun (cid, storage_str) -> 657 + let storage = Blob_store.storage_of_string storage_str in 658 + delete_blob_file ~did:t.did ~cid ~storage ) 659 + deleted_blobs ; 660 + Lwt.return_ok () ) ) 641 661 642 662 (* blobs *) 643 663 ··· 675 695 (string * Cid.t) list Lwt.t = 676 696 Util.Sqlite.use_pool t.db @@ Queries.list_missing_blobs ~limit ~cursor 677 697 678 - let count_blobs t : int Lwt.t = Util.Sqlite.use_pool t.db @@ Queries.count_blobs () 698 + let count_blobs t : int Lwt.t = 699 + Util.Sqlite.use_pool t.db @@ Queries.count_blobs () 679 700 680 701 let count_referenced_blobs t : int Lwt.t = 681 702 Util.Sqlite.use_pool t.db @@ Queries.count_referenced_blobs () ··· 697 718 let delete_orphaned_blobs_by_record_path t path : 698 719 (Cid.t * Blob_store.storage) list Lwt.t = 699 720 let%lwt results = 700 - Util.Sqlite.use_pool t.db @@ Queries.delete_orphaned_blobs_by_record_path path 721 + Util.Sqlite.use_pool t.db 722 + @@ Queries.delete_orphaned_blobs_by_record_path path 701 723 in 702 724 Lwt.return 703 725 @@ List.map ··· 721 743 let clear_blob_refs t path cids : unit Lwt.t = 722 744 if List.is_empty cids then Lwt.return_unit 723 745 else Util.Sqlite.use_pool t.db @@ Queries.clear_blob_refs path cids 746 + 747 + let delete_blob_refs_for_path t path : unit Lwt.t = 748 + Util.Sqlite.use_pool t.db @@ Queries.delete_blob_refs_for_path path 749 + 750 + let delete_unreferenced_blobs t cids : unit Lwt.t = 751 + if List.is_empty cids then Lwt.return_unit 752 + else 753 + let%lwt results = 754 + Util.Sqlite.use_pool t.db @@ Queries.delete_unreferenced_blobs cids 755 + in 756 + List.iter 757 + (fun (cid, storage_str) -> 758 + let storage = Blob_store.storage_of_string storage_str in 759 + delete_blob_file ~did:t.did ~cid ~storage ) 760 + results ; 761 + Lwt.return_unit 724 762 725 763 let update_blob_storage t cid storage : unit Lwt.t = 726 764 let storage_str = Blob_store.storage_to_string storage in