this repo has no description

js_top_worker: memoize CRC checks and raise on mismatch

- Add symtable_memo and crc_memo hash tables to Impl for caching
module availability and CRC lookups (both binary and server side)
- Extract pure check_preload_status function into Impl for testability
- Capture server-side CRCs from fetched .cmi files in add_dynamic_cmis_sync
- Raise Crc_mismatch exception (instead of returning false) when a
preloaded package has different CRCs than the universe
- Add 8 ppx_expect tests covering: match, mismatch, partial load,
empty, missing CRCs, server CRC priority, single mismatch in group

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+236 -41
+49 -39
lib/findlibish.ml
··· 27 27 whether any of the package's toplevel modules are already in the 28 28 bytecode global table (i.e. linked into the binary). *) 29 29 30 + exception Crc_mismatch of string 31 + 30 32 let is_module_available module_name = 31 - let id = Ident.create_persistent module_name in 32 - match Symtable.Global.of_ident id with 33 - | None -> false 34 - | Some g -> Symtable.is_global_defined g 33 + match Hashtbl.find_opt Js_top_worker.Impl.symtable_memo module_name with 34 + | Some v -> v 35 + | None -> 36 + let v = 37 + let id = Ident.create_persistent module_name in 38 + match Symtable.Global.of_ident id with 39 + | None -> false 40 + | Some g -> Symtable.is_global_defined g 41 + in 42 + Hashtbl.replace Js_top_worker.Impl.symtable_memo module_name v; 43 + v 35 44 36 - let module_crc module_name = 37 - try Some (Digest.to_hex (Env.crc_of_unit module_name)) 38 - with Not_found -> None 45 + let binary_crc module_name = 46 + match Js_top_worker.Impl.lookup_binary_crc module_name with 47 + | Some _ as v -> v 48 + | None -> 49 + let v = 50 + try Some (Digest.to_hex (Env.crc_of_unit module_name)) 51 + with Not_found -> None 52 + in 53 + Option.iter (Js_top_worker.Impl.memo_binary_crc module_name) v; 54 + v 55 + 56 + let universe_crc module_name (dcs : Js_top_worker.Impl.dynamic_cmis) = 57 + match Js_top_worker.Impl.lookup_server_crc module_name with 58 + | Some _ as v -> v 59 + | None -> List.assoc_opt module_name dcs.dcs_module_crcs 39 60 40 61 let is_package_preloaded (dcs : Js_top_worker.Impl.dynamic_cmis) = 41 - match dcs.dcs_toplevel_modules with 42 - | [] -> false 43 - | modules -> 44 - let loaded = List.filter is_module_available modules in 45 - let n_loaded = List.length loaded in 46 - let n_total = List.length modules in 47 - if n_loaded = 0 then false 48 - else if n_loaded <> n_total then begin 49 - Jslib.log 50 - "WARNING: package partially preloaded (%d/%d modules). \ 51 - Loaded: [%s], missing: [%s]" 52 - n_loaded n_total 53 - (String.concat ", " loaded) 54 - (String.concat ", " 55 - (List.filter (fun m -> not (is_module_available m)) modules)); 56 - false 57 - end else 58 - (* All modules are linked. Verify CRCs match if available. *) 59 - let mismatches = List.filter_map (fun modname -> 60 - match List.assoc_opt modname dcs.dcs_module_crcs, module_crc modname with 61 - | Some expected, Some actual when expected <> actual -> 62 - Some (Printf.sprintf "%s (universe=%s binary=%s)" modname expected actual) 63 - | _ -> None) 64 - modules 65 - in 66 - match mismatches with 67 - | [] -> true 68 - | ms -> 69 - Jslib.log 70 - "WARNING: package preloaded but CRC mismatch: %s" 71 - (String.concat ", " ms); 72 - false 62 + let open Js_top_worker.Impl in 63 + match check_preload_status 64 + ~is_available:is_module_available 65 + ~get_binary_crc:binary_crc 66 + ~get_universe_crc:(fun m -> universe_crc m dcs) 67 + dcs 68 + with 69 + | Preloaded -> true 70 + | Not_loaded -> false 71 + | Partially_loaded { loaded; missing } -> 72 + Jslib.log 73 + "WARNING: package partially preloaded (%d/%d modules). \ 74 + Loaded: [%s], missing: [%s]" 75 + (List.length loaded) (List.length loaded + List.length missing) 76 + (String.concat ", " loaded) 77 + (String.concat ", " missing); 78 + false 79 + | Crc_mismatch ms -> 80 + raise (Crc_mismatch (Printf.sprintf 81 + "Package preloaded but CRC mismatch: %s" 82 + (String.concat ", " ms))) 73 83 74 84 let rec read_libraries_from_pkg_defs ~library_name ~dir meta_uri pkg_expr = 75 85 try
+10
lib/findlibish.mli
··· 32 32 (** Find the dynamic_cmis.json URL for a named package. 33 33 Returns [None] if the package is not in the library list. *) 34 34 val find_dcs_url : t -> string -> string option 35 + 36 + (** Raised when a package's modules are all present in the Symtable 37 + but their CRCs don't match the universe version. *) 38 + exception Crc_mismatch of string 39 + 40 + (** Check whether a package is already linked into the worker binary 41 + by testing its toplevel modules against the Symtable. 42 + 43 + @raise Crc_mismatch if the package is linked but CRCs don't match. *) 44 + val is_package_preloaded : Js_top_worker.Impl.dynamic_cmis -> bool
+67 -2
lib/impl.cppo.ml
··· 48 48 dcs_module_crcs : (string * string) list; 49 49 } 50 50 51 + (** Memoised module CRC cache. 52 + 53 + Stores CRCs keyed by module name. The binary-side CRC 54 + (from {!Env.crc_of_unit}) and server-side CRC (from fetched [.cmi] 55 + files) are stored under separate keys to allow comparison. 56 + 57 + Keys are ["binary:" ^ module_name] and ["server:" ^ module_name]. *) 58 + let symtable_memo : (string, bool) Hashtbl.t = Hashtbl.create 64 59 + let crc_memo : (string, string) Hashtbl.t = Hashtbl.create 64 60 + 61 + let memo_binary_crc module_name crc = 62 + Hashtbl.replace crc_memo ("binary:" ^ module_name) crc 63 + 64 + let memo_server_crc module_name crc = 65 + Hashtbl.replace crc_memo ("server:" ^ module_name) crc 66 + 67 + let lookup_binary_crc module_name = 68 + Hashtbl.find_opt crc_memo ("binary:" ^ module_name) 69 + 70 + let lookup_server_crc module_name = 71 + Hashtbl.find_opt crc_memo ("server:" ^ module_name) 72 + 73 + (** Result of checking whether a package is preloaded. *) 74 + type preload_check = 75 + | Preloaded 76 + | Not_loaded 77 + | Partially_loaded of { loaded : string list; missing : string list } 78 + | Crc_mismatch of string list (** list of "Module (universe=X binary=Y)" *) 79 + 80 + (** Pure check: given lookup functions for availability and CRCs, 81 + determine the preload status of a package. *) 82 + let check_preload_status 83 + ~(is_available : string -> bool) 84 + ~(get_binary_crc : string -> string option) 85 + ~(get_universe_crc : string -> string option) 86 + (dcs : dynamic_cmis) = 87 + match dcs.dcs_toplevel_modules with 88 + | [] -> Not_loaded 89 + | modules -> 90 + let loaded = List.filter is_available modules in 91 + let n_loaded = List.length loaded in 92 + let n_total = List.length modules in 93 + if n_loaded = 0 then Not_loaded 94 + else if n_loaded <> n_total then 95 + let missing = List.filter (fun m -> not (is_available m)) modules in 96 + Partially_loaded { loaded; missing } 97 + else 98 + let mismatches = List.filter_map (fun modname -> 99 + match get_universe_crc modname, get_binary_crc modname with 100 + | Some expected, Some actual when expected <> actual -> 101 + Some (Printf.sprintf "%s (universe=%s binary=%s)" 102 + modname expected actual) 103 + | _ -> None) 104 + modules 105 + in 106 + match mismatches with 107 + | [] -> Preloaded 108 + | ms -> Crc_mismatch ms 109 + 51 110 type init_config = { 52 111 findlib_requires : string list; 53 112 stdlib_dcs : string option; ··· 313 372 Logs.info (fun m -> m "Adding toplevel modules for dynamic cmis from %s" dcs.dcs_url); 314 373 Logs.info (fun m -> m " toplevel modules: %s" 315 374 (String.concat ", " dcs.dcs_toplevel_modules)); 316 - (* Fetch and create toplevel module CMIs *) 375 + (* Fetch and create toplevel module CMIs, extracting CRCs *) 317 376 List.iter 318 377 (fun name -> 319 378 let filename = to_cmi_filename name in 320 379 match fetch_sync filename with 321 380 | Some content -> 322 381 let fs_name = Filename.(concat path filename) in 323 - (try S.create_file ~name:fs_name ~content with _ -> ()) 382 + (try S.create_file ~name:fs_name ~content with _ -> ()); 383 + (try 384 + let cmi = Cmi_format.read_cmi fs_name in 385 + match cmi.Cmi_format.cmi_crcs with 386 + | (_, Some crc) :: _ -> memo_server_crc name (Digest.to_hex crc) 387 + | _ -> () 388 + with _ -> ()) 324 389 | None -> ()) 325 390 dcs.dcs_toplevel_modules; 326 391 (* Install on-demand loader for prefixed modules *)
+110
test/libtest/preloaded_test.ml
··· 1 + (* Tests for preloaded package detection and CRC verification. 2 + 3 + Uses Impl.check_preload_status directly (pure function, no JS deps) 4 + with mock lookup functions to simulate different scenarios. *) 5 + 6 + open Js_top_worker.Impl 7 + 8 + let make_dcs ?(crcs = []) modules = 9 + { dcs_url = "test://"; 10 + dcs_toplevel_modules = modules; 11 + dcs_file_prefixes = []; 12 + dcs_module_crcs = crcs } 13 + 14 + let print_status = function 15 + | Preloaded -> print_string "Preloaded" 16 + | Not_loaded -> print_string "Not_loaded" 17 + | Partially_loaded { loaded; missing } -> 18 + Printf.printf "Partially_loaded(loaded=[%s], missing=[%s])" 19 + (String.concat "," loaded) (String.concat "," missing) 20 + | Crc_mismatch ms -> 21 + Printf.printf "Crc_mismatch([%s])" (String.concat "; " ms) 22 + 23 + (* Mock helpers *) 24 + let available_set = ref [] 25 + let binary_crcs = ref [] 26 + let server_crcs = ref [] 27 + 28 + let is_available m = List.mem m !available_set 29 + let get_binary_crc m = List.assoc_opt m !binary_crcs 30 + let get_universe_crc m = List.assoc_opt m !server_crcs 31 + 32 + let setup ~available ~binary ~server = 33 + available_set := available; 34 + binary_crcs := binary; 35 + server_crcs := server 36 + 37 + let check dcs = 38 + check_preload_status ~is_available ~get_binary_crc ~get_universe_crc dcs 39 + 40 + let%expect_test "all modules linked, CRCs match — Preloaded" = 41 + setup ~available:["Foo"; "Bar"] 42 + ~binary:[("Foo", "abc123"); ("Bar", "def456")] 43 + ~server:[("Foo", "abc123"); ("Bar", "def456")]; 44 + let dcs = make_dcs ~crcs:[("Foo", "abc123"); ("Bar", "def456")] 45 + ["Foo"; "Bar"] in 46 + print_status (check dcs); 47 + [%expect {| Preloaded |}] 48 + 49 + let%expect_test "no modules linked — Not_loaded" = 50 + setup ~available:[] ~binary:[] ~server:[]; 51 + let dcs = make_dcs ~crcs:[("Foo", "abc123")] ["Foo"] in 52 + print_status (check dcs); 53 + [%expect {| Not_loaded |}] 54 + 55 + let%expect_test "CRC mismatch" = 56 + setup ~available:["Foo"] 57 + ~binary:[("Foo", "binary_crc")] 58 + ~server:[("Foo", "universe_crc")]; 59 + let dcs = make_dcs ~crcs:[("Foo", "universe_crc")] ["Foo"] in 60 + print_status (check dcs); 61 + [%expect {| Crc_mismatch([Foo (universe=universe_crc binary=binary_crc)]) |}] 62 + 63 + let%expect_test "partial load" = 64 + setup ~available:["Foo"] 65 + ~binary:[("Foo", "abc123")] 66 + ~server:[]; 67 + let dcs = make_dcs ~crcs:[("Foo", "abc123"); ("Bar", "def456")] 68 + ["Foo"; "Bar"] in 69 + print_status (check dcs); 70 + [%expect {| Partially_loaded(loaded=[Foo], missing=[Bar]) |}] 71 + 72 + let%expect_test "no CRCs in universe — still Preloaded" = 73 + setup ~available:["Foo"] 74 + ~binary:[("Foo", "abc123")] 75 + ~server:[]; 76 + let dcs = make_dcs ["Foo"] in 77 + print_status (check dcs); 78 + [%expect {| Preloaded |}] 79 + 80 + let%expect_test "empty module list — Not_loaded" = 81 + setup ~available:[] ~binary:[] ~server:[]; 82 + let dcs = make_dcs [] in 83 + print_status (check dcs); 84 + [%expect {| Not_loaded |}] 85 + 86 + let%expect_test "server CRC overrides dcs_module_crcs" = 87 + (* get_universe_crc returns server_crcs, ignoring dcs.dcs_module_crcs *) 88 + setup ~available:["Foo"] 89 + ~binary:[("Foo", "binary_crc")] 90 + ~server:[("Foo", "real_server_crc")]; 91 + let dcs = make_dcs ~crcs:[("Foo", "stale_json_crc")] ["Foo"] in 92 + print_status (check dcs); 93 + [%expect {| Crc_mismatch([Foo (universe=real_server_crc binary=binary_crc)]) |}] 94 + 95 + let%expect_test "binary CRC missing — no mismatch" = 96 + setup ~available:["Foo"] 97 + ~binary:[] 98 + ~server:[("Foo", "server_crc")]; 99 + let dcs = make_dcs ~crcs:[("Foo", "server_crc")] ["Foo"] in 100 + print_status (check dcs); 101 + [%expect {| Preloaded |}] 102 + 103 + let%expect_test "multiple modules, one mismatch" = 104 + setup ~available:["Foo"; "Bar"] 105 + ~binary:[("Foo", "abc123"); ("Bar", "wrong")] 106 + ~server:[("Foo", "abc123"); ("Bar", "def456")]; 107 + let dcs = make_dcs ~crcs:[("Foo", "abc123"); ("Bar", "def456")] 108 + ["Foo"; "Bar"] in 109 + print_status (check dcs); 110 + [%expect {| Crc_mismatch([Bar (universe=def456 binary=wrong)]) |}]