···
 *.install
 *.merlin
 
-# Third-party sources (fetch locally with opam source)
-third_party/
-
 # Editor and OS files
 .DS_Store
 *.swp
+183-20
tools/analyze_repos.ml
···33let src = Logs.Src.create "analyze_repos" ~doc:"Analyze HTTP client repos"
44module Log = (val Logs.src_log src : Logs.LOG)
(* Helper to normalize language names for directory structure.

   Lowercases [lang] (ASCII only) and replaces every '/' with '-', so the
   result can be used as a single path component. *)
let normalize_language lang =
  lang
  |> String.lowercase_ascii
  |> String.map (function '/' -> '-' | c -> c)
(* Helper to extract repo name from "owner/repo" format.

   Returns the text after the last '/' in [repo_path], or [repo_path] itself
   when it contains no '/'. Trailing slashes are ignored, so "owner/repo/"
   yields "repo" rather than the empty string. *)
let extract_repo_name repo_path =
  (* Length of [repo_path] once any trailing '/' characters are dropped. *)
  let rec trimmed_length n =
    if n > 0 && Char.equal repo_path.[n - 1] '/' then trimmed_length (n - 1)
    else n
  in
  let len = trimmed_length (String.length repo_path) in
  let path = String.sub repo_path 0 len in
  match String.rindex_opt path '/' with
  | Some idx -> String.sub path (idx + 1) (len - idx - 1)
  | None -> path
(* Parse resources.json to get list of repos.

   The top-level JSON value is expected to be an object whose members map a
   language name to an array of repo objects, each carrying a string "repo"
   member in "owner/repo" form. Returns a list of
   (language name, normalized language directory, lowercased repo name)
   triples. Members and array elements that do not have this shape are
   skipped. Entries are accumulated by prepending, so the list is in reverse
   document order.

   Returns [] after logging an error when the content is not valid JSON or
   the top-level value is not an object. Raises Sys_error if [json_path]
   cannot be opened or read. *)
let parse_resources_json json_path =
  Log.info (fun m -> m "Parsing resources.json from %s" json_path);
  (* with_open_text closes the channel even when reading raises. *)
  let content = In_channel.with_open_text json_path In_channel.input_all in

  (* Match members by name only, so the lookup does not depend on the
     metadata attached to the member name. *)
  let find_member name members =
    List.find_map
      (fun ((member_name, _), value) ->
        if String.equal member_name name then Some value else None)
      members
  in

  (* Turn one element of a language array into a result triple, if it is an
     object with a string "repo" member. *)
  let repo_entry lang_name lang_dir = function
    | Jsont.Object (repo_fields, _) -> (
        match find_member "repo" repo_fields with
        | Some (Jsont.String (repo_path, _)) ->
            let repo_name = extract_repo_name repo_path in
            Some (lang_name, lang_dir, String.lowercase_ascii repo_name)
        | _ -> None)
    | _ -> None
  in

  (* Add every usable repo of one top-level member to the accumulator. *)
  let add_language acc ((lang_name, _), lang_repos) =
    match lang_repos with
    | Jsont.Array (repos, _) ->
        let lang_dir = normalize_language lang_name in
        List.fold_left
          (fun acc repo ->
            match repo_entry lang_name lang_dir repo with
            | Some entry -> entry :: acc
            | None -> acc)
          acc repos
    | _ -> acc
  in

  match Jsont_bytesrw.decode_string Jsont.json content with
  | Ok (Jsont.Object (fields, _)) -> List.fold_left add_language [] fields
  | Ok _ ->
      Log.err (fun m -> m "Invalid JSON format in resources.json");
      []
  | Error err ->
      Log.err (fun m -> m "Failed to parse resources.json: %s" err);
      []
(* Check if analysis output already exists.

   The output for a repository directory [repo_dir] is the sibling file
   named [repo_dir] followed by ".json"; returns true when that path
   exists. *)
let analysis_exists repo_dir =
  Sys.file_exists (Printf.sprintf "%s.json" repo_dir)
5555+656(* JSON schema for recommendations *)
757let recommendation_schema =
858 let meta = Jsont.Meta.none in
···89139 ]
9014091141(* Analyze a single repository by path *)
9292-let analyze_single_repo repo_path =
142142+let analyze_single_repo_with_env ~eio_env ~sw repo_path =
93143 Log.info (fun m -> m "Analyzing repository at: %s" repo_path);
9414495145 (* Check if directory exists *)
96146 if not (Sys.file_exists repo_path && Sys.is_directory repo_path) then (
9797- Log.err (fun m -> m "Directory not found: %s" repo_path);
9898- exit 1
9999- );
147147+ Log.warn (fun m -> m "Directory not found: %s" repo_path);
148148+ false
149149+ ) else
100150101151 let output_path = Printf.sprintf "%s.json" repo_path in
102152103153 Log.info (fun m -> m "Output will be saved to: %s" output_path);
104104-105105- Eio_main.run @@ fun eio_env ->
106106- Switch.run @@ fun sw ->
107154108155 (* Create Claude client with structured output *)
109156 let output_format = Claude.Proto.Structured_output.of_json_schema recommendation_schema in
···181228 | _ -> ())
182229 responses;
183230184184- if !success then
185185- Log.info (fun m -> m "Analysis complete!")
186186- else (
187187- Log.err (fun m -> m "Analysis failed - no recommendations generated");
231231+ if !success then (
232232+ Log.info (fun m -> m "Analysis complete for %s" repo_path);
233233+ true
234234+ ) else (
235235+ Log.err (fun m -> m "Analysis failed for %s - no recommendations generated" repo_path);
236236+ false
237237+ )
(* Wrapper for single repo analysis - for command line use.

   Runs analyze_single_repo_with_env inside its own Eio event loop and
   switch, and exits the process with status 1 when the analysis reports
   failure. *)
let analyze_single_repo repo_path =
  let success =
    Eio_main.run @@ fun eio_env ->
    Switch.run @@ fun sw ->
    analyze_single_repo_with_env ~eio_env ~sw repo_path
  in
  (* Exit only after the switch and event loop have finished, so they are
     released normally instead of being cut short by exit. *)
  if not success then exit 1
(* Parallel analysis of multiple repos from resources.json.

   Reads tools/resources.json, keeps the repositories whose directory
   third_party/<lang_dir>/<repo_name> exists and which have no analysis
   output yet, and analyzes them concurrently with at most [max_parallel]
   analyses in flight (default 8; values below 1 are treated as 1).

   Exits with status 1 when the resources file is missing or when at least
   one analysis fails, and with status 0 when there is nothing to analyze. *)
let analyze_all_repos ?(max_parallel=8) () =
  let resources_path = "tools/resources.json" in

  if not (Sys.file_exists resources_path) then (
    Log.err (fun m -> m "Resources file not found: %s" resources_path);
    exit 1
  );

  (* Clamp to at least one: a fibre limit below 1 could never make
     progress. *)
  if max_parallel < 1 then
    Log.warn (fun m -> m "Invalid parallelism %d, using 1" max_parallel);
  let max_parallel = max 1 max_parallel in

  let all_repos = parse_resources_json resources_path in
  Log.info (fun m -> m "Found %d total repositories in resources.json" (List.length all_repos));

  (* Filter to only repos that have a checkout but no analysis yet, keeping
     just the directory path of each. *)
  let repos_to_analyze =
    List.filter_map (fun (_lang_name, lang_dir, repo_name) ->
      let repo_dir = Printf.sprintf "third_party/%s/%s" lang_dir repo_name in
      if analysis_exists repo_dir then (
        Log.info (fun m -> m "Skipping %s (analysis already exists)" repo_dir);
        None
      ) else if not (Sys.file_exists repo_dir && Sys.is_directory repo_dir) then (
        Log.info (fun m -> m "Skipping %s (directory not found)" repo_dir);
        None
      ) else (
        Log.info (fun m -> m "Will analyze: %s" repo_dir);
        Some repo_dir
      )
    ) all_repos
  in

  let count = List.length repos_to_analyze in
  Log.info (fun m -> m "Will analyze %d repositories (max %d in parallel)" count max_parallel);

  if count = 0 then (
    Log.info (fun m -> m "No repositories need analysis. All done!");
    exit 0
  );

  (* Run analyses in parallel with max_fibers limiting. Each analysis gets
     its own switch so that its resources are released when it finishes. *)
  let final_results =
    Eio_main.run @@ fun eio_env ->
    Fiber.List.map ~max_fibers:max_parallel (fun repo_dir ->
      let result =
        try
          Switch.run @@ fun analysis_sw ->
          analyze_single_repo_with_env ~eio_env ~sw:analysis_sw repo_dir
        with
        (* Cancellation must propagate; only real failures are recorded. *)
        | Eio.Cancel.Cancelled _ as exn -> raise exn
        | exn ->
          Log.err (fun m -> m "Exception analyzing %s: %s" repo_dir (Printexc.to_string exn));
          false
      in
      (repo_dir, result)
    ) repos_to_analyze
  in

  (* Report summary *)
  let successful, failed = List.partition snd final_results in

  Log.info (fun m -> m "");
  Log.info (fun m -> m "=== Analysis Summary ===");
  Log.info (fun m -> m "Total: %d" count);
  Log.info (fun m -> m "Successful: %d" (List.length successful));
  Log.info (fun m -> m "Failed: %d" (List.length failed));

  (* The exit happens here, after the Eio event loop has returned. *)
  match failed with
  | [] -> ()
  | _ :: _ ->
    Log.info (fun m -> m "");
    Log.info (fun m -> m "Failed repositories:");
    List.iter (fun (repo, _) -> Log.info (fun m -> m " - %s" repo)) failed;
    exit 1
(* Command-line interface *)

(* Optional positional argument 0: the repository directory to analyze.
   It is declared with [value] rather than [required] so that it can be
   omitted; the resulting term yields [None] when it is absent. *)
let repo_path_arg =
  let doc = "Path to repository directory to analyze (e.g., third_party/rust/reqwest). \
             Not required when using --all." in
  Cmdliner.Arg.(value & pos 0 (some string) None & info [] ~docv:"REPO_PATH" ~doc)
(* Boolean flag, spelled --all or -a, that selects batch mode. *)
let all_flag =
  let doc = "Analyze all repositories from tools/resources.json that don't have \
             analysis output yet. Runs in parallel." in
  Cmdliner.Arg.(value & flag & info ["all"; "a"] ~doc)
(* Integer option, spelled --max-parallel or -j, with default value 8. *)
let max_parallel_arg =
  let doc = "Maximum number of parallel analysis sessions (default: 8). \
             Only used with --all." in
  Cmdliner.Arg.(value & opt int 8 & info ["max-parallel"; "j"] ~docv:"N" ~doc)
195333196334let setup_log style_renderer level =
197335 Fmt_tty.setup_std_outputs ?style_renderer ();
···202340let setup_log_t =
203341 Cmdliner.Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ())
(* Dispatch to batch or single-repository mode.

   When [all_mode] is set, every pending repository is analyzed with at most
   [max_parallel] analyses at a time and [repo_path] is ignored. Otherwise
   [repo_path] must be given; if it is missing, an error is printed on
   stderr and the process exits with status 1. *)
let run all_mode max_parallel repo_path =
  match all_mode, repo_path with
  | true, _ -> analyze_all_repos ~max_parallel ()
  | false, Some path -> analyze_single_repo path
  | false, None ->
    prerr_endline "Error: REPO_PATH required unless --all is specified";
    exit 1
(* Program entry point: builds the Cmdliner term and command description,
   evaluates the command line, and exits with the status Cmdliner reports. *)
let () =
  (* setup_log_t is applied first so that logging is configured before run
     is invoked; its unit result is discarded. *)
  let combined_term = Cmdliner.Term.(const (fun () all max_parallel repo_path ->
                                       run all max_parallel repo_path)
                                     $ setup_log_t $ all_flag $ max_parallel_arg $ repo_path_arg) in
  let combined_info = Cmdliner.Cmd.info "analyze_repos" ~version:"2.0"
    ~doc:"Analyze HTTP client repositories and generate recommendations."
    ~man:[
      `S Cmdliner.Manpage.s_description;
      `P "Analyzes HTTP client libraries from third_party/ and generates \
          structured recommendations for improving the OCaml requests library.";
      `P "Two modes are supported:";
      `P "1. Single repository mode (default): Analyze one specific repository \
          by providing its path as an argument.";
      `P "2. Batch mode (--all): Analyze all repositories listed in \
          tools/resources.json that don't have analysis output yet, \
          running multiple analyses in parallel.";
      `S Cmdliner.Manpage.s_examples;
      `P "Analyze a single repository:";
      `Pre " $(b,analyze_repos) third_party/php/buzz";
      `P "Analyze all repositories with default parallelism (8):";
      `Pre " $(b,analyze_repos) --all";
      `P "Analyze all repositories with custom parallelism:";
      `Pre " $(b,analyze_repos) --all --max-parallel 4";
    ] in
  exit (Cmdliner.Cmd.eval (Cmdliner.Cmd.v combined_info combined_term))