refine · anil.recoil.org/ocaml-requests@129f8fd

anil.recoil.org / ocaml-requests

fork atom

A batteries included HTTP/1.1 client in OCaml

fork atom

refine

anil.recoil.org 2 months ago 129f8fd0 dad2f1bc

+183 -23

2 changed files

expand all

unified split

.gitignore

tools

analyze_repos.ml

-3

.gitignore

··· 3 3 *.install 4 4 *.merlin 5 5 6 - # Third-party sources (fetch locally with opam source) 7 - third_party/ 8 - 9 6 # Editor and OS files 10 7 .DS_Store 11 8 *.swp

+183 -20

tools/analyze_repos.ml

··· 3 3 let src = Logs.Src.create "analyze_repos" ~doc:"Analyze HTTP client repos" 4 4 module Log = (val Logs.src_log src : Logs.LOG) 5 5 6 + (* Helper to normalize language names for directory structure *) 7 + let normalize_language lang = 8 + String.lowercase_ascii lang 9 + |> String.map (function '/' -> '-' | c -> c) 10 + 11 + (* Helper to extract repo name from "owner/repo" format *) 12 + let extract_repo_name repo_path = 13 + match String.rindex_opt repo_path '/' with 14 + | Some idx -> String.sub repo_path (idx + 1) (String.length repo_path - idx - 1) 15 + | None -> repo_path 16 + 17 + (* Parse resources.json to get list of repos *) 18 + let parse_resources_json json_path = 19 + Log.info (fun m -> m "Parsing resources.json from %s" json_path); 20 + let ic = open_in json_path in 21 + let content = really_input_string ic (in_channel_length ic) in 22 + close_in ic; 23 + 24 + match Jsont_bytesrw.decode_string Jsont.json content with 25 + | Ok json -> ( 26 + match json with 27 + | Jsont.Object (fields, _) -> 28 + List.fold_left (fun acc ((lang_name, _), lang_repos) -> 29 + let lang_dir = normalize_language lang_name in 30 + match lang_repos with 31 + | Jsont.Array (repos, _) -> 32 + List.fold_left (fun acc repo -> 33 + match repo with 34 + | Jsont.Object (repo_fields, _) -> ( 35 + let repo_path_opt = List.assoc_opt ("repo", Jsont.Meta.none) repo_fields in 36 + match repo_path_opt with 37 + | Some (Jsont.String (repo_path, _)) -> 38 + let repo_name = extract_repo_name repo_path in 39 + let normalized_name = String.lowercase_ascii repo_name in 40 + (lang_name, lang_dir, normalized_name) :: acc 41 + | _ -> acc) 42 + | _ -> acc) acc repos 43 + | _ -> acc) [] fields 44 + | _ -> 45 + Log.err (fun m -> m "Invalid JSON format in resources.json"); 46 + []) 47 + | Error err -> 48 + Log.err (fun m -> m "Failed to parse resources.json: %s" err); 49 + [] 50 + 51 + (* Check if analysis output already exists *) 52 + let analysis_exists repo_dir = 53 + let output_path = Printf.sprintf "%s.json" repo_dir in 54 + Sys.file_exists output_path 55 + 6 56 (* JSON schema for recommendations *) 7 57 let recommendation_schema = 8 58 let meta = Jsont.Meta.none in ··· 89 139 ] 90 140 91 141 (* Analyze a single repository by path *) 92 - let analyze_single_repo repo_path = 142 + let analyze_single_repo_with_env ~eio_env ~sw repo_path = 93 143 Log.info (fun m -> m "Analyzing repository at: %s" repo_path); 94 144 95 145 (* Check if directory exists *) 96 146 if not (Sys.file_exists repo_path && Sys.is_directory repo_path) then ( 97 - Log.err (fun m -> m "Directory not found: %s" repo_path); 98 - exit 1 99 - ); 147 + Log.warn (fun m -> m "Directory not found: %s" repo_path); 148 + false 149 + ) else 100 150 101 151 let output_path = Printf.sprintf "%s.json" repo_path in 102 152 103 153 Log.info (fun m -> m "Output will be saved to: %s" output_path); 104 - 105 - Eio_main.run @@ fun eio_env -> 106 - Switch.run @@ fun sw -> 107 154 108 155 (* Create Claude client with structured output *) 109 156 let output_format = Claude.Proto.Structured_output.of_json_schema recommendation_schema in ··· 181 228 | _ -> ()) 182 229 responses; 183 230 184 - if !success then 185 - Log.info (fun m -> m "Analysis complete!") 186 - else ( 187 - Log.err (fun m -> m "Analysis failed - no recommendations generated"); 231 + if !success then ( 232 + Log.info (fun m -> m "Analysis complete for %s" repo_path); 233 + true 234 + ) else ( 235 + Log.err (fun m -> m "Analysis failed for %s - no recommendations generated" repo_path); 236 + false 237 + ) 238 + 239 + (* Wrapper for single repo analysis - for command line use *) 240 + let analyze_single_repo repo_path = 241 + Eio_main.run @@ fun eio_env -> 242 + Switch.run @@ fun sw -> 243 + let success = analyze_single_repo_with_env ~eio_env ~sw repo_path in 244 + if not success then exit 1 245 + 246 + (* Parallel analysis of multiple repos from resources.json *) 247 + let analyze_all_repos ?(max_parallel=8) () = 248 + let resources_path = "tools/resources.json" in 249 + 250 + if not (Sys.file_exists resources_path) then ( 251 + Log.err (fun m -> m "Resources file not found: %s" resources_path); 252 + exit 1 253 + ); 254 + 255 + let all_repos = parse_resources_json resources_path in 256 + Log.info (fun m -> m "Found %d total repositories in resources.json" (List.length all_repos)); 257 + 258 + (* Filter to only repos that don't have analysis yet *) 259 + let repos_to_analyze = 260 + List.filter (fun (_lang_name, lang_dir, repo_name) -> 261 + let repo_dir = Printf.sprintf "third_party/%s/%s" lang_dir repo_name in 262 + let exists = analysis_exists repo_dir in 263 + let dir_exists = Sys.file_exists repo_dir && Sys.is_directory repo_dir in 264 + if exists then 265 + Log.info (fun m -> m "Skipping %s (analysis already exists)" repo_dir) 266 + else if not dir_exists then 267 + Log.info (fun m -> m "Skipping %s (directory not found)" repo_dir) 268 + else 269 + Log.info (fun m -> m "Will analyze: %s" repo_dir); 270 + (not exists) && dir_exists 271 + ) all_repos 272 + in 273 + 274 + let count = List.length repos_to_analyze in 275 + Log.info (fun m -> m "Will analyze %d repositories (max %d in parallel)" count max_parallel); 276 + 277 + if count = 0 then ( 278 + Log.info (fun m -> m "No repositories need analysis. All done!"); 279 + exit 0 280 + ); 281 + 282 + Eio_main.run @@ fun eio_env -> 283 + Switch.run @@ fun _sw -> 284 + 285 + (* Run analyses in parallel with max_fibers limiting *) 286 + let final_results = 287 + Fiber.List.map ~max_fibers:max_parallel (fun (_lang_name, lang_dir, repo_name) -> 288 + let repo_dir = Printf.sprintf "third_party/%s/%s" lang_dir repo_name in 289 + let result = 290 + try 291 + Switch.run @@ fun analysis_sw -> 292 + analyze_single_repo_with_env ~eio_env ~sw:analysis_sw repo_dir 293 + with exn -> 294 + Log.err (fun m -> m "Exception analyzing %s: %s" repo_dir (Printexc.to_string exn)); 295 + false 296 + in 297 + (repo_dir, result) 298 + ) repos_to_analyze 299 + in 300 + 301 + (* Report summary *) 302 + let successful = List.filter snd final_results in 303 + let failed = List.filter (fun (_, success) -> not success) final_results in 304 + 305 + Log.info (fun m -> m ""); 306 + Log.info (fun m -> m "=== Analysis Summary ==="); 307 + Log.info (fun m -> m "Total: %d" count); 308 + Log.info (fun m -> m "Successful: %d" (List.length successful)); 309 + Log.info (fun m -> m "Failed: %d" (List.length failed)); 310 + 311 + if List.length failed > 0 then ( 312 + Log.info (fun m -> m ""); 313 + Log.info (fun m -> m "Failed repositories:"); 314 + List.iter (fun (repo, _) -> Log.info (fun m -> m " - %s" repo)) failed; 188 315 exit 1 189 316 ) 190 317 191 318 (* Command-line interface *) 192 319 let repo_path_arg = 193 - let doc = "Path to repository directory to analyze (e.g., third_party/rust/reqwest)" in 194 - Cmdliner.Arg.(required & pos 0 (some string) None & info [] ~docv:"REPO_PATH" ~doc) 320 + let doc = "Path to repository directory to analyze (e.g., third_party/rust/reqwest). \ 321 + Not required when using --all." in 322 + Cmdliner.Arg.(value & pos 0 (some string) None & info [] ~docv:"REPO_PATH" ~doc) 323 + 324 + let all_flag = 325 + let doc = "Analyze all repositories from tools/resources.json that don't have \ 326 + analysis output yet. Runs in parallel." in 327 + Cmdliner.Arg.(value & flag & info ["all"; "a"] ~doc) 328 + 329 + let max_parallel_arg = 330 + let doc = "Maximum number of parallel analysis sessions (default: 8). \ 331 + Only used with --all." in 332 + Cmdliner.Arg.(value & opt int 8 & info ["max-parallel"; "j"] ~docv:"N" ~doc) 195 333 196 334 let setup_log style_renderer level = 197 335 Fmt_tty.setup_std_outputs ?style_renderer (); ··· 202 340 let setup_log_t = 203 341 Cmdliner.Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ()) 204 342 205 - let run repo_path = 206 - analyze_single_repo repo_path 343 + let run all_mode max_parallel repo_path = 344 + if all_mode then 345 + analyze_all_repos ~max_parallel () 346 + else 347 + match repo_path with 348 + | Some path -> analyze_single_repo path 349 + | None -> 350 + prerr_endline "Error: REPO_PATH required unless --all is specified"; 351 + exit 1 207 352 208 353 let () = 209 - let combined_term = Cmdliner.Term.(const (fun () repo_path -> run repo_path) 210 - $ setup_log_t $ repo_path_arg) in 211 - let combined_info = Cmdliner.Cmd.info "analyze_repos" ~version:"1.0" 212 - ~doc:"Analyze a single HTTP client repository and generate recommendations. 213 - Output is saved to <repo-path>.json (e.g., third_party/rust/reqwest.json)." in 354 + let combined_term = Cmdliner.Term.(const (fun () all max_parallel repo_path -> 355 + run all max_parallel repo_path) 356 + $ setup_log_t $ all_flag $ max_parallel_arg $ repo_path_arg) in 357 + let combined_info = Cmdliner.Cmd.info "analyze_repos" ~version:"2.0" 358 + ~doc:"Analyze HTTP client repositories and generate recommendations." 359 + ~man:[ 360 + `S Cmdliner.Manpage.s_description; 361 + `P "Analyzes HTTP client libraries from third_party/ and generates \ 362 + structured recommendations for improving the OCaml requests library."; 363 + `P "Two modes are supported:"; 364 + `P "1. Single repository mode (default): Analyze one specific repository \ 365 + by providing its path as an argument."; 366 + `P "2. Batch mode (--all): Analyze all repositories listed in \ 367 + tools/resources.json that don't have analysis output yet, \ 368 + running multiple analyses in parallel."; 369 + `S Cmdliner.Manpage.s_examples; 370 + `P "Analyze a single repository:"; 371 + `Pre " $(b,analyze_repos) third_party/php/buzz"; 372 + `P "Analyze all repositories with default parallelism (8):"; 373 + `Pre " $(b,analyze_repos) --all"; 374 + `P "Analyze all repositories with custom parallelism:"; 375 + `Pre " $(b,analyze_repos) --all --max-parallel 4"; 376 + ] in 214 377 exit (Cmdliner.Cmd.eval (Cmdliner.Cmd.v combined_info combined_term))