Monorepo management for opam overlays

monopam: replace git subtree push with fast diff-based sync

git subtree push is slow because it walks the entire repository history
to reconstruct commits. For large repos, this is O(commits) which can
take minutes.

Replace with a diff-based approach that is O(changed files):
1. Generate diff between monorepo subtree and checkout using
git diff --no-index
2. Apply the diff to checkout using git apply
3. Stage and commit the changes

Also parallelize remote push using Eio.Fiber.List.map with max_fibers:2.

+128 -28
+66
lib/git.ml
··· 397 397 match run_git_ok ~proc ~cwd [ "rev-list"; "--count"; base ^ ".." ^ head ] with 398 398 | Error _ -> 0 399 399 | Ok s -> ( try int_of_string (String.trim s) with _ -> 0) 400 + 401 + (** {1 Diff Operations} *) 402 + 403 + let diff_trees ~proc ~fs ~source ~target = 404 + (* Use git diff --no-index to compare two directory trees. 405 + This works even if neither directory is a git repo. 406 + Exit code 0 = no diff, exit code 1 = diff found, other = error *) 407 + let cwd = path_to_eio ~fs (Fpath.v ".") in 408 + let source_str = Fpath.to_string source in 409 + let target_str = Fpath.to_string target in 410 + let result = 411 + run_git ~proc ~cwd 412 + [ 413 + "diff"; 414 + "--no-index"; 415 + "--binary"; 416 + (* Handle binary files *) 417 + "--no-color"; 418 + target_str; 419 + (* old = checkout *) 420 + source_str (* new = monorepo subtree *); 421 + ] 422 + in 423 + match result.exit_code with 424 + | 0 -> 425 + (* No differences *) 426 + Ok "" 427 + | 1 -> 428 + (* Differences found - this is success for diff *) 429 + Ok result.stdout 430 + | _ -> 431 + (* Actual error *) 432 + Error 433 + (Command_failed 434 + (String.concat " " [ "git"; "diff"; "--no-index" ], result)) 435 + 436 + let apply_diff ~proc ~fs ~cwd ~diff = 437 + if String.length diff = 0 then Ok () 438 + else 439 + let cwd_eio = path_to_eio ~fs cwd in 440 + (* Apply the diff using git apply. 441 + We need to handle the path rewriting since git diff --no-index 442 + uses absolute or relative paths as prefixes. *) 443 + let cmd = [ "apply"; "--binary"; "-p1"; "-" ] in 444 + let buf_stdout = Buffer.create 256 in 445 + let buf_stderr = Buffer.create 256 in 446 + Eio.Switch.run @@ fun sw -> 447 + let child = 448 + Eio.Process.spawn proc ~sw ~cwd:cwd_eio 449 + ~stdin:(Eio.Flow.string_source diff) 450 + ~stdout:(Eio.Flow.buffer_sink buf_stdout) 451 + ~stderr:(Eio.Flow.buffer_sink buf_stderr) 452 + ("git" :: cmd) 453 + in 454 + let exit_status = Eio.Process.await child in 455 + match exit_status with 456 + | `Exited 0 -> Ok () 457 + | `Exited n | `Signaled n -> 458 + Error 459 + (Command_failed 460 + ( String.concat " " ("git" :: cmd), 461 + { 462 + exit_code = n; 463 + stdout = Buffer.contents buf_stdout; 464 + stderr = Buffer.contents buf_stderr; 465 + } ))
+28
lib/git.mli
··· 443 443 int 444 444 (** [count_commits_between ~proc ~fs ~repo ~base ~head ()] counts the number of 445 445 commits between base and head (exclusive of base, inclusive of head). *) 446 + 447 + (** {1 Diff Operations} *) 448 + 449 + val diff_trees : 450 + proc:_ Eio.Process.mgr -> 451 + fs:Eio.Fs.dir_ty Eio.Path.t -> 452 + source:Fpath.t -> 453 + target:Fpath.t -> 454 + (string, error) result 455 + (** [diff_trees ~proc ~fs ~source ~target] generates a diff between two 456 + directory trees using [git diff --no-index]. 457 + 458 + Returns [Ok ""] if the trees are identical, [Ok diff] with the diff content 459 + if they differ, or [Error] if the diff command fails. 460 + 461 + @param source The source directory (typically the monorepo subtree) 462 + @param target The target directory (typically the checkout) *) 463 + 464 + val apply_diff : 465 + proc:_ Eio.Process.mgr -> 466 + fs:Eio.Fs.dir_ty Eio.Path.t -> 467 + cwd:Fpath.t -> 468 + diff:string -> 469 + (unit, error) result 470 + (** [apply_diff ~proc ~fs ~cwd ~diff] applies a diff to the directory at [cwd]. 471 + 472 + Uses [git apply] to apply the diff. Returns [Ok ()] if the diff was applied 473 + successfully or was empty, [Error] if the apply failed. *)
+34 -28
lib/monopam.ml
··· 952 952 let prefix = Package.subtree_prefix pkg in 953 953 let checkouts_root = Config.Paths.checkouts config in 954 954 let checkout_dir = Package.checkout_dir ~checkouts_root pkg in 955 - let branch = get_branch ~config pkg in 956 - let sync_branch = "monopam-sync" in 957 955 if not (Git.Subtree.exists ~fs ~repo:monorepo ~prefix) then begin 958 956 Log.debug (fun m -> m "Subtree %s not in monorepo, skipping" prefix); 959 957 Ok () ··· 973 971 end 974 972 else Ok () 975 973 in 976 - let monorepo_eio = Eio.Path.(fs / Fpath.to_string monorepo) in 977 - let checkout_path = Fpath.to_string checkout_dir in 978 - (* Push subtree to a sync branch (avoids "branch is checked out" error) *) 979 - Log.info (fun m -> m "Pushing subtree %s to checkout" prefix); 980 - let* _ = 981 - run_git_in ~proc ~cwd:monorepo_eio 982 - [ "subtree"; "push"; "--prefix"; prefix; checkout_path; sync_branch ] 974 + (* Fast path: use diff-based approach instead of git subtree push *) 975 + let subtree_path = Fpath.(monorepo / prefix) in 976 + Log.info (fun m -> m "Comparing %s with checkout" prefix); 977 + let* diff = 978 + Git.diff_trees ~proc ~fs ~source:subtree_path ~target:checkout_dir 983 979 in 984 - (* Merge sync branch into the target branch in checkout *) 985 - Log.debug (fun m -> m "Merging %s into %s" sync_branch branch); 986 - let* _ = 987 - run_git_in ~proc ~cwd:checkout_eio [ "merge"; "--ff-only"; sync_branch ] 988 - in 989 - (* Delete the sync branch *) 990 - Log.debug (fun m -> m "Cleaning up %s branch" sync_branch); 991 - ignore (run_git_in ~proc ~cwd:checkout_eio [ "branch"; "-d"; sync_branch ]); 992 - Ok () 980 + if String.length diff = 0 then begin 981 + Log.debug (fun m -> m "No changes in %s" prefix); 982 + Ok () 983 + end 984 + else begin 985 + (* Apply diff to checkout *) 986 + Log.info (fun m -> m "Applying changes to %s checkout" prefix); 987 + let* () = Git.apply_diff ~proc ~fs ~cwd:checkout_dir ~diff in 988 + (* Stage all changes *) 989 + let* _ = run_git_in ~proc ~cwd:checkout_eio [ "add"; "-A" ] in 990 + (* Commit with a descriptive message *) 991 + let repo_name = Package.repo_name pkg in 992 + let message = Printf.sprintf "Sync %s from monorepo" repo_name in 993 + let* _ = run_git_in ~proc ~cwd:checkout_eio [ "commit"; "-m"; message ] in 994 + Ok () 995 + end 993 996 end 994 997 995 998 let push ~proc ~fs ~config ?package ?(upstream = false) () = ··· 1034 1037 | Ok pushed_repos -> 1035 1038 if upstream && pushed_repos <> [] then begin 1036 1039 Log.info (fun m -> 1037 - m "Pushing %d repos to upstream" (List.length pushed_repos)); 1040 + m "Pushing %d repos to upstream (parallel)" 1041 + (List.length pushed_repos)); 1038 1042 let checkouts_root = Config.Paths.checkouts config in 1039 - let total = List.length pushed_repos in 1040 - let rec push_upstream i = function 1041 - | [] -> Ok () 1042 - | pkg :: rest -> ( 1043 + (* Push to remotes in parallel, limited to 2 concurrent pushes *) 1044 + let push_results = 1045 + Eio.Fiber.List.map ~max_fibers:2 1046 + (fun pkg -> 1043 1047 let checkout_dir = 1044 1048 Package.checkout_dir ~checkouts_root pkg 1045 1049 in 1046 1050 let branch = get_branch ~config pkg in 1047 - (* Configure push URL (rewriting GitHub/tangled URLs to SSH) *) 1048 1051 let push_url = url_to_push_url (Package.dev_repo pkg) in 1049 1052 Log.info (fun m -> 1050 - m "[%d/%d] Pushing %s to %s" i total 1051 - (Package.repo_name pkg) push_url); 1053 + m "Pushing %s to %s" (Package.repo_name pkg) push_url); 1052 1054 (* Set the push URL for origin *) 1053 1055 (match 1054 1056 Git.set_push_url ~proc ~fs:fs_t ~url:push_url ··· 1065 1067 Log.app (fun m -> 1066 1068 m " Pushed %s to %s (%s)" (Package.repo_name pkg) 1067 1069 push_url branch); 1068 - push_upstream (i + 1) rest 1070 + Ok () 1069 1071 | Error e -> Error (Git_error e)) 1072 + pushed_repos 1070 1073 in 1071 - push_upstream 1 pushed_repos 1074 + (* Return first error if any *) 1075 + match List.find_opt Result.is_error push_results with 1076 + | Some (Error e) -> Error e 1077 + | _ -> Ok () 1072 1078 end 1073 1079 else Ok () 1074 1080 end