The unpac monorepo manager self-hosting as a monorepo using unpac

Merge pull request #121 from tbrk/fashwo

FASH algorithm for minimal feedback arc sets

authored by

Jean-Christophe Filliatre and committed by
GitHub
ef366e7a 496c0d9a

+513
+319
src/cycles.ml
··· 1 + 2 + type weight = 3 + | Normal of int 4 + | Obligatory of int 5 + 6 + module Fashwo 7 + (GB : sig 8 + include Builder.S 9 + val weight : G.edge -> weight 10 + end) 11 + = 12 + struct 13 + module G = GB.G 14 + 15 + exception Stuck of G.vertex list 16 + 17 + module IM = Map.Make (Int) 18 + module VM = Map.Make (G.V) 19 + module VS = Set.Make (G.V) 20 + 21 + (* The algorithm of Eades, Lin, and Smyth (ELS 1993) works by "scheduling" 22 + vertexes onto two lists called s1 and s2. At each iteration a vertex is 23 + chosen, scheduled, and removed from the graph. Arcs from a newly scheduled 24 + node toward nodes already in s1 are classified as "leftward"; they are 25 + included in the generated feedback arc set. "Rightward" arcs, to vertexes 26 + in s2 or that have not yet been scheduled, are not included in the 27 + feedback arc set. The algorithm tries to maximize the number of rightward 28 + arcs and thereby minimize the number of leftward ones. Source vertexes, 29 + those with no incoming arcs in the current graph (i.e., because all its 30 + predecssors have already been scheduled), are appended directly onto s1 31 + and do not induce any feedback arcs. Sink vertexes are consed directly 32 + onto s2 and do not induce any feedback arcs. Otherwise, the algorithm 33 + chooses a vertex to maximize the difference between the number of 34 + outgoing arcs and the number of incoming ones: the (remaining) incoming 35 + arcs must be included in the feedback arc set. The difference between the 36 + number of rightward arcs (no cost) and the number of leftward arcs 37 + (feedback arcs) is called "delta". The algorithm is implemented 38 + efficiently by using a data structure to group unscheduled vertexes 39 + according to their delta value. When more than one vertex has the maximum 40 + delta value, the original algorithm makes an arbitrary choice. The 41 + algorithm of Eades and Lin (EL 1995) makes the choice using a heuristic 42 + that maximizes the difference between incoming arcs and outgoing ones in 43 + the vertexes that remain at the end of the iteration as such vertexes are 44 + the most "unbalanced" and thus less likely to contribute to the feedback 45 + arc set in future iterations. The EL 1995 algorithm includes a further 46 + refinement to ignore chains of vertexes when looking for unbalanced ones, 47 + since such chains do not contribute feedback arcs. 48 + 49 + Since we just want to produce a list of feedback arcs, we don't bother 50 + tracking order in s1, and we only track s2 to properly handle the 51 + preprocessing optimization that removes two cycles. We maintain lists of 52 + source and sink vertexes (scheduled but not yet removed from the graph) 53 + and a map from delta values to sets of vertexes. As the delta value map 54 + caches the state of the graph, it must be updated when the a vertex is 55 + scheduled and removed from the graph. Additionally, we remember which two 56 + cycles were removed during preprocessing and ensure that one of their 57 + arcs is included in the feedback arc set, depending on whichever of the 58 + two interlinked vertexes is scheduled first. *) 59 + 60 + type t = { 61 + s1 : VS.t; (* vertexes placed "at left" *) 62 + s2 : VS.t; (* vertexes placed "at right"; 63 + only needed to optimize for two_cycles *) 64 + sources : VS.t; (* vertexes with no incoming arcs *) 65 + sinks : VS.t; (* vertexes with no outgoing arcs *) 66 + delta_bins : VS.t IM.t; (* group vertexes by delta value *) 67 + vertex_bin : int VM.t; (* map each vertex to its bin *) 68 + two_cycles : G.edge list VM.t; (* edges for 2-cycles *) 69 + fas : G.edge list; (* current feedback arc set *) 70 + } 71 + 72 + let empty = { 73 + s1 = VS.empty; 74 + s2 = VS.empty; 75 + sources = VS.empty; 76 + sinks = VS.empty; 77 + delta_bins = IM.empty; 78 + vertex_bin = VM.empty; 79 + two_cycles = VM.empty; 80 + fas = []; 81 + } 82 + 83 + let add_to_bin delta v ({ delta_bins; vertex_bin; _ } as st) = 84 + { st with delta_bins = 85 + IM.update delta (function None -> Some (VS.singleton v) 86 + | Some vs -> Some (VS.add v vs)) 87 + delta_bins; 88 + vertex_bin = VM.add v delta vertex_bin } 89 + 90 + let remove_from_bin v ({ delta_bins; vertex_bin; _ } as st) = 91 + match VM.find_opt v vertex_bin with 92 + | None -> st 93 + | Some delta -> 94 + { st with delta_bins = 95 + IM.update delta (function None -> None 96 + | Some vs -> Some (VS.remove v vs)) 97 + delta_bins; 98 + vertex_bin = VM.remove v vertex_bin } 99 + 100 + (* Calculate the sums of incoming and outgoing edge weights, ignoring 101 + obligatory arcs; they must be respected so their weight is irrelevant. *) 102 + let weights g v = 103 + let add_pweight e (s, b) = 104 + match GB.weight e with Obligatory _ -> (s, true) | Normal w -> (s + w, b) 105 + in 106 + let add_sweight e s = 107 + match GB.weight e with Obligatory w -> s + w | Normal w -> s + w 108 + in 109 + let inw, blocked = G.fold_pred_e add_pweight g v (0, false) in 110 + let outw = G.fold_succ_e add_sweight g v 0 in 111 + blocked, inw, outw 112 + 113 + let add_vertex g v delta ({ sources; sinks; _ } as st) = 114 + let ind, outd = G.in_degree g v, G.out_degree g v in 115 + if ind = 0 then { st with sources = VS.add v sources } 116 + else if outd = 0 then { st with sinks = VS.add v sinks } 117 + else add_to_bin delta v st 118 + 119 + (* Initialize the state for a given vertex. *) 120 + let init_vertex g v st = 121 + let blocked, inw, outw = weights g v in 122 + if blocked then st else add_vertex g v (outw - inw) st 123 + 124 + let init g = G.fold_vertex (init_vertex g) g empty 125 + 126 + (* Move v from the bin for delta to sources, sinks, or another bin. *) 127 + let shift_bins g v delta' st0 = add_vertex g v delta' (remove_from_bin v st0) 128 + 129 + (* Before removing v from the graph, update the state of its sucessors. *) 130 + let update_removed_succ g' e st = 131 + let v = G.E.dst e in 132 + let still_blocked, inw', outw' = weights g' v in 133 + if still_blocked then st else shift_bins g' v (outw' - inw') st 134 + 135 + (* Before removing v from the graph, update the state of its predecessors. *) 136 + let update_removed_pred g' e ({ sinks; _ } as st) = 137 + let v = G.E.src e in 138 + let blocked, inw', outw' = weights g' v in 139 + match GB.weight e with 140 + | Obligatory _ -> 141 + if blocked || outw' > 0 then st 142 + else (* not blocked && outw' = 0 *) 143 + { (remove_from_bin v st) with sinks = VS.add v sinks } 144 + | Normal _ -> 145 + if blocked then st else shift_bins g' v (outw' - inw') st 146 + 147 + (* Remove a vertex from the graph and update the data structures for its 148 + succesors and predecessors. *) 149 + let remove_vertex g v st = 150 + let g' = GB.remove_vertex g v in 151 + (g', G.fold_succ_e (update_removed_succ g') g v st 152 + |> G.fold_pred_e (update_removed_pred g') g v) 153 + 154 + (* The original article proposes preprocessing the graph to condense long 155 + chains of vertexes. This works together with the heuristic for generating 156 + unbalanced vertexes, since the intermediate nodes on the chain do not 157 + contribute any leftward arcs (when the last vertex is removed, they 158 + become a sequence of sinks). Using such a preprocessing step with 159 + weighted edges risks removing good feedback arcs, i.e., those with a big 160 + difference between outgoing and incoming weights. That is why here we 161 + use on-the-fly condensation, even if there is a risk of recomputing the 162 + same result several times. *) 163 + let rec condense w g v = 164 + if G.out_degree g v = 1 then 165 + match G.pred g v with 166 + | [u] when not (G.V.equal u w) -> condense w g u 167 + | _ -> v 168 + else v 169 + 170 + (* Find the vertex v that has the most "unbalanced" predecessor u. Most 171 + unbalanced means the biggest difference between the input weights and 172 + output weights. Skip any vertex with an incoming obligatory arc. *) 173 + let takemax g v imax = 174 + let check_edge e max = (* check u -> v *) 175 + let u_blocked, u_inw, u_outw = 176 + weights g (condense (G.E.dst e) g (G.E.src e)) in 177 + let u_w = u_inw - u_outw in 178 + match max with 179 + | Some (None, _) 180 + | None -> Some ((if u_blocked then None else Some u_w), v) 181 + | Some (Some x_w, _) when u_w > x_w -> Some (Some u_w, v) 182 + | _ -> max 183 + in 184 + G.fold_pred_e check_edge g v imax 185 + 186 + (* Look for the vertex with the highest delta value that is not the target 187 + of an obligatory arc. Use the "unbalanced" heuristic impllemented in 188 + [takemax] to discriminate between competing possibilities. If a vertex 189 + is found, remove it from the returned delta bins. *) 190 + let max_from_deltas g ({ delta_bins; _ } as st) = 191 + let rec f = function 192 + | Seq.Nil -> None 193 + | Seq.Cons ((_, dbin), tl) -> 194 + (match VS.fold (takemax g) dbin None with 195 + | None -> f (tl ()) 196 + | Some (_, v) -> Some (v, remove_from_bin v st)) 197 + in 198 + f (IM.to_rev_seq delta_bins ()) 199 + 200 + (* Include any leftward arcs due to the two-cycles that were removed by 201 + preprocessing. *) 202 + let add_from_two_cycles s1 s2 two_cycles v fas = 203 + let bf es b = if G.V.equal (G.E.dst b) v then b::es else es in 204 + let f es e = 205 + let w = G.E.dst e in 206 + if VS.mem w s1 then e::es 207 + else if VS.mem w s2 then 208 + (* the two-cycle partner has already been scheduled as sink, so 209 + the feedback edges come from it. *) 210 + match VM.find_opt w two_cycles with 211 + | None -> es 212 + | Some bs -> List.fold_left bf es bs 213 + else es in 214 + match VM.find_opt v two_cycles with 215 + | None -> fas 216 + | Some es -> List.fold_left f fas es 217 + 218 + (* Shift a given vertex onto s1, and add any leftward arcs to the feedback 219 + arc set. *) 220 + let schedule_vertex g (v, ({ s1; s2; fas; two_cycles; _ } as st)) = 221 + let add_to_fas e es = if VS.mem (G.E.src e) s1 then es else e::es in 222 + (v, { st with s1 = VS.add v s1; 223 + fas = G.fold_pred_e add_to_fas g v fas 224 + |> add_from_two_cycles s1 s2 two_cycles v }) 225 + 226 + (* Take the next available vertex from, in order, sources, sinks, or the 227 + highset possible delta bin. *) 228 + let choose_vertex g ({ s1; s2; sources; sinks; two_cycles; fas; _ } as st0) = 229 + match VS.choose_opt sources with 230 + | Some v -> 231 + Some (v, { st0 with sources = VS.remove v sources; 232 + sinks = VS.remove v sinks; 233 + s1 = VS.add v s1; 234 + fas = add_from_two_cycles s1 s2 two_cycles v fas }) 235 + | None -> 236 + (match VS.choose_opt sinks with 237 + | Some v -> 238 + Some (v, { st0 with sinks = VS.remove v sinks; 239 + s2 = VS.add v s2; 240 + fas = add_from_two_cycles s1 s2 two_cycles v fas }) 241 + | None -> Option.map (schedule_vertex g) (max_from_deltas g st0)) 242 + 243 + let add_two_cycle_edge two_cycles e = 244 + VM.update (G.E.src e) (function None -> Some [e] 245 + | Some es -> Some (e :: es)) two_cycles 246 + 247 + let same_weight w e = 248 + match GB.weight e with 249 + | Obligatory _ -> false 250 + | Normal w' -> w' = w 251 + 252 + (* For every pair of distinct vertexes A and B linked to each other by 253 + edges A -ab-> B and B -ba-> A with the same weight, update the mapping 254 + by linking A to ab, and B to ba, and remove the edges from the graph. 255 + When A is scheduled, if B is already in s1 then the edge ab is a 256 + feedback arc, and similarly for B and ba. The principle is that there 257 + will be a feedback arc regardless of whether A is "scheduled" before B or 258 + vice versa, therefore such cycles should not constrain vertex choices. *) 259 + let remove_two_cycles g0 = 260 + let f e ((g, cycles) as unchanged) = 261 + match GB.weight e with 262 + | Obligatory _ -> unchanged 263 + | Normal w -> 264 + if List.length (G.find_all_edges g0 (G.E.src e) (G.E.dst e)) > 1 265 + (* invalid for graphs like: { A -1-> B, A -2-> B, B -3-> A *) 266 + then raise Exit 267 + else 268 + let back_edges = 269 + G.find_all_edges g0 (G.E.dst e) (G.E.src e) 270 + |> List.filter (same_weight w) 271 + in 272 + if back_edges = [] then unchanged 273 + else (GB.remove_edge_e g e, 274 + List.fold_left add_two_cycle_edge cycles back_edges) 275 + in 276 + try 277 + G.fold_edges_e f g0 (g0, VM.empty) 278 + with Exit -> (g0, VM.empty) 279 + 280 + (* All self loops must be broken, so just add them straight into the 281 + feedback arc set. *) 282 + let remove_self_loops g0 = 283 + let f v (g, fas) = 284 + let self_loops = G.find_all_edges g0 v v in 285 + (List.fold_left GB.remove_edge_e g self_loops, 286 + List.rev_append self_loops fas) 287 + in 288 + G.fold_vertex f g0 (g0, []) 289 + 290 + (* Remove any arcs between strongly connected components. There can be no 291 + cycles between distinct sccs by definition. *) 292 + module C = Components.Make(G) 293 + module Emap = Gmap.Edge(G)(struct include GB.G include GB end) 294 + 295 + let disconnect_sccs g = 296 + let nsccs, fscc = C.scc g in 297 + let in_same_scc e = 298 + if fscc (G.E.src e) = fscc (G.E.dst e) then Some e else None 299 + in 300 + if nsccs < 2 then g 301 + else Emap.filter_map in_same_scc g 302 + 303 + let feedback_arc_set g0 = 304 + let rec loop (g, st) = 305 + match choose_vertex g st with 306 + | Some (v, st') when G.mem_vertex g v -> loop (remove_vertex g v st') 307 + | Some (_, st') -> loop (g, st') 308 + | None -> 309 + let remaining = IM.fold (Fun.const VS.union) st.delta_bins VS.empty in 310 + if VS.is_empty remaining then st.fas 311 + else raise (Stuck (VS.elements remaining)) 312 + in 313 + let g1 = disconnect_sccs g0 in 314 + let g2, fas = remove_self_loops g1 in 315 + let g3, two_cycles = remove_two_cycles g2 in 316 + loop (g3, { (init g3) with fas; two_cycles }) 317 + 318 + end 319 +
+71
src/cycles.mli
··· 1 + (**************************************************************************) 2 + (* *) 3 + (* Ocamlgraph: a generic graph library for OCaml *) 4 + (* Copyright (C) 2004-2022 *) 5 + (* Sylvain Conchon, Jean-Christophe Filliatre and Julien Signoles *) 6 + (* *) 7 + (* This software is free software; you can redistribute it and/or *) 8 + (* modify it under the terms of the GNU Library General Public *) 9 + (* License version 2.1, with the special exception on linking *) 10 + (* described in file LICENSE. *) 11 + (* *) 12 + (* This software is distributed in the hope that it will be useful, *) 13 + (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 + (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) 15 + (* *) 16 + (**************************************************************************) 17 + 18 + (** Algorithms related to cycles in directed graphs. *) 19 + 20 + type weight = 21 + | Normal of int 22 + (** Weighted arc that can be included in the feedback set. The 23 + weight must be zero (not normally a good choice) or positive 24 + (1 may be a good choice). *) 25 + | Obligatory of int 26 + (** Obligatory arc that cannot be returned in the feedback set. 27 + Set the weight to zero to completely ignore obligatory arcs 28 + when choosing which vertex to schedule. Set it to a positive 29 + value (1 may be a good choice) to adjust the preference for 30 + choosing vertexes that may "unblock" other vertexes by 31 + removing their incoming obligatory arcs. *) 32 + 33 + (** Adaptation of the FASH algorithm of Eades and Lin (1995) to handle 34 + edge weights and obligatory arcs. The algorithm tries to minimize the 35 + total weight of the returned feedback arc set. Obligatory arcs are 36 + respected and never returned in the feedback arc set, an exception is 37 + raised if the obligatory arcs form a cycle. The adapted algorithm is 38 + hereby called FASHWO: “feedback arc set heuristic + weights and 39 + obligations”. 40 + 41 + For a graph G and any one of its feedback arc sets F, the graph G - F is 42 + obviously acyclic. If F is minimal, i.e., adding any of its edges to G - F 43 + would introduce a cycle, then reversing, rather than removing, the 44 + feedback arcs also gives an ayclic graph, [G - F + F^R]. In fact, Eades 45 + and Lin define the feedback arc set as "a set of arcs whose reversal makes 46 + G acyclic". 47 + 48 + @see <https://mathoverflow.net/a/234023/> David Epstein proof about reversed arcs *) 49 + module Fashwo 50 + (GB : sig 51 + include Builder.S 52 + 53 + (** Assign weights to edges. *) 54 + val weight : G.edge -> weight 55 + end) : 56 + sig 57 + (** Raised if cycles remain and all the remaining vertexes are obligatory. 58 + The argument gives the list of remaining vertexes. *) 59 + exception Stuck of GB.G.vertex list 60 + 61 + (** Return a minimal set of arcs whose removal or reversal would make the 62 + given graph acyclic. 63 + 64 + By minimal, we mean that each arc in the returned list must be removed 65 + or reversed, i.e., none are superfluous. Since a heuristic is used, the 66 + returned list may not be a minimum feedback arc set. Finding the {i 67 + minimum feedback arc set}, dually, the {i maximum acyclic subgraph} is 68 + NP-hard for general graphs. *) 69 + val feedback_arc_set : GB.G.t -> GB.G.edge list 70 + end 71 +
+1
src/graph.ml
··· 12 12 module Oper = Oper 13 13 module Components = Components 14 14 module Path = Path 15 + module Cycles = Cycles 15 16 module Nonnegative = Nonnegative 16 17 module Traverse = Traverse 17 18 module Coloring = Coloring
+19
tests/dune
··· 156 156 (modules test_saps) 157 157 (libraries graph)) 158 158 159 + ;; Rules for the test_cycles test 160 + 161 + (rule 162 + (with-stdout-to 163 + test_cycles.output 164 + (run ./test_cycles.exe))) 165 + 166 + (rule 167 + (alias runtest) 168 + (action 169 + (progn 170 + (diff test_cycles.expected test_cycles.output) 171 + (echo "test_cycles: all tests succeeded.\n")))) 172 + 173 + (executable 174 + (name test_cycles) 175 + (modules test_cycles) 176 + (libraries graph)) 177 + 159 178 ;; Rules for the weak topological test 160 179 161 180 (rule
+3
tests/test_cycles.expected
··· 1 + cycles1 = { 3 -> 2 } (cycles to no cycles) 2 + cycles2 = { 7 -> 5, 3 -> 1 } (cycles to no cycles) 3 + cycles3 = { 4 -> 2, 3 -> 1 } (cycles to no cycles)
+100
tests/test_cycles.ml
··· 1 + 2 + (* Test file for Cycles module *) 3 + 4 + open Graph 5 + 6 + module Int = struct 7 + type t = int 8 + let compare = compare 9 + let hash = Hashtbl.hash 10 + let equal = (=) 11 + let default = 0 12 + end 13 + 14 + let pp_comma p () = Format.(pp_print_char p ','; pp_print_space p ()) 15 + let pp_edge p (s, d) = Format.fprintf p "%d -> %d" s d 16 + 17 + module GP = Persistent.Digraph.Concrete(Int) 18 + 19 + module GPDFS = Traverse.Dfs (GP) 20 + 21 + let pp_has_cycles p g = 22 + if GPDFS.has_cycle g 23 + then Format.pp_print_string p "cycles" 24 + else Format.pp_print_string p "no cycles" 25 + 26 + module FW = Cycles.Fashwo(struct 27 + include Builder.P(GP) 28 + let weight _ = Cycles.Normal 1 29 + end) 30 + 31 + (* Eades and Linh, "A Heuristic for the Feedback Arc Set Problem", Fig. 1 *) 32 + let g1 = 33 + List.fold_left (fun g (s, d) -> GP.add_edge g s d) GP.empty 34 + [ (1, 4); 35 + (1, 3); 36 + (2, 1); 37 + (2, 4); 38 + (3, 2); 39 + (4, 3); 40 + ] 41 + let cycles1 = FW.feedback_arc_set g1 42 + let g1' = List.fold_left (fun g (s, d) -> GP.remove_edge g s d) g1 cycles1 43 + 44 + let () = 45 + Format.(printf "cycles1 = @[<hv 2>{ %a }@] (%a to %a)@." 46 + (pp_print_list ~pp_sep:pp_comma pp_edge) cycles1 47 + pp_has_cycles g1 48 + pp_has_cycles g1') 49 + 50 + (* Eades and Linh, "A Heuristic for the Feedback Arc Set Problem", Fig. 5 *) 51 + let g2 = 52 + List.fold_left (fun g (s, d) -> GP.add_edge g s d) GP.empty 53 + [ (1, 2); 54 + (1, 4); 55 + (2, 3); 56 + (2, 4); 57 + (3, 1); 58 + (4, 8); 59 + (5, 3); 60 + (5, 6); 61 + (6, 7); 62 + (7, 5); 63 + (8, 6); 64 + (8, 7); 65 + ] 66 + let cycles2 = FW.feedback_arc_set g2 67 + let g2' = List.fold_left 68 + (fun g (s, d) -> GP.add_edge (GP.remove_edge g s d) d s) 69 + g2 cycles2 70 + 71 + let () = 72 + Format.(printf "cycles2 = @[<hv 2>{ %a }@] (%a to %a)@." 73 + (pp_print_list ~pp_sep:pp_comma pp_edge) cycles2 74 + pp_has_cycles g2 75 + pp_has_cycles g2') 76 + 77 + (* Eades and Linh, "A Heuristic for the Feedback Arc Set Problem", Fig. 6 *) 78 + let g3 = 79 + List.fold_left (fun g (s, d) -> GP.add_edge g s d) GP.empty 80 + [ (1, 2); 81 + (1, 5); 82 + (2, 6); 83 + (3, 1); 84 + (4, 2); 85 + (4, 3); 86 + (5, 3); 87 + (5, 6); 88 + (6, 4); 89 + ] 90 + let cycles3 = FW.feedback_arc_set g3 91 + let g3' = List.fold_left 92 + (fun g (s, d) -> GP.add_edge (GP.remove_edge g s d) d s) 93 + g3 cycles3 94 + 95 + let () = 96 + Format.(printf "cycles3 = @[<hv 2>{ %a }@] (%a to %a)@." 97 + (pp_print_list ~pp_sep:pp_comma pp_edge) cycles3 98 + pp_has_cycles g3 99 + pp_has_cycles g3') 100 +