···11+22+type weight =
33+ | Normal of int
44+ | Obligatory of int
55+66+module Fashwo
77+ (GB : sig
88+ include Builder.S
99+ val weight : G.edge -> weight
1010+ end)
1111+=
1212+struct
1313+ module G = GB.G
1414+1515+ exception Stuck of G.vertex list
1616+1717+ module IM = Map.Make (Int)
1818+ module VM = Map.Make (G.V)
1919+ module VS = Set.Make (G.V)
2020+2121+ (* The algorithm of Eades, Lin, and Smyth (ELS 1993) works by "scheduling"
2222+ vertexes onto two lists called s1 and s2. At each iteration a vertex is
2323+ chosen, scheduled, and removed from the graph. Arcs from a newly scheduled
2424+ node toward nodes already in s1 are classified as "leftward"; they are
2525+ included in the generated feedback arc set. "Rightward" arcs, to vertexes
2626+ in s2 or that have not yet been scheduled, are not included in the
2727+ feedback arc set. The algorithm tries to maximize the number of rightward
2828+ arcs and thereby minimize the number of leftward ones. Source vertexes,
2929+ those with no incoming arcs in the current graph (i.e., because all its
3030+ predecssors have already been scheduled), are appended directly onto s1
3131+ and do not induce any feedback arcs. Sink vertexes are consed directly
3232+ onto s2 and do not induce any feedback arcs. Otherwise, the algorithm
3333+ chooses a vertex to maximize the difference between the number of
3434+ outgoing arcs and the number of incoming ones: the (remaining) incoming
3535+ arcs must be included in the feedback arc set. The difference between the
3636+ number of rightward arcs (no cost) and the number of leftward arcs
3737+ (feedback arcs) is called "delta". The algorithm is implemented
3838+ efficiently by using a data structure to group unscheduled vertexes
3939+ according to their delta value. When more than one vertex has the maximum
4040+ delta value, the original algorithm makes an arbitrary choice. The
4141+ algorithm of Eades and Lin (EL 1995) makes the choice using a heuristic
4242+ that maximizes the difference between incoming arcs and outgoing ones in
4343+ the vertexes that remain at the end of the iteration as such vertexes are
4444+ the most "unbalanced" and thus less likely to contribute to the feedback
4545+ arc set in future iterations. The EL 1995 algorithm includes a further
4646+ refinement to ignore chains of vertexes when looking for unbalanced ones,
4747+ since such chains do not contribute feedback arcs.
4848+4949+ Since we just want to produce a list of feedback arcs, we don't bother
5050+ tracking order in s1, and we only track s2 to properly handle the
5151+ preprocessing optimization that removes two cycles. We maintain lists of
5252+ source and sink vertexes (scheduled but not yet removed from the graph)
5353+ and a map from delta values to sets of vertexes. As the delta value map
5454+ caches the state of the graph, it must be updated when the a vertex is
5555+ scheduled and removed from the graph. Additionally, we remember which two
5656+ cycles were removed during preprocessing and ensure that one of their
5757+ arcs is included in the feedback arc set, depending on whichever of the
5858+ two interlinked vertexes is scheduled first. *)
5959+6060+ type t = {
6161+ s1 : VS.t; (* vertexes placed "at left" *)
6262+ s2 : VS.t; (* vertexes placed "at right";
6363+ only needed to optimize for two_cycles *)
6464+ sources : VS.t; (* vertexes with no incoming arcs *)
6565+ sinks : VS.t; (* vertexes with no outgoing arcs *)
6666+ delta_bins : VS.t IM.t; (* group vertexes by delta value *)
6767+ vertex_bin : int VM.t; (* map each vertex to its bin *)
6868+ two_cycles : G.edge list VM.t; (* edges for 2-cycles *)
6969+ fas : G.edge list; (* current feedback arc set *)
7070+ }
7171+7272+ let empty = {
7373+ s1 = VS.empty;
7474+ s2 = VS.empty;
7575+ sources = VS.empty;
7676+ sinks = VS.empty;
7777+ delta_bins = IM.empty;
7878+ vertex_bin = VM.empty;
7979+ two_cycles = VM.empty;
8080+ fas = [];
8181+ }
8282+8383+ let add_to_bin delta v ({ delta_bins; vertex_bin; _ } as st) =
8484+ { st with delta_bins =
8585+ IM.update delta (function None -> Some (VS.singleton v)
8686+ | Some vs -> Some (VS.add v vs))
8787+ delta_bins;
8888+ vertex_bin = VM.add v delta vertex_bin }
8989+9090+ let remove_from_bin v ({ delta_bins; vertex_bin; _ } as st) =
9191+ match VM.find_opt v vertex_bin with
9292+ | None -> st
9393+ | Some delta ->
9494+ { st with delta_bins =
9595+ IM.update delta (function None -> None
9696+ | Some vs -> Some (VS.remove v vs))
9797+ delta_bins;
9898+ vertex_bin = VM.remove v vertex_bin }
9999+100100+ (* Calculate the sums of incoming and outgoing edge weights, ignoring
101101+ obligatory arcs; they must be respected so their weight is irrelevant. *)
102102+ let weights g v =
103103+ let add_pweight e (s, b) =
104104+ match GB.weight e with Obligatory _ -> (s, true) | Normal w -> (s + w, b)
105105+ in
106106+ let add_sweight e s =
107107+ match GB.weight e with Obligatory w -> s + w | Normal w -> s + w
108108+ in
109109+ let inw, blocked = G.fold_pred_e add_pweight g v (0, false) in
110110+ let outw = G.fold_succ_e add_sweight g v 0 in
111111+ blocked, inw, outw
112112+113113+ let add_vertex g v delta ({ sources; sinks; _ } as st) =
114114+ let ind, outd = G.in_degree g v, G.out_degree g v in
115115+ if ind = 0 then { st with sources = VS.add v sources }
116116+ else if outd = 0 then { st with sinks = VS.add v sinks }
117117+ else add_to_bin delta v st
118118+119119+ (* Initialize the state for a given vertex. *)
120120+ let init_vertex g v st =
121121+ let blocked, inw, outw = weights g v in
122122+ if blocked then st else add_vertex g v (outw - inw) st
123123+124124+ let init g = G.fold_vertex (init_vertex g) g empty
125125+126126+ (* Move v from the bin for delta to sources, sinks, or another bin. *)
127127+ let shift_bins g v delta' st0 = add_vertex g v delta' (remove_from_bin v st0)
128128+129129+ (* Before removing v from the graph, update the state of its sucessors. *)
130130+ let update_removed_succ g' e st =
131131+ let v = G.E.dst e in
132132+ let still_blocked, inw', outw' = weights g' v in
133133+ if still_blocked then st else shift_bins g' v (outw' - inw') st
134134+135135+ (* Before removing v from the graph, update the state of its predecessors. *)
136136+ let update_removed_pred g' e ({ sinks; _ } as st) =
137137+ let v = G.E.src e in
138138+ let blocked, inw', outw' = weights g' v in
139139+ match GB.weight e with
140140+ | Obligatory _ ->
141141+ if blocked || outw' > 0 then st
142142+ else (* not blocked && outw' = 0 *)
143143+ { (remove_from_bin v st) with sinks = VS.add v sinks }
144144+ | Normal _ ->
145145+ if blocked then st else shift_bins g' v (outw' - inw') st
146146+147147+ (* Remove a vertex from the graph and update the data structures for its
148148+ succesors and predecessors. *)
149149+ let remove_vertex g v st =
150150+ let g' = GB.remove_vertex g v in
151151+ (g', G.fold_succ_e (update_removed_succ g') g v st
152152+ |> G.fold_pred_e (update_removed_pred g') g v)
153153+154154+ (* The original article proposes preprocessing the graph to condense long
155155+ chains of vertexes. This works together with the heuristic for generating
156156+ unbalanced vertexes, since the intermediate nodes on the chain do not
157157+ contribute any leftward arcs (when the last vertex is removed, they
158158+ become a sequence of sinks). Using such a preprocessing step with
159159+ weighted edges risks removing good feedback arcs, i.e., those with a big
160160+ difference between outgoing and incoming weights. That is why here we
161161+ use on-the-fly condensation, even if there is a risk of recomputing the
162162+ same result several times. *)
163163+ let rec condense w g v =
164164+ if G.out_degree g v = 1 then
165165+ match G.pred g v with
166166+ | [u] when not (G.V.equal u w) -> condense w g u
167167+ | _ -> v
168168+ else v
169169+170170+ (* Find the vertex v that has the most "unbalanced" predecessor u. Most
171171+ unbalanced means the biggest difference between the input weights and
172172+ output weights. Skip any vertex with an incoming obligatory arc. *)
173173+ let takemax g v imax =
174174+ let check_edge e max = (* check u -> v *)
175175+ let u_blocked, u_inw, u_outw =
176176+ weights g (condense (G.E.dst e) g (G.E.src e)) in
177177+ let u_w = u_inw - u_outw in
178178+ match max with
179179+ | Some (None, _)
180180+ | None -> Some ((if u_blocked then None else Some u_w), v)
181181+ | Some (Some x_w, _) when u_w > x_w -> Some (Some u_w, v)
182182+ | _ -> max
183183+ in
184184+ G.fold_pred_e check_edge g v imax
185185+186186+ (* Look for the vertex with the highest delta value that is not the target
187187+ of an obligatory arc. Use the "unbalanced" heuristic impllemented in
188188+ [takemax] to discriminate between competing possibilities. If a vertex
189189+ is found, remove it from the returned delta bins. *)
190190+ let max_from_deltas g ({ delta_bins; _ } as st) =
191191+ let rec f = function
192192+ | Seq.Nil -> None
193193+ | Seq.Cons ((_, dbin), tl) ->
194194+ (match VS.fold (takemax g) dbin None with
195195+ | None -> f (tl ())
196196+ | Some (_, v) -> Some (v, remove_from_bin v st))
197197+ in
198198+ f (IM.to_rev_seq delta_bins ())
199199+200200+ (* Include any leftward arcs due to the two-cycles that were removed by
201201+ preprocessing. *)
202202+ let add_from_two_cycles s1 s2 two_cycles v fas =
203203+ let bf es b = if G.V.equal (G.E.dst b) v then b::es else es in
204204+ let f es e =
205205+ let w = G.E.dst e in
206206+ if VS.mem w s1 then e::es
207207+ else if VS.mem w s2 then
208208+ (* the two-cycle partner has already been scheduled as sink, so
209209+ the feedback edges come from it. *)
210210+ match VM.find_opt w two_cycles with
211211+ | None -> es
212212+ | Some bs -> List.fold_left bf es bs
213213+ else es in
214214+ match VM.find_opt v two_cycles with
215215+ | None -> fas
216216+ | Some es -> List.fold_left f fas es
217217+218218+ (* Shift a given vertex onto s1, and add any leftward arcs to the feedback
219219+ arc set. *)
220220+ let schedule_vertex g (v, ({ s1; s2; fas; two_cycles; _ } as st)) =
221221+ let add_to_fas e es = if VS.mem (G.E.src e) s1 then es else e::es in
222222+ (v, { st with s1 = VS.add v s1;
223223+ fas = G.fold_pred_e add_to_fas g v fas
224224+ |> add_from_two_cycles s1 s2 two_cycles v })
225225+226226+ (* Take the next available vertex from, in order, sources, sinks, or the
227227+ highset possible delta bin. *)
228228+ let choose_vertex g ({ s1; s2; sources; sinks; two_cycles; fas; _ } as st0) =
229229+ match VS.choose_opt sources with
230230+ | Some v ->
231231+ Some (v, { st0 with sources = VS.remove v sources;
232232+ sinks = VS.remove v sinks;
233233+ s1 = VS.add v s1;
234234+ fas = add_from_two_cycles s1 s2 two_cycles v fas })
235235+ | None ->
236236+ (match VS.choose_opt sinks with
237237+ | Some v ->
238238+ Some (v, { st0 with sinks = VS.remove v sinks;
239239+ s2 = VS.add v s2;
240240+ fas = add_from_two_cycles s1 s2 two_cycles v fas })
241241+ | None -> Option.map (schedule_vertex g) (max_from_deltas g st0))
242242+243243+ let add_two_cycle_edge two_cycles e =
244244+ VM.update (G.E.src e) (function None -> Some [e]
245245+ | Some es -> Some (e :: es)) two_cycles
246246+247247+ let same_weight w e =
248248+ match GB.weight e with
249249+ | Obligatory _ -> false
250250+ | Normal w' -> w' = w
251251+252252+ (* For every pair of distinct vertexes A and B linked to each other by
253253+ edges A -ab-> B and B -ba-> A with the same weight, update the mapping
254254+ by linking A to ab, and B to ba, and remove the edges from the graph.
255255+ When A is scheduled, if B is already in s1 then the edge ab is a
256256+ feedback arc, and similarly for B and ba. The principle is that there
257257+ will be a feedback arc regardless of whether A is "scheduled" before B or
258258+ vice versa, therefore such cycles should not constrain vertex choices. *)
259259+ let remove_two_cycles g0 =
260260+ let f e ((g, cycles) as unchanged) =
261261+ match GB.weight e with
262262+ | Obligatory _ -> unchanged
263263+ | Normal w ->
264264+ if List.length (G.find_all_edges g0 (G.E.src e) (G.E.dst e)) > 1
265265+ (* invalid for graphs like: { A -1-> B, A -2-> B, B -3-> A *)
266266+ then raise Exit
267267+ else
268268+ let back_edges =
269269+ G.find_all_edges g0 (G.E.dst e) (G.E.src e)
270270+ |> List.filter (same_weight w)
271271+ in
272272+ if back_edges = [] then unchanged
273273+ else (GB.remove_edge_e g e,
274274+ List.fold_left add_two_cycle_edge cycles back_edges)
275275+ in
276276+ try
277277+ G.fold_edges_e f g0 (g0, VM.empty)
278278+ with Exit -> (g0, VM.empty)
279279+280280+ (* All self loops must be broken, so just add them straight into the
281281+ feedback arc set. *)
282282+ let remove_self_loops g0 =
283283+ let f v (g, fas) =
284284+ let self_loops = G.find_all_edges g0 v v in
285285+ (List.fold_left GB.remove_edge_e g self_loops,
286286+ List.rev_append self_loops fas)
287287+ in
288288+ G.fold_vertex f g0 (g0, [])
289289+290290+ (* Remove any arcs between strongly connected components. There can be no
291291+ cycles between distinct sccs by definition. *)
292292+ module C = Components.Make(G)
293293+ module Emap = Gmap.Edge(G)(struct include GB.G include GB end)
294294+295295+ let disconnect_sccs g =
296296+ let nsccs, fscc = C.scc g in
297297+ let in_same_scc e =
298298+ if fscc (G.E.src e) = fscc (G.E.dst e) then Some e else None
299299+ in
300300+ if nsccs < 2 then g
301301+ else Emap.filter_map in_same_scc g
302302+303303+ let feedback_arc_set g0 =
304304+ let rec loop (g, st) =
305305+ match choose_vertex g st with
306306+ | Some (v, st') when G.mem_vertex g v -> loop (remove_vertex g v st')
307307+ | Some (_, st') -> loop (g, st')
308308+ | None ->
309309+ let remaining = IM.fold (Fun.const VS.union) st.delta_bins VS.empty in
310310+ if VS.is_empty remaining then st.fas
311311+ else raise (Stuck (VS.elements remaining))
312312+ in
313313+ let g1 = disconnect_sccs g0 in
314314+ let g2, fas = remove_self_loops g1 in
315315+ let g3, two_cycles = remove_two_cycles g2 in
316316+ loop (g3, { (init g3) with fas; two_cycles })
317317+318318+end
319319+
+71
src/cycles.mli
···11+(**************************************************************************)
22+(* *)
33+(* Ocamlgraph: a generic graph library for OCaml *)
44+(* Copyright (C) 2004-2022 *)
55+(* Sylvain Conchon, Jean-Christophe Filliatre and Julien Signoles *)
66+(* *)
77+(* This software is free software; you can redistribute it and/or *)
88+(* modify it under the terms of the GNU Library General Public *)
99+(* License version 2.1, with the special exception on linking *)
1010+(* described in file LICENSE. *)
1111+(* *)
1212+(* This software is distributed in the hope that it will be useful, *)
1313+(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
1414+(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
1515+(* *)
1616+(**************************************************************************)
1717+1818+(** Algorithms related to cycles in directed graphs. *)
1919+2020+type weight =
2121+ | Normal of int
2222+ (** Weighted arc that can be included in the feedback set. The
2323+ weight must be zero (not normally a good choice) or positive
2424+ (1 may be a good choice). *)
2525+ | Obligatory of int
2626+ (** Obligatory arc that cannot be returned in the feedback set.
2727+ Set the weight to zero to completely ignore obligatory arcs
2828+ when choosing which vertex to schedule. Set it to a positive
2929+ value (1 may be a good choice) to adjust the preference for
3030+ choosing vertexes that may "unblock" other vertexes by
3131+ removing their incoming obligatory arcs. *)
3232+3333+(** Adaptation of the FASH algorithm of Eades and Lin (1995) to handle
3434+ edge weights and obligatory arcs. The algorithm tries to minimize the
3535+ total weight of the returned feedback arc set. Obligatory arcs are
3636+ respected and never returned in the feedback arc set, an exception is
3737+ raised if the obligatory arcs form a cycle. The adapted algorithm is
3838+ hereby called FASHWO: “feedback arc set heuristic + weights and
3939+ obligations”.
4040+4141+ For a graph G and any one of its feedback arc sets F, the graph G - F is
4242+ obviously acyclic. If F is minimal, i.e., adding any of its edges to G - F
4343+ would introduce a cycle, then reversing, rather than removing, the
4444+ feedback arcs also gives an ayclic graph, [G - F + F^R]. In fact, Eades
4545+ and Lin define the feedback arc set as "a set of arcs whose reversal makes
4646+ G acyclic".
4747+4848+ @see <https://mathoverflow.net/a/234023/> David Epstein proof about reversed arcs *)
4949+module Fashwo
5050+ (GB : sig
5151+ include Builder.S
5252+5353+ (** Assign weights to edges. *)
5454+ val weight : G.edge -> weight
5555+ end) :
5656+sig
5757+ (** Raised if cycles remain and all the remaining vertexes are obligatory.
5858+ The argument gives the list of remaining vertexes. *)
5959+ exception Stuck of GB.G.vertex list
6060+6161+ (** Return a minimal set of arcs whose removal or reversal would make the
6262+ given graph acyclic.
6363+6464+ By minimal, we mean that each arc in the returned list must be removed
6565+ or reversed, i.e., none are superfluous. Since a heuristic is used, the
6666+ returned list may not be a minimum feedback arc set. Finding the {i
6767+ minimum feedback arc set}, dually, the {i maximum acyclic subgraph} is
6868+ NP-hard for general graphs. *)
6969+ val feedback_arc_set : GB.G.t -> GB.G.edge list
7070+end
7171+