Find and remove dead code and unused APIs in OCaml projects
1(* Symbol discovery and analysis orchestration for prune *)
2
3open Types
4module Log = (val Logs.src_log (Logs.Src.create "prune.analysis") : Logs.LOG)
5
6(* {2 Merlin type conversions} *)
7
(* Translate a Merlin symbol kind into prune's own [symbol_kind]. Exceptions
   are reported as constructors and module types as modules. Classes, class
   types, methods and labels have no counterpart and yield [None]. *)
let convert_symbol_kind : Merlin.symbol_kind -> symbol_kind option = function
  | Value -> Some Value
  | Type -> Some Type
  | Module | Module_type -> Some Module
  | Constructor | Exception -> Some Constructor
  | Field -> Some Field
  | Class | Class_type | Method | Label -> None
18
(* Rebuild a Merlin location as a prune [location], copying the file together
   with the line and column of both endpoints. *)
let convert_location (loc : Merlin.location) : location =
  let first = loc.start in
  let last = loc.end_ in
  Types.location ~line:first.line ~end_line:last.line ~start_col:first.col
    ~end_col:last.col loc.file
22
(* Convert a Merlin outline item, and recursively its children, into a prune
   [outline_item]. Items whose kind has no prune counterpart are dropped
   ([None]). The [children] field is [None] when the item is a module type,
   when it has no children, or when none of its children could be converted. *)
let rec convert_outline_item (item : Merlin.outline_item) : outline_item option
    =
  match convert_symbol_kind item.kind with
  | None -> None
  | Some kind ->
      let children =
        match (item.kind, item.children) with
        (* Children of module types are part of the type signature, not
           independently removable exports *)
        | Module_type, _ | _, [] -> None
        | _, nested -> (
            match List.filter_map convert_outline_item nested with
            | [] -> None
            | converted -> Some converted)
      in
      Some
        {
          kind;
          name = item.name;
          location = convert_location item.location;
          children;
        }
43
44(* {2 Symbol extraction} *)
45
(* Flatten an outline item into a list of symbols: the item itself comes first,
   followed by the symbols produced for each of its children, if it has any. *)
let rec symbol_with_children ~cache (item : outline_item) =
  let own = { name = item.name; kind = item.kind; location = item.location } in
  let nested =
    Option.fold ~none:[]
      ~some:(List.concat_map (outline_item_to_symbol ~cache))
      item.children
  in
  own :: nested

(* Turn one outline item into its symbols. A module that
   [Module_alias.is_module_alias] recognises as an alias produces no symbols at
   all. If the file cannot be loaded through the cache, or the cache has no
   content for it, the alias check is skipped and the module is kept. *)
and outline_item_to_symbol ~cache (item : outline_item) =
  let file = item.location.file in
  (* Only evaluated for modules, via the guard below *)
  let is_alias () =
    match Cache.load cache file with
    | Error _ -> false
    | Ok () -> (
        match Cache.file_content cache file with
        | None -> false
        | Some content ->
            Module_alias.is_module_alias ~cache file item.kind item.location
              content)
  in
  match item.kind with
  | Module when is_alias () ->
      Log.debug (fun m ->
          m "Skipping module alias: %s at %a" item.name pp_location
            item.location);
      []
  | _ -> symbol_with_children ~cache item
79
80(* {2 Symbol discovery} *)
81
(* Ask Merlin for the outline of [file_str] and return the symbols derived from
   it. A failed outline query is logged as a warning and yields the empty list.
   On success the symbol count is logged at info level and each symbol at debug
   level. *)
let file_symbols ~backend ~cache file_str =
  match Merlin.outline backend ~file:file_str with
  | Ok merlin_items ->
      let symbols =
        merlin_items
        |> List.filter_map convert_outline_item
        |> List.concat_map (outline_item_to_symbol ~cache)
      in
      Log.info (fun m ->
          m "Outline summary for %s: found %d symbols" file_str
            (List.length symbols));
      (* One debug line per symbol, numbered from 1 *)
      List.iteri
        (fun idx (symbol : symbol_info) ->
          Log.debug (fun m ->
              m " [%d] %s (%s) at %a" (succ idx) symbol.name
                (string_of_symbol_kind symbol.kind)
                pp_location symbol.location))
        symbols;
      symbols
  | Error e ->
      Log.warn (fun m -> m "Merlin outline failed for %s: %s" file_str e);
      []
106
107(* {2 Main analysis orchestration} *)
108
(* Keep every symbol except record fields, preserving input order. The number
   of retained symbols is logged when at least one remains. *)
let filter_relevant_symbols all_symbols =
  let is_relevant (s : symbol_info) =
    match s.kind with
    | Field -> false
    | Value | Type | Constructor | Module -> true
  in
  let relevant_symbols = List.filter is_relevant all_symbols in
  (match relevant_symbols with
  | [] -> ()
  | _ :: _ ->
      Log.info (fun m ->
          m
            "Filtering to %d relevant symbols (values, types, exceptions, \
             modules)"
            (List.length relevant_symbols)));
  relevant_symbols
126
(* Bucket occurrence records by the file of their symbol, returning an
   association list from file to records. Each record is prepended to its
   file's bucket, and the bucket touched most recently ends up at the head of
   the result. The number of buckets is logged when there is at least one. *)
let group_occurrences_by_file occurrence_infos =
  let add_occurrence groups occ =
    let file = occ.symbol.location.file in
    let earlier = Option.value ~default:[] (List.assoc_opt file groups) in
    (file, occ :: earlier) :: List.remove_assoc file groups
  in
  let by_file = List.fold_left add_occurrence [] occurrence_infos in
  (match by_file with
  | [] -> ()
  | _ :: _ ->
      Log.info (fun m -> m "Grouped into %d files" (List.length by_file)));
  by_file
140
(* [has_used_children all_occurrence_data module_occ] tells whether the module
   described by [module_occ] contains a used symbol.

   Children are found positionally: every entry of [all_occurrence_data] that
   lives in the same file, starts strictly after the module's first line and no
   later than its last line, and does not share the module's name. A child
   counts as used when its occurrence count is positive, or when it is itself a
   module for which this check succeeds. Anything that is not a module has no
   children, so the answer is [false]. *)
let rec has_used_children all_occurrence_data module_occ =
  match module_occ.symbol.kind with
  | Module ->
      let module_loc = module_occ.symbol.location in
      let module_start = module_loc.start_line in
      let module_end = module_loc.end_line in

      (* Find all symbols within this module's range *)
      let children =
        List.filter
          (fun occ ->
            let loc = occ.symbol.location in
            loc.file = module_loc.file
            && loc.start_line > module_start
            && loc.start_line <= module_end
            && occ.symbol.name <> module_occ.symbol.name)
          all_occurrence_data
      in

      Log.debug (fun m ->
          m "Module %s has %d children" module_occ.symbol.name
            (List.length children));

      (* A child start line is strictly greater than its parent's, so the
         recursion below always terminates. *)
      List.exists
        (fun child ->
          if child.occurrences > 0 then (
            Log.debug (fun m ->
                m " Child %s is used (%d occurrences)" child.symbol.name
                  child.occurrences);
            true)
          else has_used_children all_occurrence_data child)
        children
  | Value | Type | Constructor | Field -> false
179
(* Drop from [unused_symbols] every module for which [has_used_children]
   reports a used child in [all_occurrence_data]. Non-module entries are always
   kept. The decision for each module is logged at debug level. *)
let filter_modules_with_used unused_symbols all_occurrence_data =
  (match unused_symbols with
  | [] -> ()
  | _ :: _ ->
      Log.info (fun m ->
          m "Filtering modules with used children: %d unused symbols to check"
            (List.length unused_symbols)));
  let keep occ =
    match occ.symbol.kind with
    | Module ->
        let used = has_used_children all_occurrence_data occ in
        Log.debug (fun m ->
            m "Module %s: has_used_children=%b, keeping=%b" occ.symbol.name
              used (not used));
        not used
    | _ -> true
  in
  List.filter keep unused_symbols
199
(* Collect the exported symbols of [files] and check their occurrences.

   Returns [(all_symbols, occurrence_data)]: [all_symbols] holds the symbols of
   every file, with those of later files placed before those of earlier ones;
   [occurrence_data] is the result of [Occurrence.check_bulk] run on the
   symbols kept by [filter_relevant_symbols].

   A single Merlin backend serves all queries. It is closed with [Fun.protect]
   so that it is released even when a step raises, and the progress display is
   cleared the same way. *)
let symbols_and_occurrences ~cache exclude_dirs root_dir files =
  let total = List.length files in
  if total > 0 then
    Log.info (fun m -> m "Analyzing %d files for symbols" total);

  (* Create Merlin backend once for all queries *)
  let backend = Merlin.v ~backend:Lib ~root_dir () in
  Fun.protect
    ~finally:(fun () -> Merlin.close backend)
    (fun () ->
      let root_path = Fpath.v root_dir in
      let progress = Progress.v ~total in
      (* Show paths relative to the project root when possible *)
      let display_path file =
        match Fpath.relativize ~root:root_path (Fpath.v file) with
        | Some rel -> Fpath.to_string rel
        | None -> file
      in
      (* Get exported symbols from all files with progress *)
      let collect () =
        let _processed, symbols =
          List.fold_left
            (fun (processed, acc) file ->
              let current = processed + 1 in
              Progress.update progress ~current
                (Fmt.str "Processing file: %s" (display_path file));
              (current, file_symbols ~backend ~cache file @ acc))
            (0, []) files
        in
        symbols
      in
      let all_symbols =
        Fun.protect ~finally:(fun () -> Progress.clear progress) collect
      in

      let symbol_count = List.length all_symbols in
      if symbol_count > 0 then
        Log.info (fun m -> m "Found %d total exported symbols" symbol_count);

      let relevant_symbols = filter_relevant_symbols all_symbols in
      let occurrence_data =
        Occurrence.check_bulk ~backend ~cache exclude_dirs root_dir
          relevant_symbols
      in
      (all_symbols, occurrence_data))
242
(* Return the names of symbols that are declared in more than one distinct
   .mli file. Only entries whose file name ends in ".mli" are considered. *)
let multi_mli_symbols occurrence_data =
  let name_to_files = Hashtbl.create 10 in
  let record occ =
    let file = occ.symbol.location.file in
    if Filename.check_suffix file ".mli" then begin
      let name = occ.symbol.name in
      let seen =
        Option.value ~default:[] (Hashtbl.find_opt name_to_files name)
      in
      (* Remember each file at most once per name *)
      if not (List.mem file seen) then
        Hashtbl.replace name_to_files name (file :: seen)
    end
  in
  List.iter record occurrence_data;
  Hashtbl.fold
    (fun name files acc ->
      match files with _ :: _ :: _ -> name :: acc | [] | [ _ ] -> acc)
    name_to_files []
265
(* Reclassify as [Used] every [Unused] entry whose symbol name is listed in
   [multi_mli_names]. All other entries are returned untouched. With an empty
   name list the input is returned as is and nothing is logged. *)
let fix_multi_mli_symbols occurrence_data multi_mli_names =
  match multi_mli_names with
  | [] -> occurrence_data
  | _ :: _ ->
      Log.info (fun m ->
          m "Found symbols in multiple .mli files: %s"
            (String.concat ", " multi_mli_names));
      let promote occ =
        if not (List.mem occ.symbol.name multi_mli_names) then occ
        else
          match occ.usage_class with
          | Unused -> { occ with usage_class = Used }
          | Unknown | Used | Used_only_in_excluded -> occ
      in
      List.map promote occurrence_data
279
(* Split occurrence data into [(unused, excluded_only)]: the entries classified
   [Unused] and those classified [Used_only_in_excluded], each in input order.
   Entries classified [Used] or [Unknown] appear in neither list. *)
let categorize_symbols occurrence_data =
  let unused_rev, excluded_rev =
    List.fold_left
      (fun (unused, excluded) occ ->
        match occ.usage_class with
        | Unused -> (occ :: unused, excluded)
        | Used_only_in_excluded -> (unused, occ :: excluded)
        | Unknown | Used -> (unused, excluded))
      ([], []) occurrence_data
  in
  (* The fold accumulates in reverse; restore the original order *)
  (List.rev unused_rev, List.rev excluded_rev)
297
(* Analyze [files] and return a pair of per-file groupings: the unused exports,
   and the exports that are used only in excluded directories. *)
let analyze_files_for_unused ~cache exclude_dirs root_dir files =
  let raw = snd (symbols_and_occurrences ~cache exclude_dirs root_dir files) in

  (* Post-process: if a symbol name appears in multiple .mli files, mark all as
     Used. This handles both re-exports and symbols accessible through module
     aliases. *)
  let reconciled = fix_multi_mli_symbols raw (multi_mli_symbols raw) in
  let unused, excluded_only = categorize_symbols reconciled in

  (* A module is not reported as unused while one of its children is used *)
  let filtered_unused = filter_modules_with_used unused reconciled in

  Log.info (fun m -> m "Found %d unused exports" (List.length filtered_unused));
  (match excluded_only with
  | [] -> ()
  | _ :: _ ->
      Log.info (fun m ->
          m "Found %d exports used only in excluded dirs"
            (List.length excluded_only)));

  ( group_occurrences_by_file filtered_unused,
    group_occurrences_by_file excluded_only )
325
(* Return the occurrence data for the relevant symbols exported by [files].
   Fails with the validation message when [System.validate_dune_project]
   rejects [root_dir]. [exclude_dirs] defaults to the empty list. *)
let all_symbol_occurrences ~cache ?(exclude_dirs = []) root_dir files =
  match System.validate_dune_project root_dir with
  | Ok () ->
      Log.info (fun m -> m "Getting all symbol occurrences");
      Ok (snd (symbols_and_occurrences ~cache exclude_dirs root_dir files))
  | Error (`Msg reason) -> Error (`Msg reason)
335
(* Entry point of the unused-export analysis: validate that [root_dir] is a
   dune project, then return the result of [analyze_files_for_unused].
   Fails with the validation message otherwise. [exclude_dirs] defaults to the
   empty list. *)
let unused_exports ~cache ?(exclude_dirs = []) root_dir files =
  match System.validate_dune_project root_dir with
  | Ok () ->
      Log.info (fun m -> m "Starting analysis");
      let report = analyze_files_for_unused ~cache exclude_dirs root_dir files in
      Ok report
  | Error (`Msg reason) -> Error (`Msg reason)