Find and remove dead code and unused APIs in OCaml projects
1(* Occurrence checking and classification for symbols *)
2
3open Types
4module Log = (val Logs.src_log (Logs.Src.create "prune.occurrence") : Logs.LOG)
5
6(* {2 Merlin type conversions} *)
7
8let convert_occurrence ~root_dir (occ : Merlin.occurrence) : location =
9 let file = occ.loc.file in
10 let file =
11 let root_dir_fpath = Fpath.v root_dir in
12 let path_fpath = Fpath.v file in
13 match Fpath.relativize ~root:root_dir_fpath path_fpath with
14 | None -> file
15 | Some rel ->
16 let rel_str = Fpath.to_string rel in
17 if String.length rel_str >= 2 && String.sub rel_str 0 2 = "./" then
18 String.sub rel_str 2 (String.length rel_str - 2)
19 else rel_str
20 in
21 Types.location ~line:occ.loc.start.line ~end_line:occ.loc.end_.line
22 ~start_col:occ.loc.start.col ~end_col:occ.loc.end_.col file
23
24(* Find the column position of an identifier in a type declaration *)
25let type_identifier_column line_content start_col =
26 (* After "type", skip whitespace and type parameters to find the identifier *)
27 let len = String.length line_content in
28 let rec skip_whitespace i =
29 if i >= len then i
30 else if line_content.[i] = ' ' || line_content.[i] = '\t' then
31 skip_whitespace (i + 1)
32 else i
33 in
34
35 let rec skip_type_params i paren_depth =
36 if i >= len then i
37 else
38 match line_content.[i] with
39 | '(' -> skip_type_params (i + 1) (paren_depth + 1)
40 | ')' -> skip_type_params (i + 1) (paren_depth - 1)
41 | (' ' | '\t') when paren_depth = 0 ->
42 (* Found space outside parens, we're past type params *)
43 skip_whitespace (i + 1)
44 | '\'' when paren_depth = 0 ->
45 (* Type variable like 'a, skip it *)
46 let i = i + 1 in
47 let rec skip_var j =
48 if j >= len then j
49 else
50 match line_content.[j] with
51 | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' -> skip_var (j + 1)
52 | _ -> j
53 in
54 skip_type_params (skip_var i) 0
55 | _ when paren_depth > 0 ->
56 (* Inside parens, skip everything *)
57 skip_type_params (i + 1) paren_depth
58 | ('a' .. 'z' | 'A' .. 'Z' | '_') when paren_depth = 0 ->
59 (* Found the start of an identifier *)
60 i
61 | _ -> skip_type_params (i + 1) paren_depth
62 in
63
64 (* Start after "type " *)
65 let start = start_col + 5 in
66 if start < len then skip_type_params (skip_whitespace start) 0 else start
67
68(* Get the column position of the identifier based on symbol kind *)
69let identifier_column ~cache (symbol : symbol_info) =
70 (* For .mli files, identifiers start after the keyword *)
71 let is_mli =
72 let len = String.length symbol.location.file in
73 len >= 4 && String.sub symbol.location.file (len - 4) 4 = ".mli"
74 in
75 let col = symbol.location.start_col in
76 if is_mli then
77 match symbol.kind with
78 | Value -> col + 4 (* "val " = 4 chars *)
79 | Type -> (
80 (* For types, we need to handle type parameters *)
81 match
82 Cache.line cache symbol.location.file symbol.location.start_line
83 with
84 | Some line_content -> type_identifier_column line_content col
85 | None -> col + 5 (* Fallback to simple offset *))
86 | Module -> col + 7 (* "module " = 7 chars *)
87 | Constructor -> col + 10 (* "exception " = 10 chars *)
88 | Field -> col (* Fields don't have a keyword prefix *)
89 else col (* For .ml files, use position as-is *)
90
91(* Check if a file or directory is in the excluded list *)
92let is_excluded_file exclude_dirs file_path =
93 match exclude_dirs with
94 | [] -> false
95 | dirs ->
96 let fpath = Fpath.v file_path in
97 (* Check if file is in any excluded directory *)
98 List.exists
99 (fun dir ->
100 (* Normalize the excluded directory path *)
101 let dir_fpath = Fpath.normalize (Fpath.v dir) in
102 (* Check if path has dir as a prefix segment *)
103 Fpath.is_prefix dir_fpath fpath
104 ||
105 (* Check if any parent directory has the excluded dir name *)
106 let rec check_parents p depth =
107 (* Safety check: limit recursion depth *)
108 if depth > 100 then false
109 else if
110 Fpath.is_root p
111 || Fpath.equal p (Fpath.v ".")
112 || Fpath.equal p (Fpath.v "..")
113 then false
114 else
115 let basename = Fpath.basename p in
116 if String.equal basename dir then true
117 else
118 let parent = Fpath.parent p in
119 (* Check if we've reached a fixed point *)
120 if Fpath.equal parent p then false
121 else check_parents parent (depth + 1)
122 in
123 check_parents fpath 0)
124 dirs
125
126(* Helper to handle modules and constructors that rely on build warnings *)
127let handle_module_or_constructor (symbol : symbol_info) =
128 Log.info (fun m ->
129 m "Skipping merlin occurrences for %s %s (relying on build warnings)"
130 (string_of_symbol_kind symbol.kind)
131 symbol.name);
132
133 {
134 symbol;
135 occurrences = -1;
136 (* Mark as -1 to indicate merlin check was skipped *)
137 locations = [];
138 usage_class = Unknown;
139 (* Cannot determine via occurrences *)
140 }
141
142(* Helper to query merlin for occurrences *)
143let query_merlin ~backend ~cache root_dir symbol =
144 let identifier_col = identifier_column ~cache symbol in
145 Log.info (fun m ->
146 m "Checking occurrences for %s at %a (adjusted to %d:%d)" symbol.name
147 pp_location symbol.location symbol.location.start_line identifier_col);
148 let result =
149 Merlin.occurrences backend ~file:symbol.location.file
150 ~line:symbol.location.start_line ~col:identifier_col ~scope:Project
151 in
152 match result with
153 | Error e ->
154 Log.debug (fun f ->
155 f "Merlin occurrences failed for %s: %s" symbol.name e);
156 (0, [])
157 | Ok occ_result ->
158 let locations =
159 List.map (convert_occurrence ~root_dir) occ_result.occurrences
160 in
161 (List.length locations, locations)
162
163(* Get base module name from file path *)
164let module_base file =
165 let basename = Filename.basename file in
166 try Filename.chop_extension basename with Invalid_argument _ -> basename
167
168(* Get the full module path (directory + module name) to distinguish between
169 modules with the same name in different directories *)
170let module_path file =
171 let dir = Filename.dirname file in
172 let base = module_base file in
173 Filename.concat dir base
174
175(* Count occurrences by location type *)
176type counts = {
177 in_defining_mli : int;
178 in_defining_ml : int;
179 external_uses : location list;
180}
181
182let count_occurrences_by_location defining_module_path locations =
183 let counts =
184 ref { in_defining_mli = 0; in_defining_ml = 0; external_uses = [] }
185 in
186
187 List.iter
188 (fun (loc : location) ->
189 Log.debug (fun m ->
190 m " Occurrence at %s:%d:%d" loc.file loc.start_line loc.start_col);
191 let module_path = module_path loc.file in
192 Log.debug (fun m ->
193 m " Module path: %s, defining module path: %s, equal: %b"
194 module_path defining_module_path
195 (module_path = defining_module_path));
196 if module_path = defining_module_path then (
197 if Filename.check_suffix loc.file ".mli" then (
198 counts :=
199 { !counts with in_defining_mli = !counts.in_defining_mli + 1 };
200 Log.debug (fun m -> m " -> Counted as in_defining_mli"))
201 else if Filename.check_suffix loc.file ".ml" then (
202 counts := { !counts with in_defining_ml = !counts.in_defining_ml + 1 };
203 Log.debug (fun m -> m " -> Counted as in_defining_ml")))
204 else (
205 counts := { !counts with external_uses = loc :: !counts.external_uses };
206 Log.debug (fun m -> m " -> Counted as external use")))
207 locations;
208 !counts
209
210(* Check if symbol appears to be a re-export *)
211let is_likely_reexport locations occurrence_count =
212 let mli_count =
213 List.fold_left
214 (fun acc (loc : location) ->
215 if Filename.check_suffix loc.file ".mli" then acc + 1 else acc)
216 0 locations
217 in
218 mli_count > 1 && occurrence_count <= mli_count + 1
219
220(* Check if all external uses are in excluded directories *)
221let all_external_uses_excluded exclude_dirs external_locs =
222 not
223 (List.exists
224 (fun (loc : location) ->
225 let is_excluded = is_excluded_file exclude_dirs loc.file in
226 Log.debug (fun m ->
227 m " Checking if %s is excluded: %b (exclude_dirs: %s)" loc.file
228 is_excluded
229 (String.concat ", " exclude_dirs));
230 not is_excluded)
231 external_locs)
232
233(* Classify symbol with no external uses *)
234let classify_no_external_uses (sym : symbol_info) counts =
235 Log.debug (fun m ->
236 m " No external uses for %s, in_defining_mli=%d, in_defining_ml=%d"
237 sym.name counts.in_defining_mli counts.in_defining_ml);
238 if counts.in_defining_mli = 1 then (
239 Log.debug (fun m -> m " -> Marking %s as Unused" sym.name);
240 Unused)
241 else (
242 Log.debug (fun m -> m " -> Marking %s as Used" sym.name);
243 Used)
244
245(* Classify symbol with external uses *)
246let classify_with_external_uses exclude_dirs (sym : symbol_info) external_locs =
247 Log.debug (fun m ->
248 m " %s has %d external uses" sym.name (List.length external_locs));
249 if all_external_uses_excluded exclude_dirs external_locs then (
250 Log.debug (fun m ->
251 m
252 " -> All external uses are excluded, marking as \
253 Used_only_in_excluded");
254 Used_only_in_excluded)
255 else (
256 Log.debug (fun m ->
257 m " -> Has non-excluded external uses, marking as Used");
258 Used)
259
260(* Classify usage for types, values, and fields *)
261let classify_type_value_field exclude_dirs (sym : symbol_info) occurrence_count
262 locations =
263 Log.debug (fun m ->
264 m " Analyzing %d occurrences for %s %s" occurrence_count
265 (string_of_symbol_kind sym.kind)
266 sym.name);
267
268 let defining_module_path = module_path sym.location.file in
269 let counts = count_occurrences_by_location defining_module_path locations in
270
271 Log.debug (fun m ->
272 m " Symbol %s: mli_in_defining=%d, external=%d" sym.name
273 counts.in_defining_mli
274 (List.length counts.external_uses));
275
276 (* Check for re-export pattern *)
277 if is_likely_reexport locations occurrence_count then (
278 Log.debug (fun m ->
279 m
280 "Symbol %s appears in multiple .mli files with only %d occurrences, \
281 likely a re-export"
282 sym.name occurrence_count);
283 Used)
284 else
285 (* Determine usage classification *)
286 match counts.external_uses with
287 | [] -> classify_no_external_uses sym counts
288 | external_locs ->
289 classify_with_external_uses exclude_dirs sym external_locs
290
291(* Classify how a symbol is used based on its occurrences *)
292let classify_usage exclude_dirs (symbol : symbol_info) occurrence_count
293 locations =
294 (* Special handling for different symbol kinds *)
295 match symbol.kind with
296 | Module | Constructor ->
297 (* Modules and constructors may not have all their uses tracked by merlin
298 E.g., when used in patterns or type annotations -1 means merlin check
299 was skipped, so we can't determine usage *)
300 if occurrence_count = -1 then Unknown
301 else if occurrence_count > 1 then Used
302 else Unused
303 | Type | Value | Field ->
304 (* For other symbols, analyze occurrence locations *)
305 classify_type_value_field exclude_dirs symbol occurrence_count locations
306
307(* Check a single symbol *)
308let check_single ~backend ~cache exclude_dirs root_dir (symbol : symbol_info) =
309 (* For modules and exceptions, skip merlin occurrences *)
310 (* Merlin's occurrence detection doesn't work reliably for:
311 - Exceptions: often returns 0 even when used
312 - Modules: need special handling for children
313 For now, mark as potentially unused and let build warnings decide *)
314 match symbol.kind with
315 | Module | Constructor -> handle_module_or_constructor symbol
316 | _ ->
317 (* For values and types, use merlin occurrences *)
318 let occurrence_count, locations =
319 query_merlin ~backend ~cache root_dir symbol
320 in
321
322 Log.debug (fun m ->
323 m "Extracted from merlin for %s: count=%d, locations=[%s]" symbol.name
324 occurrence_count
325 (locations
326 |> List.map (fun loc -> Fmt.str "%a" pp_location loc)
327 |> String.concat "; "));
328
329 let usage_class =
330 classify_usage exclude_dirs symbol occurrence_count locations
331 in
332
333 Log.debug (fun m ->
334 m "Symbol %s: %d occurrences, usage=%a, locations=%s" symbol.name
335 occurrence_count pp_usage_classification usage_class
336 (String.concat ", "
337 (List.map (fun loc -> Fmt.str "%a" pp_location loc) locations)));
338
339 (* Debug: print occurrence mapping summary *)
340 Log.info (fun m ->
341 m "OCCURRENCE MAPPING: %s@%a -> %d occurrences" symbol.name
342 pp_location symbol.location occurrence_count);
343
344 { symbol; occurrences = occurrence_count; locations; usage_class }
345
346(* Check occurrences for a list of symbols using merlin *)
347let check_bulk ~backend ~cache exclude_dirs root_dir
348 (symbols : symbol_info list) =
349 let total = List.length symbols in
350 let processed = ref 0 in
351
352 if total > 0 then
353 Log.info (fun m -> m "Checking occurrences for %d symbols" total);
354
355 let progress = Progress.v ~total in
356 let results =
357 List.map
358 (fun (symbol : symbol_info) ->
359 incr processed;
360 let module_name =
361 let basename = Filename.basename symbol.location.file in
362 let name =
363 try Filename.chop_extension basename
364 with Invalid_argument _ -> basename
365 in
366 String.capitalize_ascii name
367 in
368 Progress.update progress ~current:!processed
369 (Fmt.str "Checking symbol: %s.%s" module_name symbol.name);
370
371 check_single ~backend ~cache exclude_dirs root_dir symbol)
372 symbols
373 in
374 Progress.clear progress;
375 results