Find and remove dead code and unused APIs in OCaml projects
at main 375 lines 14 kB view raw
1(* Occurrence checking and classification for symbols *) 2 3open Types 4module Log = (val Logs.src_log (Logs.Src.create "prune.occurrence") : Logs.LOG) 5 6(* {2 Merlin type conversions} *) 7 8let convert_occurrence ~root_dir (occ : Merlin.occurrence) : location = 9 let file = occ.loc.file in 10 let file = 11 let root_dir_fpath = Fpath.v root_dir in 12 let path_fpath = Fpath.v file in 13 match Fpath.relativize ~root:root_dir_fpath path_fpath with 14 | None -> file 15 | Some rel -> 16 let rel_str = Fpath.to_string rel in 17 if String.length rel_str >= 2 && String.sub rel_str 0 2 = "./" then 18 String.sub rel_str 2 (String.length rel_str - 2) 19 else rel_str 20 in 21 Types.location ~line:occ.loc.start.line ~end_line:occ.loc.end_.line 22 ~start_col:occ.loc.start.col ~end_col:occ.loc.end_.col file 23 24(* Find the column position of an identifier in a type declaration *) 25let type_identifier_column line_content start_col = 26 (* After "type", skip whitespace and type parameters to find the identifier *) 27 let len = String.length line_content in 28 let rec skip_whitespace i = 29 if i >= len then i 30 else if line_content.[i] = ' ' || line_content.[i] = '\t' then 31 skip_whitespace (i + 1) 32 else i 33 in 34 35 let rec skip_type_params i paren_depth = 36 if i >= len then i 37 else 38 match line_content.[i] with 39 | '(' -> skip_type_params (i + 1) (paren_depth + 1) 40 | ')' -> skip_type_params (i + 1) (paren_depth - 1) 41 | (' ' | '\t') when paren_depth = 0 -> 42 (* Found space outside parens, we're past type params *) 43 skip_whitespace (i + 1) 44 | '\'' when paren_depth = 0 -> 45 (* Type variable like 'a, skip it *) 46 let i = i + 1 in 47 let rec skip_var j = 48 if j >= len then j 49 else 50 match line_content.[j] with 51 | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' -> skip_var (j + 1) 52 | _ -> j 53 in 54 skip_type_params (skip_var i) 0 55 | _ when paren_depth > 0 -> 56 (* Inside parens, skip everything *) 57 skip_type_params (i + 1) paren_depth 58 | ('a' .. 'z' | 'A' .. 'Z' | '_') when paren_depth = 0 -> 59 (* Found the start of an identifier *) 60 i 61 | _ -> skip_type_params (i + 1) paren_depth 62 in 63 64 (* Start after "type " *) 65 let start = start_col + 5 in 66 if start < len then skip_type_params (skip_whitespace start) 0 else start 67 68(* Get the column position of the identifier based on symbol kind *) 69let identifier_column ~cache (symbol : symbol_info) = 70 (* For .mli files, identifiers start after the keyword *) 71 let is_mli = 72 let len = String.length symbol.location.file in 73 len >= 4 && String.sub symbol.location.file (len - 4) 4 = ".mli" 74 in 75 let col = symbol.location.start_col in 76 if is_mli then 77 match symbol.kind with 78 | Value -> col + 4 (* "val " = 4 chars *) 79 | Type -> ( 80 (* For types, we need to handle type parameters *) 81 match 82 Cache.line cache symbol.location.file symbol.location.start_line 83 with 84 | Some line_content -> type_identifier_column line_content col 85 | None -> col + 5 (* Fallback to simple offset *)) 86 | Module -> col + 7 (* "module " = 7 chars *) 87 | Constructor -> col + 10 (* "exception " = 10 chars *) 88 | Field -> col (* Fields don't have a keyword prefix *) 89 else col (* For .ml files, use position as-is *) 90 91(* Check if a file or directory is in the excluded list *) 92let is_excluded_file exclude_dirs file_path = 93 match exclude_dirs with 94 | [] -> false 95 | dirs -> 96 let fpath = Fpath.v file_path in 97 (* Check if file is in any excluded directory *) 98 List.exists 99 (fun dir -> 100 (* Normalize the excluded directory path *) 101 let dir_fpath = Fpath.normalize (Fpath.v dir) in 102 (* Check if path has dir as a prefix segment *) 103 Fpath.is_prefix dir_fpath fpath 104 || 105 (* Check if any parent directory has the excluded dir name *) 106 let rec check_parents p depth = 107 (* Safety check: limit recursion depth *) 108 if depth > 100 then false 109 else if 110 Fpath.is_root p 111 || Fpath.equal p (Fpath.v ".") 112 || Fpath.equal p (Fpath.v "..") 113 then false 114 else 115 let basename = Fpath.basename p in 116 if String.equal basename dir then true 117 else 118 let parent = Fpath.parent p in 119 (* Check if we've reached a fixed point *) 120 if Fpath.equal parent p then false 121 else check_parents parent (depth + 1) 122 in 123 check_parents fpath 0) 124 dirs 125 126(* Helper to handle modules and constructors that rely on build warnings *) 127let handle_module_or_constructor (symbol : symbol_info) = 128 Log.info (fun m -> 129 m "Skipping merlin occurrences for %s %s (relying on build warnings)" 130 (string_of_symbol_kind symbol.kind) 131 symbol.name); 132 133 { 134 symbol; 135 occurrences = -1; 136 (* Mark as -1 to indicate merlin check was skipped *) 137 locations = []; 138 usage_class = Unknown; 139 (* Cannot determine via occurrences *) 140 } 141 142(* Helper to query merlin for occurrences *) 143let query_merlin ~backend ~cache root_dir symbol = 144 let identifier_col = identifier_column ~cache symbol in 145 Log.info (fun m -> 146 m "Checking occurrences for %s at %a (adjusted to %d:%d)" symbol.name 147 pp_location symbol.location symbol.location.start_line identifier_col); 148 let result = 149 Merlin.occurrences backend ~file:symbol.location.file 150 ~line:symbol.location.start_line ~col:identifier_col ~scope:Project 151 in 152 match result with 153 | Error e -> 154 Log.debug (fun f -> 155 f "Merlin occurrences failed for %s: %s" symbol.name e); 156 (0, []) 157 | Ok occ_result -> 158 let locations = 159 List.map (convert_occurrence ~root_dir) occ_result.occurrences 160 in 161 (List.length locations, locations) 162 163(* Get base module name from file path *) 164let module_base file = 165 let basename = Filename.basename file in 166 try Filename.chop_extension basename with Invalid_argument _ -> basename 167 168(* Get the full module path (directory + module name) to distinguish between 169 modules with the same name in different directories *) 170let module_path file = 171 let dir = Filename.dirname file in 172 let base = module_base file in 173 Filename.concat dir base 174 175(* Count occurrences by location type *) 176type counts = { 177 in_defining_mli : int; 178 in_defining_ml : int; 179 external_uses : location list; 180} 181 182let count_occurrences_by_location defining_module_path locations = 183 let counts = 184 ref { in_defining_mli = 0; in_defining_ml = 0; external_uses = [] } 185 in 186 187 List.iter 188 (fun (loc : location) -> 189 Log.debug (fun m -> 190 m " Occurrence at %s:%d:%d" loc.file loc.start_line loc.start_col); 191 let module_path = module_path loc.file in 192 Log.debug (fun m -> 193 m " Module path: %s, defining module path: %s, equal: %b" 194 module_path defining_module_path 195 (module_path = defining_module_path)); 196 if module_path = defining_module_path then ( 197 if Filename.check_suffix loc.file ".mli" then ( 198 counts := 199 { !counts with in_defining_mli = !counts.in_defining_mli + 1 }; 200 Log.debug (fun m -> m " -> Counted as in_defining_mli")) 201 else if Filename.check_suffix loc.file ".ml" then ( 202 counts := { !counts with in_defining_ml = !counts.in_defining_ml + 1 }; 203 Log.debug (fun m -> m " -> Counted as in_defining_ml"))) 204 else ( 205 counts := { !counts with external_uses = loc :: !counts.external_uses }; 206 Log.debug (fun m -> m " -> Counted as external use"))) 207 locations; 208 !counts 209 210(* Check if symbol appears to be a re-export *) 211let is_likely_reexport locations occurrence_count = 212 let mli_count = 213 List.fold_left 214 (fun acc (loc : location) -> 215 if Filename.check_suffix loc.file ".mli" then acc + 1 else acc) 216 0 locations 217 in 218 mli_count > 1 && occurrence_count <= mli_count + 1 219 220(* Check if all external uses are in excluded directories *) 221let all_external_uses_excluded exclude_dirs external_locs = 222 not 223 (List.exists 224 (fun (loc : location) -> 225 let is_excluded = is_excluded_file exclude_dirs loc.file in 226 Log.debug (fun m -> 227 m " Checking if %s is excluded: %b (exclude_dirs: %s)" loc.file 228 is_excluded 229 (String.concat ", " exclude_dirs)); 230 not is_excluded) 231 external_locs) 232 233(* Classify symbol with no external uses *) 234let classify_no_external_uses (sym : symbol_info) counts = 235 Log.debug (fun m -> 236 m " No external uses for %s, in_defining_mli=%d, in_defining_ml=%d" 237 sym.name counts.in_defining_mli counts.in_defining_ml); 238 if counts.in_defining_mli = 1 then ( 239 Log.debug (fun m -> m " -> Marking %s as Unused" sym.name); 240 Unused) 241 else ( 242 Log.debug (fun m -> m " -> Marking %s as Used" sym.name); 243 Used) 244 245(* Classify symbol with external uses *) 246let classify_with_external_uses exclude_dirs (sym : symbol_info) external_locs = 247 Log.debug (fun m -> 248 m " %s has %d external uses" sym.name (List.length external_locs)); 249 if all_external_uses_excluded exclude_dirs external_locs then ( 250 Log.debug (fun m -> 251 m 252 " -> All external uses are excluded, marking as \ 253 Used_only_in_excluded"); 254 Used_only_in_excluded) 255 else ( 256 Log.debug (fun m -> 257 m " -> Has non-excluded external uses, marking as Used"); 258 Used) 259 260(* Classify usage for types, values, and fields *) 261let classify_type_value_field exclude_dirs (sym : symbol_info) occurrence_count 262 locations = 263 Log.debug (fun m -> 264 m " Analyzing %d occurrences for %s %s" occurrence_count 265 (string_of_symbol_kind sym.kind) 266 sym.name); 267 268 let defining_module_path = module_path sym.location.file in 269 let counts = count_occurrences_by_location defining_module_path locations in 270 271 Log.debug (fun m -> 272 m " Symbol %s: mli_in_defining=%d, external=%d" sym.name 273 counts.in_defining_mli 274 (List.length counts.external_uses)); 275 276 (* Check for re-export pattern *) 277 if is_likely_reexport locations occurrence_count then ( 278 Log.debug (fun m -> 279 m 280 "Symbol %s appears in multiple .mli files with only %d occurrences, \ 281 likely a re-export" 282 sym.name occurrence_count); 283 Used) 284 else 285 (* Determine usage classification *) 286 match counts.external_uses with 287 | [] -> classify_no_external_uses sym counts 288 | external_locs -> 289 classify_with_external_uses exclude_dirs sym external_locs 290 291(* Classify how a symbol is used based on its occurrences *) 292let classify_usage exclude_dirs (symbol : symbol_info) occurrence_count 293 locations = 294 (* Special handling for different symbol kinds *) 295 match symbol.kind with 296 | Module | Constructor -> 297 (* Modules and constructors may not have all their uses tracked by merlin 298 E.g., when used in patterns or type annotations -1 means merlin check 299 was skipped, so we can't determine usage *) 300 if occurrence_count = -1 then Unknown 301 else if occurrence_count > 1 then Used 302 else Unused 303 | Type | Value | Field -> 304 (* For other symbols, analyze occurrence locations *) 305 classify_type_value_field exclude_dirs symbol occurrence_count locations 306 307(* Check a single symbol *) 308let check_single ~backend ~cache exclude_dirs root_dir (symbol : symbol_info) = 309 (* For modules and exceptions, skip merlin occurrences *) 310 (* Merlin's occurrence detection doesn't work reliably for: 311 - Exceptions: often returns 0 even when used 312 - Modules: need special handling for children 313 For now, mark as potentially unused and let build warnings decide *) 314 match symbol.kind with 315 | Module | Constructor -> handle_module_or_constructor symbol 316 | _ -> 317 (* For values and types, use merlin occurrences *) 318 let occurrence_count, locations = 319 query_merlin ~backend ~cache root_dir symbol 320 in 321 322 Log.debug (fun m -> 323 m "Extracted from merlin for %s: count=%d, locations=[%s]" symbol.name 324 occurrence_count 325 (locations 326 |> List.map (fun loc -> Fmt.str "%a" pp_location loc) 327 |> String.concat "; ")); 328 329 let usage_class = 330 classify_usage exclude_dirs symbol occurrence_count locations 331 in 332 333 Log.debug (fun m -> 334 m "Symbol %s: %d occurrences, usage=%a, locations=%s" symbol.name 335 occurrence_count pp_usage_classification usage_class 336 (String.concat ", " 337 (List.map (fun loc -> Fmt.str "%a" pp_location loc) locations))); 338 339 (* Debug: print occurrence mapping summary *) 340 Log.info (fun m -> 341 m "OCCURRENCE MAPPING: %s@%a -> %d occurrences" symbol.name 342 pp_location symbol.location occurrence_count); 343 344 { symbol; occurrences = occurrence_count; locations; usage_class } 345 346(* Check occurrences for a list of symbols using merlin *) 347let check_bulk ~backend ~cache exclude_dirs root_dir 348 (symbols : symbol_info list) = 349 let total = List.length symbols in 350 let processed = ref 0 in 351 352 if total > 0 then 353 Log.info (fun m -> m "Checking occurrences for %d symbols" total); 354 355 let progress = Progress.v ~total in 356 let results = 357 List.map 358 (fun (symbol : symbol_info) -> 359 incr processed; 360 let module_name = 361 let basename = Filename.basename symbol.location.file in 362 let name = 363 try Filename.chop_extension basename 364 with Invalid_argument _ -> basename 365 in 366 String.capitalize_ascii name 367 in 368 Progress.update progress ~current:!processed 369 (Fmt.str "Checking symbol: %s.%s" module_name symbol.name); 370 371 check_single ~backend ~cache exclude_dirs root_dir symbol) 372 symbols 373 in 374 Progress.clear progress; 375 results