Find and remove dead code and unused APIs in OCaml projects
at main 341 lines 12 kB view raw
(* Symbol discovery and analysis orchestration for prune *)

open Types
module Log = (val Logs.src_log (Logs.Src.create "prune.analysis") : Logs.LOG)

(* {2 Merlin type conversions} *)

(* Map a Merlin symbol kind onto the local [symbol_kind]. Exceptions are
   folded into [Constructor] and module types into [Module]; class-related
   kinds and labels have no local counterpart and yield [None]. *)
let convert_symbol_kind (kind : Merlin.symbol_kind) : symbol_kind option =
  match kind with
  | Value -> Some Value
  | Type -> Some Type
  | Module -> Some Module
  | Constructor -> Some Constructor
  | Exception -> Some Constructor
  | Field -> Some Field
  | Module_type -> Some Module
  | Class | Class_type | Method | Label -> None

(* Rebuild a Merlin location as a local [location] via [Types.location]. *)
let convert_location (loc : Merlin.location) : location =
  Types.location ~line:loc.start.line ~end_line:loc.end_.line
    ~start_col:loc.start.col ~end_col:loc.end_.col loc.file

(* Convert a Merlin outline item, and recursively its children, into a local
   [outline_item]. Returns [None] when the item's kind is not supported.
   Unsupported children are dropped, and an empty child list is represented
   as [None]. *)
let rec convert_outline_item (item : Merlin.outline_item) : outline_item option
    =
  match convert_symbol_kind item.kind with
  | None -> None
  | Some kind ->
      let location = convert_location item.location in
      let children =
        match item.kind with
        | Module_type ->
            (* Children of module types are part of the type signature, not
               independently removable exports *)
            None
        | _ -> (
            match item.children with
            | [] -> None
            | items ->
                let converted = List.filter_map convert_outline_item items in
                if converted = [] then None else Some converted)
      in
      Some { kind; name = item.name; location; children }

(* {2 Symbol extraction} *)

(* Flatten an outline item into the symbol for the item itself followed by
   the symbols of all its descendants. *)
let rec symbol_with_children ~cache item =
  let main_symbol =
    { name = item.name; kind = item.kind; location = item.location }
  in
  let child_symbols =
    match item.children with
    | None -> []
    | Some children -> List.concat_map (outline_item_to_symbol ~cache) children
  in
  main_symbol :: child_symbols

(* Convert a single outline item to a list of symbols. Module aliases are
   skipped entirely (an empty list is returned); every other item is
   flattened with [symbol_with_children]. *)
and outline_item_to_symbol ~cache (item : outline_item) =
  match item.kind with
  | Module -> (
      (* The alias check needs the file content. If the file cannot be loaded
         or has no cached content, fall back to treating the module as a
         regular symbol. *)
      match Cache.load cache item.location.file with
      | Error _ -> symbol_with_children ~cache item
      | Ok () -> (
          match Cache.file_content cache item.location.file with
          | None -> symbol_with_children ~cache item
          | Some content ->
              if
                Module_alias.is_module_alias ~cache item.location.file item.kind
                  item.location content
              then (
                Log.debug (fun m ->
                    m "Skipping module alias: %s at %a" item.name pp_location
                      item.location);
                [])
              else symbol_with_children ~cache item))
  | Value | Type | Constructor | Field -> symbol_with_children ~cache item

(* {2 Symbol discovery} *)

(* Get all exported symbols from a single file using Merlin's outline. A
   failing outline query is logged as a warning and produces an empty
   list. *)
let file_symbols ~backend ~cache file_str =
  match Merlin.outline backend ~file:file_str with
  | Error e ->
      Log.warn (fun m -> m "Merlin outline failed for %s: %s" file_str e);
      []
  | Ok merlin_items ->
      let symbols =
        merlin_items
        |> List.filter_map convert_outline_item
        |> List.concat_map (outline_item_to_symbol ~cache)
      in
      (* Debug: print outline summary *)
      Log.info (fun m ->
          m "Outline summary for %s: found %d symbols" file_str
            (List.length symbols));
      List.iteri
        (fun i (symbol : symbol_info) ->
          Log.debug (fun m ->
              m " [%d] %s (%s) at %a" (i + 1) symbol.name
                (string_of_symbol_kind symbol.kind)
                pp_location symbol.location))
        symbols;
      symbols

(* {2 Main analysis orchestration} *)

(* Keep only the symbol kinds that are analysed further: values, types,
   constructors and modules. Record fields are dropped. *)
let filter_relevant_symbols all_symbols =
  let relevant_symbols =
    List.filter
      (fun (s : symbol_info) ->
        match s.kind with
        | Value | Type | Constructor | Module -> true
        | Field -> false)
      all_symbols
  in
  if relevant_symbols <> [] then
    Log.info (fun m ->
        m
          "Filtering to %d relevant symbols (values, types, exceptions, \
           modules)"
          (List.length relevant_symbols));
  relevant_symbols

(* Group occurrence info by the file of the symbol, as an association list
   from file name to occurrences. The file touched most recently comes
   first, and occurrences within a file are in reverse input order. *)
let group_occurrences_by_file occurrence_infos =
  let by_file =
    List.fold_left
      (fun acc occ ->
        let file = occ.symbol.location.file in
        let existing = Option.value (List.assoc_opt file acc) ~default:[] in
        (file, occ :: existing) :: List.remove_assoc file acc)
      [] occurrence_infos
  in
  if by_file <> [] then
    Log.info (fun m -> m "Grouped into %d files" (List.length by_file));
  by_file

(* Tell whether a module contains, at any nesting depth, a symbol with at
   least one occurrence. Always [false] for non-module symbols. Children are
   found positionally: symbols of the same file, with a different name,
   whose start line lies after the module's start line and no later than its
   end line. *)
let rec has_used_children all_occurrence_data module_occ =
  match module_occ.symbol.kind with
  | Module ->
      let module_start = module_occ.symbol.location.start_line in
      let module_end = module_occ.symbol.location.end_line in

      (* Find all symbols within this module's range *)
      let children =
        List.filter
          (fun occ ->
            occ.symbol.location.file = module_occ.symbol.location.file
            && occ.symbol.location.start_line > module_start
            && occ.symbol.location.start_line <= module_end
            && occ.symbol.name <> module_occ.symbol.name)
          all_occurrence_data
      in

      Log.debug (fun m ->
          m "Module %s has %d children" module_occ.symbol.name
            (List.length children));

      (* A child counts if it is used itself (occurrences > 0) or if it is a
         module that has used children (recursive check) *)
      List.exists
        (fun child ->
          if child.occurrences > 0 then (
            Log.debug (fun m ->
                m " Child %s is used (%d occurrences)" child.symbol.name
                  child.occurrences);
            true)
          else has_used_children all_occurrence_data child)
        children
  | Value | Type | Constructor | Field -> false

(* Remove from [unused_symbols] every module that has used children
   according to [has_used_children]. Non-module symbols are always kept. *)
let filter_modules_with_used unused_symbols all_occurrence_data =
  if unused_symbols <> [] then
    Log.info (fun m ->
        m "Filtering modules with used children: %d unused symbols to check"
          (List.length unused_symbols));

  List.filter
    (fun occ ->
      match occ.symbol.kind with
      | Module ->
          let should_keep = not (has_used_children all_occurrence_data occ) in
          Log.debug (fun m ->
              m "Module %s: has_used_children=%b, keeping=%b" occ.symbol.name
                (not should_keep) should_keep);
          should_keep
      | Value | Type | Constructor | Field ->
          (* Keep all non-module symbols in the unused list *)
          true)
    unused_symbols

(* Collect the exported symbols of [files] and the occurrence data of the
   relevant ones. Returns [(all_symbols, occurrence_data)], where
   [all_symbols] is unfiltered. A single Merlin backend serves all queries
   and is closed on exit, including when an exception escapes. *)
let symbols_and_occurrences ~cache exclude_dirs root_dir files =
  let total = List.length files in
  if total > 0 then
    Log.info (fun m -> m "Analyzing %d files for symbols" total);

  (* Create Merlin backend once for all queries *)
  let backend = Merlin.v ~backend:Lib ~root_dir () in
  Fun.protect
    ~finally:(fun () -> Merlin.close backend)
    (fun () ->
      (* Get exported symbols from all files with progress *)
      let processed = ref 0 in
      let root_path = Fpath.v root_dir in
      let progress = Progress.v ~total in

      let all_symbols =
        List.fold_left
          (fun acc file ->
            incr processed;
            (* Show paths relative to the project root when possible *)
            let display_path =
              match Fpath.relativize ~root:root_path (Fpath.v file) with
              | Some rel -> Fpath.to_string rel
              | None -> file
            in
            Progress.update progress ~current:!processed
              (Fmt.str "Processing file: %s" display_path);

            let symbols = file_symbols ~backend ~cache file in
            symbols @ acc)
          [] files
      in
      Progress.clear progress;

      if all_symbols <> [] then
        Log.info (fun m ->
            m "Found %d total exported symbols" (List.length all_symbols));

      let relevant_symbols = filter_relevant_symbols all_symbols in
      let occurrence_data =
        Occurrence.check_bulk ~backend ~cache exclude_dirs root_dir
          relevant_symbols
      in
      (all_symbols, occurrence_data))

(* Find the symbol names that are declared in more than one .mli file *)
let multi_mli_symbols occurrence_data =
  let name_to_files = Hashtbl.create 10 in
  List.iter
    (fun occ ->
      let file = occ.symbol.location.file in
      if Filename.check_suffix file ".mli" then
        let files =
          Option.value (Hashtbl.find_opt name_to_files occ.symbol.name)
            ~default:[]
        in
        if not (List.mem file files) then
          Hashtbl.replace name_to_files occ.symbol.name (file :: files))
    occurrence_data;

  Hashtbl.fold
    (fun name files acc ->
      match files with _ :: _ :: _ -> name :: acc | [] | [ _ ] -> acc)
    name_to_files []

(* Reclassify as [Used] every [Unused] occurrence whose symbol name is in
   [multi_mli_names]. The data is returned unchanged when that list is
   empty. *)
let fix_multi_mli_symbols occurrence_data multi_mli_names =
  match multi_mli_names with
  | [] -> occurrence_data
  | _ :: _ ->
      Log.info (fun m ->
          m "Found symbols in multiple .mli files: %s"
            (String.concat ", " multi_mli_names));
      List.map
        (fun occ ->
          if
            List.mem occ.symbol.name multi_mli_names && occ.usage_class = Unused
          then { occ with usage_class = Used }
          else occ)
        occurrence_data

(* Split occurrence data into [(unused, excluded_only)]: the symbols
   classified [Unused] and those classified [Used_only_in_excluded]. All
   other symbols appear in neither list. *)
let categorize_symbols occurrence_data =
  let unused =
    List.filter
      (fun occ ->
        match occ.usage_class with
        | Unused -> true
        | Unknown | Used | Used_only_in_excluded -> false)
      occurrence_data
  in
  let excluded_only =
    List.filter
      (fun occ ->
        match occ.usage_class with
        | Used_only_in_excluded -> true
        | Unused | Unknown | Used -> false)
      occurrence_data
  in
  (unused, excluded_only)

(* Analyze symbols from files and find unused ones. Returns a pair of
   per-file groupings: the unused exports, and the exports that are used
   only in excluded directories. *)
let analyze_files_for_unused ~cache exclude_dirs root_dir files =
  let _all_symbols, occurrence_data =
    symbols_and_occurrences ~cache exclude_dirs root_dir files
  in

  (* Post-process: if a symbol name appears in multiple .mli files, mark all as
     Used. This handles both re-exports and symbols accessible through module
     aliases. *)
  let occurrence_data_fixed =
    fix_multi_mli_symbols occurrence_data (multi_mli_symbols occurrence_data)
  in

  let unused, excluded_only = categorize_symbols occurrence_data_fixed in

  (* Filter out modules that have used children *)
  let filtered_unused = filter_modules_with_used unused occurrence_data_fixed in

  Log.info (fun m -> m "Found %d unused exports" (List.length filtered_unused));
  if excluded_only <> [] then
    Log.info (fun m ->
        m "Found %d exports used only in excluded dirs"
          (List.length excluded_only));

  ( group_occurrences_by_file filtered_unused,
    group_occurrences_by_file excluded_only )

(* Return the occurrence data of every relevant symbol exported by [files],
   or the error reported by [System.validate_dune_project] for [root_dir]. *)
let all_symbol_occurrences ~cache ?(exclude_dirs = []) root_dir files =
  match System.validate_dune_project root_dir with
  | Error (`Msg e) -> Error (`Msg e)
  | Ok () ->
      Log.info (fun m -> m "Getting all symbol occurrences");
      let _all_symbols, occurrence_data =
        symbols_and_occurrences ~cache exclude_dirs root_dir files
      in
      Ok occurrence_data

(* Run the unused-export analysis on [files], or return the error reported
   by [System.validate_dune_project] for [root_dir]. *)
let unused_exports ~cache ?(exclude_dirs = []) root_dir files =
  match System.validate_dune_project root_dir with
  | Error (`Msg e) -> Error (`Msg e)
  | Ok () ->
      Log.info (fun m -> m "Starting analysis");
      Ok (analyze_files_for_unused ~cache exclude_dirs root_dir files)