···13 mutable char_count : int;
14}
1516-let max_chars = 30720
17let min_chars = 1024
1819(* Elements whose text content we skip for language detection - O(1) lookup *)
20let skip_elements =
21 Attr_utils.hashtbl_of_list [
22- "a"; "button"; "details"; "figcaption"; "form"; "li"; "nav";
23- "pre"; "script"; "select"; "span"; "style"; "summary";
24- "td"; "textarea"; "th"; "tr"
25 ]
2627let is_skip_element name = Hashtbl.mem skip_elements name
···13 mutable char_count : int;
14}
(* Character-count thresholds used by language detection.
   [max_chars] was lowered from 30720 to 8192 because language detection
   was slow with the larger value.
   NOTE(review): presumably these bound the [char_count] of collected text;
   confirm against the code that reads them. *)
let max_chars = 8 * 1024
let min_chars = 1 * 1024
(* Names of the elements whose text content is left out of language
   detection. They are kept in a hash table so that membership checks are
   O(1). *)
let skip_elements =
  let names =
    [
      "a"; "button"; "code"; "details"; "figcaption"; "form"; "kbd"; "li";
      "nav"; "pre"; "samp"; "script"; "select"; "span"; "style"; "summary";
      "td"; "textarea"; "th"; "tr"; "var"; "xmp";
    ]
  in
  Attr_utils.hashtbl_of_list names
(* [is_skip_element name] is [true] when [name] is a key of
   [skip_elements], and [false] otherwise. *)
let is_skip_element name =
  match Hashtbl.find_opt skip_elements name with
  | Some _ -> true
  | None -> false
+20-6
lib/check/specialized/normalization_checker.ml
···23 Validates that text content is in Unicode Normalization Form C (NFC). *)
45-type state = unit [@@warning "-34"]
0067-let create () = ()
8-let reset _state = ()
0000910(** Normalize a string to NFC form using uunf. *)
11let normalize_nfc text =
···40 if end_pos = len then s
41 else String.sub s 0 end_pos
4243-let start_element _state ~element:_ _collector = ()
0004445-let end_element _state ~tag:_ _collector = ()
0004647-let characters _state text collector =
0048 (* Skip empty text or whitespace-only text *)
49 let text_trimmed = String.trim text in
50 if String.length text_trimmed = 0 then ()
···23 Validates that text content is in Unicode Normalization Form C (NFC). *)
(** Checker state. [in_raw_text] counts how many elements accepted by
    [is_raw_text_element] are currently open: [start_element] increments it
    and [end_element] decrements it. *)
type state = {
  mutable in_raw_text : int;
}

(** [create ()] returns a fresh state whose raw-text depth is zero. *)
let create () : state = { in_raw_text = 0 }

(** [reset state] sets the raw-text depth of [state] back to zero. *)
let reset state =
  state.in_raw_text <- 0
(** [is_raw_text_element name] is [true] exactly when [name] is ["style"],
    ["script"], ["xmp"] or ["textarea"]. Text found while such an element is
    open is skipped by the normalization check. The comparison is
    case-sensitive. *)
let is_raw_text_element = function
  | "style" | "script" | "xmp" | "textarea" -> true
  | _ -> false
1516(** Normalize a string to NFC form using uunf. *)
17let normalize_nfc text =
···46 if end_pos = len then s
47 else String.sub s 0 end_pos
(** [start_element state ~element _collector] increments the raw-text depth
    of [state] when the tag of [element] names a raw-text element (see
    [is_raw_text_element]); any other element leaves [state] unchanged.
    The collector argument is not used. *)
let start_element state ~element _collector =
  let opens_raw_text =
    is_raw_text_element (Tag.tag_to_string element.Element.tag)
  in
  if opens_raw_text then state.in_raw_text <- succ state.in_raw_text
(** [end_element state ~tag _collector] decrements the raw-text depth of
    [state] when [tag] names a raw-text element (see [is_raw_text_element]).
    The depth is never taken below zero, so an unmatched end tag leaves
    [state] unchanged. The collector argument is not used. *)
let end_element state ~tag _collector =
  let closes_raw_text = is_raw_text_element (Tag.tag_to_string tag) in
  let depth = state.in_raw_text in
  if closes_raw_text && depth > 0 then state.in_raw_text <- pred depth
5859+let characters state text collector =
60+ (* Skip text inside raw text elements like style/script *)
61+ if state.in_raw_text > 0 then () else
62 (* Skip empty text or whitespace-only text *)
63 let text_trimmed = String.trim text in
64 if String.length text_trimmed = 0 then ()
+6-1
test/test_roundtrip.ml
···129 Printf.printf "Running roundtrip tests...\n%!";
130131 (* Run tests *)
132- let results = List.map test_file test_files in
00000133134 (* Categorize results *)
135 let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
···129 Printf.printf "Running roundtrip tests...\n%!";
130131 (* Run tests *)
(* Run every test file in list order while redrawing a single progress line
   of the form "[i/total] file-name". The line is redrawn with '\r', which
   moves the cursor but erases nothing, so a name shorter than the previous
   one is padded with spaces to overwrite the leftover tail. Lengths are in
   bytes, which is exact for ASCII names and approximate otherwise. *)
let total = List.length test_files in
let last_label_len = ref 0 in
let results = List.mapi (fun i path ->
  let label = Filename.basename path in
  let label_len = String.length label in
  let padding = String.make (max 0 (!last_label_len - label_len)) ' ' in
  Printf.printf "\r[%d/%d] %s%s%!" (i + 1) total label padding;
  last_label_len := label_len;
  test_file path
) test_files in
(* Finish the progress line so later output starts on a fresh line. *)
Printf.printf "\n%!";
138139 (* Categorize results *)
140 let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
+6-1
test/test_validator.ml
···426 Printf.printf "Found %d test files\n%!" (List.length tests);
427428 Printf.printf "Running tests...\n%!";
429- let results = List.map (run_test messages) tests in
00000430431 (* Print failing isvalid tests *)
432 let failing_isvalid = List.filter (fun r ->
···426 Printf.printf "Found %d test files\n%!" (List.length tests);
427428 Printf.printf "Running tests...\n%!";
(* Run every test in list order while redrawing a single progress line of
   the form "[i/total] relative-path". The line is redrawn with '\r', which
   moves the cursor but erases nothing, so a path shorter than the previous
   one is padded with spaces to overwrite the leftover tail. Lengths are in
   bytes, which is exact for ASCII paths and approximate otherwise. *)
let total = List.length tests in
let last_label_len = ref 0 in
let results = List.mapi (fun i test ->
  let label = test.relative_path in
  let label_len = String.length label in
  let padding = String.make (max 0 (!last_label_len - label_len)) ' ' in
  Printf.printf "\r[%d/%d] %s%s%!" (i + 1) total label padding;
  last_label_len := label_len;
  run_test messages test
) tests in
(* Finish the progress line so later output starts on a fresh line. *)
Printf.printf "\n%!";
435436 (* Print failing isvalid tests *)
437 let failing_isvalid = List.filter (fun r ->