···1+(* Roundtrip test: Parse -> Serialize -> Re-parse -> Validate
2+3+ This test validates that the HTML5 serializer produces valid HTML5
4+ by roundtripping the validator test suite files through:
5+ 1. Parse with HTML5 parser
6+ 2. Serialize DOM back to HTML
7+ 3. Re-parse the serialized HTML
8+ 4. Validate the result
9+10+ For "isvalid" tests: the roundtripped document should still be valid
11+ For "novalid/haswarn" tests: we just verify the roundtrip works without crashes
12+*)
13+14+module Report = Test_report
15+16+(* Test result type *)
17+type test_result = {
18+ filename : string;
19+ test_type : string; (* isvalid, novalid, haswarn *)
20+ original_valid : bool; (* Did original pass validation? *)
21+ roundtrip_valid : bool; (* Did roundtripped doc pass validation? *)
22+ roundtrip_ok : bool; (* Did roundtrip work without errors? *)
23+ original_errors : int;
24+ roundtrip_errors : int;
25+ parse_error : string option;
26+}
27+28+(* Get test type from filename *)
29+let get_test_type filename =
30+ if Astring.String.is_suffix ~affix:"-isvalid.html" filename ||
31+ Astring.String.is_suffix ~affix:"-isvalid.xhtml" filename then "isvalid"
32+ else if Astring.String.is_suffix ~affix:"-novalid.html" filename ||
33+ Astring.String.is_suffix ~affix:"-novalid.xhtml" filename then "novalid"
34+ else if Astring.String.is_suffix ~affix:"-haswarn.html" filename ||
35+ Astring.String.is_suffix ~affix:"-haswarn.xhtml" filename then "haswarn"
36+ else "unknown"
37+38+(* Count errors in validation result *)
39+let count_errors messages =
40+ List.length (List.filter (fun (m : Html5_checker.Message.t) ->
41+ m.severity = Html5_checker.Message.Error
42+ ) messages)
43+44+(* Serialize a document to HTML string *)
45+let serialize_document doc =
46+ Html5rw.Dom.to_html ~pretty:false doc
47+48+(* Run roundtrip test on a single file *)
49+let test_file path =
50+ let filename = Filename.basename path in
51+ let test_type = get_test_type filename in
52+53+ try
54+ (* Read file content *)
55+ let content =
56+ let ic = open_in path in
57+ let n = in_channel_length ic in
58+ let s = really_input_string ic n in
59+ close_in ic;
60+ s
61+ in
62+63+ (* Parse original *)
64+ let original_result = Html5rw.parse_bytes (Bytes.of_string content) in
65+ let original_doc = Html5rw.root original_result in
66+67+ (* Validate original *)
68+ let checker_result = Html5_checker.check_dom ~system_id:path original_result in
69+ let original_messages = Html5_checker.messages checker_result in
70+ let original_errors = count_errors original_messages in
71+ let original_valid = original_errors = 0 in
72+73+ (* Serialize to HTML *)
74+ let serialized = serialize_document original_doc in
75+76+ (* Re-parse serialized HTML *)
77+ let roundtrip_result = Html5rw.parse_bytes (Bytes.of_string serialized) in
78+79+ (* Validate roundtripped document *)
80+ let roundtrip_checker = Html5_checker.check_dom ~system_id:path roundtrip_result in
81+ let roundtrip_messages = Html5_checker.messages roundtrip_checker in
82+ let roundtrip_errors = count_errors roundtrip_messages in
83+ let roundtrip_valid = roundtrip_errors = 0 in
84+85+ {
86+ filename;
87+ test_type;
88+ original_valid;
89+ roundtrip_valid;
90+ roundtrip_ok = true;
91+ original_errors;
92+ roundtrip_errors;
93+ parse_error = None;
94+ }
95+ with e ->
96+ {
97+ filename;
98+ test_type;
99+ original_valid = false;
100+ roundtrip_valid = false;
101+ roundtrip_ok = false;
102+ original_errors = 0;
103+ roundtrip_errors = 0;
104+ parse_error = Some (Printexc.to_string e);
105+ }
106+107+(* Recursively find all test files *)
108+let rec find_test_files dir =
109+ let files = Sys.readdir dir |> Array.to_list in
110+ List.concat_map (fun f ->
111+ let path = Filename.concat dir f in
112+ if Sys.is_directory path then
113+ find_test_files path
114+ else if Astring.String.is_suffix ~affix:"-isvalid.html" f ||
115+ Astring.String.is_suffix ~affix:"-novalid.html" f ||
116+ Astring.String.is_suffix ~affix:"-haswarn.html" f then
117+ [path]
118+ else
119+ []
120+ ) files
121+122+let () =
123+ let test_dir = Sys.argv.(1) in
124+125+ Printf.printf "Discovering test files...\n%!";
126+ let test_files = find_test_files test_dir in
127+ Printf.printf "Found %d test files\n%!" (List.length test_files);
128+129+ Printf.printf "Running roundtrip tests...\n%!";
130+131+ (* Run tests *)
132+ let results = List.map test_file test_files in
133+134+ (* Categorize results *)
135+ let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
136+ let novalid_tests = List.filter (fun r -> r.test_type = "novalid") results in
137+ let haswarn_tests = List.filter (fun r -> r.test_type = "haswarn") results in
138+139+ (* For isvalid tests: check that roundtripped document is still valid *)
140+ let isvalid_passed = List.filter (fun r ->
141+ r.roundtrip_ok && r.roundtrip_valid
142+ ) isvalid_tests in
143+144+ (* For novalid/haswarn tests: just check roundtrip works *)
145+ let novalid_passed = List.filter (fun r -> r.roundtrip_ok) novalid_tests in
146+ let haswarn_passed = List.filter (fun r -> r.roundtrip_ok) haswarn_tests in
147+148+ (* Print failures for isvalid tests *)
149+ let isvalid_failed = List.filter (fun r ->
150+ not r.roundtrip_ok || not r.roundtrip_valid
151+ ) isvalid_tests in
152+153+ if List.length isvalid_failed > 0 then begin
154+ Printf.printf "\n=== Failing isvalid roundtrip tests (first 20) ===\n";
155+ List.iteri (fun i r ->
156+ if i < 20 then begin
157+ match r.parse_error with
158+ | Some err -> Printf.printf "%s: %s\n" r.filename err
159+ | None ->
160+ Printf.printf "%s: original_valid=%b, roundtrip_valid=%b (errors: %d -> %d)\n"
161+ r.filename r.original_valid r.roundtrip_valid
162+ r.original_errors r.roundtrip_errors
163+ end
164+ ) isvalid_failed
165+ end;
166+167+ (* Print roundtrip failures for all tests *)
168+ let roundtrip_failures = List.filter (fun r -> not r.roundtrip_ok) results in
169+ if List.length roundtrip_failures > 0 then begin
170+ Printf.printf "\n=== Roundtrip failures (first 20) ===\n";
171+ List.iteri (fun i r ->
172+ if i < 20 then
173+ Printf.printf "%s: %s\n" r.filename
174+ (Option.value ~default:"unknown error" r.parse_error)
175+ ) roundtrip_failures
176+ end;
177+178+ (* Summary *)
179+ Printf.printf "\n=== Roundtrip Test Results ===\n";
180+ Printf.printf "isvalid tests: %d/%d passed (roundtripped and still valid)\n"
181+ (List.length isvalid_passed) (List.length isvalid_tests);
182+ Printf.printf "novalid tests: %d/%d roundtripped successfully\n"
183+ (List.length novalid_passed) (List.length novalid_tests);
184+ Printf.printf "haswarn tests: %d/%d roundtripped successfully\n"
185+ (List.length haswarn_passed) (List.length haswarn_tests);
186+187+ let total_roundtrip_ok = List.length (List.filter (fun r -> r.roundtrip_ok) results) in
188+ Printf.printf "\nTotal: %d/%d roundtripped without errors\n"
189+ total_roundtrip_ok (List.length results);
190+ Printf.printf "isvalid preservation: %d/%d still valid after roundtrip\n"
191+ (List.length isvalid_passed) (List.length isvalid_tests);
192+193+ (* Exit with error if isvalid tests fail validation after roundtrip *)
194+ let exit_code = if List.length isvalid_failed > 0 then 1 else 0 in
195+ exit exit_code