···11+(* Roundtrip test: Parse -> Serialize -> Re-parse -> Validate
22+33+ This test validates that the HTML5 serializer produces valid HTML5
44+ by roundtripping the validator test suite files through:
55+ 1. Parse with HTML5 parser
66+ 2. Serialize DOM back to HTML
77+ 3. Re-parse the serialized HTML
88+ 4. Validate the result
99+1010+ For "isvalid" tests: the roundtripped document should still be valid
1111+ For "novalid/haswarn" tests: we just verify the roundtrip works without crashes
1212+*)
1313+1414+module Report = Test_report
1515+1616+(* Test result type *)
1717+type test_result = {
1818+ filename : string;
1919+ test_type : string; (* isvalid, novalid, haswarn *)
2020+ original_valid : bool; (* Did original pass validation? *)
2121+ roundtrip_valid : bool; (* Did roundtripped doc pass validation? *)
2222+ roundtrip_ok : bool; (* Did roundtrip work without errors? *)
2323+ original_errors : int;
2424+ roundtrip_errors : int;
2525+ parse_error : string option;
2626+}
2727+2828+(* Get test type from filename *)
2929+let get_test_type filename =
3030+ if Astring.String.is_suffix ~affix:"-isvalid.html" filename ||
3131+ Astring.String.is_suffix ~affix:"-isvalid.xhtml" filename then "isvalid"
3232+ else if Astring.String.is_suffix ~affix:"-novalid.html" filename ||
3333+ Astring.String.is_suffix ~affix:"-novalid.xhtml" filename then "novalid"
3434+ else if Astring.String.is_suffix ~affix:"-haswarn.html" filename ||
3535+ Astring.String.is_suffix ~affix:"-haswarn.xhtml" filename then "haswarn"
3636+ else "unknown"
3737+3838+(* Count errors in validation result *)
3939+let count_errors messages =
4040+ List.length (List.filter (fun (m : Html5_checker.Message.t) ->
4141+ m.severity = Html5_checker.Message.Error
4242+ ) messages)
4343+4444+(* Serialize a document to HTML string *)
4545+let serialize_document doc =
4646+ Html5rw.Dom.to_html ~pretty:false doc
4747+4848+(* Run roundtrip test on a single file *)
4949+let test_file path =
5050+ let filename = Filename.basename path in
5151+ let test_type = get_test_type filename in
5252+5353+ try
5454+ (* Read file content *)
5555+ let content =
5656+ let ic = open_in path in
5757+ let n = in_channel_length ic in
5858+ let s = really_input_string ic n in
5959+ close_in ic;
6060+ s
6161+ in
6262+6363+ (* Parse original *)
6464+ let original_result = Html5rw.parse_bytes (Bytes.of_string content) in
6565+ let original_doc = Html5rw.root original_result in
6666+6767+ (* Validate original *)
6868+ let checker_result = Html5_checker.check_dom ~system_id:path original_result in
6969+ let original_messages = Html5_checker.messages checker_result in
7070+ let original_errors = count_errors original_messages in
7171+ let original_valid = original_errors = 0 in
7272+7373+ (* Serialize to HTML *)
7474+ let serialized = serialize_document original_doc in
7575+7676+ (* Re-parse serialized HTML *)
7777+ let roundtrip_result = Html5rw.parse_bytes (Bytes.of_string serialized) in
7878+7979+ (* Validate roundtripped document *)
8080+ let roundtrip_checker = Html5_checker.check_dom ~system_id:path roundtrip_result in
8181+ let roundtrip_messages = Html5_checker.messages roundtrip_checker in
8282+ let roundtrip_errors = count_errors roundtrip_messages in
8383+ let roundtrip_valid = roundtrip_errors = 0 in
8484+8585+ {
8686+ filename;
8787+ test_type;
8888+ original_valid;
8989+ roundtrip_valid;
9090+ roundtrip_ok = true;
9191+ original_errors;
9292+ roundtrip_errors;
9393+ parse_error = None;
9494+ }
9595+ with e ->
9696+ {
9797+ filename;
9898+ test_type;
9999+ original_valid = false;
100100+ roundtrip_valid = false;
101101+ roundtrip_ok = false;
102102+ original_errors = 0;
103103+ roundtrip_errors = 0;
104104+ parse_error = Some (Printexc.to_string e);
105105+ }
106106+107107+(* Recursively find all test files *)
108108+let rec find_test_files dir =
109109+ let files = Sys.readdir dir |> Array.to_list in
110110+ List.concat_map (fun f ->
111111+ let path = Filename.concat dir f in
112112+ if Sys.is_directory path then
113113+ find_test_files path
114114+ else if Astring.String.is_suffix ~affix:"-isvalid.html" f ||
115115+ Astring.String.is_suffix ~affix:"-novalid.html" f ||
116116+ Astring.String.is_suffix ~affix:"-haswarn.html" f then
117117+ [path]
118118+ else
119119+ []
120120+ ) files
121121+122122+let () =
123123+ let test_dir = Sys.argv.(1) in
124124+125125+ Printf.printf "Discovering test files...\n%!";
126126+ let test_files = find_test_files test_dir in
127127+ Printf.printf "Found %d test files\n%!" (List.length test_files);
128128+129129+ Printf.printf "Running roundtrip tests...\n%!";
130130+131131+ (* Run tests *)
132132+ let results = List.map test_file test_files in
133133+134134+ (* Categorize results *)
135135+ let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
136136+ let novalid_tests = List.filter (fun r -> r.test_type = "novalid") results in
137137+ let haswarn_tests = List.filter (fun r -> r.test_type = "haswarn") results in
138138+139139+ (* For isvalid tests: check that roundtripped document is still valid *)
140140+ let isvalid_passed = List.filter (fun r ->
141141+ r.roundtrip_ok && r.roundtrip_valid
142142+ ) isvalid_tests in
143143+144144+ (* For novalid/haswarn tests: just check roundtrip works *)
145145+ let novalid_passed = List.filter (fun r -> r.roundtrip_ok) novalid_tests in
146146+ let haswarn_passed = List.filter (fun r -> r.roundtrip_ok) haswarn_tests in
147147+148148+ (* Print failures for isvalid tests *)
149149+ let isvalid_failed = List.filter (fun r ->
150150+ not r.roundtrip_ok || not r.roundtrip_valid
151151+ ) isvalid_tests in
152152+153153+ if List.length isvalid_failed > 0 then begin
154154+ Printf.printf "\n=== Failing isvalid roundtrip tests (first 20) ===\n";
155155+ List.iteri (fun i r ->
156156+ if i < 20 then begin
157157+ match r.parse_error with
158158+ | Some err -> Printf.printf "%s: %s\n" r.filename err
159159+ | None ->
160160+ Printf.printf "%s: original_valid=%b, roundtrip_valid=%b (errors: %d -> %d)\n"
161161+ r.filename r.original_valid r.roundtrip_valid
162162+ r.original_errors r.roundtrip_errors
163163+ end
164164+ ) isvalid_failed
165165+ end;
166166+167167+ (* Print roundtrip failures for all tests *)
168168+ let roundtrip_failures = List.filter (fun r -> not r.roundtrip_ok) results in
169169+ if List.length roundtrip_failures > 0 then begin
170170+ Printf.printf "\n=== Roundtrip failures (first 20) ===\n";
171171+ List.iteri (fun i r ->
172172+ if i < 20 then
173173+ Printf.printf "%s: %s\n" r.filename
174174+ (Option.value ~default:"unknown error" r.parse_error)
175175+ ) roundtrip_failures
176176+ end;
177177+178178+ (* Summary *)
179179+ Printf.printf "\n=== Roundtrip Test Results ===\n";
180180+ Printf.printf "isvalid tests: %d/%d passed (roundtripped and still valid)\n"
181181+ (List.length isvalid_passed) (List.length isvalid_tests);
182182+ Printf.printf "novalid tests: %d/%d roundtripped successfully\n"
183183+ (List.length novalid_passed) (List.length novalid_tests);
184184+ Printf.printf "haswarn tests: %d/%d roundtripped successfully\n"
185185+ (List.length haswarn_passed) (List.length haswarn_tests);
186186+187187+ let total_roundtrip_ok = List.length (List.filter (fun r -> r.roundtrip_ok) results) in
188188+ Printf.printf "\nTotal: %d/%d roundtripped without errors\n"
189189+ total_roundtrip_ok (List.length results);
190190+ Printf.printf "isvalid preservation: %d/%d still valid after roundtrip\n"
191191+ (List.length isvalid_passed) (List.length isvalid_tests);
192192+193193+ (* Exit with error if isvalid tests fail validation after roundtrip *)
194194+ let exit_code = if List.length isvalid_failed > 0 then 1 else 0 in
195195+ exit exit_code