···11-(** Tests for the html5_checker library *)
22-33-(** Helper to create a reader from a string *)
44-let reader_of_string s = Bytesrw.Bytes.Reader.of_string s
55-66-(** Helper to check if a message contains a substring *)
77-let message_contains msg substring =
88- String.lowercase_ascii msg.Htmlrw_check.text
99- |> fun s -> String.length s >= String.length substring &&
1010- try
1111- ignore (Str.search_forward (Str.regexp_case_fold (Str.quote substring)) s 0);
1212- true
1313- with Not_found -> false
1414-1515-(** Test that valid HTML5 produces no errors *)
1616-let test_valid_html5 () =
1717- Printf.printf "Test 1: Valid HTML5 document\n";
1818- let html = {|<!DOCTYPE html>
1919-<html lang="en">
2020-<head><title>Test</title></head>
2121-<body><p>Hello world</p></body>
2222-</html>|} in
2323- let reader = reader_of_string html in
2424- let result = Htmlrw_check.check reader in
2525- let errors = Htmlrw_check.errors result in
2626- Printf.printf " Found %d error(s)\n" (List.length errors);
2727- if List.length errors > 0 then begin
2828- List.iter (fun msg ->
2929- Printf.printf " - %s\n" msg.Htmlrw_check.text
3030- ) errors;
3131- end else
3232- Printf.printf " OK: No errors as expected\n"
3333-3434-(** Test that missing DOCTYPE is detected *)
3535-let test_missing_doctype () =
3636- Printf.printf "\nTest 2: Missing DOCTYPE\n";
3737- let html = "<html><body>Hello</body></html>" in
3838- let reader = reader_of_string html in
3939- let result = Htmlrw_check.check reader in
4040- let errors = Htmlrw_check.errors result in
4141- Printf.printf " Found %d error(s)\n" (List.length errors);
4242- if List.length errors = 0 then
4343- Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n"
4444- else begin
4545- List.iter (fun msg ->
4646- Printf.printf " - %s\n" msg.Htmlrw_check.text
4747- ) errors;
4848- end
4949-5050-(** Test that obsolete elements are detected *)
5151-let test_obsolete_element () =
5252- Printf.printf "\nTest 3: Obsolete <center> element\n";
5353- let html = "<!DOCTYPE html><html><body><center>Centered</center></body></html>" in
5454- let reader = reader_of_string html in
5555- let result = Htmlrw_check.check reader in
5656- let all_msgs = Htmlrw_check.messages result in
5757- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
5858- let obsolete_msgs = List.filter (fun m ->
5959- message_contains m "obsolete" || message_contains m "center"
6060- ) all_msgs in
6161- if List.length obsolete_msgs > 0 then begin
6262- Printf.printf " Found obsolete-related messages:\n";
6363- List.iter (fun msg ->
6464- Printf.printf " - %s\n" msg.Htmlrw_check.text
6565- ) obsolete_msgs;
6666- end else
6767- Printf.printf " Note: No obsolete element warnings found (checker may not be enabled)\n"
6868-6969-(** Test duplicate IDs *)
7070-let test_duplicate_id () =
7171- Printf.printf "\nTest 4: Duplicate ID attributes\n";
7272- let html = {|<!DOCTYPE html><html><body>
7373- <div id="foo">First</div>
7474- <div id="foo">Second</div>
7575- </body></html>|} in
7676- let reader = reader_of_string html in
7777- let result = Htmlrw_check.check reader in
7878- let all_msgs = Htmlrw_check.messages result in
7979- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
8080- let id_msgs = List.filter (fun m ->
8181- message_contains m "duplicate" || message_contains m "id"
8282- ) all_msgs in
8383- if List.length id_msgs > 0 then begin
8484- Printf.printf " Found ID-related messages:\n";
8585- List.iter (fun msg ->
8686- Printf.printf " - %s\n" msg.Htmlrw_check.text
8787- ) id_msgs;
8888- end else
8989- Printf.printf " Note: No duplicate ID errors found (checker may not be enabled)\n"
9090-9191-(** Test heading structure *)
9292-let test_heading_skip () =
9393- Printf.printf "\nTest 5: Skipped heading level\n";
9494- let html = {|<!DOCTYPE html><html><body>
9595- <h1>Title</h1>
9696- <h3>Skipped h2</h3>
9797- </body></html>|} in
9898- let reader = reader_of_string html in
9999- let result = Htmlrw_check.check reader in
100100- let all_msgs = Htmlrw_check.messages result in
101101- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
102102- let heading_msgs = List.filter (fun m ->
103103- message_contains m "heading" || message_contains m "skip"
104104- ) all_msgs in
105105- if List.length heading_msgs > 0 then begin
106106- Printf.printf " Found heading-related messages:\n";
107107- List.iter (fun msg ->
108108- Printf.printf " - %s\n" msg.Htmlrw_check.text
109109- ) heading_msgs;
110110- end else
111111- Printf.printf " Note: No heading structure warnings found (checker may not be enabled)\n"
112112-113113-(** Test img without alt *)
114114-let test_img_without_alt () =
115115- Printf.printf "\nTest 6: Image without alt attribute\n";
116116- let html = {|<!DOCTYPE html><html><body>
117117- <img src="test.jpg">
118118- </body></html>|} in
119119- let reader = reader_of_string html in
120120- let result = Htmlrw_check.check reader in
121121- let all_msgs = Htmlrw_check.messages result in
122122- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
123123- let img_msgs = List.filter (fun m ->
124124- message_contains m "alt" || (message_contains m "img" && message_contains m "attribute")
125125- ) all_msgs in
126126- if List.length img_msgs > 0 then begin
127127- Printf.printf " Found img/alt-related messages:\n";
128128- List.iter (fun msg ->
129129- Printf.printf " - %s\n" msg.Htmlrw_check.text
130130- ) img_msgs;
131131- end else
132132- Printf.printf " Note: No missing alt attribute errors found (checker may not be enabled)\n"
133133-134134-(** Test invalid nesting *)
135135-let test_invalid_nesting () =
136136- Printf.printf "\nTest 7: Invalid nesting - <a> inside <a>\n";
137137- let html = {|<!DOCTYPE html><html><body>
138138- <a href="#">Link <a href="#">Nested</a></a>
139139- </body></html>|} in
140140- let reader = reader_of_string html in
141141- let result = Htmlrw_check.check reader in
142142- let all_msgs = Htmlrw_check.messages result in
143143- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
144144- let nesting_msgs = List.filter (fun m ->
145145- message_contains m "nesting" || message_contains m "nested" || message_contains m "ancestor"
146146- ) all_msgs in
147147- if List.length nesting_msgs > 0 then begin
148148- Printf.printf " Found nesting-related messages:\n";
149149- List.iter (fun msg ->
150150- Printf.printf " - %s\n" msg.Htmlrw_check.text
151151- ) nesting_msgs;
152152- end else
153153- Printf.printf " Note: No nesting errors found (checker may not be enabled)\n"
154154-155155-(** Test form inside form *)
156156-let test_form_nesting () =
157157- Printf.printf "\nTest 8: Invalid nesting - <form> inside <form>\n";
158158- let html = {|<!DOCTYPE html><html><body>
159159- <form><form></form></form>
160160- </body></html>|} in
161161- let reader = reader_of_string html in
162162- let result = Htmlrw_check.check reader in
163163- let all_msgs = Htmlrw_check.messages result in
164164- Printf.printf " Found %d message(s)\n" (List.length all_msgs);
165165- let form_msgs = List.filter (fun m ->
166166- message_contains m "form"
167167- ) all_msgs in
168168- if List.length form_msgs > 0 then begin
169169- Printf.printf " Found form-related messages:\n";
170170- List.iter (fun msg ->
171171- Printf.printf " - %s\n" msg.Htmlrw_check.text
172172- ) form_msgs;
173173- end else
174174- Printf.printf " Note: No form nesting errors found (checker may not be enabled)\n"
175175-176176-(** Test output formatting *)
177177-let test_output_formats () =
178178- Printf.printf "\nTest 9: Output format testing\n";
179179- let html = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in
180180- let reader = reader_of_string html in
181181- let result = Htmlrw_check.check reader in
182182-183183- Printf.printf " Testing text format:\n";
184184- let text_output = Htmlrw_check.to_text result in
185185- Printf.printf " Length: %d chars\n" (String.length text_output);
186186-187187- Printf.printf " Testing JSON format:\n";
188188- let json_output = Htmlrw_check.to_json result in
189189- Printf.printf " Length: %d chars\n" (String.length json_output);
190190-191191- Printf.printf " Testing GNU format:\n";
192192- let gnu_output = Htmlrw_check.to_gnu result in
193193- Printf.printf " Length: %d chars\n" (String.length gnu_output)
194194-195195-(** Test has_errors function *)
196196-let test_has_errors () =
197197- Printf.printf "\nTest 10: has_errors function\n";
198198-199199- (* Valid document should have no errors *)
200200- let valid_html = "<!DOCTYPE html><html><body><p>Valid</p></body></html>" in
201201- let result1 = Htmlrw_check.check (reader_of_string valid_html) in
202202- Printf.printf " Valid document has_errors: %b\n" (Htmlrw_check.has_errors result1);
203203-204204- (* Document with likely parse errors *)
205205- let invalid_html = "<html><body><p>Unclosed" in
206206- let result2 = Htmlrw_check.check (reader_of_string invalid_html) in
207207- Printf.printf " Invalid document has_errors: %b\n" (Htmlrw_check.has_errors result2)
208208-209209-(** Test check_dom with pre-parsed document *)
210210-let test_check_dom () =
211211- Printf.printf "\nTest 11: check_dom with pre-parsed document\n";
212212- let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
213213- let reader = reader_of_string html in
214214- let parsed = Html5rw.parse reader in
215215- let result = Htmlrw_check.check_parsed parsed in
216216- let all_msgs = Htmlrw_check.messages result in
217217- Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs);
218218- Printf.printf " OK: check_dom completed successfully\n"
219219-220220-(** Test system_id parameter *)
221221-let test_system_id () =
222222- Printf.printf "\nTest 12: system_id parameter\n";
223223- let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
224224- let reader = reader_of_string html in
225225- let result = Htmlrw_check.check ~system_id:"test.html" reader in
226226- match Htmlrw_check.system_id result with
227227- | Some id -> Printf.printf " system_id: %s\n" id
228228- | None -> Printf.printf " Warning: system_id not set\n"
229229-230230-(** Test collect_parse_errors flag *)
231231-let test_collect_parse_errors_flag () =
232232- Printf.printf "\nTest 13: collect_parse_errors flag\n";
233233- let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
234234-235235- let result_with = Htmlrw_check.check ~collect_parse_errors:true (reader_of_string html) in
236236- let msgs_with = Htmlrw_check.messages result_with in
237237- Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with);
238238-239239- let result_without = Htmlrw_check.check ~collect_parse_errors:false (reader_of_string html) in
240240- let msgs_without = Htmlrw_check.messages result_without in
241241- Printf.printf " Without parse errors: %d message(s)\n" (List.length msgs_without)
242242-243243-(** Test document accessor *)
244244-let test_document_accessor () =
245245- Printf.printf "\nTest 14: document accessor\n";
246246- let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
247247- let reader = reader_of_string html in
248248- let result = Htmlrw_check.check reader in
249249- let _doc = Htmlrw_check.document result in
250250- Printf.printf " OK: document accessor works\n"
251251-252252-(** Test message severity filtering *)
253253-let test_severity_filtering () =
254254- Printf.printf "\nTest 15: Message severity filtering\n";
255255- let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
256256- let reader = reader_of_string html in
257257- let result = Htmlrw_check.check reader in
258258-259259- let all_msgs = Htmlrw_check.messages result in
260260- let errors = Htmlrw_check.errors result in
261261- let warnings = Htmlrw_check.warnings result in
262262-263263- Printf.printf " Total messages: %d\n" (List.length all_msgs);
264264- Printf.printf " Errors: %d\n" (List.length errors);
265265- Printf.printf " Warnings: %d\n" (List.length warnings);
266266-267267- (* Verify that errors + warnings <= all messages *)
268268- if List.length errors + List.length warnings <= List.length all_msgs then
269269- Printf.printf " OK: Message counts are consistent\n"
270270- else
271271- Printf.printf " Warning: Message counts inconsistent\n"
272272-273273-(** Run all tests *)
274274-let () =
275275- Printf.printf "Running html5_checker tests...\n";
276276- Printf.printf "========================================\n\n";
277277-278278- test_valid_html5 ();
279279- test_missing_doctype ();
280280- test_obsolete_element ();
281281- test_duplicate_id ();
282282- test_heading_skip ();
283283- test_img_without_alt ();
284284- test_invalid_nesting ();
285285- test_form_nesting ();
286286- test_output_formats ();
287287- test_has_errors ();
288288- test_check_dom ();
289289- test_system_id ();
290290- test_collect_parse_errors_flag ();
291291- test_document_accessor ();
292292- test_severity_filtering ();
293293-294294- Printf.printf "\n========================================\n";
295295- Printf.printf "All tests completed!\n";
296296- Printf.printf "\nNote: Some checkers may not be enabled yet.\n";
297297- Printf.printf "Tests marked with 'Note:' indicate features that may be\n";
298298- Printf.printf "implemented in future versions.\n"
-68
test/test_nesting_checker.ml
···11-(** Test for nesting checker functionality via public API *)
22-33-let check_html html =
44- let reader = Bytesrw.Bytes.Reader.of_string html in
55- Htmlrw_check.check reader
66-77-let () =
88- (* Test 1: <a> cannot contain another <a> *)
99- Printf.printf "Test 1: Checking <a href> inside <a href>\n";
1010- let result1 = check_html "<a href='#'><a href='#'>nested</a></a>" in
1111- let errors1 = Htmlrw_check.errors result1 in
1212- Printf.printf " Found %d error(s)\n" (List.length errors1);
1313- List.iter (fun msg ->
1414- Printf.printf " - %s\n" msg.Htmlrw_check.text
1515- ) errors1;
1616-1717- (* Test 2: <button> inside <a> *)
1818- Printf.printf "\nTest 2: Checking <button> inside <a href>\n";
1919- let result2 = check_html "<a href='#'><button>click</button></a>" in
2020- let errors2 = Htmlrw_check.errors result2 in
2121- Printf.printf " Found %d error(s)\n" (List.length errors2);
2222- List.iter (fun msg ->
2323- Printf.printf " - %s\n" msg.Htmlrw_check.text
2424- ) errors2;
2525-2626- (* Test 3: form inside form *)
2727- Printf.printf "\nTest 3: Checking <form> inside <form>\n";
2828- let result3 = check_html "<form><form>nested</form></form>" in
2929- let errors3 = Htmlrw_check.errors result3 in
3030- Printf.printf " Found %d error(s)\n" (List.length errors3);
3131- List.iter (fun msg ->
3232- Printf.printf " - %s\n" msg.Htmlrw_check.text
3333- ) errors3;
3434-3535- (* Test 4: header inside footer (should be allowed) *)
3636- Printf.printf "\nTest 4: Checking <header> inside <footer>\n";
3737- let result4 = check_html "<footer><header>test</header></footer>" in
3838- let errors4 = Htmlrw_check.errors result4 in
3939- Printf.printf " Found %d error(s)\n" (List.length errors4);
4040- if List.length errors4 > 0 then
4141- List.iter (fun msg ->
4242- Printf.printf " - %s\n" msg.Htmlrw_check.text
4343- ) errors4
4444- else
4545- Printf.printf " OK: No errors (header inside footer is valid)\n";
4646-4747- (* Test 5: input inside button *)
4848- Printf.printf "\nTest 5: Checking <input type=text> inside <button>\n";
4949- let result5 = check_html "<button><input type='text'></button>" in
5050- let errors5 = Htmlrw_check.errors result5 in
5151- Printf.printf " Found %d error(s)\n" (List.length errors5);
5252- List.iter (fun msg ->
5353- Printf.printf " - %s\n" msg.Htmlrw_check.text
5454- ) errors5;
5555-5656- (* Test 6: valid nesting - should not error *)
5757- Printf.printf "\nTest 6: Checking valid nesting: <div> inside <div>\n";
5858- let result6 = check_html "<div><div>nested</div></div>" in
5959- let errors6 = Htmlrw_check.errors result6 in
6060- Printf.printf " Found %d error(s)\n" (List.length errors6);
6161- if List.length errors6 = 0 then
6262- Printf.printf " OK: No errors as expected\n"
6363- else
6464- List.iter (fun msg ->
6565- Printf.printf " - %s\n" msg.Htmlrw_check.text
6666- ) errors6;
6767-6868- Printf.printf "\nAll tests completed!\n"