···1+(** Common attribute utilities used across checkers. *)
2+3+type attrs = (string * string) list
4+5+let has_attr name attrs =
6+ List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
7+8+let get_attr name attrs =
9+ List.find_map (fun (n, v) ->
10+ if String.lowercase_ascii n = name then Some v else None
11+ ) attrs
12+13+let get_attr_or name ~default attrs =
14+ Option.value ~default (get_attr name attrs)
15+16+let is_non_empty_attr name attrs =
17+ match get_attr name attrs with
18+ | Some v -> String.trim v <> ""
19+ | None -> false
+2-8
lib/htmlrw_check/datatype/dt_color.ml
···213 if String.length s = 0 then Error "Color value must not be empty"
214 else if List.mem s named_colors then Ok ()
215 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216- else if
217- String.length s > 4
218- && (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(")
219- then
220 (* Basic validation for rgb/rgba - just check balanced parens *)
221 if s.[String.length s - 1] = ')' then Ok ()
222 else Error "rgb/rgba function must end with ')'"
223- else if
224- String.length s > 4
225- && (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(")
226- then
227 (* Basic validation for hsl/hsla - just check balanced parens *)
228 if s.[String.length s - 1] = ')' then Ok ()
229 else Error "hsl/hsla function must end with ')'"
···213 if String.length s = 0 then Error "Color value must not be empty"
214 else if List.mem s named_colors then Ok ()
215 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216+ else if String.starts_with ~prefix:"rgb(" s || String.starts_with ~prefix:"rgba(" s then
000217 (* Basic validation for rgb/rgba - just check balanced parens *)
218 if s.[String.length s - 1] = ')' then Ok ()
219 else Error "rgb/rgba function must end with ')'"
220+ else if String.starts_with ~prefix:"hsl(" s || String.starts_with ~prefix:"hsla(" s then
000221 (* Basic validation for hsl/hsla - just check balanced parens *)
222 if s.[String.length s - 1] = ')' then Ok ()
223 else Error "hsl/hsla function must end with ')'"
+2-2
lib/htmlrw_check/datatype/dt_media_query.ml
···330331 (* Get base feature name for error messages (strip min-/max- prefix) *)
332 let base_feature =
333- if String.length feature > 4 && String.sub feature 0 4 = "min-" then
334 String.sub feature 4 (String.length feature - 4)
335- else if String.length feature > 4 && String.sub feature 0 4 = "max-" then
336 String.sub feature 4 (String.length feature - 4)
337 else
338 feature
···330331 (* Get base feature name for error messages (strip min-/max- prefix) *)
332 let base_feature =
333+ if String.starts_with ~prefix:"min-" feature then
334 String.sub feature 4 (String.length feature - 4)
335+ else if String.starts_with ~prefix:"max-" feature then
336 String.sub feature 4 (String.length feature - 4)
337 else
338 feature
+77-102
lib/htmlrw_check/message_format.ml
···0000001let format_text ?system_id messages =
2 let buf = Buffer.create 1024 in
3- List.iter
4- (fun msg ->
5- let loc_str =
6- match msg.Message.location with
7- | Some loc -> (
8- let sid =
9- match loc.Message.system_id with
10- | Some s -> s
11- | None -> (
12- match system_id with Some s -> s | None -> "input")
13- in
14- let col_info =
15- match (loc.end_line, loc.end_column) with
16- | Some el, Some ec when el = loc.line && ec > loc.column ->
17- Printf.sprintf "%d.%d-%d" loc.line loc.column ec
18- | Some el, Some ec when el > loc.line ->
19- Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
20- | _ -> Printf.sprintf "%d.%d" loc.line loc.column
21- in
22- Printf.sprintf "%s:%s" sid col_info)
23- | None -> (
24- match system_id with Some s -> s | None -> "input")
25- in
26- let severity_str = Message.severity_to_string msg.Message.severity in
27- let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
28- let elem_str =
29- match msg.Message.element with
30- | Some e -> " (element: " ^ e ^ ")"
31- | None -> ""
32- in
33- let attr_str =
34- match msg.Message.attribute with
35- | Some a -> " (attribute: " ^ a ^ ")"
36- | None -> ""
37- in
38- Buffer.add_string buf
39- (Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str
40- msg.Message.message elem_str attr_str))
41- messages;
42 Buffer.contents buf
4344let format_gnu ?system_id messages =
45 let buf = Buffer.create 1024 in
46- List.iter
47- (fun msg ->
48- let loc_str =
49- match msg.Message.location with
50- | Some loc -> (
51- let sid =
52- match loc.Message.system_id with
53- | Some s -> s
54- | None -> (
55- match system_id with Some s -> s | None -> "input")
56- in
57- Printf.sprintf "%s:%d:%d" sid loc.line loc.column)
58- | None -> (
59- match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
60- in
61- let severity_str = Message.severity_to_string msg.Message.severity in
62- let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
63- Buffer.add_string buf
64- (Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
65- msg.Message.message))
66- messages;
67 Buffer.contents buf
6869let message_to_json ?system_id msg =
70 let open Jsont in
71- let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in
72- let message_text = String (msg.Message.message, Meta.none) in
73- let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
74- let with_code =
75- (("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base
76- in
77- let with_location =
78- match msg.Message.location with
00079 | Some loc ->
80- let line = Number (float_of_int loc.Message.line, Meta.none) in
81- let column = Number (float_of_int loc.Message.column, Meta.none) in
82- let loc_fields =
83- [ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ]
84- in
85- let loc_fields =
86- match loc.Message.end_line with
87- | Some el ->
88- (("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields
89- | None -> loc_fields
90- in
91- let loc_fields =
92- match loc.Message.end_column with
93- | Some ec ->
94- (("lastColumn", Meta.none), Number (float_of_int ec, Meta.none))
95- :: loc_fields
96- | None -> loc_fields
97- in
98- let url =
99- match loc.Message.system_id with
100- | Some s -> s
101- | None -> (
102- match system_id with Some s -> s | None -> "input")
103- in
104- (("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code
105 | None ->
106- let url =
107- match system_id with Some s -> s | None -> "input"
108- in
109- (("url", Meta.none), String (url, Meta.none)) :: with_code
110 in
111- let with_extract =
112- match msg.Message.extract with
113- | Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location
114- | None -> with_location
115- in
116 Object (with_extract, Meta.none)
117118let format_json ?system_id messages =
···1+(** Get effective system_id, preferring location's system_id over the passed one *)
2+let get_system_id ?system_id loc_system_id =
3+ loc_system_id
4+ |> Option.fold ~none:system_id ~some:Option.some
5+ |> Option.value ~default:"input"
6+7let format_text ?system_id messages =
8 let buf = Buffer.create 1024 in
9+ List.iter (fun msg ->
10+ let loc_str = match msg.Message.location with
11+ | Some loc ->
12+ let sid = get_system_id ?system_id loc.Message.system_id in
13+ let col_info = match loc.end_line, loc.end_column with
14+ | Some el, Some ec when el = loc.line && ec > loc.column ->
15+ Printf.sprintf "%d.%d-%d" loc.line loc.column ec
16+ | Some el, Some ec when el > loc.line ->
17+ Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
18+ | _ ->
19+ Printf.sprintf "%d.%d" loc.line loc.column
20+ in
21+ Printf.sprintf "%s:%s" sid col_info
22+ | None ->
23+ Option.value system_id ~default:"input"
24+ in
25+ let elem_str = Option.fold ~none:"" ~some:(Printf.sprintf " (element: %s)") msg.Message.element in
26+ let attr_str = Option.fold ~none:"" ~some:(Printf.sprintf " (attribute: %s)") msg.Message.attribute in
27+ Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s%s%s\n"
28+ loc_str
29+ (Message.severity_to_string msg.Message.severity)
30+ (Message.error_code_to_string msg.Message.error_code)
31+ msg.Message.message
32+ elem_str
33+ attr_str)
34+ ) messages;
000000000000035 Buffer.contents buf
3637let format_gnu ?system_id messages =
38 let buf = Buffer.create 1024 in
39+ List.iter (fun msg ->
40+ let loc_str = match msg.Message.location with
41+ | Some loc ->
42+ Printf.sprintf "%s:%d:%d"
43+ (get_system_id ?system_id loc.Message.system_id)
44+ loc.line loc.column
45+ | None ->
46+ Option.value system_id ~default:"input" ^ ":0:0"
47+ in
48+ Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s\n"
49+ loc_str
50+ (Message.severity_to_string msg.Message.severity)
51+ (Message.error_code_to_string msg.Message.error_code)
52+ msg.Message.message)
53+ ) messages;
00000054 Buffer.contents buf
5556let message_to_json ?system_id msg =
57 let open Jsont in
58+ let str s = String (s, Meta.none) in
59+ let num n = Number (float_of_int n, Meta.none) in
60+ let field name value = ((name, Meta.none), value) in
61+62+ let base = [
63+ field "type" (str (Message.severity_to_string msg.Message.severity));
64+ field "message" (str msg.Message.message);
65+ field "subType" (str (Message.error_code_to_string msg.Message.error_code));
66+ ] in
67+68+ let with_location = match msg.Message.location with
69 | Some loc ->
70+ let url = get_system_id ?system_id loc.Message.system_id in
71+ let loc_fields = [
72+ field "url" (str url);
73+ field "firstLine" (num loc.line);
74+ field "firstColumn" (num loc.column);
75+ ] in
76+ let loc_fields = Option.fold ~none:loc_fields
77+ ~some:(fun el -> field "lastLine" (num el) :: loc_fields)
78+ loc.Message.end_line in
79+ let loc_fields = Option.fold ~none:loc_fields
80+ ~some:(fun ec -> field "lastColumn" (num ec) :: loc_fields)
81+ loc.Message.end_column in
82+ loc_fields @ base
00000000000083 | None ->
84+ field "url" (str (Option.value system_id ~default:"input")) :: base
00085 in
86+87+ let with_extract = Option.fold ~none:with_location
88+ ~some:(fun e -> field "extract" (str e) :: with_location)
89+ msg.Message.extract in
90+91 Object (with_extract, Meta.none)
9293let format_json ?system_id messages =
+10-10
lib/htmlrw_check/parse_error_bridge.ml
···14 | Html5rw.Parse_error_code.Tree_construction_error s ->
15 (* Check for control-character/noncharacter/surrogate with codepoint info *)
16 (try
17- if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
18 let colon_pos = String.index s ':' in
19 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20 let cp = int_of_string ("0x" ^ cp_str) in
21 Printf.sprintf "Forbidden code point U+%04x." cp
22- else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
23 let colon_pos = String.index s ':' in
24 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25 let cp = int_of_string ("0x" ^ cp_str) in
26 Printf.sprintf "Forbidden code point U+%04x." cp
27- else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
28 let colon_pos = String.index s ':' in
29 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30 let cp = int_of_string ("0x" ^ cp_str) in
31 Printf.sprintf "Forbidden code point U+%04x." cp
32 (* Character reference errors *)
33- else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
34 let cp_str = String.sub s 28 (String.length s - 28) in
35 let cp = int_of_string ("0x" ^ cp_str) in
36 if cp = 0x0D then
37 "A numeric character reference expanded to carriage return."
38 else
39 Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40- else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
41 let colon_pos = String.index s ':' in
42 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43 let cp = int_of_string ("0x" ^ cp_str) in
···49 Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50 else
51 Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52- else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
53 "Character reference outside the permissible Unicode range."
54- else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
55 let colon_pos = String.index s ':' in
56 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57 let cp = int_of_string ("0x" ^ cp_str) in
···64 "End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65 else if s = "expected-closing-tag-but-got-eof" then
66 "End of file seen and there were open elements."
67- else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
68 let colon_pos = String.index s ':' in
69 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70 Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71- else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
72 let element = String.sub s 19 (String.length s - 19) in
73 Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74- else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
75 let tag = String.sub s 19 (String.length s - 19) in
76 Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77 else
···14 | Html5rw.Parse_error_code.Tree_construction_error s ->
15 (* Check for control-character/noncharacter/surrogate with codepoint info *)
16 (try
17+ if String.starts_with ~prefix:"control-character-in-input-s" s then
18 let colon_pos = String.index s ':' in
19 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20 let cp = int_of_string ("0x" ^ cp_str) in
21 Printf.sprintf "Forbidden code point U+%04x." cp
22+ else if String.starts_with ~prefix:"noncharacter-in-input-str" s then
23 let colon_pos = String.index s ':' in
24 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25 let cp = int_of_string ("0x" ^ cp_str) in
26 Printf.sprintf "Forbidden code point U+%04x." cp
27+ else if String.starts_with ~prefix:"surrogate-in-input-str" s then
28 let colon_pos = String.index s ':' in
29 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30 let cp = int_of_string ("0x" ^ cp_str) in
31 Printf.sprintf "Forbidden code point U+%04x." cp
32 (* Character reference errors *)
33+ else if String.starts_with ~prefix:"control-character-reference:" s then
34 let cp_str = String.sub s 28 (String.length s - 28) in
35 let cp = int_of_string ("0x" ^ cp_str) in
36 if cp = 0x0D then
37 "A numeric character reference expanded to carriage return."
38 else
39 Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40+ else if String.starts_with ~prefix:"noncharacter-character-referenc" s then
41 let colon_pos = String.index s ':' in
42 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43 let cp = int_of_string ("0x" ^ cp_str) in
···49 Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50 else
51 Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52+ else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then
53 "Character reference outside the permissible Unicode range."
54+ else if String.starts_with ~prefix:"surrogate-character-referen" s then
55 let colon_pos = String.index s ':' in
56 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57 let cp = int_of_string ("0x" ^ cp_str) in
···64 "End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65 else if s = "expected-closing-tag-but-got-eof" then
66 "End of file seen and there were open elements."
67+ else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68 let colon_pos = String.index s ':' in
69 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70 Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71+ else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72 let element = String.sub s 19 (String.length s - 19) in
73 Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74+ else if String.starts_with ~prefix:"start-tag-in-table:" s then
75 let tag = String.sub s 19 (String.length s - 19) in
76 Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77 else
+17-39
lib/htmlrw_check/semantic/autofocus_checker.ml
···3 Validates that only one element with autofocus attribute exists within
4 each dialog or popover context. *)
56-(** Context for tracking autofocus elements. *)
7type context_type = Dialog | Popover
89type context = {
···26 state.context_stack <- [];
27 state.current_depth <- 0
2829-(** Check if an attribute list contains a specific attribute. *)
30-let has_attr name attrs =
31- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
32-33-(** Get an attribute value from the list. *)
34-let get_attr name attrs =
35- List.find_map (fun (attr_name, value) ->
36- if String.lowercase_ascii attr_name = name then Some value else None
37- ) attrs
38-39-(** Check if element has popover attribute. *)
40-let has_popover attrs =
41- List.exists (fun (attr_name, _) ->
42- String.lowercase_ascii attr_name = "popover"
43- ) attrs
44-45let start_element state ~name ~namespace ~attrs collector =
46- let name_lower = String.lowercase_ascii name in
47-48- (* Track depth *)
49 state.current_depth <- state.current_depth + 1;
5051- if namespace = None then begin
000052 (* Check if we're entering a dialog or popover context *)
53- let enters_context =
54- if name_lower = "dialog" then Some Dialog
55- else if has_popover attrs then Some Popover
56- else None
57 in
5859- (match enters_context with
60- | Some ctx_type ->
61 let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
62 state.context_stack <- ctx :: state.context_stack
63- | None -> ());
6465 (* Check for autofocus attribute *)
66- if has_attr "autofocus" attrs then begin
67- (* Increment count in innermost context if any *)
68 match state.context_stack with
69 | ctx :: _ ->
70 ctx.autofocus_count <- ctx.autofocus_count + 1;
71 if ctx.autofocus_count > 1 then
72 Message_collector.add_typed collector (`Misc `Multiple_autofocus)
73 | [] -> ()
74- end
75- end
7677let end_element state ~name ~namespace _collector =
78- let name_lower = String.lowercase_ascii name in
79-80- if namespace = None then begin
81- (* Pop context if we're leaving one *)
82 match state.context_stack with
83 | ctx :: rest when ctx.depth = state.current_depth ->
84- (* Verify this is the right element *)
85 let matches =
86 (name_lower = "dialog" && ctx.context_type = Dialog) ||
87 (ctx.context_type = Popover)
88 in
89 if matches then state.context_stack <- rest
90- | _ -> ()
91- end;
9293 state.current_depth <- state.current_depth - 1
94
···3 Validates that only one element with autofocus attribute exists within
4 each dialog or popover context. *)
506type context_type = Dialog | Popover
78type context = {
···25 state.context_stack <- [];
26 state.current_depth <- 0
27000000000000000028let start_element state ~name ~namespace ~attrs collector =
00029 state.current_depth <- state.current_depth + 1;
3031+ match namespace with
32+ | Some _ -> ()
33+ | None ->
34+ let name_lower = String.lowercase_ascii name in
35+36 (* Check if we're entering a dialog or popover context *)
37+ let enters_context = match name_lower with
38+ | "dialog" -> Some Dialog
39+ | _ when Attr_utils.has_attr "popover" attrs -> Some Popover
40+ | _ -> None
41 in
4243+ Option.iter (fun ctx_type ->
044 let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
45 state.context_stack <- ctx :: state.context_stack
46+ ) enters_context;
4748 (* Check for autofocus attribute *)
49+ if Attr_utils.has_attr "autofocus" attrs then
050 match state.context_stack with
51 | ctx :: _ ->
52 ctx.autofocus_count <- ctx.autofocus_count + 1;
53 if ctx.autofocus_count > 1 then
54 Message_collector.add_typed collector (`Misc `Multiple_autofocus)
55 | [] -> ()
005657let end_element state ~name ~namespace _collector =
58+ (match namespace with
59+ | Some _ -> ()
60+ | None ->
61+ let name_lower = String.lowercase_ascii name in
62 match state.context_stack with
63 | ctx :: rest when ctx.depth = state.current_depth ->
064 let matches =
65 (name_lower = "dialog" && ctx.context_type = Dialog) ||
66 (ctx.context_type = Popover)
67 in
68 if matches then state.context_stack <- rest
69+ | _ -> ());
07071 state.current_depth <- state.current_depth - 1
72
+1-8
lib/htmlrw_check/semantic/form_checker.ml
···1011let reset _state = ()
1213-(** Get the value of an attribute if present. *)
14-let get_attr name attrs =
15- List.find_map
16- (fun (attr_name, value) ->
17- if String.equal attr_name name then Some value else None)
18- attrs
19-20(** Check if autocomplete value contains webauthn token *)
21let contains_webauthn value =
22 let lower = String.lowercase_ascii value in
···42 (* Check autocomplete attribute on form elements *)
43 match name with
44 | "input" | "select" | "textarea" ->
45- (match get_attr "autocomplete" attrs with
46 | Some autocomplete_value ->
47 check_autocomplete_value autocomplete_value name collector
48 | None -> ())
···1011let reset _state = ()
12000000013(** Check if autocomplete value contains webauthn token *)
14let contains_webauthn value =
15 let lower = String.lowercase_ascii value in
···35 (* Check autocomplete attribute on form elements *)
36 match name with
37 | "input" | "select" | "textarea" ->
38+ (match Attr_utils.get_attr "autocomplete" attrs with
39 | Some autocomplete_value ->
40 check_autocomplete_value autocomplete_value name collector
41 | None -> ())
···60 let n = String.lowercase_ascii name in
61 n = "svg" || n = "math"
6263-let get_attr name attrs =
64- List.find_map (fun (n, v) ->
65- if String.lowercase_ascii n = name then Some v else None
66- ) attrs
67-68let get_lang_code lang =
69 (* Extract primary language subtag *)
70 match String.split_on_char '-' lang with
···226 let ns = Option.value namespace ~default:"" in
227228 if name_lower = "html" then begin
229- state.html_lang <- get_attr "lang" attrs;
230- state.html_dir <- get_attr "dir" attrs;
231 (* TODO: get line/column from locator *)
232 state.html_locator <- Some (1, 1)
233 end
···244 state.skip_depth <- state.skip_depth + 1
245 else begin
246 (* Check for different lang attribute *)
247- match get_attr "lang" attrs with
248 | Some lang when state.html_lang <> Some lang ->
249 state.skip_depth <- state.skip_depth + 1
250 | _ -> ()
···60 let n = String.lowercase_ascii name in
61 n = "svg" || n = "math"
620000063let get_lang_code lang =
64 (* Extract primary language subtag *)
65 match String.split_on_char '-' lang with
···221 let ns = Option.value namespace ~default:"" in
222223 if name_lower = "html" then begin
224+ state.html_lang <- Attr_utils.get_attr "lang" attrs;
225+ state.html_dir <- Attr_utils.get_attr "dir" attrs;
226 (* TODO: get line/column from locator *)
227 state.html_locator <- Some (1, 1)
228 end
···239 state.skip_depth <- state.skip_depth + 1
240 else begin
241 (* Check for different lang attribute *)
242+ match Attr_utils.get_attr "lang" attrs with
243 | Some lang when state.html_lang <> Some lang ->
244 state.skip_depth <- state.skip_depth + 1
245 | _ -> ()
+1-7
lib/htmlrw_check/semantic/option_checker.ml
···22 state.option_stack <- [];
23 state.in_template <- 0
2425-(** Get attribute value if present. *)
26-let get_attr name attrs =
27- List.find_map (fun (attr_name, value) ->
28- if String.lowercase_ascii attr_name = name then Some value else None
29- ) attrs
30-31let start_element state ~name ~namespace ~attrs collector =
32 let name_lower = String.lowercase_ascii name in
33···36 if name_lower = "template" then
37 state.in_template <- state.in_template + 1
38 else if state.in_template = 0 && name_lower = "option" then begin
39- let label_opt = get_attr "label" attrs in
40 let has_label = label_opt <> None in
41 let label_empty = match label_opt with
42 | Some v -> String.trim v = ""
···22 state.option_stack <- [];
23 state.in_template <- 0
2400000025let start_element state ~name ~namespace ~attrs collector =
26 let name_lower = String.lowercase_ascii name in
27···30 if name_lower = "template" then
31 state.in_template <- state.in_template + 1
32 else if state.in_template = 0 && name_lower = "option" then begin
33+ let label_opt = Attr_utils.get_attr "label" attrs in
34 let has_label = label_opt <> None in
35 let label_empty = match label_opt with
36 | Some v -> String.trim v = ""
···13 state._in_figure <- false;
14 state.in_a_with_href <- false
1516-(** Check if an attribute list contains a specific attribute. *)
17-let has_attr name attrs =
18- List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
19-20-(** Get the value of an attribute if present. *)
21-let get_attr name attrs =
22- List.find_map
23- (fun (attr_name, value) ->
24- if String.equal attr_name name then Some value else None)
25- attrs
26-27let check_img_element state attrs collector =
28 (* Check for required src OR srcset attribute *)
29- if not (has_attr "src" attrs) && not (has_attr "srcset" attrs) then
30 Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
3132 (* Check for alt attribute - always required *)
33- if not (has_attr "alt" attrs) then
34 Message_collector.add_typed collector (`Img `Missing_alt);
3536 (* Check ismap requires 'a' ancestor with href *)
37- if has_attr "ismap" attrs && not state.in_a_with_href then
38 Message_collector.add_typed collector (`Img `Ismap_needs_href)
3940let check_area_element attrs collector =
41 (* area with href requires alt *)
42- if has_attr "href" attrs && not (has_attr "alt" attrs) then
43 Message_collector.add_typed collector
44 (`Attr (`Missing (`Elem "area", `Attr "alt")))
4546let check_input_element attrs collector =
47- match get_attr "type" attrs with
48 | Some "image" ->
49 (* input[type=image] requires alt *)
50- if not (has_attr "alt" attrs) then
51 Message_collector.add_typed collector
52 (`Attr (`Missing (`Elem "input", `Attr "alt")))
53 | Some "hidden" ->
54 (* input[type=hidden] should not have required attribute *)
55- if has_attr "required" attrs then
56 Message_collector.add_typed collector
57 (`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
58 | Some "file" ->
59 (* input[type=file] should not have value attribute *)
60- if has_attr "value" attrs then
61 Message_collector.add_typed collector
62 (`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
63 | _ -> ()
6465let check_script_element attrs _collector =
66 (* script requires src OR text content *)
67- if not (has_attr "src" attrs) then
68 (* We can't check for text content here; that would need to be done
69 in end_element or with state tracking *)
70 ()
···76 - http-equiv AND content
77 - property AND content (RDFa)
78 - itemprop AND content (microdata) *)
79- let has_charset = has_attr "charset" attrs in
80- let has_name = has_attr "name" attrs in
81- let has_content = has_attr "content" attrs in
82- let has_http_equiv = has_attr "http-equiv" attrs in
83- let has_property = has_attr "property" attrs in
84- let has_itemprop = has_attr "itemprop" attrs in
8586 let valid =
87 has_charset
···100101let check_link_element attrs collector =
102 (* link[rel="stylesheet"] requires href *)
103- match get_attr "rel" attrs with
104 | Some rel when String.equal rel "stylesheet" ->
105- if not (has_attr "href" attrs) then
106 Message_collector.add_typed collector (`Link `Missing_href)
107 | _ -> ()
108109let check_a_element attrs collector =
110 (* a[download] requires href *)
111- if has_attr "download" attrs && not (has_attr "href" attrs) then
112 Message_collector.add_typed collector
113 (`Attr (`Missing (`Elem "a", `Attr "href")))
114115let check_map_element attrs collector =
116 (* map requires name *)
117- if not (has_attr "name" attrs) then
118 Message_collector.add_typed collector
119 (`Attr (`Missing (`Elem "map", `Attr "name")))
120121let check_object_element attrs collector =
122 (* object requires data attribute (or type attribute alone is not sufficient) *)
123- let has_data = has_attr "data" attrs in
124- let has_type = has_attr "type" attrs in
125 if not has_data && has_type then
126 Message_collector.add_typed collector
127 (`Attr (`Missing (`Elem "object", `Attr "data")))
128129let check_popover_element element_name attrs collector =
130 (* popover attribute must have valid value *)
131- match get_attr "popover" attrs with
132 | Some value ->
133 let value_lower = String.lowercase_ascii value in
134 (* Valid values: empty string, auto, manual, hint *)
···141142let check_meter_element attrs collector =
143 (* meter requires value attribute *)
144- if not (has_attr "value" attrs) then
145 Message_collector.add_typed collector
146 (`Attr (`Missing (`Elem "meter", `Attr "value")))
147 else begin
148 (* Validate min <= value constraint *)
149- match get_attr "value" attrs, get_attr "min" attrs with
150 | Some value_str, Some min_str ->
151 (try
152 let value = float_of_string value_str in
···162163let check_progress_element attrs collector =
164 (* Validate progress value constraints *)
165- match get_attr "value" attrs with
166 | None -> () (* value is optional *)
167 | Some value_str ->
168 (try
169 let value = float_of_string value_str in
170- let max_val = match get_attr "max" attrs with
171 | None -> 1.0 (* default max is 1 *)
172 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
173 in
174 if value > max_val then
175 let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
176 (* Check which message to use based on whether max is present *)
177- if has_attr "max" attrs then
178 Message_collector.add_typed collector
179 (`Generic (
180 (* Note: double space before "value" matches Nu validator quirk *)
···198 | "link" -> check_link_element attrs collector
199 | "a" ->
200 check_a_element attrs collector;
201- if has_attr "href" attrs then state.in_a_with_href <- true
202 | "map" -> check_map_element attrs collector
203 | "object" -> check_object_element attrs collector
204 | "meter" -> check_meter_element attrs collector
···206 | "figure" -> state._in_figure <- true
207 | _ ->
208 (* Check popover attribute on any element *)
209- if has_attr "popover" attrs then check_popover_element name attrs collector
210211let end_element state ~name ~namespace:_ _collector =
212 match name with
···13 state._in_figure <- false;
14 state.in_a_with_href <- false
150000000000016let check_img_element state attrs collector =
17 (* Check for required src OR srcset attribute *)
18+ if not (Attr_utils.has_attr "src" attrs) && not (Attr_utils.has_attr "srcset" attrs) then
19 Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
2021 (* Check for alt attribute - always required *)
22+ if not (Attr_utils.has_attr "alt" attrs) then
23 Message_collector.add_typed collector (`Img `Missing_alt);
2425 (* Check ismap requires 'a' ancestor with href *)
26+ if Attr_utils.has_attr "ismap" attrs && not state.in_a_with_href then
27 Message_collector.add_typed collector (`Img `Ismap_needs_href)
2829let check_area_element attrs collector =
30 (* area with href requires alt *)
31+ if Attr_utils.has_attr "href" attrs && not (Attr_utils.has_attr "alt" attrs) then
32 Message_collector.add_typed collector
33 (`Attr (`Missing (`Elem "area", `Attr "alt")))
3435let check_input_element attrs collector =
36+ match Attr_utils.get_attr "type" attrs with
37 | Some "image" ->
38 (* input[type=image] requires alt *)
39+ if not (Attr_utils.has_attr "alt" attrs) then
40 Message_collector.add_typed collector
41 (`Attr (`Missing (`Elem "input", `Attr "alt")))
42 | Some "hidden" ->
43 (* input[type=hidden] should not have required attribute *)
44+ if Attr_utils.has_attr "required" attrs then
45 Message_collector.add_typed collector
46 (`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
47 | Some "file" ->
48 (* input[type=file] should not have value attribute *)
49+ if Attr_utils.has_attr "value" attrs then
50 Message_collector.add_typed collector
51 (`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
52 | _ -> ()
5354let check_script_element attrs _collector =
55 (* script requires src OR text content *)
56+ if not (Attr_utils.has_attr "src" attrs) then
57 (* We can't check for text content here; that would need to be done
58 in end_element or with state tracking *)
59 ()
···65 - http-equiv AND content
66 - property AND content (RDFa)
67 - itemprop AND content (microdata) *)
68+ let has_charset = Attr_utils.has_attr "charset" attrs in
69+ let has_name = Attr_utils.has_attr "name" attrs in
70+ let has_content = Attr_utils.has_attr "content" attrs in
71+ let has_http_equiv = Attr_utils.has_attr "http-equiv" attrs in
72+ let has_property = Attr_utils.has_attr "property" attrs in
73+ let has_itemprop = Attr_utils.has_attr "itemprop" attrs in
7475 let valid =
76 has_charset
···8990let check_link_element attrs collector =
91 (* link[rel="stylesheet"] requires href *)
92+ match Attr_utils.get_attr "rel" attrs with
93 | Some rel when String.equal rel "stylesheet" ->
94+ if not (Attr_utils.has_attr "href" attrs) then
95 Message_collector.add_typed collector (`Link `Missing_href)
96 | _ -> ()
9798let check_a_element attrs collector =
99 (* a[download] requires href *)
100+ if Attr_utils.has_attr "download" attrs && not (Attr_utils.has_attr "href" attrs) then
101 Message_collector.add_typed collector
102 (`Attr (`Missing (`Elem "a", `Attr "href")))
103104let check_map_element attrs collector =
105 (* map requires name *)
106+ if not (Attr_utils.has_attr "name" attrs) then
107 Message_collector.add_typed collector
108 (`Attr (`Missing (`Elem "map", `Attr "name")))
109110let check_object_element attrs collector =
111 (* object requires data attribute (or type attribute alone is not sufficient) *)
112+ let has_data = Attr_utils.has_attr "data" attrs in
113+ let has_type = Attr_utils.has_attr "type" attrs in
114 if not has_data && has_type then
115 Message_collector.add_typed collector
116 (`Attr (`Missing (`Elem "object", `Attr "data")))
117118let check_popover_element element_name attrs collector =
119 (* popover attribute must have valid value *)
120+ match Attr_utils.get_attr "popover" attrs with
121 | Some value ->
122 let value_lower = String.lowercase_ascii value in
123 (* Valid values: empty string, auto, manual, hint *)
···130131let check_meter_element attrs collector =
132 (* meter requires value attribute *)
133+ if not (Attr_utils.has_attr "value" attrs) then
134 Message_collector.add_typed collector
135 (`Attr (`Missing (`Elem "meter", `Attr "value")))
136 else begin
137 (* Validate min <= value constraint *)
138+ match Attr_utils.get_attr "value" attrs, Attr_utils.get_attr "min" attrs with
139 | Some value_str, Some min_str ->
140 (try
141 let value = float_of_string value_str in
···151152let check_progress_element attrs collector =
153 (* Validate progress value constraints *)
154+ match Attr_utils.get_attr "value" attrs with
155 | None -> () (* value is optional *)
156 | Some value_str ->
157 (try
158 let value = float_of_string value_str in
159+ let max_val = match Attr_utils.get_attr "max" attrs with
160 | None -> 1.0 (* default max is 1 *)
161 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
162 in
163 if value > max_val then
164 let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165 (* Check which message to use based on whether max is present *)
166+ if Attr_utils.has_attr "max" attrs then
167 Message_collector.add_typed collector
168 (`Generic (
169 (* Note: double space before "value" matches Nu validator quirk *)
···187 | "link" -> check_link_element attrs collector
188 | "a" ->
189 check_a_element attrs collector;
190+ if Attr_utils.has_attr "href" attrs then state.in_a_with_href <- true
191 | "map" -> check_map_element attrs collector
192 | "object" -> check_object_element attrs collector
193 | "meter" -> check_meter_element attrs collector
···195 | "figure" -> state._in_figure <- true
196 | _ ->
197 (* Check popover attribute on any element *)
198+ if Attr_utils.has_attr "popover" attrs then check_popover_element name attrs collector
199200let end_element state ~name ~namespace:_ _collector =
201 match name with
+1-1
lib/htmlrw_check/specialized/aria_checker.ml
···491 if name_lower = "br" || name_lower = "wbr" then begin
492 List.iter (fun (attr_name, _) ->
493 let attr_lower = String.lowercase_ascii attr_name in
494- if String.length attr_lower > 5 && String.sub attr_lower 0 5 = "aria-" &&
495 attr_lower <> "aria-hidden" then
496 Message_collector.add_typed collector
497 (`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
···491 if name_lower = "br" || name_lower = "wbr" then begin
492 List.iter (fun (attr_name, _) ->
493 let attr_lower = String.lowercase_ascii attr_name in
494+ if String.starts_with ~prefix:"aria-" attr_lower &&
495 attr_lower <> "aria-hidden" then
496 Message_collector.add_typed collector
497 (`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
···41let create () = { is_xhtml = false }
42let reset state = state.is_xhtml <- false
4344-(** Check if an attribute list contains a specific attribute. *)
45-let has_attr name attrs =
46- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
47-48-(** Get an attribute value from the list. *)
49-let get_attr name attrs =
50- List.find_map (fun (attr_name, value) ->
51- if String.lowercase_ascii attr_name = name then Some value else None
52- ) attrs
53-54(** Input types that allow the list attribute. *)
55let input_types_allowing_list = [
56 "color"; "date"; "datetime-local"; "email"; "month"; "number";
···6768 (* Detect XHTML mode from xmlns attribute on html element *)
69 if name_lower = "html" then begin
70- let xmlns_value = get_attr "xmlns" attrs in
71- match xmlns_value with
72 | Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
73 | _ -> ()
74 end;
7576 (* Check HTML element attribute restrictions *)
77- if namespace = None then begin
0078 match List.assoc_opt name_lower disallowed_attrs_html with
79 | Some disallowed ->
80 List.iter (fun attr ->
81- if has_attr attr attrs then
82 report_disallowed_attr name_lower attr collector
83 ) disallowed
84- | None -> ()
85- end;
8687 (* Check for xml:base attribute - not allowed in HTML *)
88- if namespace = None && name_lower = "html" then begin
89- if has_attr "xml:base" attrs then
0090 report_disallowed_attr name_lower "xml:base" collector
91- end;
9293 (* Check for xmlns:* prefixed attributes - not allowed in HTML *)
94 (* Standard xmlns declarations are allowed but custom prefixes are not *)
95- if namespace = None then begin
0096 List.iter (fun (attr_name, _) ->
97 let attr_lower = String.lowercase_ascii attr_name in
98- if String.length attr_lower > 6 && String.sub attr_lower 0 6 = "xmlns:" then begin
99 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
100 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
101 if prefix <> "xlink" && prefix <> "xml" then
102 Message_collector.add_typed collector
103 (`Attr (`Not_allowed_here (`Attr attr_name)))
104 end
105- ) attrs
106- end;
107108 (* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
109 (* xml:id is never valid on SVG elements in HTML5 *)
110 if List.mem name_lower svg_no_xml_id then begin
111- if has_attr "xml:id" attrs then
112 report_disallowed_attr name_lower "xml:id" collector
113 end;
114115 (* SVG feConvolveMatrix requires order attribute *)
116 if name_lower = "feconvolvematrix" then begin
117- if not (has_attr "order" attrs) then
118 Message_collector.add_typed collector
119 (`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
120 end;
121122 (* Validate style type attribute - must be "text/css" or omitted *)
123- if namespace = None && name_lower = "style" then begin
00124 List.iter (fun (attr_name, attr_value) ->
125 let attr_lower = String.lowercase_ascii attr_name in
126 if attr_lower = "type" then begin
···129 Message_collector.add_typed collector (`Misc `Style_type_invalid)
130 end
131 ) attrs
132- end;
133134 (* Validate object element requires data or type attribute *)
135- if namespace = None && name_lower = "object" then begin
136- let has_data = has_attr "data" attrs in
137- let has_type = has_attr "type" attrs in
00138 if not has_data && not has_type then
139 Message_collector.add_typed collector
140 (`Attr (`Missing (`Elem "object", `Attr "data")))
141- end;
142143 (* Validate link imagesizes/imagesrcset attributes *)
144- if namespace = None && name_lower = "link" then begin
145- let has_imagesizes = has_attr "imagesizes" attrs in
146- let has_imagesrcset = has_attr "imagesrcset" attrs in
147- let rel_value = get_attr "rel" attrs in
148- let as_value = get_attr "as" attrs in
00149150 (* imagesizes requires imagesrcset *)
151 if has_imagesizes && not has_imagesrcset then
···175 if not rel_is_preload then
176 Message_collector.add_typed collector (`Link `As_requires_preload)
177 | None -> ())
178- end;
179180 (* Validate img usemap attribute - must be hash-name reference with content *)
181- if namespace = None && name_lower = "img" then begin
00182 List.iter (fun (attr_name, attr_value) ->
183 let attr_lower = String.lowercase_ascii attr_name in
184 if attr_lower = "usemap" then begin
···189 attr_value attr_name name))))
190 end
191 ) attrs
192- end;
193194 (* Validate embed type attribute - must be valid MIME type *)
195- if namespace = None && name_lower = "embed" then begin
00196 List.iter (fun (attr_name, attr_value) ->
197 let attr_lower = String.lowercase_ascii attr_name in
198 if attr_lower = "type" then begin
···205 attr_value attr_name name msg))))
206 end
207 ) attrs
208- end;
209210 (* Validate width/height on embed and img - must be non-negative integers *)
211- if namespace = None && (name_lower = "embed" || name_lower = "img" ||
212- name_lower = "video" || name_lower = "canvas" ||
213- name_lower = "iframe" || name_lower = "source") then begin
000214 List.iter (fun (attr_name, attr_value) ->
215 let attr_lower = String.lowercase_ascii attr_name in
216 if attr_lower = "width" || attr_lower = "height" then begin
···255 end
256 end
257 ) attrs
258- end;
259260 (* Validate area[shape=default] cannot have coords *)
261- if namespace = None && name_lower = "area" then begin
262- let shape_value = get_attr "shape" attrs in
263- match shape_value with
0264 | Some s when String.lowercase_ascii (String.trim s) = "default" ->
265- if has_attr "coords" attrs then
266 Message_collector.add_typed collector
267 (`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
268- | _ -> ()
269- end;
270271 (* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
272- if namespace = None && name_lower = "bdo" then begin
273- let dir_value = get_attr "dir" attrs in
274- match dir_value with
0275 | None ->
276 Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
277 | Some v when String.lowercase_ascii (String.trim v) = "auto" ->
278 Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
279- | _ -> ()
280- end;
281282 (* Validate input list attribute - only allowed for certain types *)
283- if namespace = None && name_lower = "input" then begin
284- if has_attr "list" attrs then begin
285- let input_type = match get_attr "type" attrs with
286- | Some t -> String.lowercase_ascii (String.trim t)
287- | None -> "text" (* default type is text *)
288- in
289 if not (List.mem input_type input_types_allowing_list) then
290 Message_collector.add_typed collector (`Input `List_not_allowed)
291 end
292- end;
293294 (* Validate data-* attributes *)
295- if namespace = None then begin
00296 List.iter (fun (attr_name, _) ->
297 let attr_lower = String.lowercase_ascii attr_name in
298 (* Check if it starts with "data-" *)
299- if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
300 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
301 (* Check if it's exactly "data-" with nothing after *)
302 if after_prefix = "" then
···306 Message_collector.add_typed collector
307 (`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
308 end
309- ) attrs
310- end;
311312 (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
313- if namespace = None && not state.is_xhtml then begin
314- let xmllang_value = get_attr "xml:lang" attrs in
315- let lang_value = get_attr "lang" attrs in
316- match xmllang_value with
00317 | Some xmllang ->
318 (match lang_value with
319 | None ->
320- (* xml:lang without lang attribute *)
321 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
322 | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
323- (* xml:lang and lang have different values - "lang present with same value" message *)
324 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
325 | _ -> ())
326- | None -> ()
327- end;
328329 (* Validate spellcheck attribute - must be "true" or "false" or empty *)
330- if namespace = None then begin
00331 List.iter (fun (attr_name, attr_value) ->
332 let attr_lower = String.lowercase_ascii attr_name in
333 if attr_lower = "spellcheck" then begin
···336 Message_collector.add_typed collector
337 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
338 end
339- ) attrs
340- end;
341342 (* Validate enterkeyhint attribute - must be one of specific values *)
343- if namespace = None then begin
00344 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
345 List.iter (fun (attr_name, attr_value) ->
346 let attr_lower = String.lowercase_ascii attr_name in
···350 Message_collector.add_typed collector
351 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
352 end
353- ) attrs
354- end;
355356 (* Validate headingoffset attribute - must be a number between 0 and 8 *)
357- if namespace = None then begin
00358 List.iter (fun (attr_name, attr_value) ->
359 let attr_lower = String.lowercase_ascii attr_name in
360 if attr_lower = "headingoffset" then begin
···370 if not is_valid then
371 Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
372 end
373- ) attrs
374- end;
375376 (* Validate accesskey attribute - each key label must be a single code point *)
377- if namespace = None then begin
00378 List.iter (fun (attr_name, attr_value) ->
379 let attr_lower = String.lowercase_ascii attr_name in
380 if attr_lower = "accesskey" then begin
···419 in
420 find_duplicates [] keys
421 end
422- ) attrs
423- end;
424425 (* Validate that command and popovertarget cannot have aria-expanded *)
426- if namespace = None && name_lower = "button" then begin
427- let has_command = has_attr "command" attrs in
428- let has_popovertarget = has_attr "popovertarget" attrs in
429- let has_aria_expanded = has_attr "aria-expanded" attrs in
00430431 if has_command && has_aria_expanded then
432 Message_collector.add_typed collector
···437 Message_collector.add_typed collector
438 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
439 `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
440- end;
441442 (* Note: data-* uppercase check requires XML parsing which preserves case.
443 The HTML5 parser normalizes attribute names to lowercase, so this check
···446 ignore state.is_xhtml;
447448 (* Validate media attribute on link, style, source elements *)
449- if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
000450 List.iter (fun (attr_name, attr_value) ->
451 let attr_lower = String.lowercase_ascii attr_name in
452 if attr_lower = "media" then begin
···462 end
463 end
464 ) attrs
465- end;
466467 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
468- if namespace = None then begin
00469 List.iter (fun (attr_name, attr_value) ->
470 let attr_lower = String.lowercase_ascii attr_name in
471 if attr_lower = "prefix" then begin
···487 end
488 end
489 end
490- ) attrs
491- end
492493let end_element _state ~name:_ ~namespace:_ _collector = ()
494let characters _state _text _collector = ()
···41let create () = { is_xhtml = false }
42let reset state = state.is_xhtml <- false
43000000000044(** Input types that allow the list attribute. *)
45let input_types_allowing_list = [
46 "color"; "date"; "datetime-local"; "email"; "month"; "number";
···5758 (* Detect XHTML mode from xmlns attribute on html element *)
59 if name_lower = "html" then begin
60+ match Attr_utils.get_attr "xmlns" attrs with
061 | Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
62 | _ -> ()
63 end;
6465 (* Check HTML element attribute restrictions *)
66+ (match namespace with
67+ | Some _ -> ()
68+ | None ->
69 match List.assoc_opt name_lower disallowed_attrs_html with
70 | Some disallowed ->
71 List.iter (fun attr ->
72+ if Attr_utils.has_attr attr attrs then
73 report_disallowed_attr name_lower attr collector
74 ) disallowed
75+ | None -> ());
07677 (* Check for xml:base attribute - not allowed in HTML *)
78+ (match namespace with
79+ | Some _ -> ()
80+ | None when name_lower = "html" ->
81+ if Attr_utils.has_attr "xml:base" attrs then
82 report_disallowed_attr name_lower "xml:base" collector
83+ | None -> ());
8485 (* Check for xmlns:* prefixed attributes - not allowed in HTML *)
86 (* Standard xmlns declarations are allowed but custom prefixes are not *)
87+ (match namespace with
88+ | Some _ -> ()
89+ | None ->
90 List.iter (fun (attr_name, _) ->
91 let attr_lower = String.lowercase_ascii attr_name in
92+ if String.starts_with ~prefix:"xmlns:" attr_lower then begin
93 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
94 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
95 if prefix <> "xlink" && prefix <> "xml" then
96 Message_collector.add_typed collector
97 (`Attr (`Not_allowed_here (`Attr attr_name)))
98 end
99+ ) attrs);
0100101 (* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
102 (* xml:id is never valid on SVG elements in HTML5 *)
103 if List.mem name_lower svg_no_xml_id then begin
104+ if Attr_utils.has_attr "xml:id" attrs then
105 report_disallowed_attr name_lower "xml:id" collector
106 end;
107108 (* SVG feConvolveMatrix requires order attribute *)
109 if name_lower = "feconvolvematrix" then begin
110+ if not (Attr_utils.has_attr "order" attrs) then
111 Message_collector.add_typed collector
112 (`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
113 end;
114115 (* Validate style type attribute - must be "text/css" or omitted *)
116+ (match namespace with
117+ | Some _ -> ()
118+ | None when name_lower = "style" ->
119 List.iter (fun (attr_name, attr_value) ->
120 let attr_lower = String.lowercase_ascii attr_name in
121 if attr_lower = "type" then begin
···124 Message_collector.add_typed collector (`Misc `Style_type_invalid)
125 end
126 ) attrs
127+ | None -> ());
128129 (* Validate object element requires data or type attribute *)
130+ (match namespace with
131+ | Some _ -> ()
132+ | None when name_lower = "object" ->
133+ let has_data = Attr_utils.has_attr "data" attrs in
134+ let has_type = Attr_utils.has_attr "type" attrs in
135 if not has_data && not has_type then
136 Message_collector.add_typed collector
137 (`Attr (`Missing (`Elem "object", `Attr "data")))
138+ | None -> ());
139140 (* Validate link imagesizes/imagesrcset attributes *)
141+ (match namespace with
142+ | Some _ -> ()
143+ | None when name_lower = "link" ->
144+ let has_imagesizes = Attr_utils.has_attr "imagesizes" attrs in
145+ let has_imagesrcset = Attr_utils.has_attr "imagesrcset" attrs in
146+ let rel_value = Attr_utils.get_attr "rel" attrs in
147+ let as_value = Attr_utils.get_attr "as" attrs in
148149 (* imagesizes requires imagesrcset *)
150 if has_imagesizes && not has_imagesrcset then
···174 if not rel_is_preload then
175 Message_collector.add_typed collector (`Link `As_requires_preload)
176 | None -> ())
177+ | None -> ());
178179 (* Validate img usemap attribute - must be hash-name reference with content *)
180+ (match namespace with
181+ | Some _ -> ()
182+ | None when name_lower = "img" ->
183 List.iter (fun (attr_name, attr_value) ->
184 let attr_lower = String.lowercase_ascii attr_name in
185 if attr_lower = "usemap" then begin
···190 attr_value attr_name name))))
191 end
192 ) attrs
193+ | None -> ());
194195 (* Validate embed type attribute - must be valid MIME type *)
196+ (match namespace with
197+ | Some _ -> ()
198+ | None when name_lower = "embed" ->
199 List.iter (fun (attr_name, attr_value) ->
200 let attr_lower = String.lowercase_ascii attr_name in
201 if attr_lower = "type" then begin
···208 attr_value attr_name name msg))))
209 end
210 ) attrs
211+ | None -> ());
212213 (* Validate width/height on embed and img - must be non-negative integers *)
214+ let is_dimension_element = name_lower = "embed" || name_lower = "img" ||
215+ name_lower = "video" || name_lower = "canvas" ||
216+ name_lower = "iframe" || name_lower = "source" in
217+ (match namespace with
218+ | Some _ -> ()
219+ | None when is_dimension_element ->
220 List.iter (fun (attr_name, attr_value) ->
221 let attr_lower = String.lowercase_ascii attr_name in
222 if attr_lower = "width" || attr_lower = "height" then begin
···261 end
262 end
263 ) attrs
264+ | None -> ());
265266 (* Validate area[shape=default] cannot have coords *)
267+ (match namespace with
268+ | Some _ -> ()
269+ | None when name_lower = "area" ->
270+ (match Attr_utils.get_attr "shape" attrs with
271 | Some s when String.lowercase_ascii (String.trim s) = "default" ->
272+ if Attr_utils.has_attr "coords" attrs then
273 Message_collector.add_typed collector
274 (`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
275+ | _ -> ())
276+ | None -> ());
277278 (* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
279+ (match namespace with
280+ | Some _ -> ()
281+ | None when name_lower = "bdo" ->
282+ (match Attr_utils.get_attr "dir" attrs with
283 | None ->
284 Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
285 | Some v when String.lowercase_ascii (String.trim v) = "auto" ->
286 Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
287+ | _ -> ())
288+ | None -> ());
289290 (* Validate input list attribute - only allowed for certain types *)
291+ (match namespace with
292+ | Some _ -> ()
293+ | None when name_lower = "input" ->
294+ if Attr_utils.has_attr "list" attrs then begin
295+ let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
296+ |> String.trim |> String.lowercase_ascii in
297 if not (List.mem input_type input_types_allowing_list) then
298 Message_collector.add_typed collector (`Input `List_not_allowed)
299 end
300+ | None -> ());
301302 (* Validate data-* attributes *)
303+ (match namespace with
304+ | Some _ -> ()
305+ | None ->
306 List.iter (fun (attr_name, _) ->
307 let attr_lower = String.lowercase_ascii attr_name in
308 (* Check if it starts with "data-" *)
309+ if String.starts_with ~prefix:"data-" attr_lower then begin
310 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
311 (* Check if it's exactly "data-" with nothing after *)
312 if after_prefix = "" then
···316 Message_collector.add_typed collector
317 (`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
318 end
319+ ) attrs);
0320321 (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
322+ (match namespace with
323+ | Some _ -> ()
324+ | None when not state.is_xhtml ->
325+ let xmllang_value = Attr_utils.get_attr "xml:lang" attrs in
326+ let lang_value = Attr_utils.get_attr "lang" attrs in
327+ (match xmllang_value with
328 | Some xmllang ->
329 (match lang_value with
330 | None ->
0331 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
332 | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
0333 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
334 | _ -> ())
335+ | None -> ())
336+ | None -> ());
337338 (* Validate spellcheck attribute - must be "true" or "false" or empty *)
339+ (match namespace with
340+ | Some _ -> ()
341+ | None ->
342 List.iter (fun (attr_name, attr_value) ->
343 let attr_lower = String.lowercase_ascii attr_name in
344 if attr_lower = "spellcheck" then begin
···347 Message_collector.add_typed collector
348 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
349 end
350+ ) attrs);
0351352 (* Validate enterkeyhint attribute - must be one of specific values *)
353+ (match namespace with
354+ | Some _ -> ()
355+ | None ->
356 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
357 List.iter (fun (attr_name, attr_value) ->
358 let attr_lower = String.lowercase_ascii attr_name in
···362 Message_collector.add_typed collector
363 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
364 end
365+ ) attrs);
0366367 (* Validate headingoffset attribute - must be a number between 0 and 8 *)
368+ (match namespace with
369+ | Some _ -> ()
370+ | None ->
371 List.iter (fun (attr_name, attr_value) ->
372 let attr_lower = String.lowercase_ascii attr_name in
373 if attr_lower = "headingoffset" then begin
···383 if not is_valid then
384 Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
385 end
386+ ) attrs);
0387388 (* Validate accesskey attribute - each key label must be a single code point *)
389+ (match namespace with
390+ | Some _ -> ()
391+ | None ->
392 List.iter (fun (attr_name, attr_value) ->
393 let attr_lower = String.lowercase_ascii attr_name in
394 if attr_lower = "accesskey" then begin
···433 in
434 find_duplicates [] keys
435 end
436+ ) attrs);
0437438 (* Validate that command and popovertarget cannot have aria-expanded *)
439+ (match namespace with
440+ | Some _ -> ()
441+ | None when name_lower = "button" ->
442+ let has_command = Attr_utils.has_attr "command" attrs in
443+ let has_popovertarget = Attr_utils.has_attr "popovertarget" attrs in
444+ let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
445446 if has_command && has_aria_expanded then
447 Message_collector.add_typed collector
···452 Message_collector.add_typed collector
453 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
454 `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
455+ | None -> ());
456457 (* Note: data-* uppercase check requires XML parsing which preserves case.
458 The HTML5 parser normalizes attribute names to lowercase, so this check
···461 ignore state.is_xhtml;
462463 (* Validate media attribute on link, style, source elements *)
464+ let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
465+ (match namespace with
466+ | Some _ -> ()
467+ | None when is_media_element ->
468 List.iter (fun (attr_name, attr_value) ->
469 let attr_lower = String.lowercase_ascii attr_name in
470 if attr_lower = "media" then begin
···480 end
481 end
482 ) attrs
483+ | None -> ());
484485 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
486+ (match namespace with
487+ | Some _ -> ()
488+ | None ->
489 List.iter (fun (attr_name, attr_value) ->
490 let attr_lower = String.lowercase_ascii attr_name in
491 if attr_lower = "prefix" then begin
···507 end
508 end
509 end
510+ ) attrs)
0511512let end_element _state ~name:_ ~namespace:_ _collector = ()
513let characters _state _text _collector = ()
+5-12
lib/htmlrw_check/specialized/base_checker.ml
···11let reset state =
12 state.seen_link_or_script <- false
1314-(** Check if an attribute list contains a specific attribute. *)
15-let has_attr name attrs =
16- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
17-18let start_element state ~name ~namespace ~attrs collector =
19- if namespace <> None then ()
20- else begin
21- let name_lower = String.lowercase_ascii name in
22- match name_lower with
23 | "link" | "script" ->
24 state.seen_link_or_script <- true
25 | "base" ->
26 if state.seen_link_or_script then
27 Message_collector.add_typed collector (`Misc `Base_after_link_script);
28 (* base element must have href or target attribute *)
29- let has_href = has_attr "href" attrs in
30- let has_target = has_attr "target" attrs in
31- if not has_href && not has_target then
32 Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
33 | _ -> ()
34- end
3536let end_element _state ~name:_ ~namespace:_ _collector = ()
37let characters _state _text _collector = ()
···11let reset state =
12 state.seen_link_or_script <- false
13000014let start_element state ~name ~namespace ~attrs collector =
15+ match namespace with
16+ | Some _ -> ()
17+ | None ->
18+ match String.lowercase_ascii name with
19 | "link" | "script" ->
20 state.seen_link_or_script <- true
21 | "base" ->
22 if state.seen_link_or_script then
23 Message_collector.add_typed collector (`Misc `Base_after_link_script);
24 (* base element must have href or target attribute *)
25+ if not (Attr_utils.has_attr "href" attrs || Attr_utils.has_attr "target" attrs) then
0026 Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
27 | _ -> ()
02829let end_element _state ~name:_ ~namespace:_ _collector = ()
30let characters _state _text _collector = ()
+1-6
lib/htmlrw_check/specialized/dl_checker.ml
···57 | ctx :: _ -> Some ctx
58 | [] -> None
5960-let get_attr name attrs =
61- List.find_map (fun (n, v) ->
62- if String.lowercase_ascii n = name then Some v else None
63- ) attrs
64-65let start_element state ~name ~namespace ~attrs collector =
66 let name_lower = String.lowercase_ascii name in
67···115 Message_collector.add_typed collector
116 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
117 (* Check that role is only presentation or none *)
118- (match get_attr "role" attrs with
119 | Some role_value ->
120 let role_lower = String.lowercase_ascii (String.trim role_value) in
121 if role_lower <> "presentation" && role_lower <> "none" then
···57 | ctx :: _ -> Some ctx
58 | [] -> None
590000060let start_element state ~name ~namespace ~attrs collector =
61 let name_lower = String.lowercase_ascii name in
62···110 Message_collector.add_typed collector
111 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
112 (* Check that role is only presentation or none *)
113+ (match Attr_utils.get_attr "role" attrs with
114 | Some role_value ->
115 let role_lower = String.lowercase_ascii (String.trim role_value) in
116 if role_lower <> "presentation" && role_lower <> "none" then
+42-76
lib/htmlrw_check/specialized/picture_checker.ml
···66 state.always_matching_is_media_all <- false;
67 state.always_matching_is_media_empty <- false
6869-(** Check if an attribute list contains a specific attribute. *)
70-let has_attr name attrs =
71- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
72-73(** Report disallowed attribute error *)
74let report_disallowed_attr element attr collector =
75 Message_collector.add_typed collector
···80 Message_collector.add_typed collector
81 (`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
8200000083let check_picture_attrs attrs collector =
84- List.iter (fun disallowed ->
85- if has_attr disallowed attrs then
86- report_disallowed_attr "picture" disallowed collector
87- ) disallowed_picture_attrs
8889let check_source_attrs_in_picture attrs collector =
90- List.iter (fun disallowed ->
91- if has_attr disallowed attrs then
92- report_disallowed_attr "source" disallowed collector
93- ) disallowed_source_attrs_in_picture;
94- (* source in picture requires srcset *)
95- if not (has_attr "srcset" attrs) then
96- Message_collector.add_typed collector
97- (`Srcset `Source_missing_srcset)
9899let check_img_attrs attrs collector =
100- List.iter (fun disallowed ->
101- if has_attr disallowed attrs then
102- report_disallowed_attr "img" disallowed collector
103- ) disallowed_img_attrs
104105let start_element state ~name ~namespace ~attrs collector =
106 let name_lower = String.lowercase_ascii name in
···112 end;
113114 (* Rest of checks only apply to HTML namespace elements *)
115- if namespace = None then begin
116- match name_lower with
00117 | "picture" ->
118 (* Check if picture is in a disallowed parent context *)
119 (match state.parent_stack with
···124 check_picture_attrs attrs collector;
125 state.in_picture <- true;
126 state.has_img_in_picture <- false;
127- state.picture_depth <- 0; (* Will be incremented to 1 at end of function *)
128 state.children_in_picture <- [];
129 state.last_was_img <- false;
130 state.has_source_after_img <- false;
···136 state.children_in_picture <- "source" :: state.children_in_picture;
137 if state.last_was_img then
138 state.has_source_after_img <- true;
139- (* Check for always-matching source followed by another source *)
140 if state.has_always_matching_source then
141 state.source_after_always_matching <- true;
142- (* A source is "always matching" if it has:
143- - no media and no type attribute, OR
144- - media attribute with empty/whitespace-only value, OR
145- - media="all" (with optional whitespace) *)
146- let media_value = List.find_map (fun (attr_name, v) ->
147- if String.lowercase_ascii attr_name = "media" then Some v else None
148- ) attrs in
149- let has_type = has_attr "type" attrs in
150 let is_media_all = match media_value with
151 | Some v -> String.lowercase_ascii (String.trim v) = "all"
152- | None -> false
153- in
154 let is_media_empty = match media_value with
155 | Some v -> String.trim v = ""
156- | None -> false
157- in
158 let is_always_matching = match media_value with
159- | None -> not has_type (* no media, check if no type either *)
160 | Some v ->
161 let trimmed = String.trim v in
162 trimmed = "" || String.lowercase_ascii trimmed = "all"
163 in
164 if is_always_matching then begin
165 state.has_always_matching_source <- true;
166- if is_media_all then
167- state.always_matching_is_media_all <- true
168- else if is_media_empty then
169- state.always_matching_is_media_empty <- true
170 end
171172 | "img" when state.in_picture && state.picture_depth = 1 ->
···174 state.has_img_in_picture <- true;
175 state.children_in_picture <- "img" :: state.children_in_picture;
176 state.last_was_img <- true;
177- (* Check for multiple img elements *)
178- let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
179 if img_count > 1 then
180 report_disallowed_child "picture" "img" collector;
181- (* Check if always-matching source is followed by img with srcset *)
182- if state.has_always_matching_source && has_attr "srcset" attrs then begin
183- if state.always_matching_is_media_all then
184- Message_collector.add_typed collector (`Misc `Media_all)
185- else if state.always_matching_is_media_empty then
186- Message_collector.add_typed collector (`Misc `Media_empty)
187- else
188- Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
189- end
190191 | "script" when state.in_picture && state.picture_depth = 1 ->
192 state.children_in_picture <- "script" :: state.children_in_picture
···197 | "img" ->
198 check_img_attrs attrs collector
199200- | _ -> ()
201- end;
202203 (* Track depth when inside picture *)
204 if state.in_picture then
···209 state.parent_stack <- name_lower :: state.parent_stack
210211let end_element state ~name ~namespace collector =
212- if namespace <> None then ()
213- else begin
0214 let name_lower = String.lowercase_ascii name in
215216- (* Track depth *)
217 if state.in_picture then
218 state.picture_depth <- state.picture_depth - 1;
219220 if name_lower = "picture" && state.picture_depth = 0 then begin
221- (* Check if picture had img child *)
222 if not state.has_img_in_picture then
223- Message_collector.add_typed collector
224- (`Srcset `Picture_missing_img);
225- (* Check for source after img *)
226 if state.has_source_after_img then
227 report_disallowed_child "picture" "source" collector;
228- (* Check for source after always-matching source *)
229- if state.source_after_always_matching then begin
230- if state.always_matching_is_media_all then
231- Message_collector.add_typed collector (`Misc `Media_all)
232- else if state.always_matching_is_media_empty then
233- Message_collector.add_typed collector (`Misc `Media_empty)
234- else
235- Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
236- end;
237-238 state.in_picture <- false
239 end;
240241- (* Pop from parent stack *)
242- state.parent_stack <- (match state.parent_stack with
243- | _ :: rest -> rest
244- | [] -> [])
245- end
246247let characters state text collector =
248 (* Text in picture element is not allowed *)
···66 state.always_matching_is_media_all <- false;
67 state.always_matching_is_media_empty <- false
68000069(** Report disallowed attribute error *)
70let report_disallowed_attr element attr collector =
71 Message_collector.add_typed collector
···76 Message_collector.add_typed collector
77 (`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
7879+let check_disallowed_attrs element disallowed_list attrs collector =
80+ List.iter (fun attr ->
81+ if Attr_utils.has_attr attr attrs then
82+ report_disallowed_attr element attr collector
83+ ) disallowed_list
84+85let check_picture_attrs attrs collector =
86+ check_disallowed_attrs "picture" disallowed_picture_attrs attrs collector
0008788let check_source_attrs_in_picture attrs collector =
89+ check_disallowed_attrs "source" disallowed_source_attrs_in_picture attrs collector;
90+ if not (Attr_utils.has_attr "srcset" attrs) then
91+ Message_collector.add_typed collector (`Srcset `Source_missing_srcset)
000009293let check_img_attrs attrs collector =
94+ check_disallowed_attrs "img" disallowed_img_attrs attrs collector
0009596let start_element state ~name ~namespace ~attrs collector =
97 let name_lower = String.lowercase_ascii name in
···103 end;
104105 (* Rest of checks only apply to HTML namespace elements *)
106+ match namespace with
107+ | Some _ -> ()
108+ | None ->
109+ (match name_lower with
110 | "picture" ->
111 (* Check if picture is in a disallowed parent context *)
112 (match state.parent_stack with
···117 check_picture_attrs attrs collector;
118 state.in_picture <- true;
119 state.has_img_in_picture <- false;
120+ state.picture_depth <- 0;
121 state.children_in_picture <- [];
122 state.last_was_img <- false;
123 state.has_source_after_img <- false;
···129 state.children_in_picture <- "source" :: state.children_in_picture;
130 if state.last_was_img then
131 state.has_source_after_img <- true;
0132 if state.has_always_matching_source then
133 state.source_after_always_matching <- true;
134+ (* A source is "always matching" if it has no media/type, or media="" or media="all" *)
135+ let media_value = Attr_utils.get_attr "media" attrs in
136+ let has_type = Attr_utils.has_attr "type" attrs in
00000137 let is_media_all = match media_value with
138 | Some v -> String.lowercase_ascii (String.trim v) = "all"
139+ | None -> false in
0140 let is_media_empty = match media_value with
141 | Some v -> String.trim v = ""
142+ | None -> false in
0143 let is_always_matching = match media_value with
144+ | None -> not has_type
145 | Some v ->
146 let trimmed = String.trim v in
147 trimmed = "" || String.lowercase_ascii trimmed = "all"
148 in
149 if is_always_matching then begin
150 state.has_always_matching_source <- true;
151+ (* Only set flags to true, never reset to false *)
152+ if is_media_all then state.always_matching_is_media_all <- true;
153+ if is_media_empty then state.always_matching_is_media_empty <- true
0154 end
155156 | "img" when state.in_picture && state.picture_depth = 1 ->
···158 state.has_img_in_picture <- true;
159 state.children_in_picture <- "img" :: state.children_in_picture;
160 state.last_was_img <- true;
161+ let img_count = List.length (List.filter (( = ) "img") state.children_in_picture) in
0162 if img_count > 1 then
163 report_disallowed_child "picture" "img" collector;
164+ if state.has_always_matching_source && Attr_utils.has_attr "srcset" attrs then
165+ Message_collector.add_typed collector
166+ (if state.always_matching_is_media_all then `Misc `Media_all
167+ else if state.always_matching_is_media_empty then `Misc `Media_empty
168+ else `Srcset `Source_needs_media_or_type)
0000169170 | "script" when state.in_picture && state.picture_depth = 1 ->
171 state.children_in_picture <- "script" :: state.children_in_picture
···176 | "img" ->
177 check_img_attrs attrs collector
178179+ | _ -> ());
0180181 (* Track depth when inside picture *)
182 if state.in_picture then
···187 state.parent_stack <- name_lower :: state.parent_stack
188189let end_element state ~name ~namespace collector =
190+ match namespace with
191+ | Some _ -> ()
192+ | None ->
193 let name_lower = String.lowercase_ascii name in
1940195 if state.in_picture then
196 state.picture_depth <- state.picture_depth - 1;
197198 if name_lower = "picture" && state.picture_depth = 0 then begin
0199 if not state.has_img_in_picture then
200+ Message_collector.add_typed collector (`Srcset `Picture_missing_img);
00201 if state.has_source_after_img then
202 report_disallowed_child "picture" "source" collector;
203+ if state.source_after_always_matching then
204+ Message_collector.add_typed collector
205+ (if state.always_matching_is_media_all then `Misc `Media_all
206+ else if state.always_matching_is_media_empty then `Misc `Media_empty
207+ else `Srcset `Source_needs_media_or_type);
00000208 state.in_picture <- false
209 end;
210211+ state.parent_stack <- match state.parent_stack with _ :: rest -> rest | [] -> []
0000212213let characters state text collector =
214 (* Text in picture element is not allowed *)
+4-8
lib/htmlrw_check/specialized/source_checker.ml
···23 | ctx :: _ -> ctx
24 | [] -> Other
2526-(** Check if an attribute list contains a specific attribute. *)
27-let has_attr name attrs =
28- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
29-30let start_element state ~name ~namespace ~attrs collector =
31 if namespace <> None then ()
32 else begin
···42 let ctx = current_context state in
43 begin match ctx with
44 | Video | Audio ->
45- if has_attr "srcset" attrs then
46 Message_collector.add_typed collector
47 (`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
48- if has_attr "sizes" attrs then
49 Message_collector.add_typed collector
50 (`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
51- if has_attr "width" attrs then
52 Message_collector.add_typed collector
53 (`Attr (`Not_allowed (`Attr "width", `Elem "source")));
54- if has_attr "height" attrs then
55 Message_collector.add_typed collector
56 (`Attr (`Not_allowed (`Attr "height", `Elem "source")))
57 | Picture | Other -> ()
···23 | ctx :: _ -> ctx
24 | [] -> Other
25000026let start_element state ~name ~namespace ~attrs collector =
27 if namespace <> None then ()
28 else begin
···38 let ctx = current_context state in
39 begin match ctx with
40 | Video | Audio ->
41+ if Attr_utils.has_attr "srcset" attrs then
42 Message_collector.add_typed collector
43 (`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
44+ if Attr_utils.has_attr "sizes" attrs then
45 Message_collector.add_typed collector
46 (`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
47+ if Attr_utils.has_attr "width" attrs then
48 Message_collector.add_typed collector
49 (`Attr (`Not_allowed (`Attr "width", `Elem "source")));
50+ if Attr_utils.has_attr "height" attrs then
51 Message_collector.add_typed collector
52 (`Attr (`Not_allowed (`Attr "height", `Elem "source")))
53 | Picture | Other -> ()
···14let create () = ()
15let reset _state = ()
1617-(** Get attribute value *)
18-let get_attr name attrs =
19- List.find_map (fun (n, v) ->
20- if String.lowercase_ascii n = name then Some v else None
21- ) attrs
22-23(** Split string on a character while respecting parentheses *)
24let split_respecting_parens ~sep s =
25 let len = String.length s in
···971972 (* SVG image elements should not have srcset *)
973 if namespace <> None && name_lower = "image" then begin
974- if get_attr "srcset" attrs <> None then
975 Message_collector.add_typed collector
976 (`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
977 end;
···980 else begin
981 (* Check sizes and srcset on img and source *)
982 if name_lower = "img" || name_lower = "source" then begin
983- let sizes_value = get_attr "sizes" attrs in
984- let srcset_value = get_attr "srcset" attrs in
985 let has_sizes = sizes_value <> None in
986 let has_srcset = srcset_value <> None in
987
···14let create () = ()
15let reset _state = ()
1600000017(** Split string on a character while respecting parentheses *)
18let split_respecting_parens ~sep s =
19 let len = String.length s in
···965966 (* SVG image elements should not have srcset *)
967 if namespace <> None && name_lower = "image" then begin
968+ if Attr_utils.get_attr "srcset" attrs <> None then
969 Message_collector.add_typed collector
970 (`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
971 end;
···974 else begin
975 (* Check sizes and srcset on img and source *)
976 if name_lower = "img" || name_lower = "source" then begin
977+ let sizes_value = Attr_utils.get_attr "sizes" attrs in
978+ let srcset_value = Attr_utils.get_attr "srcset" attrs in
979 let has_sizes = sizes_value <> None in
980 let has_srcset = srcset_value <> None in
981