···11+(** Common attribute utilities used across checkers. *)
22+33+type attrs = (string * string) list
44+55+let has_attr name attrs =
66+ List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
77+88+let get_attr name attrs =
99+ List.find_map (fun (n, v) ->
1010+ if String.lowercase_ascii n = name then Some v else None
1111+ ) attrs
1212+1313+let get_attr_or name ~default attrs =
1414+ Option.value ~default (get_attr name attrs)
1515+1616+let is_non_empty_attr name attrs =
1717+ match get_attr name attrs with
1818+ | Some v -> String.trim v <> ""
1919+ | None -> false
+2-8
lib/htmlrw_check/datatype/dt_color.ml
···213213 if String.length s = 0 then Error "Color value must not be empty"
214214 else if List.mem s named_colors then Ok ()
215215 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216216- else if
217217- String.length s > 4
218218- && (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(")
219219- then
216216+ else if String.starts_with ~prefix:"rgb(" s || String.starts_with ~prefix:"rgba(" s then
220217 (* Basic validation for rgb/rgba - just check balanced parens *)
221218 if s.[String.length s - 1] = ')' then Ok ()
222219 else Error "rgb/rgba function must end with ')'"
223223- else if
224224- String.length s > 4
225225- && (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(")
226226- then
220220+ else if String.starts_with ~prefix:"hsl(" s || String.starts_with ~prefix:"hsla(" s then
227221 (* Basic validation for hsl/hsla - just check balanced parens *)
228222 if s.[String.length s - 1] = ')' then Ok ()
229223 else Error "hsl/hsla function must end with ')'"
+2-2
lib/htmlrw_check/datatype/dt_media_query.ml
···330330331331 (* Get base feature name for error messages (strip min-/max- prefix) *)
332332 let base_feature =
333333- if String.length feature > 4 && String.sub feature 0 4 = "min-" then
333333+ if String.starts_with ~prefix:"min-" feature then
334334 String.sub feature 4 (String.length feature - 4)
335335- else if String.length feature > 4 && String.sub feature 0 4 = "max-" then
335335+ else if String.starts_with ~prefix:"max-" feature then
336336 String.sub feature 4 (String.length feature - 4)
337337 else
338338 feature
+77-102
lib/htmlrw_check/message_format.ml
···11+(** Get effective system_id, preferring location's system_id over the passed one *)
22+let get_system_id ?system_id loc_system_id =
33+ loc_system_id
44+ |> Option.fold ~none:system_id ~some:Option.some
55+ |> Option.value ~default:"input"
66+17let format_text ?system_id messages =
28 let buf = Buffer.create 1024 in
33- List.iter
44- (fun msg ->
55- let loc_str =
66- match msg.Message.location with
77- | Some loc -> (
88- let sid =
99- match loc.Message.system_id with
1010- | Some s -> s
1111- | None -> (
1212- match system_id with Some s -> s | None -> "input")
1313- in
1414- let col_info =
1515- match (loc.end_line, loc.end_column) with
1616- | Some el, Some ec when el = loc.line && ec > loc.column ->
1717- Printf.sprintf "%d.%d-%d" loc.line loc.column ec
1818- | Some el, Some ec when el > loc.line ->
1919- Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
2020- | _ -> Printf.sprintf "%d.%d" loc.line loc.column
2121- in
2222- Printf.sprintf "%s:%s" sid col_info)
2323- | None -> (
2424- match system_id with Some s -> s | None -> "input")
2525- in
2626- let severity_str = Message.severity_to_string msg.Message.severity in
2727- let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
2828- let elem_str =
2929- match msg.Message.element with
3030- | Some e -> " (element: " ^ e ^ ")"
3131- | None -> ""
3232- in
3333- let attr_str =
3434- match msg.Message.attribute with
3535- | Some a -> " (attribute: " ^ a ^ ")"
3636- | None -> ""
3737- in
3838- Buffer.add_string buf
3939- (Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str
4040- msg.Message.message elem_str attr_str))
4141- messages;
99+ List.iter (fun msg ->
1010+ let loc_str = match msg.Message.location with
1111+ | Some loc ->
1212+ let sid = get_system_id ?system_id loc.Message.system_id in
1313+ let col_info = match loc.end_line, loc.end_column with
1414+ | Some el, Some ec when el = loc.line && ec > loc.column ->
1515+ Printf.sprintf "%d.%d-%d" loc.line loc.column ec
1616+ | Some el, Some ec when el > loc.line ->
1717+ Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
1818+ | _ ->
1919+ Printf.sprintf "%d.%d" loc.line loc.column
2020+ in
2121+ Printf.sprintf "%s:%s" sid col_info
2222+ | None ->
2323+ Option.value system_id ~default:"input"
2424+ in
2525+ let elem_str = Option.fold ~none:"" ~some:(Printf.sprintf " (element: %s)") msg.Message.element in
2626+ let attr_str = Option.fold ~none:"" ~some:(Printf.sprintf " (attribute: %s)") msg.Message.attribute in
2727+ Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s%s%s\n"
2828+ loc_str
2929+ (Message.severity_to_string msg.Message.severity)
3030+ (Message.error_code_to_string msg.Message.error_code)
3131+ msg.Message.message
3232+ elem_str
3333+ attr_str)
3434+ ) messages;
4235 Buffer.contents buf
43364437let format_gnu ?system_id messages =
4538 let buf = Buffer.create 1024 in
4646- List.iter
4747- (fun msg ->
4848- let loc_str =
4949- match msg.Message.location with
5050- | Some loc -> (
5151- let sid =
5252- match loc.Message.system_id with
5353- | Some s -> s
5454- | None -> (
5555- match system_id with Some s -> s | None -> "input")
5656- in
5757- Printf.sprintf "%s:%d:%d" sid loc.line loc.column)
5858- | None -> (
5959- match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
6060- in
6161- let severity_str = Message.severity_to_string msg.Message.severity in
6262- let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
6363- Buffer.add_string buf
6464- (Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
6565- msg.Message.message))
6666- messages;
3939+ List.iter (fun msg ->
4040+ let loc_str = match msg.Message.location with
4141+ | Some loc ->
4242+ Printf.sprintf "%s:%d:%d"
4343+ (get_system_id ?system_id loc.Message.system_id)
4444+ loc.line loc.column
4545+ | None ->
4646+ Option.value system_id ~default:"input" ^ ":0:0"
4747+ in
4848+ Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s\n"
4949+ loc_str
5050+ (Message.severity_to_string msg.Message.severity)
5151+ (Message.error_code_to_string msg.Message.error_code)
5252+ msg.Message.message)
5353+ ) messages;
6754 Buffer.contents buf
68556956let message_to_json ?system_id msg =
7057 let open Jsont in
7171- let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in
7272- let message_text = String (msg.Message.message, Meta.none) in
7373- let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
7474- let with_code =
7575- (("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base
7676- in
7777- let with_location =
7878- match msg.Message.location with
5858+ let str s = String (s, Meta.none) in
5959+ let num n = Number (float_of_int n, Meta.none) in
6060+ let field name value = ((name, Meta.none), value) in
6161+6262+ let base = [
6363+ field "type" (str (Message.severity_to_string msg.Message.severity));
6464+ field "message" (str msg.Message.message);
6565+ field "subType" (str (Message.error_code_to_string msg.Message.error_code));
6666+ ] in
6767+6868+ let with_location = match msg.Message.location with
7969 | Some loc ->
8080- let line = Number (float_of_int loc.Message.line, Meta.none) in
8181- let column = Number (float_of_int loc.Message.column, Meta.none) in
8282- let loc_fields =
8383- [ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ]
8484- in
8585- let loc_fields =
8686- match loc.Message.end_line with
8787- | Some el ->
8888- (("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields
8989- | None -> loc_fields
9090- in
9191- let loc_fields =
9292- match loc.Message.end_column with
9393- | Some ec ->
9494- (("lastColumn", Meta.none), Number (float_of_int ec, Meta.none))
9595- :: loc_fields
9696- | None -> loc_fields
9797- in
9898- let url =
9999- match loc.Message.system_id with
100100- | Some s -> s
101101- | None -> (
102102- match system_id with Some s -> s | None -> "input")
103103- in
104104- (("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code
7070+ let url = get_system_id ?system_id loc.Message.system_id in
7171+ let loc_fields = [
7272+ field "url" (str url);
7373+ field "firstLine" (num loc.line);
7474+ field "firstColumn" (num loc.column);
7575+ ] in
7676+ let loc_fields = Option.fold ~none:loc_fields
7777+ ~some:(fun el -> field "lastLine" (num el) :: loc_fields)
7878+ loc.Message.end_line in
7979+ let loc_fields = Option.fold ~none:loc_fields
8080+ ~some:(fun ec -> field "lastColumn" (num ec) :: loc_fields)
8181+ loc.Message.end_column in
8282+ loc_fields @ base
10583 | None ->
106106- let url =
107107- match system_id with Some s -> s | None -> "input"
108108- in
109109- (("url", Meta.none), String (url, Meta.none)) :: with_code
8484+ field "url" (str (Option.value system_id ~default:"input")) :: base
11085 in
111111- let with_extract =
112112- match msg.Message.extract with
113113- | Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location
114114- | None -> with_location
115115- in
8686+8787+ let with_extract = Option.fold ~none:with_location
8888+ ~some:(fun e -> field "extract" (str e) :: with_location)
8989+ msg.Message.extract in
9090+11691 Object (with_extract, Meta.none)
1179211893let format_json ?system_id messages =
+10-10
lib/htmlrw_check/parse_error_bridge.ml
···1414 | Html5rw.Parse_error_code.Tree_construction_error s ->
1515 (* Check for control-character/noncharacter/surrogate with codepoint info *)
1616 (try
1717- if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
1717+ if String.starts_with ~prefix:"control-character-in-input-s" s then
1818 let colon_pos = String.index s ':' in
1919 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
2020 let cp = int_of_string ("0x" ^ cp_str) in
2121 Printf.sprintf "Forbidden code point U+%04x." cp
2222- else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
2222+ else if String.starts_with ~prefix:"noncharacter-in-input-str" s then
2323 let colon_pos = String.index s ':' in
2424 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
2525 let cp = int_of_string ("0x" ^ cp_str) in
2626 Printf.sprintf "Forbidden code point U+%04x." cp
2727- else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
2727+ else if String.starts_with ~prefix:"surrogate-in-input-str" s then
2828 let colon_pos = String.index s ':' in
2929 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
3030 let cp = int_of_string ("0x" ^ cp_str) in
3131 Printf.sprintf "Forbidden code point U+%04x." cp
3232 (* Character reference errors *)
3333- else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
3333+ else if String.starts_with ~prefix:"control-character-reference:" s then
3434 let cp_str = String.sub s 28 (String.length s - 28) in
3535 let cp = int_of_string ("0x" ^ cp_str) in
3636 if cp = 0x0D then
3737 "A numeric character reference expanded to carriage return."
3838 else
3939 Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
4040- else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
4040+ else if String.starts_with ~prefix:"noncharacter-character-referenc" s then
4141 let colon_pos = String.index s ':' in
4242 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
4343 let cp = int_of_string ("0x" ^ cp_str) in
···4949 Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
5050 else
5151 Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
5252- else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
5252+ else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then
5353 "Character reference outside the permissible Unicode range."
5454- else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
5454+ else if String.starts_with ~prefix:"surrogate-character-referen" s then
5555 let colon_pos = String.index s ':' in
5656 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
5757 let cp = int_of_string ("0x" ^ cp_str) in
···6464 "End tag \xe2\x80\x9cbr\xe2\x80\x9d."
6565 else if s = "expected-closing-tag-but-got-eof" then
6666 "End of file seen and there were open elements."
6767- else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
6767+ else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
6868 let colon_pos = String.index s ':' in
6969 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
7070 Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
7171- else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
7171+ else if String.starts_with ~prefix:"unexpected-end-tag:" s then
7272 let element = String.sub s 19 (String.length s - 19) in
7373 Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
7474- else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
7474+ else if String.starts_with ~prefix:"start-tag-in-table:" s then
7575 let tag = String.sub s 19 (String.length s - 19) in
7676 Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
7777 else
+17-39
lib/htmlrw_check/semantic/autofocus_checker.ml
···33 Validates that only one element with autofocus attribute exists within
44 each dialog or popover context. *)
5566-(** Context for tracking autofocus elements. *)
76type context_type = Dialog | Popover
8798type context = {
···2625 state.context_stack <- [];
2726 state.current_depth <- 0
28272929-(** Check if an attribute list contains a specific attribute. *)
3030-let has_attr name attrs =
3131- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
3232-3333-(** Get an attribute value from the list. *)
3434-let get_attr name attrs =
3535- List.find_map (fun (attr_name, value) ->
3636- if String.lowercase_ascii attr_name = name then Some value else None
3737- ) attrs
3838-3939-(** Check if element has popover attribute. *)
4040-let has_popover attrs =
4141- List.exists (fun (attr_name, _) ->
4242- String.lowercase_ascii attr_name = "popover"
4343- ) attrs
4444-4528let start_element state ~name ~namespace ~attrs collector =
4646- let name_lower = String.lowercase_ascii name in
4747-4848- (* Track depth *)
4929 state.current_depth <- state.current_depth + 1;
50305151- if namespace = None then begin
3131+ match namespace with
3232+ | Some _ -> ()
3333+ | None ->
3434+ let name_lower = String.lowercase_ascii name in
3535+5236 (* Check if we're entering a dialog or popover context *)
5353- let enters_context =
5454- if name_lower = "dialog" then Some Dialog
5555- else if has_popover attrs then Some Popover
5656- else None
3737+ let enters_context = match name_lower with
3838+ | "dialog" -> Some Dialog
3939+ | _ when Attr_utils.has_attr "popover" attrs -> Some Popover
4040+ | _ -> None
5741 in
58425959- (match enters_context with
6060- | Some ctx_type ->
4343+ Option.iter (fun ctx_type ->
6144 let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
6245 state.context_stack <- ctx :: state.context_stack
6363- | None -> ());
4646+ ) enters_context;
64476548 (* Check for autofocus attribute *)
6666- if has_attr "autofocus" attrs then begin
6767- (* Increment count in innermost context if any *)
4949+ if Attr_utils.has_attr "autofocus" attrs then
6850 match state.context_stack with
6951 | ctx :: _ ->
7052 ctx.autofocus_count <- ctx.autofocus_count + 1;
7153 if ctx.autofocus_count > 1 then
7254 Message_collector.add_typed collector (`Misc `Multiple_autofocus)
7355 | [] -> ()
7474- end
7575- end
76567757let end_element state ~name ~namespace _collector =
7878- let name_lower = String.lowercase_ascii name in
7979-8080- if namespace = None then begin
8181- (* Pop context if we're leaving one *)
5858+ (match namespace with
5959+ | Some _ -> ()
6060+ | None ->
6161+ let name_lower = String.lowercase_ascii name in
8262 match state.context_stack with
8363 | ctx :: rest when ctx.depth = state.current_depth ->
8484- (* Verify this is the right element *)
8564 let matches =
8665 (name_lower = "dialog" && ctx.context_type = Dialog) ||
8766 (ctx.context_type = Popover)
8867 in
8968 if matches then state.context_stack <- rest
9090- | _ -> ()
9191- end;
6969+ | _ -> ());
92709371 state.current_depth <- state.current_depth - 1
9472
+1-8
lib/htmlrw_check/semantic/form_checker.ml
···10101111let reset _state = ()
12121313-(** Get the value of an attribute if present. *)
1414-let get_attr name attrs =
1515- List.find_map
1616- (fun (attr_name, value) ->
1717- if String.equal attr_name name then Some value else None)
1818- attrs
1919-2013(** Check if autocomplete value contains webauthn token *)
2114let contains_webauthn value =
2215 let lower = String.lowercase_ascii value in
···4235 (* Check autocomplete attribute on form elements *)
4336 match name with
4437 | "input" | "select" | "textarea" ->
4545- (match get_attr "autocomplete" attrs with
3838+ (match Attr_utils.get_attr "autocomplete" attrs with
4639 | Some autocomplete_value ->
4740 check_autocomplete_value autocomplete_value name collector
4841 | None -> ())
···6060 let n = String.lowercase_ascii name in
6161 n = "svg" || n = "math"
62626363-let get_attr name attrs =
6464- List.find_map (fun (n, v) ->
6565- if String.lowercase_ascii n = name then Some v else None
6666- ) attrs
6767-6863let get_lang_code lang =
6964 (* Extract primary language subtag *)
7065 match String.split_on_char '-' lang with
···226221 let ns = Option.value namespace ~default:"" in
227222228223 if name_lower = "html" then begin
229229- state.html_lang <- get_attr "lang" attrs;
230230- state.html_dir <- get_attr "dir" attrs;
224224+ state.html_lang <- Attr_utils.get_attr "lang" attrs;
225225+ state.html_dir <- Attr_utils.get_attr "dir" attrs;
231226 (* TODO: get line/column from locator *)
232227 state.html_locator <- Some (1, 1)
233228 end
···244239 state.skip_depth <- state.skip_depth + 1
245240 else begin
246241 (* Check for different lang attribute *)
247247- match get_attr "lang" attrs with
242242+ match Attr_utils.get_attr "lang" attrs with
248243 | Some lang when state.html_lang <> Some lang ->
249244 state.skip_depth <- state.skip_depth + 1
250245 | _ -> ()
+1-7
lib/htmlrw_check/semantic/option_checker.ml
···2222 state.option_stack <- [];
2323 state.in_template <- 0
24242525-(** Get attribute value if present. *)
2626-let get_attr name attrs =
2727- List.find_map (fun (attr_name, value) ->
2828- if String.lowercase_ascii attr_name = name then Some value else None
2929- ) attrs
3030-3125let start_element state ~name ~namespace ~attrs collector =
3226 let name_lower = String.lowercase_ascii name in
3327···3630 if name_lower = "template" then
3731 state.in_template <- state.in_template + 1
3832 else if state.in_template = 0 && name_lower = "option" then begin
3939- let label_opt = get_attr "label" attrs in
3333+ let label_opt = Attr_utils.get_attr "label" attrs in
4034 let has_label = label_opt <> None in
4135 let label_empty = match label_opt with
4236 | Some v -> String.trim v = ""
···1313 state._in_figure <- false;
1414 state.in_a_with_href <- false
15151616-(** Check if an attribute list contains a specific attribute. *)
1717-let has_attr name attrs =
1818- List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
1919-2020-(** Get the value of an attribute if present. *)
2121-let get_attr name attrs =
2222- List.find_map
2323- (fun (attr_name, value) ->
2424- if String.equal attr_name name then Some value else None)
2525- attrs
2626-2716let check_img_element state attrs collector =
2817 (* Check for required src OR srcset attribute *)
2929- if not (has_attr "src" attrs) && not (has_attr "srcset" attrs) then
1818+ if not (Attr_utils.has_attr "src" attrs) && not (Attr_utils.has_attr "srcset" attrs) then
3019 Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
31203221 (* Check for alt attribute - always required *)
3333- if not (has_attr "alt" attrs) then
2222+ if not (Attr_utils.has_attr "alt" attrs) then
3423 Message_collector.add_typed collector (`Img `Missing_alt);
35243625 (* Check ismap requires 'a' ancestor with href *)
3737- if has_attr "ismap" attrs && not state.in_a_with_href then
2626+ if Attr_utils.has_attr "ismap" attrs && not state.in_a_with_href then
3827 Message_collector.add_typed collector (`Img `Ismap_needs_href)
39284029let check_area_element attrs collector =
4130 (* area with href requires alt *)
4242- if has_attr "href" attrs && not (has_attr "alt" attrs) then
3131+ if Attr_utils.has_attr "href" attrs && not (Attr_utils.has_attr "alt" attrs) then
4332 Message_collector.add_typed collector
4433 (`Attr (`Missing (`Elem "area", `Attr "alt")))
45344635let check_input_element attrs collector =
4747- match get_attr "type" attrs with
3636+ match Attr_utils.get_attr "type" attrs with
4837 | Some "image" ->
4938 (* input[type=image] requires alt *)
5050- if not (has_attr "alt" attrs) then
3939+ if not (Attr_utils.has_attr "alt" attrs) then
5140 Message_collector.add_typed collector
5241 (`Attr (`Missing (`Elem "input", `Attr "alt")))
5342 | Some "hidden" ->
5443 (* input[type=hidden] should not have required attribute *)
5555- if has_attr "required" attrs then
4444+ if Attr_utils.has_attr "required" attrs then
5645 Message_collector.add_typed collector
5746 (`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
5847 | Some "file" ->
5948 (* input[type=file] should not have value attribute *)
6060- if has_attr "value" attrs then
4949+ if Attr_utils.has_attr "value" attrs then
6150 Message_collector.add_typed collector
6251 (`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
6352 | _ -> ()
64536554let check_script_element attrs _collector =
6655 (* script requires src OR text content *)
6767- if not (has_attr "src" attrs) then
5656+ if not (Attr_utils.has_attr "src" attrs) then
6857 (* We can't check for text content here; that would need to be done
6958 in end_element or with state tracking *)
7059 ()
···7665 - http-equiv AND content
7766 - property AND content (RDFa)
7867 - itemprop AND content (microdata) *)
7979- let has_charset = has_attr "charset" attrs in
8080- let has_name = has_attr "name" attrs in
8181- let has_content = has_attr "content" attrs in
8282- let has_http_equiv = has_attr "http-equiv" attrs in
8383- let has_property = has_attr "property" attrs in
8484- let has_itemprop = has_attr "itemprop" attrs in
6868+ let has_charset = Attr_utils.has_attr "charset" attrs in
6969+ let has_name = Attr_utils.has_attr "name" attrs in
7070+ let has_content = Attr_utils.has_attr "content" attrs in
7171+ let has_http_equiv = Attr_utils.has_attr "http-equiv" attrs in
7272+ let has_property = Attr_utils.has_attr "property" attrs in
7373+ let has_itemprop = Attr_utils.has_attr "itemprop" attrs in
85748675 let valid =
8776 has_charset
···1008910190let check_link_element attrs collector =
10291 (* link[rel="stylesheet"] requires href *)
103103- match get_attr "rel" attrs with
9292+ match Attr_utils.get_attr "rel" attrs with
10493 | Some rel when String.equal rel "stylesheet" ->
105105- if not (has_attr "href" attrs) then
9494+ if not (Attr_utils.has_attr "href" attrs) then
10695 Message_collector.add_typed collector (`Link `Missing_href)
10796 | _ -> ()
1089710998let check_a_element attrs collector =
11099 (* a[download] requires href *)
111111- if has_attr "download" attrs && not (has_attr "href" attrs) then
100100+ if Attr_utils.has_attr "download" attrs && not (Attr_utils.has_attr "href" attrs) then
112101 Message_collector.add_typed collector
113102 (`Attr (`Missing (`Elem "a", `Attr "href")))
114103115104let check_map_element attrs collector =
116105 (* map requires name *)
117117- if not (has_attr "name" attrs) then
106106+ if not (Attr_utils.has_attr "name" attrs) then
118107 Message_collector.add_typed collector
119108 (`Attr (`Missing (`Elem "map", `Attr "name")))
120109121110let check_object_element attrs collector =
122111 (* object requires data attribute (or type attribute alone is not sufficient) *)
123123- let has_data = has_attr "data" attrs in
124124- let has_type = has_attr "type" attrs in
112112+ let has_data = Attr_utils.has_attr "data" attrs in
113113+ let has_type = Attr_utils.has_attr "type" attrs in
125114 if not has_data && has_type then
126115 Message_collector.add_typed collector
127116 (`Attr (`Missing (`Elem "object", `Attr "data")))
128117129118let check_popover_element element_name attrs collector =
130119 (* popover attribute must have valid value *)
131131- match get_attr "popover" attrs with
120120+ match Attr_utils.get_attr "popover" attrs with
132121 | Some value ->
133122 let value_lower = String.lowercase_ascii value in
134123 (* Valid values: empty string, auto, manual, hint *)
···141130142131let check_meter_element attrs collector =
143132 (* meter requires value attribute *)
144144- if not (has_attr "value" attrs) then
133133+ if not (Attr_utils.has_attr "value" attrs) then
145134 Message_collector.add_typed collector
146135 (`Attr (`Missing (`Elem "meter", `Attr "value")))
147136 else begin
148137 (* Validate min <= value constraint *)
149149- match get_attr "value" attrs, get_attr "min" attrs with
138138+ match Attr_utils.get_attr "value" attrs, Attr_utils.get_attr "min" attrs with
150139 | Some value_str, Some min_str ->
151140 (try
152141 let value = float_of_string value_str in
···162151163152let check_progress_element attrs collector =
164153 (* Validate progress value constraints *)
165165- match get_attr "value" attrs with
154154+ match Attr_utils.get_attr "value" attrs with
166155 | None -> () (* value is optional *)
167156 | Some value_str ->
168157 (try
169158 let value = float_of_string value_str in
170170- let max_val = match get_attr "max" attrs with
159159+ let max_val = match Attr_utils.get_attr "max" attrs with
171160 | None -> 1.0 (* default max is 1 *)
172161 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
173162 in
174163 if value > max_val then
175164 let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
176165 (* Check which message to use based on whether max is present *)
177177- if has_attr "max" attrs then
166166+ if Attr_utils.has_attr "max" attrs then
178167 Message_collector.add_typed collector
179168 (`Generic (
180169 (* Note: double space before "value" matches Nu validator quirk *)
···198187 | "link" -> check_link_element attrs collector
199188 | "a" ->
200189 check_a_element attrs collector;
201201- if has_attr "href" attrs then state.in_a_with_href <- true
190190+ if Attr_utils.has_attr "href" attrs then state.in_a_with_href <- true
202191 | "map" -> check_map_element attrs collector
203192 | "object" -> check_object_element attrs collector
204193 | "meter" -> check_meter_element attrs collector
···206195 | "figure" -> state._in_figure <- true
207196 | _ ->
208197 (* Check popover attribute on any element *)
209209- if has_attr "popover" attrs then check_popover_element name attrs collector
198198+ if Attr_utils.has_attr "popover" attrs then check_popover_element name attrs collector
210199211200let end_element state ~name ~namespace:_ _collector =
212201 match name with
+1-1
lib/htmlrw_check/specialized/aria_checker.ml
···491491 if name_lower = "br" || name_lower = "wbr" then begin
492492 List.iter (fun (attr_name, _) ->
493493 let attr_lower = String.lowercase_ascii attr_name in
494494- if String.length attr_lower > 5 && String.sub attr_lower 0 5 = "aria-" &&
494494+ if String.starts_with ~prefix:"aria-" attr_lower &&
495495 attr_lower <> "aria-hidden" then
496496 Message_collector.add_typed collector
497497 (`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
···4141let create () = { is_xhtml = false }
4242let reset state = state.is_xhtml <- false
43434444-(** Check if an attribute list contains a specific attribute. *)
4545-let has_attr name attrs =
4646- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
4747-4848-(** Get an attribute value from the list. *)
4949-let get_attr name attrs =
5050- List.find_map (fun (attr_name, value) ->
5151- if String.lowercase_ascii attr_name = name then Some value else None
5252- ) attrs
5353-5444(** Input types that allow the list attribute. *)
5545let input_types_allowing_list = [
5646 "color"; "date"; "datetime-local"; "email"; "month"; "number";
···67576858 (* Detect XHTML mode from xmlns attribute on html element *)
6959 if name_lower = "html" then begin
7070- let xmlns_value = get_attr "xmlns" attrs in
7171- match xmlns_value with
6060+ match Attr_utils.get_attr "xmlns" attrs with
7261 | Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
7362 | _ -> ()
7463 end;
75647665 (* Check HTML element attribute restrictions *)
7777- if namespace = None then begin
6666+ (match namespace with
6767+ | Some _ -> ()
6868+ | None ->
7869 match List.assoc_opt name_lower disallowed_attrs_html with
7970 | Some disallowed ->
8071 List.iter (fun attr ->
8181- if has_attr attr attrs then
7272+ if Attr_utils.has_attr attr attrs then
8273 report_disallowed_attr name_lower attr collector
8374 ) disallowed
8484- | None -> ()
8585- end;
7575+ | None -> ());
86768777 (* Check for xml:base attribute - not allowed in HTML *)
8888- if namespace = None && name_lower = "html" then begin
8989- if has_attr "xml:base" attrs then
7878+ (match namespace with
7979+ | Some _ -> ()
8080+ | None when name_lower = "html" ->
8181+ if Attr_utils.has_attr "xml:base" attrs then
9082 report_disallowed_attr name_lower "xml:base" collector
9191- end;
8383+ | None -> ());
92849385 (* Check for xmlns:* prefixed attributes - not allowed in HTML *)
9486 (* Standard xmlns declarations are allowed but custom prefixes are not *)
9595- if namespace = None then begin
8787+ (match namespace with
8888+ | Some _ -> ()
8989+ | None ->
9690 List.iter (fun (attr_name, _) ->
9791 let attr_lower = String.lowercase_ascii attr_name in
9898- if String.length attr_lower > 6 && String.sub attr_lower 0 6 = "xmlns:" then begin
9292+ if String.starts_with ~prefix:"xmlns:" attr_lower then begin
9993 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
10094 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
10195 if prefix <> "xlink" && prefix <> "xml" then
10296 Message_collector.add_typed collector
10397 (`Attr (`Not_allowed_here (`Attr attr_name)))
10498 end
105105- ) attrs
106106- end;
9999+ ) attrs);
107100108101 (* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
109102 (* xml:id is never valid on SVG elements in HTML5 *)
110103 if List.mem name_lower svg_no_xml_id then begin
111111- if has_attr "xml:id" attrs then
104104+ if Attr_utils.has_attr "xml:id" attrs then
112105 report_disallowed_attr name_lower "xml:id" collector
113106 end;
114107115108 (* SVG feConvolveMatrix requires order attribute *)
116109 if name_lower = "feconvolvematrix" then begin
117117- if not (has_attr "order" attrs) then
110110+ if not (Attr_utils.has_attr "order" attrs) then
118111 Message_collector.add_typed collector
119112 (`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
120113 end;
121114122115 (* Validate style type attribute - must be "text/css" or omitted *)
123123- if namespace = None && name_lower = "style" then begin
116116+ (match namespace with
117117+ | Some _ -> ()
118118+ | None when name_lower = "style" ->
124119 List.iter (fun (attr_name, attr_value) ->
125120 let attr_lower = String.lowercase_ascii attr_name in
126121 if attr_lower = "type" then begin
···129124 Message_collector.add_typed collector (`Misc `Style_type_invalid)
130125 end
131126 ) attrs
132132- end;
127127+ | None -> ());
133128134129 (* Validate object element requires data or type attribute *)
135135- if namespace = None && name_lower = "object" then begin
136136- let has_data = has_attr "data" attrs in
137137- let has_type = has_attr "type" attrs in
130130+ (match namespace with
131131+ | Some _ -> ()
132132+ | None when name_lower = "object" ->
133133+ let has_data = Attr_utils.has_attr "data" attrs in
134134+ let has_type = Attr_utils.has_attr "type" attrs in
138135 if not has_data && not has_type then
139136 Message_collector.add_typed collector
140137 (`Attr (`Missing (`Elem "object", `Attr "data")))
141141- end;
138138+ | None -> ());
142139143140 (* Validate link imagesizes/imagesrcset attributes *)
144144- if namespace = None && name_lower = "link" then begin
145145- let has_imagesizes = has_attr "imagesizes" attrs in
146146- let has_imagesrcset = has_attr "imagesrcset" attrs in
147147- let rel_value = get_attr "rel" attrs in
148148- let as_value = get_attr "as" attrs in
141141+ (match namespace with
142142+ | Some _ -> ()
143143+ | None when name_lower = "link" ->
144144+ let has_imagesizes = Attr_utils.has_attr "imagesizes" attrs in
145145+ let has_imagesrcset = Attr_utils.has_attr "imagesrcset" attrs in
146146+ let rel_value = Attr_utils.get_attr "rel" attrs in
147147+ let as_value = Attr_utils.get_attr "as" attrs in
149148150149 (* imagesizes requires imagesrcset *)
151150 if has_imagesizes && not has_imagesrcset then
···175174 if not rel_is_preload then
176175 Message_collector.add_typed collector (`Link `As_requires_preload)
177176 | None -> ())
178178- end;
177177+ | None -> ());
179178180179 (* Validate img usemap attribute - must be hash-name reference with content *)
181181- if namespace = None && name_lower = "img" then begin
180180+ (match namespace with
181181+ | Some _ -> ()
182182+ | None when name_lower = "img" ->
182183 List.iter (fun (attr_name, attr_value) ->
183184 let attr_lower = String.lowercase_ascii attr_name in
184185 if attr_lower = "usemap" then begin
···189190 attr_value attr_name name))))
190191 end
191192 ) attrs
192192- end;
193193+ | None -> ());
193194194195 (* Validate embed type attribute - must be valid MIME type *)
195195- if namespace = None && name_lower = "embed" then begin
196196+ (match namespace with
197197+ | Some _ -> ()
198198+ | None when name_lower = "embed" ->
196199 List.iter (fun (attr_name, attr_value) ->
197200 let attr_lower = String.lowercase_ascii attr_name in
198201 if attr_lower = "type" then begin
···205208 attr_value attr_name name msg))))
206209 end
207210 ) attrs
208208- end;
211211+ | None -> ());
209212210213 (* Validate width/height on embed and img - must be non-negative integers *)
211211- if namespace = None && (name_lower = "embed" || name_lower = "img" ||
212212- name_lower = "video" || name_lower = "canvas" ||
213213- name_lower = "iframe" || name_lower = "source") then begin
214214+ let is_dimension_element = name_lower = "embed" || name_lower = "img" ||
215215+ name_lower = "video" || name_lower = "canvas" ||
216216+ name_lower = "iframe" || name_lower = "source" in
217217+ (match namespace with
218218+ | Some _ -> ()
219219+ | None when is_dimension_element ->
214220 List.iter (fun (attr_name, attr_value) ->
215221 let attr_lower = String.lowercase_ascii attr_name in
216222 if attr_lower = "width" || attr_lower = "height" then begin
···255261 end
256262 end
257263 ) attrs
258258- end;
264264+ | None -> ());
259265260266 (* Validate area[shape=default] cannot have coords *)
261261- if namespace = None && name_lower = "area" then begin
262262- let shape_value = get_attr "shape" attrs in
263263- match shape_value with
267267+ (match namespace with
268268+ | Some _ -> ()
269269+ | None when name_lower = "area" ->
270270+ (match Attr_utils.get_attr "shape" attrs with
264271 | Some s when String.lowercase_ascii (String.trim s) = "default" ->
265265- if has_attr "coords" attrs then
272272+ if Attr_utils.has_attr "coords" attrs then
266273 Message_collector.add_typed collector
267274 (`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
268268- | _ -> ()
269269- end;
275275+ | _ -> ())
276276+ | None -> ());
270277271278 (* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
272272- if namespace = None && name_lower = "bdo" then begin
273273- let dir_value = get_attr "dir" attrs in
274274- match dir_value with
279279+ (match namespace with
280280+ | Some _ -> ()
281281+ | None when name_lower = "bdo" ->
282282+ (match Attr_utils.get_attr "dir" attrs with
275283 | None ->
276284 Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
277285 | Some v when String.lowercase_ascii (String.trim v) = "auto" ->
278286 Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
279279- | _ -> ()
280280- end;
287287+ | _ -> ())
288288+ | None -> ());
281289282290 (* Validate input list attribute - only allowed for certain types *)
283283- if namespace = None && name_lower = "input" then begin
284284- if has_attr "list" attrs then begin
285285- let input_type = match get_attr "type" attrs with
286286- | Some t -> String.lowercase_ascii (String.trim t)
287287- | None -> "text" (* default type is text *)
288288- in
291291+ (match namespace with
292292+ | Some _ -> ()
293293+ | None when name_lower = "input" ->
294294+ if Attr_utils.has_attr "list" attrs then begin
295295+ let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
296296+ |> String.trim |> String.lowercase_ascii in
289297 if not (List.mem input_type input_types_allowing_list) then
290298 Message_collector.add_typed collector (`Input `List_not_allowed)
291299 end
292292- end;
300300+ | None -> ());
293301294302 (* Validate data-* attributes *)
295295- if namespace = None then begin
303303+ (match namespace with
304304+ | Some _ -> ()
305305+ | None ->
296306 List.iter (fun (attr_name, _) ->
297307 let attr_lower = String.lowercase_ascii attr_name in
298308 (* Check if it starts with "data-" *)
299299- if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
309309+ if String.starts_with ~prefix:"data-" attr_lower then begin
300310 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
301311 (* Check if it's exactly "data-" with nothing after *)
302312 if after_prefix = "" then
···306316 Message_collector.add_typed collector
307317 (`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
308318 end
309309- ) attrs
310310- end;
319319+ ) attrs);
311320312321 (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
313313- if namespace = None && not state.is_xhtml then begin
314314- let xmllang_value = get_attr "xml:lang" attrs in
315315- let lang_value = get_attr "lang" attrs in
316316- match xmllang_value with
322322+ (match namespace with
323323+ | Some _ -> ()
324324+ | None when not state.is_xhtml ->
325325+ let xmllang_value = Attr_utils.get_attr "xml:lang" attrs in
326326+ let lang_value = Attr_utils.get_attr "lang" attrs in
327327+ (match xmllang_value with
317328 | Some xmllang ->
318329 (match lang_value with
319330 | None ->
320320- (* xml:lang without lang attribute *)
321331 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
322332 | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
323323- (* xml:lang and lang have different values - "lang present with same value" message *)
324333 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
325334 | _ -> ())
326326- | None -> ()
327327- end;
335335+ | None -> ())
336336+ | None -> ());
328337329338 (* Validate spellcheck attribute - must be "true" or "false" or empty *)
330330- if namespace = None then begin
339339+ (match namespace with
340340+ | Some _ -> ()
341341+ | None ->
331342 List.iter (fun (attr_name, attr_value) ->
332343 let attr_lower = String.lowercase_ascii attr_name in
333344 if attr_lower = "spellcheck" then begin
···336347 Message_collector.add_typed collector
337348 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
338349 end
339339- ) attrs
340340- end;
350350+ ) attrs);
341351342352 (* Validate enterkeyhint attribute - must be one of specific values *)
343343- if namespace = None then begin
353353+ (match namespace with
354354+ | Some _ -> ()
355355+ | None ->
344356 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
345357 List.iter (fun (attr_name, attr_value) ->
346358 let attr_lower = String.lowercase_ascii attr_name in
···350362 Message_collector.add_typed collector
351363 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
352364 end
353353- ) attrs
354354- end;
365365+ ) attrs);
355366356367 (* Validate headingoffset attribute - must be a number between 0 and 8 *)
357357- if namespace = None then begin
368368+ (match namespace with
369369+ | Some _ -> ()
370370+ | None ->
358371 List.iter (fun (attr_name, attr_value) ->
359372 let attr_lower = String.lowercase_ascii attr_name in
360373 if attr_lower = "headingoffset" then begin
···370383 if not is_valid then
371384 Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
372385 end
373373- ) attrs
374374- end;
386386+ ) attrs);
375387376388 (* Validate accesskey attribute - each key label must be a single code point *)
377377- if namespace = None then begin
389389+ (match namespace with
390390+ | Some _ -> ()
391391+ | None ->
378392 List.iter (fun (attr_name, attr_value) ->
379393 let attr_lower = String.lowercase_ascii attr_name in
380394 if attr_lower = "accesskey" then begin
···419433 in
420434 find_duplicates [] keys
421435 end
422422- ) attrs
423423- end;
436436+ ) attrs);
424437425438 (* Validate that command and popovertarget cannot have aria-expanded *)
426426- if namespace = None && name_lower = "button" then begin
427427- let has_command = has_attr "command" attrs in
428428- let has_popovertarget = has_attr "popovertarget" attrs in
429429- let has_aria_expanded = has_attr "aria-expanded" attrs in
439439+ (match namespace with
440440+ | Some _ -> ()
441441+ | None when name_lower = "button" ->
442442+ let has_command = Attr_utils.has_attr "command" attrs in
443443+ let has_popovertarget = Attr_utils.has_attr "popovertarget" attrs in
444444+ let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
430445431446 if has_command && has_aria_expanded then
432447 Message_collector.add_typed collector
···437452 Message_collector.add_typed collector
438453 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
439454 `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
440440- end;
455455+ | None -> ());
441456442457 (* Note: data-* uppercase check requires XML parsing which preserves case.
443458 The HTML5 parser normalizes attribute names to lowercase, so this check
···446461 ignore state.is_xhtml;
447462448463 (* Validate media attribute on link, style, source elements *)
449449- if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
464464+ let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
465465+ (match namespace with
466466+ | Some _ -> ()
467467+ | None when is_media_element ->
450468 List.iter (fun (attr_name, attr_value) ->
451469 let attr_lower = String.lowercase_ascii attr_name in
452470 if attr_lower = "media" then begin
···462480 end
463481 end
464482 ) attrs
465465- end;
483483+ | None -> ());
466484467485 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
468468- if namespace = None then begin
486486+ (match namespace with
487487+ | Some _ -> ()
488488+ | None ->
469489 List.iter (fun (attr_name, attr_value) ->
470490 let attr_lower = String.lowercase_ascii attr_name in
471491 if attr_lower = "prefix" then begin
···487507 end
488508 end
489509 end
490490- ) attrs
491491- end
510510+ ) attrs)
492511493512let end_element _state ~name:_ ~namespace:_ _collector = ()
494513let characters _state _text _collector = ()
+5-12
lib/htmlrw_check/specialized/base_checker.ml
···1111let reset state =
1212 state.seen_link_or_script <- false
13131414-(** Check if an attribute list contains a specific attribute. *)
1515-let has_attr name attrs =
1616- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
1717-1814let start_element state ~name ~namespace ~attrs collector =
1919- if namespace <> None then ()
2020- else begin
2121- let name_lower = String.lowercase_ascii name in
2222- match name_lower with
1515+ match namespace with
1616+ | Some _ -> ()
1717+ | None ->
1818+ match String.lowercase_ascii name with
2319 | "link" | "script" ->
2420 state.seen_link_or_script <- true
2521 | "base" ->
2622 if state.seen_link_or_script then
2723 Message_collector.add_typed collector (`Misc `Base_after_link_script);
2824 (* base element must have href or target attribute *)
2929- let has_href = has_attr "href" attrs in
3030- let has_target = has_attr "target" attrs in
3131- if not has_href && not has_target then
2525+ if not (Attr_utils.has_attr "href" attrs || Attr_utils.has_attr "target" attrs) then
3226 Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
3327 | _ -> ()
3434- end
35283629let end_element _state ~name:_ ~namespace:_ _collector = ()
3730let characters _state _text _collector = ()
+1-6
lib/htmlrw_check/specialized/dl_checker.ml
···5757 | ctx :: _ -> Some ctx
5858 | [] -> None
59596060-let get_attr name attrs =
6161- List.find_map (fun (n, v) ->
6262- if String.lowercase_ascii n = name then Some v else None
6363- ) attrs
6464-6560let start_element state ~name ~namespace ~attrs collector =
6661 let name_lower = String.lowercase_ascii name in
6762···115110 Message_collector.add_typed collector
116111 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
117112 (* Check that role is only presentation or none *)
118118- (match get_attr "role" attrs with
113113+ (match Attr_utils.get_attr "role" attrs with
119114 | Some role_value ->
120115 let role_lower = String.lowercase_ascii (String.trim role_value) in
121116 if role_lower <> "presentation" && role_lower <> "none" then
+42-76
lib/htmlrw_check/specialized/picture_checker.ml
···6666 state.always_matching_is_media_all <- false;
6767 state.always_matching_is_media_empty <- false
68686969-(** Check if an attribute list contains a specific attribute. *)
7070-let has_attr name attrs =
7171- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
7272-7369(** Report disallowed attribute error *)
7470let report_disallowed_attr element attr collector =
7571 Message_collector.add_typed collector
···8076 Message_collector.add_typed collector
8177 (`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
82787979+let check_disallowed_attrs element disallowed_list attrs collector =
8080+ List.iter (fun attr ->
8181+ if Attr_utils.has_attr attr attrs then
8282+ report_disallowed_attr element attr collector
8383+ ) disallowed_list
8484+8385let check_picture_attrs attrs collector =
8484- List.iter (fun disallowed ->
8585- if has_attr disallowed attrs then
8686- report_disallowed_attr "picture" disallowed collector
8787- ) disallowed_picture_attrs
8686+ check_disallowed_attrs "picture" disallowed_picture_attrs attrs collector
88878988let check_source_attrs_in_picture attrs collector =
9090- List.iter (fun disallowed ->
9191- if has_attr disallowed attrs then
9292- report_disallowed_attr "source" disallowed collector
9393- ) disallowed_source_attrs_in_picture;
9494- (* source in picture requires srcset *)
9595- if not (has_attr "srcset" attrs) then
9696- Message_collector.add_typed collector
9797- (`Srcset `Source_missing_srcset)
8989+ check_disallowed_attrs "source" disallowed_source_attrs_in_picture attrs collector;
9090+ if not (Attr_utils.has_attr "srcset" attrs) then
9191+ Message_collector.add_typed collector (`Srcset `Source_missing_srcset)
98929993let check_img_attrs attrs collector =
100100- List.iter (fun disallowed ->
101101- if has_attr disallowed attrs then
102102- report_disallowed_attr "img" disallowed collector
103103- ) disallowed_img_attrs
9494+ check_disallowed_attrs "img" disallowed_img_attrs attrs collector
1049510596let start_element state ~name ~namespace ~attrs collector =
10697 let name_lower = String.lowercase_ascii name in
···112103 end;
113104114105 (* Rest of checks only apply to HTML namespace elements *)
115115- if namespace = None then begin
116116- match name_lower with
106106+ match namespace with
107107+ | Some _ -> ()
108108+ | None ->
109109+ (match name_lower with
117110 | "picture" ->
118111 (* Check if picture is in a disallowed parent context *)
119112 (match state.parent_stack with
···124117 check_picture_attrs attrs collector;
125118 state.in_picture <- true;
126119 state.has_img_in_picture <- false;
127127- state.picture_depth <- 0; (* Will be incremented to 1 at end of function *)
120120+ state.picture_depth <- 0;
128121 state.children_in_picture <- [];
129122 state.last_was_img <- false;
130123 state.has_source_after_img <- false;
···136129 state.children_in_picture <- "source" :: state.children_in_picture;
137130 if state.last_was_img then
138131 state.has_source_after_img <- true;
139139- (* Check for always-matching source followed by another source *)
140132 if state.has_always_matching_source then
141133 state.source_after_always_matching <- true;
142142- (* A source is "always matching" if it has:
143143- - no media and no type attribute, OR
144144- - media attribute with empty/whitespace-only value, OR
145145- - media="all" (with optional whitespace) *)
146146- let media_value = List.find_map (fun (attr_name, v) ->
147147- if String.lowercase_ascii attr_name = "media" then Some v else None
148148- ) attrs in
149149- let has_type = has_attr "type" attrs in
134134+ (* A source is "always matching" if it has no media/type, or media="" or media="all" *)
135135+ let media_value = Attr_utils.get_attr "media" attrs in
136136+ let has_type = Attr_utils.has_attr "type" attrs in
150137 let is_media_all = match media_value with
151138 | Some v -> String.lowercase_ascii (String.trim v) = "all"
152152- | None -> false
153153- in
139139+ | None -> false in
154140 let is_media_empty = match media_value with
155141 | Some v -> String.trim v = ""
156156- | None -> false
157157- in
142142+ | None -> false in
158143 let is_always_matching = match media_value with
159159- | None -> not has_type (* no media, check if no type either *)
144144+ | None -> not has_type
160145 | Some v ->
161146 let trimmed = String.trim v in
162147 trimmed = "" || String.lowercase_ascii trimmed = "all"
163148 in
164149 if is_always_matching then begin
165150 state.has_always_matching_source <- true;
166166- if is_media_all then
167167- state.always_matching_is_media_all <- true
168168- else if is_media_empty then
169169- state.always_matching_is_media_empty <- true
151151+ (* Only set flags to true, never reset to false *)
152152+ if is_media_all then state.always_matching_is_media_all <- true;
153153+ if is_media_empty then state.always_matching_is_media_empty <- true
170154 end
171155172156 | "img" when state.in_picture && state.picture_depth = 1 ->
···174158 state.has_img_in_picture <- true;
175159 state.children_in_picture <- "img" :: state.children_in_picture;
176160 state.last_was_img <- true;
177177- (* Check for multiple img elements *)
178178- let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
161161+ let img_count = List.length (List.filter (( = ) "img") state.children_in_picture) in
179162 if img_count > 1 then
180163 report_disallowed_child "picture" "img" collector;
181181- (* Check if always-matching source is followed by img with srcset *)
182182- if state.has_always_matching_source && has_attr "srcset" attrs then begin
183183- if state.always_matching_is_media_all then
184184- Message_collector.add_typed collector (`Misc `Media_all)
185185- else if state.always_matching_is_media_empty then
186186- Message_collector.add_typed collector (`Misc `Media_empty)
187187- else
188188- Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
189189- end
164164+ if state.has_always_matching_source && Attr_utils.has_attr "srcset" attrs then
165165+ Message_collector.add_typed collector
166166+ (if state.always_matching_is_media_all then `Misc `Media_all
167167+ else if state.always_matching_is_media_empty then `Misc `Media_empty
168168+ else `Srcset `Source_needs_media_or_type)
190169191170 | "script" when state.in_picture && state.picture_depth = 1 ->
192171 state.children_in_picture <- "script" :: state.children_in_picture
···197176 | "img" ->
198177 check_img_attrs attrs collector
199178200200- | _ -> ()
201201- end;
179179+ | _ -> ());
202180203181 (* Track depth when inside picture *)
204182 if state.in_picture then
···209187 state.parent_stack <- name_lower :: state.parent_stack
210188211189let end_element state ~name ~namespace collector =
212212- if namespace <> None then ()
213213- else begin
190190+ match namespace with
191191+ | Some _ -> ()
192192+ | None ->
214193 let name_lower = String.lowercase_ascii name in
215194216216- (* Track depth *)
217195 if state.in_picture then
218196 state.picture_depth <- state.picture_depth - 1;
219197220198 if name_lower = "picture" && state.picture_depth = 0 then begin
221221- (* Check if picture had img child *)
222199 if not state.has_img_in_picture then
223223- Message_collector.add_typed collector
224224- (`Srcset `Picture_missing_img);
225225- (* Check for source after img *)
200200+ Message_collector.add_typed collector (`Srcset `Picture_missing_img);
226201 if state.has_source_after_img then
227202 report_disallowed_child "picture" "source" collector;
228228- (* Check for source after always-matching source *)
229229- if state.source_after_always_matching then begin
230230- if state.always_matching_is_media_all then
231231- Message_collector.add_typed collector (`Misc `Media_all)
232232- else if state.always_matching_is_media_empty then
233233- Message_collector.add_typed collector (`Misc `Media_empty)
234234- else
235235- Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
236236- end;
237237-203203+ if state.source_after_always_matching then
204204+ Message_collector.add_typed collector
205205+ (if state.always_matching_is_media_all then `Misc `Media_all
206206+ else if state.always_matching_is_media_empty then `Misc `Media_empty
207207+ else `Srcset `Source_needs_media_or_type);
238208 state.in_picture <- false
239209 end;
240210241241- (* Pop from parent stack *)
242242- state.parent_stack <- (match state.parent_stack with
243243- | _ :: rest -> rest
244244- | [] -> [])
245245- end
211211+ state.parent_stack <- match state.parent_stack with _ :: rest -> rest | [] -> []
246212247213let characters state text collector =
248214 (* Text in picture element is not allowed *)
+4-8
lib/htmlrw_check/specialized/source_checker.ml
···2323 | ctx :: _ -> ctx
2424 | [] -> Other
25252626-(** Check if an attribute list contains a specific attribute. *)
2727-let has_attr name attrs =
2828- List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
2929-3026let start_element state ~name ~namespace ~attrs collector =
3127 if namespace <> None then ()
3228 else begin
···4238 let ctx = current_context state in
4339 begin match ctx with
4440 | Video | Audio ->
4545- if has_attr "srcset" attrs then
4141+ if Attr_utils.has_attr "srcset" attrs then
4642 Message_collector.add_typed collector
4743 (`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
4848- if has_attr "sizes" attrs then
4444+ if Attr_utils.has_attr "sizes" attrs then
4945 Message_collector.add_typed collector
5046 (`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
5151- if has_attr "width" attrs then
4747+ if Attr_utils.has_attr "width" attrs then
5248 Message_collector.add_typed collector
5349 (`Attr (`Not_allowed (`Attr "width", `Elem "source")));
5454- if has_attr "height" attrs then
5050+ if Attr_utils.has_attr "height" attrs then
5551 Message_collector.add_typed collector
5652 (`Attr (`Not_allowed (`Attr "height", `Elem "source")))
5753 | Picture | Other -> ()
···1414let create () = ()
1515let reset _state = ()
16161717-(** Get attribute value *)
1818-let get_attr name attrs =
1919- List.find_map (fun (n, v) ->
2020- if String.lowercase_ascii n = name then Some v else None
2121- ) attrs
2222-2317(** Split string on a character while respecting parentheses *)
2418let split_respecting_parens ~sep s =
2519 let len = String.length s in
···971965972966 (* SVG image elements should not have srcset *)
973967 if namespace <> None && name_lower = "image" then begin
974974- if get_attr "srcset" attrs <> None then
968968+ if Attr_utils.get_attr "srcset" attrs <> None then
975969 Message_collector.add_typed collector
976970 (`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
977971 end;
···980974 else begin
981975 (* Check sizes and srcset on img and source *)
982976 if name_lower = "img" || name_lower = "source" then begin
983983- let sizes_value = get_attr "sizes" attrs in
984984- let srcset_value = get_attr "srcset" attrs in
977977+ let sizes_value = Attr_utils.get_attr "sizes" attrs in
978978+ let srcset_value = Attr_utils.get_attr "srcset" attrs in
985979 let has_sizes = sizes_value <> None in
986980 let has_srcset = srcset_value <> None in
987981