···162 List.find_opt (fun t -> starts_with t "section-") tokens
163 in
16400000000000165 (* Process remaining tokens *)
166 let process_field_tokens tokens =
0000000167 match tokens with
168 | [] -> Error "A list of autofill details tokens must contain an autofill field name."
169 | [ "webauthn" ] ->
···246 | None ->
247 Error
248 "A list of autofill details tokens must not contain more than one \
249- autofill field name.")
250 in
251 process_field_tokens !tokens
252
···162 List.find_opt (fun t -> starts_with t "section-") tokens
163 in
164165+ (* Check if webauthn appears anywhere except as the very last token *)
166+ let check_webauthn_position tokens =
167+ let rec check = function
168+ | [] -> None
169+ | ["webauthn"] -> None (* webauthn as last token is ok *)
170+ | "webauthn" :: _ :: _ -> Some () (* webauthn not last is error *)
171+ | _ :: rest -> check rest
172+ in
173+ check tokens
174+ in
175+176 (* Process remaining tokens *)
177 let process_field_tokens tokens =
178+ (* First check if webauthn appears but not at the very end *)
179+ (match check_webauthn_position tokens with
180+ | Some () ->
181+ Error
182+ "The token \"webauthn\" must only appear as the very last token in a \
183+ list of autofill detail tokens."
184+ | None ->
185 match tokens with
186 | [] -> Error "A list of autofill details tokens must contain an autofill field name."
187 | [ "webauthn" ] ->
···264 | None ->
265 Error
266 "A list of autofill details tokens must not contain more than one \
267+ autofill field name."))
268 in
269 process_field_tokens !tokens
270
+8-8
lib/html5_checker/error_code.ml
···382 Printf.sprintf "Element %s is missing required attribute %s."
383 (q element) (q attr)
384 | Missing_required_attr_one_of { element; attrs } ->
385- let attrs_str = String.concat ", " (List.map q attrs) in
386 Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]."
387 (q element) attrs_str
388 | Bad_attr_value { element; attr; value; reason } ->
···420 Printf.sprintf "Element %s is missing required child element %s."
421 (q parent) (q child)
422 | Missing_required_child_one_of { parent; children } ->
423- let children_str = String.concat ", " (List.map q children) in
424 Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]."
425 (q parent) children_str
426 | Missing_required_child_generic { parent } ->
···488 Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images."
489 (q "img") (q "alt")
490 | Img_missing_src_or_srcset ->
491- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
492- (q "img") (q "src") (q "srcset")
493 | Option_empty_without_label ->
494 Printf.sprintf "Element %s without attribute %s must not be empty."
495 (q "option") (q "label")
···499 Printf.sprintf "The value of %s attribute for the %s element must not be %s."
500 (q "dir") (q "bdo") (q "auto")
501 | Base_missing_href_or_target ->
502- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
503- (q "base") (q "href") (q "target")
504 | Base_after_link_script ->
505 Printf.sprintf "The %s element must come before any %s or %s elements in the document."
506 (q "base") (q "link") (q "script")
···551 Printf.sprintf "Element %s is missing required attribute %s."
552 (q "summary") (q "role")
553 | Summary_missing_attrs ->
554- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s, %s]."
555- (q "summary") (q "aria-checked") (q "aria-level") (q "role")
556 | Autocomplete_webauthn_on_select ->
557 Printf.sprintf "The value of the %s attribute for the %s element must not contain %s."
558 (q "autocomplete") (q "select") (q "webauthn")
···382 Printf.sprintf "Element %s is missing required attribute %s."
383 (q element) (q attr)
384 | Missing_required_attr_one_of { element; attrs } ->
385+ let attrs_str = String.concat ", " attrs in
386 Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]."
387 (q element) attrs_str
388 | Bad_attr_value { element; attr; value; reason } ->
···420 Printf.sprintf "Element %s is missing required child element %s."
421 (q parent) (q child)
422 | Missing_required_child_one_of { parent; children } ->
423+ let children_str = String.concat ", " children in
424 Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]."
425 (q parent) children_str
426 | Missing_required_child_generic { parent } ->
···488 Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images."
489 (q "img") (q "alt")
490 | Img_missing_src_or_srcset ->
491+ Printf.sprintf "Element %s is missing one or more of the following attributes: [src, srcset]."
492+ (q "img")
493 | Option_empty_without_label ->
494 Printf.sprintf "Element %s without attribute %s must not be empty."
495 (q "option") (q "label")
···499 Printf.sprintf "The value of %s attribute for the %s element must not be %s."
500 (q "dir") (q "bdo") (q "auto")
501 | Base_missing_href_or_target ->
502+ Printf.sprintf "Element %s is missing one or more of the following attributes: [href, target]."
503+ (q "base")
504 | Base_after_link_script ->
505 Printf.sprintf "The %s element must come before any %s or %s elements in the document."
506 (q "base") (q "link") (q "script")
···551 Printf.sprintf "Element %s is missing required attribute %s."
552 (q "summary") (q "role")
553 | Summary_missing_attrs ->
554+ Printf.sprintf "Element %s is missing one or more of the following attributes: [aria-checked, aria-level, role]."
555+ (q "summary")
556 | Autocomplete_webauthn_on_select ->
557 Printf.sprintf "The value of the %s attribute for the %s element must not contain %s."
558 (q "autocomplete") (q "select") (q "webauthn")
+7
lib/html5_checker/parse_error_bridge.ml
···14 let (message, final_code) = match code with
15 | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
16 ("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str)
0017 | Html5rw.Parse_error_code.Tree_construction_error s ->
18 (* Check for control-character/noncharacter/surrogate with codepoint info *)
19 (try
···67 ("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied")
68 else if s = "end-tag-br" then
69 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br")
0000070 else
71 (Printf.sprintf "Parse error: %s" s, s)
72 with _ -> (Printf.sprintf "Parse error: %s" s, s))
···14 let (message, final_code) = match code with
15 | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
16 ("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str)
17+ | Html5rw.Parse_error_code.Null_character_reference ->
18+ ("Character reference expands to zero.", "null-character-reference")
19 | Html5rw.Parse_error_code.Tree_construction_error s ->
20 (* Check for control-character/noncharacter/surrogate with codepoint info *)
21 (try
···69 ("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied")
70 else if s = "end-tag-br" then
71 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br")
72+ else if s = "expected-closing-tag-but-got-eof" then
73+ ("End of file seen and there were open elements.", "eof-in-open-element")
74+ else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
75+ let element = String.sub s 19 (String.length s - 19) in
76+ (Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
77 else
78 (Printf.sprintf "Parse error: %s" s, s)
79 with _ -> (Printf.sprintf "Parse error: %s" s, s))
+86-10
lib/html5_checker/semantic/nesting_checker.ml
···32let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
33 Hashtbl.create 64
340000035(** Register that [ancestor] is prohibited for [descendant]. *)
36let register_prohibited_ancestor ancestor descendant =
37 let number = special_ancestor_number ancestor in
···44 in
45 let new_mask = mask lor (1 lsl number) in
46 Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
0000000000004748(** Initialize the prohibited ancestor map. *)
49let () =
···113 ) interactive_elements;
114115 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
0116 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
117 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
118 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···120 "ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in
121 List.iter (fun ancestor ->
122 List.iter (fun descendant ->
123- register_prohibited_ancestor ancestor descendant
124 ) flow_content
125 ) phrasing_only
126···134 let map_num = special_ancestor_number "map" in
135 1 lsl map_num
136000137(** Stack node representing an element's context. *)
138type stack_node = {
139 ancestor_mask : int;
140- _name : string; [@warning "-69"]
0141}
142143(** Checker state. *)
···181 | _ ->
182 false
1830000000184(** Report nesting violations. *)
185let check_nesting state name attrs collector =
186 (* Compute the prohibited ancestor mask for this element *)
···190 | None -> 0
191 in
1920000000193 (* Add interactive element restrictions if applicable *)
194 let mask =
195 if is_interactive_element name attrs then
···212 | "object" when has_attr attrs "usemap" -> Some "usemap"
213 | _ -> None
214 in
00215 (* Find which ancestors are violated *)
216 Array.iteri (fun i ancestor ->
217 let bit = 1 lsl i in
218- if (mask_hit land bit) <> 0 then
219- Message_collector.add_typed collector
220- (Error_code.Element_must_not_be_descendant {
221- element = name;
222- attr;
223- ancestor
224- })
0000000000000000225 ) special_ancestors
226 end
227 end
···238 })
239 | _ -> ()
24000000000000000000000241let start_element state ~name ~namespace ~attrs collector =
242 (* Only check HTML elements, not SVG or MathML *)
243 match namespace with
···246 (* Check for nesting violations *)
247 check_nesting state name attrs collector;
248 check_required_ancestors state name collector;
0249250 (* Update ancestor mask if this is a special ancestor *)
251 let new_mask = state.ancestor_mask in
···267 in
268269 (* Push onto stack *)
270- let node = { ancestor_mask = state.ancestor_mask; _name = name } in
0271 state.stack <- node :: state.stack;
272 state.ancestor_mask <- new_mask
273
···32let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
33 Hashtbl.create 64
3435+(** Map from descendant element name to bitmask of ancestors that cause content model violations.
36+ (These use different error messages than nesting violations.) *)
37+let content_model_violation_mask : (string, int) Hashtbl.t =
38+ Hashtbl.create 64
39+40(** Register that [ancestor] is prohibited for [descendant]. *)
41let register_prohibited_ancestor ancestor descendant =
42 let number = special_ancestor_number ancestor in
···49 in
50 let new_mask = mask lor (1 lsl number) in
51 Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
52+53+(** Register a content model violation (phrasing-only element containing flow content). *)
54+let register_content_model_violation ancestor descendant =
55+ register_prohibited_ancestor ancestor descendant;
56+ let number = special_ancestor_number ancestor in
57+ let mask =
58+ match Hashtbl.find_opt content_model_violation_mask descendant with
59+ | None -> 0
60+ | Some m -> m
61+ in
62+ let new_mask = mask lor (1 lsl number) in
63+ Hashtbl.replace content_model_violation_mask descendant new_mask
6465(** Initialize the prohibited ancestor map. *)
66let () =
···130 ) interactive_elements;
131132 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
133+ (* These are content model violations, not nesting violations. *)
134 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
135 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
136 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···138 "ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in
139 List.iter (fun ancestor ->
140 List.iter (fun descendant ->
141+ register_content_model_violation ancestor descendant
142 ) flow_content
143 ) phrasing_only
144···152 let map_num = special_ancestor_number "map" in
153 1 lsl map_num
154155+(** Transparent elements - inherit content model from parent *)
156+let transparent_elements = ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
157+158(** Stack node representing an element's context. *)
159type stack_node = {
160 ancestor_mask : int;
161+ name : string;
162+ is_transparent : bool;
163}
164165(** Checker state. *)
···203 | _ ->
204 false
205206+(** Find the nearest transparent element in the ancestor stack, if any.
207+ Returns the immediate parent's name if it's transparent, otherwise None. *)
208+let find_nearest_transparent_parent state =
209+ match state.stack with
210+ | parent :: _ when parent.is_transparent -> Some parent.name
211+ | _ -> None
212+213(** Report nesting violations. *)
214let check_nesting state name attrs collector =
215 (* Compute the prohibited ancestor mask for this element *)
···219 | None -> 0
220 in
221222+ (* Get content model violation mask for this element *)
223+ let content_model_mask =
224+ match Hashtbl.find_opt content_model_violation_mask name with
225+ | Some m -> m
226+ | None -> 0
227+ in
228+229 (* Add interactive element restrictions if applicable *)
230 let mask =
231 if is_interactive_element name attrs then
···248 | "object" when has_attr attrs "usemap" -> Some "usemap"
249 | _ -> None
250 in
251+ (* Find the transparent parent (like canvas) if any *)
252+ let transparent_parent = find_nearest_transparent_parent state in
253 (* Find which ancestors are violated *)
254 Array.iteri (fun i ancestor ->
255 let bit = 1 lsl i in
256+ if (mask_hit land bit) <> 0 then begin
257+ (* Check if this is a content model violation or a nesting violation *)
258+ if (content_model_mask land bit) <> 0 then begin
259+ (* Content model violation: use "not allowed as child" format *)
260+ (* If there's a transparent parent, use that instead of the ancestor *)
261+ let parent = match transparent_parent with
262+ | Some p -> p
263+ | None -> ancestor
264+ in
265+ Message_collector.add_typed collector
266+ (Error_code.Element_not_allowed_as_child {
267+ child = name;
268+ parent
269+ })
270+ end else
271+ (* Nesting violation: use "must not be descendant" format *)
272+ Message_collector.add_typed collector
273+ (Error_code.Element_must_not_be_descendant {
274+ element = name;
275+ attr;
276+ ancestor
277+ })
278+ end
279 ) special_ancestors
280 end
281 end
···292 })
293 | _ -> ()
294295+(** Check for metadata-only elements appearing outside valid contexts.
296+ style element is only valid in head or in noscript (in head). *)
297+let check_metadata_element_context state name collector =
298+ match name with
299+ | "style" ->
300+ (* style is only valid inside head or noscript *)
301+ begin match state.stack with
302+ | parent :: _ when parent.name = "head" -> () (* valid *)
303+ | parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *)
304+ | parent :: _ ->
305+ (* style inside any other element is not allowed *)
306+ Message_collector.add_typed collector
307+ (Error_code.Element_not_allowed_as_child {
308+ child = "style";
309+ parent = parent.name
310+ })
311+ | [] -> () (* at root level, would be caught elsewhere *)
312+ end
313+ | _ -> ()
314+315let start_element state ~name ~namespace ~attrs collector =
316 (* Only check HTML elements, not SVG or MathML *)
317 match namespace with
···320 (* Check for nesting violations *)
321 check_nesting state name attrs collector;
322 check_required_ancestors state name collector;
323+ check_metadata_element_context state name collector;
324325 (* Update ancestor mask if this is a special ancestor *)
326 let new_mask = state.ancestor_mask in
···342 in
343344 (* Push onto stack *)
345+ let is_transparent = List.mem name transparent_elements in
346+ let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
347 state.stack <- node :: state.stack;
348 state.ancestor_mask <- new_mask
349
+52-31
lib/html5_checker/semantic/obsolete_checker.ml
···242 tbl
243244(** Checker state *)
245-type state = unit
00246247-let create () = ()
248249-let reset _state = ()
250251-let start_element _state ~name ~namespace ~attrs collector =
252 (* Only check HTML elements (no namespace or explicit HTML namespace) *)
253 let is_html = match namespace with
254 | None -> true
···259 else begin
260 let name_lower = String.lowercase_ascii name in
261000262 (* Check for obsolete element *)
263 (match Hashtbl.find_opt obsolete_elements name_lower with
264 | None -> ()
···270 List.iter (fun (attr_name, _attr_value) ->
271 let attr_lower = String.lowercase_ascii attr_name in
272273- (* Check specific obsolete attributes for this element *)
274- (match Hashtbl.find_opt obsolete_attributes attr_lower with
275- | None -> ()
276- | Some element_map ->
277- (match Hashtbl.find_opt element_map name_lower with
278- | None -> ()
279- | Some suggestion ->
280- Message_collector.add_typed collector
281- (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
282-283- (* Check obsolete style attributes *)
284- (match Hashtbl.find_opt obsolete_style_attrs attr_lower with
285- | None -> ()
286- | Some elements ->
287- if List.mem name_lower elements then
288- Message_collector.add_typed collector
289- (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
290-291- (* Check obsolete global attributes *)
292- (match Hashtbl.find_opt obsolete_global_attrs attr_lower with
293- | None -> ()
294- | Some suggestion ->
295- (* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
296 Message_collector.add_error collector
297- ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
298- ~code:"obsolete-global-attribute"
0299 ~element:name
300 ~attribute:attr_name
301- ())
0000000000000000000000000000000302 ) attrs
303 end
304305-let end_element _state ~name:_ ~namespace:_ _collector = ()
00306307let characters _state _text _collector = ()
308
···242 tbl
243244(** Checker state *)
245+type state = {
246+ mutable in_head : bool;
247+}
248249+let create () = { in_head = false }
250251+let reset state = state.in_head <- false
252253+let start_element state ~name ~namespace ~attrs collector =
254 (* Only check HTML elements (no namespace or explicit HTML namespace) *)
255 let is_html = match namespace with
256 | None -> true
···261 else begin
262 let name_lower = String.lowercase_ascii name in
263264+ (* Track head context *)
265+ if name_lower = "head" then state.in_head <- true;
266+267 (* Check for obsolete element *)
268 (match Hashtbl.find_opt obsolete_elements name_lower with
269 | None -> ()
···275 List.iter (fun (attr_name, _attr_value) ->
276 let attr_lower = String.lowercase_ascii attr_name in
277278+ (* Special handling for scoped attribute on style *)
279+ if attr_lower = "scoped" && name_lower = "style" then begin
280+ (* Only report if style is in head (correct context) - otherwise the content model
281+ error from nesting_checker takes precedence *)
282+ if state.in_head then
000000000000000000283 Message_collector.add_error collector
284+ ~message:(Printf.sprintf "Attribute %s not allowed on element %s at this point."
285+ (Error_code.q attr_name) (Error_code.q name))
286+ ~code:"disallowed-attribute"
287 ~element:name
288 ~attribute:attr_name
289+ ()
290+ end else begin
291+ (* Check specific obsolete attributes for this element *)
292+ (match Hashtbl.find_opt obsolete_attributes attr_lower with
293+ | None -> ()
294+ | Some element_map ->
295+ (match Hashtbl.find_opt element_map name_lower with
296+ | None -> ()
297+ | Some suggestion ->
298+ Message_collector.add_typed collector
299+ (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
300+301+ (* Check obsolete style attributes *)
302+ (match Hashtbl.find_opt obsolete_style_attrs attr_lower with
303+ | None -> ()
304+ | Some elements ->
305+ if List.mem name_lower elements then
306+ Message_collector.add_typed collector
307+ (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
308+309+ (* Check obsolete global attributes *)
310+ (match Hashtbl.find_opt obsolete_global_attrs attr_lower with
311+ | None -> ()
312+ | Some suggestion ->
313+ (* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
314+ Message_collector.add_error collector
315+ ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
316+ ~code:"obsolete-global-attribute"
317+ ~element:name
318+ ~attribute:attr_name
319+ ())
320+ end
321 ) attrs
322 end
323324+let end_element state ~name ~namespace:_ _collector =
325+ let name_lower = String.lowercase_ascii name in
326+ if name_lower = "head" then state.in_head <- false
327328let characters _state _text _collector = ()
329
···143 (* Valid values: empty string, auto, manual, hint *)
144 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
145 Message_collector.add_typed collector
146- (Error_code.Bad_attr_value {
147- element = element_name;
148- attr = "popover";
149- value;
150- reason = "Must be a valid popover state (auto, manual, or hint)."
151 })
152 | None -> ()
153
···143 (* Valid values: empty string, auto, manual, hint *)
144 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
145 Message_collector.add_typed collector
146+ (Error_code.Bad_attr_value_generic {
147+ message = Printf.sprintf "Bad value %s for attribute %s on element %s."
148+ (Error_code.q value) (Error_code.q "popover") (Error_code.q element_name)
00149 })
150 | None -> ()
151
+12-6
lib/html5_checker/specialized/aria_checker.ml
···673 | _ -> ()
674 end;
675676- (* Validate explicit roles *)
677- List.iter (fun role ->
678- (* Check if role is valid *)
679- if not (Hashtbl.mem valid_aria_roles role) then
000680 Message_collector.add_error collector
681 ~message:(Printf.sprintf
682 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
683- role name)
684 ~code:"bad-role"
685 ~element:name
686 ~attribute:"role"
687- ();
006880689 (* Check if role cannot be named *)
690 if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then
691 Message_collector.add_error collector
···673 | _ -> ()
674 end;
675676+ (* Validate explicit roles - report full attribute value if any role is invalid *)
677+ let has_invalid_role = List.exists (fun role ->
678+ not (Hashtbl.mem valid_aria_roles role)
679+ ) explicit_roles in
680+ if has_invalid_role then begin
681+ match role_attr with
682+ | Some role_value ->
683 Message_collector.add_error collector
684 ~message:(Printf.sprintf
685 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
686+ role_value name)
687 ~code:"bad-role"
688 ~element:name
689 ~attribute:"role"
690+ ()
691+ | None -> ()
692+ end;
693694+ List.iter (fun role ->
695 (* Check if role cannot be named *)
696 if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then
697 Message_collector.add_error collector
+54-19
lib/html5_checker/specialized/datetime_checker.ml
···27let validate_date s =
28 let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in
29 if not (Str.string_match pattern s 0) then
30- (false, Some "Date must be in YYYY-MM-DD format")
31 else
32 let year_s = Str.matched_group 1 s in
33 let month_s = Str.matched_group 2 s in
34 let day_s = Str.matched_group 3 s in
35 if String.length year_s < 4 then
36- (false, Some "Year must be at least 4 digits")
37 else
38 match (parse_int year_s, parse_int month_s, parse_int day_s) with
39 | None, _, _ | _, None, _ | _, _, None ->
40 (false, Some "Invalid year, month or day")
41 | Some year, Some month, Some day ->
42 if year < 1 then (false, Some "Year cannot be less than 1")
43- else if month < 1 || month > 12 then (false, Some "Month out of range")
044 else if day < 1 then (false, Some "Day cannot be less than 1")
45 else
46 let max_day = max_day_for_month year month in
···71let validate_time s =
72 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
73 if not (Str.string_match pattern s 0) then
74- (false, Some "Time must be in HH:MM format")
75 else
76 let hour_s = Str.matched_group 1 s in
77 let minute_s = Str.matched_group 2 s in
78 match (parse_int hour_s, parse_int minute_s) with
79 | None, _ | _, None -> (false, Some "Invalid hour or minute")
80 | Some hour, Some minute ->
81- if hour > 23 then (false, Some "Hour out of range")
82- else if minute > 59 then (false, Some "Minute out of range")
83 else
84 let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in
85 match second_s with
···88 match parse_int sec_s with
89 | None -> (false, Some "Invalid seconds")
90 | Some sec ->
91- if sec > 59 then (false, Some "Second out of range")
92 else
93 (* Check milliseconds if present *)
94 let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in
···108 else
109 let year_s = Str.matched_group 1 s in
110 if String.length year_s < 4 then
111- (false, Some "Year must be at least 4 digits")
112 else
113 match parse_int year_s with
114 | None -> (false, Some "Invalid year")
···125 let year_s = Str.matched_group 1 s in
126 let month_s = Str.matched_group 2 s in
127 if String.length year_s < 4 then
128- (false, Some "Year must be at least 4 digits")
129 else
130 match (parse_int year_s, parse_int month_s) with
131 | None, _ | _, None -> (false, Some "Invalid year or month")
···143 let year_s = Str.matched_group 1 s in
144 let week_s = Str.matched_group 2 s in
145 if String.length year_s < 4 then
146- (false, Some "Year must be at least 4 digits")
147 else
148 match (parse_int year_s, parse_int week_s) with
149 | None, _ | _, None -> (false, Some "Invalid year or week")
···222 (false, "+")
223 in
224 if not matched then
225- TzError "Invalid timezone offset"
226 else
227 let hour_s = Str.matched_group 2 s in
228 let minute_s = Str.matched_group 3 s in
229 match (parse_int hour_s, parse_int minute_s) with
230 | None, _ | _, None -> TzError "Invalid timezone"
231 | Some hour, Some minute ->
232- if hour > 23 || minute > 59 then TzError "Timezone offset out of range"
0233 else begin
234 (* Check for unusual but valid offsets *)
235 let unusual_range =
···267 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
268 (* Validate date *)
269 match validate_date date_part with
270- | (false, reason) ->
271- DtError (match reason with Some r -> r | None -> "Invalid date")
272 | (true, _) ->
273 let date_old = has_old_year date_part in
274 (* Check if ends with Z *)
275 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
276 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
277 match validate_time time_part with
278- | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
0279 | (true, _) ->
280 if date_old then DtWarning "Year may be mistyped"
281 else DtOk
···296 let time_part = String.sub time_and_tz 0 tp in
297 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
298 match validate_time time_part with
299- | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
0300 | (true, _) ->
301 match validate_timezone_offset tz_part with
302- | TzError _ -> DtError "The literal did not satisfy the datetime with timezone format"
303 | TzWarning w ->
304 DtWarning w
305 | TzOk ->
···400 | Some e -> Printf.sprintf "Bad date: %s." e
401 | None -> "Bad date: The literal did not satisfy the date format."
402 in
403- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
404- value attr_name element_name tz_msg date_msg)
0000000000000000000000000000000405 end
406407(** Checker state *)
···27let validate_date s =
28 let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in
29 if not (Str.string_match pattern s 0) then
30+ (false, Some "The literal did not satisfy the date format")
31 else
32 let year_s = Str.matched_group 1 s in
33 let month_s = Str.matched_group 2 s in
34 let day_s = Str.matched_group 3 s in
35 if String.length year_s < 4 then
36+ (false, Some "The literal did not satisfy the date format")
37 else
38 match (parse_int year_s, parse_int month_s, parse_int day_s) with
39 | None, _, _ | _, None, _ | _, _, None ->
40 (false, Some "Invalid year, month or day")
41 | Some year, Some month, Some day ->
42 if year < 1 then (false, Some "Year cannot be less than 1")
43+ else if month = 0 then (false, Some "Month cannot be less than 1")
44+ else if month > 12 then (false, Some "Month cannot be greater than 12")
45 else if day < 1 then (false, Some "Day cannot be less than 1")
46 else
47 let max_day = max_day_for_month year month in
···72let validate_time s =
73 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
74 if not (Str.string_match pattern s 0) then
75+ (false, None) (* Format error - return None so caller uses generic message *)
76 else
77 let hour_s = Str.matched_group 1 s in
78 let minute_s = Str.matched_group 2 s in
79 match (parse_int hour_s, parse_int minute_s) with
80 | None, _ | _, None -> (false, Some "Invalid hour or minute")
81 | Some hour, Some minute ->
82+ if hour > 23 then (false, Some "Hour cannot be greater than 23")
83+ else if minute > 59 then (false, Some "Minute cannot be greater than 59")
84 else
85 let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in
86 match second_s with
···89 match parse_int sec_s with
90 | None -> (false, Some "Invalid seconds")
91 | Some sec ->
92+ if sec > 59 then (false, Some "Second cannot be greater than 59")
93 else
94 (* Check milliseconds if present *)
95 let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in
···109 else
110 let year_s = Str.matched_group 1 s in
111 if String.length year_s < 4 then
112+ (false, Some "The literal did not satisfy the date format")
113 else
114 match parse_int year_s with
115 | None -> (false, Some "Invalid year")
···126 let year_s = Str.matched_group 1 s in
127 let month_s = Str.matched_group 2 s in
128 if String.length year_s < 4 then
129+ (false, Some "The literal did not satisfy the date format")
130 else
131 match (parse_int year_s, parse_int month_s) with
132 | None, _ | _, None -> (false, Some "Invalid year or month")
···144 let year_s = Str.matched_group 1 s in
145 let week_s = Str.matched_group 2 s in
146 if String.length year_s < 4 then
147+ (false, Some "The literal did not satisfy the date format")
148 else
149 match (parse_int year_s, parse_int week_s) with
150 | None, _ | _, None -> (false, Some "Invalid year or week")
···223 (false, "+")
224 in
225 if not matched then
226+ TzError "The literal did not satisfy the datetime with timezone format"
227 else
228 let hour_s = Str.matched_group 2 s in
229 let minute_s = Str.matched_group 3 s in
230 match (parse_int hour_s, parse_int minute_s) with
231 | None, _ | _, None -> TzError "Invalid timezone"
232 | Some hour, Some minute ->
233+ if hour > 23 then TzError "Hours out of range in time zone designator"
234+ else if minute > 59 then TzError "Minutes out of range in time zone designator"
235 else begin
236 (* Check for unusual but valid offsets *)
237 let unusual_range =
···269 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
270 (* Validate date *)
271 match validate_date date_part with
272+ | (false, _) ->
273+ DtError "The literal did not satisfy the datetime with timezone format"
274 | (true, _) ->
275 let date_old = has_old_year date_part in
276 (* Check if ends with Z *)
277 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
278 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
279 match validate_time time_part with
280+ | (false, Some reason) -> DtError reason
281+ | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
282 | (true, _) ->
283 if date_old then DtWarning "Year may be mistyped"
284 else DtOk
···299 let time_part = String.sub time_and_tz 0 tp in
300 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
301 match validate_time time_part with
302+ | (false, Some reason) -> DtError reason
303+ | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
304 | (true, _) ->
305 match validate_timezone_offset tz_part with
306+ | TzError e -> DtError e
307 | TzWarning w ->
308 DtWarning w
309 | TzOk ->
···404 | Some e -> Printf.sprintf "Bad date: %s." e
405 | None -> "Bad date: The literal did not satisfy the date format."
406 in
407+ (* Order depends on error type. The Nu validator has specific patterns:
408+ - Time hour/minute errors (not timezone) -> datetime first
409+ - Timezone hours error -> datetime first
410+ - Timezone minutes error -> date first
411+ - Time fraction error -> date first
412+ - Date "less than" error -> date first
413+ - Date "greater than" error -> datetime first
414+ - Generic errors both sides -> datetime first *)
415+ let is_generic_tz = tz_error = "The literal did not satisfy the datetime with timezone format" in
416+ let is_tz_hours_error = String.length tz_error >= 5 && String.sub tz_error 0 5 = "Hours" in
417+ let is_tz_minutes_error = String.length tz_error >= 7 && String.sub tz_error 0 7 = "Minutes" in
418+ let is_time_minute_or_hour_error =
419+ (try ignore (Str.search_forward (Str.regexp "Minute cannot\\|Hour cannot") tz_error 0); true with Not_found -> false)
420+ in
421+ let is_fraction_error = try ignore (Str.search_forward (Str.regexp "fraction") tz_error 0); true with Not_found -> false in
422+ let is_month_less_than_error = match date_error with
423+ | Some e -> (try ignore (Str.search_forward (Str.regexp "Month cannot be less than") e 0); true with Not_found -> false)
424+ | None -> false
425+ in
426+ (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427+ Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428+ if is_month_less_than_error then
429+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430+ value attr_name element_name date_msg tz_msg)
431+ else if is_tz_minutes_error || is_fraction_error then
432+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433+ value attr_name element_name date_msg tz_msg)
434+ else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436+ value attr_name element_name tz_msg date_msg)
437+ else
438+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439+ value attr_name element_name tz_msg date_msg)
440 end
441442(** Checker state *)
+58-27
lib/html5_checker/specialized/dl_checker.ml
···8 mutable contains_div : bool;
9 mutable contains_dt_dd : bool;
10 mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *)
011}
1213type div_context = {
···72 else begin
73 match name_lower with
74 | "template" ->
75- state.in_template <- state.in_template + 1
0000007677 | "dl" when state.in_template = 0 ->
78- (* Check for nested dl - only error if direct child (not inside dt/dd) *)
79- begin match current_dl state with
80- | Some _ when state.in_dt_dd = 0 && state.div_in_dl_stack = [] ->
081 Message_collector.add_error collector
82- ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
83 ~code:"disallowed-child"
84 ~element:"dl" ()
85- | _ -> ()
000000086 end;
87 let ctx = {
88 has_dt = false;
···91 contains_div = false;
92 contains_dt_dd = false;
93 dd_before_dt_error_reported = false;
094 } in
95 state.dl_stack <- ctx :: state.dl_stack
96···131 state.in_dt_dd <- state.in_dt_dd + 1;
132 begin match current_div state with
133 | Some div_ctx ->
134- div_ctx.has_dt <- true;
135- (* If we've seen dd, this dt starts a new group *)
136 if div_ctx.in_dd_part then begin
0000137 div_ctx.group_count <- div_ctx.group_count + 1;
138 div_ctx.in_dd_part <- false
139- end
0140 | None ->
141 match current_dl state with
142 | Some dl_ctx ->
···236 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element."
237 ~code:"missing-required-child"
238 ~element:"dl" ()
239- else if not ctx.has_dd then
240- Message_collector.add_error collector
241- ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
242- ~code:"missing-required-child"
243- ~element:"dl" ()
00000000244 else if ctx.last_was_dt then
245- (* Ended with dt, missing dd *)
246 Message_collector.add_error collector
247 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
248 ~code:"missing-required-child"
···274 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
275 ~code:"missing-required-child"
276 ~element:"div" ()
277- else if div_ctx.group_count > 1 then
278- (* Multiple name-value groups in a single div is not allowed *)
279- Message_collector.add_error collector
280- ~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group."
281- ~code:"multiple-groups-in-div"
282- ~element:"div" ()
283 | [] -> ()
284 end
285···292 else begin
293 let trimmed = String.trim text in
294 if trimmed <> "" then begin
295- (* Check for text directly in dl *)
296- match current_dl state with
297- | Some _ when state.div_in_dl_stack = [] ->
0298 Message_collector.add_error collector
299- ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
300 ~code:"text-not-allowed"
301- ~element:"dl" ()
302- | _ -> ()
0000000303 end
304 end
305
···8 mutable contains_div : bool;
9 mutable contains_dt_dd : bool;
10 mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *)
11+ mutable has_template : bool; (* Track if template element was seen inside dl *)
12}
1314type div_context = {
···73 else begin
74 match name_lower with
75 | "template" ->
76+ state.in_template <- state.in_template + 1;
77+ (* Track if template is direct child of dl *)
78+ begin match current_dl state with
79+ | Some dl_ctx when state.div_in_dl_stack = [] ->
80+ dl_ctx.has_template <- true
81+ | _ -> ()
82+ end
8384 | "dl" when state.in_template = 0 ->
85+ (* Check for nested dl - error if direct child of dl OR inside div-in-dl *)
86+ begin match current_div state with
87+ | Some _ ->
88+ (* dl inside div-in-dl is not allowed *)
89 Message_collector.add_error collector
90+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
91 ~code:"disallowed-child"
92 ~element:"dl" ()
93+ | None ->
94+ match current_dl state with
95+ | Some _ when state.in_dt_dd = 0 ->
96+ Message_collector.add_error collector
97+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
98+ ~code:"disallowed-child"
99+ ~element:"dl" ()
100+ | _ -> ()
101 end;
102 let ctx = {
103 has_dt = false;
···106 contains_div = false;
107 contains_dt_dd = false;
108 dd_before_dt_error_reported = false;
109+ has_template = false;
110 } in
111 state.dl_stack <- ctx :: state.dl_stack
112···147 state.in_dt_dd <- state.in_dt_dd + 1;
148 begin match current_div state with
149 | Some div_ctx ->
150+ (* If we've already seen dd, this dt starts a new group - which is not allowed *)
0151 if div_ctx.in_dd_part then begin
152+ Message_collector.add_error collector
153+ ~message:"Element \xe2\x80\x9cdt\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
154+ ~code:"disallowed-child"
155+ ~element:"dt" ();
156 div_ctx.group_count <- div_ctx.group_count + 1;
157 div_ctx.in_dd_part <- false
158+ end;
159+ div_ctx.has_dt <- true
160 | None ->
161 match current_dl state with
162 | Some dl_ctx ->
···256 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element."
257 ~code:"missing-required-child"
258 ~element:"dl" ()
259+ else if not ctx.has_dd then begin
260+ (* If template is present in dl, use list format; otherwise use simple format *)
261+ if ctx.has_template then
262+ Message_collector.add_error collector
263+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing one or more of the following child elements: [dd]."
264+ ~code:"missing-required-child"
265+ ~element:"dl" ()
266+ else
267+ Message_collector.add_error collector
268+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
269+ ~code:"missing-required-child"
270+ ~element:"dl" ()
271+ end
272 else if ctx.last_was_dt then
273+ (* Ended with dt, missing dd for the last group *)
274 Message_collector.add_error collector
275 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
276 ~code:"missing-required-child"
···302 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
303 ~code:"missing-required-child"
304 ~element:"div" ()
305+ (* Multiple groups error is now reported inline when dt appears after dd *)
00000306 | [] -> ()
307 end
308···315 else begin
316 let trimmed = String.trim text in
317 if trimmed <> "" then begin
318+ (* Check for text directly in dl or div-in-dl *)
319+ match current_div state with
320+ | Some _ ->
321+ (* Text in div within dl is not allowed *)
322 Message_collector.add_error collector
323+ ~message:"Text not allowed in element \xe2\x80\x9cdiv\xe2\x80\x9d in this context."
324 ~code:"text-not-allowed"
325+ ~element:"div" ()
326+ | None ->
327+ match current_dl state with
328+ | Some _ ->
329+ Message_collector.add_error collector
330+ ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
331+ ~code:"text-not-allowed"
332+ ~element:"dl" ()
333+ | None -> ()
334 end
335 end
336
···68 String.contains s ':'
6970(** Validate that a URL is a valid absolute URL for itemtype/itemid.
71- Uses the comprehensive URL validation from Url_checker. *)
72-let validate_microdata_url url element attr_name =
073 let url_trimmed = String.trim url in
74 if String.length url_trimmed = 0 then
75 Some (Printf.sprintf
76 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
77- url attr_name element)
78 else
79 (* First check if it has a scheme (required for absolute URL) *)
80 match Url_checker.extract_scheme url_trimmed with
81 | None ->
82 Some (Printf.sprintf
83 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
84- url attr_name element url)
85 | Some _ ->
86 (* Has a scheme - do comprehensive URL validation *)
87 match Url_checker.validate_url url element attr_name with
···89 | Some error_msg ->
90 (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
91 let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
000000092 Some error_msg
9394(** Check if itemprop value is valid. *)
···201 ()
202 else
203 List.iter (fun url ->
204- match validate_microdata_url url element "itemtype" with
205 | None -> ()
206 | Some error_msg ->
207 Message_collector.add_error collector
···68 String.contains s ':'
6970(** Validate that a URL is a valid absolute URL for itemtype/itemid.
71+ Uses the comprehensive URL validation from Url_checker.
72+ original_value is the full attribute value (for error messages when split by whitespace) *)
73+let validate_microdata_url url element attr_name original_value =
74 let url_trimmed = String.trim url in
75 if String.length url_trimmed = 0 then
76 Some (Printf.sprintf
77 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
78+ original_value attr_name element)
79 else
80 (* First check if it has a scheme (required for absolute URL) *)
81 match Url_checker.extract_scheme url_trimmed with
82 | None ->
83 Some (Printf.sprintf
84 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
85+ original_value attr_name element url)
86 | Some _ ->
87 (* Has a scheme - do comprehensive URL validation *)
88 match Url_checker.validate_url url element attr_name with
···90 | Some error_msg ->
91 (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
92 let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
93+ (* Also replace the URL value with the original value in case they differ *)
94+ (* Escape backslashes in replacement string for Str.global_replace *)
95+ let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
96+ let error_msg = Str.global_replace
97+ (Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
98+ (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
99+ error_msg in
100 Some error_msg
101102(** Check if itemprop value is valid. *)
···209 ()
210 else
211 List.iter (fun url ->
212+ match validate_microdata_url url element "itemtype" itemtype with
213 | None -> ()
214 | Some error_msg ->
215 Message_collector.add_error collector
+60-13
lib/html5_checker/specialized/picture_checker.ml
···34 mutable has_source_after_img : bool;
35 mutable has_always_matching_source : bool; (* source without media/type *)
36 mutable source_after_always_matching : bool; (* source after always-matching source *)
0037 mutable parent_stack : string list; (* track parent elements *)
38}
39···46 has_source_after_img = false;
47 has_always_matching_source = false;
48 source_after_always_matching = false;
0049 parent_stack = [];
50}
51···58 state.parent_stack <- [];
59 state.has_source_after_img <- false;
60 state.has_always_matching_source <- false;
61- state.source_after_always_matching <- false
006263(** Check if an attribute list contains a specific attribute. *)
64let has_attr name attrs =
···151 if String.lowercase_ascii attr_name = "media" then Some v else None
152 ) attrs in
153 let has_type = has_attr "type" attrs in
00000000154 let is_always_matching = match media_value with
155 | None -> not has_type (* no media, check if no type either *)
156 | Some v ->
157 let trimmed = String.trim v in
158 trimmed = "" || String.lowercase_ascii trimmed = "all"
159 in
160- if is_always_matching then
161- state.has_always_matching_source <- true
00000162163 | "img" when state.in_picture && state.picture_depth = 1 ->
164 check_img_attrs attrs collector;
···170 if img_count > 1 then
171 report_disallowed_child "picture" "img" collector;
172 (* Check if always-matching source is followed by img with srcset *)
173- if state.has_always_matching_source && has_attr "srcset" attrs then
174- Message_collector.add_error collector
175- ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
176- ~code:"always-matching-source-followed-by-srcset"
177- ~element:"source" ()
00000000000000178179 | "script" when state.in_picture && state.picture_depth = 1 ->
180 state.children_in_picture <- "script" :: state.children_in_picture
···216 if state.has_source_after_img then
217 report_disallowed_child "picture" "source" collector;
218 (* Check for source after always-matching source *)
219- if state.source_after_always_matching then
220- Message_collector.add_error collector
221- ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that matches all media types cannot be followed by another \xe2\x80\x9csource\xe2\x80\x9d element."
222- ~code:"always-matching-source"
223- ~element:"source" ();
00000000000000224225 state.in_picture <- false
226 end;
···34 mutable has_source_after_img : bool;
35 mutable has_always_matching_source : bool; (* source without media/type *)
36 mutable source_after_always_matching : bool; (* source after always-matching source *)
37+ mutable always_matching_is_media_all : bool; (* true if caused by media="all" *)
38+ mutable always_matching_is_media_empty : bool; (* true if caused by media="" or whitespace *)
39 mutable parent_stack : string list; (* track parent elements *)
40}
41···48 has_source_after_img = false;
49 has_always_matching_source = false;
50 source_after_always_matching = false;
51+ always_matching_is_media_all = false;
52+ always_matching_is_media_empty = false;
53 parent_stack = [];
54}
55···62 state.parent_stack <- [];
63 state.has_source_after_img <- false;
64 state.has_always_matching_source <- false;
65+ state.source_after_always_matching <- false;
66+ state.always_matching_is_media_all <- false;
67+ state.always_matching_is_media_empty <- false
6869(** Check if an attribute list contains a specific attribute. *)
70let has_attr name attrs =
···157 if String.lowercase_ascii attr_name = "media" then Some v else None
158 ) attrs in
159 let has_type = has_attr "type" attrs in
160+ let is_media_all = match media_value with
161+ | Some v -> String.lowercase_ascii (String.trim v) = "all"
162+ | None -> false
163+ in
164+ let is_media_empty = match media_value with
165+ | Some v -> String.trim v = ""
166+ | None -> false
167+ in
168 let is_always_matching = match media_value with
169 | None -> not has_type (* no media, check if no type either *)
170 | Some v ->
171 let trimmed = String.trim v in
172 trimmed = "" || String.lowercase_ascii trimmed = "all"
173 in
174+ if is_always_matching then begin
175+ state.has_always_matching_source <- true;
176+ if is_media_all then
177+ state.always_matching_is_media_all <- true
178+ else if is_media_empty then
179+ state.always_matching_is_media_empty <- true
180+ end
181182 | "img" when state.in_picture && state.picture_depth = 1 ->
183 check_img_attrs attrs collector;
···189 if img_count > 1 then
190 report_disallowed_child "picture" "img" collector;
191 (* Check if always-matching source is followed by img with srcset *)
192+ if state.has_always_matching_source && has_attr "srcset" attrs then begin
193+ if state.always_matching_is_media_all then
194+ Message_collector.add_error collector
195+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
196+ ~code:"media-all-not-allowed"
197+ ~element:"source"
198+ ~attribute:"media" ()
199+ else if state.always_matching_is_media_empty then
200+ Message_collector.add_error collector
201+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
202+ ~code:"media-empty-not-allowed"
203+ ~element:"source"
204+ ~attribute:"media" ()
205+ else
206+ Message_collector.add_error collector
207+ ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
208+ ~code:"always-matching-source-followed-by-srcset"
209+ ~element:"source" ()
210+ end
211212 | "script" when state.in_picture && state.picture_depth = 1 ->
213 state.children_in_picture <- "script" :: state.children_in_picture
···249 if state.has_source_after_img then
250 report_disallowed_child "picture" "source" collector;
251 (* Check for source after always-matching source *)
252+ if state.source_after_always_matching then begin
253+ if state.always_matching_is_media_all then
254+ Message_collector.add_error collector
255+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
256+ ~code:"media-all-not-allowed"
257+ ~element:"source"
258+ ~attribute:"media" ()
259+ else if state.always_matching_is_media_empty then
260+ Message_collector.add_error collector
261+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
262+ ~code:"media-empty-not-allowed"
263+ ~element:"source"
264+ ~attribute:"media" ()
265+ else
266+ Message_collector.add_error collector
267+ ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
268+ ~code:"always-matching-source"
269+ ~element:"source" ()
270+ end;
271272 state.in_picture <- false
273 end;
···54 Buffer.contents buf
5556(** Check if a size value has a valid CSS length unit and non-negative value *)
57-type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation
00000000000005859(** Check if CSS comment appears in an invalid position:
60 - Between sign and number (+/**/50vw)
61 - Between number and unit (50/**/vw)
62 Trailing comments (50vw/**/) are valid. *)
63-let has_invalid_css_comment s =
64 let len = String.length s in
65 (* Find comment position *)
66 let rec find_comment i =
···69 else find_comment (i + 1)
70 in
71 match find_comment 0 with
72- | None -> false
73 | Some comment_pos ->
74 let before = String.sub s 0 comment_pos in
75 let trimmed_before = String.trim before in
76- if String.length trimmed_before = 0 then false (* Leading comment is OK *)
77 else begin
78 (* Find end of comment *)
79 let rec find_end i =
···84 let end_pos = find_end (comment_pos + 2) in
85 let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
86 let trimmed_after = String.trim (strip_css_comments after) in
87- if trimmed_after = "" then false (* Trailing comment is OK *)
88 else begin
89 (* Comment is in the middle - check if it breaks a number/unit combo *)
90 let last = trimmed_before.[String.length trimmed_before - 1] in
91- (* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *)
92- (last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.'
0000000093 end
94 end
0000009596(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
97let has_invalid_scientific_notation s =
···109 in
110 String.contains after_sign '.'
11100000000000000000000000112let check_size_value size_value =
113 let trimmed = String.trim size_value in
114- if trimmed = "" then InvalidUnit
115- (* Check for CSS comments inside numbers - this is invalid *)
116- else if has_invalid_css_comment trimmed then CssCommentInside
117 else begin
00000118 (* Strip valid leading/trailing CSS comments for further checks *)
119 let value_no_comments = String.trim (strip_css_comments trimmed) in
120 (* Check for invalid scientific notation like 1e+1.5px *)
121 if has_invalid_scientific_notation value_no_comments then BadScientificNotation
122 (* "auto" is only valid with lazy loading, which requires checking the element context.
123 For general validation, treat "auto" alone as invalid in sizes. *)
124- else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit
125- else if value_no_comments = "" then InvalidUnit
0126 else begin
127 let lower = String.lowercase_ascii value_no_comments in
128- (* Check for invalid units first *)
129- let has_invalid = List.exists (fun unit ->
130- let len = String.length unit in
131- String.length lower > len &&
132- String.sub lower (String.length lower - len) len = unit
133- ) invalid_size_units in
134- if has_invalid then InvalidUnit
135 else begin
136- (* Check for valid CSS length units *)
137- let has_valid_unit = List.exists (fun unit ->
138- let len = String.length unit in
139- String.length lower > len &&
140- String.sub lower (String.length lower - len) len = unit
141- ) valid_length_units in
142- if has_valid_unit then begin
143- (* Check if it's negative (starts with - but not -0) *)
144- if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
145- (* Check if it's -0 which is valid *)
146- let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
147- try
148- let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
149- let f = float_of_string num_str in
150- if f = 0.0 then Valid else NegativeValue
151- with _ -> NegativeValue
152- end else
153- Valid
154- end
155- (* Could be calc() or other CSS functions - allow those *)
156- else if String.contains value_no_comments '(' then Valid
157 else begin
158- (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
159- let stripped =
160- let s = value_no_comments in
161- let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
162- s
163- in
164- (* Check if it's zero or a numeric value starting with 0 *)
165- try
166- let f = float_of_string stripped in
167- if f = 0.0 then Valid else InvalidUnit
168- with _ -> InvalidUnit
000000000000000000000000000000169 end
170 end
171 end
···174let has_valid_size_unit size_value =
175 match check_size_value size_value with
176 | Valid -> true
177- | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false
178179(** Check if a sizes entry has a media condition (starts with '(') *)
180let has_media_condition entry =
···236 if not (has_media_condition trimmed) then
237 trimmed
238 else begin
239- (* Find matching closing paren, then get the size value after it *)
00240 let len = String.length trimmed in
241- let rec find_close_paren i depth =
242 if i >= len then len
243- else match trimmed.[i] with
244- | '(' -> find_close_paren (i + 1) (depth + 1)
245- | ')' -> if depth = 1 then i + 1 else find_close_paren (i + 1) (depth - 1)
246- | _ -> find_close_paren (i + 1) depth
000000000000000000000000000000000000247 in
248- let after_paren = find_close_paren 0 0 in
249- if after_paren >= len then ""
250- else String.trim (String.sub trimmed after_paren (len - after_paren))
251 end
252253(** Validate sizes attribute value *)
···275 (* Check for trailing comma *)
276 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in
277 if List.length entries > 1 && last_entry = "" then begin
000000278 Message_collector.add_error collector
279- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Ends with trailing comma." value element_name)
280 ~code:"bad-sizes-value"
281 ~element:element_name ~attribute:"sizes" ();
282 false
···285286 (* Check for default-first pattern: unconditional value before conditional ones *)
287 let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in
288- if List.length non_empty_entries > 1 then begin
289- let first = List.hd non_empty_entries in
290- let rest = List.tl non_empty_entries in
0000291 (* If first entry has no media condition but later ones do, that's invalid *)
292 if not (has_media_condition first) && List.exists has_media_condition rest then begin
00293 Message_collector.add_error collector
294- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Default size must be last." value element_name)
295 ~code:"bad-sizes-value"
296 ~element:element_name ~attribute:"sizes" ();
297 valid := false
298 end;
299- (* Check for multiple consecutive defaults (entries without media conditions) *)
300- let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in
301- if List.length defaults_without_media > 1 then begin
302- Message_collector.add_error collector
303- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name)
304- ~code:"bad-sizes-value"
305- ~element:element_name ~attribute:"sizes" ();
306- valid := false
000000307 end
308 end;
309310 (* Validate each entry's media condition and size value *)
311- List.iter (fun entry ->
0312 let trimmed = String.trim entry in
313 if trimmed <> "" then begin
314 (* Check for invalid media condition *)
315 (match has_invalid_media_condition trimmed with
316 | Some err_msg ->
0000000317 Message_collector.add_error collector
318- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg)
319 ~code:"bad-sizes-value"
320 ~element:element_name ~attribute:"sizes" ();
321 valid := false
···323324 let size_val = extract_size_value trimmed in
325 if size_val <> "" then begin
326- match check_size_value size_val with
000000000000000000000000000000000000000000000000000000000000000000000327 | Valid -> ()
328 | NegativeValue ->
00000329 Message_collector.add_error collector
330- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Source size value cannot be negative." value element_name)
0000000331 ~code:"bad-sizes-value"
332 ~element:element_name ~attribute:"sizes" ();
333 valid := false
334- | CssCommentInside ->
000335 Message_collector.add_error collector
336- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
337 ~code:"bad-sizes-value"
338 ~element:element_name ~attribute:"sizes" ();
339 valid := false
340 | BadScientificNotation ->
0000000341 Message_collector.add_error collector
342- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
343 ~code:"bad-sizes-value"
344 ~element:element_name ~attribute:"sizes" ();
345 valid := false
346- | InvalidUnit ->
0000000000000000000000000347 Message_collector.add_error collector
348- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name)
349 ~code:"bad-sizes-value"
350 ~element:element_name ~attribute:"sizes" ();
351 valid := false
···359 end
360361(** Validate srcset descriptor *)
362-let validate_srcset_descriptor desc element_name srcset_value collector =
363 let desc_lower = String.lowercase_ascii (String.trim desc) in
364 if String.length desc_lower = 0 then true
365 else begin
···371 (* Width descriptor - must be positive integer, no leading + *)
372 let trimmed_desc = String.trim desc in
373 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
00374 Message_collector.add_error collector
375- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value)
376 ~code:"bad-srcset-value"
377 ~element:element_name ~attribute:"srcset" ();
378 false
···381 let n = int_of_string num_part in
382 if n <= 0 then begin
383 Message_collector.add_error collector
384- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width must be positive." srcset_value element_name)
385 ~code:"bad-srcset-value"
386 ~element:element_name ~attribute:"srcset" ();
387 false
···390 let original_last = desc.[String.length desc - 1] in
391 if original_last = 'W' then begin
392 Message_collector.add_error collector
393- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width descriptor must use lowercase \xe2\x80\x9cw\xe2\x80\x9d." srcset_value element_name)
394 ~code:"bad-srcset-value"
395 ~element:element_name ~attribute:"srcset" ();
396 false
397 end else true
398 end
399 with _ ->
400- (* Check for scientific notation or decimal *)
401- if String.contains num_part 'e' || String.contains num_part 'E' then begin
402 Message_collector.add_error collector
403- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Scientific notation not allowed in width descriptor." srcset_value element_name)
404 ~code:"bad-srcset-value"
405 ~element:element_name ~attribute:"srcset" ();
406 false
···415 (* Pixel density descriptor - must be positive number, no leading + *)
416 let trimmed_desc = String.trim desc in
417 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
00418 Message_collector.add_error collector
419- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Leading plus sign not allowed." srcset_value element_name)
420 ~code:"bad-srcset-value"
421 ~element:element_name ~attribute:"srcset" ();
422 false
···424 (try
425 let n = float_of_string num_part in
426 if Float.is_nan n then begin
0000427 Message_collector.add_error collector
428- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: NaN not allowed." srcset_value element_name)
429 ~code:"bad-srcset-value"
430 ~element:element_name ~attribute:"srcset" ();
431 false
432- end else if n <= 0.0 then begin
0000000000000000433 Message_collector.add_error collector
434- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Density must be positive." srcset_value element_name)
435 ~code:"bad-srcset-value"
436 ~element:element_name ~attribute:"srcset" ();
437 false
438 end else if n = neg_infinity || n = infinity then begin
0000439 Message_collector.add_error collector
440- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Infinity not allowed." srcset_value element_name)
441 ~code:"bad-srcset-value"
442 ~element:element_name ~attribute:"srcset" ();
443 false
···451 end
452 | 'h' ->
453 (* Height descriptor - not allowed *)
454- Message_collector.add_error collector
455- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
456- ~code:"bad-srcset-value"
457- ~element:element_name ~attribute:"srcset" ();
0000000000000000000458 false
459 | _ ->
460- (* Unknown descriptor *)
00000000000461 Message_collector.add_error collector
462- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor." srcset_value element_name)
463 ~code:"bad-srcset-value"
464 ~element:element_name ~attribute:"srcset" ();
465 false
···489 let entries = String.split_on_char ',' value in
490 let has_w_descriptor = ref false in
491 let has_x_descriptor = ref false in
492- let has_no_descriptor = ref false in (* Track if any entry has no descriptor *)
493- let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *)
0494495 (* Check for empty srcset *)
496 if String.trim value = "" then begin
497 Message_collector.add_error collector
498- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must not be empty." value element_name)
499 ~code:"bad-srcset-value"
500 ~element:element_name ~attribute:"srcset" ()
501 end;
···503 (* Check for leading comma *)
504 if String.length value > 0 && value.[0] = ',' then begin
505 Message_collector.add_error collector
506- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Leading comma." value element_name)
507 ~code:"bad-srcset-value"
508 ~element:element_name ~attribute:"srcset" ()
509 end;
510511- (* Check for trailing comma *)
512 let trimmed_value = String.trim value in
513 if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin
514- Message_collector.add_error collector
515- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Trailing comma." value element_name)
516- ~code:"bad-srcset-value"
517- ~element:element_name ~attribute:"srcset" ()
0000000000000000518 end;
519520 List.iter (fun entry ->
···532 let scheme_colon = scheme ^ ":" in
533 if url_lower = scheme_colon then
534 Message_collector.add_error collector
535- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name)
536 ~code:"bad-srcset-url"
537 ~element:element_name ~attribute:"srcset" ()
538 ) special_schemes
···542 | [url] ->
543 check_srcset_url url;
544 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
545- has_no_descriptor := true;
546- if Hashtbl.mem seen_descriptors "explicit-1x" then begin
0547 Message_collector.add_error collector
548- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
549 ~code:"bad-srcset-value"
550 ~element:element_name ~attribute:"srcset" ()
551- end else
552- Hashtbl.add seen_descriptors "implicit-1x" true
0553 | url :: desc :: rest ->
554 (* Check URL for broken schemes *)
555 check_srcset_url url;
556 (* Check for extra junk - multiple descriptors are not allowed *)
557 if rest <> [] then begin
0558 Message_collector.add_error collector
559- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Multiple descriptors in candidate." value element_name)
560 ~code:"bad-srcset-value"
561 ~element:element_name ~attribute:"srcset" ()
562 end;
···565 if String.length desc_lower > 0 then begin
566 let last_char = desc_lower.[String.length desc_lower - 1] in
567 if last_char = 'w' then has_w_descriptor := true
568- else if last_char = 'x' then has_x_descriptor := true;
000000000000000000000000000000000000569570 (* Check for duplicate descriptors - use normalized form *)
571 let normalized = normalize_descriptor desc in
572 let is_1x = (normalized = "1x") in
573- if Hashtbl.mem seen_descriptors normalized then begin
574- Message_collector.add_error collector
575- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
576- ~code:"bad-srcset-value"
577- ~element:element_name ~attribute:"srcset" ()
578- end else if is_1x && Hashtbl.mem seen_descriptors "implicit-1x" then begin
579- (* Explicit 1x conflicts with implicit 1x *)
580 Message_collector.add_error collector
581- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
582 ~code:"bad-srcset-value"
583 ~element:element_name ~attribute:"srcset" ()
584- end else begin
585- Hashtbl.add seen_descriptors normalized true;
586- if is_1x then Hashtbl.add seen_descriptors "explicit-1x" true
000000000587 end
588 end;
589590- ignore (validate_srcset_descriptor desc element_name value collector)
591 end
592 ) entries;
593594 (* Check: if w descriptor used and no sizes, that's an error for img and source *)
595 if !has_w_descriptor && not has_sizes then
596 Message_collector.add_error collector
597- ~message:(Printf.sprintf "When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute on the \xe2\x80\x9c%s\xe2\x80\x9d element uses width descriptors, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be present." element_name)
598 ~code:"srcset-w-without-sizes"
599 ~element:element_name ~attribute:"srcset" ();
600601 (* Check: if sizes is present, all entries must have width descriptors *)
602- if has_sizes && !has_no_descriptor then
0603 Message_collector.add_error collector
604- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name)
605 ~code:"bad-srcset-value"
606- ~element:element_name ~attribute:"srcset" ();
0607608- (* Check: if sizes is present and srcset uses x descriptors, that's an error *)
609- if has_sizes && !has_x_descriptor then
0610 Message_collector.add_error collector
611 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
612 ~code:"bad-srcset-value"
···54 Buffer.contents buf
5556(** Check if a size value has a valid CSS length unit and non-negative value *)
57+type size_check_result =
58+ | Valid
59+ | InvalidUnit of string * string (* (found_unit, context) *)
60+ | NegativeValue
61+ | CssCommentAfterSign of string * string (* what was found, context *)
62+ | CssCommentBeforeUnit of string * string (* what was found, context *)
63+ | BadScientificNotation
64+ | BadCssNumber of char * string (* (first_char, context) - not starting with digit or minus *)
65+66+(** CSS comment error types *)
67+type css_comment_error =
68+ | NoCommentError
69+ | CommentAfterSign of string * string (* what was found, context *)
70+ | CommentBetweenNumberAndUnit of string * string (* what was found at comment position, context *)
7172(** Check if CSS comment appears in an invalid position:
73 - Between sign and number (+/**/50vw)
74 - Between number and unit (50/**/vw)
75 Trailing comments (50vw/**/) are valid. *)
76+let check_css_comment_position s =
77 let len = String.length s in
78 (* Find comment position *)
79 let rec find_comment i =
···82 else find_comment (i + 1)
83 in
84 match find_comment 0 with
85+ | None -> NoCommentError
86 | Some comment_pos ->
87 let before = String.sub s 0 comment_pos in
88 let trimmed_before = String.trim before in
89+ if String.length trimmed_before = 0 then NoCommentError (* Leading comment is OK *)
90 else begin
91 (* Find end of comment *)
92 let rec find_end i =
···97 let end_pos = find_end (comment_pos + 2) in
98 let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
99 let trimmed_after = String.trim (strip_css_comments after) in
100+ if trimmed_after = "" then NoCommentError (* Trailing comment is OK *)
101 else begin
102 (* Comment is in the middle - check if it breaks a number/unit combo *)
103 let last = trimmed_before.[String.length trimmed_before - 1] in
104+ (* What's at the comment position? Just show "/" *)
105+ let slash = "/" in
106+ (* Invalid if comment appears after +/- *)
107+ if last = '+' || last = '-' then
108+ CommentAfterSign (trimmed_before ^ slash, s)
109+ (* Invalid if comment appears after digit (before more content) *)
110+ else if (last >= '0' && last <= '9') || last = '.' then
111+ CommentBetweenNumberAndUnit (slash ^ trimmed_after, s)
112+ else
113+ NoCommentError
114 end
115 end
116+117+(** For backward compatibility *)
118+let has_invalid_css_comment s =
119+ match check_css_comment_position s with
120+ | NoCommentError -> false
121+ | _ -> true
122123(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
124let has_invalid_scientific_notation s =
···136 in
137 String.contains after_sign '.'
138139+(** Extract unit from a size value like "10px" -> "px", "100vw" -> "vw", "50%" -> "%"
140+ Returns the unit with original case preserved *)
141+let extract_unit s =
142+ let trimmed = String.trim s in
143+ let len = String.length trimmed in
144+ if len = 0 then ""
145+ (* Check for % at the end *)
146+ else if trimmed.[len - 1] = '%' then "%"
147+ else begin
148+ let lower = String.lowercase_ascii trimmed in
149+ (* Try to find a unit at the end (letters only) *)
150+ let rec find_unit_length i =
151+ if i < 0 then 0
152+ else if lower.[i] >= 'a' && lower.[i] <= 'z' then find_unit_length (i - 1)
153+ else i + 1
154+ in
155+ let start = find_unit_length (len - 1) in
156+ if start < len then
157+ (* Return the unit from the original string (preserving case) *)
158+ String.sub trimmed start (len - start)
159+ else ""
160+ end
161+162let check_size_value size_value =
163 let trimmed = String.trim size_value in
164+ if trimmed = "" then InvalidUnit ("", trimmed)
00165 else begin
166+ (* Check for CSS comments inside numbers - this is invalid *)
167+ match check_css_comment_position trimmed with
168+ | CommentAfterSign (found, ctx) -> CssCommentAfterSign (found, ctx)
169+ | CommentBetweenNumberAndUnit (found, ctx) -> CssCommentBeforeUnit (found, ctx)
170+ | NoCommentError ->
171 (* Strip valid leading/trailing CSS comments for further checks *)
172 let value_no_comments = String.trim (strip_css_comments trimmed) in
173 (* Check for invalid scientific notation like 1e+1.5px *)
174 if has_invalid_scientific_notation value_no_comments then BadScientificNotation
175 (* "auto" is only valid with lazy loading, which requires checking the element context.
176 For general validation, treat "auto" alone as invalid in sizes. *)
177+ else if String.lowercase_ascii value_no_comments = "auto" then
178+ BadCssNumber (value_no_comments.[0], trimmed)
179+ else if value_no_comments = "" then InvalidUnit ("", trimmed)
180 else begin
181 let lower = String.lowercase_ascii value_no_comments in
182+ (* Check for calc() or other CSS functions first - these are always valid *)
183+ if String.contains value_no_comments '(' then Valid
00000184 else begin
185+ (* Check if the value starts with a digit, minus, or plus sign *)
186+ let first_char = value_no_comments.[0] in
187+ let starts_with_number =
188+ (first_char >= '0' && first_char <= '9') ||
189+ first_char = '-' ||
190+ first_char = '+' ||
191+ first_char = '.' (* decimal point like .5px *)
192+ in
193+ if not starts_with_number then
194+ (* Not a valid CSS number token - doesn't start with digit or sign *)
195+ BadCssNumber (first_char, trimmed)
0000000000196 else begin
197+ (* Check for invalid units first *)
198+ let found_invalid = List.find_opt (fun unit ->
199+ let len = String.length unit in
200+ String.length lower > len &&
201+ String.sub lower (String.length lower - len) len = unit
202+ ) invalid_size_units in
203+ match found_invalid with
204+ | Some _unit -> InvalidUnit (extract_unit value_no_comments, trimmed)
205+ | None ->
206+ (* Check for valid CSS length units *)
207+ let has_valid_unit = List.exists (fun unit ->
208+ let len = String.length unit in
209+ String.length lower > len &&
210+ String.sub lower (String.length lower - len) len = unit
211+ ) valid_length_units in
212+ if has_valid_unit then begin
213+ (* Check if it's negative (starts with - but not -0) *)
214+ if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
215+ (* Check if it's -0 which is valid *)
216+ let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
217+ try
218+ let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
219+ let f = float_of_string num_str in
220+ if f = 0.0 then Valid else NegativeValue
221+ with _ -> NegativeValue
222+ end else
223+ Valid
224+ end
225+ else begin
226+ (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
227+ let stripped =
228+ let s = value_no_comments in
229+ let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
230+ s
231+ in
232+ (* Check if it's zero or a numeric value starting with 0 *)
233+ try
234+ let f = float_of_string stripped in
235+ if f = 0.0 then Valid else InvalidUnit (extract_unit value_no_comments, trimmed)
236+ with _ -> InvalidUnit (extract_unit value_no_comments, trimmed)
237+ end
238 end
239 end
240 end
···243let has_valid_size_unit size_value =
244 match check_size_value size_value with
245 | Valid -> true
246+ | InvalidUnit (_, _) | NegativeValue | CssCommentAfterSign (_, _) | CssCommentBeforeUnit (_, _) | BadScientificNotation | BadCssNumber (_, _) -> false
247248(** Check if a sizes entry has a media condition (starts with '(') *)
249let has_media_condition entry =
···305 if not (has_media_condition trimmed) then
306 trimmed
307 else begin
308+ (* Media conditions can have "and", "or", "not" operators connecting
309+ multiple parenthesized groups, e.g., "(not (width:500px)) and (width:500px) 500px"
310+ We need to skip all media condition parts to find the size value *)
311 let len = String.length trimmed in
312+ let rec skip_media_condition i =
313 if i >= len then len
314+ else begin
315+ let remaining = String.trim (String.sub trimmed i (len - i)) in
316+ let remaining_len = String.length remaining in
317+ if remaining_len = 0 then len
318+ else begin
319+ let first_char = remaining.[0] in
320+ if first_char = '(' then begin
321+ (* Skip this parenthesized group *)
322+ let rec find_close_paren j depth =
323+ if j >= remaining_len then remaining_len
324+ else match remaining.[j] with
325+ | '(' -> find_close_paren (j + 1) (depth + 1)
326+ | ')' -> if depth = 1 then j + 1 else find_close_paren (j + 1) (depth - 1)
327+ | _ -> find_close_paren (j + 1) depth
328+ in
329+ let after_paren = find_close_paren 0 0 in
330+ let new_pos = i + (len - i) - remaining_len + after_paren in
331+ skip_media_condition new_pos
332+ end
333+ else begin
334+ (* Check if remaining starts with "and", "or", "not" followed by space or paren *)
335+ let lower_remaining = String.lowercase_ascii remaining in
336+ if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
337+ skip_media_condition (i + (len - i) - remaining_len + 4)
338+ else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
339+ skip_media_condition (i + (len - i) - remaining_len + 3)
340+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not " then
341+ skip_media_condition (i + (len - i) - remaining_len + 4)
342+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and(" then
343+ skip_media_condition (i + (len - i) - remaining_len + 3)
344+ else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or(" then
345+ skip_media_condition (i + (len - i) - remaining_len + 2)
346+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not(" then
347+ skip_media_condition (i + (len - i) - remaining_len + 3)
348+ else
349+ (* Found something that's not a media condition part - this is the size value *)
350+ i + (len - i) - remaining_len
351+ end
352+ end
353+ end
354 in
355+ let size_start = skip_media_condition 0 in
356+ if size_start >= len then ""
357+ else String.trim (String.sub trimmed size_start (len - size_start))
358 end
359360(** Validate sizes attribute value *)
···382 (* Check for trailing comma *)
383 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in
384 if List.length entries > 1 && last_entry = "" then begin
385+ (* Generate abbreviated context - show last ~25 chars with ellipsis if needed *)
386+ let context =
387+ if String.length value > 25 then
388+ "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
389+ else value
390+ in
391 Message_collector.add_error collector
392+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
393 ~code:"bad-sizes-value"
394 ~element:element_name ~attribute:"sizes" ();
395 false
···398399 (* Check for default-first pattern: unconditional value before conditional ones *)
400 let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in
401+ (* Filter out entries that have invalid media conditions - they'll be reported separately *)
402+ let valid_entries = List.filter (fun e ->
403+ has_invalid_media_condition (String.trim e) = None
404+ ) non_empty_entries in
405+ if List.length valid_entries > 1 then begin
406+ let first = List.hd valid_entries in
407+ let rest = List.tl valid_entries in
408 (* If first entry has no media condition but later ones do, that's invalid *)
409 if not (has_media_condition first) && List.exists has_media_condition rest then begin
410+ (* Context is the first entry with a comma *)
411+ let context = (String.trim first) ^ "," in
412 Message_collector.add_error collector
413+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
414 ~code:"bad-sizes-value"
415 ~element:element_name ~attribute:"sizes" ();
416 valid := false
417 end;
418+ (* Check for multiple entries without media conditions.
419+ When the first entry has no media condition, report "Expected media condition"
420+ regardless of whether later entries have media conditions or not *)
421+ if not (has_media_condition first) && !valid then begin
422+ (* Only report if we haven't already reported the default-first error *)
423+ if not (List.exists has_media_condition rest) then begin
424+ (* Multiple defaults - report as "Expected media condition" *)
425+ let context = (String.trim first) ^ "," in
426+ Message_collector.add_error collector
427+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
428+ ~code:"bad-sizes-value"
429+ ~element:element_name ~attribute:"sizes" ();
430+ valid := false
431+ end
432 end
433 end;
434435 (* Validate each entry's media condition and size value *)
436+ let num_entries = List.length entries in
437+ List.iteri (fun idx entry ->
438 let trimmed = String.trim entry in
439 if trimmed <> "" then begin
440 (* Check for invalid media condition *)
441 (match has_invalid_media_condition trimmed with
442 | Some err_msg ->
443+ (* Generate context: "entry," with ellipsis if needed *)
444+ let context = (String.trim entry) ^ "," in
445+ let context =
446+ if String.length context > 25 then
447+ "\xe2\x80\xa6" ^ String.sub context (String.length context - 25) 25
448+ else context
449+ in
450 Message_collector.add_error collector
451+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name err_msg context)
452 ~code:"bad-sizes-value"
453 ~element:element_name ~attribute:"sizes" ();
454 valid := false
···456457 let size_val = extract_size_value trimmed in
458 if size_val <> "" then begin
459+ (* Check if there are multiple space-separated words in the size value.
460+ Only the first word should be the size, rest is junk. *)
461+ let size_parts = String.split_on_char ' ' size_val |> List.filter (fun s -> s <> "") in
462+ let first_size = match size_parts with [] -> size_val | hd :: _ -> hd in
463+ let extra_parts = match size_parts with [] -> [] | _ :: tl -> tl in
464+465+ (* Check if first word looks like it should have been a media condition
466+ (doesn't start with digit, sign, decimal, '/', or look like a CSS function) *)
467+ let first_char = if String.length first_size > 0 then first_size.[0] else 'x' in
468+ let has_paren = String.contains size_val '(' in (* calc(), etc. *)
469+ let looks_like_junk_entry =
470+ not (has_media_condition trimmed) &&
471+ not has_paren && (* Allow CSS functions like calc() *)
472+ not (first_char = '/') && (* Allow leading CSS comments *)
473+ not ((first_char >= '0' && first_char <= '9') ||
474+ first_char = '+' || first_char = '-' || first_char = '.')
475+ in
476+477+ (* If this entry looks like junk and there are multiple entries,
478+ report "Expected media condition" instead of "Bad CSS number".
479+ For single entries with invalid values, fall through to BadCssNumber. *)
480+ if looks_like_junk_entry && num_entries > 1 then begin
481+ (* Find the context ending with the previous entry *)
482+ let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in
483+ let context =
484+ if List.length prev_entries > 0 then
485+ let prev_value = String.concat ", " (List.map String.trim prev_entries) ^ "," in
486+ if String.length prev_value > 25 then
487+ "\xe2\x80\xa6" ^ String.sub prev_value (String.length prev_value - 25) 25
488+ else prev_value
489+ else value
490+ in
491+ Message_collector.add_error collector
492+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
493+ ~code:"bad-sizes-value"
494+ ~element:element_name ~attribute:"sizes" ();
495+ valid := false
496+ end
497+ (* If there's extra junk after the size, report BadCssNumber error for it *)
498+ else if extra_parts <> [] then begin
499+ let junk = String.concat " " extra_parts in
500+ let last_junk = List.nth extra_parts (List.length extra_parts - 1) in
501+ let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in
502+ (* Context depends on whether this is the last entry:
503+ - For non-last entries: entry with trailing comma, truncated from beginning
504+ - For last entry: full value truncated from beginning (no trailing comma) *)
505+ let is_last_entry = idx = num_entries - 1 in
506+ let context =
507+ if is_last_entry then begin
508+ (* Last entry: use full value truncated *)
509+ if String.length value > 25 then
510+ "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
511+ else value
512+ end else begin
513+ (* Non-last entry: use entry with comma, truncated *)
514+ let entry_with_comma = trimmed ^ "," in
515+ if String.length entry_with_comma > 25 then
516+ "\xe2\x80\xa6" ^ String.sub entry_with_comma (String.length entry_with_comma - 25) 25
517+ else entry_with_comma
518+ end
519+ in
520+ let _ = junk in
521+ Message_collector.add_error collector
522+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
523+ ~code:"bad-sizes-value"
524+ ~element:element_name ~attribute:"sizes" ();
525+ valid := false
526+ end
527+ else
528+ match check_size_value first_size with
529 | Valid -> ()
530 | NegativeValue ->
531+ let full_context =
532+ if List.length entries > 1 then size_val ^ ","
533+ else size_val
534+ in
535+ let _ = full_context in
536 Message_collector.add_error collector
537+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected positive size value but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val size_val)
538+ ~code:"bad-sizes-value"
539+ ~element:element_name ~attribute:"sizes" ();
540+ valid := false
541+ | CssCommentAfterSign (found, context) ->
542+ (* e.g., +/**/50vw - expected number after sign *)
543+ Message_collector.add_error collector
544+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected number but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name found context)
545 ~code:"bad-sizes-value"
546 ~element:element_name ~attribute:"sizes" ();
547 valid := false
548+ | CssCommentBeforeUnit (found, context) ->
549+ (* e.g., 50/**/vw - expected units after number *)
550+ let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
551+ let units_str = String.concat ", " units_list in
552 Message_collector.add_error collector
553+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found context)
554 ~code:"bad-sizes-value"
555 ~element:element_name ~attribute:"sizes" ();
556 valid := false
557 | BadScientificNotation ->
558+ (* For scientific notation with bad exponent, show what char was expected vs found *)
559+ let context =
560+ if List.length entries > 1 then trimmed ^ ","
561+ else trimmed
562+ in
563+ (* Find the period in the exponent *)
564+ let _ = context in
565 Message_collector.add_error collector
566+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a digit but saw \xe2\x80\x9c.\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val)
567 ~code:"bad-sizes-value"
568 ~element:element_name ~attribute:"sizes" ();
569 valid := false
570+ | BadCssNumber (first_char, context) ->
571+ (* Value doesn't start with a digit or minus sign *)
572+ let full_context =
573+ if List.length entries > 1 then context ^ ","
574+ else context
575+ in
576+ let _ = full_context in
577+ Message_collector.add_error collector
578+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
579+ ~code:"bad-sizes-value"
580+ ~element:element_name ~attribute:"sizes" ();
581+ valid := false
582+ | InvalidUnit (found_unit, _context) ->
583+ (* Generate the full list of expected units *)
584+ let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
585+ let units_str = String.concat ", " units_list in
586+ (* Context should be the full entry, with comma only if there are multiple entries *)
587+ let full_context =
588+ if List.length entries > 1 then trimmed ^ ","
589+ else trimmed
590+ in
591+ (* When found_unit is empty, say "no units" instead of quoting empty string *)
592+ let found_str =
593+ if found_unit = "" then "no units"
594+ else Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" found_unit
595+ in
596 Message_collector.add_error collector
597+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found_str full_context)
598 ~code:"bad-sizes-value"
599 ~element:element_name ~attribute:"sizes" ();
600 valid := false
···608 end
609610(** Validate srcset descriptor *)
611+let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
612 let desc_lower = String.lowercase_ascii (String.trim desc) in
613 if String.length desc_lower = 0 then true
614 else begin
···620 (* Width descriptor - must be positive integer, no leading + *)
621 let trimmed_desc = String.trim desc in
622 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
623+ (* Show just the number part (without the 'w') *)
624+ let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
625 Message_collector.add_error collector
626+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part_for_msg srcset_value)
627 ~code:"bad-srcset-value"
628 ~element:element_name ~attribute:"srcset" ();
629 false
···632 let n = int_of_string num_part in
633 if n <= 0 then begin
634 Message_collector.add_error collector
635+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
636 ~code:"bad-srcset-value"
637 ~element:element_name ~attribute:"srcset" ();
638 false
···641 let original_last = desc.[String.length desc - 1] in
642 if original_last = 'W' then begin
643 Message_collector.add_error collector
644+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name desc srcset_value)
645 ~code:"bad-srcset-value"
646 ~element:element_name ~attribute:"srcset" ();
647 false
648 end else true
649 end
650 with _ ->
651+ (* Check for scientific notation, decimal, or other non-integer values *)
652+ if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin
653 Message_collector.add_error collector
654+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected integer but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
655 ~code:"bad-srcset-value"
656 ~element:element_name ~attribute:"srcset" ();
657 false
···666 (* Pixel density descriptor - must be positive number, no leading + *)
667 let trimmed_desc = String.trim desc in
668 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
669+ (* Extract the number part including the plus sign *)
670+ let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
671 Message_collector.add_error collector
672+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_with_plus srcset_value)
673 ~code:"bad-srcset-value"
674 ~element:element_name ~attribute:"srcset" ();
675 false
···677 (try
678 let n = float_of_string num_part in
679 if Float.is_nan n then begin
680+ (* NaN is not a valid float - report as parse error with first char from ORIGINAL desc *)
681+ let trimmed_desc = String.trim desc in
682+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
683+ let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
684 Message_collector.add_error collector
685+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
686 ~code:"bad-srcset-value"
687 ~element:element_name ~attribute:"srcset" ();
688 false
689+ end else if n = 0.0 then begin
690+ (* Check if it's -0 (starts with minus) - report as "greater than zero" error *)
691+ let trimmed_desc = String.trim desc in
692+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
693+ if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin
694+ Message_collector.add_error collector
695+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name orig_num_part srcset_value)
696+ ~code:"bad-srcset-value"
697+ ~element:element_name ~attribute:"srcset" ()
698+ end else begin
699+ Message_collector.add_error collector
700+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Zero is not a valid positive floating point number at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name srcset_value)
701+ ~code:"bad-srcset-value"
702+ ~element:element_name ~attribute:"srcset" ()
703+ end;
704+ false
705+ end else if n < 0.0 then begin
706 Message_collector.add_error collector
707+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
708 ~code:"bad-srcset-value"
709 ~element:element_name ~attribute:"srcset" ();
710 false
711 end else if n = neg_infinity || n = infinity then begin
712+ (* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *)
713+ let trimmed_desc = String.trim desc in
714+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
715+ let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
716 Message_collector.add_error collector
717+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
718 ~code:"bad-srcset-value"
719 ~element:element_name ~attribute:"srcset" ();
720 false
···728 end
729 | 'h' ->
730 (* Height descriptor - not allowed *)
731+ let trimmed_desc = String.trim desc in
732+ (* Generate context: find where this entry appears *)
733+ let context =
734+ try
735+ let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
736+ (* Get the entry context ending with comma *)
737+ let search_from = max 0 (pos - 3) in
738+ let comma_pos = try Str.search_forward (Str.regexp_string ",") srcset_value pos with Not_found -> String.length srcset_value - 1 in
739+ let end_pos = min (comma_pos + 1) (String.length srcset_value) in
740+ let len = end_pos - search_from in
741+ if len > 0 then String.trim (String.sub srcset_value search_from len) else srcset_value
742+ with Not_found | Invalid_argument _ -> srcset_value
743+ in
744+ if has_sizes then
745+ Message_collector.add_error collector
746+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name trimmed_desc context)
747+ ~code:"bad-srcset-value"
748+ ~element:element_name ~attribute:"srcset" ()
749+ else
750+ Message_collector.add_error collector
751+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
752+ ~code:"bad-srcset-value"
753+ ~element:element_name ~attribute:"srcset" ();
754 false
755 | _ ->
756+ (* Unknown descriptor - find context in srcset_value *)
757+ let trimmed_desc = String.trim desc in
758+ (* Try to find the context: find where this descriptor appears in srcset_value *)
759+ let context =
760+ try
761+ let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
762+ (* Get the context up to and including the descriptor and the comma after *)
763+ let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in
764+ let start_pos = max 0 (pos - 2) in
765+ String.trim (String.sub srcset_value start_pos (end_pos - start_pos))
766+ with Not_found -> srcset_value
767+ in
768 Message_collector.add_error collector
769+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc context)
770 ~code:"bad-srcset-value"
771 ~element:element_name ~attribute:"srcset" ();
772 false
···796 let entries = String.split_on_char ',' value in
797 let has_w_descriptor = ref false in
798 let has_x_descriptor = ref false in
799+ let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *)
800+ let x_with_sizes_error_reported = ref false in (* Track if we already reported x-with-sizes error *)
801+ let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values -> first URL *)
802803 (* Check for empty srcset *)
804 if String.trim value = "" then begin
805 Message_collector.add_error collector
806+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must contain one or more image candidate strings." value element_name)
807 ~code:"bad-srcset-value"
808 ~element:element_name ~attribute:"srcset" ()
809 end;
···811 (* Check for leading comma *)
812 if String.length value > 0 && value.[0] = ',' then begin
813 Message_collector.add_error collector
814+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Starts with empty image-candidate string." value element_name)
815 ~code:"bad-srcset-value"
816 ~element:element_name ~attribute:"srcset" ()
817 end;
818819+ (* Check for trailing comma(s) / empty entries *)
820 let trimmed_value = String.trim value in
821 if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin
822+ (* Count consecutive trailing commas *)
823+ let rec count_trailing_commas s idx count =
824+ if idx < 0 then count
825+ else if s.[idx] = ',' then count_trailing_commas s (idx - 1) (count + 1)
826+ else if s.[idx] = ' ' || s.[idx] = '\t' then count_trailing_commas s (idx - 1) count
827+ else count
828+ in
829+ let trailing_commas = count_trailing_commas trimmed_value (String.length trimmed_value - 1) 0 in
830+ if trailing_commas > 1 then
831+ (* Multiple trailing commas: "Empty image-candidate string at" *)
832+ Message_collector.add_error collector
833+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Empty image-candidate string at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name value)
834+ ~code:"bad-srcset-value"
835+ ~element:element_name ~attribute:"srcset" ()
836+ else
837+ (* Single trailing comma: "Ends with empty image-candidate string." *)
838+ Message_collector.add_error collector
839+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Ends with empty image-candidate string." value element_name)
840+ ~code:"bad-srcset-value"
841+ ~element:element_name ~attribute:"srcset" ()
842 end;
843844 List.iter (fun entry ->
···856 let scheme_colon = scheme ^ ":" in
857 if url_lower = scheme_colon then
858 Message_collector.add_error collector
859+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad image-candidate URL: \xe2\x80\x9c%s\xe2\x80\x9d: Expected a slash (\"/\")." value element_name url)
860 ~code:"bad-srcset-url"
861 ~element:element_name ~attribute:"srcset" ()
862 ) special_schemes
···866 | [url] ->
867 check_srcset_url url;
868 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
869+ if !no_descriptor_url = None then no_descriptor_url := Some url;
870+ begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with
871+ | Some first_url ->
872 Message_collector.add_error collector
873+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Density for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to density for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name url first_url)
874 ~code:"bad-srcset-value"
875 ~element:element_name ~attribute:"srcset" ()
876+ | None ->
877+ Hashtbl.add seen_descriptors "implicit-1x" url
878+ end
879 | url :: desc :: rest ->
880 (* Check URL for broken schemes *)
881 check_srcset_url url;
882 (* Check for extra junk - multiple descriptors are not allowed *)
883 if rest <> [] then begin
884+ let extra_desc = List.hd rest in
885 Message_collector.add_error collector
886+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected single descriptor but found extraneous descriptor \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name extra_desc value)
887 ~code:"bad-srcset-value"
888 ~element:element_name ~attribute:"srcset" ()
889 end;
···892 if String.length desc_lower > 0 then begin
893 let last_char = desc_lower.[String.length desc_lower - 1] in
894 if last_char = 'w' then has_w_descriptor := true
895+ else if last_char = 'x' then begin
896+ has_x_descriptor := true;
897+ (* If sizes is present and we have an x descriptor, generate detailed error *)
898+ if has_sizes && not !x_with_sizes_error_reported then begin
899+ x_with_sizes_error_reported := true;
900+ (* Build context:
901+ - If entry has extra parts (multiple descriptors): show "url descriptor "
902+ - Else if entry has trailing comma: show "url descriptor,"
903+ - Else (last entry, no extra parts): show full srcset value *)
904+ let trimmed_url = String.trim url in
905+ let trimmed_desc = String.trim desc in
906+ let entry_context =
907+ if rest <> [] then
908+ (* Entry has multiple descriptors - show URL + first descriptor + space *)
909+ trimmed_url ^ " " ^ trimmed_desc ^ " "
910+ else
911+ (* Check if entry ends with comma in original value *)
912+ let trimmed_entry = String.trim entry in
913+ try
914+ let entry_start = Str.search_forward (Str.regexp_string trimmed_url) value 0 in
915+ let entry_end = entry_start + String.length trimmed_entry in
916+ let has_trailing_comma = entry_end < String.length value && value.[entry_end] = ',' in
917+ if has_trailing_comma then
918+ (* Entry followed by comma - show "url descriptor," *)
919+ trimmed_url ^ " " ^ trimmed_desc ^ ","
920+ else
921+ (* Last entry - show full srcset value *)
922+ value
923+ with Not_found ->
924+ value
925+ in
926+ Message_collector.add_error collector
927+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name trimmed_desc entry_context)
928+ ~code:"bad-srcset-value"
929+ ~element:element_name ~attribute:"srcset" ()
930+ end
931+ end;
932933 (* Check for duplicate descriptors - use normalized form *)
934 let normalized = normalize_descriptor desc in
935 let is_1x = (normalized = "1x") in
936+ let is_width = (last_char = 'w') in
937+ let dup_type = if is_width then "Width" else "Density" in
938+ begin match Hashtbl.find_opt seen_descriptors normalized with
939+ | Some first_url ->
000940 Message_collector.add_error collector
941+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
942 ~code:"bad-srcset-value"
943 ~element:element_name ~attribute:"srcset" ()
944+ | None ->
945+ begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
946+ | Some first_url ->
947+ (* Explicit 1x conflicts with implicit 1x *)
948+ Message_collector.add_error collector
949+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
950+ ~code:"bad-srcset-value"
951+ ~element:element_name ~attribute:"srcset" ()
952+ | None ->
953+ Hashtbl.add seen_descriptors normalized url;
954+ if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
955+ end
956 end
957 end;
958959+ ignore (validate_srcset_descriptor desc element_name value has_sizes collector)
960 end
961 ) entries;
962963 (* Check: if w descriptor used and no sizes, that's an error for img and source *)
964 if !has_w_descriptor && not has_sizes then
965 Message_collector.add_error collector
966+ ~message:"When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute has any image candidate string with a width descriptor, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be specified."
967 ~code:"srcset-w-without-sizes"
968 ~element:element_name ~attribute:"srcset" ();
969970 (* Check: if sizes is present, all entries must have width descriptors *)
971+ (match !no_descriptor_url with
972+ | Some url when has_sizes ->
973 Message_collector.add_error collector
974+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name url)
975 ~code:"bad-srcset-value"
976+ ~element:element_name ~attribute:"srcset" ()
977+ | _ -> ());
978979+ (* Check: if sizes is present and srcset uses x descriptors, that's an error.
980+ Only report if we haven't already reported the detailed error. *)
981+ if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then
982 Message_collector.add_error collector
983 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
984 ~code:"bad-srcset-value"
···34 | "menu" ->
35 (* menu only allows li, script, template *)
36 List.mem child ["li"; "script"; "template"]
00037 | _ -> true
3839(* Check if text is allowed in element *)
···34 | "menu" ->
35 (* menu only allows li, script, template *)
36 List.mem child ["li"; "script"; "template"]
37+ | "table" ->
38+ (* col must be in colgroup, not directly in table *)
39+ child <> "col"
40 | _ -> true
4142(* Check if text is allowed in element *)
+13-13
lib/html5rw/parser/parser_tree_builder.ml
···787 t.open_elements <- [html];
788 t.mode <- Parser_insertion_mode.Before_head;
789 process_token t token
790- | Token.Tag { kind = Token.End; _ } ->
791- parse_error t "unexpected-end-tag"
792 | _ ->
793 let html = insert_element t "html" [] in
794 t.open_elements <- [html];
···813 t.head_element <- Some head;
814 t.mode <- Parser_insertion_mode.In_head;
815 process_token t token
816- | Token.Tag { kind = Token.End; _ } ->
817- parse_error t "unexpected-end-tag"
818 | _ ->
819 let head = insert_element t "head" [] in
820 t.open_elements <- head :: t.open_elements;
···902 end
903 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
904 parse_error t "unexpected-start-tag"
905- | Token.Tag { kind = Token.End; _ } ->
906- parse_error t "unexpected-end-tag"
907 | _ ->
908 pop_current t;
909 t.mode <- Parser_insertion_mode.After_head;
···943 pop_current t; (* Pop noscript *)
944 t.mode <- Parser_insertion_mode.In_head;
945 process_token t token
946- | Token.Tag { kind = Token.End; _ } ->
947- parse_error t "unexpected-end-tag"
948 | Token.EOF ->
949 parse_error t "expected-closing-tag-but-got-eof";
950 pop_current t; (* Pop noscript *)
···998 process_token t token
999 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
1000 parse_error t "unexpected-start-tag"
1001- | Token.Tag { kind = Token.End; _ } ->
1002- parse_error t "unexpected-end-tag"
1003 | _ ->
1004 let body = insert_element t "body" [] in
1005 t.open_elements <- body :: t.open_elements;
···1447 | _ -> ());
1448 pop_until t (fun n -> n == node)
1449 end else if is_special_element node then
1450- parse_error t "unexpected-end-tag"
1451 else
1452 check rest
1453 in
···2056 t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes;
2057 t.mode <- Parser_insertion_mode.In_body;
2058 process_token t token
2059- | Token.Tag { kind = Token.End; _ } ->
2060- parse_error t "unexpected-end-tag"
2061 | Token.EOF ->
2062 if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then
2063 () (* Stop parsing *)
···787 t.open_elements <- [html];
788 t.mode <- Parser_insertion_mode.Before_head;
789 process_token t token
790+ | Token.Tag { kind = Token.End; name; _ } ->
791+ parse_error t ("unexpected-end-tag:" ^ name)
792 | _ ->
793 let html = insert_element t "html" [] in
794 t.open_elements <- [html];
···813 t.head_element <- Some head;
814 t.mode <- Parser_insertion_mode.In_head;
815 process_token t token
816+ | Token.Tag { kind = Token.End; name; _ } ->
817+ parse_error t ("unexpected-end-tag:" ^ name)
818 | _ ->
819 let head = insert_element t "head" [] in
820 t.open_elements <- head :: t.open_elements;
···902 end
903 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
904 parse_error t "unexpected-start-tag"
905+ | Token.Tag { kind = Token.End; name; _ } ->
906+ parse_error t ("unexpected-end-tag:" ^ name)
907 | _ ->
908 pop_current t;
909 t.mode <- Parser_insertion_mode.After_head;
···943 pop_current t; (* Pop noscript *)
944 t.mode <- Parser_insertion_mode.In_head;
945 process_token t token
946+ | Token.Tag { kind = Token.End; name; _ } ->
947+ parse_error t ("unexpected-end-tag:" ^ name)
948 | Token.EOF ->
949 parse_error t "expected-closing-tag-but-got-eof";
950 pop_current t; (* Pop noscript *)
···998 process_token t token
999 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
1000 parse_error t "unexpected-start-tag"
1001+ | Token.Tag { kind = Token.End; name; _ } ->
1002+ parse_error t ("unexpected-end-tag:" ^ name)
1003 | _ ->
1004 let body = insert_element t "body" [] in
1005 t.open_elements <- body :: t.open_elements;
···1447 | _ -> ());
1448 pop_until t (fun n -> n == node)
1449 end else if is_special_element node then
1450+ parse_error t ("unexpected-end-tag:" ^ name)
1451 else
1452 check rest
1453 in
···2056 t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes;
2057 t.mode <- Parser_insertion_mode.In_body;
2058 process_token t token
2059+ | Token.Tag { kind = Token.End; name; _ } ->
2060+ parse_error t ("unexpected-end-tag:" ^ name)
2061 | Token.EOF ->
2062 if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then
2063 () (* Stop parsing *)