···162162 List.find_opt (fun t -> starts_with t "section-") tokens
163163 in
164164165165+ (* Check if webauthn appears anywhere except as the very last token *)
(* [check_webauthn_position tokens] returns [Some ()] when the token
   "webauthn" occurs anywhere other than as the final element of [tokens],
   and [None] otherwise (including when "webauthn" is absent or [tokens] is
   empty). *)
let check_webauthn_position tokens =
  (* True as soon as a "webauthn" token is followed by at least one more
     token; a trailing "webauthn" falls through to the empty-list case. *)
  let rec misplaced = function
    | [] -> false
    | "webauthn" :: _ :: _ -> true
    | _ :: remaining -> misplaced remaining
  in
  if misplaced tokens then Some () else None
174174+ in
175175+165176 (* Process remaining tokens *)
166177 let process_field_tokens tokens =
178178+ (* First check if webauthn appears but not at the very end *)
179179+ (match check_webauthn_position tokens with
180180+ | Some () ->
181181+ Error
182182+ "The token \"webauthn\" must only appear as the very last token in a \
183183+ list of autofill detail tokens."
184184+ | None ->
167185 match tokens with
168186 | [] -> Error "A list of autofill details tokens must contain an autofill field name."
169187 | [ "webauthn" ] ->
···246264 | None ->
247265 Error
248266 "A list of autofill details tokens must not contain more than one \
249249- autofill field name.")
267267+ autofill field name."))
250268 in
251269 process_field_tokens !tokens
252270
+8-8
lib/html5_checker/error_code.ml
···382382 Printf.sprintf "Element %s is missing required attribute %s."
383383 (q element) (q attr)
384384 | Missing_required_attr_one_of { element; attrs } ->
385385- let attrs_str = String.concat ", " (List.map q attrs) in
385385+ let attrs_str = String.concat ", " attrs in
386386 Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]."
387387 (q element) attrs_str
388388 | Bad_attr_value { element; attr; value; reason } ->
···420420 Printf.sprintf "Element %s is missing required child element %s."
421421 (q parent) (q child)
422422 | Missing_required_child_one_of { parent; children } ->
423423- let children_str = String.concat ", " (List.map q children) in
423423+ let children_str = String.concat ", " children in
424424 Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]."
425425 (q parent) children_str
426426 | Missing_required_child_generic { parent } ->
···488488 Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images."
489489 (q "img") (q "alt")
490490 | Img_missing_src_or_srcset ->
491491- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
492492- (q "img") (q "src") (q "srcset")
491491+ Printf.sprintf "Element %s is missing one or more of the following attributes: [src, srcset]."
492492+ (q "img")
493493 | Option_empty_without_label ->
494494 Printf.sprintf "Element %s without attribute %s must not be empty."
495495 (q "option") (q "label")
···499499 Printf.sprintf "The value of %s attribute for the %s element must not be %s."
500500 (q "dir") (q "bdo") (q "auto")
501501 | Base_missing_href_or_target ->
502502- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
503503- (q "base") (q "href") (q "target")
502502+ Printf.sprintf "Element %s is missing one or more of the following attributes: [href, target]."
503503+ (q "base")
504504 | Base_after_link_script ->
505505 Printf.sprintf "The %s element must come before any %s or %s elements in the document."
506506 (q "base") (q "link") (q "script")
···551551 Printf.sprintf "Element %s is missing required attribute %s."
552552 (q "summary") (q "role")
553553 | Summary_missing_attrs ->
554554- Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s, %s]."
555555- (q "summary") (q "aria-checked") (q "aria-level") (q "role")
554554+ Printf.sprintf "Element %s is missing one or more of the following attributes: [aria-checked, aria-level, role]."
555555+ (q "summary")
556556 | Autocomplete_webauthn_on_select ->
557557 Printf.sprintf "The value of the %s attribute for the %s element must not contain %s."
558558 (q "autocomplete") (q "select") (q "webauthn")
+7
lib/html5_checker/parse_error_bridge.ml
···1414 let (message, final_code) = match code with
1515 | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
1616 ("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str)
1717+ | Html5rw.Parse_error_code.Null_character_reference ->
1818+ ("Character reference expands to zero.", "null-character-reference")
1719 | Html5rw.Parse_error_code.Tree_construction_error s ->
1820 (* Check for control-character/noncharacter/surrogate with codepoint info *)
1921 (try
···6769 ("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied")
6870 else if s = "end-tag-br" then
6971 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br")
7272+ else if s = "expected-closing-tag-but-got-eof" then
7373+ ("End of file seen and there were open elements.", "eof-in-open-element")
7474+ else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
7575+ let element = String.sub s 19 (String.length s - 19) in
7676+ (Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
7077 else
7178 (Printf.sprintf "Parse error: %s" s, s)
7279 with _ -> (Printf.sprintf "Parse error: %s" s, s))
+86-10
lib/html5_checker/semantic/nesting_checker.ml
···3232let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
3333 Hashtbl.create 64
(** Table keyed by descendant element name. Each value is a bitmask of the
    ancestors whose presence is a content model violation for that
    descendant. These violations are reported with a different message than
    plain nesting violations. *)
let content_model_violation_mask : (string, int) Hashtbl.t =
  let initial_capacity = 64 in
  Hashtbl.create initial_capacity
3939+3540(** Register that [ancestor] is prohibited for [descendant]. *)
3641let register_prohibited_ancestor ancestor descendant =
3742 let number = special_ancestor_number ancestor in
···4449 in
4550 let new_mask = mask lor (1 lsl number) in
4651 Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
(** Register a content model violation: a phrasing-only [ancestor] containing
    the flow-content element [descendant].

    The pair is first recorded through [register_prohibited_ancestor]; then the
    bit belonging to [ancestor] is OR-ed into the entry for [descendant] in
    [content_model_violation_mask] (a missing entry counts as [0]). *)
let register_content_model_violation ancestor descendant =
  register_prohibited_ancestor ancestor descendant;
  let ancestor_bit = 1 lsl special_ancestor_number ancestor in
  let previous =
    Option.value ~default:0
      (Hashtbl.find_opt content_model_violation_mask descendant)
  in
  Hashtbl.replace content_model_violation_mask descendant
    (previous lor ancestor_bit)
47644865(** Initialize the prohibited ancestor map. *)
4966let () =
···113130 ) interactive_elements;
114131115132 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
133133+ (* These are content model violations, not nesting violations. *)
116134 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
117135 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
118136 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···120138 "ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in
121139 List.iter (fun ancestor ->
122140 List.iter (fun descendant ->
123123- register_prohibited_ancestor ancestor descendant
141141+ register_content_model_violation ancestor descendant
124142 ) flow_content
125143 ) phrasing_only
126144···134152 let map_num = special_ancestor_number "map" in
135153 1 lsl map_num
(** Names of the elements treated as transparent, i.e. elements that inherit
    their content model from their parent. *)
let transparent_elements =
  [ "a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map" ]
157157+137158(** Stack node representing an element's context. *)
138159type stack_node = {
139160 ancestor_mask : int;
140140- _name : string; [@warning "-69"]
161161+ name : string;
162162+ is_transparent : bool;
141163}
142164143165(** Checker state. *)
···181203 | _ ->
182204 false
(** Return [Some name] when the node on top of [state.stack] (the immediate
    parent) is flagged as transparent, and [None] otherwise, including when
    the stack is empty.

    Despite the name, only the immediate parent is inspected; ancestors
    further up the stack are never consulted. *)
let find_nearest_transparent_parent state =
  match state.stack with
  | [] -> None
  | parent :: _ ->
    if parent.is_transparent then Some parent.name else None
212212+184213(** Report nesting violations. *)
185214let check_nesting state name attrs collector =
186215 (* Compute the prohibited ancestor mask for this element *)
···190219 | None -> 0
191220 in
192221222222+ (* Get content model violation mask for this element *)
223223+ let content_model_mask =
224224+ match Hashtbl.find_opt content_model_violation_mask name with
225225+ | Some m -> m
226226+ | None -> 0
227227+ in
228228+193229 (* Add interactive element restrictions if applicable *)
194230 let mask =
195231 if is_interactive_element name attrs then
···212248 | "object" when has_attr attrs "usemap" -> Some "usemap"
213249 | _ -> None
214250 in
251251+ (* Find the transparent parent (like canvas) if any *)
252252+ let transparent_parent = find_nearest_transparent_parent state in
215253 (* Find which ancestors are violated *)
216254 Array.iteri (fun i ancestor ->
217255 let bit = 1 lsl i in
218218- if (mask_hit land bit) <> 0 then
219219- Message_collector.add_typed collector
220220- (Error_code.Element_must_not_be_descendant {
221221- element = name;
222222- attr;
223223- ancestor
224224- })
256256+ if (mask_hit land bit) <> 0 then begin
257257+ (* Check if this is a content model violation or a nesting violation *)
258258+ if (content_model_mask land bit) <> 0 then begin
259259+ (* Content model violation: use "not allowed as child" format *)
260260+ (* If there's a transparent parent, use that instead of the ancestor *)
261261+ let parent = match transparent_parent with
262262+ | Some p -> p
263263+ | None -> ancestor
264264+ in
265265+ Message_collector.add_typed collector
266266+ (Error_code.Element_not_allowed_as_child {
267267+ child = name;
268268+ parent
269269+ })
270270+ end else
271271+ (* Nesting violation: use "must not be descendant" format *)
272272+ Message_collector.add_typed collector
273273+ (Error_code.Element_must_not_be_descendant {
274274+ element = name;
275275+ attr;
276276+ ancestor
277277+ })
278278+ end
225279 ) special_ancestors
226280 end
227281 end
···238292 })
239293 | _ -> ()
(** Check that a metadata-only element appears in a valid context.

    Only [style] is handled. It is accepted when its immediate parent is
    [head], or when its immediate parent is a [noscript] whose own parent is
    [head]. Under any other parent an [Element_not_allowed_as_child] error
    naming that parent is added to [collector]. When the stack is empty
    nothing is reported here.

    [state.stack] is read with the immediate parent first; [name] is the name
    of the element being opened. *)
let check_metadata_element_context state name collector =
  if name = "style" then
    match state.stack with
    | [] -> () (* at root level: nothing is reported by this check *)
    | parent :: _ when parent.name = "head" -> ()
    | parent :: grandparent :: _
      when parent.name = "noscript" && grandparent.name = "head" ->
      (* noscript only legitimises style when the noscript itself sits
         directly in head; a noscript elsewhere falls through to the error. *)
      ()
    | parent :: _ ->
      (* style under any other parent is not allowed *)
      Message_collector.add_typed collector
        (Error_code.Element_not_allowed_as_child
           { child = "style"; parent = parent.name })
314314+241315let start_element state ~name ~namespace ~attrs collector =
242316 (* Only check HTML elements, not SVG or MathML *)
243317 match namespace with
···246320 (* Check for nesting violations *)
247321 check_nesting state name attrs collector;
248322 check_required_ancestors state name collector;
323323+ check_metadata_element_context state name collector;
249324250325 (* Update ancestor mask if this is a special ancestor *)
251326 let new_mask = state.ancestor_mask in
···267342 in
268343269344 (* Push onto stack *)
270270- let node = { ancestor_mask = state.ancestor_mask; _name = name } in
345345+ let is_transparent = List.mem name transparent_elements in
346346+ let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
271347 state.stack <- node :: state.stack;
272348 state.ancestor_mask <- new_mask
273349
+52-31
lib/html5_checker/semantic/obsolete_checker.ml
···242242 tbl
243243244244(** Checker state *)
245245-type state = unit
245245+type state = {
246246+ mutable in_head : bool;
247247+}
246248247247-let create () = ()
249249+let create () = { in_head = false }
248250249249-let reset _state = ()
251251+let reset state = state.in_head <- false
250252251251-let start_element _state ~name ~namespace ~attrs collector =
253253+let start_element state ~name ~namespace ~attrs collector =
252254 (* Only check HTML elements (no namespace or explicit HTML namespace) *)
253255 let is_html = match namespace with
254256 | None -> true
···259261 else begin
260262 let name_lower = String.lowercase_ascii name in
261263264264+ (* Track head context *)
265265+ if name_lower = "head" then state.in_head <- true;
266266+262267 (* Check for obsolete element *)
263268 (match Hashtbl.find_opt obsolete_elements name_lower with
264269 | None -> ()
···270275 List.iter (fun (attr_name, _attr_value) ->
271276 let attr_lower = String.lowercase_ascii attr_name in
272277273273- (* Check specific obsolete attributes for this element *)
274274- (match Hashtbl.find_opt obsolete_attributes attr_lower with
275275- | None -> ()
276276- | Some element_map ->
277277- (match Hashtbl.find_opt element_map name_lower with
278278- | None -> ()
279279- | Some suggestion ->
280280- Message_collector.add_typed collector
281281- (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
282282-283283- (* Check obsolete style attributes *)
284284- (match Hashtbl.find_opt obsolete_style_attrs attr_lower with
285285- | None -> ()
286286- | Some elements ->
287287- if List.mem name_lower elements then
288288- Message_collector.add_typed collector
289289- (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
290290-291291- (* Check obsolete global attributes *)
292292- (match Hashtbl.find_opt obsolete_global_attrs attr_lower with
293293- | None -> ()
294294- | Some suggestion ->
295295- (* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
278278+ (* Special handling for scoped attribute on style *)
279279+ if attr_lower = "scoped" && name_lower = "style" then begin
280280+ (* Only report if style is in head (correct context) - otherwise the content model
281281+ error from nesting_checker takes precedence *)
282282+ if state.in_head then
296283 Message_collector.add_error collector
297297- ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
298298- ~code:"obsolete-global-attribute"
284284+ ~message:(Printf.sprintf "Attribute %s not allowed on element %s at this point."
285285+ (Error_code.q attr_name) (Error_code.q name))
286286+ ~code:"disallowed-attribute"
299287 ~element:name
300288 ~attribute:attr_name
301301- ())
289289+ ()
290290+ end else begin
291291+ (* Check specific obsolete attributes for this element *)
292292+ (match Hashtbl.find_opt obsolete_attributes attr_lower with
293293+ | None -> ()
294294+ | Some element_map ->
295295+ (match Hashtbl.find_opt element_map name_lower with
296296+ | None -> ()
297297+ | Some suggestion ->
298298+ Message_collector.add_typed collector
299299+ (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
300300+301301+ (* Check obsolete style attributes *)
302302+ (match Hashtbl.find_opt obsolete_style_attrs attr_lower with
303303+ | None -> ()
304304+ | Some elements ->
305305+ if List.mem name_lower elements then
306306+ Message_collector.add_typed collector
307307+ (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
308308+309309+ (* Check obsolete global attributes *)
310310+ (match Hashtbl.find_opt obsolete_global_attrs attr_lower with
311311+ | None -> ()
312312+ | Some suggestion ->
313313+ (* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
314314+ Message_collector.add_error collector
315315+ ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
316316+ ~code:"obsolete-global-attribute"
317317+ ~element:name
318318+ ~attribute:attr_name
319319+ ())
320320+ end
302321 ) attrs
303322 end
304323305305-let end_element _state ~name:_ ~namespace:_ _collector = ()
(** Clear the [in_head] flag of [state] when the element being closed is
    [head] (compared after ASCII lowercasing). Every other element name is
    ignored, as are the namespace and the collector. *)
let end_element state ~name ~namespace:_ _collector =
  match String.lowercase_ascii name with
  | "head" -> state.in_head <- false
  | _ -> ()
306327307328let characters _state _text _collector = ()
308329
···143143 (* Valid values: empty string, auto, manual, hint *)
144144 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
145145 Message_collector.add_typed collector
146146- (Error_code.Bad_attr_value {
147147- element = element_name;
148148- attr = "popover";
149149- value;
150150- reason = "Must be a valid popover state (auto, manual, or hint)."
146146+ (Error_code.Bad_attr_value_generic {
147147+ message = Printf.sprintf "Bad value %s for attribute %s on element %s."
148148+ (Error_code.q value) (Error_code.q "popover") (Error_code.q element_name)
151149 })
152150 | None -> ()
153151
+12-6
lib/html5_checker/specialized/aria_checker.ml
···673673 | _ -> ()
674674 end;
675675676676- (* Validate explicit roles *)
677677- List.iter (fun role ->
678678- (* Check if role is valid *)
679679- if not (Hashtbl.mem valid_aria_roles role) then
676676+ (* Validate explicit roles - report full attribute value if any role is invalid *)
677677+ let has_invalid_role = List.exists (fun role ->
678678+ not (Hashtbl.mem valid_aria_roles role)
679679+ ) explicit_roles in
680680+ if has_invalid_role then begin
681681+ match role_attr with
682682+ | Some role_value ->
680683 Message_collector.add_error collector
681684 ~message:(Printf.sprintf
682685 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
683683- role name)
686686+ role_value name)
684687 ~code:"bad-role"
685688 ~element:name
686689 ~attribute:"role"
687687- ();
690690+ ()
691691+ | None -> ()
692692+ end;
688693694694+ List.iter (fun role ->
689695 (* Check if role cannot be named *)
690696 if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then
691697 Message_collector.add_error collector
+54-19
lib/html5_checker/specialized/datetime_checker.ml
···2727let validate_date s =
2828 let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in
2929 if not (Str.string_match pattern s 0) then
3030- (false, Some "Date must be in YYYY-MM-DD format")
3030+ (false, Some "The literal did not satisfy the date format")
3131 else
3232 let year_s = Str.matched_group 1 s in
3333 let month_s = Str.matched_group 2 s in
3434 let day_s = Str.matched_group 3 s in
3535 if String.length year_s < 4 then
3636- (false, Some "Year must be at least 4 digits")
3636+ (false, Some "The literal did not satisfy the date format")
3737 else
3838 match (parse_int year_s, parse_int month_s, parse_int day_s) with
3939 | None, _, _ | _, None, _ | _, _, None ->
4040 (false, Some "Invalid year, month or day")
4141 | Some year, Some month, Some day ->
4242 if year < 1 then (false, Some "Year cannot be less than 1")
4343- else if month < 1 || month > 12 then (false, Some "Month out of range")
4343+ else if month = 0 then (false, Some "Month cannot be less than 1")
4444+ else if month > 12 then (false, Some "Month cannot be greater than 12")
4445 else if day < 1 then (false, Some "Day cannot be less than 1")
4546 else
4647 let max_day = max_day_for_month year month in
···7172let validate_time s =
7273 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
7374 if not (Str.string_match pattern s 0) then
7474- (false, Some "Time must be in HH:MM format")
7575+ (false, None) (* Format error - return None so caller uses generic message *)
7576 else
7677 let hour_s = Str.matched_group 1 s in
7778 let minute_s = Str.matched_group 2 s in
7879 match (parse_int hour_s, parse_int minute_s) with
7980 | None, _ | _, None -> (false, Some "Invalid hour or minute")
8081 | Some hour, Some minute ->
8181- if hour > 23 then (false, Some "Hour out of range")
8282- else if minute > 59 then (false, Some "Minute out of range")
8282+ if hour > 23 then (false, Some "Hour cannot be greater than 23")
8383+ else if minute > 59 then (false, Some "Minute cannot be greater than 59")
8384 else
8485 let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in
8586 match second_s with
···8889 match parse_int sec_s with
8990 | None -> (false, Some "Invalid seconds")
9091 | Some sec ->
9191- if sec > 59 then (false, Some "Second out of range")
9292+ if sec > 59 then (false, Some "Second cannot be greater than 59")
9293 else
9394 (* Check milliseconds if present *)
9495 let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in
···108109 else
109110 let year_s = Str.matched_group 1 s in
110111 if String.length year_s < 4 then
111111- (false, Some "Year must be at least 4 digits")
112112+ (false, Some "The literal did not satisfy the date format")
112113 else
113114 match parse_int year_s with
114115 | None -> (false, Some "Invalid year")
···125126 let year_s = Str.matched_group 1 s in
126127 let month_s = Str.matched_group 2 s in
127128 if String.length year_s < 4 then
128128- (false, Some "Year must be at least 4 digits")
129129+ (false, Some "The literal did not satisfy the date format")
129130 else
130131 match (parse_int year_s, parse_int month_s) with
131132 | None, _ | _, None -> (false, Some "Invalid year or month")
···143144 let year_s = Str.matched_group 1 s in
144145 let week_s = Str.matched_group 2 s in
145146 if String.length year_s < 4 then
146146- (false, Some "Year must be at least 4 digits")
147147+ (false, Some "The literal did not satisfy the date format")
147148 else
148149 match (parse_int year_s, parse_int week_s) with
149150 | None, _ | _, None -> (false, Some "Invalid year or week")
···222223 (false, "+")
223224 in
224225 if not matched then
225225- TzError "Invalid timezone offset"
226226+ TzError "The literal did not satisfy the datetime with timezone format"
226227 else
227228 let hour_s = Str.matched_group 2 s in
228229 let minute_s = Str.matched_group 3 s in
229230 match (parse_int hour_s, parse_int minute_s) with
230231 | None, _ | _, None -> TzError "Invalid timezone"
231232 | Some hour, Some minute ->
232232- if hour > 23 || minute > 59 then TzError "Timezone offset out of range"
233233+ if hour > 23 then TzError "Hours out of range in time zone designator"
234234+ else if minute > 59 then TzError "Minutes out of range in time zone designator"
233235 else begin
234236 (* Check for unusual but valid offsets *)
235237 let unusual_range =
···267269 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
268270 (* Validate date *)
269271 match validate_date date_part with
270270- | (false, reason) ->
271271- DtError (match reason with Some r -> r | None -> "Invalid date")
272272+ | (false, _) ->
273273+ DtError "The literal did not satisfy the datetime with timezone format"
272274 | (true, _) ->
273275 let date_old = has_old_year date_part in
274276 (* Check if ends with Z *)
275277 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
276278 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
277279 match validate_time time_part with
278278- | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
280280+ | (false, Some reason) -> DtError reason
281281+ | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
279282 | (true, _) ->
280283 if date_old then DtWarning "Year may be mistyped"
281284 else DtOk
···296299 let time_part = String.sub time_and_tz 0 tp in
297300 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
298301 match validate_time time_part with
299299- | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
302302+ | (false, Some reason) -> DtError reason
303303+ | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
300304 | (true, _) ->
301305 match validate_timezone_offset tz_part with
302302- | TzError _ -> DtError "The literal did not satisfy the datetime with timezone format"
306306+ | TzError e -> DtError e
303307 | TzWarning w ->
304308 DtWarning w
305309 | TzOk ->
···400404 | Some e -> Printf.sprintf "Bad date: %s." e
401405 | None -> "Bad date: The literal did not satisfy the date format."
402406 in
403403- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
404404- value attr_name element_name tz_msg date_msg)
407407+ (* Order depends on error type. The Nu validator has specific patterns:
408408+ - Time hour/minute errors (not timezone) -> datetime first
409409+ - Timezone hours error -> datetime first
410410+ - Timezone minutes error -> date first
411411+ - Time fraction error -> date first
412412+ - Date "less than" error -> date first
413413+ - Date "greater than" error -> datetime first
414414+ - Generic errors both sides -> datetime first *)
415415+ let is_generic_tz = tz_error = "The literal did not satisfy the datetime with timezone format" in
416416+ let is_tz_hours_error = String.length tz_error >= 5 && String.sub tz_error 0 5 = "Hours" in
417417+ let is_tz_minutes_error = String.length tz_error >= 7 && String.sub tz_error 0 7 = "Minutes" in
418418+ let is_time_minute_or_hour_error =
419419+ (try ignore (Str.search_forward (Str.regexp "Minute cannot\\|Hour cannot") tz_error 0); true with Not_found -> false)
420420+ in
421421+ let is_fraction_error = try ignore (Str.search_forward (Str.regexp "fraction") tz_error 0); true with Not_found -> false in
422422+ let is_month_less_than_error = match date_error with
423423+ | Some e -> (try ignore (Str.search_forward (Str.regexp "Month cannot be less than") e 0); true with Not_found -> false)
424424+ | None -> false
425425+ in
426426+ (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427427+ Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428428+ if is_month_less_than_error then
429429+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430430+ value attr_name element_name date_msg tz_msg)
431431+ else if is_tz_minutes_error || is_fraction_error then
432432+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433433+ value attr_name element_name date_msg tz_msg)
434434+ else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435435+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436436+ value attr_name element_name tz_msg date_msg)
437437+ else
438438+ Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439439+ value attr_name element_name tz_msg date_msg)
405440 end
406441407442(** Checker state *)
+58-27
lib/html5_checker/specialized/dl_checker.ml
···88 mutable contains_div : bool;
99 mutable contains_dt_dd : bool;
1010 mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *)
1111+ mutable has_template : bool; (* Track if template element was seen inside dl *)
1112}
12131314type div_context = {
···7273 else begin
7374 match name_lower with
7475 | "template" ->
7575- state.in_template <- state.in_template + 1
7676+ state.in_template <- state.in_template + 1;
7777+ (* Track if template is direct child of dl *)
7878+ begin match current_dl state with
7979+ | Some dl_ctx when state.div_in_dl_stack = [] ->
8080+ dl_ctx.has_template <- true
8181+ | _ -> ()
8282+ end
76837784 | "dl" when state.in_template = 0 ->
7878- (* Check for nested dl - only error if direct child (not inside dt/dd) *)
7979- begin match current_dl state with
8080- | Some _ when state.in_dt_dd = 0 && state.div_in_dl_stack = [] ->
8585+ (* Check for nested dl - error if direct child of dl OR inside div-in-dl *)
8686+ begin match current_div state with
8787+ | Some _ ->
8888+ (* dl inside div-in-dl is not allowed *)
8189 Message_collector.add_error collector
8282- ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
9090+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
8391 ~code:"disallowed-child"
8492 ~element:"dl" ()
8585- | _ -> ()
9393+ | None ->
9494+ match current_dl state with
9595+ | Some _ when state.in_dt_dd = 0 ->
9696+ Message_collector.add_error collector
9797+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
9898+ ~code:"disallowed-child"
9999+ ~element:"dl" ()
100100+ | _ -> ()
86101 end;
87102 let ctx = {
88103 has_dt = false;
···91106 contains_div = false;
92107 contains_dt_dd = false;
93108 dd_before_dt_error_reported = false;
109109+ has_template = false;
94110 } in
95111 state.dl_stack <- ctx :: state.dl_stack
96112···131147 state.in_dt_dd <- state.in_dt_dd + 1;
132148 begin match current_div state with
133149 | Some div_ctx ->
134134- div_ctx.has_dt <- true;
135135- (* If we've seen dd, this dt starts a new group *)
150150+ (* If we've already seen dd, this dt starts a new group - which is not allowed *)
136151 if div_ctx.in_dd_part then begin
152152+ Message_collector.add_error collector
153153+ ~message:"Element \xe2\x80\x9cdt\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
154154+ ~code:"disallowed-child"
155155+ ~element:"dt" ();
137156 div_ctx.group_count <- div_ctx.group_count + 1;
138157 div_ctx.in_dd_part <- false
139139- end
158158+ end;
159159+ div_ctx.has_dt <- true
140160 | None ->
141161 match current_dl state with
142162 | Some dl_ctx ->
···236256 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element."
237257 ~code:"missing-required-child"
238258 ~element:"dl" ()
239239- else if not ctx.has_dd then
240240- Message_collector.add_error collector
241241- ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
242242- ~code:"missing-required-child"
243243- ~element:"dl" ()
259259+ else if not ctx.has_dd then begin
260260+ (* If template is present in dl, use list format; otherwise use simple format *)
261261+ if ctx.has_template then
262262+ Message_collector.add_error collector
263263+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing one or more of the following child elements: [dd]."
264264+ ~code:"missing-required-child"
265265+ ~element:"dl" ()
266266+ else
267267+ Message_collector.add_error collector
268268+ ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
269269+ ~code:"missing-required-child"
270270+ ~element:"dl" ()
271271+ end
244272 else if ctx.last_was_dt then
245245- (* Ended with dt, missing dd *)
273273+ (* Ended with dt, missing dd for the last group *)
246274 Message_collector.add_error collector
247275 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
248276 ~code:"missing-required-child"
···274302 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
275303 ~code:"missing-required-child"
276304 ~element:"div" ()
277277- else if div_ctx.group_count > 1 then
278278- (* Multiple name-value groups in a single div is not allowed *)
279279- Message_collector.add_error collector
280280- ~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group."
281281- ~code:"multiple-groups-in-div"
282282- ~element:"div" ()
305305+ (* Multiple groups error is now reported inline when dt appears after dd *)
283306 | [] -> ()
284307 end
285308···292315 else begin
293316 let trimmed = String.trim text in
294317 if trimmed <> "" then begin
295295- (* Check for text directly in dl *)
296296- match current_dl state with
297297- | Some _ when state.div_in_dl_stack = [] ->
318318+ (* Check for text directly in dl or div-in-dl *)
319319+ match current_div state with
320320+ | Some _ ->
321321+ (* Text in div within dl is not allowed *)
298322 Message_collector.add_error collector
299299- ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
323323+ ~message:"Text not allowed in element \xe2\x80\x9cdiv\xe2\x80\x9d in this context."
300324 ~code:"text-not-allowed"
301301- ~element:"dl" ()
302302- | _ -> ()
325325+ ~element:"div" ()
326326+ | None ->
327327+ match current_dl state with
328328+ | Some _ ->
329329+ Message_collector.add_error collector
330330+ ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
331331+ ~code:"text-not-allowed"
332332+ ~element:"dl" ()
333333+ | None -> ()
303334 end
304335 end
305336
···6868 String.contains s ':'
69697070(** Validate that a URL is a valid absolute URL for itemtype/itemid.
7171- Uses the comprehensive URL validation from Url_checker. *)
7272-let validate_microdata_url url element attr_name =
7171+ Uses the comprehensive URL validation from Url_checker.
7272+ original_value is the full attribute value (for error messages when split by whitespace) *)
7373+let validate_microdata_url url element attr_name original_value =
7374 let url_trimmed = String.trim url in
7475 if String.length url_trimmed = 0 then
7576 Some (Printf.sprintf
7677 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
7777- url attr_name element)
7878+ original_value attr_name element)
7879 else
7980 (* First check if it has a scheme (required for absolute URL) *)
8081 match Url_checker.extract_scheme url_trimmed with
8182 | None ->
8283 Some (Printf.sprintf
8384 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
8484- url attr_name element url)
8585+ original_value attr_name element url)
8586 | Some _ ->
8687 (* Has a scheme - do comprehensive URL validation *)
8788 match Url_checker.validate_url url element attr_name with
···8990 | Some error_msg ->
9091 (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
9192 let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
9393+ (* Also replace the URL value with the original value in case they differ *)
9494+ (* Escape backslashes in replacement string for Str.global_replace *)
9595+ let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
9696+ let error_msg = Str.global_replace
9797+ (Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
9898+ (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
9999+ error_msg in
92100 Some error_msg
9310194102(** Check if itemprop value is valid. *)
···201209 ()
202210 else
203211 List.iter (fun url ->
204204- match validate_microdata_url url element "itemtype" with
212212+ match validate_microdata_url url element "itemtype" itemtype with
205213 | None -> ()
206214 | Some error_msg ->
207215 Message_collector.add_error collector
+60-13
lib/html5_checker/specialized/picture_checker.ml
···3434 mutable has_source_after_img : bool;
3535 mutable has_always_matching_source : bool; (* source without media/type *)
3636 mutable source_after_always_matching : bool; (* source after always-matching source *)
3737+ mutable always_matching_is_media_all : bool; (* true if caused by media="all" *)
3838+ mutable always_matching_is_media_empty : bool; (* true if caused by media="" or whitespace *)
3739 mutable parent_stack : string list; (* track parent elements *)
3840}
3941···4648 has_source_after_img = false;
4749 has_always_matching_source = false;
4850 source_after_always_matching = false;
5151+ always_matching_is_media_all = false;
5252+ always_matching_is_media_empty = false;
4953 parent_stack = [];
5054}
5155···5862 state.parent_stack <- [];
5963 state.has_source_after_img <- false;
6064 state.has_always_matching_source <- false;
6161- state.source_after_always_matching <- false
6565+ state.source_after_always_matching <- false;
6666+ state.always_matching_is_media_all <- false;
6767+ state.always_matching_is_media_empty <- false
62686369(** Check if an attribute list contains a specific attribute. *)
6470let has_attr name attrs =
···151157 if String.lowercase_ascii attr_name = "media" then Some v else None
152158 ) attrs in
153159 let has_type = has_attr "type" attrs in
160160+ let is_media_all = match media_value with
161161+ | Some v -> String.lowercase_ascii (String.trim v) = "all"
162162+ | None -> false
163163+ in
164164+ let is_media_empty = match media_value with
165165+ | Some v -> String.trim v = ""
166166+ | None -> false
167167+ in
154168 let is_always_matching = match media_value with
155169 | None -> not has_type (* no media, check if no type either *)
156170 | Some v ->
157171 let trimmed = String.trim v in
158172 trimmed = "" || String.lowercase_ascii trimmed = "all"
159173 in
160160- if is_always_matching then
161161- state.has_always_matching_source <- true
174174+ if is_always_matching then begin
175175+ state.has_always_matching_source <- true;
176176+ if is_media_all then
177177+ state.always_matching_is_media_all <- true
178178+ else if is_media_empty then
179179+ state.always_matching_is_media_empty <- true
180180+ end
162181163182 | "img" when state.in_picture && state.picture_depth = 1 ->
164183 check_img_attrs attrs collector;
···170189 if img_count > 1 then
171190 report_disallowed_child "picture" "img" collector;
172191 (* Check if always-matching source is followed by img with srcset *)
173173- if state.has_always_matching_source && has_attr "srcset" attrs then
174174- Message_collector.add_error collector
175175- ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
176176- ~code:"always-matching-source-followed-by-srcset"
177177- ~element:"source" ()
192192+ if state.has_always_matching_source && has_attr "srcset" attrs then begin
193193+ if state.always_matching_is_media_all then
194194+ Message_collector.add_error collector
195195+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
196196+ ~code:"media-all-not-allowed"
197197+ ~element:"source"
198198+ ~attribute:"media" ()
199199+ else if state.always_matching_is_media_empty then
200200+ Message_collector.add_error collector
201201+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
202202+ ~code:"media-empty-not-allowed"
203203+ ~element:"source"
204204+ ~attribute:"media" ()
205205+ else
206206+ Message_collector.add_error collector
207207+ ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
208208+ ~code:"always-matching-source-followed-by-srcset"
209209+ ~element:"source" ()
210210+ end
178211179212 | "script" when state.in_picture && state.picture_depth = 1 ->
180213 state.children_in_picture <- "script" :: state.children_in_picture
···216249 if state.has_source_after_img then
217250 report_disallowed_child "picture" "source" collector;
218251 (* Check for source after always-matching source *)
219219- if state.source_after_always_matching then
220220- Message_collector.add_error collector
221221- ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that matches all media types cannot be followed by another \xe2\x80\x9csource\xe2\x80\x9d element."
222222- ~code:"always-matching-source"
223223- ~element:"source" ();
252252+ if state.source_after_always_matching then begin
253253+ if state.always_matching_is_media_all then
254254+ Message_collector.add_error collector
255255+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
256256+ ~code:"media-all-not-allowed"
257257+ ~element:"source"
258258+ ~attribute:"media" ()
259259+ else if state.always_matching_is_media_empty then
260260+ Message_collector.add_error collector
261261+ ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
262262+ ~code:"media-empty-not-allowed"
263263+ ~element:"source"
264264+ ~attribute:"media" ()
265265+ else
266266+ Message_collector.add_error collector
267267+ ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
268268+ ~code:"always-matching-source"
269269+ ~element:"source" ()
270270+ end;
224271225272 state.in_picture <- false
226273 end;
···5454 Buffer.contents buf
55555656(** Check if a size value has a valid CSS length unit and non-negative value *)
5757-type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation
5757+type size_check_result =
5858+ | Valid
5959+ | InvalidUnit of string * string (* (found_unit, context) *)
6060+ | NegativeValue
6161+ | CssCommentAfterSign of string * string (* what was found, context *)
6262+ | CssCommentBeforeUnit of string * string (* what was found, context *)
6363+ | BadScientificNotation
6464+ | BadCssNumber of char * string (* (first_char, context) - not starting with digit or minus *)
6565+6666+(** CSS comment error types *)
6767+type css_comment_error =
6868+ | NoCommentError
6969+ | CommentAfterSign of string * string (* what was found, context *)
7070+ | CommentBetweenNumberAndUnit of string * string (* what was found at comment position, context *)
58715972(** Check if CSS comment appears in an invalid position:
6073 - Between sign and number (+/**/50vw)
6174 - Between number and unit (50/**/vw)
6275 Trailing comments (50vw/**/) are valid. *)
6363-let has_invalid_css_comment s =
7676+let check_css_comment_position s =
6477 let len = String.length s in
6578 (* Find comment position *)
6679 let rec find_comment i =
···6982 else find_comment (i + 1)
7083 in
7184 match find_comment 0 with
7272- | None -> false
8585+ | None -> NoCommentError
7386 | Some comment_pos ->
7487 let before = String.sub s 0 comment_pos in
7588 let trimmed_before = String.trim before in
7676- if String.length trimmed_before = 0 then false (* Leading comment is OK *)
8989+ if String.length trimmed_before = 0 then NoCommentError (* Leading comment is OK *)
7790 else begin
7891 (* Find end of comment *)
7992 let rec find_end i =
···8497 let end_pos = find_end (comment_pos + 2) in
8598 let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
8699 let trimmed_after = String.trim (strip_css_comments after) in
8787- if trimmed_after = "" then false (* Trailing comment is OK *)
100100+ if trimmed_after = "" then NoCommentError (* Trailing comment is OK *)
88101 else begin
89102 (* Comment is in the middle - check if it breaks a number/unit combo *)
90103 let last = trimmed_before.[String.length trimmed_before - 1] in
9191- (* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *)
9292- (last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.'
104104+ (* What's at the comment position? Just show "/" *)
105105+ let slash = "/" in
106106+ (* Invalid if comment appears after +/- *)
107107+ if last = '+' || last = '-' then
108108+ CommentAfterSign (trimmed_before ^ slash, s)
109109+ (* Invalid if comment appears after digit (before more content) *)
110110+ else if (last >= '0' && last <= '9') || last = '.' then
111111+ CommentBetweenNumberAndUnit (slash ^ trimmed_after, s)
112112+ else
113113+ NoCommentError
93114 end
94115 end
(** Boolean view of [check_css_comment_position], kept for backward
    compatibility: [true] exactly when a comment-position error is reported
    for [s]. *)
let has_invalid_css_comment s =
  match check_css_comment_position s with
  | CommentAfterSign _ | CommentBetweenNumberAndUnit _ -> true
  | NoCommentError -> false
9512296123(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
97124let has_invalid_scientific_notation s =
···109136 in
110137 String.contains after_sign '.'
(** [extract_unit s] returns the trailing unit of a size value, keeping the
    original letter case: ["10px"] gives ["px"], ["100VW"] gives ["VW"] and
    ["50%"] gives ["%"]. Surrounding whitespace is ignored. The result is
    [""] when [s] is blank or ends in neither an ASCII letter nor ['%']. *)
let extract_unit s =
  let value = String.trim s in
  let is_ascii_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in
  match String.length value with
  | 0 -> ""
  | n when value.[n - 1] = '%' -> "%"
  | n ->
    (* Walk backwards over the trailing run of letters. The result is the
       index of the first letter of that run, or [n] when the value does not
       end in a letter (which makes the extracted unit empty). *)
    let rec unit_start i =
      if i > 0 && is_ascii_letter value.[i - 1] then unit_start (i - 1) else i
    in
    let first = unit_start n in
    String.sub value first (n - first)
161161+112162let check_size_value size_value =
113163 let trimmed = String.trim size_value in
114114- if trimmed = "" then InvalidUnit
115115- (* Check for CSS comments inside numbers - this is invalid *)
116116- else if has_invalid_css_comment trimmed then CssCommentInside
164164+ if trimmed = "" then InvalidUnit ("", trimmed)
117165 else begin
166166+ (* Check for CSS comments inside numbers - this is invalid *)
167167+ match check_css_comment_position trimmed with
168168+ | CommentAfterSign (found, ctx) -> CssCommentAfterSign (found, ctx)
169169+ | CommentBetweenNumberAndUnit (found, ctx) -> CssCommentBeforeUnit (found, ctx)
170170+ | NoCommentError ->
118171 (* Strip valid leading/trailing CSS comments for further checks *)
119172 let value_no_comments = String.trim (strip_css_comments trimmed) in
120173 (* Check for invalid scientific notation like 1e+1.5px *)
121174 if has_invalid_scientific_notation value_no_comments then BadScientificNotation
122175 (* "auto" is only valid with lazy loading, which requires checking the element context.
123176 For general validation, treat "auto" alone as invalid in sizes. *)
124124- else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit
125125- else if value_no_comments = "" then InvalidUnit
177177+ else if String.lowercase_ascii value_no_comments = "auto" then
178178+ BadCssNumber (value_no_comments.[0], trimmed)
179179+ else if value_no_comments = "" then InvalidUnit ("", trimmed)
126180 else begin
127181 let lower = String.lowercase_ascii value_no_comments in
128128- (* Check for invalid units first *)
129129- let has_invalid = List.exists (fun unit ->
130130- let len = String.length unit in
131131- String.length lower > len &&
132132- String.sub lower (String.length lower - len) len = unit
133133- ) invalid_size_units in
134134- if has_invalid then InvalidUnit
182182+ (* Check for calc() or other CSS functions first - these are always valid *)
183183+ if String.contains value_no_comments '(' then Valid
135184 else begin
136136- (* Check for valid CSS length units *)
137137- let has_valid_unit = List.exists (fun unit ->
138138- let len = String.length unit in
139139- String.length lower > len &&
140140- String.sub lower (String.length lower - len) len = unit
141141- ) valid_length_units in
142142- if has_valid_unit then begin
143143- (* Check if it's negative (starts with - but not -0) *)
144144- if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
145145- (* Check if it's -0 which is valid *)
146146- let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
147147- try
148148- let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
149149- let f = float_of_string num_str in
150150- if f = 0.0 then Valid else NegativeValue
151151- with _ -> NegativeValue
152152- end else
153153- Valid
154154- end
155155- (* Could be calc() or other CSS functions - allow those *)
156156- else if String.contains value_no_comments '(' then Valid
185185+ (* Check if the value starts with a digit, minus, or plus sign *)
186186+ let first_char = value_no_comments.[0] in
187187+ let starts_with_number =
188188+ (first_char >= '0' && first_char <= '9') ||
189189+ first_char = '-' ||
190190+ first_char = '+' ||
191191+ first_char = '.' (* decimal point like .5px *)
192192+ in
193193+ if not starts_with_number then
194194+ (* Not a valid CSS number token - doesn't start with digit or sign *)
195195+ BadCssNumber (first_char, trimmed)
157196 else begin
158158- (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
159159- let stripped =
160160- let s = value_no_comments in
161161- let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
162162- s
163163- in
164164- (* Check if it's zero or a numeric value starting with 0 *)
165165- try
166166- let f = float_of_string stripped in
167167- if f = 0.0 then Valid else InvalidUnit
168168- with _ -> InvalidUnit
197197+ (* Check for invalid units first *)
198198+ let found_invalid = List.find_opt (fun unit ->
199199+ let len = String.length unit in
200200+ String.length lower > len &&
201201+ String.sub lower (String.length lower - len) len = unit
202202+ ) invalid_size_units in
203203+ match found_invalid with
204204+ | Some _unit -> InvalidUnit (extract_unit value_no_comments, trimmed)
205205+ | None ->
206206+ (* Check for valid CSS length units *)
207207+ let has_valid_unit = List.exists (fun unit ->
208208+ let len = String.length unit in
209209+ String.length lower > len &&
210210+ String.sub lower (String.length lower - len) len = unit
211211+ ) valid_length_units in
212212+ if has_valid_unit then begin
213213+ (* Check if it's negative (starts with - but not -0) *)
214214+ if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
215215+ (* Check if it's -0 which is valid *)
216216+ let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
217217+ try
218218+ let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
219219+ let f = float_of_string num_str in
220220+ if f = 0.0 then Valid else NegativeValue
221221+ with _ -> NegativeValue
222222+ end else
223223+ Valid
224224+ end
225225+ else begin
226226+ (* Check if it's a zero value (0, -0, +0) - these are valid without units *)
227227+ let stripped =
228228+ let s = value_no_comments in
229229+ let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
230230+ s
231231+ in
232232+ (* Check if it's zero or a numeric value starting with 0 *)
233233+ try
234234+ let f = float_of_string stripped in
235235+ if f = 0.0 then Valid else InvalidUnit (extract_unit value_no_comments, trimmed)
236236+ with _ -> InvalidUnit (extract_unit value_no_comments, trimmed)
237237+ end
169238 end
170239 end
171240 end
···174243let has_valid_size_unit size_value =
175244 match check_size_value size_value with
176245 | Valid -> true
177177- | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false
246246+ | InvalidUnit (_, _) | NegativeValue | CssCommentAfterSign (_, _) | CssCommentBeforeUnit (_, _) | BadScientificNotation | BadCssNumber (_, _) -> false
178247179248(** Check if a sizes entry has a media condition (starts with '(') *)
180249let has_media_condition entry =
···236305 if not (has_media_condition trimmed) then
237306 trimmed
238307 else begin
239239- (* Find matching closing paren, then get the size value after it *)
308308+ (* Media conditions can have "and", "or", "not" operators connecting
309309+ multiple parenthesized groups, e.g., "(not (width:500px)) and (width:500px) 500px"
310310+ We need to skip all media condition parts to find the size value *)
240311 let len = String.length trimmed in
241241- let rec find_close_paren i depth =
312312+ let rec skip_media_condition i =
242313 if i >= len then len
243243- else match trimmed.[i] with
244244- | '(' -> find_close_paren (i + 1) (depth + 1)
245245- | ')' -> if depth = 1 then i + 1 else find_close_paren (i + 1) (depth - 1)
246246- | _ -> find_close_paren (i + 1) depth
314314+ else begin
315315+ let remaining = String.trim (String.sub trimmed i (len - i)) in
316316+ let remaining_len = String.length remaining in
317317+ if remaining_len = 0 then len
318318+ else begin
319319+ let first_char = remaining.[0] in
320320+ if first_char = '(' then begin
321321+ (* Skip this parenthesized group *)
322322+ let rec find_close_paren j depth =
323323+ if j >= remaining_len then remaining_len
324324+ else match remaining.[j] with
325325+ | '(' -> find_close_paren (j + 1) (depth + 1)
326326+ | ')' -> if depth = 1 then j + 1 else find_close_paren (j + 1) (depth - 1)
327327+ | _ -> find_close_paren (j + 1) depth
328328+ in
329329+ let after_paren = find_close_paren 0 0 in
330330+ let new_pos = i + (len - i) - remaining_len + after_paren in
331331+ skip_media_condition new_pos
332332+ end
333333+ else begin
334334+ (* Check if remaining starts with "and", "or", "not" followed by space or paren *)
335335+ let lower_remaining = String.lowercase_ascii remaining in
336336+ if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
337337+ skip_media_condition (i + (len - i) - remaining_len + 4)
338338+ else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
339339+ skip_media_condition (i + (len - i) - remaining_len + 3)
340340+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not " then
341341+ skip_media_condition (i + (len - i) - remaining_len + 4)
342342+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and(" then
343343+ skip_media_condition (i + (len - i) - remaining_len + 3)
344344+ else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or(" then
345345+ skip_media_condition (i + (len - i) - remaining_len + 2)
346346+ else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not(" then
347347+ skip_media_condition (i + (len - i) - remaining_len + 3)
348348+ else
349349+ (* Found something that's not a media condition part - this is the size value *)
350350+ i + (len - i) - remaining_len
351351+ end
352352+ end
353353+ end
247354 in
248248- let after_paren = find_close_paren 0 0 in
249249- if after_paren >= len then ""
250250- else String.trim (String.sub trimmed after_paren (len - after_paren))
355355+ let size_start = skip_media_condition 0 in
356356+ if size_start >= len then ""
357357+ else String.trim (String.sub trimmed size_start (len - size_start))
251358 end
252359253360(** Validate sizes attribute value *)
···275382 (* Check for trailing comma *)
276383 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in
277384 if List.length entries > 1 && last_entry = "" then begin
385385+ (* Generate abbreviated context - show last ~25 chars with ellipsis if needed *)
386386+ let context =
387387+ if String.length value > 25 then
388388+ "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
389389+ else value
390390+ in
278391 Message_collector.add_error collector
279279- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Ends with trailing comma." value element_name)
392392+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
280393 ~code:"bad-sizes-value"
281394 ~element:element_name ~attribute:"sizes" ();
282395 false
···285398286399 (* Check for default-first pattern: unconditional value before conditional ones *)
287400 let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in
288288- if List.length non_empty_entries > 1 then begin
289289- let first = List.hd non_empty_entries in
290290- let rest = List.tl non_empty_entries in
401401+ (* Filter out entries that have invalid media conditions - they'll be reported separately *)
402402+ let valid_entries = List.filter (fun e ->
403403+ has_invalid_media_condition (String.trim e) = None
404404+ ) non_empty_entries in
405405+ if List.length valid_entries > 1 then begin
406406+ let first = List.hd valid_entries in
407407+ let rest = List.tl valid_entries in
291408 (* If first entry has no media condition but later ones do, that's invalid *)
292409 if not (has_media_condition first) && List.exists has_media_condition rest then begin
410410+ (* Context is the first entry with a comma *)
411411+ let context = (String.trim first) ^ "," in
293412 Message_collector.add_error collector
294294- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Default size must be last." value element_name)
413413+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
295414 ~code:"bad-sizes-value"
296415 ~element:element_name ~attribute:"sizes" ();
297416 valid := false
298417 end;
299299- (* Check for multiple consecutive defaults (entries without media conditions) *)
300300- let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in
301301- if List.length defaults_without_media > 1 then begin
302302- Message_collector.add_error collector
303303- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name)
304304- ~code:"bad-sizes-value"
305305- ~element:element_name ~attribute:"sizes" ();
306306- valid := false
418418+ (* Check for multiple entries without media conditions.
419419+ When the first entry has no media condition, report "Expected media condition"
420420+ regardless of whether later entries have media conditions or not *)
421421+ if not (has_media_condition first) && !valid then begin
422422+ (* Only report if we haven't already reported the default-first error *)
423423+ if not (List.exists has_media_condition rest) then begin
424424+ (* Multiple defaults - report as "Expected media condition" *)
425425+ let context = (String.trim first) ^ "," in
426426+ Message_collector.add_error collector
427427+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
428428+ ~code:"bad-sizes-value"
429429+ ~element:element_name ~attribute:"sizes" ();
430430+ valid := false
431431+ end
307432 end
308433 end;
309434310435 (* Validate each entry's media condition and size value *)
311311- List.iter (fun entry ->
436436+ let num_entries = List.length entries in
437437+ List.iteri (fun idx entry ->
312438 let trimmed = String.trim entry in
313439 if trimmed <> "" then begin
314440 (* Check for invalid media condition *)
315441 (match has_invalid_media_condition trimmed with
316442 | Some err_msg ->
443443+ (* Generate context: "entry," with ellipsis if needed *)
444444+ let context = (String.trim entry) ^ "," in
445445+ let context =
446446+ if String.length context > 25 then
447447+ "\xe2\x80\xa6" ^ String.sub context (String.length context - 25) 25
448448+ else context
449449+ in
317450 Message_collector.add_error collector
318318- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg)
451451+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name err_msg context)
319452 ~code:"bad-sizes-value"
320453 ~element:element_name ~attribute:"sizes" ();
321454 valid := false
···323456324457 let size_val = extract_size_value trimmed in
325458 if size_val <> "" then begin
326326- match check_size_value size_val with
459459+ (* Check if there are multiple space-separated words in the size value.
460460+ Only the first word should be the size, rest is junk. *)
461461+ let size_parts = String.split_on_char ' ' size_val |> List.filter (fun s -> s <> "") in
462462+ let first_size = match size_parts with [] -> size_val | hd :: _ -> hd in
463463+ let extra_parts = match size_parts with [] -> [] | _ :: tl -> tl in
464464+465465+ (* Check if first word looks like it should have been a media condition
466466+ (doesn't start with digit, sign, decimal, '/', or look like a CSS function) *)
467467+ let first_char = if String.length first_size > 0 then first_size.[0] else 'x' in
468468+ let has_paren = String.contains size_val '(' in (* calc(), etc. *)
469469+ let looks_like_junk_entry =
470470+ not (has_media_condition trimmed) &&
471471+ not has_paren && (* Allow CSS functions like calc() *)
472472+ not (first_char = '/') && (* Allow leading CSS comments *)
473473+ not ((first_char >= '0' && first_char <= '9') ||
474474+ first_char = '+' || first_char = '-' || first_char = '.')
475475+ in
476476+477477+ (* If this entry looks like junk and there are multiple entries,
478478+ report "Expected media condition" instead of "Bad CSS number".
479479+ For single entries with invalid values, fall through to BadCssNumber. *)
480480+ if looks_like_junk_entry && num_entries > 1 then begin
481481+ (* Find the context ending with the previous entry *)
482482+ let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in
483483+ let context =
484484+ if List.length prev_entries > 0 then
485485+ let prev_value = String.concat ", " (List.map String.trim prev_entries) ^ "," in
486486+ if String.length prev_value > 25 then
487487+ "\xe2\x80\xa6" ^ String.sub prev_value (String.length prev_value - 25) 25
488488+ else prev_value
489489+ else value
490490+ in
491491+ Message_collector.add_error collector
492492+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
493493+ ~code:"bad-sizes-value"
494494+ ~element:element_name ~attribute:"sizes" ();
495495+ valid := false
496496+ end
497497+ (* If there's extra junk after the size, report BadCssNumber error for it *)
498498+ else if extra_parts <> [] then begin
499499+ let junk = String.concat " " extra_parts in
500500+ let last_junk = List.nth extra_parts (List.length extra_parts - 1) in
501501+ let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in
502502+ (* Context depends on whether this is the last entry:
503503+ - For non-last entries: entry with trailing comma, truncated from beginning
504504+ - For last entry: full value truncated from beginning (no trailing comma) *)
505505+ let is_last_entry = idx = num_entries - 1 in
506506+ let context =
507507+ if is_last_entry then begin
508508+ (* Last entry: use full value truncated *)
509509+ if String.length value > 25 then
510510+ "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
511511+ else value
512512+ end else begin
513513+ (* Non-last entry: use entry with comma, truncated *)
514514+ let entry_with_comma = trimmed ^ "," in
515515+ if String.length entry_with_comma > 25 then
516516+ "\xe2\x80\xa6" ^ String.sub entry_with_comma (String.length entry_with_comma - 25) 25
517517+ else entry_with_comma
518518+ end
519519+ in
520520+ let _ = junk in
521521+ Message_collector.add_error collector
522522+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
523523+ ~code:"bad-sizes-value"
524524+ ~element:element_name ~attribute:"sizes" ();
525525+ valid := false
526526+ end
527527+ else
528528+ match check_size_value first_size with
327529 | Valid -> ()
328530 | NegativeValue ->
531531+ let full_context =
532532+ if List.length entries > 1 then size_val ^ ","
533533+ else size_val
534534+ in
535535+ let _ = full_context in
329536 Message_collector.add_error collector
330330- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Source size value cannot be negative." value element_name)
537537+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected positive size value but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val size_val)
538538+ ~code:"bad-sizes-value"
539539+ ~element:element_name ~attribute:"sizes" ();
540540+ valid := false
541541+ | CssCommentAfterSign (found, context) ->
542542+ (* e.g., +/**/50vw - expected number after sign *)
543543+ Message_collector.add_error collector
544544+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected number but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name found context)
331545 ~code:"bad-sizes-value"
332546 ~element:element_name ~attribute:"sizes" ();
333547 valid := false
334334- | CssCommentInside ->
548548+ | CssCommentBeforeUnit (found, context) ->
549549+ (* e.g., 50/**/vw - expected units after number *)
550550+ let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
551551+ let units_str = String.concat ", " units_list in
335552 Message_collector.add_error collector
336336- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
553553+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found context)
337554 ~code:"bad-sizes-value"
338555 ~element:element_name ~attribute:"sizes" ();
339556 valid := false
340557 | BadScientificNotation ->
558558+ (* For scientific notation with bad exponent, show what char was expected vs found *)
559559+ let context =
560560+ if List.length entries > 1 then trimmed ^ ","
561561+ else trimmed
562562+ in
563563+ (* [context] is not used: the message below always names the period as the offending character and passes [size_val] as its context *)
564564+ let _ = context in
341565 Message_collector.add_error collector
342342- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
566566+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a digit but saw \xe2\x80\x9c.\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val)
343567 ~code:"bad-sizes-value"
344568 ~element:element_name ~attribute:"sizes" ();
345569 valid := false
346346- | InvalidUnit ->
570570+ | BadCssNumber (first_char, context) ->
571571+ (* Value doesn't start with a digit or minus sign *)
572572+ let full_context =
573573+ if List.length entries > 1 then context ^ ","
574574+ else context
575575+ in
576576+ let _ = full_context in
577577+ Message_collector.add_error collector
578578+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
579579+ ~code:"bad-sizes-value"
580580+ ~element:element_name ~attribute:"sizes" ();
581581+ valid := false
582582+ | InvalidUnit (found_unit, _context) ->
583583+ (* Generate the full list of expected units *)
584584+ let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
585585+ let units_str = String.concat ", " units_list in
586586+ (* Context should be the full entry, with comma only if there are multiple entries *)
587587+ let full_context =
588588+ if List.length entries > 1 then trimmed ^ ","
589589+ else trimmed
590590+ in
591591+ (* When found_unit is empty, say "no units" instead of quoting empty string *)
592592+ let found_str =
593593+ if found_unit = "" then "no units"
594594+ else Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" found_unit
595595+ in
347596 Message_collector.add_error collector
348348- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name)
597597+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found_str full_context)
349598 ~code:"bad-sizes-value"
350599 ~element:element_name ~attribute:"sizes" ();
351600 valid := false
···359608 end
360609361610(** Validate srcset descriptor *)
362362-let validate_srcset_descriptor desc element_name srcset_value collector =
611611+let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
363612 let desc_lower = String.lowercase_ascii (String.trim desc) in
364613 if String.length desc_lower = 0 then true
365614 else begin
···371620 (* Width descriptor - must be positive integer, no leading + *)
372621 let trimmed_desc = String.trim desc in
373622 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
623623+ (* Show just the number part (without the 'w') *)
624624+ let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
374625 Message_collector.add_error collector
375375- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value)
626626+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part_for_msg srcset_value)
376627 ~code:"bad-srcset-value"
377628 ~element:element_name ~attribute:"srcset" ();
378629 false
···381632 let n = int_of_string num_part in
382633 if n <= 0 then begin
383634 Message_collector.add_error collector
384384- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width must be positive." srcset_value element_name)
635635+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
385636 ~code:"bad-srcset-value"
386637 ~element:element_name ~attribute:"srcset" ();
387638 false
···390641 let original_last = desc.[String.length desc - 1] in
391642 if original_last = 'W' then begin
392643 Message_collector.add_error collector
393393- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width descriptor must use lowercase \xe2\x80\x9cw\xe2\x80\x9d." srcset_value element_name)
644644+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name desc srcset_value)
394645 ~code:"bad-srcset-value"
395646 ~element:element_name ~attribute:"srcset" ();
396647 false
397648 end else true
398649 end
399650 with _ ->
400400- (* Check for scientific notation or decimal *)
401401- if String.contains num_part 'e' || String.contains num_part 'E' then begin
651651+ (* Check for scientific notation, decimal, or other non-integer values *)
652652+ if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin
402653 Message_collector.add_error collector
403403- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Scientific notation not allowed in width descriptor." srcset_value element_name)
654654+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected integer but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
404655 ~code:"bad-srcset-value"
405656 ~element:element_name ~attribute:"srcset" ();
406657 false
···415666 (* Pixel density descriptor - must be positive number, no leading + *)
416667 let trimmed_desc = String.trim desc in
417668 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
669669+ (* Extract the number part including the plus sign *)
670670+ let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
418671 Message_collector.add_error collector
419419- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Leading plus sign not allowed." srcset_value element_name)
672672+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_with_plus srcset_value)
420673 ~code:"bad-srcset-value"
421674 ~element:element_name ~attribute:"srcset" ();
422675 false
···424677 (try
425678 let n = float_of_string num_part in
426679 if Float.is_nan n then begin
680680+ (* NaN is not a valid float - report as parse error with first char from ORIGINAL desc *)
681681+ let trimmed_desc = String.trim desc in
682682+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
683683+ let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
427684 Message_collector.add_error collector
428428- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: NaN not allowed." srcset_value element_name)
685685+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
429686 ~code:"bad-srcset-value"
430687 ~element:element_name ~attribute:"srcset" ();
431688 false
432432- end else if n <= 0.0 then begin
689689+ end else if n = 0.0 then begin
690690+ (* Check if it's -0 (starts with minus) - report as "greater than zero" error *)
691691+ let trimmed_desc = String.trim desc in
692692+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
693693+ if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin
694694+ Message_collector.add_error collector
695695+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name orig_num_part srcset_value)
696696+ ~code:"bad-srcset-value"
697697+ ~element:element_name ~attribute:"srcset" ()
698698+ end else begin
699699+ Message_collector.add_error collector
700700+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Zero is not a valid positive floating point number at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name srcset_value)
701701+ ~code:"bad-srcset-value"
702702+ ~element:element_name ~attribute:"srcset" ()
703703+ end;
704704+ false
705705+ end else if n < 0.0 then begin
433706 Message_collector.add_error collector
434434- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Density must be positive." srcset_value element_name)
707707+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
435708 ~code:"bad-srcset-value"
436709 ~element:element_name ~attribute:"srcset" ();
437710 false
438711 end else if n = neg_infinity || n = infinity then begin
712712+ (* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *)
713713+ let trimmed_desc = String.trim desc in
714714+ let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
715715+ let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
439716 Message_collector.add_error collector
440440- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Infinity not allowed." srcset_value element_name)
717717+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
441718 ~code:"bad-srcset-value"
442719 ~element:element_name ~attribute:"srcset" ();
443720 false
···451728 end
452729 | 'h' ->
453730 (* Height descriptor - not allowed *)
454454- Message_collector.add_error collector
455455- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
456456- ~code:"bad-srcset-value"
457457- ~element:element_name ~attribute:"srcset" ();
731731+ let trimmed_desc = String.trim desc in
732732+ (* Generate context: find where this entry appears *)
733733+ let context =
734734+ try
735735+ let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
736736+ (* Get the entry context ending with comma *)
737737+ let search_from = max 0 (pos - 3) in
738738+ let comma_pos = try Str.search_forward (Str.regexp_string ",") srcset_value pos with Not_found -> String.length srcset_value - 1 in
739739+ let end_pos = min (comma_pos + 1) (String.length srcset_value) in
740740+ let len = end_pos - search_from in
741741+ if len > 0 then String.trim (String.sub srcset_value search_from len) else srcset_value
742742+ with Not_found | Invalid_argument _ -> srcset_value
743743+ in
744744+ if has_sizes then
745745+ Message_collector.add_error collector
746746+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name trimmed_desc context)
747747+ ~code:"bad-srcset-value"
748748+ ~element:element_name ~attribute:"srcset" ()
749749+ else
750750+ Message_collector.add_error collector
751751+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
752752+ ~code:"bad-srcset-value"
753753+ ~element:element_name ~attribute:"srcset" ();
458754 false
459755 | _ ->
460460- (* Unknown descriptor *)
756756+ (* Unknown descriptor - find context in srcset_value *)
757757+ let trimmed_desc = String.trim desc in
758758+ (* Try to find the context: find where this descriptor appears in srcset_value *)
759759+ let context =
760760+ try
761761+ let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
762762+ (* Take a window from two characters before the descriptor through one character after it (the following comma, when there is one) *)
763763+ let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in
764764+ let start_pos = max 0 (pos - 2) in
765765+ String.trim (String.sub srcset_value start_pos (end_pos - start_pos))
766766+ with Not_found -> srcset_value
767767+ in
461768 Message_collector.add_error collector
462462- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor." srcset_value element_name)
769769+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc context)
463770 ~code:"bad-srcset-value"
464771 ~element:element_name ~attribute:"srcset" ();
465772 false
···489796 let entries = String.split_on_char ',' value in
490797 let has_w_descriptor = ref false in
491798 let has_x_descriptor = ref false in
492492- let has_no_descriptor = ref false in (* Track if any entry has no descriptor *)
493493- let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *)
799799+ let no_descriptor_url = ref None in (* Track URL of first entry that has no descriptor at all (URL only) *)
800800+ let x_with_sizes_error_reported = ref false in (* Track if we already reported x-with-sizes error *)
801801+ let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values -> first URL *)
494802495803 (* Check for empty srcset *)
496804 if String.trim value = "" then begin
497805 Message_collector.add_error collector
498498- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must not be empty." value element_name)
806806+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must contain one or more image candidate strings." value element_name)
499807 ~code:"bad-srcset-value"
500808 ~element:element_name ~attribute:"srcset" ()
501809 end;
···503811 (* Check for leading comma *)
504812 if String.length value > 0 && value.[0] = ',' then begin
505813 Message_collector.add_error collector
506506- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Leading comma." value element_name)
814814+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Starts with empty image-candidate string." value element_name)
507815 ~code:"bad-srcset-value"
508816 ~element:element_name ~attribute:"srcset" ()
509817 end;
510818511511- (* Check for trailing comma *)
819819+ (* Check for trailing comma(s) / empty entries *)
512820 let trimmed_value = String.trim value in
513821 if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin
514514- Message_collector.add_error collector
515515- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Trailing comma." value element_name)
516516- ~code:"bad-srcset-value"
517517- ~element:element_name ~attribute:"srcset" ()
822822+ (* Count consecutive trailing commas *)
823823+ let rec count_trailing_commas s idx count =
824824+ if idx < 0 then count
825825+ else if s.[idx] = ',' then count_trailing_commas s (idx - 1) (count + 1)
826826+ else if s.[idx] = ' ' || s.[idx] = '\t' then count_trailing_commas s (idx - 1) count
827827+ else count
828828+ in
829829+ let trailing_commas = count_trailing_commas trimmed_value (String.length trimmed_value - 1) 0 in
830830+ if trailing_commas > 1 then
831831+ (* Multiple trailing commas: "Empty image-candidate string at" *)
832832+ Message_collector.add_error collector
833833+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Empty image-candidate string at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name value)
834834+ ~code:"bad-srcset-value"
835835+ ~element:element_name ~attribute:"srcset" ()
836836+ else
837837+ (* Single trailing comma: "Ends with empty image-candidate string." *)
838838+ Message_collector.add_error collector
839839+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Ends with empty image-candidate string." value element_name)
840840+ ~code:"bad-srcset-value"
841841+ ~element:element_name ~attribute:"srcset" ()
518842 end;
519843520844 List.iter (fun entry ->
···532856 let scheme_colon = scheme ^ ":" in
533857 if url_lower = scheme_colon then
534858 Message_collector.add_error collector
535535- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name)
859859+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad image-candidate URL: \xe2\x80\x9c%s\xe2\x80\x9d: Expected a slash (\"/\")." value element_name url)
536860 ~code:"bad-srcset-url"
537861 ~element:element_name ~attribute:"srcset" ()
538862 ) special_schemes
···542866 | [url] ->
543867 check_srcset_url url;
544868 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
545545- has_no_descriptor := true;
546546- if Hashtbl.mem seen_descriptors "explicit-1x" then begin
869869+ if !no_descriptor_url = None then no_descriptor_url := Some url;
870870+ begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with
871871+ | Some first_url ->
547872 Message_collector.add_error collector
548548- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
873873+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Density for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to density for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name url first_url)
549874 ~code:"bad-srcset-value"
550875 ~element:element_name ~attribute:"srcset" ()
551551- end else
552552- Hashtbl.add seen_descriptors "implicit-1x" true
876876+ | None ->
877877+ Hashtbl.add seen_descriptors "implicit-1x" url
878878+ end
553879 | url :: desc :: rest ->
554880 (* Check URL for broken schemes *)
555881 check_srcset_url url;
556882 (* Check for extra junk - multiple descriptors are not allowed *)
557883 if rest <> [] then begin
884884+ let extra_desc = List.hd rest in
558885 Message_collector.add_error collector
559559- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Multiple descriptors in candidate." value element_name)
886886+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected single descriptor but found extraneous descriptor \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name extra_desc value)
560887 ~code:"bad-srcset-value"
561888 ~element:element_name ~attribute:"srcset" ()
562889 end;
···565892 if String.length desc_lower > 0 then begin
566893 let last_char = desc_lower.[String.length desc_lower - 1] in
567894 if last_char = 'w' then has_w_descriptor := true
568568- else if last_char = 'x' then has_x_descriptor := true;
895895+ else if last_char = 'x' then begin
896896+ has_x_descriptor := true;
897897+ (* If sizes is present and we have an x descriptor, generate detailed error *)
898898+ if has_sizes && not !x_with_sizes_error_reported then begin
899899+ x_with_sizes_error_reported := true;
900900+ (* Build context:
901901+ - If entry has extra parts (multiple descriptors): show "url descriptor "
902902+ - Else if entry has trailing comma: show "url descriptor,"
903903+ - Else (last entry, no extra parts): show full srcset value *)
904904+ let trimmed_url = String.trim url in
905905+ let trimmed_desc = String.trim desc in
906906+ let entry_context =
907907+ if rest <> [] then
908908+ (* Entry has multiple descriptors - show URL + first descriptor + space *)
909909+ trimmed_url ^ " " ^ trimmed_desc ^ " "
910910+ else
911911+ (* Check if entry ends with comma in original value *)
912912+ let trimmed_entry = String.trim entry in
913913+ try
914914+ let entry_start = Str.search_forward (Str.regexp_string trimmed_url) value 0 in
915915+ let entry_end = entry_start + String.length trimmed_entry in
916916+ let has_trailing_comma = entry_end < String.length value && value.[entry_end] = ',' in
917917+ if has_trailing_comma then
918918+ (* Entry followed by comma - show "url descriptor," *)
919919+ trimmed_url ^ " " ^ trimmed_desc ^ ","
920920+ else
921921+ (* Last entry - show full srcset value *)
922922+ value
923923+ with Not_found ->
924924+ value
925925+ in
926926+ Message_collector.add_error collector
927927+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name trimmed_desc entry_context)
928928+ ~code:"bad-srcset-value"
929929+ ~element:element_name ~attribute:"srcset" ()
930930+ end
931931+ end;
569932570933 (* Check for duplicate descriptors - use normalized form *)
571934 let normalized = normalize_descriptor desc in
572935 let is_1x = (normalized = "1x") in
573573- if Hashtbl.mem seen_descriptors normalized then begin
574574- Message_collector.add_error collector
575575- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
576576- ~code:"bad-srcset-value"
577577- ~element:element_name ~attribute:"srcset" ()
578578- end else if is_1x && Hashtbl.mem seen_descriptors "implicit-1x" then begin
579579- (* Explicit 1x conflicts with implicit 1x *)
936936+ let is_width = (last_char = 'w') in
937937+ let dup_type = if is_width then "Width" else "Density" in
938938+ begin match Hashtbl.find_opt seen_descriptors normalized with
939939+ | Some first_url ->
580940 Message_collector.add_error collector
581581- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
941941+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
582942 ~code:"bad-srcset-value"
583943 ~element:element_name ~attribute:"srcset" ()
584584- end else begin
585585- Hashtbl.add seen_descriptors normalized true;
586586- if is_1x then Hashtbl.add seen_descriptors "explicit-1x" true
944944+ | None ->
945945+ begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
946946+ | Some first_url ->
947947+ (* Explicit 1x conflicts with implicit 1x *)
948948+ Message_collector.add_error collector
949949+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
950950+ ~code:"bad-srcset-value"
951951+ ~element:element_name ~attribute:"srcset" ()
952952+ | None ->
953953+ Hashtbl.add seen_descriptors normalized url;
954954+ if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
955955+ end
587956 end
588957 end;
589958590590- ignore (validate_srcset_descriptor desc element_name value collector)
959959+ ignore (validate_srcset_descriptor desc element_name value has_sizes collector)
591960 end
592961 ) entries;
593962594963 (* Check: if w descriptor used and no sizes, that's an error for img and source *)
595964 if !has_w_descriptor && not has_sizes then
596965 Message_collector.add_error collector
597597- ~message:(Printf.sprintf "When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute on the \xe2\x80\x9c%s\xe2\x80\x9d element uses width descriptors, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be present." element_name)
966966+ ~message:"When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute has any image candidate string with a width descriptor, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be specified."
598967 ~code:"srcset-w-without-sizes"
599968 ~element:element_name ~attribute:"srcset" ();
600969601970 (* Check: if sizes is present, all entries must have width descriptors *)
602602- if has_sizes && !has_no_descriptor then
971971+ (match !no_descriptor_url with
972972+ | Some url when has_sizes ->
603973 Message_collector.add_error collector
604604- ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name)
974974+ ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name url)
605975 ~code:"bad-srcset-value"
606606- ~element:element_name ~attribute:"srcset" ();
976976+ ~element:element_name ~attribute:"srcset" ()
977977+ | _ -> ());
607978608608- (* Check: if sizes is present and srcset uses x descriptors, that's an error *)
609609- if has_sizes && !has_x_descriptor then
979979+ (* Check: if sizes is present and srcset uses x descriptors, that's an error.
980980+ Only report if we haven't already reported the detailed error. *)
981981+ if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then
610982 Message_collector.add_error collector
611983 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
612984 ~code:"bad-srcset-value"
···3434 | "menu" ->
3535 (* menu only allows li, script, template *)
3636 List.mem child ["li"; "script"; "template"]
3737+ | "table" ->
3838+ (* col must be in colgroup, not directly in table *)
3939+ child <> "col"
3740 | _ -> true
38413942(* Check if text is allowed in element *)
+13-13
lib/html5rw/parser/parser_tree_builder.ml
···787787 t.open_elements <- [html];
788788 t.mode <- Parser_insertion_mode.Before_head;
789789 process_token t token
790790- | Token.Tag { kind = Token.End; _ } ->
791791- parse_error t "unexpected-end-tag"
790790+ | Token.Tag { kind = Token.End; name; _ } ->
791791+ parse_error t ("unexpected-end-tag:" ^ name)
792792 | _ ->
793793 let html = insert_element t "html" [] in
794794 t.open_elements <- [html];
···813813 t.head_element <- Some head;
814814 t.mode <- Parser_insertion_mode.In_head;
815815 process_token t token
816816- | Token.Tag { kind = Token.End; _ } ->
817817- parse_error t "unexpected-end-tag"
816816+ | Token.Tag { kind = Token.End; name; _ } ->
817817+ parse_error t ("unexpected-end-tag:" ^ name)
818818 | _ ->
819819 let head = insert_element t "head" [] in
820820 t.open_elements <- head :: t.open_elements;
···902902 end
903903 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
904904 parse_error t "unexpected-start-tag"
905905- | Token.Tag { kind = Token.End; _ } ->
906906- parse_error t "unexpected-end-tag"
905905+ | Token.Tag { kind = Token.End; name; _ } ->
906906+ parse_error t ("unexpected-end-tag:" ^ name)
907907 | _ ->
908908 pop_current t;
909909 t.mode <- Parser_insertion_mode.After_head;
···943943 pop_current t; (* Pop noscript *)
944944 t.mode <- Parser_insertion_mode.In_head;
945945 process_token t token
946946- | Token.Tag { kind = Token.End; _ } ->
947947- parse_error t "unexpected-end-tag"
946946+ | Token.Tag { kind = Token.End; name; _ } ->
947947+ parse_error t ("unexpected-end-tag:" ^ name)
948948 | Token.EOF ->
949949 parse_error t "expected-closing-tag-but-got-eof";
950950 pop_current t; (* Pop noscript *)
···998998 process_token t token
999999 | Token.Tag { kind = Token.Start; name = "head"; _ } ->
10001000 parse_error t "unexpected-start-tag"
10011001- | Token.Tag { kind = Token.End; _ } ->
10021002- parse_error t "unexpected-end-tag"
10011001+ | Token.Tag { kind = Token.End; name; _ } ->
10021002+ parse_error t ("unexpected-end-tag:" ^ name)
10031003 | _ ->
10041004 let body = insert_element t "body" [] in
10051005 t.open_elements <- body :: t.open_elements;
···14471447 | _ -> ());
14481448 pop_until t (fun n -> n == node)
14491449 end else if is_special_element node then
14501450- parse_error t "unexpected-end-tag"
14501450+ parse_error t ("unexpected-end-tag:" ^ name)
14511451 else
14521452 check rest
14531453 in
···20562056 t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes;
20572057 t.mode <- Parser_insertion_mode.In_body;
20582058 process_token t token
20592059- | Token.Tag { kind = Token.End; _ } ->
20602060- parse_error t "unexpected-end-tag"
20592059+ | Token.Tag { kind = Token.End; name; _ } ->
20602060+ parse_error t ("unexpected-end-tag:" ^ name)
20612061 | Token.EOF ->
20622062 if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then
20632063 () (* Stop parsing *)