···1(** Helper functions for language tag validation *)
2003let is_lower_alpha c = c >= 'a' && c <= 'z'
4let is_upper_alpha c = c >= 'A' && c <= 'Z'
5let is_alpha c = is_lower_alpha c || is_upper_alpha c
···123 if is_valid_extlang first_lower second_lower then
124 Ok ()
125 else
126- Error (Printf.sprintf "Bad extlang subtag \xe2\x80\x9c%s\xe2\x80\x9d" second_lower)
127 else
128 Ok () (* Not an extlang pattern, continue *)
129 | [] -> Ok ())
···1(** Helper functions for language tag validation *)
23+let q = Error_code.q
4+5let is_lower_alpha c = c >= 'a' && c <= 'z'
6let is_upper_alpha c = c >= 'A' && c <= 'Z'
7let is_alpha c = is_lower_alpha c || is_upper_alpha c
···125 if is_valid_extlang first_lower second_lower then
126 Ok ()
127 else
128+ Error (Printf.sprintf "Bad extlang subtag %s" (q second_lower))
129 else
130 Ok () (* Not an extlang pattern, continue *)
131 | [] -> Ok ())
+8-6
lib/htmlrw_check/parse_error_bridge.ml
···3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5006(** Generate human-readable message for a parse error code *)
7let message_of_parse_error code =
8 let code_str = Html5rw.Parse_error_code.to_string code in
···57 let cp = int_of_string ("0x" ^ cp_str) in
58 Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
59 else if s = "no-p-element-in-scope" then
60- "No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen."
61 else if s = "end-tag-p-implied-but-open-elements" then
62- "End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements."
63 else if s = "end-tag-br" then
64- "End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65 else if s = "expected-closing-tag-but-got-eof" then
66 "End of file seen and there were open elements."
67 else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68 let colon_pos = String.index s ':' in
69 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70- Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71 else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72 let element = String.sub s 19 (String.length s - 19) in
73- Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74 else if String.starts_with ~prefix:"start-tag-in-table:" s then
75 let tag = String.sub s 19 (String.length s - 19) in
76- Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77 else
78 Printf.sprintf "Parse error: %s" s
79 with _ -> Printf.sprintf "Parse error: %s" s)
···3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
56+let q = Error_code.q
7+8(** Generate human-readable message for a parse error code *)
9let message_of_parse_error code =
10 let code_str = Html5rw.Parse_error_code.to_string code in
···59 let cp = int_of_string ("0x" ^ cp_str) in
60 Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
61 else if s = "no-p-element-in-scope" then
62+ Printf.sprintf "No %s element in scope but a %s end tag seen." (q "p") (q "p")
63 else if s = "end-tag-p-implied-but-open-elements" then
64+ Printf.sprintf "End tag %s implied, but there were open elements." (q "p")
65 else if s = "end-tag-br" then
66+ Printf.sprintf "End tag %s." (q "br")
67 else if s = "expected-closing-tag-but-got-eof" then
68 "End of file seen and there were open elements."
69 else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
70 let colon_pos = String.index s ':' in
71 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
72+ Printf.sprintf "Bad start tag in %s in %s in %s." (q element) (q "noscript") (q "head")
73 else if String.starts_with ~prefix:"unexpected-end-tag:" s then
74 let element = String.sub s 19 (String.length s - 19) in
75+ Printf.sprintf "Stray end tag %s." (q element)
76 else if String.starts_with ~prefix:"start-tag-in-table:" s then
77 let tag = String.sub s 19 (String.length s - 19) in
78+ Printf.sprintf "Start tag %s seen in %s." (q tag) (q "table")
79 else
80 Printf.sprintf "Parse error: %s" s
81 with _ -> Printf.sprintf "Parse error: %s" s)
+4-2
lib/htmlrw_check/semantic/obsolete_checker.ml
···001(** Obsolete elements map: element name -> suggestion message *)
2let obsolete_elements =
3 let tbl = Hashtbl.create 32 in
···131 "Use the HTTP OPTIONS feature instead.";
132133 register "name" ["a"]
134- "Consider putting an \xe2\x80\x9cid\xe2\x80\x9d attribute on the nearest container instead.";
135136 register "name" ["embed"; "img"; "option"]
137- "Use the \xe2\x80\x9cid\xe2\x80\x9d attribute instead.";
138139 register "nohref" ["area"]
140 "Omitting the \"href\" attribute is sufficient.";
···1+let q = Error_code.q
2+3(** Obsolete elements map: element name -> suggestion message *)
4let obsolete_elements =
5 let tbl = Hashtbl.create 32 in
···133 "Use the HTTP OPTIONS feature instead.";
134135 register "name" ["a"]
136+ (Printf.sprintf "Consider putting an %s attribute on the nearest container instead." (q "id"));
137138 register "name" ["embed"; "img"; "option"]
139+ (Printf.sprintf "Use the %s attribute instead." (q "id"));
140141 register "nohref" ["area"]
142 "Omitting the \"href\" attribute is sufficient.";
···1(** Required attribute checker implementation. *)
2003type state = {
4 mutable _in_figure : bool;
5 (** Track if we're inside a <figure> element (alt is more critical there) *)
···81 in
8283 if not valid then
84- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
85 Message_collector.add_typed collector
86 (`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
87 (q "meta") (q "charset") (q "name")
···122 let value_lower = String.lowercase_ascii value in
123 (* Valid values: empty string, auto, manual, hint *)
124 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
125- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
126 Message_collector.add_typed collector
127 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128 (q value) (q "popover") (q element_name)))))
···141 let value = float_of_string value_str in
142 let min_val = float_of_string min_str in
143 if min_val > value then
144- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
145 Message_collector.add_typed collector
146 (`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
147 (q "min") (q "value")))
···161 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
162 in
163 if value > max_val then
164- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165 (* Check which message to use based on whether max is present *)
166 if Attr_utils.has_attr "max" attrs then
167 Message_collector.add_typed collector
···1(** Required attribute checker implementation. *)
23+let q = Error_code.q
4+5type state = {
6 mutable _in_figure : bool;
7 (** Track if we're inside a <figure> element (alt is more critical there) *)
···83 in
8485 if not valid then
086 Message_collector.add_typed collector
87 (`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
88 (q "meta") (q "charset") (q "name")
···123 let value_lower = String.lowercase_ascii value in
124 (* Valid values: empty string, auto, manual, hint *)
125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
0126 Message_collector.add_typed collector
127 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128 (q value) (q "popover") (q element_name)))))
···141 let value = float_of_string value_str in
142 let min_val = float_of_string min_str in
143 if min_val > value then
0144 Message_collector.add_typed collector
145 (`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
146 (q "min") (q "value")))
···160 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
161 in
162 if value > max_val then
0163 (* Check which message to use based on whether max is present *)
164 if Attr_utils.has_attr "max" attrs then
165 Message_collector.add_typed collector
+1-1
lib/htmlrw_check/specialized/aria_checker.ml
···586 if aria_checked <> None then
587 Message_collector.add_typed collector
588 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
589- `Condition "a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9ccheckbox\xe2\x80\x9d")))
590 | _ -> ()
591 end;
592
···586 if aria_checked <> None then
587 Message_collector.add_typed collector
588 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
589+ `Condition (Printf.sprintf "a %s attribute whose value is %s" (q "type") (q "checkbox")))))
590 | _ -> ()
591 end;
592
···1(** Attribute restrictions checker - validates that certain attributes
2 are not used on elements where they're not allowed. *)
3004(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
5let disallowed_attrs_html = [
6 (* Elements that cannot have href attribute (RDFa misuses) *)
···174 if attr_value = "#" then
175 Message_collector.add_typed collector
176 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
177- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad hash-name reference: A hash-name reference must have at least one character after \xe2\x80\x9c#\xe2\x80\x9d."
178- attr_value attr_name name))))
179 end
180 ) attrs
181 end;
···190 | Error msg ->
191 Message_collector.add_typed collector
192 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
193- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: %s"
194- attr_value attr_name name msg))))
195 end
196 ) attrs
197 end;
···213 (* Determine specific error message *)
214 let error_msg =
215 if String.length attr_value = 0 then
216- Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The empty string is not a valid non-negative integer."
217- attr_name name
218 else if String.contains attr_value '%' then
219- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%%\xe2\x80\x9d instead."
220- attr_value attr_name name
221 else if String.length attr_value > 0 && attr_value.[0] = '-' then
222- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c-\xe2\x80\x9d instead."
223- attr_value attr_name name
224 else
225 (* Find first non-digit character *)
226 let bad_char =
···234 in
235 match bad_char with
236 | Some c ->
237- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead."
238- attr_value attr_name name c
239 | None ->
240- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit."
241- attr_value attr_name name
242 in
243 Message_collector.add_typed collector
244 (`Attr (`Bad_value_generic (`Message error_msg)))
···377 if count_codepoints key > 1 then
378 Message_collector.add_typed collector
379 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
380- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Key label has multiple characters. Each key label must be a single character."
381- attr_value attr_name name))))
382 ) keys;
383 (* Check for duplicate keys *)
384 let rec find_duplicates seen = function
···387 if List.mem k seen then
388 Message_collector.add_typed collector
389 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
390- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Duplicate key label. Each key label must be unique."
391- attr_value attr_name name))))
392 else
393 find_duplicates (k :: seen) rest
394 in
···405 if has_command && has_aria_expanded then
406 Message_collector.add_typed collector
407 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
408- `Condition "a \xe2\x80\x9ccommand\xe2\x80\x9d attribute")));
409410 if has_popovertarget && has_aria_expanded then
411 Message_collector.add_typed collector
412 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
413- `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
414 end;
415416 (* Note: data-* uppercase check requires XML parsing which preserves case.
···432 | Error msg ->
433 Message_collector.add_typed collector
434 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
435- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
436- attr_value attr_name name msg))))
437 end
438 end
439 ) attrs
···1(** Attribute restrictions checker - validates that certain attributes
2 are not used on elements where they're not allowed. *)
34+let q = Error_code.q
5+6(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
7let disallowed_attrs_html = [
8 (* Elements that cannot have href attribute (RDFa misuses) *)
···176 if attr_value = "#" then
177 Message_collector.add_typed collector
178 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
179+ "Bad value %s for attribute %s on element %s: Bad hash-name reference: A hash-name reference must have at least one character after %s."
180+ (q attr_value) (q attr_name) (q name) (q "#")))))
181 end
182 ) attrs
183 end;
···192 | Error msg ->
193 Message_collector.add_typed collector
194 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
195+ "Bad value %s for attribute %s on element %s: Bad MIME type: %s"
196+ (q attr_value) (q attr_name) (q name) msg))))
197 end
198 ) attrs
199 end;
···215 (* Determine specific error message *)
216 let error_msg =
217 if String.length attr_value = 0 then
218+ Printf.sprintf "Bad value %s for attribute %s on element %s: The empty string is not a valid non-negative integer."
219+ (q "") (q attr_name) (q name)
220 else if String.contains attr_value '%' then
221+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
222+ (q attr_value) (q attr_name) (q name) (q "%")
223 else if String.length attr_value > 0 && attr_value.[0] = '-' then
224+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
225+ (q attr_value) (q attr_name) (q name) (q "-")
226 else
227 (* Find first non-digit character *)
228 let bad_char =
···236 in
237 match bad_char with
238 | Some c ->
239+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
240+ (q attr_value) (q attr_name) (q name) (q (String.make 1 c))
241 | None ->
242+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit."
243+ (q attr_value) (q attr_name) (q name)
244 in
245 Message_collector.add_typed collector
246 (`Attr (`Bad_value_generic (`Message error_msg)))
···379 if count_codepoints key > 1 then
380 Message_collector.add_typed collector
381 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
382+ "Bad value %s for attribute %s on element %s: Bad key label list: Key label has multiple characters. Each key label must be a single character."
383+ (q attr_value) (q attr_name) (q name)))))
384 ) keys;
385 (* Check for duplicate keys *)
386 let rec find_duplicates seen = function
···389 if List.mem k seen then
390 Message_collector.add_typed collector
391 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
392+ "Bad value %s for attribute %s on element %s: Bad key label list: Duplicate key label. Each key label must be unique."
393+ (q attr_value) (q attr_name) (q name)))))
394 else
395 find_duplicates (k :: seen) rest
396 in
···407 if has_command && has_aria_expanded then
408 Message_collector.add_typed collector
409 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
410+ `Condition (Printf.sprintf "a %s attribute" (q "command")))));
411412 if has_popovertarget && has_aria_expanded then
413 Message_collector.add_typed collector
414 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
415+ `Condition (Printf.sprintf "a %s attribute" (q "popovertarget")))))
416 end;
417418 (* Note: data-* uppercase check requires XML parsing which preserves case.
···434 | Error msg ->
435 Message_collector.add_typed collector
436 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
437+ "Bad value %s for attribute %s on element %s: Bad media query: %s"
438+ (q attr_value) (q attr_name) (q name) msg))))
439 end
440 end
441 ) attrs
+18-16
lib/htmlrw_check/specialized/datetime_checker.ml
···1(** Datetime attribute validation checker *)
2003(** Elements that have datetime attribute *)
4let datetime_elements = ["del"; "ins"; "time"]
5···346 if value <> String.trim value then begin
347 let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
348 let date_msg = "Bad date: The literal did not satisfy the date format." in
349- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
350- value attr_name element_name tz_msg date_msg)
351 end
352 else
353 (* Try datetime with timezone first *)
···355 | DtOk -> Ok (* Valid datetime with timezone *)
356 | DtWarning w ->
357 (* Valid but with warning - format matches Nu validator *)
358- Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
359- value attr_name element_name w)
360 | DtError tz_error ->
361 (* Try just date - valid for all elements *)
362 match validate_date value with
···365 if has_suspicious_year value || has_old_year value then begin
366 let date_msg = "Bad date: Year may be mistyped." in
367 let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
368- Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
369- value attr_name element_name date_msg tz_msg)
370 end else
371 Ok (* Valid date with normal year *)
372 | (false, date_error) ->
···394 | (true, _) -> Ok (* Valid duration P... *)
395 | (false, _) ->
396 (* Use simplified message for time element matching Nu validator format *)
397- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad time-datetime: The literal did not satisfy the time-datetime format."
398- value attr_name element_name)
399 end
400 else begin
401 (* del/ins only allow date or datetime-with-timezone *)
···426 (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427 Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428 if is_month_less_than_error then
429- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430- value attr_name element_name date_msg tz_msg)
431 else if is_tz_minutes_error || is_fraction_error then
432- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433- value attr_name element_name date_msg tz_msg)
434 else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436- value attr_name element_name tz_msg date_msg)
437 else
438- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439- value attr_name element_name tz_msg date_msg)
440 end
441442(** Checker state *)
···1(** Datetime attribute validation checker *)
23+let q = Error_code.q
4+5(** Elements that have datetime attribute *)
6let datetime_elements = ["del"; "ins"; "time"]
7···348 if value <> String.trim value then begin
349 let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
350 let date_msg = "Bad date: The literal did not satisfy the date format." in
351+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
352+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
353 end
354 else
355 (* Try datetime with timezone first *)
···357 | DtOk -> Ok (* Valid datetime with timezone *)
358 | DtWarning w ->
359 (* Valid but with warning - format matches Nu validator *)
360+ Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
361+ (q value) (q attr_name) (q element_name) w)
362 | DtError tz_error ->
363 (* Try just date - valid for all elements *)
364 match validate_date value with
···367 if has_suspicious_year value || has_old_year value then begin
368 let date_msg = "Bad date: Year may be mistyped." in
369 let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
370+ Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
371+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
372 end else
373 Ok (* Valid date with normal year *)
374 | (false, date_error) ->
···396 | (true, _) -> Ok (* Valid duration P... *)
397 | (false, _) ->
398 (* Use simplified message for time element matching Nu validator format *)
399+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad time-datetime: The literal did not satisfy the time-datetime format."
400+ (q value) (q attr_name) (q element_name))
401 end
402 else begin
403 (* del/ins only allow date or datetime-with-timezone *)
···428 (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
429 Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
430 if is_month_less_than_error then
431+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
432+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
433 else if is_tz_minutes_error || is_fraction_error then
434+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
435+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
436 else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
437+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
438+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
439 else
440+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
441+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
442 end
443444(** Checker state *)
+9-6
lib/htmlrw_check/specialized/microdata_checker.ml
···23 Validates HTML5 microdata attributes. *)
40005(** Information about an itemscope. *)
6type item_scope = {
7 element : string;
···74 let url_trimmed = String.trim url in
75 if String.length url_trimmed = 0 then
76 Some (Printf.sprintf
77- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
78- original_value attr_name element)
79 else
80 (* First check if it has a scheme (required for absolute URL) *)
81 match Url_checker.extract_scheme url_trimmed with
82 | None ->
83 Some (Printf.sprintf
84- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
85- original_value attr_name element url)
86 | Some _ ->
87 (* Has a scheme - do comprehensive URL validation *)
88 match Url_checker.validate_url url element attr_name with
···94 (* Escape backslashes in replacement string for Str.global_replace *)
95 let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
96 let error_msg = Str.global_replace
97- (Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
98- (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
99 error_msg in
100 Some error_msg
101
···23 Validates HTML5 microdata attributes. *)
45+(** Quote helper for consistent message formatting. *)
6+let q = Error_code.q
7+8(** Information about an itemscope. *)
9type item_scope = {
10 element : string;
···77 let url_trimmed = String.trim url in
78 if String.length url_trimmed = 0 then
79 Some (Printf.sprintf
80+ "Bad value %s for attribute %s on element %s: Bad absolute URL: Must be non-empty."
81+ (q original_value) (q attr_name) (q element))
82 else
83 (* First check if it has a scheme (required for absolute URL) *)
84 match Url_checker.extract_scheme url_trimmed with
85 | None ->
86 Some (Printf.sprintf
87+ "Bad value %s for attribute %s on element %s: Bad absolute URL: The string %s is not an absolute URL."
88+ (q original_value) (q attr_name) (q element) (q url))
89 | Some _ ->
90 (* Has a scheme - do comprehensive URL validation *)
91 match Url_checker.validate_url url element attr_name with
···97 (* Escape backslashes in replacement string for Str.global_replace *)
98 let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
99 let error_msg = Str.global_replace
100+ (Str.regexp_string (Printf.sprintf "%s for attribute" (q url)))
101+ (Printf.sprintf "%s for attribute" (q escaped_original))
102 error_msg in
103 Some error_msg
104
+32-30
lib/htmlrw_check/specialized/mime_type_checker.ml
···23 Validates MIME type values in type attributes. *)
4005(** Validate a MIME type value. Returns error message or None. *)
6let validate_mime_type value element attr_name =
7 let len = String.length value in
8 if len = 0 then
9 Some (Printf.sprintf
10- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
11- value attr_name element)
12 else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
13 (* Check if this is a semicolon followed by only whitespace *)
14 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···18 let params_trimmed = String.trim params in
19 if params_trimmed = "" then
20 Some (Printf.sprintf
21- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
22- value attr_name element)
23 else
24 Some (Printf.sprintf
25- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
26- value attr_name element)
27 | None ->
28 Some (Printf.sprintf
29- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
30- value attr_name element)
31 end
32 else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
33 Some (Printf.sprintf
34- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
35- value attr_name element)
36 else
37 (* Parse type/subtype *)
38 let slash_pos = try Some (String.index value '/') with Not_found -> None in
···43 (match semicolon_pos with
44 | Some _ ->
45 Some (Printf.sprintf
46- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
47- value attr_name element)
48 | None ->
49 Some (Printf.sprintf
50- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
51- value attr_name element))
52 | Some slash_pos ->
53 (* Check for empty subtype *)
54 let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···60 let subtype_trimmed = String.trim subtype in
61 if subtype_trimmed = "" then
62 Some (Printf.sprintf
63- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
64- value attr_name element)
65 else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
66 (* Space before semicolon - also check parameter format *)
67 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···72 let params_trimmed = String.trim params in
73 if params_trimmed = "" then
74 Some (Printf.sprintf
75- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
76- value attr_name element)
77 else
78 (* Check for param_name=value format *)
79 let eq_pos = try Some (String.index params '=') with Not_found -> None in
80 (match eq_pos with
81 | None ->
82 Some (Printf.sprintf
83- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
84- value attr_name element)
85 | Some _ -> None)
86 | None -> None)
87 else
···94 let params_trimmed = String.trim params in
95 if params_trimmed = "" then
96 Some (Printf.sprintf
97- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
98- value attr_name element)
99 else
100 (* Check for param_name=value format *)
101 let eq_pos = try Some (String.index params '=') with Not_found -> None in
102 (match eq_pos with
103 | None ->
104 Some (Printf.sprintf
105- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
106- value attr_name element)
107 | Some eq_pos ->
108 let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
109 let param_value_trimmed = String.trim param_value in
110 if param_value_trimmed = "" then
111 Some (Printf.sprintf
112- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
113- value attr_name element)
114 else if param_value_trimmed.[0] = '"' then
115 (* Quoted string - check for closing quote *)
116 let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···127 in
128 if has_backslash_at_end then
129 Some (Printf.sprintf
130- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
131- value attr_name element)
132 else
133 Some (Printf.sprintf
134- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
135- value attr_name element))
136 else
137 None))
138
···23 Validates MIME type values in type attributes. *)
45+let q = Error_code.q
6+7(** Validate a MIME type value. Returns error message or None. *)
8let validate_mime_type value element attr_name =
9 let len = String.length value in
10 if len = 0 then
11 Some (Printf.sprintf
12+ "Bad value %s for attribute %s on element %s: Bad MIME type: Empty value."
13+ (q value) (q attr_name) (q element))
14 else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
15 (* Check if this is a semicolon followed by only whitespace *)
16 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···20 let params_trimmed = String.trim params in
21 if params_trimmed = "" then
22 Some (Printf.sprintf
23+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
24+ (q value) (q attr_name) (q element))
25 else
26 Some (Printf.sprintf
27+ "Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
28+ (q value) (q attr_name) (q element))
29 | None ->
30 Some (Printf.sprintf
31+ "Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
32+ (q value) (q attr_name) (q element))
33 end
34 else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
35 Some (Printf.sprintf
36+ "Bad value %s for attribute %s on element %s: Bad MIME type: Expected a token character but saw %s instead."
37+ (q value) (q attr_name) (q element) (q " "))
38 else
39 (* Parse type/subtype *)
40 let slash_pos = try Some (String.index value '/') with Not_found -> None in
···45 (match semicolon_pos with
46 | Some _ ->
47 Some (Printf.sprintf
48+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
49+ (q value) (q attr_name) (q element))
50 | None ->
51 Some (Printf.sprintf
52+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
53+ (q value) (q attr_name) (q element)))
54 | Some slash_pos ->
55 (* Check for empty subtype *)
56 let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···62 let subtype_trimmed = String.trim subtype in
63 if subtype_trimmed = "" then
64 Some (Printf.sprintf
65+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
66+ (q value) (q attr_name) (q element))
67 else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
68 (* Space before semicolon - also check parameter format *)
69 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···74 let params_trimmed = String.trim params in
75 if params_trimmed = "" then
76 Some (Printf.sprintf
77+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
78+ (q value) (q attr_name) (q element))
79 else
80 (* Check for param_name=value format *)
81 let eq_pos = try Some (String.index params '=') with Not_found -> None in
82 (match eq_pos with
83 | None ->
84 Some (Printf.sprintf
85+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
86+ (q value) (q attr_name) (q element))
87 | Some _ -> None)
88 | None -> None)
89 else
···96 let params_trimmed = String.trim params in
97 if params_trimmed = "" then
98 Some (Printf.sprintf
99+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
100+ (q value) (q attr_name) (q element))
101 else
102 (* Check for param_name=value format *)
103 let eq_pos = try Some (String.index params '=') with Not_found -> None in
104 (match eq_pos with
105 | None ->
106 Some (Printf.sprintf
107+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
108+ (q value) (q attr_name) (q element))
109 | Some eq_pos ->
110 let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
111 let param_value_trimmed = String.trim param_value in
112 if param_value_trimmed = "" then
113 Some (Printf.sprintf
114+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
115+ (q value) (q attr_name) (q element))
116 else if param_value_trimmed.[0] = '"' then
117 (* Quoted string - check for closing quote *)
118 let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···129 in
130 if has_backslash_at_end then
131 Some (Printf.sprintf
132+ "Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
133+ (q value) (q attr_name) (q element))
134 else
135 Some (Printf.sprintf
136+ "Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
137+ (q value) (q attr_name) (q element)))
138 else
139 None))
140
+10-8
lib/htmlrw_check/specialized/svg_checker.ml
···23 Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
4005type font_state = {
6 mutable has_missing_glyph : bool;
7}
···292 if value <> svg_ns_url then
293 Message_collector.add_typed collector
294 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
295- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns\xe2\x80\x9d (only \xe2\x80\x9c%s\xe2\x80\x9d permitted here)."
296- value svg_ns_url))))
297 | "xmlns:xlink" ->
298 if value <> "http://www.w3.org/1999/xlink" then
299 Message_collector.add_typed collector
300 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
301- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:link\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
302- value))))
303 | _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
304 (* Other xmlns declarations are not allowed in HTML-embedded SVG *)
305 Message_collector.add_typed collector
···324 let context = String.sub d !context_start (ctx_end - !context_start) in
325 Message_collector.add_typed collector
326 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
327- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected command but found \xe2\x80\x9c#\xe2\x80\x9d (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
328- d element context))));
329 i := len (* Stop processing *)
330 | _ ->
331 incr i
···344 let context = String.sub d ctx_start (flag_end - ctx_start) in
345 Message_collector.add_typed collector
346 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
347- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
348- d element flag context))))
349 end
350 with Not_found -> ()
351
···23 Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
45+let q = Error_code.q
6+7type font_state = {
8 mutable has_missing_glyph : bool;
9}
···294 if value <> svg_ns_url then
295 Message_collector.add_typed collector
296 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
297+ "Bad value %s for the attribute %s (only %s permitted here)."
298+ (q value) (q "xmlns") (q svg_ns_url)))))
299 | "xmlns:xlink" ->
300 if value <> "http://www.w3.org/1999/xlink" then
301 Message_collector.add_typed collector
302 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
303+ "Bad value %s for the attribute %s (only %s permitted here)."
304+ (q value) (q "xmlns:link") (q "http://www.w3.org/1999/xlink")))))
305 | _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
306 (* Other xmlns declarations are not allowed in HTML-embedded SVG *)
307 Message_collector.add_typed collector
···326 let context = String.sub d !context_start (ctx_end - !context_start) in
327 Message_collector.add_typed collector
328 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
329+ "Bad value %s for attribute %s on element %s: Bad SVG path data: Expected command but found %s (context: %s)."
330+ (q d) (q "d") (q element) (q "#") (q context)))));
331 i := len (* Stop processing *)
332 | _ ->
333 incr i
···346 let context = String.sub d ctx_start (flag_end - ctx_start) in
347 Message_collector.add_typed collector
348 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
349+ "Bad value %s for attribute %s on element %s: Bad SVG path data: Expected %s or %s for large-arc-flag for %s command but found %s instead (context: %s)."
350+ (q d) (q "d") (q element) (q "0") (q "1") (q "a") (q flag) (q context)))))
351 end
352 with Not_found -> ()
353
+73-70
lib/htmlrw_check/specialized/url_checker.ml
···1(** URL validation checker for href, src, action, and other URL attributes. *)
20003(** Attributes that contain URLs and should be validated.
4 Note: srcset uses special microsyntax, not validated as URL here.
5 Note: input[value] is only checked for type="url", handled specially below. *)
···44let validate_ipv6_host host url attr_name element_name =
45 (* Host should be in format [xxxx:...] *)
46 if String.length host < 3 then
47- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
48- url attr_name element_name)
49 else begin
50 (* Check if all characters are valid IPv6 chars *)
51 let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
52 if invalid_char then
53- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
54- url attr_name element_name)
55 else
56 None
57 end
···239 let _ = contains_invalid_unicode decoded in
240 None
241 with Exit ->
242- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
243- url attr_name element_name)
244245(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
246let contains_percent_char s =
···258 let decoded = percent_decode host in
259 (* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
260 if contains_percent_char decoded then
261- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%%\xe2\x80\x9d is not allowed."
262- url attr_name element_name)
263 else
264 None
265···275 ) port;
276 match !invalid_char with
277 | Some c ->
278- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in port: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
279- url attr_name element_name c)
280 | None ->
281 (* Check port range *)
282 try
283 let port_num = int_of_string port in
284 if port_num >= 65536 then
285- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Port number must be less than 65536."
286- url attr_name element_name)
287 else
288 None
289 with _ -> None
···297 (* Check for empty host *)
298 let requires_host = List.mem scheme special_schemes in
299 if host = "" && requires_host && scheme <> "file" then
300- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: empty host."
301- url attr_name element_name)
302 else
303 (* Check for invalid chars *)
304 let invalid_char =
···306 in
307 match invalid_char with
308 | Some c ->
309- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
310- url attr_name element_name c)
311 | None ->
312 (* Check for | *)
313 if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
314- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
315- url attr_name element_name)
316 (* Check for backslash in host *)
317 else if String.contains host '\\' then
318- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
319- url attr_name element_name)
320 (* Check for space in host *)
321 else if String.contains host ' ' then
322- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
323- url attr_name element_name)
324 (* Check for invalid percent-encoded Unicode in host *)
325 else begin
326 match check_invalid_percent_encoded_unicode host url attr_name element_name with
···342 let colon_pos = String.index url ':' in
343 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
344 if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
345- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")."
346- url attr_name element_name)
347 else
348 None
349 end else
···357 | Some scheme ->
358 if scheme = "data" && String.contains url '#' then
359 let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
360- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s Fragment is not allowed for data: URIs according to RFC 2397."
361- url attr_name element_name url_type)
362 else
363 None
364···375 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
376 (* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
377 if String.length after_colon > 0 && after_colon.[0] = '/' then
378- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a token character or a semicolon but saw \xe2\x80\x9c/\xe2\x80\x9d instead."
379- url attr_name element_name)
380 else
381 None
382 end else
···393 let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
394 (* Check for tab in scheme data *)
395 if String.contains scheme_data '\t' then
396- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: tab is not allowed."
397- url attr_name element_name)
398 (* Check for newline in scheme data *)
399 else if String.contains scheme_data '\n' then
400- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
401- url attr_name element_name)
402 (* Check for carriage return in scheme data *)
403 else if String.contains scheme_data '\r' then
404- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
405- url attr_name element_name)
406 (* Check for space in scheme data *)
407 else if String.contains scheme_data ' ' then
408- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: space is not allowed."
409- url attr_name element_name)
410 else
411 None
412 end else
···449 let path = remove_query_fragment raw_path in
450 (* Check for space in path (not allowed) *)
451 if String.contains path ' ' then
452- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: space is not allowed."
453- url attr_name element_name)
454 (* Check for pipe in path (not allowed except in file:// authority) *)
455 else if String.contains path '|' then
456- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
457- url attr_name element_name)
458 (* Check for unescaped square brackets in path *)
459 else if String.contains path '[' then
460- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
461- url attr_name element_name)
462 else
463 None
464···470 | None ->
471 (* Check for square brackets at start (not IPv6 - that requires scheme) *)
472 if String.length url > 0 && url.[0] = '[' then
473- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
474- url attr_name element_name)
475 else
476 None
477···489 if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
490 find_bare_percent (i + 3) (* Valid percent encoding, continue *)
491 else
492- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Percentage (\xe2\x80\x9c%%\xe2\x80\x9d) is not followed by two hexadecimal digits."
493- url attr_name element_name)
494 end else
495 find_bare_percent (i + 1)
496 in
···511 let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
512 (* Check for unescaped space in query *)
513 if String.contains query ' ' then
514- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in query: space is not allowed."
515- url attr_name element_name)
516 else
517 None
518 with Not_found -> None (* No query string *)
···524 let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
525 (* Check for backslash in fragment *)
526 if String.contains fragment '\\' then
527- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
528- url attr_name element_name)
529 (* Check for second hash in fragment *)
530 else if String.contains fragment '#' then
531- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c#\xe2\x80\x9d is not allowed."
532- url attr_name element_name)
533 (* Check for space in fragment *)
534 else if String.contains fragment ' ' then
535- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: space is not allowed."
536- url attr_name element_name)
537 else
538 None
539 with Not_found -> None (* No fragment *)
···572 let userinfo = String.sub authority 0 at in
573 (* Check for @ in userinfo (should be percent-encoded) *)
574 if String.contains userinfo '@' then
575- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: User or password contains an at symbol (\xe2\x80\x9c@\xe2\x80\x9d) not percent-encoded."
576- url attr_name element_name)
577 (* Check for space *)
578 else if String.contains userinfo ' ' then
579- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: space is not allowed."
580- url attr_name element_name)
581 else begin
582 (* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
583 let find_non_ascii_char userinfo =
···600 in
601 match find_non_ascii_char userinfo with
602 | Some bad_char ->
603- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%s\xe2\x80\x9d is not allowed."
604- url attr_name element_name bad_char)
605 | None ->
606 (* Check for other invalid chars *)
607 let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
608 match invalid with
609 | Some c ->
610- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
611- url attr_name element_name c)
612 | None -> None
613 end
614 with _ -> None
···634 let attr_lower = String.lowercase_ascii attr_name in
635 if List.mem attr_lower must_be_non_empty ||
636 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
637- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Must be non-empty."
638- original_url attr_name element_name)
639 else
640 None
641 end
···647 let last = original_url.[String.length original_url - 1] in
648 last = ' ' || last = '\t' in
649 if has_leading || has_trailing then
650- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character: leading/trailing ASCII whitespace."
651- original_url attr_name element_name)
652 else None
653 (* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
654 else begin
···657 | None ->
658 (* Check for newlines/tabs in special scheme URLs *)
659 if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
660- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Tab, new line or carriage return found."
661- url attr_name element_name)
662 else begin
663 (* Check for relative URL issues first *)
664 match check_relative_url url attr_name element_name with
···697698 (* Check for backslash AFTER special scheme check *)
699 if String.contains url '\\' then
700- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Backslash (\"\\\") used as path segment delimiter."
701- url attr_name element_name)
702 else
703704 (* Check path segment for illegal characters *)
···1(** URL validation checker for href, src, action, and other URL attributes. *)
23+(** Quote helper for consistent message formatting. *)
4+let q = Error_code.q
5+6(** Attributes that contain URLs and should be validated.
7 Note: srcset uses special microsyntax, not validated as URL here.
8 Note: input[value] is only checked for type="url", handled specially below. *)
···47let validate_ipv6_host host url attr_name element_name =
48 (* Host should be in format [xxxx:...] *)
49 if String.length host < 3 then
50+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
51+ (q url) (q attr_name) (q element_name))
52 else begin
53 (* Check if all characters are valid IPv6 chars *)
54 let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
55 if invalid_char then
56+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
57+ (q url) (q attr_name) (q element_name))
58 else
59 None
60 end
···242 let _ = contains_invalid_unicode decoded in
243 None
244 with Exit ->
245+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
246+ (q url) (q attr_name) (q element_name))
247248(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
249let contains_percent_char s =
···261 let decoded = percent_decode host in
262 (* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
263 if contains_percent_char decoded then
264+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
265+ (q url) (q attr_name) (q element_name) (q "%"))
266 else
267 None
268···278 ) port;
279 match !invalid_char with
280 | Some c ->
281+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in port: %s is not allowed."
282+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
283 | None ->
284 (* Check port range *)
285 try
286 let port_num = int_of_string port in
287 if port_num >= 65536 then
288+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Port number must be less than 65536."
289+ (q url) (q attr_name) (q element_name))
290 else
291 None
292 with _ -> None
···300 (* Check for empty host *)
301 let requires_host = List.mem scheme special_schemes in
302 if host = "" && requires_host && scheme <> "file" then
303+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: empty host."
304+ (q url) (q attr_name) (q element_name))
305 else
306 (* Check for invalid chars *)
307 let invalid_char =
···309 in
310 match invalid_char with
311 | Some c ->
312+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
313+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
314 | None ->
315 (* Check for | *)
316 if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
317+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
318+ (q url) (q attr_name) (q element_name) (q "|"))
319 (* Check for backslash in host *)
320 else if String.contains host '\\' then
321+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
322+ (q url) (q attr_name) (q element_name) (q "\\"))
323 (* Check for space in host *)
324 else if String.contains host ' ' then
325+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
326+ (q url) (q attr_name) (q element_name))
327 (* Check for invalid percent-encoded Unicode in host *)
328 else begin
329 match check_invalid_percent_encoded_unicode host url attr_name element_name with
···345 let colon_pos = String.index url ':' in
346 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
347 if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
348+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a slash (\"/\")."
349+ (q url) (q attr_name) (q element_name))
350 else
351 None
352 end else
···360 | Some scheme ->
361 if scheme = "data" && String.contains url '#' then
362 let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
363+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: %s Fragment is not allowed for data: URIs according to RFC 2397."
364+ (q url) (q attr_name) (q element_name) url_type)
365 else
366 None
367···378 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
379 (* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
380 if String.length after_colon > 0 && after_colon.[0] = '/' then
381+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a token character or a semicolon but saw %s instead."
382+ (q url) (q attr_name) (q element_name) (q "/"))
383 else
384 None
385 end else
···396 let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
397 (* Check for tab in scheme data *)
398 if String.contains scheme_data '\t' then
399+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: tab is not allowed."
400+ (q url) (q attr_name) (q element_name))
401 (* Check for newline in scheme data *)
402 else if String.contains scheme_data '\n' then
403+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
404+ (q url) (q attr_name) (q element_name))
405 (* Check for carriage return in scheme data *)
406 else if String.contains scheme_data '\r' then
407+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
408+ (q url) (q attr_name) (q element_name))
409 (* Check for space in scheme data *)
410 else if String.contains scheme_data ' ' then
411+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: space is not allowed."
412+ (q url) (q attr_name) (q element_name))
413 else
414 None
415 end else
···452 let path = remove_query_fragment raw_path in
453 (* Check for space in path (not allowed) *)
454 if String.contains path ' ' then
455+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: space is not allowed."
456+ (q url) (q attr_name) (q element_name))
457 (* Check for pipe in path (not allowed except in file:// authority) *)
458 else if String.contains path '|' then
459+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
460+ (q url) (q attr_name) (q element_name) (q "|"))
461 (* Check for unescaped square brackets in path *)
462 else if String.contains path '[' then
463+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
464+ (q url) (q attr_name) (q element_name) (q "["))
465 else
466 None
467···473 | None ->
474 (* Check for square brackets at start (not IPv6 - that requires scheme) *)
475 if String.length url > 0 && url.[0] = '[' then
476+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
477+ (q url) (q attr_name) (q element_name) (q "["))
478 else
479 None
480···492 if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
493 find_bare_percent (i + 3) (* Valid percent encoding, continue *)
494 else
495+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Percentage (%s) is not followed by two hexadecimal digits."
496+ (q url) (q attr_name) (q element_name) (q "%"))
497 end else
498 find_bare_percent (i + 1)
499 in
···514 let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
515 (* Check for unescaped space in query *)
516 if String.contains query ' ' then
517+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in query: space is not allowed."
518+ (q url) (q attr_name) (q element_name))
519 else
520 None
521 with Not_found -> None (* No query string *)
···527 let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
528 (* Check for backslash in fragment *)
529 if String.contains fragment '\\' then
530+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
531+ (q url) (q attr_name) (q element_name) (q "\\"))
532 (* Check for second hash in fragment *)
533 else if String.contains fragment '#' then
534+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
535+ (q url) (q attr_name) (q element_name) (q "#"))
536 (* Check for space in fragment *)
537 else if String.contains fragment ' ' then
538+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: space is not allowed."
539+ (q url) (q attr_name) (q element_name))
540 else
541 None
542 with Not_found -> None (* No fragment *)
···575 let userinfo = String.sub authority 0 at in
576 (* Check for @ in userinfo (should be percent-encoded) *)
577 if String.contains userinfo '@' then
578+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: User or password contains an at symbol (%s) not percent-encoded."
579+ (q url) (q attr_name) (q element_name) (q "@"))
580 (* Check for space *)
581 else if String.contains userinfo ' ' then
582+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: space is not allowed."
583+ (q url) (q attr_name) (q element_name))
584 else begin
585 (* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
586 let find_non_ascii_char userinfo =
···603 in
604 match find_non_ascii_char userinfo with
605 | Some bad_char ->
606+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
607+ (q url) (q attr_name) (q element_name) (q bad_char))
608 | None ->
609 (* Check for other invalid chars *)
610 let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
611 match invalid with
612 | Some c ->
613+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
614+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
615 | None -> None
616 end
617 with _ -> None
···637 let attr_lower = String.lowercase_ascii attr_name in
638 if List.mem attr_lower must_be_non_empty ||
639 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
640+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
641+ (q original_url) (q attr_name) (q element_name))
642 else
643 None
644 end
···650 let last = original_url.[String.length original_url - 1] in
651 last = ' ' || last = '\t' in
652 if has_leading || has_trailing then
653+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character: leading/trailing ASCII whitespace."
654+ (q original_url) (q attr_name) (q element_name))
655 else None
656 (* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
657 else begin
···660 | None ->
661 (* Check for newlines/tabs in special scheme URLs *)
662 if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
663+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Tab, new line or carriage return found."
664+ (q url) (q attr_name) (q element_name))
665 else begin
666 (* Check for relative URL issues first *)
667 match check_relative_url url attr_name element_name with
···700701 (* Check for backslash AFTER special scheme check *)
702 if String.contains url '\\' then
703+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Backslash (\"\\\") used as path segment delimiter."
704+ (q url) (q attr_name) (q element_name))
705 else
706707 (* Check path segment for illegal characters *)