···11(** Helper functions for language tag validation *)
2233+let q = Error_code.q
44+35let is_lower_alpha c = c >= 'a' && c <= 'z'
46let is_upper_alpha c = c >= 'A' && c <= 'Z'
57let is_alpha c = is_lower_alpha c || is_upper_alpha c
···123125 if is_valid_extlang first_lower second_lower then
124126 Ok ()
125127 else
126126- Error (Printf.sprintf "Bad extlang subtag \xe2\x80\x9c%s\xe2\x80\x9d" second_lower)
128128+ Error (Printf.sprintf "Bad extlang subtag %s" (q second_lower))
127129 else
128130 Ok () (* Not an extlang pattern, continue *)
129131 | [] -> Ok ())
+8-6
lib/htmlrw_check/parse_error_bridge.ml
···33 SPDX-License-Identifier: MIT
44 ---------------------------------------------------------------------------*)
5566+let q = Error_code.q
77+68(** Generate human-readable message for a parse error code *)
79let message_of_parse_error code =
810 let code_str = Html5rw.Parse_error_code.to_string code in
···5759 let cp = int_of_string ("0x" ^ cp_str) in
5860 Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
5961 else if s = "no-p-element-in-scope" then
6060- "No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen."
6262+ Printf.sprintf "No %s element in scope but a %s end tag seen." (q "p") (q "p")
6163 else if s = "end-tag-p-implied-but-open-elements" then
6262- "End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements."
6464+ Printf.sprintf "End tag %s implied, but there were open elements." (q "p")
6365 else if s = "end-tag-br" then
6464- "End tag \xe2\x80\x9cbr\xe2\x80\x9d."
6666+ Printf.sprintf "End tag %s." (q "br")
6567 else if s = "expected-closing-tag-but-got-eof" then
6668 "End of file seen and there were open elements."
6769 else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
6870 let colon_pos = String.index s ':' in
6971 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
7070- Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
7272+ Printf.sprintf "Bad start tag in %s in %s in %s." (q element) (q "noscript") (q "head")
7173 else if String.starts_with ~prefix:"unexpected-end-tag:" s then
7274 let element = String.sub s 19 (String.length s - 19) in
7373- Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
7575+ Printf.sprintf "Stray end tag %s." (q element)
7476 else if String.starts_with ~prefix:"start-tag-in-table:" s then
7577 let tag = String.sub s 19 (String.length s - 19) in
7676- Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
7878+ Printf.sprintf "Start tag %s seen in %s." (q tag) (q "table")
7779 else
7880 Printf.sprintf "Parse error: %s" s
7981 with _ -> Printf.sprintf "Parse error: %s" s)
+4-2
lib/htmlrw_check/semantic/obsolete_checker.ml
···11+let q = Error_code.q
22+13(** Obsolete elements map: element name -> suggestion message *)
24let obsolete_elements =
35 let tbl = Hashtbl.create 32 in
···131133 "Use the HTTP OPTIONS feature instead.";
132134133135 register "name" ["a"]
134134- "Consider putting an \xe2\x80\x9cid\xe2\x80\x9d attribute on the nearest container instead.";
136136+ (Printf.sprintf "Consider putting an %s attribute on the nearest container instead." (q "id"));
135137136138 register "name" ["embed"; "img"; "option"]
137137- "Use the \xe2\x80\x9cid\xe2\x80\x9d attribute instead.";
139139+ (Printf.sprintf "Use the %s attribute instead." (q "id"));
138140139141 register "nohref" ["area"]
140142 "Omitting the \"href\" attribute is sufficient.";
···11(** Required attribute checker implementation. *)
2233+let q = Error_code.q
44+35type state = {
46 mutable _in_figure : bool;
57 (** Track if we're inside a <figure> element (alt is more critical there) *)
···8183 in
82848385 if not valid then
8484- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
8586 Message_collector.add_typed collector
8687 (`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
8788 (q "meta") (q "charset") (q "name")
···122123 let value_lower = String.lowercase_ascii value in
123124 (* Valid values: empty string, auto, manual, hint *)
124125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
125125- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
126126 Message_collector.add_typed collector
127127 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128128 (q value) (q "popover") (q element_name)))))
···141141 let value = float_of_string value_str in
142142 let min_val = float_of_string min_str in
143143 if min_val > value then
144144- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
145144 Message_collector.add_typed collector
146145 (`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
147146 (q "min") (q "value")))
···161160 | Some max_str -> (try float_of_string max_str with _ -> 1.0)
162161 in
163162 if value > max_val then
164164- let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165163 (* Check which message to use based on whether max is present *)
166164 if Attr_utils.has_attr "max" attrs then
167165 Message_collector.add_typed collector
+1-1
lib/htmlrw_check/specialized/aria_checker.ml
···586586 if aria_checked <> None then
587587 Message_collector.add_typed collector
588588 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
589589- `Condition "a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9ccheckbox\xe2\x80\x9d")))
589589+ `Condition (Printf.sprintf "a %s attribute whose value is %s" (q "type") (q "checkbox")))))
590590 | _ -> ()
591591 end;
592592
···11(** Attribute restrictions checker - validates that certain attributes
22 are not used on elements where they're not allowed. *)
3344+let q = Error_code.q
55+46(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
57let disallowed_attrs_html = [
68 (* Elements that cannot have href attribute (RDFa misuses) *)
···174176 if attr_value = "#" then
175177 Message_collector.add_typed collector
176178 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
177177- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad hash-name reference: A hash-name reference must have at least one character after \xe2\x80\x9c#\xe2\x80\x9d."
178178- attr_value attr_name name))))
179179+ "Bad value %s for attribute %s on element %s: Bad hash-name reference: A hash-name reference must have at least one character after %s."
180180+ (q attr_value) (q attr_name) (q name) (q "#")))))
179181 end
180182 ) attrs
181183 end;
···190192 | Error msg ->
191193 Message_collector.add_typed collector
192194 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
193193- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: %s"
194194- attr_value attr_name name msg))))
195195+ "Bad value %s for attribute %s on element %s: Bad MIME type: %s"
196196+ (q attr_value) (q attr_name) (q name) msg))))
195197 end
196198 ) attrs
197199 end;
···213215 (* Determine specific error message *)
214216 let error_msg =
215217 if String.length attr_value = 0 then
216216- Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The empty string is not a valid non-negative integer."
217217- attr_name name
218218+ Printf.sprintf "Bad value %s for attribute %s on element %s: The empty string is not a valid non-negative integer."
219219+ (q "") (q attr_name) (q name)
218220 else if String.contains attr_value '%' then
219219- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%%\xe2\x80\x9d instead."
220220- attr_value attr_name name
221221+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
222222+ (q attr_value) (q attr_name) (q name) (q "%")
221223 else if String.length attr_value > 0 && attr_value.[0] = '-' then
222222- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c-\xe2\x80\x9d instead."
223223- attr_value attr_name name
224224+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
225225+ (q attr_value) (q attr_name) (q name) (q "-")
224226 else
225227 (* Find first non-digit character *)
226228 let bad_char =
···234236 in
235237 match bad_char with
236238 | Some c ->
237237- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead."
238238- attr_value attr_name name c
239239+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
240240+ (q attr_value) (q attr_name) (q name) (q (String.make 1 c))
239241 | None ->
240240- Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit."
241241- attr_value attr_name name
242242+ Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit."
243243+ (q attr_value) (q attr_name) (q name)
242244 in
243245 Message_collector.add_typed collector
244246 (`Attr (`Bad_value_generic (`Message error_msg)))
···377379 if count_codepoints key > 1 then
378380 Message_collector.add_typed collector
379381 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
380380- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Key label has multiple characters. Each key label must be a single character."
381381- attr_value attr_name name))))
382382+ "Bad value %s for attribute %s on element %s: Bad key label list: Key label has multiple characters. Each key label must be a single character."
383383+ (q attr_value) (q attr_name) (q name)))))
382384 ) keys;
383385 (* Check for duplicate keys *)
384386 let rec find_duplicates seen = function
···387389 if List.mem k seen then
388390 Message_collector.add_typed collector
389391 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
390390- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Duplicate key label. Each key label must be unique."
391391- attr_value attr_name name))))
392392+ "Bad value %s for attribute %s on element %s: Bad key label list: Duplicate key label. Each key label must be unique."
393393+ (q attr_value) (q attr_name) (q name)))))
392394 else
393395 find_duplicates (k :: seen) rest
394396 in
···405407 if has_command && has_aria_expanded then
406408 Message_collector.add_typed collector
407409 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
408408- `Condition "a \xe2\x80\x9ccommand\xe2\x80\x9d attribute")));
410410+ `Condition (Printf.sprintf "a %s attribute" (q "command")))));
409411410412 if has_popovertarget && has_aria_expanded then
411413 Message_collector.add_typed collector
412414 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
413413- `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
415415+ `Condition (Printf.sprintf "a %s attribute" (q "popovertarget")))))
414416 end;
415417416418 (* Note: data-* uppercase check requires XML parsing which preserves case.
···432434 | Error msg ->
433435 Message_collector.add_typed collector
434436 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
435435- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
436436- attr_value attr_name name msg))))
437437+ "Bad value %s for attribute %s on element %s: Bad media query: %s"
438438+ (q attr_value) (q attr_name) (q name) msg))))
437439 end
438440 end
439441 ) attrs
+18-16
lib/htmlrw_check/specialized/datetime_checker.ml
···11(** Datetime attribute validation checker *)
2233+let q = Error_code.q
44+35(** Elements that have datetime attribute *)
46let datetime_elements = ["del"; "ins"; "time"]
57···346348 if value <> String.trim value then begin
347349 let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
348350 let date_msg = "Bad date: The literal did not satisfy the date format." in
349349- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
350350- value attr_name element_name tz_msg date_msg)
351351+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
352352+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
351353 end
352354 else
353355 (* Try datetime with timezone first *)
···355357 | DtOk -> Ok (* Valid datetime with timezone *)
356358 | DtWarning w ->
357359 (* Valid but with warning - format matches Nu validator *)
358358- Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
359359- value attr_name element_name w)
360360+ Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
361361+ (q value) (q attr_name) (q element_name) w)
360362 | DtError tz_error ->
361363 (* Try just date - valid for all elements *)
362364 match validate_date value with
···365367 if has_suspicious_year value || has_old_year value then begin
366368 let date_msg = "Bad date: Year may be mistyped." in
367369 let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
368368- Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
369369- value attr_name element_name date_msg tz_msg)
370370+ Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
371371+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
370372 end else
371373 Ok (* Valid date with normal year *)
372374 | (false, date_error) ->
···394396 | (true, _) -> Ok (* Valid duration P... *)
395397 | (false, _) ->
396398 (* Use simplified message for time element matching Nu validator format *)
397397- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad time-datetime: The literal did not satisfy the time-datetime format."
398398- value attr_name element_name)
399399+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad time-datetime: The literal did not satisfy the time-datetime format."
400400+ (q value) (q attr_name) (q element_name))
399401 end
400402 else begin
401403 (* del/ins only allow date or datetime-with-timezone *)
···426428 (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427429 Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428430 if is_month_less_than_error then
429429- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430430- value attr_name element_name date_msg tz_msg)
431431+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
432432+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
431433 else if is_tz_minutes_error || is_fraction_error then
432432- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433433- value attr_name element_name date_msg tz_msg)
434434+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
435435+ (q value) (q attr_name) (q element_name) date_msg tz_msg)
434436 else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435435- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436436- value attr_name element_name tz_msg date_msg)
437437+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
438438+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
437439 else
438438- Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439439- value attr_name element_name tz_msg date_msg)
440440+ Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
441441+ (q value) (q attr_name) (q element_name) tz_msg date_msg)
440442 end
441443442444(** Checker state *)
+9-6
lib/htmlrw_check/specialized/microdata_checker.ml
···2233 Validates HTML5 microdata attributes. *)
4455+(** Quote helper for consistent message formatting. *)
66+let q = Error_code.q
77+58(** Information about an itemscope. *)
69type item_scope = {
710 element : string;
···7477 let url_trimmed = String.trim url in
7578 if String.length url_trimmed = 0 then
7679 Some (Printf.sprintf
7777- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
7878- original_value attr_name element)
8080+ "Bad value %s for attribute %s on element %s: Bad absolute URL: Must be non-empty."
8181+ (q original_value) (q attr_name) (q element))
7982 else
8083 (* First check if it has a scheme (required for absolute URL) *)
8184 match Url_checker.extract_scheme url_trimmed with
8285 | None ->
8386 Some (Printf.sprintf
8484- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
8585- original_value attr_name element url)
8787+ "Bad value %s for attribute %s on element %s: Bad absolute URL: The string %s is not an absolute URL."
8888+ (q original_value) (q attr_name) (q element) (q url))
8689 | Some _ ->
8790 (* Has a scheme - do comprehensive URL validation *)
8891 match Url_checker.validate_url url element attr_name with
···9497 (* Escape backslashes in replacement string for Str.global_replace *)
9598 let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
9699 let error_msg = Str.global_replace
9797- (Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
9898- (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
100100+ (Str.regexp_string (Printf.sprintf "%s for attribute" (q url)))
101101+ (Printf.sprintf "%s for attribute" (q escaped_original))
99102 error_msg in
100103 Some error_msg
101104
+32-30
lib/htmlrw_check/specialized/mime_type_checker.ml
···2233 Validates MIME type values in type attributes. *)
4455+let q = Error_code.q
66+57(** Validate a MIME type value. Returns error message or None. *)
68let validate_mime_type value element attr_name =
79 let len = String.length value in
810 if len = 0 then
911 Some (Printf.sprintf
1010- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
1111- value attr_name element)
1212+ "Bad value %s for attribute %s on element %s: Bad MIME type: Empty value."
1313+ (q value) (q attr_name) (q element))
1214 else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
1315 (* Check if this is a semicolon followed by only whitespace *)
1416 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···1820 let params_trimmed = String.trim params in
1921 if params_trimmed = "" then
2022 Some (Printf.sprintf
2121- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
2222- value attr_name element)
2323+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
2424+ (q value) (q attr_name) (q element))
2325 else
2426 Some (Printf.sprintf
2525- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
2626- value attr_name element)
2727+ "Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
2828+ (q value) (q attr_name) (q element))
2729 | None ->
2830 Some (Printf.sprintf
2929- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
3030- value attr_name element)
3131+ "Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
3232+ (q value) (q attr_name) (q element))
3133 end
3234 else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
3335 Some (Printf.sprintf
3434- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
3535- value attr_name element)
3636+ "Bad value %s for attribute %s on element %s: Bad MIME type: Expected a token character but saw %s instead."
3737+ (q value) (q attr_name) (q element) (q " "))
3638 else
3739 (* Parse type/subtype *)
3840 let slash_pos = try Some (String.index value '/') with Not_found -> None in
···4345 (match semicolon_pos with
4446 | Some _ ->
4547 Some (Printf.sprintf
4646- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
4747- value attr_name element)
4848+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
4949+ (q value) (q attr_name) (q element))
4850 | None ->
4951 Some (Printf.sprintf
5050- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
5151- value attr_name element))
5252+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
5353+ (q value) (q attr_name) (q element)))
5254 | Some slash_pos ->
5355 (* Check for empty subtype *)
5456 let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···6062 let subtype_trimmed = String.trim subtype in
6163 if subtype_trimmed = "" then
6264 Some (Printf.sprintf
6363- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
6464- value attr_name element)
6565+ "Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
6666+ (q value) (q attr_name) (q element))
6567 else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
6668 (* Space before semicolon - also check parameter format *)
6769 let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···7274 let params_trimmed = String.trim params in
7375 if params_trimmed = "" then
7476 Some (Printf.sprintf
7575- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
7676- value attr_name element)
7777+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
7878+ (q value) (q attr_name) (q element))
7779 else
7880 (* Check for param_name=value format *)
7981 let eq_pos = try Some (String.index params '=') with Not_found -> None in
8082 (match eq_pos with
8183 | None ->
8284 Some (Printf.sprintf
8383- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
8484- value attr_name element)
8585+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
8686+ (q value) (q attr_name) (q element))
8587 | Some _ -> None)
8688 | None -> None)
8789 else
···9496 let params_trimmed = String.trim params in
9597 if params_trimmed = "" then
9698 Some (Printf.sprintf
9797- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
9898- value attr_name element)
9999+ "Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
100100+ (q value) (q attr_name) (q element))
99101 else
100102 (* Check for param_name=value format *)
101103 let eq_pos = try Some (String.index params '=') with Not_found -> None in
102104 (match eq_pos with
103105 | None ->
104106 Some (Printf.sprintf
105105- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
106106- value attr_name element)
107107+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
108108+ (q value) (q attr_name) (q element))
107109 | Some eq_pos ->
108110 let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
109111 let param_value_trimmed = String.trim param_value in
110112 if param_value_trimmed = "" then
111113 Some (Printf.sprintf
112112- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
113113- value attr_name element)
114114+ "Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
115115+ (q value) (q attr_name) (q element))
114116 else if param_value_trimmed.[0] = '"' then
115117 (* Quoted string - check for closing quote *)
116118 let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···127129 in
128130 if has_backslash_at_end then
129131 Some (Printf.sprintf
130130- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
131131- value attr_name element)
132132+ "Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
133133+ (q value) (q attr_name) (q element))
132134 else
133135 Some (Printf.sprintf
134134- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
135135- value attr_name element))
136136+ "Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
137137+ (q value) (q attr_name) (q element)))
136138 else
137139 None))
138140
+10-8
lib/htmlrw_check/specialized/svg_checker.ml
···2233 Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
4455+let q = Error_code.q
66+57type font_state = {
68 mutable has_missing_glyph : bool;
79}
···292294 if value <> svg_ns_url then
293295 Message_collector.add_typed collector
294296 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
295295- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns\xe2\x80\x9d (only \xe2\x80\x9c%s\xe2\x80\x9d permitted here)."
296296- value svg_ns_url))))
297297+ "Bad value %s for the attribute %s (only %s permitted here)."
298298+ (q value) (q "xmlns") (q svg_ns_url)))))
297299 | "xmlns:xlink" ->
298300 if value <> "http://www.w3.org/1999/xlink" then
299301 Message_collector.add_typed collector
300302 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
301301- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:link\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
302302- value))))
303303+ "Bad value %s for the attribute %s (only %s permitted here)."
304304+ (q value) (q "xmlns:link") (q "http://www.w3.org/1999/xlink")))))
303305 | _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
304306 (* Other xmlns declarations are not allowed in HTML-embedded SVG *)
305307 Message_collector.add_typed collector
···324326 let context = String.sub d !context_start (ctx_end - !context_start) in
325327 Message_collector.add_typed collector
326328 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
327327- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected command but found \xe2\x80\x9c#\xe2\x80\x9d (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
328328- d element context))));
329329+ "Bad value %s for attribute %s on element %s: Bad SVG path data: Expected command but found %s (context: %s)."
330330+ (q d) (q "d") (q element) (q "#") (q context)))));
329331 i := len (* Stop processing *)
330332 | _ ->
331333 incr i
···344346 let context = String.sub d ctx_start (flag_end - ctx_start) in
345347 Message_collector.add_typed collector
346348 (`Attr (`Bad_value_generic (`Message (Printf.sprintf
347347- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
348348- d element flag context))))
349349+ "Bad value %s for attribute %s on element %s: Bad SVG path data: Expected %s or %s for large-arc-flag for %s command but found %s instead (context: %s)."
350350+ (q d) (q "d") (q element) (q "0") (q "1") (q "a") (q flag) (q context)))))
349351 end
350352 with Not_found -> ()
351353
+73-70
lib/htmlrw_check/specialized/url_checker.ml
···11(** URL validation checker for href, src, action, and other URL attributes. *)
2233+(** Quote helper for consistent message formatting. *)
44+let q = Error_code.q
55+36(** Attributes that contain URLs and should be validated.
47 Note: srcset uses special microsyntax, not validated as URL here.
58 Note: input[value] is only checked for type="url", handled specially below. *)
···4447let validate_ipv6_host host url attr_name element_name =
4548 (* Host should be in format [xxxx:...] *)
4649 if String.length host < 3 then
4747- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
4848- url attr_name element_name)
5050+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
5151+ (q url) (q attr_name) (q element_name))
4952 else begin
5053 (* Check if all characters are valid IPv6 chars *)
5154 let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
5255 if invalid_char then
5353- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
5454- url attr_name element_name)
5656+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
5757+ (q url) (q attr_name) (q element_name))
5558 else
5659 None
5760 end
···239242 let _ = contains_invalid_unicode decoded in
240243 None
241244 with Exit ->
242242- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
243243- url attr_name element_name)
245245+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
246246+ (q url) (q attr_name) (q element_name))
244247245248(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
246249let contains_percent_char s =
···258261 let decoded = percent_decode host in
259262 (* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
260263 if contains_percent_char decoded then
261261- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%%\xe2\x80\x9d is not allowed."
262262- url attr_name element_name)
264264+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
265265+ (q url) (q attr_name) (q element_name) (q "%"))
263266 else
264267 None
265268···275278 ) port;
276279 match !invalid_char with
277280 | Some c ->
278278- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in port: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
279279- url attr_name element_name c)
281281+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in port: %s is not allowed."
282282+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
280283 | None ->
281284 (* Check port range *)
282285 try
283286 let port_num = int_of_string port in
284287 if port_num >= 65536 then
285285- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Port number must be less than 65536."
286286- url attr_name element_name)
288288+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Port number must be less than 65536."
289289+ (q url) (q attr_name) (q element_name))
287290 else
288291 None
289292 with _ -> None
···297300 (* Check for empty host *)
298301 let requires_host = List.mem scheme special_schemes in
299302 if host = "" && requires_host && scheme <> "file" then
300300- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: empty host."
301301- url attr_name element_name)
303303+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: empty host."
304304+ (q url) (q attr_name) (q element_name))
302305 else
303306 (* Check for invalid chars *)
304307 let invalid_char =
···306309 in
307310 match invalid_char with
308311 | Some c ->
309309- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
310310- url attr_name element_name c)
312312+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
313313+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
311314 | None ->
312315 (* Check for | *)
313316 if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
314314- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
315315- url attr_name element_name)
317317+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
318318+ (q url) (q attr_name) (q element_name) (q "|"))
316319 (* Check for backslash in host *)
317320 else if String.contains host '\\' then
318318- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
319319- url attr_name element_name)
321321+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
322322+ (q url) (q attr_name) (q element_name) (q "\\"))
320323 (* Check for space in host *)
321324 else if String.contains host ' ' then
322322- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
323323- url attr_name element_name)
325325+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
326326+ (q url) (q attr_name) (q element_name))
324327 (* Check for invalid percent-encoded Unicode in host *)
325328 else begin
326329 match check_invalid_percent_encoded_unicode host url attr_name element_name with
···342345 let colon_pos = String.index url ':' in
343346 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
344347 if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
345345- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")."
346346- url attr_name element_name)
348348+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a slash (\"/\")."
349349+ (q url) (q attr_name) (q element_name))
347350 else
348351 None
349352 end else
···357360 | Some scheme ->
358361 if scheme = "data" && String.contains url '#' then
359362 let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
360360- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s Fragment is not allowed for data: URIs according to RFC 2397."
361361- url attr_name element_name url_type)
363363+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: %s Fragment is not allowed for data: URIs according to RFC 2397."
364364+ (q url) (q attr_name) (q element_name) url_type)
362365 else
363366 None
364367···375378 let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
376379 (* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
377380 if String.length after_colon > 0 && after_colon.[0] = '/' then
378378- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a token character or a semicolon but saw \xe2\x80\x9c/\xe2\x80\x9d instead."
379379- url attr_name element_name)
381381+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a token character or a semicolon but saw %s instead."
382382+ (q url) (q attr_name) (q element_name) (q "/"))
380383 else
381384 None
382385 end else
···393396 let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
394397 (* Check for tab in scheme data *)
395398 if String.contains scheme_data '\t' then
396396- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: tab is not allowed."
397397- url attr_name element_name)
399399+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: tab is not allowed."
400400+ (q url) (q attr_name) (q element_name))
398401 (* Check for newline in scheme data *)
399402 else if String.contains scheme_data '\n' then
400400- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
401401- url attr_name element_name)
403403+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
404404+ (q url) (q attr_name) (q element_name))
402405 (* Check for carriage return in scheme data *)
403406 else if String.contains scheme_data '\r' then
404404- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
405405- url attr_name element_name)
407407+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
408408+ (q url) (q attr_name) (q element_name))
406409 (* Check for space in scheme data *)
407410 else if String.contains scheme_data ' ' then
408408- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: space is not allowed."
409409- url attr_name element_name)
411411+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: space is not allowed."
412412+ (q url) (q attr_name) (q element_name))
410413 else
411414 None
412415 end else
···449452 let path = remove_query_fragment raw_path in
450453 (* Check for space in path (not allowed) *)
451454 if String.contains path ' ' then
452452- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: space is not allowed."
453453- url attr_name element_name)
455455+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: space is not allowed."
456456+ (q url) (q attr_name) (q element_name))
454457 (* Check for pipe in path (not allowed except in file:// authority) *)
455458 else if String.contains path '|' then
456456- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
457457- url attr_name element_name)
459459+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
460460+ (q url) (q attr_name) (q element_name) (q "|"))
458461 (* Check for unescaped square brackets in path *)
459462 else if String.contains path '[' then
460460- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
461461- url attr_name element_name)
463463+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
464464+ (q url) (q attr_name) (q element_name) (q "["))
462465 else
463466 None
464467···470473 | None ->
471474 (* Check for square brackets at start (not IPv6 - that requires scheme) *)
472475 if String.length url > 0 && url.[0] = '[' then
473473- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
474474- url attr_name element_name)
476476+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
477477+ (q url) (q attr_name) (q element_name) (q "["))
475478 else
476479 None
477480···489492 if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
490493 find_bare_percent (i + 3) (* Valid percent encoding, continue *)
491494 else
492492- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Percentage (\xe2\x80\x9c%%\xe2\x80\x9d) is not followed by two hexadecimal digits."
493493- url attr_name element_name)
495495+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Percentage (%s) is not followed by two hexadecimal digits."
496496+ (q url) (q attr_name) (q element_name) (q "%"))
494497 end else
495498 find_bare_percent (i + 1)
496499 in
···511514 let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
512515 (* Check for unescaped space in query *)
513516 if String.contains query ' ' then
514514- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in query: space is not allowed."
515515- url attr_name element_name)
517517+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in query: space is not allowed."
518518+ (q url) (q attr_name) (q element_name))
516519 else
517520 None
518521 with Not_found -> None (* No query string *)
···524527 let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
525528 (* Check for backslash in fragment *)
526529 if String.contains fragment '\\' then
527527- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
528528- url attr_name element_name)
530530+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
531531+ (q url) (q attr_name) (q element_name) (q "\\"))
529532 (* Check for second hash in fragment *)
530533 else if String.contains fragment '#' then
531531- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c#\xe2\x80\x9d is not allowed."
532532- url attr_name element_name)
534534+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
535535+ (q url) (q attr_name) (q element_name) (q "#"))
533536 (* Check for space in fragment *)
534537 else if String.contains fragment ' ' then
535535- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: space is not allowed."
536536- url attr_name element_name)
538538+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: space is not allowed."
539539+ (q url) (q attr_name) (q element_name))
537540 else
538541 None
539542 with Not_found -> None (* No fragment *)
···572575 let userinfo = String.sub authority 0 at in
573576 (* Check for @ in userinfo (should be percent-encoded) *)
574577 if String.contains userinfo '@' then
575575- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: User or password contains an at symbol (\xe2\x80\x9c@\xe2\x80\x9d) not percent-encoded."
576576- url attr_name element_name)
578578+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: User or password contains an at symbol (%s) not percent-encoded."
579579+ (q url) (q attr_name) (q element_name) (q "@"))
577580 (* Check for space *)
578581 else if String.contains userinfo ' ' then
579579- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: space is not allowed."
580580- url attr_name element_name)
582582+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: space is not allowed."
583583+ (q url) (q attr_name) (q element_name))
581584 else begin
582585 (* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
583586 let find_non_ascii_char userinfo =
···600603 in
601604 match find_non_ascii_char userinfo with
602605 | Some bad_char ->
603603- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%s\xe2\x80\x9d is not allowed."
604604- url attr_name element_name bad_char)
606606+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
607607+ (q url) (q attr_name) (q element_name) (q bad_char))
605608 | None ->
606609 (* Check for other invalid chars *)
607610 let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
608611 match invalid with
609612 | Some c ->
610610- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
611611- url attr_name element_name c)
613613+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
614614+ (q url) (q attr_name) (q element_name) (q (String.make 1 c)))
612615 | None -> None
613616 end
614617 with _ -> None
···634637 let attr_lower = String.lowercase_ascii attr_name in
635638 if List.mem attr_lower must_be_non_empty ||
636639 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
637637- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Must be non-empty."
638638- original_url attr_name element_name)
640640+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
641641+ (q original_url) (q attr_name) (q element_name))
639642 else
640643 None
641644 end
···647650 let last = original_url.[String.length original_url - 1] in
648651 last = ' ' || last = '\t' in
649652 if has_leading || has_trailing then
650650- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character: leading/trailing ASCII whitespace."
651651- original_url attr_name element_name)
653653+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character: leading/trailing ASCII whitespace."
654654+ (q original_url) (q attr_name) (q element_name))
652655 else None
653656 (* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
654657 else begin
···657660 | None ->
658661 (* Check for newlines/tabs in special scheme URLs *)
659662 if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
660660- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Tab, new line or carriage return found."
661661- url attr_name element_name)
663663+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Tab, new line or carriage return found."
664664+ (q url) (q attr_name) (q element_name))
662665 else begin
663666 (* Check for relative URL issues first *)
664667 match check_relative_url url attr_name element_name with
···697700698701 (* Check for backslash AFTER special scheme check *)
699702 if String.contains url '\\' then
700700- Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Backslash (\"\\\") used as path segment delimiter."
701701- url attr_name element_name)
703703+ Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Backslash (\"\\\") used as path segment delimiter."
704704+ (q url) (q attr_name) (q element_name))
702705 else
703706704707 (* Check path segment for illegal characters *)