···11+(** Importmap validation checker.
22+33+ Validates that <script type="importmap"> elements contain valid JSON
44+ and conform to importmap structural requirements. *)
55+66+type state = {
77+ mutable in_importmap : bool;
88+ content : Buffer.t;
99+}
1010+1111+let create () = {
1212+ in_importmap = false;
1313+ content = Buffer.create 256;
1414+}
1515+1616+let reset state =
1717+ state.in_importmap <- false;
1818+ Buffer.clear state.content
1919+2020+(** Simple JSON value representation *)
2121+type json =
2222+ | JNull
2323+ | JBool of bool
2424+ | JNumber of float
2525+ | JString of string
2626+ | JArray of json list
2727+ | JObject of (string * json) list
2828+2929+(** Simple JSON parser *)
3030+let parse_json s_orig =
3131+ let s = String.trim s_orig in
3232+ let len = String.length s in
3333+ if len = 0 then Error "Empty JSON"
3434+ else
3535+ let pos = ref 0 in
3636+3737+ let skip_ws () =
3838+ while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do
3939+ incr pos
4040+ done
4141+ in
4242+4343+ let peek () = if !pos < len then Some s.[!pos] else None in
4444+ let consume () = let c = s.[!pos] in incr pos; c in
4545+4646+ let rec parse_value () =
4747+ skip_ws ();
4848+ match peek () with
4949+ | None -> Error "Unexpected end of input"
5050+ | Some '{' -> parse_object ()
5151+ | Some '[' -> parse_array ()
5252+ | Some '"' -> parse_string ()
5353+ | Some 't' -> parse_true ()
5454+ | Some 'f' -> parse_false ()
5555+ | Some 'n' -> parse_null ()
5656+ | Some c when c = '-' || (c >= '0' && c <= '9') -> parse_number ()
5757+ | Some _ -> Error "Unexpected character"
5858+5959+ and parse_object () =
6060+ ignore (consume ()); (* consume { *)
6161+ skip_ws ();
6262+ match peek () with
6363+ | Some '}' -> ignore (consume ()); Ok (JObject [])
6464+ | _ ->
6565+ let rec parse_members acc =
6666+ skip_ws ();
6767+ match parse_string () with
6868+ | Error e -> Error e
6969+ | Ok (JString key) ->
7070+ skip_ws ();
7171+ (match peek () with
7272+ | Some ':' ->
7373+ ignore (consume ());
7474+ (match parse_value () with
7575+ | Error e -> Error e
7676+ | Ok value ->
7777+ skip_ws ();
7878+ let acc' = (key, value) :: acc in
7979+ match peek () with
8080+ | Some ',' -> ignore (consume ()); parse_members acc'
8181+ | Some '}' -> ignore (consume ()); Ok (JObject (List.rev acc'))
8282+ | _ -> Error "Expected ',' or '}'")
8383+ | _ -> Error "Expected ':'")
8484+ | Ok _ -> Error "Expected string key"
8585+ in
8686+ parse_members []
8787+8888+ and parse_array () =
8989+ ignore (consume ()); (* consume [ *)
9090+ skip_ws ();
9191+ match peek () with
9292+ | Some ']' -> ignore (consume ()); Ok (JArray [])
9393+ | _ ->
9494+ let rec parse_elements acc =
9595+ match parse_value () with
9696+ | Error e -> Error e
9797+ | Ok value ->
9898+ skip_ws ();
9999+ let acc' = value :: acc in
100100+ match peek () with
101101+ | Some ',' -> ignore (consume ()); parse_elements acc'
102102+ | Some ']' -> ignore (consume ()); Ok (JArray (List.rev acc'))
103103+ | _ -> Error "Expected ',' or ']'"
104104+ in
105105+ parse_elements []
106106+107107+ and parse_string () =
108108+ skip_ws ();
109109+ match peek () with
110110+ | Some '"' ->
111111+ ignore (consume ());
112112+ let buf = Buffer.create 32 in
113113+ let rec read () =
114114+ match peek () with
115115+ | None -> Error "Unterminated string"
116116+ | Some '"' -> ignore (consume ()); Ok (JString (Buffer.contents buf))
117117+ | Some '\\' ->
118118+ ignore (consume ());
119119+ (match peek () with
120120+ | None -> Error "Unterminated escape"
121121+ | Some c -> ignore (consume ()); Buffer.add_char buf c; read ())
122122+ | Some c -> ignore (consume ()); Buffer.add_char buf c; read ()
123123+ in
124124+ read ()
125125+ | _ -> Error "Expected string"
126126+127127+ and parse_number () =
128128+ let start = !pos in
129129+ if peek () = Some '-' then incr pos;
130130+ while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done;
131131+ if !pos < len && s.[!pos] = '.' then begin
132132+ incr pos;
133133+ while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
134134+ end;
135135+ if !pos < len && (s.[!pos] = 'e' || s.[!pos] = 'E') then begin
136136+ incr pos;
137137+ if !pos < len && (s.[!pos] = '+' || s.[!pos] = '-') then incr pos;
138138+ while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
139139+ end;
140140+ let num_str = String.sub s start (!pos - start) in
141141+ match float_of_string_opt num_str with
142142+ | Some f -> Ok (JNumber f)
143143+ | None -> Error "Invalid number"
144144+145145+ and parse_true () =
146146+ if !pos + 4 <= len && String.sub s !pos 4 = "true" then
147147+ (pos := !pos + 4; Ok (JBool true))
148148+ else Error "Expected 'true'"
149149+150150+ and parse_false () =
151151+ if !pos + 5 <= len && String.sub s !pos 5 = "false" then
152152+ (pos := !pos + 5; Ok (JBool false))
153153+ else Error "Expected 'false'"
154154+155155+ and parse_null () =
156156+ if !pos + 4 <= len && String.sub s !pos 4 = "null" then
157157+ (pos := !pos + 4; Ok JNull)
158158+ else Error "Expected 'null'"
159159+ in
160160+161161+ match parse_value () with
162162+ | Error e -> Error e
163163+ | Ok v ->
164164+ skip_ws ();
165165+ if !pos = len then Ok v
166166+ else Error "Unexpected content after JSON"
167167+168168+(** Validate importmap structure *)
169169+type importmap_error =
170170+ | InvalidJSON of string
171171+ | EmptyKey of string (* property name where empty key was found *)
172172+ | NotObject of string (* property name that should be object but isn't *)
173173+ | NotString of string (* property name that should be string but isn't *)
174174+ | ForbiddenProperty of string
175175+ | SlashKeyWithoutSlashValue of string (* property name where slash key doesn't have slash value *)
176176+ | InvalidScopeKey (* scope key is not a valid URL *)
177177+ | InvalidScopeValue of string (* scope value is not a valid URL *)
178178+179179+(** Check if a string looks like a valid URL-like specifier for importmaps *)
180180+let is_valid_url_like s =
181181+ if String.length s = 0 then false
182182+ else
183183+ (* Valid URL-like: starts with /, ./, ../, or has a scheme followed by :// or : *)
184184+ let starts_with_slash = s.[0] = '/' in
185185+ let starts_with_dot_slash = String.length s >= 2 && s.[0] = '.' && s.[1] = '/' in
186186+ let starts_with_dot_dot_slash = String.length s >= 3 && s.[0] = '.' && s.[1] = '.' && s.[2] = '/' in
187187+ let has_scheme =
188188+ match String.index_opt s ':' with
189189+ | None -> false
190190+ | Some pos when pos > 0 ->
191191+ (* Check that characters before : are valid scheme characters *)
192192+ let scheme = String.sub s 0 pos in
193193+ String.length scheme > 0 &&
194194+ String.for_all (fun c ->
195195+ (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
196196+ (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
197197+ ) scheme
198198+ | _ -> false
199199+ in
200200+ starts_with_slash || starts_with_dot_slash || starts_with_dot_dot_slash || has_scheme
201201+202202+let validate_importmap s =
203203+ match parse_json s with
204204+ | Error msg -> [InvalidJSON msg]
205205+ | Ok json ->
206206+ let errors = ref [] in
207207+ let add_error e = errors := e :: !errors in
208208+209209+ (match json with
210210+ | JObject members ->
211211+ List.iter (fun (key, value) ->
212212+ (* Check for forbidden top-level properties *)
213213+ if key <> "imports" && key <> "scopes" && key <> "integrity" then
214214+ add_error (ForbiddenProperty key);
215215+216216+ (* Check imports *)
217217+ if key = "imports" then begin
218218+ match value with
219219+ | JObject import_members ->
220220+ List.iter (fun (ikey, ivalue) ->
221221+ if ikey = "" then add_error (EmptyKey "imports");
222222+ (* Check slash-ending consistency *)
223223+ let key_ends_with_slash = String.length ikey > 0 && ikey.[String.length ikey - 1] = '/' in
224224+ match ivalue with
225225+ | JString v ->
226226+ if key_ends_with_slash then begin
227227+ let val_ends_with_slash = String.length v > 0 && v.[String.length v - 1] = '/' in
228228+ if not val_ends_with_slash then
229229+ add_error (SlashKeyWithoutSlashValue "imports")
230230+ end
231231+ | JNull -> () (* null is allowed *)
232232+ | _ -> add_error (NotString ("imports[" ^ ikey ^ "]"))
233233+ ) import_members
234234+ | _ -> add_error (NotObject "imports")
235235+ end;
236236+237237+ (* Check scopes *)
238238+ if key = "scopes" then begin
239239+ match value with
240240+ | JObject scope_members ->
241241+ List.iter (fun (skey, svalue) ->
242242+ if skey = "" then add_error (EmptyKey "scopes");
243243+ (* Check that scope key is a valid URL *)
244244+ if skey <> "" && not (is_valid_url_like skey) then
245245+ add_error InvalidScopeKey;
246246+ match svalue with
247247+ | JObject scope_imports ->
248248+ List.iter (fun (sikey, sivalue) ->
249249+ if sikey = "" then add_error (EmptyKey ("scopes[" ^ skey ^ "]"));
250250+ match sivalue with
251251+ | JString v ->
252252+ (* Check that scope value is a valid URL *)
253253+ if not (is_valid_url_like v) then
254254+ add_error (InvalidScopeValue sikey)
255255+ | JNull -> ()
256256+ | _ -> add_error (NotString ("scopes[" ^ skey ^ "][" ^ sikey ^ "]"))
257257+ ) scope_imports
258258+ | _ -> add_error (NotObject ("scopes[" ^ skey ^ "]"))
259259+ ) scope_members
260260+ | _ -> add_error (NotObject "scopes")
261261+ end
262262+ ) members
263263+ | _ -> add_error (NotObject "root"));
264264+265265+ List.rev !errors
266266+267267+let start_element state ~name ~namespace ~attrs _collector =
268268+ if namespace <> None then ()
269269+ else begin
270270+ let name_lower = String.lowercase_ascii name in
271271+ if name_lower = "script" then begin
272272+ (* Check if type="importmap" *)
273273+ let type_attr = List.find_opt (fun (n, _) ->
274274+ String.lowercase_ascii n = "type"
275275+ ) attrs in
276276+ match type_attr with
277277+ | Some (_, v) when String.lowercase_ascii v = "importmap" ->
278278+ state.in_importmap <- true;
279279+ Buffer.clear state.content
280280+ | _ -> ()
281281+ end
282282+ end
283283+284284+let error_to_message = function
285285+ | InvalidJSON _ ->
286286+ "A script \xe2\x80\x9cscript\xe2\x80\x9d with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must have valid JSON content."
287287+ | EmptyKey prop ->
288288+ Printf.sprintf "A specifier map defined in a \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain non-empty keys." prop
289289+ | NotObject prop ->
290290+ Printf.sprintf "The value of the \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must be a JSON object." prop
291291+ | NotString _ ->
292292+ "A specifier map defined in a \xe2\x80\x9cimports\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain string values."
293293+ | ForbiddenProperty prop ->
294294+ Printf.sprintf "The \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d is not an allowed property." prop
295295+ | SlashKeyWithoutSlashValue prop ->
296296+ Printf.sprintf "A specifier map defined in a \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must have values that end with \xe2\x80\x9c/\xe2\x80\x9d when its corresponding key ends with \xe2\x80\x9c/\xe2\x80\x9d." prop
297297+ | InvalidScopeKey ->
298298+ "The value of the \xe2\x80\x9cscopes\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must be a JSON object whose keys are valid URL strings."
299299+ | InvalidScopeValue _ ->
300300+ "A specifier map defined in a \xe2\x80\x9cscopes\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain valid URL values."
301301+302302+let end_element state ~name ~namespace collector =
303303+ if namespace <> None then ()
304304+ else begin
305305+ let name_lower = String.lowercase_ascii name in
306306+ if name_lower = "script" && state.in_importmap then begin
307307+ let content = Buffer.contents state.content in
308308+ let errors = validate_importmap content in
309309+ List.iter (fun err ->
310310+ Message_collector.add_error collector
311311+ ~message:(error_to_message err)
312312+ ~code:"importmap-invalid"
313313+ ~element:"script"
314314+ ~attribute:"type"
315315+ ()
316316+ ) errors;
317317+ state.in_importmap <- false
318318+ end
319319+ end
320320+321321+let characters state text _collector =
322322+ if state.in_importmap then
323323+ Buffer.add_string state.content text
324324+325325+let end_document _state _collector = ()
326326+327327+let checker =
328328+ (module struct
329329+ type nonrec state = state
330330+ let create = create
331331+ let reset = reset
332332+ let start_element = start_element
333333+ let end_element = end_element
334334+ let characters = characters
335335+ let end_document = end_document
336336+ end : Checker.S)
···6767let is_url s =
6868 String.contains s ':'
69697070-(** Validate that a URL is a valid absolute URL for itemtype.
7171- itemtype must be an absolute URL per the HTML5 spec.
7272- http/https URLs require :// but other schemes like mailto:, data:, javascript: don't. *)
7373-let validate_itemtype_url url =
7474- let url = String.trim url in
7575- if String.length url = 0 then
7676- Error "itemtype must not be empty"
7070+(** Validate that a URL is a valid absolute URL for itemtype/itemid.
7171+ Uses the comprehensive URL validation from Url_checker. *)
7272+let validate_microdata_url url element attr_name =
7373+ let url_trimmed = String.trim url in
7474+ if String.length url_trimmed = 0 then
7575+ Some (Printf.sprintf
7676+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
7777+ url attr_name element)
7778 else
7878- match String.index_opt url ':' with
7979- | None -> Error "Expected a slash (\"/\")."
8080- | Some colon_pos ->
8181- if colon_pos = 0 then
8282- Error "Expected a slash (\"/\")."
8383- else
8484- let scheme = String.lowercase_ascii (String.sub url 0 colon_pos) in
8585- (* Schemes that require :// for itemtype validation
8686- Note: The Nu validator only enforces :// for http, https, and ftp *)
8787- let special_schemes = [
8888- "http"; "https"; "ftp"
8989- ] in
9090- if List.mem scheme special_schemes then begin
9191- if colon_pos + 2 >= String.length url then
9292- Error "Expected a slash (\"/\")."
9393- else if url.[colon_pos + 1] <> '/' || url.[colon_pos + 2] <> '/' then
9494- Error "Expected a slash (\"/\")."
9595- else
9696- Ok ()
9797- end else
9898- (* Other schemes (mailto:, data:, javascript:, etc.) are valid as-is *)
9999- Ok ()
7979+ (* First check if it has a scheme (required for absolute URL) *)
8080+ match Url_checker.extract_scheme url_trimmed with
8181+ | None ->
8282+ Some (Printf.sprintf
8383+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
8484+ url attr_name element url)
8585+ | Some _ ->
8686+ (* Has a scheme - do comprehensive URL validation *)
8787+ match Url_checker.validate_url url element attr_name with
8888+ | None -> None
8989+ | Some error_msg ->
9090+ (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
9191+ let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
9292+ Some error_msg
1009310194(** Check if itemprop value is valid. *)
10295let validate_itemprop_value value =
···125118 let itemref_opt = get_attr attrs "itemref" in
126119 let itemprop_opt = get_attr attrs "itemprop" in
127120128128- (* Check itemid requires itemscope and itemtype *)
121121+ (* Check itemid requires itemscope and itemtype, and validate URL *)
129122 begin match itemid_opt with
130130- | Some _itemid ->
123123+ | Some itemid ->
131124 if not has_itemscope then
132125 Message_collector.add_error collector
133126 ~message:"itemid attribute requires itemscope attribute"
···143136 ?location
144137 ~element
145138 ~attribute:"itemid"
146146- ()
139139+ ();
140140+ (* Validate itemid as URL (note: itemid can be relative, unlike itemtype) *)
141141+ (match Url_checker.validate_url itemid element "itemid" with
142142+ | None -> ()
143143+ | Some error_msg ->
144144+ Message_collector.add_error collector
145145+ ~message:error_msg
146146+ ~code:"microdata-invalid-itemid"
147147+ ?location
148148+ ~element
149149+ ~attribute:"itemid"
150150+ ())
147151 | None -> ()
148152 end;
149153···184188 else begin
185189 (* Validate each itemtype URL (can be space-separated) *)
186190 let types = split_whitespace itemtype in
187187- List.iter (fun url ->
188188- match validate_itemtype_url url with
189189- | Ok () -> ()
190190- | Error msg ->
191191- Message_collector.add_error collector
192192- ~message:(Printf.sprintf
193193- "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: %s"
194194- url element msg)
195195- ~code:"microdata-invalid-itemtype"
196196- ?location
197197- ~element
198198- ~attribute:"itemtype"
199199- ()
200200- ) types
191191+ if types = [] then
192192+ (* Empty itemtype is an error *)
193193+ Message_collector.add_error collector
194194+ ~message:(Printf.sprintf
195195+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
196196+ itemtype element)
197197+ ~code:"microdata-invalid-itemtype"
198198+ ?location
199199+ ~element
200200+ ~attribute:"itemtype"
201201+ ()
202202+ else
203203+ List.iter (fun url ->
204204+ match validate_microdata_url url element "itemtype" with
205205+ | None -> ()
206206+ | Some error_msg ->
207207+ Message_collector.add_error collector
208208+ ~message:error_msg
209209+ ~code:"microdata-invalid-itemtype"
210210+ ?location
211211+ ~element
212212+ ~attribute:"itemtype"
213213+ ()
214214+ ) types
201215 end
202216 | None -> ()
203217 end;
···11+(** MIME type validation checker.
22+33+ Validates MIME type values in type attributes. *)
44+55+(** Validate a MIME type value. Returns error message or None. *)
66+let validate_mime_type value element attr_name =
77+ let len = String.length value in
88+ if len = 0 then
99+ Some (Printf.sprintf
1010+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
1111+ value attr_name element)
1212+ else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then
1313+ Some (Printf.sprintf
1414+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
1515+ value attr_name element)
1616+ else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
1717+ Some (Printf.sprintf
1818+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
1919+ value attr_name element)
2020+ else
2121+ (* Parse type/subtype *)
2222+ let slash_pos = try Some (String.index value '/') with Not_found -> None in
2323+ match slash_pos with
2424+ | None ->
2525+ (* No slash found - check if it looks like a type without subtype *)
2626+ let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
2727+ (match semicolon_pos with
2828+ | Some _ ->
2929+ Some (Printf.sprintf
3030+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
3131+ value attr_name element)
3232+ | None ->
3333+ Some (Printf.sprintf
3434+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
3535+ value attr_name element))
3636+ | Some slash_pos ->
3737+ (* Check for empty subtype *)
3838+ let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
3939+ let subtype_end =
4040+ try String.index after_slash ';'
4141+ with Not_found -> String.length after_slash
4242+ in
4343+ let subtype = String.sub after_slash 0 subtype_end in
4444+ let subtype_trimmed = String.trim subtype in
4545+ if subtype_trimmed = "" then
4646+ Some (Printf.sprintf
4747+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
4848+ value attr_name element)
4949+ else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
5050+ (* Space before semicolon - also check parameter format *)
5151+ let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
5252+ (match semicolon_pos with
5353+ | Some semi_pos ->
5454+ (* Check what comes after semicolon *)
5555+ let params = String.sub value (semi_pos + 1) (len - semi_pos - 1) in
5656+ let params_trimmed = String.trim params in
5757+ if params_trimmed = "" then
5858+ Some (Printf.sprintf
5959+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
6060+ value attr_name element)
6161+ else
6262+ (* Check for param_name=value format *)
6363+ let eq_pos = try Some (String.index params '=') with Not_found -> None in
6464+ (match eq_pos with
6565+ | None ->
6666+ Some (Printf.sprintf
6767+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
6868+ value attr_name element)
6969+ | Some _ -> None)
7070+ | None -> None)
7171+ else
7272+ (* Check parameters after semicolon *)
7373+ let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
7474+ (match semicolon_pos with
7575+ | None -> None (* No parameters - OK *)
7676+ | Some semi_pos ->
7777+ let params = String.sub value (semi_pos + 1) (len - semi_pos - 1) in
7878+ let params_trimmed = String.trim params in
7979+ if params_trimmed = "" then
8080+ Some (Printf.sprintf
8181+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
8282+ value attr_name element)
8383+ else
8484+ (* Check for param_name=value format *)
8585+ let eq_pos = try Some (String.index params '=') with Not_found -> None in
8686+ (match eq_pos with
8787+ | None ->
8888+ Some (Printf.sprintf
8989+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
9090+ value attr_name element)
9191+ | Some eq_pos ->
9292+ let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
9393+ let param_value_trimmed = String.trim param_value in
9494+ if param_value_trimmed = "" then
9595+ Some (Printf.sprintf
9696+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
9797+ value attr_name element)
9898+ else if param_value_trimmed.[0] = '"' then
9999+ (* Quoted string - check for closing quote *)
100100+ let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
101101+ | Not_found -> None
102102+ | Invalid_argument _ -> None
103103+ in
104104+ (match quote_end with
105105+ | Some _ -> None (* Properly quoted *)
106106+ | None ->
107107+ (* Check for escaped quote at end *)
108108+ let has_backslash_at_end =
109109+ String.length param_value_trimmed >= 2 &&
110110+ param_value_trimmed.[String.length param_value_trimmed - 1] = '\\'
111111+ in
112112+ if has_backslash_at_end then
113113+ Some (Printf.sprintf
114114+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
115115+ value attr_name element)
116116+ else
117117+ Some (Printf.sprintf
118118+ "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
119119+ value attr_name element))
120120+ else
121121+ None))
122122+123123+(** Elements and attributes that contain MIME types. *)
124124+let mime_type_attrs = [
125125+ ("link", ["type"]);
126126+ ("style", ["type"]);
127127+ ("script", ["type"]);
128128+ ("source", ["type"]);
129129+ ("embed", ["type"]);
130130+ ("object", ["type"]);
131131+]
132132+133133+type state = unit
134134+135135+let create () = ()
136136+let reset _state = ()
137137+138138+let get_attr_value name attrs =
139139+ List.find_map (fun (k, v) ->
140140+ if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
141141+ ) attrs
142142+143143+let start_element _state ~name ~namespace ~attrs collector =
144144+ if namespace <> None then ()
145145+ else begin
146146+ let name_lower = String.lowercase_ascii name in
147147+ match List.assoc_opt name_lower mime_type_attrs with
148148+ | None -> ()
149149+ | Some type_attrs ->
150150+ List.iter (fun attr_name ->
151151+ match get_attr_value attr_name attrs with
152152+ | None -> ()
153153+ | Some value ->
154154+ (* Don't validate empty type attributes or special script types *)
155155+ if value = "" then ()
156156+ else if name_lower = "script" then
157157+ (* script type can be module, importmap, etc. - skip validation for non-MIME types *)
158158+ let value_lower = String.lowercase_ascii value in
159159+ if value_lower = "module" || value_lower = "importmap" ||
160160+ not (String.contains value '/') then ()
161161+ else
162162+ match validate_mime_type value name attr_name with
163163+ | None -> ()
164164+ | Some err ->
165165+ Message_collector.add_error collector
166166+ ~message:err ~code:"bad-mime-type" ~element:name ~attribute:attr_name ()
167167+ else
168168+ match validate_mime_type value name attr_name with
169169+ | None -> ()
170170+ | Some err ->
171171+ Message_collector.add_error collector
172172+ ~message:err ~code:"bad-mime-type" ~element:name ~attribute:attr_name ()
173173+ ) type_attrs
174174+ end
175175+176176+let end_element _state ~name:_ ~namespace:_ _collector = ()
177177+let characters _state _text _collector = ()
178178+let end_document _state _collector = ()
179179+180180+let checker =
181181+ (module struct
182182+ type nonrec state = state
183183+ let create = create
184184+ let reset = reset
185185+ let start_element = start_element
186186+ let end_element = end_element
187187+ let characters = characters
188188+ let end_document = end_document
189189+ end : Checker.S)
···11+(** MIME type validation checker.
22+33+ Validates MIME type values in type attributes. *)
44+55+val checker : Checker.t
+42-40
lib/html5_checker/specialized/table_checker.ml
···767767768768let reset state = state.tables := []
769769770770+let is_html_namespace = function
771771+ | None -> true (* HTML mode - no namespace specified *)
772772+ | Some ns -> ns = html_ns (* XHTML mode - check namespace *)
773773+770774let start_element state ~name ~namespace ~attrs collector =
771771- match namespace with
772772- | Some ns when ns = html_ns -> (
773773- match name with
774774- | "table" ->
775775- (* Push a new table onto the stack *)
776776- state.tables := make_table () :: !(state.tables)
777777- | _ -> (
778778- match !(state.tables) with
779779- | [] -> ()
780780- | table :: _ -> (
781781- match name with
782782- | "td" -> start_cell table false attrs collector
783783- | "th" -> start_cell table true attrs collector
784784- | "tr" -> start_row table collector
785785- | "tbody" | "thead" | "tfoot" -> start_row_group table name collector
786786- | "col" -> start_col table attrs collector
787787- | "colgroup" -> start_colgroup table attrs collector
788788- | _ -> ())))
789789- | _ -> ()
775775+ if is_html_namespace namespace then (
776776+ let name_lower = String.lowercase_ascii name in
777777+ match name_lower with
778778+ | "table" ->
779779+ (* Push a new table onto the stack *)
780780+ state.tables := make_table () :: !(state.tables)
781781+ | _ -> (
782782+ match !(state.tables) with
783783+ | [] -> ()
784784+ | table :: _ -> (
785785+ match name_lower with
786786+ | "td" -> start_cell table false attrs collector
787787+ | "th" -> start_cell table true attrs collector
788788+ | "tr" -> start_row table collector
789789+ | "tbody" | "thead" | "tfoot" -> start_row_group table name collector
790790+ | "col" -> start_col table attrs collector
791791+ | "colgroup" -> start_colgroup table attrs collector
792792+ | _ -> ())))
790793791794let end_element state ~name ~namespace collector =
792792- match namespace with
793793- | Some ns when ns = html_ns -> (
794794- match name with
795795- | "table" -> (
796796- match !(state.tables) with
797797- | [] -> failwith "Bug: end table but no table on stack"
798798- | table :: rest ->
799799- end_table table collector;
800800- state.tables := rest)
801801- | _ -> (
802802- match !(state.tables) with
803803- | [] -> ()
804804- | table :: _ -> (
805805- match name with
806806- | "td" | "th" -> end_cell table
807807- | "tr" -> end_row table collector
808808- | "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector
809809- | "col" -> end_col table
810810- | "colgroup" -> end_colgroup table
811811- | _ -> ())))
812812- | _ -> ()
795795+ if is_html_namespace namespace then (
796796+ let name_lower = String.lowercase_ascii name in
797797+ match name_lower with
798798+ | "table" -> (
799799+ match !(state.tables) with
800800+ | [] -> () (* End tag without start - ignore *)
801801+ | table :: rest ->
802802+ end_table table collector;
803803+ state.tables := rest)
804804+ | _ -> (
805805+ match !(state.tables) with
806806+ | [] -> ()
807807+ | table :: _ -> (
808808+ match name_lower with
809809+ | "td" | "th" -> end_cell table
810810+ | "tr" -> end_row table collector
811811+ | "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector
812812+ | "col" -> end_col table
813813+ | "colgroup" -> end_colgroup table
814814+ | _ -> ())))
813815814816let characters _state _text _collector = ()
815817
+1-1
test/debug_check.ml
···11let () =
22- let test_file = "validator/tests/html/microdata/itemtype/scheme-https-no-slash-novalid.html" in
22+ let test_file = "validator/tests/html/mime-types/004-novalid.html" in
33 let ic = open_in test_file in
44 let html = really_input_string ic (in_channel_length ic) in
55 close_in ic;