···3(* Use Astring for string operations *)
4let lowercase = Astring.String.Ascii.lowercase
50000006(* Void elements - no end tag allowed *)
7let void_elements = [
8 "area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input";
9 "link"; "meta"; "source"; "track"; "wbr"
10]
01112(* Raw text elements - content is raw text *)
13let raw_text_elements = ["script"; "style"]
···20 "a"; "b"; "big"; "code"; "em"; "font"; "i"; "nobr"; "s"; "small";
21 "strike"; "strong"; "tt"; "u"
22]
02324(* Special elements *)
25let special_elements = [
···35 "tbody"; "td"; "template"; "textarea"; "tfoot"; "th"; "thead"; "title";
36 "tr"; "track"; "ul"; "wbr"; "xmp"
37]
03839(* Heading elements *)
40let heading_elements = ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"]
04142(* Implied end tag elements *)
43let implied_end_tags = [
44 "dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"
45]
04647(* Thoroughly implied end tags *)
48let thoroughly_implied_end_tags = [
49 "caption"; "colgroup"; "dd"; "dt"; "li"; "optgroup"; "option"; "p";
50 "rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"
51]
05253(* Scope elements for various scope checks *)
54let default_scope = [
···62let table_scope = ["html"; "table"; "template"]
6364let select_scope_exclude = ["optgroup"; "option"]
06566(* MathML text integration points *)
67let mathml_text_integration = ["mi"; "mo"; "mn"; "ms"; "mtext"]
06869(* MathML attribute adjustments *)
70let mathml_attr_adjustments = [
···8081(* SVG HTML integration points *)
82let svg_html_integration = ["foreignObject"; "desc"; "title"]
08384(* SVG tag name adjustments *)
85let svg_tag_adjustments = [
···278 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
279]
280281-(* Helper functions *)
00000000000282let is_void = List.mem
283let is_formatting = List.mem
284let is_special name = List.mem name special_elements
···3(* Use Astring for string operations *)
4let lowercase = Astring.String.Ascii.lowercase
(* [make_set elements] builds a hash-set - a [Hashtbl] whose values are all
   [()] - containing every member of [elements], so that later membership
   tests can use [Hashtbl.mem] instead of scanning a list.

   The list length is only used as the initial size hint; the table still
   grows on demand. *)
let make_set elements =
  let tbl = Hashtbl.create (List.length elements) in
  (* [Hashtbl.replace], not [Hashtbl.add]: [add] stacks a second, shadowed
     binding when an element occurs twice in the input, which would make
     [Hashtbl.length] overcount and waste memory. With [replace] the table
     holds exactly one binding per distinct element. *)
  List.iter (fun e -> Hashtbl.replace tbl e ()) elements;
  tbl
11+12(* Void elements - no end tag allowed *)
13let void_elements = [
14 "area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input";
15 "link"; "meta"; "source"; "track"; "wbr"
16]
17+let void_elements_tbl = make_set void_elements
1819(* Raw text elements - content is raw text *)
20let raw_text_elements = ["script"; "style"]
···27 "a"; "b"; "big"; "code"; "em"; "font"; "i"; "nobr"; "s"; "small";
28 "strike"; "strong"; "tt"; "u"
29]
30+let formatting_elements_tbl = make_set formatting_elements
3132(* Special elements *)
33let special_elements = [
···43 "tbody"; "td"; "template"; "textarea"; "tfoot"; "th"; "thead"; "title";
44 "tr"; "track"; "ul"; "wbr"; "xmp"
45]
46+let special_elements_tbl = make_set special_elements
4748(* Heading elements *)
49let heading_elements = ["h1"; "h2"; "h3"; "h4"; "h5"; "h6"]
50+let heading_elements_tbl = make_set heading_elements
5152(* Implied end tag elements *)
53let implied_end_tags = [
54 "dd"; "dt"; "li"; "optgroup"; "option"; "p"; "rb"; "rp"; "rt"; "rtc"
55]
56+let implied_end_tags_tbl = make_set implied_end_tags
5758(* Thoroughly implied end tags *)
59let thoroughly_implied_end_tags = [
60 "caption"; "colgroup"; "dd"; "dt"; "li"; "optgroup"; "option"; "p";
61 "rb"; "rp"; "rt"; "rtc"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"
62]
63+let thoroughly_implied_end_tags_tbl = make_set thoroughly_implied_end_tags
6465(* Scope elements for various scope checks *)
66let default_scope = [
···74let table_scope = ["html"; "table"; "template"]
7576let select_scope_exclude = ["optgroup"; "option"]
77+let select_scope_exclude_tbl = make_set select_scope_exclude
7879(* MathML text integration points *)
80let mathml_text_integration = ["mi"; "mo"; "mn"; "ms"; "mtext"]
81+let mathml_text_integration_tbl = make_set mathml_text_integration
8283(* MathML attribute adjustments *)
84let mathml_attr_adjustments = [
···9495(* SVG HTML integration points *)
96let svg_html_integration = ["foreignObject"; "desc"; "title"]
97+let svg_html_integration_tbl = make_set (List.map lowercase svg_html_integration)
9899(* SVG tag name adjustments *)
100let svg_tag_adjustments = [
···293 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
294]
(* Membership predicates over the hash-sets defined earlier in this module.
   Each predicate is a single [Hashtbl.mem] probe on the corresponding
   [*_tbl] value rather than a linear [List.mem] scan. *)
let is_void_element name = name |> Hashtbl.mem void_elements_tbl
let is_formatting_element name = name |> Hashtbl.mem formatting_elements_tbl
let is_special_element name = name |> Hashtbl.mem special_elements_tbl
let is_heading_element name = name |> Hashtbl.mem heading_elements_tbl
let is_implied_end_tag name = name |> Hashtbl.mem implied_end_tags_tbl
let is_thoroughly_implied_end_tag name =
  name |> Hashtbl.mem thoroughly_implied_end_tags_tbl
let is_mathml_text_integration name =
  name |> Hashtbl.mem mathml_text_integration_tbl

(* The SVG table stores lowercased names, so the probe key is lowercased
   too; as a result this match is ASCII case-insensitive. *)
let is_svg_html_integration name =
  name |> lowercase |> Hashtbl.mem svg_html_integration_tbl

let is_select_scope_exclude name = name |> Hashtbl.mem select_scope_exclude_tbl
306+307+(* Backwards compatibility aliases *)
(* Backwards-compatible alias: [is_void name elements] is plain list
   membership. It does NOT consult [void_elements] by itself - the caller
   supplies the list to search. *)
let is_void name elements = List.mem name elements
(* Backwards-compatible alias: [is_formatting name elements] is plain list
   membership. It does NOT consult [formatting_elements] by itself - the
   caller supplies the list to search. *)
let is_formatting name elements = List.mem name elements
(* Backwards-compatible name for the special-element check. Probes
   [special_elements_tbl] (built from [special_elements]) so this alias
   gets the same hash lookup as [is_special_element] instead of a linear
   scan of the list; the result is unchanged. *)
let is_special name = Hashtbl.mem special_elements_tbl name
+14-14
lib/html5rw/parser/parser_tree_builder.ml
···294let is_html_integration_point node =
295 (* SVG foreignObject, desc, and title are always HTML integration points *)
296 if node.Dom.namespace = Some "svg" &&
297- List.mem node.Dom.name Parser_constants.svg_html_integration then true
298 (* annotation-xml is an HTML integration point only with specific encoding values *)
299 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
300 match List.assoc_opt "encoding" node.Dom.attrs with
···307(* Check if element is a MathML text integration point *)
308let is_mathml_text_integration_point node =
309 node.Dom.namespace = Some "mathml" &&
310- List.mem node.Dom.name ["mi"; "mo"; "mn"; "ms"; "mtext"]
311312(* Scope checks - integration points also terminate scope (except for table scope) *)
313(* Per WHATWG spec, scope checks only consider HTML namespace elements for the target names *)
···341 | [] -> false
342 | n :: rest ->
343 if n.Dom.name = name then true
344- else if not (List.mem n.Dom.name Parser_constants.select_scope_exclude) then false
345 else check rest
346 in
347 check t.open_elements
···350let generate_implied_end_tags t ?except () =
351 let rec loop () =
352 match current_node t with
353- | Some n when List.mem n.Dom.name Parser_constants.implied_end_tags ->
354 (match except with
355 | Some ex when n.Dom.name = ex -> ()
356 | _ -> pop_current t; loop ())
···361let generate_all_implied_end_tags t =
362 let rec loop () =
363 match current_node t with
364- | Some n when List.mem n.Dom.name Parser_constants.thoroughly_implied_end_tags ->
365 pop_current t; loop ()
366 | _ -> ()
367 in
···1105 when List.mem name ["address"; "article"; "aside"; "blockquote"; "center"; "details"; "dialog"; "dir"; "div"; "dl"; "fieldset"; "figcaption"; "figure"; "footer"; "header"; "hgroup"; "main"; "menu"; "nav"; "ol"; "p"; "search"; "section"; "summary"; "ul"] ->
1106 if has_element_in_button_scope t "p" then close_p_element t;
1107 ignore (insert_element t name ~push:true attrs)
1108- | Token.Tag { kind = Token.Start; name; attrs; _ } when List.mem name Parser_constants.heading_elements ->
1109 if has_element_in_button_scope t "p" then close_p_element t;
1110 (match current_node t with
1111- | Some n when List.mem n.Dom.name Parser_constants.heading_elements ->
1112 parse_error t "unexpected-start-tag";
1113 pop_current t
1114 | _ -> ());
···1243 | _ -> ());
1244 pop_until_tag t name
1245 end
1246- | Token.Tag { kind = Token.End; name; _ } when List.mem name Parser_constants.heading_elements ->
1247 if not (has_element_in_scope_impl t Parser_constants.heading_elements Parser_constants.default_scope ~check_integration_points:true) then
1248 parse_error t "unexpected-end-tag"
1249 else begin
···1437 reconstruct_active_formatting t;
1438 ignore (insert_element t name ~push:true attrs);
1439 (* Check for self-closing on non-void HTML element *)
1440- if self_closing && not (List.mem name Parser_constants.void_elements) then
1441 parse_error t "non-void-html-element-start-tag-with-trailing-solidus"
1442 | Token.Tag { kind = Token.End; name; _ } ->
1443 (* Any other end tag *)
···1943 ignore (insert_element t name attrs)
1944 (* Don't push to stack - void elements *)
1945 (* Handle formatting elements in select *)
1946- | Token.Tag { kind = Token.Start; name; attrs; _ } when List.mem name Parser_constants.formatting_elements ->
1947 reconstruct_active_formatting t;
1948 let node = insert_element t name ~push:true attrs in
1949 push_formatting_element t node name attrs
1950- | Token.Tag { kind = Token.End; name; _ } when List.mem name Parser_constants.formatting_elements ->
1951 (* Find select element and check if formatting element is inside select *)
1952 let select_idx = ref None in
1953 let fmt_idx = ref None in
···2211 let is_html_integration_point node =
2212 (* SVG foreignObject, desc, and title are always HTML integration points *)
2213 if node.Dom.namespace = Some "svg" &&
2214- List.mem node.Dom.name Parser_constants.svg_html_integration then true
2215 (* annotation-xml is an HTML integration point only with specific encoding values *)
2216 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
2217 match List.assoc_opt "encoding" node.Dom.attrs with
···2224 (* Check for MathML text integration points *)
2225 let is_mathml_text_integration_point node =
2226 node.Dom.namespace = Some "mathml" &&
2227- List.mem node.Dom.name ["mi"; "mo"; "mn"; "ms"; "mtext"]
2228 in
2229 (* Foreign content handling *)
2230 let in_foreign =
···2293 let is_html_integration_point node =
2294 (* SVG foreignObject, desc, and title are always HTML integration points *)
2295 if node.Dom.namespace = Some "svg" &&
2296- List.mem node.Dom.name Parser_constants.svg_html_integration then true
2297 (* annotation-xml is an HTML integration point only with specific encoding values *)
2298 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
2299 match List.assoc_opt "encoding" node.Dom.attrs with
···294let is_html_integration_point node =
295 (* SVG foreignObject, desc, and title are always HTML integration points *)
296 if node.Dom.namespace = Some "svg" &&
297+ Parser_constants.is_svg_html_integration node.Dom.name then true
298 (* annotation-xml is an HTML integration point only with specific encoding values *)
299 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
300 match List.assoc_opt "encoding" node.Dom.attrs with
···307(* Check if element is a MathML text integration point *)
308let is_mathml_text_integration_point node =
309 node.Dom.namespace = Some "mathml" &&
310+ Parser_constants.is_mathml_text_integration node.Dom.name
311312(* Scope checks - integration points also terminate scope (except for table scope) *)
313(* Per WHATWG spec, scope checks only consider HTML namespace elements for the target names *)
···341 | [] -> false
342 | n :: rest ->
343 if n.Dom.name = name then true
344+ else if not (Parser_constants.is_select_scope_exclude n.Dom.name) then false
345 else check rest
346 in
347 check t.open_elements
···350let generate_implied_end_tags t ?except () =
351 let rec loop () =
352 match current_node t with
353+ | Some n when Parser_constants.is_implied_end_tag n.Dom.name ->
354 (match except with
355 | Some ex when n.Dom.name = ex -> ()
356 | _ -> pop_current t; loop ())
···361let generate_all_implied_end_tags t =
362 let rec loop () =
363 match current_node t with
364+ | Some n when Parser_constants.is_thoroughly_implied_end_tag n.Dom.name ->
365 pop_current t; loop ()
366 | _ -> ()
367 in
···1105 when List.mem name ["address"; "article"; "aside"; "blockquote"; "center"; "details"; "dialog"; "dir"; "div"; "dl"; "fieldset"; "figcaption"; "figure"; "footer"; "header"; "hgroup"; "main"; "menu"; "nav"; "ol"; "p"; "search"; "section"; "summary"; "ul"] ->
1106 if has_element_in_button_scope t "p" then close_p_element t;
1107 ignore (insert_element t name ~push:true attrs)
1108+ | Token.Tag { kind = Token.Start; name; attrs; _ } when Parser_constants.is_heading_element name ->
1109 if has_element_in_button_scope t "p" then close_p_element t;
1110 (match current_node t with
1111+ | Some n when Parser_constants.is_heading_element n.Dom.name ->
1112 parse_error t "unexpected-start-tag";
1113 pop_current t
1114 | _ -> ());
···1243 | _ -> ());
1244 pop_until_tag t name
1245 end
1246+ | Token.Tag { kind = Token.End; name; _ } when Parser_constants.is_heading_element name ->
1247 if not (has_element_in_scope_impl t Parser_constants.heading_elements Parser_constants.default_scope ~check_integration_points:true) then
1248 parse_error t "unexpected-end-tag"
1249 else begin
···1437 reconstruct_active_formatting t;
1438 ignore (insert_element t name ~push:true attrs);
1439 (* Check for self-closing on non-void HTML element *)
1440+ if self_closing && not (Parser_constants.is_void_element name) then
1441 parse_error t "non-void-html-element-start-tag-with-trailing-solidus"
1442 | Token.Tag { kind = Token.End; name; _ } ->
1443 (* Any other end tag *)
···1943 ignore (insert_element t name attrs)
1944 (* Don't push to stack - void elements *)
1945 (* Handle formatting elements in select *)
1946+ | Token.Tag { kind = Token.Start; name; attrs; _ } when Parser_constants.is_formatting_element name ->
1947 reconstruct_active_formatting t;
1948 let node = insert_element t name ~push:true attrs in
1949 push_formatting_element t node name attrs
1950+ | Token.Tag { kind = Token.End; name; _ } when Parser_constants.is_formatting_element name ->
1951 (* Find select element and check if formatting element is inside select *)
1952 let select_idx = ref None in
1953 let fmt_idx = ref None in
···2211 let is_html_integration_point node =
2212 (* SVG foreignObject, desc, and title are always HTML integration points *)
2213 if node.Dom.namespace = Some "svg" &&
2214+ Parser_constants.is_svg_html_integration node.Dom.name then true
2215 (* annotation-xml is an HTML integration point only with specific encoding values *)
2216 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
2217 match List.assoc_opt "encoding" node.Dom.attrs with
···2224 (* Check for MathML text integration points *)
2225 let is_mathml_text_integration_point node =
2226 node.Dom.namespace = Some "mathml" &&
2227+ Parser_constants.is_mathml_text_integration node.Dom.name
2228 in
2229 (* Foreign content handling *)
2230 let in_foreign =
···2293 let is_html_integration_point node =
2294 (* SVG foreignObject, desc, and title are always HTML integration points *)
2295 if node.Dom.namespace = Some "svg" &&
2296+ Parser_constants.is_svg_html_integration node.Dom.name then true
2297 (* annotation-xml is an HTML integration point only with specific encoding values *)
2298 else if node.Dom.namespace = Some "mathml" && node.Dom.name = "annotation-xml" then
2299 match List.assoc_opt "encoding" node.Dom.attrs with
+23
lib/htmlrw_check/datatype/datatype.ml
···41 if start > end_pos then ""
42 else String.sub s start (end_pos - start + 1)
430000000000000000000000044(** Factory for creating enum-based validators.
45 Many HTML attributes accept a fixed set of keyword values.
46 Uses Hashtbl for O(1) membership check. *)
···41 if start > end_pos then ""
42 else String.sub s start (end_pos - start + 1)
(** [split_on_whitespace s] returns, in left-to-right order, the maximal runs
    of characters of [s] for which [is_whitespace] is false. Leading, trailing
    and repeated separators never produce empty tokens. The interface
    documents the separator set as the HTML5 whitespace characters (space,
    tab, LF, FF, CR). Used for space-separated attribute values. *)
let split_on_whitespace s =
  let tokens = ref [] in
  let token_start = ref 0 in
  (* Record the token that ends just before index [stop], unless it is empty. *)
  let close_token stop =
    if stop > !token_start then
      tokens := String.sub s !token_start (stop - !token_start) :: !tokens
  in
  String.iteri
    (fun i c ->
      if is_whitespace c then begin
        close_token i;
        (* The next token can start no earlier than just past this separator. *)
        token_start := i + 1
      end)
    s;
  (* The last token has no trailing separator, so flush it explicitly. *)
  close_token (String.length s);
  List.rev !tokens
66+67(** Factory for creating enum-based validators.
68 Many HTML attributes accept a fixed set of keyword values.
69 Uses Hashtbl for O(1) membership check. *)
+4
lib/htmlrw_check/datatype/datatype.mli
···44(** Trim HTML5 whitespace from both ends of a string. *)
45val trim_html_spaces : string -> string
46000047(** {2 Datatype Factories} *)
4849(** Create an enum-based validator for attributes with fixed keyword values.
···44(** Trim HTML5 whitespace from both ends of a string. *)
45val trim_html_spaces : string -> string
4647+(** Split string on HTML5 whitespace characters (space, tab, LF, FF, CR).
48+ Filters out empty tokens. Used for space-separated attribute values. *)
49+val split_on_whitespace : string -> string list
50+51(** {2 Datatype Factories} *)
5253(** Create an enum-based validator for attributes with fixed keyword values.
+6-20
lib/htmlrw_check/datatype/dt_autocomplete.ml
···1(** Autocomplete attribute validation based on HTML5 spec *)
23-(** Check if character is whitespace *)
4-let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
056-(** Convert character to ASCII lowercase *)
7-let to_ascii_lowercase c =
8- if c >= 'A' && c <= 'Z' then Char.chr (Char.code c + 32) else c
9-10-(** Trim whitespace from string *)
11let trim_whitespace s =
12 let s = String.trim s in
13 (* Also collapse internal whitespace *)
···104 "impp";
105 ]
106107-(** Split string on whitespace *)
108-let split_on_whitespace s =
109- let rec split acc start i =
110- if i >= String.length s then
111- if start < i then List.rev (String.sub s start (i - start) :: acc)
112- else List.rev acc
113- else if is_whitespace s.[i] then
114- if start < i then
115- split (String.sub s start (i - start) :: acc) (i + 1) (i + 1)
116- else split acc (i + 1) (i + 1)
117- else split acc start (i + 1)
118- in
119- split [] 0 0
120121(** Check if string starts with prefix *)
122let starts_with s prefix =
···1(** Autocomplete attribute validation based on HTML5 spec *)
23+(* Use shared utilities from Datatype *)
4+let is_whitespace = Datatype.is_whitespace
5+let to_ascii_lowercase = Datatype.to_ascii_lowercase
67+(** Trim whitespace from string and collapse internal whitespace *)
00008let trim_whitespace s =
9 let s = String.trim s in
10 (* Also collapse internal whitespace *)
···101 "impp";
102 ]
(** [split_on_whitespace s] splits [s] into whitespace-separated tokens by
    delegating to the shared [Datatype.split_on_whitespace]. *)
let split_on_whitespace s = Datatype.split_on_whitespace s
00000000000106107(** Check if string starts with prefix *)
108let starts_with s prefix =
+34-44
lib/htmlrw_check/semantic/id_checker.ml
···50 else
51 None
5253-(** Split whitespace-separated ID references. *)
54-let split_ids value =
55- let rec split acc start i =
56- if i >= String.length value then
57- if i > start then
58- (String.sub value start (i - start)) :: acc
59- else
60- acc
61- else
62- match value.[i] with
63- | ' ' | '\t' | '\n' | '\r' ->
64- let acc' =
65- if i > start then
66- (String.sub value start (i - start)) :: acc
67- else
68- acc
69- in
70- split acc' (i + 1) (i + 1)
71- | _ ->
72- split acc start (i + 1)
73- in
74- List.rev (split [] 0 0)
7576-(** Attributes that reference a single ID. *)
77-let single_id_ref_attrs = [
78- "for"; (* label *)
79- "form"; (* form-associated elements *)
80- "list"; (* input *)
81- "aria-activedescendant";
82- "popovertarget"; (* button - references popover element *)
83- "commandfor"; (* button - references element to control *)
84- "anchor"; (* popover - references anchor element *)
85-]
0008687-(** Attributes that reference multiple IDs (space-separated). *)
88-let multi_id_ref_attrs = [
89- "headers"; (* td, th *)
90- "aria-labelledby";
91- "aria-describedby";
92- "aria-controls";
93- "aria-flowto";
94- "aria-owns";
95- "itemref";
96-]
00000009798(** Check and store an ID attribute. *)
99let check_id state ~element:_ ~id ~location:_ collector =
···161 if String.length value > 0 then
162 Hashtbl.add state.map_names value ()
163164- | attr when List.mem attr single_id_ref_attrs ->
165 add_reference state ~referring_element:element
166 ~attribute:attr ~referenced_id:value ~location
167168- | attr when List.mem attr multi_id_ref_attrs ->
169 (* Split space-separated IDs and add each as a reference *)
170 let ids = split_ids value in
171 List.iter (fun id ->
···50 else
51 None
(** [split_ids value] splits a whitespace-separated list of ID references
    into its individual IDs by delegating to the shared
    [Datatype.split_on_whitespace]. *)
let split_ids value = Datatype.split_on_whitespace value
(** Set of attribute names whose value references a single ID, stored as a
    [Hashtbl] with [unit] values so membership is a hash lookup. *)
let single_id_ref_attrs =
  let names = Hashtbl.create 8 in
  let register attr = Hashtbl.add names attr () in
  register "for";                    (* label *)
  register "form";                   (* form-associated elements *)
  register "list";                   (* input *)
  register "aria-activedescendant";
  register "popovertarget";          (* button - references popover element *)
  register "commandfor";             (* button - references element to control *)
  register "anchor";                 (* popover - references anchor element *)
  names

(** [is_single_id_ref_attr name] is [true] iff [name] is a member of
    [single_id_ref_attrs]. *)
let is_single_id_ref_attr name = Hashtbl.mem single_id_ref_attrs name
(** Set of attribute names whose value is a space-separated list of ID
    references, stored as a [Hashtbl] with [unit] values so membership is a
    hash lookup. *)
let multi_id_ref_attrs =
  let names = Hashtbl.create 8 in
  let register attr = Hashtbl.add names attr () in
  register "headers";                (* td, th *)
  register "aria-labelledby";
  register "aria-describedby";
  register "aria-controls";
  register "aria-flowto";
  register "aria-owns";
  register "itemref";
  names

(** [is_multi_id_ref_attr name] is [true] iff [name] is a member of
    [multi_id_ref_attrs]. *)
let is_multi_id_ref_attr name = Hashtbl.mem multi_id_ref_attrs name
8788(** Check and store an ID attribute. *)
89let check_id state ~element:_ ~id ~location:_ collector =
···151 if String.length value > 0 then
152 Hashtbl.add state.map_names value ()
153154+ | attr when is_single_id_ref_attr attr ->
155 add_reference state ~referring_element:element
156 ~attribute:attr ~referenced_id:value ~location
157158+ | attr when is_multi_id_ref_attr attr ->
159 (* Split space-separated IDs and add each as a reference *)
160 let ids = split_ids value in
161 List.iter (fun id ->
+2-22
lib/htmlrw_check/specialized/microdata_checker.ml
···43 Hashtbl.clear state.all_ids;
44 state.html_element_seen <- false
4546-(** Split whitespace-separated values. *)
47-let split_whitespace value =
48- let rec split acc start i =
49- if i >= String.length value then
50- if i > start then
51- (String.sub value start (i - start)) :: acc
52- else
53- acc
54- else
55- match value.[i] with
56- | ' ' | '\t' | '\n' | '\r' ->
57- let acc' =
58- if i > start then
59- (String.sub value start (i - start)) :: acc
60- else
61- acc
62- in
63- split acc' (i + 1) (i + 1)
64- | _ ->
65- split acc start (i + 1)
66- in
67- List.rev (split [] 0 0)
6869(** Check if a string is a valid URL (contains a colon). *)
70let is_url s =
···43 Hashtbl.clear state.all_ids;
44 state.html_element_seen <- false
(** [split_whitespace value] splits [value] into its whitespace-separated
    tokens by delegating to the shared [Datatype.split_on_whitespace]. *)
let split_whitespace value = Datatype.split_on_whitespace value
000000000000000000004849(** Check if a string is a valid URL (contains a colon). *)
50let is_url s =