···38end
3940let noop () = (module Noop : S)
41+42+(** Input signature for Make functor *)
43+module type Input = sig
44+ type state
45+ val create : unit -> state
46+ val reset : state -> unit
47+ val start_element : state -> element:Element.t -> Message_collector.t -> unit
48+ val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
49+ val characters : (state -> string -> Message_collector.t -> unit) option
50+ val end_document : (state -> Message_collector.t -> unit) option
51+end
52+53+(** Functor to create a checker with default implementations for optional callbacks *)
54+module Make (I : Input) : S with type state = I.state = struct
55+ type state = I.state
56+57+ let create = I.create
58+ let reset = I.reset
59+ let start_element = I.start_element
60+ let end_element = I.end_element
61+62+ let characters = match I.characters with
63+ | Some f -> f
64+ | None -> fun _ _ _ -> ()
65+66+ let end_document = match I.end_document with
67+ | Some f -> f
68+ | None -> fun _ _ -> ()
69+end
+42
lib/htmlrw_check/checker.mli
···172 (* Does nothing when walked over a DOM tree *)
173 ]}
174*)
000000000000000000000000000000000000000000
···172 (* Does nothing when walked over a DOM tree *)
173 ]}
174*)
175+176+(** {1 Checker Construction Helpers} *)
177+178+(** Input signature for {!Make} functor.
179+180+ Only the required callbacks need to be provided. Optional callbacks
181+ (characters, end_document) default to no-op implementations. *)
182+module type Input = sig
183+ type state
184+ val create : unit -> state
185+ val reset : state -> unit
186+ val start_element : state -> element:Element.t -> Message_collector.t -> unit
187+ val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
188+189+ (** Optional: called for text content. Default: no-op. *)
190+ val characters : (state -> string -> Message_collector.t -> unit) option
191+192+ (** Optional: called at document end. Default: no-op. *)
193+ val end_document : (state -> Message_collector.t -> unit) option
194+end
195+196+(** Functor to create a checker from an {!Input} module.
197+198+ This reduces boilerplate when creating checkers that don't need
199+ to handle all events. The characters and end_document callbacks
200+ default to no-ops if not provided.
201+202+ {b Example:}
203+ {[
204+ let checker = Checker.Make(struct
205+ type state = { mutable count : int }
206+ let create () = { count = 0 }
207+ let reset s = s.count <- 0
208+ let start_element s ~element collector =
209+ s.count <- s.count + 1
210+ let end_element _ ~tag:_ _ = ()
211+ let characters = None (* Use default no-op *)
212+ let end_document = None (* Use default no-op *)
213+ end)
214+ ]}
215+*)
216+module Make : functor (I : Input) -> S with type state = I.state
···1+(** Reusable context/ancestor tracking for checkers.
2+3+ Many checkers need to track element ancestors, depth, or maintain
4+ context stacks during DOM traversal. This module provides common
5+ utilities to reduce duplication. *)
6+7+(** Generic stack-based context tracker. *)
8+module Stack : sig
9+ type 'a t
10+11+ (** Create an empty context stack. *)
12+ val create : unit -> 'a t
13+14+ (** Reset the stack to empty. *)
15+ val reset : 'a t -> unit
16+17+ (** Push a context onto the stack. *)
18+ val push : 'a t -> 'a -> unit
19+20+ (** Pop a context from the stack. Returns None if empty. *)
21+ val pop : 'a t -> 'a option
22+23+ (** Get the current (top) context without removing it. *)
24+ val current : 'a t -> 'a option
25+26+ (** Get current depth (number of items on stack). *)
27+ val depth : 'a t -> int
28+29+ (** Check if stack is empty. *)
30+ val is_empty : 'a t -> bool
31+32+ (** Get all ancestors (bottom to top). *)
33+ val to_list : 'a t -> 'a list
34+35+ (** Check if any ancestor satisfies predicate. *)
36+ val exists : 'a t -> ('a -> bool) -> bool
37+38+ (** Find first ancestor satisfying predicate (top to bottom). *)
39+ val find : 'a t -> ('a -> bool) -> 'a option
40+41+ (** Iterate over all contexts (top to bottom). *)
42+ val iter : 'a t -> ('a -> unit) -> unit
43+end = struct
44+ type 'a t = { mutable stack : 'a list }
45+46+ let create () = { stack = [] }
47+ let reset t = t.stack <- []
48+ let push t x = t.stack <- x :: t.stack
49+ let pop t = match t.stack with
50+ | [] -> None
51+ | x :: rest -> t.stack <- rest; Some x
52+ let current t = match t.stack with
53+ | [] -> None
54+ | x :: _ -> Some x
55+ let depth t = List.length t.stack
56+ let is_empty t = t.stack = []
57+ let to_list t = List.rev t.stack
58+ let exists t f = List.exists f t.stack
59+ let find t f = List.find_opt f t.stack
60+ let iter t f = List.iter f t.stack
61+end
62+63+(** Simple depth counter for tracking nesting level. *)
64+module Depth : sig
65+ type t
66+67+ (** Create a depth counter starting at 0. *)
68+ val create : unit -> t
69+70+ (** Reset depth to 0. *)
71+ val reset : t -> unit
72+73+ (** Increment depth (entering element). *)
74+ val enter : t -> unit
75+76+ (** Decrement depth (leaving element). Returns false if was already 0. *)
77+ val leave : t -> bool
78+79+ (** Get current depth. *)
80+ val get : t -> int
81+82+ (** Check if inside (depth > 0). *)
83+ val is_inside : t -> bool
84+end = struct
85+ type t = { mutable depth : int }
86+87+ let create () = { depth = 0 }
88+ let reset t = t.depth <- 0
89+ let enter t = t.depth <- t.depth + 1
90+ let leave t =
91+ if t.depth > 0 then begin
92+ t.depth <- t.depth - 1;
93+ true
94+ end else false
95+ let get t = t.depth
96+ let is_inside t = t.depth > 0
97+end
98+99+(** Element name stack for tracking ancestors by name. *)
100+module Ancestors : sig
101+ type t
102+103+ (** Create an empty ancestor tracker. *)
104+ val create : unit -> t
105+106+ (** Reset to empty. *)
107+ val reset : t -> unit
108+109+ (** Push an element name onto the ancestor stack. *)
110+ val push : t -> string -> unit
111+112+ (** Pop an element from the ancestor stack. *)
113+ val pop : t -> unit
114+115+ (** Get the immediate parent element name. *)
116+ val parent : t -> string option
117+118+ (** Check if an element name is an ancestor. *)
119+ val has_ancestor : t -> string -> bool
120+121+ (** Get depth (number of ancestors). *)
122+ val depth : t -> int
123+124+ (** Get all ancestor names (outermost first). *)
125+ val to_list : t -> string list
126+end = struct
127+ type t = { mutable stack : string list }
128+129+ let create () = { stack = [] }
130+ let reset t = t.stack <- []
131+ let push t name = t.stack <- name :: t.stack
132+ let pop t = match t.stack with
133+ | _ :: rest -> t.stack <- rest
134+ | [] -> ()
135+ let parent t = match t.stack with
136+ | x :: _ -> Some x
137+ | [] -> None
138+ let has_ancestor t name = List.mem name t.stack
139+ let depth t = List.length t.stack
140+ let to_list t = List.rev t.stack
141+end
···1+(** Reusable context/ancestor tracking for checkers.
2+3+ Many checkers need to track element ancestors, depth, or maintain
4+ context stacks during DOM traversal. This module provides common
5+ utilities to reduce duplication across checkers.
6+7+ {2 Available Trackers}
8+9+ - {!Stack}: Generic stack for any context type
10+ - {!Depth}: Simple integer depth counter
11+ - {!Ancestors}: String-based element ancestor tracking *)
12+13+(** {2 Generic Stack} *)
14+15+(** Generic stack-based context tracker.
16+17+ Use this when you need to track complex state at each nesting level.
18+ For example, tracking whether each ancestor has certain attributes. *)
19+module Stack : sig
20+ type 'a t
21+22+ (** Create an empty context stack. *)
23+ val create : unit -> 'a t
24+25+ (** Reset the stack to empty. *)
26+ val reset : 'a t -> unit
27+28+ (** Push a context onto the stack. *)
29+ val push : 'a t -> 'a -> unit
30+31+ (** Pop a context from the stack. Returns None if empty. *)
32+ val pop : 'a t -> 'a option
33+34+ (** Get the current (top) context without removing it. *)
35+ val current : 'a t -> 'a option
36+37+ (** Get current depth (number of items on stack). *)
38+ val depth : 'a t -> int
39+40+ (** Check if stack is empty. *)
41+ val is_empty : 'a t -> bool
42+43+ (** Get all ancestors (bottom to top). *)
44+ val to_list : 'a t -> 'a list
45+46+ (** Check if any ancestor satisfies predicate. *)
47+ val exists : 'a t -> ('a -> bool) -> bool
48+49+ (** Find first ancestor satisfying predicate (top to bottom). *)
50+ val find : 'a t -> ('a -> bool) -> 'a option
51+52+ (** Iterate over all contexts (top to bottom). *)
53+ val iter : 'a t -> ('a -> unit) -> unit
54+end
55+56+(** {2 Depth Counter} *)
57+58+(** Simple depth counter for tracking nesting level.
59+60+ Use this when you only need to know if you're inside a certain
61+ element type, not the full context at each level. *)
62+module Depth : sig
63+ type t
64+65+ (** Create a depth counter starting at 0. *)
66+ val create : unit -> t
67+68+ (** Reset depth to 0. *)
69+ val reset : t -> unit
70+71+ (** Increment depth (entering element). *)
72+ val enter : t -> unit
73+74+ (** Decrement depth (leaving element). Returns false if was already 0. *)
75+ val leave : t -> bool
76+77+ (** Get current depth. *)
78+ val get : t -> int
79+80+ (** Check if inside (depth > 0). *)
81+ val is_inside : t -> bool
82+end
83+84+(** {2 Ancestor Tracker} *)
85+86+(** Element name stack for tracking ancestors by name.
87+88+ Use this when you need to check if certain elements are ancestors,
89+ but don't need complex context at each level. *)
90+module Ancestors : sig
91+ type t
92+93+ (** Create an empty ancestor tracker. *)
94+ val create : unit -> t
95+96+ (** Reset to empty. *)
97+ val reset : t -> unit
98+99+ (** Push an element name onto the ancestor stack. *)
100+ val push : t -> string -> unit
101+102+ (** Pop an element from the ancestor stack. *)
103+ val pop : t -> unit
104+105+ (** Get the immediate parent element name. *)
106+ val parent : t -> string option
107+108+ (** Check if an element name is an ancestor. *)
109+ val has_ancestor : t -> string -> bool
110+111+ (** Get depth (number of ancestors). *)
112+ val depth : t -> int
113+114+ (** Get all ancestor names (outermost first). *)
115+ val to_list : t -> string list
116+end
+15
lib/htmlrw_check/datatype/datatype.ml
···40 let end_pos = find_end (len - 1) in
41 if start > end_pos then ""
42 else String.sub s start (end_pos - start + 1)
000000000000000
···40 let end_pos = find_end (len - 1) in
41 if start > end_pos then ""
42 else String.sub s start (end_pos - start + 1)
43+44+(** Factory for creating enum-based validators.
45+ Many HTML attributes accept a fixed set of keyword values. *)
46+let make_enum ~name ~values ?(allow_empty = true) () : t =
47+ let values_set = List.map String.lowercase_ascii values in
48+ let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
49+ (module struct
50+ let name = name
51+ let validate s =
52+ let s_lower = string_to_ascii_lowercase s in
53+ if (allow_empty && s = "") || List.mem s_lower values_set then Ok ()
54+ else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
55+ s name (if allow_empty then "empty string, " else "") values_str)
56+ let is_valid s = Result.is_ok (validate s)
57+ end : S)
+8
lib/htmlrw_check/datatype/datatype.mli
···4344(** Trim HTML5 whitespace from both ends of a string. *)
45val trim_html_spaces : string -> string
00000000
···4344(** Trim HTML5 whitespace from both ends of a string. *)
45val trim_html_spaces : string -> string
46+47+(** {2 Datatype Factories} *)
48+49+(** Create an enum-based validator for attributes with fixed keyword values.
50+ @param name The datatype name (e.g., "loading", "crossorigin")
51+ @param values List of valid keyword values (case-insensitive)
52+ @param allow_empty Whether empty string is valid (default: true) *)
53+val make_enum : name:string -> values:string list -> ?allow_empty:bool -> unit -> t
···726727 (* Generic *)
728 | `Generic message -> message
729+730+(** {2 Error Construction Helpers} *)
731+732+(** Create a bad attribute value error with element, attribute, value, and reason. *)
733+let bad_value ~element ~attr ~value ~reason : t =
734+ `Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason))
735+736+(** Create a bad attribute value error with just a message. *)
737+let bad_value_msg msg : t =
738+ `Attr (`Bad_value_generic (`Message msg))
739+740+(** Create a missing required attribute error. *)
741+let missing_attr ~element ~attr : t =
742+ `Attr (`Missing (`Elem element, `Attr attr))
743+744+(** Create an attribute not allowed error. *)
745+let attr_not_allowed ~element ~attr : t =
746+ `Attr (`Not_allowed (`Attr attr, `Elem element))
747+748+(** Create an element not allowed as child error. *)
749+let not_allowed_as_child ~child ~parent : t =
750+ `Element (`Not_allowed_as_child (`Child child, `Parent parent))
751+752+(** Create a must not be empty error. *)
753+let must_not_be_empty ~element : t =
754+ `Element (`Must_not_be_empty (`Elem element))
+28
lib/htmlrw_check/error_code.mli
···748(** Format a string with Unicode curly quotes.
749 Wraps the string in U+201C and U+201D ("..."). *)
750val q : string -> string
0000000000000000000000000000
···748(** Format a string with Unicode curly quotes.
749 Wraps the string in U+201C and U+201D ("..."). *)
750val q : string -> string
751+752+(** {1 Error Construction Helpers}
753+754+ These functions simplify creating common error types. *)
755+756+(** Create a bad attribute value error.
757+ Example: [bad_value ~element:"img" ~attr:"src" ~value:"" ~reason:"URL cannot be empty"] *)
758+val bad_value : element:string -> attr:string -> value:string -> reason:string -> t
759+760+(** Create a bad attribute value error with just a message.
761+ Example: [bad_value_msg "The value must be a valid URL"] *)
762+val bad_value_msg : string -> t
763+764+(** Create a missing required attribute error.
765+ Example: [missing_attr ~element:"img" ~attr:"alt"] *)
766+val missing_attr : element:string -> attr:string -> t
767+768+(** Create an attribute not allowed error.
769+ Example: [attr_not_allowed ~element:"span" ~attr:"href"] *)
770+val attr_not_allowed : element:string -> attr:string -> t
771+772+(** Create an element not allowed as child error.
773+ Example: [not_allowed_as_child ~child:"div" ~parent:"p"] *)
774+val not_allowed_as_child : child:string -> parent:string -> t
775+776+(** Create a must not be empty error.
777+ Example: [must_not_be_empty ~element:"title"] *)
778+val must_not_be_empty : element:string -> t
+8-27
lib/htmlrw_check/specialized/h1_checker.mli
···1-(*---------------------------------------------------------------------------
2- Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3- SPDX-License-Identifier: MIT
4- ---------------------------------------------------------------------------*)
56-(** H1 element counter and validator.
078- This checker warns about multiple [<h1>] elements in a document.
9- While HTML5 technically allows multiple [<h1>] elements when using
10- the document outline algorithm, this algorithm was never implemented
11- by browsers and has been removed from the specification.
1213- {2 Best Practice}
14-15- Documents should have exactly one [<h1>] element that represents the
16- main heading of the page. Multiple [<h1>] elements can confuse users
17- and assistive technologies about the document's structure.
18-19- {2 Special Cases}
20-21- - [<h1>] elements inside [<svg>] content (e.g., in [<foreignObject>])
22- are not counted, as they may represent different content contexts
23- - The checker reports a warning after the second [<h1>] is encountered
2425 {2 Error Messages}
2627- Reports [Multiple_h1] when more than one [<h1>] element is found
28- in the document.
29-30- @see <https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements>
31- HTML Standard: The h1-h6 elements
32-*)
3334val checker : Checker.t
35-(** The h1 element counter/validator instance. *)
···1+(** H1 element counter checker.
00023+ This checker validates that documents don't have multiple h1 elements,
4+ which can confuse document structure and accessibility tools.
56+ {2 Validation Rules}
00078+ - Documents should have at most one [<h1>] element
9+ - [<h1>] elements inside SVG content (foreignObject, desc) are not counted
0000000001011 {2 Error Messages}
1213+ - [Multiple_h1]: Document contains more than one h1 element *)
000001415val checker : Checker.t
16+(** The H1 checker instance. *)
+14-2
lib/htmlrw_check/specialized/heading_checker.ml
···14 mutable first_heading_checked : bool;
15 mutable in_heading : Tag.html_tag option;
16 mutable heading_has_text : bool;
017}
1819let create () =
···24 first_heading_checked = false;
25 in_heading = None;
26 heading_has_text = false;
027 }
2829let reset state =
···32 state.has_any_heading <- false;
33 state.first_heading_checked <- false;
34 state.in_heading <- None;
35- state.heading_has_text <- false
03637(** Check if text is effectively empty (only whitespace). *)
38let is_empty_text text =
···4849let start_element state ~element collector =
50 match element.Element.tag with
51- | Tag.Html (#Tag.heading_tag as h) ->
00052 let level = match Tag.heading_level h with Some l -> l | None -> 0 in
53 let name = Tag.html_tag_to_string h in
54 state.has_any_heading <- true;
···89 | _ -> ()
9091let end_element state ~tag collector =
00000092 match state.in_heading, tag with
93 | Some h, Tag.Html h2 when h = h2 ->
94 if not state.heading_has_text then
···14 mutable first_heading_checked : bool;
15 mutable in_heading : Tag.html_tag option;
16 mutable heading_has_text : bool;
17+ mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
18}
1920let create () =
···25 first_heading_checked = false;
26 in_heading = None;
27 heading_has_text = false;
28+ svg_depth = 0;
29 }
3031let reset state =
···34 state.has_any_heading <- false;
35 state.first_heading_checked <- false;
36 state.in_heading <- None;
37+ state.heading_has_text <- false;
38+ state.svg_depth <- 0
3940(** Check if text is effectively empty (only whitespace). *)
41let is_empty_text text =
···5152let start_element state ~element collector =
53 match element.Element.tag with
54+ | Tag.Svg _ ->
55+ (* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
56+ state.svg_depth <- state.svg_depth + 1
57+ | Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
58 let level = match Tag.heading_level h with Some l -> l | None -> 0 in
59 let name = Tag.html_tag_to_string h in
60 state.has_any_heading <- true;
···95 | _ -> ()
9697let end_element state ~tag collector =
98+ (* Track SVG depth *)
99+ (match tag with
100+ | Tag.Svg _ when state.svg_depth > 0 ->
101+ state.svg_depth <- state.svg_depth - 1
102+ | _ -> ());
103+ (* Check for empty headings *)
104 match state.in_heading, tag with
105 | Some h, Tag.Html h2 when h = h2 ->
106 if not state.heading_has_text then