···3838end
39394040let noop () = (module Noop : S)
4141+4242+(** Input signature for Make functor *)
4343+module type Input = sig
4444+ type state
4545+ val create : unit -> state
4646+ val reset : state -> unit
4747+ val start_element : state -> element:Element.t -> Message_collector.t -> unit
4848+ val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
4949+ val characters : (state -> string -> Message_collector.t -> unit) option
5050+ val end_document : (state -> Message_collector.t -> unit) option
5151+end
5252+5353+(** Functor to create a checker with default implementations for optional callbacks *)
5454+module Make (I : Input) : S with type state = I.state = struct
5555+ type state = I.state
5656+5757+ let create = I.create
5858+ let reset = I.reset
5959+ let start_element = I.start_element
6060+ let end_element = I.end_element
6161+6262+ let characters = match I.characters with
6363+ | Some f -> f
6464+ | None -> fun _ _ _ -> ()
6565+6666+ let end_document = match I.end_document with
6767+ | Some f -> f
6868+ | None -> fun _ _ -> ()
6969+end
+42
lib/htmlrw_check/checker.mli
···172172 (* Does nothing when walked over a DOM tree *)
173173 ]}
174174*)
175175+176176+(** {1 Checker Construction Helpers} *)
177177+178178+(** Input signature for {!Make} functor.
179179+180180+ Only the required callbacks need to be provided. Optional callbacks
181181+ (characters, end_document) default to no-op implementations. *)
182182+module type Input = sig
183183+ type state
184184+ val create : unit -> state
185185+ val reset : state -> unit
186186+ val start_element : state -> element:Element.t -> Message_collector.t -> unit
187187+ val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
188188+189189+ (** Optional: called for text content. Default: no-op. *)
190190+ val characters : (state -> string -> Message_collector.t -> unit) option
191191+192192+ (** Optional: called at document end. Default: no-op. *)
193193+ val end_document : (state -> Message_collector.t -> unit) option
194194+end
195195+196196+(** Functor to create a checker from an {!Input} module.
197197+198198+ This reduces boilerplate when creating checkers that don't need
199199+ to handle all events. The characters and end_document callbacks
200200+ default to no-ops if not provided.
201201+202202+ {b Example:}
203203+ {[
204204+ let checker = Checker.Make(struct
205205+ type state = { mutable count : int }
206206+ let create () = { count = 0 }
207207+ let reset s = s.count <- 0
208208+ let start_element s ~element collector =
209209+ s.count <- s.count + 1
210210+ let end_element _ ~tag:_ _ = ()
211211+ let characters = None (* Use default no-op *)
212212+ let end_document = None (* Use default no-op *)
213213+ end)
214214+ ]}
215215+*)
216216+module Make : functor (I : Input) -> S with type state = I.state
+141
lib/htmlrw_check/context_tracker.ml
···11+(** Reusable context/ancestor tracking for checkers.
22+33+ Many checkers need to track element ancestors, depth, or maintain
44+ context stacks during DOM traversal. This module provides common
55+ utilities to reduce duplication. *)
66+77+(** Generic stack-based context tracker. *)
88+module Stack : sig
99+ type 'a t
1010+1111+ (** Create an empty context stack. *)
1212+ val create : unit -> 'a t
1313+1414+ (** Reset the stack to empty. *)
1515+ val reset : 'a t -> unit
1616+1717+ (** Push a context onto the stack. *)
1818+ val push : 'a t -> 'a -> unit
1919+2020+ (** Pop a context from the stack. Returns None if empty. *)
2121+ val pop : 'a t -> 'a option
2222+2323+ (** Get the current (top) context without removing it. *)
2424+ val current : 'a t -> 'a option
2525+2626+ (** Get current depth (number of items on stack). *)
2727+ val depth : 'a t -> int
2828+2929+ (** Check if stack is empty. *)
3030+ val is_empty : 'a t -> bool
3131+3232+ (** Get all ancestors (bottom to top). *)
3333+ val to_list : 'a t -> 'a list
3434+3535+ (** Check if any ancestor satisfies predicate. *)
3636+ val exists : 'a t -> ('a -> bool) -> bool
3737+3838+ (** Find first ancestor satisfying predicate (top to bottom). *)
3939+ val find : 'a t -> ('a -> bool) -> 'a option
4040+4141+ (** Iterate over all contexts (top to bottom). *)
4242+ val iter : 'a t -> ('a -> unit) -> unit
4343+end = struct
4444+ type 'a t = { mutable stack : 'a list }
4545+4646+ let create () = { stack = [] }
4747+ let reset t = t.stack <- []
4848+ let push t x = t.stack <- x :: t.stack
4949+ let pop t = match t.stack with
5050+ | [] -> None
5151+ | x :: rest -> t.stack <- rest; Some x
5252+ let current t = match t.stack with
5353+ | [] -> None
5454+ | x :: _ -> Some x
5555+ let depth t = List.length t.stack
5656+ let is_empty t = t.stack = []
5757+ let to_list t = List.rev t.stack
5858+ let exists t f = List.exists f t.stack
5959+ let find t f = List.find_opt f t.stack
6060+ let iter t f = List.iter f t.stack
6161+end
6262+6363+(** Simple depth counter for tracking nesting level. *)
6464+module Depth : sig
6565+ type t
6666+6767+ (** Create a depth counter starting at 0. *)
6868+ val create : unit -> t
6969+7070+ (** Reset depth to 0. *)
7171+ val reset : t -> unit
7272+7373+ (** Increment depth (entering element). *)
7474+ val enter : t -> unit
7575+7676+ (** Decrement depth (leaving element). Returns false if was already 0. *)
7777+ val leave : t -> bool
7878+7979+ (** Get current depth. *)
8080+ val get : t -> int
8181+8282+ (** Check if inside (depth > 0). *)
8383+ val is_inside : t -> bool
8484+end = struct
8585+ type t = { mutable depth : int }
8686+8787+ let create () = { depth = 0 }
8888+ let reset t = t.depth <- 0
8989+ let enter t = t.depth <- t.depth + 1
9090+ let leave t =
9191+ if t.depth > 0 then begin
9292+ t.depth <- t.depth - 1;
9393+ true
9494+ end else false
9595+ let get t = t.depth
9696+ let is_inside t = t.depth > 0
9797+end
9898+9999+(** Element name stack for tracking ancestors by name. *)
100100+module Ancestors : sig
101101+ type t
102102+103103+ (** Create an empty ancestor tracker. *)
104104+ val create : unit -> t
105105+106106+ (** Reset to empty. *)
107107+ val reset : t -> unit
108108+109109+ (** Push an element name onto the ancestor stack. *)
110110+ val push : t -> string -> unit
111111+112112+ (** Pop an element from the ancestor stack. *)
113113+ val pop : t -> unit
114114+115115+ (** Get the immediate parent element name. *)
116116+ val parent : t -> string option
117117+118118+ (** Check if an element name is an ancestor. *)
119119+ val has_ancestor : t -> string -> bool
120120+121121+ (** Get depth (number of ancestors). *)
122122+ val depth : t -> int
123123+124124+ (** Get all ancestor names (outermost first). *)
125125+ val to_list : t -> string list
126126+end = struct
127127+ type t = { mutable stack : string list }
128128+129129+ let create () = { stack = [] }
130130+ let reset t = t.stack <- []
131131+ let push t name = t.stack <- name :: t.stack
132132+ let pop t = match t.stack with
133133+ | _ :: rest -> t.stack <- rest
134134+ | [] -> ()
135135+ let parent t = match t.stack with
136136+ | x :: _ -> Some x
137137+ | [] -> None
138138+ let has_ancestor t name = List.mem name t.stack
139139+ let depth t = List.length t.stack
140140+ let to_list t = List.rev t.stack
141141+end
+116
lib/htmlrw_check/context_tracker.mli
···11+(** Reusable context/ancestor tracking for checkers.
22+33+ Many checkers need to track element ancestors, depth, or maintain
44+ context stacks during DOM traversal. This module provides common
55+ utilities to reduce duplication across checkers.
66+77+ {2 Available Trackers}
88+99+ - {!Stack}: Generic stack for any context type
1010+ - {!Depth}: Simple integer depth counter
1111+ - {!Ancestors}: String-based element ancestor tracking *)
1212+1313+(** {2 Generic Stack} *)
1414+1515+(** Generic stack-based context tracker.
1616+1717+ Use this when you need to track complex state at each nesting level.
1818+ For example, tracking whether each ancestor has certain attributes. *)
1919+module Stack : sig
2020+ type 'a t
2121+2222+ (** Create an empty context stack. *)
2323+ val create : unit -> 'a t
2424+2525+ (** Reset the stack to empty. *)
2626+ val reset : 'a t -> unit
2727+2828+ (** Push a context onto the stack. *)
2929+ val push : 'a t -> 'a -> unit
3030+3131+ (** Pop a context from the stack. Returns None if empty. *)
3232+ val pop : 'a t -> 'a option
3333+3434+ (** Get the current (top) context without removing it. *)
3535+ val current : 'a t -> 'a option
3636+3737+ (** Get current depth (number of items on stack). *)
3838+ val depth : 'a t -> int
3939+4040+ (** Check if stack is empty. *)
4141+ val is_empty : 'a t -> bool
4242+4343+ (** Get all ancestors (bottom to top). *)
4444+ val to_list : 'a t -> 'a list
4545+4646+ (** Check if any ancestor satisfies predicate. *)
4747+ val exists : 'a t -> ('a -> bool) -> bool
4848+4949+ (** Find first ancestor satisfying predicate (top to bottom). *)
5050+ val find : 'a t -> ('a -> bool) -> 'a option
5151+5252+ (** Iterate over all contexts (top to bottom). *)
5353+ val iter : 'a t -> ('a -> unit) -> unit
5454+end
5555+5656+(** {2 Depth Counter} *)
5757+5858+(** Simple depth counter for tracking nesting level.
5959+6060+ Use this when you only need to know if you're inside a certain
6161+ element type, not the full context at each level. *)
6262+module Depth : sig
6363+ type t
6464+6565+ (** Create a depth counter starting at 0. *)
6666+ val create : unit -> t
6767+6868+ (** Reset depth to 0. *)
6969+ val reset : t -> unit
7070+7171+ (** Increment depth (entering element). *)
7272+ val enter : t -> unit
7373+7474+ (** Decrement depth (leaving element). Returns false if was already 0. *)
7575+ val leave : t -> bool
7676+7777+ (** Get current depth. *)
7878+ val get : t -> int
7979+8080+ (** Check if inside (depth > 0). *)
8181+ val is_inside : t -> bool
8282+end
8383+8484+(** {2 Ancestor Tracker} *)
8585+8686+(** Element name stack for tracking ancestors by name.
8787+8888+ Use this when you need to check if certain elements are ancestors,
8989+ but don't need complex context at each level. *)
9090+module Ancestors : sig
9191+ type t
9292+9393+ (** Create an empty ancestor tracker. *)
9494+ val create : unit -> t
9595+9696+ (** Reset to empty. *)
9797+ val reset : t -> unit
9898+9999+ (** Push an element name onto the ancestor stack. *)
100100+ val push : t -> string -> unit
101101+102102+ (** Pop an element from the ancestor stack. *)
103103+ val pop : t -> unit
104104+105105+ (** Get the immediate parent element name. *)
106106+ val parent : t -> string option
107107+108108+ (** Check if an element name is an ancestor. *)
109109+ val has_ancestor : t -> string -> bool
110110+111111+ (** Get depth (number of ancestors). *)
112112+ val depth : t -> int
113113+114114+ (** Get all ancestor names (outermost first). *)
115115+ val to_list : t -> string list
116116+end
+15
lib/htmlrw_check/datatype/datatype.ml
···4040 let end_pos = find_end (len - 1) in
4141 if start > end_pos then ""
4242 else String.sub s start (end_pos - start + 1)
4343+4444+(** Factory for creating enum-based validators.
4545+ Many HTML attributes accept a fixed set of keyword values. *)
4646+let make_enum ~name ~values ?(allow_empty = true) () : t =
4747+ let values_set = List.map String.lowercase_ascii values in
4848+ let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
4949+ (module struct
5050+ let name = name
5151+ let validate s =
5252+ let s_lower = string_to_ascii_lowercase s in
5353+ if (allow_empty && s = "") || List.mem s_lower values_set then Ok ()
5454+ else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
5555+ s name (if allow_empty then "empty string, " else "") values_str)
5656+ let is_valid s = Result.is_ok (validate s)
5757+ end : S)
+8
lib/htmlrw_check/datatype/datatype.mli
···43434444(** Trim HTML5 whitespace from both ends of a string. *)
4545val trim_html_spaces : string -> string
4646+4747+(** {2 Datatype Factories} *)
4848+4949+(** Create an enum-based validator for attributes with fixed keyword values.
5050+ @param name The datatype name (e.g., "loading", "crossorigin")
5151+ @param values List of valid keyword values (case-insensitive)
5252+ @param allow_empty Whether empty string is valid (default: true) *)
5353+val make_enum : name:string -> values:string list -> ?allow_empty:bool -> unit -> t
+26
lib/htmlrw_check/error_code.ml
···726726727727 (* Generic *)
728728 | `Generic message -> message
729729+730730+(** {2 Error Construction Helpers} *)
731731+732732+(** Create a bad attribute value error with element, attribute, value, and reason. *)
733733+let bad_value ~element ~attr ~value ~reason : t =
734734+ `Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason))
735735+736736+(** Create a bad attribute value error with just a message. *)
737737+let bad_value_msg msg : t =
738738+ `Attr (`Bad_value_generic (`Message msg))
739739+740740+(** Create a missing required attribute error. *)
741741+let missing_attr ~element ~attr : t =
742742+ `Attr (`Missing (`Elem element, `Attr attr))
743743+744744+(** Create an attribute not allowed error. *)
745745+let attr_not_allowed ~element ~attr : t =
746746+ `Attr (`Not_allowed (`Attr attr, `Elem element))
747747+748748+(** Create an element not allowed as child error. *)
749749+let not_allowed_as_child ~child ~parent : t =
750750+ `Element (`Not_allowed_as_child (`Child child, `Parent parent))
751751+752752+(** Create a must not be empty error. *)
753753+let must_not_be_empty ~element : t =
754754+ `Element (`Must_not_be_empty (`Elem element))
+28
lib/htmlrw_check/error_code.mli
···748748(** Format a string with Unicode curly quotes.
749749 Wraps the string in U+201C and U+201D ("..."). *)
750750val q : string -> string
751751+752752+(** {1 Error Construction Helpers}
753753+754754+ These functions simplify creating common error types. *)
755755+756756+(** Create a bad attribute value error.
757757+ Example: [bad_value ~element:"img" ~attr:"src" ~value:"" ~reason:"URL cannot be empty"] *)
758758+val bad_value : element:string -> attr:string -> value:string -> reason:string -> t
759759+760760+(** Create a bad attribute value error with just a message.
761761+ Example: [bad_value_msg "The value must be a valid URL"] *)
762762+val bad_value_msg : string -> t
763763+764764+(** Create a missing required attribute error.
765765+ Example: [missing_attr ~element:"img" ~attr:"alt"] *)
766766+val missing_attr : element:string -> attr:string -> t
767767+768768+(** Create an attribute not allowed error.
769769+ Example: [attr_not_allowed ~element:"span" ~attr:"href"] *)
770770+val attr_not_allowed : element:string -> attr:string -> t
771771+772772+(** Create an element not allowed as child error.
773773+ Example: [not_allowed_as_child ~child:"div" ~parent:"p"] *)
774774+val not_allowed_as_child : child:string -> parent:string -> t
775775+776776+(** Create a must not be empty error.
777777+ Example: [must_not_be_empty ~element:"title"] *)
778778+val must_not_be_empty : element:string -> t
+8-27
lib/htmlrw_check/specialized/h1_checker.mli
···11-(*---------------------------------------------------------------------------
22- Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33- SPDX-License-Identifier: MIT
44- ---------------------------------------------------------------------------*)
11+(** H1 element counter checker.
5266-(** H1 element counter and validator.
33+ This checker validates that documents don't have multiple h1 elements,
44+ which can confuse document structure and accessibility tools.
7588- This checker warns about multiple [<h1>] elements in a document.
99- While HTML5 technically allows multiple [<h1>] elements when using
1010- the document outline algorithm, this algorithm was never implemented
1111- by browsers and has been removed from the specification.
66+ {2 Validation Rules}
1271313- {2 Best Practice}
1414-1515- Documents should have exactly one [<h1>] element that represents the
1616- main heading of the page. Multiple [<h1>] elements can confuse users
1717- and assistive technologies about the document's structure.
1818-1919- {2 Special Cases}
2020-2121- - [<h1>] elements inside [<svg>] content (e.g., in [<foreignObject>])
2222- are not counted, as they may represent different content contexts
2323- - The checker reports a warning after the second [<h1>] is encountered
88+ - Documents should have at most one [<h1>] element
99+ - [<h1>] elements inside SVG content (foreignObject, desc) are not counted
24102511 {2 Error Messages}
26122727- Reports [Multiple_h1] when more than one [<h1>] element is found
2828- in the document.
2929-3030- @see <https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements>
3131- HTML Standard: The h1-h6 elements
3232-*)
1313+ - [Multiple_h1]: Document contains more than one h1 element *)
33143415val checker : Checker.t
3535-(** The h1 element counter/validator instance. *)
1616+(** The H1 checker instance. *)
+14-2
lib/htmlrw_check/specialized/heading_checker.ml
···1414 mutable first_heading_checked : bool;
1515 mutable in_heading : Tag.html_tag option;
1616 mutable heading_has_text : bool;
1717+ mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
1718}
18191920let create () =
···2425 first_heading_checked = false;
2526 in_heading = None;
2627 heading_has_text = false;
2828+ svg_depth = 0;
2729 }
28302931let reset state =
···3234 state.has_any_heading <- false;
3335 state.first_heading_checked <- false;
3436 state.in_heading <- None;
3535- state.heading_has_text <- false
3737+ state.heading_has_text <- false;
3838+ state.svg_depth <- 0
36393740(** Check if text is effectively empty (only whitespace). *)
3841let is_empty_text text =
···48514952let start_element state ~element collector =
5053 match element.Element.tag with
5151- | Tag.Html (#Tag.heading_tag as h) ->
5454+ | Tag.Svg _ ->
5555+ (* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
5656+ state.svg_depth <- state.svg_depth + 1
5757+ | Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
5258 let level = match Tag.heading_level h with Some l -> l | None -> 0 in
5359 let name = Tag.html_tag_to_string h in
5460 state.has_any_heading <- true;
···8995 | _ -> ()
90969197let end_element state ~tag collector =
9898+ (* Track SVG depth *)
9999+ (match tag with
100100+ | Tag.Svg _ when state.svg_depth > 0 ->
101101+ state.svg_depth <- state.svg_depth - 1
102102+ | _ -> ());
103103+ (* Check for empty headings *)
92104 match state.in_heading, tag with
93105 | Some h, Tag.Html h2 when h = h2 ->
94106 if not state.heading_has_text then