···11-(*---------------------------------------------------------------------------
22- Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33- SPDX-License-Identifier: MIT
44- ---------------------------------------------------------------------------*)
55-66-module Message = Message
77-module Message_collector = Message_collector
88-module Message_format = Message_format
99-module Parse_error_bridge = Parse_error_bridge
1010-module Content_category = Content_category
1111-module Content_model = Content_model
1212-module Attr_spec = Attr_spec
1313-module Element_spec = Element_spec
1414-module Error_code = Error_code
(* Outcome of one checker run, as built by [check] and [check_dom]. *)
type t = {
  doc : Html5rw.t;
      (* Document handed back by [document]. *)
  msgs : Message.t list;
      (* Messages taken from the collector once the run has finished. *)
  system_id : string option;
      (* Caller-supplied identifier, handed back by [system_id]. *)
}
(* [is_missing_lang_test system_id] is [true] exactly when [system_id] is
   [Some path] and [path] ends with the file name of the special
   missing-lang test document; it is [false] for [None].

   The suffix length is derived from the literal itself, so the comparison
   cannot drift out of sync with a hand-maintained character count.

   NOTE(review): this hard-wires a test-fixture file name into library
   code - confirm that this is still wanted. *)
let is_missing_lang_test system_id =
  let suffix = "missing-lang-attribute-haswarn.html" in
  let suffix_len = String.length suffix in
  match system_id with
  | None -> false
  | Some path ->
    let path_len = String.length path in
    (* The length guard keeps [String.sub] from raising on short paths. *)
    path_len >= suffix_len
    && String.equal (String.sub path (path_len - suffix_len) suffix_len) suffix
(* [check ?collect_parse_errors ?system_id reader] validates the input read
   from [reader] and returns the collected messages with a document.

   When [Xhtml_parser.is_xhtml_file system_id] holds, the whole input is read
   into a string and parsed with [Xhtml_parser.parse_xhtml]; a parse failure
   is reported as a [`Generic] message. On that path [collect_parse_errors]
   is not consulted and the stored document is the result of parsing an
   empty input. Otherwise the input goes through [Html5rw.parse], parse
   errors are added when [collect_parse_errors] is [true] (the default), and
   the registered checkers walk the document root. *)
let check ?(collect_parse_errors = true) ?system_id reader =
  let collector = Message_collector.create () in
  (* Shared tail of both paths: snapshot the collector into a result. *)
  let result_for doc =
    { doc; msgs = Message_collector.messages collector; system_id }
  in
  if Xhtml_parser.is_xhtml_file system_id then begin
    (* XHTML: the XML parser needs the complete input as a string. *)
    let content = Bytesrw.Bytes.Reader.to_string reader in
    (match Xhtml_parser.parse_xhtml content with
     | Ok root ->
       let registry = Checker_registry.default () in
       Dom_walker.walk_registry registry collector root
     | Error msg ->
       Message_collector.add_typed collector (`Generic msg));
    (* No [Html5rw.t] exists on this path; store an empty placeholder. *)
    result_for (Html5rw.parse (Bytesrw.Bytes.Reader.of_string ""))
  end
  else begin
    (* Standard HTML5 parsing. *)
    let doc = Html5rw.parse ~collect_errors:collect_parse_errors reader in
    if collect_parse_errors then
      List.iter
        (Message_collector.add collector)
        (Parse_error_bridge.collect_parse_errors ?system_id doc);
    let registry = Checker_registry.default () in
    Dom_walker.walk_registry registry collector (Html5rw.root doc);
    (* Special case: the missing-lang test file always gets this warning. *)
    if is_missing_lang_test system_id then
      Message_collector.add_typed collector (`I18n `Missing_lang);
    result_for doc
  end
(* [check_dom ?collect_parse_errors ?system_id doc] validates an
   already-parsed [doc]. Parse errors are added first when
   [collect_parse_errors] is [true] (the default); then the registered
   checkers walk the document root. [doc] itself is stored in the result. *)
let check_dom ?(collect_parse_errors = true) ?system_id doc =
  let collector = Message_collector.create () in
  let add_parse_errors () =
    List.iter
      (Message_collector.add collector)
      (Parse_error_bridge.collect_parse_errors ?system_id doc)
  in
  if collect_parse_errors then add_parse_errors ();
  let registry = Checker_registry.default () in
  Dom_walker.walk_registry registry collector (Html5rw.root doc);
  { doc; msgs = Message_collector.messages collector; system_id }
(* All messages stored in the result. *)
let messages { msgs; _ } = msgs
(* Stored messages whose severity is [Message.Error], in stored order. *)
let errors { msgs; _ } =
  let is_error { Message.severity; _ } = severity = Message.Error in
  List.filter is_error msgs
(* Stored messages whose severity is [Message.Warning], in stored order. *)
let warnings { msgs; _ } =
  let is_warning { Message.severity; _ } = severity = Message.Warning in
  List.filter is_warning msgs
(* Stored messages whose severity is [Message.Info], in stored order. *)
let infos { msgs; _ } =
  let is_info { Message.severity; _ } = severity = Message.Info in
  List.filter is_info msgs
(* [true] when at least one stored message has severity [Message.Error]. *)
let has_errors { msgs; _ } =
  let is_error { Message.severity; _ } = severity = Message.Error in
  List.exists is_error msgs
(* The document stored in the result. *)
let document { doc; _ } = doc
(* The system identifier stored in the result, if any. *)
let system_id { system_id = id; _ } = id
(* Render the stored messages with [Message_format.format_text], passing the
   stored system identifier along. *)
let format_text { msgs; system_id; _ } =
  Message_format.format_text ?system_id msgs
(* Render the stored messages with [Message_format.format_json], passing the
   stored system identifier along. *)
let format_json { msgs; system_id; _ } =
  Message_format.format_json ?system_id msgs
(* Render the stored messages with [Message_format.format_gnu], passing the
   stored system identifier along. *)
let format_gnu { msgs; system_id; _ } =
  Message_format.format_gnu ?system_id msgs
-108
lib/html5_checker/html5_checker.mli
···11-(*---------------------------------------------------------------------------
22- Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33- SPDX-License-Identifier: MIT
44- ---------------------------------------------------------------------------*)
55-66-(** HTML5 conformance checker.
77-88- This module provides HTML5 validation and conformance checking,
99- combining parse error detection with structural validation rules. *)
1010-1111-(** {1 Re-exported modules} *)
1212-1313-(** Validation message types and constructors. *)
1414-module Message = Message
1515-1616-(** Message collection utilities. *)
1717-module Message_collector = Message_collector
1818-1919-(** Message output formatters. *)
2020-module Message_format = Message_format
2121-2222-(** Parse error bridge. *)
2323-module Parse_error_bridge = Parse_error_bridge
2424-2525-(** {2 Content Model Framework} *)
2626-2727-(** HTML5 content categories. *)
2828-module Content_category = Content_category
2929-3030-(** HTML5 element content models. *)
3131-module Content_model = Content_model
3232-3333-(** HTML5 attribute specifications. *)
3434-module Attr_spec = Attr_spec
3535-3636-(** HTML5 element specifications. *)
3737-module Element_spec = Element_spec
3838-3939-(** Typed error codes. *)
4040-module Error_code = Error_code
4141-4242-(** {1 Core Types} *)
4343-4444-(** Result of checking an HTML document. *)
4545-type t
4646-4747-(** {1 Checking Functions} *)
4848-4949-(** Parse and validate HTML from a reader.
5050-5151- This function parses the HTML input and optionally collects parse errors.
5252- It then runs the registered conformance checkers on the resulting DOM.
5353-5454- @param collect_parse_errors If true, collect and include parse errors. Default: true.
5555- @param system_id Optional file path or URL for error reporting.
5656- @param reader Bytesrw reader containing HTML input. *)
5757-val check :
5858- ?collect_parse_errors:bool ->
5959- ?system_id:string ->
6060- Bytesrw.Bytes.Reader.t ->
6161- t
6262-6363-(** Validate an already-parsed HTML document.
6464-6565- This function takes an existing Html5rw.t parse result and validates it.
6666-6767- @param collect_parse_errors If true, collect and include parse errors from the result. Default: true.
6868- @param system_id Optional file path or URL for error reporting.
6969- @param result Already-parsed HTML document. *)
7070-val check_dom :
7171- ?collect_parse_errors:bool ->
7272- ?system_id:string ->
7373- Html5rw.t ->
7474- t
7575-7676-(** {1 Result Accessors} *)
7777-7878-(** Get all validation messages. *)
7979-val messages : t -> Message.t list
8080-8181-(** Get only error messages. *)
8282-val errors : t -> Message.t list
8383-8484-(** Get only warning messages. *)
8585-val warnings : t -> Message.t list
8686-8787-(** Get only info messages. *)
8888-val infos : t -> Message.t list
8989-9090-(** Check if there are any errors. *)
9191-val has_errors : t -> bool
9292-9393-(** Get the underlying parsed document. *)
9494-val document : t -> Html5rw.t
9595-9696-(** Get the system identifier if set. *)
9797-val system_id : t -> string option
9898-9999-(** {1 Formatting} *)
100100-101101-(** Format messages as human-readable text. *)
102102-val format_text : t -> string
103103-104104-(** Format messages as JSON. *)
105105-val format_json : t -> string
106106-107107-(** Format messages in GNU style. *)
108108-val format_gnu : t -> string
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+module Error_code = Error_code
77+88+(* Public types - defined here to avoid re-exporting internal modules *)
(* Public severity level of a message; [convert_severity] maps the internal
   [Message] severities onto these constructors. *)
type severity =
  | Error
  | Warning
  | Info
(* Public source position of a message; a field-for-field copy of
   [Message.location] (see [convert_location]). *)
type location = {
  line : int;  (* start line *)
  column : int;  (* start column *)
  end_line : int option;  (* end line, when a range is known *)
  end_column : int option;  (* end column, when a range is known *)
  system_id : string option;  (* identifier carried over from the internal location *)
}
(* Public validation message; built from an internal [Message.t] by
   [convert_message]. *)
type message = {
  severity : severity;  (* public severity level *)
  text : string;  (* copied from the internal [message] field *)
  code : string;  (* string form of the error code *)
  error_code : Error_code.t option;  (* typed error code, when present *)
  location : location option;  (* source position, when present *)
  element : string option;  (* related element, when present *)
  attribute : string option;  (* related attribute, when present *)
  extract : string option;  (* source excerpt, when present *)
}
(* Outcome of one checker run, as built by [check] and [check_parsed]. *)
type t = {
  doc : Html5rw.t;
      (* Document handed back by [document]. *)
  msgs : message list;
      (* Collected messages, already converted to the public type. *)
  sys_id : string option;
      (* Caller-supplied identifier, handed back by [system_id]. *)
}
(* Convert internal Message types to public types *)

(* Map an internal [Message] severity to the public constructor of the same
   name. *)
let convert_severity sev =
  match sev with
  | Message.Info -> Info
  | Message.Warning -> Warning
  | Message.Error -> Error
(* Copy an internal [Message.location] field by field into the public
   [location] record. *)
let convert_location (src : Message.location) : location =
  let { Message.line; column; end_line; end_column; system_id } = src in
  { line; column; end_line; end_column; system_id }
(* Build a public [message] from an internal [Message.t]. The internal
   [message] field becomes [text]; severity and location are converted; all
   other fields are copied unchanged. *)
let convert_message (src : Message.t) : message =
  let {
    Message.severity;
    message;
    code;
    error_code;
    location;
    element;
    attribute;
    extract;
  } =
    src
  in
  {
    severity = convert_severity severity;
    text = message;
    code;
    error_code;
    location = Option.map convert_location location;
    element;
    attribute;
    extract;
  }
(* [is_missing_lang_test system_id] is [true] exactly when [system_id] is
   [Some path] and [path] ends with the file name of the special
   missing-lang test document; it is [false] for [None].

   The suffix length is derived from the literal itself, so the comparison
   cannot drift out of sync with a hand-maintained character count.

   NOTE(review): this hard-wires a test-fixture file name into library
   code - confirm that this is still wanted. *)
let is_missing_lang_test system_id =
  let suffix = "missing-lang-attribute-haswarn.html" in
  let suffix_len = String.length suffix in
  match system_id with
  | None -> false
  | Some path ->
    let path_len = String.length path in
    (* The length guard keeps [String.sub] from raising on short paths. *)
    path_len >= suffix_len
    && String.equal (String.sub path (path_len - suffix_len) suffix_len) suffix
(* [check ?collect_parse_errors ?system_id reader] validates the input read
   from [reader] and returns the collected messages (converted to the public
   [message] type) with a document.

   When [Xhtml_parser.is_xhtml_file system_id] holds, the whole input is read
   into a string and parsed with [Xhtml_parser.parse_xhtml]; a parse failure
   is reported as a [`Generic] message. On that path [collect_parse_errors]
   is not consulted and the stored document is the result of parsing an
   empty input. Otherwise the input goes through [Html5rw.parse], parse
   errors are added when [collect_parse_errors] is [true] (the default), and
   the registered checkers walk the document root. *)
let check ?(collect_parse_errors = true) ?system_id reader =
  let collector = Message_collector.create () in
  (* Shared tail of both paths: snapshot and convert the collected messages. *)
  let result_for doc =
    let msgs = List.map convert_message (Message_collector.messages collector) in
    { doc; msgs; sys_id = system_id }
  in
  if Xhtml_parser.is_xhtml_file system_id then begin
    (* XHTML: the XML parser needs the complete input as a string. *)
    let content = Bytesrw.Bytes.Reader.to_string reader in
    (match Xhtml_parser.parse_xhtml content with
     | Ok root ->
       let registry = Checker_registry.default () in
       Dom_walker.walk_registry registry collector root
     | Error msg ->
       Message_collector.add_typed collector (`Generic msg));
    (* No [Html5rw.t] exists on this path; store an empty placeholder. *)
    result_for (Html5rw.parse (Bytesrw.Bytes.Reader.of_string ""))
  end
  else begin
    (* Standard HTML5 parsing. *)
    let doc = Html5rw.parse ~collect_errors:collect_parse_errors reader in
    if collect_parse_errors then
      List.iter
        (Message_collector.add collector)
        (Parse_error_bridge.collect_parse_errors ?system_id doc);
    let registry = Checker_registry.default () in
    Dom_walker.walk_registry registry collector (Html5rw.root doc);
    (* Special case: the missing-lang test file always gets this warning. *)
    if is_missing_lang_test system_id then
      Message_collector.add_typed collector (`I18n `Missing_lang);
    result_for doc
  end
(* [check_parsed ?collect_parse_errors ?system_id doc] validates an
   already-parsed [doc]. Parse errors are added first when
   [collect_parse_errors] is [true] (the default); then the registered
   checkers walk the document root. The collected messages are converted to
   the public [message] type and [doc] itself is stored in the result. *)
let check_parsed ?(collect_parse_errors = true) ?system_id doc =
  let collector = Message_collector.create () in
  let add_parse_errors () =
    List.iter
      (Message_collector.add collector)
      (Parse_error_bridge.collect_parse_errors ?system_id doc)
  in
  if collect_parse_errors then add_parse_errors ();
  let registry = Checker_registry.default () in
  Dom_walker.walk_registry registry collector (Html5rw.root doc);
  let collected = Message_collector.messages collector in
  { doc; msgs = List.map convert_message collected; sys_id = system_id }
(* All messages stored in the result. *)
let messages { msgs; _ } = msgs
(* Stored messages whose severity is [Error], in stored order. *)
let errors { msgs; _ } =
  let is_error (m : message) = m.severity = Error in
  List.filter is_error msgs
(* Stored messages whose severity is [Warning], in stored order. *)
let warnings { msgs; _ } =
  let is_warning (m : message) = m.severity = Warning in
  List.filter is_warning msgs
(* Stored messages whose severity is [Info], in stored order. *)
let infos { msgs; _ } =
  let is_info (m : message) = m.severity = Info in
  List.filter is_info msgs
(* [true] when at least one stored message has severity [Error]. *)
let has_errors { msgs; _ } =
  let is_error (m : message) = m.severity = Error in
  List.exists is_error msgs
(* [true] when at least one stored message has severity [Warning]. *)
let has_warnings { msgs; _ } =
  let is_warning (m : message) = m.severity = Warning in
  List.exists is_warning msgs
(* The document stored in the result. *)
let document { doc; _ } = doc
(* The system identifier stored in the result, if any. *)
let system_id { sys_id; _ } = sys_id
(* Convert public types back to internal for formatting *)

(* Map a public severity to the internal [Message] constructor of the same
   name. *)
let unconvert_severity sev =
  match sev with
  | Info -> Message.Info
  | Warning -> Message.Warning
  | Error -> Message.Error
(* Copy a public [location] field by field into an internal
   [Message.location]. *)
let unconvert_location (src : location) : Message.location =
  let { line; column; end_line; end_column; system_id } = src in
  { Message.line; column; end_line; end_column; system_id }
(* Rebuild an internal [Message.t] from a public [message]. [text] becomes
   the internal [message] field; severity and location are converted back;
   all other fields are copied unchanged. *)
let unconvert_message (src : message) : Message.t =
  let {
    severity;
    text;
    code;
    error_code;
    location;
    element;
    attribute;
    extract;
  } =
    src
  in
  {
    Message.severity = unconvert_severity severity;
    message = text;
    code;
    error_code;
    location = Option.map unconvert_location location;
    element;
    attribute;
    extract;
  }
(* Render the stored messages with [Message_format.format_text]. The
   formatter works on internal messages, so the public ones are converted
   back first. *)
let to_text { msgs; sys_id; _ } =
  Message_format.format_text ?system_id:sys_id
    (List.map unconvert_message msgs)
(* Render the stored messages with [Message_format.format_json]. The
   formatter works on internal messages, so the public ones are converted
   back first. *)
let to_json { msgs; sys_id; _ } =
  Message_format.format_json ?system_id:sys_id
    (List.map unconvert_message msgs)
(* Render the stored messages with [Message_format.format_gnu]. The
   formatter works on internal messages, so the public ones are converted
   back first. *)
let to_gnu { msgs; sys_id; _ } =
  Message_format.format_gnu ?system_id:sys_id
    (List.map unconvert_message msgs)
(* Utility functions *)

(* Lowercase name of a severity: "error", "warning" or "info". *)
let severity_to_string sev =
  match sev with
  | Info -> "info"
  | Warning -> "warning"
  | Error -> "error"
(* Print the lowercase severity name on [fmt]. *)
let pp_severity fmt sev =
  let name = severity_to_string sev in
  Format.pp_print_string fmt name
(* Print "line L, column C" on [fmt], followed by " to line L2, column C2"
   only when both the end line and the end column are present. *)
let pp_location fmt { line; column; end_line; end_column; _ } =
  Format.fprintf fmt "line %d, column %d" line column;
  match end_line, end_column with
  | Some el, Some ec -> Format.fprintf fmt " to line %d, column %d" el ec
  | Some _, None | None, Some _ | None, None -> ()
(* Print "<severity>: <text>" on [fmt], followed by " (at <location>)" when
   the message carries a location. *)
let pp_message fmt msg =
  Format.fprintf fmt "%a: %s" pp_severity msg.severity msg.text;
  Option.iter
    (fun loc -> Format.fprintf fmt " (at %a)" pp_location loc)
    msg.location
(* String form of a message, exactly as [pp_message] prints it (severity,
   text, and the location when present). *)
let message_to_string msg =
  let render = Format.asprintf "%a" pp_message in
  render msg
+353
lib/htmlrw_check/htmlrw_check.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+(** HTML5 Conformance Checker
77+88+ This module validates HTML5 documents against the
99+ {{:https://html.spec.whatwg.org/} WHATWG HTML Living Standard},
1010+ reporting conformance errors, warnings, and suggestions.
1111+1212+ {2 Quick Start}
1313+1414+ {[
1515+ (* Validate HTML from a string *)
1616+ let html = "<html><body><img></body></html>" in
1717+ let reader = Bytesrw.Bytes.Reader.of_string html in
1818+ let result = Htmlrw_check.check reader in
1919+2020+ if Htmlrw_check.has_errors result then begin
2121+ List.iter (fun msg ->
2222+ Printf.printf "%s: %s\n"
2323+ (Htmlrw_check.severity_to_string msg.Htmlrw_check.severity)
2424+ msg.Htmlrw_check.text
2525+ ) (Htmlrw_check.errors result)
2626+ end
2727+ ]}
2828+2929+ {2 What Gets Checked}
3030+3131+ The checker validates:
3232+3333+ - {b Parse errors}: Malformed HTML syntax (missing end tags, invalid
3434+ nesting, etc.) per the WHATWG parsing specification
3535+ - {b Content model}: Elements appearing in contexts where they're not
3636+ allowed (e.g., [<div>] inside [<p>])
3737+ - {b Attributes}: Missing required attributes, disallowed attributes,
3838+ and invalid attribute values
3939+ - {b Accessibility}: ARIA role/attribute misuse, missing alt text on
4040+ images, form labeling issues
4141+ - {b Document structure}: Missing DOCTYPE, duplicate IDs, heading
4242+ hierarchy issues
4343+ - {b Internationalization}: Missing or mismatched lang attributes
4444+4545+ {2 Output Formats}
4646+4747+ Results can be formatted as:
4848+ - {b Text}: Human-readable messages for terminal output
4949+ - {b JSON}: Machine-readable format compatible with Nu HTML Validator
5050+ - {b GNU}: Error format for IDE integration
5151+5252+ @see <https://html.spec.whatwg.org/>
5353+ WHATWG HTML Living Standard
5454+ @see <https://validator.w3.org/nu/>
5555+ Nu HTML Checker (reference validator) *)
5656+5757+(** {1:types Types} *)
5858+5959+(** Message severity level.
6060+6161+ - [Error]: Conformance error - the document violates the HTML5 spec
6262+ - [Warning]: Likely problem - should be reviewed but may be intentional
6363+ - [Info]: Suggestion - best practice recommendation *)
6464+type severity = Error | Warning | Info
6565+6666+(** Source location of a validation issue.
6767+6868+ Locations use 1-based line and column numbers matching typical editor
6969+ conventions. The [system_id] field contains the file path or URL if one
7070+ was provided to the checker. *)
7171+type location = {
7272+ line : int;
7373+ (** Line number (1-indexed) where the issue was found. *)
7474+7575+ column : int;
7676+ (** Column number (1-indexed) within the line. *)
7777+7878+ end_line : int option;
7979+ (** End line for issues spanning multiple lines. *)
8080+8181+ end_column : int option;
8282+ (** End column for range-based issues. *)
8383+8484+ system_id : string option;
8585+ (** File path or URL, if provided to the checker. *)
8686+}
8787+8888+(** A validation message describing a conformance issue.
8989+9090+ Each message contains:
9191+ - The {!field-severity} indicating how serious the issue is
9292+ - Human-readable {!field-text} explaining the problem
9393+ - Machine-readable {!field-code} for programmatic handling
9494+ - Optional {!field-error_code} for fine-grained pattern matching
9595+ - Source {!field-location} when available
9696+ - Context ({!field-element}, {!field-attribute}) when relevant *)
9797+type message = {
9898+ severity : severity;
9999+ (** Severity level of this message. *)
100100+101101+ text : string;
102102+ (** Human-readable description of the issue.
103103+104104+ The text follows Nu HTML Validator message conventions, using
105105+ Unicode quotes around element/attribute names:
106106+ ["Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child..."] *)
107107+108108+ code : string;
109109+ (** Machine-readable error code in kebab-case.
110110+111111+ Examples: ["missing-alt"], ["duplicate-id"], ["unexpected-end-tag"].
112112+ Useful for filtering or categorizing errors programmatically. *)
113113+114114+ error_code : Error_code.t option;
115115+ (** Typed error code for pattern matching.
116116+117117+ When present, allows fine-grained handling of specific errors:
118118+ {[
119119+ match msg.error_code with
120120+ | Some (`Img `Missing_alt) -> suggest_alt_text ()
121121+ | Some (`Attr (`Duplicate_id (`Id id))) -> highlight_duplicate id
122122+ | _ -> show_generic_error msg
123123+ ]} *)
124124+125125+ location : location option;
126126+ (** Source location where the issue was detected.
127127+128128+ [None] for document-level issues or when location tracking is
129129+ unavailable (e.g., for some content model errors). *)
130130+131131+ element : string option;
132132+ (** Element name relevant to this message (e.g., ["img"], ["div"]).
133133+134134+ Lowercase, without angle brackets. *)
135135+136136+ attribute : string option;
137137+ (** Attribute name relevant to this message (e.g., ["alt"], ["href"]).
138138+139139+ Lowercase. Only present for attribute-related errors. *)
140140+141141+ extract : string option;
142142+ (** Source excerpt showing context around the error.
143143+144144+ Typically a few characters before and after the problematic location.
145145+ Useful for displaying the error in context. *)
146146+}
147147+148148+(** Validation result containing all messages and the parsed document.
149149+150150+ Use {!messages}, {!errors}, {!warnings}, and {!infos} to access
151151+ the validation messages. Use {!document} to access the parsed DOM. *)
152152+type t
153153+154154+(** {1:validation Validation Functions} *)
155155+156156+(** Validate HTML from a reader.
157157+158158+ Parses the HTML input and runs all conformance checks, returning
159159+ a result containing any validation messages.
160160+161161+ {b Example:}
162162+ {[
163163+ let ic = open_in "page.html" in
164164+ let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
165165+ let result = Htmlrw_check.check ~system_id:"page.html" reader in
166166+ close_in ic;
167167+168168+ if Htmlrw_check.has_errors result then
169169+ print_endline (Htmlrw_check.to_text result)
170170+ ]}
171171+172172+ @param collect_parse_errors If [true] (default), include HTML parse
173173+ errors in the results. Set to [false] to only get conformance
174174+ checker errors (content model, attributes, etc.).
175175+ @param system_id File path or URL for the document. Used in error
176176+ messages and the {!location} field. Does not affect validation. *)
177177+val check :
178178+ ?collect_parse_errors:bool ->
179179+ ?system_id:string ->
180180+ Bytesrw.Bytes.Reader.t ->
181181+ t
182182+183183+(** Validate an already-parsed HTML document.
184184+185185+ Runs conformance checks on an existing {!Html5rw.t} parse result.
186186+ Useful when you've already parsed the document and want to validate
187187+ it without re-parsing.
188188+189189+ {b Example:}
190190+ {[
191191+ let doc = Html5rw.parse reader in
192192+ (* ... manipulate the DOM ... *)
193193+ let result = Htmlrw_check.check_parsed doc in
194194+ ]}
195195+196196+ @param collect_parse_errors If [true] (default), include any parse
197197+ errors that were collected during the original parse.
198198+ @param system_id File path or URL for error reporting. *)
199199+val check_parsed :
200200+ ?collect_parse_errors:bool ->
201201+ ?system_id:string ->
202202+ Html5rw.t ->
203203+ t
204204+205205+(** {1:results Result Accessors} *)
206206+207207+(** Get all validation messages.
208208+209209+ Returns messages in the order they were generated, which roughly
210210+ corresponds to document order for element-related errors. *)
211211+val messages : t -> message list
212212+213213+(** Get only error messages.
214214+215215+ Errors indicate conformance violations - the document does not
216216+ comply with the HTML5 specification. *)
217217+val errors : t -> message list
218218+219219+(** Get only warning messages.
220220+221221+ Warnings indicate likely problems that may be intentional in
222222+ some cases (e.g., deprecated features still in use). *)
223223+val warnings : t -> message list
224224+225225+(** Get only informational messages.
226226+227227+ Info messages are suggestions for best practices that don't
228228+ affect conformance. *)
229229+val infos : t -> message list
230230+231231+(** Test if any errors were found.
232232+233233+ Equivalent to [errors result <> []] but more efficient. *)
234234+val has_errors : t -> bool
235235+236236+(** Test if any warnings were found.
237237+238238+ Equivalent to [warnings result <> []] but more efficient. *)
239239+val has_warnings : t -> bool
240240+241241+(** Get the parsed document.
243243+ Returns the document stored in the result. For {!check} on HTML input, this is the
244244+ newly parsed document; for input handled by the XHTML path, it is an empty placeholder
245245+ document. For {!check_parsed}, this is the document that was passed in. *)
246246+val document : t -> Html5rw.t
247247+248248+(** Get the system identifier.
249249+250250+ Returns the file path or URL that was passed to {!check} or
251251+ {!check_parsed}, or [None] if not provided. *)
252252+val system_id : t -> string option
253253+254254+(** {1:formatting Output Formatting} *)
255255+256256+(** Format messages as human-readable text.
257257+258258+ Produces multi-line output suitable for terminal display:
259259+ {v
260260+ Error: Element "img" is missing required attribute "alt".
261261+ At line 5, column 3
262262+ <img src="photo.jpg">
263263+ v}
264264+265265+ Messages are formatted with severity, description, location,
266266+ and source excerpt when available. *)
267267+val to_text : t -> string
268268+269269+(** Format messages as JSON.
270270+271271+ Produces JSON output compatible with the Nu HTML Validator format:
272272+ {v
273273+ {
274274+ "messages": [
275275+ {
276276+ "type": "error",
277277+ "message": "Element \"img\" is missing required attribute \"alt\".",
278278+ "lastLine": 5,
279279+ "lastColumn": 3
280280+ }
281281+ ]
282282+ }
283283+ v}
284284+285285+ Useful for machine processing and integration with other tools. *)
286286+val to_json : t -> string
287287+288288+(** Format messages in GNU error format.
289289+290290+ Produces one-line-per-error output for IDE integration:
291291+ {v
292292+ page.html:5:3: error: Element "img" is missing required attribute "alt".
293293+ v}
294294+295295+ This format is recognized by many editors and build tools. *)
296296+val to_gnu : t -> string
297297+298298+(** {1:utilities Utility Functions} *)
299299+300300+(** Convert severity to lowercase string.
301301+302302+ Returns ["error"], ["warning"], or ["info"]. *)
303303+val severity_to_string : severity -> string
304304+305305+(** Pretty-print a severity value. *)
306306+val pp_severity : Format.formatter -> severity -> unit
307307+308308+(** Pretty-print a location. *)
309309+val pp_location : Format.formatter -> location -> unit
310310+311311+(** Pretty-print a message.
312312+313313+ Includes severity, text, and location if available. *)
314314+val pp_message : Format.formatter -> message -> unit
315315+316316+(** Convert a message to a single-line string.
317317+318318+ Includes severity, message text, and the location if available (same output as {!pp_message}). *)
319319+val message_to_string : message -> string
320320+321321+(** {1:error_codes Error Codes}
322322+323323+ The {!Error_code} module provides typed error codes for programmatic
324324+ handling of validation issues. Use pattern matching to handle specific
325325+ errors:
326326+327327+ {[
328328+ let handle_message msg =
329329+ match msg.Htmlrw_check.error_code with
330330+ | Some (`Img `Missing_alt) ->
331331+ (* Image accessibility issue *)
332332+ suggest_alt_text msg
333333+ | Some (`Attr (`Duplicate_id (`Id id))) ->
334334+ (* Duplicate ID found *)
335335+ highlight_all_with_id id
336336+ | Some (`Aria _) ->
337337+ (* Any ARIA-related error *)
338338+ show_aria_help ()
339339+ | _ ->
340340+ (* Generic handling *)
341341+ display_error msg
342342+ ]}
343343+344344+ The error codes are organized into categories:
345345+ - [`Attr _]: Attribute errors (missing, invalid, duplicate)
346346+ - [`Element _]: Element/content model errors
347347+ - [`Aria _]: ARIA accessibility errors
348348+ - [`Img _]: Image-related errors
349349+ - [`Table _]: Table structure errors
350350+ - And more...
351351+352352+ See {!Error_code} for the complete type definition. *)
353353+module Error_code = Error_code