···3434 ]}
3535*)
36363737+(** {1 Error Handling} *)
3838+3939+(** Global error type that wraps all errors raised by the Html5rw library.
4040+4141+ This provides a unified error type for all parsing and selector errors,
4242+ along with printers for display and debugging.
4343+*)
module Error = struct
  (** Any error reported by the Html5rw library. *)
  type t =
    | Parse_error of {
        code : Parse_error_code.t;
        line : int;
        column : int;
      }
        (** HTML parse error together with the line and column reported for
            it. *)
    | Selector_error of Selector.Error_code.t
        (** CSS selector parse error, identified by its typed code. *)

  (** Lift a parser error into {!t}, reading its code, line and column
      through the [Parser] accessors. *)
  let of_parse_error (err : Parser.parse_error) : t =
    let code = Parser.error_code err in
    let line = Parser.error_line err in
    let column = Parser.error_column err in
    Parse_error { code; line; column }

  (** Wrap a selector error code as {!t}. *)
  let of_selector_error (code : Selector.Error_code.t) : t =
    Selector_error code

  (** Render as ["Parse error at LINE:COLUMN: CODE"] for parse errors, or
      ["Selector error: MESSAGE"] for selector errors. *)
  let to_string err =
    match err with
    | Selector_error code ->
      let message = Selector.Error_code.to_human_string code in
      Printf.sprintf "Selector error: %s" message
    | Parse_error { code; line; column } ->
      let name = Parse_error_code.to_string code in
      Printf.sprintf "Parse error at %d:%d: %s" line column name

  (** [Format]-compatible printer; emits exactly the text of {!to_string}. *)
  let pp fmt err = err |> to_string |> Format.pp_print_string fmt

  (** The error code alone, as a kebab-case string (no location). *)
  let code_string err =
    match err with
    | Selector_error code -> Selector.Error_code.to_string code
    | Parse_error { code; _ } -> Parse_error_code.to_string code
end
8181+3782(** {1 Sub-modules} *)
38833984(** Parse error code types *)
+89
lib/html5rw/html5rw.mli
···372372 Column numbers count from 1 and reset at each newline. *)
373373val error_column : parse_error -> int
374374375375+(** {1 Error Handling} *)
(** Unified error type covering the errors raised by the Html5rw library.

    HTML parse errors and CSS selector errors are wrapped in one variant,
    with conversion functions and printers, so that callers can handle
    both kinds uniformly.

    {2 Usage}

    Converting parse errors:
    {[
      let errors = Html5rw.errors result in
      List.iter (fun err ->
        let unified = Html5rw.Error.of_parse_error err in
        Printf.eprintf "%s\n" (Html5rw.Error.to_string unified)
      ) errors
    ]}

    Catching selector errors:
    {[
      match Html5rw.query result selector with
      | nodes -> ignore nodes (* success: use the matched nodes here *)
      | exception Html5rw.Selector.Selector_error code ->
          let unified = Html5rw.Error.of_selector_error code in
          Printf.eprintf "%s\n" (Html5rw.Error.to_string unified)
    ]}
*)
module Error : sig
  (** Any error reported by the Html5rw library. *)
  type t =
    | Parse_error of {
        code : Parse_error_code.t;
        line : int;
        column : int;
      }
        (** An HTML parse error with the position at which it was detected.

            Parse errors arise during HTML tokenization and tree
            construction.

            @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-errors>
            WHATWG: Parse errors *)

    | Selector_error of Selector.Error_code.t
        (** A CSS selector parse error, produced when a malformed selector
            is passed to {!query} or {!matches}. *)

  val of_parse_error : parse_error -> t
  (** Wrap a parse error in the unified type.

      {[
        let errors = Html5rw.errors result in
        let unified_errors = List.map Html5rw.Error.of_parse_error errors
      ]} *)

  val of_selector_error : Selector.Error_code.t -> t
  (** Wrap a selector error code in the unified type.

      {[
        match Html5rw.query result "invalid[" with
        | _ -> ()
        | exception Html5rw.Selector.Selector_error code ->
            let err = Html5rw.Error.of_selector_error code in
            Printf.eprintf "%s\n" (Html5rw.Error.to_string err)
      ]} *)

  val to_string : t -> string
  (** Human-readable error message. Parse errors include their
      [line:column] location; selector errors carry no location.

      Examples:
      - ["Parse error at 5:12: unexpected-null-character"]
      - ["Selector error: Expected \]"] *)

  val pp : Format.formatter -> t -> unit
  (** Pretty-printer for use with [Format] functions. *)

  val code_string : t -> string
  (** Only the error code, as a kebab-case string (no location).

      Useful for programmatic error handling or logging.

      Examples:
      - ["unexpected-null-character"]
      - ["expected-closing-bracket"] *)
end
461461+462462+(** {1 Fragment Parsing} *)
463463+375464(** Context element for HTML fragment parsing (innerHTML).
376465377466 When parsing HTML fragments (like the [innerHTML] of an element), you
+6-1
lib/html5rw/selector/selector.ml
···5858 ]}
5959*)
(** {1 Error Types} *)

(** CSS selector error codes (an alias of [Selector_error_code]). *)
module Error_code = Selector_error_code

(** {1 Exceptions} *)

(** Raised when a selector string is malformed.

    The payload is a typed error code describing the parse error. This is
    the same exception as [Selector_lexer.Selector_error].
*)
exception Selector_error = Selector_lexer.Selector_error
6873
+55-2
lib/html5rw/selector/selector.mli
···5858 ]}
5959*)
60606161+(** {1 Error Types} *)
6262+6363+(** CSS selector error codes.
6464+6565+ This module provides the {!Error_code.t} variant type that represents
6666+ all possible errors when parsing CSS selectors.
6767+*)
module Error_code : sig
  (** Reasons a CSS selector can fail to parse. *)
  type t =
    | Empty_selector
        (** The selector was empty or consisted only of whitespace. *)
    | Unterminated_string
        (** Input ended inside a quoted string. *)
    | Unterminated_escape
        (** Input ended in the middle of an escape sequence. *)
    | Expected_identifier_after_hash
        (** [#] (ID selector) was not followed by an identifier. *)
    | Expected_identifier_after_dot
        (** [.] (class selector) was not followed by an identifier. *)
    | Expected_attribute_name
        (** An attribute selector is missing its attribute name. *)
    | Expected_closing_bracket
        (** An attribute selector is missing its closing [\]]. *)
    | Expected_equals_after_operator of char
        (** An attribute operator character such as [~], [|], [^], [$] or
            [*] was not followed by [=]; the payload is that character. *)
    | Unexpected_character_in_attribute_selector
        (** An attribute selector contains a character that is not allowed
            there. *)
    | Expected_pseudo_class_name
        (** [:] was not followed by a pseudo-class name. *)
    | Expected_closing_paren
        (** A pseudo-class argument is missing its closing [)]. *)
    | Unexpected_character of char
        (** The selector contains an unexpected character; the payload is
            that character. *)
    | Expected_attribute_value
        (** An attribute operator was not followed by a value. *)
    | Expected_closing_bracket_or_operator
        (** After an attribute name, neither [\]] nor an attribute operator
            such as [=] was found. *)
    | Expected_selector_after_combinator
        (** A combinator ([>], [+], [~] or space) was not followed by a
            selector. *)
    | Unexpected_token
        (** The selector contains an unexpected token. *)
    | Expected_end_of_selector
        (** More tokens were found where the selector should have ended. *)

  val to_string : t -> string
  (** Kebab-case identifier for the error, suitable for programmatic use. *)

  val to_human_string : t -> string
  (** Human-readable message for the error. *)
end
111111+61112(** {1 Exceptions} *)
exception Selector_error of Error_code.t
(** Raised when a selector string is malformed.

    The payload is a typed error code describing the parse error. Convert
    it with {!Error_code.to_string} or {!Error_code.to_human_string} to
    obtain a string representation.
*)
6812169122(** {1 Sub-modules} *)
+13-13
lib/html5rw/selector/selector_lexer.ml
···11(* CSS selector lexer *)
2233-exception Selector_error of string
33+exception Selector_error of Selector_error_code.t
4455type t = {
66 input : string;
···4747 let buf = Buffer.create 32 in
4848 let rec loop () =
4949 match peek t with
5050- | None -> raise (Selector_error "Unterminated string")
5050+ | None -> raise (Selector_error Selector_error_code.Unterminated_string)
5151 | Some c when c = quote -> advance t
5252 | Some '\\' ->
5353 advance t;
5454 (match peek t with
5555 | Some c -> Buffer.add_char buf c; advance t; loop ()
5656- | None -> raise (Selector_error "Unterminated escape"))
5656+ | None -> raise (Selector_error Selector_error_code.Unterminated_escape))
5757 | Some c ->
5858 Buffer.add_char buf c;
5959 advance t;
···9999 | '#' ->
100100 advance t;
101101 let name = read_name t in
102102- if name = "" then raise (Selector_error "Expected identifier after #");
102102+ if name = "" then raise (Selector_error Selector_error_code.Expected_identifier_after_hash);
103103 tokens := Selector_token.Id name :: !tokens
104104 | '.' ->
105105 advance t;
106106 let name = read_name t in
107107- if name = "" then raise (Selector_error "Expected identifier after .");
107107+ if name = "" then raise (Selector_error Selector_error_code.Expected_identifier_after_dot);
108108 tokens := Selector_token.Class name :: !tokens
109109 | '[' ->
110110 advance t;
111111 tokens := Selector_token.Attr_start :: !tokens;
112112 skip_whitespace t;
113113 let attr_name = read_name t in
114114- if attr_name = "" then raise (Selector_error "Expected attribute name");
114114+ if attr_name = "" then raise (Selector_error Selector_error_code.Expected_attribute_name);
115115 tokens := Selector_token.Tag attr_name :: !tokens;
116116 skip_whitespace t;
117117···130130 in
131131 tokens := Selector_token.String value :: !tokens;
132132 skip_whitespace t;
133133- if peek t <> Some ']' then raise (Selector_error "Expected ]");
133133+ if peek t <> Some ']' then raise (Selector_error Selector_error_code.Expected_closing_bracket);
134134 advance t;
135135 tokens := Selector_token.Attr_end :: !tokens
136136 | Some ('~' | '|' | '^' | '$' | '*') as op_char ->
137137 let op_c = Option.get op_char in
138138 advance t;
139139 if peek t <> Some '=' then
140140- raise (Selector_error ("Expected = after " ^ String.make 1 op_c));
140140+ raise (Selector_error (Selector_error_code.Expected_equals_after_operator op_c));
141141 advance t;
142142 tokens := Selector_token.Attr_op (String.make 1 op_c ^ "=") :: !tokens;
143143 skip_whitespace t;
···148148 in
149149 tokens := Selector_token.String value :: !tokens;
150150 skip_whitespace t;
151151- if peek t <> Some ']' then raise (Selector_error "Expected ]");
151151+ if peek t <> Some ']' then raise (Selector_error Selector_error_code.Expected_closing_bracket);
152152 advance t;
153153 tokens := Selector_token.Attr_end :: !tokens
154154- | _ -> raise (Selector_error "Unexpected character in attribute selector"))
154154+ | _ -> raise (Selector_error Selector_error_code.Unexpected_character_in_attribute_selector))
155155156156 | ',' ->
157157 advance t;
···161161 advance t;
162162 tokens := Selector_token.Colon :: !tokens;
163163 let name = read_name t in
164164- if name = "" then raise (Selector_error "Expected pseudo-class name");
164164+ if name = "" then raise (Selector_error Selector_error_code.Expected_pseudo_class_name);
165165 tokens := Selector_token.Tag name :: !tokens;
166166167167 if peek t = Some '(' then begin
···179179 done;
180180 let arg = String.trim (String.sub t.input start (t.pos - start)) in
181181 if arg <> "" then tokens := Selector_token.String arg :: !tokens;
182182- if peek t <> Some ')' then raise (Selector_error "Expected )");
182182+ if peek t <> Some ')' then raise (Selector_error Selector_error_code.Expected_closing_paren);
183183 advance t;
184184 tokens := Selector_token.Paren_close :: !tokens
185185 end
···187187 let name = read_name t in
188188 tokens := Selector_token.Tag (String.lowercase_ascii name) :: !tokens
189189 | _ ->
190190- raise (Selector_error ("Unexpected character: " ^ String.make 1 c))
190190+ raise (Selector_error (Selector_error_code.Unexpected_character c))
191191 end
192192 done;
193193
+14-13
lib/html5rw/selector/selector_parser.ml
···33open Selector_ast
44open Selector_token
5566-exception Parse_error of string
(* Raise the lexer's [Selector_error] with the given code, so that parser
   failures use the same exception as lexer failures. *)
let raise_error code = Selector_lexer.Selector_error code |> raise
7889type t = {
910 tokens : Selector_token.t list;
(* Consume the next token when it equals [expected]. Otherwise raise a
   selector error: [Expected_end_of_selector] when [EOF] was expected,
   [Unexpected_token] for any other expectation. *)
let expect t expected =
  if peek t = expected then advance t
  else
    let code =
      match expected with
      | EOF -> Selector_error_code.Expected_end_of_selector
      | _ -> Selector_error_code.Unexpected_token
    in
    raise_error code
3435 advance t
3536···5152 advance t;
5253 let attr_name = match peek t with
5354 | Tag name -> advance t; name
5454- | _ -> raise (Parse_error "Expected attribute name")
5555+ | _ -> raise_error Selector_error_code.Expected_attribute_name
5556 in
5657 (match peek t with
5758 | Attr_end ->
···6162 advance t;
6263 let value = match peek t with
6364 | String v -> advance t; v
6464- | _ -> raise (Parse_error "Expected attribute value")
6565+ | _ -> raise_error Selector_error_code.Expected_attribute_value
6566 in
6667 (match peek t with
6768 | Attr_end -> advance t
6868- | _ -> raise (Parse_error "Expected ]"));
6969+ | _ -> raise_error Selector_error_code.Expected_closing_bracket);
6970 Some (make_simple Type_attr ~name:attr_name ~operator:op ~value ())
7070- | _ -> raise (Parse_error "Expected ] or attribute operator"))
7171+ | _ -> raise_error Selector_error_code.Expected_closing_bracket_or_operator)
7172 | Colon ->
7273 advance t;
7374 let name = match peek t with
7475 | Tag n -> advance t; n
7575- | _ -> raise (Parse_error "Expected pseudo-class name")
7676+ | _ -> raise_error Selector_error_code.Expected_pseudo_class_name
7677 in
7778 let arg = match peek t with
7879 | Paren_open ->
···8485 in
8586 (match peek t with
8687 | Paren_close -> advance t
8787- | _ -> raise (Parse_error "Expected )"));
8888+ | _ -> raise_error Selector_error_code.Expected_closing_paren);
8889 a
8990 | _ -> None
9091 in
···111112 | Combinator comb ->
112113 advance t;
113114 (match parse_compound_selector t with
114114- | None -> raise (Parse_error "Expected selector after combinator")
115115+ | None -> raise_error Selector_error_code.Expected_selector_after_combinator
115116 | Some compound ->
116117 parts := (Some comb, compound) :: !parts;
117118 loop ())
···131132 advance t;
132133 loop (sel :: acc)
133134 | EOF -> sel :: acc
134134- | _ -> raise (Parse_error "Unexpected token"))
135135+ | _ -> raise_error Selector_error_code.Unexpected_token)
135136 in
136137 let selectors = List.rev (loop []) in
137138 (match peek t with
138139 | EOF -> ()
139139- | _ -> raise (Parse_error "Expected end of selector"));
140140+ | _ -> raise_error Selector_error_code.Expected_end_of_selector);
140141 match selectors with
141141- | [] -> raise (Parse_error "Empty selector")
142142+ | [] -> raise_error Selector_error_code.Empty_selector
142143 | [sel] -> Complex sel
143144 | sels -> List (make_list sels)
(* Parse a selector string. Input that is empty after trimming is rejected
   with [Empty_selector]; otherwise the untrimmed input is tokenized and
   the resulting tokens are handed to [parse]. *)
let parse_selector input =
  match String.trim input with
  | "" -> raise_error Selector_error_code.Empty_selector
  | _ -> parse (Selector_lexer.tokenize input)