TOML 1.1 codecs for OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** TOML parsing and encoding error types *)
7
8(** Location in the input *)
9type location = {
10 line : int;
11 column : int;
12 file : string option;
13}
14
15let pp_location fmt loc =
16 match loc.file with
17 | Some f -> Format.fprintf fmt "%s:%d:%d" f loc.line loc.column
18 | None -> Format.fprintf fmt "line %d, column %d" loc.line loc.column
19
20(** Lexer errors - low-level tokenization issues *)
21type lexer_error =
22 | Invalid_utf8
23 | Incomplete_utf8
24 | Invalid_escape of char
25 | Incomplete_escape of string (** e.g., "\\x", "\\u", "\\U" *)
26 | Invalid_unicode_escape of string
27 | Invalid_unicode_codepoint of int
28 | Surrogate_codepoint of int
29 | Bare_carriage_return
30 | Control_character of int
31 | Unterminated_string
32 | Unterminated_comment
33 | Too_many_quotes
34 | Newline_in_string
35 | Unexpected_character of char
36 | Unexpected_eof
37
38let pp_lexer_error fmt = function
39 | Invalid_utf8 -> Format.fprintf fmt "invalid UTF-8 sequence"
40 | Incomplete_utf8 -> Format.fprintf fmt "incomplete UTF-8 sequence"
41 | Invalid_escape c -> Format.fprintf fmt "invalid escape sequence: \\%c" c
42 | Incomplete_escape s -> Format.fprintf fmt "incomplete %s escape sequence" s
43 | Invalid_unicode_escape s -> Format.fprintf fmt "invalid %s escape sequence" s
44 | Invalid_unicode_codepoint cp -> Format.fprintf fmt "invalid Unicode codepoint: U+%X" cp
45 | Surrogate_codepoint cp -> Format.fprintf fmt "surrogate codepoint not allowed: U+%04X" cp
46 | Bare_carriage_return -> Format.fprintf fmt "bare carriage return not allowed"
47 | Control_character cp -> Format.fprintf fmt "control character U+%04X not allowed" cp
48 | Unterminated_string -> Format.fprintf fmt "unterminated string"
49 | Unterminated_comment -> Format.fprintf fmt "unterminated comment"
50 | Too_many_quotes -> Format.fprintf fmt "too many consecutive quotes"
51 | Newline_in_string -> Format.fprintf fmt "newline not allowed in basic string"
52 | Unexpected_character c -> Format.fprintf fmt "unexpected character '%c'" c
53 | Unexpected_eof -> Format.fprintf fmt "unexpected end of input"
54
55(** Number parsing errors *)
56type number_error =
57 | Leading_zero
58 | Leading_underscore
59 | Trailing_underscore
60 | Double_underscore
61 | Underscore_not_between_digits
62 | Underscore_after_exponent
63 | Missing_digit
64 | Missing_digit_after_sign
65 | Missing_digit_after_decimal
66 | Missing_digit_after_exponent
67 | Invalid_hex_digit
68 | Invalid_octal_digit
69 | Invalid_binary_digit
70
71let pp_number_error fmt = function
72 | Leading_zero -> Format.fprintf fmt "leading zeros not allowed"
73 | Leading_underscore -> Format.fprintf fmt "leading underscore not allowed"
74 | Trailing_underscore -> Format.fprintf fmt "trailing underscore not allowed"
75 | Double_underscore -> Format.fprintf fmt "double underscore not allowed"
76 | Underscore_not_between_digits -> Format.fprintf fmt "underscore must be between digits"
77 | Underscore_after_exponent -> Format.fprintf fmt "underscore cannot follow exponent"
78 | Missing_digit -> Format.fprintf fmt "expected digit"
79 | Missing_digit_after_sign -> Format.fprintf fmt "expected digit after sign"
80 | Missing_digit_after_decimal -> Format.fprintf fmt "expected digit after decimal point"
81 | Missing_digit_after_exponent -> Format.fprintf fmt "expected digit after exponent"
82 | Invalid_hex_digit -> Format.fprintf fmt "invalid hexadecimal digit"
83 | Invalid_octal_digit -> Format.fprintf fmt "invalid octal digit"
84 | Invalid_binary_digit -> Format.fprintf fmt "invalid binary digit"
85
86(** DateTime parsing errors *)
87type datetime_error =
88 | Invalid_month of int
89 | Invalid_day of int * int (** day, month *)
90 | Invalid_hour of int
91 | Invalid_minute of int
92 | Invalid_second of int
93 | Invalid_timezone_offset_hour of int
94 | Invalid_timezone_offset_minute of int
95 | Invalid_format of string (** expected format description *)
96
97let pp_datetime_error fmt = function
98 | Invalid_month m -> Format.fprintf fmt "invalid month: %d" m
99 | Invalid_day (d, m) -> Format.fprintf fmt "invalid day %d for month %d" d m
100 | Invalid_hour h -> Format.fprintf fmt "invalid hour: %d" h
101 | Invalid_minute m -> Format.fprintf fmt "invalid minute: %d" m
102 | Invalid_second s -> Format.fprintf fmt "invalid second: %d" s
103 | Invalid_timezone_offset_hour h -> Format.fprintf fmt "invalid timezone offset hour: %d" h
104 | Invalid_timezone_offset_minute m -> Format.fprintf fmt "invalid timezone offset minute: %d" m
105 | Invalid_format desc -> Format.fprintf fmt "invalid %s format" desc
106
107(** Semantic/table structure errors *)
108type semantic_error =
109 | Duplicate_key of string
110 | Table_already_defined of string
111 | Cannot_redefine_table_as_value of string
112 | Cannot_redefine_array_as_value of string
113 | Cannot_use_value_as_table of string
114 | Cannot_extend_inline_table of string
115 | Cannot_extend_closed_table of string
116 | Cannot_extend_array_of_tables of string
117 | Cannot_convert_table_to_array of string
118 | Cannot_convert_array_to_table of string
119 | Table_has_content of string
120 | Conflicting_keys
121 | Empty_key
122 | Multiline_key
123
124let pp_semantic_error fmt = function
125 | Duplicate_key k -> Format.fprintf fmt "duplicate key: %s" k
126 | Table_already_defined k -> Format.fprintf fmt "table '%s' already defined" k
127 | Cannot_redefine_table_as_value k -> Format.fprintf fmt "cannot redefine table '%s' as a value" k
128 | Cannot_redefine_array_as_value k -> Format.fprintf fmt "cannot redefine array of tables '%s' as a value" k
129 | Cannot_use_value_as_table k -> Format.fprintf fmt "cannot use value '%s' as a table" k
130 | Cannot_extend_inline_table k -> Format.fprintf fmt "cannot extend inline table '%s'" k
131 | Cannot_extend_closed_table k -> Format.fprintf fmt "cannot extend table '%s' using dotted keys" k
132 | Cannot_extend_array_of_tables k -> Format.fprintf fmt "cannot extend array of tables '%s' using dotted keys" k
133 | Cannot_convert_table_to_array k -> Format.fprintf fmt "cannot define '%s' as array of tables; already defined as table" k
134 | Cannot_convert_array_to_table k -> Format.fprintf fmt "cannot define '%s' as table; already defined as array of tables" k
135 | Table_has_content k -> Format.fprintf fmt "cannot define '%s' as array of tables; already has content" k
136 | Conflicting_keys -> Format.fprintf fmt "conflicting keys in inline table"
137 | Empty_key -> Format.fprintf fmt "empty key"
138 | Multiline_key -> Format.fprintf fmt "multiline strings are not allowed as keys"
139
140(** Syntax errors *)
141type syntax_error =
142 | Expected of string
143 | Invalid_table_header
144 | Invalid_array_of_tables_header
145 | Unexpected_token of string
146 | Unexpected_bare_key of string
147
148let pp_syntax_error fmt = function
149 | Expected s -> Format.fprintf fmt "expected %s" s
150 | Invalid_table_header -> Format.fprintf fmt "invalid table header syntax"
151 | Invalid_array_of_tables_header -> Format.fprintf fmt "invalid array of tables syntax"
152 | Unexpected_token s -> Format.fprintf fmt "unexpected token: %s" s
153 | Unexpected_bare_key k -> Format.fprintf fmt "unexpected bare key '%s' as value" k
154
155(** Encoding errors *)
156type encode_error =
157 | Cannot_encode_inline_table
158 | Not_a_table
159
160let pp_encode_error fmt = function
161 | Cannot_encode_inline_table -> Format.fprintf fmt "cannot encode table inline without inline flag"
162 | Not_a_table -> Format.fprintf fmt "top-level TOML must be a table"
163
164(** All error kinds *)
165type kind =
166 | Lexer of lexer_error
167 | Number of number_error
168 | Datetime of datetime_error
169 | Semantic of semantic_error
170 | Syntax of syntax_error
171 | Encode of encode_error
172
173let pp_kind fmt = function
174 | Lexer e -> pp_lexer_error fmt e
175 | Number e -> pp_number_error fmt e
176 | Datetime e -> pp_datetime_error fmt e
177 | Semantic e -> pp_semantic_error fmt e
178 | Syntax e -> pp_syntax_error fmt e
179 | Encode e -> pp_encode_error fmt e
180
181(** Full error with location *)
182type t = {
183 kind : kind;
184 location : location option;
185}
186
187let make ?location kind = { kind; location }
188
189let pp fmt t =
190 match t.location with
191 | Some loc -> Format.fprintf fmt "%a: %a" pp_location loc pp_kind t.kind
192 | None -> pp_kind fmt t.kind
193
194let to_string t =
195 Format.asprintf "%a" pp t
196
197(** Exception for TOML errors *)
198exception Error of t
199
200let () = Printexc.register_printer (function
201 | Error e -> Some (Format.asprintf "Tomlt.Error: %a" pp e)
202 | _ -> None)
203
204(** Raise a TOML error *)
205let raise_error ?location kind =
206 raise (Error { kind; location })
207
208let raise_lexer ?location e = raise_error ?location (Lexer e)
209let raise_number ?location e = raise_error ?location (Number e)
210let raise_datetime ?location e = raise_error ?location (Datetime e)
211let raise_semantic ?location e = raise_error ?location (Semantic e)
212let raise_syntax ?location e = raise_error ?location (Syntax e)
213let raise_encode ?location e = raise_error ?location (Encode e)
214
215(** Create location from line and column *)
216let loc ?file ~line ~column () = { line; column; file }