TOML 1.1 codecs for OCaml
at main 216 lines 9.1 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** TOML parsing and encoding error types *) 7 8(** Location in the input *) 9type location = { 10 line : int; 11 column : int; 12 file : string option; 13} 14 15let pp_location fmt loc = 16 match loc.file with 17 | Some f -> Format.fprintf fmt "%s:%d:%d" f loc.line loc.column 18 | None -> Format.fprintf fmt "line %d, column %d" loc.line loc.column 19 20(** Lexer errors - low-level tokenization issues *) 21type lexer_error = 22 | Invalid_utf8 23 | Incomplete_utf8 24 | Invalid_escape of char 25 | Incomplete_escape of string (** e.g., "\\x", "\\u", "\\U" *) 26 | Invalid_unicode_escape of string 27 | Invalid_unicode_codepoint of int 28 | Surrogate_codepoint of int 29 | Bare_carriage_return 30 | Control_character of int 31 | Unterminated_string 32 | Unterminated_comment 33 | Too_many_quotes 34 | Newline_in_string 35 | Unexpected_character of char 36 | Unexpected_eof 37 38let pp_lexer_error fmt = function 39 | Invalid_utf8 -> Format.fprintf fmt "invalid UTF-8 sequence" 40 | Incomplete_utf8 -> Format.fprintf fmt "incomplete UTF-8 sequence" 41 | Invalid_escape c -> Format.fprintf fmt "invalid escape sequence: \\%c" c 42 | Incomplete_escape s -> Format.fprintf fmt "incomplete %s escape sequence" s 43 | Invalid_unicode_escape s -> Format.fprintf fmt "invalid %s escape sequence" s 44 | Invalid_unicode_codepoint cp -> Format.fprintf fmt "invalid Unicode codepoint: U+%X" cp 45 | Surrogate_codepoint cp -> Format.fprintf fmt "surrogate codepoint not allowed: U+%04X" cp 46 | Bare_carriage_return -> Format.fprintf fmt "bare carriage return not allowed" 47 | Control_character cp -> Format.fprintf fmt "control character U+%04X not allowed" cp 48 | Unterminated_string -> Format.fprintf fmt "unterminated string" 49 | Unterminated_comment -> Format.fprintf fmt "unterminated comment" 50 | Too_many_quotes -> Format.fprintf fmt "too many consecutive quotes" 51 | Newline_in_string -> Format.fprintf fmt "newline not allowed in basic string" 52 | Unexpected_character c -> Format.fprintf fmt "unexpected character '%c'" c 53 | Unexpected_eof -> Format.fprintf fmt "unexpected end of input" 54 55(** Number parsing errors *) 56type number_error = 57 | Leading_zero 58 | Leading_underscore 59 | Trailing_underscore 60 | Double_underscore 61 | Underscore_not_between_digits 62 | Underscore_after_exponent 63 | Missing_digit 64 | Missing_digit_after_sign 65 | Missing_digit_after_decimal 66 | Missing_digit_after_exponent 67 | Invalid_hex_digit 68 | Invalid_octal_digit 69 | Invalid_binary_digit 70 71let pp_number_error fmt = function 72 | Leading_zero -> Format.fprintf fmt "leading zeros not allowed" 73 | Leading_underscore -> Format.fprintf fmt "leading underscore not allowed" 74 | Trailing_underscore -> Format.fprintf fmt "trailing underscore not allowed" 75 | Double_underscore -> Format.fprintf fmt "double underscore not allowed" 76 | Underscore_not_between_digits -> Format.fprintf fmt "underscore must be between digits" 77 | Underscore_after_exponent -> Format.fprintf fmt "underscore cannot follow exponent" 78 | Missing_digit -> Format.fprintf fmt "expected digit" 79 | Missing_digit_after_sign -> Format.fprintf fmt "expected digit after sign" 80 | Missing_digit_after_decimal -> Format.fprintf fmt "expected digit after decimal point" 81 | Missing_digit_after_exponent -> Format.fprintf fmt "expected digit after exponent" 82 | Invalid_hex_digit -> Format.fprintf fmt "invalid hexadecimal digit" 83 | Invalid_octal_digit -> Format.fprintf fmt "invalid octal digit" 84 | Invalid_binary_digit -> Format.fprintf fmt "invalid binary digit" 85 86(** DateTime parsing errors *) 87type datetime_error = 88 | Invalid_month of int 89 | Invalid_day of int * int (** day, month *) 90 | Invalid_hour of int 91 | Invalid_minute of int 92 | Invalid_second of int 93 | Invalid_timezone_offset_hour of int 94 | Invalid_timezone_offset_minute of int 95 | Invalid_format of string (** expected format description *) 96 97let pp_datetime_error fmt = function 98 | Invalid_month m -> Format.fprintf fmt "invalid month: %d" m 99 | Invalid_day (d, m) -> Format.fprintf fmt "invalid day %d for month %d" d m 100 | Invalid_hour h -> Format.fprintf fmt "invalid hour: %d" h 101 | Invalid_minute m -> Format.fprintf fmt "invalid minute: %d" m 102 | Invalid_second s -> Format.fprintf fmt "invalid second: %d" s 103 | Invalid_timezone_offset_hour h -> Format.fprintf fmt "invalid timezone offset hour: %d" h 104 | Invalid_timezone_offset_minute m -> Format.fprintf fmt "invalid timezone offset minute: %d" m 105 | Invalid_format desc -> Format.fprintf fmt "invalid %s format" desc 106 107(** Semantic/table structure errors *) 108type semantic_error = 109 | Duplicate_key of string 110 | Table_already_defined of string 111 | Cannot_redefine_table_as_value of string 112 | Cannot_redefine_array_as_value of string 113 | Cannot_use_value_as_table of string 114 | Cannot_extend_inline_table of string 115 | Cannot_extend_closed_table of string 116 | Cannot_extend_array_of_tables of string 117 | Cannot_convert_table_to_array of string 118 | Cannot_convert_array_to_table of string 119 | Table_has_content of string 120 | Conflicting_keys 121 | Empty_key 122 | Multiline_key 123 124let pp_semantic_error fmt = function 125 | Duplicate_key k -> Format.fprintf fmt "duplicate key: %s" k 126 | Table_already_defined k -> Format.fprintf fmt "table '%s' already defined" k 127 | Cannot_redefine_table_as_value k -> Format.fprintf fmt "cannot redefine table '%s' as a value" k 128 | Cannot_redefine_array_as_value k -> Format.fprintf fmt "cannot redefine array of tables '%s' as a value" k 129 | Cannot_use_value_as_table k -> Format.fprintf fmt "cannot use value '%s' as a table" k 130 | Cannot_extend_inline_table k -> Format.fprintf fmt "cannot extend inline table '%s'" k 131 | Cannot_extend_closed_table k -> Format.fprintf fmt "cannot extend table '%s' using dotted keys" k 132 | Cannot_extend_array_of_tables k -> Format.fprintf fmt "cannot extend array of tables '%s' using dotted keys" k 133 | Cannot_convert_table_to_array k -> Format.fprintf fmt "cannot define '%s' as array of tables; already defined as table" k 134 | Cannot_convert_array_to_table k -> Format.fprintf fmt "cannot define '%s' as table; already defined as array of tables" k 135 | Table_has_content k -> Format.fprintf fmt "cannot define '%s' as array of tables; already has content" k 136 | Conflicting_keys -> Format.fprintf fmt "conflicting keys in inline table" 137 | Empty_key -> Format.fprintf fmt "empty key" 138 | Multiline_key -> Format.fprintf fmt "multiline strings are not allowed as keys" 139 140(** Syntax errors *) 141type syntax_error = 142 | Expected of string 143 | Invalid_table_header 144 | Invalid_array_of_tables_header 145 | Unexpected_token of string 146 | Unexpected_bare_key of string 147 148let pp_syntax_error fmt = function 149 | Expected s -> Format.fprintf fmt "expected %s" s 150 | Invalid_table_header -> Format.fprintf fmt "invalid table header syntax" 151 | Invalid_array_of_tables_header -> Format.fprintf fmt "invalid array of tables syntax" 152 | Unexpected_token s -> Format.fprintf fmt "unexpected token: %s" s 153 | Unexpected_bare_key k -> Format.fprintf fmt "unexpected bare key '%s' as value" k 154 155(** Encoding errors *) 156type encode_error = 157 | Cannot_encode_inline_table 158 | Not_a_table 159 160let pp_encode_error fmt = function 161 | Cannot_encode_inline_table -> Format.fprintf fmt "cannot encode table inline without inline flag" 162 | Not_a_table -> Format.fprintf fmt "top-level TOML must be a table" 163 164(** All error kinds *) 165type kind = 166 | Lexer of lexer_error 167 | Number of number_error 168 | Datetime of datetime_error 169 | Semantic of semantic_error 170 | Syntax of syntax_error 171 | Encode of encode_error 172 173let pp_kind fmt = function 174 | Lexer e -> pp_lexer_error fmt e 175 | Number e -> pp_number_error fmt e 176 | Datetime e -> pp_datetime_error fmt e 177 | Semantic e -> pp_semantic_error fmt e 178 | Syntax e -> pp_syntax_error fmt e 179 | Encode e -> pp_encode_error fmt e 180 181(** Full error with location *) 182type t = { 183 kind : kind; 184 location : location option; 185} 186 187let make ?location kind = { kind; location } 188 189let pp fmt t = 190 match t.location with 191 | Some loc -> Format.fprintf fmt "%a: %a" pp_location loc pp_kind t.kind 192 | None -> pp_kind fmt t.kind 193 194let to_string t = 195 Format.asprintf "%a" pp t 196 197(** Exception for TOML errors *) 198exception Error of t 199 200let () = Printexc.register_printer (function 201 | Error e -> Some (Format.asprintf "Tomlt.Error: %a" pp e) 202 | _ -> None) 203 204(** Raise a TOML error *) 205let raise_error ?location kind = 206 raise (Error { kind; location }) 207 208let raise_lexer ?location e = raise_error ?location (Lexer e) 209let raise_number ?location e = raise_error ?location (Number e) 210let raise_datetime ?location e = raise_error ?location (Datetime e) 211let raise_semantic ?location e = raise_error ?location (Semantic e) 212let raise_syntax ?location e = raise_error ?location (Syntax e) 213let raise_encode ?location e = raise_error ?location (Encode e) 214 215(** Create location from line and column *) 216let loc ?file ~line ~column () = { line; column; file }