TOML 1.1 codecs for OCaml

rearrange

+6130 -2713
+8 -8
bin/run_tests.ml
··· 238 238 239 239 let run_valid_test toml_file json_file = 240 240 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in 241 - match Tomlt.of_string toml_content with 242 - | Error e -> `Fail (Printf.sprintf "Decode error: %s" (Tomlt.Error.to_string e)) 241 + match Tomlt.Toml.of_string toml_content with 242 + | Error e -> `Fail (Printf.sprintf "Decode error: %s" (Tomlt.Toml.Error.to_string e)) 243 243 | Ok toml -> 244 - let actual_json = Tomlt.Internal.to_tagged_json toml in 244 + let actual_json = Tomlt.Toml.Tagged_json.encode toml in 245 245 let expected_json = In_channel.with_open_bin json_file In_channel.input_all in 246 246 if json_equal actual_json expected_json then 247 247 `Pass ··· 251 251 252 252 let run_invalid_test toml_file = 253 253 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in 254 - match Tomlt.of_string toml_content with 254 + match Tomlt.Toml.of_string toml_content with 255 255 | Error _ -> `Pass (* Should fail *) 256 256 | Ok _ -> `Fail "Should have failed but parsed successfully" 257 257 ··· 259 259 let run_encoder_test json_file = 260 260 let json_content = In_channel.with_open_bin json_file In_channel.input_all in 261 261 (* First, encode JSON to TOML *) 262 - match Tomlt.Internal.encode_from_tagged_json json_content with 262 + match Tomlt.Toml.Tagged_json.decode_and_encode_toml json_content with 263 263 | Error msg -> `Fail (Printf.sprintf "Encode error: %s" msg) 264 264 | Ok toml_output -> 265 265 (* Then decode the TOML back to check round-trip *) 266 - match Tomlt.of_string toml_output with 267 - | Error e -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" (Tomlt.Error.to_string e) toml_output) 266 + match Tomlt.Toml.of_string toml_output with 267 + | Error e -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" (Tomlt.Toml.Error.to_string e) toml_output) 268 268 | Ok decoded_toml -> 269 269 (* Compare the decoded result with original JSON *) 270 - let actual_json = Tomlt.Internal.to_tagged_json decoded_toml in 270 + let actual_json = Tomlt.Toml.Tagged_json.encode decoded_toml in 271 271 if json_equal actual_json json_content then 272 272 `Pass 273 273 else
+3 -3
bin/toml_test_decoder.ml
··· 2 2 3 3 let () = 4 4 let input = In_channel.input_all In_channel.stdin in 5 - match Tomlt.of_string input with 5 + match Tomlt.Toml.of_string input with 6 6 | Ok toml -> 7 - let json = Tomlt.Internal.to_tagged_json toml in 7 + let json = Tomlt.Toml.Tagged_json.encode toml in 8 8 print_string json; 9 9 print_newline () 10 10 | Error e -> 11 - Printf.eprintf "Error: %s\n" (Tomlt.Error.to_string e); 11 + Printf.eprintf "Error: %s\n" (Tomlt.Toml.Error.to_string e); 12 12 exit 1
+1 -1
bin/toml_test_encoder.ml
··· 2 2 3 3 let () = 4 4 let input = In_channel.input_all In_channel.stdin in 5 - match Tomlt.Internal.encode_from_tagged_json input with 5 + match Tomlt.Toml.Tagged_json.decode_and_encode_toml input with 6 6 | Ok toml -> 7 7 print_string toml 8 8 | Error msg ->
+10
dune-project
··· 27 27 (ocaml (>= 5.0.0)) 28 28 (tomlt (= :version)) 29 29 (eio (>= 1.0)))) 30 + 31 + (package 32 + (name tomlt-jsont) 33 + (synopsis "Jsont codecs for TOML tagged JSON format") 34 + (description "Convert between TOML values and the toml-test tagged JSON format using Jsont codecs") 35 + (depends 36 + (ocaml (>= 4.14.0)) 37 + (tomlt (= :version)) 38 + (jsont (>= 0.2.0)) 39 + (jsont-bytesrw (>= 0.2.0))))
+1 -1
lib/dune
··· 1 1 (library 2 2 (name tomlt) 3 3 (public_name tomlt) 4 - (modules tomlt tomlt_error) 4 + (modules tomlt toml toml_error) 5 5 (libraries bytesrw uutf))
+2592
lib/toml.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + open Bytesrw 7 + 8 + (* TOML value representation *) 9 + 10 + type t = 11 + | String of string 12 + | Int of int64 13 + | Float of float 14 + | Bool of bool 15 + | Datetime of string (* Offset datetime *) 16 + | Datetime_local of string (* Local datetime *) 17 + | Date_local of string (* Local date *) 18 + | Time_local of string (* Local time *) 19 + | Array of t list 20 + | Table of (string * t) list 21 + 22 + (* Lexer - works directly on bytes buffer filled from Bytes.Reader *) 23 + 24 + type token = 25 + | Tok_lbracket 26 + | Tok_rbracket 27 + | Tok_lbrace 28 + | Tok_rbrace 29 + | Tok_equals 30 + | Tok_comma 31 + | Tok_dot 32 + | Tok_newline 33 + | Tok_eof 34 + | Tok_bare_key of string 35 + | Tok_basic_string of string 36 + | Tok_literal_string of string 37 + | Tok_ml_basic_string of string (* Multiline basic string - not valid as key *) 38 + | Tok_ml_literal_string of string (* Multiline literal string - not valid as key *) 39 + | Tok_integer of int64 * string (* value, original string for key reconstruction *) 40 + | Tok_float of float * string (* value, original string for key reconstruction *) 41 + | Tok_datetime of string 42 + | Tok_datetime_local of string 43 + | Tok_date_local of string 44 + | Tok_time_local of string 45 + 46 + type lexer = { 47 + input : bytes; (* Buffer containing input data *) 48 + input_len : int; (* Length of valid data in input *) 49 + mutable pos : int; 50 + mutable line : int; 51 + mutable col : int; 52 + file : string; 53 + } 54 + 55 + (* Create lexer from string (copies to bytes) *) 56 + let make_lexer ?(file = "-") s = 57 + let input = Bytes.of_string s in 58 + { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 59 + 60 + (* Create lexer directly from Bytes.Reader - reads all data into buffer *) 61 + let make_lexer_from_reader ?(file = "-") r = 62 + (* Read all slices into a buffer *) 63 + let buf = Buffer.create 4096 in 64 + let rec read_all () = 65 + let slice = Bytes.Reader.read r in 66 + if Bytes.Slice.is_eod slice then () 67 + else begin 68 + Bytes.Slice.add_to_buffer buf slice; 69 + read_all () 70 + end 71 + in 72 + read_all (); 73 + let input = Buffer.to_bytes buf in 74 + { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 75 + 76 + let is_eof l = l.pos >= l.input_len 77 + 78 + let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos) 79 + 80 + let peek2 l = 81 + if l.pos + 1 >= l.input_len then None 82 + else Some (Bytes.get l.input (l.pos + 1)) 83 + 84 + let peek_n l n = 85 + if l.pos + n - 1 >= l.input_len then None 86 + else Some (Bytes.sub_string l.input l.pos n) 87 + 88 + let advance l = 89 + if not (is_eof l) then begin 90 + if Bytes.get l.input l.pos = '\n' then begin 91 + l.line <- l.line + 1; 92 + l.col <- 1 93 + end else 94 + l.col <- l.col + 1; 95 + l.pos <- l.pos + 1 96 + end 97 + 98 + let advance_n l n = 99 + for _ = 1 to n do advance l done 100 + 101 + let skip_whitespace l = 102 + while not (is_eof l) && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') do 103 + advance l 104 + done 105 + 106 + (* Helper functions for bytes access *) 107 + let[@inline] get_char l pos = Bytes.unsafe_get l.input pos 108 + let[@inline] get_current l = Bytes.unsafe_get l.input l.pos 109 + let sub_string l pos len = Bytes.sub_string l.input pos len 110 + 111 + (* Helper to create error location from lexer state *) 112 + let lexer_loc l = Toml_error.loc ~file:l.file ~line:l.line ~column:l.col () 113 + 114 + (* Get expected byte length of UTF-8 char from first byte *) 115 + let utf8_byte_length_from_first_byte c = 116 + let code = Char.code c in 117 + if code < 0x80 then 1 118 + else if code < 0xC0 then 0 (* Invalid: continuation byte as start *) 119 + else if code < 0xE0 then 2 120 + else if code < 0xF0 then 3 121 + else if code < 0xF8 then 4 122 + else 0 (* Invalid: 5+ byte sequence *) 123 + 124 + (* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *) 125 + let validate_utf8_at_pos_bytes l = 126 + if l.pos >= l.input_len then 127 + Toml_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 128 + let byte_len = utf8_byte_length_from_first_byte (Bytes.unsafe_get l.input l.pos) in 129 + if byte_len = 0 then 130 + Toml_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 131 + if l.pos + byte_len > l.input_len then 132 + Toml_error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8; 133 + (* Validate using uutf - it checks overlong encodings, surrogates, etc. *) 134 + let sub = Bytes.sub_string l.input l.pos byte_len in 135 + let valid = ref false in 136 + Uutf.String.fold_utf_8 (fun () _ -> function 137 + | `Uchar _ -> valid := true 138 + | `Malformed _ -> () 139 + ) () sub; 140 + if not !valid then 141 + Toml_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 142 + byte_len 143 + 144 + (* UTF-8 validation - validates and advances over a single UTF-8 character *) 145 + let validate_utf8_char l = 146 + let byte_len = validate_utf8_at_pos_bytes l in 147 + for _ = 1 to byte_len do advance l done 148 + 149 + let skip_comment l = 150 + if not (is_eof l) && get_current l = '#' then begin 151 + (* Validate comment characters *) 152 + advance l; 153 + let continue = ref true in 154 + while !continue && not (is_eof l) && get_current l <> '\n' do 155 + let c = get_current l in 156 + let code = Char.code c in 157 + (* CR is only valid if followed by LF (CRLF at end of comment) *) 158 + if c = '\r' then begin 159 + (* Check if this CR is followed by LF - if so, it ends the comment *) 160 + if l.pos + 1 < l.input_len && get_char l (l.pos + 1) = '\n' then 161 + (* This is CRLF - stop the loop, let the main lexer handle it *) 162 + continue := false 163 + else 164 + Toml_error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return 165 + end else if code >= 0x80 then begin 166 + (* Multi-byte UTF-8 character - validate it *) 167 + validate_utf8_char l 168 + end else begin 169 + (* ASCII control characters other than tab are not allowed in comments *) 170 + if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then 171 + Toml_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 172 + advance l 173 + end 174 + done 175 + end 176 + 177 + let skip_ws_and_comments l = 178 + let rec loop () = 179 + skip_whitespace l; 180 + if not (is_eof l) && get_current l = '#' then begin 181 + skip_comment l; 182 + loop () 183 + end 184 + in 185 + loop () 186 + 187 + let is_bare_key_char c = 188 + (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || 189 + (c >= '0' && c <= '9') || c = '_' || c = '-' 190 + 191 + let is_digit c = c >= '0' && c <= '9' 192 + let is_hex_digit c = is_digit c || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 193 + let is_oct_digit c = c >= '0' && c <= '7' 194 + let is_bin_digit c = c = '0' || c = '1' 195 + 196 + let hex_value c = 197 + if c >= '0' && c <= '9' then Char.code c - Char.code '0' 198 + else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10 199 + else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10 200 + else Toml_error.raise_number Invalid_hex_digit 201 + 202 + (* Convert Unicode codepoint to UTF-8 using uutf *) 203 + let codepoint_to_utf8 codepoint = 204 + if codepoint < 0 || codepoint > 0x10FFFF then 205 + failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint); 206 + if codepoint >= 0xD800 && codepoint <= 0xDFFF then 207 + failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%04X" codepoint); 208 + let buf = Buffer.create 4 in 209 + Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 210 + Buffer.contents buf 211 + 212 + (* Parse Unicode escape with error location from lexer *) 213 + let unicode_to_utf8 l codepoint = 214 + if codepoint < 0 || codepoint > 0x10FFFF then 215 + Toml_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_codepoint codepoint); 216 + if codepoint >= 0xD800 && codepoint <= 0xDFFF then 217 + Toml_error.raise_lexer ~location:(lexer_loc l) (Surrogate_codepoint codepoint); 218 + let buf = Buffer.create 4 in 219 + Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 220 + Buffer.contents buf 221 + 222 + let parse_escape l = 223 + advance l; (* skip backslash *) 224 + if is_eof l then 225 + Toml_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 226 + let c = get_current l in 227 + advance l; 228 + match c with 229 + | 'b' -> "\b" 230 + | 't' -> "\t" 231 + | 'n' -> "\n" 232 + | 'f' -> "\x0C" 233 + | 'r' -> "\r" 234 + | 'e' -> "\x1B" (* TOML 1.1 escape *) 235 + | '"' -> "\"" 236 + | '\\' -> "\\" 237 + | 'x' -> 238 + (* \xHH - 2 hex digits *) 239 + if l.pos + 1 >= l.input_len then 240 + Toml_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x"); 241 + let c1 = get_char l l.pos in 242 + let c2 = get_char l (l.pos + 1) in 243 + if not (is_hex_digit c1 && is_hex_digit c2) then 244 + Toml_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\x"); 245 + let cp = (hex_value c1 * 16) + hex_value c2 in 246 + advance l; advance l; 247 + unicode_to_utf8 l cp 248 + | 'u' -> 249 + (* \uHHHH - 4 hex digits *) 250 + if l.pos + 3 >= l.input_len then 251 + Toml_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u"); 252 + let s = sub_string l l.pos 4 in 253 + for i = 0 to 3 do 254 + if not (is_hex_digit s.[i]) then 255 + Toml_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\u") 256 + done; 257 + let cp = int_of_string ("0x" ^ s) in 258 + advance_n l 4; 259 + unicode_to_utf8 l cp 260 + | 'U' -> 261 + (* \UHHHHHHHH - 8 hex digits *) 262 + if l.pos + 7 >= l.input_len then 263 + Toml_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U"); 264 + let s = sub_string l l.pos 8 in 265 + for i = 0 to 7 do 266 + if not (is_hex_digit s.[i]) then 267 + Toml_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\U") 268 + done; 269 + let cp = int_of_string ("0x" ^ s) in 270 + advance_n l 8; 271 + unicode_to_utf8 l cp 272 + | _ -> 273 + Toml_error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c) 274 + 275 + let validate_string_char l c is_multiline = 276 + let code = Char.code c in 277 + (* Control characters other than tab (and LF/CR for multiline) are not allowed *) 278 + if code < 0x09 then 279 + Toml_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 280 + if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then 281 + Toml_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 282 + if code = 0x7F then 283 + Toml_error.raise_lexer ~location:(lexer_loc l) (Control_character code) 284 + 285 + (* Validate UTF-8 in string context and add bytes to buffer *) 286 + let validate_and_add_utf8_to_buffer l buf = 287 + let byte_len = validate_utf8_at_pos_bytes l in 288 + Buffer.add_string buf (sub_string l l.pos byte_len); 289 + for _ = 1 to byte_len do advance l done 290 + 291 + let parse_basic_string l = 292 + advance l; (* skip opening quote *) 293 + let buf = Buffer.create 64 in 294 + let multiline = 295 + match peek_n l 2 with 296 + | Some "\"\"" -> 297 + advance l; advance l; (* skip two more quotes *) 298 + (* Skip newline immediately after opening delimiter *) 299 + (match peek l with 300 + | Some '\n' -> advance l 301 + | Some '\r' -> 302 + advance l; 303 + if peek l = Some '\n' then advance l 304 + else failwith "Bare carriage return not allowed in string" 305 + | _ -> ()); 306 + true 307 + | _ -> false 308 + in 309 + let rec loop () = 310 + if is_eof l then 311 + failwith "Unterminated string"; 312 + let c = get_current l in 313 + if multiline then begin 314 + if c = '"' then begin 315 + (* Count consecutive quotes *) 316 + let quote_count = ref 0 in 317 + let p = ref l.pos in 318 + while !p < l.input_len && get_char l !p = '"' do 319 + incr quote_count; 320 + incr p 321 + done; 322 + if !quote_count >= 3 then begin 323 + (* 3+ quotes - this is a closing delimiter *) 324 + (* Add extra quotes (up to 2) to content before closing delimiter *) 325 + let extra = min (!quote_count - 3) 2 in 326 + for _ = 1 to extra do 327 + Buffer.add_char buf '"' 328 + done; 329 + advance_n l (!quote_count); 330 + if !quote_count > 5 then 331 + failwith "Too many quotes in multiline string" 332 + end else begin 333 + (* Less than 3 quotes - add them to content *) 334 + for _ = 1 to !quote_count do 335 + Buffer.add_char buf '"'; 336 + advance l 337 + done; 338 + loop () 339 + end 340 + end else if c = '\\' then begin 341 + (* Check for line-ending backslash *) 342 + let saved_pos = l.pos in 343 + let saved_line = l.line in 344 + let saved_col = l.col in 345 + advance l; 346 + let rec skip_ws () = 347 + match peek l with 348 + | Some ' ' | Some '\t' -> advance l; skip_ws () 349 + | _ -> () 350 + in 351 + skip_ws (); 352 + match peek l with 353 + | Some '\n' -> 354 + advance l; 355 + (* Skip all whitespace and newlines after *) 356 + let rec skip_all () = 357 + match peek l with 358 + | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all () 359 + | Some '\r' -> 360 + advance l; 361 + if peek l = Some '\n' then advance l; 362 + skip_all () 363 + | _ -> () 364 + in 365 + skip_all (); 366 + loop () 367 + | Some '\r' -> 368 + advance l; 369 + if peek l = Some '\n' then advance l; 370 + let rec skip_all () = 371 + match peek l with 372 + | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all () 373 + | Some '\r' -> 374 + advance l; 375 + if peek l = Some '\n' then advance l; 376 + skip_all () 377 + | _ -> () 378 + in 379 + skip_all (); 380 + loop () 381 + | _ -> 382 + (* Not a line-ending backslash, restore position and parse escape *) 383 + l.pos <- saved_pos; 384 + l.line <- saved_line; 385 + l.col <- saved_col; 386 + Buffer.add_string buf (parse_escape l); 387 + loop () 388 + end else begin 389 + let code = Char.code c in 390 + if c = '\r' then begin 391 + advance l; 392 + if peek l = Some '\n' then begin 393 + Buffer.add_char buf '\n'; 394 + advance l 395 + end else 396 + failwith "Bare carriage return not allowed in string" 397 + end else if code >= 0x80 then begin 398 + (* Multi-byte UTF-8 - validate and add *) 399 + validate_and_add_utf8_to_buffer l buf 400 + end else begin 401 + (* ASCII - validate control chars *) 402 + validate_string_char l c true; 403 + Buffer.add_char buf c; 404 + advance l 405 + end; 406 + loop () 407 + end 408 + end else begin 409 + (* Single-line basic string *) 410 + if c = '"' then begin 411 + advance l; 412 + () 413 + end else if c = '\\' then begin 414 + Buffer.add_string buf (parse_escape l); 415 + loop () 416 + end else if c = '\n' || c = '\r' then 417 + failwith "Newline not allowed in basic string" 418 + else begin 419 + let code = Char.code c in 420 + if code >= 0x80 then begin 421 + (* Multi-byte UTF-8 - validate and add *) 422 + validate_and_add_utf8_to_buffer l buf 423 + end else begin 424 + (* ASCII - validate control chars *) 425 + validate_string_char l c false; 426 + Buffer.add_char buf c; 427 + advance l 428 + end; 429 + loop () 430 + end 431 + end 432 + in 433 + loop (); 434 + (Buffer.contents buf, multiline) 435 + 436 + let parse_literal_string l = 437 + advance l; (* skip opening quote *) 438 + let buf = Buffer.create 64 in 439 + let multiline = 440 + match peek_n l 2 with 441 + | Some "''" -> 442 + advance l; advance l; (* skip two more quotes *) 443 + (* Skip newline immediately after opening delimiter *) 444 + (match peek l with 445 + | Some '\n' -> advance l 446 + | Some '\r' -> 447 + advance l; 448 + if peek l = Some '\n' then advance l 449 + else failwith "Bare carriage return not allowed in literal string" 450 + | _ -> ()); 451 + true 452 + | _ -> false 453 + in 454 + let rec loop () = 455 + if is_eof l then 456 + failwith "Unterminated literal string"; 457 + let c = get_current l in 458 + if multiline then begin 459 + if c = '\'' then begin 460 + (* Count consecutive quotes *) 461 + let quote_count = ref 0 in 462 + let p = ref l.pos in 463 + while !p < l.input_len && get_char l !p = '\'' do 464 + incr quote_count; 465 + incr p 466 + done; 467 + if !quote_count >= 3 then begin 468 + (* 3+ quotes - this is a closing delimiter *) 469 + (* Add extra quotes (up to 2) to content before closing delimiter *) 470 + let extra = min (!quote_count - 3) 2 in 471 + for _ = 1 to extra do 472 + Buffer.add_char buf '\'' 473 + done; 474 + advance_n l (!quote_count); 475 + if !quote_count > 5 then 476 + failwith "Too many quotes in multiline literal string" 477 + end else begin 478 + (* Less than 3 quotes - add them to content *) 479 + for _ = 1 to !quote_count do 480 + Buffer.add_char buf '\''; 481 + advance l 482 + done; 483 + loop () 484 + end 485 + end else begin 486 + let code = Char.code c in 487 + if c = '\r' then begin 488 + advance l; 489 + if peek l = Some '\n' then begin 490 + Buffer.add_char buf '\n'; 491 + advance l 492 + end else 493 + failwith "Bare carriage return not allowed in literal string" 494 + end else if code >= 0x80 then begin 495 + (* Multi-byte UTF-8 - validate and add *) 496 + validate_and_add_utf8_to_buffer l buf 497 + end else begin 498 + (* ASCII control char validation for literal strings *) 499 + if code < 0x09 || (code > 0x09 && code < 0x0A) || (code > 0x0D && code < 0x20) || code = 0x7F then 500 + if code <> 0x0A && code <> 0x0D then 501 + failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line); 502 + Buffer.add_char buf c; 503 + advance l 504 + end; 505 + loop () 506 + end 507 + end else begin 508 + if c = '\'' then begin 509 + advance l; 510 + () 511 + end else if c = '\n' || c = '\r' then 512 + failwith "Newline not allowed in literal string" 513 + else begin 514 + let code = Char.code c in 515 + if code >= 0x80 then begin 516 + (* Multi-byte UTF-8 - validate and add *) 517 + validate_and_add_utf8_to_buffer l buf 518 + end else begin 519 + (* ASCII control char validation *) 520 + if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then 521 + failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line); 522 + Buffer.add_char buf c; 523 + advance l 524 + end; 525 + loop () 526 + end 527 + end 528 + in 529 + loop (); 530 + (Buffer.contents buf, multiline) 531 + 532 + let parse_number l = 533 + let start = l.pos in 534 + let neg = 535 + match peek l with 536 + | Some '-' -> advance l; true 537 + | Some '+' -> advance l; false 538 + | _ -> false 539 + in 540 + (* Check for special floats: inf and nan *) 541 + match peek_n l 3 with 542 + | Some "inf" -> 543 + advance_n l 3; 544 + let s = sub_string l start (l.pos - start) in 545 + Tok_float ((if neg then Float.neg_infinity else Float.infinity), s) 546 + | Some "nan" -> 547 + advance_n l 3; 548 + let s = sub_string l start (l.pos - start) in 549 + Tok_float (Float.nan, s) 550 + | _ -> 551 + (* Check for hex, octal, or binary *) 552 + match peek l, peek2 l with 553 + | Some '0', Some 'x' when not neg -> 554 + advance l; advance l; 555 + let num_start = l.pos in 556 + (* Check for leading underscore *) 557 + if peek l = Some '_' then failwith "Leading underscore not allowed after 0x"; 558 + let rec read_hex first = 559 + match peek l with 560 + | Some c when is_hex_digit c -> advance l; read_hex false 561 + | Some '_' -> 562 + if first then failwith "Underscore must follow a hex digit"; 563 + advance l; 564 + if peek l |> Option.map is_hex_digit |> Option.value ~default:false then 565 + read_hex false 566 + else 567 + failwith "Trailing underscore in hex number" 568 + | _ -> 569 + if first then failwith "Expected hex digit after 0x" 570 + in 571 + read_hex true; 572 + let s = sub_string l num_start (l.pos - num_start) in 573 + let s = String.concat "" (String.split_on_char '_' s) in 574 + let orig = sub_string l start (l.pos - start) in 575 + Tok_integer (Int64.of_string ("0x" ^ s), orig) 576 + | Some '0', Some 'o' when not neg -> 577 + advance l; advance l; 578 + let num_start = l.pos in 579 + (* Check for leading underscore *) 580 + if peek l = Some '_' then failwith "Leading underscore not allowed after 0o"; 581 + let rec read_oct first = 582 + match peek l with 583 + | Some c when is_oct_digit c -> advance l; read_oct false 584 + | Some '_' -> 585 + if first then failwith "Underscore must follow an octal digit"; 586 + advance l; 587 + if peek l |> Option.map is_oct_digit |> Option.value ~default:false then 588 + read_oct false 589 + else 590 + failwith "Trailing underscore in octal number" 591 + | _ -> 592 + if first then failwith "Expected octal digit after 0o" 593 + in 594 + read_oct true; 595 + let s = sub_string l num_start (l.pos - num_start) in 596 + let s = String.concat "" (String.split_on_char '_' s) in 597 + let orig = sub_string l start (l.pos - start) in 598 + Tok_integer (Int64.of_string ("0o" ^ s), orig) 599 + | Some '0', Some 'b' when not neg -> 600 + advance l; advance l; 601 + let num_start = l.pos in 602 + (* Check for leading underscore *) 603 + if peek l = Some '_' then failwith "Leading underscore not allowed after 0b"; 604 + let rec read_bin first = 605 + match peek l with 606 + | Some c when is_bin_digit c -> advance l; read_bin false 607 + | Some '_' -> 608 + if first then failwith "Underscore must follow a binary digit"; 609 + advance l; 610 + if peek l |> Option.map is_bin_digit |> Option.value ~default:false then 611 + read_bin false 612 + else 613 + failwith "Trailing underscore in binary number" 614 + | _ -> 615 + if first then failwith "Expected binary digit after 0b" 616 + in 617 + read_bin true; 618 + let s = sub_string l num_start (l.pos - num_start) in 619 + let s = String.concat "" (String.split_on_char '_' s) in 620 + let orig = sub_string l start (l.pos - start) in 621 + Tok_integer (Int64.of_string ("0b" ^ s), orig) 622 + | _ -> 623 + (* Regular decimal number *) 624 + let first_digit = peek l in 625 + (* Check for leading zeros - also reject 0_ followed by digits *) 626 + if first_digit = Some '0' then begin 627 + match peek2 l with 628 + | Some c when is_digit c -> failwith "Leading zeros not allowed" 629 + | Some '_' -> failwith "Leading zeros not allowed" 630 + | _ -> () 631 + end; 632 + let rec read_int first = 633 + match peek l with 634 + | Some c when is_digit c -> advance l; read_int false 635 + | Some '_' -> 636 + if first then failwith "Underscore must follow a digit"; 637 + advance l; 638 + if peek l |> Option.map is_digit |> Option.value ~default:false then 639 + read_int false 640 + else 641 + failwith "Trailing underscore in number" 642 + | _ -> 643 + if first then failwith "Expected digit" 644 + in 645 + (match peek l with 646 + | Some c when is_digit c -> read_int false 647 + | _ -> failwith "Expected digit after sign"); 648 + (* Check for float *) 649 + let is_float = ref false in 650 + (match peek l, peek2 l with 651 + | Some '.', Some c when is_digit c -> 652 + is_float := true; 653 + advance l; 654 + read_int false 655 + | Some '.', _ -> 656 + failwith "Decimal point must be followed by digit" 657 + | _ -> ()); 658 + (* Check for exponent *) 659 + (match peek l with 660 + | Some 'e' | Some 'E' -> 661 + is_float := true; 662 + advance l; 663 + (match peek l with 664 + | Some '+' | Some '-' -> advance l 665 + | _ -> ()); 666 + (* After exponent/sign, first char must be a digit, not underscore *) 667 + (match peek l with 668 + | Some '_' -> failwith "Underscore cannot follow exponent" 669 + | _ -> ()); 670 + read_int true 671 + | _ -> ()); 672 + let s = sub_string l start (l.pos - start) in 673 + let s' = String.concat "" (String.split_on_char '_' s) in 674 + if !is_float then 675 + Tok_float (float_of_string s', s) 676 + else 677 + Tok_integer (Int64.of_string s', s) 678 + 679 + (* Check if we're looking at a datetime/date/time *) 680 + let looks_like_datetime l = 681 + (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *) 682 + let check_datetime () = 683 + let pos = l.pos in 684 + let len = l.input_len in 685 + (* Check for YYYY-MM-DD pattern - must have exactly this structure *) 686 + if pos + 10 <= len then begin 687 + let c0 = get_char l pos in 688 + let c1 = get_char l (pos + 1) in 689 + let c2 = get_char l (pos + 2) in 690 + let c3 = get_char l (pos + 3) in 691 + let c4 = get_char l (pos + 4) in 692 + let c5 = get_char l (pos + 5) in 693 + let c6 = get_char l (pos + 6) in 694 + let c7 = get_char l (pos + 7) in 695 + let c8 = get_char l (pos + 8) in 696 + let c9 = get_char l (pos + 9) in 697 + (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *) 698 + if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' && 699 + is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin 700 + (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *) 701 + if pos + 10 < len then begin 702 + let next = get_char l (pos + 10) in 703 + if next = 'T' || next = 't' then 704 + `Date (* Datetime continues with time part *) 705 + else if next = ' ' || next = '\t' then begin 706 + (* Check if followed by = (key context) or time part *) 707 + let rec skip_ws p = 708 + if p >= len then p 709 + else match get_char l p with 710 + | ' ' | '\t' -> skip_ws (p + 1) 711 + | _ -> p 712 + in 713 + let after_ws = skip_ws (pos + 11) in 714 + if after_ws < len && get_char l after_ws = '=' then 715 + `Other (* It's a key followed by = *) 716 + else if after_ws < len && is_digit (get_char l after_ws) then 717 + `Date (* Could be "2001-02-03 12:34:56" format *) 718 + else 719 + `Date 720 + end else if next = '\n' || next = '\r' || 721 + next = '#' || next = ',' || next = ']' || next = '}' then 722 + `Date 723 + else if is_bare_key_char next then 724 + `Other (* It's a bare key like "2000-02-29abc" *) 725 + else 726 + `Date 727 + end else 728 + `Date 729 + end else if pos + 5 <= len && 730 + is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then 731 + `Time 732 + else 733 + `Other 734 + end else if pos + 5 <= len then begin 735 + let c0 = get_char l pos in 736 + let c1 = get_char l (pos + 1) in 737 + let c2 = get_char l (pos + 2) in 738 + let c3 = get_char l (pos + 3) in 739 + let c4 = get_char l (pos + 4) in 740 + if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then 741 + `Time 742 + else 743 + `Other 744 + end else 745 + `Other 746 + in 747 + check_datetime () 748 + 749 + (* Date/time validation *) 750 + let validate_date year month day = 751 + if month < 1 || month > 12 then 752 + failwith (Printf.sprintf "Invalid month: %d" month); 753 + if day < 1 then 754 + failwith (Printf.sprintf "Invalid day: %d" day); 755 + let days_in_month = [| 0; 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] in 756 + let is_leap = (year mod 4 = 0 && year mod 100 <> 0) || year mod 400 = 0 in 757 + let max_days = 758 + if month = 2 && is_leap then 29 759 + else days_in_month.(month) 760 + in 761 + if day > max_days then 762 + failwith (Printf.sprintf "Invalid day %d for month %d" day month) 763 + 764 + let validate_time hour minute second = 765 + if hour < 0 || hour > 23 then 766 + failwith (Printf.sprintf "Invalid hour: %d" hour); 767 + if minute < 0 || minute > 59 then 768 + failwith (Printf.sprintf "Invalid minute: %d" minute); 769 + if second < 0 || second > 60 then (* 60 for leap second *) 770 + failwith (Printf.sprintf "Invalid second: %d" second) 771 + 772 + let validate_offset hour minute = 773 + if hour < 0 || hour > 23 then 774 + failwith (Printf.sprintf "Invalid timezone offset hour: %d" hour); 775 + if minute < 0 || minute > 59 then 776 + failwith (Printf.sprintf "Invalid timezone offset minute: %d" minute) 777 + 778 + let parse_datetime l = 779 + let buf = Buffer.create 32 in 780 + let year_buf = Buffer.create 4 in 781 + let month_buf = Buffer.create 2 in 782 + let day_buf = Buffer.create 2 in 783 + (* Read date part YYYY-MM-DD *) 784 + for _ = 1 to 4 do 785 + match peek l with 786 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char year_buf c; advance l 787 + | _ -> failwith "Invalid date format" 788 + done; 789 + if peek l <> Some '-' then failwith "Invalid date format"; 790 + Buffer.add_char buf '-'; advance l; 791 + for _ = 1 to 2 do 792 + match peek l with 793 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char month_buf c; advance l 794 + | _ -> failwith "Invalid date format" 795 + done; 796 + if peek l <> Some '-' then failwith "Invalid date format"; 797 + Buffer.add_char buf '-'; advance l; 798 + for _ = 1 to 2 do 799 + match peek l with 800 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char day_buf c; advance l 801 + | _ -> failwith "Invalid date format" 802 + done; 803 + (* Validate date immediately *) 804 + let year = int_of_string (Buffer.contents year_buf) in 805 + let month = int_of_string (Buffer.contents month_buf) in 806 + let day = int_of_string (Buffer.contents day_buf) in 807 + validate_date year month day; 808 + (* Helper to parse time part (after T or space) *) 809 + let parse_time_part () = 810 + let hour_buf = Buffer.create 2 in 811 + let minute_buf = Buffer.create 2 in 812 + let second_buf = Buffer.create 2 in 813 + Buffer.add_char buf 'T'; (* Always normalize to uppercase T *) 814 + for _ = 1 to 2 do 815 + match peek l with 816 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l 817 + | _ -> failwith "Invalid time format" 818 + done; 819 + if peek l <> Some ':' then failwith "Invalid time format"; 820 + Buffer.add_char buf ':'; advance l; 821 + for _ = 1 to 2 do 822 + match peek l with 823 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l 824 + | _ -> failwith "Invalid time format" 825 + done; 826 + (* Optional seconds *) 827 + (match peek l with 828 + | Some ':' -> 829 + Buffer.add_char buf ':'; advance l; 830 + for _ = 1 to 2 do 831 + match peek l with 832 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l 833 + | _ -> failwith "Invalid time format" 834 + done; 835 + (* Optional fractional seconds *) 836 + (match peek l with 837 + | Some '.' -> 838 + Buffer.add_char buf '.'; advance l; 839 + if not (peek l |> Option.map is_digit |> Option.value ~default:false) then 840 + failwith "Expected digit after decimal point"; 841 + while peek l |> Option.map is_digit |> Option.value ~default:false do 842 + Buffer.add_char buf (Option.get (peek l)); 843 + advance l 844 + done 845 + | _ -> ()) 846 + | _ -> 847 + (* No seconds - add :00 for normalization per toml-test *) 848 + Buffer.add_string buf ":00"; 849 + Buffer.add_string second_buf "00"); 850 + (* Validate time *) 851 + let hour = int_of_string (Buffer.contents hour_buf) in 852 + let minute = int_of_string (Buffer.contents minute_buf) in 853 + let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in 854 + validate_time hour minute second; 855 + (* Check for offset *) 856 + match peek l with 857 + | Some 'Z' | Some 'z' -> 858 + Buffer.add_char buf 'Z'; 859 + advance l; 860 + Tok_datetime (Buffer.contents buf) 861 + | Some '+' | Some '-' as sign_opt -> 862 + let sign = Option.get sign_opt in 863 + let off_hour_buf = Buffer.create 2 in 864 + let off_min_buf = Buffer.create 2 in 865 + Buffer.add_char buf sign; 866 + advance l; 867 + for _ = 1 to 2 do 868 + match peek l with 869 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_hour_buf c; advance l 870 + | _ -> failwith "Invalid timezone offset" 871 + done; 872 + if peek l <> Some ':' then failwith "Invalid timezone offset"; 873 + Buffer.add_char buf ':'; advance l; 874 + for _ = 1 to 2 do 875 + match peek l with 876 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_min_buf c; advance l 877 + | _ -> failwith "Invalid timezone offset" 878 + done; 879 + (* Validate offset *) 880 + let off_hour = int_of_string (Buffer.contents off_hour_buf) in 881 + let off_min = int_of_string (Buffer.contents off_min_buf) in 882 + validate_offset off_hour off_min; 883 + Tok_datetime (Buffer.contents buf) 884 + | _ -> 885 + Tok_datetime_local (Buffer.contents buf) 886 + in 887 + (* Check if there's a time part *) 888 + match peek l with 889 + | Some 'T' | Some 't' -> 890 + advance l; 891 + parse_time_part () 892 + | Some ' ' -> 893 + (* Space could be followed by time (datetime with space separator) 894 + or could be end of date (local date followed by comment/value) *) 895 + advance l; (* Skip the space *) 896 + (* Check if followed by digit (time) *) 897 + (match peek l with 898 + | Some c when is_digit c -> 899 + parse_time_part () 900 + | _ -> 901 + (* Not followed by time - this is just a local date *) 902 + (* Put the space back by not consuming anything further *) 903 + l.pos <- l.pos - 1; (* Go back to before the space *) 904 + Tok_date_local (Buffer.contents buf)) 905 + | _ -> 906 + (* Just a date *) 907 + Tok_date_local (Buffer.contents buf) 908 + 909 + let parse_time l = 910 + let buf = Buffer.create 16 in 911 + let hour_buf = Buffer.create 2 in 912 + let minute_buf = Buffer.create 2 in 913 + let second_buf = Buffer.create 2 in 914 + (* Read HH:MM *) 915 + for _ = 1 to 2 do 916 + match peek l with 917 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l 918 + | _ -> failwith "Invalid time format" 919 + done; 920 + if peek l <> Some ':' then failwith "Invalid time format"; 921 + Buffer.add_char buf ':'; advance l; 922 + for _ = 1 to 2 do 923 + match peek l with 924 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l 925 + | _ -> failwith "Invalid time format" 926 + done; 927 + (* Optional seconds *) 928 + (match peek l with 929 + | Some ':' -> 930 + Buffer.add_char buf ':'; advance l; 931 + for _ = 1 to 2 do 932 + match peek l with 933 + | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l 934 + | _ -> failwith "Invalid time format" 935 + done; 936 + (* Optional fractional seconds *) 937 + (match peek l with 938 + | Some '.' -> 939 + Buffer.add_char buf '.'; advance l; 940 + if not (peek l |> Option.map is_digit |> Option.value ~default:false) then 941 + failwith "Expected digit after decimal point"; 942 + while peek l |> Option.map is_digit |> Option.value ~default:false do 943 + Buffer.add_char buf (Option.get (peek l)); 944 + advance l 945 + done 946 + | _ -> ()) 947 + | _ -> 948 + (* No seconds - add :00 for normalization *) 949 + Buffer.add_string buf ":00"; 950 + Buffer.add_string second_buf "00"); 951 + (* Validate time *) 952 + let hour = int_of_string (Buffer.contents hour_buf) in 953 + let minute = int_of_string (Buffer.contents minute_buf) in 954 + let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in 955 + validate_time hour minute second; 956 + Tok_time_local (Buffer.contents buf) 957 + 958 + let next_token l = 959 + skip_ws_and_comments l; 960 + if is_eof l then Tok_eof 961 + else begin 962 + let c = get_current l in 963 + match c with 964 + | '[' -> advance l; Tok_lbracket 965 + | ']' -> advance l; Tok_rbracket 966 + | '{' -> advance l; Tok_lbrace 967 + | '}' -> advance l; Tok_rbrace 968 + | '=' -> advance l; Tok_equals 969 + | ',' -> advance l; Tok_comma 970 + | '.' -> advance l; Tok_dot 971 + | '\n' -> advance l; Tok_newline 972 + | '\r' -> 973 + advance l; 974 + if peek l = Some '\n' then begin 975 + advance l; 976 + Tok_newline 977 + end else 978 + failwith (Printf.sprintf "Bare carriage return not allowed at line %d" l.line) 979 + | '"' -> 980 + let (s, multiline) = parse_basic_string l in 981 + if multiline then Tok_ml_basic_string s else Tok_basic_string s 982 + | '\'' -> 983 + let (s, multiline) = parse_literal_string l in 984 + if multiline then Tok_ml_literal_string s else Tok_literal_string s 985 + | '+' | '-' -> 986 + (* Could be number, special float (+inf, -inf, +nan, -nan), or bare key starting with - *) 987 + let sign = c in 988 + let start = l.pos in 989 + (match peek2 l with 990 + | Some d when is_digit d -> 991 + (* Check if this looks like a key (followed by = after whitespace/key chars) *) 992 + (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *) 993 + let is_key_context = 994 + let rec scan_ahead p = 995 + if p >= l.input_len then false 996 + else 997 + let c = get_char l p in 998 + if is_digit c || c = '_' then scan_ahead (p + 1) 999 + else if c = ' ' || c = '\t' then 1000 + (* Skip whitespace and check for = *) 1001 + let rec skip_ws pp = 1002 + if pp >= l.input_len then false 1003 + else match get_char l pp with 1004 + | ' ' | '\t' -> skip_ws (pp + 1) 1005 + | '=' -> true 1006 + | _ -> false 1007 + in 1008 + skip_ws (p + 1) 1009 + else if c = '=' then true 1010 + else if c = '.' then 1011 + (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *) 1012 + if p + 1 < l.input_len then 1013 + let next = get_char l (p + 1) in 1014 + if is_digit next then false (* It's a decimal number like -3.14 *) 1015 + else if is_bare_key_char next then true (* Dotted key *) 1016 + else false 1017 + else false 1018 + else if c = 'e' || c = 'E' then false (* Scientific notation *) 1019 + else if is_bare_key_char c then 1020 + (* Contains non-digit bare key char - it's a key *) 1021 + true 1022 + else false 1023 + in 1024 + scan_ahead (start + 1) 1025 + in 1026 + if is_key_context then begin 1027 + (* Treat as bare key *) 1028 + while not (is_eof l) && is_bare_key_char (get_current l) do 1029 + advance l 1030 + done; 1031 + Tok_bare_key (sub_string l start (l.pos - start)) 1032 + end else 1033 + parse_number l 1034 + | Some 'i' -> 1035 + (* Check for inf *) 1036 + if l.pos + 3 < l.input_len && 1037 + get_char l (l.pos + 1) = 'i' && get_char l (l.pos + 2) = 'n' && get_char l (l.pos + 3) = 'f' then begin 1038 + advance_n l 4; 1039 + let s = sub_string l start (l.pos - start) in 1040 + if sign = '-' then Tok_float (Float.neg_infinity, s) 1041 + else Tok_float (Float.infinity, s) 1042 + end else if sign = '-' then begin 1043 + (* Could be bare key like -inf-key *) 1044 + while not (is_eof l) && is_bare_key_char (get_current l) do 1045 + advance l 1046 + done; 1047 + Tok_bare_key (sub_string l start (l.pos - start)) 1048 + end else 1049 + failwith (Printf.sprintf "Unexpected character after %c" sign) 1050 + | Some 'n' -> 1051 + (* Check for nan *) 1052 + if l.pos + 3 < l.input_len && 1053 + get_char l (l.pos + 1) = 'n' && get_char l (l.pos + 2) = 'a' && get_char l (l.pos + 3) = 'n' then begin 1054 + advance_n l 4; 1055 + let s = sub_string l start (l.pos - start) in 1056 + Tok_float (Float.nan, s) (* Sign on NaN doesn't change the value *) 1057 + end else if sign = '-' then begin 1058 + (* Could be bare key like -name *) 1059 + while not (is_eof l) && is_bare_key_char (get_current l) do 1060 + advance l 1061 + done; 1062 + Tok_bare_key (sub_string l start (l.pos - start)) 1063 + end else 1064 + failwith (Printf.sprintf "Unexpected character after %c" sign) 1065 + | _ when sign = '-' -> 1066 + (* Bare key starting with - like -key or --- *) 1067 + while not (is_eof l) && is_bare_key_char (get_current l) do 1068 + advance l 1069 + done; 1070 + Tok_bare_key (sub_string l start (l.pos - start)) 1071 + | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign)) 1072 + | c when is_digit c -> 1073 + (* Could be number, datetime, or bare key starting with digits *) 1074 + (match looks_like_datetime l with 1075 + | `Date -> parse_datetime l 1076 + | `Time -> parse_time l 1077 + | `Other -> 1078 + (* Check for hex/octal/binary prefix first - these are always numbers *) 1079 + let start = l.pos in 1080 + let is_prefixed_number = 1081 + start + 1 < l.input_len && get_char l start = '0' && 1082 + (let c1 = get_char l (start + 1) in 1083 + c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B') 1084 + in 1085 + if is_prefixed_number then 1086 + parse_number l 1087 + else begin 1088 + (* Check if this is a bare key: 1089 + - Contains letters (like "123abc") 1090 + - Has leading zeros (like "0123") which would be invalid as a number *) 1091 + let has_leading_zero = 1092 + get_char l start = '0' && start + 1 < l.input_len && 1093 + let c1 = get_char l (start + 1) in 1094 + is_digit c1 1095 + in 1096 + (* Scan to see if this is a bare key or a number 1097 + - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number 1098 + - If it contains letters OR dashes between digits, it's a bare key *) 1099 + let rec scan_for_bare_key pos has_dash_between_digits = 1100 + if pos >= l.input_len then has_dash_between_digits 1101 + else 1102 + let c = get_char l pos in 1103 + if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits 1104 + else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits 1105 + else if c = '-' then 1106 + (* Dash in key - check what follows *) 1107 + let next_pos = pos + 1 in 1108 + if next_pos < l.input_len then 1109 + let next = get_char l next_pos in 1110 + if is_digit next then 1111 + scan_for_bare_key (next_pos) true (* Dash between digits - bare key *) 1112 + else if is_bare_key_char next then 1113 + true (* Dash followed by letter - definitely bare key like 2000-datetime *) 1114 + else 1115 + has_dash_between_digits (* End of sequence *) 1116 + else 1117 + has_dash_between_digits (* End of input *) 1118 + else if c = 'e' || c = 'E' then 1119 + (* Check if this looks like scientific notation *) 1120 + let next_pos = pos + 1 in 1121 + if next_pos >= l.input_len then true (* Just 'e' at end, bare key *) 1122 + else 1123 + let next = get_char l next_pos in 1124 + if next = '+' || next = '-' then 1125 + (* Has exponent sign - check if followed by digit *) 1126 + let after_sign = next_pos + 1 in 1127 + if after_sign < l.input_len && is_digit (get_char l after_sign) then 1128 + has_dash_between_digits (* Scientific notation, but might have dash earlier *) 1129 + else 1130 + true (* e.g., "3e-abc" - bare key *) 1131 + else if is_digit next then 1132 + has_dash_between_digits (* Scientific notation like 3e2, but check if had dash earlier *) 1133 + else 1134 + true (* e.g., "3eabc" - bare key *) 1135 + else if is_bare_key_char c then 1136 + (* It's a letter - this is a bare key *) 1137 + true 1138 + else has_dash_between_digits 1139 + in 1140 + if has_leading_zero || scan_for_bare_key start false then begin 1141 + (* It's a bare key *) 1142 + while not (is_eof l) && is_bare_key_char (get_current l) do 1143 + advance l 1144 + done; 1145 + Tok_bare_key (sub_string l start (l.pos - start)) 1146 + end else 1147 + (* It's a number - use parse_number *) 1148 + parse_number l 1149 + end) 1150 + | c when c = 't' || c = 'f' || c = 'i' || c = 'n' -> 1151 + (* These could be keywords (true, false, inf, nan) or bare keys 1152 + Always read as bare key and let parser interpret *) 1153 + let start = l.pos in 1154 + while not (is_eof l) && is_bare_key_char (get_current l) do 1155 + advance l 1156 + done; 1157 + Tok_bare_key (sub_string l start (l.pos - start)) 1158 + | c when is_bare_key_char c -> 1159 + let start = l.pos in 1160 + while not (is_eof l) && is_bare_key_char (get_current l) do 1161 + advance l 1162 + done; 1163 + Tok_bare_key (sub_string l start (l.pos - start)) 1164 + | c -> 1165 + let code = Char.code c in 1166 + if code < 0x20 || code = 0x7F then 1167 + failwith (Printf.sprintf "Control character U+%04X not allowed at line %d" code l.line) 1168 + else 1169 + failwith (Printf.sprintf "Unexpected character '%c' at line %d, column %d" c l.line l.col) 1170 + end 1171 + 1172 + (* Parser *) 1173 + 1174 + type parser = { 1175 + lexer : lexer; 1176 + mutable current : token; 1177 + mutable peeked : bool; 1178 + } 1179 + 1180 + let make_parser lexer = 1181 + { lexer; current = Tok_eof; peeked = false } 1182 + 1183 + let peek_token p = 1184 + if not p.peeked then begin 1185 + p.current <- next_token p.lexer; 1186 + p.peeked <- true 1187 + end; 1188 + p.current 1189 + 1190 + let consume_token p = 1191 + let tok = peek_token p in 1192 + p.peeked <- false; 1193 + tok 1194 + 1195 + (* Check if next raw character (without skipping whitespace) matches *) 1196 + let next_raw_char_is p c = 1197 + p.lexer.pos < p.lexer.input_len && get_char p.lexer p.lexer.pos = c 1198 + 1199 + let expect_token p expected = 1200 + let tok = consume_token p in 1201 + if tok <> expected then 1202 + failwith (Printf.sprintf "Expected %s" (match expected with 1203 + | Tok_equals -> "=" 1204 + | Tok_rbracket -> "]" 1205 + | Tok_rbrace -> "}" 1206 + | Tok_newline -> "newline" 1207 + | _ -> "token")) 1208 + 1209 + let skip_newlines p = 1210 + while peek_token p = Tok_newline do 1211 + ignore (consume_token p) 1212 + done 1213 + 1214 + (* Parse a single key segment (bare, basic string, literal string, or integer) *) 1215 + (* Note: Tok_float is handled specially in parse_dotted_key *) 1216 + let parse_key_segment p = 1217 + match peek_token p with 1218 + | Tok_bare_key s -> ignore (consume_token p); [s] 1219 + | Tok_basic_string s -> ignore (consume_token p); [s] 1220 + | Tok_literal_string s -> ignore (consume_token p); [s] 1221 + | Tok_integer (_i, orig_str) -> ignore (consume_token p); [orig_str] 1222 + | Tok_float (f, orig_str) -> 1223 + (* Float in key context - use original string to preserve exact key parts *) 1224 + ignore (consume_token p); 1225 + if Float.is_nan f then ["nan"] 1226 + else if f = Float.infinity then ["inf"] 1227 + else if f = Float.neg_infinity then ["-inf"] 1228 + else begin 1229 + (* Remove underscores from original string and split on dot *) 1230 + let s = String.concat "" (String.split_on_char '_' orig_str) in 1231 + if String.contains s 'e' || String.contains s 'E' then 1232 + (* Has exponent, treat as single key *) 1233 + [s] 1234 + else if String.contains s '.' then 1235 + (* Split on decimal point for dotted key *) 1236 + String.split_on_char '.' s 1237 + else 1238 + (* No decimal point, single integer key *) 1239 + [s] 1240 + end 1241 + | Tok_date_local s -> ignore (consume_token p); [s] 1242 + | Tok_datetime s -> ignore (consume_token p); [s] 1243 + | Tok_datetime_local s -> ignore (consume_token p); [s] 1244 + | Tok_time_local s -> ignore (consume_token p); [s] 1245 + | Tok_ml_basic_string _ -> failwith "Multiline strings are not allowed as keys" 1246 + | Tok_ml_literal_string _ -> failwith "Multiline strings are not allowed as keys" 1247 + | _ -> failwith "Expected key" 1248 + 1249 + (* Parse a dotted key - returns list of key strings *) 1250 + let parse_dotted_key p = 1251 + let first_keys = parse_key_segment p in 1252 + let rec loop acc = 1253 + match peek_token p with 1254 + | Tok_dot -> 1255 + ignore (consume_token p); 1256 + let keys = parse_key_segment p in 1257 + loop (List.rev_append keys acc) 1258 + | _ -> List.rev acc 1259 + in 1260 + let rest = loop [] in 1261 + first_keys @ rest 1262 + 1263 + let rec parse_value p = 1264 + match peek_token p with 1265 + | Tok_basic_string s -> ignore (consume_token p); String s 1266 + | Tok_literal_string s -> ignore (consume_token p); String s 1267 + | Tok_ml_basic_string s -> ignore (consume_token p); String s 1268 + | Tok_ml_literal_string s -> ignore (consume_token p); String s 1269 + | Tok_integer (i, _) -> ignore (consume_token p); Int i 1270 + | Tok_float (f, _) -> ignore (consume_token p); Float f 1271 + | Tok_datetime s -> ignore (consume_token p); Datetime s 1272 + | Tok_datetime_local s -> ignore (consume_token p); Datetime_local s 1273 + | Tok_date_local s -> ignore (consume_token p); Date_local s 1274 + | Tok_time_local s -> ignore (consume_token p); Time_local s 1275 + | Tok_lbracket -> parse_array p 1276 + | Tok_lbrace -> parse_inline_table p 1277 + | Tok_bare_key s -> 1278 + (* Interpret bare keys as boolean, float keywords, or numbers in value context *) 1279 + ignore (consume_token p); 1280 + (match s with 1281 + | "true" -> Bool true 1282 + | "false" -> Bool false 1283 + | "inf" -> Float Float.infinity 1284 + | "nan" -> Float Float.nan 1285 + | _ -> 1286 + (* Validate underscore placement in the original string *) 1287 + let validate_underscores str = 1288 + let len = String.length str in 1289 + if len > 0 && str.[0] = '_' then 1290 + failwith "Leading underscore not allowed in number"; 1291 + if len > 0 && str.[len - 1] = '_' then 1292 + failwith "Trailing underscore not allowed in number"; 1293 + for i = 0 to len - 2 do 1294 + if str.[i] = '_' && str.[i + 1] = '_' then 1295 + failwith "Double underscore not allowed in number"; 1296 + (* Underscore must be between digits (not next to 'e', 'E', '.', 'x', 'o', 'b', etc.) *) 1297 + if str.[i] = '_' then begin 1298 + let prev = if i > 0 then Some str.[i - 1] else None in 1299 + let next = Some str.[i + 1] in 1300 + let is_digit_char c = c >= '0' && c <= '9' in 1301 + let is_hex_char c = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') in 1302 + (* For hex numbers, underscore can be between hex digits *) 1303 + let has_hex_prefix = len > 2 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') in 1304 + match prev, next with 1305 + | Some p, Some n when has_hex_prefix && is_hex_char p && is_hex_char n -> () 1306 + | Some p, Some n when is_digit_char p && is_digit_char n -> () 1307 + | _ -> failwith "Underscore must be between digits" 1308 + end 1309 + done 1310 + in 1311 + validate_underscores s; 1312 + (* Try to parse as a number - bare keys like "10e3" should be floats *) 1313 + let s_no_underscore = String.concat "" (String.split_on_char '_' s) in 1314 + let len = String.length s_no_underscore in 1315 + if len > 0 then 1316 + let c0 = s_no_underscore.[0] in 1317 + (* Must start with digit for it to be a number in value context *) 1318 + if c0 >= '0' && c0 <= '9' then begin 1319 + (* Check for leading zeros *) 1320 + if len > 1 && c0 = '0' && s_no_underscore.[1] >= '0' && s_no_underscore.[1] <= '9' then 1321 + failwith "Leading zeros not allowed" 1322 + else 1323 + try 1324 + (* Try to parse as float (handles scientific notation) *) 1325 + if String.contains s_no_underscore '.' || 1326 + String.contains s_no_underscore 'e' || 1327 + String.contains s_no_underscore 'E' then 1328 + Float (float_of_string s_no_underscore) 1329 + else 1330 + Int (Int64.of_string s_no_underscore) 1331 + with _ -> 1332 + failwith (Printf.sprintf "Unexpected bare key '%s' as value" s) 1333 + end else 1334 + failwith (Printf.sprintf "Unexpected bare key '%s' as value" s) 1335 + else 1336 + failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)) 1337 + | _ -> failwith "Expected value" 1338 + 1339 + and parse_array p = 1340 + ignore (consume_token p); (* [ *) 1341 + skip_newlines p; 1342 + let rec loop acc = 1343 + match peek_token p with 1344 + | Tok_rbracket -> 1345 + ignore (consume_token p); 1346 + Array (List.rev acc) 1347 + | _ -> 1348 + let v = parse_value p in 1349 + skip_newlines p; 1350 + match peek_token p with 1351 + | Tok_comma -> 1352 + ignore (consume_token p); 1353 + skip_newlines p; 1354 + loop (v :: acc) 1355 + | Tok_rbracket -> 1356 + ignore (consume_token p); 1357 + Array (List.rev (v :: acc)) 1358 + | _ -> failwith "Expected ',' or ']' in array" 1359 + in 1360 + loop [] 1361 + 1362 + and parse_inline_table p = 1363 + ignore (consume_token p); (* { *) 1364 + skip_newlines p; 1365 + (* Track explicitly defined keys - can't be extended with dotted keys *) 1366 + let defined_inline = ref [] in 1367 + let rec loop acc = 1368 + match peek_token p with 1369 + | Tok_rbrace -> 1370 + ignore (consume_token p); 1371 + Table (List.rev acc) 1372 + | _ -> 1373 + let keys = parse_dotted_key p in 1374 + skip_ws p; 1375 + expect_token p Tok_equals; 1376 + skip_ws p; 1377 + let v = parse_value p in 1378 + (* Check if trying to extend a previously-defined inline table *) 1379 + (match keys with 1380 + | first_key :: _ :: _ -> 1381 + (* Multi-key dotted path - check if first key is already defined *) 1382 + if List.mem first_key !defined_inline then 1383 + failwith (Printf.sprintf "Cannot extend inline table '%s' with dotted key" first_key) 1384 + | _ -> ()); 1385 + (* If this is a direct assignment to a key, track it *) 1386 + (match keys with 1387 + | [k] -> 1388 + if List.mem k !defined_inline then 1389 + failwith (Printf.sprintf "Duplicate key '%s' in inline table" k); 1390 + defined_inline := k :: !defined_inline 1391 + | _ -> ()); 1392 + let entry = build_nested_table keys v in 1393 + (* Merge the entry with existing entries (for dotted keys with common prefix) *) 1394 + let acc = merge_entry_into_table acc entry in 1395 + skip_newlines p; 1396 + match peek_token p with 1397 + | Tok_comma -> 1398 + ignore (consume_token p); 1399 + skip_newlines p; 1400 + loop acc 1401 + | Tok_rbrace -> 1402 + ignore (consume_token p); 1403 + Table (List.rev acc) 1404 + | _ -> failwith "Expected ',' or '}' in inline table" 1405 + in 1406 + loop [] 1407 + 1408 + and skip_ws _p = 1409 + (* Skip whitespace in token stream - handled by lexer but needed for lookahead *) 1410 + () 1411 + 1412 + and build_nested_table keys value = 1413 + match keys with 1414 + | [] -> failwith "Empty key" 1415 + | [k] -> (k, value) 1416 + | k :: rest -> 1417 + (k, Table [build_nested_table rest value]) 1418 + 1419 + (* Merge two TOML values - used for combining dotted keys in inline tables *) 1420 + and merge_toml_values v1 v2 = 1421 + match v1, v2 with 1422 + | Table entries1, Table entries2 -> 1423 + (* Merge the entries *) 1424 + let merged = List.fold_left (fun acc (k, v) -> 1425 + match List.assoc_opt k acc with 1426 + | Some existing -> 1427 + (* Key exists - try to merge if both are tables *) 1428 + let merged_v = merge_toml_values existing v in 1429 + (k, merged_v) :: List.remove_assoc k acc 1430 + | None -> 1431 + (k, v) :: acc 1432 + ) entries1 entries2 in 1433 + Table (List.rev merged) 1434 + | _, _ -> 1435 + (* Can't merge non-table values with same key *) 1436 + failwith "Conflicting keys in inline table" 1437 + 1438 + (* Merge a single entry into an existing table *) 1439 + and merge_entry_into_table entries (k, v) = 1440 + match List.assoc_opt k entries with 1441 + | Some existing -> 1442 + let merged_v = merge_toml_values existing v in 1443 + (k, merged_v) :: List.remove_assoc k entries 1444 + | None -> 1445 + (k, v) :: entries 1446 + 1447 + let validate_datetime_string s = 1448 + (* Parse and validate date portion *) 1449 + if String.length s >= 10 then begin 1450 + let year = int_of_string (String.sub s 0 4) in 1451 + let month = int_of_string (String.sub s 5 2) in 1452 + let day = int_of_string (String.sub s 8 2) in 1453 + validate_date year month day; 1454 + (* Parse and validate time portion if present *) 1455 + if String.length s >= 16 then begin 1456 + let time_start = if s.[10] = 'T' || s.[10] = 't' || s.[10] = ' ' then 11 else 10 in 1457 + let hour = int_of_string (String.sub s time_start 2) in 1458 + let minute = int_of_string (String.sub s (time_start + 3) 2) in 1459 + let second = 1460 + if String.length s >= time_start + 8 && s.[time_start + 5] = ':' then 1461 + int_of_string (String.sub s (time_start + 6) 2) 1462 + else 0 1463 + in 1464 + validate_time hour minute second 1465 + end 1466 + end 1467 + 1468 + let validate_date_string s = 1469 + if String.length s >= 10 then begin 1470 + let year = int_of_string (String.sub s 0 4) in 1471 + let month = int_of_string (String.sub s 5 2) in 1472 + let day = int_of_string (String.sub s 8 2) in 1473 + validate_date year month day 1474 + end 1475 + 1476 + let validate_time_string s = 1477 + if String.length s >= 5 then begin 1478 + let hour = int_of_string (String.sub s 0 2) in 1479 + let minute = int_of_string (String.sub s 3 2) in 1480 + let second = 1481 + if String.length s >= 8 && s.[5] = ':' then 1482 + int_of_string (String.sub s 6 2) 1483 + else 0 1484 + in 1485 + validate_time hour minute second 1486 + end 1487 + 1488 + (* Table management for the parser *) 1489 + type table_state = { 1490 + mutable values : (string * t) list; 1491 + subtables : (string, table_state) Hashtbl.t; 1492 + mutable is_array : bool; 1493 + mutable is_inline : bool; 1494 + mutable defined : bool; (* Has this table been explicitly defined with [table]? *) 1495 + mutable closed : bool; (* Closed to extension via dotted keys from parent *) 1496 + mutable array_elements : table_state list; (* For arrays of tables *) 1497 + } 1498 + 1499 + let create_table_state () = { 1500 + values = []; 1501 + subtables = Hashtbl.create 16; 1502 + is_array = false; 1503 + is_inline = false; 1504 + defined = false; 1505 + closed = false; 1506 + array_elements = []; 1507 + } 1508 + 1509 + let rec get_or_create_table state keys create_intermediate = 1510 + match keys with 1511 + | [] -> state 1512 + | [k] -> 1513 + (* Check if key exists as a value *) 1514 + if List.mem_assoc k state.values then 1515 + failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1516 + (match Hashtbl.find_opt state.subtables k with 1517 + | Some sub -> sub 1518 + | None -> 1519 + let sub = create_table_state () in 1520 + Hashtbl.add state.subtables k sub; 1521 + sub) 1522 + | k :: rest -> 1523 + (* Check if key exists as a value *) 1524 + if List.mem_assoc k state.values then 1525 + failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1526 + let sub = match Hashtbl.find_opt state.subtables k with 1527 + | Some sub -> sub 1528 + | None -> 1529 + let sub = create_table_state () in 1530 + Hashtbl.add state.subtables k sub; 1531 + sub 1532 + in 1533 + if create_intermediate && not sub.defined then 1534 + sub.defined <- false; (* Mark as implicitly defined *) 1535 + get_or_create_table sub rest create_intermediate 1536 + 1537 + (* Like get_or_create_table but marks tables as defined (for dotted keys) *) 1538 + (* Dotted keys mark tables as "defined" (can't re-define with [table]) but not "closed" *) 1539 + let rec get_or_create_table_for_dotted_key state keys = 1540 + match keys with 1541 + | [] -> state 1542 + | [k] -> 1543 + (* Check if key exists as a value *) 1544 + if List.mem_assoc k state.values then 1545 + failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1546 + (match Hashtbl.find_opt state.subtables k with 1547 + | Some sub -> 1548 + (* Check if it's an array of tables (can't extend with dotted keys) *) 1549 + if sub.is_array then 1550 + failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k); 1551 + (* Check if it's closed (explicitly defined with [table] header) *) 1552 + if sub.closed then 1553 + failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k); 1554 + if sub.is_inline then 1555 + failwith (Printf.sprintf "Cannot extend inline table '%s'" k); 1556 + (* Mark as defined by dotted key *) 1557 + sub.defined <- true; 1558 + sub 1559 + | None -> 1560 + let sub = create_table_state () in 1561 + sub.defined <- true; (* Mark as defined by dotted key *) 1562 + Hashtbl.add state.subtables k sub; 1563 + sub) 1564 + | k :: rest -> 1565 + (* Check if key exists as a value *) 1566 + if List.mem_assoc k state.values then 1567 + failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1568 + let sub = match Hashtbl.find_opt state.subtables k with 1569 + | Some sub -> 1570 + (* Check if it's an array of tables (can't extend with dotted keys) *) 1571 + if sub.is_array then 1572 + failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k); 1573 + if sub.closed then 1574 + failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k); 1575 + if sub.is_inline then 1576 + failwith (Printf.sprintf "Cannot extend inline table '%s'" k); 1577 + (* Mark as defined by dotted key *) 1578 + sub.defined <- true; 1579 + sub 1580 + | None -> 1581 + let sub = create_table_state () in 1582 + sub.defined <- true; (* Mark as defined by dotted key *) 1583 + Hashtbl.add state.subtables k sub; 1584 + sub 1585 + in 1586 + get_or_create_table_for_dotted_key sub rest 1587 + 1588 + let rec table_state_to_toml state = 1589 + let subtable_values = Hashtbl.fold (fun k sub acc -> 1590 + let v = 1591 + if sub.is_array then 1592 + Array (List.map table_state_to_toml (get_array_elements sub)) 1593 + else 1594 + table_state_to_toml sub 1595 + in 1596 + (k, v) :: acc 1597 + ) state.subtables [] in 1598 + Table (List.rev state.values @ subtable_values) 1599 + 1600 + and get_array_elements state = 1601 + List.rev state.array_elements 1602 + 1603 + (* Main parser function *) 1604 + let parse_toml_from_lexer lexer = 1605 + let parser = make_parser lexer in 1606 + let root = create_table_state () in 1607 + let current_table = ref root in 1608 + (* Stack of array contexts: (full_path, parent_state, array_container) *) 1609 + (* parent_state is where the array lives, array_container is the array table itself *) 1610 + let array_context_stack = ref ([] : (string list * table_state * table_state) list) in 1611 + 1612 + (* Check if keys has a prefix matching the given path *) 1613 + let rec has_prefix keys prefix = 1614 + match keys, prefix with 1615 + | _, [] -> true 1616 + | [], _ -> false 1617 + | k :: krest, p :: prest -> k = p && has_prefix krest prest 1618 + in 1619 + 1620 + (* Remove prefix from keys *) 1621 + let rec remove_prefix keys prefix = 1622 + match keys, prefix with 1623 + | ks, [] -> ks 1624 + | [], _ -> [] 1625 + | _ :: krest, _ :: prest -> remove_prefix krest prest 1626 + in 1627 + 1628 + (* Find matching array context for the given keys *) 1629 + let find_array_context keys = 1630 + (* Stack is newest-first, so first match is the innermost (longest) prefix *) 1631 + let rec find stack = 1632 + match stack with 1633 + | [] -> None 1634 + | (path, parent, container) :: rest -> 1635 + if keys = path then 1636 + (* Exact match - adding sibling element *) 1637 + Some (`Sibling (path, parent, container)) 1638 + else if has_prefix keys path && List.length keys > List.length path then 1639 + (* Proper prefix - nested table/array within current element *) 1640 + let current_entry = List.hd container.array_elements in 1641 + Some (`Nested (path, current_entry)) 1642 + else 1643 + find rest 1644 + in 1645 + find !array_context_stack 1646 + in 1647 + 1648 + (* Pop array contexts that are no longer valid for the given keys *) 1649 + let rec pop_invalid_contexts keys = 1650 + match !array_context_stack with 1651 + | [] -> () 1652 + | (path, _, _) :: rest -> 1653 + if not (has_prefix keys path) then begin 1654 + array_context_stack := rest; 1655 + pop_invalid_contexts keys 1656 + end 1657 + in 1658 + 1659 + let rec parse_document () = 1660 + skip_newlines parser; 1661 + match peek_token parser with 1662 + | Tok_eof -> () 1663 + | Tok_lbracket -> 1664 + (* Check for array of tables [[...]] vs table [...] *) 1665 + ignore (consume_token parser); 1666 + (* For [[, the two brackets must be adjacent (no whitespace) *) 1667 + let is_adjacent_bracket = next_raw_char_is parser '[' in 1668 + (match peek_token parser with 1669 + | Tok_lbracket when not is_adjacent_bracket -> 1670 + (* The next [ was found after whitespace - this is invalid syntax like [ [table]] *) 1671 + failwith "Invalid table header syntax" 1672 + | Tok_lbracket -> 1673 + (* Array of tables - brackets are adjacent *) 1674 + ignore (consume_token parser); 1675 + let keys = parse_dotted_key parser in 1676 + expect_token parser Tok_rbracket; 1677 + (* Check that closing ]] are adjacent (no whitespace) *) 1678 + if not (next_raw_char_is parser ']') then 1679 + failwith "Invalid array of tables syntax (space in ]])"; 1680 + expect_token parser Tok_rbracket; 1681 + skip_to_newline parser; 1682 + (* Pop contexts that are no longer valid for these keys *) 1683 + pop_invalid_contexts keys; 1684 + (* Check array context for this path *) 1685 + (match find_array_context keys with 1686 + | Some (`Sibling (path, _parent, container)) -> 1687 + (* Adding another element to an existing array *) 1688 + let new_entry = create_table_state () in 1689 + container.array_elements <- new_entry :: container.array_elements; 1690 + current_table := new_entry; 1691 + (* Update the stack entry with new current element (by re-adding) *) 1692 + array_context_stack := List.map (fun (p, par, cont) -> 1693 + if p = path then (p, par, cont) else (p, par, cont) 1694 + ) !array_context_stack 1695 + | Some (`Nested (parent_path, parent_entry)) -> 1696 + (* Sub-array within current array element *) 1697 + let relative_keys = remove_prefix keys parent_path in 1698 + let array_table = get_or_create_table parent_entry relative_keys true in 1699 + (* Check if trying to convert a non-array table to array *) 1700 + if array_table.defined && not array_table.is_array then 1701 + failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys)); 1702 + if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then 1703 + failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys)); 1704 + array_table.is_array <- true; 1705 + let new_entry = create_table_state () in 1706 + array_table.array_elements <- new_entry :: array_table.array_elements; 1707 + current_table := new_entry; 1708 + (* Push new context for the nested array *) 1709 + array_context_stack := (keys, parent_entry, array_table) :: !array_context_stack 1710 + | None -> 1711 + (* Top-level array *) 1712 + let array_table = get_or_create_table root keys true in 1713 + (* Check if trying to convert a non-array table to array *) 1714 + if array_table.defined && not array_table.is_array then 1715 + failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys)); 1716 + if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then 1717 + failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys)); 1718 + array_table.is_array <- true; 1719 + let entry = create_table_state () in 1720 + array_table.array_elements <- entry :: array_table.array_elements; 1721 + current_table := entry; 1722 + (* Push context for this array *) 1723 + array_context_stack := (keys, root, array_table) :: !array_context_stack); 1724 + parse_document () 1725 + | _ -> 1726 + (* Regular table *) 1727 + let keys = parse_dotted_key parser in 1728 + expect_token parser Tok_rbracket; 1729 + skip_to_newline parser; 1730 + (* Pop contexts that are no longer valid for these keys *) 1731 + pop_invalid_contexts keys; 1732 + (* Check if this table is relative to a current array element *) 1733 + (match find_array_context keys with 1734 + | Some (`Nested (parent_path, parent_entry)) -> 1735 + let relative_keys = remove_prefix keys parent_path in 1736 + if relative_keys <> [] then begin 1737 + let table = get_or_create_table parent_entry relative_keys true in 1738 + if table.is_array then 1739 + failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1740 + if table.defined then 1741 + failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1742 + table.defined <- true; 1743 + table.closed <- true; (* Can't extend via dotted keys from parent *) 1744 + current_table := table 1745 + end else begin 1746 + (* Keys equal parent_path - shouldn't happen for regular tables *) 1747 + let table = get_or_create_table root keys true in 1748 + if table.is_array then 1749 + failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1750 + if table.defined then 1751 + failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1752 + table.defined <- true; 1753 + table.closed <- true; (* Can't extend via dotted keys from parent *) 1754 + current_table := table 1755 + end 1756 + | Some (`Sibling (_, _, container)) -> 1757 + (* Exact match to an array of tables path - can't define as regular table *) 1758 + if container.is_array then 1759 + failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1760 + (* Shouldn't reach here normally *) 1761 + let table = get_or_create_table root keys true in 1762 + if table.defined then 1763 + failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1764 + table.defined <- true; 1765 + table.closed <- true; 1766 + current_table := table 1767 + | None -> 1768 + (* Not in an array context *) 1769 + let table = get_or_create_table root keys true in 1770 + if table.is_array then 1771 + failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1772 + if table.defined then 1773 + failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1774 + table.defined <- true; 1775 + table.closed <- true; (* Can't extend via dotted keys from parent *) 1776 + current_table := table; 1777 + (* Clear array context stack if we left all array contexts *) 1778 + if not (List.exists (fun (p, _, _) -> has_prefix keys p) !array_context_stack) then 1779 + array_context_stack := []); 1780 + parse_document ()) 1781 + | Tok_bare_key _ | Tok_basic_string _ | Tok_literal_string _ 1782 + | Tok_integer _ | Tok_float _ | Tok_date_local _ | Tok_datetime _ 1783 + | Tok_datetime_local _ | Tok_time_local _ -> 1784 + (* Key-value pair - key can be bare, quoted, or numeric *) 1785 + let keys = parse_dotted_key parser in 1786 + expect_token parser Tok_equals; 1787 + let value = parse_value parser in 1788 + skip_to_newline parser; 1789 + (* Add value to current table - check for duplicates first *) 1790 + let add_value_to_table tbl key v = 1791 + if List.mem_assoc key tbl.values then 1792 + failwith (Printf.sprintf "Duplicate key: %s" key); 1793 + (match Hashtbl.find_opt tbl.subtables key with 1794 + | Some sub -> 1795 + if sub.is_array then 1796 + failwith (Printf.sprintf "Cannot redefine array of tables '%s' as a value" key) 1797 + else 1798 + failwith (Printf.sprintf "Cannot redefine table '%s' as a value" key) 1799 + | None -> ()); 1800 + tbl.values <- (key, v) :: tbl.values 1801 + in 1802 + (match keys with 1803 + | [] -> failwith "Empty key" 1804 + | [k] -> 1805 + add_value_to_table !current_table k value 1806 + | _ -> 1807 + let parent_keys = List.rev (List.tl (List.rev keys)) in 1808 + let final_key = List.hd (List.rev keys) in 1809 + (* Use get_or_create_table_for_dotted_key to check for closed tables *) 1810 + let parent = get_or_create_table_for_dotted_key !current_table parent_keys in 1811 + add_value_to_table parent final_key value); 1812 + parse_document () 1813 + | _tok -> 1814 + failwith (Printf.sprintf "Unexpected token at line %d" parser.lexer.line) 1815 + 1816 + and skip_to_newline parser = 1817 + skip_ws_and_comments parser.lexer; 1818 + match peek_token parser with 1819 + | Tok_newline -> ignore (consume_token parser) 1820 + | Tok_eof -> () 1821 + | _ -> failwith "Expected newline after value" 1822 + in 1823 + 1824 + parse_document (); 1825 + table_state_to_toml root 1826 + 1827 + (* Parse TOML from string - creates lexer internally *) 1828 + let parse_toml input = 1829 + let lexer = make_lexer input in 1830 + parse_toml_from_lexer lexer 1831 + 1832 + (* Parse TOML directly from Bytes.Reader - no intermediate string *) 1833 + let parse_toml_from_reader ?file r = 1834 + let lexer = make_lexer_from_reader ?file r in 1835 + parse_toml_from_lexer lexer 1836 + 1837 + (* Convert TOML to tagged JSON for toml-test compatibility *) 1838 + let rec toml_to_tagged_json value = 1839 + match value with 1840 + | String s -> 1841 + Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s) 1842 + | Int i -> 1843 + Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i 1844 + | Float f -> 1845 + let value_str = 1846 + (* Normalize exponent format - lowercase e, keep + for positive exponents *) 1847 + let format_exp s = 1848 + let buf = Buffer.create (String.length s + 1) in 1849 + let i = ref 0 in 1850 + while !i < String.length s do 1851 + let c = s.[!i] in 1852 + if c = 'E' then begin 1853 + Buffer.add_char buf 'e'; 1854 + (* Add + if next char is a digit (no sign present) *) 1855 + if !i + 1 < String.length s then begin 1856 + let next = s.[!i + 1] in 1857 + if next >= '0' && next <= '9' then 1858 + Buffer.add_char buf '+' 1859 + end 1860 + end else if c = 'e' then begin 1861 + Buffer.add_char buf 'e'; 1862 + (* Add + if next char is a digit (no sign present) *) 1863 + if !i + 1 < String.length s then begin 1864 + let next = s.[!i + 1] in 1865 + if next >= '0' && next <= '9' then 1866 + Buffer.add_char buf '+' 1867 + end 1868 + end else 1869 + Buffer.add_char buf c; 1870 + incr i 1871 + done; 1872 + Buffer.contents buf 1873 + in 1874 + if Float.is_nan f then "nan" 1875 + else if f = Float.infinity then "inf" 1876 + else if f = Float.neg_infinity then "-inf" 1877 + else if f = 0.0 then 1878 + (* Special case for zero - output "0" or "-0" *) 1879 + if 1.0 /. f = Float.neg_infinity then "-0" else "0" 1880 + else if Float.is_integer f then 1881 + (* Integer floats - decide on representation *) 1882 + let abs_f = Float.abs f in 1883 + if abs_f = 9007199254740991.0 then 1884 + (* Exact max safe integer - output without .0 per toml-test expectation *) 1885 + Printf.sprintf "%.0f" f 1886 + else if abs_f >= 1e6 then 1887 + (* Use scientific notation for numbers >= 1e6 *) 1888 + (* Start with precision 0 to get XeN format (integer mantissa) *) 1889 + let rec try_exp_precision prec = 1890 + if prec > 17 then format_exp (Printf.sprintf "%.17e" f) 1891 + else 1892 + let s = format_exp (Printf.sprintf "%.*e" prec f) in 1893 + if float_of_string s = f then s 1894 + else try_exp_precision (prec + 1) 1895 + in 1896 + try_exp_precision 0 1897 + else if abs_f >= 2.0 then 1898 + (* Integer floats >= 2 - output with .0 suffix *) 1899 + Printf.sprintf "%.1f" f 1900 + else 1901 + (* Integer floats 0, 1, -1 - output without .0 suffix *) 1902 + Printf.sprintf "%.0f" f 1903 + else 1904 + (* Non-integer float *) 1905 + let abs_f = Float.abs f in 1906 + let use_scientific = abs_f >= 1e10 || (abs_f < 1e-4 && abs_f > 0.0) in 1907 + if use_scientific then 1908 + let rec try_exp_precision prec = 1909 + if prec > 17 then format_exp (Printf.sprintf "%.17e" f) 1910 + else 1911 + let s = format_exp (Printf.sprintf "%.*e" prec f) in 1912 + if float_of_string s = f then s 1913 + else try_exp_precision (prec + 1) 1914 + in 1915 + try_exp_precision 1 1916 + else 1917 + (* Prefer decimal notation for reasonable range *) 1918 + (* Try shortest decimal first *) 1919 + let rec try_decimal_precision prec = 1920 + if prec > 17 then None 1921 + else 1922 + let s = Printf.sprintf "%.*f" prec f in 1923 + (* Remove trailing zeros but keep at least one decimal place *) 1924 + let s = 1925 + let len = String.length s in 1926 + let dot_pos = try String.index s '.' with Not_found -> len in 1927 + let rec find_last_nonzero i = 1928 + if i <= dot_pos then dot_pos + 2 (* Keep at least X.0 *) 1929 + else if s.[i] <> '0' then i + 1 1930 + else find_last_nonzero (i - 1) 1931 + in 1932 + let end_pos = min len (find_last_nonzero (len - 1)) in 1933 + String.sub s 0 end_pos 1934 + in 1935 + (* Ensure there's a decimal point with at least one digit after *) 1936 + let s = 1937 + if not (String.contains s '.') then s ^ ".0" 1938 + else if s.[String.length s - 1] = '.' then s ^ "0" 1939 + else s 1940 + in 1941 + if float_of_string s = f then Some s 1942 + else try_decimal_precision (prec + 1) 1943 + in 1944 + let decimal = try_decimal_precision 1 in 1945 + (* Always prefer decimal notation if it works *) 1946 + match decimal with 1947 + | Some d -> d 1948 + | None -> 1949 + (* Fall back to shortest representation *) 1950 + let rec try_precision prec = 1951 + if prec > 17 then Printf.sprintf "%.17g" f 1952 + else 1953 + let s = Printf.sprintf "%.*g" prec f in 1954 + if float_of_string s = f then s 1955 + else try_precision (prec + 1) 1956 + in 1957 + try_precision 1 1958 + in 1959 + Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str 1960 + | Bool b -> 1961 + Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false") 1962 + | Datetime s -> 1963 + validate_datetime_string s; 1964 + Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s 1965 + | Datetime_local s -> 1966 + validate_datetime_string s; 1967 + Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s 1968 + | Date_local s -> 1969 + validate_date_string s; 1970 + Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s 1971 + | Time_local s -> 1972 + validate_time_string s; 1973 + Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s 1974 + | Array items -> 1975 + let json_items = List.map toml_to_tagged_json items in 1976 + Printf.sprintf "[%s]" (String.concat "," json_items) 1977 + | Table pairs -> 1978 + let json_pairs = List.map (fun (k, v) -> 1979 + Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v) 1980 + ) pairs in 1981 + Printf.sprintf "{%s}" (String.concat "," json_pairs) 1982 + 1983 + and json_encode_string s = 1984 + let buf = Buffer.create (String.length s + 2) in 1985 + Buffer.add_char buf '"'; 1986 + String.iter (fun c -> 1987 + match c with 1988 + | '"' -> Buffer.add_string buf "\\\"" 1989 + | '\\' -> Buffer.add_string buf "\\\\" 1990 + | '\n' -> Buffer.add_string buf "\\n" 1991 + | '\r' -> Buffer.add_string buf "\\r" 1992 + | '\t' -> Buffer.add_string buf "\\t" 1993 + | '\b' -> Buffer.add_string buf "\\b" (* backspace *) 1994 + | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f" (* formfeed *) 1995 + | c when Char.code c < 0x20 -> 1996 + Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) 1997 + | c -> Buffer.add_char buf c 1998 + ) s; 1999 + Buffer.add_char buf '"'; 2000 + Buffer.contents buf 2001 + 2002 + (* Tagged JSON to TOML for encoder *) 2003 + let decode_tagged_json_string s = 2004 + (* Simple JSON parser for tagged format *) 2005 + let pos = ref 0 in 2006 + let len = String.length s in 2007 + 2008 + let skip_ws () = 2009 + while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do 2010 + incr pos 2011 + done 2012 + in 2013 + 2014 + let expect c = 2015 + skip_ws (); 2016 + if !pos >= len || s.[!pos] <> c then 2017 + failwith (Printf.sprintf "Expected '%c' at position %d" c !pos); 2018 + incr pos 2019 + in 2020 + 2021 + let peek () = 2022 + skip_ws (); 2023 + if !pos >= len then None else Some s.[!pos] 2024 + in 2025 + 2026 + let parse_json_string () = 2027 + skip_ws (); 2028 + expect '"'; 2029 + let buf = Buffer.create 64 in 2030 + while !pos < len && s.[!pos] <> '"' do 2031 + if s.[!pos] = '\\' then begin 2032 + incr pos; 2033 + if !pos >= len then failwith "Unexpected end in string escape"; 2034 + match s.[!pos] with 2035 + | '"' -> Buffer.add_char buf '"'; incr pos 2036 + | '\\' -> Buffer.add_char buf '\\'; incr pos 2037 + | '/' -> Buffer.add_char buf '/'; incr pos 2038 + | 'n' -> Buffer.add_char buf '\n'; incr pos 2039 + | 'r' -> Buffer.add_char buf '\r'; incr pos 2040 + | 't' -> Buffer.add_char buf '\t'; incr pos 2041 + | 'b' -> Buffer.add_char buf '\b'; incr pos 2042 + | 'f' -> Buffer.add_char buf (Char.chr 0x0C); incr pos 2043 + | 'u' -> 2044 + incr pos; 2045 + if !pos + 3 >= len then failwith "Invalid unicode escape"; 2046 + let hex = String.sub s !pos 4 in 2047 + let cp = int_of_string ("0x" ^ hex) in 2048 + Buffer.add_string buf (codepoint_to_utf8 cp); 2049 + pos := !pos + 4 2050 + | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c) 2051 + end else begin 2052 + Buffer.add_char buf s.[!pos]; 2053 + incr pos 2054 + end 2055 + done; 2056 + expect '"'; 2057 + Buffer.contents buf 2058 + in 2059 + 2060 + (* Convert a tagged JSON object to a TOML primitive if applicable *) 2061 + let convert_tagged_value value = 2062 + match value with 2063 + | Table [("type", String typ); ("value", String v)] 2064 + | Table [("value", String v); ("type", String typ)] -> 2065 + (match typ with 2066 + | "string" -> String v 2067 + | "integer" -> Int (Int64.of_string v) 2068 + | "float" -> 2069 + (match v with 2070 + | "inf" -> Float Float.infinity 2071 + | "-inf" -> Float Float.neg_infinity 2072 + | "nan" -> Float Float.nan 2073 + | _ -> Float (float_of_string v)) 2074 + | "bool" -> Bool (v = "true") 2075 + | "datetime" -> Datetime v 2076 + | "datetime-local" -> Datetime_local v 2077 + | "date-local" -> Date_local v 2078 + | "time-local" -> Time_local v 2079 + | _ -> failwith (Printf.sprintf "Unknown type: %s" typ)) 2080 + | _ -> value 2081 + in 2082 + 2083 + let rec parse_value () = 2084 + skip_ws (); 2085 + match peek () with 2086 + | Some '{' -> parse_object () 2087 + | Some '[' -> parse_array () 2088 + | Some '"' -> String (parse_json_string ()) 2089 + | _ -> failwith "Expected value" 2090 + 2091 + and parse_object () = 2092 + expect '{'; 2093 + skip_ws (); 2094 + if peek () = Some '}' then begin 2095 + incr pos; 2096 + Table [] 2097 + end else begin 2098 + let pairs = ref [] in 2099 + let first = ref true in 2100 + while peek () <> Some '}' do 2101 + if not !first then expect ','; 2102 + first := false; 2103 + skip_ws (); 2104 + let key = parse_json_string () in 2105 + expect ':'; 2106 + let value = parse_value () in 2107 + pairs := (key, convert_tagged_value value) :: !pairs 2108 + done; 2109 + expect '}'; 2110 + Table (List.rev !pairs) 2111 + end 2112 + 2113 + and parse_array () = 2114 + expect '['; 2115 + skip_ws (); 2116 + if peek () = Some ']' then begin 2117 + incr pos; 2118 + Array [] 2119 + end else begin 2120 + let items = ref [] in 2121 + let first = ref true in 2122 + while peek () <> Some ']' do 2123 + if not !first then expect ','; 2124 + first := false; 2125 + items := convert_tagged_value (parse_value ()) :: !items 2126 + done; 2127 + expect ']'; 2128 + Array (List.rev !items) 2129 + end 2130 + in 2131 + 2132 + parse_value () 2133 + 2134 + (* Streaming TOML encoder - writes directly to a Bytes.Writer *) 2135 + 2136 + let rec write_toml_string w s = 2137 + (* Check if we need to escape *) 2138 + let needs_escape = String.exists (fun c -> 2139 + let code = Char.code c in 2140 + c = '"' || c = '\\' || c = '\n' || c = '\r' || c = '\t' || 2141 + code < 0x20 || code = 0x7F 2142 + ) s in 2143 + if needs_escape then begin 2144 + Bytes.Writer.write_string w "\""; 2145 + String.iter (fun c -> 2146 + match c with 2147 + | '"' -> Bytes.Writer.write_string w "\\\"" 2148 + | '\\' -> Bytes.Writer.write_string w "\\\\" 2149 + | '\n' -> Bytes.Writer.write_string w "\\n" 2150 + | '\r' -> Bytes.Writer.write_string w "\\r" 2151 + | '\t' -> Bytes.Writer.write_string w "\\t" 2152 + | '\b' -> Bytes.Writer.write_string w "\\b" 2153 + | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f" 2154 + | c when Char.code c < 0x20 || Char.code c = 0x7F -> 2155 + Bytes.Writer.write_string w (Printf.sprintf "\\u%04X" (Char.code c)) 2156 + | c -> 2157 + let b = Bytes.create 1 in 2158 + Bytes.set b 0 c; 2159 + Bytes.Writer.write_bytes w b 2160 + ) s; 2161 + Bytes.Writer.write_string w "\"" 2162 + end else begin 2163 + Bytes.Writer.write_string w "\""; 2164 + Bytes.Writer.write_string w s; 2165 + Bytes.Writer.write_string w "\"" 2166 + end 2167 + 2168 + and write_toml_key w k = 2169 + (* Check if it can be a bare key *) 2170 + let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in 2171 + if is_bare then Bytes.Writer.write_string w k 2172 + else write_toml_string w k 2173 + 2174 + and write_toml_value w ?(inline=false) value = 2175 + match value with 2176 + | String s -> write_toml_string w s 2177 + | Int i -> Bytes.Writer.write_string w (Int64.to_string i) 2178 + | Float f -> 2179 + if Float.is_nan f then Bytes.Writer.write_string w "nan" 2180 + else if f = Float.infinity then Bytes.Writer.write_string w "inf" 2181 + else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf" 2182 + else begin 2183 + let s = Printf.sprintf "%.17g" f in 2184 + (* Ensure it looks like a float *) 2185 + let s = if String.contains s '.' || String.contains s 'e' || String.contains s 'E' 2186 + then s else s ^ ".0" in 2187 + Bytes.Writer.write_string w s 2188 + end 2189 + | Bool b -> Bytes.Writer.write_string w (if b then "true" else "false") 2190 + | Datetime s -> Bytes.Writer.write_string w s 2191 + | Datetime_local s -> Bytes.Writer.write_string w s 2192 + | Date_local s -> Bytes.Writer.write_string w s 2193 + | Time_local s -> Bytes.Writer.write_string w s 2194 + | Array items -> 2195 + Bytes.Writer.write_string w "["; 2196 + List.iteri (fun i item -> 2197 + if i > 0 then Bytes.Writer.write_string w ", "; 2198 + write_toml_value w ~inline:true item 2199 + ) items; 2200 + Bytes.Writer.write_string w "]" 2201 + | Table pairs when inline -> 2202 + Bytes.Writer.write_string w "{"; 2203 + List.iteri (fun i (k, v) -> 2204 + if i > 0 then Bytes.Writer.write_string w ", "; 2205 + write_toml_key w k; 2206 + Bytes.Writer.write_string w " = "; 2207 + write_toml_value w ~inline:true v 2208 + ) pairs; 2209 + Bytes.Writer.write_string w "}" 2210 + | Table _ -> failwith "Cannot encode table inline without inline flag" 2211 + 2212 + (* True streaming TOML encoder - writes directly to Bytes.Writer *) 2213 + let encode_to_writer w value = 2214 + let has_content = ref false in 2215 + 2216 + let write_path path = 2217 + List.iteri (fun i k -> 2218 + if i > 0 then Bytes.Writer.write_string w "."; 2219 + write_toml_key w k 2220 + ) path 2221 + in 2222 + 2223 + let rec encode_at_path path value = 2224 + match value with 2225 + | Table pairs -> 2226 + (* Separate simple values from nested tables *) 2227 + (* Only PURE table arrays (all items are tables) use [[array]] syntax. 2228 + Mixed arrays (primitives + tables) must be encoded inline. *) 2229 + let is_pure_table_array items = 2230 + items <> [] && List.for_all (function Table _ -> true | _ -> false) items 2231 + in 2232 + let simple, nested = List.partition (fun (_, v) -> 2233 + match v with 2234 + | Table _ -> false 2235 + | Array items -> not (is_pure_table_array items) 2236 + | _ -> true 2237 + ) pairs in 2238 + 2239 + (* Emit simple values first *) 2240 + List.iter (fun (k, v) -> 2241 + write_toml_key w k; 2242 + Bytes.Writer.write_string w " = "; 2243 + write_toml_value w ~inline:true v; 2244 + Bytes.Writer.write_string w "\n"; 2245 + has_content := true 2246 + ) simple; 2247 + 2248 + (* Then nested tables *) 2249 + List.iter (fun (k, v) -> 2250 + let new_path = path @ [k] in 2251 + match v with 2252 + | Table _ -> 2253 + if !has_content then Bytes.Writer.write_string w "\n"; 2254 + Bytes.Writer.write_string w "["; 2255 + write_path new_path; 2256 + Bytes.Writer.write_string w "]\n"; 2257 + has_content := true; 2258 + encode_at_path new_path v 2259 + | Array items when items <> [] && List.for_all (function Table _ -> true | _ -> false) items -> 2260 + (* Pure table array - use [[array]] syntax *) 2261 + List.iter (fun item -> 2262 + match item with 2263 + | Table _ -> 2264 + if !has_content then Bytes.Writer.write_string w "\n"; 2265 + Bytes.Writer.write_string w "[["; 2266 + write_path new_path; 2267 + Bytes.Writer.write_string w "]]\n"; 2268 + has_content := true; 2269 + encode_at_path new_path item 2270 + | _ -> assert false (* Impossible - we checked for_all above *) 2271 + ) items 2272 + | _ -> 2273 + write_toml_key w k; 2274 + Bytes.Writer.write_string w " = "; 2275 + write_toml_value w ~inline:true v; 2276 + Bytes.Writer.write_string w "\n"; 2277 + has_content := true 2278 + ) nested 2279 + | _ -> 2280 + failwith "Top-level TOML must be a table" 2281 + in 2282 + 2283 + encode_at_path [] value 2284 + 2285 + (* ============================================ 2286 + Public Interface - Constructors 2287 + ============================================ *) 2288 + 2289 + let string s = String s 2290 + let int i = Int i 2291 + let int_of_int i = Int (Int64.of_int i) 2292 + let float f = Float f 2293 + let bool b = Bool b 2294 + let array vs = Array vs 2295 + let table pairs = Table pairs 2296 + let datetime s = Datetime s 2297 + let datetime_local s = Datetime_local s 2298 + let date_local s = Date_local s 2299 + let time_local s = Time_local s 2300 + 2301 + (* ============================================ 2302 + Public Interface - Accessors 2303 + ============================================ *) 2304 + 2305 + let to_string = function 2306 + | String s -> s 2307 + | _ -> invalid_arg "Tomlt.to_string: not a string" 2308 + 2309 + let to_string_opt = function 2310 + | String s -> Some s 2311 + | _ -> None 2312 + 2313 + let to_int = function 2314 + | Int i -> i 2315 + | _ -> invalid_arg "Tomlt.to_int: not an integer" 2316 + 2317 + let to_int_opt = function 2318 + | Int i -> Some i 2319 + | _ -> None 2320 + 2321 + let to_float = function 2322 + | Float f -> f 2323 + | _ -> invalid_arg "Tomlt.to_float: not a float" 2324 + 2325 + let to_float_opt = function 2326 + | Float f -> Some f 2327 + | _ -> None 2328 + 2329 + let to_bool = function 2330 + | Bool b -> b 2331 + | _ -> invalid_arg "Tomlt.to_bool: not a boolean" 2332 + 2333 + let to_bool_opt = function 2334 + | Bool b -> Some b 2335 + | _ -> None 2336 + 2337 + let to_array = function 2338 + | Array vs -> vs 2339 + | _ -> invalid_arg "Tomlt.to_array: not an array" 2340 + 2341 + let to_array_opt = function 2342 + | Array vs -> Some vs 2343 + | _ -> None 2344 + 2345 + let to_table = function 2346 + | Table pairs -> pairs 2347 + | _ -> invalid_arg "Tomlt.to_table: not a table" 2348 + 2349 + let to_table_opt = function 2350 + | Table pairs -> Some pairs 2351 + | _ -> None 2352 + 2353 + let to_datetime = function 2354 + | Datetime s | Datetime_local s | Date_local s | Time_local s -> s 2355 + | _ -> invalid_arg "Tomlt.to_datetime: not a datetime" 2356 + 2357 + let to_datetime_opt = function 2358 + | Datetime s | Datetime_local s | Date_local s | Time_local s -> Some s 2359 + | _ -> None 2360 + 2361 + (* ============================================ 2362 + Public Interface - Type Predicates 2363 + ============================================ *) 2364 + 2365 + let is_string = function String _ -> true | _ -> false 2366 + let is_int = function Int _ -> true | _ -> false 2367 + let is_float = function Float _ -> true | _ -> false 2368 + let is_bool = function Bool _ -> true | _ -> false 2369 + let is_array = function Array _ -> true | _ -> false 2370 + let is_table = function Table _ -> true | _ -> false 2371 + let is_datetime = function 2372 + | Datetime _ | Datetime_local _ | Date_local _ | Time_local _ -> true 2373 + | _ -> false 2374 + 2375 + (* ============================================ 2376 + Public Interface - Table Navigation 2377 + ============================================ *) 2378 + 2379 + let find key = function 2380 + | Table pairs -> List.assoc key pairs 2381 + | _ -> invalid_arg "Tomlt.find: not a table" 2382 + 2383 + let find_opt key = function 2384 + | Table pairs -> List.assoc_opt key pairs 2385 + | _ -> None 2386 + 2387 + let mem key = function 2388 + | Table pairs -> List.mem_assoc key pairs 2389 + | _ -> false 2390 + 2391 + let keys = function 2392 + | Table pairs -> List.map fst pairs 2393 + | _ -> invalid_arg "Tomlt.keys: not a table" 2394 + 2395 + let rec get path t = 2396 + match path with 2397 + | [] -> t 2398 + | key :: rest -> 2399 + match t with 2400 + | Table pairs -> 2401 + (match List.assoc_opt key pairs with 2402 + | Some v -> get rest v 2403 + | None -> raise Not_found) 2404 + | _ -> invalid_arg "Tomlt.get: intermediate value is not a table" 2405 + 2406 + let get_opt path t = 2407 + try Some (get path t) with Not_found | Invalid_argument _ -> None 2408 + 2409 + let ( .%{} ) t path = get path t 2410 + 2411 + let rec set_at_path path v t = 2412 + match path with 2413 + | [] -> v 2414 + | [key] -> 2415 + (match t with 2416 + | Table pairs -> 2417 + let pairs' = List.filter (fun (k, _) -> k <> key) pairs in 2418 + Table ((key, v) :: pairs') 2419 + | _ -> invalid_arg "Tomlt.(.%{}<-): not a table") 2420 + | key :: rest -> 2421 + match t with 2422 + | Table pairs -> 2423 + let existing = List.assoc_opt key pairs in 2424 + let subtable = match existing with 2425 + | Some (Table _ as sub) -> sub 2426 + | Some _ -> invalid_arg "Tomlt.(.%{}<-): intermediate value is not a table" 2427 + | None -> Table [] 2428 + in 2429 + let updated = set_at_path rest v subtable in 2430 + let pairs' = List.filter (fun (k, _) -> k <> key) pairs in 2431 + Table ((key, updated) :: pairs') 2432 + | _ -> invalid_arg "Tomlt.(.%{}<-): not a table" 2433 + 2434 + let ( .%{}<- ) t path v = set_at_path path v t 2435 + 2436 + (* ============================================ 2437 + Public Interface - Encoding 2438 + ============================================ *) 2439 + 2440 + let to_buffer buf value = 2441 + let w = Bytes.Writer.of_buffer buf in 2442 + encode_to_writer w value 2443 + 2444 + let to_toml_string value = 2445 + let buf = Buffer.create 256 in 2446 + to_buffer buf value; 2447 + Buffer.contents buf 2448 + 2449 + let to_writer = encode_to_writer 2450 + 2451 + (* ============================================ 2452 + Public Interface - Decoding 2453 + ============================================ *) 2454 + 2455 + let of_string input = 2456 + try 2457 + Ok (parse_toml input) 2458 + with 2459 + | Failure msg -> Error (Toml_error.make (Toml_error.Syntax (Toml_error.Expected msg))) 2460 + | Toml_error.Error e -> Error e 2461 + | e -> Error (Toml_error.make (Toml_error.Syntax (Toml_error.Expected (Printexc.to_string e)))) 2462 + 2463 + let of_reader ?file r = 2464 + try 2465 + Ok (parse_toml_from_reader ?file r) 2466 + with 2467 + | Failure msg -> Error (Toml_error.make (Toml_error.Syntax (Toml_error.Expected msg))) 2468 + | Toml_error.Error e -> Error e 2469 + | e -> Error (Toml_error.make (Toml_error.Syntax (Toml_error.Expected (Printexc.to_string e)))) 2470 + 2471 + let parse = parse_toml 2472 + 2473 + let parse_reader ?file r = parse_toml_from_reader ?file r 2474 + 2475 + (* ============================================ 2476 + Public Interface - Pretty Printing 2477 + ============================================ *) 2478 + 2479 + let rec pp_value fmt = function 2480 + | String s -> 2481 + Format.fprintf fmt "\"%s\"" (String.escaped s) 2482 + | Int i -> 2483 + Format.fprintf fmt "%Ld" i 2484 + | Float f -> 2485 + if Float.is_nan f then Format.fprintf fmt "nan" 2486 + else if f = Float.infinity then Format.fprintf fmt "inf" 2487 + else if f = Float.neg_infinity then Format.fprintf fmt "-inf" 2488 + else Format.fprintf fmt "%g" f 2489 + | Bool b -> 2490 + Format.fprintf fmt "%s" (if b then "true" else "false") 2491 + | Datetime s | Datetime_local s | Date_local s | Time_local s -> 2492 + Format.fprintf fmt "%s" s 2493 + | Array items -> 2494 + Format.fprintf fmt "["; 2495 + List.iteri (fun i item -> 2496 + if i > 0 then Format.fprintf fmt ", "; 2497 + pp_value fmt item 2498 + ) items; 2499 + Format.fprintf fmt "]" 2500 + | Table pairs -> 2501 + Format.fprintf fmt "{"; 2502 + List.iteri (fun i (k, v) -> 2503 + if i > 0 then Format.fprintf fmt ", "; 2504 + Format.fprintf fmt "%s = " k; 2505 + pp_value fmt v 2506 + ) pairs; 2507 + Format.fprintf fmt "}" 2508 + 2509 + let pp fmt t = 2510 + Format.fprintf fmt "%s" (to_toml_string t) 2511 + 2512 + (* ============================================ 2513 + Public Interface - Equality and Comparison 2514 + ============================================ *) 2515 + 2516 + let rec equal a b = 2517 + match a, b with 2518 + | String s1, String s2 -> String.equal s1 s2 2519 + | Int i1, Int i2 -> Int64.equal i1 i2 2520 + | Float f1, Float f2 -> 2521 + (* NaN = NaN for TOML equality *) 2522 + (Float.is_nan f1 && Float.is_nan f2) || Float.equal f1 f2 2523 + | Bool b1, Bool b2 -> Bool.equal b1 b2 2524 + | Datetime s1, Datetime s2 -> String.equal s1 s2 2525 + | Datetime_local s1, Datetime_local s2 -> String.equal s1 s2 2526 + | Date_local s1, Date_local s2 -> String.equal s1 s2 2527 + | Time_local s1, Time_local s2 -> String.equal s1 s2 2528 + | Array vs1, Array vs2 -> 2529 + List.length vs1 = List.length vs2 && 2530 + List.for_all2 equal vs1 vs2 2531 + | Table ps1, Table ps2 -> 2532 + List.length ps1 = List.length ps2 && 2533 + List.for_all2 (fun (k1, v1) (k2, v2) -> 2534 + String.equal k1 k2 && equal v1 v2 2535 + ) ps1 ps2 2536 + | _ -> false 2537 + 2538 + let type_order = function 2539 + | String _ -> 0 2540 + | Int _ -> 1 2541 + | Float _ -> 2 2542 + | Bool _ -> 3 2543 + | Datetime _ -> 4 2544 + | Datetime_local _ -> 5 2545 + | Date_local _ -> 6 2546 + | Time_local _ -> 7 2547 + | Array _ -> 8 2548 + | Table _ -> 9 2549 + 2550 + let rec compare a b = 2551 + let ta, tb = type_order a, type_order b in 2552 + if ta <> tb then Int.compare ta tb 2553 + else match a, b with 2554 + | String s1, String s2 -> String.compare s1 s2 2555 + | Int i1, Int i2 -> Int64.compare i1 i2 2556 + | Float f1, Float f2 -> Float.compare f1 f2 2557 + | Bool b1, Bool b2 -> Bool.compare b1 b2 2558 + | Datetime s1, Datetime s2 -> String.compare s1 s2 2559 + | Datetime_local s1, Datetime_local s2 -> String.compare s1 s2 2560 + | Date_local s1, Date_local s2 -> String.compare s1 s2 2561 + | Time_local s1, Time_local s2 -> String.compare s1 s2 2562 + | Array vs1, Array vs2 -> 2563 + List.compare compare vs1 vs2 2564 + | Table ps1, Table ps2 -> 2565 + List.compare (fun (k1, v1) (k2, v2) -> 2566 + let c = String.compare k1 k2 in 2567 + if c <> 0 then c else compare v1 v2 2568 + ) ps1 ps2 2569 + | _ -> 0 (* Impossible - handled by type_order check *) 2570 + 2571 + (* ============================================ 2572 + Error Module 2573 + ============================================ *) 2574 + 2575 + module Error = Toml_error 2576 + 2577 + (* ============================================ 2578 + Tagged JSON (toml-test interoperability) 2579 + ============================================ *) 2580 + 2581 + module Tagged_json = struct 2582 + let encode = toml_to_tagged_json 2583 + let decode = decode_tagged_json_string 2584 + 2585 + let decode_and_encode_toml json_str = 2586 + try 2587 + let toml = decode_tagged_json_string json_str in 2588 + Ok (to_toml_string toml) 2589 + with 2590 + | Failure msg -> Error msg 2591 + | e -> Error (Printexc.to_string e) 2592 + end
+335
lib/toml.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** TOML 1.1 codec. 7 + 8 + Tomlt provides TOML 1.1 parsing and encoding with efficient streaming 9 + support via {{:https://erratique.ch/software/bytesrw}Bytesrw}. 10 + 11 + {2 Quick Start} 12 + 13 + Parse a TOML string: 14 + {[ 15 + let config = Tomlt.of_string {| 16 + [server] 17 + host = "localhost" 18 + port = 8080 19 + |} in 20 + match config with 21 + | Ok t -> 22 + let host = Tomlt.(t.%{"server"; "host"} |> to_string) in 23 + let port = Tomlt.(t.%{"server"; "port"} |> to_int) in 24 + Printf.printf "Server: %s:%Ld\n" host port 25 + | Error e -> prerr_endline (Tomlt.Error.to_string e) 26 + ]} 27 + 28 + Create and encode TOML: 29 + {[ 30 + let config = Tomlt.(table [ 31 + "title", string "My App"; 32 + "database", table [ 33 + "host", string "localhost"; 34 + "ports", array [int 5432L; int 5433L] 35 + ] 36 + ]) in 37 + print_endline (Tomlt.to_string config) 38 + ]} 39 + 40 + {2 Module Overview} 41 + 42 + - {!section:types} - TOML value representation 43 + - {!section:construct} - Value constructors 44 + - {!section:access} - Value accessors and type conversion 45 + - {!section:navigate} - Table navigation 46 + - {!section:decode} - Parsing from strings and readers 47 + - {!section:encode} - Encoding to strings and writers 48 + - {!module:Error} - Structured error types *) 49 + 50 + open Bytesrw 51 + 52 + (** {1:types TOML Value Types} *) 53 + 54 + (** The type of TOML values. 55 + 56 + TOML supports the following value types: 57 + - Strings (UTF-8 encoded) 58 + - Integers (64-bit signed) 59 + - Floats (IEEE 754 double precision) 60 + - Booleans 61 + - Offset date-times (RFC 3339 with timezone) 62 + - Local date-times (no timezone) 63 + - Local dates 64 + - Local times 65 + - Arrays (heterogeneous in TOML 1.1) 66 + - Tables (string-keyed maps) *) 67 + type t = 68 + | String of string 69 + | Int of int64 70 + | Float of float 71 + | Bool of bool 72 + | Datetime of string (** Offset datetime, e.g. [1979-05-27T07:32:00Z] *) 73 + | Datetime_local of string (** Local datetime, e.g. [1979-05-27T07:32:00] *) 74 + | Date_local of string (** Local date, e.g. [1979-05-27] *) 75 + | Time_local of string (** Local time, e.g. [07:32:00] *) 76 + | Array of t list 77 + | Table of (string * t) list 78 + (** A TOML value. Tables preserve key insertion order. *) 79 + 80 + (** {1:construct Value Constructors} 81 + 82 + These functions create TOML values. Use them to build TOML documents 83 + programmatically. *) 84 + 85 + val string : string -> t 86 + (** [string s] creates a string value. *) 87 + 88 + val int : int64 -> t 89 + (** [int i] creates an integer value. *) 90 + 91 + val int_of_int : int -> t 92 + (** [int_of_int i] creates an integer value from an [int]. *) 93 + 94 + val float : float -> t 95 + (** [float f] creates a float value. *) 96 + 97 + val bool : bool -> t 98 + (** [bool b] creates a boolean value. *) 99 + 100 + val array : t list -> t 101 + (** [array vs] creates an array value from a list of values. 102 + TOML 1.1 allows heterogeneous arrays. *) 103 + 104 + val table : (string * t) list -> t 105 + (** [table pairs] creates a table value from key-value pairs. 106 + Keys should be unique; later bindings shadow earlier ones during lookup. *) 107 + 108 + val datetime : string -> t 109 + (** [datetime s] creates an offset datetime value. 110 + The string should be in RFC 3339 format with timezone, 111 + e.g. ["1979-05-27T07:32:00Z"] or ["1979-05-27T07:32:00-07:00"]. *) 112 + 113 + val datetime_local : string -> t 114 + (** [datetime_local s] creates a local datetime value (no timezone). 115 + E.g. ["1979-05-27T07:32:00"]. *) 116 + 117 + val date_local : string -> t 118 + (** [date_local s] creates a local date value. 119 + E.g. ["1979-05-27"]. *) 120 + 121 + val time_local : string -> t 122 + (** [time_local s] creates a local time value. 123 + E.g. ["07:32:00"] or ["07:32:00.999"]. *) 124 + 125 + (** {1:access Value Accessors} 126 + 127 + These functions extract OCaml values from TOML values. 128 + They raise [Invalid_argument] if the value is not of the expected type. *) 129 + 130 + val to_string : t -> string 131 + (** [to_string t] returns the string if [t] is a [String]. 132 + @raise Invalid_argument if [t] is not a string. *) 133 + 134 + val to_string_opt : t -> string option 135 + (** [to_string_opt t] returns [Some s] if [t] is [String s], [None] otherwise. *) 136 + 137 + val to_int : t -> int64 138 + (** [to_int t] returns the integer if [t] is an [Int]. 139 + @raise Invalid_argument if [t] is not an integer. *) 140 + 141 + val to_int_opt : t -> int64 option 142 + (** [to_int_opt t] returns [Some i] if [t] is [Int i], [None] otherwise. *) 143 + 144 + val to_float : t -> float 145 + (** [to_float t] returns the float if [t] is a [Float]. 146 + @raise Invalid_argument if [t] is not a float. *) 147 + 148 + val to_float_opt : t -> float option 149 + (** [to_float_opt t] returns [Some f] if [t] is [Float f], [None] otherwise. *) 150 + 151 + val to_bool : t -> bool 152 + (** [to_bool t] returns the boolean if [t] is a [Bool]. 153 + @raise Invalid_argument if [t] is not a boolean. *) 154 + 155 + val to_bool_opt : t -> bool option 156 + (** [to_bool_opt t] returns [Some b] if [t] is [Bool b], [None] otherwise. *) 157 + 158 + val to_array : t -> t list 159 + (** [to_array t] returns the list if [t] is an [Array]. 160 + @raise Invalid_argument if [t] is not an array. *) 161 + 162 + val to_array_opt : t -> t list option 163 + (** [to_array_opt t] returns [Some vs] if [t] is [Array vs], [None] otherwise. *) 164 + 165 + val to_table : t -> (string * t) list 166 + (** [to_table t] returns the association list if [t] is a [Table]. 167 + @raise Invalid_argument if [t] is not a table. *) 168 + 169 + val to_table_opt : t -> (string * t) list option 170 + (** [to_table_opt t] returns [Some pairs] if [t] is [Table pairs], [None] otherwise. *) 171 + 172 + val to_datetime : t -> string 173 + (** [to_datetime t] returns the datetime string for any datetime type. 174 + @raise Invalid_argument if [t] is not a datetime variant. *) 175 + 176 + val to_datetime_opt : t -> string option 177 + (** [to_datetime_opt t] returns [Some s] if [t] is any datetime variant. *) 178 + 179 + (** {2 Type Predicates} *) 180 + 181 + val is_string : t -> bool 182 + (** [is_string t] is [true] iff [t] is a [String]. *) 183 + 184 + val is_int : t -> bool 185 + (** [is_int t] is [true] iff [t] is an [Int]. *) 186 + 187 + val is_float : t -> bool 188 + (** [is_float t] is [true] iff [t] is a [Float]. *) 189 + 190 + val is_bool : t -> bool 191 + (** [is_bool t] is [true] iff [t] is a [Bool]. *) 192 + 193 + val is_array : t -> bool 194 + (** [is_array t] is [true] iff [t] is an [Array]. *) 195 + 196 + val is_table : t -> bool 197 + (** [is_table t] is [true] iff [t] is a [Table]. *) 198 + 199 + val is_datetime : t -> bool 200 + (** [is_datetime t] is [true] iff [t] is any datetime variant. *) 201 + 202 + (** {1:navigate Table Navigation} 203 + 204 + Functions for navigating and querying TOML tables. *) 205 + 206 + val find : string -> t -> t 207 + (** [find key t] returns the value associated with [key] in table [t]. 208 + @raise Invalid_argument if [t] is not a table. 209 + @raise Not_found if [key] is not in the table. *) 210 + 211 + val find_opt : string -> t -> t option 212 + (** [find_opt key t] returns [Some v] if [key] maps to [v] in table [t], 213 + or [None] if [key] is not bound or [t] is not a table. *) 214 + 215 + val mem : string -> t -> bool 216 + (** [mem key t] is [true] if [key] is bound in table [t], [false] otherwise. 217 + Returns [false] if [t] is not a table. *) 218 + 219 + val keys : t -> string list 220 + (** [keys t] returns all keys in table [t]. 221 + @raise Invalid_argument if [t] is not a table. *) 222 + 223 + val get : string list -> t -> t 224 + (** [get path t] navigates through nested tables following [path]. 225 + For example, [get ["server"; "port"] t] returns [t.server.port]. 226 + @raise Invalid_argument if any intermediate value is not a table. 227 + @raise Not_found if any key in [path] is not found. *) 228 + 229 + val get_opt : string list -> t -> t option 230 + (** [get_opt path t] is like [get] but returns [None] on any error. *) 231 + 232 + val ( .%{} ) : t -> string list -> t 233 + (** [t.%{path}] is [get path t]. 234 + 235 + Example: [config.%{["database"; "port"]}] 236 + 237 + @raise Invalid_argument if any intermediate value is not a table. 238 + @raise Not_found if any key in the path is not found. *) 239 + 240 + val ( .%{}<- ) : t -> string list -> t -> t 241 + (** [t.%{path} <- v] returns a new table with value [v] at [path]. 242 + Creates intermediate tables as needed. 243 + 244 + Example: [config.%{["server"; "host"]} <- string "localhost"] 245 + 246 + @raise Invalid_argument if [t] is not a table or if an intermediate 247 + value exists but is not a table. *) 248 + 249 + (** {1:decode Decoding (Parsing)} 250 + 251 + Parse TOML from various sources. *) 252 + 253 + val of_string : string -> (t, Toml_error.t) result 254 + (** [of_string s] parses [s] as a TOML document. *) 255 + 256 + val of_reader : ?file:string -> Bytes.Reader.t -> (t, Toml_error.t) result 257 + (** [of_reader r] parses a TOML document from reader [r]. 258 + @param file Optional filename for error messages. *) 259 + 260 + val parse : string -> t 261 + (** [parse s] parses [s] as a TOML document. 262 + @raise Error.Error on parse errors. *) 263 + 264 + val parse_reader : ?file:string -> Bytes.Reader.t -> t 265 + (** [parse_reader r] parses a TOML document from reader [r]. 266 + @param file Optional filename for error messages. 267 + @raise Error.Error on parse errors. *) 268 + 269 + (** {1:encode Encoding} 270 + 271 + Encode TOML values to various outputs. *) 272 + 273 + val to_toml_string : t -> string 274 + (** [to_toml_string t] encodes [t] as a TOML document string. 275 + @raise Invalid_argument if [t] is not a [Table]. *) 276 + 277 + val to_buffer : Buffer.t -> t -> unit 278 + (** [to_buffer buf t] writes [t] as TOML to buffer [buf]. 279 + @raise Invalid_argument if [t] is not a [Table]. *) 280 + 281 + val to_writer : Bytes.Writer.t -> t -> unit 282 + (** [to_writer w t] writes [t] as TOML to writer [w]. 283 + Useful for streaming output without building the full string in memory. 284 + @raise Invalid_argument if [t] is not a [Table]. *) 285 + 286 + (** {1:pp Pretty Printing} *) 287 + 288 + val pp : Format.formatter -> t -> unit 289 + (** [pp fmt t] pretty-prints [t] in TOML format. *) 290 + 291 + val pp_value : Format.formatter -> t -> unit 292 + (** [pp_value fmt t] pretty-prints a single TOML value (not a full document). 293 + Useful for debugging. Tables are printed as inline tables. *) 294 + 295 + val equal : t -> t -> bool 296 + (** [equal a b] is structural equality on TOML values. 297 + NaN floats are considered equal to each other. *) 298 + 299 + val compare : t -> t -> int 300 + (** [compare a b] is a total ordering on TOML values. *) 301 + 302 + (** {1:errors Error Handling} *) 303 + 304 + module Error = Toml_error 305 + (** Structured error types for TOML parsing and encoding. 306 + 307 + See {!Toml_error} for detailed documentation. *) 308 + 309 + (** {1:tagged_json Tagged JSON} 310 + 311 + Functions for interoperating with the 312 + {{:https://github.com/toml-lang/toml-test}toml-test} suite's tagged JSON 313 + format. These functions are primarily for testing and validation. *) 314 + 315 + module Tagged_json : sig 316 + val encode : t -> string 317 + (** [encode t] converts TOML value [t] to tagged JSON format. 318 + 319 + The tagged JSON format wraps each value with type information: 320 + - Strings: [{"type": "string", "value": "..."}] 321 + - Integers: [{"type": "integer", "value": "..."}] 322 + - Floats: [{"type": "float", "value": "..."}] 323 + - Booleans: [{"type": "bool", "value": "true"|"false"}] 324 + - Datetimes: [{"type": "datetime", "value": "..."}] 325 + - Arrays: [[...]] 326 + - Tables: [{...}] *) 327 + 328 + val decode : string -> t 329 + (** [decode s] parses tagged JSON string [s] into a TOML value. 330 + @raise Failure if the JSON is malformed or has invalid types. *) 331 + 332 + val decode_and_encode_toml : string -> (string, string) result 333 + (** [decode_and_encode_toml json] decodes tagged JSON and encodes as TOML. 334 + Used by the toml-test encoder harness. *) 335 + end
+852 -2458
lib/tomlt.ml
··· 1 1 (*--------------------------------------------------------------------------- 2 - Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 - SPDX-License-Identifier: ISC 4 - ---------------------------------------------------------------------------*) 5 - 6 - open Bytesrw 7 - 8 - (* TOML value representation *) 9 - 10 - type t = 11 - | String of string 12 - | Int of int64 13 - | Float of float 14 - | Bool of bool 15 - | Datetime of string (* Offset datetime *) 16 - | Datetime_local of string (* Local datetime *) 17 - | Date_local of string (* Local date *) 18 - | Time_local of string (* Local time *) 19 - | Array of t list 20 - | Table of (string * t) list 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 21 5 22 - (* Lexer - works directly on bytes buffer filled from Bytes.Reader *) 23 - 24 - type token = 25 - | Tok_lbracket 26 - | Tok_rbracket 27 - | Tok_lbrace 28 - | Tok_rbrace 29 - | Tok_equals 30 - | Tok_comma 31 - | Tok_dot 32 - | Tok_newline 33 - | Tok_eof 34 - | Tok_bare_key of string 35 - | Tok_basic_string of string 36 - | Tok_literal_string of string 37 - | Tok_ml_basic_string of string (* Multiline basic string - not valid as key *) 38 - | Tok_ml_literal_string of string (* Multiline literal string - not valid as key *) 39 - | Tok_integer of int64 * string (* value, original string for key reconstruction *) 40 - | Tok_float of float * string (* value, original string for key reconstruction *) 41 - | Tok_datetime of string 42 - | Tok_datetime_local of string 43 - | Tok_date_local of string 44 - | Tok_time_local of string 6 + (** Declarative TOML codecs *) 45 7 46 - type lexer = { 47 - input : bytes; (* Buffer containing input data *) 48 - input_len : int; (* Length of valid data in input *) 49 - mutable pos : int; 50 - mutable line : int; 51 - mutable col : int; 52 - file : string; 53 - } 8 + (* ---- Helpers ---- *) 54 9 55 - (* Create lexer from string (copies to bytes) *) 56 - let make_lexer ?(file = "-") s = 57 - let input = Bytes.of_string s in 58 - { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 10 + (* Chain comparisons: return first non-zero, or final comparison *) 11 + let ( <?> ) c lazy_c = if c <> 0 then c else Lazy.force lazy_c 59 12 60 - (* Create lexer directly from Bytes.Reader - reads all data into buffer *) 61 - let make_lexer_from_reader ?(file = "-") r = 62 - (* Read all slices into a buffer *) 63 - let buf = Buffer.create 4096 in 64 - let rec read_all () = 65 - let slice = Bytes.Reader.read r in 66 - if Bytes.Slice.is_eod slice then () 67 - else begin 68 - Bytes.Slice.add_to_buffer buf slice; 69 - read_all () 70 - end 13 + (* Find first char matching predicate *) 14 + let string_index_opt p s = 15 + let len = String.length s in 16 + let rec loop i = 17 + if i >= len then None 18 + else if p s.[i] then Some i 19 + else loop (i + 1) 71 20 in 72 - read_all (); 73 - let input = Buffer.to_bytes buf in 74 - { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 21 + loop 0 75 22 76 - let is_eof l = l.pos >= l.input_len 23 + (* Find separator (T, t, or space) for datetime parsing *) 24 + let find_datetime_sep s = 25 + string_index_opt (fun c -> c = 'T' || c = 't' || c = ' ') s 77 26 78 - let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos) 27 + (* ---- Datetime structured types ---- *) 79 28 80 - let peek2 l = 81 - if l.pos + 1 >= l.input_len then None 82 - else Some (Bytes.get l.input (l.pos + 1)) 29 + module Tz = struct 30 + type t = 31 + | UTC 32 + | Offset of { hours : int; minutes : int } 83 33 84 - let peek_n l n = 85 - if l.pos + n - 1 >= l.input_len then None 86 - else Some (Bytes.sub_string l.input l.pos n) 34 + let utc = UTC 35 + let offset ~hours ~minutes = Offset { hours; minutes } 87 36 88 - let advance l = 89 - if not (is_eof l) then begin 90 - if Bytes.get l.input l.pos = '\n' then begin 91 - l.line <- l.line + 1; 92 - l.col <- 1 93 - end else 94 - l.col <- l.col + 1; 95 - l.pos <- l.pos + 1 96 - end 37 + let equal a b = match a, b with 38 + | UTC, UTC -> true 39 + | Offset { hours = h1; minutes = m1 }, Offset { hours = h2; minutes = m2 } -> 40 + h1 = h2 && m1 = m2 41 + | _ -> false 97 42 98 - let advance_n l n = 99 - for _ = 1 to n do advance l done 43 + let compare a b = match a, b with 44 + | UTC, UTC -> 0 45 + | UTC, Offset _ -> -1 46 + | Offset _, UTC -> 1 47 + | Offset { hours = h1; minutes = m1 }, Offset { hours = h2; minutes = m2 } -> 48 + Int.compare h1 h2 <?> lazy (Int.compare m1 m2) 100 49 101 - let skip_whitespace l = 102 - while not (is_eof l) && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') do 103 - advance l 104 - done 50 + let to_string = function 51 + | UTC -> "Z" 52 + | Offset { hours; minutes } -> 53 + let sign = if hours >= 0 then '+' else '-' in 54 + Printf.sprintf "%c%02d:%02d" sign (abs hours) (abs minutes) 105 55 106 - (* Helper functions for bytes access *) 107 - let[@inline] get_char l pos = Bytes.unsafe_get l.input pos 108 - let[@inline] get_current l = Bytes.unsafe_get l.input l.pos 109 - let sub_string l pos len = Bytes.sub_string l.input pos len 56 + let pp fmt t = Format.pp_print_string fmt (to_string t) 110 57 111 - (* Helper to create error location from lexer state *) 112 - let lexer_loc l = Tomlt_error.loc ~file:l.file ~line:l.line ~column:l.col () 58 + let of_string s = 59 + let len = String.length s in 60 + if len = 0 then Error "empty timezone" 61 + else if s = "Z" || s = "z" then Ok UTC 62 + else if len >= 5 then 63 + let sign = if s.[0] = '-' then -1 else 1 in 64 + let start = if s.[0] = '+' || s.[0] = '-' then 1 else 0 in 65 + try 66 + let hours = int_of_string (String.sub s start 2) * sign in 67 + let minutes = int_of_string (String.sub s (start + 3) 2) in 68 + Ok (Offset { hours; minutes }) 69 + with _ -> Error ("invalid timezone: " ^ s) 70 + else Error ("invalid timezone: " ^ s) 71 + end 113 72 114 - (* Get expected byte length of UTF-8 char from first byte *) 115 - let utf8_byte_length_from_first_byte c = 116 - let code = Char.code c in 117 - if code < 0x80 then 1 118 - else if code < 0xC0 then 0 (* Invalid: continuation byte as start *) 119 - else if code < 0xE0 then 2 120 - else if code < 0xF0 then 3 121 - else if code < 0xF8 then 4 122 - else 0 (* Invalid: 5+ byte sequence *) 73 + module Date = struct 74 + type t = { year : int; month : int; day : int } 123 75 124 - (* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *) 125 - let validate_utf8_at_pos_bytes l = 126 - if l.pos >= l.input_len then 127 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 128 - let byte_len = utf8_byte_length_from_first_byte (Bytes.unsafe_get l.input l.pos) in 129 - if byte_len = 0 then 130 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 131 - if l.pos + byte_len > l.input_len then 132 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8; 133 - (* Validate using uutf - it checks overlong encodings, surrogates, etc. *) 134 - let sub = Bytes.sub_string l.input l.pos byte_len in 135 - let valid = ref false in 136 - Uutf.String.fold_utf_8 (fun () _ -> function 137 - | `Uchar _ -> valid := true 138 - | `Malformed _ -> () 139 - ) () sub; 140 - if not !valid then 141 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 142 - byte_len 76 + let make ~year ~month ~day = { year; month; day } 143 77 144 - (* UTF-8 validation - validates and advances over a single UTF-8 character *) 145 - let validate_utf8_char l = 146 - let byte_len = validate_utf8_at_pos_bytes l in 147 - for _ = 1 to byte_len do advance l done 78 + let equal a b = a.year = b.year && a.month = b.month && a.day = b.day 148 79 149 - let skip_comment l = 150 - if not (is_eof l) && get_current l = '#' then begin 151 - (* Validate comment characters *) 152 - advance l; 153 - let continue = ref true in 154 - while !continue && not (is_eof l) && get_current l <> '\n' do 155 - let c = get_current l in 156 - let code = Char.code c in 157 - (* CR is only valid if followed by LF (CRLF at end of comment) *) 158 - if c = '\r' then begin 159 - (* Check if this CR is followed by LF - if so, it ends the comment *) 160 - if l.pos + 1 < l.input_len && get_char l (l.pos + 1) = '\n' then 161 - (* This is CRLF - stop the loop, let the main lexer handle it *) 162 - continue := false 163 - else 164 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return 165 - end else if code >= 0x80 then begin 166 - (* Multi-byte UTF-8 character - validate it *) 167 - validate_utf8_char l 168 - end else begin 169 - (* ASCII control characters other than tab are not allowed in comments *) 170 - if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then 171 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 172 - advance l 173 - end 174 - done 175 - end 80 + let compare a b = 81 + Int.compare a.year b.year 82 + <?> lazy (Int.compare a.month b.month) 83 + <?> lazy (Int.compare a.day b.day) 176 84 177 - let skip_ws_and_comments l = 178 - let rec loop () = 179 - skip_whitespace l; 180 - if not (is_eof l) && get_current l = '#' then begin 181 - skip_comment l; 182 - loop () 183 - end 184 - in 185 - loop () 85 + let to_string d = Printf.sprintf "%04d-%02d-%02d" d.year d.month d.day 186 86 187 - let is_bare_key_char c = 188 - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || 189 - (c >= '0' && c <= '9') || c = '_' || c = '-' 87 + let pp fmt d = Format.pp_print_string fmt (to_string d) 190 88 191 - let is_digit c = c >= '0' && c <= '9' 192 - let is_hex_digit c = is_digit c || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 193 - let is_oct_digit c = c >= '0' && c <= '7' 194 - let is_bin_digit c = c = '0' || c = '1' 89 + let of_string s = 90 + if String.length s < 10 then Error "date too short" 91 + else 92 + try 93 + let year = int_of_string (String.sub s 0 4) in 94 + let month = int_of_string (String.sub s 5 2) in 95 + let day = int_of_string (String.sub s 8 2) in 96 + Ok { year; month; day } 97 + with _ -> Error ("invalid date: " ^ s) 98 + end 195 99 196 - let hex_value c = 197 - if c >= '0' && c <= '9' then Char.code c - Char.code '0' 198 - else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10 199 - else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10 200 - else Tomlt_error.raise_number Invalid_hex_digit 100 + module Time = struct 101 + type t = { 102 + hour : int; 103 + minute : int; 104 + second : int; 105 + frac : float; 106 + } 201 107 202 - (* Convert Unicode codepoint to UTF-8 using uutf *) 203 - let codepoint_to_utf8 codepoint = 204 - if codepoint < 0 || codepoint > 0x10FFFF then 205 - failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint); 206 - if codepoint >= 0xD800 && codepoint <= 0xDFFF then 207 - failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%04X" codepoint); 208 - let buf = Buffer.create 4 in 209 - Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 210 - Buffer.contents buf 108 + let make ~hour ~minute ~second ?(frac = 0.0) () = 109 + { hour; minute; second; frac } 211 110 212 - (* Parse Unicode escape with error location from lexer *) 213 - let unicode_to_utf8 l codepoint = 214 - if codepoint < 0 || codepoint > 0x10FFFF then 215 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_codepoint codepoint); 216 - if codepoint >= 0xD800 && codepoint <= 0xDFFF then 217 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Surrogate_codepoint codepoint); 218 - let buf = Buffer.create 4 in 219 - Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 220 - Buffer.contents buf 111 + let equal a b = 112 + a.hour = b.hour && a.minute = b.minute && 113 + a.second = b.second && a.frac = b.frac 221 114 222 - let parse_escape l = 223 - advance l; (* skip backslash *) 224 - if is_eof l then 225 - Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 226 - let c = get_current l in 227 - advance l; 228 - match c with 229 - | 'b' -> "\b" 230 - | 't' -> "\t" 231 - | 'n' -> "\n" 232 - | 'f' -> "\x0C" 233 - | 'r' -> "\r" 234 - | 'e' -> "\x1B" (* TOML 1.1 escape *) 235 - | '"' -> "\"" 236 - | '\\' -> "\\" 237 - | 'x' -> 238 - (* \xHH - 2 hex digits *) 239 - if l.pos + 1 >= l.input_len then 240 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x"); 241 - let c1 = get_char l l.pos in 242 - let c2 = get_char l (l.pos + 1) in 243 - if not (is_hex_digit c1 && is_hex_digit c2) then 244 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\x"); 245 - let cp = (hex_value c1 * 16) + hex_value c2 in 246 - advance l; advance l; 247 - unicode_to_utf8 l cp 248 - | 'u' -> 249 - (* \uHHHH - 4 hex digits *) 250 - if l.pos + 3 >= l.input_len then 251 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u"); 252 - let s = sub_string l l.pos 4 in 253 - for i = 0 to 3 do 254 - if not (is_hex_digit s.[i]) then 255 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\u") 256 - done; 257 - let cp = int_of_string ("0x" ^ s) in 258 - advance_n l 4; 259 - unicode_to_utf8 l cp 260 - | 'U' -> 261 - (* \UHHHHHHHH - 8 hex digits *) 262 - if l.pos + 7 >= l.input_len then 263 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U"); 264 - let s = sub_string l l.pos 8 in 265 - for i = 0 to 7 do 266 - if not (is_hex_digit s.[i]) then 267 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\U") 268 - done; 269 - let cp = int_of_string ("0x" ^ s) in 270 - advance_n l 8; 271 - unicode_to_utf8 l cp 272 - | _ -> 273 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c) 115 + let compare a b = 116 + Int.compare a.hour b.hour 117 + <?> lazy (Int.compare a.minute b.minute) 118 + <?> lazy (Int.compare a.second b.second) 119 + <?> lazy (Float.compare a.frac b.frac) 274 120 275 - let validate_string_char l c is_multiline = 276 - let code = Char.code c in 277 - (* Control characters other than tab (and LF/CR for multiline) are not allowed *) 278 - if code < 0x09 then 279 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 280 - if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then 281 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code); 282 - if code = 0x7F then 283 - Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code) 121 + (* Remove trailing zeros from a string, keeping at least one char *) 122 + let rstrip_zeros s = 123 + let rec find_end i = 124 + if i <= 0 then 1 125 + else if s.[i] <> '0' then i + 1 126 + else find_end (i - 1) 127 + in 128 + String.sub s 0 (find_end (String.length s - 1)) 284 129 285 - (* Validate UTF-8 in string context and add bytes to buffer *) 286 - let validate_and_add_utf8_to_buffer l buf = 287 - let byte_len = validate_utf8_at_pos_bytes l in 288 - Buffer.add_string buf (sub_string l l.pos byte_len); 289 - for _ = 1 to byte_len do advance l done 130 + let to_string t = 131 + match t.frac with 132 + | 0.0 -> Printf.sprintf "%02d:%02d:%02d" t.hour t.minute t.second 133 + | frac -> 134 + (* Format fractional seconds: "0.123456789" -> "123456789" -> trim zeros *) 135 + let frac_str = Printf.sprintf "%.9f" frac in 136 + let frac_digits = String.sub frac_str 2 (String.length frac_str - 2) in 137 + Printf.sprintf "%02d:%02d:%02d.%s" t.hour t.minute t.second (rstrip_zeros frac_digits) 290 138 291 - let parse_basic_string l = 292 - advance l; (* skip opening quote *) 293 - let buf = Buffer.create 64 in 294 - let multiline = 295 - match peek_n l 2 with 296 - | Some "\"\"" -> 297 - advance l; advance l; (* skip two more quotes *) 298 - (* Skip newline immediately after opening delimiter *) 299 - (match peek l with 300 - | Some '\n' -> advance l 301 - | Some '\r' -> 302 - advance l; 303 - if peek l = Some '\n' then advance l 304 - else failwith "Bare carriage return not allowed in string" 305 - | _ -> ()); 306 - true 307 - | _ -> false 308 - in 309 - let rec loop () = 310 - if is_eof l then 311 - failwith "Unterminated string"; 312 - let c = get_current l in 313 - if multiline then begin 314 - if c = '"' then begin 315 - (* Count consecutive quotes *) 316 - let quote_count = ref 0 in 317 - let p = ref l.pos in 318 - while !p < l.input_len && get_char l !p = '"' do 319 - incr quote_count; 320 - incr p 321 - done; 322 - if !quote_count >= 3 then begin 323 - (* 3+ quotes - this is a closing delimiter *) 324 - (* Add extra quotes (up to 2) to content before closing delimiter *) 325 - let extra = min (!quote_count - 3) 2 in 326 - for _ = 1 to extra do 327 - Buffer.add_char buf '"' 328 - done; 329 - advance_n l (!quote_count); 330 - if !quote_count > 5 then 331 - failwith "Too many quotes in multiline string" 332 - end else begin 333 - (* Less than 3 quotes - add them to content *) 334 - for _ = 1 to !quote_count do 335 - Buffer.add_char buf '"'; 336 - advance l 337 - done; 338 - loop () 339 - end 340 - end else if c = '\\' then begin 341 - (* Check for line-ending backslash *) 342 - let saved_pos = l.pos in 343 - let saved_line = l.line in 344 - let saved_col = l.col in 345 - advance l; 346 - let rec skip_ws () = 347 - match peek l with 348 - | Some ' ' | Some '\t' -> advance l; skip_ws () 349 - | _ -> () 139 + let pp fmt t = Format.pp_print_string fmt (to_string t) 140 + 141 + let of_string s = 142 + if String.length s < 8 then Error "time too short" 143 + else 144 + try 145 + let hour = int_of_string (String.sub s 0 2) in 146 + let minute = int_of_string (String.sub s 3 2) in 147 + let second = int_of_string (String.sub s 6 2) in 148 + let frac = 149 + if String.length s > 8 && s.[8] = '.' then 150 + float_of_string ("0" ^ String.sub s 8 (String.length s - 8)) 151 + else 0.0 350 152 in 351 - skip_ws (); 352 - match peek l with 353 - | Some '\n' -> 354 - advance l; 355 - (* Skip all whitespace and newlines after *) 356 - let rec skip_all () = 357 - match peek l with 358 - | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all () 359 - | Some '\r' -> 360 - advance l; 361 - if peek l = Some '\n' then advance l; 362 - skip_all () 363 - | _ -> () 364 - in 365 - skip_all (); 366 - loop () 367 - | Some '\r' -> 368 - advance l; 369 - if peek l = Some '\n' then advance l; 370 - let rec skip_all () = 371 - match peek l with 372 - | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all () 373 - | Some '\r' -> 374 - advance l; 375 - if peek l = Some '\n' then advance l; 376 - skip_all () 377 - | _ -> () 378 - in 379 - skip_all (); 380 - loop () 381 - | _ -> 382 - (* Not a line-ending backslash, restore position and parse escape *) 383 - l.pos <- saved_pos; 384 - l.line <- saved_line; 385 - l.col <- saved_col; 386 - Buffer.add_string buf (parse_escape l); 387 - loop () 388 - end else begin 389 - let code = Char.code c in 390 - if c = '\r' then begin 391 - advance l; 392 - if peek l = Some '\n' then begin 393 - Buffer.add_char buf '\n'; 394 - advance l 395 - end else 396 - failwith "Bare carriage return not allowed in string" 397 - end else if code >= 0x80 then begin 398 - (* Multi-byte UTF-8 - validate and add *) 399 - validate_and_add_utf8_to_buffer l buf 400 - end else begin 401 - (* ASCII - validate control chars *) 402 - validate_string_char l c true; 403 - Buffer.add_char buf c; 404 - advance l 405 - end; 406 - loop () 407 - end 408 - end else begin 409 - (* Single-line basic string *) 410 - if c = '"' then begin 411 - advance l; 412 - () 413 - end else if c = '\\' then begin 414 - Buffer.add_string buf (parse_escape l); 415 - loop () 416 - end else if c = '\n' || c = '\r' then 417 - failwith "Newline not allowed in basic string" 418 - else begin 419 - let code = Char.code c in 420 - if code >= 0x80 then begin 421 - (* Multi-byte UTF-8 - validate and add *) 422 - validate_and_add_utf8_to_buffer l buf 423 - end else begin 424 - (* ASCII - validate control chars *) 425 - validate_string_char l c false; 426 - Buffer.add_char buf c; 427 - advance l 428 - end; 429 - loop () 430 - end 431 - end 432 - in 433 - loop (); 434 - (Buffer.contents buf, multiline) 153 + Ok { hour; minute; second; frac } 154 + with _ -> Error ("invalid time: " ^ s) 155 + end 435 156 436 - let parse_literal_string l = 437 - advance l; (* skip opening quote *) 438 - let buf = Buffer.create 64 in 439 - let multiline = 440 - match peek_n l 2 with 441 - | Some "''" -> 442 - advance l; advance l; (* skip two more quotes *) 443 - (* Skip newline immediately after opening delimiter *) 444 - (match peek l with 445 - | Some '\n' -> advance l 446 - | Some '\r' -> 447 - advance l; 448 - if peek l = Some '\n' then advance l 449 - else failwith "Bare carriage return not allowed in literal string" 450 - | _ -> ()); 451 - true 452 - | _ -> false 453 - in 454 - let rec loop () = 455 - if is_eof l then 456 - failwith "Unterminated literal string"; 457 - let c = get_current l in 458 - if multiline then begin 459 - if c = '\'' then begin 460 - (* Count consecutive quotes *) 461 - let quote_count = ref 0 in 462 - let p = ref l.pos in 463 - while !p < l.input_len && get_char l !p = '\'' do 464 - incr quote_count; 465 - incr p 466 - done; 467 - if !quote_count >= 3 then begin 468 - (* 3+ quotes - this is a closing delimiter *) 469 - (* Add extra quotes (up to 2) to content before closing delimiter *) 470 - let extra = min (!quote_count - 3) 2 in 471 - for _ = 1 to extra do 472 - Buffer.add_char buf '\'' 473 - done; 474 - advance_n l (!quote_count); 475 - if !quote_count > 5 then 476 - failwith "Too many quotes in multiline literal string" 477 - end else begin 478 - (* Less than 3 quotes - add them to content *) 479 - for _ = 1 to !quote_count do 480 - Buffer.add_char buf '\''; 481 - advance l 482 - done; 483 - loop () 484 - end 485 - end else begin 486 - let code = Char.code c in 487 - if c = '\r' then begin 488 - advance l; 489 - if peek l = Some '\n' then begin 490 - Buffer.add_char buf '\n'; 491 - advance l 492 - end else 493 - failwith "Bare carriage return not allowed in literal string" 494 - end else if code >= 0x80 then begin 495 - (* Multi-byte UTF-8 - validate and add *) 496 - validate_and_add_utf8_to_buffer l buf 497 - end else begin 498 - (* ASCII control char validation for literal strings *) 499 - if code < 0x09 || (code > 0x09 && code < 0x0A) || (code > 0x0D && code < 0x20) || code = 0x7F then 500 - if code <> 0x0A && code <> 0x0D then 501 - failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line); 502 - Buffer.add_char buf c; 503 - advance l 504 - end; 505 - loop () 506 - end 507 - end else begin 508 - if c = '\'' then begin 509 - advance l; 510 - () 511 - end else if c = '\n' || c = '\r' then 512 - failwith "Newline not allowed in literal string" 513 - else begin 514 - let code = Char.code c in 515 - if code >= 0x80 then begin 516 - (* Multi-byte UTF-8 - validate and add *) 517 - validate_and_add_utf8_to_buffer l buf 518 - end else begin 519 - (* ASCII control char validation *) 520 - if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then 521 - failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line); 522 - Buffer.add_char buf c; 523 - advance l 524 - end; 525 - loop () 526 - end 527 - end 528 - in 529 - loop (); 530 - (Buffer.contents buf, multiline) 157 + module Datetime = struct 158 + type t = { date : Date.t; time : Time.t; tz : Tz.t } 531 159 532 - let parse_number l = 533 - let start = l.pos in 534 - let neg = 535 - match peek l with 536 - | Some '-' -> advance l; true 537 - | Some '+' -> advance l; false 538 - | _ -> false 539 - in 540 - (* Check for special floats: inf and nan *) 541 - match peek_n l 3 with 542 - | Some "inf" -> 543 - advance_n l 3; 544 - let s = sub_string l start (l.pos - start) in 545 - Tok_float ((if neg then Float.neg_infinity else Float.infinity), s) 546 - | Some "nan" -> 547 - advance_n l 3; 548 - let s = sub_string l start (l.pos - start) in 549 - Tok_float (Float.nan, s) 550 - | _ -> 551 - (* Check for hex, octal, or binary *) 552 - match peek l, peek2 l with 553 - | Some '0', Some 'x' when not neg -> 554 - advance l; advance l; 555 - let num_start = l.pos in 556 - (* Check for leading underscore *) 557 - if peek l = Some '_' then failwith "Leading underscore not allowed after 0x"; 558 - let rec read_hex first = 559 - match peek l with 560 - | Some c when is_hex_digit c -> advance l; read_hex false 561 - | Some '_' -> 562 - if first then failwith "Underscore must follow a hex digit"; 563 - advance l; 564 - if peek l |> Option.map is_hex_digit |> Option.value ~default:false then 565 - read_hex false 566 - else 567 - failwith "Trailing underscore in hex number" 568 - | _ -> 569 - if first then failwith "Expected hex digit after 0x" 570 - in 571 - read_hex true; 572 - let s = sub_string l num_start (l.pos - num_start) in 573 - let s = String.concat "" (String.split_on_char '_' s) in 574 - let orig = sub_string l start (l.pos - start) in 575 - Tok_integer (Int64.of_string ("0x" ^ s), orig) 576 - | Some '0', Some 'o' when not neg -> 577 - advance l; advance l; 578 - let num_start = l.pos in 579 - (* Check for leading underscore *) 580 - if peek l = Some '_' then failwith "Leading underscore not allowed after 0o"; 581 - let rec read_oct first = 582 - match peek l with 583 - | Some c when is_oct_digit c -> advance l; read_oct false 584 - | Some '_' -> 585 - if first then failwith "Underscore must follow an octal digit"; 586 - advance l; 587 - if peek l |> Option.map is_oct_digit |> Option.value ~default:false then 588 - read_oct false 589 - else 590 - failwith "Trailing underscore in octal number" 591 - | _ -> 592 - if first then failwith "Expected octal digit after 0o" 593 - in 594 - read_oct true; 595 - let s = sub_string l num_start (l.pos - num_start) in 596 - let s = String.concat "" (String.split_on_char '_' s) in 597 - let orig = sub_string l start (l.pos - start) in 598 - Tok_integer (Int64.of_string ("0o" ^ s), orig) 599 - | Some '0', Some 'b' when not neg -> 600 - advance l; advance l; 601 - let num_start = l.pos in 602 - (* Check for leading underscore *) 603 - if peek l = Some '_' then failwith "Leading underscore not allowed after 0b"; 604 - let rec read_bin first = 605 - match peek l with 606 - | Some c when is_bin_digit c -> advance l; read_bin false 607 - | Some '_' -> 608 - if first then failwith "Underscore must follow a binary digit"; 609 - advance l; 610 - if peek l |> Option.map is_bin_digit |> Option.value ~default:false then 611 - read_bin false 612 - else 613 - failwith "Trailing underscore in binary number" 614 - | _ -> 615 - if first then failwith "Expected binary digit after 0b" 616 - in 617 - read_bin true; 618 - let s = sub_string l num_start (l.pos - num_start) in 619 - let s = String.concat "" (String.split_on_char '_' s) in 620 - let orig = sub_string l start (l.pos - start) in 621 - Tok_integer (Int64.of_string ("0b" ^ s), orig) 622 - | _ -> 623 - (* Regular decimal number *) 624 - let first_digit = peek l in 625 - (* Check for leading zeros - also reject 0_ followed by digits *) 626 - if first_digit = Some '0' then begin 627 - match peek2 l with 628 - | Some c when is_digit c -> failwith "Leading zeros not allowed" 629 - | Some '_' -> failwith "Leading zeros not allowed" 630 - | _ -> () 631 - end; 632 - let rec read_int first = 633 - match peek l with 634 - | Some c when is_digit c -> advance l; read_int false 635 - | Some '_' -> 636 - if first then failwith "Underscore must follow a digit"; 637 - advance l; 638 - if peek l |> Option.map is_digit |> Option.value ~default:false then 639 - read_int false 640 - else 641 - failwith "Trailing underscore in number" 642 - | _ -> 643 - if first then failwith "Expected digit" 644 - in 645 - (match peek l with 646 - | Some c when is_digit c -> read_int false 647 - | _ -> failwith "Expected digit after sign"); 648 - (* Check for float *) 649 - let is_float = ref false in 650 - (match peek l, peek2 l with 651 - | Some '.', Some c when is_digit c -> 652 - is_float := true; 653 - advance l; 654 - read_int false 655 - | Some '.', _ -> 656 - failwith "Decimal point must be followed by digit" 657 - | _ -> ()); 658 - (* Check for exponent *) 659 - (match peek l with 660 - | Some 'e' | Some 'E' -> 661 - is_float := true; 662 - advance l; 663 - (match peek l with 664 - | Some '+' | Some '-' -> advance l 665 - | _ -> ()); 666 - (* After exponent/sign, first char must be a digit, not underscore *) 667 - (match peek l with 668 - | Some '_' -> failwith "Underscore cannot follow exponent" 669 - | _ -> ()); 670 - read_int true 671 - | _ -> ()); 672 - let s = sub_string l start (l.pos - start) in 673 - let s' = String.concat "" (String.split_on_char '_' s) in 674 - if !is_float then 675 - Tok_float (float_of_string s', s) 676 - else 677 - Tok_integer (Int64.of_string s', s) 160 + let make ~date ~time ~tz = { date; time; tz } 678 161 679 - (* Check if we're looking at a datetime/date/time *) 680 - let looks_like_datetime l = 681 - (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *) 682 - let check_datetime () = 683 - let pos = l.pos in 684 - let len = l.input_len in 685 - (* Check for YYYY-MM-DD pattern - must have exactly this structure *) 686 - if pos + 10 <= len then begin 687 - let c0 = get_char l pos in 688 - let c1 = get_char l (pos + 1) in 689 - let c2 = get_char l (pos + 2) in 690 - let c3 = get_char l (pos + 3) in 691 - let c4 = get_char l (pos + 4) in 692 - let c5 = get_char l (pos + 5) in 693 - let c6 = get_char l (pos + 6) in 694 - let c7 = get_char l (pos + 7) in 695 - let c8 = get_char l (pos + 8) in 696 - let c9 = get_char l (pos + 9) in 697 - (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *) 698 - if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' && 699 - is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin 700 - (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *) 701 - if pos + 10 < len then begin 702 - let next = get_char l (pos + 10) in 703 - if next = 'T' || next = 't' then 704 - `Date (* Datetime continues with time part *) 705 - else if next = ' ' || next = '\t' then begin 706 - (* Check if followed by = (key context) or time part *) 707 - let rec skip_ws p = 708 - if p >= len then p 709 - else match get_char l p with 710 - | ' ' | '\t' -> skip_ws (p + 1) 711 - | _ -> p 712 - in 713 - let after_ws = skip_ws (pos + 11) in 714 - if after_ws < len && get_char l after_ws = '=' then 715 - `Other (* It's a key followed by = *) 716 - else if after_ws < len && is_digit (get_char l after_ws) then 717 - `Date (* Could be "2001-02-03 12:34:56" format *) 718 - else 719 - `Date 720 - end else if next = '\n' || next = '\r' || 721 - next = '#' || next = ',' || next = ']' || next = '}' then 722 - `Date 723 - else if is_bare_key_char next then 724 - `Other (* It's a bare key like "2000-02-29abc" *) 725 - else 726 - `Date 727 - end else 728 - `Date 729 - end else if pos + 5 <= len && 730 - is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then 731 - `Time 732 - else 733 - `Other 734 - end else if pos + 5 <= len then begin 735 - let c0 = get_char l pos in 736 - let c1 = get_char l (pos + 1) in 737 - let c2 = get_char l (pos + 2) in 738 - let c3 = get_char l (pos + 3) in 739 - let c4 = get_char l (pos + 4) in 740 - if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then 741 - `Time 742 - else 743 - `Other 744 - end else 745 - `Other 746 - in 747 - check_datetime () 162 + let equal a b = 163 + Date.equal a.date b.date && Time.equal a.time b.time && Tz.equal a.tz b.tz 748 164 749 - (* Date/time validation *) 750 - let validate_date year month day = 751 - if month < 1 || month > 12 then 752 - failwith (Printf.sprintf "Invalid month: %d" month); 753 - if day < 1 then 754 - failwith (Printf.sprintf "Invalid day: %d" day); 755 - let days_in_month = [| 0; 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] in 756 - let is_leap = (year mod 4 = 0 && year mod 100 <> 0) || year mod 400 = 0 in 757 - let max_days = 758 - if month = 2 && is_leap then 29 759 - else days_in_month.(month) 760 - in 761 - if day > max_days then 762 - failwith (Printf.sprintf "Invalid day %d for month %d" day month) 165 + let compare a b = 166 + Date.compare a.date b.date 167 + <?> lazy (Time.compare a.time b.time) 168 + <?> lazy (Tz.compare a.tz b.tz) 763 169 764 - let validate_time hour minute second = 765 - if hour < 0 || hour > 23 then 766 - failwith (Printf.sprintf "Invalid hour: %d" hour); 767 - if minute < 0 || minute > 59 then 768 - failwith (Printf.sprintf "Invalid minute: %d" minute); 769 - if second < 0 || second > 60 then (* 60 for leap second *) 770 - failwith (Printf.sprintf "Invalid second: %d" second) 170 + let to_string dt = 171 + Printf.sprintf "%sT%s%s" 172 + (Date.to_string dt.date) 173 + (Time.to_string dt.time) 174 + (Tz.to_string dt.tz) 771 175 772 - let validate_offset hour minute = 773 - if hour < 0 || hour > 23 then 774 - failwith (Printf.sprintf "Invalid timezone offset hour: %d" hour); 775 - if minute < 0 || minute > 59 then 776 - failwith (Printf.sprintf "Invalid timezone offset minute: %d" minute) 176 + let pp fmt dt = Format.pp_print_string fmt (to_string dt) 777 177 778 - let parse_datetime l = 779 - let buf = Buffer.create 32 in 780 - let year_buf = Buffer.create 4 in 781 - let month_buf = Buffer.create 2 in 782 - let day_buf = Buffer.create 2 in 783 - (* Read date part YYYY-MM-DD *) 784 - for _ = 1 to 4 do 785 - match peek l with 786 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char year_buf c; advance l 787 - | _ -> failwith "Invalid date format" 788 - done; 789 - if peek l <> Some '-' then failwith "Invalid date format"; 790 - Buffer.add_char buf '-'; advance l; 791 - for _ = 1 to 2 do 792 - match peek l with 793 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char month_buf c; advance l 794 - | _ -> failwith "Invalid date format" 795 - done; 796 - if peek l <> Some '-' then failwith "Invalid date format"; 797 - Buffer.add_char buf '-'; advance l; 798 - for _ = 1 to 2 do 799 - match peek l with 800 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char day_buf c; advance l 801 - | _ -> failwith "Invalid date format" 802 - done; 803 - (* Validate date immediately *) 804 - let year = int_of_string (Buffer.contents year_buf) in 805 - let month = int_of_string (Buffer.contents month_buf) in 806 - let day = int_of_string (Buffer.contents day_buf) in 807 - validate_date year month day; 808 - (* Helper to parse time part (after T or space) *) 809 - let parse_time_part () = 810 - let hour_buf = Buffer.create 2 in 811 - let minute_buf = Buffer.create 2 in 812 - let second_buf = Buffer.create 2 in 813 - Buffer.add_char buf 'T'; (* Always normalize to uppercase T *) 814 - for _ = 1 to 2 do 815 - match peek l with 816 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l 817 - | _ -> failwith "Invalid time format" 818 - done; 819 - if peek l <> Some ':' then failwith "Invalid time format"; 820 - Buffer.add_char buf ':'; advance l; 821 - for _ = 1 to 2 do 822 - match peek l with 823 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l 824 - | _ -> failwith "Invalid time format" 825 - done; 826 - (* Optional seconds *) 827 - (match peek l with 828 - | Some ':' -> 829 - Buffer.add_char buf ':'; advance l; 830 - for _ = 1 to 2 do 831 - match peek l with 832 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l 833 - | _ -> failwith "Invalid time format" 834 - done; 835 - (* Optional fractional seconds *) 836 - (match peek l with 837 - | Some '.' -> 838 - Buffer.add_char buf '.'; advance l; 839 - if not (peek l |> Option.map is_digit |> Option.value ~default:false) then 840 - failwith "Expected digit after decimal point"; 841 - while peek l |> Option.map is_digit |> Option.value ~default:false do 842 - Buffer.add_char buf (Option.get (peek l)); 843 - advance l 844 - done 845 - | _ -> ()) 846 - | _ -> 847 - (* No seconds - add :00 for normalization per toml-test *) 848 - Buffer.add_string buf ":00"; 849 - Buffer.add_string second_buf "00"); 850 - (* Validate time *) 851 - let hour = int_of_string (Buffer.contents hour_buf) in 852 - let minute = int_of_string (Buffer.contents minute_buf) in 853 - let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in 854 - validate_time hour minute second; 855 - (* Check for offset *) 856 - match peek l with 857 - | Some 'Z' | Some 'z' -> 858 - Buffer.add_char buf 'Z'; 859 - advance l; 860 - Tok_datetime (Buffer.contents buf) 861 - | Some '+' | Some '-' as sign_opt -> 862 - let sign = Option.get sign_opt in 863 - let off_hour_buf = Buffer.create 2 in 864 - let off_min_buf = Buffer.create 2 in 865 - Buffer.add_char buf sign; 866 - advance l; 867 - for _ = 1 to 2 do 868 - match peek l with 869 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_hour_buf c; advance l 870 - | _ -> failwith "Invalid timezone offset" 871 - done; 872 - if peek l <> Some ':' then failwith "Invalid timezone offset"; 873 - Buffer.add_char buf ':'; advance l; 874 - for _ = 1 to 2 do 875 - match peek l with 876 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_min_buf c; advance l 877 - | _ -> failwith "Invalid timezone offset" 878 - done; 879 - (* Validate offset *) 880 - let off_hour = int_of_string (Buffer.contents off_hour_buf) in 881 - let off_min = int_of_string (Buffer.contents off_min_buf) in 882 - validate_offset off_hour off_min; 883 - Tok_datetime (Buffer.contents buf) 884 - | _ -> 885 - Tok_datetime_local (Buffer.contents buf) 886 - in 887 - (* Check if there's a time part *) 888 - match peek l with 889 - | Some 'T' | Some 't' -> 890 - advance l; 891 - parse_time_part () 892 - | Some ' ' -> 893 - (* Space could be followed by time (datetime with space separator) 894 - or could be end of date (local date followed by comment/value) *) 895 - advance l; (* Skip the space *) 896 - (* Check if followed by digit (time) *) 897 - (match peek l with 898 - | Some c when is_digit c -> 899 - parse_time_part () 900 - | _ -> 901 - (* Not followed by time - this is just a local date *) 902 - (* Put the space back by not consuming anything further *) 903 - l.pos <- l.pos - 1; (* Go back to before the space *) 904 - Tok_date_local (Buffer.contents buf)) 905 - | _ -> 906 - (* Just a date *) 907 - Tok_date_local (Buffer.contents buf) 178 + let of_string s = 179 + match find_datetime_sep s with 180 + | None -> Error "missing date/time separator" 181 + | Some idx -> 182 + let date_str = String.sub s 0 idx in 183 + let rest = String.sub s (idx + 1) (String.length s - idx - 1) in 184 + (* Find timezone: Z, z, +, or - (but not - in first 2 chars of time) *) 185 + let is_tz_start i c = c = 'Z' || c = 'z' || c = '+' || (c = '-' && i > 2) in 186 + let tz_idx = 187 + let len = String.length rest in 188 + let rec find i = 189 + if i >= len then len 190 + else if is_tz_start i rest.[i] then i 191 + else find (i + 1) 192 + in 193 + find 0 194 + in 195 + let time_str = String.sub rest 0 tz_idx in 196 + let tz_str = String.sub rest tz_idx (String.length rest - tz_idx) in 197 + Result.bind (Date.of_string date_str) @@ fun date -> 198 + Result.bind (Time.of_string time_str) @@ fun time -> 199 + Result.bind (Tz.of_string tz_str) @@ fun tz -> 200 + Ok { date; time; tz } 201 + end 908 202 909 - let parse_time l = 910 - let buf = Buffer.create 16 in 911 - let hour_buf = Buffer.create 2 in 912 - let minute_buf = Buffer.create 2 in 913 - let second_buf = Buffer.create 2 in 914 - (* Read HH:MM *) 915 - for _ = 1 to 2 do 916 - match peek l with 917 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l 918 - | _ -> failwith "Invalid time format" 919 - done; 920 - if peek l <> Some ':' then failwith "Invalid time format"; 921 - Buffer.add_char buf ':'; advance l; 922 - for _ = 1 to 2 do 923 - match peek l with 924 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l 925 - | _ -> failwith "Invalid time format" 926 - done; 927 - (* Optional seconds *) 928 - (match peek l with 929 - | Some ':' -> 930 - Buffer.add_char buf ':'; advance l; 931 - for _ = 1 to 2 do 932 - match peek l with 933 - | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l 934 - | _ -> failwith "Invalid time format" 935 - done; 936 - (* Optional fractional seconds *) 937 - (match peek l with 938 - | Some '.' -> 939 - Buffer.add_char buf '.'; advance l; 940 - if not (peek l |> Option.map is_digit |> Option.value ~default:false) then 941 - failwith "Expected digit after decimal point"; 942 - while peek l |> Option.map is_digit |> Option.value ~default:false do 943 - Buffer.add_char buf (Option.get (peek l)); 944 - advance l 945 - done 946 - | _ -> ()) 947 - | _ -> 948 - (* No seconds - add :00 for normalization *) 949 - Buffer.add_string buf ":00"; 950 - Buffer.add_string second_buf "00"); 951 - (* Validate time *) 952 - let hour = int_of_string (Buffer.contents hour_buf) in 953 - let minute = int_of_string (Buffer.contents minute_buf) in 954 - let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in 955 - validate_time hour minute second; 956 - Tok_time_local (Buffer.contents buf) 203 + module Datetime_local = struct 204 + type t = { date : Date.t; time : Time.t } 957 205 958 - let next_token l = 959 - skip_ws_and_comments l; 960 - if is_eof l then Tok_eof 961 - else begin 962 - let c = get_current l in 963 - match c with 964 - | '[' -> advance l; Tok_lbracket 965 - | ']' -> advance l; Tok_rbracket 966 - | '{' -> advance l; Tok_lbrace 967 - | '}' -> advance l; Tok_rbrace 968 - | '=' -> advance l; Tok_equals 969 - | ',' -> advance l; Tok_comma 970 - | '.' -> advance l; Tok_dot 971 - | '\n' -> advance l; Tok_newline 972 - | '\r' -> 973 - advance l; 974 - if peek l = Some '\n' then begin 975 - advance l; 976 - Tok_newline 977 - end else 978 - failwith (Printf.sprintf "Bare carriage return not allowed at line %d" l.line) 979 - | '"' -> 980 - let (s, multiline) = parse_basic_string l in 981 - if multiline then Tok_ml_basic_string s else Tok_basic_string s 982 - | '\'' -> 983 - let (s, multiline) = parse_literal_string l in 984 - if multiline then Tok_ml_literal_string s else Tok_literal_string s 985 - | '+' | '-' -> 986 - (* Could be number, special float (+inf, -inf, +nan, -nan), or bare key starting with - *) 987 - let sign = c in 988 - let start = l.pos in 989 - (match peek2 l with 990 - | Some d when is_digit d -> 991 - (* Check if this looks like a key (followed by = after whitespace/key chars) *) 992 - (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *) 993 - let is_key_context = 994 - let rec scan_ahead p = 995 - if p >= l.input_len then false 996 - else 997 - let c = get_char l p in 998 - if is_digit c || c = '_' then scan_ahead (p + 1) 999 - else if c = ' ' || c = '\t' then 1000 - (* Skip whitespace and check for = *) 1001 - let rec skip_ws pp = 1002 - if pp >= l.input_len then false 1003 - else match get_char l pp with 1004 - | ' ' | '\t' -> skip_ws (pp + 1) 1005 - | '=' -> true 1006 - | _ -> false 1007 - in 1008 - skip_ws (p + 1) 1009 - else if c = '=' then true 1010 - else if c = '.' then 1011 - (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *) 1012 - if p + 1 < l.input_len then 1013 - let next = get_char l (p + 1) in 1014 - if is_digit next then false (* It's a decimal number like -3.14 *) 1015 - else if is_bare_key_char next then true (* Dotted key *) 1016 - else false 1017 - else false 1018 - else if c = 'e' || c = 'E' then false (* Scientific notation *) 1019 - else if is_bare_key_char c then 1020 - (* Contains non-digit bare key char - it's a key *) 1021 - true 1022 - else false 1023 - in 1024 - scan_ahead (start + 1) 1025 - in 1026 - if is_key_context then begin 1027 - (* Treat as bare key *) 1028 - while not (is_eof l) && is_bare_key_char (get_current l) do 1029 - advance l 1030 - done; 1031 - Tok_bare_key (sub_string l start (l.pos - start)) 1032 - end else 1033 - parse_number l 1034 - | Some 'i' -> 1035 - (* Check for inf *) 1036 - if l.pos + 3 < l.input_len && 1037 - get_char l (l.pos + 1) = 'i' && get_char l (l.pos + 2) = 'n' && get_char l (l.pos + 3) = 'f' then begin 1038 - advance_n l 4; 1039 - let s = sub_string l start (l.pos - start) in 1040 - if sign = '-' then Tok_float (Float.neg_infinity, s) 1041 - else Tok_float (Float.infinity, s) 1042 - end else if sign = '-' then begin 1043 - (* Could be bare key like -inf-key *) 1044 - while not (is_eof l) && is_bare_key_char (get_current l) do 1045 - advance l 1046 - done; 1047 - Tok_bare_key (sub_string l start (l.pos - start)) 1048 - end else 1049 - failwith (Printf.sprintf "Unexpected character after %c" sign) 1050 - | Some 'n' -> 1051 - (* Check for nan *) 1052 - if l.pos + 3 < l.input_len && 1053 - get_char l (l.pos + 1) = 'n' && get_char l (l.pos + 2) = 'a' && get_char l (l.pos + 3) = 'n' then begin 1054 - advance_n l 4; 1055 - let s = sub_string l start (l.pos - start) in 1056 - Tok_float (Float.nan, s) (* Sign on NaN doesn't change the value *) 1057 - end else if sign = '-' then begin 1058 - (* Could be bare key like -name *) 1059 - while not (is_eof l) && is_bare_key_char (get_current l) do 1060 - advance l 1061 - done; 1062 - Tok_bare_key (sub_string l start (l.pos - start)) 1063 - end else 1064 - failwith (Printf.sprintf "Unexpected character after %c" sign) 1065 - | _ when sign = '-' -> 1066 - (* Bare key starting with - like -key or --- *) 1067 - while not (is_eof l) && is_bare_key_char (get_current l) do 1068 - advance l 1069 - done; 1070 - Tok_bare_key (sub_string l start (l.pos - start)) 1071 - | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign)) 1072 - | c when is_digit c -> 1073 - (* Could be number, datetime, or bare key starting with digits *) 1074 - (match looks_like_datetime l with 1075 - | `Date -> parse_datetime l 1076 - | `Time -> parse_time l 1077 - | `Other -> 1078 - (* Check for hex/octal/binary prefix first - these are always numbers *) 1079 - let start = l.pos in 1080 - let is_prefixed_number = 1081 - start + 1 < l.input_len && get_char l start = '0' && 1082 - (let c1 = get_char l (start + 1) in 1083 - c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B') 1084 - in 1085 - if is_prefixed_number then 1086 - parse_number l 1087 - else begin 1088 - (* Check if this is a bare key: 1089 - - Contains letters (like "123abc") 1090 - - Has leading zeros (like "0123") which would be invalid as a number *) 1091 - let has_leading_zero = 1092 - get_char l start = '0' && start + 1 < l.input_len && 1093 - let c1 = get_char l (start + 1) in 1094 - is_digit c1 1095 - in 1096 - (* Scan to see if this is a bare key or a number 1097 - - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number 1098 - - If it contains letters OR dashes between digits, it's a bare key *) 1099 - let rec scan_for_bare_key pos has_dash_between_digits = 1100 - if pos >= l.input_len then has_dash_between_digits 1101 - else 1102 - let c = get_char l pos in 1103 - if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits 1104 - else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits 1105 - else if c = '-' then 1106 - (* Dash in key - check what follows *) 1107 - let next_pos = pos + 1 in 1108 - if next_pos < l.input_len then 1109 - let next = get_char l next_pos in 1110 - if is_digit next then 1111 - scan_for_bare_key (next_pos) true (* Dash between digits - bare key *) 1112 - else if is_bare_key_char next then 1113 - true (* Dash followed by letter - definitely bare key like 2000-datetime *) 1114 - else 1115 - has_dash_between_digits (* End of sequence *) 1116 - else 1117 - has_dash_between_digits (* End of input *) 1118 - else if c = 'e' || c = 'E' then 1119 - (* Check if this looks like scientific notation *) 1120 - let next_pos = pos + 1 in 1121 - if next_pos >= l.input_len then true (* Just 'e' at end, bare key *) 1122 - else 1123 - let next = get_char l next_pos in 1124 - if next = '+' || next = '-' then 1125 - (* Has exponent sign - check if followed by digit *) 1126 - let after_sign = next_pos + 1 in 1127 - if after_sign < l.input_len && is_digit (get_char l after_sign) then 1128 - has_dash_between_digits (* Scientific notation, but might have dash earlier *) 1129 - else 1130 - true (* e.g., "3e-abc" - bare key *) 1131 - else if is_digit next then 1132 - has_dash_between_digits (* Scientific notation like 3e2, but check if had dash earlier *) 1133 - else 1134 - true (* e.g., "3eabc" - bare key *) 1135 - else if is_bare_key_char c then 1136 - (* It's a letter - this is a bare key *) 1137 - true 1138 - else has_dash_between_digits 1139 - in 1140 - if has_leading_zero || scan_for_bare_key start false then begin 1141 - (* It's a bare key *) 1142 - while not (is_eof l) && is_bare_key_char (get_current l) do 1143 - advance l 1144 - done; 1145 - Tok_bare_key (sub_string l start (l.pos - start)) 1146 - end else 1147 - (* It's a number - use parse_number *) 1148 - parse_number l 1149 - end) 1150 - | c when c = 't' || c = 'f' || c = 'i' || c = 'n' -> 1151 - (* These could be keywords (true, false, inf, nan) or bare keys 1152 - Always read as bare key and let parser interpret *) 1153 - let start = l.pos in 1154 - while not (is_eof l) && is_bare_key_char (get_current l) do 1155 - advance l 1156 - done; 1157 - Tok_bare_key (sub_string l start (l.pos - start)) 1158 - | c when is_bare_key_char c -> 1159 - let start = l.pos in 1160 - while not (is_eof l) && is_bare_key_char (get_current l) do 1161 - advance l 1162 - done; 1163 - Tok_bare_key (sub_string l start (l.pos - start)) 1164 - | c -> 1165 - let code = Char.code c in 1166 - if code < 0x20 || code = 0x7F then 1167 - failwith (Printf.sprintf "Control character U+%04X not allowed at line %d" code l.line) 1168 - else 1169 - failwith (Printf.sprintf "Unexpected character '%c' at line %d, column %d" c l.line l.col) 1170 - end 206 + let make ~date ~time = { date; time } 1171 207 1172 - (* Parser *) 208 + let equal a b = Date.equal a.date b.date && Time.equal a.time b.time 1173 209 1174 - type parser = { 1175 - lexer : lexer; 1176 - mutable current : token; 1177 - mutable peeked : bool; 1178 - } 210 + let compare a b = 211 + Date.compare a.date b.date <?> lazy (Time.compare a.time b.time) 1179 212 1180 - let make_parser lexer = 1181 - { lexer; current = Tok_eof; peeked = false } 213 + let to_string dt = 214 + Printf.sprintf "%sT%s" (Date.to_string dt.date) (Time.to_string dt.time) 1182 215 1183 - let peek_token p = 1184 - if not p.peeked then begin 1185 - p.current <- next_token p.lexer; 1186 - p.peeked <- true 1187 - end; 1188 - p.current 216 + let pp fmt dt = Format.pp_print_string fmt (to_string dt) 1189 217 1190 - let consume_token p = 1191 - let tok = peek_token p in 1192 - p.peeked <- false; 1193 - tok 218 + let of_string s = 219 + match find_datetime_sep s with 220 + | None -> Error "missing date/time separator" 221 + | Some idx -> 222 + let date_str = String.sub s 0 idx in 223 + let time_str = String.sub s (idx + 1) (String.length s - idx - 1) in 224 + Result.bind (Date.of_string date_str) @@ fun date -> 225 + Result.bind (Time.of_string time_str) @@ fun time -> 226 + Ok { date; time } 227 + end 1194 228 1195 - (* Check if next raw character (without skipping whitespace) matches *) 1196 - let next_raw_char_is p c = 1197 - p.lexer.pos < p.lexer.input_len && get_char p.lexer p.lexer.pos = c 229 + (* ---- Codec error type ---- *) 1198 230 1199 - let expect_token p expected = 1200 - let tok = consume_token p in 1201 - if tok <> expected then 1202 - failwith (Printf.sprintf "Expected %s" (match expected with 1203 - | Tok_equals -> "=" 1204 - | Tok_rbracket -> "]" 1205 - | Tok_rbrace -> "}" 1206 - | Tok_newline -> "newline" 1207 - | _ -> "token")) 231 + type codec_error = 232 + | Type_mismatch of { expected : string; got : string } 233 + | Missing_member of string 234 + | Unknown_member of string [@warning "-37"] 235 + | Value_error of string 236 + | Int_overflow of int64 237 + | Parse_error of string [@warning "-37"] 1208 238 1209 - let skip_newlines p = 1210 - while peek_token p = Tok_newline do 1211 - ignore (consume_token p) 1212 - done 239 + let codec_error_to_string = function 240 + | Type_mismatch { expected; got } -> 241 + Printf.sprintf "type mismatch: expected %s, got %s" expected got 242 + | Missing_member name -> 243 + Printf.sprintf "missing required member: %s" name 244 + | Unknown_member name -> 245 + Printf.sprintf "unknown member: %s" name 246 + | Value_error msg -> msg 247 + | Int_overflow n -> 248 + Printf.sprintf "integer overflow: %Ld" n 249 + | Parse_error msg -> 250 + Printf.sprintf "parse error: %s" msg 1213 251 1214 - (* Parse a single key segment (bare, basic string, literal string, or integer) *) 1215 - (* Note: Tok_float is handled specially in parse_dotted_key *) 1216 - let parse_key_segment p = 1217 - match peek_token p with 1218 - | Tok_bare_key s -> ignore (consume_token p); [s] 1219 - | Tok_basic_string s -> ignore (consume_token p); [s] 1220 - | Tok_literal_string s -> ignore (consume_token p); [s] 1221 - | Tok_integer (_i, orig_str) -> ignore (consume_token p); [orig_str] 1222 - | Tok_float (f, orig_str) -> 1223 - (* Float in key context - use original string to preserve exact key parts *) 1224 - ignore (consume_token p); 1225 - if Float.is_nan f then ["nan"] 1226 - else if f = Float.infinity then ["inf"] 1227 - else if f = Float.neg_infinity then ["-inf"] 1228 - else begin 1229 - (* Remove underscores from original string and split on dot *) 1230 - let s = String.concat "" (String.split_on_char '_' orig_str) in 1231 - if String.contains s 'e' || String.contains s 'E' then 1232 - (* Has exponent, treat as single key *) 1233 - [s] 1234 - else if String.contains s '.' then 1235 - (* Split on decimal point for dotted key *) 1236 - String.split_on_char '.' s 1237 - else 1238 - (* No decimal point, single integer key *) 1239 - [s] 1240 - end 1241 - | Tok_date_local s -> ignore (consume_token p); [s] 1242 - | Tok_datetime s -> ignore (consume_token p); [s] 1243 - | Tok_datetime_local s -> ignore (consume_token p); [s] 1244 - | Tok_time_local s -> ignore (consume_token p); [s] 1245 - | Tok_ml_basic_string _ -> failwith "Multiline strings are not allowed as keys" 1246 - | Tok_ml_literal_string _ -> failwith "Multiline strings are not allowed as keys" 1247 - | _ -> failwith "Expected key" 252 + (* ---- Codec type ---- *) 1248 253 1249 - (* Parse a dotted key - returns list of key strings *) 1250 - let parse_dotted_key p = 1251 - let first_keys = parse_key_segment p in 1252 - let rec loop acc = 1253 - match peek_token p with 1254 - | Tok_dot -> 1255 - ignore (consume_token p); 1256 - let keys = parse_key_segment p in 1257 - loop (List.rev_append keys acc) 1258 - | _ -> List.rev acc 1259 - in 1260 - let rest = loop [] in 1261 - first_keys @ rest 254 + type 'a t = { 255 + kind : string; 256 + doc : string; 257 + dec : Toml.t -> ('a, codec_error) result; 258 + enc : 'a -> Toml.t; 259 + } 1262 260 1263 - let rec parse_value p = 1264 - match peek_token p with 1265 - | Tok_basic_string s -> ignore (consume_token p); String s 1266 - | Tok_literal_string s -> ignore (consume_token p); String s 1267 - | Tok_ml_basic_string s -> ignore (consume_token p); String s 1268 - | Tok_ml_literal_string s -> ignore (consume_token p); String s 1269 - | Tok_integer (i, _) -> ignore (consume_token p); Int i 1270 - | Tok_float (f, _) -> ignore (consume_token p); Float f 1271 - | Tok_datetime s -> ignore (consume_token p); Datetime s 1272 - | Tok_datetime_local s -> ignore (consume_token p); Datetime_local s 1273 - | Tok_date_local s -> ignore (consume_token p); Date_local s 1274 - | Tok_time_local s -> ignore (consume_token p); Time_local s 1275 - | Tok_lbracket -> parse_array p 1276 - | Tok_lbrace -> parse_inline_table p 1277 - | Tok_bare_key s -> 1278 - (* Interpret bare keys as boolean, float keywords, or numbers in value context *) 1279 - ignore (consume_token p); 1280 - (match s with 1281 - | "true" -> Bool true 1282 - | "false" -> Bool false 1283 - | "inf" -> Float Float.infinity 1284 - | "nan" -> Float Float.nan 1285 - | _ -> 1286 - (* Validate underscore placement in the original string *) 1287 - let validate_underscores str = 1288 - let len = String.length str in 1289 - if len > 0 && str.[0] = '_' then 1290 - failwith "Leading underscore not allowed in number"; 1291 - if len > 0 && str.[len - 1] = '_' then 1292 - failwith "Trailing underscore not allowed in number"; 1293 - for i = 0 to len - 2 do 1294 - if str.[i] = '_' && str.[i + 1] = '_' then 1295 - failwith "Double underscore not allowed in number"; 1296 - (* Underscore must be between digits (not next to 'e', 'E', '.', 'x', 'o', 'b', etc.) *) 1297 - if str.[i] = '_' then begin 1298 - let prev = if i > 0 then Some str.[i - 1] else None in 1299 - let next = Some str.[i + 1] in 1300 - let is_digit_char c = c >= '0' && c <= '9' in 1301 - let is_hex_char c = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') in 1302 - (* For hex numbers, underscore can be between hex digits *) 1303 - let has_hex_prefix = len > 2 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') in 1304 - match prev, next with 1305 - | Some p, Some n when has_hex_prefix && is_hex_char p && is_hex_char n -> () 1306 - | Some p, Some n when is_digit_char p && is_digit_char n -> () 1307 - | _ -> failwith "Underscore must be between digits" 1308 - end 1309 - done 1310 - in 1311 - validate_underscores s; 1312 - (* Try to parse as a number - bare keys like "10e3" should be floats *) 1313 - let s_no_underscore = String.concat "" (String.split_on_char '_' s) in 1314 - let len = String.length s_no_underscore in 1315 - if len > 0 then 1316 - let c0 = s_no_underscore.[0] in 1317 - (* Must start with digit for it to be a number in value context *) 1318 - if c0 >= '0' && c0 <= '9' then begin 1319 - (* Check for leading zeros *) 1320 - if len > 1 && c0 = '0' && s_no_underscore.[1] >= '0' && s_no_underscore.[1] <= '9' then 1321 - failwith "Leading zeros not allowed" 1322 - else 1323 - try 1324 - (* Try to parse as float (handles scientific notation) *) 1325 - if String.contains s_no_underscore '.' || 1326 - String.contains s_no_underscore 'e' || 1327 - String.contains s_no_underscore 'E' then 1328 - Float (float_of_string s_no_underscore) 1329 - else 1330 - Int (Int64.of_string s_no_underscore) 1331 - with _ -> 1332 - failwith (Printf.sprintf "Unexpected bare key '%s' as value" s) 1333 - end else 1334 - failwith (Printf.sprintf "Unexpected bare key '%s' as value" s) 1335 - else 1336 - failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)) 1337 - | _ -> failwith "Expected value" 261 + let kind c = c.kind 262 + let doc c = c.doc 1338 263 1339 - and parse_array p = 1340 - ignore (consume_token p); (* [ *) 1341 - skip_newlines p; 1342 - let rec loop acc = 1343 - match peek_token p with 1344 - | Tok_rbracket -> 1345 - ignore (consume_token p); 1346 - Array (List.rev acc) 1347 - | _ -> 1348 - let v = parse_value p in 1349 - skip_newlines p; 1350 - match peek_token p with 1351 - | Tok_comma -> 1352 - ignore (consume_token p); 1353 - skip_newlines p; 1354 - loop (v :: acc) 1355 - | Tok_rbracket -> 1356 - ignore (consume_token p); 1357 - Array (List.rev (v :: acc)) 1358 - | _ -> failwith "Expected ',' or ']' in array" 1359 - in 1360 - loop [] 1361 - 1362 - and parse_inline_table p = 1363 - ignore (consume_token p); (* { *) 1364 - skip_newlines p; 1365 - (* Track explicitly defined keys - can't be extended with dotted keys *) 1366 - let defined_inline = ref [] in 1367 - let rec loop acc = 1368 - match peek_token p with 1369 - | Tok_rbrace -> 1370 - ignore (consume_token p); 1371 - Table (List.rev acc) 1372 - | _ -> 1373 - let keys = parse_dotted_key p in 1374 - skip_ws p; 1375 - expect_token p Tok_equals; 1376 - skip_ws p; 1377 - let v = parse_value p in 1378 - (* Check if trying to extend a previously-defined inline table *) 1379 - (match keys with 1380 - | first_key :: _ :: _ -> 1381 - (* Multi-key dotted path - check if first key is already defined *) 1382 - if List.mem first_key !defined_inline then 1383 - failwith (Printf.sprintf "Cannot extend inline table '%s' with dotted key" first_key) 1384 - | _ -> ()); 1385 - (* If this is a direct assignment to a key, track it *) 1386 - (match keys with 1387 - | [k] -> 1388 - if List.mem k !defined_inline then 1389 - failwith (Printf.sprintf "Duplicate key '%s' in inline table" k); 1390 - defined_inline := k :: !defined_inline 1391 - | _ -> ()); 1392 - let entry = build_nested_table keys v in 1393 - (* Merge the entry with existing entries (for dotted keys with common prefix) *) 1394 - let acc = merge_entry_into_table acc entry in 1395 - skip_newlines p; 1396 - match peek_token p with 1397 - | Tok_comma -> 1398 - ignore (consume_token p); 1399 - skip_newlines p; 1400 - loop acc 1401 - | Tok_rbrace -> 1402 - ignore (consume_token p); 1403 - Table (List.rev acc) 1404 - | _ -> failwith "Expected ',' or '}' in inline table" 1405 - in 1406 - loop [] 264 + let with_doc ?kind:k ?doc:d c = 265 + { c with 266 + kind = Option.value ~default:c.kind k; 267 + doc = Option.value ~default:c.doc d } 1407 268 1408 - and skip_ws _p = 1409 - (* Skip whitespace in token stream - handled by lexer but needed for lookahead *) 1410 - () 269 + (* ---- Type helpers ---- *) 1411 270 1412 - and build_nested_table keys value = 1413 - match keys with 1414 - | [] -> failwith "Empty key" 1415 - | [k] -> (k, value) 1416 - | k :: rest -> 1417 - (k, Table [build_nested_table rest value]) 271 + let type_name = function 272 + | Toml.String _ -> "string" 273 + | Toml.Int _ -> "integer" 274 + | Toml.Float _ -> "float" 275 + | Toml.Bool _ -> "boolean" 276 + | Toml.Datetime _ -> "datetime" 277 + | Toml.Datetime_local _ -> "datetime-local" 278 + | Toml.Date_local _ -> "date-local" 279 + | Toml.Time_local _ -> "time-local" 280 + | Toml.Array _ -> "array" 281 + | Toml.Table _ -> "table" 1418 282 1419 - (* Merge two TOML values - used for combining dotted keys in inline tables *) 1420 - and merge_toml_values v1 v2 = 1421 - match v1, v2 with 1422 - | Table entries1, Table entries2 -> 1423 - (* Merge the entries *) 1424 - let merged = List.fold_left (fun acc (k, v) -> 1425 - match List.assoc_opt k acc with 1426 - | Some existing -> 1427 - (* Key exists - try to merge if both are tables *) 1428 - let merged_v = merge_toml_values existing v in 1429 - (k, merged_v) :: List.remove_assoc k acc 1430 - | None -> 1431 - (k, v) :: acc 1432 - ) entries1 entries2 in 1433 - Table (List.rev merged) 1434 - | _, _ -> 1435 - (* Can't merge non-table values with same key *) 1436 - failwith "Conflicting keys in inline table" 1437 - 1438 - (* Merge a single entry into an existing table *) 1439 - and merge_entry_into_table entries (k, v) = 1440 - match List.assoc_opt k entries with 1441 - | Some existing -> 1442 - let merged_v = merge_toml_values existing v in 1443 - (k, merged_v) :: List.remove_assoc k entries 1444 - | None -> 1445 - (k, v) :: entries 1446 - 1447 - let validate_datetime_string s = 1448 - (* Parse and validate date portion *) 1449 - if String.length s >= 10 then begin 1450 - let year = int_of_string (String.sub s 0 4) in 1451 - let month = int_of_string (String.sub s 5 2) in 1452 - let day = int_of_string (String.sub s 8 2) in 1453 - validate_date year month day; 1454 - (* Parse and validate time portion if present *) 1455 - if String.length s >= 16 then begin 1456 - let time_start = if s.[10] = 'T' || s.[10] = 't' || s.[10] = ' ' then 11 else 10 in 1457 - let hour = int_of_string (String.sub s time_start 2) in 1458 - let minute = int_of_string (String.sub s (time_start + 3) 2) in 1459 - let second = 1460 - if String.length s >= time_start + 8 && s.[time_start + 5] = ':' then 1461 - int_of_string (String.sub s (time_start + 6) 2) 1462 - else 0 1463 - in 1464 - validate_time hour minute second 1465 - end 1466 - end 1467 - 1468 - let validate_date_string s = 1469 - if String.length s >= 10 then begin 1470 - let year = int_of_string (String.sub s 0 4) in 1471 - let month = int_of_string (String.sub s 5 2) in 1472 - let day = int_of_string (String.sub s 8 2) in 1473 - validate_date year month day 1474 - end 1475 - 1476 - let validate_time_string s = 1477 - if String.length s >= 5 then begin 1478 - let hour = int_of_string (String.sub s 0 2) in 1479 - let minute = int_of_string (String.sub s 3 2) in 1480 - let second = 1481 - if String.length s >= 8 && s.[5] = ':' then 1482 - int_of_string (String.sub s 6 2) 1483 - else 0 1484 - in 1485 - validate_time hour minute second 1486 - end 283 + (* ---- Base codecs ---- *) 1487 284 1488 - (* Table management for the parser *) 1489 - type table_state = { 1490 - mutable values : (string * t) list; 1491 - subtables : (string, table_state) Hashtbl.t; 1492 - mutable is_array : bool; 1493 - mutable is_inline : bool; 1494 - mutable defined : bool; (* Has this table been explicitly defined with [table]? *) 1495 - mutable closed : bool; (* Closed to extension via dotted keys from parent *) 1496 - mutable array_elements : table_state list; (* For arrays of tables *) 285 + let bool = { 286 + kind = "boolean"; 287 + doc = ""; 288 + dec = (function 289 + | Toml.Bool b -> Ok b 290 + | v -> Error (Type_mismatch { expected = "boolean"; got = type_name v })); 291 + enc = (fun b -> Toml.Bool b); 1497 292 } 1498 293 1499 - let create_table_state () = { 1500 - values = []; 1501 - subtables = Hashtbl.create 16; 1502 - is_array = false; 1503 - is_inline = false; 1504 - defined = false; 1505 - closed = false; 1506 - array_elements = []; 294 + let int = { 295 + kind = "integer"; 296 + doc = ""; 297 + dec = (function 298 + | Toml.Int i -> 299 + if i >= Int64.of_int min_int && i <= Int64.of_int max_int then 300 + Ok (Int64.to_int i) 301 + else Error (Int_overflow i) 302 + | v -> Error (Type_mismatch { expected = "integer"; got = type_name v })); 303 + enc = (fun i -> Toml.Int (Int64.of_int i)); 1507 304 } 1508 305 1509 - let rec get_or_create_table state keys create_intermediate = 1510 - match keys with 1511 - | [] -> state 1512 - | [k] -> 1513 - (* Check if key exists as a value *) 1514 - if List.mem_assoc k state.values then 1515 - failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1516 - (match Hashtbl.find_opt state.subtables k with 1517 - | Some sub -> sub 1518 - | None -> 1519 - let sub = create_table_state () in 1520 - Hashtbl.add state.subtables k sub; 1521 - sub) 1522 - | k :: rest -> 1523 - (* Check if key exists as a value *) 1524 - if List.mem_assoc k state.values then 1525 - failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1526 - let sub = match Hashtbl.find_opt state.subtables k with 1527 - | Some sub -> sub 1528 - | None -> 1529 - let sub = create_table_state () in 1530 - Hashtbl.add state.subtables k sub; 1531 - sub 1532 - in 1533 - if create_intermediate && not sub.defined then 1534 - sub.defined <- false; (* Mark as implicitly defined *) 1535 - get_or_create_table sub rest create_intermediate 306 + let int32 = { 307 + kind = "integer"; 308 + doc = ""; 309 + dec = (function 310 + | Toml.Int i -> 311 + if i >= Int64.of_int32 Int32.min_int && i <= Int64.of_int32 Int32.max_int then 312 + Ok (Int64.to_int32 i) 313 + else Error (Int_overflow i) 314 + | v -> Error (Type_mismatch { expected = "integer"; got = type_name v })); 315 + enc = (fun i -> Toml.Int (Int64.of_int32 i)); 316 + } 1536 317 1537 - (* Like get_or_create_table but marks tables as defined (for dotted keys) *) 1538 - (* Dotted keys mark tables as "defined" (can't re-define with [table]) but not "closed" *) 1539 - let rec get_or_create_table_for_dotted_key state keys = 1540 - match keys with 1541 - | [] -> state 1542 - | [k] -> 1543 - (* Check if key exists as a value *) 1544 - if List.mem_assoc k state.values then 1545 - failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1546 - (match Hashtbl.find_opt state.subtables k with 1547 - | Some sub -> 1548 - (* Check if it's an array of tables (can't extend with dotted keys) *) 1549 - if sub.is_array then 1550 - failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k); 1551 - (* Check if it's closed (explicitly defined with [table] header) *) 1552 - if sub.closed then 1553 - failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k); 1554 - if sub.is_inline then 1555 - failwith (Printf.sprintf "Cannot extend inline table '%s'" k); 1556 - (* Mark as defined by dotted key *) 1557 - sub.defined <- true; 1558 - sub 1559 - | None -> 1560 - let sub = create_table_state () in 1561 - sub.defined <- true; (* Mark as defined by dotted key *) 1562 - Hashtbl.add state.subtables k sub; 1563 - sub) 1564 - | k :: rest -> 1565 - (* Check if key exists as a value *) 1566 - if List.mem_assoc k state.values then 1567 - failwith (Printf.sprintf "Cannot use value '%s' as a table" k); 1568 - let sub = match Hashtbl.find_opt state.subtables k with 1569 - | Some sub -> 1570 - (* Check if it's an array of tables (can't extend with dotted keys) *) 1571 - if sub.is_array then 1572 - failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k); 1573 - if sub.closed then 1574 - failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k); 1575 - if sub.is_inline then 1576 - failwith (Printf.sprintf "Cannot extend inline table '%s'" k); 1577 - (* Mark as defined by dotted key *) 1578 - sub.defined <- true; 1579 - sub 1580 - | None -> 1581 - let sub = create_table_state () in 1582 - sub.defined <- true; (* Mark as defined by dotted key *) 1583 - Hashtbl.add state.subtables k sub; 1584 - sub 1585 - in 1586 - get_or_create_table_for_dotted_key sub rest 318 + let int64 = { 319 + kind = "integer"; 320 + doc = ""; 321 + dec = (function 322 + | Toml.Int i -> Ok i 323 + | v -> Error (Type_mismatch { expected = "integer"; got = type_name v })); 324 + enc = (fun i -> Toml.Int i); 325 + } 1587 326 1588 - let rec table_state_to_toml state = 1589 - let subtable_values = Hashtbl.fold (fun k sub acc -> 1590 - let v = 1591 - if sub.is_array then 1592 - Array (List.map table_state_to_toml (get_array_elements sub)) 1593 - else 1594 - table_state_to_toml sub 1595 - in 1596 - (k, v) :: acc 1597 - ) state.subtables [] in 1598 - Table (List.rev state.values @ subtable_values) 327 + let float = { 328 + kind = "float"; 329 + doc = ""; 330 + dec = (function 331 + | Toml.Float f -> Ok f 332 + | v -> Error (Type_mismatch { expected = "float"; got = type_name v })); 333 + enc = (fun f -> Toml.Float f); 334 + } 1599 335 1600 - and get_array_elements state = 1601 - List.rev state.array_elements 1602 - 1603 - (* Main parser function *) 1604 - let parse_toml_from_lexer lexer = 1605 - let parser = make_parser lexer in 1606 - let root = create_table_state () in 1607 - let current_table = ref root in 1608 - (* Stack of array contexts: (full_path, parent_state, array_container) *) 1609 - (* parent_state is where the array lives, array_container is the array table itself *) 1610 - let array_context_stack = ref ([] : (string list * table_state * table_state) list) in 1611 - 1612 - (* Check if keys has a prefix matching the given path *) 1613 - let rec has_prefix keys prefix = 1614 - match keys, prefix with 1615 - | _, [] -> true 1616 - | [], _ -> false 1617 - | k :: krest, p :: prest -> k = p && has_prefix krest prest 1618 - in 1619 - 1620 - (* Remove prefix from keys *) 1621 - let rec remove_prefix keys prefix = 1622 - match keys, prefix with 1623 - | ks, [] -> ks 1624 - | [], _ -> [] 1625 - | _ :: krest, _ :: prest -> remove_prefix krest prest 1626 - in 1627 - 1628 - (* Find matching array context for the given keys *) 1629 - let find_array_context keys = 1630 - (* Stack is newest-first, so first match is the innermost (longest) prefix *) 1631 - let rec find stack = 1632 - match stack with 1633 - | [] -> None 1634 - | (path, parent, container) :: rest -> 1635 - if keys = path then 1636 - (* Exact match - adding sibling element *) 1637 - Some (`Sibling (path, parent, container)) 1638 - else if has_prefix keys path && List.length keys > List.length path then 1639 - (* Proper prefix - nested table/array within current element *) 1640 - let current_entry = List.hd container.array_elements in 1641 - Some (`Nested (path, current_entry)) 1642 - else 1643 - find rest 1644 - in 1645 - find !array_context_stack 1646 - in 1647 - 1648 - (* Pop array contexts that are no longer valid for the given keys *) 1649 - let rec pop_invalid_contexts keys = 1650 - match !array_context_stack with 1651 - | [] -> () 1652 - | (path, _, _) :: rest -> 1653 - if not (has_prefix keys path) then begin 1654 - array_context_stack := rest; 1655 - pop_invalid_contexts keys 1656 - end 1657 - in 336 + let number = { 337 + kind = "number"; 338 + doc = ""; 339 + dec = (function 340 + | Toml.Float f -> Ok f 341 + | Toml.Int i -> Ok (Int64.to_float i) 342 + | v -> Error (Type_mismatch { expected = "number"; got = type_name v })); 343 + enc = (fun f -> Toml.Float f); 344 + } 1658 345 1659 - let rec parse_document () = 1660 - skip_newlines parser; 1661 - match peek_token parser with 1662 - | Tok_eof -> () 1663 - | Tok_lbracket -> 1664 - (* Check for array of tables [[...]] vs table [...] *) 1665 - ignore (consume_token parser); 1666 - (* For [[, the two brackets must be adjacent (no whitespace) *) 1667 - let is_adjacent_bracket = next_raw_char_is parser '[' in 1668 - (match peek_token parser with 1669 - | Tok_lbracket when not is_adjacent_bracket -> 1670 - (* The next [ was found after whitespace - this is invalid syntax like [ [table]] *) 1671 - failwith "Invalid table header syntax" 1672 - | Tok_lbracket -> 1673 - (* Array of tables - brackets are adjacent *) 1674 - ignore (consume_token parser); 1675 - let keys = parse_dotted_key parser in 1676 - expect_token parser Tok_rbracket; 1677 - (* Check that closing ]] are adjacent (no whitespace) *) 1678 - if not (next_raw_char_is parser ']') then 1679 - failwith "Invalid array of tables syntax (space in ]])"; 1680 - expect_token parser Tok_rbracket; 1681 - skip_to_newline parser; 1682 - (* Pop contexts that are no longer valid for these keys *) 1683 - pop_invalid_contexts keys; 1684 - (* Check array context for this path *) 1685 - (match find_array_context keys with 1686 - | Some (`Sibling (path, _parent, container)) -> 1687 - (* Adding another element to an existing array *) 1688 - let new_entry = create_table_state () in 1689 - container.array_elements <- new_entry :: container.array_elements; 1690 - current_table := new_entry; 1691 - (* Update the stack entry with new current element (by re-adding) *) 1692 - array_context_stack := List.map (fun (p, par, cont) -> 1693 - if p = path then (p, par, cont) else (p, par, cont) 1694 - ) !array_context_stack 1695 - | Some (`Nested (parent_path, parent_entry)) -> 1696 - (* Sub-array within current array element *) 1697 - let relative_keys = remove_prefix keys parent_path in 1698 - let array_table = get_or_create_table parent_entry relative_keys true in 1699 - (* Check if trying to convert a non-array table to array *) 1700 - if array_table.defined && not array_table.is_array then 1701 - failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys)); 1702 - if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then 1703 - failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys)); 1704 - array_table.is_array <- true; 1705 - let new_entry = create_table_state () in 1706 - array_table.array_elements <- new_entry :: array_table.array_elements; 1707 - current_table := new_entry; 1708 - (* Push new context for the nested array *) 1709 - array_context_stack := (keys, parent_entry, array_table) :: !array_context_stack 1710 - | None -> 1711 - (* Top-level array *) 1712 - let array_table = get_or_create_table root keys true in 1713 - (* Check if trying to convert a non-array table to array *) 1714 - if array_table.defined && not array_table.is_array then 1715 - failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys)); 1716 - if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then 1717 - failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys)); 1718 - array_table.is_array <- true; 1719 - let entry = create_table_state () in 1720 - array_table.array_elements <- entry :: array_table.array_elements; 1721 - current_table := entry; 1722 - (* Push context for this array *) 1723 - array_context_stack := (keys, root, array_table) :: !array_context_stack); 1724 - parse_document () 1725 - | _ -> 1726 - (* Regular table *) 1727 - let keys = parse_dotted_key parser in 1728 - expect_token parser Tok_rbracket; 1729 - skip_to_newline parser; 1730 - (* Pop contexts that are no longer valid for these keys *) 1731 - pop_invalid_contexts keys; 1732 - (* Check if this table is relative to a current array element *) 1733 - (match find_array_context keys with 1734 - | Some (`Nested (parent_path, parent_entry)) -> 1735 - let relative_keys = remove_prefix keys parent_path in 1736 - if relative_keys <> [] then begin 1737 - let table = get_or_create_table parent_entry relative_keys true in 1738 - if table.is_array then 1739 - failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1740 - if table.defined then 1741 - failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1742 - table.defined <- true; 1743 - table.closed <- true; (* Can't extend via dotted keys from parent *) 1744 - current_table := table 1745 - end else begin 1746 - (* Keys equal parent_path - shouldn't happen for regular tables *) 1747 - let table = get_or_create_table root keys true in 1748 - if table.is_array then 1749 - failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1750 - if table.defined then 1751 - failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1752 - table.defined <- true; 1753 - table.closed <- true; (* Can't extend via dotted keys from parent *) 1754 - current_table := table 1755 - end 1756 - | Some (`Sibling (_, _, container)) -> 1757 - (* Exact match to an array of tables path - can't define as regular table *) 1758 - if container.is_array then 1759 - failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1760 - (* Shouldn't reach here normally *) 1761 - let table = get_or_create_table root keys true in 1762 - if table.defined then 1763 - failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1764 - table.defined <- true; 1765 - table.closed <- true; 1766 - current_table := table 1767 - | None -> 1768 - (* Not in an array context *) 1769 - let table = get_or_create_table root keys true in 1770 - if table.is_array then 1771 - failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys)); 1772 - if table.defined then 1773 - failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys)); 1774 - table.defined <- true; 1775 - table.closed <- true; (* Can't extend via dotted keys from parent *) 1776 - current_table := table; 1777 - (* Clear array context stack if we left all array contexts *) 1778 - if not (List.exists (fun (p, _, _) -> has_prefix keys p) !array_context_stack) then 1779 - array_context_stack := []); 1780 - parse_document ()) 1781 - | Tok_bare_key _ | Tok_basic_string _ | Tok_literal_string _ 1782 - | Tok_integer _ | Tok_float _ | Tok_date_local _ | Tok_datetime _ 1783 - | Tok_datetime_local _ | Tok_time_local _ -> 1784 - (* Key-value pair - key can be bare, quoted, or numeric *) 1785 - let keys = parse_dotted_key parser in 1786 - expect_token parser Tok_equals; 1787 - let value = parse_value parser in 1788 - skip_to_newline parser; 1789 - (* Add value to current table - check for duplicates first *) 1790 - let add_value_to_table tbl key v = 1791 - if List.mem_assoc key tbl.values then 1792 - failwith (Printf.sprintf "Duplicate key: %s" key); 1793 - (match Hashtbl.find_opt tbl.subtables key with 1794 - | Some sub -> 1795 - if sub.is_array then 1796 - failwith (Printf.sprintf "Cannot redefine array of tables '%s' as a value" key) 1797 - else 1798 - failwith (Printf.sprintf "Cannot redefine table '%s' as a value" key) 1799 - | None -> ()); 1800 - tbl.values <- (key, v) :: tbl.values 1801 - in 1802 - (match keys with 1803 - | [] -> failwith "Empty key" 1804 - | [k] -> 1805 - add_value_to_table !current_table k value 1806 - | _ -> 1807 - let parent_keys = List.rev (List.tl (List.rev keys)) in 1808 - let final_key = List.hd (List.rev keys) in 1809 - (* Use get_or_create_table_for_dotted_key to check for closed tables *) 1810 - let parent = get_or_create_table_for_dotted_key !current_table parent_keys in 1811 - add_value_to_table parent final_key value); 1812 - parse_document () 1813 - | _tok -> 1814 - failwith (Printf.sprintf "Unexpected token at line %d" parser.lexer.line) 346 + let string = { 347 + kind = "string"; 348 + doc = ""; 349 + dec = (function 350 + | Toml.String s -> Ok s 351 + | v -> Error (Type_mismatch { expected = "string"; got = type_name v })); 352 + enc = (fun s -> Toml.String s); 353 + } 1815 354 1816 - and skip_to_newline parser = 1817 - skip_ws_and_comments parser.lexer; 1818 - match peek_token parser with 1819 - | Tok_newline -> ignore (consume_token parser) 1820 - | Tok_eof -> () 1821 - | _ -> failwith "Expected newline after value" 1822 - in 355 + (* ---- Datetime codecs ---- *) 1823 356 1824 - parse_document (); 1825 - table_state_to_toml root 357 + let datetime = { 358 + kind = "datetime"; 359 + doc = ""; 360 + dec = (function 361 + | Toml.Datetime s -> 362 + (match Datetime.of_string s with 363 + | Ok dt -> Ok dt 364 + | Error msg -> Error (Value_error msg)) 365 + | v -> Error (Type_mismatch { expected = "datetime"; got = type_name v })); 366 + enc = (fun dt -> Toml.Datetime (Datetime.to_string dt)); 367 + } 1826 368 1827 - (* Parse TOML from string - creates lexer internally *) 1828 - let parse_toml input = 1829 - let lexer = make_lexer input in 1830 - parse_toml_from_lexer lexer 369 + let datetime_local = { 370 + kind = "datetime-local"; 371 + doc = ""; 372 + dec = (function 373 + | Toml.Datetime_local s -> 374 + (match Datetime_local.of_string s with 375 + | Ok dt -> Ok dt 376 + | Error msg -> Error (Value_error msg)) 377 + | v -> Error (Type_mismatch { expected = "datetime-local"; got = type_name v })); 378 + enc = (fun dt -> Toml.Datetime_local (Datetime_local.to_string dt)); 379 + } 1831 380 1832 - (* Parse TOML directly from Bytes.Reader - no intermediate string *) 1833 - let parse_toml_from_reader ?file r = 1834 - let lexer = make_lexer_from_reader ?file r in 1835 - parse_toml_from_lexer lexer 381 + let date_local = { 382 + kind = "date-local"; 383 + doc = ""; 384 + dec = (function 385 + | Toml.Date_local s -> 386 + (match Date.of_string s with 387 + | Ok d -> Ok d 388 + | Error msg -> Error (Value_error msg)) 389 + | v -> Error (Type_mismatch { expected = "date-local"; got = type_name v })); 390 + enc = (fun d -> Toml.Date_local (Date.to_string d)); 391 + } 1836 392 1837 - (* Convert TOML to tagged JSON for toml-test compatibility *) 1838 - let rec toml_to_tagged_json value = 1839 - match value with 1840 - | String s -> 1841 - Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s) 1842 - | Int i -> 1843 - Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i 1844 - | Float f -> 1845 - let value_str = 1846 - (* Normalize exponent format - lowercase e, keep + for positive exponents *) 1847 - let format_exp s = 1848 - let buf = Buffer.create (String.length s + 1) in 1849 - let i = ref 0 in 1850 - while !i < String.length s do 1851 - let c = s.[!i] in 1852 - if c = 'E' then begin 1853 - Buffer.add_char buf 'e'; 1854 - (* Add + if next char is a digit (no sign present) *) 1855 - if !i + 1 < String.length s then begin 1856 - let next = s.[!i + 1] in 1857 - if next >= '0' && next <= '9' then 1858 - Buffer.add_char buf '+' 1859 - end 1860 - end else if c = 'e' then begin 1861 - Buffer.add_char buf 'e'; 1862 - (* Add + if next char is a digit (no sign present) *) 1863 - if !i + 1 < String.length s then begin 1864 - let next = s.[!i + 1] in 1865 - if next >= '0' && next <= '9' then 1866 - Buffer.add_char buf '+' 1867 - end 1868 - end else 1869 - Buffer.add_char buf c; 1870 - incr i 1871 - done; 1872 - Buffer.contents buf 1873 - in 1874 - if Float.is_nan f then "nan" 1875 - else if f = Float.infinity then "inf" 1876 - else if f = Float.neg_infinity then "-inf" 1877 - else if f = 0.0 then 1878 - (* Special case for zero - output "0" or "-0" *) 1879 - if 1.0 /. f = Float.neg_infinity then "-0" else "0" 1880 - else if Float.is_integer f then 1881 - (* Integer floats - decide on representation *) 1882 - let abs_f = Float.abs f in 1883 - if abs_f = 9007199254740991.0 then 1884 - (* Exact max safe integer - output without .0 per toml-test expectation *) 1885 - Printf.sprintf "%.0f" f 1886 - else if abs_f >= 1e6 then 1887 - (* Use scientific notation for numbers >= 1e6 *) 1888 - (* Start with precision 0 to get XeN format (integer mantissa) *) 1889 - let rec try_exp_precision prec = 1890 - if prec > 17 then format_exp (Printf.sprintf "%.17e" f) 1891 - else 1892 - let s = format_exp (Printf.sprintf "%.*e" prec f) in 1893 - if float_of_string s = f then s 1894 - else try_exp_precision (prec + 1) 1895 - in 1896 - try_exp_precision 0 1897 - else if abs_f >= 2.0 then 1898 - (* Integer floats >= 2 - output with .0 suffix *) 1899 - Printf.sprintf "%.1f" f 1900 - else 1901 - (* Integer floats 0, 1, -1 - output without .0 suffix *) 1902 - Printf.sprintf "%.0f" f 1903 - else 1904 - (* Non-integer float *) 1905 - let abs_f = Float.abs f in 1906 - let use_scientific = abs_f >= 1e10 || (abs_f < 1e-4 && abs_f > 0.0) in 1907 - if use_scientific then 1908 - let rec try_exp_precision prec = 1909 - if prec > 17 then format_exp (Printf.sprintf "%.17e" f) 1910 - else 1911 - let s = format_exp (Printf.sprintf "%.*e" prec f) in 1912 - if float_of_string s = f then s 1913 - else try_exp_precision (prec + 1) 1914 - in 1915 - try_exp_precision 1 1916 - else 1917 - (* Prefer decimal notation for reasonable range *) 1918 - (* Try shortest decimal first *) 1919 - let rec try_decimal_precision prec = 1920 - if prec > 17 then None 1921 - else 1922 - let s = Printf.sprintf "%.*f" prec f in 1923 - (* Remove trailing zeros but keep at least one decimal place *) 1924 - let s = 1925 - let len = String.length s in 1926 - let dot_pos = try String.index s '.' with Not_found -> len in 1927 - let rec find_last_nonzero i = 1928 - if i <= dot_pos then dot_pos + 2 (* Keep at least X.0 *) 1929 - else if s.[i] <> '0' then i + 1 1930 - else find_last_nonzero (i - 1) 1931 - in 1932 - let end_pos = min len (find_last_nonzero (len - 1)) in 1933 - String.sub s 0 end_pos 1934 - in 1935 - (* Ensure there's a decimal point with at least one digit after *) 1936 - let s = 1937 - if not (String.contains s '.') then s ^ ".0" 1938 - else if s.[String.length s - 1] = '.' then s ^ "0" 1939 - else s 1940 - in 1941 - if float_of_string s = f then Some s 1942 - else try_decimal_precision (prec + 1) 1943 - in 1944 - let decimal = try_decimal_precision 1 in 1945 - (* Always prefer decimal notation if it works *) 1946 - match decimal with 1947 - | Some d -> d 1948 - | None -> 1949 - (* Fall back to shortest representation *) 1950 - let rec try_precision prec = 1951 - if prec > 17 then Printf.sprintf "%.17g" f 1952 - else 1953 - let s = Printf.sprintf "%.*g" prec f in 1954 - if float_of_string s = f then s 1955 - else try_precision (prec + 1) 1956 - in 1957 - try_precision 1 1958 - in 1959 - Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str 1960 - | Bool b -> 1961 - Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false") 1962 - | Datetime s -> 1963 - validate_datetime_string s; 1964 - Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s 1965 - | Datetime_local s -> 1966 - validate_datetime_string s; 1967 - Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s 1968 - | Date_local s -> 1969 - validate_date_string s; 1970 - Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s 1971 - | Time_local s -> 1972 - validate_time_string s; 1973 - Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s 1974 - | Array items -> 1975 - let json_items = List.map toml_to_tagged_json items in 1976 - Printf.sprintf "[%s]" (String.concat "," json_items) 1977 - | Table pairs -> 1978 - let json_pairs = List.map (fun (k, v) -> 1979 - Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v) 1980 - ) pairs in 1981 - Printf.sprintf "{%s}" (String.concat "," json_pairs) 393 + let time_local = { 394 + kind = "time-local"; 395 + doc = ""; 396 + dec = (function 397 + | Toml.Time_local s -> 398 + (match Time.of_string s with 399 + | Ok t -> Ok t 400 + | Error msg -> Error (Value_error msg)) 401 + | v -> Error (Type_mismatch { expected = "time-local"; got = type_name v })); 402 + enc = (fun t -> Toml.Time_local (Time.to_string t)); 403 + } 1982 404 1983 - and json_encode_string s = 1984 - let buf = Buffer.create (String.length s + 2) in 1985 - Buffer.add_char buf '"'; 1986 - String.iter (fun c -> 1987 - match c with 1988 - | '"' -> Buffer.add_string buf "\\\"" 1989 - | '\\' -> Buffer.add_string buf "\\\\" 1990 - | '\n' -> Buffer.add_string buf "\\n" 1991 - | '\r' -> Buffer.add_string buf "\\r" 1992 - | '\t' -> Buffer.add_string buf "\\t" 1993 - | '\b' -> Buffer.add_string buf "\\b" (* backspace *) 1994 - | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f" (* formfeed *) 1995 - | c when Char.code c < 0x20 -> 1996 - Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) 1997 - | c -> Buffer.add_char buf c 1998 - ) s; 1999 - Buffer.add_char buf '"'; 2000 - Buffer.contents buf 405 + let datetime_string = { 406 + kind = "datetime"; 407 + doc = ""; 408 + dec = (function 409 + | Toml.Datetime s | Toml.Datetime_local s 410 + | Toml.Date_local s | Toml.Time_local s -> Ok s 411 + | v -> Error (Type_mismatch { expected = "datetime"; got = type_name v })); 412 + enc = (fun s -> Toml.Datetime s); (* Default to offset datetime *) 413 + } 2001 414 2002 - (* Tagged JSON to TOML for encoder *) 2003 - let decode_tagged_json_string s = 2004 - (* Simple JSON parser for tagged format *) 2005 - let pos = ref 0 in 2006 - let len = String.length s in 415 + (* ---- Combinators ---- *) 2007 416 2008 - let skip_ws () = 2009 - while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do 2010 - incr pos 2011 - done 417 + let map ?kind:k ?doc:d ?dec ?enc c = 418 + let kind = Option.value ~default:c.kind k in 419 + let doc = Option.value ~default:c.doc d in 420 + let dec_fn = match dec with 421 + | Some f -> fun v -> Result.map f (c.dec v) 422 + | None -> fun _ -> Error (Value_error "decode not supported") 2012 423 in 2013 - 2014 - let expect c = 2015 - skip_ws (); 2016 - if !pos >= len || s.[!pos] <> c then 2017 - failwith (Printf.sprintf "Expected '%c' at position %d" c !pos); 2018 - incr pos 424 + let enc_fn = match enc with 425 + | Some f -> fun v -> c.enc (f v) 426 + | None -> fun _ -> failwith "encode not supported" 2019 427 in 428 + { kind; doc; dec = dec_fn; enc = enc_fn } 2020 429 2021 - let peek () = 2022 - skip_ws (); 2023 - if !pos >= len then None else Some s.[!pos] 2024 - in 430 + let const ?kind ?doc v = 431 + let kind = Option.value ~default:"constant" kind in 432 + let doc = Option.value ~default:"" doc in 433 + { kind; doc; dec = (fun _ -> Ok v); enc = (fun _ -> Toml.Table []) } 2025 434 2026 - let parse_json_string () = 2027 - skip_ws (); 2028 - expect '"'; 2029 - let buf = Buffer.create 64 in 2030 - while !pos < len && s.[!pos] <> '"' do 2031 - if s.[!pos] = '\\' then begin 2032 - incr pos; 2033 - if !pos >= len then failwith "Unexpected end in string escape"; 2034 - match s.[!pos] with 2035 - | '"' -> Buffer.add_char buf '"'; incr pos 2036 - | '\\' -> Buffer.add_char buf '\\'; incr pos 2037 - | '/' -> Buffer.add_char buf '/'; incr pos 2038 - | 'n' -> Buffer.add_char buf '\n'; incr pos 2039 - | 'r' -> Buffer.add_char buf '\r'; incr pos 2040 - | 't' -> Buffer.add_char buf '\t'; incr pos 2041 - | 'b' -> Buffer.add_char buf '\b'; incr pos 2042 - | 'f' -> Buffer.add_char buf (Char.chr 0x0C); incr pos 2043 - | 'u' -> 2044 - incr pos; 2045 - if !pos + 3 >= len then failwith "Invalid unicode escape"; 2046 - let hex = String.sub s !pos 4 in 2047 - let cp = int_of_string ("0x" ^ hex) in 2048 - Buffer.add_string buf (codepoint_to_utf8 cp); 2049 - pos := !pos + 4 2050 - | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c) 2051 - end else begin 2052 - Buffer.add_char buf s.[!pos]; 2053 - incr pos 2054 - end 2055 - done; 2056 - expect '"'; 2057 - Buffer.contents buf 2058 - in 435 + let enum ?cmp ?kind ?doc assoc = 436 + let cmp = Option.value ~default:Stdlib.compare cmp in 437 + let kind = Option.value ~default:"enum" kind in 438 + let doc = Option.value ~default:"" doc in 439 + let rev_assoc = List.map (fun (s, v) -> (v, s)) assoc in 440 + { 441 + kind; doc; 442 + dec = (function 443 + | Toml.String s -> 444 + (match List.assoc_opt s assoc with 445 + | Some v -> Ok v 446 + | None -> Error (Value_error ("unknown enum value: " ^ s))) 447 + | v -> Error (Type_mismatch { expected = "string"; got = type_name v })); 448 + enc = (fun v -> 449 + match List.find_opt (fun (v', _) -> cmp v v' = 0) rev_assoc with 450 + | Some (_, s) -> Toml.String s 451 + | None -> failwith "enum value not in association list"); 452 + } 2059 453 2060 - (* Convert a tagged JSON object to a TOML primitive if applicable *) 2061 - let convert_tagged_value value = 2062 - match value with 2063 - | Table [("type", String typ); ("value", String v)] 2064 - | Table [("value", String v); ("type", String typ)] -> 2065 - (match typ with 2066 - | "string" -> String v 2067 - | "integer" -> Int (Int64.of_string v) 2068 - | "float" -> 2069 - (match v with 2070 - | "inf" -> Float Float.infinity 2071 - | "-inf" -> Float Float.neg_infinity 2072 - | "nan" -> Float Float.nan 2073 - | _ -> Float (float_of_string v)) 2074 - | "bool" -> Bool (v = "true") 2075 - | "datetime" -> Datetime v 2076 - | "datetime-local" -> Datetime_local v 2077 - | "date-local" -> Date_local v 2078 - | "time-local" -> Time_local v 2079 - | _ -> failwith (Printf.sprintf "Unknown type: %s" typ)) 2080 - | _ -> value 2081 - in 454 + let option ?kind ?doc c = 455 + let kind = Option.value ~default:("optional " ^ c.kind) kind in 456 + let doc = Option.value ~default:c.doc doc in 457 + { 458 + kind; doc; 459 + dec = (fun v -> Result.map Option.some (c.dec v)); 460 + enc = (function 461 + | Some v -> c.enc v 462 + | None -> Toml.Table []); (* Should not be called for None *) 463 + } 2082 464 2083 - let rec parse_value () = 2084 - skip_ws (); 2085 - match peek () with 2086 - | Some '{' -> parse_object () 2087 - | Some '[' -> parse_array () 2088 - | Some '"' -> String (parse_json_string ()) 2089 - | _ -> failwith "Expected value" 465 + let result ~ok ~error = 466 + { 467 + kind = ok.kind ^ " or " ^ error.kind; 468 + doc = ""; 469 + dec = (fun v -> 470 + match ok.dec v with 471 + | Ok x -> Ok (Ok x) 472 + | Error _ -> 473 + match error.dec v with 474 + | Ok x -> Ok (Error x) 475 + | Error e -> Error e); 476 + enc = (function 477 + | Ok x -> ok.enc x 478 + | Error x -> error.enc x); 479 + } 2090 480 2091 - and parse_object () = 2092 - expect '{'; 2093 - skip_ws (); 2094 - if peek () = Some '}' then begin 2095 - incr pos; 2096 - Table [] 2097 - end else begin 2098 - let pairs = ref [] in 2099 - let first = ref true in 2100 - while peek () <> Some '}' do 2101 - if not !first then expect ','; 2102 - first := false; 2103 - skip_ws (); 2104 - let key = parse_json_string () in 2105 - expect ':'; 2106 - let value = parse_value () in 2107 - pairs := (key, convert_tagged_value value) :: !pairs 2108 - done; 2109 - expect '}'; 2110 - Table (List.rev !pairs) 2111 - end 481 + let rec' lazy_c = 482 + { 483 + kind = "recursive"; 484 + doc = ""; 485 + dec = (fun v -> (Lazy.force lazy_c).dec v); 486 + enc = (fun v -> (Lazy.force lazy_c).enc v); 487 + } 2112 488 2113 - and parse_array () = 2114 - expect '['; 2115 - skip_ws (); 2116 - if peek () = Some ']' then begin 2117 - incr pos; 2118 - Array [] 2119 - end else begin 2120 - let items = ref [] in 2121 - let first = ref true in 2122 - while peek () <> Some ']' do 2123 - if not !first then expect ','; 2124 - first := false; 2125 - items := convert_tagged_value (parse_value ()) :: !items 2126 - done; 2127 - expect ']'; 2128 - Array (List.rev !items) 2129 - end 2130 - in 489 + (* ---- Array codecs ---- *) 2131 490 2132 - parse_value () 491 + module Array = struct 492 + type 'a codec = 'a t 2133 493 2134 - (* Streaming TOML encoder - writes directly to a Bytes.Writer *) 494 + type ('array, 'elt) enc = { 495 + fold : 'acc. ('acc -> 'elt -> 'acc) -> 'acc -> 'array -> 'acc 496 + } 2135 497 2136 - let rec write_toml_string w s = 2137 - (* Check if we need to escape *) 2138 - let needs_escape = String.exists (fun c -> 2139 - let code = Char.code c in 2140 - c = '"' || c = '\\' || c = '\n' || c = '\r' || c = '\t' || 2141 - code < 0x20 || code = 0x7F 2142 - ) s in 2143 - if needs_escape then begin 2144 - Bytes.Writer.write_string w "\""; 2145 - String.iter (fun c -> 2146 - match c with 2147 - | '"' -> Bytes.Writer.write_string w "\\\"" 2148 - | '\\' -> Bytes.Writer.write_string w "\\\\" 2149 - | '\n' -> Bytes.Writer.write_string w "\\n" 2150 - | '\r' -> Bytes.Writer.write_string w "\\r" 2151 - | '\t' -> Bytes.Writer.write_string w "\\t" 2152 - | '\b' -> Bytes.Writer.write_string w "\\b" 2153 - | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f" 2154 - | c when Char.code c < 0x20 || Char.code c = 0x7F -> 2155 - Bytes.Writer.write_string w (Printf.sprintf "\\u%04X" (Char.code c)) 2156 - | c -> 2157 - let b = Bytes.create 1 in 2158 - Bytes.set b 0 c; 2159 - Bytes.Writer.write_bytes w b 2160 - ) s; 2161 - Bytes.Writer.write_string w "\"" 2162 - end else begin 2163 - Bytes.Writer.write_string w "\""; 2164 - Bytes.Writer.write_string w s; 2165 - Bytes.Writer.write_string w "\"" 2166 - end 498 + type ('array, 'elt, 'builder) map = { 499 + kind : string; 500 + doc : string; 501 + elt : 'elt codec; 502 + dec_empty : unit -> 'builder; 503 + dec_add : 'elt -> 'builder -> 'builder; 504 + dec_finish : 'builder -> 'array; 505 + enc : ('array, 'elt) enc; 506 + } 2167 507 2168 - and write_toml_key w k = 2169 - (* Check if it can be a bare key *) 2170 - let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in 2171 - if is_bare then Bytes.Writer.write_string w k 2172 - else write_toml_string w k 508 + let map ?kind ?doc 509 + ?(dec_empty = fun () -> failwith "decode not supported") 510 + ?(dec_add = fun _ _ -> failwith "decode not supported") 511 + ?(dec_finish = fun _ -> failwith "decode not supported") 512 + ?(enc = { fold = fun _ _ _ -> failwith "encode not supported" }) 513 + (elt : 'elt codec) : ('array, 'elt, 'builder) map = 514 + let kind = Option.value ~default:("array of " ^ elt.kind) kind in 515 + let doc = Option.value ~default:"" doc in 516 + { kind; doc; elt; dec_empty; dec_add; dec_finish; enc } 2173 517 2174 - and write_toml_value w ?(inline=false) value = 2175 - match value with 2176 - | String s -> write_toml_string w s 2177 - | Int i -> Bytes.Writer.write_string w (Int64.to_string i) 2178 - | Float f -> 2179 - if Float.is_nan f then Bytes.Writer.write_string w "nan" 2180 - else if f = Float.infinity then Bytes.Writer.write_string w "inf" 2181 - else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf" 2182 - else begin 2183 - let s = Printf.sprintf "%.17g" f in 2184 - (* Ensure it looks like a float *) 2185 - let s = if String.contains s '.' || String.contains s 'e' || String.contains s 'E' 2186 - then s else s ^ ".0" in 2187 - Bytes.Writer.write_string w s 2188 - end 2189 - | Bool b -> Bytes.Writer.write_string w (if b then "true" else "false") 2190 - | Datetime s -> Bytes.Writer.write_string w s 2191 - | Datetime_local s -> Bytes.Writer.write_string w s 2192 - | Date_local s -> Bytes.Writer.write_string w s 2193 - | Time_local s -> Bytes.Writer.write_string w s 2194 - | Array items -> 2195 - Bytes.Writer.write_string w "["; 2196 - List.iteri (fun i item -> 2197 - if i > 0 then Bytes.Writer.write_string w ", "; 2198 - write_toml_value w ~inline:true item 2199 - ) items; 2200 - Bytes.Writer.write_string w "]" 2201 - | Table pairs when inline -> 2202 - Bytes.Writer.write_string w "{"; 2203 - List.iteri (fun i (k, v) -> 2204 - if i > 0 then Bytes.Writer.write_string w ", "; 2205 - write_toml_key w k; 2206 - Bytes.Writer.write_string w " = "; 2207 - write_toml_value w ~inline:true v 2208 - ) pairs; 2209 - Bytes.Writer.write_string w "}" 2210 - | Table _ -> failwith "Cannot encode table inline without inline flag" 518 + let list ?kind ?doc (elt : 'a codec) : ('a list, 'a, 'a list) map = 519 + let kind = Option.value ~default:("list of " ^ elt.kind) kind in 520 + let doc = Option.value ~default:"" doc in 521 + { 522 + kind; doc; elt; 523 + dec_empty = (fun () -> []); 524 + dec_add = (fun x xs -> x :: xs); 525 + dec_finish = List.rev; 526 + enc = { fold = (fun f acc xs -> List.fold_left f acc xs) }; 527 + } 2211 528 2212 - (* True streaming TOML encoder - writes directly to Bytes.Writer *) 2213 - let encode_to_writer w value = 2214 - let has_content = ref false in 529 + let array ?kind ?doc (elt : 'a codec) : ('a array, 'a, 'a list) map = 530 + let kind = Option.value ~default:("array of " ^ elt.kind) kind in 531 + let doc = Option.value ~default:"" doc in 532 + { 533 + kind; doc; elt; 534 + dec_empty = (fun () -> []); 535 + dec_add = (fun x xs -> x :: xs); 536 + dec_finish = (fun xs -> Stdlib.Array.of_list (List.rev xs)); 537 + enc = { fold = (fun f acc arr -> Stdlib.Array.fold_left f acc arr) }; 538 + } 2215 539 2216 - let write_path path = 2217 - List.iteri (fun i k -> 2218 - if i > 0 then Bytes.Writer.write_string w "."; 2219 - write_toml_key w k 2220 - ) path 2221 - in 540 + let finish m = 541 + { 542 + kind = m.kind; 543 + doc = m.doc; 544 + dec = (function 545 + | Toml.Array items -> 546 + let rec decode_items builder = function 547 + | [] -> Ok (m.dec_finish builder) 548 + | item :: rest -> 549 + match m.elt.dec item with 550 + | Ok v -> decode_items (m.dec_add v builder) rest 551 + | Error e -> Error e 552 + in 553 + decode_items (m.dec_empty ()) items 554 + | v -> Error (Type_mismatch { expected = "array"; got = type_name v })); 555 + enc = (fun arr -> 556 + let items = m.enc.fold (fun acc elt -> m.elt.enc elt :: acc) [] arr in 557 + Toml.Array (List.rev items)); 558 + } 559 + end 2222 560 2223 - let rec encode_at_path path value = 2224 - match value with 2225 - | Table pairs -> 2226 - (* Separate simple values from nested tables *) 2227 - (* Only PURE table arrays (all items are tables) use [[array]] syntax. 2228 - Mixed arrays (primitives + tables) must be encoded inline. *) 2229 - let is_pure_table_array items = 2230 - items <> [] && List.for_all (function Table _ -> true | _ -> false) items 2231 - in 2232 - let simple, nested = List.partition (fun (_, v) -> 2233 - match v with 2234 - | Table _ -> false 2235 - | Array items -> not (is_pure_table_array items) 2236 - | _ -> true 2237 - ) pairs in 561 + let list ?kind ?doc c = Array.(finish (list ?kind ?doc c)) 562 + let array ?kind ?doc c = Array.(finish (array ?kind ?doc c)) 2238 563 2239 - (* Emit simple values first *) 2240 - List.iter (fun (k, v) -> 2241 - write_toml_key w k; 2242 - Bytes.Writer.write_string w " = "; 2243 - write_toml_value w ~inline:true v; 2244 - Bytes.Writer.write_string w "\n"; 2245 - has_content := true 2246 - ) simple; 564 + (* ---- Table codecs ---- *) 2247 565 2248 - (* Then nested tables *) 2249 - List.iter (fun (k, v) -> 2250 - let new_path = path @ [k] in 2251 - match v with 2252 - | Table _ -> 2253 - if !has_content then Bytes.Writer.write_string w "\n"; 2254 - Bytes.Writer.write_string w "["; 2255 - write_path new_path; 2256 - Bytes.Writer.write_string w "]\n"; 2257 - has_content := true; 2258 - encode_at_path new_path v 2259 - | Array items when items <> [] && List.for_all (function Table _ -> true | _ -> false) items -> 2260 - (* Pure table array - use [[array]] syntax *) 2261 - List.iter (fun item -> 2262 - match item with 2263 - | Table _ -> 2264 - if !has_content then Bytes.Writer.write_string w "\n"; 2265 - Bytes.Writer.write_string w "[["; 2266 - write_path new_path; 2267 - Bytes.Writer.write_string w "]]\n"; 2268 - has_content := true; 2269 - encode_at_path new_path item 2270 - | _ -> assert false (* Impossible - we checked for_all above *) 2271 - ) items 2272 - | _ -> 2273 - write_toml_key w k; 2274 - Bytes.Writer.write_string w " = "; 2275 - write_toml_value w ~inline:true v; 2276 - Bytes.Writer.write_string w "\n"; 2277 - has_content := true 2278 - ) nested 2279 - | _ -> 2280 - failwith "Top-level TOML must be a table" 2281 - in 566 + module Table = struct 567 + type 'a codec = 'a t 2282 568 2283 - encode_at_path [] value 569 + (* Unknown member handling *) 570 + type unknown_handling = 571 + | Skip 572 + | Error_on_unknown 573 + | Keep of (string -> Toml.t -> unit) (* Callback to collect *) 2284 574 2285 - (* ============================================ 2286 - Public Interface - Constructors 2287 - ============================================ *) 575 + (* Member specification - existential type for storing typed member info *) 576 + type 'o mem_encoder = { 577 + mem_enc : 'o -> Toml.t; 578 + mem_should_omit : 'o -> bool; 579 + } 2288 580 2289 - let string s = String s 2290 - let int i = Int i 2291 - let int_of_int i = Int (Int64.of_int i) 2292 - let float f = Float f 2293 - let bool b = Bool b 2294 - let array vs = Array vs 2295 - let table pairs = Table pairs 2296 - let datetime s = Datetime s 2297 - let datetime_local s = Datetime_local s 2298 - let date_local s = Date_local s 2299 - let time_local s = Time_local s 581 + type ('o, 'a) mem_spec = { 582 + name : string; 583 + mem_doc : string; 584 + mem_codec : 'a codec; 585 + dec_absent : 'a option; 586 + enc_typed : 'o mem_encoder option; 587 + } 2300 588 2301 - (* ============================================ 2302 - Public Interface - Accessors 2303 - ============================================ *) 589 + (* Helper to create enc_typed from encoder and optional omit function *) 590 + let make_enc_typed (codec : 'a codec) enc enc_omit = 591 + match enc with 592 + | None -> None 593 + | Some f -> 594 + let omit = Option.value ~default:(fun _ -> false) enc_omit in 595 + Some { 596 + mem_enc = (fun o -> codec.enc (f o)); 597 + mem_should_omit = (fun o -> omit (f o)); 598 + } 2304 599 2305 - let to_string = function 2306 - | String s -> s 2307 - | _ -> invalid_arg "Tomlt.to_string: not a string" 600 + module Mem = struct 601 + type 'a codec = 'a t 2308 602 2309 - let to_string_opt = function 2310 - | String s -> Some s 2311 - | _ -> None 603 + type ('o, 'a) t = ('o, 'a) mem_spec 2312 604 2313 - let to_int = function 2314 - | Int i -> i 2315 - | _ -> invalid_arg "Tomlt.to_int: not an integer" 605 + let v ?doc ?(dec_absent : 'a option) ?enc ?enc_omit name (codec : 'a codec) = 606 + { name; 607 + mem_doc = Option.value ~default:"" doc; 608 + mem_codec = codec; 609 + dec_absent; 610 + enc_typed = make_enc_typed codec enc enc_omit } 2316 611 2317 - let to_int_opt = function 2318 - | Int i -> Some i 2319 - | _ -> None 612 + let opt ?doc ?enc name (codec : 'a codec) = 613 + let opt_codec = option codec in 614 + { name; 615 + mem_doc = Option.value ~default:"" doc; 616 + mem_codec = opt_codec; 617 + dec_absent = Some None; 618 + enc_typed = make_enc_typed opt_codec enc (Some Option.is_none) } 619 + end 2320 620 2321 - let to_float = function 2322 - | Float f -> f 2323 - | _ -> invalid_arg "Tomlt.to_float: not a float" 621 + (* Map state for building table codecs *) 622 + type ('o, 'dec) map = { 623 + map_kind : string; 624 + map_doc : string; 625 + members : ('o, Toml.t) mem_spec list; (* Stored in reverse order *) 626 + dec : Toml.t list -> ('dec, codec_error) result; 627 + unknown : unknown_handling; 628 + keep_unknown_enc : ('o -> (string * Toml.t) list) option; 629 + } 2324 630 2325 - let to_float_opt = function 2326 - | Float f -> Some f 2327 - | _ -> None 631 + let obj ?kind ?doc dec = 632 + let kind = Option.value ~default:"table" kind in 633 + let doc = Option.value ~default:"" doc in 634 + { 635 + map_kind = kind; 636 + map_doc = doc; 637 + members = []; 638 + dec = (fun _ -> Ok dec); 639 + unknown = Skip; 640 + keep_unknown_enc = None; 641 + } 2328 642 2329 - let to_bool = function 2330 - | Bool b -> b 2331 - | _ -> invalid_arg "Tomlt.to_bool: not a boolean" 643 + let obj' ?kind ?doc dec_fn = 644 + let kind = Option.value ~default:"table" kind in 645 + let doc = Option.value ~default:"" doc in 646 + { 647 + map_kind = kind; 648 + map_doc = doc; 649 + members = []; 650 + dec = (fun _ -> Ok (dec_fn ())); 651 + unknown = Skip; 652 + keep_unknown_enc = None; 653 + } 2332 654 2333 - let to_bool_opt = function 2334 - | Bool b -> Some b 2335 - | _ -> None 655 + (* Marker to indicate a missing member with a default *) 656 + let missing_marker_str = "__TOMLT_MISSING_WITH_DEFAULT__" 657 + let missing_marker = Toml.String missing_marker_str 2336 658 2337 - let to_array = function 2338 - | Array vs -> vs 2339 - | _ -> invalid_arg "Tomlt.to_array: not an array" 659 + let is_missing_marker = function 660 + | Toml.String s -> String.equal s missing_marker_str 661 + | _ -> false 2340 662 2341 - let to_array_opt = function 2342 - | Array vs -> Some vs 2343 - | _ -> None 663 + let mem ?doc ?dec_absent ?enc ?enc_omit name (c : 'a codec) m = 664 + (* Create a member spec that stores raw TOML for later processing *) 665 + let raw_spec = { 666 + name; 667 + mem_doc = Option.value ~default:"" doc; 668 + mem_codec = { kind = c.kind; doc = c.doc; 669 + dec = (fun v -> Ok v); enc = (fun v -> v) }; 670 + (* We use the marker value when member is missing but has a default *) 671 + dec_absent = Option.map (fun _ -> missing_marker) dec_absent; 672 + enc_typed = make_enc_typed c enc enc_omit; 673 + } in 674 + { 675 + m with 676 + members = raw_spec :: m.members; 677 + dec = (function 678 + | [] -> Error (Value_error "internal: not enough values") 679 + | v :: rest -> 680 + Result.bind (m.dec rest) @@ fun f -> 681 + (* Check if this is the missing marker - use default directly *) 682 + if is_missing_marker v then 683 + match dec_absent with 684 + | Some default -> Ok (f default) 685 + | None -> Error (Value_error "internal: missing marker without default") 686 + else 687 + Result.map f (c.dec v)); 688 + } 2344 689 2345 - let to_table = function 2346 - | Table pairs -> pairs 2347 - | _ -> invalid_arg "Tomlt.to_table: not a table" 690 + let opt_mem ?doc ?enc name (c : 'a codec) m = 691 + (* dec_absent parameter is ('a option) option. 692 + Some None means "the default decoded value is None : 'a option" 693 + None would mean "no default, member is required" *) 694 + let default : 'a option = None in 695 + mem ?doc ?enc ~dec_absent:default ~enc_omit:Option.is_none name (option c) m 2348 696 2349 - let to_table_opt = function 2350 - | Table pairs -> Some pairs 2351 - | _ -> None 697 + (* Unknown member handling *) 698 + module Mems = struct 699 + type 'a codec = 'a t 2352 700 2353 - let to_datetime = function 2354 - | Datetime s | Datetime_local s | Date_local s | Time_local s -> s 2355 - | _ -> invalid_arg "Tomlt.to_datetime: not a datetime" 701 + type ('mems, 'a) enc = { 702 + fold : 'acc. ('acc -> string -> 'a -> 'acc) -> 'acc -> 'mems -> 'acc 703 + } 2356 704 2357 - let to_datetime_opt = function 2358 - | Datetime s | Datetime_local s | Date_local s | Time_local s -> Some s 2359 - | _ -> None 705 + type ('mems, 'a, 'builder) map = { 706 + mems_kind : string; 707 + mems_doc : string; 708 + elt : 'a codec; 709 + dec_empty : unit -> 'builder; 710 + dec_add : string -> 'a -> 'builder -> 'builder; 711 + dec_finish : 'builder -> 'mems; 712 + enc : ('mems, 'a) enc; 713 + } 2360 714 2361 - (* ============================================ 2362 - Public Interface - Type Predicates 2363 - ============================================ *) 715 + let map ?kind ?doc 716 + ?(dec_empty = fun () -> failwith "decode not supported") 717 + ?(dec_add = fun _ _ _ -> failwith "decode not supported") 718 + ?(dec_finish = fun _ -> failwith "decode not supported") 719 + ?(enc = { fold = fun _ _ _ -> failwith "encode not supported" }) 720 + elt = 721 + let kind = Option.value ~default:("members of " ^ elt.kind) kind in 722 + let doc = Option.value ~default:"" doc in 723 + { mems_kind = kind; mems_doc = doc; elt; dec_empty; dec_add; dec_finish; enc } 2364 724 2365 - let is_string = function String _ -> true | _ -> false 2366 - let is_int = function Int _ -> true | _ -> false 2367 - let is_float = function Float _ -> true | _ -> false 2368 - let is_bool = function Bool _ -> true | _ -> false 2369 - let is_array = function Array _ -> true | _ -> false 2370 - let is_table = function Table _ -> true | _ -> false 2371 - let is_datetime = function 2372 - | Datetime _ | Datetime_local _ | Date_local _ | Time_local _ -> true 2373 - | _ -> false 725 + module StringMap = Map.Make(String) 2374 726 2375 - (* ============================================ 2376 - Public Interface - Table Navigation 2377 - ============================================ *) 727 + let string_map ?kind ?doc elt = 728 + let kind = Option.value ~default:("string map of " ^ elt.kind) kind in 729 + let doc = Option.value ~default:"" doc in 730 + { 731 + mems_kind = kind; mems_doc = doc; elt; 732 + dec_empty = (fun () -> []); 733 + dec_add = (fun k v acc -> (k, v) :: acc); 734 + dec_finish = (fun pairs -> 735 + List.fold_left (fun m (k, v) -> StringMap.add k v m) StringMap.empty pairs); 736 + enc = { fold = (fun f acc m -> StringMap.fold (fun k v acc -> f acc k v) m acc) }; 737 + } 2378 738 2379 - let find key = function 2380 - | Table pairs -> List.assoc key pairs 2381 - | _ -> invalid_arg "Tomlt.find: not a table" 739 + let assoc ?kind ?doc elt = 740 + let kind = Option.value ~default:("assoc of " ^ elt.kind) kind in 741 + let doc = Option.value ~default:"" doc in 742 + { 743 + mems_kind = kind; mems_doc = doc; elt; 744 + dec_empty = (fun () -> []); 745 + dec_add = (fun k v acc -> (k, v) :: acc); 746 + dec_finish = List.rev; 747 + enc = { fold = (fun f acc pairs -> List.fold_left (fun acc (k, v) -> f acc k v) acc pairs) }; 748 + } 749 + end 2382 750 2383 - let find_opt key = function 2384 - | Table pairs -> List.assoc_opt key pairs 2385 - | _ -> None 751 + let skip_unknown m = { m with unknown = Skip } 752 + let error_unknown m = { m with unknown = Error_on_unknown } 2386 753 2387 - let mem key = function 2388 - | Table pairs -> List.mem_assoc key pairs 2389 - | _ -> false 754 + let keep_unknown ?enc mems m = 755 + (* Add a pseudo-member that collects unknown members *) 756 + let unknown_vals = ref [] in 757 + let collector name v = 758 + match mems.Mems.elt.dec v with 759 + | Ok decoded -> unknown_vals := (name, decoded) :: !unknown_vals 760 + | Error _ -> () (* Skip values that don't decode *) 761 + in 762 + (* Create a raw spec for unknown members *) 763 + let raw_spec = { 764 + name = ""; (* Special marker for unknown members *) 765 + mem_doc = ""; 766 + mem_codec = { kind = "unknown"; doc = ""; 767 + dec = (fun _ -> Ok (Toml.Table [])); 768 + enc = (fun _ -> Toml.Table []) }; 769 + dec_absent = Some (Toml.Table []); 770 + enc_typed = None; 771 + } in 772 + { 773 + m with 774 + members = raw_spec :: m.members; 775 + unknown = Keep collector; 776 + keep_unknown_enc = Option.map (fun f o -> 777 + let mems_val = f o in 778 + mems.Mems.enc.fold (fun acc k v -> (k, mems.Mems.elt.enc v) :: acc) [] mems_val 779 + |> List.rev 780 + ) enc; 781 + dec = (function 782 + | [] -> Error (Value_error "internal: not enough values") 783 + | _ :: rest -> 784 + Result.map (fun f -> 785 + let collected = mems.Mems.dec_finish ( 786 + List.fold_left (fun acc (k, v) -> mems.Mems.dec_add k v acc) 787 + (mems.Mems.dec_empty ()) 788 + (List.rev !unknown_vals) 789 + ) in 790 + unknown_vals := []; 791 + f collected 792 + ) (m.dec rest)); 793 + } 2390 794 2391 - let keys = function 2392 - | Table pairs -> List.map fst pairs 2393 - | _ -> invalid_arg "Tomlt.keys: not a table" 795 + (* Check for duplicates in a list *) 796 + let find_dup xs = 797 + let rec loop seen = function 798 + | [] -> None 799 + | x :: rest -> if List.mem x seen then Some x else loop (x :: seen) rest 800 + in 801 + loop [] xs 2394 802 2395 - let rec get path t = 2396 - match path with 2397 - | [] -> t 2398 - | key :: rest -> 2399 - match t with 2400 - | Table pairs -> 2401 - (match List.assoc_opt key pairs with 2402 - | Some v -> get rest v 2403 - | None -> raise Not_found) 2404 - | _ -> invalid_arg "Tomlt.get: intermediate value is not a table" 803 + let finish_common ~inline m = 804 + let _ = inline in (* For future inline table support *) 805 + (* members_ordered is for display (reversed to get declaration order) *) 806 + let members_ordered = List.rev m.members in 807 + let known_names = 808 + List.filter_map (fun spec -> if spec.name = "" then None else Some spec.name) members_ordered 809 + in 810 + (* Check for duplicate member names *) 811 + Option.iter (fun name -> invalid_arg ("duplicate member name: " ^ name)) (find_dup known_names); 812 + { 813 + kind = m.map_kind; 814 + doc = m.map_doc; 815 + dec = (function 816 + | Toml.Table pairs -> 817 + (* Build list of values in the order expected by the dec chain. 818 + m.members is in reverse declaration order, which matches 819 + how the dec chain was built (outer = last added). *) 820 + let vals = List.map (fun spec -> 821 + if spec.name = "" then 822 + (* Unknown members placeholder *) 823 + Toml.Table [] 824 + else 825 + match List.assoc_opt spec.name pairs with 826 + | Some v -> v 827 + | None -> 828 + match spec.dec_absent with 829 + | Some default -> default 830 + | None -> 831 + (* Will cause error during decoding *) 832 + Toml.Table [] 833 + ) m.members in 834 + (* Check for unknown members *) 835 + (match m.unknown with 836 + | Skip -> () 837 + | Error_on_unknown -> 838 + List.iter (fun (name, _) -> 839 + if not (List.mem name known_names) then 840 + raise (Toml.Error.Error (Toml.Error.make 841 + (Toml.Error.Semantic (Toml.Error.Duplicate_key name)))) 842 + ) pairs 843 + | Keep collector -> 844 + List.iter (fun (name, v) -> 845 + if not (List.mem name known_names) then 846 + collector name v 847 + ) pairs); 848 + (* Check for missing required members *) 849 + let missing = List.filter_map (fun spec -> 850 + if spec.name = "" then None 851 + else if spec.dec_absent = None && 852 + not (List.exists (fun (n, _) -> n = spec.name) pairs) then 853 + Some spec.name 854 + else None 855 + ) members_ordered in 856 + (match missing with 857 + | name :: _ -> Error (Missing_member name) 858 + | [] -> m.dec vals) 859 + | v -> Error (Type_mismatch { expected = "table"; got = type_name v })); 860 + enc = (fun o -> 861 + let pairs = List.filter_map (fun spec -> 862 + if spec.name = "" then None (* Skip unknown member placeholder *) 863 + else 864 + match spec.enc_typed with 865 + | None -> None 866 + | Some enc_info -> 867 + (* Check should_omit on original object, not encoded value *) 868 + if enc_info.mem_should_omit o then None 869 + else Some (spec.name, enc_info.mem_enc o) 870 + ) members_ordered in 871 + (* Add unknown members if keep_unknown was used *) 872 + let pairs = match m.keep_unknown_enc with 873 + | None -> pairs 874 + | Some get_unknown -> pairs @ get_unknown o 875 + in 876 + Toml.Table pairs); 877 + } 2405 878 2406 - let get_opt path t = 2407 - try Some (get path t) with Not_found | Invalid_argument _ -> None 879 + let finish m = finish_common ~inline:false m 880 + let inline m = finish_common ~inline:true m 881 + end 2408 882 2409 - let ( .%{} ) t path = get path t 883 + (* ---- Array of tables ---- *) 2410 884 2411 - let rec set_at_path path v t = 2412 - match path with 2413 - | [] -> v 2414 - | [key] -> 2415 - (match t with 2416 - | Table pairs -> 2417 - let pairs' = List.filter (fun (k, _) -> k <> key) pairs in 2418 - Table ((key, v) :: pairs') 2419 - | _ -> invalid_arg "Tomlt.(.%{}<-): not a table") 2420 - | key :: rest -> 2421 - match t with 2422 - | Table pairs -> 2423 - let existing = List.assoc_opt key pairs in 2424 - let subtable = match existing with 2425 - | Some (Table _ as sub) -> sub 2426 - | Some _ -> invalid_arg "Tomlt.(.%{}<-): intermediate value is not a table" 2427 - | None -> Table [] 885 + let array_of_tables ?kind ?doc c = 886 + let kind = Option.value ~default:("array of " ^ c.kind) kind in 887 + let doc = Option.value ~default:"" doc in 888 + { 889 + kind; doc; 890 + dec = (function 891 + | Toml.Array items -> 892 + let rec decode_items acc = function 893 + | [] -> Ok (List.rev acc) 894 + | item :: rest -> 895 + match c.dec item with 896 + | Ok v -> decode_items (v :: acc) rest 897 + | Error e -> Error e 2428 898 in 2429 - let updated = set_at_path rest v subtable in 2430 - let pairs' = List.filter (fun (k, _) -> k <> key) pairs in 2431 - Table ((key, updated) :: pairs') 2432 - | _ -> invalid_arg "Tomlt.(.%{}<-): not a table" 899 + decode_items [] items 900 + | v -> Error (Type_mismatch { expected = "array"; got = type_name v })); 901 + enc = (fun xs -> Toml.Array (List.map c.enc xs)); 902 + } 2433 903 2434 - let ( .%{}<- ) t path v = set_at_path path v t 904 + (* ---- Any / Generic value codecs ---- *) 2435 905 2436 - (* ============================================ 2437 - Public Interface - Encoding 2438 - ============================================ *) 906 + let value = { 907 + kind = "value"; 908 + doc = ""; 909 + dec = (fun v -> Ok v); 910 + enc = (fun v -> v); 911 + } 2439 912 2440 - let to_buffer buf value = 2441 - let w = Bytes.Writer.of_buffer buf in 2442 - encode_to_writer w value 2443 - 2444 - let to_toml_string value = 2445 - let buf = Buffer.create 256 in 2446 - to_buffer buf value; 2447 - Buffer.contents buf 913 + let value_mems = { 914 + kind = "value members"; 915 + doc = ""; 916 + dec = (function 917 + | Toml.Table pairs -> Ok pairs 918 + | v -> Error (Type_mismatch { expected = "table"; got = type_name v })); 919 + enc = (fun pairs -> Toml.Table pairs); 920 + } 2448 921 2449 - let to_writer = encode_to_writer 922 + let any ?kind ?doc ?dec_string ?dec_int ?dec_float ?dec_bool 923 + ?dec_datetime ?dec_array ?dec_table ?enc () = 924 + let kind = Option.value ~default:"any" kind in 925 + let doc = Option.value ~default:"" doc in 926 + let type_error expected got = 927 + Error (Type_mismatch { expected; got = type_name got }) 928 + in 929 + { 930 + kind; doc; 931 + dec = (fun v -> 932 + match v with 933 + | Toml.String _ -> 934 + (match dec_string with Some c -> c.dec v | None -> type_error "string" v) 935 + | Toml.Int _ -> 936 + (match dec_int with Some c -> c.dec v | None -> type_error "integer" v) 937 + | Toml.Float _ -> 938 + (match dec_float with Some c -> c.dec v | None -> type_error "float" v) 939 + | Toml.Bool _ -> 940 + (match dec_bool with Some c -> c.dec v | None -> type_error "boolean" v) 941 + | Toml.Datetime _ | Toml.Datetime_local _ 942 + | Toml.Date_local _ | Toml.Time_local _ -> 943 + (match dec_datetime with Some c -> c.dec v | None -> type_error "datetime" v) 944 + | Toml.Array _ -> 945 + (match dec_array with Some c -> c.dec v | None -> type_error "array" v) 946 + | Toml.Table _ -> 947 + (match dec_table with Some c -> c.dec v | None -> type_error "table" v)); 948 + enc = (fun v -> 949 + match enc with 950 + | Some selector -> (selector v).enc v 951 + | None -> failwith "any: enc not provided"); 952 + } 2450 953 2451 - (* ============================================ 2452 - Public Interface - Decoding 2453 - ============================================ *) 2454 - 2455 - let of_string input = 2456 - try 2457 - Ok (parse_toml input) 2458 - with 2459 - | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg))) 2460 - | Tomlt_error.Error e -> Error e 2461 - | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e)))) 2462 - 2463 - let of_reader ?file r = 2464 - try 2465 - Ok (parse_toml_from_reader ?file r) 2466 - with 2467 - | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg))) 2468 - | Tomlt_error.Error e -> Error e 2469 - | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e)))) 2470 - 2471 - let parse = parse_toml 2472 - 2473 - let parse_reader ?file r = parse_toml_from_reader ?file r 2474 - 2475 - (* ============================================ 2476 - Public Interface - Pretty Printing 2477 - ============================================ *) 2478 - 2479 - let rec pp_value fmt = function 2480 - | String s -> 2481 - Format.fprintf fmt "\"%s\"" (String.escaped s) 2482 - | Int i -> 2483 - Format.fprintf fmt "%Ld" i 2484 - | Float f -> 2485 - if Float.is_nan f then Format.fprintf fmt "nan" 2486 - else if f = Float.infinity then Format.fprintf fmt "inf" 2487 - else if f = Float.neg_infinity then Format.fprintf fmt "-inf" 2488 - else Format.fprintf fmt "%g" f 2489 - | Bool b -> 2490 - Format.fprintf fmt "%s" (if b then "true" else "false") 2491 - | Datetime s | Datetime_local s | Date_local s | Time_local s -> 2492 - Format.fprintf fmt "%s" s 2493 - | Array items -> 2494 - Format.fprintf fmt "["; 2495 - List.iteri (fun i item -> 2496 - if i > 0 then Format.fprintf fmt ", "; 2497 - pp_value fmt item 2498 - ) items; 2499 - Format.fprintf fmt "]" 2500 - | Table pairs -> 2501 - Format.fprintf fmt "{"; 2502 - List.iteri (fun i (k, v) -> 2503 - if i > 0 then Format.fprintf fmt ", "; 2504 - Format.fprintf fmt "%s = " k; 2505 - pp_value fmt v 2506 - ) pairs; 2507 - Format.fprintf fmt "}" 954 + (* ---- Encoding and decoding ---- *) 2508 955 2509 - let pp fmt t = 2510 - Format.fprintf fmt "%s" (to_toml_string t) 956 + let to_tomlt_error e = 957 + Toml.Error.make (Toml.Error.Semantic (Toml.Error.Duplicate_key (codec_error_to_string e))) 2511 958 2512 - (* ============================================ 2513 - Public Interface - Equality and Comparison 2514 - ============================================ *) 959 + let decode c v = Result.map_error to_tomlt_error (c.dec v) 2515 960 2516 - let rec equal a b = 2517 - match a, b with 2518 - | String s1, String s2 -> String.equal s1 s2 2519 - | Int i1, Int i2 -> Int64.equal i1 i2 2520 - | Float f1, Float f2 -> 2521 - (* NaN = NaN for TOML equality *) 2522 - (Float.is_nan f1 && Float.is_nan f2) || Float.equal f1 f2 2523 - | Bool b1, Bool b2 -> Bool.equal b1 b2 2524 - | Datetime s1, Datetime s2 -> String.equal s1 s2 2525 - | Datetime_local s1, Datetime_local s2 -> String.equal s1 s2 2526 - | Date_local s1, Date_local s2 -> String.equal s1 s2 2527 - | Time_local s1, Time_local s2 -> String.equal s1 s2 2528 - | Array vs1, Array vs2 -> 2529 - List.length vs1 = List.length vs2 && 2530 - List.for_all2 equal vs1 vs2 2531 - | Table ps1, Table ps2 -> 2532 - List.length ps1 = List.length ps2 && 2533 - List.for_all2 (fun (k1, v1) (k2, v2) -> 2534 - String.equal k1 k2 && equal v1 v2 2535 - ) ps1 ps2 2536 - | _ -> false 961 + let decode_exn c v = 962 + match c.dec v with 963 + | Ok x -> x 964 + | Error e -> raise (Toml.Error.Error (to_tomlt_error e)) 2537 965 2538 - let type_order = function 2539 - | String _ -> 0 2540 - | Int _ -> 1 2541 - | Float _ -> 2 2542 - | Bool _ -> 3 2543 - | Datetime _ -> 4 2544 - | Datetime_local _ -> 5 2545 - | Date_local _ -> 6 2546 - | Time_local _ -> 7 2547 - | Array _ -> 8 2548 - | Table _ -> 9 966 + let encode c v = c.enc v 2549 967 2550 - let rec compare a b = 2551 - let ta, tb = type_order a, type_order b in 2552 - if ta <> tb then Int.compare ta tb 2553 - else match a, b with 2554 - | String s1, String s2 -> String.compare s1 s2 2555 - | Int i1, Int i2 -> Int64.compare i1 i2 2556 - | Float f1, Float f2 -> Float.compare f1 f2 2557 - | Bool b1, Bool b2 -> Bool.compare b1 b2 2558 - | Datetime s1, Datetime s2 -> String.compare s1 s2 2559 - | Datetime_local s1, Datetime_local s2 -> String.compare s1 s2 2560 - | Date_local s1, Date_local s2 -> String.compare s1 s2 2561 - | Time_local s1, Time_local s2 -> String.compare s1 s2 2562 - | Array vs1, Array vs2 -> 2563 - List.compare compare vs1 vs2 2564 - | Table ps1, Table ps2 -> 2565 - List.compare (fun (k1, v1) (k2, v2) -> 2566 - let c = String.compare k1 k2 in 2567 - if c <> 0 then c else compare v1 v2 2568 - ) ps1 ps2 2569 - | _ -> 0 (* Impossible - handled by type_order check *) 968 + let decode_string c s = Result.bind (Toml.of_string s) (decode c) 2570 969 2571 - (* ============================================ 2572 - Error Module 2573 - ============================================ *) 970 + let decode_string_exn c s = 971 + let toml = Toml.parse s in 972 + decode_exn c toml 2574 973 2575 - module Error = Tomlt_error 974 + let encode_string c v = 975 + let toml = encode c v in 976 + Toml.to_toml_string toml 2576 977 2577 - (* ============================================ 2578 - Internal Module (for testing) 2579 - ============================================ *) 978 + let decode_reader ?file c r = Result.bind (Toml.of_reader ?file r) (decode c) 2580 979 2581 - module Internal = struct 2582 - let to_tagged_json = toml_to_tagged_json 2583 - let of_tagged_json = decode_tagged_json_string 980 + let encode_writer c v w = 981 + let toml = encode c v in 982 + Toml.to_writer w toml 2584 983 2585 - let encode_from_tagged_json json_str = 2586 - try 2587 - let toml = decode_tagged_json_string json_str in 2588 - Ok (to_toml_string toml) 2589 - with 2590 - | Failure msg -> Error msg 2591 - | e -> Error (Printexc.to_string e) 2592 - end 984 + (* Re-export the Toml module for accessing raw TOML values *) 985 + module Toml = Toml 986 + module Error = Toml.Error
+428 -231
lib/tomlt.mli
··· 1 1 (*--------------------------------------------------------------------------- 2 - Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 - SPDX-License-Identifier: ISC 4 - ---------------------------------------------------------------------------*) 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 5 6 - (** TOML 1.1 codec. 6 + (** Declarative TOML 1.1 codecs. 7 7 8 - Tomlt provides TOML 1.1 parsing and encoding with efficient streaming 9 - support via {{:https://erratique.ch/software/bytesrw}Bytesrw}. 8 + Tomlt provides a type-safe, bidirectional codec system for TOML files, 9 + inspired by {{:https://erratique.ch/software/jsont}Jsont}'s approach 10 + to JSON codecs. 10 11 11 12 {2 Quick Start} 12 13 13 - Parse a TOML string: 14 - {[ 15 - let config = Tomlt.of_string {| 16 - [server] 14 + Define a codec for your OCaml types: 15 + {v 16 + type config = { host : string; port : int; debug : bool } 17 + 18 + let config_codec = 19 + Tomlt.(Table.( 20 + obj (fun host port debug -> { host; port; debug }) 21 + |> mem "host" string ~enc:(fun c -> c.host) 22 + |> mem "port" int ~enc:(fun c -> c.port) 23 + |> mem "debug" bool ~enc:(fun c -> c.debug) ~dec_absent:false 24 + |> finish 25 + )) 26 + 27 + let () = 28 + match Tomlt.decode_string config_codec {| 17 29 host = "localhost" 18 30 port = 8080 19 - |} in 20 - match config with 21 - | Ok t -> 22 - let host = Tomlt.(t.%{"server"; "host"} |> to_string) in 23 - let port = Tomlt.(t.%{"server"; "port"} |> to_int) in 24 - Printf.printf "Server: %s:%Ld\n" host port 25 - | Error e -> prerr_endline (Tomlt.Error.to_string e) 26 - ]} 31 + |} with 32 + | Ok config -> Printf.printf "Host: %s\n" config.host 33 + | Error e -> prerr_endline (Tomlt.Toml.Error.to_string e) 34 + v} 35 + 36 + {2 Codec Pattern} 37 + 38 + Each codec ['a t] defines: 39 + - A decoder: [Toml.t -> ('a, error) result] 40 + - An encoder: ['a -> Toml.t] 27 41 28 - Create and encode TOML: 29 - {[ 30 - let config = Tomlt.(table [ 31 - "title", string "My App"; 32 - "database", table [ 33 - "host", string "localhost"; 34 - "ports", array [int 5432L; int 5433L] 35 - ] 36 - ]) in 37 - print_endline (Tomlt.to_string config) 38 - ]} 42 + Codecs compose through combinators to build complex types from 43 + simple primitives. 39 44 40 45 {2 Module Overview} 41 46 42 - - {!section:types} - TOML value representation 43 - - {!section:construct} - Value constructors 44 - - {!section:access} - Value accessors and type conversion 45 - - {!section:navigate} - Table navigation 46 - - {!section:decode} - Parsing from strings and readers 47 - - {!section:encode} - Encoding to strings and writers 48 - - {!module:Error} - Structured error types *) 47 + - {!section:datetime} - Structured datetime types 48 + - {!section:codec} - Core codec type and combinators 49 + - {!section:base} - Primitive type codecs 50 + - {!section:combinators} - Codec transformers 51 + - {!section:arrays} - Array codec builders 52 + - {!section:tables} - Table/object codec builders 53 + - {!section:codec_ops} - Encoding and decoding operations *) 49 54 50 - open Bytesrw 55 + (** {1:datetime Structured Datetime Types} 51 56 52 - (** {1:types TOML Value Types} *) 57 + TOML 1.1 supports four datetime formats. These modules provide 58 + structured representations for parsing and formatting. *) 53 59 54 - (** The type of TOML values. 60 + (** Timezone offsets for TOML offset datetimes. 55 61 56 - TOML supports the following value types: 57 - - Strings (UTF-8 encoded) 58 - - Integers (64-bit signed) 59 - - Floats (IEEE 754 double precision) 60 - - Booleans 61 - - Offset date-times (RFC 3339 with timezone) 62 - - Local date-times (no timezone) 63 - - Local dates 64 - - Local times 65 - - Arrays (heterogeneous in TOML 1.1) 66 - - Tables (string-keyed maps) *) 67 - type t = 68 - | String of string 69 - | Int of int64 70 - | Float of float 71 - | Bool of bool 72 - | Datetime of string (** Offset datetime, e.g. [1979-05-27T07:32:00Z] *) 73 - | Datetime_local of string (** Local datetime, e.g. [1979-05-27T07:32:00] *) 74 - | Date_local of string (** Local date, e.g. [1979-05-27] *) 75 - | Time_local of string (** Local time, e.g. [07:32:00] *) 76 - | Array of t list 77 - | Table of (string * t) list 78 - (** A TOML value. Tables preserve key insertion order. *) 62 + Per RFC 3339, timezones are expressed as [Z] (UTC) or as 63 + [+HH:MM] / [-HH:MM] offsets from UTC. *) 64 + module Tz : sig 65 + (** Timezone offset representation. *) 66 + type t = 67 + | UTC (** UTC timezone, written as [Z] *) 68 + | Offset of { hours : int; minutes : int } (** Fixed offset from UTC *) 79 69 80 - (** {1:construct Value Constructors} 70 + val utc : t 71 + (** [utc] is the UTC timezone. *) 81 72 82 - These functions create TOML values. Use them to build TOML documents 83 - programmatically. *) 73 + val offset : hours:int -> minutes:int -> t 74 + (** [offset ~hours ~minutes] creates a fixed UTC offset. 75 + Hours may be negative for western timezones. *) 84 76 85 - val string : string -> t 86 - (** [string s] creates a string value. *) 77 + val equal : t -> t -> bool 78 + (** [equal a b] is structural equality. *) 87 79 88 - val int : int64 -> t 89 - (** [int i] creates an integer value. *) 80 + val compare : t -> t -> int 81 + (** [compare a b] is a total ordering. *) 90 82 91 - val int_of_int : int -> t 92 - (** [int_of_int i] creates an integer value from an [int]. *) 83 + val to_string : t -> string 84 + (** [to_string tz] formats as ["Z"] or ["+HH:MM"]/["-HH:MM"]. *) 93 85 94 - val float : float -> t 95 - (** [float f] creates a float value. *) 86 + val pp : Format.formatter -> t -> unit 87 + (** [pp fmt tz] pretty-prints the timezone. *) 96 88 97 - val bool : bool -> t 98 - (** [bool b] creates a boolean value. *) 89 + val of_string : string -> (t, string) result 90 + (** [of_string s] parses ["Z"], ["+HH:MM"], or ["-HH:MM"]. *) 91 + end 99 92 100 - val array : t list -> t 101 - (** [array vs] creates an array value from a list of values. 102 - TOML 1.1 allows heterogeneous arrays. *) 93 + (** Local dates (no timezone information). 103 94 104 - val table : (string * t) list -> t 105 - (** [table pairs] creates a table value from key-value pairs. 106 - Keys should be unique; later bindings shadow earlier ones during lookup. *) 95 + Represents a calendar date like [1979-05-27]. *) 96 + module Date : sig 97 + type t = { year : int; month : int; day : int } 98 + (** A calendar date with year (4 digits), month (1-12), and day (1-31). *) 107 99 108 - val datetime : string -> t 109 - (** [datetime s] creates an offset datetime value. 110 - The string should be in RFC 3339 format with timezone, 111 - e.g. ["1979-05-27T07:32:00Z"] or ["1979-05-27T07:32:00-07:00"]. *) 100 + val make : year:int -> month:int -> day:int -> t 101 + (** [make ~year ~month ~day] creates a date value. *) 112 102 113 - val datetime_local : string -> t 114 - (** [datetime_local s] creates a local datetime value (no timezone). 115 - E.g. ["1979-05-27T07:32:00"]. *) 103 + val equal : t -> t -> bool 104 + val compare : t -> t -> int 105 + val to_string : t -> string 106 + (** [to_string d] formats as ["YYYY-MM-DD"]. *) 107 + 108 + val pp : Format.formatter -> t -> unit 109 + val of_string : string -> (t, string) result 110 + (** [of_string s] parses ["YYYY-MM-DD"] format. *) 111 + end 112 + 113 + (** Local times (no date or timezone). 114 + 115 + Represents a time of day like [07:32:00] or [07:32:00.999999]. *) 116 + module Time : sig 117 + type t = { 118 + hour : int; (** Hour (0-23) *) 119 + minute : int; (** Minute (0-59) *) 120 + second : int; (** Second (0-59, 60 for leap seconds) *) 121 + frac : float; (** Fractional seconds [0.0, 1.0) *) 122 + } 123 + 124 + val make : hour:int -> minute:int -> second:int -> ?frac:float -> unit -> t 125 + (** [make ~hour ~minute ~second ?frac ()] creates a time value. 126 + [frac] defaults to [0.0]. *) 127 + 128 + val equal : t -> t -> bool 129 + val compare : t -> t -> int 130 + val to_string : t -> string 131 + (** [to_string t] formats as ["HH:MM:SS"] or ["HH:MM:SS.fff"]. *) 132 + 133 + val pp : Format.formatter -> t -> unit 134 + val of_string : string -> (t, string) result 135 + end 136 + 137 + (** Offset datetimes (date + time + timezone). 138 + 139 + The complete datetime format per RFC 3339, like 140 + [1979-05-27T07:32:00Z] or [1979-05-27T07:32:00-07:00]. *) 141 + module Datetime : sig 142 + type t = { date : Date.t; time : Time.t; tz : Tz.t } 143 + 144 + val make : date:Date.t -> time:Time.t -> tz:Tz.t -> t 145 + val equal : t -> t -> bool 146 + val compare : t -> t -> int 147 + val to_string : t -> string 148 + val pp : Format.formatter -> t -> unit 149 + val of_string : string -> (t, string) result 150 + end 151 + 152 + (** Local datetimes (date + time, no timezone). 153 + 154 + Like [1979-05-27T07:32:00] - a datetime with no timezone 155 + information, representing "wall clock" time. *) 156 + module Datetime_local : sig 157 + type t = { date : Date.t; time : Time.t } 158 + 159 + val make : date:Date.t -> time:Time.t -> t 160 + val equal : t -> t -> bool 161 + val compare : t -> t -> int 162 + val to_string : t -> string 163 + val pp : Format.formatter -> t -> unit 164 + val of_string : string -> (t, string) result 165 + end 166 + 167 + (** {1:codec Codec Types} *) 168 + 169 + (** Errors that can occur during codec operations. *) 170 + type codec_error = 171 + | Type_mismatch of { expected : string; got : string } 172 + (** TOML value was not the expected type *) 173 + | Missing_member of string 174 + (** Required table member was not present *) 175 + | Unknown_member of string 176 + (** Unknown member found (when using [error_unknown]) *) 177 + | Value_error of string 178 + (** Value failed validation or parsing *) 179 + | Int_overflow of int64 180 + (** Integer value exceeds OCaml [int] range *) 181 + | Parse_error of string 182 + (** Parsing failed *) 183 + 184 + val codec_error_to_string : codec_error -> string 185 + (** [codec_error_to_string e] returns a human-readable error message. *) 186 + 187 + (** The type of TOML codecs. 116 188 117 - val date_local : string -> t 118 - (** [date_local s] creates a local date value. 119 - E.g. ["1979-05-27"]. *) 189 + A value of type ['a t] can decode TOML values to type ['a] 190 + and encode values of type ['a] to TOML. *) 191 + type 'a t 120 192 121 - val time_local : string -> t 122 - (** [time_local s] creates a local time value. 123 - E.g. ["07:32:00"] or ["07:32:00.999"]. *) 193 + val kind : 'a t -> string 194 + (** [kind c] returns the kind description of codec [c]. *) 124 195 125 - (** {1:access Value Accessors} 196 + val doc : 'a t -> string 197 + (** [doc c] returns the documentation string of codec [c]. *) 126 198 127 - These functions extract OCaml values from TOML values. 128 - They raise [Invalid_argument] if the value is not of the expected type. *) 199 + val with_doc : ?kind:string -> ?doc:string -> 'a t -> 'a t 200 + (** [with_doc ?kind ?doc c] returns a codec with updated metadata. *) 129 201 130 - val to_string : t -> string 131 - (** [to_string t] returns the string if [t] is a [String]. 132 - @raise Invalid_argument if [t] is not a string. *) 202 + (** {1:base Base Type Codecs} 133 203 134 - val to_string_opt : t -> string option 135 - (** [to_string_opt t] returns [Some s] if [t] is [String s], [None] otherwise. *) 204 + Primitive codecs for TOML's basic value types. *) 136 205 137 - val to_int : t -> int64 138 - (** [to_int t] returns the integer if [t] is an [Int]. 139 - @raise Invalid_argument if [t] is not an integer. *) 206 + val bool : bool t 207 + (** Codec for TOML booleans. *) 140 208 141 - val to_int_opt : t -> int64 option 142 - (** [to_int_opt t] returns [Some i] if [t] is [Int i], [None] otherwise. *) 209 + val int : int t 210 + (** Codec for TOML integers to OCaml [int]. 211 + @raise Int_overflow if the value exceeds platform [int] range. *) 143 212 144 - val to_float : t -> float 145 - (** [to_float t] returns the float if [t] is a [Float]. 146 - @raise Invalid_argument if [t] is not a float. *) 213 + val int32 : int32 t 214 + (** Codec for TOML integers to [int32]. *) 147 215 148 - val to_float_opt : t -> float option 149 - (** [to_float_opt t] returns [Some f] if [t] is [Float f], [None] otherwise. *) 216 + val int64 : int64 t 217 + (** Codec for TOML integers to [int64]. *) 218 + 219 + val float : float t 220 + (** Codec for TOML floats. Handles [inf], [-inf], and [nan]. *) 221 + 222 + val number : float t 223 + (** Codec that accepts both TOML integers and floats as [float]. 224 + Integers are converted to floats during decoding. *) 225 + 226 + val string : string t 227 + (** Codec for TOML strings (UTF-8 encoded). *) 150 228 151 - val to_bool : t -> bool 152 - (** [to_bool t] returns the boolean if [t] is a [Bool]. 153 - @raise Invalid_argument if [t] is not a boolean. *) 229 + val datetime : Datetime.t t 230 + (** Codec for offset datetimes like [1979-05-27T07:32:00Z]. *) 154 231 155 - val to_bool_opt : t -> bool option 156 - (** [to_bool_opt t] returns [Some b] if [t] is [Bool b], [None] otherwise. *) 232 + val datetime_local : Datetime_local.t t 233 + (** Codec for local datetimes like [1979-05-27T07:32:00]. *) 157 234 158 - val to_array : t -> t list 159 - (** [to_array t] returns the list if [t] is an [Array]. 160 - @raise Invalid_argument if [t] is not an array. *) 235 + val date_local : Date.t t 236 + (** Codec for local dates like [1979-05-27]. *) 161 237 162 - val to_array_opt : t -> t list option 163 - (** [to_array_opt t] returns [Some vs] if [t] is [Array vs], [None] otherwise. *) 238 + val time_local : Time.t t 239 + (** Codec for local times like [07:32:00]. *) 164 240 165 - val to_table : t -> (string * t) list 166 - (** [to_table t] returns the association list if [t] is a [Table]. 167 - @raise Invalid_argument if [t] is not a table. *) 241 + val datetime_string : string t 242 + (** Codec for any datetime type as a raw string. 243 + Decodes any datetime variant; encodes as offset datetime. *) 168 244 169 - val to_table_opt : t -> (string * t) list option 170 - (** [to_table_opt t] returns [Some pairs] if [t] is [Table pairs], [None] otherwise. *) 245 + (** {1:combinators Codec Combinators} *) 171 246 172 - val to_datetime : t -> string 173 - (** [to_datetime t] returns the datetime string for any datetime type. 174 - @raise Invalid_argument if [t] is not a datetime variant. *) 247 + val map : 248 + ?kind:string -> ?doc:string -> 249 + ?dec:('a -> 'b) -> ?enc:('b -> 'a) -> 250 + 'a t -> 'b t 251 + (** [map ?dec ?enc c] transforms codec [c] through functions. 252 + [dec] transforms decoded values; [enc] transforms values before encoding. *) 175 253 176 - val to_datetime_opt : t -> string option 177 - (** [to_datetime_opt t] returns [Some s] if [t] is any datetime variant. *) 254 + val const : ?kind:string -> ?doc:string -> 'a -> 'a t 255 + (** [const v] is a codec that always decodes to [v] and encodes as empty. *) 178 256 179 - (** {2 Type Predicates} *) 257 + val enum : ?cmp:('a -> 'a -> int) -> ?kind:string -> ?doc:string -> 258 + (string * 'a) list -> 'a t 259 + (** [enum assoc] creates a codec for string enumerations. 260 + @param cmp Comparison function for finding values during encoding. 261 + @param assoc List of [(string, value)] pairs. *) 180 262 181 - val is_string : t -> bool 182 - (** [is_string t] is [true] iff [t] is a [String]. *) 263 + val option : ?kind:string -> ?doc:string -> 'a t -> 'a option t 264 + (** [option c] wraps codec [c] to decode [Some v] or encode [None] as omitted. *) 183 265 184 - val is_int : t -> bool 185 - (** [is_int t] is [true] iff [t] is an [Int]. *) 266 + val result : ok:'a t -> error:'b t -> ('a, 'b) result t 267 + (** [result ~ok ~error] tries [ok] first, then [error]. *) 186 268 187 - val is_float : t -> bool 188 - (** [is_float t] is [true] iff [t] is a [Float]. *) 269 + val rec' : 'a t Lazy.t -> 'a t 270 + (** [rec' lazy_c] creates a recursive codec. 271 + Use for self-referential types: 272 + {v 273 + let rec tree = lazy Tomlt.( 274 + Table.(obj (fun v children -> Node (v, children)) 275 + |> mem "value" int ~enc:(function Node (v, _) -> v) 276 + |> mem "children" (list (rec' tree)) ~enc:(function Node (_, cs) -> cs) 277 + |> finish)) 278 + v} *) 189 279 190 - val is_bool : t -> bool 191 - (** [is_bool t] is [true] iff [t] is a [Bool]. *) 280 + (** {1:arrays Array Codecs} 192 281 193 - val is_array : t -> bool 194 - (** [is_array t] is [true] iff [t] is an [Array]. *) 282 + Build codecs for TOML arrays. *) 195 283 196 - val is_table : t -> bool 197 - (** [is_table t] is [true] iff [t] is a [Table]. *) 284 + module Array : sig 285 + type 'a codec = 'a t 198 286 199 - val is_datetime : t -> bool 200 - (** [is_datetime t] is [true] iff [t] is any datetime variant. *) 287 + (** Encoder specification for arrays. *) 288 + type ('array, 'elt) enc = { 289 + fold : 'acc. ('acc -> 'elt -> 'acc) -> 'acc -> 'array -> 'acc 290 + } 201 291 202 - (** {1:navigate Table Navigation} 292 + (** Array codec builder. *) 293 + type ('array, 'elt, 'builder) map 203 294 204 - Functions for navigating and querying TOML tables. *) 295 + val map : 296 + ?kind:string -> ?doc:string -> 297 + ?dec_empty:(unit -> 'builder) -> 298 + ?dec_add:('elt -> 'builder -> 'builder) -> 299 + ?dec_finish:('builder -> 'array) -> 300 + ?enc:('array, 'elt) enc -> 301 + 'elt codec -> ('array, 'elt, 'builder) map 302 + (** [map elt] creates an array codec builder for elements of type ['elt]. *) 205 303 206 - val find : string -> t -> t 207 - (** [find key t] returns the value associated with [key] in table [t]. 208 - @raise Invalid_argument if [t] is not a table. 209 - @raise Not_found if [key] is not in the table. *) 304 + val list : ?kind:string -> ?doc:string -> 'a codec -> ('a list, 'a, 'a list) map 305 + (** [list c] builds lists from arrays of elements decoded by [c]. *) 210 306 211 - val find_opt : string -> t -> t option 212 - (** [find_opt key t] returns [Some v] if [key] maps to [v] in table [t], 213 - or [None] if [key] is not bound or [t] is not a table. *) 307 + val array : ?kind:string -> ?doc:string -> 'a codec -> ('a array, 'a, 'a list) map 308 + (** [array c] builds arrays from arrays of elements decoded by [c]. *) 214 309 215 - val mem : string -> t -> bool 216 - (** [mem key t] is [true] if [key] is bound in table [t], [false] otherwise. 217 - Returns [false] if [t] is not a table. *) 310 + val finish : ('array, 'elt, 'builder) map -> 'array codec 311 + (** [finish m] completes the array codec. *) 312 + end 218 313 219 - val keys : t -> string list 220 - (** [keys t] returns all keys in table [t]. 221 - @raise Invalid_argument if [t] is not a table. *) 314 + val list : ?kind:string -> ?doc:string -> 'a t -> 'a list t 315 + (** [list c] is a codec for TOML arrays as OCaml lists. *) 222 316 223 - val get : string list -> t -> t 224 - (** [get path t] navigates through nested tables following [path]. 225 - For example, [get ["server"; "port"] t] returns [t.server.port]. 226 - @raise Invalid_argument if any intermediate value is not a table. 227 - @raise Not_found if any key in [path] is not found. *) 317 + val array : ?kind:string -> ?doc:string -> 'a t -> 'a array t 318 + (** [array c] is a codec for TOML arrays as OCaml arrays. *) 228 319 229 - val get_opt : string list -> t -> t option 230 - (** [get_opt path t] is like [get] but returns [None] on any error. *) 320 + (** {1:tables Table Codecs} 231 321 232 - val ( .%{} ) : t -> string list -> t 233 - (** [t.%{path}] is [get path t]. 322 + Build codecs for TOML tables (objects). The applicative-style 323 + builder pattern allows defining bidirectional codecs declaratively. 234 324 235 - Example: [config.%{["database"; "port"]}] 325 + {2 Basic Usage} 236 326 237 - @raise Invalid_argument if any intermediate value is not a table. 238 - @raise Not_found if any key in the path is not found. *) 327 + {v 328 + type person = { name : string; age : int } 239 329 240 - val ( .%{}<- ) : t -> string list -> t -> t 241 - (** [t.%{path} <- v] returns a new table with value [v] at [path]. 242 - Creates intermediate tables as needed. 330 + let person_codec = Tomlt.Table.( 331 + obj (fun name age -> { name; age }) 332 + |> mem "name" Tomlt.string ~enc:(fun p -> p.name) 333 + |> mem "age" Tomlt.int ~enc:(fun p -> p.age) 334 + |> finish 335 + ) 336 + v} *) 243 337 244 - Example: [config.%{["server"; "host"]} <- string "localhost"] 338 + module Table : sig 339 + type 'a codec = 'a t 245 340 246 - @raise Invalid_argument if [t] is not a table or if an intermediate 247 - value exists but is not a table. *) 341 + (** {2 Member Specifications} *) 248 342 249 - (** {1:decode Decoding (Parsing)} 343 + module Mem : sig 344 + type 'a codec = 'a t 345 + type ('o, 'a) t 346 + (** A member specification for type ['a] within object type ['o]. *) 250 347 251 - Parse TOML from various sources. *) 348 + val v : 349 + ?doc:string -> 350 + ?dec_absent:'a -> 351 + ?enc:('o -> 'a) -> 352 + ?enc_omit:('a -> bool) -> 353 + string -> 'a codec -> ('o, 'a) t 354 + (** [v name codec] creates a member specification. 355 + @param doc Documentation for this member. 356 + @param dec_absent Default value if member is absent (makes it optional). 357 + @param enc Encoder function from object to member value. 358 + @param enc_omit Predicate to omit member during encoding. *) 252 359 253 - val of_string : string -> (t, Tomlt_error.t) result 254 - (** [of_string s] parses [s] as a TOML document. *) 360 + val opt : 361 + ?doc:string -> 362 + ?enc:('o -> 'a option) -> 363 + string -> 'a codec -> ('o, 'a option) t 364 + (** [opt name codec] creates an optional member that decodes to [None] 365 + when absent and is omitted when encoding [None]. *) 366 + end 255 367 256 - val of_reader : ?file:string -> Bytes.Reader.t -> (t, Tomlt_error.t) result 257 - (** [of_reader r] parses a TOML document from reader [r]. 258 - @param file Optional filename for error messages. *) 368 + (** {2 Table Builder} *) 259 369 260 - val parse : string -> t 261 - (** [parse s] parses [s] as a TOML document. 262 - @raise Error.Error on parse errors. *) 370 + type ('o, 'dec) map 371 + (** Builder state for a table codec producing ['o], currently decoding ['dec]. *) 263 372 264 - val parse_reader : ?file:string -> Bytes.Reader.t -> t 265 - (** [parse_reader r] parses a TOML document from reader [r]. 266 - @param file Optional filename for error messages. 267 - @raise Error.Error on parse errors. *) 373 + val obj : ?kind:string -> ?doc:string -> 'dec -> ('o, 'dec) map 374 + (** [obj f] starts building a table codec with decoder function [f]. 268 375 269 - (** {1:encode Encoding} 376 + The function [f] receives each member's decoded value as arguments 377 + and returns the final decoded object. Build incrementally with [mem]: 378 + {v 379 + obj (fun a b c -> { a; b; c }) 380 + |> mem "a" codec_a ~enc:... 381 + |> mem "b" codec_b ~enc:... 382 + |> mem "c" codec_c ~enc:... 383 + |> finish 384 + v} *) 270 385 271 - Encode TOML values to various outputs. *) 386 + val obj' : ?kind:string -> ?doc:string -> (unit -> 'dec) -> ('o, 'dec) map 387 + (** [obj' f] is like [obj] but [f] is a thunk for side-effecting decoders. *) 272 388 273 - val to_toml_string : t -> string 274 - (** [to_toml_string t] encodes [t] as a TOML document string. 275 - @raise Invalid_argument if [t] is not a [Table]. *) 389 + val mem : 390 + ?doc:string -> 391 + ?dec_absent:'a -> 392 + ?enc:('o -> 'a) -> 393 + ?enc_omit:('a -> bool) -> 394 + string -> 'a codec -> ('o, 'a -> 'dec) map -> ('o, 'dec) map 395 + (** [mem name codec m] adds a member to the table builder. 276 396 277 - val to_buffer : Buffer.t -> t -> unit 278 - (** [to_buffer buf t] writes [t] as TOML to buffer [buf]. 279 - @raise Invalid_argument if [t] is not a [Table]. *) 397 + @param name The TOML key name. 398 + @param codec The codec for the member's value. 399 + @param doc Documentation string. 400 + @param dec_absent Default value if absent (makes member optional). 401 + @param enc Extractor function for encoding. 402 + @param enc_omit Predicate; if [true], omit member during encoding. *) 280 403 281 - val to_writer : Bytes.Writer.t -> t -> unit 282 - (** [to_writer w t] writes [t] as TOML to writer [w]. 283 - Useful for streaming output without building the full string in memory. 284 - @raise Invalid_argument if [t] is not a [Table]. *) 404 + val opt_mem : 405 + ?doc:string -> 406 + ?enc:('o -> 'a option) -> 407 + string -> 'a codec -> ('o, 'a option -> 'dec) map -> ('o, 'dec) map 408 + (** [opt_mem name codec m] adds an optional member. 409 + Absent members decode as [None]; [None] values are omitted on encode. *) 285 410 286 - (** {1:pp Pretty Printing} *) 411 + (** {2 Unknown Member Handling} *) 287 412 288 - val pp : Format.formatter -> t -> unit 289 - (** [pp fmt t] pretty-prints [t] in TOML format. *) 413 + val skip_unknown : ('o, 'dec) map -> ('o, 'dec) map 414 + (** [skip_unknown m] ignores unknown members (the default). *) 290 415 291 - val pp_value : Format.formatter -> t -> unit 292 - (** [pp_value fmt t] pretty-prints a single TOML value (not a full document). 293 - Useful for debugging. Tables are printed as inline tables. *) 416 + val error_unknown : ('o, 'dec) map -> ('o, 'dec) map 417 + (** [error_unknown m] raises an error on unknown members. *) 294 418 295 - val equal : t -> t -> bool 296 - (** [equal a b] is structural equality on TOML values. 297 - NaN floats are considered equal to each other. *) 419 + (** Collection of unknown members. *) 420 + module Mems : sig 421 + type 'a codec = 'a t 298 422 299 - val compare : t -> t -> int 300 - (** [compare a b] is a total ordering on TOML values. *) 423 + type ('mems, 'a) enc = { 424 + fold : 'acc. ('acc -> string -> 'a -> 'acc) -> 'acc -> 'mems -> 'acc 425 + } 301 426 302 - (** {1:errors Error Handling} *) 427 + type ('mems, 'a, 'builder) map 303 428 304 - module Error = Tomlt_error 305 - (** Structured error types for TOML parsing and encoding. 429 + val map : 430 + ?kind:string -> ?doc:string -> 431 + ?dec_empty:(unit -> 'builder) -> 432 + ?dec_add:(string -> 'a -> 'builder -> 'builder) -> 433 + ?dec_finish:('builder -> 'mems) -> 434 + ?enc:('mems, 'a) enc -> 435 + 'a codec -> ('mems, 'a, 'builder) map 306 436 307 - See {!Tomlt_error} for detailed documentation. *) 437 + val string_map : ?kind:string -> ?doc:string -> 438 + 'a codec -> ('a Map.Make(String).t, 'a, (string * 'a) list) map 439 + (** [string_map codec] collects unknown members into a [StringMap]. *) 308 440 309 - (** {1:internal Internal} 441 + val assoc : ?kind:string -> ?doc:string -> 442 + 'a codec -> ((string * 'a) list, 'a, (string * 'a) list) map 443 + (** [assoc codec] collects unknown members into an association list. *) 444 + end 310 445 311 - These functions are primarily for testing and interoperability. 312 - They may change between versions. *) 446 + val keep_unknown : 447 + ?enc:('o -> 'mems) -> 448 + ('mems, 'a, 'builder) Mems.map -> 449 + ('o, 'mems -> 'dec) map -> ('o, 'dec) map 450 + (** [keep_unknown mems m] collects unknown members. 313 451 314 - module Internal : sig 315 - val to_tagged_json : t -> string 316 - (** Convert TOML value to tagged JSON format used by toml-test. *) 452 + Unknown members are decoded using [mems] and passed to the decoder. 453 + If [enc] is provided, those members are included during encoding. *) 317 454 318 - val of_tagged_json : string -> t 319 - (** Parse tagged JSON format into TOML value. *) 455 + val finish : ('o, 'o) map -> 'o codec 456 + (** [finish m] completes the table codec. 457 + @raise Invalid_argument if member names are duplicated. *) 320 458 321 - val encode_from_tagged_json : string -> (string, string) result 322 - (** Convert tagged JSON to TOML string. For toml-test encoder. *) 459 + val inline : ('o, 'o) map -> 'o codec 460 + (** [inline m] is like [finish] but marks the table for inline encoding. *) 323 461 end 462 + 463 + val array_of_tables : ?kind:string -> ?doc:string -> 'a t -> 'a list t 464 + (** [array_of_tables c] decodes a TOML array of tables. 465 + This corresponds to TOML's [[[ ]]] syntax. *) 466 + 467 + (** {1 Generic Value Codecs} *) 468 + 469 + val value : Toml.t t 470 + (** [value] passes TOML values through unchanged. *) 471 + 472 + val value_mems : (string * Toml.t) list t 473 + (** [value_mems] decodes a table as raw key-value pairs. *) 474 + 475 + val any : 476 + ?kind:string -> ?doc:string -> 477 + ?dec_string:'a t -> ?dec_int:'a t -> ?dec_float:'a t -> ?dec_bool:'a t -> 478 + ?dec_datetime:'a t -> ?dec_array:'a t -> ?dec_table:'a t -> 479 + ?enc:('a -> 'a t) -> 480 + unit -> 'a t 481 + (** [any ()] creates a codec that handles any TOML type. 482 + Provide decoders for each type you want to support. 483 + The [enc] function should return the appropriate codec for encoding. *) 484 + 485 + (** {1:codec_ops Encoding and Decoding} *) 486 + 487 + val decode : 'a t -> Toml.t -> ('a, Toml.Error.t) result 488 + (** [decode c v] decodes TOML value [v] using codec [c]. *) 489 + 490 + val decode_exn : 'a t -> Toml.t -> 'a 491 + (** [decode_exn c v] is like [decode] but raises on error. 492 + @raise Toml.Error.Error on decode failure. *) 493 + 494 + val encode : 'a t -> 'a -> Toml.t 495 + (** [encode c v] encodes OCaml value [v] to TOML using codec [c]. *) 496 + 497 + val decode_string : 'a t -> string -> ('a, Toml.Error.t) result 498 + (** [decode_string c s] parses TOML string [s] and decodes with [c]. *) 499 + 500 + val decode_string_exn : 'a t -> string -> 'a 501 + (** [decode_string_exn c s] is like [decode_string] but raises on error. *) 502 + 503 + val encode_string : 'a t -> 'a -> string 504 + (** [encode_string c v] encodes [v] to a TOML-formatted string. *) 505 + 506 + val decode_reader : ?file:string -> 'a t -> Bytesrw.Bytes.Reader.t -> 507 + ('a, Toml.Error.t) result 508 + (** [decode_reader c r] parses TOML from reader [r] and decodes with [c]. 509 + @param file Optional filename for error messages. *) 510 + 511 + val encode_writer : 'a t -> 'a -> Bytesrw.Bytes.Writer.t -> unit 512 + (** [encode_writer c v w] encodes [v] and writes TOML to writer [w]. *) 513 + 514 + (** {1 Re-exported Modules} *) 515 + 516 + module Toml = Toml 517 + (** The raw TOML value module. Use for low-level TOML manipulation. *) 518 + 519 + module Error = Toml.Error 520 + (** Error types from the TOML parser. *)
lib/tomlt_error.ml lib/toml_error.ml
lib/tomlt_error.mli lib/toml_error.mli
+3 -3
lib_eio/tomlt_eio.ml
··· 3 3 SPDX-License-Identifier: ISC 4 4 ---------------------------------------------------------------------------*) 5 5 6 - module Error = Tomlt.Error 6 + module Error = Tomlt.Toml.Error 7 7 8 8 type Eio.Exn.err += E of Error.t 9 9 ··· 23 23 raise (err e) 24 24 25 25 let parse ?file input = 26 - try Tomlt.parse input 26 + try Tomlt.Toml.parse input 27 27 with Error.Error e -> 28 28 let bt = Printexc.get_raw_backtrace () in 29 29 let eio_exn = err e in ··· 43 43 |> parse ~file 44 44 45 45 let to_flow flow value = 46 - let output = Tomlt.to_toml_string value in 46 + let output = Tomlt.Toml.to_toml_string value in 47 47 Eio.Flow.copy_string output flow
+7 -7
lib_eio/tomlt_eio.mli
··· 18 18 19 19 (** {1 Eio Exception Integration} *) 20 20 21 - type Eio.Exn.err += E of Tomlt.Error.t 21 + type Eio.Exn.err += E of Tomlt.Toml.Error.t 22 22 (** TOML errors as Eio errors. *) 23 23 24 - val err : Tomlt.Error.t -> exn 24 + val err : Tomlt.Toml.Error.t -> exn 25 25 (** [err e] creates an [Eio.Io] exception from TOML error [e]. *) 26 26 27 27 val wrap_error : (unit -> 'a) -> 'a 28 - (** [wrap_error f] runs [f] and converts [Tomlt.Error.Error] to [Eio.Io]. *) 28 + (** [wrap_error f] runs [f] and converts [Tomlt.Toml.Error.Error] to [Eio.Io]. *) 29 29 30 30 (** {1 Parsing with Eio} *) 31 31 32 - val parse : ?file:string -> string -> Tomlt.t 32 + val parse : ?file:string -> string -> Tomlt.Toml.t 33 33 (** [parse s] parses TOML string [s] with Eio error handling. 34 34 @param file optional filename for error context. 35 35 @raise Eio.Io on parse errors. *) 36 36 37 - val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.t 37 + val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.Toml.t 38 38 (** [of_flow flow] reads and parses TOML from an Eio flow. 39 39 @param file optional filename for error context. 40 40 @raise Eio.Io on read or parse errors. *) 41 41 42 - val of_path : fs:_ Eio.Path.t -> string -> Tomlt.t 42 + val of_path : fs:_ Eio.Path.t -> string -> Tomlt.Toml.t 43 43 (** [of_path ~fs path] reads and parses TOML from a file path. 44 44 @raise Eio.Io on file or parse errors. *) 45 45 46 46 (** {1 Encoding with Eio} *) 47 47 48 - val to_flow : _ Eio.Flow.sink -> Tomlt.t -> unit 48 + val to_flow : _ Eio.Flow.sink -> Tomlt.Toml.t -> unit 49 49 (** [to_flow flow t] writes TOML value [t] to an Eio flow. 50 50 @raise Invalid_argument if [t] is not a table. *)
+4
lib_jsont/dune
··· 1 + (library 2 + (name tomlt_jsont) 3 + (public_name tomlt-jsont) 4 + (libraries tomlt jsont jsont.bytesrw))
+193
lib_jsont/tomlt_jsont.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Jsont codecs for TOML tagged JSON format. 7 + 8 + This module provides bidirectional codecs between TOML values and 9 + the tagged JSON format used by {{:https://github.com/toml-lang/toml-test} 10 + toml-test}. *) 11 + 12 + module Toml = Tomlt.Toml 13 + module String_map = Map.Make(String) 14 + 15 + (* The tagged JSON format wraps scalar values as {"type": "T", "value": "V"} 16 + while arrays and objects are passed through with their contents recursively 17 + encoded. *) 18 + 19 + (* Encode TOML -> JSON (string representation) using Tomlt's existing encoder *) 20 + let encode (v : Toml.t) : string = 21 + Toml.Tagged_json.encode v 22 + 23 + (* Decode JSON (string) -> TOML using Tomlt's existing decoder *) 24 + let decode (s : string) : Toml.t = 25 + Toml.Tagged_json.decode s 26 + 27 + (* Convenience result-based decode *) 28 + let decode_result (s : string) : (Toml.t, string) result = 29 + try Ok (decode s) 30 + with Failure msg -> Error msg 31 + 32 + (* Tagged value type for scalar types *) 33 + type tagged_value = { 34 + typ : string; 35 + value : string; 36 + } 37 + 38 + (* Convert tagged value to TOML *) 39 + let tagged_to_toml (t : tagged_value) : Toml.t = 40 + match t.typ with 41 + | "string" -> Toml.String t.value 42 + | "integer" -> Toml.Int (Int64.of_string t.value) 43 + | "float" -> 44 + let f = 45 + match t.value with 46 + | "nan" -> Float.nan 47 + | "inf" | "+inf" -> Float.infinity 48 + | "-inf" -> Float.neg_infinity 49 + | s -> float_of_string s 50 + in 51 + Toml.Float f 52 + | "bool" -> Toml.Bool (t.value = "true") 53 + | "datetime" -> Toml.Datetime t.value 54 + | "datetime-local" -> Toml.Datetime_local t.value 55 + | "date-local" -> Toml.Date_local t.value 56 + | "time-local" -> Toml.Time_local t.value 57 + | typ -> failwith ("Unknown tagged type: " ^ typ) 58 + 59 + (* Convert TOML scalar to tagged value *) 60 + let toml_to_tagged (v : Toml.t) : tagged_value = 61 + match v with 62 + | Toml.String s -> { typ = "string"; value = s } 63 + | Toml.Int i -> { typ = "integer"; value = Int64.to_string i } 64 + | Toml.Float f -> 65 + let value = 66 + if Float.is_nan f then "nan" 67 + else if f = Float.infinity then "inf" 68 + else if f = Float.neg_infinity then "-inf" 69 + else if f = 0.0 && 1.0 /. f = Float.neg_infinity then "-0" 70 + else Printf.sprintf "%g" f 71 + in 72 + { typ = "float"; value } 73 + | Toml.Bool b -> { typ = "bool"; value = if b then "true" else "false" } 74 + | Toml.Datetime s -> { typ = "datetime"; value = s } 75 + | Toml.Datetime_local s -> { typ = "datetime-local"; value = s } 76 + | Toml.Date_local s -> { typ = "date-local"; value = s } 77 + | Toml.Time_local s -> { typ = "time-local"; value = s } 78 + | Toml.Array _ | Toml.Table _ -> 79 + failwith "Cannot convert non-scalar TOML value to tagged value" 80 + 81 + (* Jsont codec for tagged values (scalars only) *) 82 + let tagged_jsont : tagged_value Jsont.t = 83 + Jsont.Object.( 84 + map (fun typ value -> { typ; value }) 85 + |> mem "type" Jsont.string ~enc:(fun t -> t.typ) 86 + |> mem "value" Jsont.string ~enc:(fun t -> t.value) 87 + |> finish 88 + ) 89 + 90 + (* The main recursive TOML value codec. 91 + 92 + This is a bit tricky because: 93 + - When decoding an object, we need to determine if it's a tagged scalar 94 + (has "type" and "value" keys) or a table (keys map to tagged values) 95 + - When encoding, scalars become {"type": ..., "value": ...}, arrays become 96 + [...], and tables become {"key": <tagged>, ...} 97 + *) 98 + 99 + let rec toml_jsont : Toml.t Jsont.t Lazy.t = lazy ( 100 + Jsont.any 101 + ~dec_array:(Lazy.force toml_array) 102 + ~dec_object:(Lazy.force toml_object) 103 + ~enc:(fun v -> 104 + match v with 105 + | Toml.Array _ -> Lazy.force toml_array 106 + | Toml.Table _ -> Lazy.force toml_table_enc 107 + | _ -> Lazy.force toml_scalar_enc) 108 + () 109 + ) 110 + 111 + and toml_array : Toml.t Jsont.t Lazy.t = lazy ( 112 + Jsont.map 113 + ~dec:(fun items -> Toml.Array items) 114 + ~enc:(function 115 + | Toml.Array items -> items 116 + | _ -> failwith "Expected array") 117 + (Jsont.list (Jsont.rec' toml_jsont)) 118 + ) 119 + 120 + and toml_object : Toml.t Jsont.t Lazy.t = lazy ( 121 + (* Try to decode as tagged scalar first, fall back to table *) 122 + Jsont.Object.( 123 + map (fun typ_opt value_opt rest -> 124 + match typ_opt, value_opt with 125 + | Some typ, Some value when String_map.is_empty rest -> 126 + (* Tagged scalar value *) 127 + tagged_to_toml { typ; value } 128 + | _ -> 129 + (* Regular table - include type/value if present but not a valid tagged pair *) 130 + let pairs = String_map.bindings rest in 131 + let pairs = 132 + match typ_opt with 133 + | Some typ -> 134 + let typ_toml = Toml.String typ in 135 + ("type", typ_toml) :: pairs 136 + | None -> pairs 137 + in 138 + let pairs = 139 + match value_opt with 140 + | Some value -> 141 + let value_toml = Toml.String value in 142 + ("value", value_toml) :: pairs 143 + | None -> pairs 144 + in 145 + Toml.Table pairs) 146 + |> opt_mem "type" Jsont.string ~enc:(fun _ -> None) 147 + |> opt_mem "value" Jsont.string ~enc:(fun _ -> None) 148 + |> keep_unknown 149 + (Mems.string_map (Jsont.rec' toml_jsont)) 150 + ~enc:(fun _ -> String_map.empty) (* Encoding handled by toml_table_enc *) 151 + |> finish 152 + ) 153 + ) 154 + 155 + and toml_scalar_enc : Toml.t Jsont.t Lazy.t = lazy ( 156 + Jsont.map 157 + ~dec:(fun t -> tagged_to_toml t) 158 + ~enc:toml_to_tagged 159 + tagged_jsont 160 + ) 161 + 162 + and toml_table_enc : Toml.t Jsont.t Lazy.t = lazy ( 163 + Jsont.Object.( 164 + map (fun m -> Toml.Table (String_map.bindings m)) 165 + |> keep_unknown 166 + (Mems.string_map (Jsont.rec' toml_jsont)) 167 + ~enc:(function 168 + | Toml.Table pairs -> 169 + List.fold_left (fun m (k, v) -> String_map.add k v m) 170 + String_map.empty pairs 171 + | _ -> failwith "Expected table") 172 + |> finish 173 + ) 174 + ) 175 + 176 + (* Main codec *) 177 + let toml : Toml.t Jsont.t = Jsont.rec' toml_jsont 178 + 179 + (* Convenience functions using jsont *) 180 + 181 + let encode_jsont (v : Toml.t) : (string, string) result = 182 + Jsont_bytesrw.encode_string toml v 183 + 184 + let decode_jsont (s : string) : (Toml.t, string) result = 185 + Jsont_bytesrw.decode_string toml s 186 + 187 + let decode_jsont' (s : string) : (Toml.t, Jsont.Error.t) result = 188 + Jsont_bytesrw.decode_string' toml s 189 + 190 + let decode_jsont_exn (s : string) : Toml.t = 191 + match decode_jsont' s with 192 + | Ok v -> v 193 + | Error e -> raise (Jsont.Error e)
+115
lib_jsont/tomlt_jsont.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Jsont codecs for TOML tagged JSON format. 7 + 8 + This module provides bidirectional codecs between TOML values and 9 + the tagged JSON format used by {{:https://github.com/toml-lang/toml-test} 10 + toml-test}. 11 + 12 + {2 Tagged JSON Format} 13 + 14 + The toml-test suite uses a "tagged JSON" format where each TOML value 15 + is represented as a JSON object with type information: 16 + - Scalars: [{"type": "string", "value": "hello"}] 17 + - Arrays: [[tagged_value, ...]] 18 + - Tables: [{"key": tagged_value, ...}] 19 + 20 + {2 Quick Start} 21 + 22 + Using the native encoder (recommended for compatibility): 23 + {v 24 + let json = Tomlt_jsont.encode toml_value 25 + let toml = Tomlt_jsont.decode json_string 26 + v} 27 + 28 + Using jsont codecs (for integration with jsont pipelines): 29 + {v 30 + let json = Tomlt_jsont.encode_jsont toml_value 31 + let toml = Tomlt_jsont.decode_jsont json_string 32 + v} 33 + 34 + {2 Module Overview} 35 + 36 + - {!section:native} - Native encode/decode using Tomlt.Toml.Tagged_json 37 + - {!section:jsont} - Jsont codec for tagged JSON format 38 + - {!section:conv} - Convenience functions *) 39 + 40 + module Toml = Tomlt.Toml 41 + (** Re-exported TOML module for convenience. *) 42 + 43 + (** {1:native Native Encode/Decode} 44 + 45 + These functions use Tomlt's built-in tagged JSON encoder/decoder, 46 + which is highly optimized for the toml-test format. *) 47 + 48 + val encode : Toml.t -> string 49 + (** [encode v] encodes TOML value [v] to tagged JSON format. 50 + This uses [Toml.Tagged_json.encode] directly. *) 51 + 52 + val decode : string -> Toml.t 53 + (** [decode s] decodes tagged JSON string [s] to a TOML value. 54 + This uses [Toml.Tagged_json.decode] directly. 55 + @raise Failure on malformed JSON or unknown types. *) 56 + 57 + val decode_result : string -> (Toml.t, string) result 58 + (** [decode_result s] is like [decode] but returns a result. *) 59 + 60 + (** {1:jsont Jsont Codec} 61 + 62 + The [toml] codec provides a jsont-based implementation of the 63 + tagged JSON format. This allows integration with jsont pipelines 64 + and other jsont-based tooling. *) 65 + 66 + val toml : Toml.t Jsont.t 67 + (** [toml] is a jsont codec for TOML values in tagged JSON format. 68 + 69 + This codec can decode and encode the tagged JSON format used by 70 + toml-test. On decode, it distinguishes between: 71 + - Tagged scalars: [{"type": "T", "value": "V"}] (exactly these two keys) 72 + - Tables: Other JSON objects 73 + - Arrays: JSON arrays 74 + 75 + On encode, TOML values are converted to appropriate tagged JSON. *) 76 + 77 + (** {1:conv Convenience Functions} 78 + 79 + These functions use the jsont codec with [Jsont_bytesrw] for 80 + string-based encoding/decoding. *) 81 + 82 + val encode_jsont : Toml.t -> (string, string) result 83 + (** [encode_jsont v] encodes TOML value [v] using the jsont codec. 84 + Returns an error string on failure. *) 85 + 86 + val decode_jsont : string -> (Toml.t, string) result 87 + (** [decode_jsont s] decodes tagged JSON [s] using the jsont codec. 88 + Returns an error string on failure. *) 89 + 90 + val decode_jsont' : string -> (Toml.t, Jsont.Error.t) result 91 + (** [decode_jsont' s] is like [decode_jsont] but preserves the error. *) 92 + 93 + val decode_jsont_exn : string -> Toml.t 94 + (** [decode_jsont_exn s] is like [decode_jsont'] but raises on error. 95 + @raise Jsont.Error on decode failure. *) 96 + 97 + (** {1:internal Internal Types} 98 + 99 + These are exposed for advanced use cases but may change between versions. *) 100 + 101 + type tagged_value = { 102 + typ : string; 103 + value : string; 104 + } 105 + (** A tagged scalar value with type and value strings. *) 106 + 107 + val tagged_jsont : tagged_value Jsont.t 108 + (** Jsont codec for tagged scalar values. *) 109 + 110 + val tagged_to_toml : tagged_value -> Toml.t 111 + (** Convert a tagged value to its TOML representation. *) 112 + 113 + val toml_to_tagged : Toml.t -> tagged_value 114 + (** Convert a TOML scalar to a tagged value. 115 + @raise Failure if the value is not a scalar. *)
+8
test/dune
··· 1 1 (test 2 2 (name test_tomlt) 3 3 (libraries tomlt alcotest)) 4 + 5 + (test 6 + (name test_codec) 7 + (libraries tomlt alcotest)) 8 + 9 + (executable 10 + (name test_debug) 11 + (libraries tomlt))
+1327
test/test_codec.ml
··· 1 + (* Comprehensive tests for Tomlt codecs *) 2 + 3 + open Tomlt 4 + 5 + (* ============================================================================ 6 + Test Helpers 7 + ============================================================================ *) 8 + 9 + (* Decode a value from "value = X" TOML *) 10 + let check_decode_ok name codec input expected = 11 + let toml = Toml.parse input in 12 + let value = Toml.find "value" toml in 13 + let actual = decode codec value in 14 + match actual with 15 + | Ok v when v = expected -> () 16 + | Ok _ -> 17 + Alcotest.failf "%s: decode returned unexpected value" name 18 + | Error e -> 19 + Alcotest.failf "%s: decode failed: %s" name (Toml.Error.to_string e) 20 + 21 + (* Check that decode fails *) 22 + let check_decode_error name codec input = 23 + let toml = Toml.parse input in 24 + let value = Toml.find "value" toml in 25 + match decode codec value with 26 + | Error _ -> () 27 + | Ok _ -> Alcotest.failf "%s: expected decode error but succeeded" name 28 + 29 + (* Decode from a table (for table codecs) *) 30 + let check_decode_table_ok name codec input expected = 31 + let toml = Toml.parse input in 32 + let value = Toml.find "value" toml in 33 + let actual = decode codec value in 34 + match actual with 35 + | Ok v when v = expected -> () 36 + | Ok _ -> 37 + Alcotest.failf "%s: decode returned unexpected value" name 38 + | Error e -> 39 + Alcotest.failf "%s: decode failed: %s" name (Toml.Error.to_string e) 40 + 41 + (* Check table decode error *) 42 + let check_decode_table_error name codec input = 43 + let toml = Toml.parse input in 44 + let value = Toml.find "value" toml in 45 + match decode codec value with 46 + | Error _ -> () 47 + | Ok _ -> Alcotest.failf "%s: expected decode error but succeeded" name 48 + 49 + (* Roundtrip test *) 50 + let check_roundtrip name codec value = 51 + let toml = encode codec value in 52 + match decode codec toml with 53 + | Ok v when v = value -> () 54 + | Ok _ -> 55 + Alcotest.failf "%s: roundtrip mismatch, got different value" name 56 + | Error e -> 57 + Alcotest.failf "%s: roundtrip decode failed: %s" name (Toml.Error.to_string e) 58 + 59 + 60 + (* ============================================================================ 61 + Datetime Type Tests 62 + ============================================================================ *) 63 + 64 + (* ---- Tz tests ---- *) 65 + 66 + let test_tz_utc () = 67 + Alcotest.(check string) "utc to_string" "Z" (Tz.to_string Tz.utc); 68 + Alcotest.(check bool) "utc equal" true (Tz.equal Tz.utc Tz.utc); 69 + match Tz.of_string "Z" with 70 + | Ok tz -> Alcotest.(check bool) "parse Z" true (Tz.equal tz Tz.utc) 71 + | Error e -> Alcotest.failf "failed to parse Z: %s" e 72 + 73 + let test_tz_offset () = 74 + let tz_pos = Tz.offset ~hours:5 ~minutes:30 in 75 + Alcotest.(check string) "positive offset" "+05:30" (Tz.to_string tz_pos); 76 + 77 + let tz_neg = Tz.offset ~hours:(-8) ~minutes:0 in 78 + Alcotest.(check string) "negative offset" "-08:00" (Tz.to_string tz_neg); 79 + 80 + let tz_zero = Tz.offset ~hours:0 ~minutes:0 in 81 + Alcotest.(check string) "zero offset" "+00:00" (Tz.to_string tz_zero) 82 + 83 + let test_tz_parse () = 84 + (match Tz.of_string "+05:30" with 85 + | Ok tz -> Alcotest.(check string) "parse +05:30" "+05:30" (Tz.to_string tz) 86 + | Error e -> Alcotest.failf "failed to parse +05:30: %s" e); 87 + 88 + (match Tz.of_string "-08:00" with 89 + | Ok tz -> Alcotest.(check string) "parse -08:00" "-08:00" (Tz.to_string tz) 90 + | Error e -> Alcotest.failf "failed to parse -08:00: %s" e); 91 + 92 + (match Tz.of_string "z" with 93 + | Ok tz -> Alcotest.(check bool) "parse lowercase z" true (Tz.equal tz Tz.utc) 94 + | Error e -> Alcotest.failf "failed to parse z: %s" e) 95 + 96 + let test_tz_compare () = 97 + let tz1 = Tz.offset ~hours:5 ~minutes:0 in 98 + let tz2 = Tz.offset ~hours:6 ~minutes:0 in 99 + Alcotest.(check int) "compare less" (-1) (Int.compare (Tz.compare tz1 tz2) 0); 100 + Alcotest.(check int) "compare greater" 1 (Int.compare (Tz.compare tz2 tz1) 0); 101 + Alcotest.(check int) "compare equal" 0 (Tz.compare tz1 tz1); 102 + Alcotest.(check int) "utc < offset" (-1) (Int.compare (Tz.compare Tz.utc tz1) 0) 103 + 104 + (* ---- Date tests ---- *) 105 + 106 + let test_date_basic () = 107 + let d = Date.make ~year:2024 ~month:6 ~day:15 in 108 + Alcotest.(check string) "to_string" "2024-06-15" (Date.to_string d); 109 + Alcotest.(check int) "year" 2024 d.year; 110 + Alcotest.(check int) "month" 6 d.month; 111 + Alcotest.(check int) "day" 15 d.day 112 + 113 + let test_date_equal () = 114 + let d1 = Date.make ~year:2024 ~month:6 ~day:15 in 115 + let d2 = Date.make ~year:2024 ~month:6 ~day:15 in 116 + let d3 = Date.make ~year:2024 ~month:6 ~day:16 in 117 + Alcotest.(check bool) "equal same" true (Date.equal d1 d2); 118 + Alcotest.(check bool) "not equal diff day" false (Date.equal d1 d3) 119 + 120 + let test_date_compare () = 121 + let d1 = Date.make ~year:2024 ~month:6 ~day:15 in 122 + let d2 = Date.make ~year:2024 ~month:6 ~day:16 in 123 + let d3 = Date.make ~year:2024 ~month:7 ~day:1 in 124 + let d4 = Date.make ~year:2025 ~month:1 ~day:1 in 125 + Alcotest.(check int) "compare day" (-1) (Int.compare (Date.compare d1 d2) 0); 126 + Alcotest.(check int) "compare month" (-1) (Int.compare (Date.compare d1 d3) 0); 127 + Alcotest.(check int) "compare year" (-1) (Int.compare (Date.compare d1 d4) 0) 128 + 129 + let test_date_parse () = 130 + (match Date.of_string "2024-06-15" with 131 + | Ok d -> 132 + Alcotest.(check int) "year" 2024 d.year; 133 + Alcotest.(check int) "month" 6 d.month; 134 + Alcotest.(check int) "day" 15 d.day 135 + | Error e -> Alcotest.failf "parse failed: %s" e); 136 + 137 + (match Date.of_string "1979-05-27" with 138 + | Ok d -> Alcotest.(check string) "roundtrip" "1979-05-27" (Date.to_string d) 139 + | Error e -> Alcotest.failf "parse failed: %s" e) 140 + 141 + let test_date_edge_cases () = 142 + (* First day of year *) 143 + let d1 = Date.make ~year:2024 ~month:1 ~day:1 in 144 + Alcotest.(check string) "jan 1" "2024-01-01" (Date.to_string d1); 145 + 146 + (* Last day of year *) 147 + let d2 = Date.make ~year:2024 ~month:12 ~day:31 in 148 + Alcotest.(check string) "dec 31" "2024-12-31" (Date.to_string d2); 149 + 150 + (* Leading zeros in year *) 151 + let d3 = Date.make ~year:99 ~month:1 ~day:1 in 152 + Alcotest.(check string) "year 99" "0099-01-01" (Date.to_string d3) 153 + 154 + (* ---- Time tests ---- *) 155 + 156 + let test_time_basic () = 157 + let t = Time.make ~hour:14 ~minute:30 ~second:45 () in 158 + Alcotest.(check string) "to_string" "14:30:45" (Time.to_string t); 159 + Alcotest.(check int) "hour" 14 t.hour; 160 + Alcotest.(check int) "minute" 30 t.minute; 161 + Alcotest.(check int) "second" 45 t.second; 162 + Alcotest.(check (float 0.001)) "frac" 0.0 t.frac 163 + 164 + let test_time_fractional () = 165 + let t1 = Time.make ~hour:14 ~minute:30 ~second:45 ~frac:0.123 () in 166 + Alcotest.(check string) "frac 3 digits" "14:30:45.123" (Time.to_string t1); 167 + 168 + let t2 = Time.make ~hour:0 ~minute:0 ~second:0 ~frac:0.123456789 () in 169 + Alcotest.(check string) "frac 9 digits" "00:00:00.123456789" (Time.to_string t2); 170 + 171 + let t3 = Time.make ~hour:12 ~minute:0 ~second:0 ~frac:0.1 () in 172 + Alcotest.(check string) "frac 1 digit" "12:00:00.1" (Time.to_string t3) 173 + 174 + let test_time_equal () = 175 + let t1 = Time.make ~hour:14 ~minute:30 ~second:45 () in 176 + let t2 = Time.make ~hour:14 ~minute:30 ~second:45 () in 177 + let t3 = Time.make ~hour:14 ~minute:30 ~second:46 () in 178 + Alcotest.(check bool) "equal same" true (Time.equal t1 t2); 179 + Alcotest.(check bool) "not equal" false (Time.equal t1 t3) 180 + 181 + let test_time_compare () = 182 + let t1 = Time.make ~hour:14 ~minute:30 ~second:45 () in 183 + let t2 = Time.make ~hour:14 ~minute:30 ~second:46 () in 184 + let t3 = Time.make ~hour:14 ~minute:31 ~second:0 () in 185 + let t4 = Time.make ~hour:15 ~minute:0 ~second:0 () in 186 + Alcotest.(check int) "compare second" (-1) (Int.compare (Time.compare t1 t2) 0); 187 + Alcotest.(check int) "compare minute" (-1) (Int.compare (Time.compare t1 t3) 0); 188 + Alcotest.(check int) "compare hour" (-1) (Int.compare (Time.compare t1 t4) 0) 189 + 190 + let test_time_parse () = 191 + (match Time.of_string "14:30:45" with 192 + | Ok t -> 193 + Alcotest.(check int) "hour" 14 t.hour; 194 + Alcotest.(check int) "minute" 30 t.minute; 195 + Alcotest.(check int) "second" 45 t.second 196 + | Error e -> Alcotest.failf "parse failed: %s" e); 197 + 198 + (match Time.of_string "00:00:00.123456" with 199 + | Ok t -> 200 + Alcotest.(check (float 0.000001)) "frac" 0.123456 t.frac 201 + | Error e -> Alcotest.failf "parse failed: %s" e) 202 + 203 + let test_time_edge_cases () = 204 + let t1 = Time.make ~hour:0 ~minute:0 ~second:0 () in 205 + Alcotest.(check string) "midnight" "00:00:00" (Time.to_string t1); 206 + 207 + let t2 = Time.make ~hour:23 ~minute:59 ~second:59 () in 208 + Alcotest.(check string) "end of day" "23:59:59" (Time.to_string t2) 209 + 210 + (* ---- Datetime tests ---- *) 211 + 212 + let test_datetime_basic () = 213 + let dt = Datetime.make 214 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 215 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 216 + ~tz:Tz.utc 217 + in 218 + Alcotest.(check string) "to_string" "2024-06-15T14:30:00Z" (Datetime.to_string dt) 219 + 220 + let test_datetime_with_offset () = 221 + let dt = Datetime.make 222 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 223 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 224 + ~tz:(Tz.offset ~hours:5 ~minutes:30) 225 + in 226 + Alcotest.(check string) "with offset" "2024-06-15T14:30:00+05:30" (Datetime.to_string dt) 227 + 228 + let test_datetime_with_frac () = 229 + let dt = Datetime.make 230 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 231 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ~frac:0.123456 ()) 232 + ~tz:Tz.utc 233 + in 234 + Alcotest.(check string) "with frac" "2024-06-15T14:30:00.123456Z" (Datetime.to_string dt) 235 + 236 + let test_datetime_parse () = 237 + (match Datetime.of_string "2024-06-15T14:30:00Z" with 238 + | Ok dt -> 239 + Alcotest.(check int) "year" 2024 dt.date.year; 240 + Alcotest.(check int) "hour" 14 dt.time.hour; 241 + Alcotest.(check bool) "tz" true (Tz.equal dt.tz Tz.utc) 242 + | Error e -> Alcotest.failf "parse failed: %s" e); 243 + 244 + (match Datetime.of_string "1979-05-27T07:32:00-08:00" with 245 + | Ok dt -> 246 + Alcotest.(check int) "year" 1979 dt.date.year; 247 + Alcotest.(check string) "tz" "-08:00" (Tz.to_string dt.tz) 248 + | Error e -> Alcotest.failf "parse failed: %s" e) 249 + 250 + let test_datetime_equal_compare () = 251 + let dt1 = Datetime.make 252 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 253 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 254 + ~tz:Tz.utc in 255 + let dt2 = Datetime.make 256 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 257 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 258 + ~tz:Tz.utc in 259 + let dt3 = Datetime.make 260 + ~date:(Date.make ~year:2024 ~month:6 ~day:16) 261 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 262 + ~tz:Tz.utc in 263 + Alcotest.(check bool) "equal same" true (Datetime.equal dt1 dt2); 264 + Alcotest.(check bool) "not equal" false (Datetime.equal dt1 dt3); 265 + Alcotest.(check int) "compare" (-1) (Int.compare (Datetime.compare dt1 dt3) 0) 266 + 267 + (* ---- Datetime_local tests ---- *) 268 + 269 + let test_datetime_local_basic () = 270 + let dt = Datetime_local.make 271 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 272 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 273 + in 274 + Alcotest.(check string) "to_string" "2024-06-15T14:30:00" (Datetime_local.to_string dt) 275 + 276 + let test_datetime_local_parse () = 277 + match Datetime_local.of_string "2024-06-15T14:30:00" with 278 + | Ok dt -> 279 + Alcotest.(check int) "year" 2024 dt.date.year; 280 + Alcotest.(check int) "hour" 14 dt.time.hour 281 + | Error e -> Alcotest.failf "parse failed: %s" e 282 + 283 + let test_datetime_local_equal_compare () = 284 + let dt1 = Datetime_local.make 285 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 286 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) in 287 + let dt2 = Datetime_local.make 288 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 289 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) in 290 + Alcotest.(check bool) "equal" true (Datetime_local.equal dt1 dt2); 291 + Alcotest.(check int) "compare" 0 (Datetime_local.compare dt1 dt2) 292 + 293 + (* ============================================================================ 294 + Base Codec Tests 295 + ============================================================================ *) 296 + 297 + (* ---- Bool codec ---- *) 298 + 299 + let test_bool_codec () = 300 + check_decode_ok "true" bool "value = true" true; 301 + check_decode_ok "false" bool "value = false" false 302 + 303 + let test_bool_roundtrip () = 304 + check_roundtrip "true roundtrip" bool true; 305 + check_roundtrip "false roundtrip" bool false 306 + 307 + let test_bool_type_error () = 308 + check_decode_error "string not bool" bool {|value = "true"|} 309 + 310 + (* ---- Int codec ---- *) 311 + 312 + let test_int_codec () = 313 + check_decode_ok "positive" int "value = 42" 42; 314 + check_decode_ok "negative" int "value = -17" (-17); 315 + check_decode_ok "zero" int "value = 0" 0; 316 + check_decode_ok "large" int "value = 1000000" 1000000 317 + 318 + let test_int_formats () = 319 + check_decode_ok "hex" int "value = 0xDEADBEEF" 0xDEADBEEF; 320 + check_decode_ok "octal" int "value = 0o755" 0o755; 321 + check_decode_ok "binary" int "value = 0b11010110" 0b11010110; 322 + check_decode_ok "underscore" int "value = 1_000_000" 1_000_000 323 + 324 + let test_int_roundtrip () = 325 + check_roundtrip "positive" int 42; 326 + check_roundtrip "negative" int (-17); 327 + check_roundtrip "zero" int 0 328 + 329 + let test_int_type_error () = 330 + check_decode_error "float not int" int "value = 3.14"; 331 + check_decode_error "string not int" int {|value = "42"|} 332 + 333 + (* ---- Int32 codec ---- *) 334 + 335 + let test_int32_codec () = 336 + check_decode_ok "positive" int32 "value = 42" 42l; 337 + check_decode_ok "negative" int32 "value = -17" (-17l); 338 + check_decode_ok "max" int32 "value = 2147483647" Int32.max_int; 339 + check_decode_ok "min" int32 "value = -2147483648" Int32.min_int 340 + 341 + let test_int32_roundtrip () = 342 + check_roundtrip "positive" int32 42l; 343 + check_roundtrip "max" int32 Int32.max_int; 344 + check_roundtrip "min" int32 Int32.min_int 345 + 346 + (* ---- Int64 codec ---- *) 347 + 348 + let test_int64_codec () = 349 + check_decode_ok "positive" int64 "value = 42" 42L; 350 + check_decode_ok "large" int64 "value = 9223372036854775807" Int64.max_int; 351 + check_decode_ok "large neg" int64 "value = -9223372036854775808" Int64.min_int 352 + 353 + let test_int64_roundtrip () = 354 + check_roundtrip "positive" int64 42L; 355 + check_roundtrip "max" int64 Int64.max_int; 356 + check_roundtrip "min" int64 Int64.min_int 357 + 358 + (* ---- Float codec ---- *) 359 + 360 + let test_float_codec () = 361 + check_decode_ok "positive" float "value = 3.14" 3.14; 362 + check_decode_ok "negative" float "value = -2.5" (-2.5); 363 + check_decode_ok "zero" float "value = 0.0" 0.0; 364 + check_decode_ok "exponent" float "value = 5e+22" 5e+22; 365 + check_decode_ok "neg exponent" float "value = 1e-10" 1e-10 366 + 367 + let test_float_special () = 368 + check_decode_ok "inf" float "value = inf" Float.infinity; 369 + check_decode_ok "neg inf" float "value = -inf" Float.neg_infinity; 370 + check_decode_ok "pos inf" float "value = +inf" Float.infinity; 371 + (* nan requires special handling since nan <> nan *) 372 + let toml = Toml.parse "value = nan" in 373 + let value = Toml.find "value" toml in 374 + match decode float value with 375 + | Ok f when Float.is_nan f -> () 376 + | Ok _ -> Alcotest.fail "expected nan" 377 + | Error e -> Alcotest.failf "decode failed: %s" (Toml.Error.to_string e) 378 + 379 + let test_float_roundtrip () = 380 + check_roundtrip "positive" float 3.14; 381 + check_roundtrip "negative" float (-2.5); 382 + check_roundtrip "zero" float 0.0 383 + 384 + let test_float_type_error () = 385 + check_decode_error "int not float" float "value = 42"; 386 + check_decode_error "string not float" float {|value = "3.14"|} 387 + 388 + (* ---- Number codec ---- *) 389 + 390 + let test_number_codec () = 391 + check_decode_ok "float" number "value = 3.14" 3.14; 392 + check_decode_ok "int as float" number "value = 42" 42.0; 393 + check_decode_ok "negative int" number "value = -17" (-17.0) 394 + 395 + let test_number_type_error () = 396 + check_decode_error "string not number" number {|value = "42"|} 397 + 398 + (* ---- String codec ---- *) 399 + 400 + let test_string_codec () = 401 + check_decode_ok "basic" string {|value = "hello"|} "hello"; 402 + check_decode_ok "empty" string {|value = ""|} ""; 403 + check_decode_ok "unicode" string {|value = "hello \u0048\u0065\u006C\u006C\u006F"|} "hello Hello" 404 + 405 + let test_string_escapes () = 406 + check_decode_ok "newline" string {|value = "line1\nline2"|} "line1\nline2"; 407 + check_decode_ok "tab" string {|value = "col1\tcol2"|} "col1\tcol2"; 408 + check_decode_ok "quote" string {|value = "say \"hello\""|} {|say "hello"|}; 409 + check_decode_ok "backslash" string {|value = "path\\to\\file"|} "path\\to\\file" 410 + 411 + let test_string_multiline () = 412 + check_decode_ok "multiline" string {|value = """ 413 + hello 414 + world"""|} "hello\nworld"; 415 + check_decode_ok "literal" string "value = 'C:\\path\\to\\file'" "C:\\path\\to\\file" 416 + 417 + let test_string_roundtrip () = 418 + check_roundtrip "basic" string "hello"; 419 + check_roundtrip "empty" string ""; 420 + check_roundtrip "unicode" string "Hello, \xE4\xB8\x96\xE7\x95\x8C!" 421 + 422 + let test_string_type_error () = 423 + check_decode_error "int not string" string "value = 42"; 424 + check_decode_error "bool not string" string "value = true" 425 + 426 + (* ============================================================================ 427 + Datetime Codec Tests 428 + ============================================================================ *) 429 + 430 + let test_datetime_codec () = 431 + let input = "value = 2024-06-15T14:30:00Z" in 432 + let expected = Datetime.make 433 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 434 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) 435 + ~tz:Tz.utc in 436 + check_decode_ok "basic" datetime input expected 437 + 438 + let test_datetime_codec_offset () = 439 + let input = "value = 1979-05-27T07:32:00-08:00" in 440 + let expected = Datetime.make 441 + ~date:(Date.make ~year:1979 ~month:5 ~day:27) 442 + ~time:(Time.make ~hour:7 ~minute:32 ~second:0 ()) 443 + ~tz:(Tz.offset ~hours:(-8) ~minutes:0) in 444 + check_decode_ok "with offset" datetime input expected 445 + 446 + let test_datetime_codec_roundtrip () = 447 + let dt = Datetime.make 448 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 449 + ~time:(Time.make ~hour:14 ~minute:30 ~second:45 ~frac:0.123 ()) 450 + ~tz:(Tz.offset ~hours:5 ~minutes:30) in 451 + check_roundtrip "datetime roundtrip" datetime dt 452 + 453 + let test_datetime_local_codec () = 454 + let input = "value = 2024-06-15T14:30:00" in 455 + let expected = Datetime_local.make 456 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 457 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) in 458 + check_decode_ok "basic" datetime_local input expected 459 + 460 + let test_datetime_local_codec_roundtrip () = 461 + let dt = Datetime_local.make 462 + ~date:(Date.make ~year:2024 ~month:6 ~day:15) 463 + ~time:(Time.make ~hour:14 ~minute:30 ~second:0 ()) in 464 + check_roundtrip "datetime_local roundtrip" datetime_local dt 465 + 466 + let test_date_local_codec () = 467 + let input = "value = 2024-06-15" in 468 + let expected = Date.make ~year:2024 ~month:6 ~day:15 in 469 + check_decode_ok "basic" date_local input expected 470 + 471 + let test_date_local_codec_roundtrip () = 472 + let d = Date.make ~year:2024 ~month:6 ~day:15 in 473 + check_roundtrip "date_local roundtrip" date_local d 474 + 475 + let test_time_local_codec () = 476 + let input = "value = 14:30:45" in 477 + let expected = Time.make ~hour:14 ~minute:30 ~second:45 () in 478 + check_decode_ok "basic" time_local input expected 479 + 480 + let test_time_local_codec_roundtrip () = 481 + let t = Time.make ~hour:14 ~minute:30 ~second:45 ~frac:0.123 () in 482 + check_roundtrip "time_local roundtrip" time_local t 483 + 484 + let test_datetime_string_codec () = 485 + check_decode_ok "offset dt" datetime_string "value = 2024-06-15T14:30:00Z" "2024-06-15T14:30:00Z"; 486 + check_decode_ok "local dt" datetime_string "value = 2024-06-15T14:30:00" "2024-06-15T14:30:00"; 487 + check_decode_ok "date" datetime_string "value = 2024-06-15" "2024-06-15"; 488 + check_decode_ok "time" datetime_string "value = 14:30:00" "14:30:00" 489 + 490 + (* ============================================================================ 491 + Combinator Tests 492 + ============================================================================ *) 493 + 494 + (* ---- Map combinator ---- *) 495 + 496 + let uppercase_string = 497 + map string ~dec:String.uppercase_ascii ~enc:String.lowercase_ascii 498 + 499 + let test_map_combinator () = 500 + check_decode_ok "uppercase" uppercase_string {|value = "hello"|} "HELLO" 501 + 502 + let test_map_roundtrip () = 503 + check_roundtrip "map roundtrip" uppercase_string "HELLO" 504 + 505 + let doubled_int = 506 + map int ~dec:(fun x -> x * 2) ~enc:(fun x -> x / 2) 507 + 508 + let test_map_int () = 509 + check_decode_ok "doubled" doubled_int "value = 21" 42; 510 + check_roundtrip "doubled roundtrip" doubled_int 42 511 + 512 + (* ---- Const combinator ---- *) 513 + 514 + let test_const () = 515 + let c = const "default_value" in 516 + check_decode_ok "const ignores input" c "value = 42" "default_value"; 517 + check_decode_ok "const ignores string" c {|value = "ignored"|} "default_value" 518 + 519 + (* ---- Enum combinator ---- *) 520 + 521 + type level = Debug | Info | Warn | Error 522 + 523 + let level_codec = 524 + enum [ 525 + "debug", Debug; 526 + "info", Info; 527 + "warn", Warn; 528 + "error", Error; 529 + ] 530 + 531 + let test_enum () = 532 + check_decode_ok "debug" level_codec {|value = "debug"|} Debug; 533 + check_decode_ok "info" level_codec {|value = "info"|} Info; 534 + check_decode_ok "warn" level_codec {|value = "warn"|} Warn; 535 + check_decode_ok "error" level_codec {|value = "error"|} Error 536 + 537 + let test_enum_roundtrip () = 538 + check_roundtrip "debug" level_codec Debug; 539 + check_roundtrip "error" level_codec Error 540 + 541 + let test_enum_unknown () = 542 + check_decode_error "unknown value" level_codec {|value = "trace"|} 543 + 544 + let test_enum_type_error () = 545 + check_decode_error "not string" level_codec "value = 42" 546 + 547 + (* ---- Option combinator ---- *) 548 + 549 + let test_option_codec () = 550 + let opt_int = option int in 551 + check_decode_ok "some" opt_int "value = 42" (Some 42) 552 + 553 + let test_option_roundtrip () = 554 + let opt_str = option string in 555 + check_roundtrip "some string" opt_str (Some "hello") 556 + 557 + (* ---- Result combinator ---- *) 558 + 559 + let string_or_int_codec : (string, int) result t = result ~ok:string ~error:int 560 + 561 + let test_result_codec () = 562 + check_decode_ok "ok string" string_or_int_codec {|value = "hello"|} (Ok "hello"); 563 + check_decode_ok "error int" string_or_int_codec "value = 42" (Error 42) 564 + 565 + let test_result_roundtrip () = 566 + check_roundtrip "ok" string_or_int_codec (Ok "hello"); 567 + check_roundtrip "error" string_or_int_codec (Error 42) 568 + 569 + (* ---- Recursive codec ---- *) 570 + 571 + (* Simple recursive structure for testing rec' *) 572 + type nested_list = { 573 + value : int; 574 + next : nested_list option; 575 + } 576 + 577 + let rec nested_list_codec = lazy ( 578 + Table.( 579 + obj (fun value next -> { value; next }) 580 + |> mem "value" int ~enc:(fun n -> n.value) 581 + |> opt_mem "next" (rec' nested_list_codec) ~enc:(fun n -> n.next) 582 + |> finish 583 + ) 584 + ) 585 + 586 + let test_recursive_codec () = 587 + let input = {| 588 + [value] 589 + value = 1 590 + 591 + [value.next] 592 + value = 2 593 + 594 + [value.next.next] 595 + value = 3 596 + |} in 597 + let expected = { 598 + value = 1; 599 + next = Some { 600 + value = 2; 601 + next = Some { value = 3; next = None } 602 + } 603 + } in 604 + check_decode_table_ok "nested list" (rec' nested_list_codec) input expected 605 + 606 + (* ============================================================================ 607 + Array Codec Tests 608 + ============================================================================ *) 609 + 610 + let test_list_codec () = 611 + check_decode_ok "int list" (list int) "value = [1, 2, 3]" [1; 2; 3]; 612 + check_decode_ok "empty list" (list int) "value = []" []; 613 + check_decode_ok "string list" (list string) {|value = ["a", "b", "c"]|} ["a"; "b"; "c"] 614 + 615 + let test_list_roundtrip () = 616 + check_roundtrip "int list" (list int) [1; 2; 3]; 617 + check_roundtrip "empty" (list int) []; 618 + check_roundtrip "strings" (list string) ["hello"; "world"] 619 + 620 + let test_array_codec () = 621 + check_decode_ok "int array" (array int) "value = [1, 2, 3]" [|1; 2; 3|]; 622 + check_decode_ok "empty array" (array int) "value = []" [||] 623 + 624 + let test_array_roundtrip () = 625 + check_roundtrip "int array" (array int) [|1; 2; 3|]; 626 + check_roundtrip "empty" (array int) [||] 627 + 628 + let test_nested_list () = 629 + let nested = list (list int) in 630 + check_decode_ok "nested" nested "value = [[1, 2], [3, 4], [5]]" [[1; 2]; [3; 4]; [5]]; 631 + check_roundtrip "nested roundtrip" nested [[1; 2]; [3; 4]] 632 + 633 + let test_list_of_tables () = 634 + let point_codec = Table.( 635 + obj (fun x y -> (x, y)) 636 + |> mem "x" int ~enc:fst 637 + |> mem "y" int ~enc:snd 638 + |> finish 639 + ) in 640 + let points_codec = list point_codec in 641 + let input = {|value = [{x = 1, y = 2}, {x = 3, y = 4}]|} in 642 + check_decode_ok "list of inline tables" points_codec input [(1, 2); (3, 4)] 643 + 644 + let test_list_type_error () = 645 + check_decode_error "not array" (list int) "value = 42"; 646 + check_decode_error "mixed types" (list int) {|value = [1, "two", 3]|} 647 + 648 + (* ============================================================================ 649 + Table Codec Tests 650 + ============================================================================ *) 651 + 652 + (* ---- Basic table ---- *) 653 + 654 + type point = { x : int; y : int } 655 + 656 + let point_codec = 657 + Table.( 658 + obj (fun x y -> { x; y }) 659 + |> mem "x" int ~enc:(fun p -> p.x) 660 + |> mem "y" int ~enc:(fun p -> p.y) 661 + |> finish 662 + ) 663 + 664 + let test_table_codec () = 665 + let input = {| 666 + [value] 667 + x = 10 668 + y = 20 669 + |} in 670 + check_decode_table_ok "point" point_codec input { x = 10; y = 20 } 671 + 672 + let test_table_roundtrip () = 673 + check_roundtrip "point roundtrip" point_codec { x = 5; y = 15 } 674 + 675 + let test_table_missing_member () = 676 + let input = {| 677 + [value] 678 + x = 10 679 + |} in 680 + check_decode_table_error "missing y" point_codec input 681 + 682 + let test_table_type_error () = 683 + check_decode_error "not table" point_codec "value = 42" 684 + 685 + (* ---- Optional members ---- *) 686 + 687 + type config = { 688 + name : string; 689 + debug : bool; 690 + timeout : int option; 691 + } 692 + 693 + let config_codec = 694 + Table.( 695 + obj (fun name debug timeout -> { name; debug; timeout }) 696 + |> mem "name" string ~enc:(fun c -> c.name) 697 + |> mem "debug" bool ~enc:(fun c -> c.debug) ~dec_absent:false 698 + |> opt_mem "timeout" int ~enc:(fun c -> c.timeout) 699 + |> finish 700 + ) 701 + 702 + let test_optional_members () = 703 + let input1 = {| 704 + [value] 705 + name = "test" 706 + debug = true 707 + timeout = 30 708 + |} in 709 + check_decode_table_ok "with all" config_codec input1 710 + { name = "test"; debug = true; timeout = Some 30 }; 711 + 712 + let input2 = {| 713 + [value] 714 + name = "test" 715 + |} in 716 + check_decode_table_ok "with defaults" config_codec input2 717 + { name = "test"; debug = false; timeout = None } 718 + 719 + let test_optional_roundtrip () = 720 + let c1 = { name = "app"; debug = true; timeout = Some 60 } in 721 + check_roundtrip "with timeout" config_codec c1; 722 + 723 + let c2 = { name = "app"; debug = false; timeout = None } in 724 + check_roundtrip "without timeout" config_codec c2 725 + 726 + let test_opt_mem_omits_none () = 727 + let c = { name = "app"; debug = false; timeout = None } in 728 + let toml = encode config_codec c in 729 + (* Just verify encoding doesn't crash *) 730 + let _ = Toml.to_toml_string toml in 731 + (* Verify None is not encoded *) 732 + match Toml.find_opt "timeout" toml with 733 + | None -> () 734 + | Some _ -> Alcotest.fail "timeout should not be encoded when None" 735 + 736 + (* ---- enc_omit ---- *) 737 + 738 + type with_omit = { 739 + always : string; 740 + maybe : string; 741 + } 742 + 743 + let with_omit_codec = 744 + Table.( 745 + obj (fun always maybe -> { always; maybe }) 746 + |> mem "always" string ~enc:(fun r -> r.always) 747 + |> mem "maybe" string ~enc:(fun r -> r.maybe) 748 + ~dec_absent:"" ~enc_omit:(fun s -> String.length s = 0) 749 + |> finish 750 + ) 751 + 752 + let test_enc_omit () = 753 + let r1 = { always = "hello"; maybe = "world" } in 754 + let toml1 = encode with_omit_codec r1 in 755 + (match Toml.find_opt "maybe" toml1 with 756 + | Some _ -> () 757 + | None -> Alcotest.fail "maybe should be encoded when non-empty"); 758 + 759 + let r2 = { always = "hello"; maybe = "" } in 760 + let toml2 = encode with_omit_codec r2 in 761 + (match Toml.find_opt "maybe" toml2 with 762 + | None -> () 763 + | Some _ -> Alcotest.fail "maybe should be omitted when empty") 764 + 765 + (* ---- Nested tables ---- *) 766 + 767 + type server = { 768 + host : string; 769 + port : int; 770 + } 771 + 772 + type app_config = { 773 + title : string; 774 + server : server; 775 + } 776 + 777 + let server_codec = 778 + Table.( 779 + obj (fun host port -> { host; port }) 780 + |> mem "host" string ~enc:(fun s -> s.host) 781 + |> mem "port" int ~enc:(fun s -> s.port) 782 + |> finish 783 + ) 784 + 785 + let app_config_codec = 786 + Table.( 787 + obj (fun title server -> { title; server }) 788 + |> mem "title" string ~enc:(fun c -> c.title) 789 + |> mem "server" server_codec ~enc:(fun c -> c.server) 790 + |> finish 791 + ) 792 + 793 + let test_nested_tables () = 794 + let input = {| 795 + [value] 796 + title = "My App" 797 + 798 + [value.server] 799 + host = "localhost" 800 + port = 8080 801 + |} in 802 + check_decode_table_ok "nested" app_config_codec input 803 + { title = "My App"; server = { host = "localhost"; port = 8080 } } 804 + 805 + let test_nested_roundtrip () = 806 + let config = { 807 + title = "Production"; 808 + server = { host = "0.0.0.0"; port = 443 }; 809 + } in 810 + check_roundtrip "nested roundtrip" app_config_codec config 811 + 812 + (* ---- Deeply nested tables ---- *) 813 + 814 + type deep = { 815 + a : int; 816 + inner : deep option; 817 + } 818 + 819 + let rec deep_codec = lazy ( 820 + Table.( 821 + obj (fun a inner -> { a; inner }) 822 + |> mem "a" int ~enc:(fun d -> d.a) 823 + |> opt_mem "inner" (rec' deep_codec) ~enc:(fun d -> d.inner) 824 + |> finish 825 + ) 826 + ) 827 + 828 + let test_deeply_nested () = 829 + let input = {| 830 + [value] 831 + a = 1 832 + 833 + [value.inner] 834 + a = 2 835 + 836 + [value.inner.inner] 837 + a = 3 838 + |} in 839 + let expected = { 840 + a = 1; 841 + inner = Some { 842 + a = 2; 843 + inner = Some { a = 3; inner = None } 844 + } 845 + } in 846 + check_decode_table_ok "deep" (rec' deep_codec) input expected 847 + 848 + (* ---- Unknown member handling ---- *) 849 + 850 + type strict_config = { 851 + name : string; 852 + } 853 + 854 + let strict_config_codec = 855 + Table.( 856 + obj (fun name -> { name }) 857 + |> mem "name" string ~enc:(fun c -> c.name) 858 + |> error_unknown 859 + |> finish 860 + ) 861 + 862 + let test_error_unknown () = 863 + let input1 = {| 864 + [value] 865 + name = "test" 866 + |} in 867 + check_decode_table_ok "known only" strict_config_codec input1 { name = "test" }; 868 + 869 + (* error_unknown raises an exception for unknown members *) 870 + let input2 = {| 871 + [value] 872 + name = "test" 873 + extra = 42 874 + |} in 875 + let toml = Toml.parse input2 in 876 + let value_toml = Toml.find "value" toml in 877 + try 878 + let _ = decode strict_config_codec value_toml in 879 + Alcotest.fail "expected exception for unknown member" 880 + with Toml.Error.Error _ -> () 881 + 882 + type extensible_config = { 883 + name : string; 884 + extras : (string * Toml.t) list; 885 + } 886 + 887 + let extensible_config_codec = 888 + Table.( 889 + obj (fun name extras -> { name; extras }) 890 + |> mem "name" string ~enc:(fun c -> c.name) 891 + |> keep_unknown (Mems.assoc value) ~enc:(fun c -> c.extras) 892 + |> finish 893 + ) 894 + 895 + let test_keep_unknown () = 896 + let input = {| 897 + [value] 898 + name = "test" 899 + extra1 = 42 900 + extra2 = "hello" 901 + |} in 902 + let toml = Toml.parse input in 903 + let value_toml = Toml.find "value" toml in 904 + match decode extensible_config_codec value_toml with 905 + | Ok c -> 906 + Alcotest.(check string) "name" "test" c.name; 907 + Alcotest.(check int) "extras count" 2 (List.length c.extras); 908 + (* Check extras contains the unknown members *) 909 + let has_extra1 = List.exists (fun (k, _) -> k = "extra1") c.extras in 910 + let has_extra2 = List.exists (fun (k, _) -> k = "extra2") c.extras in 911 + Alcotest.(check bool) "has extra1" true has_extra1; 912 + Alcotest.(check bool) "has extra2" true has_extra2 913 + | Error e -> 914 + Alcotest.failf "decode failed: %s" (Toml.Error.to_string e) 915 + 916 + let test_keep_unknown_roundtrip () = 917 + let c = { 918 + name = "test"; 919 + extras = [("custom", Toml.Int 42L); ("flag", Toml.Bool true)] 920 + } in 921 + check_roundtrip "keep_unknown roundtrip" extensible_config_codec c 922 + 923 + (* ---- Skip unknown (default) ---- *) 924 + 925 + type lenient_config = { 926 + lname : string; 927 + } 928 + 929 + let lenient_codec = 930 + Table.( 931 + obj (fun lname -> { lname }) 932 + |> mem "name" string ~enc:(fun c -> c.lname) 933 + |> skip_unknown 934 + |> finish 935 + ) 936 + 937 + let test_skip_unknown () = 938 + let input = {| 939 + [value] 940 + name = "test" 941 + ignored = 42 942 + also_ignored = "hello" 943 + |} in 944 + check_decode_table_ok "skip unknown" lenient_codec input { lname = "test" } 945 + 946 + (* ============================================================================ 947 + Array of Tables Tests 948 + ============================================================================ *) 949 + 950 + type product = { 951 + name : string; 952 + price : float; 953 + } 954 + 955 + let product_codec = 956 + Table.( 957 + obj (fun name price -> { name; price }) 958 + |> mem "name" string ~enc:(fun p -> p.name) 959 + |> mem "price" float ~enc:(fun p -> p.price) 960 + |> finish 961 + ) 962 + 963 + let test_array_of_tables () = 964 + let products_codec = array_of_tables product_codec in 965 + let input = {| 966 + [[value]] 967 + name = "Apple" 968 + price = 1.50 969 + 970 + [[value]] 971 + name = "Banana" 972 + price = 0.75 973 + |} in 974 + let expected = [ 975 + { name = "Apple"; price = 1.50 }; 976 + { name = "Banana"; price = 0.75 }; 977 + ] in 978 + check_decode_ok "products" products_codec input expected 979 + 980 + let test_array_of_tables_roundtrip () = 981 + let products_codec = array_of_tables product_codec in 982 + let products = [ 983 + { name = "Apple"; price = 1.50 }; 984 + { name = "Banana"; price = 0.75 }; 985 + ] in 986 + check_roundtrip "products roundtrip" products_codec products 987 + 988 + let test_array_of_tables_empty () = 989 + let products_codec = array_of_tables product_codec in 990 + check_decode_ok "empty" products_codec "value = []" [] 991 + 992 + (* ============================================================================ 993 + Any/Value Codec Tests 994 + ============================================================================ *) 995 + 996 + let test_value_codec () = 997 + check_decode_ok "int" value "value = 42" (Toml.Int 42L); 998 + check_decode_ok "string" value {|value = "hello"|} (Toml.String "hello"); 999 + check_decode_ok "bool" value "value = true" (Toml.Bool true); 1000 + check_decode_ok "float" value "value = 3.14" (Toml.Float 3.14); 1001 + check_decode_ok "array" value "value = [1, 2, 3]" 1002 + (Toml.Array [Toml.Int 1L; Toml.Int 2L; Toml.Int 3L]) 1003 + 1004 + let test_value_roundtrip () = 1005 + check_roundtrip "int" value (Toml.Int 42L); 1006 + check_roundtrip "string" value (Toml.String "hello"); 1007 + check_roundtrip "bool" value (Toml.Bool true) 1008 + 1009 + let test_value_mems_codec () = 1010 + let input = {| 1011 + [value] 1012 + a = 1 1013 + b = "hello" 1014 + c = true 1015 + |} in 1016 + let toml = Toml.parse input in 1017 + let v = Toml.find "value" toml in 1018 + match decode value_mems v with 1019 + | Ok pairs -> 1020 + Alcotest.(check int) "count" 3 (List.length pairs); 1021 + let has_a = List.exists (fun (k, _) -> k = "a") pairs in 1022 + let has_b = List.exists (fun (k, _) -> k = "b") pairs in 1023 + let has_c = List.exists (fun (k, _) -> k = "c") pairs in 1024 + Alcotest.(check bool) "has a" true has_a; 1025 + Alcotest.(check bool) "has b" true has_b; 1026 + Alcotest.(check bool) "has c" true has_c 1027 + | Error e -> 1028 + Alcotest.failf "decode failed: %s" (Toml.Error.to_string e) 1029 + 1030 + type string_or_int_any = String of string | Int of int 1031 + 1032 + let string_or_int_any_codec = 1033 + any () 1034 + ~dec_string:(map string ~dec:(fun s -> String s)) 1035 + ~dec_int:(map int ~dec:(fun i -> Int i)) 1036 + ~enc:(function 1037 + | String _ -> map string ~enc:(function String s -> s | _ -> "") 1038 + | Int _ -> map int ~enc:(function Int i -> i | _ -> 0)) 1039 + 1040 + let test_any_codec () = 1041 + check_decode_ok "string" string_or_int_any_codec {|value = "hello"|} (String "hello"); 1042 + check_decode_ok "int" string_or_int_any_codec "value = 42" (Int 42) 1043 + 1044 + let test_any_type_error () = 1045 + check_decode_error "bool not handled" string_or_int_any_codec "value = true" 1046 + 1047 + (* ============================================================================ 1048 + Encoding/Decoding Function Tests 1049 + ============================================================================ *) 1050 + 1051 + let test_decode_string () = 1052 + let toml_str = {|name = "test"|} in 1053 + let codec = Table.( 1054 + obj (fun name -> name) 1055 + |> mem "name" string ~enc:Fun.id 1056 + |> finish 1057 + ) in 1058 + match decode_string codec toml_str with 1059 + | Ok name -> Alcotest.(check string) "name" "test" name 1060 + | Error e -> Alcotest.failf "decode failed: %s" (Toml.Error.to_string e) 1061 + 1062 + let test_decode_string_exn () = 1063 + let toml_str = {|value = 42|} in 1064 + let toml = Toml.parse toml_str in 1065 + let v = Toml.find "value" toml in 1066 + let result = decode_exn int v in 1067 + Alcotest.(check int) "value" 42 result 1068 + 1069 + let test_encode_string () = 1070 + let codec = Table.( 1071 + obj (fun name -> name) 1072 + |> mem "name" string ~enc:Fun.id 1073 + |> finish 1074 + ) in 1075 + let s = encode_string codec "test" in 1076 + (* Just verify it produces valid TOML *) 1077 + let _ = Toml.parse s in 1078 + () 1079 + 1080 + (* ============================================================================ 1081 + Edge Cases and Error Handling 1082 + ============================================================================ *) 1083 + 1084 + let test_empty_table () = 1085 + let empty_codec = Table.( 1086 + obj () 1087 + |> finish 1088 + ) in 1089 + let input = "[value]" in 1090 + check_decode_table_ok "empty table" empty_codec input () 1091 + 1092 + let test_unicode_keys () = 1093 + let codec = Table.( 1094 + obj (fun v -> v) 1095 + |> mem "\xE4\xB8\xAD\xE6\x96\x87" string ~enc:Fun.id (* "中文" in UTF-8 *) 1096 + |> finish 1097 + ) in 1098 + let input = {| 1099 + [value] 1100 + "中文" = "hello" 1101 + |} in 1102 + check_decode_table_ok "unicode key" codec input "hello" 1103 + 1104 + let test_special_string_values () = 1105 + check_decode_ok "empty" string {|value = ""|} ""; 1106 + check_decode_ok "spaces" string {|value = " "|} " "; 1107 + check_decode_ok "newlines" string {|value = "a\nb\nc"|} "a\nb\nc" 1108 + 1109 + let test_large_integers () = 1110 + check_decode_ok "large" int64 "value = 9007199254740992" 9007199254740992L; 1111 + check_decode_ok "max i64" int64 "value = 9223372036854775807" 9223372036854775807L 1112 + 1113 + let test_codec_kind_doc () = 1114 + Alcotest.(check string) "bool kind" "boolean" (kind bool); 1115 + Alcotest.(check string) "int kind" "integer" (kind int); 1116 + Alcotest.(check string) "string kind" "string" (kind string); 1117 + Alcotest.(check string) "float kind" "float" (kind float); 1118 + 1119 + let documented = with_doc ~kind:"custom" ~doc:"A custom codec" int in 1120 + Alcotest.(check string) "custom kind" "custom" (kind documented); 1121 + Alcotest.(check string) "custom doc" "A custom codec" (doc documented) 1122 + 1123 + let test_duplicate_member_error () = 1124 + try 1125 + let _ = Table.( 1126 + obj (fun a b -> (a, b)) 1127 + |> mem "same" int ~enc:fst 1128 + |> mem "same" int ~enc:snd 1129 + |> finish 1130 + ) in 1131 + Alcotest.fail "should raise on duplicate member" 1132 + with Invalid_argument _ -> () 1133 + 1134 + (* ============================================================================ 1135 + Test Registration 1136 + ============================================================================ *) 1137 + 1138 + let tz_tests = [ 1139 + "utc", `Quick, test_tz_utc; 1140 + "offset", `Quick, test_tz_offset; 1141 + "parse", `Quick, test_tz_parse; 1142 + "compare", `Quick, test_tz_compare; 1143 + ] 1144 + 1145 + let date_tests = [ 1146 + "basic", `Quick, test_date_basic; 1147 + "equal", `Quick, test_date_equal; 1148 + "compare", `Quick, test_date_compare; 1149 + "parse", `Quick, test_date_parse; 1150 + "edge cases", `Quick, test_date_edge_cases; 1151 + ] 1152 + 1153 + let time_tests = [ 1154 + "basic", `Quick, test_time_basic; 1155 + "fractional", `Quick, test_time_fractional; 1156 + "equal", `Quick, test_time_equal; 1157 + "compare", `Quick, test_time_compare; 1158 + "parse", `Quick, test_time_parse; 1159 + "edge cases", `Quick, test_time_edge_cases; 1160 + ] 1161 + 1162 + let datetime_tests = [ 1163 + "basic", `Quick, test_datetime_basic; 1164 + "with offset", `Quick, test_datetime_with_offset; 1165 + "with frac", `Quick, test_datetime_with_frac; 1166 + "parse", `Quick, test_datetime_parse; 1167 + "equal compare", `Quick, test_datetime_equal_compare; 1168 + ] 1169 + 1170 + let datetime_local_tests = [ 1171 + "basic", `Quick, test_datetime_local_basic; 1172 + "parse", `Quick, test_datetime_local_parse; 1173 + "equal compare", `Quick, test_datetime_local_equal_compare; 1174 + ] 1175 + 1176 + let bool_tests = [ 1177 + "codec", `Quick, test_bool_codec; 1178 + "roundtrip", `Quick, test_bool_roundtrip; 1179 + "type error", `Quick, test_bool_type_error; 1180 + ] 1181 + 1182 + let int_tests = [ 1183 + "codec", `Quick, test_int_codec; 1184 + "formats", `Quick, test_int_formats; 1185 + "roundtrip", `Quick, test_int_roundtrip; 1186 + "type error", `Quick, test_int_type_error; 1187 + ] 1188 + 1189 + let int32_tests = [ 1190 + "codec", `Quick, test_int32_codec; 1191 + "roundtrip", `Quick, test_int32_roundtrip; 1192 + ] 1193 + 1194 + let int64_tests = [ 1195 + "codec", `Quick, test_int64_codec; 1196 + "roundtrip", `Quick, test_int64_roundtrip; 1197 + ] 1198 + 1199 + let float_tests = [ 1200 + "codec", `Quick, test_float_codec; 1201 + "special", `Quick, test_float_special; 1202 + "roundtrip", `Quick, test_float_roundtrip; 1203 + "type error", `Quick, test_float_type_error; 1204 + ] 1205 + 1206 + let number_tests = [ 1207 + "codec", `Quick, test_number_codec; 1208 + "type error", `Quick, test_number_type_error; 1209 + ] 1210 + 1211 + let string_tests = [ 1212 + "codec", `Quick, test_string_codec; 1213 + "escapes", `Quick, test_string_escapes; 1214 + "multiline", `Quick, test_string_multiline; 1215 + "roundtrip", `Quick, test_string_roundtrip; 1216 + "type error", `Quick, test_string_type_error; 1217 + ] 1218 + 1219 + let datetime_codec_tests = [ 1220 + "offset datetime", `Quick, test_datetime_codec; 1221 + "offset datetime with tz", `Quick, test_datetime_codec_offset; 1222 + "offset datetime roundtrip", `Quick, test_datetime_codec_roundtrip; 1223 + "local datetime", `Quick, test_datetime_local_codec; 1224 + "local datetime roundtrip", `Quick, test_datetime_local_codec_roundtrip; 1225 + "local date", `Quick, test_date_local_codec; 1226 + "local date roundtrip", `Quick, test_date_local_codec_roundtrip; 1227 + "local time", `Quick, test_time_local_codec; 1228 + "local time roundtrip", `Quick, test_time_local_codec_roundtrip; 1229 + "datetime string", `Quick, test_datetime_string_codec; 1230 + ] 1231 + 1232 + let combinator_tests = [ 1233 + "map", `Quick, test_map_combinator; 1234 + "map roundtrip", `Quick, test_map_roundtrip; 1235 + "map int", `Quick, test_map_int; 1236 + "const", `Quick, test_const; 1237 + "enum", `Quick, test_enum; 1238 + "enum roundtrip", `Quick, test_enum_roundtrip; 1239 + "enum unknown", `Quick, test_enum_unknown; 1240 + "enum type error", `Quick, test_enum_type_error; 1241 + "option", `Quick, test_option_codec; 1242 + "option roundtrip", `Quick, test_option_roundtrip; 1243 + "result", `Quick, test_result_codec; 1244 + "result roundtrip", `Quick, test_result_roundtrip; 1245 + "recursive", `Quick, test_recursive_codec; 1246 + ] 1247 + 1248 + let array_tests = [ 1249 + "list", `Quick, test_list_codec; 1250 + "list roundtrip", `Quick, test_list_roundtrip; 1251 + "array", `Quick, test_array_codec; 1252 + "array roundtrip", `Quick, test_array_roundtrip; 1253 + "nested list", `Quick, test_nested_list; 1254 + "list of tables", `Quick, test_list_of_tables; 1255 + "list type error", `Quick, test_list_type_error; 1256 + ] 1257 + 1258 + let table_tests = [ 1259 + "basic", `Quick, test_table_codec; 1260 + "roundtrip", `Quick, test_table_roundtrip; 1261 + "missing member", `Quick, test_table_missing_member; 1262 + "type error", `Quick, test_table_type_error; 1263 + "optional members", `Quick, test_optional_members; 1264 + "optional roundtrip", `Quick, test_optional_roundtrip; 1265 + "opt_mem omits none", `Quick, test_opt_mem_omits_none; 1266 + "enc_omit", `Quick, test_enc_omit; 1267 + "nested tables", `Quick, test_nested_tables; 1268 + "nested roundtrip", `Quick, test_nested_roundtrip; 1269 + "deeply nested", `Quick, test_deeply_nested; 1270 + "error unknown", `Quick, test_error_unknown; 1271 + "keep unknown", `Quick, test_keep_unknown; 1272 + "keep unknown roundtrip", `Quick, test_keep_unknown_roundtrip; 1273 + "skip unknown", `Quick, test_skip_unknown; 1274 + ] 1275 + 1276 + let array_of_tables_tests = [ 1277 + "basic", `Quick, test_array_of_tables; 1278 + "roundtrip", `Quick, test_array_of_tables_roundtrip; 1279 + "empty", `Quick, test_array_of_tables_empty; 1280 + ] 1281 + 1282 + let any_value_tests = [ 1283 + "value codec", `Quick, test_value_codec; 1284 + "value roundtrip", `Quick, test_value_roundtrip; 1285 + "value_mems", `Quick, test_value_mems_codec; 1286 + "any codec", `Quick, test_any_codec; 1287 + "any type error", `Quick, test_any_type_error; 1288 + ] 1289 + 1290 + let function_tests = [ 1291 + "decode_string", `Quick, test_decode_string; 1292 + "decode_exn", `Quick, test_decode_string_exn; 1293 + "encode_string", `Quick, test_encode_string; 1294 + ] 1295 + 1296 + let edge_case_tests = [ 1297 + "empty table", `Quick, test_empty_table; 1298 + "unicode keys", `Quick, test_unicode_keys; 1299 + "special strings", `Quick, test_special_string_values; 1300 + "large integers", `Quick, test_large_integers; 1301 + "codec kind doc", `Quick, test_codec_kind_doc; 1302 + "duplicate member error", `Quick, test_duplicate_member_error; 1303 + ] 1304 + 1305 + let () = 1306 + Alcotest.run "tomlt_codec" [ 1307 + "tz", tz_tests; 1308 + "date", date_tests; 1309 + "time", time_tests; 1310 + "datetime", datetime_tests; 1311 + "datetime_local", datetime_local_tests; 1312 + "bool", bool_tests; 1313 + "int", int_tests; 1314 + "int32", int32_tests; 1315 + "int64", int64_tests; 1316 + "float", float_tests; 1317 + "number", number_tests; 1318 + "string", string_tests; 1319 + "datetime_codecs", datetime_codec_tests; 1320 + "combinators", combinator_tests; 1321 + "arrays", array_tests; 1322 + "tables", table_tests; 1323 + "array_of_tables", array_of_tables_tests; 1324 + "any_value", any_value_tests; 1325 + "functions", function_tests; 1326 + "edge_cases", edge_case_tests; 1327 + ]
+39
test/test_debug.ml
··· 1 + open Tomlt 2 + 3 + type config = { name : string; timeout : int option } 4 + 5 + let config_codec = 6 + Table.( 7 + obj (fun name timeout -> { name; timeout }) 8 + |> mem "name" string ~enc:(fun c -> c.name) 9 + |> opt_mem "timeout" int ~enc:(fun c -> c.timeout) 10 + |> finish 11 + ) 12 + 13 + let () = 14 + (* Test encoding *) 15 + let c = { name = "app"; timeout = None } in 16 + let toml = encode config_codec c in 17 + Printf.printf "Encoded TOML:\n%s\n" (Toml.to_toml_string toml); 18 + 19 + (* Show raw structure *) 20 + Printf.printf "\nRaw structure: %s\n" (match toml with 21 + | Toml.Table pairs -> 22 + String.concat ", " (List.map (fun (k, v) -> 23 + Printf.sprintf "%s=%s" k (match v with 24 + | Toml.String s -> Printf.sprintf "\"%s\"" s 25 + | Toml.Bool b -> string_of_bool b 26 + | Toml.Int i -> Int64.to_string i 27 + | _ -> "?" 28 + ) 29 + ) pairs) 30 + | _ -> "not a table"); 31 + 32 + (* Test decoding the encoded value *) 33 + Printf.printf "\nDecoding...\n"; 34 + match decode config_codec toml with 35 + | Ok { name; timeout } -> 36 + Printf.printf "Decoded: name=%s, timeout=%s\n" name 37 + (match timeout with Some t -> string_of_int t | None -> "None") 38 + | Error e -> 39 + Printf.printf "Decode error: %s\n" (Toml.Error.to_string e)
+1 -1
test/test_tomlt.ml
··· 1 1 (* Comprehensive test suite for tomlt - TOML 1.1 codec *) 2 2 3 - open Tomlt 3 + open Tomlt.Toml 4 4 5 5 (* Helper to parse and extract value *) 6 6 let parse s =
+3
test_jsont/dune
··· 1 + (test 2 + (name test_tomlt_jsont) 3 + (libraries tomlt tomlt_jsont alcotest))
+166
test_jsont/test_tomlt_jsont.ml
··· 1 + (* Tests for tomlt-jsont module *) 2 + 3 + open Alcotest 4 + 5 + module Toml = Tomlt.Toml 6 + 7 + (* Test jsont decode/encode *) 8 + let test_jsont_decode_encode name json expected_toml () = 9 + (* Test jsont decode *) 10 + match Tomlt_jsont.decode_jsont json with 11 + | Error e -> Alcotest.fail ("decode failed: " ^ e) 12 + | Ok toml -> 13 + check bool (name ^ " jsont decode") true (Toml.equal toml expected_toml); 14 + (* Test jsont encode then decode roundtrip *) 15 + match Tomlt_jsont.encode_jsont toml with 16 + | Error e -> Alcotest.fail ("encode failed: " ^ e) 17 + | Ok json' -> 18 + match Tomlt_jsont.decode_jsont json' with 19 + | Error e -> Alcotest.fail ("roundtrip decode failed: " ^ e) 20 + | Ok toml' -> 21 + check bool (name ^ " jsont roundtrip") true (Toml.equal toml toml') 22 + 23 + (* Test native encode/decode with table documents *) 24 + let test_native_roundtrip name toml () = 25 + let json = Tomlt_jsont.encode toml in 26 + let toml' = Tomlt_jsont.decode json in 27 + check bool (name ^ " roundtrip") true (Toml.equal toml toml') 28 + 29 + (* Test cases for jsont codec (handles scalar tagged values correctly) *) 30 + let jsont_tests = [ 31 + "string", `Quick, test_jsont_decode_encode "string" 32 + {|{"type":"string","value":"hello"}|} 33 + (Toml.String "hello"); 34 + 35 + "integer", `Quick, test_jsont_decode_encode "integer" 36 + {|{"type":"integer","value":"42"}|} 37 + (Toml.Int 42L); 38 + 39 + "float", `Quick, test_jsont_decode_encode "float" 40 + {|{"type":"float","value":"3.14"}|} 41 + (Toml.Float 3.14); 42 + 43 + "bool true", `Quick, test_jsont_decode_encode "bool true" 44 + {|{"type":"bool","value":"true"}|} 45 + (Toml.Bool true); 46 + 47 + "bool false", `Quick, test_jsont_decode_encode "bool false" 48 + {|{"type":"bool","value":"false"}|} 49 + (Toml.Bool false); 50 + 51 + "datetime", `Quick, test_jsont_decode_encode "datetime" 52 + {|{"type":"datetime","value":"1979-05-27T07:32:00Z"}|} 53 + (Toml.Datetime "1979-05-27T07:32:00Z"); 54 + 55 + "datetime-local", `Quick, test_jsont_decode_encode "datetime-local" 56 + {|{"type":"datetime-local","value":"1979-05-27T07:32:00"}|} 57 + (Toml.Datetime_local "1979-05-27T07:32:00"); 58 + 59 + "date-local", `Quick, test_jsont_decode_encode "date-local" 60 + {|{"type":"date-local","value":"1979-05-27"}|} 61 + (Toml.Date_local "1979-05-27"); 62 + 63 + "time-local", `Quick, test_jsont_decode_encode "time-local" 64 + {|{"type":"time-local","value":"07:32:00"}|} 65 + (Toml.Time_local "07:32:00"); 66 + 67 + "array of integers", `Quick, test_jsont_decode_encode "array of integers" 68 + {|[{"type":"integer","value":"1"},{"type":"integer","value":"2"},{"type":"integer","value":"3"}]|} 69 + (Toml.Array [Toml.Int 1L; Toml.Int 2L; Toml.Int 3L]); 70 + 71 + "array of strings", `Quick, test_jsont_decode_encode "array of strings" 72 + {|[{"type":"string","value":"a"},{"type":"string","value":"b"}]|} 73 + (Toml.Array [Toml.String "a"; Toml.String "b"]); 74 + 75 + "empty table", `Quick, test_jsont_decode_encode "empty table" 76 + {|{}|} 77 + (Toml.Table []); 78 + 79 + "simple table", `Quick, test_jsont_decode_encode "simple table" 80 + {|{"name":{"type":"string","value":"test"}}|} 81 + (Toml.Table [("name", Toml.String "test")]); 82 + 83 + "table with multiple types", `Quick, test_jsont_decode_encode "table with multiple types" 84 + {|{"name":{"type":"string","value":"test"},"count":{"type":"integer","value":"5"},"enabled":{"type":"bool","value":"true"}}|} 85 + (* Note: jsont uses String_map which sorts keys alphabetically *) 86 + (Toml.Table [ 87 + ("count", Toml.Int 5L); 88 + ("enabled", Toml.Bool true); 89 + ("name", Toml.String "test") 90 + ]); 91 + 92 + "nested table", `Quick, test_jsont_decode_encode "nested table" 93 + {|{"outer":{"inner":{"type":"string","value":"value"}}}|} 94 + (Toml.Table [("outer", Toml.Table [("inner", Toml.String "value")])]); 95 + 96 + "table with array", `Quick, test_jsont_decode_encode "table with array" 97 + {|{"items":[{"type":"integer","value":"1"},{"type":"integer","value":"2"}]}|} 98 + (Toml.Table [("items", Toml.Array [Toml.Int 1L; Toml.Int 2L])]); 99 + ] 100 + 101 + (* Test cases for native encode/decode (roundtrip with table documents) *) 102 + let native_tests = [ 103 + "empty table", `Quick, test_native_roundtrip "empty table" 104 + (Toml.Table []); 105 + 106 + "simple table", `Quick, test_native_roundtrip "simple table" 107 + (Toml.Table [("key", Toml.String "value")]); 108 + 109 + "table with all types", `Quick, test_native_roundtrip "table with all types" 110 + (Toml.Table [ 111 + ("string", Toml.String "hello"); 112 + ("integer", Toml.Int 42L); 113 + ("float", Toml.Float 3.14); 114 + ("bool", Toml.Bool true); 115 + ("datetime", Toml.Datetime "1979-05-27T07:32:00Z"); 116 + ("datetime_local", Toml.Datetime_local "1979-05-27T07:32:00"); 117 + ("date_local", Toml.Date_local "1979-05-27"); 118 + ("time_local", Toml.Time_local "07:32:00"); 119 + ]); 120 + 121 + "nested table", `Quick, test_native_roundtrip "nested table" 122 + (Toml.Table [ 123 + ("outer", Toml.Table [ 124 + ("inner", Toml.String "value") 125 + ]) 126 + ]); 127 + 128 + "table with array", `Quick, test_native_roundtrip "table with array" 129 + (Toml.Table [ 130 + ("items", Toml.Array [Toml.Int 1L; Toml.Int 2L; Toml.Int 3L]) 131 + ]); 132 + 133 + "complex document", `Quick, test_native_roundtrip "complex document" 134 + (Toml.Table [ 135 + ("title", Toml.String "TOML Example"); 136 + ("database", Toml.Table [ 137 + ("server", Toml.String "192.168.1.1"); 138 + ("ports", Toml.Array [Toml.Int 8000L; Toml.Int 8001L; Toml.Int 8002L]); 139 + ("enabled", Toml.Bool true); 140 + ]); 141 + ]); 142 + ] 143 + 144 + (* Test native compatibility with existing tests *) 145 + let compatibility_tests = [ 146 + "valid toml roundtrip", `Quick, (fun () -> 147 + let toml_str = {| 148 + [server] 149 + host = "localhost" 150 + port = 8080 151 + |} in 152 + match Toml.of_string toml_str with 153 + | Error _ -> Alcotest.fail "TOML parse failed" 154 + | Ok toml -> 155 + let json = Tomlt_jsont.encode toml in 156 + let toml' = Tomlt_jsont.decode json in 157 + check bool "roundtrip" true (Toml.equal toml toml') 158 + ); 159 + ] 160 + 161 + let () = 162 + run "tomlt_jsont" [ 163 + "jsont", jsont_tests; 164 + "native", native_tests; 165 + "compatibility", compatibility_tests; 166 + ]
+34
tomlt-jsont.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + version: "0.1.0" 4 + synopsis: "Jsont codecs for TOML tagged JSON format" 5 + description: 6 + "Convert between TOML values and the toml-test tagged JSON format using Jsont codecs" 7 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 8 + authors: ["Anil Madhavapeddy <anil@recoil.org>"] 9 + license: "ISC" 10 + homepage: "https://github.com/avsm/tomlt" 11 + bug-reports: "https://github.com/avsm/tomlt/issues" 12 + depends: [ 13 + "dune" {>= "3.0"} 14 + "ocaml" {>= "4.14.0"} 15 + "tomlt" {= version} 16 + "jsont" {>= "0.2.0"} 17 + "jsont-bytesrw" {>= "0.2.0"} 18 + "odoc" {with-doc} 19 + ] 20 + build: [ 21 + ["dune" "subst"] {dev} 22 + [ 23 + "dune" 24 + "build" 25 + "-p" 26 + name 27 + "-j" 28 + jobs 29 + "@install" 30 + "@runtest" {with-test} 31 + "@doc" {with-doc} 32 + ] 33 + ] 34 + dev-repo: "git+https://github.com/avsm/tomlt.git"