TOML 1.1 codecs for OCaml
at main 2277 lines 74 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6open Bytesrw 7 8(* Aliases for cleaner code *) 9module Toml = Tomlt.Toml 10module Toml_error = Toml.Error 11 12(* Lexer - works directly on bytes buffer filled from Bytes.Reader *) 13 14type token = 15 | Tok_lbracket 16 | Tok_rbracket 17 | Tok_lbrace 18 | Tok_rbrace 19 | Tok_equals 20 | Tok_comma 21 | Tok_dot 22 | Tok_newline 23 | Tok_eof 24 | Tok_bare_key of string 25 | Tok_basic_string of string 26 | Tok_literal_string of string 27 | Tok_ml_basic_string of 28 string (* Multiline basic string - not valid as key *) 29 | Tok_ml_literal_string of 30 string (* Multiline literal string - not valid as key *) 31 | Tok_integer of 32 int64 * string (* value, original string for key reconstruction *) 33 | Tok_float of 34 float * string (* value, original string for key reconstruction *) 35 | Tok_datetime of string 36 | Tok_datetime_local of string 37 | Tok_date_local of string 38 | Tok_time_local of string 39 40type lexer = { 41 input : bytes; (* Buffer containing input data *) 42 input_len : int; (* Length of valid data in input *) 43 mutable pos : int; 44 mutable line : int; 45 mutable col : int; 46 file : string; 47} 48 49(* Create lexer from string (copies to bytes) *) 50let lexer ?(file = "-") s = 51 let input = Bytes.of_string s in 52 { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 53 54(* Create lexer directly from Bytes.Reader - reads all data into buffer *) 55let lexer_from_reader ?(file = "-") r = 56 (* Read all slices into a buffer *) 57 let buf = Buffer.create 4096 in 58 let rec read_all () = 59 let slice = Bytes.Reader.read r in 60 if Bytes.Slice.is_eod slice then () 61 else begin 62 Bytes.Slice.add_to_buffer buf slice; 63 read_all () 64 end 65 in 66 read_all (); 67 let input = Buffer.to_bytes buf in 68 { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file } 69 70let is_eof l = l.pos >= l.input_len 71let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos) 72 73let peek2 l = 74 if l.pos + 1 >= l.input_len then None 75 else Some (Bytes.get l.input (l.pos + 1)) 76 77let peek_n l n = 78 if l.pos + n - 1 >= l.input_len then None 79 else Some (Bytes.sub_string l.input l.pos n) 80 81let advance l = 82 if not (is_eof l) then begin 83 if Bytes.get l.input l.pos = '\n' then begin 84 l.line <- l.line + 1; 85 l.col <- 1 86 end 87 else l.col <- l.col + 1; 88 l.pos <- l.pos + 1 89 end 90 91let advance_n l n = 92 for _ = 1 to n do 93 advance l 94 done 95 96let skip_whitespace l = 97 while 98 (not (is_eof l)) 99 && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') 100 do 101 advance l 102 done 103 104(* Helper functions for bytes access *) 105let[@inline] current_char l pos = Bytes.unsafe_get l.input pos 106let[@inline] current l = Bytes.unsafe_get l.input l.pos 107let sub_string l pos len = Bytes.sub_string l.input pos len 108 109(* Helper to create error location from lexer state *) 110let lexer_loc l = Toml.Error.loc ~file:l.file ~line:l.line ~column:l.col () 111 112(* Get expected byte length of UTF-8 char from first byte *) 113let utf8_first_byte_len c = 114 let code = Char.code c in 115 if code < 0x80 then 1 116 else if code < 0xC0 then 0 (* Invalid: continuation byte as start *) 117 else if code < 0xE0 then 2 118 else if code < 0xF0 then 3 119 else if code < 0xF8 then 4 120 else 0 (* Invalid: 5+ byte sequence *) 121 122(* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *) 123let validate_utf8_at_pos_bytes l = 124 if l.pos >= l.input_len then 125 Toml.Error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 126 let byte_len = utf8_first_byte_len (Bytes.unsafe_get l.input l.pos) in 127 if byte_len = 0 then 128 Toml.Error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 129 if l.pos + byte_len > l.input_len then 130 Toml.Error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8; 131 (* Validate using uutf - it checks overlong encodings, surrogates, etc. *) 132 let sub = Bytes.sub_string l.input l.pos byte_len in 133 let valid = ref false in 134 Uutf.String.fold_utf_8 135 (fun () _ -> function `Uchar _ -> valid := true | `Malformed _ -> ()) 136 () sub; 137 if not !valid then Toml.Error.raise_lexer ~location:(lexer_loc l) Invalid_utf8; 138 byte_len 139 140(* UTF-8 validation - validates and advances over a single UTF-8 character *) 141let validate_utf8_char l = 142 let byte_len = validate_utf8_at_pos_bytes l in 143 for _ = 1 to byte_len do 144 advance l 145 done 146 147let skip_comment l = 148 if (not (is_eof l)) && current l = '#' then begin 149 (* Validate comment characters *) 150 advance l; 151 let continue = ref true in 152 while !continue && (not (is_eof l)) && current l <> '\n' do 153 let c = current l in 154 let code = Char.code c in 155 (* CR is only valid if followed by LF (CRLF at end of comment) *) 156 if c = '\r' then begin 157 (* Check if this CR is followed by LF - if so, it ends the comment *) 158 if l.pos + 1 < l.input_len && current_char l (l.pos + 1) = '\n' then 159 (* This is CRLF - stop the loop, let the main lexer handle it *) 160 continue := false 161 else Toml.Error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return 162 end 163 else if code >= 0x80 then begin 164 (* Multi-byte UTF-8 character - validate it *) 165 validate_utf8_char l 166 end 167 else begin 168 (* ASCII control characters other than tab are not allowed in comments *) 169 if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then 170 Toml.Error.raise_lexer ~location:(lexer_loc l) 171 (Control_character code); 172 advance l 173 end 174 done 175 end 176 177let skip_ws_and_comments l = 178 let rec loop () = 179 skip_whitespace l; 180 if (not (is_eof l)) && current l = '#' then begin 181 skip_comment l; 182 loop () 183 end 184 in 185 loop () 186 187let is_bare_key_char c = 188 (c >= 'A' && c <= 'Z') 189 || (c >= 'a' && c <= 'z') 190 || (c >= '0' && c <= '9') 191 || c = '_' || c = '-' 192 193let is_digit c = c >= '0' && c <= '9' 194 195let is_hex_digit c = 196 is_digit c || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 197 198let is_oct_digit c = c >= '0' && c <= '7' 199let is_bin_digit c = c = '0' || c = '1' 200 201let hex_value c = 202 if c >= '0' && c <= '9' then Char.code c - Char.code '0' 203 else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10 204 else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10 205 else Toml.Error.raise_number Invalid_hex_digit 206 207(* Convert Unicode codepoint to UTF-8 using uutf *) 208let codepoint_to_utf8 codepoint = 209 if codepoint < 0 || codepoint > 0x10FFFF then 210 Fmt.failwith "Invalid Unicode codepoint: U+%X" codepoint; 211 if codepoint >= 0xD800 && codepoint <= 0xDFFF then 212 Fmt.failwith "Surrogate codepoint not allowed: U+%04X" codepoint; 213 let buf = Buffer.create 4 in 214 Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 215 Buffer.contents buf 216 217(* Parse Unicode escape with error location from lexer *) 218let unicode_to_utf8 l codepoint = 219 if codepoint < 0 || codepoint > 0x10FFFF then 220 Toml.Error.raise_lexer ~location:(lexer_loc l) 221 (Invalid_unicode_codepoint codepoint); 222 if codepoint >= 0xD800 && codepoint <= 0xDFFF then 223 Toml.Error.raise_lexer ~location:(lexer_loc l) 224 (Surrogate_codepoint codepoint); 225 let buf = Buffer.create 4 in 226 Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint); 227 Buffer.contents buf 228 229let parse_escape l = 230 advance l; 231 (* skip backslash *) 232 if is_eof l then Toml.Error.raise_lexer ~location:(lexer_loc l) Unexpected_eof; 233 let c = current l in 234 advance l; 235 match c with 236 | 'b' -> "\b" 237 | 't' -> "\t" 238 | 'n' -> "\n" 239 | 'f' -> "\x0C" 240 | 'r' -> "\r" 241 | 'e' -> "\x1B" (* TOML 1.1 escape *) 242 | '"' -> "\"" 243 | '\\' -> "\\" 244 | 'x' -> 245 (* \xHH - 2 hex digits *) 246 if l.pos + 1 >= l.input_len then 247 Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x"); 248 let c1 = current_char l l.pos in 249 let c2 = current_char l (l.pos + 1) in 250 if not (is_hex_digit c1 && is_hex_digit c2) then 251 Toml.Error.raise_lexer ~location:(lexer_loc l) 252 (Invalid_unicode_escape "\\x"); 253 let cp = (hex_value c1 * 16) + hex_value c2 in 254 advance l; 255 advance l; 256 unicode_to_utf8 l cp 257 | 'u' -> 258 (* \uHHHH - 4 hex digits *) 259 if l.pos + 3 >= l.input_len then 260 Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u"); 261 let s = sub_string l l.pos 4 in 262 for i = 0 to 3 do 263 if not (is_hex_digit s.[i]) then 264 Toml.Error.raise_lexer ~location:(lexer_loc l) 265 (Invalid_unicode_escape "\\u") 266 done; 267 let cp = int_of_string ("0x" ^ s) in 268 advance_n l 4; 269 unicode_to_utf8 l cp 270 | 'U' -> 271 (* \UHHHHHHHH - 8 hex digits *) 272 if l.pos + 7 >= l.input_len then 273 Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U"); 274 let s = sub_string l l.pos 8 in 275 for i = 0 to 7 do 276 if not (is_hex_digit s.[i]) then 277 Toml.Error.raise_lexer ~location:(lexer_loc l) 278 (Invalid_unicode_escape "\\U") 279 done; 280 let cp = int_of_string ("0x" ^ s) in 281 advance_n l 8; 282 unicode_to_utf8 l cp 283 | _ -> Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c) 284 285let validate_string_char l c is_multiline = 286 let code = Char.code c in 287 (* Control characters other than tab (and LF/CR for multiline) are not allowed *) 288 if code < 0x09 then 289 Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code); 290 if 291 code > 0x09 && code < 0x20 292 && not (is_multiline && (code = 0x0A || code = 0x0D)) 293 then Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code); 294 if code = 0x7F then 295 Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code) 296 297(* Validate UTF-8 in string context and add bytes to buffer *) 298let utf8_add_validated l buf = 299 let byte_len = validate_utf8_at_pos_bytes l in 300 Buffer.add_string buf (sub_string l l.pos byte_len); 301 for _ = 1 to byte_len do 302 advance l 303 done 304 305let skip_opening_newline l errmsg = 306 match peek l with 307 | Some '\n' -> advance l 308 | Some '\r' -> 309 advance l; 310 if peek l = Some '\n' then advance l else failwith errmsg 311 | _ -> () 312 313let skip_all_ws_newlines l = 314 let rec loop () = 315 match peek l with 316 | Some ' ' | Some '\t' | Some '\n' -> 317 advance l; 318 loop () 319 | Some '\r' -> 320 advance l; 321 if peek l = Some '\n' then advance l; 322 loop () 323 | _ -> () 324 in 325 loop () 326 327let handle_multiline_quotes l buf quote_char cont = 328 let quote_count = ref 0 in 329 let p = ref l.pos in 330 while !p < l.input_len && current_char l !p = quote_char do 331 incr quote_count; 332 incr p 333 done; 334 if !quote_count >= 3 then begin 335 let extra = min (!quote_count - 3) 2 in 336 for _ = 1 to extra do 337 Buffer.add_char buf quote_char 338 done; 339 advance_n l !quote_count; 340 if !quote_count > 5 then failwith "Too many quotes in multiline string" 341 end 342 else begin 343 for _ = 1 to !quote_count do 344 Buffer.add_char buf quote_char; 345 advance l 346 done; 347 cont () 348 end 349 350let handle_multiline_basic_backslash l buf loop = 351 let saved_pos = l.pos and saved_line = l.line and saved_col = l.col in 352 advance l; 353 let rec skip_ws () = 354 match peek l with 355 | Some ' ' | Some '\t' -> 356 advance l; 357 skip_ws () 358 | _ -> () 359 in 360 skip_ws (); 361 match peek l with 362 | Some '\n' -> 363 advance l; 364 skip_all_ws_newlines l; 365 loop () 366 | Some '\r' -> 367 advance l; 368 if peek l = Some '\n' then advance l; 369 skip_all_ws_newlines l; 370 loop () 371 | _ -> 372 l.pos <- saved_pos; 373 l.line <- saved_line; 374 l.col <- saved_col; 375 Buffer.add_string buf (parse_escape l); 376 loop () 377 378let rec parse_multiline_basic_string l buf () = 379 if is_eof l then failwith "Unterminated string"; 380 let c = current l in 381 if c = '"' then 382 handle_multiline_quotes l buf '"' (parse_multiline_basic_string l buf) 383 else if c = '\\' then 384 handle_multiline_basic_backslash l buf (parse_multiline_basic_string l buf) 385 else begin 386 if c = '\r' then begin 387 advance l; 388 if peek l = Some '\n' then ( 389 Buffer.add_char buf '\n'; 390 advance l) 391 else failwith "Bare carriage return not allowed in string" 392 end 393 else if Char.code c >= 0x80 then utf8_add_validated l buf 394 else begin 395 validate_string_char l c true; 396 Buffer.add_char buf c; 397 advance l 398 end; 399 parse_multiline_basic_string l buf () 400 end 401 402let rec parse_single_basic_string l buf () = 403 if is_eof l then failwith "Unterminated string"; 404 let c = current l in 405 if c = '"' then advance l 406 else if c = '\\' then ( 407 Buffer.add_string buf (parse_escape l); 408 parse_single_basic_string l buf ()) 409 else if c = '\n' || c = '\r' then 410 failwith "Newline not allowed in basic string" 411 else begin 412 if Char.code c >= 0x80 then utf8_add_validated l buf 413 else begin 414 validate_string_char l c false; 415 Buffer.add_char buf c; 416 advance l 417 end; 418 parse_single_basic_string l buf () 419 end 420 421let parse_basic_string l = 422 advance l; 423 let buf = Buffer.create 64 in 424 let multiline = 425 match peek_n l 2 with 426 | Some "\"\"" -> 427 advance l; 428 advance l; 429 skip_opening_newline l "Bare carriage return not allowed in string"; 430 true 431 | _ -> false 432 in 433 if multiline then parse_multiline_basic_string l buf () 434 else parse_single_basic_string l buf (); 435 (Buffer.contents buf, multiline) 436 437let validate_literal_ctrl l c code ~multiline = 438 let is_ctrl = 439 if multiline then 440 code < 0x09 441 || (code > 0x09 && code < 0x0A) 442 || (code > 0x0D && code < 0x20) 443 || code = 0x7F 444 else code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F 445 in 446 if is_ctrl && code <> 0x0A && code <> 0x0D then 447 failwith 448 (Fmt.str 449 "Control character U+%04X not allowed in literal string at line %d" 450 (Char.code c) l.line) 451 452let rec parse_multiline_literal_string l buf () = 453 if is_eof l then failwith "Unterminated literal string"; 454 let c = current l in 455 if c = '\'' then 456 handle_multiline_quotes l buf '\'' (parse_multiline_literal_string l buf) 457 else begin 458 if c = '\r' then begin 459 advance l; 460 if peek l = Some '\n' then ( 461 Buffer.add_char buf '\n'; 462 advance l) 463 else failwith "Bare carriage return not allowed in literal string" 464 end 465 else if Char.code c >= 0x80 then utf8_add_validated l buf 466 else begin 467 validate_literal_ctrl l c (Char.code c) ~multiline:true; 468 Buffer.add_char buf c; 469 advance l 470 end; 471 parse_multiline_literal_string l buf () 472 end 473 474let rec parse_single_literal_string l buf () = 475 if is_eof l then failwith "Unterminated literal string"; 476 let c = current l in 477 if c = '\'' then advance l 478 else if c = '\n' || c = '\r' then 479 failwith "Newline not allowed in literal string" 480 else begin 481 let code = Char.code c in 482 if code >= 0x80 then utf8_add_validated l buf 483 else begin 484 validate_literal_ctrl l c code ~multiline:false; 485 Buffer.add_char buf c; 486 advance l 487 end; 488 parse_single_literal_string l buf () 489 end 490 491let parse_literal_string l = 492 advance l; 493 let buf = Buffer.create 64 in 494 let multiline = 495 match peek_n l 2 with 496 | Some "''" -> 497 advance l; 498 advance l; 499 skip_opening_newline l 500 "Bare carriage return not allowed in literal string"; 501 true 502 | _ -> false 503 in 504 if multiline then parse_multiline_literal_string l buf () 505 else parse_single_literal_string l buf (); 506 (Buffer.contents buf, multiline) 507 508let read_prefixed_int l is_valid_digit prefix fail_prefix fail_trailing = 509 advance l; 510 advance l; 511 (* skip 0x/0o/0b *) 512 let num_start = l.pos in 513 if peek l = Some '_' then 514 Fmt.failwith "Leading underscore not allowed after %s" prefix; 515 let rec read first = 516 match peek l with 517 | Some c when is_valid_digit c -> 518 advance l; 519 read false 520 | Some '_' -> 521 if first then 522 Fmt.failwith "Underscore must follow a %s digit" fail_prefix; 523 advance l; 524 if peek l |> Option.map is_valid_digit |> Option.value ~default:false 525 then read false 526 else Fmt.failwith "Trailing underscore in %s number" fail_trailing 527 | _ -> 528 if first then 529 Fmt.failwith "Expected %s digit after %s" fail_prefix prefix 530 in 531 read true; 532 (num_start, l.pos) 533 534let read_decimal_int l = 535 (* Read digit sequence with _ separators, returns whether any were read *) 536 let rec read_int first = 537 match peek l with 538 | Some c when is_digit c -> 539 advance l; 540 read_int false 541 | Some '_' -> 542 if first then failwith "Underscore must follow a digit"; 543 advance l; 544 if peek l |> Option.map is_digit |> Option.value ~default:false then 545 read_int false 546 else failwith "Trailing underscore in number" 547 | _ -> if first then failwith "Expected digit" 548 in 549 read_int 550 551let parse_decimal_number l start = 552 let first_digit = peek l in 553 if first_digit = Some '0' then begin 554 match peek2 l with 555 | Some c when is_digit c -> failwith "Leading zeros not allowed" 556 | Some '_' -> failwith "Leading zeros not allowed" 557 | _ -> () 558 end; 559 let read_int = read_decimal_int l in 560 (match peek l with 561 | Some c when is_digit c -> read_int false 562 | _ -> failwith "Expected digit after sign"); 563 let is_float = ref false in 564 (match (peek l, peek2 l) with 565 | Some '.', Some c when is_digit c -> 566 is_float := true; 567 advance l; 568 read_int false 569 | Some '.', _ -> failwith "Decimal point must be followed by digit" 570 | _ -> ()); 571 (match peek l with 572 | Some 'e' | Some 'E' -> 573 is_float := true; 574 advance l; 575 (match peek l with Some '+' | Some '-' -> advance l | _ -> ()); 576 (match peek l with 577 | Some '_' -> failwith "Underscore cannot follow exponent" 578 | _ -> ()); 579 read_int true 580 | _ -> ()); 581 let s = sub_string l start (l.pos - start) in 582 let s' = String.concat "" (String.split_on_char '_' s) in 583 if !is_float then Tok_float (float_of_string s', s) 584 else Tok_integer (Int64.of_string s', s) 585 586let parse_number l = 587 let start = l.pos in 588 let neg = 589 match peek l with 590 | Some '-' -> 591 advance l; 592 true 593 | Some '+' -> 594 advance l; 595 false 596 | _ -> false 597 in 598 match peek_n l 3 with 599 | Some "inf" -> 600 advance_n l 3; 601 let s = sub_string l start (l.pos - start) in 602 Tok_float ((if neg then Float.neg_infinity else Float.infinity), s) 603 | Some "nan" -> 604 advance_n l 3; 605 let s = sub_string l start (l.pos - start) in 606 Tok_float (Float.nan, s) 607 | _ -> ( 608 match (peek l, peek2 l) with 609 | Some '0', Some 'x' when not neg -> 610 let num_start, num_end = 611 read_prefixed_int l is_hex_digit "0x" "hex" "hex" 612 in 613 let s = sub_string l num_start (num_end - num_start) in 614 let s = String.concat "" (String.split_on_char '_' s) in 615 let orig = sub_string l start (l.pos - start) in 616 Tok_integer (Int64.of_string ("0x" ^ s), orig) 617 | Some '0', Some 'o' when not neg -> 618 let num_start, num_end = 619 read_prefixed_int l is_oct_digit "0o" "octal" "octal" 620 in 621 let s = sub_string l num_start (num_end - num_start) in 622 let s = String.concat "" (String.split_on_char '_' s) in 623 let orig = sub_string l start (l.pos - start) in 624 Tok_integer (Int64.of_string ("0o" ^ s), orig) 625 | Some '0', Some 'b' when not neg -> 626 let num_start, num_end = 627 read_prefixed_int l is_bin_digit "0b" "binary" "binary" 628 in 629 let s = sub_string l num_start (num_end - num_start) in 630 let s = String.concat "" (String.split_on_char '_' s) in 631 let orig = sub_string l start (l.pos - start) in 632 Tok_integer (Int64.of_string ("0b" ^ s), orig) 633 | _ -> parse_decimal_number l start) 634 635(* Check if we're looking at a datetime/date/time *) 636let is_time_prefix l pos = 637 pos + 5 <= l.input_len 638 && is_digit (current_char l pos) 639 && is_digit (current_char l (pos + 1)) 640 && current_char l (pos + 2) = ':' 641 && is_digit (current_char l (pos + 3)) 642 && is_digit (current_char l (pos + 4)) 643 644let is_date_prefix l pos = 645 pos + 10 <= l.input_len 646 && is_digit (current_char l pos) 647 && is_digit (current_char l (pos + 1)) 648 && is_digit (current_char l (pos + 2)) 649 && is_digit (current_char l (pos + 3)) 650 && current_char l (pos + 4) = '-' 651 && is_digit (current_char l (pos + 5)) 652 && is_digit (current_char l (pos + 6)) 653 && current_char l (pos + 7) = '-' 654 && is_digit (current_char l (pos + 8)) 655 && is_digit (current_char l (pos + 9)) 656 657let check_date_suffix l pos = 658 let len = l.input_len in 659 if pos + 10 >= len then `Date 660 else 661 let next = current_char l (pos + 10) in 662 if 663 next = 'T' || next = 't' || next = '\n' || next = '\r' || next = '#' 664 || next = ',' || next = ']' || next = '}' 665 then `Date 666 else if next = ' ' || next = '\t' then begin 667 let rec skip_ws p = 668 if p >= len then p 669 else 670 match current_char l p with ' ' | '\t' -> skip_ws (p + 1) | _ -> p 671 in 672 if current_char l (skip_ws (pos + 11)) = '=' then `Other else `Date 673 end 674 else if is_bare_key_char next then `Other 675 else `Date 676 677let looks_like_datetime l = 678 let pos = l.pos in 679 if is_date_prefix l pos then check_date_suffix l pos 680 else if is_time_prefix l pos then `Time 681 else `Other 682 683(* Date/time validation *) 684let validate_date year month day = 685 if month < 1 || month > 12 then Fmt.failwith "Invalid month: %d" month; 686 if day < 1 then Fmt.failwith "Invalid day: %d" day; 687 let days_in_month = [| 0; 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] in 688 let is_leap = (year mod 4 = 0 && year mod 100 <> 0) || year mod 400 = 0 in 689 let max_days = if month = 2 && is_leap then 29 else days_in_month.(month) in 690 if day > max_days then Fmt.failwith "Invalid day %d for month %d" day month 691 692let validate_time hour minute second = 693 if hour < 0 || hour > 23 then Fmt.failwith "Invalid hour: %d" hour; 694 if minute < 0 || minute > 59 then Fmt.failwith "Invalid minute: %d" minute; 695 if second < 0 || second > 60 then (* 60 for leap second *) 696 Fmt.failwith "Invalid second: %d" second 697 698let validate_offset hour minute = 699 if hour < 0 || hour > 23 then 700 Fmt.failwith "Invalid timezone offset hour: %d" hour; 701 if minute < 0 || minute > 59 then 702 Fmt.failwith "Invalid timezone offset minute: %d" minute 703 704let read_2_digits l buf secondary_buf fail_msg = 705 for _ = 1 to 2 do 706 match peek l with 707 | Some c when is_digit c -> 708 Buffer.add_char buf c; 709 Buffer.add_char secondary_buf c; 710 advance l 711 | _ -> failwith fail_msg 712 done 713 714let read_optional_seconds_frac l buf second_buf = 715 match peek l with 716 | Some ':' -> ( 717 Buffer.add_char buf ':'; 718 advance l; 719 read_2_digits l buf second_buf "Invalid time format"; 720 (* Optional fractional seconds *) 721 match peek l with 722 | Some '.' -> 723 Buffer.add_char buf '.'; 724 advance l; 725 if not (peek l |> Option.map is_digit |> Option.value ~default:false) 726 then failwith "Expected digit after decimal point"; 727 while peek l |> Option.map is_digit |> Option.value ~default:false do 728 Buffer.add_char buf (Option.get (peek l)); 729 advance l 730 done 731 | _ -> ()) 732 | _ -> 733 (* No seconds - add :00 for normalization *) 734 Buffer.add_string buf ":00"; 735 Buffer.add_string second_buf "00" 736 737let read_date_part l buf = 738 (* Read YYYY-MM-DD into buf, validate date components *) 739 let year_buf = Buffer.create 4 in 740 let month_buf = Buffer.create 2 in 741 let day_buf = Buffer.create 2 in 742 for _ = 1 to 4 do 743 match peek l with 744 | Some c when is_digit c -> 745 Buffer.add_char buf c; 746 Buffer.add_char year_buf c; 747 advance l 748 | _ -> failwith "Invalid date format" 749 done; 750 if peek l <> Some '-' then failwith "Invalid date format"; 751 Buffer.add_char buf '-'; 752 advance l; 753 read_2_digits l buf month_buf "Invalid date format"; 754 if peek l <> Some '-' then failwith "Invalid date format"; 755 Buffer.add_char buf '-'; 756 advance l; 757 read_2_digits l buf day_buf "Invalid date format"; 758 let year = int_of_string (Buffer.contents year_buf) in 759 let month = int_of_string (Buffer.contents month_buf) in 760 let day = int_of_string (Buffer.contents day_buf) in 761 validate_date year month day 762 763let read_tz_offset l buf = 764 (* Parse +HH:MM or -HH:MM offset, validate and append to buf *) 765 let sign = current l in 766 let off_hour_buf = Buffer.create 2 in 767 let off_min_buf = Buffer.create 2 in 768 Buffer.add_char buf sign; 769 advance l; 770 read_2_digits l buf off_hour_buf "Invalid timezone offset"; 771 if peek l <> Some ':' then failwith "Invalid timezone offset"; 772 Buffer.add_char buf ':'; 773 advance l; 774 read_2_digits l buf off_min_buf "Invalid timezone offset"; 775 let off_hour = int_of_string (Buffer.contents off_hour_buf) in 776 let off_min = int_of_string (Buffer.contents off_min_buf) in 777 validate_offset off_hour off_min 778 779let read_time_and_offset l buf = 780 (* Read HH:MM[:SS[.frac]] and optional offset, return datetime token *) 781 let hour_buf = Buffer.create 2 in 782 let minute_buf = Buffer.create 2 in 783 let second_buf = Buffer.create 2 in 784 Buffer.add_char buf 'T'; 785 (* normalize to uppercase T *) 786 read_2_digits l buf hour_buf "Invalid time format"; 787 if peek l <> Some ':' then failwith "Invalid time format"; 788 Buffer.add_char buf ':'; 789 advance l; 790 read_2_digits l buf minute_buf "Invalid time format"; 791 read_optional_seconds_frac l buf second_buf; 792 let hour = int_of_string (Buffer.contents hour_buf) in 793 let minute = int_of_string (Buffer.contents minute_buf) in 794 let second = 795 if Buffer.length second_buf > 0 then 796 int_of_string (Buffer.contents second_buf) 797 else 0 798 in 799 validate_time hour minute second; 800 match peek l with 801 | Some 'Z' | Some 'z' -> 802 Buffer.add_char buf 'Z'; 803 advance l; 804 Tok_datetime (Buffer.contents buf) 805 | Some '+' | Some '-' -> 806 read_tz_offset l buf; 807 Tok_datetime (Buffer.contents buf) 808 | _ -> Tok_datetime_local (Buffer.contents buf) 809 810let parse_datetime l = 811 let buf = Buffer.create 32 in 812 read_date_part l buf; 813 match peek l with 814 | Some 'T' | Some 't' -> 815 advance l; 816 read_time_and_offset l buf 817 | Some ' ' -> ( 818 (* Space could be followed by time (datetime with space separator) 819 or could be end of date (local date followed by comment/value) *) 820 advance l; 821 match peek l with 822 | Some c when is_digit c -> read_time_and_offset l buf 823 | _ -> 824 (* Not followed by time - this is just a local date *) 825 l.pos <- l.pos - 1; 826 (* Go back to before the space *) 827 Tok_date_local (Buffer.contents buf)) 828 | _ -> Tok_date_local (Buffer.contents buf) 829 830let parse_time l = 831 let buf = Buffer.create 16 in 832 let hour_buf = Buffer.create 2 in 833 let minute_buf = Buffer.create 2 in 834 let second_buf = Buffer.create 2 in 835 (* Read HH:MM *) 836 read_2_digits l buf hour_buf "Invalid time format"; 837 if peek l <> Some ':' then failwith "Invalid time format"; 838 Buffer.add_char buf ':'; 839 advance l; 840 read_2_digits l buf minute_buf "Invalid time format"; 841 read_optional_seconds_frac l buf second_buf; 842 (* Validate time *) 843 let hour = int_of_string (Buffer.contents hour_buf) in 844 let minute = int_of_string (Buffer.contents minute_buf) in 845 let second = 846 if Buffer.length second_buf > 0 then 847 int_of_string (Buffer.contents second_buf) 848 else 0 849 in 850 validate_time hour minute second; 851 Tok_time_local (Buffer.contents buf) 852 853let ws_followed_by_eq l p = 854 let rec skip pp = 855 if pp >= l.input_len then false 856 else 857 match current_char l pp with 858 | ' ' | '\t' -> skip (pp + 1) 859 | '=' -> true 860 | _ -> false 861 in 862 skip p 863 864let signed_digit_is_key_context l start = 865 (* Determine if a signed-digit sequence like -01 is a key rather than a number *) 866 let rec scan_ahead p = 867 if p >= l.input_len then false 868 else 869 let c = current_char l p in 870 if is_digit c || c = '_' then scan_ahead (p + 1) 871 else if c = ' ' || c = '\t' then ws_followed_by_eq l (p + 1) 872 else if c = '=' then true 873 else if c = '.' then 874 p + 1 < l.input_len 875 && 876 let next = current_char l (p + 1) in 877 (not (is_digit next)) && is_bare_key_char next 878 else if c = 'e' || c = 'E' then false 879 else is_bare_key_char c 880 in 881 scan_ahead (start + 1) 882 883let digit_is_bare_key l start = 884 (* Scan from start to determine if a digit-led token is a bare key vs number *) 885 let rec scan pos has_dash = 886 if pos >= l.input_len then has_dash 887 else 888 match current_char l pos with 889 | c when is_digit c || c = '_' || c = '.' -> scan (pos + 1) has_dash 890 | '-' when pos + 1 < l.input_len -> 891 let next = current_char l (pos + 1) in 892 if is_digit next then scan (pos + 1) true 893 else is_bare_key_char next || has_dash 894 | ('e' | 'E') when pos + 1 >= l.input_len -> true 895 | 'e' | 'E' -> 896 let next = current_char l (pos + 1) in 897 if is_digit next then has_dash 898 else if 899 (next = '+' || next = '-') 900 && pos + 2 < l.input_len 901 && is_digit (current_char l (pos + 2)) 902 then has_dash 903 else true 904 | c -> is_bare_key_char c || has_dash 905 in 906 let has_leading_zero = 907 current_char l start = '0' 908 && start + 1 < l.input_len 909 && is_digit (current_char l (start + 1)) 910 in 911 has_leading_zero || scan start false 912 913let lex_signed_token l = 914 let sign = current l in 915 let start = l.pos in 916 match peek2 l with 917 | Some d when is_digit d -> 918 if signed_digit_is_key_context l start then begin 919 while (not (is_eof l)) && is_bare_key_char (current l) do 920 advance l 921 done; 922 Tok_bare_key (sub_string l start (l.pos - start)) 923 end 924 else parse_number l 925 | Some 'i' -> 926 if 927 l.pos + 3 < l.input_len 928 && current_char l (l.pos + 1) = 'i' 929 && current_char l (l.pos + 2) = 'n' 930 && current_char l (l.pos + 3) = 'f' 931 then begin 932 advance_n l 4; 933 let s = sub_string l start (l.pos - start) in 934 if sign = '-' then Tok_float (Float.neg_infinity, s) 935 else Tok_float (Float.infinity, s) 936 end 937 else if sign = '-' then begin 938 while (not (is_eof l)) && is_bare_key_char (current l) do 939 advance l 940 done; 941 Tok_bare_key (sub_string l start (l.pos - start)) 942 end 943 else Fmt.failwith "Unexpected character after %c" sign 944 | Some 'n' -> 945 if 946 l.pos + 3 < l.input_len 947 && current_char l (l.pos + 1) = 'n' 948 && current_char l (l.pos + 2) = 'a' 949 && current_char l (l.pos + 3) = 'n' 950 then begin 951 advance_n l 4; 952 let s = sub_string l start (l.pos - start) in 953 Tok_float (Float.nan, s) 954 end 955 else if sign = '-' then begin 956 while (not (is_eof l)) && is_bare_key_char (current l) do 957 advance l 958 done; 959 Tok_bare_key (sub_string l start (l.pos - start)) 960 end 961 else Fmt.failwith "Unexpected character after %c" sign 962 | _ when sign = '-' -> 963 while (not (is_eof l)) && is_bare_key_char (current l) do 964 advance l 965 done; 966 Tok_bare_key (sub_string l start (l.pos - start)) 967 | _ -> Fmt.failwith "Unexpected character after %c" sign 968 969let lex_digit_token l = 970 match looks_like_datetime l with 971 | `Date -> parse_datetime l 972 | `Time -> parse_time l 973 | `Other -> 974 let start = l.pos in 975 let is_prefixed_number = 976 start + 1 < l.input_len 977 && current_char l start = '0' 978 && 979 let c1 = current_char l (start + 1) in 980 c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B' 981 in 982 if is_prefixed_number then parse_number l 983 else if digit_is_bare_key l start then begin 984 while (not (is_eof l)) && is_bare_key_char (current l) do 985 advance l 986 done; 987 Tok_bare_key (sub_string l start (l.pos - start)) 988 end 989 else parse_number l 990 991let next_token l = 992 skip_ws_and_comments l; 993 if is_eof l then Tok_eof 994 else begin 995 let c = current l in 996 match c with 997 | '[' -> 998 advance l; 999 Tok_lbracket 1000 | ']' -> 1001 advance l; 1002 Tok_rbracket 1003 | '{' -> 1004 advance l; 1005 Tok_lbrace 1006 | '}' -> 1007 advance l; 1008 Tok_rbrace 1009 | '=' -> 1010 advance l; 1011 Tok_equals 1012 | ',' -> 1013 advance l; 1014 Tok_comma 1015 | '.' -> 1016 advance l; 1017 Tok_dot 1018 | '\n' -> 1019 advance l; 1020 Tok_newline 1021 | '\r' -> 1022 advance l; 1023 if peek l = Some '\n' then begin 1024 advance l; 1025 Tok_newline 1026 end 1027 else Fmt.failwith "Bare carriage return not allowed at line %d" l.line 1028 | '"' -> 1029 let s, multiline = parse_basic_string l in 1030 if multiline then Tok_ml_basic_string s else Tok_basic_string s 1031 | '\'' -> 1032 let s, multiline = parse_literal_string l in 1033 if multiline then Tok_ml_literal_string s else Tok_literal_string s 1034 | '+' | '-' -> lex_signed_token l 1035 | c when is_digit c -> lex_digit_token l 1036 | c when c = 't' || c = 'f' || c = 'i' || c = 'n' -> 1037 let start = l.pos in 1038 while (not (is_eof l)) && is_bare_key_char (current l) do 1039 advance l 1040 done; 1041 Tok_bare_key (sub_string l start (l.pos - start)) 1042 | c when is_bare_key_char c -> 1043 let start = l.pos in 1044 while (not (is_eof l)) && is_bare_key_char (current l) do 1045 advance l 1046 done; 1047 Tok_bare_key (sub_string l start (l.pos - start)) 1048 | c -> 1049 let code = Char.code c in 1050 if code < 0x20 || code = 0x7F then 1051 Fmt.failwith "Control character U+%04X not allowed at line %d" code 1052 l.line 1053 else 1054 Fmt.failwith "Unexpected character '%c' at line %d, column %d" c 1055 l.line l.col 1056 end 1057 1058(* Parser *) 1059 1060type parser = { lexer : lexer; mutable current : token; mutable peeked : bool } 1061 1062let parser lexer = { lexer; current = Tok_eof; peeked = false } 1063 1064let peek_token p = 1065 if not p.peeked then begin 1066 p.current <- next_token p.lexer; 1067 p.peeked <- true 1068 end; 1069 p.current 1070 1071let consume_token p = 1072 let tok = peek_token p in 1073 p.peeked <- false; 1074 tok 1075 1076(* Check if next raw character (without skipping whitespace) matches *) 1077let next_raw_char_is p c = 1078 p.lexer.pos < p.lexer.input_len && current_char p.lexer p.lexer.pos = c 1079 1080let expect_token p expected = 1081 let tok = consume_token p in 1082 if tok <> expected then 1083 failwith 1084 (Fmt.str "Expected %s" 1085 (match expected with 1086 | Tok_equals -> "=" 1087 | Tok_rbracket -> "]" 1088 | Tok_rbrace -> "}" 1089 | Tok_newline -> "newline" 1090 | _ -> "token")) 1091 1092let skip_newlines p = 1093 while peek_token p = Tok_newline do 1094 ignore (consume_token p) 1095 done 1096 1097(* Parse a single key segment (bare, basic string, literal string, or integer) *) 1098(* Note: Tok_float is handled specially in parse_dotted_key *) 1099let parse_key_segment p = 1100 match peek_token p with 1101 | Tok_bare_key s -> 1102 ignore (consume_token p); 1103 [ s ] 1104 | Tok_basic_string s -> 1105 ignore (consume_token p); 1106 [ s ] 1107 | Tok_literal_string s -> 1108 ignore (consume_token p); 1109 [ s ] 1110 | Tok_integer (_i, orig_str) -> 1111 ignore (consume_token p); 1112 [ orig_str ] 1113 | Tok_float (f, orig_str) -> 1114 (* Float in key context - use original string to preserve exact key parts *) 1115 ignore (consume_token p); 1116 if Float.is_nan f then [ "nan" ] 1117 else if f = Float.infinity then [ "inf" ] 1118 else if f = Float.neg_infinity then [ "-inf" ] 1119 else begin 1120 (* Remove underscores from original string and split on dot *) 1121 let s = String.concat "" (String.split_on_char '_' orig_str) in 1122 if String.contains s 'e' || String.contains s 'E' then 1123 (* Has exponent, treat as single key *) 1124 [ s ] 1125 else if String.contains s '.' then 1126 (* Split on decimal point for dotted key *) 1127 String.split_on_char '.' s 1128 else 1129 (* No decimal point, single integer key *) 1130 [ s ] 1131 end 1132 | Tok_date_local s -> 1133 ignore (consume_token p); 1134 [ s ] 1135 | Tok_datetime s -> 1136 ignore (consume_token p); 1137 [ s ] 1138 | Tok_datetime_local s -> 1139 ignore (consume_token p); 1140 [ s ] 1141 | Tok_time_local s -> 1142 ignore (consume_token p); 1143 [ s ] 1144 | Tok_ml_basic_string _ -> 1145 failwith "Multiline strings are not allowed as keys" 1146 | Tok_ml_literal_string _ -> 1147 failwith "Multiline strings are not allowed as keys" 1148 | _ -> failwith "Expected key" 1149 1150(* Parse a dotted key - returns list of key strings *) 1151let parse_dotted_key p = 1152 let first_keys = parse_key_segment p in 1153 let rec loop acc = 1154 match peek_token p with 1155 | Tok_dot -> 1156 ignore (consume_token p); 1157 let keys = parse_key_segment p in 1158 loop (List.rev_append keys acc) 1159 | _ -> List.rev acc 1160 in 1161 let rest = loop [] in 1162 first_keys @ rest 1163 1164let validate_number_underscores str = 1165 let len = String.length str in 1166 if len > 0 && str.[0] = '_' then 1167 failwith "Leading underscore not allowed in number"; 1168 if len > 0 && str.[len - 1] = '_' then 1169 failwith "Trailing underscore not allowed in number"; 1170 let has_hex_prefix = 1171 len > 2 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') 1172 in 1173 let is_digit_char c = c >= '0' && c <= '9' in 1174 let is_hex_char c = 1175 (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 1176 in 1177 for i = 0 to len - 2 do 1178 if str.[i] = '_' && str.[i + 1] = '_' then 1179 failwith "Double underscore not allowed in number"; 1180 if str.[i] = '_' then begin 1181 let prev = if i > 0 then Some str.[i - 1] else None in 1182 let next = Some str.[i + 1] in 1183 match (prev, next) with 1184 | Some p, Some n when has_hex_prefix && is_hex_char p && is_hex_char n -> 1185 () 1186 | Some p, Some n when is_digit_char p && is_digit_char n -> () 1187 | _ -> failwith "Underscore must be between digits" 1188 end 1189 done 1190 1191let bare_key_as_number s = 1192 validate_number_underscores s; 1193 let s_no_underscore = String.concat "" (String.split_on_char '_' s) in 1194 let len = String.length s_no_underscore in 1195 if len = 0 then Fmt.failwith "Unexpected bare key '%s' as value" s; 1196 let c0 = s_no_underscore.[0] in 1197 if c0 < '0' || c0 > '9' then 1198 Fmt.failwith "Unexpected bare key '%s' as value" s; 1199 if 1200 len > 1 && c0 = '0' 1201 && s_no_underscore.[1] >= '0' 1202 && s_no_underscore.[1] <= '9' 1203 then failwith "Leading zeros not allowed"; 1204 try 1205 if 1206 String.contains s_no_underscore '.' 1207 || String.contains s_no_underscore 'e' 1208 || String.contains s_no_underscore 'E' 1209 then Toml.Float (float_of_string s_no_underscore) 1210 else Toml.Int (Int64.of_string s_no_underscore) 1211 with Failure _ -> Fmt.failwith "Unexpected bare key '%s' as value" s 1212 1213let rec parse_value p = 1214 match peek_token p with 1215 | Tok_basic_string s -> 1216 ignore (consume_token p); 1217 Toml.String s 1218 | Tok_literal_string s -> 1219 ignore (consume_token p); 1220 Toml.String s 1221 | Tok_ml_basic_string s -> 1222 ignore (consume_token p); 1223 Toml.String s 1224 | Tok_ml_literal_string s -> 1225 ignore (consume_token p); 1226 Toml.String s 1227 | Tok_integer (i, _) -> 1228 ignore (consume_token p); 1229 Toml.Int i 1230 | Tok_float (f, _) -> 1231 ignore (consume_token p); 1232 Toml.Float f 1233 | Tok_datetime s -> 1234 ignore (consume_token p); 1235 Toml.Datetime s 1236 | Tok_datetime_local s -> 1237 ignore (consume_token p); 1238 Toml.Datetime_local s 1239 | Tok_date_local s -> 1240 ignore (consume_token p); 1241 Toml.Date_local s 1242 | Tok_time_local s -> 1243 ignore (consume_token p); 1244 Toml.Time_local s 1245 | Tok_lbracket -> parse_array p 1246 | Tok_lbrace -> parse_inline_table p 1247 | Tok_bare_key s -> ( 1248 ignore (consume_token p); 1249 match s with 1250 | "true" -> Bool true 1251 | "false" -> Bool false 1252 | "inf" -> Float Float.infinity 1253 | "nan" -> Float Float.nan 1254 | _ -> bare_key_as_number s) 1255 | _ -> failwith "Expected value" 1256 1257and parse_array p = 1258 ignore (consume_token p); 1259 (* [ *) 1260 skip_newlines p; 1261 let rec loop acc = 1262 match peek_token p with 1263 | Tok_rbracket -> 1264 ignore (consume_token p); 1265 Toml.Array (List.rev acc) 1266 | _ -> ( 1267 let v = parse_value p in 1268 skip_newlines p; 1269 match peek_token p with 1270 | Tok_comma -> 1271 ignore (consume_token p); 1272 skip_newlines p; 1273 loop (v :: acc) 1274 | Tok_rbracket -> 1275 ignore (consume_token p); 1276 Toml.Array (List.rev (v :: acc)) 1277 | _ -> failwith "Expected ',' or ']' in array") 1278 in 1279 loop [] 1280 1281and parse_inline_table p = 1282 ignore (consume_token p); 1283 (* { *) 1284 skip_newlines p; 1285 (* Track explicitly defined keys - can't be extended with dotted keys *) 1286 let defined_inline = ref [] in 1287 let rec loop acc = 1288 match peek_token p with 1289 | Tok_rbrace -> 1290 ignore (consume_token p); 1291 Toml.Table (List.rev acc) 1292 | _ -> ( 1293 let keys = parse_dotted_key p in 1294 skip_ws p; 1295 expect_token p Tok_equals; 1296 skip_ws p; 1297 let v = parse_value p in 1298 (* Check if trying to extend a previously-defined inline table *) 1299 (match keys with 1300 | first_key :: _ :: _ -> 1301 (* Multi-key dotted path - check if first key is already defined *) 1302 if List.mem first_key !defined_inline then 1303 failwith 1304 (Fmt.str "Cannot extend inline table '%s' with dotted key" 1305 first_key) 1306 | _ -> ()); 1307 (* If this is a direct assignment to a key, track it *) 1308 (match keys with 1309 | [ k ] -> 1310 if List.mem k !defined_inline then 1311 Fmt.failwith "Duplicate key '%s' in inline table" k; 1312 defined_inline := k :: !defined_inline 1313 | _ -> ()); 1314 let entry = build_nested_table keys v in 1315 (* Merge the entry with existing entries (for dotted keys with common prefix) *) 1316 let acc = merge_entry_into_table acc entry in 1317 skip_newlines p; 1318 match peek_token p with 1319 | Tok_comma -> 1320 ignore (consume_token p); 1321 skip_newlines p; 1322 loop acc 1323 | Tok_rbrace -> 1324 ignore (consume_token p); 1325 Toml.Table (List.rev acc) 1326 | _ -> failwith "Expected ',' or '}' in inline table") 1327 in 1328 loop [] 1329 1330and skip_ws _p = 1331 (* Skip whitespace in token stream - handled by lexer but needed for lookahead *) 1332 () 1333 1334and build_nested_table keys value = 1335 match keys with 1336 | [] -> failwith "Empty key" 1337 | [ k ] -> (k, value) 1338 | k :: rest -> (k, Toml.Table [ build_nested_table rest value ]) 1339 1340(* Merge two TOML values - used for combining dotted keys in inline tables *) 1341and merge_toml_values v1 v2 = 1342 match (v1, v2) with 1343 | Toml.Table entries1, Toml.Table entries2 -> 1344 (* Merge the entries *) 1345 let merged = 1346 List.fold_left 1347 (fun acc (k, v) -> 1348 match List.assoc_opt k acc with 1349 | Some existing -> 1350 (* Key exists - try to merge if both are tables *) 1351 let merged_v = merge_toml_values existing v in 1352 (k, merged_v) :: List.remove_assoc k acc 1353 | None -> (k, v) :: acc) 1354 entries1 entries2 1355 in 1356 Toml.Table (List.rev merged) 1357 | _, _ -> 1358 (* Can't merge non-table values with same key *) 1359 failwith "Conflicting keys in inline table" 1360 1361(* Merge a single entry into an existing table *) 1362and merge_entry_into_table entries (k, v) = 1363 match List.assoc_opt k entries with 1364 | Some existing -> 1365 let merged_v = merge_toml_values existing v in 1366 (k, merged_v) :: List.remove_assoc k entries 1367 | None -> (k, v) :: entries 1368 1369let validate_datetime_string s = 1370 (* Parse and validate date portion *) 1371 if String.length s >= 10 then begin 1372 let year = int_of_string (String.sub s 0 4) in 1373 let month = int_of_string (String.sub s 5 2) in 1374 let day = int_of_string (String.sub s 8 2) in 1375 validate_date year month day; 1376 (* Parse and validate time portion if present *) 1377 if String.length s >= 16 then begin 1378 let time_start = 1379 if s.[10] = 'T' || s.[10] = 't' || s.[10] = ' ' then 11 else 10 1380 in 1381 let hour = int_of_string (String.sub s time_start 2) in 1382 let minute = int_of_string (String.sub s (time_start + 3) 2) in 1383 let second = 1384 if String.length s >= time_start + 8 && s.[time_start + 5] = ':' then 1385 int_of_string (String.sub s (time_start + 6) 2) 1386 else 0 1387 in 1388 validate_time hour minute second 1389 end 1390 end 1391 1392let validate_date_string s = 1393 if String.length s >= 10 then begin 1394 let year = int_of_string (String.sub s 0 4) in 1395 let month = int_of_string (String.sub s 5 2) in 1396 let day = int_of_string (String.sub s 8 2) in 1397 validate_date year month day 1398 end 1399 1400let validate_time_string s = 1401 if String.length s >= 5 then begin 1402 let hour = int_of_string (String.sub s 0 2) in 1403 let minute = int_of_string (String.sub s 3 2) in 1404 let second = 1405 if String.length s >= 8 && s.[5] = ':' then 1406 int_of_string (String.sub s 6 2) 1407 else 0 1408 in 1409 validate_time hour minute second 1410 end 1411 1412(* Table management for the parser *) 1413type table_state = { 1414 mutable values : (string * Toml.t) list; 1415 subtables : (string, table_state) Hashtbl.t; 1416 mutable is_array : bool; 1417 mutable is_inline : bool; 1418 mutable defined : bool; 1419 (* Has this table been explicitly defined with [table]? *) 1420 mutable closed : bool; (* Closed to extension via dotted keys from parent *) 1421 mutable array_elements : table_state list; (* For arrays of tables *) 1422} 1423 1424let table_state () = 1425 { 1426 values = []; 1427 subtables = Hashtbl.create 16; 1428 is_array = false; 1429 is_inline = false; 1430 defined = false; 1431 closed = false; 1432 array_elements = []; 1433 } 1434 1435let rec ensure_table state keys create_intermediate = 1436 match keys with 1437 | [] -> state 1438 | [ k ] -> ( 1439 (* Check if key exists as a value *) 1440 if List.mem_assoc k state.values then 1441 Fmt.failwith "Cannot use value '%s' as a table" k; 1442 match Hashtbl.find_opt state.subtables k with 1443 | Some sub -> sub 1444 | None -> 1445 let sub = table_state () in 1446 Hashtbl.add state.subtables k sub; 1447 sub) 1448 | k :: rest -> 1449 (* Check if key exists as a value *) 1450 if List.mem_assoc k state.values then 1451 Fmt.failwith "Cannot use value '%s' as a table" k; 1452 let sub = 1453 match Hashtbl.find_opt state.subtables k with 1454 | Some sub -> sub 1455 | None -> 1456 let sub = table_state () in 1457 Hashtbl.add state.subtables k sub; 1458 sub 1459 in 1460 if create_intermediate && not sub.defined then sub.defined <- false; 1461 (* Mark as implicitly defined *) 1462 ensure_table sub rest create_intermediate 1463 1464(* Like ensure_table but marks tables as defined (for dotted keys) *) 1465(* Dotted keys mark tables as "defined" (can't re-define with [table]) but not "closed" *) 1466let rec table_for_dotted_key state keys = 1467 match keys with 1468 | [] -> state 1469 | [ k ] -> ( 1470 (* Check if key exists as a value *) 1471 if List.mem_assoc k state.values then 1472 Fmt.failwith "Cannot use value '%s' as a table" k; 1473 match Hashtbl.find_opt state.subtables k with 1474 | Some sub -> 1475 (* Check if it's an array of tables (can't extend with dotted keys) *) 1476 if sub.is_array then 1477 failwith 1478 (Fmt.str "Cannot extend array of tables '%s' using dotted keys" k); 1479 (* Check if it's closed (explicitly defined with [table] header) *) 1480 if sub.closed then 1481 Fmt.failwith "Cannot extend table '%s' using dotted keys" k; 1482 if sub.is_inline then Fmt.failwith "Cannot extend inline table '%s'" k; 1483 (* Mark as defined by dotted key *) 1484 sub.defined <- true; 1485 sub 1486 | None -> 1487 let sub = table_state () in 1488 sub.defined <- true; 1489 (* Mark as defined by dotted key *) 1490 Hashtbl.add state.subtables k sub; 1491 sub) 1492 | k :: rest -> 1493 (* Check if key exists as a value *) 1494 if List.mem_assoc k state.values then 1495 Fmt.failwith "Cannot use value '%s' as a table" k; 1496 let sub = 1497 match Hashtbl.find_opt state.subtables k with 1498 | Some sub -> 1499 (* Check if it's an array of tables (can't extend with dotted keys) *) 1500 if sub.is_array then 1501 failwith 1502 (Fmt.str "Cannot extend array of tables '%s' using dotted keys" 1503 k); 1504 if sub.closed then 1505 Fmt.failwith "Cannot extend table '%s' using dotted keys" k; 1506 if sub.is_inline then 1507 Fmt.failwith "Cannot extend inline table '%s'" k; 1508 (* Mark as defined by dotted key *) 1509 sub.defined <- true; 1510 sub 1511 | None -> 1512 let sub = table_state () in 1513 sub.defined <- true; 1514 (* Mark as defined by dotted key *) 1515 Hashtbl.add state.subtables k sub; 1516 sub 1517 in 1518 table_for_dotted_key sub rest 1519 1520let rec table_state_to_toml state = 1521 let subtable_values = 1522 Hashtbl.fold 1523 (fun k sub acc -> 1524 let v = 1525 if sub.is_array then 1526 Toml.Array (List.map table_state_to_toml (array_elements sub)) 1527 else table_state_to_toml sub 1528 in 1529 (k, v) :: acc) 1530 state.subtables [] 1531 in 1532 Toml.Table (List.rev state.values @ subtable_values) 1533 1534and array_elements state = List.rev state.array_elements 1535 1536let rec key_path_has_prefix keys prefix = 1537 match (keys, prefix) with 1538 | _, [] -> true 1539 | [], _ -> false 1540 | k :: krest, p :: prest -> k = p && key_path_has_prefix krest prest 1541 1542let rec remove_key_prefix keys prefix = 1543 match (keys, prefix) with 1544 | ks, [] -> ks 1545 | [], _ -> [] 1546 | _ :: krest, _ :: prest -> remove_key_prefix krest prest 1547 1548let check_array_table_conflict array_table keys = 1549 if array_table.defined && not array_table.is_array then 1550 Fmt.failwith 1551 "Cannot define '%s' as array of tables; already defined as table" 1552 (String.concat "." keys); 1553 if 1554 (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) 1555 && not array_table.is_array 1556 then 1557 Fmt.failwith "Cannot define '%s' as array of tables; already has content" 1558 (String.concat "." keys) 1559 1560let define_table_header table keys = 1561 if table.is_array then 1562 Fmt.failwith 1563 "Cannot define '%s' as table; already defined as array of tables" 1564 (String.concat "." keys); 1565 if table.defined then 1566 Fmt.failwith "Table '%s' already defined" (String.concat "." keys); 1567 table.defined <- true; 1568 table.closed <- true 1569 1570let add_value_to_table tbl key v = 1571 if List.mem_assoc key tbl.values then Fmt.failwith "Duplicate key: %s" key; 1572 (match Hashtbl.find_opt tbl.subtables key with 1573 | Some sub -> 1574 if sub.is_array then 1575 Fmt.failwith "Cannot redefine array of tables '%s' as a value" key 1576 else Fmt.failwith "Cannot redefine table '%s' as a value" key 1577 | None -> ()); 1578 tbl.values <- (key, v) :: tbl.values 1579 1580type doc_state = { 1581 root : table_state; 1582 mutable current_table : table_state; 1583 array_context_stack : (string list * table_state * table_state) list ref; 1584} 1585 1586let doc_state_of root = 1587 { root; current_table = root; array_context_stack = ref [] } 1588 1589let array_context ds keys = 1590 let rec find stack = 1591 match stack with 1592 | [] -> None 1593 | (path, parent, container) :: rest -> 1594 if keys = path then Some (`Sibling (path, parent, container)) 1595 else if 1596 key_path_has_prefix keys path && List.length keys > List.length path 1597 then 1598 let current_entry = List.hd container.array_elements in 1599 Some (`Nested (path, current_entry)) 1600 else find rest 1601 in 1602 find !(ds.array_context_stack) 1603 1604let rec pop_invalid_contexts ds keys = 1605 match !(ds.array_context_stack) with 1606 | [] -> () 1607 | (path, _, _) :: rest -> 1608 if not (key_path_has_prefix keys path) then begin 1609 ds.array_context_stack := rest; 1610 pop_invalid_contexts ds keys 1611 end 1612 1613let handle_array_of_tables ds keys = 1614 pop_invalid_contexts ds keys; 1615 match array_context ds keys with 1616 | Some (`Sibling (_path, _parent, container)) -> 1617 let new_entry = table_state () in 1618 container.array_elements <- new_entry :: container.array_elements; 1619 ds.current_table <- new_entry 1620 | Some (`Nested (parent_path, parent_entry)) -> 1621 let relative_keys = remove_key_prefix keys parent_path in 1622 let array_table = ensure_table parent_entry relative_keys true in 1623 check_array_table_conflict array_table keys; 1624 array_table.is_array <- true; 1625 let new_entry = table_state () in 1626 array_table.array_elements <- new_entry :: array_table.array_elements; 1627 ds.current_table <- new_entry; 1628 ds.array_context_stack := 1629 (keys, parent_entry, array_table) :: !(ds.array_context_stack) 1630 | None -> 1631 let array_table = ensure_table ds.root keys true in 1632 check_array_table_conflict array_table keys; 1633 array_table.is_array <- true; 1634 let entry = table_state () in 1635 array_table.array_elements <- entry :: array_table.array_elements; 1636 ds.current_table <- entry; 1637 ds.array_context_stack := 1638 (keys, ds.root, array_table) :: !(ds.array_context_stack) 1639 1640let handle_table_header ds keys = 1641 pop_invalid_contexts ds keys; 1642 match array_context ds keys with 1643 | Some (`Nested (parent_path, parent_entry)) -> 1644 let relative_keys = remove_key_prefix keys parent_path in 1645 let table = 1646 if relative_keys <> [] then ensure_table parent_entry relative_keys true 1647 else ensure_table ds.root keys true 1648 in 1649 define_table_header table keys; 1650 ds.current_table <- table 1651 | Some (`Sibling (_, _, container)) -> 1652 if container.is_array then 1653 Fmt.failwith 1654 "Cannot define '%s' as table; already defined as array of tables" 1655 (String.concat "." keys); 1656 let table = ensure_table ds.root keys true in 1657 if table.defined then 1658 Fmt.failwith "Table '%s' already defined" (String.concat "." keys); 1659 table.defined <- true; 1660 table.closed <- true; 1661 ds.current_table <- table 1662 | None -> 1663 let table = ensure_table ds.root keys true in 1664 define_table_header table keys; 1665 ds.current_table <- table; 1666 if 1667 not 1668 (List.exists 1669 (fun (p, _, _) -> key_path_has_prefix keys p) 1670 !(ds.array_context_stack)) 1671 then ds.array_context_stack := [] 1672 1673(* Main parser function *) 1674let parse_toml_from_lexer lexer = 1675 let parser = parser lexer in 1676 let ds = doc_state_of (table_state ()) in 1677 1678 let rec parse_document () = 1679 skip_newlines parser; 1680 match peek_token parser with 1681 | Tok_eof -> () 1682 | Tok_lbracket -> ( 1683 ignore (consume_token parser); 1684 let is_adjacent_bracket = next_raw_char_is parser '[' in 1685 match peek_token parser with 1686 | Tok_lbracket when not is_adjacent_bracket -> 1687 failwith "Invalid table header syntax" 1688 | Tok_lbracket -> 1689 ignore (consume_token parser); 1690 let keys = parse_dotted_key parser in 1691 expect_token parser Tok_rbracket; 1692 if not (next_raw_char_is parser ']') then 1693 failwith "Invalid array of tables syntax (space in ]])"; 1694 expect_token parser Tok_rbracket; 1695 skip_to_newline parser; 1696 handle_array_of_tables ds keys; 1697 parse_document () 1698 | _ -> 1699 let keys = parse_dotted_key parser in 1700 expect_token parser Tok_rbracket; 1701 skip_to_newline parser; 1702 handle_table_header ds keys; 1703 parse_document ()) 1704 | Tok_bare_key _ | Tok_basic_string _ | Tok_literal_string _ | Tok_integer _ 1705 | Tok_float _ | Tok_date_local _ | Tok_datetime _ | Tok_datetime_local _ 1706 | Tok_time_local _ -> 1707 let keys = parse_dotted_key parser in 1708 expect_token parser Tok_equals; 1709 let value = parse_value parser in 1710 skip_to_newline parser; 1711 (match keys with 1712 | [] -> failwith "Empty key" 1713 | [ k ] -> add_value_to_table ds.current_table k value 1714 | _ -> 1715 let parent_keys = List.rev (List.tl (List.rev keys)) in 1716 let final_key = List.hd (List.rev keys) in 1717 let parent = table_for_dotted_key ds.current_table parent_keys in 1718 add_value_to_table parent final_key value); 1719 parse_document () 1720 | _tok -> Fmt.failwith "Unexpected token at line %d" parser.lexer.line 1721 and skip_to_newline parser = 1722 skip_ws_and_comments parser.lexer; 1723 match peek_token parser with 1724 | Tok_newline -> ignore (consume_token parser) 1725 | Tok_eof -> () 1726 | _ -> failwith "Expected newline after value" 1727 in 1728 1729 parse_document (); 1730 table_state_to_toml ds.root 1731 1732(* Parse TOML from string - creates lexer internally *) 1733let parse_toml input = 1734 let lexer = lexer input in 1735 parse_toml_from_lexer lexer 1736 1737(* Parse TOML directly from Bytes.Reader - no intermediate string *) 1738let parse_toml_from_reader ?file r = 1739 let lexer = lexer_from_reader ?file r in 1740 parse_toml_from_lexer lexer 1741 1742let normalize_exp_notation s = 1743 let buf = Buffer.create (String.length s + 1) in 1744 let i = ref 0 in 1745 while !i < String.length s do 1746 let c = s.[!i] in 1747 if c = 'E' || c = 'e' then begin 1748 Buffer.add_char buf 'e'; 1749 if !i + 1 < String.length s then begin 1750 let next = s.[!i + 1] in 1751 if next >= '0' && next <= '9' then Buffer.add_char buf '+' 1752 end 1753 end 1754 else Buffer.add_char buf c; 1755 incr i 1756 done; 1757 Buffer.contents buf 1758 1759let shortest_exp_notation f init_prec = 1760 let rec try_exp prec = 1761 if prec > 17 then normalize_exp_notation (Fmt.str "%.17e" f) 1762 else 1763 let s = normalize_exp_notation (Fmt.str "%.*e" prec f) in 1764 if float_of_string s = f then s else try_exp (prec + 1) 1765 in 1766 try_exp init_prec 1767 1768let shortest_decimal_notation f = 1769 let rec try_decimal prec = 1770 if prec > 17 then None 1771 else 1772 let s = Fmt.str "%.*f" prec f in 1773 let s = 1774 let len = String.length s in 1775 let dot_pos = try String.index s '.' with Not_found -> len in 1776 let rec last_nonzero i = 1777 if i <= dot_pos then dot_pos + 2 1778 else if s.[i] <> '0' then i + 1 1779 else last_nonzero (i - 1) 1780 in 1781 String.sub s 0 (min len (last_nonzero (len - 1))) 1782 in 1783 let s = 1784 if not (String.contains s '.') then s ^ ".0" 1785 else if s.[String.length s - 1] = '.' then s ^ "0" 1786 else s 1787 in 1788 if float_of_string s = f then Some s else try_decimal (prec + 1) 1789 in 1790 try_decimal 1 1791 1792let shortest_g_notation f = 1793 let rec try_g prec = 1794 if prec > 17 then Fmt.str "%.17g" f 1795 else 1796 let s = Fmt.str "%.*g" prec f in 1797 if float_of_string s = f then s else try_g (prec + 1) 1798 in 1799 try_g 1 1800 1801let integer_float_str f = 1802 let abs_f = Float.abs f in 1803 if abs_f = 9007199254740991.0 then Fmt.str "%.0f" f 1804 else if abs_f >= 1e6 then shortest_exp_notation f 0 1805 else if abs_f >= 2.0 then Fmt.str "%.1f" f 1806 else Fmt.str "%.0f" f 1807 1808let fractional_float_str f = 1809 let abs_f = Float.abs f in 1810 if abs_f >= 1e10 || (abs_f < 1e-4 && abs_f > 0.0) then 1811 shortest_exp_notation f 1 1812 else 1813 match shortest_decimal_notation f with 1814 | Some d -> d 1815 | None -> shortest_g_notation f 1816 1817let float_to_tagged_json_str f = 1818 if Float.is_nan f then "nan" 1819 else if f = Float.infinity then "inf" 1820 else if f = Float.neg_infinity then "-inf" 1821 else if f = 0.0 then if 1.0 /. f = Float.neg_infinity then "-0" else "0" 1822 else if Float.is_integer f then integer_float_str f 1823 else fractional_float_str f 1824 1825(* Convert TOML to tagged JSON for toml-test compatibility *) 1826let rec toml_to_tagged_json value = 1827 match value with 1828 | Toml.String s -> 1829 Fmt.str "{\"type\":\"string\",\"value\":%s}" (json_encode_string s) 1830 | Toml.Int i -> Fmt.str "{\"type\":\"integer\",\"value\":\"%Ld\"}" i 1831 | Toml.Float f -> 1832 Fmt.str "{\"type\":\"float\",\"value\":\"%s\"}" 1833 (float_to_tagged_json_str f) 1834 | Toml.Bool b -> 1835 Fmt.str "{\"type\":\"bool\",\"value\":\"%s\"}" 1836 (if b then "true" else "false") 1837 | Toml.Datetime s -> 1838 validate_datetime_string s; 1839 Fmt.str "{\"type\":\"datetime\",\"value\":\"%s\"}" s 1840 | Toml.Datetime_local s -> 1841 validate_datetime_string s; 1842 Fmt.str "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s 1843 | Toml.Date_local s -> 1844 validate_date_string s; 1845 Fmt.str "{\"type\":\"date-local\",\"value\":\"%s\"}" s 1846 | Toml.Time_local s -> 1847 validate_time_string s; 1848 Fmt.str "{\"type\":\"time-local\",\"value\":\"%s\"}" s 1849 | Toml.Array items -> 1850 let json_items = List.map toml_to_tagged_json items in 1851 Fmt.str "[%s]" (String.concat "," json_items) 1852 | Toml.Table pairs -> 1853 let json_pairs = 1854 List.map 1855 (fun (k, v) -> 1856 Fmt.str "%s:%s" (json_encode_string k) (toml_to_tagged_json v)) 1857 pairs 1858 in 1859 Fmt.str "{%s}" (String.concat "," json_pairs) 1860 1861and json_encode_string s = 1862 let buf = Buffer.create (String.length s + 2) in 1863 Buffer.add_char buf '"'; 1864 String.iter 1865 (fun c -> 1866 match c with 1867 | '"' -> Buffer.add_string buf "\\\"" 1868 | '\\' -> Buffer.add_string buf "\\\\" 1869 | '\n' -> Buffer.add_string buf "\\n" 1870 | '\r' -> Buffer.add_string buf "\\r" 1871 | '\t' -> Buffer.add_string buf "\\t" 1872 | '\b' -> Buffer.add_string buf "\\b" (* backspace *) 1873 | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f" (* formfeed *) 1874 | c when Char.code c < 0x20 -> 1875 Buffer.add_string buf (Fmt.str "\\u%04x" (Char.code c)) 1876 | c -> Buffer.add_char buf c) 1877 s; 1878 Buffer.add_char buf '"'; 1879 Buffer.contents buf 1880 1881let tagged_value_to_toml value = 1882 match value with 1883 | Toml.Table [ ("type", Toml.String typ); ("value", Toml.String v) ] 1884 | Toml.Table [ ("value", Toml.String v); ("type", Toml.String typ) ] -> ( 1885 match typ with 1886 | "string" -> Toml.String v 1887 | "integer" -> Toml.Int (Int64.of_string v) 1888 | "float" -> ( 1889 match v with 1890 | "inf" -> Toml.Float Float.infinity 1891 | "-inf" -> Toml.Float Float.neg_infinity 1892 | "nan" -> Toml.Float Float.nan 1893 | _ -> Toml.Float (float_of_string v)) 1894 | "bool" -> Toml.Bool (v = "true") 1895 | "datetime" -> Toml.Datetime v 1896 | "datetime-local" -> Toml.Datetime_local v 1897 | "date-local" -> Toml.Date_local v 1898 | "time-local" -> Toml.Time_local v 1899 | _ -> Fmt.failwith "Unknown type: %s" typ) 1900 | _ -> value 1901 1902type json_reader = { s : string; len : int; pos : int ref } 1903 1904let json_reader_of s = { s; len = String.length s; pos = ref 0 } 1905 1906let jr_skip_ws r = 1907 while 1908 !(r.pos) < r.len 1909 && (r.s.[!(r.pos)] = ' ' 1910 || r.s.[!(r.pos)] = '\t' 1911 || r.s.[!(r.pos)] = '\n' 1912 || r.s.[!(r.pos)] = '\r') 1913 do 1914 incr r.pos 1915 done 1916 1917let jr_expect r c = 1918 jr_skip_ws r; 1919 if !(r.pos) >= r.len || r.s.[!(r.pos)] <> c then 1920 Fmt.failwith "Expected '%c' at position %d" c !(r.pos); 1921 incr r.pos 1922 1923let jr_peek r = 1924 jr_skip_ws r; 1925 if !(r.pos) >= r.len then None else Some r.s.[!(r.pos)] 1926 1927let jr_read_escape r buf = 1928 incr r.pos; 1929 if !(r.pos) >= r.len then failwith "Unexpected end in string escape"; 1930 match r.s.[!(r.pos)] with 1931 | '"' -> 1932 Buffer.add_char buf '"'; 1933 incr r.pos 1934 | '\\' -> 1935 Buffer.add_char buf '\\'; 1936 incr r.pos 1937 | '/' -> 1938 Buffer.add_char buf '/'; 1939 incr r.pos 1940 | 'n' -> 1941 Buffer.add_char buf '\n'; 1942 incr r.pos 1943 | 'r' -> 1944 Buffer.add_char buf '\r'; 1945 incr r.pos 1946 | 't' -> 1947 Buffer.add_char buf '\t'; 1948 incr r.pos 1949 | 'b' -> 1950 Buffer.add_char buf '\b'; 1951 incr r.pos 1952 | 'f' -> 1953 Buffer.add_char buf (Char.chr 0x0C); 1954 incr r.pos 1955 | 'u' -> 1956 incr r.pos; 1957 if !(r.pos) + 3 >= r.len then failwith "Invalid unicode escape"; 1958 let hex = String.sub r.s !(r.pos) 4 in 1959 let cp = int_of_string ("0x" ^ hex) in 1960 Buffer.add_string buf (codepoint_to_utf8 cp); 1961 r.pos := !(r.pos) + 4 1962 | c -> Fmt.failwith "Invalid escape: \\%c" c 1963 1964let jr_read_string r = 1965 jr_skip_ws r; 1966 jr_expect r '"'; 1967 let buf = Buffer.create 64 in 1968 while !(r.pos) < r.len && r.s.[!(r.pos)] <> '"' do 1969 if r.s.[!(r.pos)] = '\\' then jr_read_escape r buf 1970 else begin 1971 Buffer.add_char buf r.s.[!(r.pos)]; 1972 incr r.pos 1973 end 1974 done; 1975 jr_expect r '"'; 1976 Buffer.contents buf 1977 1978let rec jr_read_value r = 1979 jr_skip_ws r; 1980 match jr_peek r with 1981 | Some '{' -> jr_read_object r 1982 | Some '[' -> jr_read_array r 1983 | Some '"' -> Toml.String (jr_read_string r) 1984 | _ -> failwith "Expected value" 1985 1986and jr_read_object r = 1987 jr_expect r '{'; 1988 jr_skip_ws r; 1989 if jr_peek r = Some '}' then begin 1990 incr r.pos; 1991 Toml.Table [] 1992 end 1993 else begin 1994 let pairs = ref [] in 1995 let first = ref true in 1996 while jr_peek r <> Some '}' do 1997 if not !first then jr_expect r ','; 1998 first := false; 1999 jr_skip_ws r; 2000 let key = jr_read_string r in 2001 jr_expect r ':'; 2002 let value = jr_read_value r in 2003 pairs := (key, tagged_value_to_toml value) :: !pairs 2004 done; 2005 jr_expect r '}'; 2006 Toml.Table (List.rev !pairs) 2007 end 2008 2009and jr_read_array r = 2010 jr_expect r '['; 2011 jr_skip_ws r; 2012 if jr_peek r = Some ']' then begin 2013 incr r.pos; 2014 Toml.Array [] 2015 end 2016 else begin 2017 let items = ref [] in 2018 let first = ref true in 2019 while jr_peek r <> Some ']' do 2020 if not !first then jr_expect r ','; 2021 first := false; 2022 items := tagged_value_to_toml (jr_read_value r) :: !items 2023 done; 2024 jr_expect r ']'; 2025 Toml.Array (List.rev !items) 2026 end 2027 2028(* Tagged JSON to TOML for encoder *) 2029let decode_tagged_json_string s = jr_read_value (json_reader_of s) 2030 2031(* ============================================ 2032 Streaming TOML Encoder 2033 ============================================ *) 2034 2035let is_bare_key_char c = 2036 (c >= 'A' && c <= 'Z') 2037 || (c >= 'a' && c <= 'z') 2038 || (c >= '0' && c <= '9') 2039 || c = '_' || c = '-' 2040 2041let rec write_toml_string w s = 2042 (* Check if we need to escape *) 2043 let needs_escape = 2044 String.exists 2045 (fun c -> 2046 let code = Char.code c in 2047 c = '"' || c = '\\' || c = '\n' || c = '\r' || c = '\t' || code < 0x20 2048 || code = 0x7F) 2049 s 2050 in 2051 if needs_escape then begin 2052 Bytes.Writer.write_string w "\""; 2053 String.iter 2054 (fun c -> 2055 match c with 2056 | '"' -> Bytes.Writer.write_string w "\\\"" 2057 | '\\' -> Bytes.Writer.write_string w "\\\\" 2058 | '\n' -> Bytes.Writer.write_string w "\\n" 2059 | '\r' -> Bytes.Writer.write_string w "\\r" 2060 | '\t' -> Bytes.Writer.write_string w "\\t" 2061 | '\b' -> Bytes.Writer.write_string w "\\b" 2062 | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f" 2063 | c when Char.code c < 0x20 || Char.code c = 0x7F -> 2064 Bytes.Writer.write_string w (Fmt.str "\\u%04X" (Char.code c)) 2065 | c -> 2066 let b = Bytes.create 1 in 2067 Bytes.set b 0 c; 2068 Bytes.Writer.write_bytes w b) 2069 s; 2070 Bytes.Writer.write_string w "\"" 2071 end 2072 else begin 2073 Bytes.Writer.write_string w "\""; 2074 Bytes.Writer.write_string w s; 2075 Bytes.Writer.write_string w "\"" 2076 end 2077 2078and write_toml_key w k = 2079 (* Check if it can be a bare key *) 2080 let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in 2081 if is_bare then Bytes.Writer.write_string w k else write_toml_string w k 2082 2083and write_toml_value w ?(inline = false) value = 2084 match value with 2085 | Toml.String s -> write_toml_string w s 2086 | Toml.Int i -> Bytes.Writer.write_string w (Int64.to_string i) 2087 | Toml.Float f -> 2088 if Float.is_nan f then Bytes.Writer.write_string w "nan" 2089 else if f = Float.infinity then Bytes.Writer.write_string w "inf" 2090 else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf" 2091 else begin 2092 let s = Fmt.str "%.17g" f in 2093 (* Ensure it looks like a float *) 2094 let s = 2095 if 2096 String.contains s '.' || String.contains s 'e' 2097 || String.contains s 'E' 2098 then s 2099 else s ^ ".0" 2100 in 2101 Bytes.Writer.write_string w s 2102 end 2103 | Toml.Bool b -> Bytes.Writer.write_string w (if b then "true" else "false") 2104 | Toml.Datetime s -> Bytes.Writer.write_string w s 2105 | Toml.Datetime_local s -> Bytes.Writer.write_string w s 2106 | Toml.Date_local s -> Bytes.Writer.write_string w s 2107 | Toml.Time_local s -> Bytes.Writer.write_string w s 2108 | Toml.Array items -> 2109 Bytes.Writer.write_string w "["; 2110 List.iteri 2111 (fun i item -> 2112 if i > 0 then Bytes.Writer.write_string w ", "; 2113 write_toml_value w ~inline:true item) 2114 items; 2115 Bytes.Writer.write_string w "]" 2116 | Toml.Table pairs when inline -> 2117 Bytes.Writer.write_string w "{"; 2118 List.iteri 2119 (fun i (k, v) -> 2120 if i > 0 then Bytes.Writer.write_string w ", "; 2121 write_toml_key w k; 2122 Bytes.Writer.write_string w " = "; 2123 write_toml_value w ~inline:true v) 2124 pairs; 2125 Bytes.Writer.write_string w "}" 2126 | Toml.Table _ -> failwith "Cannot encode table inline without inline flag" 2127 2128let is_pure_table_array items = 2129 items <> [] 2130 && List.for_all (function Toml.Table _ -> true | _ -> false) items 2131 2132let write_path w path = 2133 List.iteri 2134 (fun i k -> 2135 if i > 0 then Bytes.Writer.write_string w "."; 2136 write_toml_key w k) 2137 path 2138 2139let rec encode_at_path w has_content path value = 2140 match value with 2141 | Toml.Table pairs -> 2142 let simple, nested = 2143 List.partition 2144 (fun (_, v) -> 2145 match v with 2146 | Toml.Table _ -> false 2147 | Toml.Array items -> not (is_pure_table_array items) 2148 | _ -> true) 2149 pairs 2150 in 2151 List.iter 2152 (fun (k, v) -> 2153 write_toml_key w k; 2154 Bytes.Writer.write_string w " = "; 2155 write_toml_value w ~inline:true v; 2156 Bytes.Writer.write_string w "\n"; 2157 has_content := true) 2158 simple; 2159 encode_nested_pairs w has_content path nested 2160 | _ -> failwith "Top-level TOML must be a table" 2161 2162and encode_nested_pairs w has_content path nested = 2163 List.iter 2164 (fun (k, v) -> 2165 let new_path = path @ [ k ] in 2166 match v with 2167 | Toml.Table _ -> 2168 if !has_content then Bytes.Writer.write_string w "\n"; 2169 Bytes.Writer.write_string w "["; 2170 write_path w new_path; 2171 Bytes.Writer.write_string w "]\n"; 2172 has_content := true; 2173 encode_at_path w has_content new_path v 2174 | Toml.Array items when is_pure_table_array items -> 2175 encode_table_array w has_content new_path items 2176 | _ -> 2177 write_toml_key w k; 2178 Bytes.Writer.write_string w " = "; 2179 write_toml_value w ~inline:true v; 2180 Bytes.Writer.write_string w "\n"; 2181 has_content := true) 2182 nested 2183 2184and encode_table_array w has_content new_path items = 2185 List.iter 2186 (fun item -> 2187 match item with 2188 | Toml.Table _ -> 2189 if !has_content then Bytes.Writer.write_string w "\n"; 2190 Bytes.Writer.write_string w "[["; 2191 write_path w new_path; 2192 Bytes.Writer.write_string w "]]\n"; 2193 has_content := true; 2194 encode_at_path w has_content new_path item 2195 | _ -> assert false) 2196 items 2197 2198(* True streaming TOML encoder - writes directly to Bytes.Writer *) 2199let encode_to_writer w value = 2200 let has_content = ref false in 2201 encode_at_path w has_content [] value 2202 2203(* ============================================ 2204 Public Interface - Parsing 2205 ============================================ *) 2206 2207let of_string input = 2208 try Ok (parse_toml input) with 2209 | Failure msg -> 2210 Error (Toml.Error.v (Toml.Error.Syntax (Toml.Error.Expected msg))) 2211 | Toml.Error.Error e -> Error e 2212 | e -> 2213 Error 2214 (Toml.Error.v 2215 (Toml.Error.Syntax (Toml.Error.Expected (Printexc.to_string e)))) 2216 2217let of_reader ?file r = 2218 try Ok (parse_toml_from_reader ?file r) with 2219 | Failure msg -> 2220 Error (Toml.Error.v (Toml.Error.Syntax (Toml.Error.Expected msg))) 2221 | Toml.Error.Error e -> Error e 2222 | e -> 2223 Error 2224 (Toml.Error.v 2225 (Toml.Error.Syntax (Toml.Error.Expected (Printexc.to_string e)))) 2226 2227let parse = parse_toml 2228let parse_reader ?file r = parse_toml_from_reader ?file r 2229 2230(* ============================================ 2231 Public Interface - Encoding 2232 ============================================ *) 2233 2234let to_writer w value = encode_to_writer w value 2235 2236let to_string value = 2237 let buf = Buffer.create 256 in 2238 let w = Bytes.Writer.of_buffer buf in 2239 encode_to_writer w value; 2240 Buffer.contents buf 2241 2242(* ============================================ 2243 Codec I/O Operations 2244 ============================================ *) 2245 2246let decode_string c s = Result.bind (of_string s) (Tomlt.decode c) 2247 2248let decode_string_exn c s = 2249 let toml = parse s in 2250 Tomlt.decode_exn c toml 2251 2252let encode_string c v = 2253 let toml = Tomlt.encode c v in 2254 to_string toml 2255 2256let decode_reader ?file c r = Result.bind (of_reader ?file r) (Tomlt.decode c) 2257 2258let encode_writer c v w = 2259 let toml = Tomlt.encode c v in 2260 to_writer w toml 2261 2262(* ============================================ 2263 Tagged JSON Module 2264 ============================================ *) 2265 2266module Tagged_json = struct 2267 let encode = toml_to_tagged_json 2268 let decode = decode_tagged_json_string 2269 2270 let decode_and_encode_toml json_str = 2271 try 2272 let toml = decode_tagged_json_string json_str in 2273 Ok (to_string toml) 2274 with 2275 | Failure msg -> Error msg 2276 | e -> Error (Printexc.to_string e) 2277end