lib_bytesrw/tomlt_bytesrw.ml at main · anil.recoil.org/ocaml-tomlt

TOML 1.1 codecs for OCaml
ocaml-tomlt / lib_bytesrw / tomlt_bytesrw.ml
at main 2355 lines 93 kB view raw
wrap content
   1(*---------------------------------------------------------------------------
   2  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   3  SPDX-License-Identifier: ISC
   4 ---------------------------------------------------------------------------*)
   5
   6open Bytesrw
   7
   8(* Aliases for cleaner code *)
   9module Toml = Tomlt.Toml
  10module Toml_error = Toml.Error
  11
  12(* Lexer - works directly on bytes buffer filled from Bytes.Reader *)
  13
  14type token =
  15  | Tok_lbracket
  16  | Tok_rbracket
  17  | Tok_lbrace
  18  | Tok_rbrace
  19  | Tok_equals
  20  | Tok_comma
  21  | Tok_dot
  22  | Tok_newline
  23  | Tok_eof
  24  | Tok_bare_key of string
  25  | Tok_basic_string of string
  26  | Tok_literal_string of string
  27  | Tok_ml_basic_string of string   (* Multiline basic string - not valid as key *)
  28  | Tok_ml_literal_string of string (* Multiline literal string - not valid as key *)
  29  | Tok_integer of int64 * string  (* value, original string for key reconstruction *)
  30  | Tok_float of float * string  (* value, original string for key reconstruction *)
  31  | Tok_datetime of string
  32  | Tok_datetime_local of string
  33  | Tok_date_local of string
  34  | Tok_time_local of string
  35
  36type lexer = {
  37  input : bytes;           (* Buffer containing input data *)
  38  input_len : int;         (* Length of valid data in input *)
  39  mutable pos : int;
  40  mutable line : int;
  41  mutable col : int;
  42  file : string;
  43}
  44
  45(* Create lexer from string (copies to bytes) *)
  46let make_lexer ?(file = "-") s =
  47  let input = Bytes.of_string s in
  48  { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
  49
  50(* Create lexer directly from Bytes.Reader - reads all data into buffer *)
  51let make_lexer_from_reader ?(file = "-") r =
  52  (* Read all slices into a buffer *)
  53  let buf = Buffer.create 4096 in
  54  let rec read_all () =
  55    let slice = Bytes.Reader.read r in
  56    if Bytes.Slice.is_eod slice then ()
  57    else begin
  58      Bytes.Slice.add_to_buffer buf slice;
  59      read_all ()
  60    end
  61  in
  62  read_all ();
  63  let input = Buffer.to_bytes buf in
  64  { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
  65
  66let is_eof l = l.pos >= l.input_len
  67
  68let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos)
  69
  70let peek2 l =
  71  if l.pos + 1 >= l.input_len then None
  72  else Some (Bytes.get l.input (l.pos + 1))
  73
  74let peek_n l n =
  75  if l.pos + n - 1 >= l.input_len then None
  76  else Some (Bytes.sub_string l.input l.pos n)
  77
  78let advance l =
  79  if not (is_eof l) then begin
  80    if Bytes.get l.input l.pos = '\n' then begin
  81      l.line <- l.line + 1;
  82      l.col <- 1
  83    end else
  84      l.col <- l.col + 1;
  85    l.pos <- l.pos + 1
  86  end
  87
  88let advance_n l n =
  89  for _ = 1 to n do advance l done
  90
  91let skip_whitespace l =
  92  while not (is_eof l) && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') do
  93    advance l
  94  done
  95
  96(* Helper functions for bytes access *)
  97let[@inline] get_char l pos = Bytes.unsafe_get l.input pos
  98let[@inline] get_current l = Bytes.unsafe_get l.input l.pos
  99let sub_string l pos len = Bytes.sub_string l.input pos len
 100
 101(* Helper to create error location from lexer state *)
 102let lexer_loc l = Toml.Error.loc ~file:l.file ~line:l.line ~column:l.col ()
 103
 104(* Get expected byte length of UTF-8 char from first byte *)
 105let utf8_byte_length_from_first_byte c =
 106  let code = Char.code c in
 107  if code < 0x80 then 1
 108  else if code < 0xC0 then 0  (* Invalid: continuation byte as start *)
 109  else if code < 0xE0 then 2
 110  else if code < 0xF0 then 3
 111  else if code < 0xF8 then 4
 112  else 0  (* Invalid: 5+ byte sequence *)
 113
 114(* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *)
 115let validate_utf8_at_pos_bytes l =
 116  if l.pos >= l.input_len then
 117    Toml.Error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
 118  let byte_len = utf8_byte_length_from_first_byte (Bytes.unsafe_get l.input l.pos) in
 119  if byte_len = 0 then
 120    Toml.Error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
 121  if l.pos + byte_len > l.input_len then
 122    Toml.Error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8;
 123  (* Validate using uutf - it checks overlong encodings, surrogates, etc. *)
 124  let sub = Bytes.sub_string l.input l.pos byte_len in
 125  let valid = ref false in
 126  Uutf.String.fold_utf_8 (fun () _ -> function
 127    | `Uchar _ -> valid := true
 128    | `Malformed _ -> ()
 129  ) () sub;
 130  if not !valid then
 131    Toml.Error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
 132  byte_len
 133
 134(* UTF-8 validation - validates and advances over a single UTF-8 character *)
 135let validate_utf8_char l =
 136  let byte_len = validate_utf8_at_pos_bytes l in
 137  for _ = 1 to byte_len do advance l done
 138
 139let skip_comment l =
 140  if not (is_eof l) && get_current l = '#' then begin
 141    (* Validate comment characters *)
 142    advance l;
 143    let continue = ref true in
 144    while !continue && not (is_eof l) && get_current l <> '\n' do
 145      let c = get_current l in
 146      let code = Char.code c in
 147      (* CR is only valid if followed by LF (CRLF at end of comment) *)
 148      if c = '\r' then begin
 149        (* Check if this CR is followed by LF - if so, it ends the comment *)
 150        if l.pos + 1 < l.input_len && get_char l (l.pos + 1) = '\n' then
 151          (* This is CRLF - stop the loop, let the main lexer handle it *)
 152          continue := false
 153        else
 154          Toml.Error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return
 155      end else if code >= 0x80 then begin
 156        (* Multi-byte UTF-8 character - validate it *)
 157        validate_utf8_char l
 158      end else begin
 159        (* ASCII control characters other than tab are not allowed in comments *)
 160        if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then
 161          Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code);
 162        advance l
 163      end
 164    done
 165  end
 166
 167let skip_ws_and_comments l =
 168  let rec loop () =
 169    skip_whitespace l;
 170    if not (is_eof l) && get_current l = '#' then begin
 171      skip_comment l;
 172      loop ()
 173    end
 174  in
 175  loop ()
 176
 177let is_bare_key_char c =
 178  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
 179  (c >= '0' && c <= '9') || c = '_' || c = '-'
 180
 181let is_digit c = c >= '0' && c <= '9'
 182let is_hex_digit c = is_digit c || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
 183let is_oct_digit c = c >= '0' && c <= '7'
 184let is_bin_digit c = c = '0' || c = '1'
 185
 186let hex_value c =
 187  if c >= '0' && c <= '9' then Char.code c - Char.code '0'
 188  else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10
 189  else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10
 190  else Toml.Error.raise_number Invalid_hex_digit
 191
 192(* Convert Unicode codepoint to UTF-8 using uutf *)
 193let codepoint_to_utf8 codepoint =
 194  if codepoint < 0 || codepoint > 0x10FFFF then
 195    failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint);
 196  if codepoint >= 0xD800 && codepoint <= 0xDFFF then
 197    failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%04X" codepoint);
 198  let buf = Buffer.create 4 in
 199  Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
 200  Buffer.contents buf
 201
 202(* Parse Unicode escape with error location from lexer *)
 203let unicode_to_utf8 l codepoint =
 204  if codepoint < 0 || codepoint > 0x10FFFF then
 205    Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_codepoint codepoint);
 206  if codepoint >= 0xD800 && codepoint <= 0xDFFF then
 207    Toml.Error.raise_lexer ~location:(lexer_loc l) (Surrogate_codepoint codepoint);
 208  let buf = Buffer.create 4 in
 209  Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
 210  Buffer.contents buf
 211
 212let parse_escape l =
 213  advance l; (* skip backslash *)
 214  if is_eof l then
 215    Toml.Error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
 216  let c = get_current l in
 217  advance l;
 218  match c with
 219  | 'b' -> "\b"
 220  | 't' -> "\t"
 221  | 'n' -> "\n"
 222  | 'f' -> "\x0C"
 223  | 'r' -> "\r"
 224  | 'e' -> "\x1B"  (* TOML 1.1 escape *)
 225  | '"' -> "\""
 226  | '\\' -> "\\"
 227  | 'x' ->
 228      (* \xHH - 2 hex digits *)
 229      if l.pos + 1 >= l.input_len then
 230        Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x");
 231      let c1 = get_char l l.pos in
 232      let c2 = get_char l (l.pos + 1) in
 233      if not (is_hex_digit c1 && is_hex_digit c2) then
 234        Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\x");
 235      let cp = (hex_value c1 * 16) + hex_value c2 in
 236      advance l; advance l;
 237      unicode_to_utf8 l cp
 238  | 'u' ->
 239      (* \uHHHH - 4 hex digits *)
 240      if l.pos + 3 >= l.input_len then
 241        Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u");
 242      let s = sub_string l l.pos 4 in
 243      for i = 0 to 3 do
 244        if not (is_hex_digit s.[i]) then
 245          Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\u")
 246      done;
 247      let cp = int_of_string ("0x" ^ s) in
 248      advance_n l 4;
 249      unicode_to_utf8 l cp
 250  | 'U' ->
 251      (* \UHHHHHHHH - 8 hex digits *)
 252      if l.pos + 7 >= l.input_len then
 253        Toml.Error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U");
 254      let s = sub_string l l.pos 8 in
 255      for i = 0 to 7 do
 256        if not (is_hex_digit s.[i]) then
 257          Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\U")
 258      done;
 259      let cp = int_of_string ("0x" ^ s) in
 260      advance_n l 8;
 261      unicode_to_utf8 l cp
 262  | _ ->
 263      Toml.Error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c)
 264
 265let validate_string_char l c is_multiline =
 266  let code = Char.code c in
 267  (* Control characters other than tab (and LF/CR for multiline) are not allowed *)
 268  if code < 0x09 then
 269    Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code);
 270  if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then
 271    Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code);
 272  if code = 0x7F then
 273    Toml.Error.raise_lexer ~location:(lexer_loc l) (Control_character code)
 274
 275(* Validate UTF-8 in string context and add bytes to buffer *)
 276let validate_and_add_utf8_to_buffer l buf =
 277  let byte_len = validate_utf8_at_pos_bytes l in
 278  Buffer.add_string buf (sub_string l l.pos byte_len);
 279  for _ = 1 to byte_len do advance l done
 280
 281let parse_basic_string l =
 282  advance l; (* skip opening quote *)
 283  let buf = Buffer.create 64 in
 284  let multiline =
 285    match peek_n l 2 with
 286    | Some "\"\"" ->
 287        advance l; advance l; (* skip two more quotes *)
 288        (* Skip newline immediately after opening delimiter *)
 289        (match peek l with
 290        | Some '\n' -> advance l
 291        | Some '\r' ->
 292            advance l;
 293            if peek l = Some '\n' then advance l
 294            else failwith "Bare carriage return not allowed in string"
 295        | _ -> ());
 296        true
 297    | _ -> false
 298  in
 299  let rec loop () =
 300    if is_eof l then
 301      failwith "Unterminated string";
 302    let c = get_current l in
 303    if multiline then begin
 304      if c = '"' then begin
 305        (* Count consecutive quotes *)
 306        let quote_count = ref 0 in
 307        let p = ref l.pos in
 308        while !p < l.input_len && get_char l !p = '"' do
 309          incr quote_count;
 310          incr p
 311        done;
 312        if !quote_count >= 3 then begin
 313          (* 3+ quotes - this is a closing delimiter *)
 314          (* Add extra quotes (up to 2) to content before closing delimiter *)
 315          let extra = min (!quote_count - 3) 2 in
 316          for _ = 1 to extra do
 317            Buffer.add_char buf '"'
 318          done;
 319          advance_n l (!quote_count);
 320          if !quote_count > 5 then
 321            failwith "Too many quotes in multiline string"
 322        end else begin
 323          (* Less than 3 quotes - add them to content *)
 324          for _ = 1 to !quote_count do
 325            Buffer.add_char buf '"';
 326            advance l
 327          done;
 328          loop ()
 329        end
 330      end else if c = '\\' then begin
 331        (* Check for line-ending backslash *)
 332        let saved_pos = l.pos in
 333        let saved_line = l.line in
 334        let saved_col = l.col in
 335        advance l;
 336        let rec skip_ws () =
 337          match peek l with
 338          | Some ' ' | Some '\t' -> advance l; skip_ws ()
 339          | _ -> ()
 340        in
 341        skip_ws ();
 342        match peek l with
 343        | Some '\n' ->
 344            advance l;
 345            (* Skip all whitespace and newlines after *)
 346            let rec skip_all () =
 347              match peek l with
 348              | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all ()
 349              | Some '\r' ->
 350                  advance l;
 351                  if peek l = Some '\n' then advance l;
 352                  skip_all ()
 353              | _ -> ()
 354            in
 355            skip_all ();
 356            loop ()
 357        | Some '\r' ->
 358            advance l;
 359            if peek l = Some '\n' then advance l;
 360            let rec skip_all () =
 361              match peek l with
 362              | Some ' ' | Some '\t' | Some '\n' -> advance l; skip_all ()
 363              | Some '\r' ->
 364                  advance l;
 365                  if peek l = Some '\n' then advance l;
 366                  skip_all ()
 367              | _ -> ()
 368            in
 369            skip_all ();
 370            loop ()
 371        | _ ->
 372            (* Not a line-ending backslash, restore position and parse escape *)
 373            l.pos <- saved_pos;
 374            l.line <- saved_line;
 375            l.col <- saved_col;
 376            Buffer.add_string buf (parse_escape l);
 377            loop ()
 378      end else begin
 379        let code = Char.code c in
 380        if c = '\r' then begin
 381          advance l;
 382          if peek l = Some '\n' then begin
 383            Buffer.add_char buf '\n';
 384            advance l
 385          end else
 386            failwith "Bare carriage return not allowed in string"
 387        end else if code >= 0x80 then begin
 388          (* Multi-byte UTF-8 - validate and add *)
 389          validate_and_add_utf8_to_buffer l buf
 390        end else begin
 391          (* ASCII - validate control chars *)
 392          validate_string_char l c true;
 393          Buffer.add_char buf c;
 394          advance l
 395        end;
 396        loop ()
 397      end
 398    end else begin
 399      (* Single-line basic string *)
 400      if c = '"' then begin
 401        advance l;
 402        ()
 403      end else if c = '\\' then begin
 404        Buffer.add_string buf (parse_escape l);
 405        loop ()
 406      end else if c = '\n' || c = '\r' then
 407        failwith "Newline not allowed in basic string"
 408      else begin
 409        let code = Char.code c in
 410        if code >= 0x80 then begin
 411          (* Multi-byte UTF-8 - validate and add *)
 412          validate_and_add_utf8_to_buffer l buf
 413        end else begin
 414          (* ASCII - validate control chars *)
 415          validate_string_char l c false;
 416          Buffer.add_char buf c;
 417          advance l
 418        end;
 419        loop ()
 420      end
 421    end
 422  in
 423  loop ();
 424  (Buffer.contents buf, multiline)
 425
 426let parse_literal_string l =
 427  advance l; (* skip opening quote *)
 428  let buf = Buffer.create 64 in
 429  let multiline =
 430    match peek_n l 2 with
 431    | Some "''" ->
 432        advance l; advance l; (* skip two more quotes *)
 433        (* Skip newline immediately after opening delimiter *)
 434        (match peek l with
 435        | Some '\n' -> advance l
 436        | Some '\r' ->
 437            advance l;
 438            if peek l = Some '\n' then advance l
 439            else failwith "Bare carriage return not allowed in literal string"
 440        | _ -> ());
 441        true
 442    | _ -> false
 443  in
 444  let rec loop () =
 445    if is_eof l then
 446      failwith "Unterminated literal string";
 447    let c = get_current l in
 448    if multiline then begin
 449      if c = '\'' then begin
 450        (* Count consecutive quotes *)
 451        let quote_count = ref 0 in
 452        let p = ref l.pos in
 453        while !p < l.input_len && get_char l !p = '\'' do
 454          incr quote_count;
 455          incr p
 456        done;
 457        if !quote_count >= 3 then begin
 458          (* 3+ quotes - this is a closing delimiter *)
 459          (* Add extra quotes (up to 2) to content before closing delimiter *)
 460          let extra = min (!quote_count - 3) 2 in
 461          for _ = 1 to extra do
 462            Buffer.add_char buf '\''
 463          done;
 464          advance_n l (!quote_count);
 465          if !quote_count > 5 then
 466            failwith "Too many quotes in multiline literal string"
 467        end else begin
 468          (* Less than 3 quotes - add them to content *)
 469          for _ = 1 to !quote_count do
 470            Buffer.add_char buf '\'';
 471            advance l
 472          done;
 473          loop ()
 474        end
 475      end else begin
 476        let code = Char.code c in
 477        if c = '\r' then begin
 478          advance l;
 479          if peek l = Some '\n' then begin
 480            Buffer.add_char buf '\n';
 481            advance l
 482          end else
 483            failwith "Bare carriage return not allowed in literal string"
 484        end else if code >= 0x80 then begin
 485          (* Multi-byte UTF-8 - validate and add *)
 486          validate_and_add_utf8_to_buffer l buf
 487        end else begin
 488          (* ASCII control char validation for literal strings *)
 489          if code < 0x09 || (code > 0x09 && code < 0x0A) || (code > 0x0D && code < 0x20) || code = 0x7F then
 490            if code <> 0x0A && code <> 0x0D then
 491              failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line);
 492          Buffer.add_char buf c;
 493          advance l
 494        end;
 495        loop ()
 496      end
 497    end else begin
 498      if c = '\'' then begin
 499        advance l;
 500        ()
 501      end else if c = '\n' || c = '\r' then
 502        failwith "Newline not allowed in literal string"
 503      else begin
 504        let code = Char.code c in
 505        if code >= 0x80 then begin
 506          (* Multi-byte UTF-8 - validate and add *)
 507          validate_and_add_utf8_to_buffer l buf
 508        end else begin
 509          (* ASCII control char validation *)
 510          if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then
 511            failwith (Printf.sprintf "Control character U+%04X not allowed in literal string at line %d" code l.line);
 512          Buffer.add_char buf c;
 513          advance l
 514        end;
 515        loop ()
 516      end
 517    end
 518  in
 519  loop ();
 520  (Buffer.contents buf, multiline)
 521
 522let parse_number l =
 523  let start = l.pos in
 524  let neg =
 525    match peek l with
 526    | Some '-' -> advance l; true
 527    | Some '+' -> advance l; false
 528    | _ -> false
 529  in
 530  (* Check for special floats: inf and nan *)
 531  match peek_n l 3 with
 532  | Some "inf" ->
 533      advance_n l 3;
 534      let s = sub_string l start (l.pos - start) in
 535      Tok_float ((if neg then Float.neg_infinity else Float.infinity), s)
 536  | Some "nan" ->
 537      advance_n l 3;
 538      let s = sub_string l start (l.pos - start) in
 539      Tok_float (Float.nan, s)
 540  | _ ->
 541      (* Check for hex, octal, or binary *)
 542      match peek l, peek2 l with
 543      | Some '0', Some 'x' when not neg ->
 544          advance l; advance l;
 545          let num_start = l.pos in
 546          (* Check for leading underscore *)
 547          if peek l = Some '_' then failwith "Leading underscore not allowed after 0x";
 548          let rec read_hex first =
 549            match peek l with
 550            | Some c when is_hex_digit c -> advance l; read_hex false
 551            | Some '_' ->
 552                if first then failwith "Underscore must follow a hex digit";
 553                advance l;
 554                if peek l |> Option.map is_hex_digit |> Option.value ~default:false then
 555                  read_hex false
 556                else
 557                  failwith "Trailing underscore in hex number"
 558            | _ ->
 559                if first then failwith "Expected hex digit after 0x"
 560          in
 561          read_hex true;
 562          let s = sub_string l num_start (l.pos - num_start) in
 563          let s = String.concat "" (String.split_on_char '_' s) in
 564          let orig = sub_string l start (l.pos - start) in
 565          Tok_integer (Int64.of_string ("0x" ^ s), orig)
 566      | Some '0', Some 'o' when not neg ->
 567          advance l; advance l;
 568          let num_start = l.pos in
 569          (* Check for leading underscore *)
 570          if peek l = Some '_' then failwith "Leading underscore not allowed after 0o";
 571          let rec read_oct first =
 572            match peek l with
 573            | Some c when is_oct_digit c -> advance l; read_oct false
 574            | Some '_' ->
 575                if first then failwith "Underscore must follow an octal digit";
 576                advance l;
 577                if peek l |> Option.map is_oct_digit |> Option.value ~default:false then
 578                  read_oct false
 579                else
 580                  failwith "Trailing underscore in octal number"
 581            | _ ->
 582                if first then failwith "Expected octal digit after 0o"
 583          in
 584          read_oct true;
 585          let s = sub_string l num_start (l.pos - num_start) in
 586          let s = String.concat "" (String.split_on_char '_' s) in
 587          let orig = sub_string l start (l.pos - start) in
 588          Tok_integer (Int64.of_string ("0o" ^ s), orig)
 589      | Some '0', Some 'b' when not neg ->
 590          advance l; advance l;
 591          let num_start = l.pos in
 592          (* Check for leading underscore *)
 593          if peek l = Some '_' then failwith "Leading underscore not allowed after 0b";
 594          let rec read_bin first =
 595            match peek l with
 596            | Some c when is_bin_digit c -> advance l; read_bin false
 597            | Some '_' ->
 598                if first then failwith "Underscore must follow a binary digit";
 599                advance l;
 600                if peek l |> Option.map is_bin_digit |> Option.value ~default:false then
 601                  read_bin false
 602                else
 603                  failwith "Trailing underscore in binary number"
 604            | _ ->
 605                if first then failwith "Expected binary digit after 0b"
 606          in
 607          read_bin true;
 608          let s = sub_string l num_start (l.pos - num_start) in
 609          let s = String.concat "" (String.split_on_char '_' s) in
 610          let orig = sub_string l start (l.pos - start) in
 611          Tok_integer (Int64.of_string ("0b" ^ s), orig)
 612      | _ ->
 613          (* Regular decimal number *)
 614          let first_digit = peek l in
 615          (* Check for leading zeros - also reject 0_ followed by digits *)
 616          if first_digit = Some '0' then begin
 617            match peek2 l with
 618            | Some c when is_digit c -> failwith "Leading zeros not allowed"
 619            | Some '_' -> failwith "Leading zeros not allowed"
 620            | _ -> ()
 621          end;
 622          let rec read_int first =
 623            match peek l with
 624            | Some c when is_digit c -> advance l; read_int false
 625            | Some '_' ->
 626                if first then failwith "Underscore must follow a digit";
 627                advance l;
 628                if peek l |> Option.map is_digit |> Option.value ~default:false then
 629                  read_int false
 630                else
 631                  failwith "Trailing underscore in number"
 632            | _ ->
 633                if first then failwith "Expected digit"
 634          in
 635          (match peek l with
 636          | Some c when is_digit c -> read_int false
 637          | _ -> failwith "Expected digit after sign");
 638          (* Check for float *)
 639          let is_float = ref false in
 640          (match peek l, peek2 l with
 641          | Some '.', Some c when is_digit c ->
 642              is_float := true;
 643              advance l;
 644              read_int false
 645          | Some '.', _ ->
 646              failwith "Decimal point must be followed by digit"
 647          | _ -> ());
 648          (* Check for exponent *)
 649          (match peek l with
 650          | Some 'e' | Some 'E' ->
 651              is_float := true;
 652              advance l;
 653              (match peek l with
 654              | Some '+' | Some '-' -> advance l
 655              | _ -> ());
 656              (* After exponent/sign, first char must be a digit, not underscore *)
 657              (match peek l with
 658              | Some '_' -> failwith "Underscore cannot follow exponent"
 659              | _ -> ());
 660              read_int true
 661          | _ -> ());
 662          let s = sub_string l start (l.pos - start) in
 663          let s' = String.concat "" (String.split_on_char '_' s) in
 664          if !is_float then
 665            Tok_float (float_of_string s', s)
 666          else
 667            Tok_integer (Int64.of_string s', s)
 668
 669(* Check if we're looking at a datetime/date/time *)
 670let looks_like_datetime l =
 671  (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *)
 672  let check_datetime () =
 673    let pos = l.pos in
 674    let len = l.input_len in
 675    (* Check for YYYY-MM-DD pattern - must have exactly this structure *)
 676    if pos + 10 <= len then begin
 677      let c0 = get_char l pos in
 678      let c1 = get_char l (pos + 1) in
 679      let c2 = get_char l (pos + 2) in
 680      let c3 = get_char l (pos + 3) in
 681      let c4 = get_char l (pos + 4) in
 682      let c5 = get_char l (pos + 5) in
 683      let c6 = get_char l (pos + 6) in
 684      let c7 = get_char l (pos + 7) in
 685      let c8 = get_char l (pos + 8) in
 686      let c9 = get_char l (pos + 9) in
 687      (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *)
 688      if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' &&
 689         is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin
 690        (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *)
 691        if pos + 10 < len then begin
 692          let next = get_char l (pos + 10) in
 693          if next = 'T' || next = 't' then
 694            `Date  (* Datetime continues with time part *)
 695          else if next = ' ' || next = '\t' then begin
 696            (* Check if followed by = (key context) or time part *)
 697            let rec skip_ws p =
 698              if p >= len then p
 699              else match get_char l p with
 700                | ' ' | '\t' -> skip_ws (p + 1)
 701                | _ -> p
 702            in
 703            let after_ws = skip_ws (pos + 11) in
 704            if after_ws < len && get_char l after_ws = '=' then
 705              `Other  (* It's a key followed by = *)
 706            else if after_ws < len && is_digit (get_char l after_ws) then
 707              `Date  (* Could be "2001-02-03 12:34:56" format *)
 708            else
 709              `Date
 710          end else if next = '\n' || next = '\r' ||
 711             next = '#' || next = ',' || next = ']' || next = '}' then
 712            `Date
 713          else if is_bare_key_char next then
 714            `Other  (* It's a bare key like "2000-02-29abc" *)
 715          else
 716            `Date
 717        end else
 718          `Date
 719      end else if pos + 5 <= len &&
 720              is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then
 721        `Time
 722      else
 723        `Other
 724    end else if pos + 5 <= len then begin
 725      let c0 = get_char l pos in
 726      let c1 = get_char l (pos + 1) in
 727      let c2 = get_char l (pos + 2) in
 728      let c3 = get_char l (pos + 3) in
 729      let c4 = get_char l (pos + 4) in
 730      if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then
 731        `Time
 732      else
 733        `Other
 734    end else
 735      `Other
 736  in
 737  check_datetime ()
 738
 739(* Date/time validation *)
 740let validate_date year month day =
 741  if month < 1 || month > 12 then
 742    failwith (Printf.sprintf "Invalid month: %d" month);
 743  if day < 1 then
 744    failwith (Printf.sprintf "Invalid day: %d" day);
 745  let days_in_month = [| 0; 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] in
 746  let is_leap = (year mod 4 = 0 && year mod 100 <> 0) || year mod 400 = 0 in
 747  let max_days =
 748    if month = 2 && is_leap then 29
 749    else days_in_month.(month)
 750  in
 751  if day > max_days then
 752    failwith (Printf.sprintf "Invalid day %d for month %d" day month)
 753
 754let validate_time hour minute second =
 755  if hour < 0 || hour > 23 then
 756    failwith (Printf.sprintf "Invalid hour: %d" hour);
 757  if minute < 0 || minute > 59 then
 758    failwith (Printf.sprintf "Invalid minute: %d" minute);
 759  if second < 0 || second > 60 then (* 60 for leap second *)
 760    failwith (Printf.sprintf "Invalid second: %d" second)
 761
 762let validate_offset hour minute =
 763  if hour < 0 || hour > 23 then
 764    failwith (Printf.sprintf "Invalid timezone offset hour: %d" hour);
 765  if minute < 0 || minute > 59 then
 766    failwith (Printf.sprintf "Invalid timezone offset minute: %d" minute)
 767
 768let parse_datetime l =
 769  let buf = Buffer.create 32 in
 770  let year_buf = Buffer.create 4 in
 771  let month_buf = Buffer.create 2 in
 772  let day_buf = Buffer.create 2 in
 773  (* Read date part YYYY-MM-DD *)
 774  for _ = 1 to 4 do
 775    match peek l with
 776    | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char year_buf c; advance l
 777    | _ -> failwith "Invalid date format"
 778  done;
 779  if peek l <> Some '-' then failwith "Invalid date format";
 780  Buffer.add_char buf '-'; advance l;
 781  for _ = 1 to 2 do
 782    match peek l with
 783    | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char month_buf c; advance l
 784    | _ -> failwith "Invalid date format"
 785  done;
 786  if peek l <> Some '-' then failwith "Invalid date format";
 787  Buffer.add_char buf '-'; advance l;
 788  for _ = 1 to 2 do
 789    match peek l with
 790    | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char day_buf c; advance l
 791    | _ -> failwith "Invalid date format"
 792  done;
 793  (* Validate date immediately *)
 794  let year = int_of_string (Buffer.contents year_buf) in
 795  let month = int_of_string (Buffer.contents month_buf) in
 796  let day = int_of_string (Buffer.contents day_buf) in
 797  validate_date year month day;
 798  (* Helper to parse time part (after T or space) *)
 799  let parse_time_part () =
 800    let hour_buf = Buffer.create 2 in
 801    let minute_buf = Buffer.create 2 in
 802    let second_buf = Buffer.create 2 in
 803    Buffer.add_char buf 'T';  (* Always normalize to uppercase T *)
 804    for _ = 1 to 2 do
 805      match peek l with
 806      | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l
 807      | _ -> failwith "Invalid time format"
 808    done;
 809    if peek l <> Some ':' then failwith "Invalid time format";
 810    Buffer.add_char buf ':'; advance l;
 811    for _ = 1 to 2 do
 812      match peek l with
 813      | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l
 814      | _ -> failwith "Invalid time format"
 815    done;
 816    (* Optional seconds *)
 817    (match peek l with
 818    | Some ':' ->
 819        Buffer.add_char buf ':'; advance l;
 820        for _ = 1 to 2 do
 821          match peek l with
 822          | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l
 823          | _ -> failwith "Invalid time format"
 824        done;
 825        (* Optional fractional seconds *)
 826        (match peek l with
 827        | Some '.' ->
 828            Buffer.add_char buf '.'; advance l;
 829            if not (peek l |> Option.map is_digit |> Option.value ~default:false) then
 830              failwith "Expected digit after decimal point";
 831            while peek l |> Option.map is_digit |> Option.value ~default:false do
 832              Buffer.add_char buf (Option.get (peek l));
 833              advance l
 834            done
 835        | _ -> ())
 836    | _ ->
 837        (* No seconds - add :00 for normalization per toml-test *)
 838        Buffer.add_string buf ":00";
 839        Buffer.add_string second_buf "00");
 840    (* Validate time *)
 841    let hour = int_of_string (Buffer.contents hour_buf) in
 842    let minute = int_of_string (Buffer.contents minute_buf) in
 843    let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in
 844    validate_time hour minute second;
 845    (* Check for offset *)
 846    match peek l with
 847    | Some 'Z' | Some 'z' ->
 848        Buffer.add_char buf 'Z';
 849        advance l;
 850        Tok_datetime (Buffer.contents buf)
 851    | Some '+' | Some '-' as sign_opt ->
 852        let sign = Option.get sign_opt in
 853        let off_hour_buf = Buffer.create 2 in
 854        let off_min_buf = Buffer.create 2 in
 855        Buffer.add_char buf sign;
 856        advance l;
 857        for _ = 1 to 2 do
 858          match peek l with
 859          | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_hour_buf c; advance l
 860          | _ -> failwith "Invalid timezone offset"
 861        done;
 862        if peek l <> Some ':' then failwith "Invalid timezone offset";
 863        Buffer.add_char buf ':'; advance l;
 864        for _ = 1 to 2 do
 865          match peek l with
 866          | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char off_min_buf c; advance l
 867          | _ -> failwith "Invalid timezone offset"
 868        done;
 869        (* Validate offset *)
 870        let off_hour = int_of_string (Buffer.contents off_hour_buf) in
 871        let off_min = int_of_string (Buffer.contents off_min_buf) in
 872        validate_offset off_hour off_min;
 873        Tok_datetime (Buffer.contents buf)
 874    | _ ->
 875        Tok_datetime_local (Buffer.contents buf)
 876  in
 877  (* Check if there's a time part *)
 878  match peek l with
 879  | Some 'T' | Some 't' ->
 880      advance l;
 881      parse_time_part ()
 882  | Some ' ' ->
 883      (* Space could be followed by time (datetime with space separator)
 884         or could be end of date (local date followed by comment/value) *)
 885      advance l;  (* Skip the space *)
 886      (* Check if followed by digit (time) *)
 887      (match peek l with
 888      | Some c when is_digit c ->
 889          parse_time_part ()
 890      | _ ->
 891          (* Not followed by time - this is just a local date *)
 892          (* Put the space back by not consuming anything further *)
 893          l.pos <- l.pos - 1;  (* Go back to before the space *)
 894          Tok_date_local (Buffer.contents buf))
 895  | _ ->
 896      (* Just a date *)
 897      Tok_date_local (Buffer.contents buf)
 898
 899let parse_time l =
 900  let buf = Buffer.create 16 in
 901  let hour_buf = Buffer.create 2 in
 902  let minute_buf = Buffer.create 2 in
 903  let second_buf = Buffer.create 2 in
 904  (* Read HH:MM *)
 905  for _ = 1 to 2 do
 906    match peek l with
 907    | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char hour_buf c; advance l
 908    | _ -> failwith "Invalid time format"
 909  done;
 910  if peek l <> Some ':' then failwith "Invalid time format";
 911  Buffer.add_char buf ':'; advance l;
 912  for _ = 1 to 2 do
 913    match peek l with
 914    | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char minute_buf c; advance l
 915    | _ -> failwith "Invalid time format"
 916  done;
 917  (* Optional seconds *)
 918  (match peek l with
 919  | Some ':' ->
 920      Buffer.add_char buf ':'; advance l;
 921      for _ = 1 to 2 do
 922        match peek l with
 923        | Some c when is_digit c -> Buffer.add_char buf c; Buffer.add_char second_buf c; advance l
 924        | _ -> failwith "Invalid time format"
 925      done;
 926      (* Optional fractional seconds *)
 927      (match peek l with
 928      | Some '.' ->
 929          Buffer.add_char buf '.'; advance l;
 930          if not (peek l |> Option.map is_digit |> Option.value ~default:false) then
 931            failwith "Expected digit after decimal point";
 932          while peek l |> Option.map is_digit |> Option.value ~default:false do
 933            Buffer.add_char buf (Option.get (peek l));
 934            advance l
 935          done
 936      | _ -> ())
 937  | _ ->
 938      (* No seconds - add :00 for normalization *)
 939      Buffer.add_string buf ":00";
 940      Buffer.add_string second_buf "00");
 941  (* Validate time *)
 942  let hour = int_of_string (Buffer.contents hour_buf) in
 943  let minute = int_of_string (Buffer.contents minute_buf) in
 944  let second = if Buffer.length second_buf > 0 then int_of_string (Buffer.contents second_buf) else 0 in
 945  validate_time hour minute second;
 946  Tok_time_local (Buffer.contents buf)
 947
 948let next_token l =
 949  skip_ws_and_comments l;
 950  if is_eof l then Tok_eof
 951  else begin
 952    let c = get_current l in
 953    match c with
 954    | '[' -> advance l; Tok_lbracket
 955    | ']' -> advance l; Tok_rbracket
 956    | '{' -> advance l; Tok_lbrace
 957    | '}' -> advance l; Tok_rbrace
 958    | '=' -> advance l; Tok_equals
 959    | ',' -> advance l; Tok_comma
 960    | '.' -> advance l; Tok_dot
 961    | '\n' -> advance l; Tok_newline
 962    | '\r' ->
 963        advance l;
 964        if peek l = Some '\n' then begin
 965          advance l;
 966          Tok_newline
 967        end else
 968          failwith (Printf.sprintf "Bare carriage return not allowed at line %d" l.line)
 969    | '"' ->
 970        let (s, multiline) = parse_basic_string l in
 971        if multiline then Tok_ml_basic_string s else Tok_basic_string s
 972    | '\'' ->
 973        let (s, multiline) = parse_literal_string l in
 974        if multiline then Tok_ml_literal_string s else Tok_literal_string s
 975    | '+' | '-' ->
 976        (* Could be number, special float (+inf, -inf, +nan, -nan), or bare key starting with - *)
 977        let sign = c in
 978        let start = l.pos in
 979        (match peek2 l with
 980        | Some d when is_digit d ->
 981            (* Check if this looks like a key (followed by = after whitespace/key chars) *)
 982            (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *)
 983            let is_key_context =
 984              let rec scan_ahead p =
 985                if p >= l.input_len then false
 986                else
 987                  let c = get_char l p in
 988                  if is_digit c || c = '_' then scan_ahead (p + 1)
 989                  else if c = ' ' || c = '\t' then
 990                    (* Skip whitespace and check for = *)
 991                    let rec skip_ws pp =
 992                      if pp >= l.input_len then false
 993                      else match get_char l pp with
 994                        | ' ' | '\t' -> skip_ws (pp + 1)
 995                        | '=' -> true
 996                        | _ -> false
 997                    in
 998                    skip_ws (p + 1)
 999                  else if c = '=' then true
1000                  else if c = '.' then
1001                    (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *)
1002                    if p + 1 < l.input_len then
1003                      let next = get_char l (p + 1) in
1004                      if is_digit next then false  (* It's a decimal number like -3.14 *)
1005                      else if is_bare_key_char next then true  (* Dotted key *)
1006                      else false
1007                    else false
1008                  else if c = 'e' || c = 'E' then false  (* Scientific notation *)
1009                  else if is_bare_key_char c then
1010                    (* Contains non-digit bare key char - it's a key *)
1011                    true
1012                  else false
1013              in
1014              scan_ahead (start + 1)
1015            in
1016            if is_key_context then begin
1017              (* Treat as bare key *)
1018              while not (is_eof l) && is_bare_key_char (get_current l) do
1019                advance l
1020              done;
1021              Tok_bare_key (sub_string l start (l.pos - start))
1022            end else
1023              parse_number l
1024        | Some 'i' ->
1025            (* Check for inf *)
1026            if l.pos + 3 < l.input_len &&
1027               get_char l (l.pos + 1) = 'i' && get_char l (l.pos + 2) = 'n' && get_char l (l.pos + 3) = 'f' then begin
1028              advance_n l 4;
1029              let s = sub_string l start (l.pos - start) in
1030              if sign = '-' then Tok_float (Float.neg_infinity, s)
1031              else Tok_float (Float.infinity, s)
1032            end else if sign = '-' then begin
1033              (* Could be bare key like -inf-key *)
1034              while not (is_eof l) && is_bare_key_char (get_current l) do
1035                advance l
1036              done;
1037              Tok_bare_key (sub_string l start (l.pos - start))
1038            end else
1039              failwith (Printf.sprintf "Unexpected character after %c" sign)
1040        | Some 'n' ->
1041            (* Check for nan *)
1042            if l.pos + 3 < l.input_len &&
1043               get_char l (l.pos + 1) = 'n' && get_char l (l.pos + 2) = 'a' && get_char l (l.pos + 3) = 'n' then begin
1044              advance_n l 4;
1045              let s = sub_string l start (l.pos - start) in
1046              Tok_float (Float.nan, s)  (* Sign on NaN doesn't change the value *)
1047            end else if sign = '-' then begin
1048              (* Could be bare key like -name *)
1049              while not (is_eof l) && is_bare_key_char (get_current l) do
1050                advance l
1051              done;
1052              Tok_bare_key (sub_string l start (l.pos - start))
1053            end else
1054              failwith (Printf.sprintf "Unexpected character after %c" sign)
1055        | _ when sign = '-' ->
1056            (* Bare key starting with - like -key or --- *)
1057            while not (is_eof l) && is_bare_key_char (get_current l) do
1058              advance l
1059            done;
1060            Tok_bare_key (sub_string l start (l.pos - start))
1061        | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign))
1062    | c when is_digit c ->
1063        (* Could be number, datetime, or bare key starting with digits *)
1064        (match looks_like_datetime l with
1065        | `Date -> parse_datetime l
1066        | `Time -> parse_time l
1067        | `Other ->
1068            (* Check for hex/octal/binary prefix first - these are always numbers *)
1069            let start = l.pos in
1070            let is_prefixed_number =
1071              start + 1 < l.input_len && get_char l start = '0' &&
1072              (let c1 = get_char l (start + 1) in
1073               c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B')
1074            in
1075            if is_prefixed_number then
1076              parse_number l
1077            else begin
1078              (* Check if this is a bare key:
1079                 - Contains letters (like "123abc")
1080                 - Has leading zeros (like "0123") which would be invalid as a number *)
1081              let has_leading_zero =
1082                get_char l start = '0' && start + 1 < l.input_len &&
1083                let c1 = get_char l (start + 1) in
1084                is_digit c1
1085              in
1086              (* Scan to see if this is a bare key or a number
1087                 - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number
1088                 - If it contains letters OR dashes between digits, it's a bare key *)
1089              let rec scan_for_bare_key pos has_dash_between_digits =
1090                if pos >= l.input_len then has_dash_between_digits
1091                else
1092                  let c = get_char l pos in
1093                  if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits
1094                  else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits
1095                  else if c = '-' then
1096                    (* Dash in key - check what follows *)
1097                    let next_pos = pos + 1 in
1098                    if next_pos < l.input_len then
1099                      let next = get_char l next_pos in
1100                      if is_digit next then
1101                        scan_for_bare_key (next_pos) true  (* Dash between digits - bare key *)
1102                      else if is_bare_key_char next then
1103                        true  (* Dash followed by letter - definitely bare key like 2000-datetime *)
1104                      else
1105                        has_dash_between_digits  (* End of sequence *)
1106                    else
1107                      has_dash_between_digits  (* End of input *)
1108                  else if c = 'e' || c = 'E' then
1109                    (* Check if this looks like scientific notation *)
1110                    let next_pos = pos + 1 in
1111                    if next_pos >= l.input_len then true  (* Just 'e' at end, bare key *)
1112                    else
1113                      let next = get_char l next_pos in
1114                      if next = '+' || next = '-' then
1115                        (* Has exponent sign - check if followed by digit *)
1116                        let after_sign = next_pos + 1 in
1117                        if after_sign < l.input_len && is_digit (get_char l after_sign) then
1118                          has_dash_between_digits  (* Scientific notation, but might have dash earlier *)
1119                        else
1120                          true  (* e.g., "3e-abc" - bare key *)
1121                      else if is_digit next then
1122                        has_dash_between_digits  (* Scientific notation like 3e2, but check if had dash earlier *)
1123                      else
1124                        true  (* e.g., "3eabc" - bare key *)
1125                  else if is_bare_key_char c then
1126                    (* It's a letter - this is a bare key *)
1127                    true
1128                  else has_dash_between_digits
1129              in
1130              if has_leading_zero || scan_for_bare_key start false then begin
1131                (* It's a bare key *)
1132                while not (is_eof l) && is_bare_key_char (get_current l) do
1133                  advance l
1134                done;
1135                Tok_bare_key (sub_string l start (l.pos - start))
1136              end else
1137                (* It's a number - use parse_number *)
1138                parse_number l
1139            end)
1140    | c when c = 't' || c = 'f' || c = 'i' || c = 'n' ->
1141        (* These could be keywords (true, false, inf, nan) or bare keys
1142           Always read as bare key and let parser interpret *)
1143        let start = l.pos in
1144        while not (is_eof l) && is_bare_key_char (get_current l) do
1145          advance l
1146        done;
1147        Tok_bare_key (sub_string l start (l.pos - start))
1148    | c when is_bare_key_char c ->
1149        let start = l.pos in
1150        while not (is_eof l) && is_bare_key_char (get_current l) do
1151          advance l
1152        done;
1153        Tok_bare_key (sub_string l start (l.pos - start))
1154    | c ->
1155        let code = Char.code c in
1156        if code < 0x20 || code = 0x7F then
1157          failwith (Printf.sprintf "Control character U+%04X not allowed at line %d" code l.line)
1158        else
1159          failwith (Printf.sprintf "Unexpected character '%c' at line %d, column %d" c l.line l.col)
1160  end
1161
1162(* Parser *)
1163
1164type parser = {
1165  lexer : lexer;
1166  mutable current : token;
1167  mutable peeked : bool;
1168}
1169
1170let make_parser lexer =
1171  { lexer; current = Tok_eof; peeked = false }
1172
1173let peek_token p =
1174  if not p.peeked then begin
1175    p.current <- next_token p.lexer;
1176    p.peeked <- true
1177  end;
1178  p.current
1179
1180let consume_token p =
1181  let tok = peek_token p in
1182  p.peeked <- false;
1183  tok
1184
1185(* Check if next raw character (without skipping whitespace) matches *)
1186let next_raw_char_is p c =
1187  p.lexer.pos < p.lexer.input_len && get_char p.lexer p.lexer.pos = c
1188
1189let expect_token p expected =
1190  let tok = consume_token p in
1191  if tok <> expected then
1192    failwith (Printf.sprintf "Expected %s" (match expected with
1193      | Tok_equals -> "="
1194      | Tok_rbracket -> "]"
1195      | Tok_rbrace -> "}"
1196      | Tok_newline -> "newline"
1197      | _ -> "token"))
1198
1199let skip_newlines p =
1200  while peek_token p = Tok_newline do
1201    ignore (consume_token p)
1202  done
1203
1204(* Parse a single key segment (bare, basic string, literal string, or integer) *)
1205(* Note: Tok_float is handled specially in parse_dotted_key *)
1206let parse_key_segment p =
1207  match peek_token p with
1208  | Tok_bare_key s -> ignore (consume_token p); [s]
1209  | Tok_basic_string s -> ignore (consume_token p); [s]
1210  | Tok_literal_string s -> ignore (consume_token p); [s]
1211  | Tok_integer (_i, orig_str) -> ignore (consume_token p); [orig_str]
1212  | Tok_float (f, orig_str) ->
1213      (* Float in key context - use original string to preserve exact key parts *)
1214      ignore (consume_token p);
1215      if Float.is_nan f then ["nan"]
1216      else if f = Float.infinity then ["inf"]
1217      else if f = Float.neg_infinity then ["-inf"]
1218      else begin
1219        (* Remove underscores from original string and split on dot *)
1220        let s = String.concat "" (String.split_on_char '_' orig_str) in
1221        if String.contains s 'e' || String.contains s 'E' then
1222          (* Has exponent, treat as single key *)
1223          [s]
1224        else if String.contains s '.' then
1225          (* Split on decimal point for dotted key *)
1226          String.split_on_char '.' s
1227        else
1228          (* No decimal point, single integer key *)
1229          [s]
1230      end
1231  | Tok_date_local s -> ignore (consume_token p); [s]
1232  | Tok_datetime s -> ignore (consume_token p); [s]
1233  | Tok_datetime_local s -> ignore (consume_token p); [s]
1234  | Tok_time_local s -> ignore (consume_token p); [s]
1235  | Tok_ml_basic_string _ -> failwith "Multiline strings are not allowed as keys"
1236  | Tok_ml_literal_string _ -> failwith "Multiline strings are not allowed as keys"
1237  | _ -> failwith "Expected key"
1238
1239(* Parse a dotted key - returns list of key strings *)
1240let parse_dotted_key p =
1241  let first_keys = parse_key_segment p in
1242  let rec loop acc =
1243    match peek_token p with
1244    | Tok_dot ->
1245        ignore (consume_token p);
1246        let keys = parse_key_segment p in
1247        loop (List.rev_append keys acc)
1248    | _ -> List.rev acc
1249  in
1250  let rest = loop [] in
1251  first_keys @ rest
1252
1253let rec parse_value p =
1254  match peek_token p with
1255  | Tok_basic_string s -> ignore (consume_token p); Toml.String s
1256  | Tok_literal_string s -> ignore (consume_token p); Toml.String s
1257  | Tok_ml_basic_string s -> ignore (consume_token p); Toml.String s
1258  | Tok_ml_literal_string s -> ignore (consume_token p); Toml.String s
1259  | Tok_integer (i, _) -> ignore (consume_token p); Toml.Int i
1260  | Tok_float (f, _) -> ignore (consume_token p); Toml.Float f
1261  | Tok_datetime s -> ignore (consume_token p); Toml.Datetime s
1262  | Tok_datetime_local s -> ignore (consume_token p); Toml.Datetime_local s
1263  | Tok_date_local s -> ignore (consume_token p); Toml.Date_local s
1264  | Tok_time_local s -> ignore (consume_token p); Toml.Time_local s
1265  | Tok_lbracket -> parse_array p
1266  | Tok_lbrace -> parse_inline_table p
1267  | Tok_bare_key s ->
1268      (* Interpret bare keys as boolean, float keywords, or numbers in value context *)
1269      ignore (consume_token p);
1270      (match s with
1271      | "true" -> Bool true
1272      | "false" -> Bool false
1273      | "inf" -> Float Float.infinity
1274      | "nan" -> Float Float.nan
1275      | _ ->
1276          (* Validate underscore placement in the original string *)
1277          let validate_underscores str =
1278            let len = String.length str in
1279            if len > 0 && str.[0] = '_' then
1280              failwith "Leading underscore not allowed in number";
1281            if len > 0 && str.[len - 1] = '_' then
1282              failwith "Trailing underscore not allowed in number";
1283            for i = 0 to len - 2 do
1284              if str.[i] = '_' && str.[i + 1] = '_' then
1285                failwith "Double underscore not allowed in number";
1286              (* Underscore must be between digits (not next to 'e', 'E', '.', 'x', 'o', 'b', etc.) *)
1287              if str.[i] = '_' then begin
1288                let prev = if i > 0 then Some str.[i - 1] else None in
1289                let next = Some str.[i + 1] in
1290                let is_digit_char c = c >= '0' && c <= '9' in
1291                let is_hex_char c = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') in
1292                (* For hex numbers, underscore can be between hex digits *)
1293                let has_hex_prefix = len > 2 && str.[0] = '0' && (str.[1] = 'x' || str.[1] = 'X') in
1294                match prev, next with
1295                | Some p, Some n when has_hex_prefix && is_hex_char p && is_hex_char n -> ()
1296                | Some p, Some n when is_digit_char p && is_digit_char n -> ()
1297                | _ -> failwith "Underscore must be between digits"
1298              end
1299            done
1300          in
1301          validate_underscores s;
1302          (* Try to parse as a number - bare keys like "10e3" should be floats *)
1303          let s_no_underscore = String.concat "" (String.split_on_char '_' s) in
1304          let len = String.length s_no_underscore in
1305          if len > 0 then
1306            let c0 = s_no_underscore.[0] in
1307            (* Must start with digit for it to be a number in value context *)
1308            if c0 >= '0' && c0 <= '9' then begin
1309              (* Check for leading zeros *)
1310              if len > 1 && c0 = '0' && s_no_underscore.[1] >= '0' && s_no_underscore.[1] <= '9' then
1311                failwith "Leading zeros not allowed"
1312              else
1313                try
1314                  (* Try to parse as float (handles scientific notation) *)
1315                  if String.contains s_no_underscore '.' ||
1316                     String.contains s_no_underscore 'e' ||
1317                     String.contains s_no_underscore 'E' then
1318                    Toml.Float (float_of_string s_no_underscore)
1319                  else
1320                    Toml.Int (Int64.of_string s_no_underscore)
1321                with _ ->
1322                  failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)
1323            end else
1324              failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)
1325          else
1326            failwith (Printf.sprintf "Unexpected bare key '%s' as value" s))
1327  | _ -> failwith "Expected value"
1328
1329and parse_array p =
1330  ignore (consume_token p); (* [ *)
1331  skip_newlines p;
1332  let rec loop acc =
1333    match peek_token p with
1334    | Tok_rbracket ->
1335        ignore (consume_token p);
1336        Toml.Array (List.rev acc)
1337    | _ ->
1338        let v = parse_value p in
1339        skip_newlines p;
1340        match peek_token p with
1341        | Tok_comma ->
1342            ignore (consume_token p);
1343            skip_newlines p;
1344            loop (v :: acc)
1345        | Tok_rbracket ->
1346            ignore (consume_token p);
1347            Toml.Array (List.rev (v :: acc))
1348        | _ -> failwith "Expected ',' or ']' in array"
1349  in
1350  loop []
1351
1352and parse_inline_table p =
1353  ignore (consume_token p); (* { *)
1354  skip_newlines p;
1355  (* Track explicitly defined keys - can't be extended with dotted keys *)
1356  let defined_inline = ref [] in
1357  let rec loop acc =
1358    match peek_token p with
1359    | Tok_rbrace ->
1360        ignore (consume_token p);
1361        Toml.Table (List.rev acc)
1362    | _ ->
1363        let keys = parse_dotted_key p in
1364        skip_ws p;
1365        expect_token p Tok_equals;
1366        skip_ws p;
1367        let v = parse_value p in
1368        (* Check if trying to extend a previously-defined inline table *)
1369        (match keys with
1370        | first_key :: _ :: _ ->
1371            (* Multi-key dotted path - check if first key is already defined *)
1372            if List.mem first_key !defined_inline then
1373              failwith (Printf.sprintf "Cannot extend inline table '%s' with dotted key" first_key)
1374        | _ -> ());
1375        (* If this is a direct assignment to a key, track it *)
1376        (match keys with
1377        | [k] ->
1378            if List.mem k !defined_inline then
1379              failwith (Printf.sprintf "Duplicate key '%s' in inline table" k);
1380            defined_inline := k :: !defined_inline
1381        | _ -> ());
1382        let entry = build_nested_table keys v in
1383        (* Merge the entry with existing entries (for dotted keys with common prefix) *)
1384        let acc = merge_entry_into_table acc entry in
1385        skip_newlines p;
1386        match peek_token p with
1387        | Tok_comma ->
1388            ignore (consume_token p);
1389            skip_newlines p;
1390            loop acc
1391        | Tok_rbrace ->
1392            ignore (consume_token p);
1393            Toml.Table (List.rev acc)
1394        | _ -> failwith "Expected ',' or '}' in inline table"
1395  in
1396  loop []
1397
1398and skip_ws _p =
1399  (* Skip whitespace in token stream - handled by lexer but needed for lookahead *)
1400  ()
1401
1402and build_nested_table keys value =
1403  match keys with
1404  | [] -> failwith "Empty key"
1405  | [k] -> (k, value)
1406  | k :: rest ->
1407      (k, Toml.Table [build_nested_table rest value])
1408
1409(* Merge two TOML values - used for combining dotted keys in inline tables *)
1410and merge_toml_values v1 v2 =
1411  match v1, v2 with
1412  | Toml.Table entries1, Toml.Table entries2 ->
1413      (* Merge the entries *)
1414      let merged = List.fold_left (fun acc (k, v) ->
1415        match List.assoc_opt k acc with
1416        | Some existing ->
1417            (* Key exists - try to merge if both are tables *)
1418            let merged_v = merge_toml_values existing v in
1419            (k, merged_v) :: List.remove_assoc k acc
1420        | None ->
1421            (k, v) :: acc
1422      ) entries1 entries2 in
1423      Toml.Table (List.rev merged)
1424  | _, _ ->
1425      (* Can't merge non-table values with same key *)
1426      failwith "Conflicting keys in inline table"
1427
1428(* Merge a single entry into an existing table *)
1429and merge_entry_into_table entries (k, v) =
1430  match List.assoc_opt k entries with
1431  | Some existing ->
1432      let merged_v = merge_toml_values existing v in
1433      (k, merged_v) :: List.remove_assoc k entries
1434  | None ->
1435      (k, v) :: entries
1436
1437let validate_datetime_string s =
1438  (* Parse and validate date portion *)
1439  if String.length s >= 10 then begin
1440    let year = int_of_string (String.sub s 0 4) in
1441    let month = int_of_string (String.sub s 5 2) in
1442    let day = int_of_string (String.sub s 8 2) in
1443    validate_date year month day;
1444    (* Parse and validate time portion if present *)
1445    if String.length s >= 16 then begin
1446      let time_start = if s.[10] = 'T' || s.[10] = 't' || s.[10] = ' ' then 11 else 10 in
1447      let hour = int_of_string (String.sub s time_start 2) in
1448      let minute = int_of_string (String.sub s (time_start + 3) 2) in
1449      let second =
1450        if String.length s >= time_start + 8 && s.[time_start + 5] = ':' then
1451          int_of_string (String.sub s (time_start + 6) 2)
1452        else 0
1453      in
1454      validate_time hour minute second
1455    end
1456  end
1457
1458let validate_date_string s =
1459  if String.length s >= 10 then begin
1460    let year = int_of_string (String.sub s 0 4) in
1461    let month = int_of_string (String.sub s 5 2) in
1462    let day = int_of_string (String.sub s 8 2) in
1463    validate_date year month day
1464  end
1465
1466let validate_time_string s =
1467  if String.length s >= 5 then begin
1468    let hour = int_of_string (String.sub s 0 2) in
1469    let minute = int_of_string (String.sub s 3 2) in
1470    let second =
1471      if String.length s >= 8 && s.[5] = ':' then
1472        int_of_string (String.sub s 6 2)
1473      else 0
1474    in
1475    validate_time hour minute second
1476  end
1477
1478(* Table management for the parser *)
1479type table_state = {
1480  mutable values : (string * Toml.t) list;
1481  subtables : (string, table_state) Hashtbl.t;
1482  mutable is_array : bool;
1483  mutable is_inline : bool;
1484  mutable defined : bool;  (* Has this table been explicitly defined with [table]? *)
1485  mutable closed : bool;   (* Closed to extension via dotted keys from parent *)
1486  mutable array_elements : table_state list;  (* For arrays of tables *)
1487}
1488
1489let create_table_state () = {
1490  values = [];
1491  subtables = Hashtbl.create 16;
1492  is_array = false;
1493  is_inline = false;
1494  defined = false;
1495  closed = false;
1496  array_elements = [];
1497}
1498
1499let rec get_or_create_table state keys create_intermediate =
1500  match keys with
1501  | [] -> state
1502  | [k] ->
1503      (* Check if key exists as a value *)
1504      if List.mem_assoc k state.values then
1505        failwith (Printf.sprintf "Cannot use value '%s' as a table" k);
1506      (match Hashtbl.find_opt state.subtables k with
1507      | Some sub -> sub
1508      | None ->
1509          let sub = create_table_state () in
1510          Hashtbl.add state.subtables k sub;
1511          sub)
1512  | k :: rest ->
1513      (* Check if key exists as a value *)
1514      if List.mem_assoc k state.values then
1515        failwith (Printf.sprintf "Cannot use value '%s' as a table" k);
1516      let sub = match Hashtbl.find_opt state.subtables k with
1517        | Some sub -> sub
1518        | None ->
1519            let sub = create_table_state () in
1520            Hashtbl.add state.subtables k sub;
1521            sub
1522      in
1523      if create_intermediate && not sub.defined then
1524        sub.defined <- false;  (* Mark as implicitly defined *)
1525      get_or_create_table sub rest create_intermediate
1526
1527(* Like get_or_create_table but marks tables as defined (for dotted keys) *)
1528(* Dotted keys mark tables as "defined" (can't re-define with [table]) but not "closed" *)
1529let rec get_or_create_table_for_dotted_key state keys =
1530  match keys with
1531  | [] -> state
1532  | [k] ->
1533      (* Check if key exists as a value *)
1534      if List.mem_assoc k state.values then
1535        failwith (Printf.sprintf "Cannot use value '%s' as a table" k);
1536      (match Hashtbl.find_opt state.subtables k with
1537      | Some sub ->
1538          (* Check if it's an array of tables (can't extend with dotted keys) *)
1539          if sub.is_array then
1540            failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k);
1541          (* Check if it's closed (explicitly defined with [table] header) *)
1542          if sub.closed then
1543            failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k);
1544          if sub.is_inline then
1545            failwith (Printf.sprintf "Cannot extend inline table '%s'" k);
1546          (* Mark as defined by dotted key *)
1547          sub.defined <- true;
1548          sub
1549      | None ->
1550          let sub = create_table_state () in
1551          sub.defined <- true;  (* Mark as defined by dotted key *)
1552          Hashtbl.add state.subtables k sub;
1553          sub)
1554  | k :: rest ->
1555      (* Check if key exists as a value *)
1556      if List.mem_assoc k state.values then
1557        failwith (Printf.sprintf "Cannot use value '%s' as a table" k);
1558      let sub = match Hashtbl.find_opt state.subtables k with
1559        | Some sub ->
1560            (* Check if it's an array of tables (can't extend with dotted keys) *)
1561            if sub.is_array then
1562              failwith (Printf.sprintf "Cannot extend array of tables '%s' using dotted keys" k);
1563            if sub.closed then
1564              failwith (Printf.sprintf "Cannot extend table '%s' using dotted keys" k);
1565            if sub.is_inline then
1566              failwith (Printf.sprintf "Cannot extend inline table '%s'" k);
1567            (* Mark as defined by dotted key *)
1568            sub.defined <- true;
1569            sub
1570        | None ->
1571            let sub = create_table_state () in
1572            sub.defined <- true;  (* Mark as defined by dotted key *)
1573            Hashtbl.add state.subtables k sub;
1574            sub
1575      in
1576      get_or_create_table_for_dotted_key sub rest
1577
1578let rec table_state_to_toml state =
1579  let subtable_values = Hashtbl.fold (fun k sub acc ->
1580    let v =
1581      if sub.is_array then
1582        Toml.Array (List.map table_state_to_toml (get_array_elements sub))
1583      else
1584        table_state_to_toml sub
1585    in
1586    (k, v) :: acc
1587  ) state.subtables [] in
1588  Toml.Table (List.rev state.values @ subtable_values)
1589
1590and get_array_elements state =
1591  List.rev state.array_elements
1592
1593(* Main parser function *)
1594let parse_toml_from_lexer lexer =
1595  let parser = make_parser lexer in
1596  let root = create_table_state () in
1597  let current_table = ref root in
1598  (* Stack of array contexts: (full_path, parent_state, array_container) *)
1599  (* parent_state is where the array lives, array_container is the array table itself *)
1600  let array_context_stack = ref ([] : (string list * table_state * table_state) list) in
1601
1602  (* Check if keys has a prefix matching the given path *)
1603  let rec has_prefix keys prefix =
1604    match keys, prefix with
1605    | _, [] -> true
1606    | [], _ -> false
1607    | k :: krest, p :: prest -> k = p && has_prefix krest prest
1608  in
1609
1610  (* Remove prefix from keys *)
1611  let rec remove_prefix keys prefix =
1612    match keys, prefix with
1613    | ks, [] -> ks
1614    | [], _ -> []
1615    | _ :: krest, _ :: prest -> remove_prefix krest prest
1616  in
1617
1618  (* Find matching array context for the given keys *)
1619  let find_array_context keys =
1620    (* Stack is newest-first, so first match is the innermost (longest) prefix *)
1621    let rec find stack =
1622      match stack with
1623      | [] -> None
1624      | (path, parent, container) :: rest ->
1625          if keys = path then
1626            (* Exact match - adding sibling element *)
1627            Some (`Sibling (path, parent, container))
1628          else if has_prefix keys path && List.length keys > List.length path then
1629            (* Proper prefix - nested table/array within current element *)
1630            let current_entry = List.hd container.array_elements in
1631            Some (`Nested (path, current_entry))
1632          else
1633            find rest
1634    in
1635    find !array_context_stack
1636  in
1637
1638  (* Pop array contexts that are no longer valid for the given keys *)
1639  let rec pop_invalid_contexts keys =
1640    match !array_context_stack with
1641    | [] -> ()
1642    | (path, _, _) :: rest ->
1643        if not (has_prefix keys path) then begin
1644          array_context_stack := rest;
1645          pop_invalid_contexts keys
1646        end
1647  in
1648
1649  let rec parse_document () =
1650    skip_newlines parser;
1651    match peek_token parser with
1652    | Tok_eof -> ()
1653    | Tok_lbracket ->
1654        (* Check for array of tables [[...]] vs table [...] *)
1655        ignore (consume_token parser);
1656        (* For [[, the two brackets must be adjacent (no whitespace) *)
1657        let is_adjacent_bracket = next_raw_char_is parser '[' in
1658        (match peek_token parser with
1659        | Tok_lbracket when not is_adjacent_bracket ->
1660            (* The next [ was found after whitespace - this is invalid syntax like [ [table]] *)
1661            failwith "Invalid table header syntax"
1662        | Tok_lbracket ->
1663            (* Array of tables - brackets are adjacent *)
1664            ignore (consume_token parser);
1665            let keys = parse_dotted_key parser in
1666            expect_token parser Tok_rbracket;
1667            (* Check that closing ]] are adjacent (no whitespace) *)
1668            if not (next_raw_char_is parser ']') then
1669              failwith "Invalid array of tables syntax (space in ]])";
1670            expect_token parser Tok_rbracket;
1671            skip_to_newline parser;
1672            (* Pop contexts that are no longer valid for these keys *)
1673            pop_invalid_contexts keys;
1674            (* Check array context for this path *)
1675            (match find_array_context keys with
1676            | Some (`Sibling (path, _parent, container)) ->
1677                (* Adding another element to an existing array *)
1678                let new_entry = create_table_state () in
1679                container.array_elements <- new_entry :: container.array_elements;
1680                current_table := new_entry;
1681                (* Update the stack entry with new current element (by re-adding) *)
1682                array_context_stack := List.map (fun (p, par, cont) ->
1683                  if p = path then (p, par, cont) else (p, par, cont)
1684                ) !array_context_stack
1685            | Some (`Nested (parent_path, parent_entry)) ->
1686                (* Sub-array within current array element *)
1687                let relative_keys = remove_prefix keys parent_path in
1688                let array_table = get_or_create_table parent_entry relative_keys true in
1689                (* Check if trying to convert a non-array table to array *)
1690                if array_table.defined && not array_table.is_array then
1691                  failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys));
1692                if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then
1693                  failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys));
1694                array_table.is_array <- true;
1695                let new_entry = create_table_state () in
1696                array_table.array_elements <- new_entry :: array_table.array_elements;
1697                current_table := new_entry;
1698                (* Push new context for the nested array *)
1699                array_context_stack := (keys, parent_entry, array_table) :: !array_context_stack
1700            | None ->
1701                (* Top-level array *)
1702                let array_table = get_or_create_table root keys true in
1703                (* Check if trying to convert a non-array table to array *)
1704                if array_table.defined && not array_table.is_array then
1705                  failwith (Printf.sprintf "Cannot define '%s' as array of tables; already defined as table" (String.concat "." keys));
1706                if (array_table.values <> [] || Hashtbl.length array_table.subtables > 0) && not array_table.is_array then
1707                  failwith (Printf.sprintf "Cannot define '%s' as array of tables; already has content" (String.concat "." keys));
1708                array_table.is_array <- true;
1709                let entry = create_table_state () in
1710                array_table.array_elements <- entry :: array_table.array_elements;
1711                current_table := entry;
1712                (* Push context for this array *)
1713                array_context_stack := (keys, root, array_table) :: !array_context_stack);
1714            parse_document ()
1715        | _ ->
1716            (* Regular table *)
1717            let keys = parse_dotted_key parser in
1718            expect_token parser Tok_rbracket;
1719            skip_to_newline parser;
1720            (* Pop contexts that are no longer valid for these keys *)
1721            pop_invalid_contexts keys;
1722            (* Check if this table is relative to a current array element *)
1723            (match find_array_context keys with
1724            | Some (`Nested (parent_path, parent_entry)) ->
1725                let relative_keys = remove_prefix keys parent_path in
1726                if relative_keys <> [] then begin
1727                  let table = get_or_create_table parent_entry relative_keys true in
1728                  if table.is_array then
1729                    failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys));
1730                  if table.defined then
1731                    failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys));
1732                  table.defined <- true;
1733                  table.closed <- true;  (* Can't extend via dotted keys from parent *)
1734                  current_table := table
1735                end else begin
1736                  (* Keys equal parent_path - shouldn't happen for regular tables *)
1737                  let table = get_or_create_table root keys true in
1738                  if table.is_array then
1739                    failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys));
1740                  if table.defined then
1741                    failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys));
1742                  table.defined <- true;
1743                  table.closed <- true;  (* Can't extend via dotted keys from parent *)
1744                  current_table := table
1745                end
1746            | Some (`Sibling (_, _, container)) ->
1747                (* Exact match to an array of tables path - can't define as regular table *)
1748                if container.is_array then
1749                  failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys));
1750                (* Shouldn't reach here normally *)
1751                let table = get_or_create_table root keys true in
1752                if table.defined then
1753                  failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys));
1754                table.defined <- true;
1755                table.closed <- true;
1756                current_table := table
1757            | None ->
1758                (* Not in an array context *)
1759                let table = get_or_create_table root keys true in
1760                if table.is_array then
1761                  failwith (Printf.sprintf "Cannot define '%s' as table; already defined as array of tables" (String.concat "." keys));
1762                if table.defined then
1763                  failwith (Printf.sprintf "Table '%s' already defined" (String.concat "." keys));
1764                table.defined <- true;
1765                table.closed <- true;  (* Can't extend via dotted keys from parent *)
1766                current_table := table;
1767                (* Clear array context stack if we left all array contexts *)
1768                if not (List.exists (fun (p, _, _) -> has_prefix keys p) !array_context_stack) then
1769                  array_context_stack := []);
1770            parse_document ())
1771    | Tok_bare_key _ | Tok_basic_string _ | Tok_literal_string _
1772    | Tok_integer _ | Tok_float _ | Tok_date_local _ | Tok_datetime _
1773    | Tok_datetime_local _ | Tok_time_local _ ->
1774        (* Key-value pair - key can be bare, quoted, or numeric *)
1775        let keys = parse_dotted_key parser in
1776        expect_token parser Tok_equals;
1777        let value = parse_value parser in
1778        skip_to_newline parser;
1779        (* Add value to current table - check for duplicates first *)
1780        let add_value_to_table tbl key v =
1781          if List.mem_assoc key tbl.values then
1782            failwith (Printf.sprintf "Duplicate key: %s" key);
1783          (match Hashtbl.find_opt tbl.subtables key with
1784          | Some sub ->
1785              if sub.is_array then
1786                failwith (Printf.sprintf "Cannot redefine array of tables '%s' as a value" key)
1787              else
1788                failwith (Printf.sprintf "Cannot redefine table '%s' as a value" key)
1789          | None -> ());
1790          tbl.values <- (key, v) :: tbl.values
1791        in
1792        (match keys with
1793        | [] -> failwith "Empty key"
1794        | [k] ->
1795            add_value_to_table !current_table k value
1796        | _ ->
1797            let parent_keys = List.rev (List.tl (List.rev keys)) in
1798            let final_key = List.hd (List.rev keys) in
1799            (* Use get_or_create_table_for_dotted_key to check for closed tables *)
1800            let parent = get_or_create_table_for_dotted_key !current_table parent_keys in
1801            add_value_to_table parent final_key value);
1802        parse_document ()
1803    | _tok ->
1804        failwith (Printf.sprintf "Unexpected token at line %d" parser.lexer.line)
1805
1806  and skip_to_newline parser =
1807    skip_ws_and_comments parser.lexer;
1808    match peek_token parser with
1809    | Tok_newline -> ignore (consume_token parser)
1810    | Tok_eof -> ()
1811    | _ -> failwith "Expected newline after value"
1812  in
1813
1814  parse_document ();
1815  table_state_to_toml root
1816
1817(* Parse TOML from string - creates lexer internally *)
1818let parse_toml input =
1819  let lexer = make_lexer input in
1820  parse_toml_from_lexer lexer
1821
1822(* Parse TOML directly from Bytes.Reader - no intermediate string *)
1823let parse_toml_from_reader ?file r =
1824  let lexer = make_lexer_from_reader ?file r in
1825  parse_toml_from_lexer lexer
1826
1827(* Convert TOML to tagged JSON for toml-test compatibility *)
1828let rec toml_to_tagged_json value =
1829  match value with
1830  | Toml.String s ->
1831      Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s)
1832  | Toml.Int i ->
1833      Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i
1834  | Toml.Float f ->
1835      let value_str =
1836        (* Normalize exponent format - lowercase e, keep + for positive exponents *)
1837        let format_exp s =
1838          let buf = Buffer.create (String.length s + 1) in
1839          let i = ref 0 in
1840          while !i < String.length s do
1841            let c = s.[!i] in
1842            if c = 'E' then begin
1843              Buffer.add_char buf 'e';
1844              (* Add + if next char is a digit (no sign present) *)
1845              if !i + 1 < String.length s then begin
1846                let next = s.[!i + 1] in
1847                if next >= '0' && next <= '9' then
1848                  Buffer.add_char buf '+'
1849              end
1850            end else if c = 'e' then begin
1851              Buffer.add_char buf 'e';
1852              (* Add + if next char is a digit (no sign present) *)
1853              if !i + 1 < String.length s then begin
1854                let next = s.[!i + 1] in
1855                if next >= '0' && next <= '9' then
1856                  Buffer.add_char buf '+'
1857              end
1858            end else
1859              Buffer.add_char buf c;
1860            incr i
1861          done;
1862          Buffer.contents buf
1863        in
1864        if Float.is_nan f then "nan"
1865        else if f = Float.infinity then "inf"
1866        else if f = Float.neg_infinity then "-inf"
1867        else if f = 0.0 then
1868          (* Special case for zero - output "0" or "-0" *)
1869          if 1.0 /. f = Float.neg_infinity then "-0" else "0"
1870        else if Float.is_integer f then
1871          (* Integer floats - decide on representation *)
1872          let abs_f = Float.abs f in
1873          if abs_f = 9007199254740991.0 then
1874            (* Exact max safe integer - output without .0 per toml-test expectation *)
1875            Printf.sprintf "%.0f" f
1876          else if abs_f >= 1e6 then
1877            (* Use scientific notation for numbers >= 1e6 *)
1878            (* Start with precision 0 to get XeN format (integer mantissa) *)
1879            let rec try_exp_precision prec =
1880              if prec > 17 then format_exp (Printf.sprintf "%.17e" f)
1881              else
1882                let s = format_exp (Printf.sprintf "%.*e" prec f) in
1883                if float_of_string s = f then s
1884                else try_exp_precision (prec + 1)
1885            in
1886            try_exp_precision 0
1887          else if abs_f >= 2.0 then
1888            (* Integer floats >= 2 - output with .0 suffix *)
1889            Printf.sprintf "%.1f" f
1890          else
1891            (* Integer floats 0, 1, -1 - output without .0 suffix *)
1892            Printf.sprintf "%.0f" f
1893        else
1894          (* Non-integer float *)
1895          let abs_f = Float.abs f in
1896          let use_scientific = abs_f >= 1e10 || (abs_f < 1e-4 && abs_f > 0.0) in
1897          if use_scientific then
1898            let rec try_exp_precision prec =
1899              if prec > 17 then format_exp (Printf.sprintf "%.17e" f)
1900              else
1901                let s = format_exp (Printf.sprintf "%.*e" prec f) in
1902                if float_of_string s = f then s
1903                else try_exp_precision (prec + 1)
1904            in
1905            try_exp_precision 1
1906          else
1907            (* Prefer decimal notation for reasonable range *)
1908            (* Try shortest decimal first *)
1909            let rec try_decimal_precision prec =
1910              if prec > 17 then None
1911              else
1912                let s = Printf.sprintf "%.*f" prec f in
1913                (* Remove trailing zeros but keep at least one decimal place *)
1914                let s =
1915                  let len = String.length s in
1916                  let dot_pos = try String.index s '.' with Not_found -> len in
1917                  let rec find_last_nonzero i =
1918                    if i <= dot_pos then dot_pos + 2  (* Keep at least X.0 *)
1919                    else if s.[i] <> '0' then i + 1
1920                    else find_last_nonzero (i - 1)
1921                  in
1922                  let end_pos = min len (find_last_nonzero (len - 1)) in
1923                  String.sub s 0 end_pos
1924                in
1925                (* Ensure there's a decimal point with at least one digit after *)
1926                let s =
1927                  if not (String.contains s '.') then s ^ ".0"
1928                  else if s.[String.length s - 1] = '.' then s ^ "0"
1929                  else s
1930                in
1931                if float_of_string s = f then Some s
1932                else try_decimal_precision (prec + 1)
1933            in
1934            let decimal = try_decimal_precision 1 in
1935            (* Always prefer decimal notation if it works *)
1936            match decimal with
1937            | Some d -> d
1938            | None ->
1939              (* Fall back to shortest representation *)
1940              let rec try_precision prec =
1941                if prec > 17 then Printf.sprintf "%.17g" f
1942                else
1943                  let s = Printf.sprintf "%.*g" prec f in
1944                  if float_of_string s = f then s
1945                  else try_precision (prec + 1)
1946              in
1947              try_precision 1
1948      in
1949      Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str
1950  | Toml.Bool b ->
1951      Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false")
1952  | Toml.Datetime s ->
1953      validate_datetime_string s;
1954      Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s
1955  | Toml.Datetime_local s ->
1956      validate_datetime_string s;
1957      Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s
1958  | Toml.Date_local s ->
1959      validate_date_string s;
1960      Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s
1961  | Toml.Time_local s ->
1962      validate_time_string s;
1963      Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s
1964  | Toml.Array items ->
1965      let json_items = List.map toml_to_tagged_json items in
1966      Printf.sprintf "[%s]" (String.concat "," json_items)
1967  | Toml.Table pairs ->
1968      let json_pairs = List.map (fun (k, v) ->
1969        Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v)
1970      ) pairs in
1971      Printf.sprintf "{%s}" (String.concat "," json_pairs)
1972
1973and json_encode_string s =
1974  let buf = Buffer.create (String.length s + 2) in
1975  Buffer.add_char buf '"';
1976  String.iter (fun c ->
1977    match c with
1978    | '"' -> Buffer.add_string buf "\\\""
1979    | '\\' -> Buffer.add_string buf "\\\\"
1980    | '\n' -> Buffer.add_string buf "\\n"
1981    | '\r' -> Buffer.add_string buf "\\r"
1982    | '\t' -> Buffer.add_string buf "\\t"
1983    | '\b' -> Buffer.add_string buf "\\b"  (* backspace *)
1984    | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f"  (* formfeed *)
1985    | c when Char.code c < 0x20 ->
1986        Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
1987    | c -> Buffer.add_char buf c
1988  ) s;
1989  Buffer.add_char buf '"';
1990  Buffer.contents buf
1991
1992(* Tagged JSON to TOML for encoder *)
1993let decode_tagged_json_string s =
1994  (* Simple JSON parser for tagged format *)
1995  let pos = ref 0 in
1996  let len = String.length s in
1997
1998  let skip_ws () =
1999    while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do
2000      incr pos
2001    done
2002  in
2003
2004  let expect c =
2005    skip_ws ();
2006    if !pos >= len || s.[!pos] <> c then
2007      failwith (Printf.sprintf "Expected '%c' at position %d" c !pos);
2008    incr pos
2009  in
2010
2011  let peek () =
2012    skip_ws ();
2013    if !pos >= len then None else Some s.[!pos]
2014  in
2015
2016  let parse_json_string () =
2017    skip_ws ();
2018    expect '"';
2019    let buf = Buffer.create 64 in
2020    while !pos < len && s.[!pos] <> '"' do
2021      if s.[!pos] = '\\' then begin
2022        incr pos;
2023        if !pos >= len then failwith "Unexpected end in string escape";
2024        match s.[!pos] with
2025        | '"' -> Buffer.add_char buf '"'; incr pos
2026        | '\\' -> Buffer.add_char buf '\\'; incr pos
2027        | '/' -> Buffer.add_char buf '/'; incr pos
2028        | 'n' -> Buffer.add_char buf '\n'; incr pos
2029        | 'r' -> Buffer.add_char buf '\r'; incr pos
2030        | 't' -> Buffer.add_char buf '\t'; incr pos
2031        | 'b' -> Buffer.add_char buf '\b'; incr pos
2032        | 'f' -> Buffer.add_char buf (Char.chr 0x0C); incr pos
2033        | 'u' ->
2034            incr pos;
2035            if !pos + 3 >= len then failwith "Invalid unicode escape";
2036            let hex = String.sub s !pos 4 in
2037            let cp = int_of_string ("0x" ^ hex) in
2038            Buffer.add_string buf (codepoint_to_utf8 cp);
2039            pos := !pos + 4
2040        | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c)
2041      end else begin
2042        Buffer.add_char buf s.[!pos];
2043        incr pos
2044      end
2045    done;
2046    expect '"';
2047    Buffer.contents buf
2048  in
2049
2050  (* Convert a tagged JSON object to a TOML primitive if applicable *)
2051  let convert_tagged_value value =
2052    match value with
2053    | Toml.Table [("type", Toml.String typ); ("value", Toml.String v)]
2054    | Toml.Table [("value", Toml.String v); ("type", Toml.String typ)] ->
2055        (match typ with
2056        | "string" -> Toml.String v
2057        | "integer" -> Toml.Int (Int64.of_string v)
2058        | "float" ->
2059            (match v with
2060            | "inf" -> Toml.Float Float.infinity
2061            | "-inf" -> Toml.Float Float.neg_infinity
2062            | "nan" -> Toml.Float Float.nan
2063            | _ -> Toml.Float (float_of_string v))
2064        | "bool" -> Toml.Bool (v = "true")
2065        | "datetime" -> Toml.Datetime v
2066        | "datetime-local" -> Toml.Datetime_local v
2067        | "date-local" -> Toml.Date_local v
2068        | "time-local" -> Toml.Time_local v
2069        | _ -> failwith (Printf.sprintf "Unknown type: %s" typ))
2070    | _ -> value
2071  in
2072
2073  let rec parse_value () =
2074    skip_ws ();
2075    match peek () with
2076    | Some '{' -> parse_object ()
2077    | Some '[' -> parse_array ()
2078    | Some '"' -> Toml.String (parse_json_string ())
2079    | _ -> failwith "Expected value"
2080
2081  and parse_object () =
2082    expect '{';
2083    skip_ws ();
2084    if peek () = Some '}' then begin
2085      incr pos;
2086      Toml.Table []
2087    end else begin
2088      let pairs = ref [] in
2089      let first = ref true in
2090      while peek () <> Some '}' do
2091        if not !first then expect ',';
2092        first := false;
2093        skip_ws ();
2094        let key = parse_json_string () in
2095        expect ':';
2096        let value = parse_value () in
2097        pairs := (key, convert_tagged_value value) :: !pairs
2098      done;
2099      expect '}';
2100      Toml.Table (List.rev !pairs)
2101    end
2102
2103  and parse_array () =
2104    expect '[';
2105    skip_ws ();
2106    if peek () = Some ']' then begin
2107      incr pos;
2108      Toml.Array []
2109    end else begin
2110      let items = ref [] in
2111      let first = ref true in
2112      while peek () <> Some ']' do
2113        if not !first then expect ',';
2114        first := false;
2115        items := convert_tagged_value (parse_value ()) :: !items
2116      done;
2117      expect ']';
2118      Toml.Array (List.rev !items)
2119    end
2120  in
2121
2122  parse_value ()
2123
2124
2125(* ============================================
2126   Streaming TOML Encoder
2127   ============================================ *)
2128
2129let is_bare_key_char c =
2130  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
2131  (c >= '0' && c <= '9') || c = '_' || c = '-'
2132
2133let rec write_toml_string w s =
2134  (* Check if we need to escape *)
2135  let needs_escape = String.exists (fun c ->
2136    let code = Char.code c in
2137    c = '"' || c = '\\' || c = '\n' || c = '\r' || c = '\t' ||
2138    code < 0x20 || code = 0x7F
2139  ) s in
2140  if needs_escape then begin
2141    Bytes.Writer.write_string w "\"";
2142    String.iter (fun c ->
2143      match c with
2144      | '"' -> Bytes.Writer.write_string w "\\\""
2145      | '\\' -> Bytes.Writer.write_string w "\\\\"
2146      | '\n' -> Bytes.Writer.write_string w "\\n"
2147      | '\r' -> Bytes.Writer.write_string w "\\r"
2148      | '\t' -> Bytes.Writer.write_string w "\\t"
2149      | '\b' -> Bytes.Writer.write_string w "\\b"
2150      | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f"
2151      | c when Char.code c < 0x20 || Char.code c = 0x7F ->
2152          Bytes.Writer.write_string w (Printf.sprintf "\\u%04X" (Char.code c))
2153      | c ->
2154          let b = Bytes.create 1 in
2155          Bytes.set b 0 c;
2156          Bytes.Writer.write_bytes w b
2157    ) s;
2158    Bytes.Writer.write_string w "\""
2159  end else begin
2160    Bytes.Writer.write_string w "\"";
2161    Bytes.Writer.write_string w s;
2162    Bytes.Writer.write_string w "\""
2163  end
2164
2165and write_toml_key w k =
2166  (* Check if it can be a bare key *)
2167  let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in
2168  if is_bare then Bytes.Writer.write_string w k
2169  else write_toml_string w k
2170
2171and write_toml_value w ?(inline=false) value =
2172  match value with
2173  | Toml.String s -> write_toml_string w s
2174  | Toml.Int i -> Bytes.Writer.write_string w (Int64.to_string i)
2175  | Toml.Float f ->
2176      if Float.is_nan f then Bytes.Writer.write_string w "nan"
2177      else if f = Float.infinity then Bytes.Writer.write_string w "inf"
2178      else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf"
2179      else begin
2180        let s = Printf.sprintf "%.17g" f in
2181        (* Ensure it looks like a float *)
2182        let s = if String.contains s '.' || String.contains s 'e' || String.contains s 'E'
2183                then s else s ^ ".0" in
2184        Bytes.Writer.write_string w s
2185      end
2186  | Toml.Bool b -> Bytes.Writer.write_string w (if b then "true" else "false")
2187  | Toml.Datetime s -> Bytes.Writer.write_string w s
2188  | Toml.Datetime_local s -> Bytes.Writer.write_string w s
2189  | Toml.Date_local s -> Bytes.Writer.write_string w s
2190  | Toml.Time_local s -> Bytes.Writer.write_string w s
2191  | Toml.Array items ->
2192      Bytes.Writer.write_string w "[";
2193      List.iteri (fun i item ->
2194        if i > 0 then Bytes.Writer.write_string w ", ";
2195        write_toml_value w ~inline:true item
2196      ) items;
2197      Bytes.Writer.write_string w "]"
2198  | Toml.Table pairs when inline ->
2199      Bytes.Writer.write_string w "{";
2200      List.iteri (fun i (k, v) ->
2201        if i > 0 then Bytes.Writer.write_string w ", ";
2202        write_toml_key w k;
2203        Bytes.Writer.write_string w " = ";
2204        write_toml_value w ~inline:true v
2205      ) pairs;
2206      Bytes.Writer.write_string w "}"
2207  | Toml.Table _ -> failwith "Cannot encode table inline without inline flag"
2208
2209(* True streaming TOML encoder - writes directly to Bytes.Writer *)
2210let encode_to_writer w value =
2211  let has_content = ref false in
2212
2213  let write_path path =
2214    List.iteri (fun i k ->
2215      if i > 0 then Bytes.Writer.write_string w ".";
2216      write_toml_key w k
2217    ) path
2218  in
2219
2220  let rec encode_at_path path value =
2221    match value with
2222    | Toml.Table pairs ->
2223        (* Separate simple values from nested tables *)
2224        (* Only PURE table arrays (all items are tables) use [[array]] syntax.
2225           Mixed arrays (primitives + tables) must be encoded inline. *)
2226        let is_pure_table_array items =
2227          items <> [] && List.for_all (function Toml.Table _ -> true | _ -> false) items
2228        in
2229        let simple, nested = List.partition (fun (_, v) ->
2230          match v with
2231          | Toml.Table _ -> false
2232          | Toml.Array items -> not (is_pure_table_array items)
2233          | _ -> true
2234        ) pairs in
2235
2236        (* Emit simple values first *)
2237        List.iter (fun (k, v) ->
2238          write_toml_key w k;
2239          Bytes.Writer.write_string w " = ";
2240          write_toml_value w ~inline:true v;
2241          Bytes.Writer.write_string w "\n";
2242          has_content := true
2243        ) simple;
2244
2245        (* Then nested tables *)
2246        List.iter (fun (k, v) ->
2247          let new_path = path @ [k] in
2248          match v with
2249          | Toml.Table _ ->
2250              if !has_content then Bytes.Writer.write_string w "\n";
2251              Bytes.Writer.write_string w "[";
2252              write_path new_path;
2253              Bytes.Writer.write_string w "]\n";
2254              has_content := true;
2255              encode_at_path new_path v
2256          | Toml.Array items when items <> [] && List.for_all (function Toml.Table _ -> true | _ -> false) items ->
2257              (* Pure table array - use [[array]] syntax *)
2258              List.iter (fun item ->
2259                match item with
2260                | Toml.Table _ ->
2261                    if !has_content then Bytes.Writer.write_string w "\n";
2262                    Bytes.Writer.write_string w "[[";
2263                    write_path new_path;
2264                    Bytes.Writer.write_string w "]]\n";
2265                    has_content := true;
2266                    encode_at_path new_path item
2267                | _ -> assert false  (* Impossible - we checked for_all above *)
2268              ) items
2269          | _ ->
2270              write_toml_key w k;
2271              Bytes.Writer.write_string w " = ";
2272              write_toml_value w ~inline:true v;
2273              Bytes.Writer.write_string w "\n";
2274              has_content := true
2275        ) nested
2276    | _ ->
2277        failwith "Top-level TOML must be a table"
2278  in
2279
2280  encode_at_path [] value
2281
2282(* ============================================
2283   Public Interface - Parsing
2284   ============================================ *)
2285
2286let of_string input =
2287  try
2288    Ok (parse_toml input)
2289  with
2290  | Failure msg -> Error (Toml.Error.make (Toml.Error.Syntax (Toml.Error.Expected msg)))
2291  | Toml.Error.Error e -> Error e
2292  | e -> Error (Toml.Error.make (Toml.Error.Syntax (Toml.Error.Expected (Printexc.to_string e))))
2293
2294let of_reader ?file r =
2295  try
2296    Ok (parse_toml_from_reader ?file r)
2297  with
2298  | Failure msg -> Error (Toml.Error.make (Toml.Error.Syntax (Toml.Error.Expected msg)))
2299  | Toml.Error.Error e -> Error e
2300  | e -> Error (Toml.Error.make (Toml.Error.Syntax (Toml.Error.Expected (Printexc.to_string e))))
2301
2302let parse = parse_toml
2303
2304let parse_reader ?file r = parse_toml_from_reader ?file r
2305
2306(* ============================================
2307   Public Interface - Encoding
2308   ============================================ *)
2309
2310let to_writer w value = encode_to_writer w value
2311
2312let to_string value =
2313  let buf = Buffer.create 256 in
2314  let w = Bytes.Writer.of_buffer buf in
2315  encode_to_writer w value;
2316  Buffer.contents buf
2317
2318(* ============================================
2319   Codec I/O Operations
2320   ============================================ *)
2321
2322let decode_string c s =
2323  Result.bind (of_string s) (Tomlt.decode c)
2324
2325let decode_string_exn c s =
2326  let toml = parse s in
2327  Tomlt.decode_exn c toml
2328
2329let encode_string c v =
2330  let toml = Tomlt.encode c v in
2331  to_string toml
2332
2333let decode_reader ?file c r =
2334  Result.bind (of_reader ?file r) (Tomlt.decode c)
2335
2336let encode_writer c v w =
2337  let toml = Tomlt.encode c v in
2338  to_writer w toml
2339
2340(* ============================================
2341   Tagged JSON Module
2342   ============================================ *)
2343
2344module Tagged_json = struct
2345  let encode = toml_to_tagged_json
2346  let decode = decode_tagged_json_string
2347
2348  let decode_and_encode_toml json_str =
2349    try
2350      let toml = decode_tagged_json_string json_str in
2351      Ok (to_string toml)
2352    with
2353    | Failure msg -> Error msg
2354    | e -> Error (Printexc.to_string e)
2355end