···238239let run_valid_test toml_file json_file =
240 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
241- match Tomlt.decode_string toml_content with
242- | Error msg -> `Fail (Printf.sprintf "Decode error: %s" msg)
243 | Ok toml ->
244- let actual_json = Tomlt.toml_to_tagged_json toml in
245 let expected_json = In_channel.with_open_bin json_file In_channel.input_all in
246 if json_equal actual_json expected_json then
247 `Pass
···251252let run_invalid_test toml_file =
253 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
254- match Tomlt.decode_string toml_content with
255 | Error _ -> `Pass (* Should fail *)
256 | Ok _ -> `Fail "Should have failed but parsed successfully"
257···259let run_encoder_test json_file =
260 let json_content = In_channel.with_open_bin json_file In_channel.input_all in
261 (* First, encode JSON to TOML *)
262- match Tomlt.encode_from_tagged_json json_content with
263 | Error msg -> `Fail (Printf.sprintf "Encode error: %s" msg)
264 | Ok toml_output ->
265 (* Then decode the TOML back to check round-trip *)
266- match Tomlt.decode_string toml_output with
267- | Error msg -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" msg toml_output)
268 | Ok decoded_toml ->
269 (* Compare the decoded result with original JSON *)
270- let actual_json = Tomlt.toml_to_tagged_json decoded_toml in
271 if json_equal actual_json json_content then
272 `Pass
273 else
···238239let run_valid_test toml_file json_file =
240 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
241+ match Tomlt.of_string toml_content with
242+ | Error e -> `Fail (Printf.sprintf "Decode error: %s" (Tomlt.Error.to_string e))
243 | Ok toml ->
244+ let actual_json = Tomlt.Internal.to_tagged_json toml in
245 let expected_json = In_channel.with_open_bin json_file In_channel.input_all in
246 if json_equal actual_json expected_json then
247 `Pass
···251252let run_invalid_test toml_file =
253 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
254+ match Tomlt.of_string toml_content with
255 | Error _ -> `Pass (* Should fail *)
256 | Ok _ -> `Fail "Should have failed but parsed successfully"
257···259let run_encoder_test json_file =
260 let json_content = In_channel.with_open_bin json_file In_channel.input_all in
261 (* First, encode JSON to TOML *)
262+ match Tomlt.Internal.encode_from_tagged_json json_content with
263 | Error msg -> `Fail (Printf.sprintf "Encode error: %s" msg)
264 | Ok toml_output ->
265 (* Then decode the TOML back to check round-trip *)
266+ match Tomlt.of_string toml_output with
267+ | Error e -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" (Tomlt.Error.to_string e) toml_output)
268 | Ok decoded_toml ->
269 (* Compare the decoded result with original JSON *)
270+ let actual_json = Tomlt.Internal.to_tagged_json decoded_toml in
271 if json_equal actual_json json_content then
272 `Pass
273 else
+4-4
bin/toml_test_decoder.ml
···23let () =
4 let input = In_channel.input_all In_channel.stdin in
5- match Tomlt.decode_string input with
6 | Ok toml ->
7- let json = Tomlt.toml_to_tagged_json toml in
8 print_string json;
9 print_newline ()
10- | Error msg ->
11- Printf.eprintf "Error: %s\n" msg;
12 exit 1
···23let () =
4 let input = In_channel.input_all In_channel.stdin in
5+ match Tomlt.of_string input with
6 | Ok toml ->
7+ let json = Tomlt.Internal.to_tagged_json toml in
8 print_string json;
9 print_newline ()
10+ | Error e ->
11+ Printf.eprintf "Error: %s\n" (Tomlt.Error.to_string e);
12 exit 1
+1-1
bin/toml_test_encoder.ml
···23let () =
4 let input = In_channel.input_all In_channel.stdin in
5- match Tomlt.encode_from_tagged_json input with
6 | Ok toml ->
7 print_string toml
8 | Error msg ->
···23let () =
4 let input = In_channel.input_all In_channel.stdin in
5+ match Tomlt.Internal.encode_from_tagged_json input with
6 | Ok toml ->
7 print_string toml
8 | Error msg ->
···78(* TOML value representation *)
910-type toml_value =
11- | Toml_string of string
12- | Toml_int of int64
13- | Toml_float of float
14- | Toml_bool of bool
15- | Toml_datetime of string (* Offset datetime *)
16- | Toml_datetime_local of string (* Local datetime *)
17- | Toml_date_local of string (* Local date *)
18- | Toml_time_local of string (* Local time *)
19- | Toml_array of toml_value list
20- | Toml_table of (string * toml_value) list
2122-(* Lexer *)
2324type token =
25 | Tok_lbracket
···44 | Tok_time_local of string
4546type lexer = {
47- mutable input : string;
048 mutable pos : int;
49 mutable line : int;
50 mutable col : int;
51 file : string;
52}
5354-let make_lexer ?(file = "-") input =
55- { input; pos = 0; line = 1; col = 1; file }
0000000000000000005657-let is_eof l = l.pos >= String.length l.input
5859-let peek l = if is_eof l then None else Some l.input.[l.pos]
6061let peek2 l =
62- if l.pos + 1 >= String.length l.input then None
63- else Some l.input.[l.pos + 1]
6465let peek_n l n =
66- if l.pos + n - 1 >= String.length l.input then None
67- else Some (String.sub l.input l.pos n)
6869let advance l =
70 if not (is_eof l) then begin
71- if l.input.[l.pos] = '\n' then begin
72 l.line <- l.line + 1;
73 l.col <- 1
74 end else
···80 for _ = 1 to n do advance l done
8182let skip_whitespace l =
83- while not (is_eof l) && (l.input.[l.pos] = ' ' || l.input.[l.pos] = '\t') do
84 advance l
85 done
860000000087(* Get expected byte length of UTF-8 char from first byte *)
88let utf8_byte_length_from_first_byte c =
89 let code = Char.code c in
···94 else if code < 0xF8 then 4
95 else 0 (* Invalid: 5+ byte sequence *)
9697-(* Validate UTF-8 at position using uutf, returns byte length *)
98-let validate_utf8_at_pos input pos line =
99- if pos >= String.length input then
100- failwith "Unexpected end of input";
101- let byte_len = utf8_byte_length_from_first_byte input.[pos] in
102 if byte_len = 0 then
103- failwith (Printf.sprintf "Invalid UTF-8 sequence at line %d" line);
104- if pos + byte_len > String.length input then
105- failwith (Printf.sprintf "Incomplete UTF-8 sequence at line %d" line);
106 (* Validate using uutf - it checks overlong encodings, surrogates, etc. *)
107- let sub = String.sub input pos byte_len in
108 let valid = ref false in
109 Uutf.String.fold_utf_8 (fun () _ -> function
110 | `Uchar _ -> valid := true
111 | `Malformed _ -> ()
112 ) () sub;
113 if not !valid then
114- failwith (Printf.sprintf "Invalid UTF-8 sequence at line %d" line);
115 byte_len
116117(* UTF-8 validation - validates and advances over a single UTF-8 character *)
118let validate_utf8_char l =
119- let byte_len = validate_utf8_at_pos l.input l.pos l.line in
120 for _ = 1 to byte_len do advance l done
121122let skip_comment l =
123- if not (is_eof l) && l.input.[l.pos] = '#' then begin
124 (* Validate comment characters *)
125 advance l;
126 let continue = ref true in
127- while !continue && not (is_eof l) && l.input.[l.pos] <> '\n' do
128- let c = l.input.[l.pos] in
129 let code = Char.code c in
130 (* CR is only valid if followed by LF (CRLF at end of comment) *)
131 if c = '\r' then begin
132 (* Check if this CR is followed by LF - if so, it ends the comment *)
133- if l.pos + 1 < String.length l.input && l.input.[l.pos + 1] = '\n' then
134 (* This is CRLF - stop the loop, let the main lexer handle it *)
135 continue := false
136 else
137- failwith (Printf.sprintf "Bare carriage return not allowed in comment at line %d" l.line)
138 end else if code >= 0x80 then begin
139 (* Multi-byte UTF-8 character - validate it *)
140 validate_utf8_char l
141 end else begin
142 (* ASCII control characters other than tab are not allowed in comments *)
143 if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then
144- failwith (Printf.sprintf "Control character U+%04X not allowed in comment at line %d" code l.line);
145 advance l
146 end
147 done
···150let skip_ws_and_comments l =
151 let rec loop () =
152 skip_whitespace l;
153- if not (is_eof l) && l.input.[l.pos] = '#' then begin
154 skip_comment l;
155 loop ()
156 end
···170 if c >= '0' && c <= '9' then Char.code c - Char.code '0'
171 else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10
172 else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10
173- else failwith "Invalid hex digit"
174175-(* Parse Unicode escape and convert to UTF-8 using uutf *)
176-let unicode_to_utf8 codepoint =
177 if codepoint < 0 || codepoint > 0x10FFFF then
178 failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint);
179 if codepoint >= 0xD800 && codepoint <= 0xDFFF then
180- failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%X" codepoint);
0000000000181 let buf = Buffer.create 4 in
182 Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
183 Buffer.contents buf
184185let parse_escape l =
186 advance l; (* skip backslash *)
187- if is_eof l then failwith "Unexpected end of input in escape sequence";
188- let c = l.input.[l.pos] in
0189 advance l;
190 match c with
191 | 'b' -> "\b"
···198 | '\\' -> "\\"
199 | 'x' ->
200 (* \xHH - 2 hex digits *)
201- if l.pos + 1 >= String.length l.input then
202- failwith "Incomplete \\x escape sequence";
203- let c1 = l.input.[l.pos] in
204- let c2 = l.input.[l.pos + 1] in
205 if not (is_hex_digit c1 && is_hex_digit c2) then
206- failwith "Invalid \\x escape sequence";
207 let cp = (hex_value c1 * 16) + hex_value c2 in
208 advance l; advance l;
209- unicode_to_utf8 cp
210 | 'u' ->
211 (* \uHHHH - 4 hex digits *)
212- if l.pos + 3 >= String.length l.input then
213- failwith "Incomplete \\u escape sequence";
214- let s = String.sub l.input l.pos 4 in
215 for i = 0 to 3 do
216 if not (is_hex_digit s.[i]) then
217- failwith "Invalid \\u escape sequence"
218 done;
219 let cp = int_of_string ("0x" ^ s) in
220 advance_n l 4;
221- unicode_to_utf8 cp
222 | 'U' ->
223 (* \UHHHHHHHH - 8 hex digits *)
224- if l.pos + 7 >= String.length l.input then
225- failwith "Incomplete \\U escape sequence";
226- let s = String.sub l.input l.pos 8 in
227 for i = 0 to 7 do
228 if not (is_hex_digit s.[i]) then
229- failwith "Invalid \\U escape sequence"
230 done;
231 let cp = int_of_string ("0x" ^ s) in
232 advance_n l 8;
233- unicode_to_utf8 cp
234- | _ -> failwith (Printf.sprintf "Invalid escape sequence: \\%c" c)
0235236let validate_string_char l c is_multiline =
237 let code = Char.code c in
238 (* Control characters other than tab (and LF/CR for multiline) are not allowed *)
239 if code < 0x09 then
240- failwith (Printf.sprintf "Control character U+%04X not allowed in string at line %d" code l.line);
241 if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then
242- failwith (Printf.sprintf "Control character U+%04X not allowed in string at line %d" code l.line);
243 if code = 0x7F then
244- failwith (Printf.sprintf "Control character U+007F not allowed in string at line %d" l.line)
245246(* Validate UTF-8 in string context and add bytes to buffer *)
247let validate_and_add_utf8_to_buffer l buf =
248- let byte_len = validate_utf8_at_pos l.input l.pos l.line in
249- Buffer.add_substring buf l.input l.pos byte_len;
250 for _ = 1 to byte_len do advance l done
251252let parse_basic_string l =
···270 let rec loop () =
271 if is_eof l then
272 failwith "Unterminated string";
273- let c = l.input.[l.pos] in
274 if multiline then begin
275 if c = '"' then begin
276 (* Count consecutive quotes *)
277 let quote_count = ref 0 in
278 let p = ref l.pos in
279- while !p < String.length l.input && l.input.[!p] = '"' do
280 incr quote_count;
281 incr p
282 done;
···415 let rec loop () =
416 if is_eof l then
417 failwith "Unterminated literal string";
418- let c = l.input.[l.pos] in
419 if multiline then begin
420 if c = '\'' then begin
421 (* Count consecutive quotes *)
422 let quote_count = ref 0 in
423 let p = ref l.pos in
424- while !p < String.length l.input && l.input.[!p] = '\'' do
425 incr quote_count;
426 incr p
427 done;
···502 match peek_n l 3 with
503 | Some "inf" ->
504 advance_n l 3;
505- let s = String.sub l.input start (l.pos - start) in
506 Tok_float ((if neg then Float.neg_infinity else Float.infinity), s)
507 | Some "nan" ->
508 advance_n l 3;
509- let s = String.sub l.input start (l.pos - start) in
510 Tok_float (Float.nan, s)
511 | _ ->
512 (* Check for hex, octal, or binary *)
···530 if first then failwith "Expected hex digit after 0x"
531 in
532 read_hex true;
533- let s = String.sub l.input num_start (l.pos - num_start) in
534 let s = String.concat "" (String.split_on_char '_' s) in
535- let orig = String.sub l.input start (l.pos - start) in
536 Tok_integer (Int64.of_string ("0x" ^ s), orig)
537 | Some '0', Some 'o' when not neg ->
538 advance l; advance l;
···553 if first then failwith "Expected octal digit after 0o"
554 in
555 read_oct true;
556- let s = String.sub l.input num_start (l.pos - num_start) in
557 let s = String.concat "" (String.split_on_char '_' s) in
558- let orig = String.sub l.input start (l.pos - start) in
559 Tok_integer (Int64.of_string ("0o" ^ s), orig)
560 | Some '0', Some 'b' when not neg ->
561 advance l; advance l;
···576 if first then failwith "Expected binary digit after 0b"
577 in
578 read_bin true;
579- let s = String.sub l.input num_start (l.pos - num_start) in
580 let s = String.concat "" (String.split_on_char '_' s) in
581- let orig = String.sub l.input start (l.pos - start) in
582 Tok_integer (Int64.of_string ("0b" ^ s), orig)
583 | _ ->
584 (* Regular decimal number *)
···630 | _ -> ());
631 read_int true
632 | _ -> ());
633- let s = String.sub l.input start (l.pos - start) in
634 let s' = String.concat "" (String.split_on_char '_' s) in
635 if !is_float then
636 Tok_float (float_of_string s', s)
···642 (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *)
643 let check_datetime () =
644 let pos = l.pos in
645- let len = String.length l.input in
646 (* Check for YYYY-MM-DD pattern - must have exactly this structure *)
647 if pos + 10 <= len then begin
648- let c0 = l.input.[pos] in
649- let c1 = l.input.[pos + 1] in
650- let c2 = l.input.[pos + 2] in
651- let c3 = l.input.[pos + 3] in
652- let c4 = l.input.[pos + 4] in
653- let c5 = l.input.[pos + 5] in
654- let c6 = l.input.[pos + 6] in
655- let c7 = l.input.[pos + 7] in
656- let c8 = l.input.[pos + 8] in
657- let c9 = l.input.[pos + 9] in
658 (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *)
659 if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' &&
660 is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin
661 (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *)
662 if pos + 10 < len then begin
663- let next = l.input.[pos + 10] in
664 if next = 'T' || next = 't' then
665 `Date (* Datetime continues with time part *)
666 else if next = ' ' || next = '\t' then begin
667 (* Check if followed by = (key context) or time part *)
668 let rec skip_ws p =
669 if p >= len then p
670- else match l.input.[p] with
671 | ' ' | '\t' -> skip_ws (p + 1)
672 | _ -> p
673 in
674 let after_ws = skip_ws (pos + 11) in
675- if after_ws < len && l.input.[after_ws] = '=' then
676 `Other (* It's a key followed by = *)
677- else if after_ws < len && is_digit l.input.[after_ws] then
678 `Date (* Could be "2001-02-03 12:34:56" format *)
679 else
680 `Date
···693 else
694 `Other
695 end else if pos + 5 <= len then begin
696- let c0 = l.input.[pos] in
697- let c1 = l.input.[pos + 1] in
698- let c2 = l.input.[pos + 2] in
699- let c3 = l.input.[pos + 3] in
700- let c4 = l.input.[pos + 4] in
701 if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then
702 `Time
703 else
···920 skip_ws_and_comments l;
921 if is_eof l then Tok_eof
922 else begin
923- let c = l.input.[l.pos] in
924 match c with
925 | '[' -> advance l; Tok_lbracket
926 | ']' -> advance l; Tok_rbracket
···953 (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *)
954 let is_key_context =
955 let rec scan_ahead p =
956- if p >= String.length l.input then false
957 else
958- let c = l.input.[p] in
959 if is_digit c || c = '_' then scan_ahead (p + 1)
960 else if c = ' ' || c = '\t' then
961 (* Skip whitespace and check for = *)
962 let rec skip_ws pp =
963- if pp >= String.length l.input then false
964- else match l.input.[pp] with
965 | ' ' | '\t' -> skip_ws (pp + 1)
966 | '=' -> true
967 | _ -> false
···970 else if c = '=' then true
971 else if c = '.' then
972 (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *)
973- if p + 1 < String.length l.input then
974- let next = l.input.[p + 1] in
975 if is_digit next then false (* It's a decimal number like -3.14 *)
976 else if is_bare_key_char next then true (* Dotted key *)
977 else false
···986 in
987 if is_key_context then begin
988 (* Treat as bare key *)
989- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
990 advance l
991 done;
992- Tok_bare_key (String.sub l.input start (l.pos - start))
993 end else
994 parse_number l
995 | Some 'i' ->
996 (* Check for inf *)
997- if l.pos + 3 < String.length l.input &&
998- l.input.[l.pos + 1] = 'i' && l.input.[l.pos + 2] = 'n' && l.input.[l.pos + 3] = 'f' then begin
999 advance_n l 4;
1000- let s = String.sub l.input start (l.pos - start) in
1001 if sign = '-' then Tok_float (Float.neg_infinity, s)
1002 else Tok_float (Float.infinity, s)
1003 end else if sign = '-' then begin
1004 (* Could be bare key like -inf-key *)
1005- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1006 advance l
1007 done;
1008- Tok_bare_key (String.sub l.input start (l.pos - start))
1009 end else
1010 failwith (Printf.sprintf "Unexpected character after %c" sign)
1011 | Some 'n' ->
1012 (* Check for nan *)
1013- if l.pos + 3 < String.length l.input &&
1014- l.input.[l.pos + 1] = 'n' && l.input.[l.pos + 2] = 'a' && l.input.[l.pos + 3] = 'n' then begin
1015 advance_n l 4;
1016- let s = String.sub l.input start (l.pos - start) in
1017 Tok_float (Float.nan, s) (* Sign on NaN doesn't change the value *)
1018 end else if sign = '-' then begin
1019 (* Could be bare key like -name *)
1020- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1021 advance l
1022 done;
1023- Tok_bare_key (String.sub l.input start (l.pos - start))
1024 end else
1025 failwith (Printf.sprintf "Unexpected character after %c" sign)
1026 | _ when sign = '-' ->
1027 (* Bare key starting with - like -key or --- *)
1028- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1029 advance l
1030 done;
1031- Tok_bare_key (String.sub l.input start (l.pos - start))
1032 | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign))
1033 | c when is_digit c ->
1034 (* Could be number, datetime, or bare key starting with digits *)
···1039 (* Check for hex/octal/binary prefix first - these are always numbers *)
1040 let start = l.pos in
1041 let is_prefixed_number =
1042- start + 1 < String.length l.input && l.input.[start] = '0' &&
1043- (let c1 = l.input.[start + 1] in
1044 c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B')
1045 in
1046 if is_prefixed_number then
···1050 - Contains letters (like "123abc")
1051 - Has leading zeros (like "0123") which would be invalid as a number *)
1052 let has_leading_zero =
1053- l.input.[start] = '0' && start + 1 < String.length l.input &&
1054- let c1 = l.input.[start + 1] in
1055 is_digit c1
1056 in
1057 (* Scan to see if this is a bare key or a number
1058 - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number
1059 - If it contains letters OR dashes between digits, it's a bare key *)
1060 let rec scan_for_bare_key pos has_dash_between_digits =
1061- if pos >= String.length l.input then has_dash_between_digits
1062 else
1063- let c = l.input.[pos] in
1064 if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits
1065 else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits
1066 else if c = '-' then
1067 (* Dash in key - check what follows *)
1068 let next_pos = pos + 1 in
1069- if next_pos < String.length l.input then
1070- let next = l.input.[next_pos] in
1071 if is_digit next then
1072 scan_for_bare_key (next_pos) true (* Dash between digits - bare key *)
1073 else if is_bare_key_char next then
···1079 else if c = 'e' || c = 'E' then
1080 (* Check if this looks like scientific notation *)
1081 let next_pos = pos + 1 in
1082- if next_pos >= String.length l.input then true (* Just 'e' at end, bare key *)
1083 else
1084- let next = l.input.[next_pos] in
1085 if next = '+' || next = '-' then
1086 (* Has exponent sign - check if followed by digit *)
1087 let after_sign = next_pos + 1 in
1088- if after_sign < String.length l.input && is_digit l.input.[after_sign] then
1089 has_dash_between_digits (* Scientific notation, but might have dash earlier *)
1090 else
1091 true (* e.g., "3e-abc" - bare key *)
···1100 in
1101 if has_leading_zero || scan_for_bare_key start false then begin
1102 (* It's a bare key *)
1103- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1104 advance l
1105 done;
1106- Tok_bare_key (String.sub l.input start (l.pos - start))
1107 end else
1108 (* It's a number - use parse_number *)
1109 parse_number l
···1112 (* These could be keywords (true, false, inf, nan) or bare keys
1113 Always read as bare key and let parser interpret *)
1114 let start = l.pos in
1115- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1116 advance l
1117 done;
1118- Tok_bare_key (String.sub l.input start (l.pos - start))
1119 | c when is_bare_key_char c ->
1120 let start = l.pos in
1121- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
1122 advance l
1123 done;
1124- Tok_bare_key (String.sub l.input start (l.pos - start))
1125 | c ->
1126 let code = Char.code c in
1127 if code < 0x20 || code = 0x7F then
···11551156(* Check if next raw character (without skipping whitespace) matches *)
1157let next_raw_char_is p c =
1158- p.lexer.pos < String.length p.lexer.input && p.lexer.input.[p.lexer.pos] = c
11591160let expect_token p expected =
1161 let tok = consume_token p in
···12231224let rec parse_value p =
1225 match peek_token p with
1226- | Tok_basic_string s -> ignore (consume_token p); Toml_string s
1227- | Tok_literal_string s -> ignore (consume_token p); Toml_string s
1228- | Tok_ml_basic_string s -> ignore (consume_token p); Toml_string s
1229- | Tok_ml_literal_string s -> ignore (consume_token p); Toml_string s
1230- | Tok_integer (i, _) -> ignore (consume_token p); Toml_int i
1231- | Tok_float (f, _) -> ignore (consume_token p); Toml_float f
1232- | Tok_datetime s -> ignore (consume_token p); Toml_datetime s
1233- | Tok_datetime_local s -> ignore (consume_token p); Toml_datetime_local s
1234- | Tok_date_local s -> ignore (consume_token p); Toml_date_local s
1235- | Tok_time_local s -> ignore (consume_token p); Toml_time_local s
1236 | Tok_lbracket -> parse_array p
1237 | Tok_lbrace -> parse_inline_table p
1238 | Tok_bare_key s ->
1239 (* Interpret bare keys as boolean, float keywords, or numbers in value context *)
1240 ignore (consume_token p);
1241 (match s with
1242- | "true" -> Toml_bool true
1243- | "false" -> Toml_bool false
1244- | "inf" -> Toml_float Float.infinity
1245- | "nan" -> Toml_float Float.nan
1246 | _ ->
1247 (* Validate underscore placement in the original string *)
1248 let validate_underscores str =
···1286 if String.contains s_no_underscore '.' ||
1287 String.contains s_no_underscore 'e' ||
1288 String.contains s_no_underscore 'E' then
1289- Toml_float (float_of_string s_no_underscore)
1290 else
1291- Toml_int (Int64.of_string s_no_underscore)
1292 with _ ->
1293 failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)
1294 end else
···1304 match peek_token p with
1305 | Tok_rbracket ->
1306 ignore (consume_token p);
1307- Toml_array (List.rev acc)
1308 | _ ->
1309 let v = parse_value p in
1310 skip_newlines p;
···1315 loop (v :: acc)
1316 | Tok_rbracket ->
1317 ignore (consume_token p);
1318- Toml_array (List.rev (v :: acc))
1319 | _ -> failwith "Expected ',' or ']' in array"
1320 in
1321 loop []
···1329 match peek_token p with
1330 | Tok_rbrace ->
1331 ignore (consume_token p);
1332- Toml_table (List.rev acc)
1333 | _ ->
1334 let keys = parse_dotted_key p in
1335 skip_ws p;
···1361 loop acc
1362 | Tok_rbrace ->
1363 ignore (consume_token p);
1364- Toml_table (List.rev acc)
1365 | _ -> failwith "Expected ',' or '}' in inline table"
1366 in
1367 loop []
···1375 | [] -> failwith "Empty key"
1376 | [k] -> (k, value)
1377 | k :: rest ->
1378- (k, Toml_table [build_nested_table rest value])
13791380(* Merge two TOML values - used for combining dotted keys in inline tables *)
1381and merge_toml_values v1 v2 =
1382 match v1, v2 with
1383- | Toml_table entries1, Toml_table entries2 ->
1384 (* Merge the entries *)
1385 let merged = List.fold_left (fun acc (k, v) ->
1386 match List.assoc_opt k acc with
···1391 | None ->
1392 (k, v) :: acc
1393 ) entries1 entries2 in
1394- Toml_table (List.rev merged)
1395 | _, _ ->
1396 (* Can't merge non-table values with same key *)
1397 failwith "Conflicting keys in inline table"
···14481449(* Table management for the parser *)
1450type table_state = {
1451- mutable values : (string * toml_value) list;
1452 subtables : (string, table_state) Hashtbl.t;
1453 mutable is_array : bool;
1454 mutable is_inline : bool;
···1550 let subtable_values = Hashtbl.fold (fun k sub acc ->
1551 let v =
1552 if sub.is_array then
1553- Toml_array (List.map table_state_to_toml (get_array_elements sub))
1554 else
1555 table_state_to_toml sub
1556 in
1557 (k, v) :: acc
1558 ) state.subtables [] in
1559- Toml_table (List.rev state.values @ subtable_values)
15601561and get_array_elements state =
1562 List.rev state.array_elements
15631564(* Main parser function *)
1565-let parse_toml input =
1566- let lexer = make_lexer input in
1567 let parser = make_parser lexer in
1568 let root = create_table_state () in
1569 let current_table = ref root in
···1786 parse_document ();
1787 table_state_to_toml root
178800000000001789(* Convert TOML to tagged JSON for toml-test compatibility *)
1790let rec toml_to_tagged_json value =
1791 match value with
1792- | Toml_string s ->
1793 Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s)
1794- | Toml_int i ->
1795 Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i
1796- | Toml_float f ->
1797 let value_str =
1798 (* Normalize exponent format - lowercase e, keep + for positive exponents *)
1799 let format_exp s =
···1909 try_precision 1
1910 in
1911 Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str
1912- | Toml_bool b ->
1913 Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false")
1914- | Toml_datetime s ->
1915 validate_datetime_string s;
1916 Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s
1917- | Toml_datetime_local s ->
1918 validate_datetime_string s;
1919 Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s
1920- | Toml_date_local s ->
1921 validate_date_string s;
1922 Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s
1923- | Toml_time_local s ->
1924 validate_time_string s;
1925 Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s
1926- | Toml_array items ->
1927 let json_items = List.map toml_to_tagged_json items in
1928 Printf.sprintf "[%s]" (String.concat "," json_items)
1929- | Toml_table pairs ->
1930 let json_pairs = List.map (fun (k, v) ->
1931 Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v)
1932 ) pairs in
···1951 Buffer.add_char buf '"';
1952 Buffer.contents buf
19531954-(* Main decode function *)
1955-let decode_string input =
1956- try
1957- let toml = parse_toml input in
1958- Ok toml
1959- with
1960- | Failure msg -> Error msg
1961- | e -> Error (Printexc.to_string e)
1962-1963(* Tagged JSON to TOML for encoder *)
1964let decode_tagged_json_string s =
1965 (* Simple JSON parser for tagged format *)
···2006 if !pos + 3 >= len then failwith "Invalid unicode escape";
2007 let hex = String.sub s !pos 4 in
2008 let cp = int_of_string ("0x" ^ hex) in
2009- Buffer.add_string buf (unicode_to_utf8 cp);
2010 pos := !pos + 4
2011 | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c)
2012 end else begin
···2021 (* Convert a tagged JSON object to a TOML primitive if applicable *)
2022 let convert_tagged_value value =
2023 match value with
2024- | Toml_table [("type", Toml_string typ); ("value", Toml_string v)]
2025- | Toml_table [("value", Toml_string v); ("type", Toml_string typ)] ->
2026 (match typ with
2027- | "string" -> Toml_string v
2028- | "integer" -> Toml_int (Int64.of_string v)
2029 | "float" ->
2030 (match v with
2031- | "inf" -> Toml_float Float.infinity
2032- | "-inf" -> Toml_float Float.neg_infinity
2033- | "nan" -> Toml_float Float.nan
2034- | _ -> Toml_float (float_of_string v))
2035- | "bool" -> Toml_bool (v = "true")
2036- | "datetime" -> Toml_datetime v
2037- | "datetime-local" -> Toml_datetime_local v
2038- | "date-local" -> Toml_date_local v
2039- | "time-local" -> Toml_time_local v
2040 | _ -> failwith (Printf.sprintf "Unknown type: %s" typ))
2041 | _ -> value
2042 in
···2046 match peek () with
2047 | Some '{' -> parse_object ()
2048 | Some '[' -> parse_array ()
2049- | Some '"' -> Toml_string (parse_json_string ())
2050 | _ -> failwith "Expected value"
20512052 and parse_object () =
···2054 skip_ws ();
2055 if peek () = Some '}' then begin
2056 incr pos;
2057- Toml_table []
2058 end else begin
2059 let pairs = ref [] in
2060 let first = ref true in
···2068 pairs := (key, convert_tagged_value value) :: !pairs
2069 done;
2070 expect '}';
2071- Toml_table (List.rev !pairs)
2072 end
20732074 and parse_array () =
···2076 skip_ws ();
2077 if peek () = Some ']' then begin
2078 incr pos;
2079- Toml_array []
2080 end else begin
2081 let items = ref [] in
2082 let first = ref true in
···2086 items := convert_tagged_value (parse_value ()) :: !items
2087 done;
2088 expect ']';
2089- Toml_array (List.rev !items)
2090 end
2091 in
20922093 parse_value ()
20942095-(* Encode TOML value to TOML string *)
2096-let rec encode_toml_value ?(inline=false) value =
2097- match value with
2098- | Toml_string s -> encode_toml_string s
2099- | Toml_int i -> Int64.to_string i
2100- | Toml_float f ->
2101- if Float.is_nan f then "nan"
2102- else if f = Float.infinity then "inf"
2103- else if f = Float.neg_infinity then "-inf"
2104- else
2105- let s = Printf.sprintf "%.17g" f in
2106- (* Ensure it looks like a float *)
2107- if String.contains s '.' || String.contains s 'e' || String.contains s 'E' then s
2108- else s ^ ".0"
2109- | Toml_bool b -> if b then "true" else "false"
2110- | Toml_datetime s -> s
2111- | Toml_datetime_local s -> s
2112- | Toml_date_local s -> s
2113- | Toml_time_local s -> s
2114- | Toml_array items ->
2115- let encoded = List.map (encode_toml_value ~inline:true) items in
2116- Printf.sprintf "[%s]" (String.concat ", " encoded)
2117- | Toml_table pairs when inline ->
2118- let encoded = List.map (fun (k, v) ->
2119- Printf.sprintf "%s = %s" (encode_toml_key k) (encode_toml_value ~inline:true v)
2120- ) pairs in
2121- Printf.sprintf "{%s}" (String.concat ", " encoded)
2122- | Toml_table _ -> failwith "Cannot encode table inline without inline flag"
21232124-and encode_toml_string s =
2125 (* Check if we need to escape *)
2126 let needs_escape = String.exists (fun c ->
2127 let code = Char.code c in
···2129 code < 0x20 || code = 0x7F
2130 ) s in
2131 if needs_escape then begin
2132- let buf = Buffer.create (String.length s + 2) in
2133- Buffer.add_char buf '"';
2134 String.iter (fun c ->
2135 match c with
2136- | '"' -> Buffer.add_string buf "\\\""
2137- | '\\' -> Buffer.add_string buf "\\\\"
2138- | '\n' -> Buffer.add_string buf "\\n"
2139- | '\r' -> Buffer.add_string buf "\\r"
2140- | '\t' -> Buffer.add_string buf "\\t"
2141- | '\b' -> Buffer.add_string buf "\\b"
2142- | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f"
2143 | c when Char.code c < 0x20 || Char.code c = 0x7F ->
2144- Buffer.add_string buf (Printf.sprintf "\\u%04X" (Char.code c))
2145- | c -> Buffer.add_char buf c
0002146 ) s;
2147- Buffer.add_char buf '"';
2148- Buffer.contents buf
2149- end else
2150- Printf.sprintf "\"%s\"" s
0021512152-and encode_toml_key k =
2153 (* Check if it can be a bare key *)
2154 let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in
2155- if is_bare then k else encode_toml_string k
021562157-(* Streaming TOML encoder - writes directly to a buffer *)
2158-let encode_toml_to_buffer buf value =
000000000000000000000000000000000000002159 let has_content = ref false in
216000000002161 let rec encode_at_path path value =
2162 match value with
2163- | Toml_table pairs ->
2164 (* Separate simple values from nested tables *)
2165 (* Only PURE table arrays (all items are tables) use [[array]] syntax.
2166 Mixed arrays (primitives + tables) must be encoded inline. *)
2167 let is_pure_table_array items =
2168- items <> [] && List.for_all (function Toml_table _ -> true | _ -> false) items
2169 in
2170 let simple, nested = List.partition (fun (_, v) ->
2171 match v with
2172- | Toml_table _ -> false
2173- | Toml_array items -> not (is_pure_table_array items)
2174 | _ -> true
2175 ) pairs in
21762177 (* Emit simple values first *)
2178 List.iter (fun (k, v) ->
2179- Buffer.add_string buf (encode_toml_key k);
2180- Buffer.add_string buf " = ";
2181- Buffer.add_string buf (encode_toml_value ~inline:true v);
2182- Buffer.add_char buf '\n';
2183 has_content := true
2184 ) simple;
2185···2187 List.iter (fun (k, v) ->
2188 let new_path = path @ [k] in
2189 match v with
2190- | Toml_table _ ->
2191- if !has_content then Buffer.add_char buf '\n';
2192- Buffer.add_char buf '[';
2193- Buffer.add_string buf (String.concat "." (List.map encode_toml_key new_path));
2194- Buffer.add_string buf "]\n";
2195 has_content := true;
2196 encode_at_path new_path v
2197- | Toml_array items when items <> [] && List.for_all (function Toml_table _ -> true | _ -> false) items ->
2198 (* Pure table array - use [[array]] syntax *)
2199 List.iter (fun item ->
2200 match item with
2201- | Toml_table _ ->
2202- if !has_content then Buffer.add_char buf '\n';
2203- Buffer.add_string buf "[[";
2204- Buffer.add_string buf (String.concat "." (List.map encode_toml_key new_path));
2205- Buffer.add_string buf "]]\n";
2206 has_content := true;
2207 encode_at_path new_path item
2208 | _ -> assert false (* Impossible - we checked for_all above *)
2209 ) items
2210 | _ ->
2211- Buffer.add_string buf (encode_toml_key k);
2212- Buffer.add_string buf " = ";
2213- Buffer.add_string buf (encode_toml_value ~inline:true v);
2214- Buffer.add_char buf '\n';
2215 has_content := true
2216 ) nested
2217 | _ ->
···22202221 encode_at_path [] value
22222223-(* Full TOML encoder with proper table handling *)
2224-let encode_toml value =
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002225 let buf = Buffer.create 256 in
2226- encode_toml_to_buffer buf value;
2227 Buffer.contents buf
22282229-(* Streaming encoder that writes directly to a Bytes.Writer *)
2230-let encode_to_writer w value =
2231- let buf = Buffer.create 4096 in
2232- encode_toml_to_buffer buf value;
2233- Bytes.Writer.write_string w (Buffer.contents buf)
22342235-(* Bytesrw interface *)
0022362237-let decode ?file:_ r =
2238- let contents = Bytes.Reader.to_string r in
2239- match decode_string contents with
2240- | Ok toml -> Ok toml
2241- | Error msg -> Error msg
0022422243-let decode_to_tagged_json ?file:_ r =
2244- let contents = Bytes.Reader.to_string r in
2245- match decode_string contents with
2246- | Ok toml -> Ok (toml_to_tagged_json toml)
2247- | Error msg -> Error msg
2248-2249-let encode_from_tagged_json json_str =
2250 try
2251- let toml = decode_tagged_json_string json_str in
2252- Ok (encode_toml toml)
2253 with
2254- | Failure msg -> Error msg
2255- | e -> Error (Printexc.to_string e)
0000000000000000000000000000000000000000000000000000000000000000000000000000000022562257-(* Re-export the error module *)
0000000000000000000000002258module Error = Tomlt_error
00000000000000000
···78(* TOML value representation *)
910+type t =
11+ | String of string
12+ | Int of int64
13+ | Float of float
14+ | Bool of bool
15+ | Datetime of string (* Offset datetime *)
16+ | Datetime_local of string (* Local datetime *)
17+ | Date_local of string (* Local date *)
18+ | Time_local of string (* Local time *)
19+ | Array of t list
20+ | Table of (string * t) list
2122+(* Lexer - works directly on bytes buffer filled from Bytes.Reader *)
2324type token =
25 | Tok_lbracket
···44 | Tok_time_local of string
4546type lexer = {
47+ input : bytes; (* Buffer containing input data *)
48+ input_len : int; (* Length of valid data in input *)
49 mutable pos : int;
50 mutable line : int;
51 mutable col : int;
52 file : string;
53}
5455+(* Create lexer from string (copies to bytes) *)
56+let make_lexer ?(file = "-") s =
57+ let input = Bytes.of_string s in
58+ { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
59+60+(* Create lexer directly from Bytes.Reader - reads all data into buffer *)
61+let make_lexer_from_reader ?(file = "-") r =
62+ (* Read all slices into a buffer *)
63+ let buf = Buffer.create 4096 in
64+ let rec read_all () =
65+ let slice = Bytes.Reader.read r in
66+ if Bytes.Slice.is_eod slice then ()
67+ else begin
68+ Bytes.Slice.add_to_buffer buf slice;
69+ read_all ()
70+ end
71+ in
72+ read_all ();
73+ let input = Buffer.to_bytes buf in
74+ { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
7576+let is_eof l = l.pos >= l.input_len
7778+let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos)
7980let peek2 l =
81+ if l.pos + 1 >= l.input_len then None
82+ else Some (Bytes.get l.input (l.pos + 1))
8384let peek_n l n =
85+ if l.pos + n - 1 >= l.input_len then None
86+ else Some (Bytes.sub_string l.input l.pos n)
8788let advance l =
89 if not (is_eof l) then begin
90+ if Bytes.get l.input l.pos = '\n' then begin
91 l.line <- l.line + 1;
92 l.col <- 1
93 end else
···99 for _ = 1 to n do advance l done
100101let skip_whitespace l =
102+ while not (is_eof l) && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') do
103 advance l
104 done
105106+(* Helper functions for bytes access *)
107+let[@inline] get_char l pos = Bytes.unsafe_get l.input pos
108+let[@inline] get_current l = Bytes.unsafe_get l.input l.pos
109+let sub_string l pos len = Bytes.sub_string l.input pos len
110+111+(* Helper to create error location from lexer state *)
112+let lexer_loc l = Tomlt_error.loc ~file:l.file ~line:l.line ~column:l.col ()
113+114(* Get expected byte length of UTF-8 char from first byte *)
115let utf8_byte_length_from_first_byte c =
116 let code = Char.code c in
···121 else if code < 0xF8 then 4
122 else 0 (* Invalid: 5+ byte sequence *)
123124+(* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *)
125+let validate_utf8_at_pos_bytes l =
126+ if l.pos >= l.input_len then
127+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
128+ let byte_len = utf8_byte_length_from_first_byte (Bytes.unsafe_get l.input l.pos) in
129 if byte_len = 0 then
130+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
131+ if l.pos + byte_len > l.input_len then
132+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8;
133 (* Validate using uutf - it checks overlong encodings, surrogates, etc. *)
134+ let sub = Bytes.sub_string l.input l.pos byte_len in
135 let valid = ref false in
136 Uutf.String.fold_utf_8 (fun () _ -> function
137 | `Uchar _ -> valid := true
138 | `Malformed _ -> ()
139 ) () sub;
140 if not !valid then
141+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
142 byte_len
143144(* UTF-8 validation - validates and advances over a single UTF-8 character *)
145let validate_utf8_char l =
146+ let byte_len = validate_utf8_at_pos_bytes l in
147 for _ = 1 to byte_len do advance l done
148149let skip_comment l =
150+ if not (is_eof l) && get_current l = '#' then begin
151 (* Validate comment characters *)
152 advance l;
153 let continue = ref true in
154+ while !continue && not (is_eof l) && get_current l <> '\n' do
155+ let c = get_current l in
156 let code = Char.code c in
157 (* CR is only valid if followed by LF (CRLF at end of comment) *)
158 if c = '\r' then begin
159 (* Check if this CR is followed by LF - if so, it ends the comment *)
160+ if l.pos + 1 < l.input_len && get_char l (l.pos + 1) = '\n' then
161 (* This is CRLF - stop the loop, let the main lexer handle it *)
162 continue := false
163 else
164+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return
165 end else if code >= 0x80 then begin
166 (* Multi-byte UTF-8 character - validate it *)
167 validate_utf8_char l
168 end else begin
169 (* ASCII control characters other than tab are not allowed in comments *)
170 if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then
171+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
172 advance l
173 end
174 done
···177let skip_ws_and_comments l =
178 let rec loop () =
179 skip_whitespace l;
180+ if not (is_eof l) && get_current l = '#' then begin
181 skip_comment l;
182 loop ()
183 end
···197 if c >= '0' && c <= '9' then Char.code c - Char.code '0'
198 else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10
199 else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10
200+ else Tomlt_error.raise_number Invalid_hex_digit
201202+(* Convert Unicode codepoint to UTF-8 using uutf *)
203+let codepoint_to_utf8 codepoint =
204 if codepoint < 0 || codepoint > 0x10FFFF then
205 failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint);
206 if codepoint >= 0xD800 && codepoint <= 0xDFFF then
207+ failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%04X" codepoint);
208+ let buf = Buffer.create 4 in
209+ Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
210+ Buffer.contents buf
211+212+(* Parse Unicode escape with error location from lexer *)
213+let unicode_to_utf8 l codepoint =
214+ if codepoint < 0 || codepoint > 0x10FFFF then
215+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_codepoint codepoint);
216+ if codepoint >= 0xD800 && codepoint <= 0xDFFF then
217+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Surrogate_codepoint codepoint);
218 let buf = Buffer.create 4 in
219 Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
220 Buffer.contents buf
221222let parse_escape l =
223 advance l; (* skip backslash *)
224+ if is_eof l then
225+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
226+ let c = get_current l in
227 advance l;
228 match c with
229 | 'b' -> "\b"
···236 | '\\' -> "\\"
237 | 'x' ->
238 (* \xHH - 2 hex digits *)
239+ if l.pos + 1 >= l.input_len then
240+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x");
241+ let c1 = get_char l l.pos in
242+ let c2 = get_char l (l.pos + 1) in
243 if not (is_hex_digit c1 && is_hex_digit c2) then
244+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\x");
245 let cp = (hex_value c1 * 16) + hex_value c2 in
246 advance l; advance l;
247+ unicode_to_utf8 l cp
248 | 'u' ->
249 (* \uHHHH - 4 hex digits *)
250+ if l.pos + 3 >= l.input_len then
251+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u");
252+ let s = sub_string l l.pos 4 in
253 for i = 0 to 3 do
254 if not (is_hex_digit s.[i]) then
255+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\u")
256 done;
257 let cp = int_of_string ("0x" ^ s) in
258 advance_n l 4;
259+ unicode_to_utf8 l cp
260 | 'U' ->
261 (* \UHHHHHHHH - 8 hex digits *)
262+ if l.pos + 7 >= l.input_len then
263+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U");
264+ let s = sub_string l l.pos 8 in
265 for i = 0 to 7 do
266 if not (is_hex_digit s.[i]) then
267+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\U")
268 done;
269 let cp = int_of_string ("0x" ^ s) in
270 advance_n l 8;
271+ unicode_to_utf8 l cp
272+ | _ ->
273+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c)
274275let validate_string_char l c is_multiline =
276 let code = Char.code c in
277 (* Control characters other than tab (and LF/CR for multiline) are not allowed *)
278 if code < 0x09 then
279+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
280 if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then
281+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
282 if code = 0x7F then
283+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code)
284285(* Validate UTF-8 in string context and add bytes to buffer *)
286let validate_and_add_utf8_to_buffer l buf =
287+ let byte_len = validate_utf8_at_pos_bytes l in
288+ Buffer.add_string buf (sub_string l l.pos byte_len);
289 for _ = 1 to byte_len do advance l done
290291let parse_basic_string l =
···309 let rec loop () =
310 if is_eof l then
311 failwith "Unterminated string";
312+ let c = get_current l in
313 if multiline then begin
314 if c = '"' then begin
315 (* Count consecutive quotes *)
316 let quote_count = ref 0 in
317 let p = ref l.pos in
318+ while !p < l.input_len && get_char l !p = '"' do
319 incr quote_count;
320 incr p
321 done;
···454 let rec loop () =
455 if is_eof l then
456 failwith "Unterminated literal string";
457+ let c = get_current l in
458 if multiline then begin
459 if c = '\'' then begin
460 (* Count consecutive quotes *)
461 let quote_count = ref 0 in
462 let p = ref l.pos in
463+ while !p < l.input_len && get_char l !p = '\'' do
464 incr quote_count;
465 incr p
466 done;
···541 match peek_n l 3 with
542 | Some "inf" ->
543 advance_n l 3;
544+ let s = sub_string l start (l.pos - start) in
545 Tok_float ((if neg then Float.neg_infinity else Float.infinity), s)
546 | Some "nan" ->
547 advance_n l 3;
548+ let s = sub_string l start (l.pos - start) in
549 Tok_float (Float.nan, s)
550 | _ ->
551 (* Check for hex, octal, or binary *)
···569 if first then failwith "Expected hex digit after 0x"
570 in
571 read_hex true;
572+ let s = sub_string l num_start (l.pos - num_start) in
573 let s = String.concat "" (String.split_on_char '_' s) in
574+ let orig = sub_string l start (l.pos - start) in
575 Tok_integer (Int64.of_string ("0x" ^ s), orig)
576 | Some '0', Some 'o' when not neg ->
577 advance l; advance l;
···592 if first then failwith "Expected octal digit after 0o"
593 in
594 read_oct true;
595+ let s = sub_string l num_start (l.pos - num_start) in
596 let s = String.concat "" (String.split_on_char '_' s) in
597+ let orig = sub_string l start (l.pos - start) in
598 Tok_integer (Int64.of_string ("0o" ^ s), orig)
599 | Some '0', Some 'b' when not neg ->
600 advance l; advance l;
···615 if first then failwith "Expected binary digit after 0b"
616 in
617 read_bin true;
618+ let s = sub_string l num_start (l.pos - num_start) in
619 let s = String.concat "" (String.split_on_char '_' s) in
620+ let orig = sub_string l start (l.pos - start) in
621 Tok_integer (Int64.of_string ("0b" ^ s), orig)
622 | _ ->
623 (* Regular decimal number *)
···669 | _ -> ());
670 read_int true
671 | _ -> ());
672+ let s = sub_string l start (l.pos - start) in
673 let s' = String.concat "" (String.split_on_char '_' s) in
674 if !is_float then
675 Tok_float (float_of_string s', s)
···681 (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *)
682 let check_datetime () =
683 let pos = l.pos in
684+ let len = l.input_len in
685 (* Check for YYYY-MM-DD pattern - must have exactly this structure *)
686 if pos + 10 <= len then begin
687+ let c0 = get_char l pos in
688+ let c1 = get_char l (pos + 1) in
689+ let c2 = get_char l (pos + 2) in
690+ let c3 = get_char l (pos + 3) in
691+ let c4 = get_char l (pos + 4) in
692+ let c5 = get_char l (pos + 5) in
693+ let c6 = get_char l (pos + 6) in
694+ let c7 = get_char l (pos + 7) in
695+ let c8 = get_char l (pos + 8) in
696+ let c9 = get_char l (pos + 9) in
697 (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *)
698 if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' &&
699 is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin
700 (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *)
701 if pos + 10 < len then begin
702+ let next = get_char l (pos + 10) in
703 if next = 'T' || next = 't' then
704 `Date (* Datetime continues with time part *)
705 else if next = ' ' || next = '\t' then begin
706 (* Check if followed by = (key context) or time part *)
707 let rec skip_ws p =
708 if p >= len then p
709+ else match get_char l p with
710 | ' ' | '\t' -> skip_ws (p + 1)
711 | _ -> p
712 in
713 let after_ws = skip_ws (pos + 11) in
714+ if after_ws < len && get_char l after_ws = '=' then
715 `Other (* It's a key followed by = *)
716+ else if after_ws < len && is_digit (get_char l after_ws) then
717 `Date (* Could be "2001-02-03 12:34:56" format *)
718 else
719 `Date
···732 else
733 `Other
734 end else if pos + 5 <= len then begin
735+ let c0 = get_char l pos in
736+ let c1 = get_char l (pos + 1) in
737+ let c2 = get_char l (pos + 2) in
738+ let c3 = get_char l (pos + 3) in
739+ let c4 = get_char l (pos + 4) in
740 if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then
741 `Time
742 else
···959 skip_ws_and_comments l;
960 if is_eof l then Tok_eof
961 else begin
962+ let c = get_current l in
963 match c with
964 | '[' -> advance l; Tok_lbracket
965 | ']' -> advance l; Tok_rbracket
···992 (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *)
993 let is_key_context =
994 let rec scan_ahead p =
995+ if p >= l.input_len then false
996 else
997+ let c = get_char l p in
998 if is_digit c || c = '_' then scan_ahead (p + 1)
999 else if c = ' ' || c = '\t' then
1000 (* Skip whitespace and check for = *)
1001 let rec skip_ws pp =
1002+ if pp >= l.input_len then false
1003+ else match get_char l pp with
1004 | ' ' | '\t' -> skip_ws (pp + 1)
1005 | '=' -> true
1006 | _ -> false
···1009 else if c = '=' then true
1010 else if c = '.' then
1011 (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *)
1012+ if p + 1 < l.input_len then
1013+ let next = get_char l (p + 1) in
1014 if is_digit next then false (* It's a decimal number like -3.14 *)
1015 else if is_bare_key_char next then true (* Dotted key *)
1016 else false
···1025 in
1026 if is_key_context then begin
1027 (* Treat as bare key *)
1028+ while not (is_eof l) && is_bare_key_char (get_current l) do
1029 advance l
1030 done;
1031+ Tok_bare_key (sub_string l start (l.pos - start))
1032 end else
1033 parse_number l
1034 | Some 'i' ->
1035 (* Check for inf *)
1036+ if l.pos + 3 < l.input_len &&
1037+ get_char l (l.pos + 1) = 'i' && get_char l (l.pos + 2) = 'n' && get_char l (l.pos + 3) = 'f' then begin
1038 advance_n l 4;
1039+ let s = sub_string l start (l.pos - start) in
1040 if sign = '-' then Tok_float (Float.neg_infinity, s)
1041 else Tok_float (Float.infinity, s)
1042 end else if sign = '-' then begin
1043 (* Could be bare key like -inf-key *)
1044+ while not (is_eof l) && is_bare_key_char (get_current l) do
1045 advance l
1046 done;
1047+ Tok_bare_key (sub_string l start (l.pos - start))
1048 end else
1049 failwith (Printf.sprintf "Unexpected character after %c" sign)
1050 | Some 'n' ->
1051 (* Check for nan *)
1052+ if l.pos + 3 < l.input_len &&
1053+ get_char l (l.pos + 1) = 'n' && get_char l (l.pos + 2) = 'a' && get_char l (l.pos + 3) = 'n' then begin
1054 advance_n l 4;
1055+ let s = sub_string l start (l.pos - start) in
1056 Tok_float (Float.nan, s) (* Sign on NaN doesn't change the value *)
1057 end else if sign = '-' then begin
1058 (* Could be bare key like -name *)
1059+ while not (is_eof l) && is_bare_key_char (get_current l) do
1060 advance l
1061 done;
1062+ Tok_bare_key (sub_string l start (l.pos - start))
1063 end else
1064 failwith (Printf.sprintf "Unexpected character after %c" sign)
1065 | _ when sign = '-' ->
1066 (* Bare key starting with - like -key or --- *)
1067+ while not (is_eof l) && is_bare_key_char (get_current l) do
1068 advance l
1069 done;
1070+ Tok_bare_key (sub_string l start (l.pos - start))
1071 | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign))
1072 | c when is_digit c ->
1073 (* Could be number, datetime, or bare key starting with digits *)
···1078 (* Check for hex/octal/binary prefix first - these are always numbers *)
1079 let start = l.pos in
1080 let is_prefixed_number =
1081+ start + 1 < l.input_len && get_char l start = '0' &&
1082+ (let c1 = get_char l (start + 1) in
1083 c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B')
1084 in
1085 if is_prefixed_number then
···1089 - Contains letters (like "123abc")
1090 - Has leading zeros (like "0123") which would be invalid as a number *)
1091 let has_leading_zero =
1092+ get_char l start = '0' && start + 1 < l.input_len &&
1093+ let c1 = get_char l (start + 1) in
1094 is_digit c1
1095 in
1096 (* Scan to see if this is a bare key or a number
1097 - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number
1098 - If it contains letters OR dashes between digits, it's a bare key *)
1099 let rec scan_for_bare_key pos has_dash_between_digits =
1100+ if pos >= l.input_len then has_dash_between_digits
1101 else
1102+ let c = get_char l pos in
1103 if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits
1104 else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits
1105 else if c = '-' then
1106 (* Dash in key - check what follows *)
1107 let next_pos = pos + 1 in
1108+ if next_pos < l.input_len then
1109+ let next = get_char l next_pos in
1110 if is_digit next then
1111 scan_for_bare_key (next_pos) true (* Dash between digits - bare key *)
1112 else if is_bare_key_char next then
···1118 else if c = 'e' || c = 'E' then
1119 (* Check if this looks like scientific notation *)
1120 let next_pos = pos + 1 in
1121+ if next_pos >= l.input_len then true (* Just 'e' at end, bare key *)
1122 else
1123+ let next = get_char l next_pos in
1124 if next = '+' || next = '-' then
1125 (* Has exponent sign - check if followed by digit *)
1126 let after_sign = next_pos + 1 in
1127+ if after_sign < l.input_len && is_digit (get_char l after_sign) then
1128 has_dash_between_digits (* Scientific notation, but might have dash earlier *)
1129 else
1130 true (* e.g., "3e-abc" - bare key *)
···1139 in
1140 if has_leading_zero || scan_for_bare_key start false then begin
1141 (* It's a bare key *)
1142+ while not (is_eof l) && is_bare_key_char (get_current l) do
1143 advance l
1144 done;
1145+ Tok_bare_key (sub_string l start (l.pos - start))
1146 end else
1147 (* It's a number - use parse_number *)
1148 parse_number l
···1151 (* These could be keywords (true, false, inf, nan) or bare keys
1152 Always read as bare key and let parser interpret *)
1153 let start = l.pos in
1154+ while not (is_eof l) && is_bare_key_char (get_current l) do
1155 advance l
1156 done;
1157+ Tok_bare_key (sub_string l start (l.pos - start))
1158 | c when is_bare_key_char c ->
1159 let start = l.pos in
1160+ while not (is_eof l) && is_bare_key_char (get_current l) do
1161 advance l
1162 done;
1163+ Tok_bare_key (sub_string l start (l.pos - start))
1164 | c ->
1165 let code = Char.code c in
1166 if code < 0x20 || code = 0x7F then
···11941195(* Check if next raw character (without skipping whitespace) matches *)
1196let next_raw_char_is p c =
1197+ p.lexer.pos < p.lexer.input_len && get_char p.lexer p.lexer.pos = c
11981199let expect_token p expected =
1200 let tok = consume_token p in
···12621263let rec parse_value p =
1264 match peek_token p with
1265+ | Tok_basic_string s -> ignore (consume_token p); String s
1266+ | Tok_literal_string s -> ignore (consume_token p); String s
1267+ | Tok_ml_basic_string s -> ignore (consume_token p); String s
1268+ | Tok_ml_literal_string s -> ignore (consume_token p); String s
1269+ | Tok_integer (i, _) -> ignore (consume_token p); Int i
1270+ | Tok_float (f, _) -> ignore (consume_token p); Float f
1271+ | Tok_datetime s -> ignore (consume_token p); Datetime s
1272+ | Tok_datetime_local s -> ignore (consume_token p); Datetime_local s
1273+ | Tok_date_local s -> ignore (consume_token p); Date_local s
1274+ | Tok_time_local s -> ignore (consume_token p); Time_local s
1275 | Tok_lbracket -> parse_array p
1276 | Tok_lbrace -> parse_inline_table p
1277 | Tok_bare_key s ->
1278 (* Interpret bare keys as boolean, float keywords, or numbers in value context *)
1279 ignore (consume_token p);
1280 (match s with
1281+ | "true" -> Bool true
1282+ | "false" -> Bool false
1283+ | "inf" -> Float Float.infinity
1284+ | "nan" -> Float Float.nan
1285 | _ ->
1286 (* Validate underscore placement in the original string *)
1287 let validate_underscores str =
···1325 if String.contains s_no_underscore '.' ||
1326 String.contains s_no_underscore 'e' ||
1327 String.contains s_no_underscore 'E' then
1328+ Float (float_of_string s_no_underscore)
1329 else
1330+ Int (Int64.of_string s_no_underscore)
1331 with _ ->
1332 failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)
1333 end else
···1343 match peek_token p with
1344 | Tok_rbracket ->
1345 ignore (consume_token p);
1346+ Array (List.rev acc)
1347 | _ ->
1348 let v = parse_value p in
1349 skip_newlines p;
···1354 loop (v :: acc)
1355 | Tok_rbracket ->
1356 ignore (consume_token p);
1357+ Array (List.rev (v :: acc))
1358 | _ -> failwith "Expected ',' or ']' in array"
1359 in
1360 loop []
···1368 match peek_token p with
1369 | Tok_rbrace ->
1370 ignore (consume_token p);
1371+ Table (List.rev acc)
1372 | _ ->
1373 let keys = parse_dotted_key p in
1374 skip_ws p;
···1400 loop acc
1401 | Tok_rbrace ->
1402 ignore (consume_token p);
1403+ Table (List.rev acc)
1404 | _ -> failwith "Expected ',' or '}' in inline table"
1405 in
1406 loop []
···1414 | [] -> failwith "Empty key"
1415 | [k] -> (k, value)
1416 | k :: rest ->
1417+ (k, Table [build_nested_table rest value])
14181419(* Merge two TOML values - used for combining dotted keys in inline tables *)
1420and merge_toml_values v1 v2 =
1421 match v1, v2 with
1422+ | Table entries1, Table entries2 ->
1423 (* Merge the entries *)
1424 let merged = List.fold_left (fun acc (k, v) ->
1425 match List.assoc_opt k acc with
···1430 | None ->
1431 (k, v) :: acc
1432 ) entries1 entries2 in
1433+ Table (List.rev merged)
1434 | _, _ ->
1435 (* Can't merge non-table values with same key *)
1436 failwith "Conflicting keys in inline table"
···14871488(* Table management for the parser *)
1489type table_state = {
1490+ mutable values : (string * t) list;
1491 subtables : (string, table_state) Hashtbl.t;
1492 mutable is_array : bool;
1493 mutable is_inline : bool;
···1589 let subtable_values = Hashtbl.fold (fun k sub acc ->
1590 let v =
1591 if sub.is_array then
1592+ Array (List.map table_state_to_toml (get_array_elements sub))
1593 else
1594 table_state_to_toml sub
1595 in
1596 (k, v) :: acc
1597 ) state.subtables [] in
1598+ Table (List.rev state.values @ subtable_values)
15991600and get_array_elements state =
1601 List.rev state.array_elements
16021603(* Main parser function *)
1604+let parse_toml_from_lexer lexer =
01605 let parser = make_parser lexer in
1606 let root = create_table_state () in
1607 let current_table = ref root in
···1824 parse_document ();
1825 table_state_to_toml root
18261827+(* Parse TOML from string - creates lexer internally *)
1828+let parse_toml input =
1829+ let lexer = make_lexer input in
1830+ parse_toml_from_lexer lexer
1831+1832+(* Parse TOML directly from Bytes.Reader - no intermediate string *)
1833+let parse_toml_from_reader ?file r =
1834+ let lexer = make_lexer_from_reader ?file r in
1835+ parse_toml_from_lexer lexer
1836+1837(* Convert TOML to tagged JSON for toml-test compatibility *)
1838let rec toml_to_tagged_json value =
1839 match value with
1840+ | String s ->
1841 Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s)
1842+ | Int i ->
1843 Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i
1844+ | Float f ->
1845 let value_str =
1846 (* Normalize exponent format - lowercase e, keep + for positive exponents *)
1847 let format_exp s =
···1957 try_precision 1
1958 in
1959 Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str
1960+ | Bool b ->
1961 Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false")
1962+ | Datetime s ->
1963 validate_datetime_string s;
1964 Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s
1965+ | Datetime_local s ->
1966 validate_datetime_string s;
1967 Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s
1968+ | Date_local s ->
1969 validate_date_string s;
1970 Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s
1971+ | Time_local s ->
1972 validate_time_string s;
1973 Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s
1974+ | Array items ->
1975 let json_items = List.map toml_to_tagged_json items in
1976 Printf.sprintf "[%s]" (String.concat "," json_items)
1977+ | Table pairs ->
1978 let json_pairs = List.map (fun (k, v) ->
1979 Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v)
1980 ) pairs in
···1999 Buffer.add_char buf '"';
2000 Buffer.contents buf
20010000000002002(* Tagged JSON to TOML for encoder *)
2003let decode_tagged_json_string s =
2004 (* Simple JSON parser for tagged format *)
···2045 if !pos + 3 >= len then failwith "Invalid unicode escape";
2046 let hex = String.sub s !pos 4 in
2047 let cp = int_of_string ("0x" ^ hex) in
2048+ Buffer.add_string buf (codepoint_to_utf8 cp);
2049 pos := !pos + 4
2050 | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c)
2051 end else begin
···2060 (* Convert a tagged JSON object to a TOML primitive if applicable *)
2061 let convert_tagged_value value =
2062 match value with
2063+ | Table [("type", String typ); ("value", String v)]
2064+ | Table [("value", String v); ("type", String typ)] ->
2065 (match typ with
2066+ | "string" -> String v
2067+ | "integer" -> Int (Int64.of_string v)
2068 | "float" ->
2069 (match v with
2070+ | "inf" -> Float Float.infinity
2071+ | "-inf" -> Float Float.neg_infinity
2072+ | "nan" -> Float Float.nan
2073+ | _ -> Float (float_of_string v))
2074+ | "bool" -> Bool (v = "true")
2075+ | "datetime" -> Datetime v
2076+ | "datetime-local" -> Datetime_local v
2077+ | "date-local" -> Date_local v
2078+ | "time-local" -> Time_local v
2079 | _ -> failwith (Printf.sprintf "Unknown type: %s" typ))
2080 | _ -> value
2081 in
···2085 match peek () with
2086 | Some '{' -> parse_object ()
2087 | Some '[' -> parse_array ()
2088+ | Some '"' -> String (parse_json_string ())
2089 | _ -> failwith "Expected value"
20902091 and parse_object () =
···2093 skip_ws ();
2094 if peek () = Some '}' then begin
2095 incr pos;
2096+ Table []
2097 end else begin
2098 let pairs = ref [] in
2099 let first = ref true in
···2107 pairs := (key, convert_tagged_value value) :: !pairs
2108 done;
2109 expect '}';
2110+ Table (List.rev !pairs)
2111 end
21122113 and parse_array () =
···2115 skip_ws ();
2116 if peek () = Some ']' then begin
2117 incr pos;
2118+ Array []
2119 end else begin
2120 let items = ref [] in
2121 let first = ref true in
···2125 items := convert_tagged_value (parse_value ()) :: !items
2126 done;
2127 expect ']';
2128+ Array (List.rev !items)
2129 end
2130 in
21312132 parse_value ()
21332134+(* Streaming TOML encoder - writes directly to a Bytes.Writer *)
00000000000000000000000000021352136+let rec write_toml_string w s =
2137 (* Check if we need to escape *)
2138 let needs_escape = String.exists (fun c ->
2139 let code = Char.code c in
···2141 code < 0x20 || code = 0x7F
2142 ) s in
2143 if needs_escape then begin
2144+ Bytes.Writer.write_string w "\"";
02145 String.iter (fun c ->
2146 match c with
2147+ | '"' -> Bytes.Writer.write_string w "\\\""
2148+ | '\\' -> Bytes.Writer.write_string w "\\\\"
2149+ | '\n' -> Bytes.Writer.write_string w "\\n"
2150+ | '\r' -> Bytes.Writer.write_string w "\\r"
2151+ | '\t' -> Bytes.Writer.write_string w "\\t"
2152+ | '\b' -> Bytes.Writer.write_string w "\\b"
2153+ | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f"
2154 | c when Char.code c < 0x20 || Char.code c = 0x7F ->
2155+ Bytes.Writer.write_string w (Printf.sprintf "\\u%04X" (Char.code c))
2156+ | c ->
2157+ let b = Bytes.create 1 in
2158+ Bytes.set b 0 c;
2159+ Bytes.Writer.write_bytes w b
2160 ) s;
2161+ Bytes.Writer.write_string w "\""
2162+ end else begin
2163+ Bytes.Writer.write_string w "\"";
2164+ Bytes.Writer.write_string w s;
2165+ Bytes.Writer.write_string w "\""
2166+ end
21672168+and write_toml_key w k =
2169 (* Check if it can be a bare key *)
2170 let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in
2171+ if is_bare then Bytes.Writer.write_string w k
2172+ else write_toml_string w k
21732174+and write_toml_value w ?(inline=false) value =
2175+ match value with
2176+ | String s -> write_toml_string w s
2177+ | Int i -> Bytes.Writer.write_string w (Int64.to_string i)
2178+ | Float f ->
2179+ if Float.is_nan f then Bytes.Writer.write_string w "nan"
2180+ else if f = Float.infinity then Bytes.Writer.write_string w "inf"
2181+ else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf"
2182+ else begin
2183+ let s = Printf.sprintf "%.17g" f in
2184+ (* Ensure it looks like a float *)
2185+ let s = if String.contains s '.' || String.contains s 'e' || String.contains s 'E'
2186+ then s else s ^ ".0" in
2187+ Bytes.Writer.write_string w s
2188+ end
2189+ | Bool b -> Bytes.Writer.write_string w (if b then "true" else "false")
2190+ | Datetime s -> Bytes.Writer.write_string w s
2191+ | Datetime_local s -> Bytes.Writer.write_string w s
2192+ | Date_local s -> Bytes.Writer.write_string w s
2193+ | Time_local s -> Bytes.Writer.write_string w s
2194+ | Array items ->
2195+ Bytes.Writer.write_string w "[";
2196+ List.iteri (fun i item ->
2197+ if i > 0 then Bytes.Writer.write_string w ", ";
2198+ write_toml_value w ~inline:true item
2199+ ) items;
2200+ Bytes.Writer.write_string w "]"
2201+ | Table pairs when inline ->
2202+ Bytes.Writer.write_string w "{";
2203+ List.iteri (fun i (k, v) ->
2204+ if i > 0 then Bytes.Writer.write_string w ", ";
2205+ write_toml_key w k;
2206+ Bytes.Writer.write_string w " = ";
2207+ write_toml_value w ~inline:true v
2208+ ) pairs;
2209+ Bytes.Writer.write_string w "}"
2210+ | Table _ -> failwith "Cannot encode table inline without inline flag"
2211+2212+(* True streaming TOML encoder - writes directly to Bytes.Writer *)
2213+let encode_to_writer w value =
2214 let has_content = ref false in
22152216+ let write_path path =
2217+ List.iteri (fun i k ->
2218+ if i > 0 then Bytes.Writer.write_string w ".";
2219+ write_toml_key w k
2220+ ) path
2221+ in
2222+2223 let rec encode_at_path path value =
2224 match value with
2225+ | Table pairs ->
2226 (* Separate simple values from nested tables *)
2227 (* Only PURE table arrays (all items are tables) use [[array]] syntax.
2228 Mixed arrays (primitives + tables) must be encoded inline. *)
2229 let is_pure_table_array items =
2230+ items <> [] && List.for_all (function Table _ -> true | _ -> false) items
2231 in
2232 let simple, nested = List.partition (fun (_, v) ->
2233 match v with
2234+ | Table _ -> false
2235+ | Array items -> not (is_pure_table_array items)
2236 | _ -> true
2237 ) pairs in
22382239 (* Emit simple values first *)
2240 List.iter (fun (k, v) ->
2241+ write_toml_key w k;
2242+ Bytes.Writer.write_string w " = ";
2243+ write_toml_value w ~inline:true v;
2244+ Bytes.Writer.write_string w "\n";
2245 has_content := true
2246 ) simple;
2247···2249 List.iter (fun (k, v) ->
2250 let new_path = path @ [k] in
2251 match v with
2252+ | Table _ ->
2253+ if !has_content then Bytes.Writer.write_string w "\n";
2254+ Bytes.Writer.write_string w "[";
2255+ write_path new_path;
2256+ Bytes.Writer.write_string w "]\n";
2257 has_content := true;
2258 encode_at_path new_path v
2259+ | Array items when items <> [] && List.for_all (function Table _ -> true | _ -> false) items ->
2260 (* Pure table array - use [[array]] syntax *)
2261 List.iter (fun item ->
2262 match item with
2263+ | Table _ ->
2264+ if !has_content then Bytes.Writer.write_string w "\n";
2265+ Bytes.Writer.write_string w "[[";
2266+ write_path new_path;
2267+ Bytes.Writer.write_string w "]]\n";
2268 has_content := true;
2269 encode_at_path new_path item
2270 | _ -> assert false (* Impossible - we checked for_all above *)
2271 ) items
2272 | _ ->
2273+ write_toml_key w k;
2274+ Bytes.Writer.write_string w " = ";
2275+ write_toml_value w ~inline:true v;
2276+ Bytes.Writer.write_string w "\n";
2277 has_content := true
2278 ) nested
2279 | _ ->
···22822283 encode_at_path [] value
22842285+(* ============================================
2286+ Public Interface - Constructors
2287+ ============================================ *)
2288+2289+let string s = String s
2290+let int i = Int i
2291+let int_of_int i = Int (Int64.of_int i)
2292+let float f = Float f
2293+let bool b = Bool b
2294+let array vs = Array vs
2295+let table pairs = Table pairs
2296+let datetime s = Datetime s
2297+let datetime_local s = Datetime_local s
2298+let date_local s = Date_local s
2299+let time_local s = Time_local s
2300+2301+(* ============================================
2302+ Public Interface - Accessors
2303+ ============================================ *)
2304+2305+let to_string = function
2306+ | String s -> s
2307+ | _ -> invalid_arg "Tomlt.to_string: not a string"
2308+2309+let to_string_opt = function
2310+ | String s -> Some s
2311+ | _ -> None
2312+2313+let to_int = function
2314+ | Int i -> i
2315+ | _ -> invalid_arg "Tomlt.to_int: not an integer"
2316+2317+let to_int_opt = function
2318+ | Int i -> Some i
2319+ | _ -> None
2320+2321+let to_float = function
2322+ | Float f -> f
2323+ | _ -> invalid_arg "Tomlt.to_float: not a float"
2324+2325+let to_float_opt = function
2326+ | Float f -> Some f
2327+ | _ -> None
2328+2329+let to_bool = function
2330+ | Bool b -> b
2331+ | _ -> invalid_arg "Tomlt.to_bool: not a boolean"
2332+2333+let to_bool_opt = function
2334+ | Bool b -> Some b
2335+ | _ -> None
2336+2337+let to_array = function
2338+ | Array vs -> vs
2339+ | _ -> invalid_arg "Tomlt.to_array: not an array"
2340+2341+let to_array_opt = function
2342+ | Array vs -> Some vs
2343+ | _ -> None
2344+2345+let to_table = function
2346+ | Table pairs -> pairs
2347+ | _ -> invalid_arg "Tomlt.to_table: not a table"
2348+2349+let to_table_opt = function
2350+ | Table pairs -> Some pairs
2351+ | _ -> None
2352+2353+let to_datetime = function
2354+ | Datetime s | Datetime_local s | Date_local s | Time_local s -> s
2355+ | _ -> invalid_arg "Tomlt.to_datetime: not a datetime"
2356+2357+let to_datetime_opt = function
2358+ | Datetime s | Datetime_local s | Date_local s | Time_local s -> Some s
2359+ | _ -> None
2360+2361+(* ============================================
2362+ Public Interface - Type Predicates
2363+ ============================================ *)
2364+2365+let is_string = function String _ -> true | _ -> false
2366+let is_int = function Int _ -> true | _ -> false
2367+let is_float = function Float _ -> true | _ -> false
2368+let is_bool = function Bool _ -> true | _ -> false
2369+let is_array = function Array _ -> true | _ -> false
2370+let is_table = function Table _ -> true | _ -> false
2371+let is_datetime = function
2372+ | Datetime _ | Datetime_local _ | Date_local _ | Time_local _ -> true
2373+ | _ -> false
2374+2375+(* ============================================
2376+ Public Interface - Table Navigation
2377+ ============================================ *)
2378+2379+let find key = function
2380+ | Table pairs -> List.assoc key pairs
2381+ | _ -> invalid_arg "Tomlt.find: not a table"
2382+2383+let find_opt key = function
2384+ | Table pairs -> List.assoc_opt key pairs
2385+ | _ -> None
2386+2387+let mem key = function
2388+ | Table pairs -> List.mem_assoc key pairs
2389+ | _ -> false
2390+2391+let keys = function
2392+ | Table pairs -> List.map fst pairs
2393+ | _ -> invalid_arg "Tomlt.keys: not a table"
2394+2395+let rec get path t =
2396+ match path with
2397+ | [] -> t
2398+ | key :: rest ->
2399+ match t with
2400+ | Table pairs ->
2401+ (match List.assoc_opt key pairs with
2402+ | Some v -> get rest v
2403+ | None -> raise Not_found)
2404+ | _ -> invalid_arg "Tomlt.get: intermediate value is not a table"
2405+2406+let get_opt path t =
2407+ try Some (get path t) with Not_found | Invalid_argument _ -> None
2408+2409+let ( .%{} ) t path = get path t
2410+2411+let rec set_at_path path v t =
2412+ match path with
2413+ | [] -> v
2414+ | [key] ->
2415+ (match t with
2416+ | Table pairs ->
2417+ let pairs' = List.filter (fun (k, _) -> k <> key) pairs in
2418+ Table ((key, v) :: pairs')
2419+ | _ -> invalid_arg "Tomlt.(.%{}<-): not a table")
2420+ | key :: rest ->
2421+ match t with
2422+ | Table pairs ->
2423+ let existing = List.assoc_opt key pairs in
2424+ let subtable = match existing with
2425+ | Some (Table _ as sub) -> sub
2426+ | Some _ -> invalid_arg "Tomlt.(.%{}<-): intermediate value is not a table"
2427+ | None -> Table []
2428+ in
2429+ let updated = set_at_path rest v subtable in
2430+ let pairs' = List.filter (fun (k, _) -> k <> key) pairs in
2431+ Table ((key, updated) :: pairs')
2432+ | _ -> invalid_arg "Tomlt.(.%{}<-): not a table"
2433+2434+let ( .%{}<- ) t path v = set_at_path path v t
2435+2436+(* ============================================
2437+ Public Interface - Encoding
2438+ ============================================ *)
2439+2440+let to_buffer buf value =
2441+ let w = Bytes.Writer.of_buffer buf in
2442+ encode_to_writer w value
2443+2444+let to_toml_string value =
2445 let buf = Buffer.create 256 in
2446+ to_buffer buf value;
2447 Buffer.contents buf
24482449+let to_writer = encode_to_writer
000024502451+(* ============================================
2452+ Public Interface - Decoding
2453+ ============================================ *)
24542455+let of_string input =
2456+ try
2457+ Ok (parse_toml input)
2458+ with
2459+ | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg)))
2460+ | Tomlt_error.Error e -> Error e
2461+ | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e))))
24622463+let of_reader ?file r =
0000002464 try
2465+ Ok (parse_toml_from_reader ?file r)
02466 with
2467+ | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg)))
2468+ | Tomlt_error.Error e -> Error e
2469+ | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e))))
2470+2471+let parse = parse_toml
2472+2473+let parse_reader ?file r = parse_toml_from_reader ?file r
2474+2475+(* ============================================
2476+ Public Interface - Pretty Printing
2477+ ============================================ *)
2478+2479+let rec pp_value fmt = function
2480+ | String s ->
2481+ Format.fprintf fmt "\"%s\"" (String.escaped s)
2482+ | Int i ->
2483+ Format.fprintf fmt "%Ld" i
2484+ | Float f ->
2485+ if Float.is_nan f then Format.fprintf fmt "nan"
2486+ else if f = Float.infinity then Format.fprintf fmt "inf"
2487+ else if f = Float.neg_infinity then Format.fprintf fmt "-inf"
2488+ else Format.fprintf fmt "%g" f
2489+ | Bool b ->
2490+ Format.fprintf fmt "%s" (if b then "true" else "false")
2491+ | Datetime s | Datetime_local s | Date_local s | Time_local s ->
2492+ Format.fprintf fmt "%s" s
2493+ | Array items ->
2494+ Format.fprintf fmt "[";
2495+ List.iteri (fun i item ->
2496+ if i > 0 then Format.fprintf fmt ", ";
2497+ pp_value fmt item
2498+ ) items;
2499+ Format.fprintf fmt "]"
2500+ | Table pairs ->
2501+ Format.fprintf fmt "{";
2502+ List.iteri (fun i (k, v) ->
2503+ if i > 0 then Format.fprintf fmt ", ";
2504+ Format.fprintf fmt "%s = " k;
2505+ pp_value fmt v
2506+ ) pairs;
2507+ Format.fprintf fmt "}"
2508+2509+let pp fmt t =
2510+ Format.fprintf fmt "%s" (to_toml_string t)
2511+2512+(* ============================================
2513+ Public Interface - Equality and Comparison
2514+ ============================================ *)
2515+2516+let rec equal a b =
2517+ match a, b with
2518+ | String s1, String s2 -> String.equal s1 s2
2519+ | Int i1, Int i2 -> Int64.equal i1 i2
2520+ | Float f1, Float f2 ->
2521+ (* NaN = NaN for TOML equality *)
2522+ (Float.is_nan f1 && Float.is_nan f2) || Float.equal f1 f2
2523+ | Bool b1, Bool b2 -> Bool.equal b1 b2
2524+ | Datetime s1, Datetime s2 -> String.equal s1 s2
2525+ | Datetime_local s1, Datetime_local s2 -> String.equal s1 s2
2526+ | Date_local s1, Date_local s2 -> String.equal s1 s2
2527+ | Time_local s1, Time_local s2 -> String.equal s1 s2
2528+ | Array vs1, Array vs2 ->
2529+ List.length vs1 = List.length vs2 &&
2530+ List.for_all2 equal vs1 vs2
2531+ | Table ps1, Table ps2 ->
2532+ List.length ps1 = List.length ps2 &&
2533+ List.for_all2 (fun (k1, v1) (k2, v2) ->
2534+ String.equal k1 k2 && equal v1 v2
2535+ ) ps1 ps2
2536+ | _ -> false
2537+2538+let type_order = function
2539+ | String _ -> 0
2540+ | Int _ -> 1
2541+ | Float _ -> 2
2542+ | Bool _ -> 3
2543+ | Datetime _ -> 4
2544+ | Datetime_local _ -> 5
2545+ | Date_local _ -> 6
2546+ | Time_local _ -> 7
2547+ | Array _ -> 8
2548+ | Table _ -> 9
25492550+let rec compare a b =
2551+ let ta, tb = type_order a, type_order b in
2552+ if ta <> tb then Int.compare ta tb
2553+ else match a, b with
2554+ | String s1, String s2 -> String.compare s1 s2
2555+ | Int i1, Int i2 -> Int64.compare i1 i2
2556+ | Float f1, Float f2 -> Float.compare f1 f2
2557+ | Bool b1, Bool b2 -> Bool.compare b1 b2
2558+ | Datetime s1, Datetime s2 -> String.compare s1 s2
2559+ | Datetime_local s1, Datetime_local s2 -> String.compare s1 s2
2560+ | Date_local s1, Date_local s2 -> String.compare s1 s2
2561+ | Time_local s1, Time_local s2 -> String.compare s1 s2
2562+ | Array vs1, Array vs2 ->
2563+ List.compare compare vs1 vs2
2564+ | Table ps1, Table ps2 ->
2565+ List.compare (fun (k1, v1) (k2, v2) ->
2566+ let c = String.compare k1 k2 in
2567+ if c <> 0 then c else compare v1 v2
2568+ ) ps1 ps2
2569+ | _ -> 0 (* Impossible - handled by type_order check *)
2570+2571+(* ============================================
2572+ Error Module
2573+ ============================================ *)
2574+2575module Error = Tomlt_error
2576+2577+(* ============================================
2578+ Internal Module (for testing)
2579+ ============================================ *)
2580+2581+module Internal = struct
2582+ let to_tagged_json = toml_to_tagged_json
2583+ let of_tagged_json = decode_tagged_json_string
2584+2585+ let encode_from_tagged_json json_str =
2586+ try
2587+ let toml = decode_tagged_json_string json_str in
2588+ Ok (to_toml_string toml)
2589+ with
2590+ | Failure msg -> Error msg
2591+ | e -> Error (Printexc.to_string e)
2592+end
+292-50
lib/tomlt.mli
···56(** TOML 1.1 codec.
78- This module provides TOML 1.1 parsing and encoding with Bytesrw streaming
9- support.
001011- {b Example:}
00000000000000012 {[
13- let contents = Bytesrw.Bytes.Reader.of_string toml_input in
14- match Tomlt.decode contents with
15- | Ok toml -> (* use toml *)
16- | Error msg -> (* handle error *)
17- ]} *)
000000000000001819open Bytesrw
2021(** {1:types TOML Value Types} *)
2223-type toml_value =
24- | Toml_string of string
25- | Toml_int of int64
26- | Toml_float of float
27- | Toml_bool of bool
28- | Toml_datetime of string (** Offset datetime (RFC 3339 with timezone) *)
29- | Toml_datetime_local of string (** Local datetime (no timezone) *)
30- | Toml_date_local of string (** Local date only *)
31- | Toml_time_local of string (** Local time only *)
32- | Toml_array of toml_value list
33- | Toml_table of (string * toml_value) list
34-(** The type for TOML values. *)
00000000000000000000000000000000000003536-(** {1:decode Decode} *)
003738-val decode : ?file:string -> Bytes.Reader.t -> (toml_value, string) result
39-(** [decode r] decodes a TOML document from reader [r].
40- - [file] is the file path for error messages. Defaults to ["-"]. *)
04142-val decode_string : string -> (toml_value, string) result
43-(** [decode_string s] decodes a TOML document from string [s]. *)
04445-val decode_to_tagged_json : ?file:string -> Bytes.Reader.t -> (string, string) result
46-(** [decode_to_tagged_json r] decodes TOML and outputs tagged JSON
47- in the format used by toml-test. *)
4849-(** {1:encode Encode} *)
005051-val encode_toml : toml_value -> string
52-(** [encode_toml v] encodes TOML value [v] to a TOML string. *)
5354-val encode_toml_to_buffer : Buffer.t -> toml_value -> unit
55-(** [encode_toml_to_buffer buf v] encodes TOML value [v] directly to buffer [buf].
56- This avoids allocating an intermediate string. *)
5758-val encode_to_writer : Bytes.Writer.t -> toml_value -> unit
59-(** [encode_to_writer w v] encodes TOML value [v] directly to writer [w].
60- Useful for streaming output to files or network without building the
61- full string in memory first. *)
6263-val encode_from_tagged_json : string -> (string, string) result
64-(** [encode_from_tagged_json json] converts tagged JSON to TOML. *)
6566-(** {1:helpers Helpers} *)
006768-val toml_to_tagged_json : toml_value -> string
69-(** [toml_to_tagged_json v] converts a TOML value to tagged JSON format
70- used by toml-test. *)
7172-val decode_tagged_json_string : string -> toml_value
73-(** [decode_tagged_json_string s] parses tagged JSON into TOML values. *)
07475-val parse_toml : string -> toml_value
76-(** [parse_toml s] parses a TOML string. Raises [Error.Error] on failure. *)
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007778(** {1:errors Error Handling} *)
7980module Error = Tomlt_error
81-(** Error types for TOML parsing and encoding. *)
000000000000000000
···56(** TOML 1.1 codec.
78+ Tomlt provides TOML 1.1 parsing and encoding with efficient streaming
9+ support via {{:https://erratique.ch/software/bytesrw}Bytesrw}.
10+11+ {2 Quick Start}
1213+ Parse a TOML string:
14+ {[
15+ let config = Tomlt.of_string {|
16+ [server]
17+ host = "localhost"
18+ port = 8080
19+ |} in
20+ match config with
21+ | Ok t ->
22+ let host = Tomlt.(t.%{"server"; "host"} |> to_string) in
23+ let port = Tomlt.(t.%{"server"; "port"} |> to_int) in
24+ Printf.printf "Server: %s:%Ld\n" host port
25+ | Error e -> prerr_endline (Tomlt.Error.to_string e)
26+ ]}
27+28+ Create and encode TOML:
29 {[
30+ let config = Tomlt.(table [
31+ "title", string "My App";
32+ "database", table [
33+ "host", string "localhost";
34+ "ports", array [int 5432L; int 5433L]
35+ ]
36+ ]) in
37+ print_endline (Tomlt.to_string config)
38+ ]}
39+40+ {2 Module Overview}
41+42+ - {!section:types} - TOML value representation
43+ - {!section:construct} - Value constructors
44+ - {!section:access} - Value accessors and type conversion
45+ - {!section:navigate} - Table navigation
46+ - {!section:decode} - Parsing from strings and readers
47+ - {!section:encode} - Encoding to strings and writers
48+ - {!module:Error} - Structured error types *)
4950open Bytesrw
5152(** {1:types TOML Value Types} *)
5354+(** The type of TOML values.
55+56+ TOML supports the following value types:
57+ - Strings (UTF-8 encoded)
58+ - Integers (64-bit signed)
59+ - Floats (IEEE 754 double precision)
60+ - Booleans
61+ - Offset date-times (RFC 3339 with timezone)
62+ - Local date-times (no timezone)
63+ - Local dates
64+ - Local times
65+ - Arrays (heterogeneous in TOML 1.1)
66+ - Tables (string-keyed maps) *)
67+type t =
68+ | String of string
69+ | Int of int64
70+ | Float of float
71+ | Bool of bool
72+ | Datetime of string (** Offset datetime, e.g. [1979-05-27T07:32:00Z] *)
73+ | Datetime_local of string (** Local datetime, e.g. [1979-05-27T07:32:00] *)
74+ | Date_local of string (** Local date, e.g. [1979-05-27] *)
75+ | Time_local of string (** Local time, e.g. [07:32:00] *)
76+ | Array of t list
77+ | Table of (string * t) list
78+(** A TOML value. Tables preserve key insertion order. *)
79+80+(** {1:construct Value Constructors}
81+82+ These functions create TOML values. Use them to build TOML documents
83+ programmatically. *)
84+85+val string : string -> t
86+(** [string s] creates a string value. *)
87+88+val int : int64 -> t
89+(** [int i] creates an integer value. *)
90+91+val int_of_int : int -> t
92+(** [int_of_int i] creates an integer value from an [int]. *)
93+94+val float : float -> t
95+(** [float f] creates a float value. *)
96+97+val bool : bool -> t
98+(** [bool b] creates a boolean value. *)
99+100+val array : t list -> t
101+(** [array vs] creates an array value from a list of values.
102+ TOML 1.1 allows heterogeneous arrays. *)
103104+val table : (string * t) list -> t
105+(** [table pairs] creates a table value from key-value pairs.
106+ Keys should be unique; later bindings shadow earlier ones during lookup. *)
107108+val datetime : string -> t
109+(** [datetime s] creates an offset datetime value.
110+ The string should be in RFC 3339 format with timezone,
111+ e.g. ["1979-05-27T07:32:00Z"] or ["1979-05-27T07:32:00-07:00"]. *)
112113+val datetime_local : string -> t
114+(** [datetime_local s] creates a local datetime value (no timezone).
115+ E.g. ["1979-05-27T07:32:00"]. *)
116117+val date_local : string -> t
118+(** [date_local s] creates a local date value.
119+ E.g. ["1979-05-27"]. *)
120121+val time_local : string -> t
122+(** [time_local s] creates a local time value.
123+ E.g. ["07:32:00"] or ["07:32:00.999"]. *)
124125+(** {1:access Value Accessors}
0126127+ These functions extract OCaml values from TOML values.
128+ They raise [Invalid_argument] if the value is not of the expected type. *)
0129130+val to_string : t -> string
131+(** [to_string t] returns the string if [t] is a [String].
132+ @raise Invalid_argument if [t] is not a string. *)
0133134+val to_string_opt : t -> string option
135+(** [to_string_opt t] returns [Some s] if [t] is [String s], [None] otherwise. *)
136137+val to_int : t -> int64
138+(** [to_int t] returns the integer if [t] is an [Int].
139+ @raise Invalid_argument if [t] is not an integer. *)
140141+val to_int_opt : t -> int64 option
142+(** [to_int_opt t] returns [Some i] if [t] is [Int i], [None] otherwise. *)
0143144+val to_float : t -> float
145+(** [to_float t] returns the float if [t] is a [Float].
146+ @raise Invalid_argument if [t] is not a float. *)
147148+val to_float_opt : t -> float option
149+(** [to_float_opt t] returns [Some f] if [t] is [Float f], [None] otherwise. *)
150+151+val to_bool : t -> bool
152+(** [to_bool t] returns the boolean if [t] is a [Bool].
153+ @raise Invalid_argument if [t] is not a boolean. *)
154+155+val to_bool_opt : t -> bool option
156+(** [to_bool_opt t] returns [Some b] if [t] is [Bool b], [None] otherwise. *)
157+158+val to_array : t -> t list
159+(** [to_array t] returns the list if [t] is an [Array].
160+ @raise Invalid_argument if [t] is not an array. *)
161+162+val to_array_opt : t -> t list option
163+(** [to_array_opt t] returns [Some vs] if [t] is [Array vs], [None] otherwise. *)
164+165+val to_table : t -> (string * t) list
166+(** [to_table t] returns the association list if [t] is a [Table].
167+ @raise Invalid_argument if [t] is not a table. *)
168+169+val to_table_opt : t -> (string * t) list option
170+(** [to_table_opt t] returns [Some pairs] if [t] is [Table pairs], [None] otherwise. *)
171+172+val to_datetime : t -> string
173+(** [to_datetime t] returns the datetime string for any datetime type.
174+ @raise Invalid_argument if [t] is not a datetime variant. *)
175+176+val to_datetime_opt : t -> string option
177+(** [to_datetime_opt t] returns [Some s] if [t] is any datetime variant. *)
178+179+(** {2 Type Predicates} *)
180+181+val is_string : t -> bool
182+(** [is_string t] is [true] iff [t] is a [String]. *)
183+184+val is_int : t -> bool
185+(** [is_int t] is [true] iff [t] is an [Int]. *)
186+187+val is_float : t -> bool
188+(** [is_float t] is [true] iff [t] is a [Float]. *)
189+190+val is_bool : t -> bool
191+(** [is_bool t] is [true] iff [t] is a [Bool]. *)
192+193+val is_array : t -> bool
194+(** [is_array t] is [true] iff [t] is an [Array]. *)
195+196+val is_table : t -> bool
197+(** [is_table t] is [true] iff [t] is a [Table]. *)
198+199+val is_datetime : t -> bool
200+(** [is_datetime t] is [true] iff [t] is any datetime variant. *)
201+202+(** {1:navigate Table Navigation}
203+204+ Functions for navigating and querying TOML tables. *)
205+206+val find : string -> t -> t
207+(** [find key t] returns the value associated with [key] in table [t].
208+ @raise Invalid_argument if [t] is not a table.
209+ @raise Not_found if [key] is not in the table. *)
210+211+val find_opt : string -> t -> t option
212+(** [find_opt key t] returns [Some v] if [key] maps to [v] in table [t],
213+ or [None] if [key] is not bound or [t] is not a table. *)
214+215+val mem : string -> t -> bool
216+(** [mem key t] is [true] if [key] is bound in table [t], [false] otherwise.
217+ Returns [false] if [t] is not a table. *)
218+219+val keys : t -> string list
220+(** [keys t] returns all keys in table [t].
221+ @raise Invalid_argument if [t] is not a table. *)
222+223+val get : string list -> t -> t
224+(** [get path t] navigates through nested tables following [path].
225+ For example, [get ["server"; "port"] t] returns [t.server.port].
226+ @raise Invalid_argument if any intermediate value is not a table.
227+ @raise Not_found if any key in [path] is not found. *)
228+229+val get_opt : string list -> t -> t option
230+(** [get_opt path t] is like [get] but returns [None] on any error. *)
231+232+val ( .%{} ) : t -> string list -> t
233+(** [t.%{path}] is [get path t].
234+235+ Example: [config.%{["database"; "port"]}]
236+237+ @raise Invalid_argument if any intermediate value is not a table.
238+ @raise Not_found if any key in the path is not found. *)
239+240+val ( .%{}<- ) : t -> string list -> t -> t
241+(** [t.%{path} <- v] returns a new table with value [v] at [path].
242+ Creates intermediate tables as needed.
243+244+ Example: [config.%{["server"; "host"]} <- string "localhost"]
245+246+ @raise Invalid_argument if [t] is not a table or if an intermediate
247+ value exists but is not a table. *)
248+249+(** {1:decode Decoding (Parsing)}
250+251+ Parse TOML from various sources. *)
252+253+val of_string : string -> (t, Tomlt_error.t) result
254+(** [of_string s] parses [s] as a TOML document. *)
255+256+val of_reader : ?file:string -> Bytes.Reader.t -> (t, Tomlt_error.t) result
257+(** [of_reader r] parses a TOML document from reader [r].
258+ @param file Optional filename for error messages. *)
259+260+val parse : string -> t
261+(** [parse s] parses [s] as a TOML document.
262+ @raise Error.Error on parse errors. *)
263+264+val parse_reader : ?file:string -> Bytes.Reader.t -> t
265+(** [parse_reader r] parses a TOML document from reader [r].
266+ @param file Optional filename for error messages.
267+ @raise Error.Error on parse errors. *)
268+269+(** {1:encode Encoding}
270+271+ Encode TOML values to various outputs. *)
272+273+val to_toml_string : t -> string
274+(** [to_toml_string t] encodes [t] as a TOML document string.
275+ @raise Invalid_argument if [t] is not a [Table]. *)
276+277+val to_buffer : Buffer.t -> t -> unit
278+(** [to_buffer buf t] writes [t] as TOML to buffer [buf].
279+ @raise Invalid_argument if [t] is not a [Table]. *)
280+281+val to_writer : Bytes.Writer.t -> t -> unit
282+(** [to_writer w t] writes [t] as TOML to writer [w].
283+ Useful for streaming output without building the full string in memory.
284+ @raise Invalid_argument if [t] is not a [Table]. *)
285+286+(** {1:pp Pretty Printing} *)
287+288+val pp : Format.formatter -> t -> unit
289+(** [pp fmt t] pretty-prints [t] in TOML format. *)
290+291+val pp_value : Format.formatter -> t -> unit
292+(** [pp_value fmt t] pretty-prints a single TOML value (not a full document).
293+ Useful for debugging. Tables are printed as inline tables. *)
294+295+val equal : t -> t -> bool
296+(** [equal a b] is structural equality on TOML values.
297+ NaN floats are considered equal to each other. *)
298+299+val compare : t -> t -> int
300+(** [compare a b] is a total ordering on TOML values. *)
301302(** {1:errors Error Handling} *)
303304module Error = Tomlt_error
305+(** Structured error types for TOML parsing and encoding.
306+307+ See {!Tomlt_error} for detailed documentation. *)
308+309+(** {1:internal Internal}
310+311+ These functions are primarily for testing and interoperability.
312+ They may change between versions. *)
313+314+module Internal : sig
315+ val to_tagged_json : t -> string
316+ (** Convert TOML value to tagged JSON format used by toml-test. *)
317+318+ val of_tagged_json : string -> t
319+ (** Parse tagged JSON format into TOML value. *)
320+321+ val encode_from_tagged_json : string -> (string, string) result
322+ (** Convert tagged JSON to TOML string. For toml-test encoder. *)
323+end
+5-18
lib_eio/tomlt_eio.ml
···3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
56-(** Eio integration for TOML errors.
7-8- This module registers TOML errors with Eio's exception system,
9- allowing them to be used with [Eio.Io] and providing context tracking. *)
10-11module Error = Tomlt.Error
1213-(** Extend Eio.Exn.err with TOML errors *)
14type Eio.Exn.err += E of Error.t
1516-(** Create an Eio.Io exception from a TOML error *)
17let err e = Eio.Exn.create (E e)
1819-(** Register pretty-printer with Eio *)
20let () =
21 Eio.Exn.register_pp (fun f -> function
22 | E e ->
···25 | _ -> false
26 )
2728-(** Convert a Error.Error exception to Eio.Io *)
29let wrap_error f =
30 try f ()
31 with Error.Error e ->
32 raise (err e)
3334-(** Parse TOML with Eio error handling *)
35-let parse_toml ?file input =
36- try Tomlt.parse_toml input
37 with Error.Error e ->
38 let bt = Printexc.get_raw_backtrace () in
39 let eio_exn = err e in
···43 in
44 Printexc.raise_with_backtrace eio_exn bt
4546-(** Read and parse TOML from an Eio flow *)
47let of_flow ?file flow =
48 let input = Eio.Flow.read_all flow in
49- parse_toml ?file input
5051-(** Read and parse TOML from an Eio path *)
52let of_path ~fs path =
53 let file = Eio.Path.(/) fs path |> Eio.Path.native_exn in
54 Eio.Path.load (Eio.Path.(/) fs path)
55- |> parse_toml ~file
5657-(** Write TOML to an Eio flow *)
58let to_flow flow value =
59- let output = Tomlt.encode_toml value in
60 Eio.Flow.copy_string output flow
···3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5000006module Error = Tomlt.Error
708type Eio.Exn.err += E of Error.t
9010let err e = Eio.Exn.create (E e)
11012let () =
13 Eio.Exn.register_pp (fun f -> function
14 | E e ->
···17 | _ -> false
18 )
19020let wrap_error f =
21 try f ()
22 with Error.Error e ->
23 raise (err e)
2425+let parse ?file input =
26+ try Tomlt.parse input
027 with Error.Error e ->
28 let bt = Printexc.get_raw_backtrace () in
29 let eio_exn = err e in
···33 in
34 Printexc.raise_with_backtrace eio_exn bt
35036let of_flow ?file flow =
37 let input = Eio.Flow.read_all flow in
38+ parse ?file input
39040let of_path ~fs path =
41 let file = Eio.Path.(/) fs path |> Eio.Path.native_exn in
42 Eio.Path.load (Eio.Path.(/) fs path)
43+ |> parse ~file
44045let to_flow flow value =
46+ let output = Tomlt.to_toml_string value in
47 Eio.Flow.copy_string output flow
+20-16
lib_eio/tomlt_eio.mli
···3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
56-(** Eio integration for TOML errors.
78- This module registers TOML errors with Eio's exception system,
9- allowing them to be used with {!Eio.Io} and providing context tracking.
1011 {2 Example}
12 {[
···1819(** {1 Eio Exception Integration} *)
2021-(** TOML errors as Eio errors *)
22type Eio.Exn.err += E of Tomlt.Error.t
02324-(** Create an [Eio.Io] exception from a TOML error *)
25val err : Tomlt.Error.t -> exn
02627-(** Wrap a function, converting [Tomlt_error.Error] to [Eio.Io] *)
28val wrap_error : (unit -> 'a) -> 'a
02930(** {1 Parsing with Eio} *)
3132-(** Parse TOML string with Eio error handling.
33- @param file optional filename for error context *)
34-val parse_toml : ?file:string -> string -> Tomlt.toml_value
03536-(** Read and parse TOML from an Eio flow.
37- @param file optional filename for error context *)
38-val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.toml_value
03940-(** Read and parse TOML from an Eio path *)
41-val of_path : fs:_ Eio.Path.t -> string -> Tomlt.toml_value
04243(** {1 Encoding with Eio} *)
4445-(** Write TOML to an Eio flow *)
46-val to_flow : _ Eio.Flow.sink -> Tomlt.toml_value -> unit
0
···3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
56+(** Eio integration for TOML.
78+ This module provides Eio-native functions for parsing and encoding TOML,
9+ with proper integration into Eio's exception system.
1011 {2 Example}
12 {[
···1819(** {1 Eio Exception Integration} *)
20021type Eio.Exn.err += E of Tomlt.Error.t
22+(** TOML errors as Eio errors. *)
23024val err : Tomlt.Error.t -> exn
25+(** [err e] creates an [Eio.Io] exception from TOML error [e]. *)
26027val wrap_error : (unit -> 'a) -> 'a
28+(** [wrap_error f] runs [f] and converts [Tomlt.Error.Error] to [Eio.Io]. *)
2930(** {1 Parsing with Eio} *)
3132+val parse : ?file:string -> string -> Tomlt.t
33+(** [parse s] parses TOML string [s] with Eio error handling.
34+ @param file optional filename for error context.
35+ @raise Eio.Io on parse errors. *)
3637+val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.t
38+(** [of_flow flow] reads and parses TOML from an Eio flow.
39+ @param file optional filename for error context.
40+ @raise Eio.Io on read or parse errors. *)
4142+val of_path : fs:_ Eio.Path.t -> string -> Tomlt.t
43+(** [of_path ~fs path] reads and parses TOML from a file path.
44+ @raise Eio.Io on file or parse errors. *)
4546(** {1 Encoding with Eio} *)
4748+val to_flow : _ Eio.Flow.sink -> Tomlt.t -> unit
49+(** [to_flow flow t] writes TOML value [t] to an Eio flow.
50+ @raise Invalid_argument if [t] is not a table. *)