···238238239239let run_valid_test toml_file json_file =
240240 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
241241- match Tomlt.decode_string toml_content with
242242- | Error msg -> `Fail (Printf.sprintf "Decode error: %s" msg)
241241+ match Tomlt.of_string toml_content with
242242+ | Error e -> `Fail (Printf.sprintf "Decode error: %s" (Tomlt.Error.to_string e))
243243 | Ok toml ->
244244- let actual_json = Tomlt.toml_to_tagged_json toml in
244244+ let actual_json = Tomlt.Internal.to_tagged_json toml in
245245 let expected_json = In_channel.with_open_bin json_file In_channel.input_all in
246246 if json_equal actual_json expected_json then
247247 `Pass
···251251252252let run_invalid_test toml_file =
253253 let toml_content = In_channel.with_open_bin toml_file In_channel.input_all in
254254- match Tomlt.decode_string toml_content with
254254+ match Tomlt.of_string toml_content with
255255 | Error _ -> `Pass (* Should fail *)
256256 | Ok _ -> `Fail "Should have failed but parsed successfully"
257257···259259let run_encoder_test json_file =
260260 let json_content = In_channel.with_open_bin json_file In_channel.input_all in
261261 (* First, encode JSON to TOML *)
262262- match Tomlt.encode_from_tagged_json json_content with
262262+ match Tomlt.Internal.encode_from_tagged_json json_content with
263263 | Error msg -> `Fail (Printf.sprintf "Encode error: %s" msg)
264264 | Ok toml_output ->
265265 (* Then decode the TOML back to check round-trip *)
266266- match Tomlt.decode_string toml_output with
267267- | Error msg -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" msg toml_output)
266266+ match Tomlt.of_string toml_output with
267267+ | Error e -> `Fail (Printf.sprintf "Round-trip decode error: %s\nTOML was:\n%s" (Tomlt.Error.to_string e) toml_output)
268268 | Ok decoded_toml ->
269269 (* Compare the decoded result with original JSON *)
270270- let actual_json = Tomlt.toml_to_tagged_json decoded_toml in
270270+ let actual_json = Tomlt.Internal.to_tagged_json decoded_toml in
271271 if json_equal actual_json json_content then
272272 `Pass
273273 else
+4-4
bin/toml_test_decoder.ml
···2233let () =
44 let input = In_channel.input_all In_channel.stdin in
55- match Tomlt.decode_string input with
55+ match Tomlt.of_string input with
66 | Ok toml ->
77- let json = Tomlt.toml_to_tagged_json toml in
77+ let json = Tomlt.Internal.to_tagged_json toml in
88 print_string json;
99 print_newline ()
1010- | Error msg ->
1111- Printf.eprintf "Error: %s\n" msg;
1010+ | Error e ->
1111+ Printf.eprintf "Error: %s\n" (Tomlt.Error.to_string e);
1212 exit 1
+1-1
bin/toml_test_encoder.ml
···2233let () =
44 let input = In_channel.input_all In_channel.stdin in
55- match Tomlt.encode_from_tagged_json input with
55+ match Tomlt.Internal.encode_from_tagged_json input with
66 | Ok toml ->
77 print_string toml
88 | Error msg ->
···7788(* TOML value representation *)
991010-type toml_value =
1111- | Toml_string of string
1212- | Toml_int of int64
1313- | Toml_float of float
1414- | Toml_bool of bool
1515- | Toml_datetime of string (* Offset datetime *)
1616- | Toml_datetime_local of string (* Local datetime *)
1717- | Toml_date_local of string (* Local date *)
1818- | Toml_time_local of string (* Local time *)
1919- | Toml_array of toml_value list
2020- | Toml_table of (string * toml_value) list
1010+type t =
1111+ | String of string
1212+ | Int of int64
1313+ | Float of float
1414+ | Bool of bool
1515+ | Datetime of string (* Offset datetime *)
1616+ | Datetime_local of string (* Local datetime *)
1717+ | Date_local of string (* Local date *)
1818+ | Time_local of string (* Local time *)
1919+ | Array of t list
2020+ | Table of (string * t) list
21212222-(* Lexer *)
2222+(* Lexer - works directly on bytes buffer filled from Bytes.Reader *)
23232424type token =
2525 | Tok_lbracket
···4444 | Tok_time_local of string
45454646type lexer = {
4747- mutable input : string;
4747+ input : bytes; (* Buffer containing input data *)
4848+ input_len : int; (* Length of valid data in input *)
4849 mutable pos : int;
4950 mutable line : int;
5051 mutable col : int;
5152 file : string;
5253}
53545454-let make_lexer ?(file = "-") input =
5555- { input; pos = 0; line = 1; col = 1; file }
5555+(* Create lexer from string (copies to bytes) *)
5656+let make_lexer ?(file = "-") s =
5757+ let input = Bytes.of_string s in
5858+ { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
5959+6060+(* Create lexer directly from Bytes.Reader - reads all data into buffer *)
6161+let make_lexer_from_reader ?(file = "-") r =
6262+ (* Read all slices into a buffer *)
6363+ let buf = Buffer.create 4096 in
6464+ let rec read_all () =
6565+ let slice = Bytes.Reader.read r in
6666+ if Bytes.Slice.is_eod slice then ()
6767+ else begin
6868+ Bytes.Slice.add_to_buffer buf slice;
6969+ read_all ()
7070+ end
7171+ in
7272+ read_all ();
7373+ let input = Buffer.to_bytes buf in
7474+ { input; input_len = Bytes.length input; pos = 0; line = 1; col = 1; file }
56755757-let is_eof l = l.pos >= String.length l.input
7676+let is_eof l = l.pos >= l.input_len
58775959-let peek l = if is_eof l then None else Some l.input.[l.pos]
7878+let peek l = if is_eof l then None else Some (Bytes.get l.input l.pos)
60796180let peek2 l =
6262- if l.pos + 1 >= String.length l.input then None
6363- else Some l.input.[l.pos + 1]
8181+ if l.pos + 1 >= l.input_len then None
8282+ else Some (Bytes.get l.input (l.pos + 1))
64836584let peek_n l n =
6666- if l.pos + n - 1 >= String.length l.input then None
6767- else Some (String.sub l.input l.pos n)
8585+ if l.pos + n - 1 >= l.input_len then None
8686+ else Some (Bytes.sub_string l.input l.pos n)
68876988let advance l =
7089 if not (is_eof l) then begin
7171- if l.input.[l.pos] = '\n' then begin
9090+ if Bytes.get l.input l.pos = '\n' then begin
7291 l.line <- l.line + 1;
7392 l.col <- 1
7493 end else
···8099 for _ = 1 to n do advance l done
8110082101let skip_whitespace l =
8383- while not (is_eof l) && (l.input.[l.pos] = ' ' || l.input.[l.pos] = '\t') do
102102+ while not (is_eof l) && (Bytes.get l.input l.pos = ' ' || Bytes.get l.input l.pos = '\t') do
84103 advance l
85104 done
86105106106+(* Helper functions for bytes access *)
107107+let[@inline] get_char l pos = Bytes.unsafe_get l.input pos
108108+let[@inline] get_current l = Bytes.unsafe_get l.input l.pos
109109+let sub_string l pos len = Bytes.sub_string l.input pos len
110110+111111+(* Helper to create error location from lexer state *)
112112+let lexer_loc l = Tomlt_error.loc ~file:l.file ~line:l.line ~column:l.col ()
113113+87114(* Get expected byte length of UTF-8 char from first byte *)
88115let utf8_byte_length_from_first_byte c =
89116 let code = Char.code c in
···94121 else if code < 0xF8 then 4
95122 else 0 (* Invalid: 5+ byte sequence *)
961239797-(* Validate UTF-8 at position using uutf, returns byte length *)
9898-let validate_utf8_at_pos input pos line =
9999- if pos >= String.length input then
100100- failwith "Unexpected end of input";
101101- let byte_len = utf8_byte_length_from_first_byte input.[pos] in
124124+(* Validate UTF-8 at position in lexer's bytes buffer, returns byte length *)
125125+let validate_utf8_at_pos_bytes l =
126126+ if l.pos >= l.input_len then
127127+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
128128+ let byte_len = utf8_byte_length_from_first_byte (Bytes.unsafe_get l.input l.pos) in
102129 if byte_len = 0 then
103103- failwith (Printf.sprintf "Invalid UTF-8 sequence at line %d" line);
104104- if pos + byte_len > String.length input then
105105- failwith (Printf.sprintf "Incomplete UTF-8 sequence at line %d" line);
130130+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
131131+ if l.pos + byte_len > l.input_len then
132132+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Incomplete_utf8;
106133 (* Validate using uutf - it checks overlong encodings, surrogates, etc. *)
107107- let sub = String.sub input pos byte_len in
134134+ let sub = Bytes.sub_string l.input l.pos byte_len in
108135 let valid = ref false in
109136 Uutf.String.fold_utf_8 (fun () _ -> function
110137 | `Uchar _ -> valid := true
111138 | `Malformed _ -> ()
112139 ) () sub;
113140 if not !valid then
114114- failwith (Printf.sprintf "Invalid UTF-8 sequence at line %d" line);
141141+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Invalid_utf8;
115142 byte_len
116143117144(* UTF-8 validation - validates and advances over a single UTF-8 character *)
118145let validate_utf8_char l =
119119- let byte_len = validate_utf8_at_pos l.input l.pos l.line in
146146+ let byte_len = validate_utf8_at_pos_bytes l in
120147 for _ = 1 to byte_len do advance l done
121148122149let skip_comment l =
123123- if not (is_eof l) && l.input.[l.pos] = '#' then begin
150150+ if not (is_eof l) && get_current l = '#' then begin
124151 (* Validate comment characters *)
125152 advance l;
126153 let continue = ref true in
127127- while !continue && not (is_eof l) && l.input.[l.pos] <> '\n' do
128128- let c = l.input.[l.pos] in
154154+ while !continue && not (is_eof l) && get_current l <> '\n' do
155155+ let c = get_current l in
129156 let code = Char.code c in
130157 (* CR is only valid if followed by LF (CRLF at end of comment) *)
131158 if c = '\r' then begin
132159 (* Check if this CR is followed by LF - if so, it ends the comment *)
133133- if l.pos + 1 < String.length l.input && l.input.[l.pos + 1] = '\n' then
160160+ if l.pos + 1 < l.input_len && get_char l (l.pos + 1) = '\n' then
134161 (* This is CRLF - stop the loop, let the main lexer handle it *)
135162 continue := false
136163 else
137137- failwith (Printf.sprintf "Bare carriage return not allowed in comment at line %d" l.line)
164164+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Bare_carriage_return
138165 end else if code >= 0x80 then begin
139166 (* Multi-byte UTF-8 character - validate it *)
140167 validate_utf8_char l
141168 end else begin
142169 (* ASCII control characters other than tab are not allowed in comments *)
143170 if code < 0x09 || (code > 0x09 && code < 0x20) || code = 0x7F then
144144- failwith (Printf.sprintf "Control character U+%04X not allowed in comment at line %d" code l.line);
171171+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
145172 advance l
146173 end
147174 done
···150177let skip_ws_and_comments l =
151178 let rec loop () =
152179 skip_whitespace l;
153153- if not (is_eof l) && l.input.[l.pos] = '#' then begin
180180+ if not (is_eof l) && get_current l = '#' then begin
154181 skip_comment l;
155182 loop ()
156183 end
···170197 if c >= '0' && c <= '9' then Char.code c - Char.code '0'
171198 else if c >= 'a' && c <= 'f' then Char.code c - Char.code 'a' + 10
172199 else if c >= 'A' && c <= 'F' then Char.code c - Char.code 'A' + 10
173173- else failwith "Invalid hex digit"
200200+ else Tomlt_error.raise_number Invalid_hex_digit
174201175175-(* Parse Unicode escape and convert to UTF-8 using uutf *)
176176-let unicode_to_utf8 codepoint =
202202+(* Convert Unicode codepoint to UTF-8 using uutf *)
203203+let codepoint_to_utf8 codepoint =
177204 if codepoint < 0 || codepoint > 0x10FFFF then
178205 failwith (Printf.sprintf "Invalid Unicode codepoint: U+%X" codepoint);
179206 if codepoint >= 0xD800 && codepoint <= 0xDFFF then
180180- failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%X" codepoint);
207207+ failwith (Printf.sprintf "Surrogate codepoint not allowed: U+%04X" codepoint);
208208+ let buf = Buffer.create 4 in
209209+ Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
210210+ Buffer.contents buf
211211+212212+(* Parse Unicode escape with error location from lexer *)
213213+let unicode_to_utf8 l codepoint =
214214+ if codepoint < 0 || codepoint > 0x10FFFF then
215215+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_codepoint codepoint);
216216+ if codepoint >= 0xD800 && codepoint <= 0xDFFF then
217217+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Surrogate_codepoint codepoint);
181218 let buf = Buffer.create 4 in
182219 Uutf.Buffer.add_utf_8 buf (Uchar.of_int codepoint);
183220 Buffer.contents buf
184221185222let parse_escape l =
186223 advance l; (* skip backslash *)
187187- if is_eof l then failwith "Unexpected end of input in escape sequence";
188188- let c = l.input.[l.pos] in
224224+ if is_eof l then
225225+ Tomlt_error.raise_lexer ~location:(lexer_loc l) Unexpected_eof;
226226+ let c = get_current l in
189227 advance l;
190228 match c with
191229 | 'b' -> "\b"
···198236 | '\\' -> "\\"
199237 | 'x' ->
200238 (* \xHH - 2 hex digits *)
201201- if l.pos + 1 >= String.length l.input then
202202- failwith "Incomplete \\x escape sequence";
203203- let c1 = l.input.[l.pos] in
204204- let c2 = l.input.[l.pos + 1] in
239239+ if l.pos + 1 >= l.input_len then
240240+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\x");
241241+ let c1 = get_char l l.pos in
242242+ let c2 = get_char l (l.pos + 1) in
205243 if not (is_hex_digit c1 && is_hex_digit c2) then
206206- failwith "Invalid \\x escape sequence";
244244+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\x");
207245 let cp = (hex_value c1 * 16) + hex_value c2 in
208246 advance l; advance l;
209209- unicode_to_utf8 cp
247247+ unicode_to_utf8 l cp
210248 | 'u' ->
211249 (* \uHHHH - 4 hex digits *)
212212- if l.pos + 3 >= String.length l.input then
213213- failwith "Incomplete \\u escape sequence";
214214- let s = String.sub l.input l.pos 4 in
250250+ if l.pos + 3 >= l.input_len then
251251+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\u");
252252+ let s = sub_string l l.pos 4 in
215253 for i = 0 to 3 do
216254 if not (is_hex_digit s.[i]) then
217217- failwith "Invalid \\u escape sequence"
255255+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\u")
218256 done;
219257 let cp = int_of_string ("0x" ^ s) in
220258 advance_n l 4;
221221- unicode_to_utf8 cp
259259+ unicode_to_utf8 l cp
222260 | 'U' ->
223261 (* \UHHHHHHHH - 8 hex digits *)
224224- if l.pos + 7 >= String.length l.input then
225225- failwith "Incomplete \\U escape sequence";
226226- let s = String.sub l.input l.pos 8 in
262262+ if l.pos + 7 >= l.input_len then
263263+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Incomplete_escape "\\U");
264264+ let s = sub_string l l.pos 8 in
227265 for i = 0 to 7 do
228266 if not (is_hex_digit s.[i]) then
229229- failwith "Invalid \\U escape sequence"
267267+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_unicode_escape "\\U")
230268 done;
231269 let cp = int_of_string ("0x" ^ s) in
232270 advance_n l 8;
233233- unicode_to_utf8 cp
234234- | _ -> failwith (Printf.sprintf "Invalid escape sequence: \\%c" c)
271271+ unicode_to_utf8 l cp
272272+ | _ ->
273273+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Invalid_escape c)
235274236275let validate_string_char l c is_multiline =
237276 let code = Char.code c in
238277 (* Control characters other than tab (and LF/CR for multiline) are not allowed *)
239278 if code < 0x09 then
240240- failwith (Printf.sprintf "Control character U+%04X not allowed in string at line %d" code l.line);
279279+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
241280 if code > 0x09 && code < 0x20 && not (is_multiline && (code = 0x0A || code = 0x0D)) then
242242- failwith (Printf.sprintf "Control character U+%04X not allowed in string at line %d" code l.line);
281281+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code);
243282 if code = 0x7F then
244244- failwith (Printf.sprintf "Control character U+007F not allowed in string at line %d" l.line)
283283+ Tomlt_error.raise_lexer ~location:(lexer_loc l) (Control_character code)
245284246285(* Validate UTF-8 in string context and add bytes to buffer *)
247286let validate_and_add_utf8_to_buffer l buf =
248248- let byte_len = validate_utf8_at_pos l.input l.pos l.line in
249249- Buffer.add_substring buf l.input l.pos byte_len;
287287+ let byte_len = validate_utf8_at_pos_bytes l in
288288+ Buffer.add_string buf (sub_string l l.pos byte_len);
250289 for _ = 1 to byte_len do advance l done
251290252291let parse_basic_string l =
···270309 let rec loop () =
271310 if is_eof l then
272311 failwith "Unterminated string";
273273- let c = l.input.[l.pos] in
312312+ let c = get_current l in
274313 if multiline then begin
275314 if c = '"' then begin
276315 (* Count consecutive quotes *)
277316 let quote_count = ref 0 in
278317 let p = ref l.pos in
279279- while !p < String.length l.input && l.input.[!p] = '"' do
318318+ while !p < l.input_len && get_char l !p = '"' do
280319 incr quote_count;
281320 incr p
282321 done;
···415454 let rec loop () =
416455 if is_eof l then
417456 failwith "Unterminated literal string";
418418- let c = l.input.[l.pos] in
457457+ let c = get_current l in
419458 if multiline then begin
420459 if c = '\'' then begin
421460 (* Count consecutive quotes *)
422461 let quote_count = ref 0 in
423462 let p = ref l.pos in
424424- while !p < String.length l.input && l.input.[!p] = '\'' do
463463+ while !p < l.input_len && get_char l !p = '\'' do
425464 incr quote_count;
426465 incr p
427466 done;
···502541 match peek_n l 3 with
503542 | Some "inf" ->
504543 advance_n l 3;
505505- let s = String.sub l.input start (l.pos - start) in
544544+ let s = sub_string l start (l.pos - start) in
506545 Tok_float ((if neg then Float.neg_infinity else Float.infinity), s)
507546 | Some "nan" ->
508547 advance_n l 3;
509509- let s = String.sub l.input start (l.pos - start) in
548548+ let s = sub_string l start (l.pos - start) in
510549 Tok_float (Float.nan, s)
511550 | _ ->
512551 (* Check for hex, octal, or binary *)
···530569 if first then failwith "Expected hex digit after 0x"
531570 in
532571 read_hex true;
533533- let s = String.sub l.input num_start (l.pos - num_start) in
572572+ let s = sub_string l num_start (l.pos - num_start) in
534573 let s = String.concat "" (String.split_on_char '_' s) in
535535- let orig = String.sub l.input start (l.pos - start) in
574574+ let orig = sub_string l start (l.pos - start) in
536575 Tok_integer (Int64.of_string ("0x" ^ s), orig)
537576 | Some '0', Some 'o' when not neg ->
538577 advance l; advance l;
···553592 if first then failwith "Expected octal digit after 0o"
554593 in
555594 read_oct true;
556556- let s = String.sub l.input num_start (l.pos - num_start) in
595595+ let s = sub_string l num_start (l.pos - num_start) in
557596 let s = String.concat "" (String.split_on_char '_' s) in
558558- let orig = String.sub l.input start (l.pos - start) in
597597+ let orig = sub_string l start (l.pos - start) in
559598 Tok_integer (Int64.of_string ("0o" ^ s), orig)
560599 | Some '0', Some 'b' when not neg ->
561600 advance l; advance l;
···576615 if first then failwith "Expected binary digit after 0b"
577616 in
578617 read_bin true;
579579- let s = String.sub l.input num_start (l.pos - num_start) in
618618+ let s = sub_string l num_start (l.pos - num_start) in
580619 let s = String.concat "" (String.split_on_char '_' s) in
581581- let orig = String.sub l.input start (l.pos - start) in
620620+ let orig = sub_string l start (l.pos - start) in
582621 Tok_integer (Int64.of_string ("0b" ^ s), orig)
583622 | _ ->
584623 (* Regular decimal number *)
···630669 | _ -> ());
631670 read_int true
632671 | _ -> ());
633633- let s = String.sub l.input start (l.pos - start) in
672672+ let s = sub_string l start (l.pos - start) in
634673 let s' = String.concat "" (String.split_on_char '_' s) in
635674 if !is_float then
636675 Tok_float (float_of_string s', s)
···642681 (* YYYY-MM-DD or HH:MM - need to ensure it's not a bare key that starts with numbers *)
643682 let check_datetime () =
644683 let pos = l.pos in
645645- let len = String.length l.input in
684684+ let len = l.input_len in
646685 (* Check for YYYY-MM-DD pattern - must have exactly this structure *)
647686 if pos + 10 <= len then begin
648648- let c0 = l.input.[pos] in
649649- let c1 = l.input.[pos + 1] in
650650- let c2 = l.input.[pos + 2] in
651651- let c3 = l.input.[pos + 3] in
652652- let c4 = l.input.[pos + 4] in
653653- let c5 = l.input.[pos + 5] in
654654- let c6 = l.input.[pos + 6] in
655655- let c7 = l.input.[pos + 7] in
656656- let c8 = l.input.[pos + 8] in
657657- let c9 = l.input.[pos + 9] in
687687+ let c0 = get_char l pos in
688688+ let c1 = get_char l (pos + 1) in
689689+ let c2 = get_char l (pos + 2) in
690690+ let c3 = get_char l (pos + 3) in
691691+ let c4 = get_char l (pos + 4) in
692692+ let c5 = get_char l (pos + 5) in
693693+ let c6 = get_char l (pos + 6) in
694694+ let c7 = get_char l (pos + 7) in
695695+ let c8 = get_char l (pos + 8) in
696696+ let c9 = get_char l (pos + 9) in
658697 (* Must match YYYY-MM-DD pattern AND not be followed by bare key chars (except T or space for time) *)
659698 if is_digit c0 && is_digit c1 && is_digit c2 && is_digit c3 && c4 = '-' &&
660699 is_digit c5 && is_digit c6 && c7 = '-' && is_digit c8 && is_digit c9 then begin
661700 (* Check what follows - if it's a bare key char other than T/t/space, it's not a date *)
662701 if pos + 10 < len then begin
663663- let next = l.input.[pos + 10] in
702702+ let next = get_char l (pos + 10) in
664703 if next = 'T' || next = 't' then
665704 `Date (* Datetime continues with time part *)
666705 else if next = ' ' || next = '\t' then begin
667706 (* Check if followed by = (key context) or time part *)
668707 let rec skip_ws p =
669708 if p >= len then p
670670- else match l.input.[p] with
709709+ else match get_char l p with
671710 | ' ' | '\t' -> skip_ws (p + 1)
672711 | _ -> p
673712 in
674713 let after_ws = skip_ws (pos + 11) in
675675- if after_ws < len && l.input.[after_ws] = '=' then
714714+ if after_ws < len && get_char l after_ws = '=' then
676715 `Other (* It's a key followed by = *)
677677- else if after_ws < len && is_digit l.input.[after_ws] then
716716+ else if after_ws < len && is_digit (get_char l after_ws) then
678717 `Date (* Could be "2001-02-03 12:34:56" format *)
679718 else
680719 `Date
···693732 else
694733 `Other
695734 end else if pos + 5 <= len then begin
696696- let c0 = l.input.[pos] in
697697- let c1 = l.input.[pos + 1] in
698698- let c2 = l.input.[pos + 2] in
699699- let c3 = l.input.[pos + 3] in
700700- let c4 = l.input.[pos + 4] in
735735+ let c0 = get_char l pos in
736736+ let c1 = get_char l (pos + 1) in
737737+ let c2 = get_char l (pos + 2) in
738738+ let c3 = get_char l (pos + 3) in
739739+ let c4 = get_char l (pos + 4) in
701740 if is_digit c0 && is_digit c1 && c2 = ':' && is_digit c3 && is_digit c4 then
702741 `Time
703742 else
···920959 skip_ws_and_comments l;
921960 if is_eof l then Tok_eof
922961 else begin
923923- let c = l.input.[l.pos] in
962962+ let c = get_current l in
924963 match c with
925964 | '[' -> advance l; Tok_lbracket
926965 | ']' -> advance l; Tok_rbracket
···953992 (* A key like -01 should be followed by whitespace then =, not by . or e (number syntax) *)
954993 let is_key_context =
955994 let rec scan_ahead p =
956956- if p >= String.length l.input then false
995995+ if p >= l.input_len then false
957996 else
958958- let c = l.input.[p] in
997997+ let c = get_char l p in
959998 if is_digit c || c = '_' then scan_ahead (p + 1)
960999 else if c = ' ' || c = '\t' then
9611000 (* Skip whitespace and check for = *)
9621001 let rec skip_ws pp =
963963- if pp >= String.length l.input then false
964964- else match l.input.[pp] with
10021002+ if pp >= l.input_len then false
10031003+ else match get_char l pp with
9651004 | ' ' | '\t' -> skip_ws (pp + 1)
9661005 | '=' -> true
9671006 | _ -> false
···9701009 else if c = '=' then true
9711010 else if c = '.' then
9721011 (* Check if . is followed by digit (number) vs letter/underscore (dotted key) *)
973973- if p + 1 < String.length l.input then
974974- let next = l.input.[p + 1] in
10121012+ if p + 1 < l.input_len then
10131013+ let next = get_char l (p + 1) in
9751014 if is_digit next then false (* It's a decimal number like -3.14 *)
9761015 else if is_bare_key_char next then true (* Dotted key *)
9771016 else false
···9861025 in
9871026 if is_key_context then begin
9881027 (* Treat as bare key *)
989989- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
10281028+ while not (is_eof l) && is_bare_key_char (get_current l) do
9901029 advance l
9911030 done;
992992- Tok_bare_key (String.sub l.input start (l.pos - start))
10311031+ Tok_bare_key (sub_string l start (l.pos - start))
9931032 end else
9941033 parse_number l
9951034 | Some 'i' ->
9961035 (* Check for inf *)
997997- if l.pos + 3 < String.length l.input &&
998998- l.input.[l.pos + 1] = 'i' && l.input.[l.pos + 2] = 'n' && l.input.[l.pos + 3] = 'f' then begin
10361036+ if l.pos + 3 < l.input_len &&
10371037+ get_char l (l.pos + 1) = 'i' && get_char l (l.pos + 2) = 'n' && get_char l (l.pos + 3) = 'f' then begin
9991038 advance_n l 4;
10001000- let s = String.sub l.input start (l.pos - start) in
10391039+ let s = sub_string l start (l.pos - start) in
10011040 if sign = '-' then Tok_float (Float.neg_infinity, s)
10021041 else Tok_float (Float.infinity, s)
10031042 end else if sign = '-' then begin
10041043 (* Could be bare key like -inf-key *)
10051005- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
10441044+ while not (is_eof l) && is_bare_key_char (get_current l) do
10061045 advance l
10071046 done;
10081008- Tok_bare_key (String.sub l.input start (l.pos - start))
10471047+ Tok_bare_key (sub_string l start (l.pos - start))
10091048 end else
10101049 failwith (Printf.sprintf "Unexpected character after %c" sign)
10111050 | Some 'n' ->
10121051 (* Check for nan *)
10131013- if l.pos + 3 < String.length l.input &&
10141014- l.input.[l.pos + 1] = 'n' && l.input.[l.pos + 2] = 'a' && l.input.[l.pos + 3] = 'n' then begin
10521052+ if l.pos + 3 < l.input_len &&
10531053+ get_char l (l.pos + 1) = 'n' && get_char l (l.pos + 2) = 'a' && get_char l (l.pos + 3) = 'n' then begin
10151054 advance_n l 4;
10161016- let s = String.sub l.input start (l.pos - start) in
10551055+ let s = sub_string l start (l.pos - start) in
10171056 Tok_float (Float.nan, s) (* Sign on NaN doesn't change the value *)
10181057 end else if sign = '-' then begin
10191058 (* Could be bare key like -name *)
10201020- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
10591059+ while not (is_eof l) && is_bare_key_char (get_current l) do
10211060 advance l
10221061 done;
10231023- Tok_bare_key (String.sub l.input start (l.pos - start))
10621062+ Tok_bare_key (sub_string l start (l.pos - start))
10241063 end else
10251064 failwith (Printf.sprintf "Unexpected character after %c" sign)
10261065 | _ when sign = '-' ->
10271066 (* Bare key starting with - like -key or --- *)
10281028- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
10671067+ while not (is_eof l) && is_bare_key_char (get_current l) do
10291068 advance l
10301069 done;
10311031- Tok_bare_key (String.sub l.input start (l.pos - start))
10701070+ Tok_bare_key (sub_string l start (l.pos - start))
10321071 | _ -> failwith (Printf.sprintf "Unexpected character after %c" sign))
10331072 | c when is_digit c ->
10341073 (* Could be number, datetime, or bare key starting with digits *)
···10391078 (* Check for hex/octal/binary prefix first - these are always numbers *)
10401079 let start = l.pos in
10411080 let is_prefixed_number =
10421042- start + 1 < String.length l.input && l.input.[start] = '0' &&
10431043- (let c1 = l.input.[start + 1] in
10811081+ start + 1 < l.input_len && get_char l start = '0' &&
10821082+ (let c1 = get_char l (start + 1) in
10441083 c1 = 'x' || c1 = 'X' || c1 = 'o' || c1 = 'O' || c1 = 'b' || c1 = 'B')
10451084 in
10461085 if is_prefixed_number then
···10501089 - Contains letters (like "123abc")
10511090 - Has leading zeros (like "0123") which would be invalid as a number *)
10521091 let has_leading_zero =
10531053- l.input.[start] = '0' && start + 1 < String.length l.input &&
10541054- let c1 = l.input.[start + 1] in
10921092+ get_char l start = '0' && start + 1 < l.input_len &&
10931093+ let c1 = get_char l (start + 1) in
10551094 is_digit c1
10561095 in
10571096 (* Scan to see if this is a bare key or a number
10581097 - If it looks like scientific notation (digits + e/E + optional sign + digits), it's a number
10591098 - If it contains letters OR dashes between digits, it's a bare key *)
10601099 let rec scan_for_bare_key pos has_dash_between_digits =
10611061- if pos >= String.length l.input then has_dash_between_digits
11001100+ if pos >= l.input_len then has_dash_between_digits
10621101 else
10631063- let c = l.input.[pos] in
11021102+ let c = get_char l pos in
10641103 if is_digit c || c = '_' then scan_for_bare_key (pos + 1) has_dash_between_digits
10651104 else if c = '.' then scan_for_bare_key (pos + 1) has_dash_between_digits
10661105 else if c = '-' then
10671106 (* Dash in key - check what follows *)
10681107 let next_pos = pos + 1 in
10691069- if next_pos < String.length l.input then
10701070- let next = l.input.[next_pos] in
11081108+ if next_pos < l.input_len then
11091109+ let next = get_char l next_pos in
10711110 if is_digit next then
10721111 scan_for_bare_key (next_pos) true (* Dash between digits - bare key *)
10731112 else if is_bare_key_char next then
···10791118 else if c = 'e' || c = 'E' then
10801119 (* Check if this looks like scientific notation *)
10811120 let next_pos = pos + 1 in
10821082- if next_pos >= String.length l.input then true (* Just 'e' at end, bare key *)
11211121+ if next_pos >= l.input_len then true (* Just 'e' at end, bare key *)
10831122 else
10841084- let next = l.input.[next_pos] in
11231123+ let next = get_char l next_pos in
10851124 if next = '+' || next = '-' then
10861125 (* Has exponent sign - check if followed by digit *)
10871126 let after_sign = next_pos + 1 in
10881088- if after_sign < String.length l.input && is_digit l.input.[after_sign] then
11271127+ if after_sign < l.input_len && is_digit (get_char l after_sign) then
10891128 has_dash_between_digits (* Scientific notation, but might have dash earlier *)
10901129 else
10911130 true (* e.g., "3e-abc" - bare key *)
···11001139 in
11011140 if has_leading_zero || scan_for_bare_key start false then begin
11021141 (* It's a bare key *)
11031103- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
11421142+ while not (is_eof l) && is_bare_key_char (get_current l) do
11041143 advance l
11051144 done;
11061106- Tok_bare_key (String.sub l.input start (l.pos - start))
11451145+ Tok_bare_key (sub_string l start (l.pos - start))
11071146 end else
11081147 (* It's a number - use parse_number *)
11091148 parse_number l
···11121151 (* These could be keywords (true, false, inf, nan) or bare keys
11131152 Always read as bare key and let parser interpret *)
11141153 let start = l.pos in
11151115- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
11541154+ while not (is_eof l) && is_bare_key_char (get_current l) do
11161155 advance l
11171156 done;
11181118- Tok_bare_key (String.sub l.input start (l.pos - start))
11571157+ Tok_bare_key (sub_string l start (l.pos - start))
11191158 | c when is_bare_key_char c ->
11201159 let start = l.pos in
11211121- while not (is_eof l) && is_bare_key_char l.input.[l.pos] do
11601160+ while not (is_eof l) && is_bare_key_char (get_current l) do
11221161 advance l
11231162 done;
11241124- Tok_bare_key (String.sub l.input start (l.pos - start))
11631163+ Tok_bare_key (sub_string l start (l.pos - start))
11251164 | c ->
11261165 let code = Char.code c in
11271166 if code < 0x20 || code = 0x7F then
···1155119411561195(* Check if next raw character (without skipping whitespace) matches *)
11571196let next_raw_char_is p c =
11581158- p.lexer.pos < String.length p.lexer.input && p.lexer.input.[p.lexer.pos] = c
11971197+ p.lexer.pos < p.lexer.input_len && get_char p.lexer p.lexer.pos = c
1159119811601199let expect_token p expected =
11611200 let tok = consume_token p in
···1223126212241263let rec parse_value p =
12251264 match peek_token p with
12261226- | Tok_basic_string s -> ignore (consume_token p); Toml_string s
12271227- | Tok_literal_string s -> ignore (consume_token p); Toml_string s
12281228- | Tok_ml_basic_string s -> ignore (consume_token p); Toml_string s
12291229- | Tok_ml_literal_string s -> ignore (consume_token p); Toml_string s
12301230- | Tok_integer (i, _) -> ignore (consume_token p); Toml_int i
12311231- | Tok_float (f, _) -> ignore (consume_token p); Toml_float f
12321232- | Tok_datetime s -> ignore (consume_token p); Toml_datetime s
12331233- | Tok_datetime_local s -> ignore (consume_token p); Toml_datetime_local s
12341234- | Tok_date_local s -> ignore (consume_token p); Toml_date_local s
12351235- | Tok_time_local s -> ignore (consume_token p); Toml_time_local s
12651265+ | Tok_basic_string s -> ignore (consume_token p); String s
12661266+ | Tok_literal_string s -> ignore (consume_token p); String s
12671267+ | Tok_ml_basic_string s -> ignore (consume_token p); String s
12681268+ | Tok_ml_literal_string s -> ignore (consume_token p); String s
12691269+ | Tok_integer (i, _) -> ignore (consume_token p); Int i
12701270+ | Tok_float (f, _) -> ignore (consume_token p); Float f
12711271+ | Tok_datetime s -> ignore (consume_token p); Datetime s
12721272+ | Tok_datetime_local s -> ignore (consume_token p); Datetime_local s
12731273+ | Tok_date_local s -> ignore (consume_token p); Date_local s
12741274+ | Tok_time_local s -> ignore (consume_token p); Time_local s
12361275 | Tok_lbracket -> parse_array p
12371276 | Tok_lbrace -> parse_inline_table p
12381277 | Tok_bare_key s ->
12391278 (* Interpret bare keys as boolean, float keywords, or numbers in value context *)
12401279 ignore (consume_token p);
12411280 (match s with
12421242- | "true" -> Toml_bool true
12431243- | "false" -> Toml_bool false
12441244- | "inf" -> Toml_float Float.infinity
12451245- | "nan" -> Toml_float Float.nan
12811281+ | "true" -> Bool true
12821282+ | "false" -> Bool false
12831283+ | "inf" -> Float Float.infinity
12841284+ | "nan" -> Float Float.nan
12461285 | _ ->
12471286 (* Validate underscore placement in the original string *)
12481287 let validate_underscores str =
···12861325 if String.contains s_no_underscore '.' ||
12871326 String.contains s_no_underscore 'e' ||
12881327 String.contains s_no_underscore 'E' then
12891289- Toml_float (float_of_string s_no_underscore)
13281328+ Float (float_of_string s_no_underscore)
12901329 else
12911291- Toml_int (Int64.of_string s_no_underscore)
13301330+ Int (Int64.of_string s_no_underscore)
12921331 with _ ->
12931332 failwith (Printf.sprintf "Unexpected bare key '%s' as value" s)
12941333 end else
···13041343 match peek_token p with
13051344 | Tok_rbracket ->
13061345 ignore (consume_token p);
13071307- Toml_array (List.rev acc)
13461346+ Array (List.rev acc)
13081347 | _ ->
13091348 let v = parse_value p in
13101349 skip_newlines p;
···13151354 loop (v :: acc)
13161355 | Tok_rbracket ->
13171356 ignore (consume_token p);
13181318- Toml_array (List.rev (v :: acc))
13571357+ Array (List.rev (v :: acc))
13191358 | _ -> failwith "Expected ',' or ']' in array"
13201359 in
13211360 loop []
···13291368 match peek_token p with
13301369 | Tok_rbrace ->
13311370 ignore (consume_token p);
13321332- Toml_table (List.rev acc)
13711371+ Table (List.rev acc)
13331372 | _ ->
13341373 let keys = parse_dotted_key p in
13351374 skip_ws p;
···13611400 loop acc
13621401 | Tok_rbrace ->
13631402 ignore (consume_token p);
13641364- Toml_table (List.rev acc)
14031403+ Table (List.rev acc)
13651404 | _ -> failwith "Expected ',' or '}' in inline table"
13661405 in
13671406 loop []
···13751414 | [] -> failwith "Empty key"
13761415 | [k] -> (k, value)
13771416 | k :: rest ->
13781378- (k, Toml_table [build_nested_table rest value])
14171417+ (k, Table [build_nested_table rest value])
1379141813801419(* Merge two TOML values - used for combining dotted keys in inline tables *)
13811420and merge_toml_values v1 v2 =
13821421 match v1, v2 with
13831383- | Toml_table entries1, Toml_table entries2 ->
14221422+ | Table entries1, Table entries2 ->
13841423 (* Merge the entries *)
13851424 let merged = List.fold_left (fun acc (k, v) ->
13861425 match List.assoc_opt k acc with
···13911430 | None ->
13921431 (k, v) :: acc
13931432 ) entries1 entries2 in
13941394- Toml_table (List.rev merged)
14331433+ Table (List.rev merged)
13951434 | _, _ ->
13961435 (* Can't merge non-table values with same key *)
13971436 failwith "Conflicting keys in inline table"
···1448148714491488(* Table management for the parser *)
14501489type table_state = {
14511451- mutable values : (string * toml_value) list;
14901490+ mutable values : (string * t) list;
14521491 subtables : (string, table_state) Hashtbl.t;
14531492 mutable is_array : bool;
14541493 mutable is_inline : bool;
···15501589 let subtable_values = Hashtbl.fold (fun k sub acc ->
15511590 let v =
15521591 if sub.is_array then
15531553- Toml_array (List.map table_state_to_toml (get_array_elements sub))
15921592+ Array (List.map table_state_to_toml (get_array_elements sub))
15541593 else
15551594 table_state_to_toml sub
15561595 in
15571596 (k, v) :: acc
15581597 ) state.subtables [] in
15591559- Toml_table (List.rev state.values @ subtable_values)
15981598+ Table (List.rev state.values @ subtable_values)
1560159915611600and get_array_elements state =
15621601 List.rev state.array_elements
1563160215641603(* Main parser function *)
15651565-let parse_toml input =
15661566- let lexer = make_lexer input in
16041604+let parse_toml_from_lexer lexer =
15671605 let parser = make_parser lexer in
15681606 let root = create_table_state () in
15691607 let current_table = ref root in
···17861824 parse_document ();
17871825 table_state_to_toml root
1788182618271827+(* Parse TOML from string - creates lexer internally *)
18281828+let parse_toml input =
18291829+ let lexer = make_lexer input in
18301830+ parse_toml_from_lexer lexer
18311831+18321832+(* Parse TOML directly from Bytes.Reader - no intermediate string *)
18331833+let parse_toml_from_reader ?file r =
18341834+ let lexer = make_lexer_from_reader ?file r in
18351835+ parse_toml_from_lexer lexer
18361836+17891837(* Convert TOML to tagged JSON for toml-test compatibility *)
17901838let rec toml_to_tagged_json value =
17911839 match value with
17921792- | Toml_string s ->
18401840+ | String s ->
17931841 Printf.sprintf "{\"type\":\"string\",\"value\":%s}" (json_encode_string s)
17941794- | Toml_int i ->
18421842+ | Int i ->
17951843 Printf.sprintf "{\"type\":\"integer\",\"value\":\"%Ld\"}" i
17961796- | Toml_float f ->
18441844+ | Float f ->
17971845 let value_str =
17981846 (* Normalize exponent format - lowercase e, keep + for positive exponents *)
17991847 let format_exp s =
···19091957 try_precision 1
19101958 in
19111959 Printf.sprintf "{\"type\":\"float\",\"value\":\"%s\"}" value_str
19121912- | Toml_bool b ->
19601960+ | Bool b ->
19131961 Printf.sprintf "{\"type\":\"bool\",\"value\":\"%s\"}" (if b then "true" else "false")
19141914- | Toml_datetime s ->
19621962+ | Datetime s ->
19151963 validate_datetime_string s;
19161964 Printf.sprintf "{\"type\":\"datetime\",\"value\":\"%s\"}" s
19171917- | Toml_datetime_local s ->
19651965+ | Datetime_local s ->
19181966 validate_datetime_string s;
19191967 Printf.sprintf "{\"type\":\"datetime-local\",\"value\":\"%s\"}" s
19201920- | Toml_date_local s ->
19681968+ | Date_local s ->
19211969 validate_date_string s;
19221970 Printf.sprintf "{\"type\":\"date-local\",\"value\":\"%s\"}" s
19231923- | Toml_time_local s ->
19711971+ | Time_local s ->
19241972 validate_time_string s;
19251973 Printf.sprintf "{\"type\":\"time-local\",\"value\":\"%s\"}" s
19261926- | Toml_array items ->
19741974+ | Array items ->
19271975 let json_items = List.map toml_to_tagged_json items in
19281976 Printf.sprintf "[%s]" (String.concat "," json_items)
19291929- | Toml_table pairs ->
19771977+ | Table pairs ->
19301978 let json_pairs = List.map (fun (k, v) ->
19311979 Printf.sprintf "%s:%s" (json_encode_string k) (toml_to_tagged_json v)
19321980 ) pairs in
···19511999 Buffer.add_char buf '"';
19522000 Buffer.contents buf
1953200119541954-(* Main decode function *)
19551955-let decode_string input =
19561956- try
19571957- let toml = parse_toml input in
19581958- Ok toml
19591959- with
19601960- | Failure msg -> Error msg
19611961- | e -> Error (Printexc.to_string e)
19621962-19632002(* Tagged JSON to TOML for encoder *)
19642003let decode_tagged_json_string s =
19652004 (* Simple JSON parser for tagged format *)
···20062045 if !pos + 3 >= len then failwith "Invalid unicode escape";
20072046 let hex = String.sub s !pos 4 in
20082047 let cp = int_of_string ("0x" ^ hex) in
20092009- Buffer.add_string buf (unicode_to_utf8 cp);
20482048+ Buffer.add_string buf (codepoint_to_utf8 cp);
20102049 pos := !pos + 4
20112050 | c -> failwith (Printf.sprintf "Invalid escape: \\%c" c)
20122051 end else begin
···20212060 (* Convert a tagged JSON object to a TOML primitive if applicable *)
20222061 let convert_tagged_value value =
20232062 match value with
20242024- | Toml_table [("type", Toml_string typ); ("value", Toml_string v)]
20252025- | Toml_table [("value", Toml_string v); ("type", Toml_string typ)] ->
20632063+ | Table [("type", String typ); ("value", String v)]
20642064+ | Table [("value", String v); ("type", String typ)] ->
20262065 (match typ with
20272027- | "string" -> Toml_string v
20282028- | "integer" -> Toml_int (Int64.of_string v)
20662066+ | "string" -> String v
20672067+ | "integer" -> Int (Int64.of_string v)
20292068 | "float" ->
20302069 (match v with
20312031- | "inf" -> Toml_float Float.infinity
20322032- | "-inf" -> Toml_float Float.neg_infinity
20332033- | "nan" -> Toml_float Float.nan
20342034- | _ -> Toml_float (float_of_string v))
20352035- | "bool" -> Toml_bool (v = "true")
20362036- | "datetime" -> Toml_datetime v
20372037- | "datetime-local" -> Toml_datetime_local v
20382038- | "date-local" -> Toml_date_local v
20392039- | "time-local" -> Toml_time_local v
20702070+ | "inf" -> Float Float.infinity
20712071+ | "-inf" -> Float Float.neg_infinity
20722072+ | "nan" -> Float Float.nan
20732073+ | _ -> Float (float_of_string v))
20742074+ | "bool" -> Bool (v = "true")
20752075+ | "datetime" -> Datetime v
20762076+ | "datetime-local" -> Datetime_local v
20772077+ | "date-local" -> Date_local v
20782078+ | "time-local" -> Time_local v
20402079 | _ -> failwith (Printf.sprintf "Unknown type: %s" typ))
20412080 | _ -> value
20422081 in
···20462085 match peek () with
20472086 | Some '{' -> parse_object ()
20482087 | Some '[' -> parse_array ()
20492049- | Some '"' -> Toml_string (parse_json_string ())
20882088+ | Some '"' -> String (parse_json_string ())
20502089 | _ -> failwith "Expected value"
2051209020522091 and parse_object () =
···20542093 skip_ws ();
20552094 if peek () = Some '}' then begin
20562095 incr pos;
20572057- Toml_table []
20962096+ Table []
20582097 end else begin
20592098 let pairs = ref [] in
20602099 let first = ref true in
···20682107 pairs := (key, convert_tagged_value value) :: !pairs
20692108 done;
20702109 expect '}';
20712071- Toml_table (List.rev !pairs)
21102110+ Table (List.rev !pairs)
20722111 end
2073211220742113 and parse_array () =
···20762115 skip_ws ();
20772116 if peek () = Some ']' then begin
20782117 incr pos;
20792079- Toml_array []
21182118+ Array []
20802119 end else begin
20812120 let items = ref [] in
20822121 let first = ref true in
···20862125 items := convert_tagged_value (parse_value ()) :: !items
20872126 done;
20882127 expect ']';
20892089- Toml_array (List.rev !items)
21282128+ Array (List.rev !items)
20902129 end
20912130 in
2092213120932132 parse_value ()
2094213320952095-(* Encode TOML value to TOML string *)
20962096-let rec encode_toml_value ?(inline=false) value =
20972097- match value with
20982098- | Toml_string s -> encode_toml_string s
20992099- | Toml_int i -> Int64.to_string i
21002100- | Toml_float f ->
21012101- if Float.is_nan f then "nan"
21022102- else if f = Float.infinity then "inf"
21032103- else if f = Float.neg_infinity then "-inf"
21042104- else
21052105- let s = Printf.sprintf "%.17g" f in
21062106- (* Ensure it looks like a float *)
21072107- if String.contains s '.' || String.contains s 'e' || String.contains s 'E' then s
21082108- else s ^ ".0"
21092109- | Toml_bool b -> if b then "true" else "false"
21102110- | Toml_datetime s -> s
21112111- | Toml_datetime_local s -> s
21122112- | Toml_date_local s -> s
21132113- | Toml_time_local s -> s
21142114- | Toml_array items ->
21152115- let encoded = List.map (encode_toml_value ~inline:true) items in
21162116- Printf.sprintf "[%s]" (String.concat ", " encoded)
21172117- | Toml_table pairs when inline ->
21182118- let encoded = List.map (fun (k, v) ->
21192119- Printf.sprintf "%s = %s" (encode_toml_key k) (encode_toml_value ~inline:true v)
21202120- ) pairs in
21212121- Printf.sprintf "{%s}" (String.concat ", " encoded)
21222122- | Toml_table _ -> failwith "Cannot encode table inline without inline flag"
21342134+(* Streaming TOML encoder - writes directly to a Bytes.Writer *)
2123213521242124-and encode_toml_string s =
21362136+let rec write_toml_string w s =
21252137 (* Check if we need to escape *)
21262138 let needs_escape = String.exists (fun c ->
21272139 let code = Char.code c in
···21292141 code < 0x20 || code = 0x7F
21302142 ) s in
21312143 if needs_escape then begin
21322132- let buf = Buffer.create (String.length s + 2) in
21332133- Buffer.add_char buf '"';
21442144+ Bytes.Writer.write_string w "\"";
21342145 String.iter (fun c ->
21352146 match c with
21362136- | '"' -> Buffer.add_string buf "\\\""
21372137- | '\\' -> Buffer.add_string buf "\\\\"
21382138- | '\n' -> Buffer.add_string buf "\\n"
21392139- | '\r' -> Buffer.add_string buf "\\r"
21402140- | '\t' -> Buffer.add_string buf "\\t"
21412141- | '\b' -> Buffer.add_string buf "\\b"
21422142- | c when Char.code c = 0x0C -> Buffer.add_string buf "\\f"
21472147+ | '"' -> Bytes.Writer.write_string w "\\\""
21482148+ | '\\' -> Bytes.Writer.write_string w "\\\\"
21492149+ | '\n' -> Bytes.Writer.write_string w "\\n"
21502150+ | '\r' -> Bytes.Writer.write_string w "\\r"
21512151+ | '\t' -> Bytes.Writer.write_string w "\\t"
21522152+ | '\b' -> Bytes.Writer.write_string w "\\b"
21532153+ | c when Char.code c = 0x0C -> Bytes.Writer.write_string w "\\f"
21432154 | c when Char.code c < 0x20 || Char.code c = 0x7F ->
21442144- Buffer.add_string buf (Printf.sprintf "\\u%04X" (Char.code c))
21452145- | c -> Buffer.add_char buf c
21552155+ Bytes.Writer.write_string w (Printf.sprintf "\\u%04X" (Char.code c))
21562156+ | c ->
21572157+ let b = Bytes.create 1 in
21582158+ Bytes.set b 0 c;
21592159+ Bytes.Writer.write_bytes w b
21462160 ) s;
21472147- Buffer.add_char buf '"';
21482148- Buffer.contents buf
21492149- end else
21502150- Printf.sprintf "\"%s\"" s
21612161+ Bytes.Writer.write_string w "\""
21622162+ end else begin
21632163+ Bytes.Writer.write_string w "\"";
21642164+ Bytes.Writer.write_string w s;
21652165+ Bytes.Writer.write_string w "\""
21662166+ end
2151216721522152-and encode_toml_key k =
21682168+and write_toml_key w k =
21532169 (* Check if it can be a bare key *)
21542170 let is_bare = String.length k > 0 && String.for_all is_bare_key_char k in
21552155- if is_bare then k else encode_toml_string k
21712171+ if is_bare then Bytes.Writer.write_string w k
21722172+ else write_toml_string w k
2156217321572157-(* Streaming TOML encoder - writes directly to a buffer *)
21582158-let encode_toml_to_buffer buf value =
21742174+and write_toml_value w ?(inline=false) value =
21752175+ match value with
21762176+ | String s -> write_toml_string w s
21772177+ | Int i -> Bytes.Writer.write_string w (Int64.to_string i)
21782178+ | Float f ->
21792179+ if Float.is_nan f then Bytes.Writer.write_string w "nan"
21802180+ else if f = Float.infinity then Bytes.Writer.write_string w "inf"
21812181+ else if f = Float.neg_infinity then Bytes.Writer.write_string w "-inf"
21822182+ else begin
21832183+ let s = Printf.sprintf "%.17g" f in
21842184+ (* Ensure it looks like a float *)
21852185+ let s = if String.contains s '.' || String.contains s 'e' || String.contains s 'E'
21862186+ then s else s ^ ".0" in
21872187+ Bytes.Writer.write_string w s
21882188+ end
21892189+ | Bool b -> Bytes.Writer.write_string w (if b then "true" else "false")
21902190+ | Datetime s -> Bytes.Writer.write_string w s
21912191+ | Datetime_local s -> Bytes.Writer.write_string w s
21922192+ | Date_local s -> Bytes.Writer.write_string w s
21932193+ | Time_local s -> Bytes.Writer.write_string w s
21942194+ | Array items ->
21952195+ Bytes.Writer.write_string w "[";
21962196+ List.iteri (fun i item ->
21972197+ if i > 0 then Bytes.Writer.write_string w ", ";
21982198+ write_toml_value w ~inline:true item
21992199+ ) items;
22002200+ Bytes.Writer.write_string w "]"
22012201+ | Table pairs when inline ->
22022202+ Bytes.Writer.write_string w "{";
22032203+ List.iteri (fun i (k, v) ->
22042204+ if i > 0 then Bytes.Writer.write_string w ", ";
22052205+ write_toml_key w k;
22062206+ Bytes.Writer.write_string w " = ";
22072207+ write_toml_value w ~inline:true v
22082208+ ) pairs;
22092209+ Bytes.Writer.write_string w "}"
22102210+ | Table _ -> failwith "Cannot encode table inline without inline flag"
22112211+22122212+(* True streaming TOML encoder - writes directly to Bytes.Writer *)
22132213+let encode_to_writer w value =
21592214 let has_content = ref false in
2160221522162216+ let write_path path =
22172217+ List.iteri (fun i k ->
22182218+ if i > 0 then Bytes.Writer.write_string w ".";
22192219+ write_toml_key w k
22202220+ ) path
22212221+ in
22222222+21612223 let rec encode_at_path path value =
21622224 match value with
21632163- | Toml_table pairs ->
22252225+ | Table pairs ->
21642226 (* Separate simple values from nested tables *)
21652227 (* Only PURE table arrays (all items are tables) use [[array]] syntax.
21662228 Mixed arrays (primitives + tables) must be encoded inline. *)
21672229 let is_pure_table_array items =
21682168- items <> [] && List.for_all (function Toml_table _ -> true | _ -> false) items
22302230+ items <> [] && List.for_all (function Table _ -> true | _ -> false) items
21692231 in
21702232 let simple, nested = List.partition (fun (_, v) ->
21712233 match v with
21722172- | Toml_table _ -> false
21732173- | Toml_array items -> not (is_pure_table_array items)
22342234+ | Table _ -> false
22352235+ | Array items -> not (is_pure_table_array items)
21742236 | _ -> true
21752237 ) pairs in
2176223821772239 (* Emit simple values first *)
21782240 List.iter (fun (k, v) ->
21792179- Buffer.add_string buf (encode_toml_key k);
21802180- Buffer.add_string buf " = ";
21812181- Buffer.add_string buf (encode_toml_value ~inline:true v);
21822182- Buffer.add_char buf '\n';
22412241+ write_toml_key w k;
22422242+ Bytes.Writer.write_string w " = ";
22432243+ write_toml_value w ~inline:true v;
22442244+ Bytes.Writer.write_string w "\n";
21832245 has_content := true
21842246 ) simple;
21852247···21872249 List.iter (fun (k, v) ->
21882250 let new_path = path @ [k] in
21892251 match v with
21902190- | Toml_table _ ->
21912191- if !has_content then Buffer.add_char buf '\n';
21922192- Buffer.add_char buf '[';
21932193- Buffer.add_string buf (String.concat "." (List.map encode_toml_key new_path));
21942194- Buffer.add_string buf "]\n";
22522252+ | Table _ ->
22532253+ if !has_content then Bytes.Writer.write_string w "\n";
22542254+ Bytes.Writer.write_string w "[";
22552255+ write_path new_path;
22562256+ Bytes.Writer.write_string w "]\n";
21952257 has_content := true;
21962258 encode_at_path new_path v
21972197- | Toml_array items when items <> [] && List.for_all (function Toml_table _ -> true | _ -> false) items ->
22592259+ | Array items when items <> [] && List.for_all (function Table _ -> true | _ -> false) items ->
21982260 (* Pure table array - use [[array]] syntax *)
21992261 List.iter (fun item ->
22002262 match item with
22012201- | Toml_table _ ->
22022202- if !has_content then Buffer.add_char buf '\n';
22032203- Buffer.add_string buf "[[";
22042204- Buffer.add_string buf (String.concat "." (List.map encode_toml_key new_path));
22052205- Buffer.add_string buf "]]\n";
22632263+ | Table _ ->
22642264+ if !has_content then Bytes.Writer.write_string w "\n";
22652265+ Bytes.Writer.write_string w "[[";
22662266+ write_path new_path;
22672267+ Bytes.Writer.write_string w "]]\n";
22062268 has_content := true;
22072269 encode_at_path new_path item
22082270 | _ -> assert false (* Impossible - we checked for_all above *)
22092271 ) items
22102272 | _ ->
22112211- Buffer.add_string buf (encode_toml_key k);
22122212- Buffer.add_string buf " = ";
22132213- Buffer.add_string buf (encode_toml_value ~inline:true v);
22142214- Buffer.add_char buf '\n';
22732273+ write_toml_key w k;
22742274+ Bytes.Writer.write_string w " = ";
22752275+ write_toml_value w ~inline:true v;
22762276+ Bytes.Writer.write_string w "\n";
22152277 has_content := true
22162278 ) nested
22172279 | _ ->
···2220228222212283 encode_at_path [] value
2222228422232223-(* Full TOML encoder with proper table handling *)
22242224-let encode_toml value =
22852285+(* ============================================
22862286+ Public Interface - Constructors
22872287+ ============================================ *)
22882288+22892289+let string s = String s
22902290+let int i = Int i
22912291+let int_of_int i = Int (Int64.of_int i)
22922292+let float f = Float f
22932293+let bool b = Bool b
22942294+let array vs = Array vs
22952295+let table pairs = Table pairs
22962296+let datetime s = Datetime s
22972297+let datetime_local s = Datetime_local s
22982298+let date_local s = Date_local s
22992299+let time_local s = Time_local s
23002300+23012301+(* ============================================
23022302+ Public Interface - Accessors
23032303+ ============================================ *)
23042304+23052305+let to_string = function
23062306+ | String s -> s
23072307+ | _ -> invalid_arg "Tomlt.to_string: not a string"
23082308+23092309+let to_string_opt = function
23102310+ | String s -> Some s
23112311+ | _ -> None
23122312+23132313+let to_int = function
23142314+ | Int i -> i
23152315+ | _ -> invalid_arg "Tomlt.to_int: not an integer"
23162316+23172317+let to_int_opt = function
23182318+ | Int i -> Some i
23192319+ | _ -> None
23202320+23212321+let to_float = function
23222322+ | Float f -> f
23232323+ | _ -> invalid_arg "Tomlt.to_float: not a float"
23242324+23252325+let to_float_opt = function
23262326+ | Float f -> Some f
23272327+ | _ -> None
23282328+23292329+let to_bool = function
23302330+ | Bool b -> b
23312331+ | _ -> invalid_arg "Tomlt.to_bool: not a boolean"
23322332+23332333+let to_bool_opt = function
23342334+ | Bool b -> Some b
23352335+ | _ -> None
23362336+23372337+let to_array = function
23382338+ | Array vs -> vs
23392339+ | _ -> invalid_arg "Tomlt.to_array: not an array"
23402340+23412341+let to_array_opt = function
23422342+ | Array vs -> Some vs
23432343+ | _ -> None
23442344+23452345+let to_table = function
23462346+ | Table pairs -> pairs
23472347+ | _ -> invalid_arg "Tomlt.to_table: not a table"
23482348+23492349+let to_table_opt = function
23502350+ | Table pairs -> Some pairs
23512351+ | _ -> None
23522352+23532353+let to_datetime = function
23542354+ | Datetime s | Datetime_local s | Date_local s | Time_local s -> s
23552355+ | _ -> invalid_arg "Tomlt.to_datetime: not a datetime"
23562356+23572357+let to_datetime_opt = function
23582358+ | Datetime s | Datetime_local s | Date_local s | Time_local s -> Some s
23592359+ | _ -> None
23602360+23612361+(* ============================================
23622362+ Public Interface - Type Predicates
23632363+ ============================================ *)
23642364+23652365+let is_string = function String _ -> true | _ -> false
23662366+let is_int = function Int _ -> true | _ -> false
23672367+let is_float = function Float _ -> true | _ -> false
23682368+let is_bool = function Bool _ -> true | _ -> false
23692369+let is_array = function Array _ -> true | _ -> false
23702370+let is_table = function Table _ -> true | _ -> false
23712371+let is_datetime = function
23722372+ | Datetime _ | Datetime_local _ | Date_local _ | Time_local _ -> true
23732373+ | _ -> false
23742374+23752375+(* ============================================
23762376+ Public Interface - Table Navigation
23772377+ ============================================ *)
23782378+23792379+let find key = function
23802380+ | Table pairs -> List.assoc key pairs
23812381+ | _ -> invalid_arg "Tomlt.find: not a table"
23822382+23832383+let find_opt key = function
23842384+ | Table pairs -> List.assoc_opt key pairs
23852385+ | _ -> None
23862386+23872387+let mem key = function
23882388+ | Table pairs -> List.mem_assoc key pairs
23892389+ | _ -> false
23902390+23912391+let keys = function
23922392+ | Table pairs -> List.map fst pairs
23932393+ | _ -> invalid_arg "Tomlt.keys: not a table"
23942394+23952395+let rec get path t =
23962396+ match path with
23972397+ | [] -> t
23982398+ | key :: rest ->
23992399+ match t with
24002400+ | Table pairs ->
24012401+ (match List.assoc_opt key pairs with
24022402+ | Some v -> get rest v
24032403+ | None -> raise Not_found)
24042404+ | _ -> invalid_arg "Tomlt.get: intermediate value is not a table"
24052405+24062406+let get_opt path t =
24072407+ try Some (get path t) with Not_found | Invalid_argument _ -> None
24082408+24092409+let ( .%{} ) t path = get path t
24102410+24112411+let rec set_at_path path v t =
24122412+ match path with
24132413+ | [] -> v
24142414+ | [key] ->
24152415+ (match t with
24162416+ | Table pairs ->
24172417+ let pairs' = List.filter (fun (k, _) -> k <> key) pairs in
24182418+ Table ((key, v) :: pairs')
24192419+ | _ -> invalid_arg "Tomlt.(.%{}<-): not a table")
24202420+ | key :: rest ->
24212421+ match t with
24222422+ | Table pairs ->
24232423+ let existing = List.assoc_opt key pairs in
24242424+ let subtable = match existing with
24252425+ | Some (Table _ as sub) -> sub
24262426+ | Some _ -> invalid_arg "Tomlt.(.%{}<-): intermediate value is not a table"
24272427+ | None -> Table []
24282428+ in
24292429+ let updated = set_at_path rest v subtable in
24302430+ let pairs' = List.filter (fun (k, _) -> k <> key) pairs in
24312431+ Table ((key, updated) :: pairs')
24322432+ | _ -> invalid_arg "Tomlt.(.%{}<-): not a table"
24332433+24342434+let ( .%{}<- ) t path v = set_at_path path v t
24352435+24362436+(* ============================================
24372437+ Public Interface - Encoding
24382438+ ============================================ *)
24392439+24402440+let to_buffer buf value =
24412441+ let w = Bytes.Writer.of_buffer buf in
24422442+ encode_to_writer w value
24432443+24442444+let to_toml_string value =
22252445 let buf = Buffer.create 256 in
22262226- encode_toml_to_buffer buf value;
24462446+ to_buffer buf value;
22272447 Buffer.contents buf
2228244822292229-(* Streaming encoder that writes directly to a Bytes.Writer *)
22302230-let encode_to_writer w value =
22312231- let buf = Buffer.create 4096 in
22322232- encode_toml_to_buffer buf value;
22332233- Bytes.Writer.write_string w (Buffer.contents buf)
24492449+let to_writer = encode_to_writer
2234245022352235-(* Bytesrw interface *)
24512451+(* ============================================
24522452+ Public Interface - Decoding
24532453+ ============================================ *)
2236245422372237-let decode ?file:_ r =
22382238- let contents = Bytes.Reader.to_string r in
22392239- match decode_string contents with
22402240- | Ok toml -> Ok toml
22412241- | Error msg -> Error msg
24552455+let of_string input =
24562456+ try
24572457+ Ok (parse_toml input)
24582458+ with
24592459+ | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg)))
24602460+ | Tomlt_error.Error e -> Error e
24612461+ | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e))))
2242246222432243-let decode_to_tagged_json ?file:_ r =
22442244- let contents = Bytes.Reader.to_string r in
22452245- match decode_string contents with
22462246- | Ok toml -> Ok (toml_to_tagged_json toml)
22472247- | Error msg -> Error msg
22482248-22492249-let encode_from_tagged_json json_str =
24632463+let of_reader ?file r =
22502464 try
22512251- let toml = decode_tagged_json_string json_str in
22522252- Ok (encode_toml toml)
24652465+ Ok (parse_toml_from_reader ?file r)
22532466 with
22542254- | Failure msg -> Error msg
22552255- | e -> Error (Printexc.to_string e)
24672467+ | Failure msg -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected msg)))
24682468+ | Tomlt_error.Error e -> Error e
24692469+ | e -> Error (Tomlt_error.make (Tomlt_error.Syntax (Tomlt_error.Expected (Printexc.to_string e))))
24702470+24712471+let parse = parse_toml
24722472+24732473+let parse_reader ?file r = parse_toml_from_reader ?file r
24742474+24752475+(* ============================================
24762476+ Public Interface - Pretty Printing
24772477+ ============================================ *)
24782478+24792479+let rec pp_value fmt = function
24802480+ | String s ->
24812481+ Format.fprintf fmt "\"%s\"" (String.escaped s)
24822482+ | Int i ->
24832483+ Format.fprintf fmt "%Ld" i
24842484+ | Float f ->
24852485+ if Float.is_nan f then Format.fprintf fmt "nan"
24862486+ else if f = Float.infinity then Format.fprintf fmt "inf"
24872487+ else if f = Float.neg_infinity then Format.fprintf fmt "-inf"
24882488+ else Format.fprintf fmt "%g" f
24892489+ | Bool b ->
24902490+ Format.fprintf fmt "%s" (if b then "true" else "false")
24912491+ | Datetime s | Datetime_local s | Date_local s | Time_local s ->
24922492+ Format.fprintf fmt "%s" s
24932493+ | Array items ->
24942494+ Format.fprintf fmt "[";
24952495+ List.iteri (fun i item ->
24962496+ if i > 0 then Format.fprintf fmt ", ";
24972497+ pp_value fmt item
24982498+ ) items;
24992499+ Format.fprintf fmt "]"
25002500+ | Table pairs ->
25012501+ Format.fprintf fmt "{";
25022502+ List.iteri (fun i (k, v) ->
25032503+ if i > 0 then Format.fprintf fmt ", ";
25042504+ Format.fprintf fmt "%s = " k;
25052505+ pp_value fmt v
25062506+ ) pairs;
25072507+ Format.fprintf fmt "}"
25082508+25092509+let pp fmt t =
25102510+ Format.fprintf fmt "%s" (to_toml_string t)
25112511+25122512+(* ============================================
25132513+ Public Interface - Equality and Comparison
25142514+ ============================================ *)
25152515+25162516+let rec equal a b =
25172517+ match a, b with
25182518+ | String s1, String s2 -> String.equal s1 s2
25192519+ | Int i1, Int i2 -> Int64.equal i1 i2
25202520+ | Float f1, Float f2 ->
25212521+ (* NaN = NaN for TOML equality *)
25222522+ (Float.is_nan f1 && Float.is_nan f2) || Float.equal f1 f2
25232523+ | Bool b1, Bool b2 -> Bool.equal b1 b2
25242524+ | Datetime s1, Datetime s2 -> String.equal s1 s2
25252525+ | Datetime_local s1, Datetime_local s2 -> String.equal s1 s2
25262526+ | Date_local s1, Date_local s2 -> String.equal s1 s2
25272527+ | Time_local s1, Time_local s2 -> String.equal s1 s2
25282528+ | Array vs1, Array vs2 ->
25292529+ List.length vs1 = List.length vs2 &&
25302530+ List.for_all2 equal vs1 vs2
25312531+ | Table ps1, Table ps2 ->
25322532+ List.length ps1 = List.length ps2 &&
25332533+ List.for_all2 (fun (k1, v1) (k2, v2) ->
25342534+ String.equal k1 k2 && equal v1 v2
25352535+ ) ps1 ps2
25362536+ | _ -> false
25372537+25382538+let type_order = function
25392539+ | String _ -> 0
25402540+ | Int _ -> 1
25412541+ | Float _ -> 2
25422542+ | Bool _ -> 3
25432543+ | Datetime _ -> 4
25442544+ | Datetime_local _ -> 5
25452545+ | Date_local _ -> 6
25462546+ | Time_local _ -> 7
25472547+ | Array _ -> 8
25482548+ | Table _ -> 9
2256254922572257-(* Re-export the error module *)
25502550+let rec compare a b =
25512551+ let ta, tb = type_order a, type_order b in
25522552+ if ta <> tb then Int.compare ta tb
25532553+ else match a, b with
25542554+ | String s1, String s2 -> String.compare s1 s2
25552555+ | Int i1, Int i2 -> Int64.compare i1 i2
25562556+ | Float f1, Float f2 -> Float.compare f1 f2
25572557+ | Bool b1, Bool b2 -> Bool.compare b1 b2
25582558+ | Datetime s1, Datetime s2 -> String.compare s1 s2
25592559+ | Datetime_local s1, Datetime_local s2 -> String.compare s1 s2
25602560+ | Date_local s1, Date_local s2 -> String.compare s1 s2
25612561+ | Time_local s1, Time_local s2 -> String.compare s1 s2
25622562+ | Array vs1, Array vs2 ->
25632563+ List.compare compare vs1 vs2
25642564+ | Table ps1, Table ps2 ->
25652565+ List.compare (fun (k1, v1) (k2, v2) ->
25662566+ let c = String.compare k1 k2 in
25672567+ if c <> 0 then c else compare v1 v2
25682568+ ) ps1 ps2
25692569+ | _ -> 0 (* Impossible - handled by type_order check *)
25702570+25712571+(* ============================================
25722572+ Error Module
25732573+ ============================================ *)
25742574+22582575module Error = Tomlt_error
25762576+25772577+(* ============================================
25782578+ Internal Module (for testing)
25792579+ ============================================ *)
25802580+25812581+module Internal = struct
25822582+ let to_tagged_json = toml_to_tagged_json
25832583+ let of_tagged_json = decode_tagged_json_string
25842584+25852585+ let encode_from_tagged_json json_str =
25862586+ try
25872587+ let toml = decode_tagged_json_string json_str in
25882588+ Ok (to_toml_string toml)
25892589+ with
25902590+ | Failure msg -> Error msg
25912591+ | e -> Error (Printexc.to_string e)
25922592+end
+292-50
lib/tomlt.mli
···5566(** TOML 1.1 codec.
7788- This module provides TOML 1.1 parsing and encoding with Bytesrw streaming
99- support.
88+ Tomlt provides TOML 1.1 parsing and encoding with efficient streaming
99+ support via {{:https://erratique.ch/software/bytesrw}Bytesrw}.
1010+1111+ {2 Quick Start}
10121111- {b Example:}
1313+ Parse a TOML string:
1414+ {[
1515+ let config = Tomlt.of_string {|
1616+ [server]
1717+ host = "localhost"
1818+ port = 8080
1919+ |} in
2020+ match config with
2121+ | Ok t ->
2222+ let host = Tomlt.(t.%{"server"; "host"} |> to_string) in
2323+ let port = Tomlt.(t.%{"server"; "port"} |> to_int) in
2424+ Printf.printf "Server: %s:%Ld\n" host port
2525+ | Error e -> prerr_endline (Tomlt.Error.to_string e)
2626+ ]}
2727+2828+ Create and encode TOML:
1229 {[
1313- let contents = Bytesrw.Bytes.Reader.of_string toml_input in
1414- match Tomlt.decode contents with
1515- | Ok toml -> (* use toml *)
1616- | Error msg -> (* handle error *)
1717- ]} *)
3030+ let config = Tomlt.(table [
3131+ "title", string "My App";
3232+ "database", table [
3333+ "host", string "localhost";
3434+ "ports", array [int 5432L; int 5433L]
3535+ ]
3636+ ]) in
3737+ print_endline (Tomlt.to_string config)
3838+ ]}
3939+4040+ {2 Module Overview}
4141+4242+ - {!section:types} - TOML value representation
4343+ - {!section:construct} - Value constructors
4444+ - {!section:access} - Value accessors and type conversion
4545+ - {!section:navigate} - Table navigation
4646+ - {!section:decode} - Parsing from strings and readers
4747+ - {!section:encode} - Encoding to strings and writers
4848+ - {!module:Error} - Structured error types *)
18491950open Bytesrw
20512152(** {1:types TOML Value Types} *)
22532323-type toml_value =
2424- | Toml_string of string
2525- | Toml_int of int64
2626- | Toml_float of float
2727- | Toml_bool of bool
2828- | Toml_datetime of string (** Offset datetime (RFC 3339 with timezone) *)
2929- | Toml_datetime_local of string (** Local datetime (no timezone) *)
3030- | Toml_date_local of string (** Local date only *)
3131- | Toml_time_local of string (** Local time only *)
3232- | Toml_array of toml_value list
3333- | Toml_table of (string * toml_value) list
3434-(** The type for TOML values. *)
5454+(** The type of TOML values.
5555+5656+ TOML supports the following value types:
5757+ - Strings (UTF-8 encoded)
5858+ - Integers (64-bit signed)
5959+ - Floats (IEEE 754 double precision)
6060+ - Booleans
6161+ - Offset date-times (RFC 3339 with timezone)
6262+ - Local date-times (no timezone)
6363+ - Local dates
6464+ - Local times
6565+ - Arrays (heterogeneous in TOML 1.1)
6666+ - Tables (string-keyed maps) *)
6767+type t =
6868+ | String of string
6969+ | Int of int64
7070+ | Float of float
7171+ | Bool of bool
7272+ | Datetime of string (** Offset datetime, e.g. [1979-05-27T07:32:00Z] *)
7373+ | Datetime_local of string (** Local datetime, e.g. [1979-05-27T07:32:00] *)
7474+ | Date_local of string (** Local date, e.g. [1979-05-27] *)
7575+ | Time_local of string (** Local time, e.g. [07:32:00] *)
7676+ | Array of t list
7777+ | Table of (string * t) list
7878+(** A TOML value. Tables preserve key insertion order. *)
7979+8080+(** {1:construct Value Constructors}
8181+8282+ These functions create TOML values. Use them to build TOML documents
8383+ programmatically. *)
8484+8585+val string : string -> t
8686+(** [string s] creates a string value. *)
8787+8888+val int : int64 -> t
8989+(** [int i] creates an integer value. *)
9090+9191+val int_of_int : int -> t
9292+(** [int_of_int i] creates an integer value from an [int]. *)
9393+9494+val float : float -> t
9595+(** [float f] creates a float value. *)
9696+9797+val bool : bool -> t
9898+(** [bool b] creates a boolean value. *)
9999+100100+val array : t list -> t
101101+(** [array vs] creates an array value from a list of values.
102102+ TOML 1.1 allows heterogeneous arrays. *)
351033636-(** {1:decode Decode} *)
104104+val table : (string * t) list -> t
105105+(** [table pairs] creates a table value from key-value pairs.
106106+ Keys should be unique; later bindings shadow earlier ones during lookup. *)
371073838-val decode : ?file:string -> Bytes.Reader.t -> (toml_value, string) result
3939-(** [decode r] decodes a TOML document from reader [r].
4040- - [file] is the file path for error messages. Defaults to ["-"]. *)
108108+val datetime : string -> t
109109+(** [datetime s] creates an offset datetime value.
110110+ The string should be in RFC 3339 format with timezone,
111111+ e.g. ["1979-05-27T07:32:00Z"] or ["1979-05-27T07:32:00-07:00"]. *)
411124242-val decode_string : string -> (toml_value, string) result
4343-(** [decode_string s] decodes a TOML document from string [s]. *)
113113+val datetime_local : string -> t
114114+(** [datetime_local s] creates a local datetime value (no timezone).
115115+ E.g. ["1979-05-27T07:32:00"]. *)
441164545-val decode_to_tagged_json : ?file:string -> Bytes.Reader.t -> (string, string) result
4646-(** [decode_to_tagged_json r] decodes TOML and outputs tagged JSON
4747- in the format used by toml-test. *)
117117+val date_local : string -> t
118118+(** [date_local s] creates a local date value.
119119+ E.g. ["1979-05-27"]. *)
481204949-(** {1:encode Encode} *)
121121+val time_local : string -> t
122122+(** [time_local s] creates a local time value.
123123+ E.g. ["07:32:00"] or ["07:32:00.999"]. *)
501245151-val encode_toml : toml_value -> string
5252-(** [encode_toml v] encodes TOML value [v] to a TOML string. *)
125125+(** {1:access Value Accessors}
531265454-val encode_toml_to_buffer : Buffer.t -> toml_value -> unit
5555-(** [encode_toml_to_buffer buf v] encodes TOML value [v] directly to buffer [buf].
5656- This avoids allocating an intermediate string. *)
127127+ These functions extract OCaml values from TOML values.
128128+ They raise [Invalid_argument] if the value is not of the expected type. *)
571295858-val encode_to_writer : Bytes.Writer.t -> toml_value -> unit
5959-(** [encode_to_writer w v] encodes TOML value [v] directly to writer [w].
6060- Useful for streaming output to files or network without building the
6161- full string in memory first. *)
130130+val to_string : t -> string
131131+(** [to_string t] returns the string if [t] is a [String].
132132+ @raise Invalid_argument if [t] is not a string. *)
621336363-val encode_from_tagged_json : string -> (string, string) result
6464-(** [encode_from_tagged_json json] converts tagged JSON to TOML. *)
134134+val to_string_opt : t -> string option
135135+(** [to_string_opt t] returns [Some s] if [t] is [String s], [None] otherwise. *)
651366666-(** {1:helpers Helpers} *)
137137+val to_int : t -> int64
138138+(** [to_int t] returns the integer if [t] is an [Int].
139139+ @raise Invalid_argument if [t] is not an integer. *)
671406868-val toml_to_tagged_json : toml_value -> string
6969-(** [toml_to_tagged_json v] converts a TOML value to tagged JSON format
7070- used by toml-test. *)
141141+val to_int_opt : t -> int64 option
142142+(** [to_int_opt t] returns [Some i] if [t] is [Int i], [None] otherwise. *)
711437272-val decode_tagged_json_string : string -> toml_value
7373-(** [decode_tagged_json_string s] parses tagged JSON into TOML values. *)
144144+val to_float : t -> float
145145+(** [to_float t] returns the float if [t] is a [Float].
146146+ @raise Invalid_argument if [t] is not a float. *)
741477575-val parse_toml : string -> toml_value
7676-(** [parse_toml s] parses a TOML string. Raises [Error.Error] on failure. *)
148148+val to_float_opt : t -> float option
149149+(** [to_float_opt t] returns [Some f] if [t] is [Float f], [None] otherwise. *)
150150+151151+val to_bool : t -> bool
152152+(** [to_bool t] returns the boolean if [t] is a [Bool].
153153+ @raise Invalid_argument if [t] is not a boolean. *)
154154+155155+val to_bool_opt : t -> bool option
156156+(** [to_bool_opt t] returns [Some b] if [t] is [Bool b], [None] otherwise. *)
157157+158158+val to_array : t -> t list
159159+(** [to_array t] returns the list if [t] is an [Array].
160160+ @raise Invalid_argument if [t] is not an array. *)
161161+162162+val to_array_opt : t -> t list option
163163+(** [to_array_opt t] returns [Some vs] if [t] is [Array vs], [None] otherwise. *)
164164+165165+val to_table : t -> (string * t) list
166166+(** [to_table t] returns the association list if [t] is a [Table].
167167+ @raise Invalid_argument if [t] is not a table. *)
168168+169169+val to_table_opt : t -> (string * t) list option
170170+(** [to_table_opt t] returns [Some pairs] if [t] is [Table pairs], [None] otherwise. *)
171171+172172+val to_datetime : t -> string
173173+(** [to_datetime t] returns the datetime string for any datetime type.
174174+ @raise Invalid_argument if [t] is not a datetime variant. *)
175175+176176+val to_datetime_opt : t -> string option
177177+(** [to_datetime_opt t] returns [Some s] if [t] is any datetime variant. *)
178178+179179+(** {2 Type Predicates} *)
180180+181181+val is_string : t -> bool
182182+(** [is_string t] is [true] iff [t] is a [String]. *)
183183+184184+val is_int : t -> bool
185185+(** [is_int t] is [true] iff [t] is an [Int]. *)
186186+187187+val is_float : t -> bool
188188+(** [is_float t] is [true] iff [t] is a [Float]. *)
189189+190190+val is_bool : t -> bool
191191+(** [is_bool t] is [true] iff [t] is a [Bool]. *)
192192+193193+val is_array : t -> bool
194194+(** [is_array t] is [true] iff [t] is an [Array]. *)
195195+196196+val is_table : t -> bool
197197+(** [is_table t] is [true] iff [t] is a [Table]. *)
198198+199199+val is_datetime : t -> bool
200200+(** [is_datetime t] is [true] iff [t] is any datetime variant. *)
201201+202202+(** {1:navigate Table Navigation}
203203+204204+ Functions for navigating and querying TOML tables. *)
205205+206206+val find : string -> t -> t
207207+(** [find key t] returns the value associated with [key] in table [t].
208208+ @raise Invalid_argument if [t] is not a table.
209209+ @raise Not_found if [key] is not in the table. *)
210210+211211+val find_opt : string -> t -> t option
212212+(** [find_opt key t] returns [Some v] if [key] maps to [v] in table [t],
213213+ or [None] if [key] is not bound or [t] is not a table. *)
214214+215215+val mem : string -> t -> bool
216216+(** [mem key t] is [true] if [key] is bound in table [t], [false] otherwise.
217217+ Returns [false] if [t] is not a table. *)
218218+219219+val keys : t -> string list
220220+(** [keys t] returns all keys in table [t].
221221+ @raise Invalid_argument if [t] is not a table. *)
222222+223223+val get : string list -> t -> t
224224+(** [get path t] navigates through nested tables following [path].
225225+ For example, [get ["server"; "port"] t] returns [t.server.port].
226226+ @raise Invalid_argument if any intermediate value is not a table.
227227+ @raise Not_found if any key in [path] is not found. *)
228228+229229+val get_opt : string list -> t -> t option
230230+(** [get_opt path t] is like [get] but returns [None] on any error. *)
231231+232232+val ( .%{} ) : t -> string list -> t
233233+(** [t.%{path}] is [get path t].
234234+235235+ Example: [config.%{["database"; "port"]}]
236236+237237+ @raise Invalid_argument if any intermediate value is not a table.
238238+ @raise Not_found if any key in the path is not found. *)
239239+240240+val ( .%{}<- ) : t -> string list -> t -> t
241241+(** [t.%{path} <- v] returns a new table with value [v] at [path].
242242+ Creates intermediate tables as needed.
243243+244244+ Example: [config.%{["server"; "host"]} <- string "localhost"]
245245+246246+ @raise Invalid_argument if [t] is not a table or if an intermediate
247247+ value exists but is not a table. *)
248248+249249+(** {1:decode Decoding (Parsing)}
250250+251251+ Parse TOML from various sources. *)
252252+253253+val of_string : string -> (t, Tomlt_error.t) result
254254+(** [of_string s] parses [s] as a TOML document. *)
255255+256256+val of_reader : ?file:string -> Bytes.Reader.t -> (t, Tomlt_error.t) result
257257+(** [of_reader r] parses a TOML document from reader [r].
258258+ @param file Optional filename for error messages. *)
259259+260260+val parse : string -> t
261261+(** [parse s] parses [s] as a TOML document.
262262+ @raise Error.Error on parse errors. *)
263263+264264+val parse_reader : ?file:string -> Bytes.Reader.t -> t
265265+(** [parse_reader r] parses a TOML document from reader [r].
266266+ @param file Optional filename for error messages.
267267+ @raise Error.Error on parse errors. *)
268268+269269+(** {1:encode Encoding}
270270+271271+ Encode TOML values to various outputs. *)
272272+273273+val to_toml_string : t -> string
274274+(** [to_toml_string t] encodes [t] as a TOML document string.
275275+ @raise Invalid_argument if [t] is not a [Table]. *)
276276+277277+val to_buffer : Buffer.t -> t -> unit
278278+(** [to_buffer buf t] writes [t] as TOML to buffer [buf].
279279+ @raise Invalid_argument if [t] is not a [Table]. *)
280280+281281+val to_writer : Bytes.Writer.t -> t -> unit
282282+(** [to_writer w t] writes [t] as TOML to writer [w].
283283+ Useful for streaming output without building the full string in memory.
284284+ @raise Invalid_argument if [t] is not a [Table]. *)
285285+286286+(** {1:pp Pretty Printing} *)
287287+288288+val pp : Format.formatter -> t -> unit
289289+(** [pp fmt t] pretty-prints [t] in TOML format. *)
290290+291291+val pp_value : Format.formatter -> t -> unit
292292+(** [pp_value fmt t] pretty-prints a single TOML value (not a full document).
293293+ Useful for debugging. Tables are printed as inline tables. *)
294294+295295+val equal : t -> t -> bool
296296+(** [equal a b] is structural equality on TOML values.
297297+ NaN floats are considered equal to each other. *)
298298+299299+val compare : t -> t -> int
300300+(** [compare a b] is a total ordering on TOML values. *)
7730178302(** {1:errors Error Handling} *)
7930380304module Error = Tomlt_error
8181-(** Error types for TOML parsing and encoding. *)
305305+(** Structured error types for TOML parsing and encoding.
306306+307307+ See {!Tomlt_error} for detailed documentation. *)
308308+309309+(** {1:internal Internal}
310310+311311+ These functions are primarily for testing and interoperability.
312312+ They may change between versions. *)
313313+314314+module Internal : sig
315315+ val to_tagged_json : t -> string
316316+ (** Convert TOML value to tagged JSON format used by toml-test. *)
317317+318318+ val of_tagged_json : string -> t
319319+ (** Parse tagged JSON format into TOML value. *)
320320+321321+ val encode_from_tagged_json : string -> (string, string) result
322322+ (** Convert tagged JSON to TOML string. For toml-test encoder. *)
323323+end
+5-18
lib_eio/tomlt_eio.ml
···33 SPDX-License-Identifier: ISC
44 ---------------------------------------------------------------------------*)
5566-(** Eio integration for TOML errors.
77-88- This module registers TOML errors with Eio's exception system,
99- allowing them to be used with [Eio.Io] and providing context tracking. *)
1010-116module Error = Tomlt.Error
1271313-(** Extend Eio.Exn.err with TOML errors *)
148type Eio.Exn.err += E of Error.t
1591616-(** Create an Eio.Io exception from a TOML error *)
1710let err e = Eio.Exn.create (E e)
18111919-(** Register pretty-printer with Eio *)
2012let () =
2113 Eio.Exn.register_pp (fun f -> function
2214 | E e ->
···2517 | _ -> false
2618 )
27192828-(** Convert a Error.Error exception to Eio.Io *)
2920let wrap_error f =
3021 try f ()
3122 with Error.Error e ->
3223 raise (err e)
33243434-(** Parse TOML with Eio error handling *)
3535-let parse_toml ?file input =
3636- try Tomlt.parse_toml input
2525+let parse ?file input =
2626+ try Tomlt.parse input
3727 with Error.Error e ->
3828 let bt = Printexc.get_raw_backtrace () in
3929 let eio_exn = err e in
···4333 in
4434 Printexc.raise_with_backtrace eio_exn bt
45354646-(** Read and parse TOML from an Eio flow *)
4736let of_flow ?file flow =
4837 let input = Eio.Flow.read_all flow in
4949- parse_toml ?file input
3838+ parse ?file input
50395151-(** Read and parse TOML from an Eio path *)
5240let of_path ~fs path =
5341 let file = Eio.Path.(/) fs path |> Eio.Path.native_exn in
5442 Eio.Path.load (Eio.Path.(/) fs path)
5555- |> parse_toml ~file
4343+ |> parse ~file
56445757-(** Write TOML to an Eio flow *)
5845let to_flow flow value =
5959- let output = Tomlt.encode_toml value in
4646+ let output = Tomlt.to_toml_string value in
6047 Eio.Flow.copy_string output flow
+20-16
lib_eio/tomlt_eio.mli
···33 SPDX-License-Identifier: ISC
44 ---------------------------------------------------------------------------*)
5566-(** Eio integration for TOML errors.
66+(** Eio integration for TOML.
7788- This module registers TOML errors with Eio's exception system,
99- allowing them to be used with {!Eio.Io} and providing context tracking.
88+ This module provides Eio-native functions for parsing and encoding TOML,
99+ with proper integration into Eio's exception system.
10101111 {2 Example}
1212 {[
···18181919(** {1 Eio Exception Integration} *)
20202121-(** TOML errors as Eio errors *)
2221type Eio.Exn.err += E of Tomlt.Error.t
2222+(** TOML errors as Eio errors. *)
23232424-(** Create an [Eio.Io] exception from a TOML error *)
2524val err : Tomlt.Error.t -> exn
2525+(** [err e] creates an [Eio.Io] exception from TOML error [e]. *)
26262727-(** Wrap a function, converting [Tomlt_error.Error] to [Eio.Io] *)
2827val wrap_error : (unit -> 'a) -> 'a
2828+(** [wrap_error f] runs [f] and converts [Tomlt.Error.Error] to [Eio.Io]. *)
29293030(** {1 Parsing with Eio} *)
31313232-(** Parse TOML string with Eio error handling.
3333- @param file optional filename for error context *)
3434-val parse_toml : ?file:string -> string -> Tomlt.toml_value
3232+val parse : ?file:string -> string -> Tomlt.t
3333+(** [parse s] parses TOML string [s] with Eio error handling.
3434+ @param file optional filename for error context.
3535+ @raise Eio.Io on parse errors. *)
35363636-(** Read and parse TOML from an Eio flow.
3737- @param file optional filename for error context *)
3838-val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.toml_value
3737+val of_flow : ?file:string -> _ Eio.Flow.source -> Tomlt.t
3838+(** [of_flow flow] reads and parses TOML from an Eio flow.
3939+ @param file optional filename for error context.
4040+ @raise Eio.Io on read or parse errors. *)
39414040-(** Read and parse TOML from an Eio path *)
4141-val of_path : fs:_ Eio.Path.t -> string -> Tomlt.toml_value
4242+val of_path : fs:_ Eio.Path.t -> string -> Tomlt.t
4343+(** [of_path ~fs path] reads and parses TOML from a file path.
4444+ @raise Eio.Io on file or parse errors. *)
42454346(** {1 Encoding with Eio} *)
44474545-(** Write TOML to an Eio flow *)
4646-val to_flow : _ Eio.Flow.sink -> Tomlt.toml_value -> unit
4848+val to_flow : _ Eio.Flow.sink -> Tomlt.t -> unit
4949+(** [to_flow flow t] writes TOML value [t] to an Eio flow.
5050+ @raise Invalid_argument if [t] is not a table. *)