My aggregated monorepo of OCaml code, automaintained
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** INI parser and encoder using bytesrw.
7
8 This module provides functions to parse and encode INI files using the
9 {{:https://erratique.ch/software/bytesrw}bytesrw} streaming I/O library.
10 It implements {{:https://docs.python.org/3/library/configparser.html}Python's
11 configparser} semantics for maximum compatibility.
12
13 {1:basic_usage Basic Usage}
14
15 {@ocaml[
16 (* Define your configuration type and codec *)
17 let config_codec = Init.Document.(
18 obj (fun server -> server)
19 |> section "server" server_codec ~enc:Fun.id
20 |> finish
21 )
22
23 (* Decode from a string *)
24 match Init_bytesrw.decode_string config_codec ini_text with
25 | Ok config -> (* use config *)
26 | Error msg -> (* handle error *)
27
28 (* Encode back to a string *)
29 match Init_bytesrw.encode_string config_codec config with
30 | Ok text -> (* write text *)
31 | Error msg -> (* handle error *)
32 ]}
33
34 {1:python_compat Python Compatibility}
35
36 This parser implements the same semantics as Python's [configparser] module.
37 Configuration files that work with Python will work here, and vice versa.
38
39 {2:syntax Supported Syntax}
40
41 {@ini[
42 # Comments start with # or ;
43 ; This is also a comment
44
45 [section]
46 key = value
47 key2 : value2 ; Both = and : are delimiters
48 key3=no spaces needed
49
50 [multiline]
51 long_value = This is a long value
52 that continues on indented lines
53 for as long as needed
54
55 [types]
56 integer = 42
57 float = 3.14
58 boolean = yes ; Also: true, on, 1, no, false, off, 0
59 list = a, b, c, d
60 ]}
61
62 {2:edge_cases Edge Cases and Gotchas}
63
64 {ul
65 {- {b Section names are case-sensitive}: [[Server]] and [[server]] are
66 different.}
67 {- {b Option names are case-insensitive}: [Port] and [port] are the same.}
68 {- {b Whitespace is trimmed} from keys and values automatically.}
69 {- {b Empty values are allowed}: [key =] gives an empty string.}
70 {- {b Comments are NOT preserved} during round-trips (matching Python).}
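71 {- {b Inline comments are disabled by default}: [key = value ; comment]
72 gives the value ["value ; comment"] unless you configure
73 {!field-inline_comment_prefixes}. The trailing [; ...] remarks in the
 samples above are annotations for the reader; under the default
 configuration they would be read as part of the value.}} *)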
74
75open Bytesrw
76
77(** {1:config Parser Configuration}
78
79 Configure the parser to match different INI dialects. The default
80 configuration matches Python's [ConfigParser]. *)
81
82type interpolation =
83 [ `No_interpolation
84 (** No variable substitution. Values like ["%(foo)s"] are returned
85 literally. Equivalent to Python's [RawConfigParser].
86
87 Use this for configuration files that contain literal [%] or [$]
88 characters that shouldn't be interpreted. *)
89 | `Basic_interpolation
90 (** Basic variable substitution using [%(name)s] syntax (default).
91 Equivalent to Python's [ConfigParser] default.
92
93 Variables reference options in the current section or the default
94 section (the [default_section] field of {!type-config}, ["DEFAULT"] unless changed):
95 {@ini[
96 [paths]
97 base = /opt/app
98 data = %(base)s/data ; Becomes "/opt/app/data"
99 ]}
100
101 {b Escaping:} Use [%%] to get a literal [%]. *)
102 | `Extended_interpolation
103 (** Extended substitution using [$\{section:name\}] syntax.
104 Equivalent to Python's [ExtendedInterpolation].
105
106 Variables can reference options in any section:
107 {@ini[
108 [common]
109 base = /opt/app
110
111 [server]
112 data = ${common:base}/data ; Cross-section reference
113 logs = ${base}/logs ; Same section or DEFAULT
114 ]}
115
116 {b Escaping:} Use [$$] to get a literal [$]. *)
117 ]
118(** The type for interpolation modes. Controls how variable references
119 in values are expanded. The mode is selected with the [interpolation]
 field of {!type-config}.
120
121 {b Recursion limit:} Interpolation follows references up to 10 levels
122 deep to prevent infinite loops. Deeper nesting makes decoding fail with an error.
123
124 {b Missing references:} If a referenced option doesn't exist, decoding
125 fails with {!Init.Error.Interpolation}. *)
126
127type config = {
128 delimiters : string list;
129 (** Characters that separate option names from values.
130 Default: [["="; ":"]].
131
132 The {e first} delimiter on a line is used, so values can contain
133 delimiter characters:
134 {@ini[
135 url = https://example.com:8080 ; Colon in value is fine
136 ]} *)
137
138 comment_prefixes : string list;
139 (** Prefixes that start full-line comments. Default: [["#"; ";"]].
140
141 A line starting with any of these (after optional whitespace) is
142 treated as a comment and ignored. *)
143
144 inline_comment_prefixes : string list;
145 (** Prefixes that start inline comments. Default: [[]] (disabled).
146
147 {b Warning:} Enabling inline comments (e.g., [[";"]]) means a value
148 can no longer contain such a prefix after whitespace. For example:
149 {@ini[
150 motto = measure twice ; cut once
151 ]}
152 would decode [motto] as ["measure twice"].
153 A space must precede inline comments: [value;comment] keeps the
154 semicolon, but [value ; comment] removes it. *)
155
156 default_section : string;
157 (** Name of the default section. Default: ["DEFAULT"].
158
159 Options in this section are inherited by all other sections and
160 available for interpolation. You can customize this, e.g., to
161 ["general"] or ["common"]. *)
162
163 interpolation : interpolation;
164 (** How to handle variable references. Default: [`Basic_interpolation].
165
166 See {!type-interpolation} for details on each mode. *)
167
168 allow_no_value : bool;
169 (** Allow options without values. Default: [false].
170
171 When [true], options can appear without a delimiter:
172 {@ini[
173 [mysqld]
174 skip-innodb ; No = sign, value is None
175 port = 3306
176 ]}
177
178 Such options decode as [None] when using {!Init.option}. *)
179
180 strict : bool;
181 (** Reject duplicate sections and options. Default: [true].
182
183 When [true], if the same section or option appears twice, decoding
184 fails with {!Init.Error.Duplicate_section} or
185 {!Init.Error.Duplicate_option}.
186
187 When [false], later values silently override earlier ones. *)
188
189 empty_lines_in_values : bool;
190 (** Allow empty lines in multiline values. Default: [true].
191
192 When [true], empty lines can be part of multiline values:
193 {@ini[
194 [section]
195 key = line 1
196
197 line 3 ; Empty line 2 is preserved
198 ]}
199
200 When [false], empty lines terminate the multiline value. *)
201}
202(** Parser configuration. Adjust these settings to parse different INI
203 dialects or to match specific Python configparser settings.
 {!default_config} and {!raw_config} are ready-made values of this type. *)
204
205val default_config : config
206(** Default configuration matching Python's [configparser.ConfigParser]:
207
208 {ul
209 {- [delimiters = ["="; ":"]]}
210 {- [comment_prefixes = ["#"; ";"]]}
211 {- [inline_comment_prefixes = []] (disabled)}
212 {- [default_section = "DEFAULT"]}
213 {- [interpolation = `Basic_interpolation]}
214 {- [allow_no_value = false]}
215 {- [strict = true]}
216 {- [empty_lines_in_values = true]}}

 See {!type-config} for the meaning of each field. This is the value the
 decode functions use when [?config] is omitted. *)
217
218val raw_config : config
219(** Configuration matching Python's [configparser.RawConfigParser]:
220 same as {!default_config} but with [interpolation = `No_interpolation].
221
222 Use this when your values contain literal [%] or [$] characters.
 Pass it to the decode functions as [~config:raw_config]. *)
223
224
225(** {1:decode Decoding}
226
227 Parse INI data into OCaml values. All decode functions return
228 [Result.t] - they never raise exceptions for parse errors. *)
229
230val decode :
231 ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
232 'a Init.t -> Bytes.Reader.t -> ('a, string) result
233(** [decode codec r] decodes INI data from reader [r] using [codec].
234
235 {ul
236 {- [config] configures the parser. Default: {!default_config}.}
237 {- [locs] if [true], preserves source locations in metadata.
238 Default: [false].}
239 {- [layout] if [true], preserves whitespace in metadata for
240 layout-preserving round-trips. Default: [false].}
241 {- [file] is the file path for error messages. Default: ["-"].}}
242
243 Returns [Ok value] on success or [Error message] on failure, where
244 [message] includes location information when available.
 Use {!val-decode'} when the error has to be inspected programmatically. *)
245
246val decode' :
247 ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
248 'a Init.t -> Bytes.Reader.t -> ('a, Init.Error.t) result
249(** [decode'] is like {!val-decode} but returns a structured error
250 with separate {!Init.Error.type-kind}, location, and path information.
 The optional arguments have the same meaning as in {!val-decode}.
251
252 Use this when you need to programmatically handle different error
253 types or extract location information. *)
254
255val decode_string :
256 ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
257 'a Init.t -> string -> ('a, string) result
258(** [decode_string codec s] decodes INI data from string [s].
 It is the string counterpart of {!val-decode}: it takes the same optional
 arguments and reports failures as an [Error message] string. Use
 {!val-decode_string'} for structured errors.
259
260 This is the most common entry point for parsing:
261 {@ocaml[
262 let ini_text = {|
263 [server]
264 host = localhost
265 port = 8080
266 |} in
267 Init_bytesrw.decode_string config_codec ini_text
268 ]} *)
269
270val decode_string' :
271 ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
272 'a Init.t -> string -> ('a, Init.Error.t) result
273(** [decode_string'] is like {!val-decode_string} with structured errors:
 failures are returned as an {!Init.Error.t} instead of a string. *)
274
275
276(** {1:encode Encoding}
277
278 Serialize OCaml values to INI format. *)
279
280val encode :
281 ?buf:Bytes.t -> 'a Init.t -> 'a -> eod:bool -> Bytes.Writer.t ->
282 (unit, string) result
283(** [encode codec v ~eod w] encodes [v] to writer [w] using [codec].
284
285 {ul
286 {- [buf] is an optional scratch buffer for writing.}
287 {- [eod] if [true], signals end-of-data after writing.}}
288
289 The output format follows standard INI conventions:
290 {ul {- Sections are written as [[section_name]].}
291 {- Options are written as [key = value].}
292 {- Multiline values are continued with indentation.}}

 Returns [Ok ()] on success or [Error message] on failure. Use
 {!val-encode'} for a structured error. *)
293
294val encode' :
295 ?buf:Bytes.t -> 'a Init.t -> 'a -> eod:bool -> Bytes.Writer.t ->
296 (unit, Init.Error.t) result
297(** [encode'] is like {!val-encode} with structured errors:
 failures are returned as an {!Init.Error.t} instead of a string. *)
298
299val encode_string :
300 ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, string) result
301(** [encode_string codec v] encodes [v] to a string.
 [buf] is an optional scratch buffer, as in {!val-encode}. Returns
 [Ok text] on success or [Error message] on failure.
302
303 {@ocaml[
304 let config = { server = { host = "localhost"; port = 8080 } } in
305 match Init_bytesrw.encode_string config_codec config with
306 | Ok text -> print_endline text
307 | Error msg -> failwith msg
308 ]}
309
310 Produces:
311 {@ini[
312 [server]
313 host = localhost
314 port = 8080
315 ]} *)
316
317val encode_string' :
318 ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, Init.Error.t) result
319(** [encode_string'] is like {!val-encode_string} with structured errors:
 failures are returned as an {!Init.Error.t} instead of a string. *)
320
321
322(** {1:layout Layout Preservation}
323
324 When decoding with [~layout:true], whitespace and comment positions
325 are preserved in the {!Init.Meta.t} values attached to each element.
326 When re-encoding, this information is used to reproduce the original
327 formatting as closely as possible.
328
329 {b Limitations:}
330 {ul
331 {- Comments are NOT preserved (matching Python's behavior).}
332 {- Whitespace within values may be normalized.}
333 {- The output may differ slightly from the input in edge cases.}}
334
335 {b Performance tip:} For maximum performance when you don't need
336 layout preservation, use [~layout:false ~locs:false] (the default).
337 Enabling [~locs:true] improves error messages at a small cost. *)
338
339
340(** {1:examples Examples}
341
342 {2:simple Simple Configuration}
343
344 {@ocaml[
345 type config = { debug : bool; port : int }
346
347 let codec = Init.Document.(
348 let server_codec = Init.Section.(
349 obj (fun debug port -> { debug; port })
350 |> mem "debug" Init.bool ~dec_absent:false ~enc:(fun c -> c.debug)
351 |> mem "port" Init.int ~dec_absent:8080 ~enc:(fun c -> c.port)
352 |> finish
353 ) in
354 obj Fun.id
355 |> section "server" server_codec ~enc:Fun.id
356 |> finish
357 )
358
359 let config = Init_bytesrw.decode_string codec "[server]\nport = 9000"
360 (* Ok { debug = false; port = 9000 } *)
361 ]}
362
363 {2:multi_section Multiple Sections}
364
365 {@ocaml[
366 type db = { host : string; port : int }
367 type cache = { enabled : bool; ttl : int }
368 type config = { db : db; cache : cache option }
369
370 let db_codec = Init.Section.(
371 obj (fun host port -> { host; port })
372 |> mem "host" Init.string ~enc:(fun d -> d.host)
373 |> mem "port" Init.int ~dec_absent:5432 ~enc:(fun d -> d.port)
374 |> finish
375 )
376
377 let cache_codec = Init.Section.(
378 obj (fun enabled ttl -> { enabled; ttl })
379 |> mem "enabled" Init.bool ~enc:(fun c -> c.enabled)
380 |> mem "ttl" Init.int ~dec_absent:3600 ~enc:(fun c -> c.ttl)
381 |> finish
382 )
383
384 let config_codec = Init.Document.(
385 obj (fun db cache -> { db; cache })
386 |> section "database" db_codec ~enc:(fun c -> c.db)
387 |> opt_section "cache" cache_codec ~enc:(fun c -> c.cache)
388 |> finish
389 )
390 ]}
391
392 {2:interpolation_example Interpolation}
393
394 {@ocaml[
395 let paths_codec = Init.Section.(
396 obj (fun base data logs -> (base, data, logs))
397 |> mem "base" Init.string ~enc:(fun (b,_,_) -> b)
398 |> mem "data" Init.string ~enc:(fun (_,d,_) -> d)
399 |> mem "logs" Init.string ~enc:(fun (_,_,l) -> l)
400 |> finish
401 )
402
403 let doc_codec = Init.Document.(
404 obj Fun.id
405 |> section "paths" paths_codec ~enc:Fun.id
406 |> finish
407 )
408
409 (* Basic interpolation expands %(base)s *)
410 let ini = {|
411 [paths]
412 base = /opt/app
413 data = %(base)s/data
414 logs = %(base)s/logs
415 |}
416
417 let () = match Init_bytesrw.decode_string doc_codec ini with
418 | Ok (_, data, logs) ->
419 assert (data = "/opt/app/data");
420 assert (logs = "/opt/app/logs")
421 | Error _ -> assert false
422 ]}
423
424 {2:raw_parser Disabling Interpolation}
425
426 {@ocaml[
427 (* Use raw_config for files with literal % characters. The bare %
428 below would be rejected by basic interpolation. *)
429 let config = Init_bytesrw.raw_config
430 let result = Init_bytesrw.decode_string ~config codec {|
431 [display]
432 format = 100% complete
433 |}
434 ]} *)