My aggregated monorepo of OCaml code, automaintained
at main 434 lines 14 kB view raw
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** INI parser and encoder using bytesrw.

    This module provides functions to parse and encode INI files using the
    {{:https://erratique.ch/software/bytesrw}bytesrw} streaming I/O library.
    It implements {{:https://docs.python.org/3/library/configparser.html}Python's
    configparser} semantics for maximum compatibility.

    {1:basic_usage Basic Usage}

    {@ocaml[
    (* Define your configuration type and codec *)
    let config_codec = Init.Document.(
      obj (fun server -> server)
      |> section "server" server_codec ~enc:Fun.id
      |> finish
    )

    (* Decode from a string *)
    match Init_bytesrw.decode_string config_codec ini_text with
    | Ok config -> (* use config *)
    | Error msg -> (* handle error *)

    (* Encode back to a string *)
    match Init_bytesrw.encode_string config_codec config with
    | Ok text -> (* write text *)
    | Error msg -> (* handle error *)
    ]}

    {1:python_compat Python Compatibility}

    This parser implements the same semantics as Python's [configparser] module.
    Configuration files that work with Python will work here, and vice versa.

    {2:syntax Supported Syntax}

    The comments below sit on their own lines because inline comments are
    disabled by default (see {!field-inline_comment_prefixes}).

    {@ini[
    # Comments start with # or ;
    ; This is also a comment

    [section]
    key = value
    ; Both = and : are delimiters
    key2 : value2
    key3=no spaces needed

    [multiline]
    long_value = This is a long value
        that continues on indented lines
        for as long as needed

    [types]
    integer = 42
    float = 3.14
    ; Also: true, on, 1, no, false, off, 0
    boolean = yes
    list = a, b, c, d
    ]}

    {2:edge_cases Edge Cases and Gotchas}

    {ul
    {- {b Section names are case-sensitive}: [[Server]] and [[server]] are
       different.}
    {- {b Option names are case-insensitive}: [Port] and [port] are the same.}
    {- {b Whitespace is trimmed} from keys and values automatically.}
    {- {b Empty values are allowed}: [key =] gives an empty string.}
    {- {b Comments are NOT preserved} during round-trips (matching Python).}
    {- {b Inline comments are disabled by default}: [key = value ; comment]
       gives the value ["value ; comment"] unless you configure
       {!field-inline_comment_prefixes}.}} *)

open Bytesrw

(** {1:config Parser Configuration}

    Configure the parser to match different INI dialects. The default
    configuration matches Python's [ConfigParser]. *)

type interpolation =
  [ `No_interpolation
    (** No variable substitution. Values like ["%(foo)s"] are returned
        literally. Equivalent to Python's [RawConfigParser].

        Use this for configuration files that contain literal [%] or [$]
        characters that shouldn't be interpreted. *)
  | `Basic_interpolation
    (** Basic variable substitution using [%(name)s] syntax (default).
        Equivalent to Python's [ConfigParser] default.

        Variables reference options in the current section or the DEFAULT
        section:
        {@ini[
        [paths]
        base = /opt/app
        ; data becomes "/opt/app/data"
        data = %(base)s/data
        ]}

        {b Escaping:} Use [%%] to get a literal [%]. *)
  | `Extended_interpolation
    (** Extended substitution using [$\{section:name\}] syntax.
        Equivalent to Python's [ExtendedInterpolation].

        Variables can reference options in any section:
        {@ini[
        [common]
        base = /opt/app

        [server]
        root = /srv
        ; Cross-section reference
        data = ${common:base}/data
        ; Same section or DEFAULT
        logs = ${root}/logs
        ]}

        {b Escaping:} Use [$$] to get a literal [$]. *)
  ]
(** The type for interpolation modes. Controls how variable references
    in values are expanded.

    {b Recursion limit:} Interpolation follows references up to 10 levels
    deep to prevent infinite loops. Deeper nesting raises an error.

    {b Missing references:} If a referenced option doesn't exist, decoding
    fails with {!Init.Error.Interpolation}. *)

type config = {
  delimiters : string list;
  (** Characters that separate option names from values.
      Default: [["="; ":"]].

      The {e first} delimiter on a line is used, so values can contain
      delimiter characters:
      {@ini[
      ; The colons in the value are fine
      url = https://example.com:8080
      ]} *)

  comment_prefixes : string list;
  (** Prefixes that start full-line comments. Default: [["#"; ";"]].

      A line starting with any of these (after optional whitespace) is
      treated as a comment and ignored. *)

  inline_comment_prefixes : string list;
  (** Prefixes that start inline comments. Default: [[]] (disabled).

      {b Warning:} Enabling inline comments (e.g., [[";"]]) prevents using
      those prefixes after whitespace in values. For example, with [[";"]]
      enabled:
      {@ini[
      title = Tom ; Jerry
      ]}
      gives the value ["Tom"]: the rest of the line is taken as a comment.

      A space must precede inline comments: [value;comment] keeps the
      semicolon, but [value ; comment] removes it. *)

  default_section : string;
  (** Name of the default section. Default: ["DEFAULT"].

      Options in this section are inherited by all other sections and
      available for interpolation. You can customize this, e.g., to
      ["general"] or ["common"]. *)

  interpolation : interpolation;
  (** How to handle variable references. Default: [`Basic_interpolation].

      See {!type-interpolation} for details on each mode. *)

  allow_no_value : bool;
  (** Allow options without values. Default: [false].

      When [true], options can appear without a delimiter:
      {@ini[
      [mysqld]
      ; No = sign, value is None
      skip-innodb
      port = 3306
      ]}

      Such options decode as [None] when using {!Init.option}. *)

  strict : bool;
  (** Reject duplicate sections and options. Default: [true].

      When [true], if the same section or option appears twice, decoding
      fails with {!Init.Error.Duplicate_section} or
      {!Init.Error.Duplicate_option}.

      When [false], later values silently override earlier ones. *)

  empty_lines_in_values : bool;
  (** Allow empty lines in multiline values. Default: [true].

      When [true], empty lines can be part of multiline values:
      {@ini[
      ; The empty second line of the value of key is preserved
      [section]
      key = line 1

          line 3
      ]}

      When [false], empty lines terminate the multiline value. *)
}
(** Parser configuration. Adjust these settings to parse different INI
    dialects or to match specific Python configparser settings. *)

val default_config : config
(** Default configuration matching Python's [configparser.ConfigParser]:

    {ul
    {- [delimiters = ["="; ":"]]}
    {- [comment_prefixes = ["#"; ";"]]}
    {- [inline_comment_prefixes = []] (disabled)}
    {- [default_section = "DEFAULT"]}
    {- [interpolation = `Basic_interpolation]}
    {- [allow_no_value = false]}
    {- [strict = true]}
    {- [empty_lines_in_values = true]}} *)

val raw_config : config
(** Configuration matching Python's [configparser.RawConfigParser]:
    same as {!default_config} but with [interpolation = `No_interpolation].

    Use this when your values contain literal [%] or [$] characters. *)


(** {1:decode Decoding}

    Parse INI data into OCaml values. All decode functions return
    [Result.t] - they never raise exceptions for parse errors. *)

val decode :
  ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
  'a Init.t -> Bytes.Reader.t -> ('a, string) result
(** [decode codec r] decodes INI data from reader [r] using [codec].

    {ul
    {- [config] configures the parser. Default: {!default_config}.}
    {- [locs] if [true], preserves source locations in metadata.
       Default: [false].}
    {- [layout] if [true], preserves whitespace in metadata for
       layout-preserving round-trips. Default: [false].}
    {- [file] is the file path for error messages. Default: ["-"].}}

    Returns [Ok value] on success or [Error message] on failure, where
    [message] includes location information when available. *)

val decode' :
  ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
  'a Init.t -> Bytes.Reader.t -> ('a, Init.Error.t) result
(** [decode'] is like {!val-decode} but returns a structured error
    with separate {!Init.Error.type-kind}, location, and path information.

    Use this when you need to programmatically handle different error
    types or extract location information. *)

val decode_string :
  ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
  'a Init.t -> string -> ('a, string) result
(** [decode_string codec s] decodes INI data from string [s].

    This is the most common entry point for parsing:
    {@ocaml[
    let ini_text = {|
    [server]
    host = localhost
    port = 8080
    |} in
    Init_bytesrw.decode_string config_codec ini_text
    ]} *)

val decode_string' :
  ?config:config -> ?locs:bool -> ?layout:bool -> ?file:Init.Textloc.fpath ->
  'a Init.t -> string -> ('a, Init.Error.t) result
(** [decode_string'] is like {!val-decode_string} with structured errors. *)


(** {1:encode Encoding}

    Serialize OCaml values to INI format. *)

val encode :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> eod:bool -> Bytes.Writer.t ->
  (unit, string) result
(** [encode codec v ~eod w] encodes [v] to writer [w] using [codec].

    {ul
    {- [buf] is an optional scratch buffer for writing.}
    {- [eod] if [true], signals end-of-data after writing.}}

    The output format follows standard INI conventions:
    - Sections are written as [[section_name]]
    - Options are written as [key = value]
    - Multiline values are continued with indentation *)

val encode' :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> eod:bool -> Bytes.Writer.t ->
  (unit, Init.Error.t) result
(** [encode'] is like {!val-encode} with structured errors. *)

val encode_string :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, string) result
(** [encode_string codec v] encodes [v] to a string.

    {@ocaml[
    let config = { server = { host = "localhost"; port = 8080 } } in
    match Init_bytesrw.encode_string config_codec config with
    | Ok text -> print_endline text
    | Error msg -> failwith msg
    ]}

    Produces:
    {@ini[
    [server]
    host = localhost
    port = 8080
    ]} *)

val encode_string' :
  ?buf:Bytes.t -> 'a Init.t -> 'a -> (string, Init.Error.t) result
(** [encode_string'] is like {!val-encode_string} with structured errors. *)


(** {1:layout Layout Preservation}

    When decoding with [~layout:true], whitespace and comment positions
    are preserved in the {!Init.Meta.t} values attached to each element.
    When re-encoding, this information is used to reproduce the original
    formatting as closely as possible.

    {b Limitations:}
    {ul
    {- Comments are NOT preserved (matching Python's behavior).}
    {- Whitespace within values may be normalized.}
    {- The output may differ slightly from the input in edge cases.}}

    {b Performance tip:} For maximum performance when you don't need
    layout preservation, use [~layout:false ~locs:false] (the default).
    Enabling [~locs:true] improves error messages at a small cost. *)


(** {1:examples Examples}

    {2:simple Simple Configuration}

    {@ocaml[
    type config = { debug : bool; port : int }

    (* Defined outside the Init.Document local open, and not named
       [section], so that it cannot shadow the [section] combinator
       used below. *)
    let server_codec = Init.Section.(
      obj (fun debug port -> { debug; port })
      |> mem "debug" Init.bool ~dec_absent:false ~enc:(fun c -> c.debug)
      |> mem "port" Init.int ~dec_absent:8080 ~enc:(fun c -> c.port)
      |> finish
    )

    let codec = Init.Document.(
      obj Fun.id
      |> section "server" server_codec ~enc:Fun.id
      |> finish
    )

    let config = Init_bytesrw.decode_string codec "[server]\nport = 9000"
    (* Ok { debug = false; port = 9000 } *)
    ]}

    {2:multi_section Multiple Sections}

    {@ocaml[
    type db = { host : string; port : int }
    type cache = { enabled : bool; ttl : int }
    type config = { db : db; cache : cache option }

    let db_codec = Init.Section.(
      obj (fun host port -> { host; port })
      |> mem "host" Init.string ~enc:(fun d -> d.host)
      |> mem "port" Init.int ~dec_absent:5432 ~enc:(fun d -> d.port)
      |> finish
    )

    let cache_codec = Init.Section.(
      obj (fun enabled ttl -> { enabled; ttl })
      |> mem "enabled" Init.bool ~enc:(fun c -> c.enabled)
      |> mem "ttl" Init.int ~dec_absent:3600 ~enc:(fun c -> c.ttl)
      |> finish
    )

    let config_codec = Init.Document.(
      obj (fun db cache -> { db; cache })
      |> section "database" db_codec ~enc:(fun c -> c.db)
      |> opt_section "cache" cache_codec ~enc:(fun c -> c.cache)
      |> finish
    )
    ]}

    {2:interpolation_example Interpolation}

    {@ocaml[
    let paths_codec = Init.Section.(
      obj (fun base data logs -> (base, data, logs))
      |> mem "base" Init.string ~enc:(fun (b,_,_) -> b)
      |> mem "data" Init.string ~enc:(fun (_,d,_) -> d)
      |> mem "logs" Init.string ~enc:(fun (_,_,l) -> l)
      |> finish
    )

    let doc_codec = Init.Document.(
      obj Fun.id
      |> section "paths" paths_codec ~enc:Fun.id
      |> finish
    )

    (* Basic interpolation expands %(base)s *)
    let ini = {|
    [paths]
    base = /opt/app
    data = %(base)s/data
    logs = %(base)s/logs
    |}

    let () =
      match Init_bytesrw.decode_string doc_codec ini with
      | Ok (_, data, logs) ->
          assert (data = "/opt/app/data");
          assert (logs = "/opt/app/logs")
      | Error _ -> assert false
    ]}

    {2:raw_parser Disabling Interpolation}

    {@ocaml[
    (* Use raw_config for files with literal % characters *)
    let config = Init_bytesrw.raw_config

    (* The bare % below would fail with basic interpolation, where a
       literal percent sign has to be written %% *)
    let result = Init_bytesrw.decode_string ~config codec {|
    [display]
    format = 100% complete
    |}
    ]} *)