···1920(** Hexadecimal digit *)
21let is_hex c =
22- (c >= '0' && c <= '9') ||
23- (c >= 'a' && c <= 'f') ||
24- (c >= 'A' && c <= 'F')
2526(** Alphabetic character *)
27-let is_alpha c =
28- (c >= 'a' && c <= 'z') ||
29- (c >= 'A' && c <= 'Z')
3031(** Alphanumeric character *)
32let is_alnum c = is_alpha c || is_digit c
···34(** YAML indicator characters *)
35let is_indicator c =
36 match c with
37- | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}'
38- | '#' | '&' | '*' | '!' | '|' | '>' | '\'' | '"'
39- | '%' | '@' | '`' -> true
40 | _ -> false
4142(** Flow context indicator characters *)
43let is_flow_indicator c =
44- match c with
45- | ',' | '[' | ']' | '{' | '}' -> true
46- | _ -> false
···1920(** Hexadecimal digit *)
21let is_hex c =
22+ (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
002324(** Alphabetic character *)
25+let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
002627(** Alphanumeric character *)
28let is_alnum c = is_alpha c || is_digit c
···30(** YAML indicator characters *)
31let is_indicator c =
32 match c with
33+ | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}' | '#' | '&' | '*' | '!' | '|'
34+ | '>' | '\'' | '"' | '%' | '@' | '`' ->
35+ true
36 | _ -> false
3738(** Flow context indicator characters *)
39let is_flow_indicator c =
40+ match c with ',' | '[' | ']' | '{' | '}' -> true | _ -> false
00
+5-19
lib/chomping.ml
···6(** Block scalar chomping indicators *)
78type t =
9- | Strip (** Remove final line break and trailing empty lines *)
10 | Clip (** Keep final line break, remove trailing empty lines (default) *)
11 | Keep (** Keep final line break and trailing empty lines *)
1213-let to_string = function
14- | Strip -> "strip"
15- | Clip -> "clip"
16- | Keep -> "keep"
17-18-let pp fmt t =
19- Format.pp_print_string fmt (to_string t)
20-21-let of_char = function
22- | '-' -> Some Strip
23- | '+' -> Some Keep
24- | _ -> None
25-26-let to_char = function
27- | Strip -> Some '-'
28- | Clip -> None
29- | Keep -> Some '+'
30-31let equal a b = a = b
···6(** Block scalar chomping indicators *)
78type t =
9+ | Strip (** Remove final line break and trailing empty lines *)
10 | Clip (** Keep final line break, remove trailing empty lines (default) *)
11 | Keep (** Keep final line break and trailing empty lines *)
1213+let to_string = function Strip -> "strip" | Clip -> "clip" | Keep -> "keep"
14+let pp fmt t = Format.pp_print_string fmt (to_string t)
15+let of_char = function '-' -> Some Strip | '+' -> Some Keep | _ -> None
16+let to_char = function Strip -> Some '-' | Clip -> None | Keep -> Some '+'
0000000000000017let equal a b = a = b
+16-20
lib/document.ml
···13 implicit_end : bool;
14}
1516-let make
17- ?(version : (int * int) option)
18- ?(tags : (string * string) list = [])
19- ?(implicit_start = true)
20- ?(implicit_end = true)
21- root =
22 { version; tags; root; implicit_start; implicit_end }
2324let version t = t.version
···26let root t = t.root
27let implicit_start t = t.implicit_start
28let implicit_end t = t.implicit_end
29-30let with_version version t = { t with version = Some version }
31let with_tags tags t = { t with tags }
32let with_root root t = { t with root = Some root }
···34let pp fmt t =
35 Format.fprintf fmt "@[<v 2>document(@,";
36 (match t.version with
37- | Some (maj, min) -> Format.fprintf fmt "version=%d.%d,@ " maj min
38- | None -> ());
39 if t.tags <> [] then begin
40 Format.fprintf fmt "tags=[";
41- List.iteri (fun i (h, p) ->
42- if i > 0 then Format.fprintf fmt ", ";
43- Format.fprintf fmt "%s -> %s" h p
44- ) t.tags;
045 Format.fprintf fmt "],@ "
46 end;
47 Format.fprintf fmt "implicit_start=%b,@ " t.implicit_start;
48 Format.fprintf fmt "implicit_end=%b,@ " t.implicit_end;
49 (match t.root with
50- | Some root -> Format.fprintf fmt "root=%a" Yaml.pp root
51- | None -> Format.fprintf fmt "root=<empty>");
52 Format.fprintf fmt "@]@,)"
5354let equal a b =
55- Option.equal ( = ) a.version b.version &&
56- List.equal ( = ) a.tags b.tags &&
57- Option.equal Yaml.equal a.root b.root &&
58- a.implicit_start = b.implicit_start &&
59- a.implicit_end = b.implicit_end
···13 implicit_end : bool;
14}
1516+let make ?(version : (int * int) option) ?(tags : (string * string) list = [])
17+ ?(implicit_start = true) ?(implicit_end = true) root =
000018 { version; tags; root; implicit_start; implicit_end }
1920let version t = t.version
···22let root t = t.root
23let implicit_start t = t.implicit_start
24let implicit_end t = t.implicit_end
025let with_version version t = { t with version = Some version }
26let with_tags tags t = { t with tags }
27let with_root root t = { t with root = Some root }
···29let pp fmt t =
30 Format.fprintf fmt "@[<v 2>document(@,";
31 (match t.version with
32+ | Some (maj, min) -> Format.fprintf fmt "version=%d.%d,@ " maj min
33+ | None -> ());
34 if t.tags <> [] then begin
35 Format.fprintf fmt "tags=[";
36+ List.iteri
37+ (fun i (h, p) ->
38+ if i > 0 then Format.fprintf fmt ", ";
39+ Format.fprintf fmt "%s -> %s" h p)
40+ t.tags;
41 Format.fprintf fmt "],@ "
42 end;
43 Format.fprintf fmt "implicit_start=%b,@ " t.implicit_start;
44 Format.fprintf fmt "implicit_end=%b,@ " t.implicit_end;
45 (match t.root with
46+ | Some root -> Format.fprintf fmt "root=%a" Yaml.pp root
47+ | None -> Format.fprintf fmt "root=<empty>");
48 Format.fprintf fmt "@]@,)"
4950let equal a b =
51+ Option.equal ( = ) a.version b.version
52+ && List.equal ( = ) a.tags b.tags
53+ && Option.equal Yaml.equal a.root b.root
54+ && a.implicit_start = b.implicit_start
55+ && a.implicit_end = b.implicit_end
+63-107
lib/eio/yamlrw_eio.ml
···56(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
78- This module provides Eio-compatible streaming YAML parsing and emitting.
9- It uses bytesrw adapters to convert Eio sources/sinks to the standard
10- YAML scanner/parser/emitter, eliminating code duplication. *)
1112open Yamlrw
13···30 Scanner.of_input input
3132 (** Create a parser from an Eio flow *)
33- let parser_of_flow flow =
34- Parser.of_scanner (scanner_of_flow flow)
3536 (** Parse a JSON-compatible value from an Eio flow.
3738 @param resolve_aliases Whether to expand aliases (default: true)
39 @param max_nodes Maximum nodes during alias expansion (default: 10M)
40 @param max_depth Maximum alias nesting depth (default: 100) *)
41- let value
42- ?(resolve_aliases = true)
43 ?(max_nodes = Yaml.default_max_alias_nodes)
44- ?(max_depth = Yaml.default_max_alias_depth)
45- flow =
46 let parser = parser_of_flow flow in
47- Loader.value_of_parser
48- ~resolve_aliases ~max_nodes ~max_depth
49- (fun () -> Parser.next parser)
5051 (** Parse a full YAML value from an Eio flow.
52···55 @param resolve_aliases Whether to expand aliases (default: false)
56 @param max_nodes Maximum nodes during alias expansion (default: 10M)
57 @param max_depth Maximum alias nesting depth (default: 100) *)
58- let yaml
59- ?(resolve_aliases = false)
60 ?(max_nodes = Yaml.default_max_alias_nodes)
61- ?(max_depth = Yaml.default_max_alias_depth)
62- flow =
63 let parser = parser_of_flow flow in
64- Loader.yaml_of_parser
65- ~resolve_aliases ~max_nodes ~max_depth
66- (fun () -> Parser.next parser)
6768 (** Parse multiple YAML documents from an Eio flow. *)
69 let documents flow =
···7273 (** {2 Event-Based Streaming} *)
74075 (** A streaming event reader backed by a flow *)
76- type event_reader = {
77- parser : Parser.t;
78- }
7980- (** Create an event reader from an Eio flow.
81- This reads data incrementally as events are requested. *)
82- let event_reader flow =
83- { parser = parser_of_flow flow }
8485- (** Get the next event from an event reader.
86- Returns [None] when parsing is complete. *)
87- let next_event reader =
88- Parser.next reader.parser
8990 (** Iterate over all events from a flow.
91···127 @param encoding Output encoding (default: UTF-8)
128 @param scalar_style Preferred scalar style (default: Any)
129 @param layout_style Preferred layout style (default: Any) *)
130- let value
131- ?(encoding = `Utf8)
132- ?(scalar_style = `Any)
133- ?(layout_style = `Any)
134- flow
135- (v : value) =
136- let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
137 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
138 Serialize.value_to_writer ~config writer v
139···142 @param encoding Output encoding (default: UTF-8)
143 @param scalar_style Preferred scalar style (default: Any)
144 @param layout_style Preferred layout style (default: Any) *)
145- let yaml
146- ?(encoding = `Utf8)
147- ?(scalar_style = `Any)
148- ?(layout_style = `Any)
149- flow
150- (v : yaml) =
151- let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
152 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
153 Serialize.yaml_to_writer ~config writer v
154···158 @param scalar_style Preferred scalar style (default: Any)
159 @param layout_style Preferred layout style (default: Any)
160 @param resolve_aliases Whether to expand aliases (default: true) *)
161- let documents
162- ?(encoding = `Utf8)
163- ?(scalar_style = `Any)
164- ?(layout_style = `Any)
165- ?(resolve_aliases = true)
166- flow
167- docs =
168- let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
169 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
170 Serialize.documents_to_writer ~config ~resolve_aliases writer docs
171172 (** {2 Event-Based Streaming} *)
1730174 (** A streaming event writer that writes directly to a flow *)
175- type event_writer = {
176- emitter : Emitter.t;
177- }
178179- (** Create an event writer that writes directly to a flow.
180- Events are written incrementally as they are emitted.
181182 @param encoding Output encoding (default: UTF-8)
183 @param scalar_style Preferred scalar style (default: Any)
184 @param layout_style Preferred layout style (default: Any) *)
185- let event_writer
186- ?(encoding = `Utf8)
187- ?(scalar_style = `Any)
188- ?(layout_style = `Any)
189- flow =
190- let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
191 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
192 { emitter = Emitter.of_writer ~config writer }
193194 (** Emit a single event to the writer. *)
195- let emit ew ev =
196- Emitter.emit ew.emitter ev
197198 (** Flush the writer by sending end-of-data. *)
199- let flush ew =
200- Emitter.flush ew.emitter
201202 (** Emit events from a list to a flow. *)
203 let emit_all flow events =
···209(** {1 Convenience Functions} *)
210211(** Read a value from a file path *)
212-let of_file
213- ?(resolve_aliases = true)
214 ?(max_nodes = Yaml.default_max_alias_nodes)
215- ?(max_depth = Yaml.default_max_alias_depth)
216- ~fs
217- path =
218 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
219 Read.value ~resolve_aliases ~max_nodes ~max_depth flow
220221(** Read full YAML from a file path *)
222-let yaml_of_file
223- ?(resolve_aliases = false)
224 ?(max_nodes = Yaml.default_max_alias_nodes)
225- ?(max_depth = Yaml.default_max_alias_depth)
226- ~fs
227- path =
228 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
229 Read.yaml ~resolve_aliases ~max_nodes ~max_depth flow
230231(** Read documents from a file path *)
232let documents_of_file ~fs path =
233- Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
234- Read.documents flow
235236(** Write a value to a file path *)
237-let to_file
238- ?(encoding = `Utf8)
239- ?(scalar_style = `Any)
240- ?(layout_style = `Any)
241- ~fs
242- path
243- v =
244- Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
245- Write.value ~encoding ~scalar_style ~layout_style flow v
246247(** Write full YAML to a file path *)
248-let yaml_to_file
249- ?(encoding = `Utf8)
250- ?(scalar_style = `Any)
251- ?(layout_style = `Any)
252- ~fs
253- path
254- v =
255- Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
256- Write.yaml ~encoding ~scalar_style ~layout_style flow v
257258(** Write documents to a file path *)
259-let documents_to_file
260- ?(encoding = `Utf8)
261- ?(scalar_style = `Any)
262- ?(layout_style = `Any)
263- ?(resolve_aliases = true)
264- ~fs
265- path
266- docs =
267- Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
268- Write.documents ~encoding ~scalar_style ~layout_style ~resolve_aliases flow docs
···56(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
78+ This module provides Eio-compatible streaming YAML parsing and emitting. It
9+ uses bytesrw adapters to convert Eio sources/sinks to the standard YAML
10+ scanner/parser/emitter, eliminating code duplication. *)
1112open Yamlrw
13···30 Scanner.of_input input
3132 (** Create a parser from an Eio flow *)
33+ let parser_of_flow flow = Parser.of_scanner (scanner_of_flow flow)
03435 (** Parse a JSON-compatible value from an Eio flow.
3637 @param resolve_aliases Whether to expand aliases (default: true)
38 @param max_nodes Maximum nodes during alias expansion (default: 10M)
39 @param max_depth Maximum alias nesting depth (default: 100) *)
40+ let value ?(resolve_aliases = true)
041 ?(max_nodes = Yaml.default_max_alias_nodes)
42+ ?(max_depth = Yaml.default_max_alias_depth) flow =
043 let parser = parser_of_flow flow in
44+ Loader.value_of_parser ~resolve_aliases ~max_nodes ~max_depth (fun () ->
45+ Parser.next parser)
04647 (** Parse a full YAML value from an Eio flow.
48···51 @param resolve_aliases Whether to expand aliases (default: false)
52 @param max_nodes Maximum nodes during alias expansion (default: 10M)
53 @param max_depth Maximum alias nesting depth (default: 100) *)
54+ let yaml ?(resolve_aliases = false)
055 ?(max_nodes = Yaml.default_max_alias_nodes)
56+ ?(max_depth = Yaml.default_max_alias_depth) flow =
057 let parser = parser_of_flow flow in
58+ Loader.yaml_of_parser ~resolve_aliases ~max_nodes ~max_depth (fun () ->
59+ Parser.next parser)
06061 (** Parse multiple YAML documents from an Eio flow. *)
62 let documents flow =
···6566 (** {2 Event-Based Streaming} *)
6768+ type event_reader = { parser : Parser.t }
69 (** A streaming event reader backed by a flow *)
0007071+ (** Create an event reader from an Eio flow. This reads data incrementally as
72+ events are requested. *)
73+ let event_reader flow = { parser = parser_of_flow flow }
07475+ (** Get the next event from an event reader. Returns [None] when parsing is
76+ complete. *)
77+ let next_event reader = Parser.next reader.parser
07879 (** Iterate over all events from a flow.
80···116 @param encoding Output encoding (default: UTF-8)
117 @param scalar_style Preferred scalar style (default: Any)
118 @param layout_style Preferred layout style (default: Any) *)
119+ let value ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
120+ flow (v : value) =
121+ let config =
122+ { Emitter.default_config with encoding; scalar_style; layout_style }
123+ in
00124 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
125 Serialize.value_to_writer ~config writer v
126···129 @param encoding Output encoding (default: UTF-8)
130 @param scalar_style Preferred scalar style (default: Any)
131 @param layout_style Preferred layout style (default: Any) *)
132+ let yaml ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
133+ flow (v : yaml) =
134+ let config =
135+ { Emitter.default_config with encoding; scalar_style; layout_style }
136+ in
00137 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
138 Serialize.yaml_to_writer ~config writer v
139···143 @param scalar_style Preferred scalar style (default: Any)
144 @param layout_style Preferred layout style (default: Any)
145 @param resolve_aliases Whether to expand aliases (default: true) *)
146+ let documents ?(encoding = `Utf8) ?(scalar_style = `Any)
147+ ?(layout_style = `Any) ?(resolve_aliases = true) flow docs =
148+ let config =
149+ { Emitter.default_config with encoding; scalar_style; layout_style }
150+ in
000151 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
152 Serialize.documents_to_writer ~config ~resolve_aliases writer docs
153154 (** {2 Event-Based Streaming} *)
155156+ type event_writer = { emitter : Emitter.t }
157 (** A streaming event writer that writes directly to a flow *)
000158159+ (** Create an event writer that writes directly to a flow. Events are written
160+ incrementally as they are emitted.
161162 @param encoding Output encoding (default: UTF-8)
163 @param scalar_style Preferred scalar style (default: Any)
164 @param layout_style Preferred layout style (default: Any) *)
165+ let event_writer ?(encoding = `Utf8) ?(scalar_style = `Any)
166+ ?(layout_style = `Any) flow =
167+ let config =
168+ { Emitter.default_config with encoding; scalar_style; layout_style }
169+ in
0170 let writer = Bytesrw_eio.bytes_writer_of_flow flow in
171 { emitter = Emitter.of_writer ~config writer }
172173 (** Emit a single event to the writer. *)
174+ let emit ew ev = Emitter.emit ew.emitter ev
0175176 (** Flush the writer by sending end-of-data. *)
177+ let flush ew = Emitter.flush ew.emitter
0178179 (** Emit events from a list to a flow. *)
180 let emit_all flow events =
···186(** {1 Convenience Functions} *)
187188(** Read a value from a file path *)
189+let of_file ?(resolve_aliases = true)
0190 ?(max_nodes = Yaml.default_max_alias_nodes)
191+ ?(max_depth = Yaml.default_max_alias_depth) ~fs path =
00192 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
193 Read.value ~resolve_aliases ~max_nodes ~max_depth flow
194195(** Read full YAML from a file path *)
196+let yaml_of_file ?(resolve_aliases = false)
0197 ?(max_nodes = Yaml.default_max_alias_nodes)
198+ ?(max_depth = Yaml.default_max_alias_depth) ~fs path =
00199 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
200 Read.yaml ~resolve_aliases ~max_nodes ~max_depth flow
201202(** Read documents from a file path *)
203let documents_of_file ~fs path =
204+ Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> Read.documents flow
0205206(** Write a value to a file path *)
207+let to_file ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
208+ ~fs path v =
209+ Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path)
210+ @@ fun flow -> Write.value ~encoding ~scalar_style ~layout_style flow v
00000211212(** Write full YAML to a file path *)
213+let yaml_to_file ?(encoding = `Utf8) ?(scalar_style = `Any)
214+ ?(layout_style = `Any) ~fs path v =
215+ Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path)
216+ @@ fun flow -> Write.yaml ~encoding ~scalar_style ~layout_style flow v
00000217218(** Write documents to a file path *)
219+let documents_to_file ?(encoding = `Utf8) ?(scalar_style = `Any)
220+ ?(layout_style = `Any) ?(resolve_aliases = true) ~fs path docs =
221+ Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path)
222+ @@ fun flow ->
223+ Write.documents ~encoding ~scalar_style ~layout_style ~resolve_aliases flow
224+ docs
0000
+66-52
lib/eio/yamlrw_eio.mli
···56(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
78- This library provides Eio-based streaming support for YAML parsing
9- and emitting. It uses bytesrw adapters that read/write directly to
10- Eio flows, with bytesrw handling internal buffering.
1112 {2 Quick Start}
13···24 Eio_main.run @@ fun env ->
25 let fs = Eio.Stdenv.fs env in
26 Eio.Path.with_open_out Eio.Path.(fs / "output.yaml") @@ fun flow ->
27- Yaml_eio.Write.value flow (`O [("name", `String "test")])
28 ]}
2930 Stream events incrementally:
···32 Eio_main.run @@ fun env ->
33 let fs = Eio.Stdenv.fs env in
34 Eio.Path.with_open_in Eio.Path.(fs / "data.yaml") @@ fun flow ->
35- Yaml_eio.Read.iter_events (fun event span ->
36- Format.printf "Event at %a@." Yamlrw.Span.pp span
37- ) flow
38 ]}
3940 {2 Streaming Architecture}
4142 This library uses bytesrw for direct I/O with Eio flows:
4344- - {b Reading}: Data is read directly from the flow as the
45- parser requests it. Bytesrw handles internal buffering.
4647- - {b Writing}: Output is written directly to the flow.
48- Bytesrw handles chunking and buffering. *)
4950(** {1 Types} *)
51···66module Read : sig
67 (** Parse YAML from Eio flows.
6869- All functions read data incrementally from the underlying flow,
70- without loading the entire file into memory first. *)
7172 (** {2 High-Level Parsing} *)
73···75 ?resolve_aliases:bool ->
76 ?max_nodes:int ->
77 ?max_depth:int ->
78- _ Eio.Flow.source -> value
079 (** Parse a JSON-compatible value from an Eio flow.
8081 @param resolve_aliases Whether to expand aliases (default: true)
···86 ?resolve_aliases:bool ->
87 ?max_nodes:int ->
88 ?max_depth:int ->
89- _ Eio.Flow.source -> yaml
090 (** Parse a full YAML value from an Eio flow.
9192 By default, aliases are NOT resolved, preserving the document structure.
···101 (** {2 Event-Based Streaming} *)
102103 type event_reader
104- (** A streaming event reader backed by a flow.
105- Events are parsed incrementally as requested. *)
106107 val event_reader : _ Eio.Flow.source -> event_reader
108 (** Create an event reader from an Eio flow. *)
109110 val next_event : event_reader -> Yamlrw.Event.spanned option
111- (** Get the next event from an event reader.
112- Returns [None] when parsing is complete. *)
113114 val iter_events :
115- (event -> Yamlrw.Span.t -> unit) ->
116- _ Eio.Flow.source -> unit
117 (** Iterate over all events from a flow. *)
118119- val fold_events :
120- ('a -> event -> 'a) -> 'a ->
121- _ Eio.Flow.source -> 'a
122 (** Fold over all events from a flow. *)
123124- val iter_documents :
125- (document -> unit) ->
126- _ Eio.Flow.source -> unit
127 (** Iterate over documents from a flow, calling [f] for each document. *)
128129- val fold_documents :
130- ('a -> document -> 'a) -> 'a ->
131- _ Eio.Flow.source -> 'a
132 (** Fold over documents from a flow. *)
133end
134···145 ?encoding:Yamlrw.Encoding.t ->
146 ?scalar_style:Yamlrw.Scalar_style.t ->
147 ?layout_style:Yamlrw.Layout_style.t ->
148- _ Eio.Flow.sink -> value -> unit
00149 (** Write a JSON-compatible value to an Eio flow.
150151 @param encoding Output encoding (default: UTF-8)
···156 ?encoding:Yamlrw.Encoding.t ->
157 ?scalar_style:Yamlrw.Scalar_style.t ->
158 ?layout_style:Yamlrw.Layout_style.t ->
159- _ Eio.Flow.sink -> yaml -> unit
00160 (** Write a full YAML value to an Eio flow.
161162 @param encoding Output encoding (default: UTF-8)
···168 ?scalar_style:Yamlrw.Scalar_style.t ->
169 ?layout_style:Yamlrw.Layout_style.t ->
170 ?resolve_aliases:bool ->
171- _ Eio.Flow.sink -> document list -> unit
00172 (** Write multiple YAML documents to an Eio flow.
173174 @param encoding Output encoding (default: UTF-8)
···179 (** {2 Event-Based Streaming} *)
180181 type event_writer
182- (** A streaming event writer backed by a flow.
183- Events are written incrementally to the underlying flow. *)
184185 val event_writer :
186 ?encoding:Yamlrw.Encoding.t ->
187 ?scalar_style:Yamlrw.Scalar_style.t ->
188 ?layout_style:Yamlrw.Layout_style.t ->
189- _ Eio.Flow.sink -> event_writer
190- (** Create an event writer that writes directly to a flow.
191- Events are written incrementally as they are emitted.
0192193 @param encoding Output encoding (default: UTF-8)
194 @param scalar_style Preferred scalar style (default: Any)
···211 ?max_nodes:int ->
212 ?max_depth:int ->
213 fs:_ Eio.Path.t ->
214- string -> value
0215(** Read a value from a file path.
216217- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0218219val yaml_of_file :
220 ?resolve_aliases:bool ->
221 ?max_nodes:int ->
222 ?max_depth:int ->
223 fs:_ Eio.Path.t ->
224- string -> yaml
0225(** Read full YAML from a file path.
226227- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0228229-val documents_of_file :
230- fs:_ Eio.Path.t ->
231- string -> document list
232(** Read documents from a file path.
233234- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0235236val to_file :
237 ?encoding:Yamlrw.Encoding.t ->
238 ?scalar_style:Yamlrw.Scalar_style.t ->
239 ?layout_style:Yamlrw.Layout_style.t ->
240 fs:_ Eio.Path.t ->
241- string -> value -> unit
00242(** Write a value to a file path.
243244- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0245246val yaml_to_file :
247 ?encoding:Yamlrw.Encoding.t ->
248 ?scalar_style:Yamlrw.Scalar_style.t ->
249 ?layout_style:Yamlrw.Layout_style.t ->
250 fs:_ Eio.Path.t ->
251- string -> yaml -> unit
00252(** Write full YAML to a file path.
253254- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0255256val documents_to_file :
257 ?encoding:Yamlrw.Encoding.t ->
···259 ?layout_style:Yamlrw.Layout_style.t ->
260 ?resolve_aliases:bool ->
261 fs:_ Eio.Path.t ->
262- string -> document list -> unit
00263(** Write documents to a file path.
264265- @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
0
···56(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
78+ This library provides Eio-based streaming support for YAML parsing and
9+ emitting. It uses bytesrw adapters that read/write directly to Eio flows,
10+ with bytesrw handling internal buffering.
1112 {2 Quick Start}
13···24 Eio_main.run @@ fun env ->
25 let fs = Eio.Stdenv.fs env in
26 Eio.Path.with_open_out Eio.Path.(fs / "output.yaml") @@ fun flow ->
27+ Yaml_eio.Write.value flow (`O [ ("name", `String "test") ])
28 ]}
2930 Stream events incrementally:
···32 Eio_main.run @@ fun env ->
33 let fs = Eio.Stdenv.fs env in
34 Eio.Path.with_open_in Eio.Path.(fs / "data.yaml") @@ fun flow ->
35+ Yaml_eio.Read.iter_events
36+ (fun event span -> Format.printf "Event at %a@." Yamlrw.Span.pp span)
37+ flow
38 ]}
3940 {2 Streaming Architecture}
4142 This library uses bytesrw for direct I/O with Eio flows:
4344+ - {b Reading}: Data is read directly from the flow as the parser requests
45+ it. Bytesrw handles internal buffering.
4647+ - {b Writing}: Output is written directly to the flow. Bytesrw handles
48+ chunking and buffering. *)
4950(** {1 Types} *)
51···66module Read : sig
67 (** Parse YAML from Eio flows.
6869+ All functions read data incrementally from the underlying flow, without
70+ loading the entire file into memory first. *)
7172 (** {2 High-Level Parsing} *)
73···75 ?resolve_aliases:bool ->
76 ?max_nodes:int ->
77 ?max_depth:int ->
78+ _ Eio.Flow.source ->
79+ value
80 (** Parse a JSON-compatible value from an Eio flow.
8182 @param resolve_aliases Whether to expand aliases (default: true)
···87 ?resolve_aliases:bool ->
88 ?max_nodes:int ->
89 ?max_depth:int ->
90+ _ Eio.Flow.source ->
91+ yaml
92 (** Parse a full YAML value from an Eio flow.
9394 By default, aliases are NOT resolved, preserving the document structure.
···103 (** {2 Event-Based Streaming} *)
104105 type event_reader
106+ (** A streaming event reader backed by a flow. Events are parsed incrementally
107+ as requested. *)
108109 val event_reader : _ Eio.Flow.source -> event_reader
110 (** Create an event reader from an Eio flow. *)
111112 val next_event : event_reader -> Yamlrw.Event.spanned option
113+ (** Get the next event from an event reader. Returns [None] when parsing is
114+ complete. *)
115116 val iter_events :
117+ (event -> Yamlrw.Span.t -> unit) -> _ Eio.Flow.source -> unit
0118 (** Iterate over all events from a flow. *)
119120+ val fold_events : ('a -> event -> 'a) -> 'a -> _ Eio.Flow.source -> 'a
00121 (** Fold over all events from a flow. *)
122123+ val iter_documents : (document -> unit) -> _ Eio.Flow.source -> unit
00124 (** Iterate over documents from a flow, calling [f] for each document. *)
125126+ val fold_documents : ('a -> document -> 'a) -> 'a -> _ Eio.Flow.source -> 'a
00127 (** Fold over documents from a flow. *)
128end
129···140 ?encoding:Yamlrw.Encoding.t ->
141 ?scalar_style:Yamlrw.Scalar_style.t ->
142 ?layout_style:Yamlrw.Layout_style.t ->
143+ _ Eio.Flow.sink ->
144+ value ->
145+ unit
146 (** Write a JSON-compatible value to an Eio flow.
147148 @param encoding Output encoding (default: UTF-8)
···153 ?encoding:Yamlrw.Encoding.t ->
154 ?scalar_style:Yamlrw.Scalar_style.t ->
155 ?layout_style:Yamlrw.Layout_style.t ->
156+ _ Eio.Flow.sink ->
157+ yaml ->
158+ unit
159 (** Write a full YAML value to an Eio flow.
160161 @param encoding Output encoding (default: UTF-8)
···167 ?scalar_style:Yamlrw.Scalar_style.t ->
168 ?layout_style:Yamlrw.Layout_style.t ->
169 ?resolve_aliases:bool ->
170+ _ Eio.Flow.sink ->
171+ document list ->
172+ unit
173 (** Write multiple YAML documents to an Eio flow.
174175 @param encoding Output encoding (default: UTF-8)
···180 (** {2 Event-Based Streaming} *)
181182 type event_writer
183+ (** A streaming event writer backed by a flow. Events are written
184+ incrementally to the underlying flow. *)
185186 val event_writer :
187 ?encoding:Yamlrw.Encoding.t ->
188 ?scalar_style:Yamlrw.Scalar_style.t ->
189 ?layout_style:Yamlrw.Layout_style.t ->
190+ _ Eio.Flow.sink ->
191+ event_writer
192+ (** Create an event writer that writes directly to a flow. Events are written
193+ incrementally as they are emitted.
194195 @param encoding Output encoding (default: UTF-8)
196 @param scalar_style Preferred scalar style (default: Any)
···213 ?max_nodes:int ->
214 ?max_depth:int ->
215 fs:_ Eio.Path.t ->
216+ string ->
217+ value
218(** Read a value from a file path.
219220+ @param fs
221+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
222223val yaml_of_file :
224 ?resolve_aliases:bool ->
225 ?max_nodes:int ->
226 ?max_depth:int ->
227 fs:_ Eio.Path.t ->
228+ string ->
229+ yaml
230(** Read full YAML from a file path.
231232+ @param fs
233+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
234235+val documents_of_file : fs:_ Eio.Path.t -> string -> document list
00236(** Read documents from a file path.
237238+ @param fs
239+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
240241val to_file :
242 ?encoding:Yamlrw.Encoding.t ->
243 ?scalar_style:Yamlrw.Scalar_style.t ->
244 ?layout_style:Yamlrw.Layout_style.t ->
245 fs:_ Eio.Path.t ->
246+ string ->
247+ value ->
248+ unit
249(** Write a value to a file path.
250251+ @param fs
252+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
253254val yaml_to_file :
255 ?encoding:Yamlrw.Encoding.t ->
256 ?scalar_style:Yamlrw.Scalar_style.t ->
257 ?layout_style:Yamlrw.Layout_style.t ->
258 fs:_ Eio.Path.t ->
259+ string ->
260+ yaml ->
261+ unit
262(** Write full YAML to a file path.
263264+ @param fs
265+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
266267val documents_to_file :
268 ?encoding:Yamlrw.Encoding.t ->
···270 ?layout_style:Yamlrw.Layout_style.t ->
271 ?resolve_aliases:bool ->
272 fs:_ Eio.Path.t ->
273+ string ->
274+ document list ->
275+ unit
276(** Write documents to a file path.
277278+ @param fs
279+ The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
+209-211
lib/emitter.ml
···56(** Emitter - converts YAML data structures to string output
78- The emitter can write to either a Buffer (default) or directly to a
9- bytesrw Bytes.Writer for streaming output. *)
1011type config = {
12 encoding : Encoding.t;
···17 canonical : bool;
18}
1920-let default_config = {
21- encoding = `Utf8;
22- scalar_style = `Any;
23- layout_style = `Any;
24- indent = 2;
25- width = 80;
26- canonical = false;
27-}
02829type state =
30 | Initial
31 | Stream_started
32 | Document_started
33- | In_block_sequence of int (* indent level *)
34 | In_block_mapping_key of int
35 | In_block_mapping_value of int
36- | In_block_mapping_first_key of int (* first key after "- ", no indent needed *)
037 | In_flow_sequence
38 | In_flow_mapping_key
39 | In_flow_mapping_value
···41 | Stream_ended
4243(** Output sink - either a Buffer or a bytesrw Writer *)
44-type sink =
45- | Buffer_sink of Buffer.t
46- | Writer_sink of Bytesrw.Bytes.Writer.t
4748type t = {
49 config : config;
···55 mutable need_separator : bool;
56}
5758-let create ?(config = default_config) () = {
59- config;
60- sink = Buffer_sink (Buffer.create 1024);
61- state = Initial;
62- states = [];
63- indent = 0;
64- flow_level = 0;
65- need_separator = false;
66-}
06768(** Create an emitter that writes directly to a Bytes.Writer *)
69-let of_writer ?(config = default_config) writer = {
70- config;
71- sink = Writer_sink writer;
72- state = Initial;
73- states = [];
74- indent = 0;
75- flow_level = 0;
76- need_separator = false;
77-}
07879let contents t =
80 match t.sink with
81 | Buffer_sink buf -> Buffer.contents buf
82- | Writer_sink _ -> "" (* No accumulated content for writer sink *)
8384let reset t =
85 (match t.sink with
86- | Buffer_sink buf -> Buffer.clear buf
87- | Writer_sink _ -> ());
88 t.state <- Initial;
89 t.states <- [];
90 t.indent <- 0;
···107108let write_indent t =
109 if t.indent <= 8 then
110- for _ = 1 to t.indent do write_char t ' ' done
111- else
112- write t (String.make t.indent ' ')
0113114-let write_newline t =
115- write_char t '\n'
116117let push_state t s =
118 t.states <- t.state :: t.states;
···123 | s :: rest ->
124 t.state <- s;
125 t.states <- rest
126- | [] ->
127- t.state <- Stream_ended
128129-(** Escape a string for double-quoted output.
130- Uses a buffer to batch writes instead of character-by-character. *)
131let escape_double_quoted value =
132 let len = String.length value in
133 (* Check if any escaping is needed *)
···140 done;
141 if not !needs_escape then value
142 else begin
143- let buf = Buffer.create (len + len / 4) in
144 for i = 0 to len - 1 do
145 match value.[i] with
146 | '"' -> Buffer.add_string buf "\\\""
···148 | '\n' -> Buffer.add_string buf "\\n"
149 | '\r' -> Buffer.add_string buf "\\r"
150 | '\t' -> Buffer.add_string buf "\\t"
151- | c when c < ' ' -> Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c))
0152 | c -> Buffer.add_char buf c
153 done;
154 Buffer.contents buf
···159 if not (String.contains value '\'') then value
160 else begin
161 let len = String.length value in
162- let buf = Buffer.create (len + len / 8) in
163 for i = 0 to len - 1 do
164 let c = value.[i] in
165- if c = '\'' then Buffer.add_string buf "''"
166- else Buffer.add_char buf c
167 done;
168 Buffer.contents buf
169 end
170171(** Write scalar with appropriate quoting *)
172let write_scalar t ?(style = `Any) value =
173- match (match style with `Any -> Quoting.choose_style value | s -> s) with
174- | `Plain | `Any ->
175- write t value
176 | `Single_quoted ->
177 write_char t '\'';
178 write t (escape_single_quoted value);
···184 | `Literal ->
185 write t "|";
186 write_newline t;
187- String.split_on_char '\n' value |> List.iter (fun line ->
188- write_indent t;
189- write t line;
190- write_newline t
191- )
192 | `Folded ->
193 write t ">";
194 write_newline t;
195- String.split_on_char '\n' value |> List.iter (fun line ->
196- write_indent t;
197- write t line;
198- write_newline t
199- )
200201(** Write anchor if present *)
202let write_anchor t anchor =
···221222let emit t (ev : Event.t) =
223 match ev with
224- | Event.Stream_start _ ->
225- t.state <- Stream_started
226-227- | Event.Stream_end ->
228- t.state <- Stream_ended
229-230 | Event.Document_start { version; implicit } ->
231 if not implicit then begin
232 (match version with
233- | Some (maj, min) ->
234- write t (Printf.sprintf "%%YAML %d.%d\n" maj min)
235- | None -> ());
236 write t "---";
237 write_newline t
238 end;
239 t.state <- Document_started
240-241 | Event.Document_end { implicit } ->
242 if not implicit then begin
243 write t "...";
244 write_newline t
245 end;
246 t.state <- Document_ended
247-248 | Event.Alias { anchor } ->
249 if t.flow_level > 0 then begin
250 if t.need_separator then write t ", ";
251 t.need_separator <- true;
252 write_char t '*';
253 write t anchor
254- end else begin
255- (match t.state with
256- | In_block_sequence _ ->
257- write_indent t;
258- write t "- *";
259- write t anchor;
260- write_newline t
261- | In_block_mapping_key _ ->
262- write_indent t;
263- write_char t '*';
264- write t anchor;
265- write t ": ";
266- t.state <- In_block_mapping_value t.indent
267- | In_block_mapping_value indent ->
268- write_char t '*';
269- write t anchor;
270- write_newline t;
271- t.state <- In_block_mapping_key indent
272- | _ ->
273- write_char t '*';
274- write t anchor;
275- write_newline t)
0276 end
277-278 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } ->
279 if t.flow_level > 0 then begin
280- (match t.state with
281- | In_flow_mapping_key ->
282- if t.need_separator then write t ", ";
283- write_anchor t anchor;
284- write_tag t ~implicit:plain_implicit tag;
285- write_scalar t ~style value;
286- write t ": ";
287- t.need_separator <- false;
288- t.state <- In_flow_mapping_value
289- | In_flow_mapping_value ->
290- if t.need_separator then begin
291- (* We just finished a nested structure (array/mapping),
292 so this scalar is the next key, not a value *)
293- write t ", ";
294- write_anchor t anchor;
295- write_tag t ~implicit:plain_implicit tag;
296- write_scalar t ~style value;
297- write t ": ";
298- t.need_separator <- false;
299- t.state <- In_flow_mapping_value
300- end else begin
301- (* Normal value scalar *)
302- write_anchor t anchor;
303- write_tag t ~implicit:plain_implicit tag;
304- write_scalar t ~style value;
305- t.need_separator <- true;
306- t.state <- In_flow_mapping_key
307- end
308- | _ ->
309- if t.need_separator then write t ", ";
310- t.need_separator <- true;
311- write_anchor t anchor;
312- write_tag t ~implicit:plain_implicit tag;
313- write_scalar t ~style value)
314- end else begin
00315 match t.state with
316 | In_block_sequence _ ->
317 write_indent t;
···347 write_scalar t ~style value;
348 write_newline t
349 end
350-351 | Event.Sequence_start { anchor; tag; implicit; style } ->
352 let use_flow = style = `Flow || t.flow_level > 0 in
353 if t.flow_level > 0 then begin
354- (match t.state with
355- | In_flow_mapping_key ->
356- if t.need_separator then write t ", ";
357- write_anchor t anchor;
358- write_tag t ~implicit tag;
359- write_char t '[';
360- t.flow_level <- t.flow_level + 1;
361- t.need_separator <- false;
362- push_state t In_flow_mapping_value; (* After ] we'll be in value position but sequence handles it *)
363- t.state <- In_flow_sequence
364- | In_flow_mapping_value ->
365- write_anchor t anchor;
366- write_tag t ~implicit tag;
367- write_char t '[';
368- t.flow_level <- t.flow_level + 1;
369- t.need_separator <- false;
370- push_state t In_flow_mapping_key;
371- t.state <- In_flow_sequence
372- | _ ->
373- if t.need_separator then write t ", ";
374- write_anchor t anchor;
375- write_tag t ~implicit tag;
376- write_char t '[';
377- t.flow_level <- t.flow_level + 1;
378- t.need_separator <- false;
379- push_state t In_flow_sequence)
380- end else begin
00381 match t.state with
382 | In_block_sequence _ ->
383 write_indent t;
···389 t.flow_level <- t.flow_level + 1;
390 t.need_separator <- false;
391 push_state t In_flow_sequence
392- end else begin
0393 write_newline t;
394 push_state t (In_block_sequence t.indent);
395 t.indent <- t.indent + t.config.indent
···423 (* Save key state to return to after flow sequence *)
424 t.state <- In_block_mapping_key indent;
425 push_state t In_flow_sequence
426- end else begin
0427 write_newline t;
428 (* Save key state to return to after nested sequence *)
429 t.state <- In_block_mapping_key indent;
···438 t.flow_level <- t.flow_level + 1;
439 t.need_separator <- false;
440 push_state t In_flow_sequence
441- end else begin
0442 push_state t (In_block_sequence t.indent);
443 t.state <- In_block_sequence t.indent
444 end
445 end
446-447 | Event.Sequence_end ->
448 if t.flow_level > 0 then begin
449 write_char t ']';
···451 t.need_separator <- true;
452 pop_state t;
453 (* Write newline if returning to block context *)
454- (match t.state with
455- | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
456- | _ -> ())
457- end else begin
0458 t.indent <- t.indent - t.config.indent;
459 pop_state t
460 end
461-462 | Event.Mapping_start { anchor; tag; implicit; style } ->
463 let use_flow = style = `Flow || t.flow_level > 0 in
464 if t.flow_level > 0 then begin
465- (match t.state with
466- | In_flow_mapping_key ->
467- if t.need_separator then write t ", ";
468- write_anchor t anchor;
469- write_tag t ~implicit tag;
470- write_char t '{';
471- t.flow_level <- t.flow_level + 1;
472- t.need_separator <- false;
473- push_state t In_flow_mapping_value;
474- t.state <- In_flow_mapping_key
475- | In_flow_mapping_value ->
476- write_anchor t anchor;
477- write_tag t ~implicit tag;
478- write_char t '{';
479- t.flow_level <- t.flow_level + 1;
480- t.need_separator <- false;
481- push_state t In_flow_mapping_key;
482- t.state <- In_flow_mapping_key
483- | _ ->
484- if t.need_separator then write t ", ";
485- write_anchor t anchor;
486- write_tag t ~implicit tag;
487- write_char t '{';
488- t.flow_level <- t.flow_level + 1;
489- t.need_separator <- false;
490- push_state t In_flow_mapping_key)
491- end else begin
0492 match t.state with
493 | In_block_sequence _ ->
494 write_indent t;
···500 t.flow_level <- t.flow_level + 1;
501 t.need_separator <- false;
502 push_state t In_flow_mapping_key
503- end else begin
0504 (* Don't write newline - first key goes on same line as "- " *)
505 push_state t (In_block_sequence t.indent);
506 t.indent <- t.indent + t.config.indent;
···535 (* Save key state to return to after flow mapping *)
536 t.state <- In_block_mapping_key indent;
537 push_state t In_flow_mapping_key
538- end else begin
0539 write_newline t;
540 (* Save key state to return to after nested mapping *)
541 t.state <- In_block_mapping_key indent;
···550 t.flow_level <- t.flow_level + 1;
551 t.need_separator <- false;
552 push_state t In_flow_mapping_key
553- end else begin
0554 push_state t (In_block_mapping_key t.indent);
555 t.state <- In_block_mapping_key t.indent
556 end
557 end
558-559 | Event.Mapping_end ->
560 if t.flow_level > 0 then begin
561 write_char t '}';
···563 t.need_separator <- true;
564 pop_state t;
565 (* Write newline if returning to block context *)
566- (match t.state with
567- | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
568- | _ -> ())
569- end else begin
0570 t.indent <- t.indent - t.config.indent;
571 pop_state t
572 end
573574-(** Access to the underlying buffer for advanced use.
575- Returns None if emitter is writing to a Writer instead of Buffer. *)
576let buffer t =
577- match t.sink with
578- | Buffer_sink buf -> Some buf
579- | Writer_sink _ -> None
580581(** Get config *)
582let config t = t.config
583584(** Check if emitter is writing to a Writer *)
585let is_streaming t =
586- match t.sink with
587- | Writer_sink _ -> true
588- | Buffer_sink _ -> false
589590(** Flush the writer sink (no-op for buffer sink) *)
591let flush t =
···56(** Emitter - converts YAML data structures to string output
78+ The emitter can write to either a Buffer (default) or directly to a bytesrw
9+ Bytes.Writer for streaming output. *)
1011type config = {
12 encoding : Encoding.t;
···17 canonical : bool;
18}
1920+let default_config =
21+ {
22+ encoding = `Utf8;
23+ scalar_style = `Any;
24+ layout_style = `Any;
25+ indent = 2;
26+ width = 80;
27+ canonical = false;
28+ }
2930type state =
31 | Initial
32 | Stream_started
33 | Document_started
34+ | In_block_sequence of int (* indent level *)
35 | In_block_mapping_key of int
36 | In_block_mapping_value of int
37+ | In_block_mapping_first_key of
38+ int (* first key after "- ", no indent needed *)
39 | In_flow_sequence
40 | In_flow_mapping_key
41 | In_flow_mapping_value
···43 | Stream_ended
4445(** Output sink - either a Buffer or a bytesrw Writer *)
46+type sink = Buffer_sink of Buffer.t | Writer_sink of Bytesrw.Bytes.Writer.t
004748type t = {
49 config : config;
···55 mutable need_separator : bool;
56}
5758+let create ?(config = default_config) () =
59+ {
60+ config;
61+ sink = Buffer_sink (Buffer.create 1024);
62+ state = Initial;
63+ states = [];
64+ indent = 0;
65+ flow_level = 0;
66+ need_separator = false;
67+ }
6869(** Create an emitter that writes directly to a Bytes.Writer *)
70+let of_writer ?(config = default_config) writer =
71+ {
72+ config;
73+ sink = Writer_sink writer;
74+ state = Initial;
75+ states = [];
76+ indent = 0;
77+ flow_level = 0;
78+ need_separator = false;
79+ }
8081let contents t =
82 match t.sink with
83 | Buffer_sink buf -> Buffer.contents buf
84+ | Writer_sink _ -> "" (* No accumulated content for writer sink *)
8586let reset t =
87 (match t.sink with
88+ | Buffer_sink buf -> Buffer.clear buf
89+ | Writer_sink _ -> ());
90 t.state <- Initial;
91 t.states <- [];
92 t.indent <- 0;
···109110let write_indent t =
111 if t.indent <= 8 then
112+ for _ = 1 to t.indent do
113+ write_char t ' '
114+ done
115+ else write t (String.make t.indent ' ')
116117+let write_newline t = write_char t '\n'
0118119let push_state t s =
120 t.states <- t.state :: t.states;
···125 | s :: rest ->
126 t.state <- s;
127 t.states <- rest
128+ | [] -> t.state <- Stream_ended
0129130+(** Escape a string for double-quoted output. Uses a buffer to batch writes
131+ instead of character-by-character. *)
132let escape_double_quoted value =
133 let len = String.length value in
134 (* Check if any escaping is needed *)
···141 done;
142 if not !needs_escape then value
143 else begin
144+ let buf = Buffer.create (len + (len / 4)) in
145 for i = 0 to len - 1 do
146 match value.[i] with
147 | '"' -> Buffer.add_string buf "\\\""
···149 | '\n' -> Buffer.add_string buf "\\n"
150 | '\r' -> Buffer.add_string buf "\\r"
151 | '\t' -> Buffer.add_string buf "\\t"
152+ | c when c < ' ' ->
153+ Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c))
154 | c -> Buffer.add_char buf c
155 done;
156 Buffer.contents buf
···161 if not (String.contains value '\'') then value
162 else begin
163 let len = String.length value in
164+ let buf = Buffer.create (len + (len / 8)) in
165 for i = 0 to len - 1 do
166 let c = value.[i] in
167+ if c = '\'' then Buffer.add_string buf "''" else Buffer.add_char buf c
0168 done;
169 Buffer.contents buf
170 end
171172(** Write scalar with appropriate quoting *)
173let write_scalar t ?(style = `Any) value =
174+ match match style with `Any -> Quoting.choose_style value | s -> s with
175+ | `Plain | `Any -> write t value
0176 | `Single_quoted ->
177 write_char t '\'';
178 write t (escape_single_quoted value);
···184 | `Literal ->
185 write t "|";
186 write_newline t;
187+ String.split_on_char '\n' value
188+ |> List.iter (fun line ->
189+ write_indent t;
190+ write t line;
191+ write_newline t)
192 | `Folded ->
193 write t ">";
194 write_newline t;
195+ String.split_on_char '\n' value
196+ |> List.iter (fun line ->
197+ write_indent t;
198+ write t line;
199+ write_newline t)
200201(** Write anchor if present *)
202let write_anchor t anchor =
···221222let emit t (ev : Event.t) =
223 match ev with
224+ | Event.Stream_start _ -> t.state <- Stream_started
225+ | Event.Stream_end -> t.state <- Stream_ended
0000226 | Event.Document_start { version; implicit } ->
227 if not implicit then begin
228 (match version with
229+ | Some (maj, min) -> write t (Printf.sprintf "%%YAML %d.%d\n" maj min)
230+ | None -> ());
0231 write t "---";
232 write_newline t
233 end;
234 t.state <- Document_started
0235 | Event.Document_end { implicit } ->
236 if not implicit then begin
237 write t "...";
238 write_newline t
239 end;
240 t.state <- Document_ended
0241 | Event.Alias { anchor } ->
242 if t.flow_level > 0 then begin
243 if t.need_separator then write t ", ";
244 t.need_separator <- true;
245 write_char t '*';
246 write t anchor
247+ end
248+ else begin
249+ match t.state with
250+ | In_block_sequence _ ->
251+ write_indent t;
252+ write t "- *";
253+ write t anchor;
254+ write_newline t
255+ | In_block_mapping_key _ ->
256+ write_indent t;
257+ write_char t '*';
258+ write t anchor;
259+ write t ": ";
260+ t.state <- In_block_mapping_value t.indent
261+ | In_block_mapping_value indent ->
262+ write_char t '*';
263+ write t anchor;
264+ write_newline t;
265+ t.state <- In_block_mapping_key indent
266+ | _ ->
267+ write_char t '*';
268+ write t anchor;
269+ write_newline t
270 end
0271 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } ->
272 if t.flow_level > 0 then begin
273+ match t.state with
274+ | In_flow_mapping_key ->
275+ if t.need_separator then write t ", ";
276+ write_anchor t anchor;
277+ write_tag t ~implicit:plain_implicit tag;
278+ write_scalar t ~style value;
279+ write t ": ";
280+ t.need_separator <- false;
281+ t.state <- In_flow_mapping_value
282+ | In_flow_mapping_value ->
283+ if t.need_separator then begin
284+ (* We just finished a nested structure (array/mapping),
285 so this scalar is the next key, not a value *)
286+ write t ", ";
287+ write_anchor t anchor;
288+ write_tag t ~implicit:plain_implicit tag;
289+ write_scalar t ~style value;
290+ write t ": ";
291+ t.need_separator <- false;
292+ t.state <- In_flow_mapping_value
293+ end
294+ else begin
295+ (* Normal value scalar *)
296+ write_anchor t anchor;
297+ write_tag t ~implicit:plain_implicit tag;
298+ write_scalar t ~style value;
299+ t.need_separator <- true;
300+ t.state <- In_flow_mapping_key
301+ end
302+ | _ ->
303+ if t.need_separator then write t ", ";
304+ t.need_separator <- true;
305+ write_anchor t anchor;
306+ write_tag t ~implicit:plain_implicit tag;
307+ write_scalar t ~style value
308+ end
309+ else begin
310 match t.state with
311 | In_block_sequence _ ->
312 write_indent t;
···342 write_scalar t ~style value;
343 write_newline t
344 end
0345 | Event.Sequence_start { anchor; tag; implicit; style } ->
346 let use_flow = style = `Flow || t.flow_level > 0 in
347 if t.flow_level > 0 then begin
348+ match t.state with
349+ | In_flow_mapping_key ->
350+ if t.need_separator then write t ", ";
351+ write_anchor t anchor;
352+ write_tag t ~implicit tag;
353+ write_char t '[';
354+ t.flow_level <- t.flow_level + 1;
355+ t.need_separator <- false;
356+ push_state t In_flow_mapping_value;
357+ (* After ] we'll be in value position but sequence handles it *)
358+ t.state <- In_flow_sequence
359+ | In_flow_mapping_value ->
360+ write_anchor t anchor;
361+ write_tag t ~implicit tag;
362+ write_char t '[';
363+ t.flow_level <- t.flow_level + 1;
364+ t.need_separator <- false;
365+ push_state t In_flow_mapping_key;
366+ t.state <- In_flow_sequence
367+ | _ ->
368+ if t.need_separator then write t ", ";
369+ write_anchor t anchor;
370+ write_tag t ~implicit tag;
371+ write_char t '[';
372+ t.flow_level <- t.flow_level + 1;
373+ t.need_separator <- false;
374+ push_state t In_flow_sequence
375+ end
376+ else begin
377 match t.state with
378 | In_block_sequence _ ->
379 write_indent t;
···385 t.flow_level <- t.flow_level + 1;
386 t.need_separator <- false;
387 push_state t In_flow_sequence
388+ end
389+ else begin
390 write_newline t;
391 push_state t (In_block_sequence t.indent);
392 t.indent <- t.indent + t.config.indent
···420 (* Save key state to return to after flow sequence *)
421 t.state <- In_block_mapping_key indent;
422 push_state t In_flow_sequence
423+ end
424+ else begin
425 write_newline t;
426 (* Save key state to return to after nested sequence *)
427 t.state <- In_block_mapping_key indent;
···436 t.flow_level <- t.flow_level + 1;
437 t.need_separator <- false;
438 push_state t In_flow_sequence
439+ end
440+ else begin
441 push_state t (In_block_sequence t.indent);
442 t.state <- In_block_sequence t.indent
443 end
444 end
0445 | Event.Sequence_end ->
446 if t.flow_level > 0 then begin
447 write_char t ']';
···449 t.need_separator <- true;
450 pop_state t;
451 (* Write newline if returning to block context *)
452+ match t.state with
453+ | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
454+ | _ -> ()
455+ end
456+ else begin
457 t.indent <- t.indent - t.config.indent;
458 pop_state t
459 end
0460 | Event.Mapping_start { anchor; tag; implicit; style } ->
461 let use_flow = style = `Flow || t.flow_level > 0 in
462 if t.flow_level > 0 then begin
463+ match t.state with
464+ | In_flow_mapping_key ->
465+ if t.need_separator then write t ", ";
466+ write_anchor t anchor;
467+ write_tag t ~implicit tag;
468+ write_char t '{';
469+ t.flow_level <- t.flow_level + 1;
470+ t.need_separator <- false;
471+ push_state t In_flow_mapping_value;
472+ t.state <- In_flow_mapping_key
473+ | In_flow_mapping_value ->
474+ write_anchor t anchor;
475+ write_tag t ~implicit tag;
476+ write_char t '{';
477+ t.flow_level <- t.flow_level + 1;
478+ t.need_separator <- false;
479+ push_state t In_flow_mapping_key;
480+ t.state <- In_flow_mapping_key
481+ | _ ->
482+ if t.need_separator then write t ", ";
483+ write_anchor t anchor;
484+ write_tag t ~implicit tag;
485+ write_char t '{';
486+ t.flow_level <- t.flow_level + 1;
487+ t.need_separator <- false;
488+ push_state t In_flow_mapping_key
489+ end
490+ else begin
491 match t.state with
492 | In_block_sequence _ ->
493 write_indent t;
···499 t.flow_level <- t.flow_level + 1;
500 t.need_separator <- false;
501 push_state t In_flow_mapping_key
502+ end
503+ else begin
504 (* Don't write newline - first key goes on same line as "- " *)
505 push_state t (In_block_sequence t.indent);
506 t.indent <- t.indent + t.config.indent;
···535 (* Save key state to return to after flow mapping *)
536 t.state <- In_block_mapping_key indent;
537 push_state t In_flow_mapping_key
538+ end
539+ else begin
540 write_newline t;
541 (* Save key state to return to after nested mapping *)
542 t.state <- In_block_mapping_key indent;
···551 t.flow_level <- t.flow_level + 1;
552 t.need_separator <- false;
553 push_state t In_flow_mapping_key
554+ end
555+ else begin
556 push_state t (In_block_mapping_key t.indent);
557 t.state <- In_block_mapping_key t.indent
558 end
559 end
0560 | Event.Mapping_end ->
561 if t.flow_level > 0 then begin
562 write_char t '}';
···564 t.need_separator <- true;
565 pop_state t;
566 (* Write newline if returning to block context *)
567+ match t.state with
568+ | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
569+ | _ -> ()
570+ end
571+ else begin
572 t.indent <- t.indent - t.config.indent;
573 pop_state t
574 end
575576+(** Access to the underlying buffer for advanced use. Returns None if emitter is
577+ writing to a Writer instead of Buffer. *)
578let buffer t =
579+ match t.sink with Buffer_sink buf -> Some buf | Writer_sink _ -> None
00580581(** Get config *)
582let config t = t.config
583584(** Check if emitter is writing to a Writer *)
585let is_streaming t =
586+ match t.sink with Writer_sink _ -> true | Buffer_sink _ -> false
00587588(** Flush the writer sink (no-op for buffer sink) *)
589let flush t =
+12-20
lib/encoding.ml
···56(** Character encoding detection and handling *)
78-type t = [
9- | `Utf8
10- | `Utf16be
11- | `Utf16le
12- | `Utf32be
13- | `Utf32le
14-]
1516let to_string = function
17 | `Utf8 -> "UTF-8"
···20 | `Utf32be -> "UTF-32BE"
21 | `Utf32le -> "UTF-32LE"
2223-let pp fmt t =
24- Format.pp_print_string fmt (to_string t)
2526-(** Detect encoding from BOM or first bytes.
27- Returns (encoding, bom_length) *)
28let detect s =
29 let len = String.length s in
30 if len = 0 then (`Utf8, 0)
···35 let b3 = if len > 3 then Char.code s.[3] else 0 in
36 match (b0, b1, b2, b3) with
37 (* BOM patterns *)
38- | (0xEF, 0xBB, 0xBF, _) -> (`Utf8, 3)
39- | (0xFE, 0xFF, _, _) -> (`Utf16be, 2)
40- | (0xFF, 0xFE, 0x00, 0x00) -> (`Utf32le, 4)
41- | (0xFF, 0xFE, _, _) -> (`Utf16le, 2)
42- | (0x00, 0x00, 0xFE, 0xFF) -> (`Utf32be, 4)
43 (* Content pattern detection (no BOM) *)
44- | (0x00, 0x00, 0x00, b3) when b3 <> 0x00 -> (`Utf32be, 0)
45- | (b0, 0x00, 0x00, 0x00) when b0 <> 0x00 -> (`Utf32le, 0)
46- | (0x00, b1, _, _) when b1 <> 0x00 -> (`Utf16be, 0)
47- | (b0, 0x00, _, _) when b0 <> 0x00 -> (`Utf16le, 0)
48 | _ -> (`Utf8, 0)
4950let equal a b = a = b
···56(** Character encoding detection and handling *)
78+type t = [ `Utf8 | `Utf16be | `Utf16le | `Utf32be | `Utf32le ]
000000910let to_string = function
11 | `Utf8 -> "UTF-8"
···14 | `Utf32be -> "UTF-32BE"
15 | `Utf32le -> "UTF-32LE"
1617+let pp fmt t = Format.pp_print_string fmt (to_string t)
01819+(** Detect encoding from BOM or first bytes. Returns (encoding, bom_length) *)
020let detect s =
21 let len = String.length s in
22 if len = 0 then (`Utf8, 0)
···27 let b3 = if len > 3 then Char.code s.[3] else 0 in
28 match (b0, b1, b2, b3) with
29 (* BOM patterns *)
30+ | 0xEF, 0xBB, 0xBF, _ -> (`Utf8, 3)
31+ | 0xFE, 0xFF, _, _ -> (`Utf16be, 2)
32+ | 0xFF, 0xFE, 0x00, 0x00 -> (`Utf32le, 4)
33+ | 0xFF, 0xFE, _, _ -> (`Utf16le, 2)
34+ | 0x00, 0x00, 0xFE, 0xFF -> (`Utf32be, 4)
35 (* Content pattern detection (no BOM) *)
36+ | 0x00, 0x00, 0x00, b3 when b3 <> 0x00 -> (`Utf32be, 0)
37+ | b0, 0x00, 0x00, 0x00 when b0 <> 0x00 -> (`Utf32le, 0)
38+ | 0x00, b1, _, _ when b1 <> 0x00 -> (`Utf16be, 0)
39+ | b0, 0x00, _, _ when b0 <> 0x00 -> (`Utf16le, 0)
40 | _ -> (`Utf8, 0)
4142let equal a b = a = b
+135-105
lib/error.ml
···78 Comprehensive error reporting for YAML parsing and emission.
910- This module provides detailed error types that correspond to various
11- failure modes in YAML processing, as specified in the
12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.
1314 Each error includes:
···17 - A context stack showing where the error occurred
18 - Optional source text for error display
1920- See also {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}
21- for background on the YAML processing model. *)
02223(** {2 Error Classification}
24···31 (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
32 | Unexpected_character of char
33 (** Invalid character in input. See
34- {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
35- | Unexpected_eof
36- (** Premature end of input. *)
37 | Invalid_escape_sequence of string
38 (** Invalid escape in double-quoted string. See
39- {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
040 | Invalid_unicode_escape of string
41 (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
42 | Invalid_hex_escape of string
43 (** Invalid hexadecimal escape sequence (\xXX). *)
44 | Invalid_tag of string
45 (** Malformed tag syntax. See
46- {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node Tags)}. *)
047 | Invalid_anchor of string
48 (** Malformed anchor name. See
49- {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
050 | Invalid_alias of string
51 (** Malformed alias reference. See
52- {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
053 | Invalid_comment
54 (** Comment not properly separated from content. See
55- {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}. *)
056 | Unclosed_single_quote
57 (** Unterminated single-quoted scalar. See
58- {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2 (Single-Quoted Style)}. *)
059 | Unclosed_double_quote
60 (** Unterminated double-quoted scalar. See
61- {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3 (Double-Quoted Style)}. *)
062 | Unclosed_flow_sequence
63 (** Missing closing bracket \] for flow sequence. See
64- {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
065 | Unclosed_flow_mapping
66 (** Missing closing brace \} for flow mapping. See
67- {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
068 | Invalid_indentation of int * int
69 (** Incorrect indentation level (expected, got). See
70- {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
071 | Invalid_flow_indentation
72 (** Content in flow collection must be indented. See
73- {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow Styles)}. *)
074 | Tab_in_indentation
75 (** Tab character used for indentation (only spaces allowed). See
76- {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
077 | Invalid_block_scalar_header of string
78 (** Malformed block scalar header (| or >). See
79- {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 (Block Scalar Styles)}. *)
080 | Invalid_quoted_scalar_indentation of string
81 (** Incorrect indentation in quoted scalar. *)
82 | Invalid_directive of string
83 (** Malformed directive. See
84- {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *)
085 | Invalid_yaml_version of string
86 (** Unsupported YAML version in %YAML directive. See
87- {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 (YAML Directives)}. *)
088 | Invalid_tag_directive of string
89 (** Malformed %TAG directive. See
90- {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG Directives)}. *)
091 | Reserved_directive of string
92 (** Reserved directive name. See
93- {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3 (Reserved Directives)}. *)
094 | Illegal_flow_key_line
95 (** Key and colon must be on same line in flow context. See
96- {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
097 | Block_sequence_disallowed
98 (** Block sequence entries not allowed in this context. See
99- {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 (Block Collection Styles)}. *)
100-101 (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *)
102- | Unexpected_token of string
103- (** Unexpected token in event stream. *)
104 | Expected_document_start
105 (** Expected document start marker (---). See
106- {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
0107 | Expected_document_end
108 (** Expected document end marker (...). See
109- {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
0110 | Expected_block_entry
111 (** Expected block sequence entry marker (-). See
112- {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 (Block Sequences)}. *)
0113 | Expected_key
114 (** Expected mapping key. See
115- {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
0116 | Expected_value
117 (** Expected mapping value after colon. See
118- {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
119- | Expected_node
120- (** Expected a YAML node. *)
121- | Expected_scalar
122- (** Expected a scalar value. *)
123 | Expected_sequence_end
124 (** Expected closing bracket \] for flow sequence. See
125- {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
0126 | Expected_mapping_end
127 (** Expected closing brace \} for flow mapping. See
128- {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
0129 | Duplicate_anchor of string
130 (** Anchor name defined multiple times. See
131- {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
0132 | Undefined_alias of string
133 (** Alias references non-existent anchor. See
134- {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
0135 | Alias_cycle of string
136 (** Circular reference in alias chain. See
137- {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
0138 | Multiple_documents
139 (** Multiple documents found when single document expected. See
140- {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
0141 | Mapping_key_too_long
142 (** Mapping key exceeds maximum length (1024 characters). *)
143-144 (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
145 | Invalid_scalar_conversion of string * string
146- (** Cannot convert scalar value to target type (value, target type).
147- See {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core Schema)}. *)
0148 | Type_mismatch of string * string
149 (** Value has wrong type for operation (expected, got). *)
150 | Unresolved_alias of string
151- (** Alias encountered during conversion but not resolved.
152- See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
153- | Key_not_found of string
154- (** Mapping key not found. *)
155 | Alias_expansion_node_limit of int
156- (** Alias expansion exceeded maximum node count (protection against billion laughs attack).
157- See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}.
00158159- The "billion laughs attack" (also known as an XML bomb) is a denial-of-service
160- attack where a small YAML document expands to enormous size through recursive
161- alias expansion. This limit prevents such attacks. *)
0162 | Alias_expansion_depth_limit of int
163- (** Alias expansion exceeded maximum nesting depth (protection against deeply nested aliases).
164- See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}. *)
165-0166 (* Emitter errors *)
167 | Invalid_encoding of string
168 (** Invalid character encoding specified. See
169- {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
0170 | Scalar_contains_invalid_chars of string
171 (** Scalar contains characters invalid for chosen style. *)
172- | Anchor_not_set
173- (** Attempted to emit alias before anchor was defined. *)
174 | Invalid_state of string
175 (** Emitter in invalid state for requested operation. *)
176-177 (* Generic *)
178- | Custom of string
179- (** Custom error message. *)
180181-(** {2 Error Value}
182-183- Full error information including classification, location, and context. *)
184type t = {
185- kind : kind;
186- (** The specific error classification. *)
187 span : Span.t option;
188 (** Source location where the error occurred (if available). *)
189 context : string list;
···191 source : string option;
192 (** Source text for displaying the error in context. *)
193}
0194000195(** {2 Exception}
196197 The main exception type raised by all yamlrw operations.
198199- All parsing, loading, and emitting errors are reported by raising
200- this exception with detailed error information. *)
201-exception Yamlrw_error of t
202203let () =
204 Printexc.register_printer (function
205 | Yamlrw_error e ->
206- let loc = match e.span with
0207 | None -> ""
208 | Some span -> " at " ^ Span.to_string span
209 in
210- Some (Printf.sprintf "Yamlrw_error: %s%s"
211- (match e.kind with Custom s -> s | _ -> "error") loc)
00212 | _ -> None)
213214(** {2 Error Construction} *)
···219 @param context Context stack (defaults to empty)
220 @param source Source text
221 @param kind Error classification *)
222-let make ?span ?(context=[]) ?source kind =
223- { kind; span; context; source }
224225(** [raise ?span ?context ?source kind] constructs and raises an error.
226···248 @param span Source span
249 @param kind Error classification
250 @raise Yamlrw_error *)
251-let raise_span span kind =
252- raise ~span kind
253254-(** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's context.
0255256 This is useful for tracking the processing path through nested structures.
257258 @param ctx Context description (e.g., "parsing mapping key")
259 @param f Function to execute *)
260let with_context ctx f =
261- try f () with
262- | Yamlrw_error e ->
263- Stdlib.raise (Yamlrw_error { e with context = ctx :: e.context })
264265(** {2 Error Formatting} *)
266···274 | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
275 | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
276 | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
277- | Invalid_comment -> "comments must be separated from other tokens by whitespace"
0278 | Unclosed_single_quote -> "unclosed single quote"
279 | Unclosed_double_quote -> "unclosed double quote"
280 | Unclosed_flow_sequence -> "unclosed flow sequence '['"
···285 | Tab_in_indentation -> "tab character in indentation"
286 | Invalid_block_scalar_header s ->
287 Printf.sprintf "invalid block scalar header: %s" s
288- | Invalid_quoted_scalar_indentation s ->
289- Printf.sprintf "%s" s
290 | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
291 | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
292 | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
293 | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
294- | Illegal_flow_key_line -> "key and ':' must be on the same line in flow context"
295- | Block_sequence_disallowed -> "block sequence entries are not allowed in this context"
00296 | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
297 | Expected_document_start -> "expected document start '---'"
298 | Expected_document_end -> "expected document end '...'"
···329330 Includes error kind, source location (if available), and context stack. *)
331let to_string t =
332- let loc = match t.span with
333- | None -> ""
334- | Some span -> " at " ^ Span.to_string span
335 in
336- let ctx = match t.context with
0337 | [] -> ""
338 | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")"
339 in
340 kind_to_string t.kind ^ loc ^ ctx
341342(** [pp fmt t] pretty-prints an error to a formatter. *)
343-let pp fmt t =
344- Format.fprintf fmt "Yamlrw error: %s" (to_string t)
345346(** [pp_with_source ~source fmt t] pretty-prints an error with source context.
347348- Shows the error message followed by the relevant source line with
349- a caret (^) pointing to the error location.
350351 @param source The source text
352 @param fmt Output formatter
353 @param t The error to display *)
354let pp_with_source ~source fmt t =
355-let extract_line source line_num =
356- let lines = String.split_on_char '\n' source in
357- if line_num >= 1 && line_num <= List.length lines then
358- Some (List.nth lines (line_num - 1))
359- else
360- None
361 in
362363 pp fmt t;
364 match t.span with
365 | None -> ()
366- | Some span ->
367 match extract_line source span.start.line with
368 | None -> ()
369 | Some line ->
370 Format.fprintf fmt "\n %d | %s\n" span.start.line line;
371 let padding = String.make (span.start.column - 1) ' ' in
372- Format.fprintf fmt " | %s^" padding
···78 Comprehensive error reporting for YAML parsing and emission.
910+ This module provides detailed error types that correspond to various failure
11+ modes in YAML processing, as specified in the
12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.
1314 Each error includes:
···17 - A context stack showing where the error occurred
18 - Optional source text for error display
1920+ See also
21+ {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} for
22+ background on the YAML processing model. *)
2324(** {2 Error Classification}
25···32 (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
33 | Unexpected_character of char
34 (** Invalid character in input. See
35+ {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
36+ (Character Set)}. *)
37+ | Unexpected_eof (** Premature end of input. *)
38 | Invalid_escape_sequence of string
39 (** Invalid escape in double-quoted string. See
40+ {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7
41+ (Escaped Characters)}. *)
42 | Invalid_unicode_escape of string
43 (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
44 | Invalid_hex_escape of string
45 (** Invalid hexadecimal escape sequence (\xXX). *)
46 | Invalid_tag of string
47 (** Malformed tag syntax. See
48+ {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node
49+ Tags)}. *)
50 | Invalid_anchor of string
51 (** Malformed anchor name. See
52+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
53+ 3.2.2.2 (Anchors and Aliases)}. *)
54 | Invalid_alias of string
55 (** Malformed alias reference. See
56+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
57+ 3.2.2.2 (Anchors and Aliases)}. *)
58 | Invalid_comment
59 (** Comment not properly separated from content. See
60+ {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}.
61+ *)
62 | Unclosed_single_quote
63 (** Unterminated single-quoted scalar. See
64+ {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2
65+ (Single-Quoted Style)}. *)
66 | Unclosed_double_quote
67 (** Unterminated double-quoted scalar. See
68+ {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3
69+ (Double-Quoted Style)}. *)
70 | Unclosed_flow_sequence
71 (** Missing closing bracket \] for flow sequence. See
72+ {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow
73+ Sequences)}. *)
74 | Unclosed_flow_mapping
75 (** Missing closing brace \} for flow mapping. See
76+ {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow
77+ Mappings)}. *)
78 | Invalid_indentation of int * int
79 (** Incorrect indentation level (expected, got). See
80+ {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
81+ (Indentation Spaces)}. *)
82 | Invalid_flow_indentation
83 (** Content in flow collection must be indented. See
84+ {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow
85+ Styles)}. *)
86 | Tab_in_indentation
87 (** Tab character used for indentation (only spaces allowed). See
88+ {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
89+ (Indentation Spaces)}. *)
90 | Invalid_block_scalar_header of string
91 (** Malformed block scalar header (| or >). See
92+ {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1
93+ (Block Scalar Styles)}. *)
94 | Invalid_quoted_scalar_indentation of string
95 (** Incorrect indentation in quoted scalar. *)
96 | Invalid_directive of string
97 (** Malformed directive. See
98+ {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8
99+ (Directives)}. *)
100 | Invalid_yaml_version of string
101 (** Unsupported YAML version in %YAML directive. See
102+ {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1
103+ (YAML Directives)}. *)
104 | Invalid_tag_directive of string
105 (** Malformed %TAG directive. See
106+ {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG
107+ Directives)}. *)
108 | Reserved_directive of string
109 (** Reserved directive name. See
110+ {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3
111+ (Reserved Directives)}. *)
112 | Illegal_flow_key_line
113 (** Key and colon must be on same line in flow context. See
114+ {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow
115+ Mappings)}. *)
116 | Block_sequence_disallowed
117 (** Block sequence entries not allowed in this context. See
118+ {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2
119+ (Block Collection Styles)}. *)
120 (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *)
121+ | Unexpected_token of string (** Unexpected token in event stream. *)
0122 | Expected_document_start
123 (** Expected document start marker (---). See
124+ {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
125+ (Document Markers)}. *)
126 | Expected_document_end
127 (** Expected document end marker (...). See
128+ {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
129+ (Document Markers)}. *)
130 | Expected_block_entry
131 (** Expected block sequence entry marker (-). See
132+ {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1
133+ (Block Sequences)}. *)
134 | Expected_key
135 (** Expected mapping key. See
136+ {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
137+ (Block Mappings)}. *)
138 | Expected_value
139 (** Expected mapping value after colon. See
140+ {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
141+ (Block Mappings)}. *)
142+ | Expected_node (** Expected a YAML node. *)
143+ | Expected_scalar (** Expected a scalar value. *)
0144 | Expected_sequence_end
145 (** Expected closing bracket \] for flow sequence. See
146+ {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow
147+ Sequences)}. *)
148 | Expected_mapping_end
149 (** Expected closing brace \} for flow mapping. See
150+ {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow
151+ Mappings)}. *)
152 | Duplicate_anchor of string
153 (** Anchor name defined multiple times. See
154+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
155+ 3.2.2.2 (Anchors and Aliases)}. *)
156 | Undefined_alias of string
157 (** Alias references non-existent anchor. See
158+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
159+ 3.2.2.2 (Anchors and Aliases)}. *)
160 | Alias_cycle of string
161 (** Circular reference in alias chain. See
162+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
163+ 3.2.2.2 (Anchors and Aliases)}. *)
164 | Multiple_documents
165 (** Multiple documents found when single document expected. See
166+ {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
167+ (Document Markers)}. *)
168 | Mapping_key_too_long
169 (** Mapping key exceeds maximum length (1024 characters). *)
0170 (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
171 | Invalid_scalar_conversion of string * string
172+ (** Cannot convert scalar value to target type (value, target type). See
173+ {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core
174+ Schema)}. *)
175 | Type_mismatch of string * string
176 (** Value has wrong type for operation (expected, got). *)
177 | Unresolved_alias of string
178+ (** Alias encountered during conversion but not resolved. See
179+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
180+ 3.2.2.2 (Anchors and Aliases)}. *)
181+ | Key_not_found of string (** Mapping key not found. *)
182 | Alias_expansion_node_limit of int
183+ (** Alias expansion exceeded maximum node count (protection against
184+ billion laughs attack). See
185+ {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1
186+ (Processes)}.
187188+ The "billion laughs attack" (also known as an XML bomb) is a
189+ denial-of-service attack where a small YAML document expands to
190+ enormous size through recursive alias expansion. This limit prevents
191+ such attacks. *)
192 | Alias_expansion_depth_limit of int
193+ (** Alias expansion exceeded maximum nesting depth (protection against
194+ deeply nested aliases). See
195+ {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1
196+ (Processes)}. *)
197 (* Emitter errors *)
198 | Invalid_encoding of string
199 (** Invalid character encoding specified. See
200+ {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
201+ (Character Set)}. *)
202 | Scalar_contains_invalid_chars of string
203 (** Scalar contains characters invalid for chosen style. *)
204+ | Anchor_not_set (** Attempted to emit alias before anchor was defined. *)
0205 | Invalid_state of string
206 (** Emitter in invalid state for requested operation. *)
0207 (* Generic *)
208+ | Custom of string (** Custom error message. *)
0209000210type t = {
211+ kind : kind; (** The specific error classification. *)
0212 span : Span.t option;
213 (** Source location where the error occurred (if available). *)
214 context : string list;
···216 source : string option;
217 (** Source text for displaying the error in context. *)
218}
219+(** {2 Error Value}
220221+ Full error information including classification, location, and context. *)
222+223+exception Yamlrw_error of t
224(** {2 Exception}
225226 The main exception type raised by all yamlrw operations.
227228+ All parsing, loading, and emitting errors are reported by raising this
229+ exception with detailed error information. *)
0230231let () =
232 Printexc.register_printer (function
233 | Yamlrw_error e ->
234+ let loc =
235+ match e.span with
236 | None -> ""
237 | Some span -> " at " ^ Span.to_string span
238 in
239+ Some
240+ (Printf.sprintf "Yamlrw_error: %s%s"
241+ (match e.kind with Custom s -> s | _ -> "error")
242+ loc)
243 | _ -> None)
244245(** {2 Error Construction} *)
···250 @param context Context stack (defaults to empty)
251 @param source Source text
252 @param kind Error classification *)
253+let make ?span ?(context = []) ?source kind = { kind; span; context; source }
0254255(** [raise ?span ?context ?source kind] constructs and raises an error.
256···278 @param span Source span
279 @param kind Error classification
280 @raise Yamlrw_error *)
281+let raise_span span kind = raise ~span kind
0282283+(** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's
284+ context.
285286 This is useful for tracking the processing path through nested structures.
287288 @param ctx Context description (e.g., "parsing mapping key")
289 @param f Function to execute *)
290let with_context ctx f =
291+ try f ()
292+ with Yamlrw_error e ->
293+ Stdlib.raise (Yamlrw_error { e with context = ctx :: e.context })
294295(** {2 Error Formatting} *)
296···304 | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
305 | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
306 | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
307+ | Invalid_comment ->
308+ "comments must be separated from other tokens by whitespace"
309 | Unclosed_single_quote -> "unclosed single quote"
310 | Unclosed_double_quote -> "unclosed double quote"
311 | Unclosed_flow_sequence -> "unclosed flow sequence '['"
···316 | Tab_in_indentation -> "tab character in indentation"
317 | Invalid_block_scalar_header s ->
318 Printf.sprintf "invalid block scalar header: %s" s
319+ | Invalid_quoted_scalar_indentation s -> Printf.sprintf "%s" s
0320 | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
321 | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
322 | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
323 | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
324+ | Illegal_flow_key_line ->
325+ "key and ':' must be on the same line in flow context"
326+ | Block_sequence_disallowed ->
327+ "block sequence entries are not allowed in this context"
328 | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
329 | Expected_document_start -> "expected document start '---'"
330 | Expected_document_end -> "expected document end '...'"
···361362 Includes error kind, source location (if available), and context stack. *)
363let to_string t =
364+ let loc =
365+ match t.span with None -> "" | Some span -> " at " ^ Span.to_string span
0366 in
367+ let ctx =
368+ match t.context with
369 | [] -> ""
370 | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")"
371 in
372 kind_to_string t.kind ^ loc ^ ctx
373374(** [pp fmt t] pretty-prints an error to a formatter. *)
375+let pp fmt t = Format.fprintf fmt "Yamlrw error: %s" (to_string t)
0376377(** [pp_with_source ~source fmt t] pretty-prints an error with source context.
378379+ Shows the error message followed by the relevant source line with a caret
380+ (^) pointing to the error location.
381382 @param source The source text
383 @param fmt Output formatter
384 @param t The error to display *)
385let pp_with_source ~source fmt t =
386+ let extract_line source line_num =
387+ let lines = String.split_on_char '\n' source in
388+ if line_num >= 1 && line_num <= List.length lines then
389+ Some (List.nth lines (line_num - 1))
390+ else None
0391 in
392393 pp fmt t;
394 match t.span with
395 | None -> ()
396+ | Some span -> (
397 match extract_line source span.start.line with
398 | None -> ()
399 | Some line ->
400 Format.fprintf fmt "\n %d | %s\n" span.start.line line;
401 let padding = String.make (span.start.column - 1) ' ' in
402+ Format.fprintf fmt " | %s^" padding)
···56(** Character input source with lookahead, based on Bytes.Reader.t
78- This module wraps a bytesrw [Bytes.Reader.t] to provide character-by-character
9- access with lookahead for the YAML scanner. Uses bytesrw's sniff and push_back
10- for efficient lookahead without excessive copying.
01112 The same input type works with any reader source: strings, files, channels,
13 or streaming sources like Eio. *)
1415open Bytesrw
1617-(** Re-export character classification *)
18include Char_class
01920type t = {
21 reader : Bytes.Reader.t;
22- mutable current_slice : Bytes.Slice.t option; (** Current slice being consumed *)
23- mutable slice_pos : int; (** Position within current slice *)
24- mutable position : Position.t; (** Line/column tracking *)
025}
2627(** Ensure we have a current slice. Returns true if data available. *)
···33 if Bytes.Slice.is_eod slice then begin
34 t.current_slice <- None;
35 false
36- end else begin
037 t.current_slice <- Some slice;
38 t.slice_pos <- 0;
39 true
···5051(** Create input from a Bytes.Reader.t *)
52let of_reader ?(initial_position = Position.initial) reader =
53- let t = {
54- reader;
55- current_slice = None;
56- slice_pos = 0;
57- position = initial_position;
58- } in
59 (* Use sniff for BOM detection - this is exactly what sniff is for *)
60 let sample = Bytes.Reader.sniff 4 t.reader in
61 let bom_len =
62- if String.length sample >= 3 &&
63- sample.[0] = '\xEF' &&
64- sample.[1] = '\xBB' &&
65- sample.[2] = '\xBF'
66- then 3 (* UTF-8 BOM *)
067 else 0
68 in
69 (* Skip BOM if present *)
70- if bom_len > 0 then
71- Bytes.Reader.skip bom_len t.reader;
72 t
7374(** Create input from a string *)
···77 of_reader reader
7879let position t = t.position
80-81-let is_eof t =
82- not (ensure_slice t)
83-84-let peek t =
85- if ensure_slice t then
86- peek_current t
87- else
88- None
8990let peek_exn t =
91 match peek t with
···112 let sample_offset = n - slice_remaining in
113 if sample_offset < String.length sample then
114 Some sample.[sample_offset]
115- else
116- None
117 end
118- | None ->
119- if n < String.length sample then
120- Some sample.[n]
121- else
122- None
123 end
124125(** Peek at up to n characters as a string *)
···139 let needed_from_reader = n - slice_remaining in
140 let sample = Bytes.Reader.sniff needed_from_reader t.reader in
141 let buf = Buffer.create n in
142- Buffer.add_subbytes buf slice_bytes (slice_first + t.slice_pos) slice_remaining;
00143 Buffer.add_string buf sample;
144 Buffer.contents buf
145 end
146- | None ->
147- if ensure_slice t then
148- peek_string t n
149- else
150- ""
151 end
152153(** Consume next character *)
···161 t.slice_pos <- t.slice_pos + 1;
162 t.position <- Position.advance_char c t.position;
163 (* Check if we've exhausted this slice *)
164- if t.slice_pos >= Bytes.Slice.length slice then
165- t.current_slice <- None;
166 Some c
167 | None -> None
168- end else
169- None
170171let next_exn t =
172 match next t with
···181let skip_while t pred =
182 let rec loop () =
183 match peek t with
184- | Some c when pred c -> ignore (next t); loop ()
00185 | _ -> ()
186 in
187 loop ()
188189(** Check if next char satisfies predicate *)
190-let next_is pred t =
191- match peek t with
192- | None -> false
193- | Some c -> pred c
194195let next_is_break t = next_is is_break t
196let next_is_blank t = next_is is_blank t
···209 if len < 3 then false
210 else
211 let prefix = String.sub s 0 3 in
212- (prefix = "---" || prefix = "...") &&
213- (len = 3 || is_whitespace s.[3])
214 end
215216(** Consume line break, handling \r\n as single break *)
217let consume_break t =
218 match peek t with
219- | Some '\r' ->
220 ignore (next t);
221- (match peek t with
222- | Some '\n' -> ignore (next t)
223- | _ -> ())
224- | Some '\n' ->
225- ignore (next t)
226 | _ -> ()
227228(** Get remaining content from current position *)
···230 let buf = Buffer.create 256 in
231 (* Add current slice remainder *)
232 (match t.current_slice with
233- | Some slice ->
234- let bytes = Bytes.Slice.bytes slice in
235- let first = Bytes.Slice.first slice in
236- let remaining = Bytes.Slice.length slice - t.slice_pos in
237- if remaining > 0 then
238- Buffer.add_subbytes buf bytes (first + t.slice_pos) remaining
239- | None -> ());
240 (* Add remaining from reader *)
241 Bytes.Reader.add_to_buffer buf t.reader;
242 Buffer.contents buf
···253 Some (Stdlib.Bytes.get bytes (first + t.slice_pos - 1))
254 | _ -> None
255256-(** Get a sample of the source for encoding detection.
257- Uses sniff to peek without consuming. *)
258let source t =
259 (* First check current slice *)
260 match t.current_slice with
···268 Bytes.Reader.sniff 4 t.reader
269270(** Get the byte position in the underlying stream *)
271-let byte_pos t =
272- Bytes.Reader.pos t.reader
···56(** Character input source with lookahead, based on Bytes.Reader.t
78+ This module wraps a bytesrw [Bytes.Reader.t] to provide
9+ character-by-character access with lookahead for the YAML scanner. Uses
10+ bytesrw's sniff and push_back for efficient lookahead without excessive
11+ copying.
1213 The same input type works with any reader source: strings, files, channels,
14 or streaming sources like Eio. *)
1516open Bytesrw
17018include Char_class
19+(** Re-export character classification *)
2021type t = {
22 reader : Bytes.Reader.t;
23+ mutable current_slice : Bytes.Slice.t option;
24+ (** Current slice being consumed *)
25+ mutable slice_pos : int; (** Position within current slice *)
26+ mutable position : Position.t; (** Line/column tracking *)
27}
2829(** Ensure we have a current slice. Returns true if data available. *)
···35 if Bytes.Slice.is_eod slice then begin
36 t.current_slice <- None;
37 false
38+ end
39+ else begin
40 t.current_slice <- Some slice;
41 t.slice_pos <- 0;
42 true
···5354(** Create input from a Bytes.Reader.t *)
55let of_reader ?(initial_position = Position.initial) reader =
56+ let t =
57+ { reader; current_slice = None; slice_pos = 0; position = initial_position }
58+ in
00059 (* Use sniff for BOM detection - this is exactly what sniff is for *)
60 let sample = Bytes.Reader.sniff 4 t.reader in
61 let bom_len =
62+ if
63+ String.length sample >= 3
64+ && sample.[0] = '\xEF'
65+ && sample.[1] = '\xBB'
66+ && sample.[2] = '\xBF'
67+ then 3 (* UTF-8 BOM *)
68 else 0
69 in
70 (* Skip BOM if present *)
71+ if bom_len > 0 then Bytes.Reader.skip bom_len t.reader;
072 t
7374(** Create input from a string *)
···77 of_reader reader
7879let position t = t.position
80+let is_eof t = not (ensure_slice t)
81+let peek t = if ensure_slice t then peek_current t else None
00000008283let peek_exn t =
84 match peek t with
···105 let sample_offset = n - slice_remaining in
106 if sample_offset < String.length sample then
107 Some sample.[sample_offset]
108+ else None
0109 end
110+ | None -> if n < String.length sample then Some sample.[n] else None
0000111 end
112113(** Peek at up to n characters as a string *)
···127 let needed_from_reader = n - slice_remaining in
128 let sample = Bytes.Reader.sniff needed_from_reader t.reader in
129 let buf = Buffer.create n in
130+ Buffer.add_subbytes buf slice_bytes
131+ (slice_first + t.slice_pos)
132+ slice_remaining;
133 Buffer.add_string buf sample;
134 Buffer.contents buf
135 end
136+ | None -> if ensure_slice t then peek_string t n else ""
0000137 end
138139(** Consume next character *)
···147 t.slice_pos <- t.slice_pos + 1;
148 t.position <- Position.advance_char c t.position;
149 (* Check if we've exhausted this slice *)
150+ if t.slice_pos >= Bytes.Slice.length slice then t.current_slice <- None;
0151 Some c
152 | None -> None
153+ end
154+ else None
155156let next_exn t =
157 match next t with
···166let skip_while t pred =
167 let rec loop () =
168 match peek t with
169+ | Some c when pred c ->
170+ ignore (next t);
171+ loop ()
172 | _ -> ()
173 in
174 loop ()
175176(** Check if next char satisfies predicate *)
177+let next_is pred t = match peek t with None -> false | Some c -> pred c
000178179let next_is_break t = next_is is_break t
180let next_is_blank t = next_is is_blank t
···193 if len < 3 then false
194 else
195 let prefix = String.sub s 0 3 in
196+ (prefix = "---" || prefix = "...") && (len = 3 || is_whitespace s.[3])
0197 end
198199(** Consume line break, handling \r\n as single break *)
200let consume_break t =
201 match peek t with
202+ | Some '\r' -> (
203 ignore (next t);
204+ match peek t with Some '\n' -> ignore (next t) | _ -> ())
205+ | Some '\n' -> ignore (next t)
000206 | _ -> ()
207208(** Get remaining content from current position *)
···210 let buf = Buffer.create 256 in
211 (* Add current slice remainder *)
212 (match t.current_slice with
213+ | Some slice ->
214+ let bytes = Bytes.Slice.bytes slice in
215+ let first = Bytes.Slice.first slice in
216+ let remaining = Bytes.Slice.length slice - t.slice_pos in
217+ if remaining > 0 then
218+ Buffer.add_subbytes buf bytes (first + t.slice_pos) remaining
219+ | None -> ());
220 (* Add remaining from reader *)
221 Bytes.Reader.add_to_buffer buf t.reader;
222 Buffer.contents buf
···233 Some (Stdlib.Bytes.get bytes (first + t.slice_pos - 1))
234 | _ -> None
235236+(** Get a sample of the source for encoding detection. Uses sniff to peek
237+ without consuming. *)
238let source t =
239 (* First check current slice *)
240 match t.current_slice with
···248 Bytes.Reader.sniff 4 t.reader
249250(** Get the byte position in the underlying stream *)
251+let byte_pos t = Bytes.Reader.pos t.reader
0
+7-18
lib/layout_style.ml
···56(** Collection layout styles *)
78-type t = [
9- | `Any (** Let emitter choose *)
10- | `Block (** Indentation-based *)
11- | `Flow (** Inline with brackets *)
12-]
1314-let to_string = function
15- | `Any -> "any"
16- | `Block -> "block"
17- | `Flow -> "flow"
18-19-let pp fmt t =
20- Format.pp_print_string fmt (to_string t)
21-22let equal a b = a = b
2324let compare a b =
25- let to_int = function
26- | `Any -> 0
27- | `Block -> 1
28- | `Flow -> 2
29- in
30 Int.compare (to_int a) (to_int b)
···56(** Collection layout styles *)
78+type t =
9+ [ `Any (** Let emitter choose *)
10+ | `Block (** Indentation-based *)
11+ | `Flow (** Inline with brackets *) ]
01213+let to_string = function `Any -> "any" | `Block -> "block" | `Flow -> "flow"
14+let pp fmt t = Format.pp_print_string fmt (to_string t)
00000015let equal a b = a = b
1617let compare a b =
18+ let to_int = function `Any -> 0 | `Block -> 1 | `Flow -> 2 in
000019 Int.compare (to_int a) (to_int b)
+138-162
lib/loader.ml
···31 mutable doc_implicit_start : bool;
32}
3334-let create_state () = {
35- stack = [];
36- current = None;
37- documents = [];
38- doc_version = None;
39- doc_implicit_start = true;
40-}
04142(** Process a single event *)
43let rec process_event state (ev : Event.spanned) =
44 match ev.event with
45 | Event.Stream_start _ -> ()
46-47 | Event.Stream_end -> ()
48-49 | Event.Document_start { version; implicit } ->
50 state.doc_version <- version;
51 state.doc_implicit_start <- implicit
52-53 | Event.Document_end { implicit } ->
54- let doc = Document.make
55- ?version:state.doc_version
56- ~implicit_start:state.doc_implicit_start
57- ~implicit_end:implicit
58- state.current
59 in
60 state.documents <- doc :: state.documents;
61 state.current <- None;
62 state.doc_version <- None;
63 state.doc_implicit_start <- true
64-65 | Event.Alias { anchor } ->
66 let node : Yaml.t = `Alias anchor in
67 add_node state node
68-69- | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } ->
70- let scalar = Scalar.make
71- ?anchor ?tag
72- ~plain_implicit ~quoted_implicit
73- ~style value
74 in
75 let node : Yaml.t = `Scalar scalar in
76 add_node state node
77-78 | Event.Sequence_start { anchor; tag; implicit; style } ->
79- let frame = Sequence_frame {
80- anchor; tag; implicit; style;
81- items = [];
82- } in
83 state.stack <- frame :: state.stack
84-85- | Event.Sequence_end ->
86- (match state.stack with
87- | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
88- let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in
89- let node : Yaml.t = `A seq in
90- state.stack <- rest;
91- add_node state node
92- | _ -> Error.raise (Invalid_state "unexpected sequence end"))
93-94 | Event.Mapping_start { anchor; tag; implicit; style } ->
95- let frame = Mapping_frame {
96- anchor; tag; implicit; style;
97- pairs = [];
98- pending_key = None;
99- } in
100 state.stack <- frame :: state.stack
101-102- | Event.Mapping_end ->
103- (match state.stack with
104- | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest ->
105- let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in
106- let node : Yaml.t = `O map in
107- state.stack <- rest;
108- add_node state node
109- | Mapping_frame { pending_key = Some _; _ } :: _ ->
110- Error.raise (Invalid_state "mapping ended with pending key")
111- | _ -> Error.raise (Invalid_state "unexpected mapping end"))
000112113(** Add a node to current context *)
114and add_node state node =
115 match state.stack with
116- | [] ->
117- state.current <- Some node
118-119 | Sequence_frame f :: rest ->
120 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
121-122- | Mapping_frame f :: rest ->
123- (match f.pending_key with
124- | None ->
125- (* This is a key *)
126- state.stack <- Mapping_frame { f with pending_key = Some node } :: rest
127- | Some key ->
128- (* This is a value *)
129- state.stack <- Mapping_frame {
130- f with
131- pairs = (key, node) :: f.pairs;
132- pending_key = None;
133- } :: rest)
134135(** Internal: parse all documents from a parser *)
136let parse_all_documents parser =
···149150 @param resolve_aliases Whether to resolve aliases (default true)
151 @param max_nodes Maximum nodes during alias expansion (default 10M)
152- @param max_depth Maximum alias nesting depth (default 100)
153-*)
154-let value_of_string
155- ?(resolve_aliases = true)
156 ?(max_nodes = Yaml.default_max_alias_nodes)
157- ?(max_depth = Yaml.default_max_alias_depth)
158- s =
159 let docs = parse_all_documents (Parser.of_string s) in
160 let doc = single_document_or_error docs ~empty:(Document.make None) in
161 match Document.root doc with
162 | None -> `Null
163 | Some yaml ->
164- Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
0165166(** Load single document as Yaml.
167168 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
169 @param max_nodes Maximum nodes during alias expansion (default 10M)
170- @param max_depth Maximum alias nesting depth (default 100)
171-*)
172-let yaml_of_string
173- ?(resolve_aliases = false)
174 ?(max_nodes = Yaml.default_max_alias_nodes)
175- ?(max_depth = Yaml.default_max_alias_depth)
176- s =
177 let docs = parse_all_documents (Parser.of_string s) in
178 let doc = single_document_or_error docs ~empty:(Document.make None) in
179 match Document.root doc with
180 | None -> `Scalar (Scalar.make "")
181 | Some yaml ->
182- if resolve_aliases then
183- Yaml.resolve_aliases ~max_nodes ~max_depth yaml
184- else
185- yaml
186187(** Load all documents *)
188let documents_of_string s =
···194195 @param resolve_aliases Whether to resolve aliases (default true)
196 @param max_nodes Maximum nodes during alias expansion (default 10M)
197- @param max_depth Maximum alias nesting depth (default 100)
198-*)
199-let value_of_reader
200- ?(resolve_aliases = true)
201 ?(max_nodes = Yaml.default_max_alias_nodes)
202- ?(max_depth = Yaml.default_max_alias_depth)
203- reader =
204 let docs = parse_all_documents (Parser.of_reader reader) in
205 let doc = single_document_or_error docs ~empty:(Document.make None) in
206 match Document.root doc with
207 | None -> `Null
208 | Some yaml ->
209- Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
0210211(** Load single document as Yaml from a Bytes.Reader.
212213 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
214 @param max_nodes Maximum nodes during alias expansion (default 10M)
215- @param max_depth Maximum alias nesting depth (default 100)
216-*)
217-let yaml_of_reader
218- ?(resolve_aliases = false)
219 ?(max_nodes = Yaml.default_max_alias_nodes)
220- ?(max_depth = Yaml.default_max_alias_depth)
221- reader =
222 let docs = parse_all_documents (Parser.of_reader reader) in
223 let doc = single_document_or_error docs ~empty:(Document.make None) in
224 match Document.root doc with
225 | None -> `Scalar (Scalar.make "")
226 | Some yaml ->
227- if resolve_aliases then
228- Yaml.resolve_aliases ~max_nodes ~max_depth yaml
229- else
230- yaml
231232(** Load all documents from a Bytes.Reader *)
233let documents_of_reader reader =
···245 let rec loop () =
246 match next_event () with
247 | None -> None
248- | Some ev ->
249 process_event state ev;
250 match ev.event with
251- | Event.Document_end _ ->
252- (match state.documents with
253- | doc :: _ ->
254- state.documents <- [];
255- Some (extract doc)
256- | [] -> None)
257 | Event.Stream_end -> None
258- | _ -> loop ()
259 in
260 loop ()
261···267268 @param resolve_aliases Whether to resolve aliases (default true)
269 @param max_nodes Maximum nodes during alias expansion (default 10M)
270- @param max_depth Maximum alias nesting depth (default 100)
271-*)
272-let load_value
273- ?(resolve_aliases = true)
274 ?(max_nodes = Yaml.default_max_alias_nodes)
275- ?(max_depth = Yaml.default_max_alias_depth)
276- parser =
277- load_generic (fun doc ->
278- match Document.root doc with
279- | None -> `Null
280- | Some yaml ->
281- Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
282- ) parser
0283284(** Load single Yaml from parser *)
285let load_yaml parser =
286- load_generic (fun doc ->
287- Document.root doc |> Option.value ~default:(`Scalar (Scalar.make ""))
288- ) parser
0289290(** Load single Document from parser *)
291-let load_document parser =
292- load_generic Fun.id parser
293294(** Iterate over documents *)
295let iter_documents f parser =
296 let rec loop () =
297 match load_document parser with
298 | None -> ()
299- | Some doc -> f doc; loop ()
00300 in
301 loop ()
302303(** Fold over documents *)
304let fold_documents f init parser =
305 let rec loop acc =
306- match load_document parser with
307- | None -> acc
308- | Some doc -> loop (f acc doc)
309 in
310 loop init
0311312(** Load single Value from event source.
313314 @param resolve_aliases Whether to resolve aliases (default true)
315 @param max_nodes Maximum nodes during alias expansion (default 10M)
316- @param max_depth Maximum alias nesting depth (default 100)
317-*)
318-let value_of_parser
319- ?(resolve_aliases = true)
320 ?(max_nodes = Yaml.default_max_alias_nodes)
321- ?(max_depth = Yaml.default_max_alias_depth)
322- next_event =
323- match load_generic_fn (fun doc ->
324- match Document.root doc with
325- | None -> `Null
326- | Some yaml ->
327- Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
328- ) next_event with
000329 | Some v -> v
330 | None -> `Null
331···333334 @param resolve_aliases Whether to resolve aliases (default false)
335 @param max_nodes Maximum nodes during alias expansion (default 10M)
336- @param max_depth Maximum alias nesting depth (default 100)
337-*)
338-let yaml_of_parser
339- ?(resolve_aliases = false)
340 ?(max_nodes = Yaml.default_max_alias_nodes)
341- ?(max_depth = Yaml.default_max_alias_depth)
342- next_event =
343- match load_generic_fn (fun doc ->
344- match Document.root doc with
345- | None -> `Scalar (Scalar.make "")
346- | Some yaml ->
347- if resolve_aliases then
348- Yaml.resolve_aliases ~max_nodes ~max_depth yaml
349- else
350- yaml
351- ) next_event with
0352 | Some v -> v
353 | None -> `Scalar (Scalar.make "")
354355(** Load single Document from event source *)
356-let document_of_parser next_event =
357- load_generic_fn Fun.id next_event
358359(** Load all documents from event source *)
360let documents_of_parser next_event =
···373 let rec loop () =
374 match document_of_parser next_event with
375 | None -> ()
376- | Some doc -> f doc; loop ()
00377 in
378 loop ()
379
···31 mutable doc_implicit_start : bool;
32}
3334+let create_state () =
35+ {
36+ stack = [];
37+ current = None;
38+ documents = [];
39+ doc_version = None;
40+ doc_implicit_start = true;
41+ }
4243(** Process a single event *)
44let rec process_event state (ev : Event.spanned) =
45 match ev.event with
46 | Event.Stream_start _ -> ()
047 | Event.Stream_end -> ()
048 | Event.Document_start { version; implicit } ->
49 state.doc_version <- version;
50 state.doc_implicit_start <- implicit
051 | Event.Document_end { implicit } ->
52+ let doc =
53+ Document.make ?version:state.doc_version
54+ ~implicit_start:state.doc_implicit_start ~implicit_end:implicit
55+ state.current
056 in
57 state.documents <- doc :: state.documents;
58 state.current <- None;
59 state.doc_version <- None;
60 state.doc_implicit_start <- true
061 | Event.Alias { anchor } ->
62 let node : Yaml.t = `Alias anchor in
63 add_node state node
64+ | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style }
65+ ->
66+ let scalar =
67+ Scalar.make ?anchor ?tag ~plain_implicit ~quoted_implicit ~style value
0068 in
69 let node : Yaml.t = `Scalar scalar in
70 add_node state node
071 | Event.Sequence_start { anchor; tag; implicit; style } ->
72+ let frame = Sequence_frame { anchor; tag; implicit; style; items = [] } in
00073 state.stack <- frame :: state.stack
74+ | Event.Sequence_end -> (
75+ match state.stack with
76+ | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
77+ let seq =
78+ Sequence.make ?anchor ?tag ~implicit ~style (List.rev items)
79+ in
80+ let node : Yaml.t = `A seq in
81+ state.stack <- rest;
82+ add_node state node
83+ | _ -> Error.raise (Invalid_state "unexpected sequence end"))
84 | Event.Mapping_start { anchor; tag; implicit; style } ->
85+ let frame =
86+ Mapping_frame
87+ { anchor; tag; implicit; style; pairs = []; pending_key = None }
88+ in
089 state.stack <- frame :: state.stack
90+ | Event.Mapping_end -> (
91+ match state.stack with
92+ | Mapping_frame
93+ { anchor; tag; implicit; style; pairs; pending_key = None }
94+ :: rest ->
95+ let map =
96+ Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs)
97+ in
98+ let node : Yaml.t = `O map in
99+ state.stack <- rest;
100+ add_node state node
101+ | Mapping_frame { pending_key = Some _; _ } :: _ ->
102+ Error.raise (Invalid_state "mapping ended with pending key")
103+ | _ -> Error.raise (Invalid_state "unexpected mapping end"))
104105(** Add a node to current context *)
106and add_node state node =
107 match state.stack with
108+ | [] -> state.current <- Some node
00109 | Sequence_frame f :: rest ->
110 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
111+ | Mapping_frame f :: rest -> (
112+ match f.pending_key with
113+ | None ->
114+ (* This is a key *)
115+ state.stack <-
116+ Mapping_frame { f with pending_key = Some node } :: rest
117+ | Some key ->
118+ (* This is a value *)
119+ state.stack <-
120+ Mapping_frame
121+ { f with pairs = (key, node) :: f.pairs; pending_key = None }
122+ :: rest)
0123124(** Internal: parse all documents from a parser *)
125let parse_all_documents parser =
···138139 @param resolve_aliases Whether to resolve aliases (default true)
140 @param max_nodes Maximum nodes during alias expansion (default 10M)
141+ @param max_depth Maximum alias nesting depth (default 100) *)
142+let value_of_string ?(resolve_aliases = true)
00143 ?(max_nodes = Yaml.default_max_alias_nodes)
144+ ?(max_depth = Yaml.default_max_alias_depth) s =
0145 let docs = parse_all_documents (Parser.of_string s) in
146 let doc = single_document_or_error docs ~empty:(Document.make None) in
147 match Document.root doc with
148 | None -> `Null
149 | Some yaml ->
150+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
151+ yaml
152153(** Load single document as Yaml.
154155 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
156 @param max_nodes Maximum nodes during alias expansion (default 10M)
157+ @param max_depth Maximum alias nesting depth (default 100) *)
158+let yaml_of_string ?(resolve_aliases = false)
00159 ?(max_nodes = Yaml.default_max_alias_nodes)
160+ ?(max_depth = Yaml.default_max_alias_depth) s =
0161 let docs = parse_all_documents (Parser.of_string s) in
162 let doc = single_document_or_error docs ~empty:(Document.make None) in
163 match Document.root doc with
164 | None -> `Scalar (Scalar.make "")
165 | Some yaml ->
166+ if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml
167+ else yaml
00168169(** Load all documents *)
170let documents_of_string s =
···176177 @param resolve_aliases Whether to resolve aliases (default true)
178 @param max_nodes Maximum nodes during alias expansion (default 10M)
179+ @param max_depth Maximum alias nesting depth (default 100) *)
180+let value_of_reader ?(resolve_aliases = true)
00181 ?(max_nodes = Yaml.default_max_alias_nodes)
182+ ?(max_depth = Yaml.default_max_alias_depth) reader =
0183 let docs = parse_all_documents (Parser.of_reader reader) in
184 let doc = single_document_or_error docs ~empty:(Document.make None) in
185 match Document.root doc with
186 | None -> `Null
187 | Some yaml ->
188+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
189+ yaml
190191(** Load single document as Yaml from a Bytes.Reader.
192193 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
194 @param max_nodes Maximum nodes during alias expansion (default 10M)
195+ @param max_depth Maximum alias nesting depth (default 100) *)
196+let yaml_of_reader ?(resolve_aliases = false)
00197 ?(max_nodes = Yaml.default_max_alias_nodes)
198+ ?(max_depth = Yaml.default_max_alias_depth) reader =
0199 let docs = parse_all_documents (Parser.of_reader reader) in
200 let doc = single_document_or_error docs ~empty:(Document.make None) in
201 match Document.root doc with
202 | None -> `Scalar (Scalar.make "")
203 | Some yaml ->
204+ if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml
205+ else yaml
00206207(** Load all documents from a Bytes.Reader *)
208let documents_of_reader reader =
···220 let rec loop () =
221 match next_event () with
222 | None -> None
223+ | Some ev -> (
224 process_event state ev;
225 match ev.event with
226+ | Event.Document_end _ -> (
227+ match state.documents with
228+ | doc :: _ ->
229+ state.documents <- [];
230+ Some (extract doc)
231+ | [] -> None)
232 | Event.Stream_end -> None
233+ | _ -> loop ())
234 in
235 loop ()
236···242243 @param resolve_aliases Whether to resolve aliases (default true)
244 @param max_nodes Maximum nodes during alias expansion (default 10M)
245+ @param max_depth Maximum alias nesting depth (default 100) *)
246+let load_value ?(resolve_aliases = true)
00247 ?(max_nodes = Yaml.default_max_alias_nodes)
248+ ?(max_depth = Yaml.default_max_alias_depth) parser =
249+ load_generic
250+ (fun doc ->
251+ match Document.root doc with
252+ | None -> `Null
253+ | Some yaml ->
254+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes
255+ ~max_depth yaml)
256+ parser
257258(** Load single Yaml from parser *)
259let load_yaml parser =
260+ load_generic
261+ (fun doc ->
262+ Document.root doc |> Option.value ~default:(`Scalar (Scalar.make "")))
263+ parser
264265(** Load single Document from parser *)
266+let load_document parser = load_generic Fun.id parser
0267268(** Iterate over documents *)
269let iter_documents f parser =
270 let rec loop () =
271 match load_document parser with
272 | None -> ()
273+ | Some doc ->
274+ f doc;
275+ loop ()
276 in
277 loop ()
278279(** Fold over documents *)
280let fold_documents f init parser =
281 let rec loop acc =
282+ match load_document parser with None -> acc | Some doc -> loop (f acc doc)
00283 in
284 loop init
285+286287(** Load single Value from event source.
288289 @param resolve_aliases Whether to resolve aliases (default true)
290 @param max_nodes Maximum nodes during alias expansion (default 10M)
291+ @param max_depth Maximum alias nesting depth (default 100) *)
292+let value_of_parser ?(resolve_aliases = true)
00293 ?(max_nodes = Yaml.default_max_alias_nodes)
294+ ?(max_depth = Yaml.default_max_alias_depth) next_event =
295+ match
296+ load_generic_fn
297+ (fun doc ->
298+ match Document.root doc with
299+ | None -> `Null
300+ | Some yaml ->
301+ Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes
302+ ~max_depth yaml)
303+ next_event
304+ with
305 | Some v -> v
306 | None -> `Null
307···309310 @param resolve_aliases Whether to resolve aliases (default false)
311 @param max_nodes Maximum nodes during alias expansion (default 10M)
312+ @param max_depth Maximum alias nesting depth (default 100) *)
313+let yaml_of_parser ?(resolve_aliases = false)
00314 ?(max_nodes = Yaml.default_max_alias_nodes)
315+ ?(max_depth = Yaml.default_max_alias_depth) next_event =
316+ match
317+ load_generic_fn
318+ (fun doc ->
319+ match Document.root doc with
320+ | None -> `Scalar (Scalar.make "")
321+ | Some yaml ->
322+ if resolve_aliases then
323+ Yaml.resolve_aliases ~max_nodes ~max_depth yaml
324+ else yaml)
325+ next_event
326+ with
327 | Some v -> v
328 | None -> `Scalar (Scalar.make "")
329330(** Load single Document from event source *)
331+let document_of_parser next_event = load_generic_fn Fun.id next_event
0332333(** Load all documents from event source *)
334let documents_of_parser next_event =
···347 let rec loop () =
348 match document_of_parser next_event with
349 | None -> ()
350+ | Some doc ->
351+ f doc;
352+ loop ()
353 in
354 loop ()
355
+38-41
lib/mapping.ml
···13 members : ('k * 'v) list;
14}
1516-let make
17- ?(anchor : string option)
18- ?(tag : string option)
19- ?(implicit = true)
20- ?(style = `Any)
21- members =
22 { anchor; tag; implicit; style; members }
2324let members t = t.members
···26let tag t = t.tag
27let implicit t = t.implicit
28let style t = t.style
29-30let with_anchor anchor t = { t with anchor = Some anchor }
31let with_tag tag t = { t with tag = Some tag }
32let with_style style t = { t with style }
3334-let map_keys f t = { t with members = List.map (fun (k, v) -> (f k, v)) t.members }
35-let map_values f t = { t with members = List.map (fun (k, v) -> (k, f v)) t.members }
36-let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members }
3738-let length t = List.length t.members
0390040let is_empty t = t.members = []
4142let find pred t =
43 List.find_opt (fun (k, _) -> pred k) t.members |> Option.map snd
4445-let find_key pred t =
46- List.find_opt (fun (k, _) -> pred k) t.members
47-48-let mem pred t =
49- List.exists (fun (k, _) -> pred k) t.members
50-51let keys t = List.map fst t.members
52-53let values t = List.map snd t.members
54-55let iter f t = List.iter (fun (k, v) -> f k v) t.members
56-57let fold f init t = List.fold_left (fun acc (k, v) -> f acc k v) init t.members
5859let pp pp_key pp_val fmt t =
···62 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
63 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
64 Format.fprintf fmt "members={@,";
65- List.iteri (fun i (k, v) ->
66- if i > 0 then Format.fprintf fmt ",@ ";
67- Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v
68- ) t.members;
069 Format.fprintf fmt "@]@,})"
7071let equal eq_k eq_v a b =
72- Option.equal String.equal a.anchor b.anchor &&
73- Option.equal String.equal a.tag b.tag &&
74- a.implicit = b.implicit &&
75- Layout_style.equal a.style b.style &&
76- List.equal (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2) a.members b.members
007778let compare cmp_k cmp_v a b =
79 let c = Option.compare String.compare a.anchor b.anchor in
80- if c <> 0 then c else
81- let c = Option.compare String.compare a.tag b.tag in
82- if c <> 0 then c else
83- let c = Bool.compare a.implicit b.implicit in
84- if c <> 0 then c else
85- let c = Layout_style.compare a.style b.style in
86- if c <> 0 then c else
87- let cmp_pair (k1, v1) (k2, v2) =
88- let c = cmp_k k1 k2 in
89- if c <> 0 then c else cmp_v v1 v2
90- in
91- List.compare cmp_pair a.members b.members
0000
···13 members : ('k * 'v) list;
14}
1516+let make ?(anchor : string option) ?(tag : string option) ?(implicit = true)
17+ ?(style = `Any) members =
000018 { anchor; tag; implicit; style; members }
1920let members t = t.members
···22let tag t = t.tag
23let implicit t = t.implicit
24let style t = t.style
025let with_anchor anchor t = { t with anchor = Some anchor }
26let with_tag tag t = { t with tag = Some tag }
27let with_style style t = { t with style }
2829+let map_keys f t =
30+ { t with members = List.map (fun (k, v) -> (f k, v)) t.members }
03132+let map_values f t =
33+ { t with members = List.map (fun (k, v) -> (k, f v)) t.members }
3435+let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members }
36+let length t = List.length t.members
37let is_empty t = t.members = []
3839let find pred t =
40 List.find_opt (fun (k, _) -> pred k) t.members |> Option.map snd
4142+let find_key pred t = List.find_opt (fun (k, _) -> pred k) t.members
43+let mem pred t = List.exists (fun (k, _) -> pred k) t.members
000044let keys t = List.map fst t.members
045let values t = List.map snd t.members
046let iter f t = List.iter (fun (k, v) -> f k v) t.members
047let fold f init t = List.fold_left (fun acc (k, v) -> f acc k v) init t.members
4849let pp pp_key pp_val fmt t =
···52 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
53 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
54 Format.fprintf fmt "members={@,";
55+ List.iteri
56+ (fun i (k, v) ->
57+ if i > 0 then Format.fprintf fmt ",@ ";
58+ Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v)
59+ t.members;
60 Format.fprintf fmt "@]@,})"
6162let equal eq_k eq_v a b =
63+ Option.equal String.equal a.anchor b.anchor
64+ && Option.equal String.equal a.tag b.tag
65+ && a.implicit = b.implicit
66+ && Layout_style.equal a.style b.style
67+ && List.equal
68+ (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2)
69+ a.members b.members
7071let compare cmp_k cmp_v a b =
72 let c = Option.compare String.compare a.anchor b.anchor in
73+ if c <> 0 then c
74+ else
75+ let c = Option.compare String.compare a.tag b.tag in
76+ if c <> 0 then c
77+ else
78+ let c = Bool.compare a.implicit b.implicit in
79+ if c <> 0 then c
80+ else
81+ let c = Layout_style.compare a.style b.style in
82+ if c <> 0 then c
83+ else
84+ let cmp_pair (k1, v1) (k2, v2) =
85+ let c = cmp_k k1 k2 in
86+ if c <> 0 then c else cmp_v v1 v2
87+ in
88+ List.compare cmp_pair a.members b.members
+266-305
lib/parser.ml
···10 | Stream_start
11 | Implicit_document_start
12 | Document_content
13- | Document_content_done (* After parsing a node, check for unexpected content *)
014 | Document_end
15 | Block_sequence_first_entry
16 | Block_sequence_entry
···36 mutable tag_directives : (string * string) list;
37 mutable current_token : Token.spanned option;
38 mutable finished : bool;
39- mutable explicit_doc_end : bool; (** True if last doc ended with explicit ... *)
40- mutable stream_start : bool; (** True if we haven't emitted any documents yet *)
0041}
4243-let create scanner = {
44- scanner;
45- state = Stream_start;
46- states = [];
47- version = None;
48- tag_directives = [
49- ("!", "!");
50- ("!!", "tag:yaml.org,2002:");
51- ];
52- current_token = None;
53- finished = false;
54- explicit_doc_end = false;
55- stream_start = true;
56-}
5758let of_string s = create (Scanner.of_string s)
59let of_scanner = create
···64let current_token t =
65 match t.current_token with
66 | Some tok -> tok
67- | None ->
68 let tok = Scanner.next t.scanner in
69 t.current_token <- tok;
70- match tok with
71- | Some tok -> tok
72- | None -> Error.raise Unexpected_eof
7374(** Peek at current token *)
75let peek_token t =
···80 t.current_token
8182(** Skip current token *)
83-let skip_token t =
84- t.current_token <- None
8586(** Check if current token matches predicate *)
87let check t pred =
88- match peek_token t with
89- | Some tok -> pred tok.token
90- | None -> false
9192(** Push state onto stack *)
93-let push_state t s =
94- t.states <- s :: t.states
9596(** Pop state from stack *)
97let pop_state t =
···115(** Process directives at document start *)
116let process_directives t =
117 t.version <- None;
118- t.tag_directives <- [("!", "!"); ("!!", "tag:yaml.org,2002:")];
119120- while check t (function
121- | Token.Version_directive _ | Token.Tag_directive _ -> true
122- | _ -> false)
0123 do
124 let tok = current_token t in
125 skip_token t;
126 match tok.token with
127 | Token.Version_directive { major; minor } ->
128 if t.version <> None then
129- Error.raise_span tok.span (Invalid_yaml_version "duplicate YAML directive");
0130 t.version <- Some (major, minor)
131 | Token.Tag_directive { handle; prefix } ->
132 (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *)
133- if handle = "" && prefix = "" then
134- () (* Ignore reserved directives *)
135 else begin
136- if List.mem_assoc handle t.tag_directives &&
137- handle <> "!" && handle <> "!!" then
138- Error.raise_span tok.span (Invalid_tag_directive ("duplicate tag handle: " ^ handle));
000139 t.tag_directives <- (handle, prefix) :: t.tag_directives
140 end
141 | _ -> ()
···146 let anchor = ref None in
147 let tag = ref None in
148149- while check t (function
150- | Token.Anchor _ | Token.Tag _ -> true
151- | _ -> false)
152 do
153 let tok = current_token t in
154 skip_token t;
···172173(** Empty scalar event *)
174let empty_scalar_event ~anchor ~tag span =
175- Event.Scalar {
176- anchor;
177- tag;
178- value = "";
179- plain_implicit = tag = None;
180- quoted_implicit = false;
181- style = `Plain;
182- }, span
00183184(** Parse stream start *)
185let parse_stream_start t =
···188 match tok.token with
189 | Token.Stream_start encoding ->
190 t.state <- Implicit_document_start;
191- Event.Stream_start { encoding }, tok.span
192- | _ ->
193- Error.raise_span tok.span (Unexpected_token "expected stream start")
194195(** Parse document start (implicit or explicit) *)
196let parse_document_start t ~implicit =
···199 if not implicit then begin
200 let tok = current_token t in
201 match tok.token with
202- | Token.Document_start ->
203- skip_token t
204- | _ ->
205- Error.raise_span tok.span Expected_document_start
206 end;
207208- let span = match peek_token t with
0209 | Some tok -> tok.span
210 | None -> Span.point Position.initial
211 in
···214 t.stream_start <- false;
215 push_state t Document_end;
216 t.state <- Document_content;
217- Event.Document_start { version = t.version; implicit }, span
218219(** Parse document end *)
220let parse_document_end t =
221- let implicit = not (check t (function Token.Document_end -> true | _ -> false)) in
222- let span = match peek_token t with
000223 | Some tok -> tok.span
224 | None -> Span.point Position.initial
225 in
···229 (* Track if this document ended explicitly with ... *)
230 t.explicit_doc_end <- not implicit;
231 t.state <- Implicit_document_start;
232- Event.Document_end { implicit }, span
233234(** Parse node in various contexts *)
235let parse_node t ~block ~indentless =
···238 | Token.Alias name ->
239 skip_token t;
240 t.state <- pop_state t;
241- Event.Alias { anchor = name }, tok.span
242-243- | Token.Anchor _ | Token.Tag _ ->
244 let anchor, tag = parse_properties t in
245 let tok = current_token t in
246- (match tok.token with
247- | Token.Block_entry when indentless ->
248- t.state <- Indentless_sequence_entry;
249- Event.Sequence_start {
250- anchor; tag;
251- implicit = tag = None;
252- style = `Block;
253- }, tok.span
254-255- | Token.Block_sequence_start when block ->
256- t.state <- Block_sequence_first_entry;
257- skip_token t;
258- Event.Sequence_start {
259- anchor; tag;
260- implicit = tag = None;
261- style = `Block;
262- }, tok.span
263-264- | Token.Block_mapping_start when block ->
265- t.state <- Block_mapping_first_key;
266- skip_token t;
267- Event.Mapping_start {
268- anchor; tag;
269- implicit = tag = None;
270- style = `Block;
271- }, tok.span
272-273- | Token.Flow_sequence_start ->
274- t.state <- Flow_sequence_first_entry;
275- skip_token t;
276- Event.Sequence_start {
277- anchor; tag;
278- implicit = tag = None;
279- style = `Flow;
280- }, tok.span
281-282- | Token.Flow_mapping_start ->
283- t.state <- Flow_mapping_first_key;
284- skip_token t;
285- Event.Mapping_start {
286- anchor; tag;
287- implicit = tag = None;
288- style = `Flow;
289- }, tok.span
290-291- | Token.Scalar { style; value } ->
292- skip_token t;
293- t.state <- pop_state t;
294- let plain_implicit = tag = None && style = `Plain in
295- let quoted_implicit = tag = None && style <> `Plain in
296- Event.Scalar {
297- anchor; tag; value;
298- plain_implicit; quoted_implicit; style;
299- }, tok.span
300-301- | _ ->
302- (* Empty node *)
303- t.state <- pop_state t;
304- empty_scalar_event ~anchor ~tag tok.span)
305-306 | Token.Block_sequence_start when block ->
307 t.state <- Block_sequence_first_entry;
308 skip_token t;
309- Event.Sequence_start {
310- anchor = None; tag = None;
311- implicit = true;
312- style = `Block;
313- }, tok.span
314-315 | Token.Block_mapping_start when block ->
316 t.state <- Block_mapping_first_key;
317 skip_token t;
318- Event.Mapping_start {
319- anchor = None; tag = None;
320- implicit = true;
321- style = `Block;
322- }, tok.span
323-324 | Token.Flow_sequence_start ->
325 t.state <- Flow_sequence_first_entry;
326 skip_token t;
327- Event.Sequence_start {
328- anchor = None; tag = None;
329- implicit = true;
330- style = `Flow;
331- }, tok.span
332-333 | Token.Flow_mapping_start ->
334 t.state <- Flow_mapping_first_key;
335 skip_token t;
336- Event.Mapping_start {
337- anchor = None; tag = None;
338- implicit = true;
339- style = `Flow;
340- }, tok.span
341-342 | Token.Block_entry when indentless ->
343 t.state <- Indentless_sequence_entry;
344- Event.Sequence_start {
345- anchor = None; tag = None;
346- implicit = true;
347- style = `Block;
348- }, tok.span
349-350 | Token.Scalar { style; value } ->
351 skip_token t;
352 t.state <- pop_state t;
353 let plain_implicit = style = `Plain in
354 let quoted_implicit = style <> `Plain in
355- Event.Scalar {
356- anchor = None; tag = None; value;
357- plain_implicit; quoted_implicit; style;
358- }, tok.span
359-00000360 | _ ->
361 (* Empty node *)
362 t.state <- pop_state t;
···368 match tok.token with
369 | Token.Block_entry ->
370 skip_token t;
371- if check t (function
372- | Token.Block_entry | Token.Block_end -> true
373- | _ -> false)
0374 then begin
375 t.state <- Block_sequence_entry;
376 empty_scalar_event ~anchor:None ~tag:None tok.span
377- end else begin
0378 push_state t Block_sequence_entry;
379 parse_node t ~block:true ~indentless:false
380 end
381 | Token.Block_end ->
382 skip_token t;
383 t.state <- pop_state t;
384- Event.Sequence_end, tok.span
385- | _ ->
386- Error.raise_span tok.span Expected_block_entry
387388(** Parse block mapping key *)
389let parse_block_mapping_key t =
···391 match tok.token with
392 | Token.Key ->
393 skip_token t;
394- if check t (function
395- | Token.Key | Token.Value | Token.Block_end -> true
396- | _ -> false)
0397 then begin
398 t.state <- Block_mapping_value;
399 empty_scalar_event ~anchor:None ~tag:None tok.span
400- end else begin
0401 push_state t Block_mapping_value;
402 parse_node t ~block:true ~indentless:true
403 end
···408 | Token.Block_end ->
409 skip_token t;
410 t.state <- pop_state t;
411- Event.Mapping_end, tok.span
412- | _ ->
413- Error.raise_span tok.span Expected_key
414415(** Parse block mapping value *)
416let parse_block_mapping_value t =
···418 match tok.token with
419 | Token.Value ->
420 skip_token t;
421- if check t (function
422- | Token.Key | Token.Value | Token.Block_end -> true
423- | _ -> false)
0424 then begin
425 t.state <- Block_mapping_key;
426 empty_scalar_event ~anchor:None ~tag:None tok.span
427- end else begin
0428 push_state t Block_mapping_key;
429 parse_node t ~block:true ~indentless:true
430 end
···439 match tok.token with
440 | Token.Block_entry ->
441 skip_token t;
442- if check t (function
443- | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> true
444- | _ -> false)
00445 then begin
446 t.state <- Indentless_sequence_entry;
447 empty_scalar_event ~anchor:None ~tag:None tok.span
448- end else begin
0449 push_state t Indentless_sequence_entry;
450 parse_node t ~block:true ~indentless:false
451 end
452 | _ ->
453 t.state <- pop_state t;
454- Event.Sequence_end, tok.span
455456(** Parse flow sequence *)
457let rec parse_flow_sequence_entry t ~first =
···460 | Token.Flow_sequence_end ->
461 skip_token t;
462 t.state <- pop_state t;
463- Event.Sequence_end, tok.span
464 | Token.Flow_entry when not first ->
465 skip_token t;
466 parse_flow_sequence_entry_internal t
467- | _ when first ->
468- parse_flow_sequence_entry_internal t
469- | _ ->
470- Error.raise_span tok.span Expected_sequence_end
471472and parse_flow_sequence_entry_internal t =
473 let tok = current_token t in
···476 (* Trailing comma case - don't emit empty scalar, just go back to sequence entry state *)
477 skip_token t;
478 t.state <- pop_state t;
479- Event.Sequence_end, tok.span
480 | Token.Flow_entry ->
481 (* Double comma or comma after comma - invalid *)
482- Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow sequence")
0483 | Token.Key ->
484 skip_token t;
485 t.state <- Flow_sequence_entry_mapping_key;
486- Event.Mapping_start {
487- anchor = None; tag = None;
488- implicit = true;
489- style = `Flow;
490- }, tok.span
491 | Token.Value ->
492 (* Implicit empty key mapping: [ : value ] *)
493 t.state <- Flow_sequence_entry_mapping_key;
494- Event.Mapping_start {
495- anchor = None; tag = None;
496- implicit = true;
497- style = `Flow;
498- }, tok.span
499 | _ ->
500 push_state t Flow_sequence_entry;
501 parse_node t ~block:false ~indentless:false
···503(** Parse flow sequence entry mapping *)
504let parse_flow_sequence_entry_mapping_key t =
505 let tok = current_token t in
506- if check t (function
507- | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true
508- | _ -> false)
0509 then begin
510 t.state <- Flow_sequence_entry_mapping_value;
511 empty_scalar_event ~anchor:None ~tag:None tok.span
512- end else begin
0513 push_state t Flow_sequence_entry_mapping_value;
514 parse_node t ~block:false ~indentless:false
515 end
···519 match tok.token with
520 | Token.Value ->
521 skip_token t;
522- if check t (function
523- | Token.Flow_entry | Token.Flow_sequence_end -> true
524- | _ -> false)
0525 then begin
526 t.state <- Flow_sequence_entry_mapping_end;
527 empty_scalar_event ~anchor:None ~tag:None tok.span
528- end else begin
0529 push_state t Flow_sequence_entry_mapping_end;
530 parse_node t ~block:false ~indentless:false
531 end
···536let parse_flow_sequence_entry_mapping_end t =
537 let tok = current_token t in
538 t.state <- Flow_sequence_entry;
539- Event.Mapping_end, tok.span
540541(** Parse flow mapping *)
542let rec parse_flow_mapping_key t ~first =
···545 | Token.Flow_mapping_end ->
546 skip_token t;
547 t.state <- pop_state t;
548- Event.Mapping_end, tok.span
549 | Token.Flow_entry when not first ->
550 skip_token t;
551 parse_flow_mapping_key_internal t
552- | _ when first ->
553- parse_flow_mapping_key_internal t
554- | _ ->
555- Error.raise_span tok.span Expected_mapping_end
556557and parse_flow_mapping_key_internal t =
558 let tok = current_token t in
···561 (* Trailing comma case - don't emit empty scalar, just return to key state *)
562 skip_token t;
563 t.state <- pop_state t;
564- Event.Mapping_end, tok.span
565 | Token.Flow_entry ->
566 (* Double comma or comma after comma - invalid *)
567- Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow mapping")
0568 | Token.Key ->
569 skip_token t;
570- if check t (function
571- | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true
572- | _ -> false)
0573 then begin
574 t.state <- Flow_mapping_value;
575 empty_scalar_event ~anchor:None ~tag:None tok.span
576- end else begin
0577 push_state t Flow_mapping_value;
578 parse_node t ~block:false ~indentless:false
579 end
···586 if empty then begin
587 t.state <- Flow_mapping_key;
588 empty_scalar_event ~anchor:None ~tag:None tok.span
589- end else
0590 match tok.token with
591 | Token.Value ->
592 skip_token t;
593- if check t (function
594- | Token.Flow_entry | Token.Flow_mapping_end -> true
595- | _ -> false)
0596 then begin
597 t.state <- Flow_mapping_key;
598 empty_scalar_event ~anchor:None ~tag:None tok.span
599- end else begin
0600 push_state t Flow_mapping_key;
601 parse_node t ~block:false ~indentless:false
602 end
···607(** Main state machine dispatcher *)
608let rec parse t =
609 match t.state with
610- | Stream_start ->
611- parse_stream_start t
612-613- | Implicit_document_start ->
614 (* Skip any document end markers before checking what's next *)
615 while check t (function Token.Document_end -> true | _ -> false) do
616- t.explicit_doc_end <- true; (* Seeing ... counts as explicit end *)
0617 skip_token t
618 done;
619620 let tok = current_token t in
621- (match tok.token with
622- | Token.Stream_end ->
623- skip_token t;
624- t.state <- End;
625- t.finished <- true;
626- Event.Stream_end, tok.span
627- | Token.Version_directive _ | Token.Tag_directive _ ->
628- (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *)
629- if not t.stream_start && not t.explicit_doc_end then
630- Error.raise_span tok.span (Invalid_directive "directives require explicit document end '...' before them");
631- parse_document_start t ~implicit:false
632- | Token.Document_start ->
633- parse_document_start t ~implicit:false
634- (* These tokens are invalid at document start - they indicate leftover junk *)
635- | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry
636- | Token.Block_end | Token.Value ->
637- Error.raise_span tok.span (Unexpected_token "unexpected token at document start")
638- | _ ->
639- parse_document_start t ~implicit:true)
0640641 | Document_content ->
642- if check t (function
643- | Token.Version_directive _ | Token.Tag_directive _
644- | Token.Document_start | Token.Document_end | Token.Stream_end -> true
645- | _ -> false)
00646 then begin
647 let tok = current_token t in
648 t.state <- pop_state t;
649 empty_scalar_event ~anchor:None ~tag:None tok.span
650- end else begin
0651 (* Push Document_content_done so we return there after parsing the node.
652 This allows us to check for unexpected content after the node. *)
653 push_state t Document_content_done;
654 parse_node t ~block:true ~indentless:false
655 end
656-657 | Document_content_done ->
658 (* After parsing a node in document content, check for unexpected content *)
659- if check t (function
660- | Token.Version_directive _ | Token.Tag_directive _
661- | Token.Document_start | Token.Document_end | Token.Stream_end -> true
662- | _ -> false)
00663 then begin
664 (* Valid document boundary - continue to Document_end *)
665 t.state <- pop_state t;
666- parse t (* Continue to emit the next event *)
667- end else begin
0668 (* Unexpected content after document value - this is an error (KS4U, BS4K) *)
669 let tok = current_token t in
670 Error.raise_span tok.span
671 (Unexpected_token "content not allowed after document value")
672 end
673-674- | Document_end ->
675- parse_document_end t
676677 | Block_sequence_first_entry ->
678 t.state <- Block_sequence_entry;
679 parse_block_sequence_entry t
680-681- | Block_sequence_entry ->
682- parse_block_sequence_entry t
683-684- | Indentless_sequence_entry ->
685- parse_indentless_sequence_entry t
686-687 | Block_mapping_first_key ->
688 t.state <- Block_mapping_key;
689 parse_block_mapping_key t
690-691- | Block_mapping_key ->
692- parse_block_mapping_key t
693-694- | Block_mapping_value ->
695- parse_block_mapping_value t
696-697- | Flow_sequence_first_entry ->
698- parse_flow_sequence_entry t ~first:true
699-700- | Flow_sequence_entry ->
701- parse_flow_sequence_entry t ~first:false
702-703- | Flow_sequence_entry_mapping_key ->
704- parse_flow_sequence_entry_mapping_key t
705-706 | Flow_sequence_entry_mapping_value ->
707 parse_flow_sequence_entry_mapping_value t
708-709- | Flow_sequence_entry_mapping_end ->
710- parse_flow_sequence_entry_mapping_end t
711-712- | Flow_mapping_first_key ->
713- parse_flow_mapping_key t ~first:true
714-715- | Flow_mapping_key ->
716- parse_flow_mapping_key t ~first:false
717-718- | Flow_mapping_value ->
719- parse_flow_mapping_value t ~empty:false
720721 | End ->
722 let span = Span.point Position.initial in
723 t.finished <- true;
724- Event.Stream_end, span
725726(** Get next event *)
727let next t =
···735 let rec loop () =
736 match next t with
737 | None -> ()
738- | Some ev -> f ev; loop ()
00739 in
740 loop ()
741742(** Fold over all events *)
743let fold f init t =
744 let rec loop acc =
745- match next t with
746- | None -> acc
747- | Some ev -> loop (f acc ev)
748 in
749 loop init
750751(** Convert to list *)
752-let to_list t =
753- fold (fun acc ev -> ev :: acc) [] t |> List.rev
···10 | Stream_start
11 | Implicit_document_start
12 | Document_content
13+ | Document_content_done
14+ (* After parsing a node, check for unexpected content *)
15 | Document_end
16 | Block_sequence_first_entry
17 | Block_sequence_entry
···37 mutable tag_directives : (string * string) list;
38 mutable current_token : Token.spanned option;
39 mutable finished : bool;
40+ mutable explicit_doc_end : bool;
41+ (** True if last doc ended with explicit ... *)
42+ mutable stream_start : bool;
43+ (** True if we haven't emitted any documents yet *)
44}
4546+let create scanner =
47+ {
48+ scanner;
49+ state = Stream_start;
50+ states = [];
51+ version = None;
52+ tag_directives = [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ];
53+ current_token = None;
54+ finished = false;
55+ explicit_doc_end = false;
56+ stream_start = true;
57+ }
005859let of_string s = create (Scanner.of_string s)
60let of_scanner = create
···65let current_token t =
66 match t.current_token with
67 | Some tok -> tok
68+ | None -> (
69 let tok = Scanner.next t.scanner in
70 t.current_token <- tok;
71+ match tok with Some tok -> tok | None -> Error.raise Unexpected_eof)
007273(** Peek at current token *)
74let peek_token t =
···79 t.current_token
8081(** Skip current token *)
82+let skip_token t = t.current_token <- None
08384(** Check if current token matches predicate *)
85let check t pred =
86+ match peek_token t with Some tok -> pred tok.token | None -> false
87+08889(** Push state onto stack *)
90+let push_state t s = t.states <- s :: t.states
09192(** Pop state from stack *)
93let pop_state t =
···111(** Process directives at document start *)
112let process_directives t =
113 t.version <- None;
114+ t.tag_directives <- [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ];
115116+ while
117+ check t (function
118+ | Token.Version_directive _ | Token.Tag_directive _ -> true
119+ | _ -> false)
120 do
121 let tok = current_token t in
122 skip_token t;
123 match tok.token with
124 | Token.Version_directive { major; minor } ->
125 if t.version <> None then
126+ Error.raise_span tok.span
127+ (Invalid_yaml_version "duplicate YAML directive");
128 t.version <- Some (major, minor)
129 | Token.Tag_directive { handle; prefix } ->
130 (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *)
131+ if handle = "" && prefix = "" then () (* Ignore reserved directives *)
0132 else begin
133+ if
134+ List.mem_assoc handle t.tag_directives
135+ && handle <> "!" && handle <> "!!"
136+ then
137+ Error.raise_span tok.span
138+ (Invalid_tag_directive ("duplicate tag handle: " ^ handle));
139 t.tag_directives <- (handle, prefix) :: t.tag_directives
140 end
141 | _ -> ()
···146 let anchor = ref None in
147 let tag = ref None in
148149+ while
150+ check t (function Token.Anchor _ | Token.Tag _ -> true | _ -> false)
0151 do
152 let tok = current_token t in
153 skip_token t;
···171172(** Empty scalar event *)
173let empty_scalar_event ~anchor ~tag span =
174+ ( Event.Scalar
175+ {
176+ anchor;
177+ tag;
178+ value = "";
179+ plain_implicit = tag = None;
180+ quoted_implicit = false;
181+ style = `Plain;
182+ },
183+ span )
184185(** Parse stream start *)
186let parse_stream_start t =
···189 match tok.token with
190 | Token.Stream_start encoding ->
191 t.state <- Implicit_document_start;
192+ (Event.Stream_start { encoding }, tok.span)
193+ | _ -> Error.raise_span tok.span (Unexpected_token "expected stream start")
0194195(** Parse document start (implicit or explicit) *)
196let parse_document_start t ~implicit =
···199 if not implicit then begin
200 let tok = current_token t in
201 match tok.token with
202+ | Token.Document_start -> skip_token t
203+ | _ -> Error.raise_span tok.span Expected_document_start
00204 end;
205206+ let span =
207+ match peek_token t with
208 | Some tok -> tok.span
209 | None -> Span.point Position.initial
210 in
···213 t.stream_start <- false;
214 push_state t Document_end;
215 t.state <- Document_content;
216+ (Event.Document_start { version = t.version; implicit }, span)
217218(** Parse document end *)
219let parse_document_end t =
220+ let implicit =
221+ not (check t (function Token.Document_end -> true | _ -> false))
222+ in
223+ let span =
224+ match peek_token t with
225 | Some tok -> tok.span
226 | None -> Span.point Position.initial
227 in
···231 (* Track if this document ended explicitly with ... *)
232 t.explicit_doc_end <- not implicit;
233 t.state <- Implicit_document_start;
234+ (Event.Document_end { implicit }, span)
235236(** Parse node in various contexts *)
237let parse_node t ~block ~indentless =
···240 | Token.Alias name ->
241 skip_token t;
242 t.state <- pop_state t;
243+ (Event.Alias { anchor = name }, tok.span)
244+ | Token.Anchor _ | Token.Tag _ -> (
0245 let anchor, tag = parse_properties t in
246 let tok = current_token t in
247+ match tok.token with
248+ | Token.Block_entry when indentless ->
249+ t.state <- Indentless_sequence_entry;
250+ ( Event.Sequence_start
251+ { anchor; tag; implicit = tag = None; style = `Block },
252+ tok.span )
253+ | Token.Block_sequence_start when block ->
254+ t.state <- Block_sequence_first_entry;
255+ skip_token t;
256+ ( Event.Sequence_start
257+ { anchor; tag; implicit = tag = None; style = `Block },
258+ tok.span )
259+ | Token.Block_mapping_start when block ->
260+ t.state <- Block_mapping_first_key;
261+ skip_token t;
262+ ( Event.Mapping_start
263+ { anchor; tag; implicit = tag = None; style = `Block },
264+ tok.span )
265+ | Token.Flow_sequence_start ->
266+ t.state <- Flow_sequence_first_entry;
267+ skip_token t;
268+ ( Event.Sequence_start
269+ { anchor; tag; implicit = tag = None; style = `Flow },
270+ tok.span )
271+ | Token.Flow_mapping_start ->
272+ t.state <- Flow_mapping_first_key;
273+ skip_token t;
274+ ( Event.Mapping_start
275+ { anchor; tag; implicit = tag = None; style = `Flow },
276+ tok.span )
277+ | Token.Scalar { style; value } ->
278+ skip_token t;
279+ t.state <- pop_state t;
280+ let plain_implicit = tag = None && style = `Plain in
281+ let quoted_implicit = tag = None && style <> `Plain in
282+ ( Event.Scalar
283+ { anchor; tag; value; plain_implicit; quoted_implicit; style },
284+ tok.span )
285+ | _ ->
286+ (* Empty node *)
287+ t.state <- pop_state t;
288+ empty_scalar_event ~anchor ~tag tok.span)
000000000000000000289 | Token.Block_sequence_start when block ->
290 t.state <- Block_sequence_first_entry;
291 skip_token t;
292+ ( Event.Sequence_start
293+ { anchor = None; tag = None; implicit = true; style = `Block },
294+ tok.span )
000295 | Token.Block_mapping_start when block ->
296 t.state <- Block_mapping_first_key;
297 skip_token t;
298+ ( Event.Mapping_start
299+ { anchor = None; tag = None; implicit = true; style = `Block },
300+ tok.span )
000301 | Token.Flow_sequence_start ->
302 t.state <- Flow_sequence_first_entry;
303 skip_token t;
304+ ( Event.Sequence_start
305+ { anchor = None; tag = None; implicit = true; style = `Flow },
306+ tok.span )
000307 | Token.Flow_mapping_start ->
308 t.state <- Flow_mapping_first_key;
309 skip_token t;
310+ ( Event.Mapping_start
311+ { anchor = None; tag = None; implicit = true; style = `Flow },
312+ tok.span )
000313 | Token.Block_entry when indentless ->
314 t.state <- Indentless_sequence_entry;
315+ ( Event.Sequence_start
316+ { anchor = None; tag = None; implicit = true; style = `Block },
317+ tok.span )
000318 | Token.Scalar { style; value } ->
319 skip_token t;
320 t.state <- pop_state t;
321 let plain_implicit = style = `Plain in
322 let quoted_implicit = style <> `Plain in
323+ ( Event.Scalar
324+ {
325+ anchor = None;
326+ tag = None;
327+ value;
328+ plain_implicit;
329+ quoted_implicit;
330+ style;
331+ },
332+ tok.span )
333 | _ ->
334 (* Empty node *)
335 t.state <- pop_state t;
···341 match tok.token with
342 | Token.Block_entry ->
343 skip_token t;
344+ if
345+ check t (function
346+ | Token.Block_entry | Token.Block_end -> true
347+ | _ -> false)
348 then begin
349 t.state <- Block_sequence_entry;
350 empty_scalar_event ~anchor:None ~tag:None tok.span
351+ end
352+ else begin
353 push_state t Block_sequence_entry;
354 parse_node t ~block:true ~indentless:false
355 end
356 | Token.Block_end ->
357 skip_token t;
358 t.state <- pop_state t;
359+ (Event.Sequence_end, tok.span)
360+ | _ -> Error.raise_span tok.span Expected_block_entry
0361362(** Parse block mapping key *)
363let parse_block_mapping_key t =
···365 match tok.token with
366 | Token.Key ->
367 skip_token t;
368+ if
369+ check t (function
370+ | Token.Key | Token.Value | Token.Block_end -> true
371+ | _ -> false)
372 then begin
373 t.state <- Block_mapping_value;
374 empty_scalar_event ~anchor:None ~tag:None tok.span
375+ end
376+ else begin
377 push_state t Block_mapping_value;
378 parse_node t ~block:true ~indentless:true
379 end
···384 | Token.Block_end ->
385 skip_token t;
386 t.state <- pop_state t;
387+ (Event.Mapping_end, tok.span)
388+ | _ -> Error.raise_span tok.span Expected_key
0389390(** Parse block mapping value *)
391let parse_block_mapping_value t =
···393 match tok.token with
394 | Token.Value ->
395 skip_token t;
396+ if
397+ check t (function
398+ | Token.Key | Token.Value | Token.Block_end -> true
399+ | _ -> false)
400 then begin
401 t.state <- Block_mapping_key;
402 empty_scalar_event ~anchor:None ~tag:None tok.span
403+ end
404+ else begin
405 push_state t Block_mapping_key;
406 parse_node t ~block:true ~indentless:true
407 end
···416 match tok.token with
417 | Token.Block_entry ->
418 skip_token t;
419+ if
420+ check t (function
421+ | Token.Block_entry | Token.Key | Token.Value | Token.Block_end ->
422+ true
423+ | _ -> false)
424 then begin
425 t.state <- Indentless_sequence_entry;
426 empty_scalar_event ~anchor:None ~tag:None tok.span
427+ end
428+ else begin
429 push_state t Indentless_sequence_entry;
430 parse_node t ~block:true ~indentless:false
431 end
432 | _ ->
433 t.state <- pop_state t;
434+ (Event.Sequence_end, tok.span)
435436(** Parse flow sequence *)
437let rec parse_flow_sequence_entry t ~first =
···440 | Token.Flow_sequence_end ->
441 skip_token t;
442 t.state <- pop_state t;
443+ (Event.Sequence_end, tok.span)
444 | Token.Flow_entry when not first ->
445 skip_token t;
446 parse_flow_sequence_entry_internal t
447+ | _ when first -> parse_flow_sequence_entry_internal t
448+ | _ -> Error.raise_span tok.span Expected_sequence_end
00449450and parse_flow_sequence_entry_internal t =
451 let tok = current_token t in
···454 (* Trailing comma case - don't emit empty scalar, just go back to sequence entry state *)
455 skip_token t;
456 t.state <- pop_state t;
457+ (Event.Sequence_end, tok.span)
458 | Token.Flow_entry ->
459 (* Double comma or comma after comma - invalid *)
460+ Error.raise_span tok.span
461+ (Unexpected_token "unexpected ',' in flow sequence")
462 | Token.Key ->
463 skip_token t;
464 t.state <- Flow_sequence_entry_mapping_key;
465+ ( Event.Mapping_start
466+ { anchor = None; tag = None; implicit = true; style = `Flow },
467+ tok.span )
00468 | Token.Value ->
469 (* Implicit empty key mapping: [ : value ] *)
470 t.state <- Flow_sequence_entry_mapping_key;
471+ ( Event.Mapping_start
472+ { anchor = None; tag = None; implicit = true; style = `Flow },
473+ tok.span )
00474 | _ ->
475 push_state t Flow_sequence_entry;
476 parse_node t ~block:false ~indentless:false
···478(** Parse flow sequence entry mapping *)
479let parse_flow_sequence_entry_mapping_key t =
480 let tok = current_token t in
481+ if
482+ check t (function
483+ | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true
484+ | _ -> false)
485 then begin
486 t.state <- Flow_sequence_entry_mapping_value;
487 empty_scalar_event ~anchor:None ~tag:None tok.span
488+ end
489+ else begin
490 push_state t Flow_sequence_entry_mapping_value;
491 parse_node t ~block:false ~indentless:false
492 end
···496 match tok.token with
497 | Token.Value ->
498 skip_token t;
499+ if
500+ check t (function
501+ | Token.Flow_entry | Token.Flow_sequence_end -> true
502+ | _ -> false)
503 then begin
504 t.state <- Flow_sequence_entry_mapping_end;
505 empty_scalar_event ~anchor:None ~tag:None tok.span
506+ end
507+ else begin
508 push_state t Flow_sequence_entry_mapping_end;
509 parse_node t ~block:false ~indentless:false
510 end
···515let parse_flow_sequence_entry_mapping_end t =
516 let tok = current_token t in
517 t.state <- Flow_sequence_entry;
518+ (Event.Mapping_end, tok.span)
519520(** Parse flow mapping *)
521let rec parse_flow_mapping_key t ~first =
···524 | Token.Flow_mapping_end ->
525 skip_token t;
526 t.state <- pop_state t;
527+ (Event.Mapping_end, tok.span)
528 | Token.Flow_entry when not first ->
529 skip_token t;
530 parse_flow_mapping_key_internal t
531+ | _ when first -> parse_flow_mapping_key_internal t
532+ | _ -> Error.raise_span tok.span Expected_mapping_end
00533534and parse_flow_mapping_key_internal t =
535 let tok = current_token t in
···538 (* Trailing comma case - don't emit empty scalar, just return to key state *)
539 skip_token t;
540 t.state <- pop_state t;
541+ (Event.Mapping_end, tok.span)
542 | Token.Flow_entry ->
543 (* Double comma or comma after comma - invalid *)
544+ Error.raise_span tok.span
545+ (Unexpected_token "unexpected ',' in flow mapping")
546 | Token.Key ->
547 skip_token t;
548+ if
549+ check t (function
550+ | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true
551+ | _ -> false)
552 then begin
553 t.state <- Flow_mapping_value;
554 empty_scalar_event ~anchor:None ~tag:None tok.span
555+ end
556+ else begin
557 push_state t Flow_mapping_value;
558 parse_node t ~block:false ~indentless:false
559 end
···566 if empty then begin
567 t.state <- Flow_mapping_key;
568 empty_scalar_event ~anchor:None ~tag:None tok.span
569+ end
570+ else
571 match tok.token with
572 | Token.Value ->
573 skip_token t;
574+ if
575+ check t (function
576+ | Token.Flow_entry | Token.Flow_mapping_end -> true
577+ | _ -> false)
578 then begin
579 t.state <- Flow_mapping_key;
580 empty_scalar_event ~anchor:None ~tag:None tok.span
581+ end
582+ else begin
583 push_state t Flow_mapping_key;
584 parse_node t ~block:false ~indentless:false
585 end
···590(** Main state machine dispatcher *)
591let rec parse t =
592 match t.state with
593+ | Stream_start -> parse_stream_start t
594+ | Implicit_document_start -> (
00595 (* Skip any document end markers before checking what's next *)
596 while check t (function Token.Document_end -> true | _ -> false) do
597+ t.explicit_doc_end <- true;
598+ (* Seeing ... counts as explicit end *)
599 skip_token t
600 done;
601602 let tok = current_token t in
603+ match tok.token with
604+ | Token.Stream_end ->
605+ skip_token t;
606+ t.state <- End;
607+ t.finished <- true;
608+ (Event.Stream_end, tok.span)
609+ | Token.Version_directive _ | Token.Tag_directive _ ->
610+ (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *)
611+ if (not t.stream_start) && not t.explicit_doc_end then
612+ Error.raise_span tok.span
613+ (Invalid_directive
614+ "directives require explicit document end '...' before them");
615+ parse_document_start t ~implicit:false
616+ | Token.Document_start -> parse_document_start t ~implicit:false
617+ (* These tokens are invalid at document start - they indicate leftover junk *)
618+ | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry
619+ | Token.Block_end | Token.Value ->
620+ Error.raise_span tok.span
621+ (Unexpected_token "unexpected token at document start")
622+ | _ -> parse_document_start t ~implicit:true)
623624 | Document_content ->
625+ if
626+ check t (function
627+ | Token.Version_directive _ | Token.Tag_directive _
628+ | Token.Document_start | Token.Document_end | Token.Stream_end ->
629+ true
630+ | _ -> false)
631 then begin
632 let tok = current_token t in
633 t.state <- pop_state t;
634 empty_scalar_event ~anchor:None ~tag:None tok.span
635+ end
636+ else begin
637 (* Push Document_content_done so we return there after parsing the node.
638 This allows us to check for unexpected content after the node. *)
639 push_state t Document_content_done;
640 parse_node t ~block:true ~indentless:false
641 end
0642 | Document_content_done ->
643 (* After parsing a node in document content, check for unexpected content *)
644+ if
645+ check t (function
646+ | Token.Version_directive _ | Token.Tag_directive _
647+ | Token.Document_start | Token.Document_end | Token.Stream_end ->
648+ true
649+ | _ -> false)
650 then begin
651 (* Valid document boundary - continue to Document_end *)
652 t.state <- pop_state t;
653+ parse t (* Continue to emit the next event *)
654+ end
655+ else begin
656 (* Unexpected content after document value - this is an error (KS4U, BS4K) *)
657 let tok = current_token t in
658 Error.raise_span tok.span
659 (Unexpected_token "content not allowed after document value")
660 end
661+ | Document_end -> parse_document_end t
00662663 | Block_sequence_first_entry ->
664 t.state <- Block_sequence_entry;
665 parse_block_sequence_entry t
666+ | Block_sequence_entry -> parse_block_sequence_entry t
667+ | Indentless_sequence_entry -> parse_indentless_sequence_entry t
00000668 | Block_mapping_first_key ->
669 t.state <- Block_mapping_key;
670 parse_block_mapping_key t
671+ | Block_mapping_key -> parse_block_mapping_key t
672+ | Block_mapping_value -> parse_block_mapping_value t
673+ | Flow_sequence_first_entry -> parse_flow_sequence_entry t ~first:true
674+ | Flow_sequence_entry -> parse_flow_sequence_entry t ~first:false
675+ | Flow_sequence_entry_mapping_key -> parse_flow_sequence_entry_mapping_key t
00000000000676 | Flow_sequence_entry_mapping_value ->
677 parse_flow_sequence_entry_mapping_value t
678+ | Flow_sequence_entry_mapping_end -> parse_flow_sequence_entry_mapping_end t
679+ | Flow_mapping_first_key -> parse_flow_mapping_key t ~first:true
680+ | Flow_mapping_key -> parse_flow_mapping_key t ~first:false
681+ | Flow_mapping_value -> parse_flow_mapping_value t ~empty:false
00000000682683 | End ->
684 let span = Span.point Position.initial in
685 t.finished <- true;
686+ (Event.Stream_end, span)
687688(** Get next event *)
689let next t =
···697 let rec loop () =
698 match next t with
699 | None -> ()
700+ | Some ev ->
701+ f ev;
702+ loop ()
703 in
704 loop ()
705706(** Fold over all events *)
707let fold f init t =
708 let rec loop acc =
709+ match next t with None -> acc | Some ev -> loop (f acc ev)
00710 in
711 loop init
712713(** Convert to list *)
714+let to_list t = fold (fun acc ev -> ev :: acc) [] t |> List.rev
0
+11-28
lib/position.ml
···6(** Position tracking for source locations *)
78type t = {
9- index : int; (** Byte offset from start *)
10- line : int; (** 1-indexed line number *)
11 column : int; (** 1-indexed column number *)
12}
1314let initial = { index = 0; line = 1; column = 1 }
15-16-let advance_byte t =
17- { t with index = t.index + 1; column = t.column + 1 }
18-19-let advance_line t =
20- { index = t.index + 1; line = t.line + 1; column = 1 }
21-22-let advance_char c t =
23- if c = '\n' then advance_line t
24- else advance_byte t
2526let advance_utf8 uchar t =
27 let len = Uchar.utf_8_byte_length uchar in
28 let code = Uchar.to_int uchar in
29 if code = 0x0A (* LF *) then
30 { index = t.index + len; line = t.line + 1; column = 1 }
31- else
32- { t with index = t.index + len; column = t.column + 1 }
3334-let advance_bytes n t =
35- { t with index = t.index + n; column = t.column + n }
36-37-let pp fmt t =
38- Format.fprintf fmt "line %d, column %d" t.line t.column
39-40-let to_string t =
41- Format.asprintf "%a" pp t
42-43-let compare a b =
44- Int.compare a.index b.index
45-46-let equal a b =
47- a.index = b.index
···6(** Position tracking for source locations *)
78type t = {
9+ index : int; (** Byte offset from start *)
10+ line : int; (** 1-indexed line number *)
11 column : int; (** 1-indexed column number *)
12}
1314let initial = { index = 0; line = 1; column = 1 }
15+let advance_byte t = { t with index = t.index + 1; column = t.column + 1 }
16+let advance_line t = { index = t.index + 1; line = t.line + 1; column = 1 }
17+let advance_char c t = if c = '\n' then advance_line t else advance_byte t
00000001819let advance_utf8 uchar t =
20 let len = Uchar.utf_8_byte_length uchar in
21 let code = Uchar.to_int uchar in
22 if code = 0x0A (* LF *) then
23 { index = t.index + len; line = t.line + 1; column = 1 }
24+ else { t with index = t.index + len; column = t.column + 1 }
02526+let advance_bytes n t = { t with index = t.index + n; column = t.column + n }
27+let pp fmt t = Format.fprintf fmt "line %d, column %d" t.line t.column
28+let to_string t = Format.asprintf "%a" pp t
29+let compare a b = Int.compare a.index b.index
30+let equal a b = a.index = b.index
000000000
+31-27
lib/quoting.ml
···56(** YAML scalar quoting detection *)
78-(** Check if a string value needs quoting in YAML output.
9- Returns true if the string:
10 - Is empty
11 - Starts with an indicator character
12 - Is a reserved word (null, true, false, yes, no, etc.)
···17 else
18 let first = s.[0] in
19 (* Check first character for indicators *)
20- if first = '-' || first = '?' || first = ':' || first = ',' ||
21- first = '[' || first = ']' || first = '{' || first = '}' ||
22- first = '#' || first = '&' || first = '*' || first = '!' ||
23- first = '|' || first = '>' || first = '\'' || first = '"' ||
24- first = '%' || first = '@' || first = '`' || first = ' ' then
25- true
026 else
27 (* Check for reserved/special values *)
28 let lower = String.lowercase_ascii s in
29- if lower = "null" || lower = "true" || lower = "false" ||
30- lower = "yes" || lower = "no" || lower = "on" || lower = "off" ||
31- lower = "~" || lower = ".inf" || lower = "-.inf" || lower = ".nan" then
32- true
033 else
34 (* Check for problematic characters *)
35 try
36- String.iter (fun c ->
37- if c = ':' || c = '#' || c = '\n' || c = '\r' then
38- raise Exit
39- ) s;
40 (* Check if it looks like a number *)
41- (try ignore (Float.of_string s); true with _ -> false)
00042 with Exit -> true
4344-(** Check if a string requires double quotes (vs single quotes).
45- Returns true if the string contains characters that need escape sequences. *)
46let needs_double_quotes s =
47 try
48- String.iter (fun c ->
49- if c = '\n' || c = '\r' || c = '\t' || c = '\\' ||
50- c < ' ' || c = '"' then
51- raise Exit
52- ) s;
53 false
54 with Exit -> true
5556(** Choose the appropriate quoting style for a string value *)
57let choose_style s =
58 match (needs_double_quotes s, needs_quoting s) with
59- | (true, _) -> `Double_quoted
60- | (_, true) -> `Single_quoted
61 | _ -> `Plain
62-
···56(** YAML scalar quoting detection *)
78+(** Check if a string value needs quoting in YAML output. Returns true if the
9+ string:
10 - Is empty
11 - Starts with an indicator character
12 - Is a reserved word (null, true, false, yes, no, etc.)
···17 else
18 let first = s.[0] in
19 (* Check first character for indicators *)
20+ if
21+ first = '-' || first = '?' || first = ':' || first = ',' || first = '['
22+ || first = ']' || first = '{' || first = '}' || first = '#' || first = '&'
23+ || first = '*' || first = '!' || first = '|' || first = '>'
24+ || first = '\'' || first = '"' || first = '%' || first = '@'
25+ || first = '`' || first = ' '
26+ then true
27 else
28 (* Check for reserved/special values *)
29 let lower = String.lowercase_ascii s in
30+ if
31+ lower = "null" || lower = "true" || lower = "false" || lower = "yes"
32+ || lower = "no" || lower = "on" || lower = "off" || lower = "~"
33+ || lower = ".inf" || lower = "-.inf" || lower = ".nan"
34+ then true
35 else
36 (* Check for problematic characters *)
37 try
38+ String.iter
39+ (fun c ->
40+ if c = ':' || c = '#' || c = '\n' || c = '\r' then raise Exit)
41+ s;
42 (* Check if it looks like a number *)
43+ try
44+ ignore (Float.of_string s);
45+ true
46+ with _ -> false
47 with Exit -> true
4849+(** Check if a string requires double quotes (vs single quotes). Returns true if
50+ the string contains characters that need escape sequences. *)
51let needs_double_quotes s =
52 try
53+ String.iter
54+ (fun c ->
55+ if c = '\n' || c = '\r' || c = '\t' || c = '\\' || c < ' ' || c = '"'
56+ then raise Exit)
57+ s;
58 false
59 with Exit -> true
6061(** Choose the appropriate quoting style for a string value *)
62let choose_style s =
63 match (needs_double_quotes s, needs_quoting s) with
64+ | true, _ -> `Double_quoted
65+ | _, true -> `Single_quoted
66 | _ -> `Plain
0
+22-24
lib/scalar.ml
···14 style : Scalar_style.t;
15}
1617-let make
18- ?(anchor : string option)
19- ?(tag : string option)
20- ?(plain_implicit = true)
21- ?(quoted_implicit = false)
22- ?(style = `Plain)
23- value =
24 { anchor; tag; value; plain_implicit; quoted_implicit; style }
2526let value t = t.value
···29let style t = t.style
30let plain_implicit t = t.plain_implicit
31let quoted_implicit t = t.quoted_implicit
32-33let with_anchor anchor t = { t with anchor = Some anchor }
34let with_tag tag t = { t with tag = Some tag }
35let with_style style t = { t with style }
···41 Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style
4243let equal a b =
44- Option.equal String.equal a.anchor b.anchor &&
45- Option.equal String.equal a.tag b.tag &&
46- String.equal a.value b.value &&
47- a.plain_implicit = b.plain_implicit &&
48- a.quoted_implicit = b.quoted_implicit &&
49- Scalar_style.equal a.style b.style
5051let compare a b =
52 let c = Option.compare String.compare a.anchor b.anchor in
53- if c <> 0 then c else
54- let c = Option.compare String.compare a.tag b.tag in
55- if c <> 0 then c else
56- let c = String.compare a.value b.value in
57- if c <> 0 then c else
58- let c = Bool.compare a.plain_implicit b.plain_implicit in
59- if c <> 0 then c else
60- let c = Bool.compare a.quoted_implicit b.quoted_implicit in
61- if c <> 0 then c else
62- Scalar_style.compare a.style b.style
000
···14 style : Scalar_style.t;
15}
1617+let make ?(anchor : string option) ?(tag : string option)
18+ ?(plain_implicit = true) ?(quoted_implicit = false) ?(style = `Plain) value
19+ =
000020 { anchor; tag; value; plain_implicit; quoted_implicit; style }
2122let value t = t.value
···25let style t = t.style
26let plain_implicit t = t.plain_implicit
27let quoted_implicit t = t.quoted_implicit
028let with_anchor anchor t = { t with anchor = Some anchor }
29let with_tag tag t = { t with tag = Some tag }
30let with_style style t = { t with style }
···36 Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style
3738let equal a b =
39+ Option.equal String.equal a.anchor b.anchor
40+ && Option.equal String.equal a.tag b.tag
41+ && String.equal a.value b.value
42+ && a.plain_implicit = b.plain_implicit
43+ && a.quoted_implicit = b.quoted_implicit
44+ && Scalar_style.equal a.style b.style
4546let compare a b =
47 let c = Option.compare String.compare a.anchor b.anchor in
48+ if c <> 0 then c
49+ else
50+ let c = Option.compare String.compare a.tag b.tag in
51+ if c <> 0 then c
52+ else
53+ let c = String.compare a.value b.value in
54+ if c <> 0 then c
55+ else
56+ let c = Bool.compare a.plain_implicit b.plain_implicit in
57+ if c <> 0 then c
58+ else
59+ let c = Bool.compare a.quoted_implicit b.quoted_implicit in
60+ if c <> 0 then c else Scalar_style.compare a.style b.style
+8-11
lib/scalar_style.ml
···56(** Scalar formatting styles *)
78-type t = [
9- | `Any (** Let emitter choose *)
10- | `Plain (** Unquoted: foo *)
11- | `Single_quoted (** 'foo' *)
12- | `Double_quoted (** "foo" *)
13- | `Literal (** | block *)
14- | `Folded (** > block *)
15-]
1617let to_string = function
18 | `Any -> "any"
···22 | `Literal -> "literal"
23 | `Folded -> "folded"
2425-let pp fmt t =
26- Format.pp_print_string fmt (to_string t)
27-28let equal a b = a = b
2930let compare a b =
···56(** Scalar formatting styles *)
78+type t =
9+ [ `Any (** Let emitter choose *)
10+ | `Plain (** Unquoted: foo *)
11+ | `Single_quoted (** 'foo' *)
12+ | `Double_quoted (** "foo" *)
13+ | `Literal (** | block *)
14+ | `Folded (** > block *) ]
01516let to_string = function
17 | `Any -> "any"
···21 | `Literal -> "literal"
22 | `Folded -> "folded"
2324+let pp fmt t = Format.pp_print_string fmt (to_string t)
0025let equal a b = a = b
2627let compare a b =
+536-399
lib/scanner.ml
···56(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)
78-(** Simple key tracking for mapping key disambiguation *)
9type simple_key = {
10 sk_possible : bool;
11 sk_required : bool;
12 sk_token_number : int;
13 sk_position : Position.t;
14}
01516-(** Indent level tracking *)
17type indent = {
18 indent : int;
19 needs_block_end : bool;
20}
02122type t = {
23 input : Input.t;
···27 mutable stream_started : bool;
28 mutable stream_ended : bool;
29 mutable indent_stack : indent list; (** Stack of indentation levels *)
30- mutable flow_level : int; (** Nesting depth in \[\] or \{\} *)
31- mutable flow_indent : int; (** Column where outermost flow collection started *)
32- mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
0033 mutable allow_simple_key : bool;
34- mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *)
35- mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
36- mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *)
37- mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *)
000038}
3940let create input =
···48 indent_stack = [];
49 flow_level = 0;
50 flow_indent = 0;
51- simple_keys = [None]; (* One entry for the base level *)
052 allow_simple_key = true;
53- leading_whitespace = true; (* Start at beginning of stream *)
054 document_has_content = false;
55 adjacent_value_allowed_at = None;
56 flow_mapping_stack = [];
···59let of_string s = create (Input.of_string s)
60let of_input = create
61let of_reader r = create (Input.of_reader r)
62-63let position t = Input.position t.input
6465(** Add a token to the queue *)
···7273(** Get current indent level *)
74let current_indent t =
75- match t.indent_stack with
76- | [] -> -1
77- | { indent; _ } :: _ -> indent
7879-(** Skip whitespace to end of line, checking for valid comments.
80- Returns true if any whitespace (including tabs) was found before a comment. *)
81let skip_whitespace_and_comment t =
82 let has_whitespace = ref false in
83 (* Skip blanks (spaces and tabs) *)
···98 Error.raise_at (Input.mark t.input) Invalid_comment
99 end;
100 (* Skip to end of line *)
101- while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
102 ignore (Input.next t.input)
103 done
104 end
···109 let found_space = ref false in
110 while Input.next_is_blank t.input do
111 (match Input.peek t.input with
112- | Some '\t' -> found_tab := true
113- | Some ' ' -> found_space := true
114- | _ -> ());
115 ignore (Input.next t.input)
116 done;
117 (!found_tab, !found_space)
···120let rec skip_to_next_token t =
121 (* Check for tabs used as indentation in block context *)
122 (match Input.peek t.input with
123- | Some '\t' when t.flow_level = 0 && t.leading_whitespace &&
124- (column t - 1) < current_indent t ->
125- (* Tab found in indentation zone - this is invalid *)
126- (* Skip to end of line to check if line has content *)
127- let start_pos = Input.mark t.input in
128- while Input.next_is_blank t.input do
129- ignore (Input.next t.input)
130- done;
131- (* If we have content on this line with a tab, raise error *)
132- if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
133- Error.raise_at start_pos Tab_in_indentation
134- | _ -> ());
0135136 (* Skip blanks and validate comments *)
137 skip_whitespace_and_comment t;
···158 ignore (Input.next t.input)
159 done;
160 (* If only tabs were used (no spaces) and column < flow_indent, error *)
161- if not (Input.next_is_break t.input) && not (Input.is_eof t.input) &&
162- column t < t.flow_indent then
163- Error.raise_at start_mark Invalid_flow_indentation
00164 end;
165 skip_to_next_token t
166- end else begin
0167 ignore (Input.next t.input);
168 skip_to_next_token t
169 end
···174 if t.flow_level = 0 && col > current_indent t then begin
175 t.indent_stack <- { indent = col; needs_block_end = true } :: t.indent_stack;
176 true
177- end else
178- false
179180(** Unroll indentation to given column *)
181let unroll_indent t col =
182- while t.flow_level = 0 &&
183- match t.indent_stack with
184- | { indent; needs_block_end = true; _ } :: _ when indent > col -> true
185- | _ -> false
00186 do
187 match t.indent_stack with
188 | { indent = _; needs_block_end = true; _ } :: rest ->
···199 (* A simple key is required only if we're in a block context,
200 at the current indentation level, AND the current indent needs a block end.
201 This matches saphyr's logic and prevents false positives for values. *)
202- let required = t.flow_level = 0 &&
203- match t.indent_stack with
204- | { indent; needs_block_end = true; _ } :: _ ->
205- indent = column t
206- | _ -> false
0207 in
208- let sk = {
209- sk_possible = true;
210- sk_required = required;
211- sk_token_number = t.token_number;
212- sk_position = Input.position t.input;
213- } in
00214 (* Remove any existing simple key at current level *)
215- t.simple_keys <- (
216- match t.simple_keys with
217 | _ :: rest -> Some sk :: rest
218- | [] -> [Some sk]
219- )
220 end
221222(** Remove simple key at current level *)
···229230(** Stale simple keys that span too many tokens *)
231let stale_simple_keys t =
232- t.simple_keys <- List.map (fun sk_opt ->
233- match sk_opt with
234- | Some sk when sk.sk_possible &&
235- (Input.position t.input).line > sk.sk_position.line &&
236- t.flow_level = 0 ->
237- if sk.sk_required then
238- Error.raise_at sk.sk_position Expected_key;
239- None
240- | _ -> sk_opt
241- ) t.simple_keys
00242243(** Read anchor or alias name *)
244let scan_anchor_alias t =
···251 This matches the saphyr implementation: is_yaml_non_space && !is_flow *)
252 while
253 match Input.peek t.input with
254- | Some c when not (Input.is_whitespace c) &&
255- not (Input.is_flow_indicator c) &&
256- c <> '\x00' ->
0257 Buffer.add_char buf c;
258 ignore (Input.next t.input);
259 true
260 | _ -> false
261- do () done;
00262 let name = Buffer.contents buf in
263 if String.length name = 0 then
264 Error.raise_at start (Invalid_anchor "empty anchor name");
···270 let buf = Buffer.create 16 in
271 (* Expect ! *)
272 (match Input.peek t.input with
273- | Some '!' ->
274- Buffer.add_char buf '!';
275- ignore (Input.next t.input)
276- | _ -> Error.raise_at start (Invalid_tag "expected '!'"));
277 (* Read word chars *)
278 while
279 match Input.peek t.input with
···282 ignore (Input.next t.input);
283 true
284 | _ -> false
285- do () done;
00286 (* Check for secondary ! *)
287 (match Input.peek t.input with
288- | Some '!' ->
289- Buffer.add_char buf '!';
290- ignore (Input.next t.input)
291- | _ -> ());
292 Buffer.contents buf
293294(** Scan tag suffix (after handle) *)
···298 in
299 let hex_val c =
300 match c with
301- | '0'..'9' -> Char.code c - Char.code '0'
302- | 'A'..'F' -> Char.code c - Char.code 'A' + 10
303- | 'a'..'f' -> Char.code c - Char.code 'a' + 10
304 | _ -> 0
305 in
306 let buf = Buffer.create 32 in
307 while
308 match Input.peek t.input with
309- | Some '%' ->
310 (* Percent-encoded character *)
311 ignore (Input.next t.input);
312- (match Input.peek t.input, Input.peek_nth t.input 1 with
313- | Some c1, Some c2 when is_hex_digit c1 && is_hex_digit c2 ->
314- ignore (Input.next t.input);
315- ignore (Input.next t.input);
316- let code = (hex_val c1) * 16 + (hex_val c2) in
317- Buffer.add_char buf (Char.chr code);
318- true
319- | _ ->
320- (* Invalid percent encoding - keep the % *)
321- Buffer.add_char buf '%';
322- true)
323- | Some c when not (Input.is_whitespace c) &&
324- not (Input.is_flow_indicator c) ->
325 Buffer.add_char buf c;
326 ignore (Input.next t.input);
327 true
328 | _ -> false
329- do () done;
00330 Buffer.contents buf
331332(** Scan a tag *)
333let scan_tag t =
334 let start = Input.mark t.input in
335- ignore (Input.next t.input); (* consume ! *)
0336 let handle, suffix =
337 match Input.peek t.input with
338 | Some '<' ->
···346 Buffer.add_char buf c;
347 ignore (Input.next t.input);
348 true
349- | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
350- do () done;
351- ignore (Input.next t.input); (* consume > *)
00000352 ("", Buffer.contents buf)
353 | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
354 (* Non-specific tag: ! *)
355 ("!", "")
356 | Some '!' ->
357 (* Secondary handle: !! *)
358- ignore (Input.next t.input); (* consume second ! *)
0359 let suffix = scan_tag_suffix t in
360 ("!!", suffix)
361- | _ ->
362 (* Primary handle or just suffix: !foo or !e!foo *)
363 (* Read alphanumeric characters *)
364 let buf = Buffer.create 16 in
···369 ignore (Input.next t.input);
370 true
371 | _ -> false
372- do () done;
00373 (* Check if next character is ! - if so, this is a named handle *)
374- (match Input.peek t.input with
375- | Some '!' ->
376- (* Named handle like !e! *)
377- ignore (Input.next t.input);
378- let handle_name = Buffer.contents buf in
379- let suffix = scan_tag_suffix t in
380- ("!" ^ handle_name ^ "!", suffix)
381- | _ ->
382- (* Just ! followed by suffix *)
383- ("!", Buffer.contents buf ^ scan_tag_suffix t))
384 in
385 (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
386 (match Input.peek t.input with
387- | None -> () (* EOF is ok *)
388- | Some c when Input.is_whitespace c || Input.is_break c -> ()
389- | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
390- | _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag"));
00391 let span = Span.make ~start ~stop:(Input.mark t.input) in
392 (handle, suffix, span)
393394(** Scan single-quoted scalar *)
395let scan_single_quoted t =
396 let start = Input.mark t.input in
397- ignore (Input.next t.input); (* consume opening single-quote *)
0398 let buf = Buffer.create 64 in
399- let whitespace = Buffer.create 16 in (* Track trailing whitespace *)
0400401 let flush_whitespace () =
402 if Buffer.length whitespace > 0 then begin
···408 let rec loop () =
409 match Input.peek t.input with
410 | None -> Error.raise_at start Unclosed_single_quote
411- | Some '\'' ->
412 ignore (Input.next t.input);
413 (* Check for escaped quote ('') *)
414- (match Input.peek t.input with
415- | Some '\'' ->
416- flush_whitespace ();
417- Buffer.add_char buf '\'';
418- ignore (Input.next t.input);
419- loop ()
420- | _ ->
421- (* End of string - flush any trailing whitespace *)
422- flush_whitespace ())
423 | Some ' ' | Some '\t' ->
424 (* Track whitespace - don't add to buf yet *)
425 Buffer.add_char whitespace (Option.get (Input.peek t.input));
···439 (* Check indentation: continuation must be > block indent (QB6E, DK95) *)
440 let col = column t in
441 let indent = current_indent t in
442- if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
443- Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
000000444 (* Count empty lines (consecutive line breaks) *)
445 let empty_lines = ref 0 in
446 while Input.next_is_break t.input do
···454 (* Check indentation after each empty line too *)
455 let col = column t in
456 let indent = current_indent t in
457- if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
458- Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar")
000000459 done;
460 (* Apply folding rules *)
461 if !empty_lines > 0 then begin
···463 for _ = 1 to !empty_lines do
464 Buffer.add_char buf '\n'
465 done
466- end else
0467 (* Single break: fold to space (even at start of string) *)
468 Buffer.add_char buf ' ';
469 loop ()
···486 | Some c when Input.is_hex c ->
487 Buffer.add_char buf c;
488 ignore (Input.next t.input)
489- | _ ->
490- Error.raise_at start (Invalid_hex_escape (Buffer.contents buf))
491 done;
492 let code = int_of_string ("0x" ^ Buffer.contents buf) in
493- if code <= 0x7F then
494- String.make 1 (Char.chr code)
495 else if code <= 0x7FF then
496 let b1 = 0xC0 lor (code lsr 6) in
497 let b2 = 0x80 lor (code land 0x3F) in
···500 let b1 = 0xE0 lor (code lsr 12) in
501 let b2 = 0x80 lor ((code lsr 6) land 0x3F) in
502 let b3 = 0x80 lor (code land 0x3F) in
503- String.init 3 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3))
0504 else
505 let b1 = 0xF0 lor (code lsr 18) in
506 let b2 = 0x80 lor ((code lsr 12) land 0x3F) in
507 let b3 = 0x80 lor ((code lsr 6) land 0x3F) in
508 let b4 = 0x80 lor (code land 0x3F) in
509- String.init 4 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4))
0510511(** Scan double-quoted scalar *)
512let scan_double_quoted t =
513 let start = Input.mark t.input in
514- ignore (Input.next t.input); (* consume opening double-quote *)
0515 let buf = Buffer.create 64 in
516- let whitespace = Buffer.create 16 in (* Track pending whitespace *)
0517518 let flush_whitespace () =
519 if Buffer.length whitespace > 0 then begin
···529 (* Flush trailing whitespace before closing quote to preserve it *)
530 flush_whitespace ();
531 ignore (Input.next t.input)
532- | Some ' ' | Some '\t' as c_opt ->
533 (* Track whitespace - don't add to buf yet *)
534 let c = match c_opt with Some c -> c | None -> assert false in
535 Buffer.add_char whitespace c;
···537 loop ()
538 | Some '\\' ->
539 (* Escape sequence - this is non-whitespace content *)
540- flush_whitespace (); (* Commit any pending whitespace *)
0541 ignore (Input.next t.input);
542 (match Input.peek t.input with
543- | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>")
544- | Some '0' -> Buffer.add_char buf '\x00'; ignore (Input.next t.input)
545- | Some 'a' -> Buffer.add_char buf '\x07'; ignore (Input.next t.input)
546- | Some 'b' -> Buffer.add_char buf '\x08'; ignore (Input.next t.input)
547- | Some 't' | Some '\t' -> Buffer.add_char buf '\t'; ignore (Input.next t.input)
548- | Some 'n' -> Buffer.add_char buf '\n'; ignore (Input.next t.input)
549- | Some 'v' -> Buffer.add_char buf '\x0B'; ignore (Input.next t.input)
550- | Some 'f' -> Buffer.add_char buf '\x0C'; ignore (Input.next t.input)
551- | Some 'r' -> Buffer.add_char buf '\r'; ignore (Input.next t.input)
552- | Some 'e' -> Buffer.add_char buf '\x1B'; ignore (Input.next t.input)
553- | Some ' ' -> Buffer.add_char buf ' '; ignore (Input.next t.input)
554- | Some '"' -> Buffer.add_char buf '"'; ignore (Input.next t.input)
555- | Some '/' -> Buffer.add_char buf '/'; ignore (Input.next t.input)
556- | Some '\\' -> Buffer.add_char buf '\\'; ignore (Input.next t.input)
557- | Some 'N' -> Buffer.add_string buf "\xC2\x85"; ignore (Input.next t.input) (* NEL *)
558- | Some '_' -> Buffer.add_string buf "\xC2\xA0"; ignore (Input.next t.input) (* NBSP *)
559- | Some 'L' -> Buffer.add_string buf "\xE2\x80\xA8"; ignore (Input.next t.input) (* LS *)
560- | Some 'P' -> Buffer.add_string buf "\xE2\x80\xA9"; ignore (Input.next t.input) (* PS *)
561- | Some 'x' ->
562- ignore (Input.next t.input);
563- Buffer.add_string buf (decode_hex t 2)
564- | Some 'u' ->
565- ignore (Input.next t.input);
566- Buffer.add_string buf (decode_hex t 4)
567- | Some 'U' ->
568- ignore (Input.next t.input);
569- Buffer.add_string buf (decode_hex t 8)
570- | Some '\n' | Some '\r' ->
571- (* Line continuation escape *)
572- Input.consume_break t.input;
573- while Input.next_is_blank t.input do
574- ignore (Input.next t.input)
575- done
576- | Some c ->
577- Error.raise_at (Input.mark t.input)
578- (Invalid_escape_sequence (Printf.sprintf "\\%c" c)));
0000000000000000000000000000000000579 loop ()
580 | Some '\n' | Some '\r' ->
581 (* Line break: discard any pending trailing whitespace *)
···596 if Input.next_is_break t.input then begin
597 Input.consume_break t.input;
598 incr empty_lines;
599- started_with_tab := false (* Reset for next line *)
600- end else
601- continue := false
602 done;
603 (* Check for document boundary - this terminates the quoted string *)
604 if Input.at_document_boundary t.input then
···609 let indent = current_indent t in
610 let start_col = start.column in
611 (* DK95/01: if continuation started with tabs and column < start column, error *)
612- if not (Input.is_eof t.input) && !started_with_tab && col < start_col then
613- Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
614- if not (Input.is_eof t.input) && col <= indent && indent >= 0 then
615- Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
00000616 (* Per YAML spec: single break = space, break + empty lines = newlines *)
617 if !empty_lines > 0 then begin
618 (* Empty lines: output N newlines where N = number of empty lines *)
619 for _ = 1 to !empty_lines do
620 Buffer.add_char buf '\n'
621 done
622- end else
0623 (* Single break folds to space *)
624 Buffer.add_char buf ' ';
625 loop ()
626 | Some c ->
627 (* Non-whitespace character *)
628- flush_whitespace (); (* Commit any pending whitespace *)
0629 Buffer.add_char buf c;
630 ignore (Input.next t.input);
631 loop ()
···637(** Check if character can appear in plain scalar at this position *)
638let can_continue_plain t c ~in_flow =
639 match c with
640- | ':' ->
641 (* : is OK if not followed by whitespace or flow indicator *)
642- (match Input.peek_nth t.input 1 with
643- | None -> true
644- | Some c2 when Input.is_whitespace c2 -> false
645- | Some c2 when in_flow && Input.is_flow_indicator c2 -> false
646- | _ -> true)
647- | '#' ->
648 (* # is a comment indicator only if preceded by whitespace *)
649 (* Check the previous character to determine if this is a comment *)
650- (match Input.peek_back t.input with
651- | None -> true (* At start - can't be comment indicator, allow it *)
652- | Some c when Input.is_whitespace c -> false (* Preceded by whitespace - comment *)
653- | Some c when Input.is_break c -> false (* At start of line - comment *)
654- | _ -> true) (* Not preceded by whitespace - part of scalar *)
0655 | c when in_flow && Input.is_flow_indicator c -> false
656 | _ when Input.is_break c -> false
657 | _ -> true
···663 let indent = current_indent t in
664 (* In flow context, scalars must be indented more than the current block indent.
665 This ensures that content at block indent or less ends the flow context. *)
666- if in_flow && (column t - 1) < indent then
667 Error.raise_at start Invalid_flow_indentation;
668 let buf = Buffer.create 64 in
669 let spaces = Buffer.create 16 in
670- let whitespace = Buffer.create 16 in (* Track whitespace within a line *)
0671 let leading_blanks = ref false in
672673 let rec scan_line () =
···684 if Buffer.length spaces > 0 then begin
685 if !leading_blanks then begin
686 (* Fold line break *)
687- if Buffer.contents spaces = "\n" then
688- Buffer.add_char buf ' '
689 else begin
690 (* Multiple breaks - preserve all but first *)
691 let s = Buffer.contents spaces in
692 Buffer.add_substring buf s 1 (String.length s - 1)
693 end
694- end else
695- Buffer.add_buffer buf spaces;
696 Buffer.clear spaces
697 end;
698 (* Add any pending whitespace from within the line *)
···719 if !leading_blanks then begin
720 (* We already had a break - this is an additional break (empty line) *)
721 Buffer.add_char spaces '\n'
722- end else begin
0723 (* First line break *)
724 Buffer.clear spaces;
725 Buffer.add_char spaces '\n';
···739 (* However, allow empty lines (line breaks) to continue even if dedented *)
740 if Input.next_is_break t.input then
741 scan_lines () (* Empty line - continue *)
742- else if not in_flow && col <= indent then
743- () (* Stop - dedented or at parent level in block context *)
744- else if Input.at_document_boundary t.input then
745- () (* Stop - document boundary *)
746- else
747- scan_lines ()
748 end
749 in
750···755 let len = String.length value in
756 let rec find_end i =
757 if i < 0 then 0
758- else match value.[i] with
759- | ' ' | '\t' -> find_end (i - 1)
760- | _ -> i + 1
761 in
762 let end_pos = find_end (len - 1) in
763 String.sub value 0 end_pos
···769(** Scan block scalar (literal | or folded >) *)
770let scan_block_scalar t literal =
771 let start = Input.mark t.input in
772- ignore (Input.next t.input); (* consume | or > *)
00773774 (* Parse header: optional indentation indicator and chomping *)
775 let explicit_indent = ref None in
···777778 (* First character of header *)
779 (match Input.peek t.input with
780- | Some c when Input.is_digit c && c <> '0' ->
781- explicit_indent := Some (Char.code c - Char.code '0');
782- ignore (Input.next t.input)
783- | Some '-' -> chomping := Chomping.Strip; ignore (Input.next t.input)
784- | Some '+' -> chomping := Chomping.Keep; ignore (Input.next t.input)
785- | _ -> ());
0000786787 (* Second character of header *)
788 (match Input.peek t.input with
789- | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None ->
790- explicit_indent := Some (Char.code c - Char.code '0');
791- ignore (Input.next t.input)
792- | Some '-' when !chomping = Chomping.Clip ->
793- chomping := Chomping.Strip; ignore (Input.next t.input)
794- | Some '+' when !chomping = Chomping.Clip ->
795- chomping := Chomping.Keep; ignore (Input.next t.input)
796- | _ -> ());
00797798 (* Skip whitespace and optional comment *)
799 skip_whitespace_and_comment t;
800801 (* Consume line break *)
802- if Input.next_is_break t.input then
803- Input.consume_break t.input
804 else if not (Input.is_eof t.input) then
805 Error.raise_at (Input.mark t.input)
806 (Invalid_block_scalar_header "expected newline after header");
···808 let base_indent = current_indent t in
809 (* base_indent is the indent level from the stack, -1 if empty.
810 It's used directly for comparisons in implicit indent case. *)
811- let content_indent = ref (
812- match !explicit_indent with
813- | Some n ->
814- (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
0815 content_indent = (base_indent - 1) + n, but at least n for document level. *)
816- let base_level = max 0 (base_indent - 1) in
817- base_level + n
818- | None -> 0 (* Will be determined by first non-empty line *)
819- ) in
820821 let buf = Buffer.create 256 in
822 let trailing_breaks = Buffer.create 16 in
823- let leading_blank = ref false in (* Was the previous line "more indented"? *)
824- let max_empty_line_indent = ref 0 in (* Track max indent of empty lines before first content *)
00825826 (* Skip to content indentation, skipping empty lines.
827 Returns the number of spaces actually skipped (important for detecting dedentation). *)
···829 if !content_indent > 0 then begin
830 (* Explicit indent - skip up to content_indent spaces *)
831 let spaces_skipped = ref 0 in
832- while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do
00833 incr spaces_skipped;
834 ignore (Input.next t.input)
835 done;
···840 Buffer.add_char trailing_breaks '\n';
841 Input.consume_break t.input;
842 skip_to_content_indent ()
843- end else if !spaces_skipped < !content_indent then begin
0844 (* Line starts with fewer spaces than content_indent - dedented *)
845 !spaces_skipped
846- end else if Input.next_is_blank t.input then begin
0847 (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
848 For literal scalars, whitespace-only lines ARE content (not empty).
849 For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
···853 else begin
854 (* Folded: check if rest is only blanks *)
855 let idx = ref 0 in
856- while match Input.peek_nth t.input !idx with
857- | Some c when Input.is_blank c -> incr idx; true
858- | _ -> false
859- do () done;
860- match Input.peek_nth t.input (!idx) with
00000861 | None | Some '\n' | Some '\r' ->
862 (* Empty/whitespace-only line in folded - skip spaces *)
863 while Input.next_is_blank t.input do
···870 (* Has non-whitespace content *)
871 !content_indent
872 end
873- end else
874- !content_indent
875- end else begin
0876 (* Implicit indent - skip empty lines without consuming spaces.
877 Note: Only SPACES count as indentation. Tabs are content, not indentation.
878 So we only check for spaces when determining if a line is "empty". *)
···880 Buffer.add_char trailing_breaks '\n';
881 Input.consume_break t.input;
882 skip_to_content_indent ()
883- end else if Input.next_is (( = ) ' ') t.input then begin
0884 (* Check if line is empty (only spaces before break) *)
885 let idx = ref 0 in
886- while match Input.peek_nth t.input !idx with
887- | Some ' ' -> incr idx; true
888- | _ -> false
889- do () done;
890- match Input.peek_nth t.input (!idx) with
00000891 | None | Some '\n' | Some '\r' ->
892 (* Line has only spaces - empty line *)
893 (* Track max indent of empty lines for later validation *)
894- if !idx > !max_empty_line_indent then
895- max_empty_line_indent := !idx;
896 while Input.next_is (( = ) ' ') t.input do
897 ignore (Input.next t.input)
898 done;
···902 | _ ->
903 (* Has content (including tabs which are content, not indentation) *)
904 0
905- end else if Input.next_is (( = ) '\t') t.input then begin
0906 (* Tab at start of line in implicit indent mode - this is an error (Y79Y)
907 because tabs cannot be used as indentation in YAML *)
908 Error.raise_at (Input.mark t.input) Tab_in_indentation
909- end else
0910 (* Not at break or space - other content character *)
911 0
912 end
···938 let should_process =
939 if !content_indent = 0 then begin
940 (* For implicit indent, content must be more indented than base_level. *)
941- if line_indent <= base_level then
942- false (* No content - first line not indented enough *)
943 else begin
944 (* Validate: first content line must be indented at least as much as
945 the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *)
946- if line_indent < !max_empty_line_indent && line_indent > base_level then
0947 Error.raise_at (Input.mark t.input)
948- (Invalid_block_scalar_header "wrongly indented line in block scalar");
0949 content_indent := line_indent;
950 true
951 end
952- end else if line_indent < !content_indent then
953- false (* Dedented - done with content *)
954- else
955- true
956 in
957958 if should_process then begin
···960 For folded scalars, lines that start with any whitespace (space or tab) after the
961 content indentation are "more indented" and preserve breaks.
962 Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
963- let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in
00964965 (* Add trailing breaks to buffer *)
966 if Buffer.length buf > 0 then begin
967 if Buffer.length trailing_breaks > 0 then begin
968- if literal then
969- Buffer.add_buffer buf trailing_breaks
970 else begin
971 (* Folded scalar: fold only if both previous and current lines are not more-indented *)
972- if not !leading_blank && not trailing_blank then begin
973 let breaks = Buffer.contents trailing_breaks in
974- if String.length breaks = 1 then
975- Buffer.add_char buf ' '
976- else
977- Buffer.add_substring buf breaks 1 (String.length breaks - 1)
978- end else begin
979 (* Preserve breaks for more-indented lines *)
980 Buffer.add_buffer buf trailing_breaks
981 end
982 end
983- end else if not literal then
984- Buffer.add_char buf ' '
985- end else
986- Buffer.add_buffer buf trailing_breaks;
987 Buffer.clear trailing_breaks;
988989 (* Add extra indentation for literal or more-indented folded lines *)
990 (* On the first line (when determining content_indent), we've already consumed all spaces,
991 so we should NOT add any back. On subsequent lines, we add only the spaces beyond content_indent. *)
992- if not first_line && (literal || (!extra_spaces > 0 && not literal)) then begin
0993 for _ = 1 to !extra_spaces do
994 Buffer.add_char buf ' '
995 done
996 end;
997998 (* Read line content *)
999- while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
001000 Buffer.add_char buf (Input.next_exn t.input)
1001 done;
1002···1023 | Chomping.Strip -> content
1024 | Chomping.Clip ->
1025 if String.length content > 0 then content ^ "\n" else content
1026- | Chomping.Keep ->
1027- content ^ Buffer.contents trailing_breaks
1028 in
10291030 let span = Span.make ~start ~stop:(Input.mark t.input) in
···1034(** Scan directive (after %) *)
1035let scan_directive t =
1036 let start = Input.mark t.input in
1037- ignore (Input.next t.input); (* consume % *)
0010381039 (* Read directive name *)
1040 let name_buf = Buffer.create 16 in
···1045 ignore (Input.next t.input);
1046 true
1047 | _ -> false
1048- do () done;
001049 let name = Buffer.contents name_buf in
10501051 (* Skip blanks *)
···1060 let minor = ref 0 in
1061 (* Read major version *)
1062 while Input.next_is_digit t.input do
1063- major := !major * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
01064 done;
1065 (* Expect . *)
1066 (match Input.peek t.input with
1067- | Some '.' -> ignore (Input.next t.input)
1068- | _ -> Error.raise_at (Input.mark t.input) (Invalid_yaml_version "expected '.'"));
001069 (* Read minor version *)
1070 while Input.next_is_digit t.input do
1071- minor := !minor * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
01072 done;
1073 (* Validate: only whitespace and comments allowed before line break (MUS6) *)
1074 skip_whitespace_and_comment t;
1075- if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
1076- Error.raise_at (Input.mark t.input) (Invalid_directive "expected comment or line break after version");
01077 let span = Span.make ~start ~stop:(Input.mark t.input) in
1078- Token.Version_directive { major = !major; minor = !minor }, span
1079-1080 | "TAG" ->
1081 (* Tag directive: %TAG !foo! tag:example.com,2000: *)
1082 let handle = scan_tag_handle t in
···1093 ignore (Input.next t.input);
1094 true
1095 | _ -> false
1096- do () done;
001097 let prefix = Buffer.contents prefix_buf in
1098 let span = Span.make ~start ~stop:(Input.mark t.input) in
1099- Token.Tag_directive { handle; prefix }, span
1100-1101 | _ ->
1102 (* Reserved/Unknown directive - skip to end of line and ignore *)
1103 (* Per YAML spec, reserved directives should be ignored with a warning *)
1104- while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
1105 ignore (Input.next t.input)
1106 done;
1107 let span = Span.make ~start ~stop:(Input.mark t.input) in
1108 (* Return an empty tag directive token to indicate directive was processed but ignored *)
1109- Token.Tag_directive { handle = ""; prefix = "" }, span
11101111(** Fetch the next token(s) into the queue *)
1112let rec fetch_next_token t =
···1120 (* We're about to process actual content, not leading whitespace *)
1121 t.leading_whitespace <- false;
11221123- if Input.is_eof t.input then
1124- fetch_stream_end t
1125- else if Input.at_document_boundary t.input then
1126- fetch_document_indicator t
1127 else begin
1128 match Input.peek t.input with
1129 | None -> fetch_stream_end t
1130- | Some '%' when (Input.position t.input).column = 1 ->
1131- fetch_directive t
1132 | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start
1133 | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start
1134 | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end
···1136 | Some ',' -> fetch_flow_entry t
1137 | Some '-' when t.flow_level = 0 && check_block_entry t ->
1138 fetch_block_entry t
1139- | Some '?' when check_key t ->
1140- fetch_key t
1141- | Some ':' when check_value t ->
1142- fetch_value t
1143 | Some '*' -> fetch_alias t
1144 | Some '&' -> fetch_anchor t
1145 | Some '!' -> fetch_tag t
···1147 | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false
1148 | Some '\'' -> fetch_single_quoted t
1149 | Some '"' -> fetch_double_quoted t
1150- | Some '-' when can_start_plain t ->
1151- fetch_plain_scalar t
1152- | Some '?' when can_start_plain t ->
1153- fetch_plain_scalar t
1154- | Some ':' when can_start_plain t ->
1155- fetch_plain_scalar t
1156- | Some c when can_start_plain_char c t ->
1157- fetch_plain_scalar t
1158- | Some c ->
1159- Error.raise_at (Input.mark t.input) (Unexpected_character c)
1160 end
11611162and fetch_stream_end t =
···1177 let indicator = Input.peek_string t.input 3 in
1178 Input.skip t.input 3;
1179 let span = Span.make ~start ~stop:(Input.mark t.input) in
1180- let token = if indicator = "---" then Token.Document_start else Token.Document_end in
001181 (* Reset document content flag after document end marker *)
1182 if indicator = "..." then begin
1183 t.document_has_content <- false;
1184 (* After document end marker, skip whitespace and check for end of line or comment *)
1185- while Input.next_is_blank t.input do ignore (Input.next t.input) done;
1186- (match Input.peek t.input with
1187- | None -> () (* EOF is ok *)
1188- | Some c when Input.is_break c -> ()
1189- | Some '#' -> () (* Comment is ok *)
1190- | _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line"))
000001191 end;
1192 emit t span token
1193···1198 If we've emitted content in the current document, we need a document end marker first *)
1199 if t.document_has_content then
1200 Error.raise_at (Input.mark t.input)
1201- (Unexpected_token "directives must be separated from document content by document end marker (...)");
001202 unroll_indent t (-1);
1203 remove_simple_key t;
1204 t.allow_simple_key <- false;
···1208and fetch_flow_collection_start t token_type =
1209 save_simple_key t;
1210 (* Record indent of outermost flow collection *)
1211- if t.flow_level = 0 then
1212- t.flow_indent <- column t;
1213 t.flow_level <- t.flow_level + 1;
1214 (* Track whether this is a mapping or sequence *)
1215- let is_mapping = (token_type = Token.Flow_mapping_start) in
1216 t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
1217 t.allow_simple_key <- true;
1218 t.simple_keys <- None :: t.simple_keys;
···1225and fetch_flow_collection_end t token_type =
1226 remove_simple_key t;
1227 t.flow_level <- t.flow_level - 1;
1228- t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
01229 t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
1230 t.allow_simple_key <- false;
1231 let start = Input.mark t.input in
···1270 ignore (Input.next t.input);
12711272 (* Check for tabs after - : pattern like -\t- is invalid *)
1273- let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in
1274 if found_tabs then begin
1275 (* If we found tabs and next char is - followed by whitespace, error *)
1276 match Input.peek t.input with
1277- | Some '-' ->
1278- (match Input.peek_nth t.input 1 with
1279- | None -> Error.raise_at start Tab_in_indentation
1280- | Some c when Input.is_whitespace c ->
1281- Error.raise_at start Tab_in_indentation
1282- | Some _ -> ())
1283 | _ -> ()
1284 end;
1285···1289and check_key t =
1290 (* ? followed by whitespace or flow indicator in both block and flow *)
1291 match Input.peek_nth t.input 1 with
1292- | None -> true
1293- | Some c ->
1294- Input.is_whitespace c ||
1295- (t.flow_level > 0 && Input.is_flow_indicator c)
12961297and fetch_key t =
1298 if t.flow_level = 0 then begin
···1311 ignore (Input.next t.input);
13121313 (* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
1314- let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in
1315 if found_tabs && t.flow_level = 0 then begin
1316 (* In block context, tabs after ? are not allowed *)
1317 Error.raise_at start Tab_in_indentation
···1324 (* : followed by whitespace in block, or whitespace/flow indicator in flow, or adjacent value *)
1325 match Input.peek_nth t.input 1 with
1326 | None -> true
1327- | Some c ->
1328- Input.is_whitespace c ||
1329- (t.flow_level > 0 && Input.is_flow_indicator c) ||
01330 (* Allow adjacent values in flow context at designated positions *)
1331- (t.flow_level > 0 &&
1332- match t.adjacent_value_allowed_at with
1333- | Some pos -> pos.Position.line = (Input.position t.input).Position.line &&
1334- pos.Position.column = (Input.position t.input).Position.column
1335- | None -> false)
0013361337and fetch_value t =
1338 let start = Input.mark t.input in
···1342 | Some sk :: _ when sk.sk_possible ->
1343 (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
1344 In explicit flow mapping { }, key and : can span lines. *)
1345- let is_implicit_flow_mapping = match t.flow_mapping_stack with
1346- | false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *)
001347 | _ -> false
1348 in
1349- if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then
1350- Error.raise_at start Illegal_flow_key_line;
001351 (* Insert KEY token before the simple key value *)
1352 let key_span = Span.point sk.sk_position in
1353 let key_token = { Token.token = Token.Key; span = key_span } in
···1355 let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1356 Queue.clear t.tokens;
1357 let insert_pos = sk.sk_token_number - t.tokens_taken in
1358- Array.iteri (fun i tok ->
1359- if i = insert_pos then Queue.add key_token t.tokens;
1360- Queue.add tok t.tokens
1361- ) tokens;
1362- if insert_pos >= Array.length tokens then
1363- Queue.add key_token t.tokens;
1364 t.token_number <- t.token_number + 1;
1365 (* Roll indent for implicit block mapping *)
1366 if t.flow_level = 0 then begin
···1371 let bm_token = { Token.token = Token.Block_mapping_start; span } in
1372 let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1373 Queue.clear t.tokens;
1374- Array.iteri (fun i tok ->
1375- if i = insert_pos then Queue.add bm_token t.tokens;
1376- Queue.add tok t.tokens
1377- ) tokens;
01378 if insert_pos >= Array.length tokens then
1379 Queue.add bm_token t.tokens;
1380 t.token_number <- t.token_number + 1
1381 end
1382 end;
1383- t.simple_keys <- None :: (List.tl t.simple_keys);
1384 true
1385 | _ ->
1386 (* No simple key - this is a complex value (or empty key) *)
···1400 remove_simple_key t;
1401 (* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
1402 In flow context or after using a simple key, disallow simple keys *)
1403- t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0);
1404 t.document_has_content <- true;
1405 let start = Input.mark t.input in
1406 ignore (Input.next t.input);
14071408 (* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09)
1409 However, :\t bar (tab followed by space then content) is valid (6BCT) *)
1410- let (found_tabs, found_spaces) = skip_blanks_check_tabs t in
1411- if found_tabs && not found_spaces && t.flow_level = 0 then begin
1412 (* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *)
1413 match Input.peek t.input with
1414- | Some ('-' | '?') ->
1415- Error.raise_at start Tab_in_indentation
1416- | Some c when (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ->
001417 (* Tab-only followed by alphanumeric - likely a key, which is invalid *)
1418 Error.raise_at start Tab_in_indentation
1419 | _ -> ()
···1430 t.allow_simple_key <- false;
1431 t.document_has_content <- true;
1432 let start = Input.mark t.input in
1433- ignore (Input.next t.input); (* consume * or & *)
01434 let name, span = scan_anchor_alias t in
1435 let span = Span.make ~start ~stop:span.stop in
1436 let token = if is_alias then Token.Alias name else Token.Anchor name in
···1475 match Input.peek_nth t.input 1 with
1476 | None -> false
1477 | Some c ->
1478- not (Input.is_whitespace c) &&
1479- (t.flow_level = 0 || not (Input.is_flow_indicator c))
14801481and can_start_plain_char c _t =
1482 (* Characters that can start a plain scalar *)
···1492 (* If the plain scalar ended after crossing a line break (leading_blanks = true),
1493 allow simple keys. This is important because the scanner already consumed the
1494 line break and leading whitespace when checking for continuation. *)
1495- if ended_with_linebreak then
1496- t.allow_simple_key <- true;
1497 emit t span (Token.Scalar { style = `Plain; value })
14981499(** Check if we need more tokens to resolve simple keys *)
···1502 else if Queue.is_empty t.tokens then true
1503 else
1504 (* Check if any simple key could affect the first queued token *)
1505- List.exists (function
1506- | Some sk when sk.sk_possible ->
1507- sk.sk_token_number >= t.tokens_taken
1508- | _ -> false
1509- ) t.simple_keys
15101511(** Ensure we have enough tokens to return one safely *)
1512let ensure_tokens t =
···1523(** Get next token *)
1524let next t =
1525 ensure_tokens t;
1526- if Queue.is_empty t.tokens then
1527- None
1528 else begin
1529 t.tokens_taken <- t.tokens_taken + 1;
1530 Some (Queue.pop t.tokens)
···1540 let rec loop () =
1541 match next t with
1542 | None -> ()
1543- | Some tok -> f tok; loop ()
001544 in
1545 loop ()
15461547(** Fold over all tokens *)
1548let fold f init t =
1549 let rec loop acc =
1550- match next t with
1551- | None -> acc
1552- | Some tok -> loop (f acc tok)
1553 in
1554 loop init
15551556(** Convert to list *)
1557-let to_list t =
1558- fold (fun acc tok -> tok :: acc) [] t |> List.rev
···56(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)
708type simple_key = {
9 sk_possible : bool;
10 sk_required : bool;
11 sk_token_number : int;
12 sk_position : Position.t;
13}
14+(** Simple key tracking for mapping key disambiguation *)
15016type indent = {
17 indent : int;
18 needs_block_end : bool;
19}
20+(** Indent level tracking *)
2122type t = {
23 input : Input.t;
···27 mutable stream_started : bool;
28 mutable stream_ended : bool;
29 mutable indent_stack : indent list; (** Stack of indentation levels *)
30+ mutable flow_level : int; (** Nesting depth in \[\] or \{\} *)
31+ mutable flow_indent : int;
32+ (** Column where outermost flow collection started *)
33+ mutable simple_keys : simple_key option list;
34+ (** Per flow-level simple key tracking *)
35 mutable allow_simple_key : bool;
36+ mutable leading_whitespace : bool;
37+ (** True when at start of line (only whitespace seen) *)
38+ mutable document_has_content : bool;
39+ (** True if we've emitted content tokens in current document *)
40+ mutable adjacent_value_allowed_at : Position.t option;
41+ (** Position where adjacent : is allowed *)
42+ mutable flow_mapping_stack : bool list;
43+ (** Stack of whether each flow level is a mapping *)
44}
4546let create input =
···54 indent_stack = [];
55 flow_level = 0;
56 flow_indent = 0;
57+ simple_keys = [ None ];
58+ (* One entry for the base level *)
59 allow_simple_key = true;
60+ leading_whitespace = true;
61+ (* Start at beginning of stream *)
62 document_has_content = false;
63 adjacent_value_allowed_at = None;
64 flow_mapping_stack = [];
···67let of_string s = create (Input.of_string s)
68let of_input = create
69let of_reader r = create (Input.of_reader r)
070let position t = Input.position t.input
7172(** Add a token to the queue *)
···7980(** Get current indent level *)
81let current_indent t =
82+ match t.indent_stack with [] -> -1 | { indent; _ } :: _ -> indent
008384+(** Skip whitespace to end of line, checking for valid comments. Returns true if
85+ any whitespace (including tabs) was found before a comment. *)
86let skip_whitespace_and_comment t =
87 let has_whitespace = ref false in
88 (* Skip blanks (spaces and tabs) *)
···103 Error.raise_at (Input.mark t.input) Invalid_comment
104 end;
105 (* Skip to end of line *)
106+ while (not (Input.is_eof t.input)) && not (Input.next_is_break t.input) do
107 ignore (Input.next t.input)
108 done
109 end
···114 let found_space = ref false in
115 while Input.next_is_blank t.input do
116 (match Input.peek t.input with
117+ | Some '\t' -> found_tab := true
118+ | Some ' ' -> found_space := true
119+ | _ -> ());
120 ignore (Input.next t.input)
121 done;
122 (!found_tab, !found_space)
···125let rec skip_to_next_token t =
126 (* Check for tabs used as indentation in block context *)
127 (match Input.peek t.input with
128+ | Some '\t'
129+ when t.flow_level = 0 && t.leading_whitespace
130+ && column t - 1 < current_indent t ->
131+ (* Tab found in indentation zone - this is invalid *)
132+ (* Skip to end of line to check if line has content *)
133+ let start_pos = Input.mark t.input in
134+ while Input.next_is_blank t.input do
135+ ignore (Input.next t.input)
136+ done;
137+ (* If we have content on this line with a tab, raise error *)
138+ if (not (Input.next_is_break t.input)) && not (Input.is_eof t.input) then
139+ Error.raise_at start_pos Tab_in_indentation
140+ | _ -> ());
141142 (* Skip blanks and validate comments *)
143 skip_whitespace_and_comment t;
···164 ignore (Input.next t.input)
165 done;
166 (* If only tabs were used (no spaces) and column < flow_indent, error *)
167+ if
168+ (not (Input.next_is_break t.input))
169+ && (not (Input.is_eof t.input))
170+ && column t < t.flow_indent
171+ then Error.raise_at start_mark Invalid_flow_indentation
172 end;
173 skip_to_next_token t
174+ end
175+ else begin
176 ignore (Input.next t.input);
177 skip_to_next_token t
178 end
···183 if t.flow_level = 0 && col > current_indent t then begin
184 t.indent_stack <- { indent = col; needs_block_end = true } :: t.indent_stack;
185 true
186+ end
187+ else false
188189(** Unroll indentation to given column *)
190let unroll_indent t col =
191+ while
192+ t.flow_level = 0
193+ &&
194+ match t.indent_stack with
195+ | { indent; needs_block_end = true; _ } :: _ when indent > col -> true
196+ | _ -> false
197 do
198 match t.indent_stack with
199 | { indent = _; needs_block_end = true; _ } :: rest ->
···210 (* A simple key is required only if we're in a block context,
211 at the current indentation level, AND the current indent needs a block end.
212 This matches saphyr's logic and prevents false positives for values. *)
213+ let required =
214+ t.flow_level = 0
215+ &&
216+ match t.indent_stack with
217+ | { indent; needs_block_end = true; _ } :: _ -> indent = column t
218+ | _ -> false
219 in
220+ let sk =
221+ {
222+ sk_possible = true;
223+ sk_required = required;
224+ sk_token_number = t.token_number;
225+ sk_position = Input.position t.input;
226+ }
227+ in
228 (* Remove any existing simple key at current level *)
229+ t.simple_keys <-
230+ (match t.simple_keys with
231 | _ :: rest -> Some sk :: rest
232+ | [] -> [ Some sk ])
0233 end
234235(** Remove simple key at current level *)
···242243(** Stale simple keys that span too many tokens *)
244let stale_simple_keys t =
245+ t.simple_keys <-
246+ List.map
247+ (fun sk_opt ->
248+ match sk_opt with
249+ | Some sk
250+ when sk.sk_possible
251+ && (Input.position t.input).line > sk.sk_position.line
252+ && t.flow_level = 0 ->
253+ if sk.sk_required then Error.raise_at sk.sk_position Expected_key;
254+ None
255+ | _ -> sk_opt)
256+ t.simple_keys
257258(** Read anchor or alias name *)
259let scan_anchor_alias t =
···266 This matches the saphyr implementation: is_yaml_non_space && !is_flow *)
267 while
268 match Input.peek t.input with
269+ | Some c
270+ when (not (Input.is_whitespace c))
271+ && (not (Input.is_flow_indicator c))
272+ && c <> '\x00' ->
273 Buffer.add_char buf c;
274 ignore (Input.next t.input);
275 true
276 | _ -> false
277+ do
278+ ()
279+ done;
280 let name = Buffer.contents buf in
281 if String.length name = 0 then
282 Error.raise_at start (Invalid_anchor "empty anchor name");
···288 let buf = Buffer.create 16 in
289 (* Expect ! *)
290 (match Input.peek t.input with
291+ | Some '!' ->
292+ Buffer.add_char buf '!';
293+ ignore (Input.next t.input)
294+ | _ -> Error.raise_at start (Invalid_tag "expected '!'"));
295 (* Read word chars *)
296 while
297 match Input.peek t.input with
···300 ignore (Input.next t.input);
301 true
302 | _ -> false
303+ do
304+ ()
305+ done;
306 (* Check for secondary ! *)
307 (match Input.peek t.input with
308+ | Some '!' ->
309+ Buffer.add_char buf '!';
310+ ignore (Input.next t.input)
311+ | _ -> ());
312 Buffer.contents buf
313314(** Scan tag suffix (after handle) *)
···318 in
319 let hex_val c =
320 match c with
321+ | '0' .. '9' -> Char.code c - Char.code '0'
322+ | 'A' .. 'F' -> Char.code c - Char.code 'A' + 10
323+ | 'a' .. 'f' -> Char.code c - Char.code 'a' + 10
324 | _ -> 0
325 in
326 let buf = Buffer.create 32 in
327 while
328 match Input.peek t.input with
329+ | Some '%' -> (
330 (* Percent-encoded character *)
331 ignore (Input.next t.input);
332+ match (Input.peek t.input, Input.peek_nth t.input 1) with
333+ | Some c1, Some c2 when is_hex_digit c1 && is_hex_digit c2 ->
334+ ignore (Input.next t.input);
335+ ignore (Input.next t.input);
336+ let code = (hex_val c1 * 16) + hex_val c2 in
337+ Buffer.add_char buf (Char.chr code);
338+ true
339+ | _ ->
340+ (* Invalid percent encoding - keep the % *)
341+ Buffer.add_char buf '%';
342+ true)
343+ | Some c
344+ when (not (Input.is_whitespace c)) && not (Input.is_flow_indicator c) ->
345 Buffer.add_char buf c;
346 ignore (Input.next t.input);
347 true
348 | _ -> false
349+ do
350+ ()
351+ done;
352 Buffer.contents buf
353354(** Scan a tag *)
355let scan_tag t =
356 let start = Input.mark t.input in
357+ ignore (Input.next t.input);
358+ (* consume ! *)
359 let handle, suffix =
360 match Input.peek t.input with
361 | Some '<' ->
···369 Buffer.add_char buf c;
370 ignore (Input.next t.input);
371 true
372+ | None ->
373+ Error.raise_at (Input.mark t.input)
374+ (Invalid_tag "unclosed verbatim tag")
375+ do
376+ ()
377+ done;
378+ ignore (Input.next t.input);
379+ (* consume > *)
380 ("", Buffer.contents buf)
381 | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
382 (* Non-specific tag: ! *)
383 ("!", "")
384 | Some '!' ->
385 (* Secondary handle: !! *)
386+ ignore (Input.next t.input);
387+ (* consume second ! *)
388 let suffix = scan_tag_suffix t in
389 ("!!", suffix)
390+ | _ -> (
391 (* Primary handle or just suffix: !foo or !e!foo *)
392 (* Read alphanumeric characters *)
393 let buf = Buffer.create 16 in
···398 ignore (Input.next t.input);
399 true
400 | _ -> false
401+ do
402+ ()
403+ done;
404 (* Check if next character is ! - if so, this is a named handle *)
405+ match Input.peek t.input with
406+ | Some '!' ->
407+ (* Named handle like !e! *)
408+ ignore (Input.next t.input);
409+ let handle_name = Buffer.contents buf in
410+ let suffix = scan_tag_suffix t in
411+ ("!" ^ handle_name ^ "!", suffix)
412+ | _ ->
413+ (* Just ! followed by suffix *)
414+ ("!", Buffer.contents buf ^ scan_tag_suffix t))
415 in
416 (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
417 (match Input.peek t.input with
418+ | None -> () (* EOF is ok *)
419+ | Some c when Input.is_whitespace c || Input.is_break c -> ()
420+ | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
421+ | _ ->
422+ Error.raise_at start
423+ (Invalid_tag "expected whitespace or line break after tag"));
424 let span = Span.make ~start ~stop:(Input.mark t.input) in
425 (handle, suffix, span)
426427(** Scan single-quoted scalar *)
428let scan_single_quoted t =
429 let start = Input.mark t.input in
430+ ignore (Input.next t.input);
431+ (* consume opening single-quote *)
432 let buf = Buffer.create 64 in
433+ let whitespace = Buffer.create 16 in
434+ (* Track trailing whitespace *)
435436 let flush_whitespace () =
437 if Buffer.length whitespace > 0 then begin
···443 let rec loop () =
444 match Input.peek t.input with
445 | None -> Error.raise_at start Unclosed_single_quote
446+ | Some '\'' -> (
447 ignore (Input.next t.input);
448 (* Check for escaped quote ('') *)
449+ match Input.peek t.input with
450+ | Some '\'' ->
451+ flush_whitespace ();
452+ Buffer.add_char buf '\'';
453+ ignore (Input.next t.input);
454+ loop ()
455+ | _ ->
456+ (* End of string - flush any trailing whitespace *)
457+ flush_whitespace ())
458 | Some ' ' | Some '\t' ->
459 (* Track whitespace - don't add to buf yet *)
460 Buffer.add_char whitespace (Option.get (Input.peek t.input));
···474 (* Check indentation: continuation must be > block indent (QB6E, DK95) *)
475 let col = column t in
476 let indent = current_indent t in
477+ if
478+ (not (Input.is_eof t.input))
479+ && (not (Input.next_is_break t.input))
480+ && col <= indent && indent >= 0
481+ then
482+ Error.raise_at (Input.mark t.input)
483+ (Invalid_quoted_scalar_indentation
484+ "invalid indentation in quoted scalar");
485 (* Count empty lines (consecutive line breaks) *)
486 let empty_lines = ref 0 in
487 while Input.next_is_break t.input do
···495 (* Check indentation after each empty line too *)
496 let col = column t in
497 let indent = current_indent t in
498+ if
499+ (not (Input.is_eof t.input))
500+ && (not (Input.next_is_break t.input))
501+ && col <= indent && indent >= 0
502+ then
503+ Error.raise_at (Input.mark t.input)
504+ (Invalid_quoted_scalar_indentation
505+ "invalid indentation in quoted scalar")
506 done;
507 (* Apply folding rules *)
508 if !empty_lines > 0 then begin
···510 for _ = 1 to !empty_lines do
511 Buffer.add_char buf '\n'
512 done
513+ end
514+ else
515 (* Single break: fold to space (even at start of string) *)
516 Buffer.add_char buf ' ';
517 loop ()
···534 | Some c when Input.is_hex c ->
535 Buffer.add_char buf c;
536 ignore (Input.next t.input)
537+ | _ -> Error.raise_at start (Invalid_hex_escape (Buffer.contents buf))
0538 done;
539 let code = int_of_string ("0x" ^ Buffer.contents buf) in
540+ if code <= 0x7F then String.make 1 (Char.chr code)
0541 else if code <= 0x7FF then
542 let b1 = 0xC0 lor (code lsr 6) in
543 let b2 = 0x80 lor (code land 0x3F) in
···546 let b1 = 0xE0 lor (code lsr 12) in
547 let b2 = 0x80 lor ((code lsr 6) land 0x3F) in
548 let b3 = 0x80 lor (code land 0x3F) in
549+ String.init 3 (fun i ->
550+ Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3))
551 else
552 let b1 = 0xF0 lor (code lsr 18) in
553 let b2 = 0x80 lor ((code lsr 12) land 0x3F) in
554 let b3 = 0x80 lor ((code lsr 6) land 0x3F) in
555 let b4 = 0x80 lor (code land 0x3F) in
556+ String.init 4 (fun i ->
557+ Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4))
558559(** Scan double-quoted scalar *)
560let scan_double_quoted t =
561 let start = Input.mark t.input in
562+ ignore (Input.next t.input);
563+ (* consume opening double-quote *)
564 let buf = Buffer.create 64 in
565+ let whitespace = Buffer.create 16 in
566+ (* Track pending whitespace *)
567568 let flush_whitespace () =
569 if Buffer.length whitespace > 0 then begin
···579 (* Flush trailing whitespace before closing quote to preserve it *)
580 flush_whitespace ();
581 ignore (Input.next t.input)
582+ | (Some ' ' | Some '\t') as c_opt ->
583 (* Track whitespace - don't add to buf yet *)
584 let c = match c_opt with Some c -> c | None -> assert false in
585 Buffer.add_char whitespace c;
···587 loop ()
588 | Some '\\' ->
589 (* Escape sequence - this is non-whitespace content *)
590+ flush_whitespace ();
591+ (* Commit any pending whitespace *)
592 ignore (Input.next t.input);
593 (match Input.peek t.input with
594+ | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>")
595+ | Some '0' ->
596+ Buffer.add_char buf '\x00';
597+ ignore (Input.next t.input)
598+ | Some 'a' ->
599+ Buffer.add_char buf '\x07';
600+ ignore (Input.next t.input)
601+ | Some 'b' ->
602+ Buffer.add_char buf '\x08';
603+ ignore (Input.next t.input)
604+ | Some 't' | Some '\t' ->
605+ Buffer.add_char buf '\t';
606+ ignore (Input.next t.input)
607+ | Some 'n' ->
608+ Buffer.add_char buf '\n';
609+ ignore (Input.next t.input)
610+ | Some 'v' ->
611+ Buffer.add_char buf '\x0B';
612+ ignore (Input.next t.input)
613+ | Some 'f' ->
614+ Buffer.add_char buf '\x0C';
615+ ignore (Input.next t.input)
616+ | Some 'r' ->
617+ Buffer.add_char buf '\r';
618+ ignore (Input.next t.input)
619+ | Some 'e' ->
620+ Buffer.add_char buf '\x1B';
621+ ignore (Input.next t.input)
622+ | Some ' ' ->
623+ Buffer.add_char buf ' ';
624+ ignore (Input.next t.input)
625+ | Some '"' ->
626+ Buffer.add_char buf '"';
627+ ignore (Input.next t.input)
628+ | Some '/' ->
629+ Buffer.add_char buf '/';
630+ ignore (Input.next t.input)
631+ | Some '\\' ->
632+ Buffer.add_char buf '\\';
633+ ignore (Input.next t.input)
634+ | Some 'N' ->
635+ Buffer.add_string buf "\xC2\x85";
636+ ignore (Input.next t.input) (* NEL *)
637+ | Some '_' ->
638+ Buffer.add_string buf "\xC2\xA0";
639+ ignore (Input.next t.input) (* NBSP *)
640+ | Some 'L' ->
641+ Buffer.add_string buf "\xE2\x80\xA8";
642+ ignore (Input.next t.input) (* LS *)
643+ | Some 'P' ->
644+ Buffer.add_string buf "\xE2\x80\xA9";
645+ ignore (Input.next t.input) (* PS *)
646+ | Some 'x' ->
647+ ignore (Input.next t.input);
648+ Buffer.add_string buf (decode_hex t 2)
649+ | Some 'u' ->
650+ ignore (Input.next t.input);
651+ Buffer.add_string buf (decode_hex t 4)
652+ | Some 'U' ->
653+ ignore (Input.next t.input);
654+ Buffer.add_string buf (decode_hex t 8)
655+ | Some '\n' | Some '\r' ->
656+ (* Line continuation escape *)
657+ Input.consume_break t.input;
658+ while Input.next_is_blank t.input do
659+ ignore (Input.next t.input)
660+ done
661+ | Some c ->
662+ Error.raise_at (Input.mark t.input)
663+ (Invalid_escape_sequence (Printf.sprintf "\\%c" c)));
664 loop ()
665 | Some '\n' | Some '\r' ->
666 (* Line break: discard any pending trailing whitespace *)
···681 if Input.next_is_break t.input then begin
682 Input.consume_break t.input;
683 incr empty_lines;
684+ started_with_tab := false (* Reset for next line *)
685+ end
686+ else continue := false
687 done;
688 (* Check for document boundary - this terminates the quoted string *)
689 if Input.at_document_boundary t.input then
···694 let indent = current_indent t in
695 let start_col = start.column in
696 (* DK95/01: if continuation started with tabs and column < start column, error *)
697+ if (not (Input.is_eof t.input)) && !started_with_tab && col < start_col
698+ then
699+ Error.raise_at (Input.mark t.input)
700+ (Invalid_quoted_scalar_indentation
701+ "invalid indentation in quoted scalar");
702+ if (not (Input.is_eof t.input)) && col <= indent && indent >= 0 then
703+ Error.raise_at (Input.mark t.input)
704+ (Invalid_quoted_scalar_indentation
705+ "invalid indentation in quoted scalar");
706 (* Per YAML spec: single break = space, break + empty lines = newlines *)
707 if !empty_lines > 0 then begin
708 (* Empty lines: output N newlines where N = number of empty lines *)
709 for _ = 1 to !empty_lines do
710 Buffer.add_char buf '\n'
711 done
712+ end
713+ else
714 (* Single break folds to space *)
715 Buffer.add_char buf ' ';
716 loop ()
717 | Some c ->
718 (* Non-whitespace character *)
719+ flush_whitespace ();
720+ (* Commit any pending whitespace *)
721 Buffer.add_char buf c;
722 ignore (Input.next t.input);
723 loop ()
···729(** Check if character can appear in plain scalar at this position *)
730let can_continue_plain t c ~in_flow =
731 match c with
732+ | ':' -> (
733 (* : is OK if not followed by whitespace or flow indicator *)
734+ match Input.peek_nth t.input 1 with
735+ | None -> true
736+ | Some c2 when Input.is_whitespace c2 -> false
737+ | Some c2 when in_flow && Input.is_flow_indicator c2 -> false
738+ | _ -> true)
739+ | '#' -> (
740 (* # is a comment indicator only if preceded by whitespace *)
741 (* Check the previous character to determine if this is a comment *)
742+ match Input.peek_back t.input with
743+ | None -> true (* At start - can't be comment indicator, allow it *)
744+ | Some c when Input.is_whitespace c ->
745+ false (* Preceded by whitespace - comment *)
746+ | Some c when Input.is_break c -> false (* At start of line - comment *)
747+ | _ -> true (* Not preceded by whitespace - part of scalar *))
748 | c when in_flow && Input.is_flow_indicator c -> false
749 | _ when Input.is_break c -> false
750 | _ -> true
···756 let indent = current_indent t in
757 (* In flow context, scalars must be indented more than the current block indent.
758 This ensures that content at block indent or less ends the flow context. *)
759+ if in_flow && column t - 1 < indent then
760 Error.raise_at start Invalid_flow_indentation;
761 let buf = Buffer.create 64 in
762 let spaces = Buffer.create 16 in
763+ let whitespace = Buffer.create 16 in
764+ (* Track whitespace within a line *)
765 let leading_blanks = ref false in
766767 let rec scan_line () =
···778 if Buffer.length spaces > 0 then begin
779 if !leading_blanks then begin
780 (* Fold line break *)
781+ if Buffer.contents spaces = "\n" then Buffer.add_char buf ' '
0782 else begin
783 (* Multiple breaks - preserve all but first *)
784 let s = Buffer.contents spaces in
785 Buffer.add_substring buf s 1 (String.length s - 1)
786 end
787+ end
788+ else Buffer.add_buffer buf spaces;
789 Buffer.clear spaces
790 end;
791 (* Add any pending whitespace from within the line *)
···812 if !leading_blanks then begin
813 (* We already had a break - this is an additional break (empty line) *)
814 Buffer.add_char spaces '\n'
815+ end
816+ else begin
817 (* First line break *)
818 Buffer.clear spaces;
819 Buffer.add_char spaces '\n';
···833 (* However, allow empty lines (line breaks) to continue even if dedented *)
834 if Input.next_is_break t.input then
835 scan_lines () (* Empty line - continue *)
836+ else if (not in_flow) && col <= indent then ()
837+ (* Stop - dedented or at parent level in block context *)
838+ else if Input.at_document_boundary t.input then ()
839+ (* Stop - document boundary *)
840+ else scan_lines ()
0841 end
842 in
843···848 let len = String.length value in
849 let rec find_end i =
850 if i < 0 then 0
851+ else match value.[i] with ' ' | '\t' -> find_end (i - 1) | _ -> i + 1
00852 in
853 let end_pos = find_end (len - 1) in
854 String.sub value 0 end_pos
···860(** Scan block scalar (literal | or folded >) *)
861let scan_block_scalar t literal =
862 let start = Input.mark t.input in
863+ ignore (Input.next t.input);
864+865+ (* consume | or > *)
866867 (* Parse header: optional indentation indicator and chomping *)
868 let explicit_indent = ref None in
···870871 (* First character of header *)
872 (match Input.peek t.input with
873+ | Some c when Input.is_digit c && c <> '0' ->
874+ explicit_indent := Some (Char.code c - Char.code '0');
875+ ignore (Input.next t.input)
876+ | Some '-' ->
877+ chomping := Chomping.Strip;
878+ ignore (Input.next t.input)
879+ | Some '+' ->
880+ chomping := Chomping.Keep;
881+ ignore (Input.next t.input)
882+ | _ -> ());
883884 (* Second character of header *)
885 (match Input.peek t.input with
886+ | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None ->
887+ explicit_indent := Some (Char.code c - Char.code '0');
888+ ignore (Input.next t.input)
889+ | Some '-' when !chomping = Chomping.Clip ->
890+ chomping := Chomping.Strip;
891+ ignore (Input.next t.input)
892+ | Some '+' when !chomping = Chomping.Clip ->
893+ chomping := Chomping.Keep;
894+ ignore (Input.next t.input)
895+ | _ -> ());
896897 (* Skip whitespace and optional comment *)
898 skip_whitespace_and_comment t;
899900 (* Consume line break *)
901+ if Input.next_is_break t.input then Input.consume_break t.input
0902 else if not (Input.is_eof t.input) then
903 Error.raise_at (Input.mark t.input)
904 (Invalid_block_scalar_header "expected newline after header");
···906 let base_indent = current_indent t in
907 (* base_indent is the indent level from the stack, -1 if empty.
908 It's used directly for comparisons in implicit indent case. *)
909+ let content_indent =
910+ ref
911+ (match !explicit_indent with
912+ | Some n ->
913+ (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
914 content_indent = (base_indent - 1) + n, but at least n for document level. *)
915+ let base_level = max 0 (base_indent - 1) in
916+ base_level + n
917+ | None -> 0 (* Will be determined by first non-empty line *))
918+ in
919920 let buf = Buffer.create 256 in
921 let trailing_breaks = Buffer.create 16 in
922+ let leading_blank = ref false in
923+ (* Was the previous line "more indented"? *)
924+ let max_empty_line_indent = ref 0 in
925+ (* Track max indent of empty lines before first content *)
926927 (* Skip to content indentation, skipping empty lines.
928 Returns the number of spaces actually skipped (important for detecting dedentation). *)
···930 if !content_indent > 0 then begin
931 (* Explicit indent - skip up to content_indent spaces *)
932 let spaces_skipped = ref 0 in
933+ while
934+ !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input
935+ do
936 incr spaces_skipped;
937 ignore (Input.next t.input)
938 done;
···943 Buffer.add_char trailing_breaks '\n';
944 Input.consume_break t.input;
945 skip_to_content_indent ()
946+ end
947+ else if !spaces_skipped < !content_indent then begin
948 (* Line starts with fewer spaces than content_indent - dedented *)
949 !spaces_skipped
950+ end
951+ else if Input.next_is_blank t.input then begin
952 (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
953 For literal scalars, whitespace-only lines ARE content (not empty).
954 For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
···958 else begin
959 (* Folded: check if rest is only blanks *)
960 let idx = ref 0 in
961+ while
962+ match Input.peek_nth t.input !idx with
963+ | Some c when Input.is_blank c ->
964+ incr idx;
965+ true
966+ | _ -> false
967+ do
968+ ()
969+ done;
970+ match Input.peek_nth t.input !idx with
971 | None | Some '\n' | Some '\r' ->
972 (* Empty/whitespace-only line in folded - skip spaces *)
973 while Input.next_is_blank t.input do
···980 (* Has non-whitespace content *)
981 !content_indent
982 end
983+ end
984+ else !content_indent
985+ end
986+ else begin
987 (* Implicit indent - skip empty lines without consuming spaces.
988 Note: Only SPACES count as indentation. Tabs are content, not indentation.
989 So we only check for spaces when determining if a line is "empty". *)
···991 Buffer.add_char trailing_breaks '\n';
992 Input.consume_break t.input;
993 skip_to_content_indent ()
994+ end
995+ else if Input.next_is (( = ) ' ') t.input then begin
996 (* Check if line is empty (only spaces before break) *)
997 let idx = ref 0 in
998+ while
999+ match Input.peek_nth t.input !idx with
1000+ | Some ' ' ->
1001+ incr idx;
1002+ true
1003+ | _ -> false
1004+ do
1005+ ()
1006+ done;
1007+ match Input.peek_nth t.input !idx with
1008 | None | Some '\n' | Some '\r' ->
1009 (* Line has only spaces - empty line *)
1010 (* Track max indent of empty lines for later validation *)
1011+ if !idx > !max_empty_line_indent then max_empty_line_indent := !idx;
01012 while Input.next_is (( = ) ' ') t.input do
1013 ignore (Input.next t.input)
1014 done;
···1018 | _ ->
1019 (* Has content (including tabs which are content, not indentation) *)
1020 0
1021+ end
1022+ else if Input.next_is (( = ) '\t') t.input then begin
1023 (* Tab at start of line in implicit indent mode - this is an error (Y79Y)
1024 because tabs cannot be used as indentation in YAML *)
1025 Error.raise_at (Input.mark t.input) Tab_in_indentation
1026+ end
1027+ else
1028 (* Not at break or space - other content character *)
1029 0
1030 end
···1056 let should_process =
1057 if !content_indent = 0 then begin
1058 (* For implicit indent, content must be more indented than base_level. *)
1059+ if line_indent <= base_level then false
1060+ (* No content - first line not indented enough *)
1061 else begin
1062 (* Validate: first content line must be indented at least as much as
1063 the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *)
1064+ if line_indent < !max_empty_line_indent && line_indent > base_level
1065+ then
1066 Error.raise_at (Input.mark t.input)
1067+ (Invalid_block_scalar_header
1068+ "wrongly indented line in block scalar");
1069 content_indent := line_indent;
1070 true
1071 end
1072+ end
1073+ else if line_indent < !content_indent then false
1074+ (* Dedented - done with content *)
1075+ else true
1076 in
10771078 if should_process then begin
···1080 For folded scalars, lines that start with any whitespace (space or tab) after the
1081 content indentation are "more indented" and preserve breaks.
1082 Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
1083+ let trailing_blank =
1084+ line_indent > !content_indent || Input.next_is_blank t.input
1085+ in
10861087 (* Add trailing breaks to buffer *)
1088 if Buffer.length buf > 0 then begin
1089 if Buffer.length trailing_breaks > 0 then begin
1090+ if literal then Buffer.add_buffer buf trailing_breaks
01091 else begin
1092 (* Folded scalar: fold only if both previous and current lines are not more-indented *)
1093+ if (not !leading_blank) && not trailing_blank then begin
1094 let breaks = Buffer.contents trailing_breaks in
1095+ if String.length breaks = 1 then Buffer.add_char buf ' '
1096+ else Buffer.add_substring buf breaks 1 (String.length breaks - 1)
1097+ end
1098+ else begin
01099 (* Preserve breaks for more-indented lines *)
1100 Buffer.add_buffer buf trailing_breaks
1101 end
1102 end
1103+ end
1104+ else if not literal then Buffer.add_char buf ' '
1105+ end
1106+ else Buffer.add_buffer buf trailing_breaks;
1107 Buffer.clear trailing_breaks;
11081109 (* Add extra indentation for literal or more-indented folded lines *)
1110 (* On the first line (when determining content_indent), we've already consumed all spaces,
1111 so we should NOT add any back. On subsequent lines, we add only the spaces beyond content_indent. *)
1112+ if (not first_line) && (literal || (!extra_spaces > 0 && not literal))
1113+ then begin
1114 for _ = 1 to !extra_spaces do
1115 Buffer.add_char buf ' '
1116 done
1117 end;
11181119 (* Read line content *)
1120+ while
1121+ (not (Input.is_eof t.input)) && not (Input.next_is_break t.input)
1122+ do
1123 Buffer.add_char buf (Input.next_exn t.input)
1124 done;
1125···1146 | Chomping.Strip -> content
1147 | Chomping.Clip ->
1148 if String.length content > 0 then content ^ "\n" else content
1149+ | Chomping.Keep -> content ^ Buffer.contents trailing_breaks
01150 in
11511152 let span = Span.make ~start ~stop:(Input.mark t.input) in
···1156(** Scan directive (after %) *)
1157let scan_directive t =
1158 let start = Input.mark t.input in
1159+ ignore (Input.next t.input);
1160+1161+ (* consume % *)
11621163 (* Read directive name *)
1164 let name_buf = Buffer.create 16 in
···1169 ignore (Input.next t.input);
1170 true
1171 | _ -> false
1172+ do
1173+ ()
1174+ done;
1175 let name = Buffer.contents name_buf in
11761177 (* Skip blanks *)
···1186 let minor = ref 0 in
1187 (* Read major version *)
1188 while Input.next_is_digit t.input do
1189+ major :=
1190+ (!major * 10) + (Char.code (Input.next_exn t.input) - Char.code '0')
1191 done;
1192 (* Expect . *)
1193 (match Input.peek t.input with
1194+ | Some '.' -> ignore (Input.next t.input)
1195+ | _ ->
1196+ Error.raise_at (Input.mark t.input)
1197+ (Invalid_yaml_version "expected '.'"));
1198 (* Read minor version *)
1199 while Input.next_is_digit t.input do
1200+ minor :=
1201+ (!minor * 10) + (Char.code (Input.next_exn t.input) - Char.code '0')
1202 done;
1203 (* Validate: only whitespace and comments allowed before line break (MUS6) *)
1204 skip_whitespace_and_comment t;
1205+ if (not (Input.next_is_break t.input)) && not (Input.is_eof t.input) then
1206+ Error.raise_at (Input.mark t.input)
1207+ (Invalid_directive "expected comment or line break after version");
1208 let span = Span.make ~start ~stop:(Input.mark t.input) in
1209+ (Token.Version_directive { major = !major; minor = !minor }, span)
01210 | "TAG" ->
1211 (* Tag directive: %TAG !foo! tag:example.com,2000: *)
1212 let handle = scan_tag_handle t in
···1223 ignore (Input.next t.input);
1224 true
1225 | _ -> false
1226+ do
1227+ ()
1228+ done;
1229 let prefix = Buffer.contents prefix_buf in
1230 let span = Span.make ~start ~stop:(Input.mark t.input) in
1231+ (Token.Tag_directive { handle; prefix }, span)
01232 | _ ->
1233 (* Reserved/Unknown directive - skip to end of line and ignore *)
1234 (* Per YAML spec, reserved directives should be ignored with a warning *)
1235+ while (not (Input.is_eof t.input)) && not (Input.next_is_break t.input) do
1236 ignore (Input.next t.input)
1237 done;
1238 let span = Span.make ~start ~stop:(Input.mark t.input) in
1239 (* Return an empty tag directive token to indicate directive was processed but ignored *)
1240+ (Token.Tag_directive { handle = ""; prefix = "" }, span)
12411242(** Fetch the next token(s) into the queue *)
1243let rec fetch_next_token t =
···1251 (* We're about to process actual content, not leading whitespace *)
1252 t.leading_whitespace <- false;
12531254+ if Input.is_eof t.input then fetch_stream_end t
1255+ else if Input.at_document_boundary t.input then fetch_document_indicator t
001256 else begin
1257 match Input.peek t.input with
1258 | None -> fetch_stream_end t
1259+ | Some '%' when (Input.position t.input).column = 1 -> fetch_directive t
01260 | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start
1261 | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start
1262 | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end
···1264 | Some ',' -> fetch_flow_entry t
1265 | Some '-' when t.flow_level = 0 && check_block_entry t ->
1266 fetch_block_entry t
1267+ | Some '?' when check_key t -> fetch_key t
1268+ | Some ':' when check_value t -> fetch_value t
001269 | Some '*' -> fetch_alias t
1270 | Some '&' -> fetch_anchor t
1271 | Some '!' -> fetch_tag t
···1273 | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false
1274 | Some '\'' -> fetch_single_quoted t
1275 | Some '"' -> fetch_double_quoted t
1276+ | Some '-' when can_start_plain t -> fetch_plain_scalar t
1277+ | Some '?' when can_start_plain t -> fetch_plain_scalar t
1278+ | Some ':' when can_start_plain t -> fetch_plain_scalar t
1279+ | Some c when can_start_plain_char c t -> fetch_plain_scalar t
1280+ | Some c -> Error.raise_at (Input.mark t.input) (Unexpected_character c)
000001281 end
12821283and fetch_stream_end t =
···1298 let indicator = Input.peek_string t.input 3 in
1299 Input.skip t.input 3;
1300 let span = Span.make ~start ~stop:(Input.mark t.input) in
1301+ let token =
1302+ if indicator = "---" then Token.Document_start else Token.Document_end
1303+ in
1304 (* Reset document content flag after document end marker *)
1305 if indicator = "..." then begin
1306 t.document_has_content <- false;
1307 (* After document end marker, skip whitespace and check for end of line or comment *)
1308+ while Input.next_is_blank t.input do
1309+ ignore (Input.next t.input)
1310+ done;
1311+ match Input.peek t.input with
1312+ | None -> () (* EOF is ok *)
1313+ | Some c when Input.is_break c -> ()
1314+ | Some '#' -> () (* Comment is ok *)
1315+ | _ ->
1316+ Error.raise_at start
1317+ (Invalid_directive
1318+ "content not allowed after document end marker on same line")
1319 end;
1320 emit t span token
1321···1326 If we've emitted content in the current document, we need a document end marker first *)
1327 if t.document_has_content then
1328 Error.raise_at (Input.mark t.input)
1329+ (Unexpected_token
1330+ "directives must be separated from document content by document end \
1331+ marker (...)");
1332 unroll_indent t (-1);
1333 remove_simple_key t;
1334 t.allow_simple_key <- false;
···1338and fetch_flow_collection_start t token_type =
1339 save_simple_key t;
1340 (* Record indent of outermost flow collection *)
1341+ if t.flow_level = 0 then t.flow_indent <- column t;
01342 t.flow_level <- t.flow_level + 1;
1343 (* Track whether this is a mapping or sequence *)
1344+ let is_mapping = token_type = Token.Flow_mapping_start in
1345 t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
1346 t.allow_simple_key <- true;
1347 t.simple_keys <- None :: t.simple_keys;
···1354and fetch_flow_collection_end t token_type =
1355 remove_simple_key t;
1356 t.flow_level <- t.flow_level - 1;
1357+ t.flow_mapping_stack <-
1358+ (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
1359 t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
1360 t.allow_simple_key <- false;
1361 let start = Input.mark t.input in
···1400 ignore (Input.next t.input);
14011402 (* Check for tabs after - : pattern like -\t- is invalid *)
1403+ let found_tabs, _found_spaces = skip_blanks_check_tabs t in
1404 if found_tabs then begin
1405 (* If we found tabs and next char is - followed by whitespace, error *)
1406 match Input.peek t.input with
1407+ | Some '-' -> (
1408+ match Input.peek_nth t.input 1 with
1409+ | None -> Error.raise_at start Tab_in_indentation
1410+ | Some c when Input.is_whitespace c ->
1411+ Error.raise_at start Tab_in_indentation
1412+ | Some _ -> ())
1413 | _ -> ()
1414 end;
1415···1419and check_key t =
1420 (* ? followed by whitespace or flow indicator in both block and flow *)
1421 match Input.peek_nth t.input 1 with
1422+ | None -> true
1423+ | Some c ->
1424+ Input.is_whitespace c || (t.flow_level > 0 && Input.is_flow_indicator c)
014251426and fetch_key t =
1427 if t.flow_level = 0 then begin
···1440 ignore (Input.next t.input);
14411442 (* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
1443+ let found_tabs, _found_spaces = skip_blanks_check_tabs t in
1444 if found_tabs && t.flow_level = 0 then begin
1445 (* In block context, tabs after ? are not allowed *)
1446 Error.raise_at start Tab_in_indentation
···1453 (* : followed by whitespace in block, or whitespace/flow indicator in flow, or adjacent value *)
1454 match Input.peek_nth t.input 1 with
1455 | None -> true
1456+ | Some c -> (
1457+ Input.is_whitespace c
1458+ || (t.flow_level > 0 && Input.is_flow_indicator c)
1459+ ||
1460 (* Allow adjacent values in flow context at designated positions *)
1461+ t.flow_level > 0
1462+ &&
1463+ match t.adjacent_value_allowed_at with
1464+ | Some pos ->
1465+ pos.Position.line = (Input.position t.input).Position.line
1466+ && pos.Position.column = (Input.position t.input).Position.column
1467+ | None -> false)
14681469and fetch_value t =
1470 let start = Input.mark t.input in
···1474 | Some sk :: _ when sk.sk_possible ->
1475 (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
1476 In explicit flow mapping { }, key and : can span lines. *)
1477+ let is_implicit_flow_mapping =
1478+ match t.flow_mapping_stack with
1479+ | false :: _ ->
1480+ true (* false = we're in a sequence, so any mapping is implicit *)
1481 | _ -> false
1482 in
1483+ if
1484+ is_implicit_flow_mapping
1485+ && sk.sk_position.line < (Input.position t.input).line
1486+ then Error.raise_at start Illegal_flow_key_line;
1487 (* Insert KEY token before the simple key value *)
1488 let key_span = Span.point sk.sk_position in
1489 let key_token = { Token.token = Token.Key; span = key_span } in
···1491 let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1492 Queue.clear t.tokens;
1493 let insert_pos = sk.sk_token_number - t.tokens_taken in
1494+ Array.iteri
1495+ (fun i tok ->
1496+ if i = insert_pos then Queue.add key_token t.tokens;
1497+ Queue.add tok t.tokens)
1498+ tokens;
1499+ if insert_pos >= Array.length tokens then Queue.add key_token t.tokens;
1500 t.token_number <- t.token_number + 1;
1501 (* Roll indent for implicit block mapping *)
1502 if t.flow_level = 0 then begin
···1507 let bm_token = { Token.token = Token.Block_mapping_start; span } in
1508 let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1509 Queue.clear t.tokens;
1510+ Array.iteri
1511+ (fun i tok ->
1512+ if i = insert_pos then Queue.add bm_token t.tokens;
1513+ Queue.add tok t.tokens)
1514+ tokens;
1515 if insert_pos >= Array.length tokens then
1516 Queue.add bm_token t.tokens;
1517 t.token_number <- t.token_number + 1
1518 end
1519 end;
1520+ t.simple_keys <- None :: List.tl t.simple_keys;
1521 true
1522 | _ ->
1523 (* No simple key - this is a complex value (or empty key) *)
···1537 remove_simple_key t;
1538 (* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
1539 In flow context or after using a simple key, disallow simple keys *)
1540+ t.allow_simple_key <- (not used_simple_key) && t.flow_level = 0;
1541 t.document_has_content <- true;
1542 let start = Input.mark t.input in
1543 ignore (Input.next t.input);
15441545 (* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09)
1546 However, :\t bar (tab followed by space then content) is valid (6BCT) *)
1547+ let found_tabs, found_spaces = skip_blanks_check_tabs t in
1548+ if found_tabs && (not found_spaces) && t.flow_level = 0 then begin
1549 (* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *)
1550 match Input.peek t.input with
1551+ | Some ('-' | '?') -> Error.raise_at start Tab_in_indentation
1552+ | Some c
1553+ when (c >= 'a' && c <= 'z')
1554+ || (c >= 'A' && c <= 'Z')
1555+ || (c >= '0' && c <= '9') ->
1556 (* Tab-only followed by alphanumeric - likely a key, which is invalid *)
1557 Error.raise_at start Tab_in_indentation
1558 | _ -> ()
···1569 t.allow_simple_key <- false;
1570 t.document_has_content <- true;
1571 let start = Input.mark t.input in
1572+ ignore (Input.next t.input);
1573+ (* consume * or & *)
1574 let name, span = scan_anchor_alias t in
1575 let span = Span.make ~start ~stop:span.stop in
1576 let token = if is_alias then Token.Alias name else Token.Anchor name in
···1615 match Input.peek_nth t.input 1 with
1616 | None -> false
1617 | Some c ->
1618+ (not (Input.is_whitespace c))
1619+ && (t.flow_level = 0 || not (Input.is_flow_indicator c))
16201621and can_start_plain_char c _t =
1622 (* Characters that can start a plain scalar *)
···1632 (* If the plain scalar ended after crossing a line break (leading_blanks = true),
1633 allow simple keys. This is important because the scanner already consumed the
1634 line break and leading whitespace when checking for continuation. *)
1635+ if ended_with_linebreak then t.allow_simple_key <- true;
01636 emit t span (Token.Scalar { style = `Plain; value })
16371638(** Check if we need more tokens to resolve simple keys *)
···1641 else if Queue.is_empty t.tokens then true
1642 else
1643 (* Check if any simple key could affect the first queued token *)
1644+ List.exists
1645+ (function
1646+ | Some sk when sk.sk_possible -> sk.sk_token_number >= t.tokens_taken
1647+ | _ -> false)
1648+ t.simple_keys
16491650(** Ensure we have enough tokens to return one safely *)
1651let ensure_tokens t =
···1662(** Get next token *)
1663let next t =
1664 ensure_tokens t;
1665+ if Queue.is_empty t.tokens then None
01666 else begin
1667 t.tokens_taken <- t.tokens_taken + 1;
1668 Some (Queue.pop t.tokens)
···1678 let rec loop () =
1679 match next t with
1680 | None -> ()
1681+ | Some tok ->
1682+ f tok;
1683+ loop ()
1684 in
1685 loop ()
16861687(** Fold over all tokens *)
1688let fold f init t =
1689 let rec loop acc =
1690+ match next t with None -> acc | Some tok -> loop (f acc tok)
001691 in
1692 loop init
16931694(** Convert to list *)
1695+let to_list t = fold (fun acc tok -> tok :: acc) [] t |> List.rev
0
+20-28
lib/sequence.ml
···13 members : 'a list;
14}
1516-let make
17- ?(anchor : string option)
18- ?(tag : string option)
19- ?(implicit = true)
20- ?(style = `Any)
21- members =
22 { anchor; tag; implicit; style; members }
2324let members t = t.members
···26let tag t = t.tag
27let implicit t = t.implicit
28let style t = t.style
29-30let with_anchor anchor t = { t with anchor = Some anchor }
31let with_tag tag t = { t with tag = Some tag }
32let with_style style t = { t with style }
33-34let map f t = { t with members = List.map f t.members }
35-36let length t = List.length t.members
37-38let is_empty t = t.members = []
39-40let nth t n = List.nth t.members n
41-42let nth_opt t n = List.nth_opt t.members n
43-44let iter f t = List.iter f t.members
45-46let fold f init t = List.fold_left f init t.members
4748let pp pp_elem fmt t =
···51 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
52 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
53 Format.fprintf fmt "members=[@,%a@]@,)"
54- (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp_elem)
0055 t.members
5657let equal eq a b =
58- Option.equal String.equal a.anchor b.anchor &&
59- Option.equal String.equal a.tag b.tag &&
60- a.implicit = b.implicit &&
61- Layout_style.equal a.style b.style &&
62- List.equal eq a.members b.members
6364let compare cmp a b =
65 let c = Option.compare String.compare a.anchor b.anchor in
66- if c <> 0 then c else
67- let c = Option.compare String.compare a.tag b.tag in
68- if c <> 0 then c else
69- let c = Bool.compare a.implicit b.implicit in
70- if c <> 0 then c else
71- let c = Layout_style.compare a.style b.style in
72- if c <> 0 then c else
73- List.compare cmp a.members b.members
00
···13 members : 'a list;
14}
1516+let make ?(anchor : string option) ?(tag : string option) ?(implicit = true)
17+ ?(style = `Any) members =
000018 { anchor; tag; implicit; style; members }
1920let members t = t.members
···22let tag t = t.tag
23let implicit t = t.implicit
24let style t = t.style
025let with_anchor anchor t = { t with anchor = Some anchor }
26let with_tag tag t = { t with tag = Some tag }
27let with_style style t = { t with style }
028let map f t = { t with members = List.map f t.members }
029let length t = List.length t.members
030let is_empty t = t.members = []
031let nth t n = List.nth t.members n
032let nth_opt t n = List.nth_opt t.members n
033let iter f t = List.iter f t.members
034let fold f init t = List.fold_left f init t.members
3536let pp pp_elem fmt t =
···39 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
40 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
41 Format.fprintf fmt "members=[@,%a@]@,)"
42+ (Format.pp_print_list
43+ ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
44+ pp_elem)
45 t.members
4647let equal eq a b =
48+ Option.equal String.equal a.anchor b.anchor
49+ && Option.equal String.equal a.tag b.tag
50+ && a.implicit = b.implicit
51+ && Layout_style.equal a.style b.style
52+ && List.equal eq a.members b.members
5354let compare cmp a b =
55 let c = Option.compare String.compare a.anchor b.anchor in
56+ if c <> 0 then c
57+ else
58+ let c = Option.compare String.compare a.tag b.tag in
59+ if c <> 0 then c
60+ else
61+ let c = Bool.compare a.implicit b.implicit in
62+ if c <> 0 then c
63+ else
64+ let c = Layout_style.compare a.style b.style in
65+ if c <> 0 then c else List.compare cmp a.members b.members
+166-139
lib/serialize.ml
···1011(** {1 Internal Helpers} *)
1213-(** Emit a YAML node using an emit function.
14- This is the core implementation used by both Emitter.t and function-based APIs. *)
15let rec emit_yaml_node_impl ~emit (yaml : Yaml.t) =
16 match yaml with
17 | `Scalar s ->
18- emit (Event.Scalar {
19- anchor = Scalar.anchor s;
20- tag = Scalar.tag s;
21- value = Scalar.value s;
22- plain_implicit = Scalar.plain_implicit s;
23- quoted_implicit = Scalar.quoted_implicit s;
24- style = Scalar.style s;
25- })
26-27- | `Alias name ->
28- emit (Event.Alias { anchor = name })
29-30 | `A seq ->
31 let members = Sequence.members seq in
32 (* Force flow style for empty sequences *)
33 let style = if members = [] then `Flow else Sequence.style seq in
34- emit (Event.Sequence_start {
35- anchor = Sequence.anchor seq;
36- tag = Sequence.tag seq;
37- implicit = Sequence.implicit seq;
38- style;
39- });
0040 List.iter (emit_yaml_node_impl ~emit) members;
41 emit Event.Sequence_end
42-43 | `O map ->
44 let members = Mapping.members map in
45 (* Force flow style for empty mappings *)
46 let style = if members = [] then `Flow else Mapping.style map in
47- emit (Event.Mapping_start {
48- anchor = Mapping.anchor map;
49- tag = Mapping.tag map;
50- implicit = Mapping.implicit map;
51- style;
52- });
53- List.iter (fun (k, v) ->
54- emit_yaml_node_impl ~emit k;
55- emit_yaml_node_impl ~emit v
56- ) members;
00057 emit Event.Mapping_end
5859-(** Emit a Value node using an emit function.
60- This is the core implementation used by both Emitter.t and function-based APIs. *)
61let rec emit_value_node_impl ~emit ~config (value : Value.t) =
62 match value with
63 | `Null ->
64- emit (Event.Scalar {
65- anchor = None; tag = None;
66- value = "null";
67- plain_implicit = true; quoted_implicit = false;
68- style = `Plain;
69- })
70-00071 | `Bool b ->
72- emit (Event.Scalar {
73- anchor = None; tag = None;
74- value = if b then "true" else "false";
75- plain_implicit = true; quoted_implicit = false;
76- style = `Plain;
77- })
78-00079 | `Float f ->
80 let value =
81 match Float.classify_float f with
···84 | _ ->
85 if Float.is_integer f && Float.abs f < 1e15 then
86 Printf.sprintf "%.0f" f
87- else
88- Printf.sprintf "%g" f
89 in
90- emit (Event.Scalar {
91- anchor = None; tag = None;
92- value;
93- plain_implicit = true; quoted_implicit = false;
94- style = `Plain;
95- })
96-00097 | `String s ->
98 let style = Quoting.choose_style s in
99- emit (Event.Scalar {
100- anchor = None; tag = None;
101- value = s;
102- plain_implicit = style = `Plain;
103- quoted_implicit = style <> `Plain;
104- style;
105- })
106-00107 | `A items ->
108 (* Force flow style for empty sequences, otherwise use config *)
109 let style =
110 if items = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
111 in
112- emit (Event.Sequence_start {
113- anchor = None; tag = None;
114- implicit = true;
115- style;
116- });
117 List.iter (emit_value_node_impl ~emit ~config) items;
118 emit Event.Sequence_end
119-120 | `O pairs ->
121 (* Force flow style for empty mappings, otherwise use config *)
122 let style =
123 if pairs = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
124 in
125- emit (Event.Mapping_start {
126- anchor = None; tag = None;
127- implicit = true;
128- style;
129- });
130- List.iter (fun (k, v) ->
131- let style = Quoting.choose_style k in
132- emit (Event.Scalar {
133- anchor = None; tag = None;
134- value = k;
135- plain_implicit = style = `Plain;
136- quoted_implicit = style <> `Plain;
137- style;
138- });
139- emit_value_node_impl ~emit ~config v
140- ) pairs;
00141 emit Event.Mapping_end
142143(** Strip anchors from a YAML tree (used when resolving aliases for output) *)
···146 | `Scalar s ->
147 if Option.is_none (Scalar.anchor s) then yaml
148 else
149- `Scalar (Scalar.make
150- ?tag:(Scalar.tag s)
151- ~plain_implicit:(Scalar.plain_implicit s)
152- ~quoted_implicit:(Scalar.quoted_implicit s)
153- ~style:(Scalar.style s)
154- (Scalar.value s))
155 | `Alias _ -> yaml
156 | `A seq ->
157- `A (Sequence.make
158- ?tag:(Sequence.tag seq)
159- ~implicit:(Sequence.implicit seq)
160- ~style:(Sequence.style seq)
161- (List.map strip_anchors (Sequence.members seq)))
162 | `O map ->
163- `O (Mapping.make
164- ?tag:(Mapping.tag map)
165- ~implicit:(Mapping.implicit map)
166- ~style:(Mapping.style map)
167- (List.map (fun (k, v) -> (strip_anchors k, strip_anchors v)) (Mapping.members map)))
0168169(** Emit a document using an emit function *)
170let emit_document_impl ?(resolve_aliases = true) ~emit doc =
171- emit (Event.Document_start {
172- version = Document.version doc;
173- implicit = Document.implicit_start doc;
174- });
00175 (match Document.root doc with
176- | Some yaml ->
177- let yaml = if resolve_aliases then
178- yaml |> Yaml.resolve_aliases |> strip_anchors
179- else yaml in
180- emit_yaml_node_impl ~emit yaml
181- | None ->
182- emit (Event.Scalar {
183- anchor = None; tag = None;
184- value = "";
185- plain_implicit = true; quoted_implicit = false;
186- style = `Plain;
187- }));
00000188 emit (Event.Document_end { implicit = Document.implicit_end doc })
189190(** {1 Emitter.t-based API} *)
191192(** Emit a YAML node to an emitter *)
193-let emit_yaml_node t yaml =
194- emit_yaml_node_impl ~emit:(Emitter.emit t) yaml
195196(** Emit a complete YAML document to an emitter *)
197let emit_yaml t yaml =
···249(** Serialize documents to a buffer.
250251 @param config Emitter configuration (default: {!Emitter.default_config})
252- @param resolve_aliases Whether to resolve aliases before emission (default: true)
0253 @param buffer Optional buffer to append to; creates new one if not provided
254 @return The buffer containing serialized YAML *)
255-let documents_to_buffer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?buffer documents =
0256 let buf = Option.value buffer ~default:(Buffer.create 1024) in
257 let t = Emitter.create ~config () in
258 Emitter.emit t (Event.Stream_start { encoding = config.encoding });
···278(** Serialize documents to a string.
279280 @param config Emitter configuration (default: {!Emitter.default_config})
281- @param resolve_aliases Whether to resolve aliases before emission (default: true) *)
282-let documents_to_string ?(config = Emitter.default_config) ?(resolve_aliases = true) documents =
00283 Buffer.contents (documents_to_buffer ~config ~resolve_aliases documents)
284285(** {1 Writer-based API}
286287- These functions write directly to a bytesrw [Bytes.Writer.t],
288- enabling true streaming output without intermediate string allocation.
289- Uses the emitter's native Writer support for efficiency. *)
290291(** Serialize a Value directly to a Bytes.Writer.
292293 @param config Emitter configuration (default: {!Emitter.default_config})
294- @param eod Whether to write end-of-data after serialization (default: true) *)
295-let value_to_writer ?(config = Emitter.default_config) ?(eod = true) writer value =
00296 let t = Emitter.of_writer ~config writer in
297 emit_value t value;
298 if eod then Emitter.flush t
···300(** Serialize a Yaml.t directly to a Bytes.Writer.
301302 @param config Emitter configuration (default: {!Emitter.default_config})
303- @param eod Whether to write end-of-data after serialization (default: true) *)
304-let yaml_to_writer ?(config = Emitter.default_config) ?(eod = true) writer yaml =
00305 let t = Emitter.of_writer ~config writer in
306 emit_yaml t yaml;
307 if eod then Emitter.flush t
···309(** Serialize documents directly to a Bytes.Writer.
310311 @param config Emitter configuration (default: {!Emitter.default_config})
312- @param resolve_aliases Whether to resolve aliases before emission (default: true)
313- @param eod Whether to write end-of-data after serialization (default: true) *)
314-let documents_to_writer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?(eod = true) writer documents =
000315 let t = Emitter.of_writer ~config writer in
316 Emitter.emit t (Event.Stream_start { encoding = config.encoding });
317 List.iter (emit_document ~resolve_aliases t) documents;
···320321(** {1 Function-based API}
322323- These functions accept an emit function [Event.t -> unit] instead of
324- an {!Emitter.t}, allowing them to work with any event sink
325- (e.g., streaming writers, custom processors). *)
326327(** Emit a YAML node using an emitter function *)
328-let emit_yaml_node_fn ~emitter yaml =
329- emit_yaml_node_impl ~emit:emitter yaml
330331(** Emit a complete YAML stream using an emitter function *)
332let emit_yaml_fn ~emitter ~config yaml =
···1011(** {1 Internal Helpers} *)
1213+(** Emit a YAML node using an emit function. This is the core implementation
14+ used by both Emitter.t and function-based APIs. *)
15let rec emit_yaml_node_impl ~emit (yaml : Yaml.t) =
16 match yaml with
17 | `Scalar s ->
18+ emit
19+ (Event.Scalar
20+ {
21+ anchor = Scalar.anchor s;
22+ tag = Scalar.tag s;
23+ value = Scalar.value s;
24+ plain_implicit = Scalar.plain_implicit s;
25+ quoted_implicit = Scalar.quoted_implicit s;
26+ style = Scalar.style s;
27+ })
28+ | `Alias name -> emit (Event.Alias { anchor = name })
029 | `A seq ->
30 let members = Sequence.members seq in
31 (* Force flow style for empty sequences *)
32 let style = if members = [] then `Flow else Sequence.style seq in
33+ emit
34+ (Event.Sequence_start
35+ {
36+ anchor = Sequence.anchor seq;
37+ tag = Sequence.tag seq;
38+ implicit = Sequence.implicit seq;
39+ style;
40+ });
41 List.iter (emit_yaml_node_impl ~emit) members;
42 emit Event.Sequence_end
043 | `O map ->
44 let members = Mapping.members map in
45 (* Force flow style for empty mappings *)
46 let style = if members = [] then `Flow else Mapping.style map in
47+ emit
48+ (Event.Mapping_start
49+ {
50+ anchor = Mapping.anchor map;
51+ tag = Mapping.tag map;
52+ implicit = Mapping.implicit map;
53+ style;
54+ });
55+ List.iter
56+ (fun (k, v) ->
57+ emit_yaml_node_impl ~emit k;
58+ emit_yaml_node_impl ~emit v)
59+ members;
60 emit Event.Mapping_end
6162+(** Emit a Value node using an emit function. This is the core implementation
63+ used by both Emitter.t and function-based APIs. *)
64let rec emit_value_node_impl ~emit ~config (value : Value.t) =
65 match value with
66 | `Null ->
67+ emit
68+ (Event.Scalar
69+ {
70+ anchor = None;
71+ tag = None;
72+ value = "null";
73+ plain_implicit = true;
74+ quoted_implicit = false;
75+ style = `Plain;
76+ })
77 | `Bool b ->
78+ emit
79+ (Event.Scalar
80+ {
81+ anchor = None;
82+ tag = None;
83+ value = (if b then "true" else "false");
84+ plain_implicit = true;
85+ quoted_implicit = false;
86+ style = `Plain;
87+ })
88 | `Float f ->
89 let value =
90 match Float.classify_float f with
···93 | _ ->
94 if Float.is_integer f && Float.abs f < 1e15 then
95 Printf.sprintf "%.0f" f
96+ else Printf.sprintf "%g" f
097 in
98+ emit
99+ (Event.Scalar
100+ {
101+ anchor = None;
102+ tag = None;
103+ value;
104+ plain_implicit = true;
105+ quoted_implicit = false;
106+ style = `Plain;
107+ })
108 | `String s ->
109 let style = Quoting.choose_style s in
110+ emit
111+ (Event.Scalar
112+ {
113+ anchor = None;
114+ tag = None;
115+ value = s;
116+ plain_implicit = style = `Plain;
117+ quoted_implicit = style <> `Plain;
118+ style;
119+ })
120 | `A items ->
121 (* Force flow style for empty sequences, otherwise use config *)
122 let style =
123 if items = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
124 in
125+ emit
126+ (Event.Sequence_start
127+ { anchor = None; tag = None; implicit = true; style });
00128 List.iter (emit_value_node_impl ~emit ~config) items;
129 emit Event.Sequence_end
0130 | `O pairs ->
131 (* Force flow style for empty mappings, otherwise use config *)
132 let style =
133 if pairs = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
134 in
135+ emit
136+ (Event.Mapping_start
137+ { anchor = None; tag = None; implicit = true; style });
138+ List.iter
139+ (fun (k, v) ->
140+ let style = Quoting.choose_style k in
141+ emit
142+ (Event.Scalar
143+ {
144+ anchor = None;
145+ tag = None;
146+ value = k;
147+ plain_implicit = style = `Plain;
148+ quoted_implicit = style <> `Plain;
149+ style;
150+ });
151+ emit_value_node_impl ~emit ~config v)
152+ pairs;
153 emit Event.Mapping_end
154155(** Strip anchors from a YAML tree (used when resolving aliases for output) *)
···158 | `Scalar s ->
159 if Option.is_none (Scalar.anchor s) then yaml
160 else
161+ `Scalar
162+ (Scalar.make ?tag:(Scalar.tag s)
163+ ~plain_implicit:(Scalar.plain_implicit s)
164+ ~quoted_implicit:(Scalar.quoted_implicit s) ~style:(Scalar.style s)
165+ (Scalar.value s))
0166 | `Alias _ -> yaml
167 | `A seq ->
168+ `A
169+ (Sequence.make ?tag:(Sequence.tag seq) ~implicit:(Sequence.implicit seq)
170+ ~style:(Sequence.style seq)
171+ (List.map strip_anchors (Sequence.members seq)))
0172 | `O map ->
173+ `O
174+ (Mapping.make ?tag:(Mapping.tag map) ~implicit:(Mapping.implicit map)
175+ ~style:(Mapping.style map)
176+ (List.map
177+ (fun (k, v) -> (strip_anchors k, strip_anchors v))
178+ (Mapping.members map)))
179180(** Emit a document using an emit function *)
181let emit_document_impl ?(resolve_aliases = true) ~emit doc =
182+ emit
183+ (Event.Document_start
184+ {
185+ version = Document.version doc;
186+ implicit = Document.implicit_start doc;
187+ });
188 (match Document.root doc with
189+ | Some yaml ->
190+ let yaml =
191+ if resolve_aliases then yaml |> Yaml.resolve_aliases |> strip_anchors
192+ else yaml
193+ in
194+ emit_yaml_node_impl ~emit yaml
195+ | None ->
196+ emit
197+ (Event.Scalar
198+ {
199+ anchor = None;
200+ tag = None;
201+ value = "";
202+ plain_implicit = true;
203+ quoted_implicit = false;
204+ style = `Plain;
205+ }));
206 emit (Event.Document_end { implicit = Document.implicit_end doc })
207208(** {1 Emitter.t-based API} *)
209210(** Emit a YAML node to an emitter *)
211+let emit_yaml_node t yaml = emit_yaml_node_impl ~emit:(Emitter.emit t) yaml
0212213(** Emit a complete YAML document to an emitter *)
214let emit_yaml t yaml =
···266(** Serialize documents to a buffer.
267268 @param config Emitter configuration (default: {!Emitter.default_config})
269+ @param resolve_aliases
270+ Whether to resolve aliases before emission (default: true)
271 @param buffer Optional buffer to append to; creates new one if not provided
272 @return The buffer containing serialized YAML *)
273+let documents_to_buffer ?(config = Emitter.default_config)
274+ ?(resolve_aliases = true) ?buffer documents =
275 let buf = Option.value buffer ~default:(Buffer.create 1024) in
276 let t = Emitter.create ~config () in
277 Emitter.emit t (Event.Stream_start { encoding = config.encoding });
···297(** Serialize documents to a string.
298299 @param config Emitter configuration (default: {!Emitter.default_config})
300+ @param resolve_aliases
301+ Whether to resolve aliases before emission (default: true) *)
302+let documents_to_string ?(config = Emitter.default_config)
303+ ?(resolve_aliases = true) documents =
304 Buffer.contents (documents_to_buffer ~config ~resolve_aliases documents)
305306(** {1 Writer-based API}
307308+ These functions write directly to a bytesrw [Bytes.Writer.t], enabling true
309+ streaming output without intermediate string allocation. Uses the emitter's
310+ native Writer support for efficiency. *)
311312(** Serialize a Value directly to a Bytes.Writer.
313314 @param config Emitter configuration (default: {!Emitter.default_config})
315+ @param eod Whether to write end-of-data after serialization (default: true)
316+*)
317+let value_to_writer ?(config = Emitter.default_config) ?(eod = true) writer
318+ value =
319 let t = Emitter.of_writer ~config writer in
320 emit_value t value;
321 if eod then Emitter.flush t
···323(** Serialize a Yaml.t directly to a Bytes.Writer.
324325 @param config Emitter configuration (default: {!Emitter.default_config})
326+ @param eod Whether to write end-of-data after serialization (default: true)
327+*)
328+let yaml_to_writer ?(config = Emitter.default_config) ?(eod = true) writer yaml
329+ =
330 let t = Emitter.of_writer ~config writer in
331 emit_yaml t yaml;
332 if eod then Emitter.flush t
···334(** Serialize documents directly to a Bytes.Writer.
335336 @param config Emitter configuration (default: {!Emitter.default_config})
337+ @param resolve_aliases
338+ Whether to resolve aliases before emission (default: true)
339+ @param eod Whether to write end-of-data after serialization (default: true)
340+*)
341+let documents_to_writer ?(config = Emitter.default_config)
342+ ?(resolve_aliases = true) ?(eod = true) writer documents =
343 let t = Emitter.of_writer ~config writer in
344 Emitter.emit t (Event.Stream_start { encoding = config.encoding });
345 List.iter (emit_document ~resolve_aliases t) documents;
···348349(** {1 Function-based API}
350351+ These functions accept an emit function [Event.t -> unit] instead of an
352+ {!Emitter.t}, allowing them to work with any event sink (e.g., streaming
353+ writers, custom processors). *)
354355(** Emit a YAML node using an emitter function *)
356+let emit_yaml_node_fn ~emitter yaml = emit_yaml_node_impl ~emit:emitter yaml
0357358(** Emit a complete YAML stream using an emitter function *)
359let emit_yaml_fn ~emitter ~config yaml =
+10-16
lib/span.ml
···56(** Source spans representing ranges in input *)
78-type t = {
9- start : Position.t;
10- stop : Position.t;
11-}
1213let make ~start ~stop = { start; stop }
14-15let point pos = { start = pos; stop = pos }
1617let merge a b =
18- let start = if Position.compare a.start b.start <= 0 then a.start else b.start in
0019 let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in
20 { start; stop }
2122-let extend span pos =
23- { span with stop = pos }
2425let pp fmt t =
26 if t.start.line = t.stop.line then
27- Format.fprintf fmt "line %d, columns %d-%d"
28- t.start.line t.start.column t.stop.column
29- else
30- Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line
3132-let to_string t =
33- Format.asprintf "%a" pp t
3435let compare a b =
36 let c = Position.compare a.start b.start in
37 if c <> 0 then c else Position.compare a.stop b.stop
3839-let equal a b =
40- Position.equal a.start b.start && Position.equal a.stop b.stop
···56(** Source spans representing ranges in input *)
78+type t = { start : Position.t; stop : Position.t }
000910let make ~start ~stop = { start; stop }
011let point pos = { start = pos; stop = pos }
1213let merge a b =
14+ let start =
15+ if Position.compare a.start b.start <= 0 then a.start else b.start
16+ in
17 let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in
18 { start; stop }
1920+let extend span pos = { span with stop = pos }
02122let pp fmt t =
23 if t.start.line = t.stop.line then
24+ Format.fprintf fmt "line %d, columns %d-%d" t.start.line t.start.column
25+ t.stop.column
26+ else Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line
02728+let to_string t = Format.asprintf "%a" pp t
02930let compare a b =
31 let c = Position.compare a.start b.start in
32 if c <> 0 then c else Position.compare a.stop b.stop
3334+let equal a b = Position.equal a.start b.start && Position.equal a.stop b.stop
0
+12-14
lib/tag.ml
···18 | 0 -> None
19 | _ when s.[0] <> '!' -> None
20 | 1 -> Some { handle = "!"; suffix = "" }
21- | _ ->
22 match s.[1] with
23- | '!' -> (* !! handle *)
024 Some { handle = "!!"; suffix = String.sub s 2 (len - 2) }
25- | '<' -> (* Verbatim tag !<...> *)
026 if len > 2 && s.[len - 1] = '>' then
27 Some { handle = "!"; suffix = String.sub s 2 (len - 3) }
28- else
29- None
30- | _ -> (* Primary handle or local tag *)
31- Some { handle = "!"; suffix = String.sub s 1 (len - 1) }
3233let to_string t =
34- if t.handle = "!" && t.suffix = "" then "!"
35- else t.handle ^ t.suffix
3637let to_uri t =
38 match t.handle with
···40 | "!" -> "!" ^ t.suffix
41 | h -> h ^ t.suffix
4243-let pp fmt t =
44- Format.pp_print_string fmt (to_string t)
45-46-let equal a b =
47- String.equal a.handle b.handle && String.equal a.suffix b.suffix
4849let compare a b =
50 let c = String.compare a.handle b.handle in
···18 | 0 -> None
19 | _ when s.[0] <> '!' -> None
20 | 1 -> Some { handle = "!"; suffix = "" }
21+ | _ -> (
22 match s.[1] with
23+ | '!' ->
24+ (* !! handle *)
25 Some { handle = "!!"; suffix = String.sub s 2 (len - 2) }
26+ | '<' ->
27+ (* Verbatim tag !<...> *)
28 if len > 2 && s.[len - 1] = '>' then
29 Some { handle = "!"; suffix = String.sub s 2 (len - 3) }
30+ else None
31+ | _ ->
32+ (* Primary handle or local tag *)
33+ Some { handle = "!"; suffix = String.sub s 1 (len - 1) })
3435let to_string t =
36+ if t.handle = "!" && t.suffix = "" then "!" else t.handle ^ t.suffix
03738let to_uri t =
39 match t.handle with
···41 | "!" -> "!" ^ t.suffix
42 | h -> h ^ t.suffix
4344+let pp fmt t = Format.pp_print_string fmt (to_string t)
45+let equal a b = String.equal a.handle b.handle && String.equal a.suffix b.suffix
0004647let compare a b =
48 let c = String.compare a.handle b.handle in
···56(** Yamlrw Unix - Channel and file I/O for YAML
78- This module provides channel and file operations for parsing
9- and emitting YAML using bytesrw for efficient streaming I/O. *)
1011(** {1 Types} *)
12···76(** {1 File Input} *)
7778val value_of_file :
79- ?resolve_aliases:bool ->
80- ?max_nodes:int ->
81- ?max_depth:int ->
82- string ->
83- value
84(** Parse a JSON-compatible value from a file. *)
8586val yaml_of_file :
87- ?resolve_aliases:bool ->
88- ?max_nodes:int ->
89- ?max_depth:int ->
90- string ->
91- yaml
92(** Parse a full YAML value from a file. *)
9394val documents_of_file : string -> document list
···56(** Yamlrw Unix - Channel and file I/O for YAML
78+ This module provides channel and file operations for parsing and emitting
9+ YAML using bytesrw for efficient streaming I/O. *)
1011(** {1 Types} *)
12···76(** {1 File Input} *)
7778val value_of_file :
79+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> value
000080(** Parse a JSON-compatible value from a file. *)
8182val yaml_of_file :
83+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> yaml
000084(** Parse a full YAML value from a file. *)
8586val documents_of_file : string -> document list
+16-20
lib/value.ml
···56(** JSON-compatible YAML value representation *)
78-type t = [
9- | `Null
10 | `Bool of bool
11 | `Float of float
12 | `String of string
13 | `A of t list
14- | `O of (string * t) list
15-]
1617(* Type equality is ensured by structural compatibility with Yamlrw.value *)
18···23let int n : t = `Float (Float.of_int n)
24let float f : t = `Float f
25let string s : t = `String s
26-27let list f xs : t = `A (List.map f xs)
28let obj pairs : t = `O pairs
29···72 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
73 | _ -> false
7475-let find key = function
76- | `O pairs -> List.assoc_opt key pairs
77- | _ -> None
7879let get key v =
80- match find key v with
81- | Some v -> v
82- | None -> Error.raise (Key_not_found key)
8384let keys = function
85 | `O pairs -> List.map fst pairs
···92(** Combinators *)
9394let combine v1 v2 =
95- match v1, v2 with
96 | `O o1, `O o2 -> `O (o1 @ o2)
97 | v1, _ -> Error.raise (Type_mismatch ("object", type_name v1))
98···113 | `Float f ->
114 if Float.is_integer f && Float.abs f < 1e15 then
115 Format.fprintf fmt "%.0f" f
116- else
117- Format.fprintf fmt "%g" f
118 | `String s -> Format.fprintf fmt "%S" s
119 | `A [] -> Format.pp_print_string fmt "[]"
120 | `A items ->
121 Format.fprintf fmt "@[<hv 2>[@,%a@]@,]"
122- (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp)
00123 items
124 | `O [] -> Format.pp_print_string fmt "{}"
125 | `O pairs ->
126 Format.fprintf fmt "@[<hv 2>{@,%a@]@,}"
127- (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
0128 (fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v))
129 pairs
130131(** Equality and comparison *)
132133let rec equal (a : t) (b : t) =
134- match a, b with
135 | `Null, `Null -> true
136 | `Bool a, `Bool b -> a = b
137 | `Float a, `Float b -> Float.equal a b
138 | `String a, `String b -> String.equal a b
139 | `A a, `A b -> List.equal equal a b
140 | `O a, `O b ->
141- List.length a = List.length b &&
142- List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b
143 | _ -> false
144145let rec compare (a : t) (b : t) =
146- match a, b with
147 | `Null, `Null -> 0
148 | `Null, _ -> -1
149 | _, `Null -> 1
···56(** JSON-compatible YAML value representation *)
78+type t =
9+ [ `Null
10 | `Bool of bool
11 | `Float of float
12 | `String of string
13 | `A of t list
14+ | `O of (string * t) list ]
01516(* Type equality is ensured by structural compatibility with Yamlrw.value *)
17···22let int n : t = `Float (Float.of_int n)
23let float f : t = `Float f
24let string s : t = `String s
025let list f xs : t = `A (List.map f xs)
26let obj pairs : t = `O pairs
27···70 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
71 | _ -> false
7273+let find key = function `O pairs -> List.assoc_opt key pairs | _ -> None
007475let get key v =
76+ match find key v with Some v -> v | None -> Error.raise (Key_not_found key)
007778let keys = function
79 | `O pairs -> List.map fst pairs
···86(** Combinators *)
8788let combine v1 v2 =
89+ match (v1, v2) with
90 | `O o1, `O o2 -> `O (o1 @ o2)
91 | v1, _ -> Error.raise (Type_mismatch ("object", type_name v1))
92···107 | `Float f ->
108 if Float.is_integer f && Float.abs f < 1e15 then
109 Format.fprintf fmt "%.0f" f
110+ else Format.fprintf fmt "%g" f
0111 | `String s -> Format.fprintf fmt "%S" s
112 | `A [] -> Format.pp_print_string fmt "[]"
113 | `A items ->
114 Format.fprintf fmt "@[<hv 2>[@,%a@]@,]"
115+ (Format.pp_print_list
116+ ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
117+ pp)
118 items
119 | `O [] -> Format.pp_print_string fmt "{}"
120 | `O pairs ->
121 Format.fprintf fmt "@[<hv 2>{@,%a@]@,}"
122+ (Format.pp_print_list
123+ ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
124 (fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v))
125 pairs
126127(** Equality and comparison *)
128129let rec equal (a : t) (b : t) =
130+ match (a, b) with
131 | `Null, `Null -> true
132 | `Bool a, `Bool b -> a = b
133 | `Float a, `Float b -> Float.equal a b
134 | `String a, `String b -> String.equal a b
135 | `A a, `A b -> List.equal equal a b
136 | `O a, `O b ->
137+ List.length a = List.length b
138+ && List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b
139 | _ -> false
140141let rec compare (a : t) (b : t) =
142+ match (a, b) with
143 | `Null, `Null -> 0
144 | `Null, _ -> -1
145 | _, `Null -> 1
+115-102
lib/yaml.ml
···56(** Full YAML representation with anchors, tags, and aliases *)
78-type t = [
9- | `Scalar of Scalar.t
10 | `Alias of string
11 | `A of t Sequence.t
12- | `O of (t, t) Mapping.t
13-]
1415(** Pretty printing *)
16···24(** Equality *)
2526let rec equal (a : t) (b : t) =
27- match a, b with
28 | `Scalar a, `Scalar b -> Scalar.equal a b
29 | `Alias a, `Alias b -> String.equal a b
30 | `A a, `A b -> Sequence.equal equal a b
···40 | `Bool false -> `Scalar (Scalar.make "false")
41 | `Float f ->
42 let s =
43- if Float.is_integer f && Float.abs f < 1e15 then
44- Printf.sprintf "%.0f" f
45- else
46- Printf.sprintf "%g" f
47 in
48 `Scalar (Scalar.make s)
49- | `String s ->
50- `Scalar (Scalar.make s ~style:`Double_quoted)
51- | `A items ->
52- `A (Sequence.make (List.map of_value items))
53 | `O pairs ->
54- `O (Mapping.make (List.map (fun (k, v) ->
55- (`Scalar (Scalar.make k), of_value v)
56- ) pairs))
005758-(** Default limits for alias expansion (protection against billion laughs attack) *)
059let default_max_alias_nodes = 10_000_000
060let default_max_alias_depth = 100
6162(** Resolve aliases by replacing them with referenced nodes.
6364- Processes the tree in document order so that aliases resolve to the
65- anchor value that was defined at the point the alias was encountered.
6667- See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
68- (Anchors and Aliases)} of the YAML 1.2.2 specification for details on
69- how anchors and aliases work in YAML.
07071- This implements protection against the "billion laughs attack"
72- (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)})
73- by limiting both the total number of nodes and the nesting depth during expansion.
07475- @param max_nodes Maximum number of nodes to create during expansion (default 10M)
76- @param max_depth Maximum depth of alias-within-alias resolution (default 100)
77- @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded
78- @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded
79-*)
80-let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
00000081 let anchors = Hashtbl.create 16 in
82 let node_count = ref 0 in
83···103 need expansion if it was registered before those anchors existed *)
104 resolve ~depth:(depth + 1) target
105 | None -> Error.raise (Undefined_alias name)
106-107 (* Single pass: process in document order, registering anchors and resolving aliases *)
108 and resolve ~depth (v : t) : t =
109 check_node_limit ();
···112 (* Register anchor after we have the resolved node *)
113 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s);
114 v
115- | `Alias name ->
116- expand_alias ~depth name
117 | `A seq ->
118 (* First resolve all members in order *)
119- let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in
120- let resolved = `A (Sequence.make
121- ?anchor:(Sequence.anchor seq)
122- ?tag:(Sequence.tag seq)
123- ~implicit:(Sequence.implicit seq)
124- ~style:(Sequence.style seq)
125- resolved_members) in
00126 (* Register anchor with resolved node *)
127 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq);
128 resolved
129 | `O map ->
130 (* Process key-value pairs in document order *)
131- let resolved_pairs = List.map (fun (k, v) ->
132- let resolved_k = resolve ~depth k in
133- let resolved_v = resolve ~depth v in
134- (resolved_k, resolved_v)
135- ) (Mapping.members map) in
136- let resolved = `O (Mapping.make
137- ?anchor:(Mapping.anchor map)
138- ?tag:(Mapping.tag map)
139- ~implicit:(Mapping.implicit map)
140- ~style:(Mapping.style map)
141- resolved_pairs) in
000142 (* Register anchor with resolved node *)
143 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map);
144 resolved
···153154 (* If explicitly tagged, respect the tag *)
155 match tag with
156- | Some "tag:yaml.org,2002:null" | Some "!!null" ->
157- `Null
158- | Some "tag:yaml.org,2002:bool" | Some "!!bool" ->
159- (match String.lowercase_ascii value with
160- | "true" | "yes" | "on" -> `Bool true
161- | "false" | "no" | "off" -> `Bool false
162- | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
163- | Some "tag:yaml.org,2002:int" | Some "!!int" ->
164- (try `Float (Float.of_string value)
165- with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
166- | Some "tag:yaml.org,2002:float" | Some "!!float" ->
167- (try `Float (Float.of_string value)
168- with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
169- | Some "tag:yaml.org,2002:str" | Some "!!str" ->
170- `String value
171 | Some _ ->
172 (* Unknown tag - treat as string *)
173 `String value
174 | None ->
175 (* Implicit type resolution for plain scalars *)
176- if style <> `Plain then
177- `String value
178- else
179- infer_scalar_type value
180181(** Infer type from plain scalar value *)
182and infer_scalar_type value =
···208 else if (first = '-' || first = '+') && len >= 2 then
209 let second = value.[1] in
210 (* After sign, must be digit or dot-digit (for +.5, -.5) *)
211- second >= '0' && second <= '9' ||
212- (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
213 else false
214 in
215 (* Try integer/float *)
···231 | _ ->
232 (* Decimal with leading zero or octal in YAML 1.1 *)
233 Some (`Float (Float.of_string value))
234- else
235- Some (`Float (Float.of_string value))
236 with _ -> None
237 else None
238 in
···244 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
245 YAML requires the leading dot: .nan, .inf, -.inf
246 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
247- if String.length value >= 2 && value.[0] = '.' &&
248- value.[1] >= '0' && value.[1] <= '9' then
249- try `Float (Float.of_string value)
250- with _ -> `String value
251- else
252- `String value
0253254(** Convert to JSON-compatible Value.
255256- Converts a full YAML representation to a simplified JSON-compatible value type.
257- This process implements the representation graph to serialization tree conversion
258- described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)}
259- of the YAML 1.2.2 specification.
0260261- See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2
262- (JSON Schema)} for the tag resolution used during conversion.
0263264- @param resolve_aliases_first Whether to resolve aliases before conversion (default true)
0265 @param max_nodes Maximum nodes during alias expansion (default 10M)
266 @param max_depth Maximum alias nesting depth (default 100)
267- @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered
268-*)
269-let to_value
270- ?(resolve_aliases_first = true)
271 ?(max_nodes = default_max_alias_nodes)
272- ?(max_depth = default_max_alias_depth)
273- (v : t) : Value.t =
274- let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in
0275 let rec convert (v : t) : Value.t =
276 match v with
277 | `Scalar s -> scalar_to_value s
278 | `Alias name -> Error.raise (Unresolved_alias name)
279 | `A seq -> `A (List.map convert (Sequence.members seq))
280 | `O map ->
281- `O (List.map (fun (k, v) ->
282- let key = match k with
283- | `Scalar s -> Scalar.value s
284- | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
285- in
286- (key, convert v)
287- ) (Mapping.members map))
0000288 in
289 convert v
290
···56(** Full YAML representation with anchors, tags, and aliases *)
78+type t =
9+ [ `Scalar of Scalar.t
10 | `Alias of string
11 | `A of t Sequence.t
12+ | `O of (t, t) Mapping.t ]
01314(** Pretty printing *)
15···23(** Equality *)
2425let rec equal (a : t) (b : t) =
26+ match (a, b) with
27 | `Scalar a, `Scalar b -> Scalar.equal a b
28 | `Alias a, `Alias b -> String.equal a b
29 | `A a, `A b -> Sequence.equal equal a b
···39 | `Bool false -> `Scalar (Scalar.make "false")
40 | `Float f ->
41 let s =
42+ if Float.is_integer f && Float.abs f < 1e15 then Printf.sprintf "%.0f" f
43+ else Printf.sprintf "%g" f
0044 in
45 `Scalar (Scalar.make s)
46+ | `String s -> `Scalar (Scalar.make s ~style:`Double_quoted)
47+ | `A items -> `A (Sequence.make (List.map of_value items))
0048 | `O pairs ->
49+ `O
50+ (Mapping.make
51+ (List.map
52+ (fun (k, v) -> (`Scalar (Scalar.make k), of_value v))
53+ pairs))
5455+(** Default limits for alias expansion (protection against billion laughs
56+ attack) *)
57let default_max_alias_nodes = 10_000_000
58+59let default_max_alias_depth = 100
6061(** Resolve aliases by replacing them with referenced nodes.
6263+ Processes the tree in document order so that aliases resolve to the anchor
64+ value that was defined at the point the alias was encountered.
6566+ See
67+ {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
68+ (Anchors and Aliases)} of the YAML 1.2.2 specification for details on how
69+ anchors and aliases work in YAML.
7071+ This implements protection against the "billion laughs attack" (see
72+ {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) by
73+ limiting both the total number of nodes and the nesting depth during
74+ expansion.
7576+ @param max_nodes
77+ Maximum number of nodes to create during expansion (default 10M)
78+ @param max_depth
79+ Maximum depth of alias-within-alias resolution (default 100)
80+ @raise Error.Yamlrw_error
81+ with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is
82+ exceeded
83+ @raise Error.Yamlrw_error
84+ with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is
85+ exceeded *)
86+let resolve_aliases ?(max_nodes = default_max_alias_nodes)
87+ ?(max_depth = default_max_alias_depth) (root : t) : t =
88 let anchors = Hashtbl.create 16 in
89 let node_count = ref 0 in
90···110 need expansion if it was registered before those anchors existed *)
111 resolve ~depth:(depth + 1) target
112 | None -> Error.raise (Undefined_alias name)
0113 (* Single pass: process in document order, registering anchors and resolving aliases *)
114 and resolve ~depth (v : t) : t =
115 check_node_limit ();
···118 (* Register anchor after we have the resolved node *)
119 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s);
120 v
121+ | `Alias name -> expand_alias ~depth name
0122 | `A seq ->
123 (* First resolve all members in order *)
124+ let resolved_members =
125+ List.map (resolve ~depth) (Sequence.members seq)
126+ in
127+ let resolved =
128+ `A
129+ (Sequence.make ?anchor:(Sequence.anchor seq) ?tag:(Sequence.tag seq)
130+ ~implicit:(Sequence.implicit seq) ~style:(Sequence.style seq)
131+ resolved_members)
132+ in
133 (* Register anchor with resolved node *)
134 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq);
135 resolved
136 | `O map ->
137 (* Process key-value pairs in document order *)
138+ let resolved_pairs =
139+ List.map
140+ (fun (k, v) ->
141+ let resolved_k = resolve ~depth k in
142+ let resolved_v = resolve ~depth v in
143+ (resolved_k, resolved_v))
144+ (Mapping.members map)
145+ in
146+ let resolved =
147+ `O
148+ (Mapping.make ?anchor:(Mapping.anchor map) ?tag:(Mapping.tag map)
149+ ~implicit:(Mapping.implicit map) ~style:(Mapping.style map)
150+ resolved_pairs)
151+ in
152 (* Register anchor with resolved node *)
153 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map);
154 resolved
···163164 (* If explicitly tagged, respect the tag *)
165 match tag with
166+ | Some "tag:yaml.org,2002:null" | Some "!!null" -> `Null
167+ | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> (
168+ match String.lowercase_ascii value with
169+ | "true" | "yes" | "on" -> `Bool true
170+ | "false" | "no" | "off" -> `Bool false
171+ | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
172+ | Some "tag:yaml.org,2002:int" | Some "!!int" -> (
173+ try `Float (Float.of_string value)
174+ with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
175+ | Some "tag:yaml.org,2002:float" | Some "!!float" -> (
176+ try `Float (Float.of_string value)
177+ with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
178+ | Some "tag:yaml.org,2002:str" | Some "!!str" -> `String value
00179 | Some _ ->
180 (* Unknown tag - treat as string *)
181 `String value
182 | None ->
183 (* Implicit type resolution for plain scalars *)
184+ if style <> `Plain then `String value else infer_scalar_type value
000185186(** Infer type from plain scalar value *)
187and infer_scalar_type value =
···213 else if (first = '-' || first = '+') && len >= 2 then
214 let second = value.[1] in
215 (* After sign, must be digit or dot-digit (for +.5, -.5) *)
216+ (second >= '0' && second <= '9')
217+ || (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
218 else false
219 in
220 (* Try integer/float *)
···236 | _ ->
237 (* Decimal with leading zero or octal in YAML 1.1 *)
238 Some (`Float (Float.of_string value))
239+ else Some (`Float (Float.of_string value))
0240 with _ -> None
241 else None
242 in
···248 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
249 YAML requires the leading dot: .nan, .inf, -.inf
250 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
251+ if
252+ String.length value >= 2
253+ && value.[0] = '.'
254+ && value.[1] >= '0'
255+ && value.[1] <= '9'
256+ then try `Float (Float.of_string value) with _ -> `String value
257+ else `String value
258259(** Convert to JSON-compatible Value.
260261+ Converts a full YAML representation to a simplified JSON-compatible value
262+ type. This process implements the representation graph to serialization tree
263+ conversion described in
264+ {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} of the
265+ YAML 1.2.2 specification.
266267+ See also
268+ {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 (JSON
269+ Schema)} for the tag resolution used during conversion.
270271+ @param resolve_aliases_first
272+ Whether to resolve aliases before conversion (default true)
273 @param max_nodes Maximum nodes during alias expansion (default 10M)
274 @param max_depth Maximum alias nesting depth (default 100)
275+ @raise Error.Yamlrw_error
276+ with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is
277+ false and an alias is encountered *)
278+let to_value ?(resolve_aliases_first = true)
279 ?(max_nodes = default_max_alias_nodes)
280+ ?(max_depth = default_max_alias_depth) (v : t) : Value.t =
281+ let v =
282+ if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v
283+ in
284 let rec convert (v : t) : Value.t =
285 match v with
286 | `Scalar s -> scalar_to_value s
287 | `Alias name -> Error.raise (Unresolved_alias name)
288 | `A seq -> `A (List.map convert (Sequence.members seq))
289 | `O map ->
290+ `O
291+ (List.map
292+ (fun (k, v) ->
293+ let key =
294+ match k with
295+ | `Scalar s -> Scalar.value s
296+ | _ ->
297+ Error.raise (Type_mismatch ("string key", "complex key"))
298+ in
299+ (key, convert v))
300+ (Mapping.members map))
301 in
302 convert v
303
+183-253
lib/yamlrw.ml
···1112exception Yamlrw_error = Error.Yamlrw_error
1314-15(** {2 Core Types} *)
160000000017(** JSON-compatible YAML representation. Use this for simple data interchange.
1819 This type is structurally equivalent to {!Value.t} and compatible with the
20- ezjsonm representation. For additional operations, see {!Value} and {!Util}. *)
21-type value = [
22- | `Null (** YAML null, ~, or empty values *)
23- | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
24- | `Float of float (** All YAML numbers (integers stored as floats) *)
25- | `String of string (** YAML strings *)
26- | `A of value list (** YAML sequences/arrays *)
27- | `O of (string * value) list (** YAML mappings/objects with string keys *)
28-]
2900000030(** Full YAML representation preserving anchors, tags, and aliases.
3132 This type is structurally equivalent to {!Yaml.t}. Use this when you need
···34 type tags for custom types, scalar styles (plain, quoted, literal, folded),
35 and collection styles (block vs flow).
3637- For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *)
38-type yaml = [
39- | `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
40- | `Alias of string (** Alias reference to an anchored node *)
41- | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
42- | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
43-]
44000000000045(** A YAML document with directives and metadata.
4647 This type is structurally equivalent to {!Document.t}. A YAML stream can
48 contain multiple documents, each separated by document markers.
4950 For additional operations, see {!Document}. *)
51-type document = {
52- version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
53- tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
54- root : yaml option; (** Root content of the document *)
55- implicit_start : bool; (** Whether the document start marker (---) is implicit *)
56- implicit_end : bool; (** Whether the document end marker (...) is implicit *)
57-}
58-5960(** {2 Character Encoding} *)
6162module Encoding = Encoding
63-6465(** {2 Parsing} *)
66···72(** Default maximum alias nesting depth (100). *)
73let default_max_alias_depth = Yaml.default_max_alias_depth
7475-let of_string
76- ?(resolve_aliases = true)
77- ?(max_nodes = default_max_alias_nodes)
78- ?(max_depth = default_max_alias_depth)
79- s : value =
80- (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value)
81(** Parse a YAML string into a JSON-compatible value.
8283 @param resolve_aliases Whether to expand aliases (default: true)
84 @param max_nodes Maximum nodes during alias expansion (default: 10M)
85 @param max_depth Maximum alias nesting depth (default: 100)
86 @raise Yamlrw_error on parse error or if multiple documents found *)
0008788-let yaml_of_string
89- ?(resolve_aliases = false)
90- ?(max_nodes = default_max_alias_nodes)
91- ?(max_depth = default_max_alias_depth)
92- s : yaml =
93- (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml)
94(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
9596 By default, aliases are NOT resolved, preserving the document structure.
···99 @param max_nodes Maximum nodes during alias expansion (default: 10M)
100 @param max_depth Maximum alias nesting depth (default: 100)
101 @raise Yamlrw_error on parse error or if multiple documents found *)
0000102103-let documents_of_string s : document list =
104- let docs = Loader.documents_of_string s in
105- List.map (fun (d : Document.t) : document -> {
106- version = d.version;
107- tags = d.tags;
108- root = (d.root :> yaml option);
109- implicit_start = d.implicit_start;
110- implicit_end = d.implicit_end;
111- }) docs
112(** Parse a multi-document YAML stream.
113114- Use this when your YAML input contains multiple documents separated
115- by document markers (---).
116117 @raise Yamlrw_error on parse error *)
118-00000000000119120(** {2 Formatting Styles} *)
121122module Scalar_style = Scalar_style
123-124module Layout_style = Layout_style
125126-127(** {2 Serialization} *)
128129let make_config ~encoding ~scalar_style ~layout_style =
130 { Emitter.default_config with encoding; scalar_style; layout_style }
131132-let to_buffer
133- ?(encoding = `Utf8)
134- ?(scalar_style = `Any)
135- ?(layout_style = `Any)
136- ?buffer
137- (value : value) =
138- let config = make_config ~encoding ~scalar_style ~layout_style in
139- Serialize.value_to_buffer ~config ?buffer (value :> Value.t)
140(** Serialize a value to a buffer.
141142 @param encoding Output encoding (default: UTF-8)
143 @param scalar_style Preferred scalar style (default: Any)
144 @param layout_style Preferred layout style (default: Any)
145- @param buffer Optional buffer to append to (allocates new one if not provided)
0146 @return The buffer containing the serialized YAML *)
0000147148-let to_string
149- ?(encoding = `Utf8)
150- ?(scalar_style = `Any)
151- ?(layout_style = `Any)
152- (value : value) =
153- Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value)
154(** Serialize a value to a YAML string.
155156 @param encoding Output encoding (default: UTF-8)
157 @param scalar_style Preferred scalar style (default: Any)
158 @param layout_style Preferred layout style (default: Any) *)
000159160-let yaml_to_buffer
161- ?(encoding = `Utf8)
162- ?(scalar_style = `Any)
163- ?(layout_style = `Any)
164- ?buffer
165- (yaml : yaml) =
166- let config = make_config ~encoding ~scalar_style ~layout_style in
167- Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t)
168(** Serialize a full YAML value to a buffer.
169170 @param encoding Output encoding (default: UTF-8)
171 @param scalar_style Preferred scalar style (default: Any)
172 @param layout_style Preferred layout style (default: Any)
173- @param buffer Optional buffer to append to (allocates new one if not provided)
0174 @return The buffer containing the serialized YAML *)
0000175176-let yaml_to_string
177- ?(encoding = `Utf8)
178- ?(scalar_style = `Any)
179- ?(layout_style = `Any)
180- (yaml : yaml) =
181- Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml)
182(** Serialize a full YAML value to a string.
183184 @param encoding Output encoding (default: UTF-8)
185 @param scalar_style Preferred scalar style (default: Any)
186 @param layout_style Preferred layout style (default: Any) *)
000187188-let documents_to_buffer
189- ?(encoding = `Utf8)
190- ?(scalar_style = `Any)
191- ?(layout_style = `Any)
192- ?(resolve_aliases = true)
193- ?buffer
194- (documents : document list) =
195- let config = make_config ~encoding ~scalar_style ~layout_style in
196- let docs' = List.map (fun (d : document) : Document.t -> {
197- Document.version = d.version;
198- Document.tags = d.tags;
199- Document.root = (d.root :> Yaml.t option);
200- Document.implicit_start = d.implicit_start;
201- Document.implicit_end = d.implicit_end;
202- }) documents in
203- Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs'
204(** Serialize multiple documents to a buffer.
205206 @param encoding Output encoding (default: UTF-8)
207 @param scalar_style Preferred scalar style (default: Any)
208 @param layout_style Preferred layout style (default: Any)
209 @param resolve_aliases Whether to expand aliases (default: true)
210- @param buffer Optional buffer to append to (allocates new one if not provided)
0211 @return The buffer containing the serialized YAML *)
212-213-let documents_to_string
214- ?(encoding = `Utf8)
215- ?(scalar_style = `Any)
216- ?(layout_style = `Any)
217- ?(resolve_aliases = true)
218 (documents : document list) =
219- Buffer.contents (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases documents)
00000000000000220(** Serialize multiple documents to a YAML stream.
221222 @param encoding Output encoding (default: UTF-8)
223 @param scalar_style Preferred scalar style (default: Any)
224 @param layout_style Preferred layout style (default: Any)
225 @param resolve_aliases Whether to expand aliases (default: true) *)
000000226227(** {2 Buffer Parsing} *)
228229-let of_buffer
230- ?(resolve_aliases = true)
231- ?(max_nodes = default_max_alias_nodes)
232- ?(max_depth = default_max_alias_depth)
233- buffer : value =
234- of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
235(** Parse YAML from a buffer into a JSON-compatible value.
236237 @param resolve_aliases Whether to expand aliases (default: true)
238 @param max_nodes Maximum nodes during alias expansion (default: 10M)
239 @param max_depth Maximum alias nesting depth (default: 100)
240 @raise Yamlrw_error on parse error or if multiple documents found *)
000241242-let yaml_of_buffer
243- ?(resolve_aliases = false)
244- ?(max_nodes = default_max_alias_nodes)
245- ?(max_depth = default_max_alias_depth)
246- buffer : yaml =
247- yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
248(** Parse YAML from a buffer preserving full YAML metadata.
249250 @param resolve_aliases Whether to expand aliases (default: false)
251 @param max_nodes Maximum nodes during alias expansion (default: 10M)
252 @param max_depth Maximum alias nesting depth (default: 100)
253 @raise Yamlrw_error on parse error or if multiple documents found *)
0000254255-let documents_of_buffer buffer : document list =
256- documents_of_string (Buffer.contents buffer)
257(** Parse a multi-document YAML stream from a buffer.
258259 @raise Yamlrw_error on parse error *)
260-0261262(** {2 Conversion} *)
263264-let to_json
265- ?(resolve_aliases = true)
266- ?(max_nodes = default_max_alias_nodes)
267- ?(max_depth = default_max_alias_depth)
268- (yaml : yaml) : value =
269- (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth (yaml :> Yaml.t) :> value)
270(** Convert full YAML to JSON-compatible value.
271272 @param resolve_aliases Whether to expand aliases (default: true)
273 @param max_nodes Maximum nodes during alias expansion (default: 10M)
274 @param max_depth Maximum alias nesting depth (default: 100)
275 @raise Yamlrw_error if alias limits exceeded or complex keys found *)
00000276277-let of_json (value : value) : yaml =
278- (Yaml.of_value (value :> Value.t) :> yaml)
279(** Convert JSON-compatible value to full YAML representation. *)
280-281282(** {2 Pretty Printing & Equality} *)
2830284let pp = Value.pp
285-(** Pretty-print a value. *)
286287-let equal = Value.equal
288(** Test equality of two values. *)
289-290291(** {2 Util - Value Combinators} *)
292293module Util = struct
294 (** Combinators for working with {!type:value} values.
295296- This module provides constructors, accessors, and transformations
297- for JSON-compatible YAML values. *)
298299 type t = Value.t
300···349 let get_string v = match v with `String s -> s | _ -> type_error "string" v
350 let get_list v = match v with `A l -> l | _ -> type_error "list" v
351 let get_obj v = match v with `O o -> o | _ -> type_error "object" v
352-353- let get_int v =
354- match as_int v with
355- | Some i -> i
356- | None -> type_error "int" v
357358 (** {3 Object Operations} *)
359···361 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
362 | _ -> false
363364- let find key = function
365- | `O pairs -> List.assoc_opt key pairs
366- | _ -> None
367368- let get key v =
369- match find key v with
370- | Some v -> v
371- | None -> raise Not_found
372373- let keys v = match v with
374- | `O pairs -> List.map fst pairs
375- | _ -> type_error "object" v
376-377- let values v = match v with
378- | `O pairs -> List.map snd pairs
379- | _ -> type_error "object" v
380381 let update key value = function
382 | `O pairs ->
383 let rec go = function
384- | [] -> [(key, value)]
385 | (k, _) :: rest when k = key -> (key, value) :: rest
386 | kv :: rest -> kv :: go rest
387 in
···393 | v -> type_error "object" v
394395 let combine v1 v2 =
396- match v1, v2 with
397 | `O o1, `O o2 -> `O (o1 @ o2)
398 | `O _, _ -> type_error "object" v2
399 | _, _ -> type_error "object" v1
400401 (** {3 List Operations} *)
402403- let map f = function
404- | `A l -> `A (List.map f l)
405- | v -> type_error "list" v
406-407- let mapi f = function
408- | `A l -> `A (List.mapi f l)
409- | v -> type_error "list" v
410411 let filter pred = function
412 | `A l -> `A (List.filter pred l)
···416 | `A l -> List.fold_left f init l
417 | v -> type_error "list" v
418419- let nth n = function
420- | `A l -> List.nth_opt l n
421- | _ -> None
422-423- let length = function
424- | `A l -> List.length l
425- | `O o -> List.length o
426- | _ -> 0
427428 let flatten = function
429- | `A l ->
430- `A (List.concat_map (function `A inner -> inner | v -> [v]) l)
431 | v -> type_error "list" v
432433 (** {3 Path Operations} *)
···435 let rec get_path path v =
436 match path with
437 | [] -> Some v
438- | key :: rest ->
439- match find key v with
440- | Some child -> get_path rest child
441- | None -> None
442443 let get_path_exn path v =
444- match get_path path v with
445- | Some v -> v
446- | None -> raise Not_found
447448 (** {3 Iteration} *)
449···451 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs
452 | v -> type_error "object" v
453454- let iter_list f = function
455- | `A l -> List.iter f l
456- | v -> type_error "list" v
457458 let fold_obj f init = function
459 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs
···472 (** {3 Conversion Helpers} *)
473474 let to_bool ?default v =
475- match as_bool v, default with
476 | Some b, _ -> b
477 | None, Some d -> d
478 | None, None -> type_error "bool" v
479480 let to_int ?default v =
481- match as_int v, default with
482 | Some i, _ -> i
483 | None, Some d -> d
484 | None, None -> type_error "int" v
485486 let to_float ?default v =
487- match as_float v, default with
488 | Some f, _ -> f
489 | None, Some d -> d
490 | None, None -> type_error "float" v
491492 let to_string ?default v =
493- match as_string v, default with
494 | Some s, _ -> s
495 | None, Some d -> d
496 | None, None -> type_error "string" v
497498 let to_list ?default v =
499- match as_list v, default with
500 | Some l, _ -> l
501 | None, Some d -> d
502 | None, None -> type_error "list" v
503end
504-505506(** {2 Stream - Low-Level Event API} *)
507···521 type position = Position.t
522 (** A position in the source (line, column, byte offset). *)
523524- (** Result of parsing an event. *)
525 type event_result = {
526 event : event;
527 start_pos : position;
528 end_pos : position;
529 }
0530531 (** {3 Parsing} *)
532533 type parser = Parser.t
534 (** A streaming YAML parser. *)
535536- let parser s = Parser.of_string s
537 (** Create a parser from a string. *)
053800539 let next p =
540 match Parser.next p with
541 | Some { event; span } ->
542- Some {
543- event;
544- start_pos = span.start;
545- end_pos = span.stop;
546- }
547 | None -> None
548- (** Get the next event from the parser.
549- Returns [None] when parsing is complete. *)
5500551 let iter f p =
552 let rec go () =
553 match next p with
···557 | None -> ()
558 in
559 go ()
560- (** Iterate over all events from the parser. *)
5610562 let fold f init p =
563 let rec go acc =
564 match Parser.next p with
···566 | None -> acc
567 in
568 go init
569- (** Fold over all events from the parser. *)
570571 (** {3 Emitting} *)
572573 type emitter = Emitter.t
574 (** A streaming YAML emitter. *)
575576- let emitter ?len:_ () = Emitter.create ()
577 (** Create a new emitter. *)
0578579- let contents e = Emitter.contents e
580 (** Get the emitted YAML string. *)
0581582- let emit e ev = Emitter.emit e ev
583 (** Emit an event. *)
0584585 (** {3 Event Emission Helpers} *)
586587 let stream_start e enc =
588 Emitter.emit e (Event.Stream_start { encoding = enc })
589590- let stream_end e =
591- Emitter.emit e Event.Stream_end
592593 let document_start e ?version ?(implicit = true) () =
594- let version = match version with
0595 | Some `V1_1 -> Some (1, 1)
596 | Some `V1_2 -> Some (1, 2)
597 | None -> None
···602 Emitter.emit e (Event.Document_end { implicit })
603604 let scalar e ?anchor ?tag ?(style = `Any) value =
605- Emitter.emit e (Event.Scalar {
606- anchor;
607- tag;
608- value;
609- plain_implicit = true;
610- quoted_implicit = true;
611- style;
612- })
00613614- let alias e name =
615- Emitter.emit e (Event.Alias { anchor = name })
616617 let sequence_start e ?anchor ?tag ?(style = `Any) () =
618- Emitter.emit e (Event.Sequence_start {
619- anchor;
620- tag;
621- implicit = true;
622- style;
623- })
624625- let sequence_end e =
626- Emitter.emit e Event.Sequence_end
627628 let mapping_start e ?anchor ?tag ?(style = `Any) () =
629- Emitter.emit e (Event.Mapping_start {
630- anchor;
631- tag;
632- implicit = true;
633- style;
634- })
635636- let mapping_end e =
637- Emitter.emit e Event.Mapping_end
638end
639-640641(** {2 Internal Modules} *)
642643-(** These modules are exposed for advanced use cases requiring
644- fine-grained control over parsing, emission, or data structures.
645646 For typical usage, prefer the top-level functions and {!Util}. *)
647648-(** Source position tracking. *)
649module Position = Position
06500651(** Source span (range of positions). *)
652-module Span = Span
6530654(** Block scalar chomping modes. *)
655-module Chomping = Chomping
656657-(** YAML type tags. *)
658module Tag = Tag
0659660-(** JSON-compatible value type and operations. *)
661module Value = Value
0662663-(** YAML scalar with metadata. *)
664module Scalar = Scalar
06650666(** YAML sequence with metadata. *)
667-module Sequence = Sequence
6680669(** YAML mapping with metadata. *)
670-module Mapping = Mapping
671672-(** Full YAML value type. *)
673module Yaml = Yaml
0674675-(** YAML document with directives. *)
676module Document = Document
0677678-(** Lexical tokens. *)
679module Token = Token
06800681(** Lexical scanner. *)
682-module Scanner = Scanner
683684-(** Parser events. *)
685module Event = Event
06860687(** Event-based parser. *)
688-module Parser = Parser
6890690(** Document loader. *)
691-module Loader = Loader
692693-(** Event-based emitter. *)
694module Emitter = Emitter
0695696-(** Input stream utilities. *)
697module Input = Input
06980699(** Buffer serialization utilities. *)
700-module Serialize = Serialize
···1112exception Yamlrw_error = Error.Yamlrw_error
13014(** {2 Core Types} *)
1516+type value =
17+ [ `Null (** YAML null, ~, or empty values *)
18+ | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
19+ | `Float of float (** All YAML numbers (integers stored as floats) *)
20+ | `String of string (** YAML strings *)
21+ | `A of value list (** YAML sequences/arrays *)
22+ | `O of (string * value) list (** YAML mappings/objects with string keys *)
23+ ]
24(** JSON-compatible YAML representation. Use this for simple data interchange.
2526 This type is structurally equivalent to {!Value.t} and compatible with the
27+ ezjsonm representation. For additional operations, see {!Value} and {!Util}.
28+*)
00000002930+type yaml =
31+ [ `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
32+ | `Alias of string (** Alias reference to an anchored node *)
33+ | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
34+ | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
35+ ]
36(** Full YAML representation preserving anchors, tags, and aliases.
3738 This type is structurally equivalent to {!Yaml.t}. Use this when you need
···40 type tags for custom types, scalar styles (plain, quoted, literal, folded),
41 and collection styles (block vs flow).
4243+ For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and
44+ {!Mapping}. *)
000004546+type document = {
47+ version : (int * int) option;
48+ (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
49+ tags : (string * string) list;
50+ (** TAG directives mapping handles to prefixes *)
51+ root : yaml option; (** Root content of the document *)
52+ implicit_start : bool;
53+ (** Whether the document start marker (---) is implicit *)
54+ implicit_end : bool; (** Whether the document end marker (...) is implicit *)
55+}
56(** A YAML document with directives and metadata.
5758 This type is structurally equivalent to {!Document.t}. A YAML stream can
59 contain multiple documents, each separated by document markers.
6061 For additional operations, see {!Document}. *)
000000006263(** {2 Character Encoding} *)
6465module Encoding = Encoding
06667(** {2 Parsing} *)
68···74(** Default maximum alias nesting depth (100). *)
75let default_max_alias_depth = Yaml.default_max_alias_depth
7600000077(** Parse a YAML string into a JSON-compatible value.
7879 @param resolve_aliases Whether to expand aliases (default: true)
80 @param max_nodes Maximum nodes during alias expansion (default: 10M)
81 @param max_depth Maximum alias nesting depth (default: 100)
82 @raise Yamlrw_error on parse error or if multiple documents found *)
83+let of_string ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
84+ ?(max_depth = default_max_alias_depth) s : value =
85+ (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value)
8600000087(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
8889 By default, aliases are NOT resolved, preserving the document structure.
···92 @param max_nodes Maximum nodes during alias expansion (default: 10M)
93 @param max_depth Maximum alias nesting depth (default: 100)
94 @raise Yamlrw_error on parse error or if multiple documents found *)
95+let yaml_of_string ?(resolve_aliases = false)
96+ ?(max_nodes = default_max_alias_nodes)
97+ ?(max_depth = default_max_alias_depth) s : yaml =
98+ (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml)
99000000000100(** Parse a multi-document YAML stream.
101102+ Use this when your YAML input contains multiple documents separated by
103+ document markers (---).
104105 @raise Yamlrw_error on parse error *)
106+let documents_of_string s : document list =
107+ let docs = Loader.documents_of_string s in
108+ List.map
109+ (fun (d : Document.t) : document ->
110+ {
111+ version = d.version;
112+ tags = d.tags;
113+ root = (d.root :> yaml option);
114+ implicit_start = d.implicit_start;
115+ implicit_end = d.implicit_end;
116+ })
117+ docs
118119(** {2 Formatting Styles} *)
120121module Scalar_style = Scalar_style
0122module Layout_style = Layout_style
1230124(** {2 Serialization} *)
125126let make_config ~encoding ~scalar_style ~layout_style =
127 { Emitter.default_config with encoding; scalar_style; layout_style }
12800000000129(** Serialize a value to a buffer.
130131 @param encoding Output encoding (default: UTF-8)
132 @param scalar_style Preferred scalar style (default: Any)
133 @param layout_style Preferred layout style (default: Any)
134+ @param buffer
135+ Optional buffer to append to (allocates new one if not provided)
136 @return The buffer containing the serialized YAML *)
137+let to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
138+ ?buffer (value : value) =
139+ let config = make_config ~encoding ~scalar_style ~layout_style in
140+ Serialize.value_to_buffer ~config ?buffer (value :> Value.t)
141000000142(** Serialize a value to a YAML string.
143144 @param encoding Output encoding (default: UTF-8)
145 @param scalar_style Preferred scalar style (default: Any)
146 @param layout_style Preferred layout style (default: Any) *)
147+let to_string ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
148+ (value : value) =
149+ Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value)
15000000000151(** Serialize a full YAML value to a buffer.
152153 @param encoding Output encoding (default: UTF-8)
154 @param scalar_style Preferred scalar style (default: Any)
155 @param layout_style Preferred layout style (default: Any)
156+ @param buffer
157+ Optional buffer to append to (allocates new one if not provided)
158 @return The buffer containing the serialized YAML *)
159+let yaml_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any)
160+ ?(layout_style = `Any) ?buffer (yaml : yaml) =
161+ let config = make_config ~encoding ~scalar_style ~layout_style in
162+ Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t)
163000000164(** Serialize a full YAML value to a string.
165166 @param encoding Output encoding (default: UTF-8)
167 @param scalar_style Preferred scalar style (default: Any)
168 @param layout_style Preferred layout style (default: Any) *)
169+let yaml_to_string ?(encoding = `Utf8) ?(scalar_style = `Any)
170+ ?(layout_style = `Any) (yaml : yaml) =
171+ Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml)
1720000000000000000173(** Serialize multiple documents to a buffer.
174175 @param encoding Output encoding (default: UTF-8)
176 @param scalar_style Preferred scalar style (default: Any)
177 @param layout_style Preferred layout style (default: Any)
178 @param resolve_aliases Whether to expand aliases (default: true)
179+ @param buffer
180+ Optional buffer to append to (allocates new one if not provided)
181 @return The buffer containing the serialized YAML *)
182+let documents_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any)
183+ ?(layout_style = `Any) ?(resolve_aliases = true) ?buffer
0000184 (documents : document list) =
185+ let config = make_config ~encoding ~scalar_style ~layout_style in
186+ let docs' =
187+ List.map
188+ (fun (d : document) : Document.t ->
189+ {
190+ Document.version = d.version;
191+ Document.tags = d.tags;
192+ Document.root = (d.root :> Yaml.t option);
193+ Document.implicit_start = d.implicit_start;
194+ Document.implicit_end = d.implicit_end;
195+ })
196+ documents
197+ in
198+ Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs'
199+200(** Serialize multiple documents to a YAML stream.
201202 @param encoding Output encoding (default: UTF-8)
203 @param scalar_style Preferred scalar style (default: Any)
204 @param layout_style Preferred layout style (default: Any)
205 @param resolve_aliases Whether to expand aliases (default: true) *)
206+let documents_to_string ?(encoding = `Utf8) ?(scalar_style = `Any)
207+ ?(layout_style = `Any) ?(resolve_aliases = true) (documents : document list)
208+ =
209+ Buffer.contents
210+ (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases
211+ documents)
212213(** {2 Buffer Parsing} *)
214000000215(** Parse YAML from a buffer into a JSON-compatible value.
216217 @param resolve_aliases Whether to expand aliases (default: true)
218 @param max_nodes Maximum nodes during alias expansion (default: 10M)
219 @param max_depth Maximum alias nesting depth (default: 100)
220 @raise Yamlrw_error on parse error or if multiple documents found *)
221+let of_buffer ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
222+ ?(max_depth = default_max_alias_depth) buffer : value =
223+ of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
224000000225(** Parse YAML from a buffer preserving full YAML metadata.
226227 @param resolve_aliases Whether to expand aliases (default: false)
228 @param max_nodes Maximum nodes during alias expansion (default: 10M)
229 @param max_depth Maximum alias nesting depth (default: 100)
230 @raise Yamlrw_error on parse error or if multiple documents found *)
231+let yaml_of_buffer ?(resolve_aliases = false)
232+ ?(max_nodes = default_max_alias_nodes)
233+ ?(max_depth = default_max_alias_depth) buffer : yaml =
234+ yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
23500236(** Parse a multi-document YAML stream from a buffer.
237238 @raise Yamlrw_error on parse error *)
239+let documents_of_buffer buffer : document list =
240+ documents_of_string (Buffer.contents buffer)
241242(** {2 Conversion} *)
243000000244(** Convert full YAML to JSON-compatible value.
245246 @param resolve_aliases Whether to expand aliases (default: true)
247 @param max_nodes Maximum nodes during alias expansion (default: 10M)
248 @param max_depth Maximum alias nesting depth (default: 100)
249 @raise Yamlrw_error if alias limits exceeded or complex keys found *)
250+let to_json ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
251+ ?(max_depth = default_max_alias_depth) (yaml : yaml) : value =
252+ (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
253+ (yaml :> Yaml.t)
254+ :> value)
25500256(** Convert JSON-compatible value to full YAML representation. *)
257+let of_json (value : value) : yaml = (Yaml.of_value (value :> Value.t) :> yaml)
258259(** {2 Pretty Printing & Equality} *)
260261+(** Pretty-print a value. *)
262let pp = Value.pp
02630264(** Test equality of two values. *)
265+let equal = Value.equal
266267(** {2 Util - Value Combinators} *)
268269module Util = struct
270 (** Combinators for working with {!type:value} values.
271272+ This module provides constructors, accessors, and transformations for
273+ JSON-compatible YAML values. *)
274275 type t = Value.t
276···325 let get_string v = match v with `String s -> s | _ -> type_error "string" v
326 let get_list v = match v with `A l -> l | _ -> type_error "list" v
327 let get_obj v = match v with `O o -> o | _ -> type_error "object" v
328+ let get_int v = match as_int v with Some i -> i | None -> type_error "int" v
0000329330 (** {3 Object Operations} *)
331···333 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
334 | _ -> false
335336+ let find key = function `O pairs -> List.assoc_opt key pairs | _ -> None
337+ let get key v = match find key v with Some v -> v | None -> raise Not_found
0338339+ let keys v =
340+ match v with `O pairs -> List.map fst pairs | _ -> type_error "object" v
00341342+ let values v =
343+ match v with `O pairs -> List.map snd pairs | _ -> type_error "object" v
00000344345 let update key value = function
346 | `O pairs ->
347 let rec go = function
348+ | [] -> [ (key, value) ]
349 | (k, _) :: rest when k = key -> (key, value) :: rest
350 | kv :: rest -> kv :: go rest
351 in
···357 | v -> type_error "object" v
358359 let combine v1 v2 =
360+ match (v1, v2) with
361 | `O o1, `O o2 -> `O (o1 @ o2)
362 | `O _, _ -> type_error "object" v2
363 | _, _ -> type_error "object" v1
364365 (** {3 List Operations} *)
366367+ let map f = function `A l -> `A (List.map f l) | v -> type_error "list" v
368+ let mapi f = function `A l -> `A (List.mapi f l) | v -> type_error "list" v
00000369370 let filter pred = function
371 | `A l -> `A (List.filter pred l)
···375 | `A l -> List.fold_left f init l
376 | v -> type_error "list" v
377378+ let nth n = function `A l -> List.nth_opt l n | _ -> None
379+ let length = function `A l -> List.length l | `O o -> List.length o | _ -> 0
000000380381 let flatten = function
382+ | `A l -> `A (List.concat_map (function `A inner -> inner | v -> [ v ]) l)
0383 | v -> type_error "list" v
384385 (** {3 Path Operations} *)
···387 let rec get_path path v =
388 match path with
389 | [] -> Some v
390+ | key :: rest -> (
391+ match find key v with Some child -> get_path rest child | None -> None)
00392393 let get_path_exn path v =
394+ match get_path path v with Some v -> v | None -> raise Not_found
00395396 (** {3 Iteration} *)
397···399 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs
400 | v -> type_error "object" v
401402+ let iter_list f = function `A l -> List.iter f l | v -> type_error "list" v
00403404 let fold_obj f init = function
405 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs
···418 (** {3 Conversion Helpers} *)
419420 let to_bool ?default v =
421+ match (as_bool v, default) with
422 | Some b, _ -> b
423 | None, Some d -> d
424 | None, None -> type_error "bool" v
425426 let to_int ?default v =
427+ match (as_int v, default) with
428 | Some i, _ -> i
429 | None, Some d -> d
430 | None, None -> type_error "int" v
431432 let to_float ?default v =
433+ match (as_float v, default) with
434 | Some f, _ -> f
435 | None, Some d -> d
436 | None, None -> type_error "float" v
437438 let to_string ?default v =
439+ match (as_string v, default) with
440 | Some s, _ -> s
441 | None, Some d -> d
442 | None, None -> type_error "string" v
443444 let to_list ?default v =
445+ match (as_list v, default) with
446 | Some l, _ -> l
447 | None, Some d -> d
448 | None, None -> type_error "list" v
449end
0450451(** {2 Stream - Low-Level Event API} *)
452···466 type position = Position.t
467 (** A position in the source (line, column, byte offset). *)
4680469 type event_result = {
470 event : event;
471 start_pos : position;
472 end_pos : position;
473 }
474+ (** Result of parsing an event. *)
475476 (** {3 Parsing} *)
477478 type parser = Parser.t
479 (** A streaming YAML parser. *)
4800481 (** Create a parser from a string. *)
482+ let parser s = Parser.of_string s
483484+ (** Get the next event from the parser. Returns [None] when parsing is
485+ complete. *)
486 let next p =
487 match Parser.next p with
488 | Some { event; span } ->
489+ Some { event; start_pos = span.start; end_pos = span.stop }
0000490 | None -> None
00491492+ (** Iterate over all events from the parser. *)
493 let iter f p =
494 let rec go () =
495 match next p with
···499 | None -> ()
500 in
501 go ()
0502503+ (** Fold over all events from the parser. *)
504 let fold f init p =
505 let rec go acc =
506 match Parser.next p with
···508 | None -> acc
509 in
510 go init
0511512 (** {3 Emitting} *)
513514 type emitter = Emitter.t
515 (** A streaming YAML emitter. *)
5160517 (** Create a new emitter. *)
518+ let emitter ?len:_ () = Emitter.create ()
5190520 (** Get the emitted YAML string. *)
521+ let contents e = Emitter.contents e
5220523 (** Emit an event. *)
524+ let emit e ev = Emitter.emit e ev
525526 (** {3 Event Emission Helpers} *)
527528 let stream_start e enc =
529 Emitter.emit e (Event.Stream_start { encoding = enc })
530531+ let stream_end e = Emitter.emit e Event.Stream_end
0532533 let document_start e ?version ?(implicit = true) () =
534+ let version =
535+ match version with
536 | Some `V1_1 -> Some (1, 1)
537 | Some `V1_2 -> Some (1, 2)
538 | None -> None
···543 Emitter.emit e (Event.Document_end { implicit })
544545 let scalar e ?anchor ?tag ?(style = `Any) value =
546+ Emitter.emit e
547+ (Event.Scalar
548+ {
549+ anchor;
550+ tag;
551+ value;
552+ plain_implicit = true;
553+ quoted_implicit = true;
554+ style;
555+ })
556557+ let alias e name = Emitter.emit e (Event.Alias { anchor = name })
0558559 let sequence_start e ?anchor ?tag ?(style = `Any) () =
560+ Emitter.emit e
561+ (Event.Sequence_start { anchor; tag; implicit = true; style })
0000562563+ let sequence_end e = Emitter.emit e Event.Sequence_end
0564565 let mapping_start e ?anchor ?tag ?(style = `Any) () =
566+ Emitter.emit e (Event.Mapping_start { anchor; tag; implicit = true; style })
00000567568+ let mapping_end e = Emitter.emit e Event.Mapping_end
0569end
0570571(** {2 Internal Modules} *)
572573+(** These modules are exposed for advanced use cases requiring fine-grained
574+ control over parsing, emission, or data structures.
575576 For typical usage, prefer the top-level functions and {!Util}. *)
5770578module Position = Position
579+(** Source position tracking. *)
580581+module Span = Span
582(** Source span (range of positions). *)
0583584+module Chomping = Chomping
585(** Block scalar chomping modes. *)
05860587module Tag = Tag
588+(** YAML type tags. *)
5890590module Value = Value
591+(** JSON-compatible value type and operations. *)
5920593module Scalar = Scalar
594+(** YAML scalar with metadata. *)
595596+module Sequence = Sequence
597(** YAML sequence with metadata. *)
0598599+module Mapping = Mapping
600(** YAML mapping with metadata. *)
06010602module Yaml = Yaml
603+(** Full YAML value type. *)
6040605module Document = Document
606+(** YAML document with directives. *)
6070608module Token = Token
609+(** Lexical tokens. *)
610611+module Scanner = Scanner
612(** Lexical scanner. *)
06130614module Event = Event
615+(** Parser events. *)
616617+module Parser = Parser
618(** Event-based parser. *)
0619620+module Loader = Loader
621(** Document loader. *)
06220623module Emitter = Emitter
624+(** Event-based emitter. *)
6250626module Input = Input
627+(** Input stream utilities. *)
628629+module Serialize = Serialize
630(** Buffer serialization utilities. *)
0
+96-90
lib/yamlrw.mli
···32 let age = Yamlrw.Util.(get_int (get "age" value)) in
33 ]} *)
3435-36(** {2 Error Handling} *)
3738module Error = Error
···40exception Yamlrw_error of Error.t
41(** Raised on parse or emit errors. *)
4243-44(** {2 Core Types} *)
4546-type value = [
47- | `Null (** YAML null, ~, or empty values *)
48- | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
49- | `Float of float (** All YAML numbers (integers stored as floats) *)
50- | `String of string (** YAML strings *)
51- | `A of value list (** YAML sequences/arrays *)
52- | `O of (string * value) list (** YAML mappings/objects with string keys *)
53-]
54(** JSON-compatible YAML representation. Use this for simple data interchange.
5556 This type is structurally equivalent to {!Value.t} and compatible with the
57- ezjsonm representation. For additional operations, see {!Value} and {!Util}. *)
05859-type yaml = [
60- | `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
61- | `Alias of string (** Alias reference to an anchored node *)
62- | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
63- | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
64-]
65(** Full YAML representation preserving anchors, tags, and aliases.
6667 This type is structurally equivalent to {!Yaml.t}. Use this when you need
···69 type tags for custom types, scalar styles (plain, quoted, literal, folded),
70 and collection styles (block vs flow).
7172- For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *)
07374type document = {
75- version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
76- tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
77- root : yaml option; (** Root content of the document *)
78- implicit_start : bool; (** Whether the document start marker (---) is implicit *)
79- implicit_end : bool; (** Whether the document end marker (...) is implicit *)
00080}
81(** A YAML document with directives and metadata.
82···84 contain multiple documents, each separated by document markers.
8586 For additional operations, see {!Document}. *)
87-8889(** {2 Character Encoding} *)
9091module Encoding = Encoding
9293-94(** {2 Parsing} *)
9596type version = [ `V1_1 | `V1_2 ]
···103(** Default maximum alias nesting depth (100). *)
104105val of_string :
106- ?resolve_aliases:bool ->
107- ?max_nodes:int ->
108- ?max_depth:int ->
109- string -> value
110(** Parse a YAML string into a JSON-compatible value.
111112 @param resolve_aliases Whether to expand aliases (default: true)
···115 @raise Yamlrw_error on parse error or if multiple documents found *)
116117val yaml_of_string :
118- ?resolve_aliases:bool ->
119- ?max_nodes:int ->
120- ?max_depth:int ->
121- string -> yaml
122(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
123124 By default, aliases are NOT resolved, preserving the document structure.
···131val documents_of_string : string -> document list
132(** Parse a multi-document YAML stream.
133134- Use this when your YAML input contains multiple documents separated
135- by document markers (---).
136137 @raise Yamlrw_error on parse error *)
138139-140(** {2 Formatting Styles} *)
141142module Scalar_style = Scalar_style
143-144module Layout_style = Layout_style
145-146147(** {2 Serialization} *)
148···151 ?scalar_style:Scalar_style.t ->
152 ?layout_style:Layout_style.t ->
153 ?buffer:Buffer.t ->
154- value -> Buffer.t
0155(** Serialize a value to a buffer.
156157 @param encoding Output encoding (default: UTF-8)
158 @param scalar_style Preferred scalar style (default: Any)
159 @param layout_style Preferred layout style (default: Any)
160- @param buffer Optional buffer to append to (allocates new one if not provided)
0161 @return The buffer containing the serialized YAML *)
162163val to_string :
164 ?encoding:Encoding.t ->
165 ?scalar_style:Scalar_style.t ->
166 ?layout_style:Layout_style.t ->
167- value -> string
0168(** Serialize a value to a YAML string.
169170 @param encoding Output encoding (default: UTF-8)
···176 ?scalar_style:Scalar_style.t ->
177 ?layout_style:Layout_style.t ->
178 ?buffer:Buffer.t ->
179- yaml -> Buffer.t
0180(** Serialize a full YAML value to a buffer.
181182 @param encoding Output encoding (default: UTF-8)
183 @param scalar_style Preferred scalar style (default: Any)
184 @param layout_style Preferred layout style (default: Any)
185- @param buffer Optional buffer to append to (allocates new one if not provided)
0186 @return The buffer containing the serialized YAML *)
187188val yaml_to_string :
189 ?encoding:Encoding.t ->
190 ?scalar_style:Scalar_style.t ->
191 ?layout_style:Layout_style.t ->
192- yaml -> string
0193(** Serialize a full YAML value to a string.
194195 @param encoding Output encoding (default: UTF-8)
···202 ?layout_style:Layout_style.t ->
203 ?resolve_aliases:bool ->
204 ?buffer:Buffer.t ->
205- document list -> Buffer.t
0206(** Serialize multiple documents to a buffer.
207208 @param encoding Output encoding (default: UTF-8)
209 @param scalar_style Preferred scalar style (default: Any)
210 @param layout_style Preferred layout style (default: Any)
211 @param resolve_aliases Whether to expand aliases (default: true)
212- @param buffer Optional buffer to append to (allocates new one if not provided)
0213 @return The buffer containing the serialized YAML *)
214215val documents_to_string :
···217 ?scalar_style:Scalar_style.t ->
218 ?layout_style:Layout_style.t ->
219 ?resolve_aliases:bool ->
220- document list -> string
0221(** Serialize multiple documents to a YAML stream.
222223 @param encoding Output encoding (default: UTF-8)
···228(** {2 Buffer Parsing} *)
229230val of_buffer :
231- ?resolve_aliases:bool ->
232- ?max_nodes:int ->
233- ?max_depth:int ->
234- Buffer.t -> value
235(** Parse YAML from a buffer into a JSON-compatible value.
236237 @param resolve_aliases Whether to expand aliases (default: true)
···240 @raise Yamlrw_error on parse error or if multiple documents found *)
241242val yaml_of_buffer :
243- ?resolve_aliases:bool ->
244- ?max_nodes:int ->
245- ?max_depth:int ->
246- Buffer.t -> yaml
247(** Parse YAML from a buffer preserving full YAML metadata.
248249 @param resolve_aliases Whether to expand aliases (default: false)
···256257 @raise Yamlrw_error on parse error *)
258259-260(** {2 Conversion} *)
261262val to_json :
263- ?resolve_aliases:bool ->
264- ?max_nodes:int ->
265- ?max_depth:int ->
266- yaml -> value
267(** Convert full YAML to JSON-compatible value.
268269 @param resolve_aliases Whether to expand aliases (default: true)
···274val of_json : value -> yaml
275(** Convert JSON-compatible value to full YAML representation. *)
276277-278(** {2 Pretty Printing & Equality} *)
279280val pp : Format.formatter -> value -> unit
···283val equal : value -> value -> bool
284(** Test equality of two values. *)
285286-287(** {2 Util - Value Combinators}
288289 Combinators for working with {!type:value} values.
290291- This module provides constructors, accessors, and transformations
292- for JSON-compatible YAML values. *)
293294module Util : sig
295 type t = Value.t
···400 (** {3 Object Operations} *)
401402 val mem : string -> t -> bool
403- (** [mem key obj] checks if [key] exists in object [obj].
404- Returns [false] if [obj] is not an object. *)
405406 val find : string -> t -> t option
407- (** [find key obj] looks up [key] in object [obj].
408- Returns [None] if key not found or if [obj] is not an object. *)
409410 val get : string -> t -> t
411- (** [get key obj] looks up [key] in object [obj].
412- Raises [Not_found] if key not found. *)
413414 val keys : t -> string list
415 (** Get all keys from an object.
···420 @raise Type_error if not an object *)
421422 val update : string -> t -> t -> t
423- (** [update key value obj] sets [key] to [value] in [obj].
424- Adds the key if it doesn't exist.
425 @raise Type_error if [obj] is not an object *)
426427 val remove : string -> t -> t
···429 @raise Type_error if [obj] is not an object *)
430431 val combine : t -> t -> t
432- (** [combine obj1 obj2] merges two objects, with [obj2] values taking precedence.
0433 @raise Type_error if either argument is not an object *)
434435 (** {3 List Operations} *)
···451 @raise Type_error if [lst] is not a list *)
452453 val nth : int -> t -> t option
454- (** [nth n lst] gets element at index [n].
455- Returns [None] if [lst] is not a list or index out of bounds. *)
456457 val length : t -> int
458 (** Get the length of a list or object. Returns 0 for other types. *)
459460 val flatten : t -> t
461- (** Flatten a list of lists into a single list.
462- Non-list elements are kept as-is.
463 @raise Type_error if not a list *)
464465 (** {3 Path Operations} *)
466467 val get_path : string list -> t -> t option
468- (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c].
469- Returns [None] if any key is not found. *)
470471 val get_path_exn : string list -> t -> t
472 (** Like {!get_path} but raises [Not_found] if path not found. *)
···521 @raise Type_error if type doesn't match and no default provided *)
522end
523524-525(** {2 Stream - Low-Level Event API}
526527 Low-level streaming API for event-based YAML processing.
···532 - Fine-grained control over YAML emission *)
533534module Stream : sig
535-536 (** {3 Event Types} *)
537538 type event = Event.t
···557 (** Create a parser from a string. *)
558559 val next : parser -> event_result option
560- (** Get the next event from the parser.
561- Returns [None] when parsing is complete. *)
562563 val iter : (event -> position -> position -> unit) -> parser -> unit
564 (** [iter f parser] calls [f event start_pos end_pos] for each event. *)
···589 val stream_end : emitter -> unit
590 (** Emit a stream end event. *)
591592- val document_start : emitter -> ?version:version -> ?implicit:bool -> unit -> unit
0593 (** Emit a document start event.
594 @param version YAML version directive
595 @param implicit Whether start marker is implicit (default: true) *)
···598 (** Emit a document end event.
599 @param implicit Whether end marker is implicit (default: true) *)
600601- val scalar : emitter -> ?anchor:string -> ?tag:string -> ?style:Scalar_style.t -> string -> unit
000000602 (** Emit a scalar value.
603 @param anchor Optional anchor name
604 @param tag Optional type tag
···607 val alias : emitter -> string -> unit
608 (** Emit an alias reference. *)
609610- val sequence_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
000000611 (** Emit a sequence start event.
612 @param anchor Optional anchor name
613 @param tag Optional type tag
···616 val sequence_end : emitter -> unit
617 (** Emit a sequence end event. *)
618619- val mapping_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
000000620 (** Emit a mapping start event.
621 @param anchor Optional anchor name
622 @param tag Optional type tag
···626 (** Emit a mapping end event. *)
627end
628629-630(** {2 Internal Modules}
631632- These modules are exposed for advanced use cases requiring
633- fine-grained control over parsing, emission, or data structures.
634635 For typical usage, prefer the top-level functions and {!Util}. *)
636
···32 let age = Yamlrw.Util.(get_int (get "age" value)) in
33 ]} *)
34035(** {2 Error Handling} *)
3637module Error = Error
···39exception Yamlrw_error of Error.t
40(** Raised on parse or emit errors. *)
41042(** {2 Core Types} *)
4344+type value =
45+ [ `Null (** YAML null, ~, or empty values *)
46+ | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
47+ | `Float of float (** All YAML numbers (integers stored as floats) *)
48+ | `String of string (** YAML strings *)
49+ | `A of value list (** YAML sequences/arrays *)
50+ | `O of (string * value) list (** YAML mappings/objects with string keys *)
51+ ]
52(** JSON-compatible YAML representation. Use this for simple data interchange.
5354 This type is structurally equivalent to {!Value.t} and compatible with the
55+ ezjsonm representation. For additional operations, see {!Value} and {!Util}.
56+*)
5758+type yaml =
59+ [ `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
60+ | `Alias of string (** Alias reference to an anchored node *)
61+ | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
62+ | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
63+ ]
64(** Full YAML representation preserving anchors, tags, and aliases.
6566 This type is structurally equivalent to {!Yaml.t}. Use this when you need
···68 type tags for custom types, scalar styles (plain, quoted, literal, folded),
69 and collection styles (block vs flow).
7071+ For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and
72+ {!Mapping}. *)
7374type document = {
75+ version : (int * int) option;
76+ (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
77+ tags : (string * string) list;
78+ (** TAG directives mapping handles to prefixes *)
79+ root : yaml option; (** Root content of the document *)
80+ implicit_start : bool;
81+ (** Whether the document start marker (---) is implicit *)
82+ implicit_end : bool; (** Whether the document end marker (...) is implicit *)
83}
84(** A YAML document with directives and metadata.
85···87 contain multiple documents, each separated by document markers.
8889 For additional operations, see {!Document}. *)
09091(** {2 Character Encoding} *)
9293module Encoding = Encoding
94095(** {2 Parsing} *)
9697type version = [ `V1_1 | `V1_2 ]
···104(** Default maximum alias nesting depth (100). *)
105106val of_string :
107+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> value
000108(** Parse a YAML string into a JSON-compatible value.
109110 @param resolve_aliases Whether to expand aliases (default: true)
···113 @raise Yamlrw_error on parse error or if multiple documents found *)
114115val yaml_of_string :
116+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> yaml
000117(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
118119 By default, aliases are NOT resolved, preserving the document structure.
···126val documents_of_string : string -> document list
127(** Parse a multi-document YAML stream.
128129+ Use this when your YAML input contains multiple documents separated by
130+ document markers (---).
131132 @raise Yamlrw_error on parse error *)
1330134(** {2 Formatting Styles} *)
135136module Scalar_style = Scalar_style
0137module Layout_style = Layout_style
0138139(** {2 Serialization} *)
140···143 ?scalar_style:Scalar_style.t ->
144 ?layout_style:Layout_style.t ->
145 ?buffer:Buffer.t ->
146+ value ->
147+ Buffer.t
148(** Serialize a value to a buffer.
149150 @param encoding Output encoding (default: UTF-8)
151 @param scalar_style Preferred scalar style (default: Any)
152 @param layout_style Preferred layout style (default: Any)
153+ @param buffer
154+ Optional buffer to append to (allocates new one if not provided)
155 @return The buffer containing the serialized YAML *)
156157val to_string :
158 ?encoding:Encoding.t ->
159 ?scalar_style:Scalar_style.t ->
160 ?layout_style:Layout_style.t ->
161+ value ->
162+ string
163(** Serialize a value to a YAML string.
164165 @param encoding Output encoding (default: UTF-8)
···171 ?scalar_style:Scalar_style.t ->
172 ?layout_style:Layout_style.t ->
173 ?buffer:Buffer.t ->
174+ yaml ->
175+ Buffer.t
176(** Serialize a full YAML value to a buffer.
177178 @param encoding Output encoding (default: UTF-8)
179 @param scalar_style Preferred scalar style (default: Any)
180 @param layout_style Preferred layout style (default: Any)
181+ @param buffer
182+ Optional buffer to append to (allocates new one if not provided)
183 @return The buffer containing the serialized YAML *)
184185val yaml_to_string :
186 ?encoding:Encoding.t ->
187 ?scalar_style:Scalar_style.t ->
188 ?layout_style:Layout_style.t ->
189+ yaml ->
190+ string
191(** Serialize a full YAML value to a string.
192193 @param encoding Output encoding (default: UTF-8)
···200 ?layout_style:Layout_style.t ->
201 ?resolve_aliases:bool ->
202 ?buffer:Buffer.t ->
203+ document list ->
204+ Buffer.t
205(** Serialize multiple documents to a buffer.
206207 @param encoding Output encoding (default: UTF-8)
208 @param scalar_style Preferred scalar style (default: Any)
209 @param layout_style Preferred layout style (default: Any)
210 @param resolve_aliases Whether to expand aliases (default: true)
211+ @param buffer
212+ Optional buffer to append to (allocates new one if not provided)
213 @return The buffer containing the serialized YAML *)
214215val documents_to_string :
···217 ?scalar_style:Scalar_style.t ->
218 ?layout_style:Layout_style.t ->
219 ?resolve_aliases:bool ->
220+ document list ->
221+ string
222(** Serialize multiple documents to a YAML stream.
223224 @param encoding Output encoding (default: UTF-8)
···229(** {2 Buffer Parsing} *)
230231val of_buffer :
232+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> Buffer.t -> value
000233(** Parse YAML from a buffer into a JSON-compatible value.
234235 @param resolve_aliases Whether to expand aliases (default: true)
···238 @raise Yamlrw_error on parse error or if multiple documents found *)
239240val yaml_of_buffer :
241+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> Buffer.t -> yaml
000242(** Parse YAML from a buffer preserving full YAML metadata.
243244 @param resolve_aliases Whether to expand aliases (default: false)
···251252 @raise Yamlrw_error on parse error *)
2530254(** {2 Conversion} *)
255256val to_json :
257+ ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> yaml -> value
000258(** Convert full YAML to JSON-compatible value.
259260 @param resolve_aliases Whether to expand aliases (default: true)
···265val of_json : value -> yaml
266(** Convert JSON-compatible value to full YAML representation. *)
2670268(** {2 Pretty Printing & Equality} *)
269270val pp : Format.formatter -> value -> unit
···273val equal : value -> value -> bool
274(** Test equality of two values. *)
2750276(** {2 Util - Value Combinators}
277278 Combinators for working with {!type:value} values.
279280+ This module provides constructors, accessors, and transformations for
281+ JSON-compatible YAML values. *)
282283module Util : sig
284 type t = Value.t
···389 (** {3 Object Operations} *)
390391 val mem : string -> t -> bool
392+ (** [mem key obj] checks if [key] exists in object [obj]. Returns [false] if
393+ [obj] is not an object. *)
394395 val find : string -> t -> t option
396+ (** [find key obj] looks up [key] in object [obj]. Returns [None] if key not
397+ found or if [obj] is not an object. *)
398399 val get : string -> t -> t
400+ (** [get key obj] looks up [key] in object [obj]. Raises [Not_found] if key
401+ not found. *)
402403 val keys : t -> string list
404 (** Get all keys from an object.
···409 @raise Type_error if not an object *)
410411 val update : string -> t -> t -> t
412+ (** [update key value obj] sets [key] to [value] in [obj]. Adds the key if it
413+ doesn't exist.
414 @raise Type_error if [obj] is not an object *)
415416 val remove : string -> t -> t
···418 @raise Type_error if [obj] is not an object *)
419420 val combine : t -> t -> t
421+ (** [combine obj1 obj2] merges two objects, with [obj2] values taking
422+ precedence.
423 @raise Type_error if either argument is not an object *)
424425 (** {3 List Operations} *)
···441 @raise Type_error if [lst] is not a list *)
442443 val nth : int -> t -> t option
444+ (** [nth n lst] gets element at index [n]. Returns [None] if [lst] is not a
445+ list or index out of bounds. *)
446447 val length : t -> int
448 (** Get the length of a list or object. Returns 0 for other types. *)
449450 val flatten : t -> t
451+ (** Flatten a list of lists into a single list. Non-list elements are kept
452+ as-is.
453 @raise Type_error if not a list *)
454455 (** {3 Path Operations} *)
456457 val get_path : string list -> t -> t option
458+ (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c]. Returns
459+ [None] if any key is not found. *)
460461 val get_path_exn : string list -> t -> t
462 (** Like {!get_path} but raises [Not_found] if path not found. *)
···511 @raise Type_error if type doesn't match and no default provided *)
512end
5130514(** {2 Stream - Low-Level Event API}
515516 Low-level streaming API for event-based YAML processing.
···521 - Fine-grained control over YAML emission *)
522523module Stream : sig
0524 (** {3 Event Types} *)
525526 type event = Event.t
···545 (** Create a parser from a string. *)
546547 val next : parser -> event_result option
548+ (** Get the next event from the parser. Returns [None] when parsing is
549+ complete. *)
550551 val iter : (event -> position -> position -> unit) -> parser -> unit
552 (** [iter f parser] calls [f event start_pos end_pos] for each event. *)
···577 val stream_end : emitter -> unit
578 (** Emit a stream end event. *)
579580+ val document_start :
581+ emitter -> ?version:version -> ?implicit:bool -> unit -> unit
582 (** Emit a document start event.
583 @param version YAML version directive
584 @param implicit Whether start marker is implicit (default: true) *)
···587 (** Emit a document end event.
588 @param implicit Whether end marker is implicit (default: true) *)
589590+ val scalar :
591+ emitter ->
592+ ?anchor:string ->
593+ ?tag:string ->
594+ ?style:Scalar_style.t ->
595+ string ->
596+ unit
597 (** Emit a scalar value.
598 @param anchor Optional anchor name
599 @param tag Optional type tag
···602 val alias : emitter -> string -> unit
603 (** Emit an alias reference. *)
604605+ val sequence_start :
606+ emitter ->
607+ ?anchor:string ->
608+ ?tag:string ->
609+ ?style:Layout_style.t ->
610+ unit ->
611+ unit
612 (** Emit a sequence start event.
613 @param anchor Optional anchor name
614 @param tag Optional type tag
···617 val sequence_end : emitter -> unit
618 (** Emit a sequence end event. *)
619620+ val mapping_start :
621+ emitter ->
622+ ?anchor:string ->
623+ ?tag:string ->
624+ ?style:Layout_style.t ->
625+ unit ->
626+ unit
627 (** Emit a mapping start event.
628 @param anchor Optional anchor name
629 @param tag Optional type tag
···633 (** Emit a mapping end event. *)
634end
6350636(** {2 Internal Modules}
637638+ These modules are exposed for advanced use cases requiring fine-grained
639+ control over parsing, emission, or data structures.
640641 For typical usage, prefer the top-level functions and {!Util}. *)
642
+17-8
tests/dune
···1213; Alias to run the full YAML test suite and generate HTML report
14; Requires yaml-test-suite to be cloned to tests/yaml-test-suite
015(rule
16 (alias yaml-test-suite)
17- (deps (source_tree yaml-test-suite))
018 (targets yaml-test-results.html)
19 (action
20- (run %{exe:run_all_tests.exe}
21- --test-suite-path %{workspace_root}/tests/yaml-test-suite
22- --html yaml-test-results.html)))
0002324(rule
25 (alias yaml-test-suite-eio)
26- (deps (source_tree yaml-test-suite))
027 (targets yaml-test-results-eio.html)
28 (action
29- (run %{exe:run_all_tests_eio.exe}
30- --test-suite-path %{workspace_root}/tests/yaml-test-suite
31- --html yaml-test-results-eio.html)))
000
···1213; Alias to run the full YAML test suite and generate HTML report
14; Requires yaml-test-suite to be cloned to tests/yaml-test-suite
15+16(rule
17 (alias yaml-test-suite)
18+ (deps
19+ (source_tree yaml-test-suite))
20 (targets yaml-test-results.html)
21 (action
22+ (run
23+ %{exe:run_all_tests.exe}
24+ --test-suite-path
25+ %{workspace_root}/tests/yaml-test-suite
26+ --html
27+ yaml-test-results.html)))
2829(rule
30 (alias yaml-test-suite-eio)
31+ (deps
32+ (source_tree yaml-test-suite))
33 (targets yaml-test-results-eio.html)
34 (action
35+ (run
36+ %{exe:run_all_tests_eio.exe}
37+ --test-suite-path
38+ %{workspace_root}/tests/yaml-test-suite
39+ --html
40+ yaml-test-results-eio.html)))
+209-144
tests/run_all_tests.ml
···14(* HTML escape function *)
15let html_escape s =
16 let buf = Buffer.create (String.length s) in
17- String.iter (function
18- | '<' -> Buffer.add_string buf "<"
19- | '>' -> Buffer.add_string buf ">"
20- | '&' -> Buffer.add_string buf "&"
21- | '"' -> Buffer.add_string buf """
22- | c -> Buffer.add_char buf c
23- ) s;
024 Buffer.contents buf
2526let normalize_tree s =
···33 name : string;
34 yaml : string;
35 is_error_test : bool;
36- status : [`Pass | `Fail of string | `Skip];
37 output : string;
38- json_status : [`Pass | `Fail of string | `Skip];
39 json_expected : string;
40 json_actual : string;
41}
···45 This handles formatting differences and object key ordering. *)
46 JC.compare_json_strings expected actual
4748-let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string =
049 match test.json with
50 | None -> (`Skip, "")
51- | Some expected_json ->
52 if test.fail then
53 (* Error tests shouldn't have JSON comparison *)
54 (`Skip, "")
···56 try
57 (* Handle multi-document YAML by using documents_of_string *)
58 let docs = Loader.documents_of_string test.yaml in
59- let values = List.filter_map (fun doc ->
60- match Document.root doc with
61- | None -> None
62- | Some yaml -> Some (Yaml.to_value ~resolve_aliases_first:true yaml)
63- ) docs in
64- let actual_json = match values with
65- | [] -> "" (* Empty document produces empty JSON *)
66- | [v] -> JF.to_json v
0000067 | vs -> JF.documents_to_json vs
68 in
69- if compare_json expected_json actual_json then
70- (`Pass, actual_json)
71- else
72- (`Fail "JSON mismatch", actual_json)
73 with
74 | Yamlrw_error e ->
75 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
76 | exn ->
77 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "")
07879let run_test (test : TL.test_case) : test_result =
80 let json_status, json_actual = run_json_test test in
81- let base = {
82- id = test.id;
83- name = test.name;
84- yaml = test.yaml;
85- is_error_test = test.fail;
86- status = `Skip;
87- output = "";
88- json_status;
89- json_expected = Option.value ~default:"" test.json;
90- json_actual;
91- } in
0092 if test.fail then begin
93 try
94 let parser = Parser.of_string test.yaml in
95 let events = Parser.to_list parser in
96 let tree = TF.of_spanned_events events in
97- { base with
98- status = `Fail "Expected parsing to fail";
99- output = tree;
100- }
101 with
102 | Yamlrw_error e ->
103- { base with
104- status = `Pass;
105- output = Format.asprintf "%a" Error.pp e;
106- }
107- | exn ->
108- { base with
109- status = `Pass;
110- output = Printexc.to_string exn;
111- }
112 end
113 else begin
114 match test.tree with
115- | None ->
116 (* No expected tree - check if json indicates expected success *)
117- (match test.json with
118- | Some _ ->
119- (* Has json output, so should parse successfully *)
120- (try
121- let parser = Parser.of_string test.yaml in
122- let events = Parser.to_list parser in
123- let tree = TF.of_spanned_events events in
124- { base with status = `Pass; output = tree }
125- with exn ->
126- { base with
127- status = `Fail (Printf.sprintf "Should parse but got: %s" (Printexc.to_string exn));
128- output = Printexc.to_string exn;
129- })
130- | None ->
131- (* No tree, no json, no fail - ambiguous edge case, skip *)
132- { base with status = `Skip; output = "(no expected tree or json)" })
133- | Some expected ->
0000134 try
135 let parser = Parser.of_string test.yaml in
136 let events = Parser.to_list parser in
···140 if expected_norm = actual_norm then
141 { base with status = `Pass; output = actual }
142 else
143- { base with
0144 status = `Fail (Printf.sprintf "Tree mismatch");
145- output = Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm actual_norm;
00146 }
147 with exn ->
148- { base with
149- status = `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
00150 output = Printexc.to_string exn;
151- }
152 end
153154let status_class = function
···163164let generate_html results output_file =
165 let oc = open_out output_file in
166- let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
167- let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
168- let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
000000000169 let total = List.length results in
170- let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
171- let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
172- let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in
000000000173174- Printf.fprintf oc {|<!DOCTYPE html>
0175<html lang="en">
176<head>
177 <meta charset="UTF-8">
···335 <input type="text" class="search" placeholder="Search by ID or name...">
336 </div>
337 <div class="tests">
338-|} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count;
00339340- List.iter (fun result ->
341- let error_badge = if result.is_error_test then
342- {|<span class="badge error-test">Error Test</span>|}
343- else "" in
344- let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
345- (status_class result.json_status) (status_text result.json_status) in
346- let json_section = if result.json_expected <> "" || result.json_actual <> "" then
347- Printf.sprintf {|
000000000348 <div class="section">
349 <div class="section-title">Expected JSON</div>
350 <pre>%s</pre>
···353 <div class="section-title">Actual JSON</div>
354 <pre>%s</pre>
355 </div>|}
356- (html_escape result.json_expected)
357- (html_escape result.json_actual)
358- else "" in
359- Printf.fprintf oc {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
00360 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
361 <span class="expand-icon">▶</span>
362 <span class="badge %s">%s</span>
···377 </div>
378 </div>
379|}
380- (status_class result.status)
381- (status_class result.json_status)
382- (html_escape result.id)
383- (html_escape (String.lowercase_ascii result.name))
384- (status_class result.status)
385- (status_text result.status)
386- json_badge
387- (html_escape result.id)
388- (html_escape result.name)
389- error_badge
390- (html_escape result.yaml)
391- (html_escape result.output)
392- json_section
393- ) results;
394395- Printf.fprintf oc {| </div>
0396 </div>
397 <script>
398 document.querySelectorAll('.filter-btn').forEach(btn => {
···425 let html_output = ref None in
426 let show_skipped = ref false in
427 let test_suite_path_ref = ref test_suite_path in
428- let args = [
429- "--html", Arg.String (fun s -> html_output := Some s),
430- "<file> Generate HTML report to file";
431- "--show-skipped", Arg.Set show_skipped,
432- " Show details of skipped tests";
433- "--test-suite-path", Arg.Set_string test_suite_path_ref,
434- "<path> Path to yaml-test-suite directory";
435- ] in
436- Arg.parse args (fun _ -> ()) "Usage: run_all_tests [--html <file>] [--show-skipped] [--test-suite-path <path>]";
000000437438 let all_tests = TL.load_directory !test_suite_path_ref in
439 Printf.printf "Total tests loaded: %d\n%!" (List.length all_tests);
440441 let results = List.map run_test all_tests in
442443- let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
444- let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
445- let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
000000000446447- let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
448- let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
449- let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in
000000000450451- Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!"
452- pass_count fail_count skip_count (pass_count + fail_count + skip_count);
00453454- Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!"
455- json_pass_count json_fail_count json_skip_count;
456457 if fail_count > 0 then begin
458 Printf.printf "\nFailing event tree tests:\n";
459- List.iter (fun r ->
460- match r.status with
461- | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
462- | _ -> ()
463- ) results
0464 end;
465466 if json_fail_count > 0 then begin
467 Printf.printf "\nFailing JSON tests:\n";
468- List.iter (fun r ->
469- match r.json_status with
470- | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
471- | _ -> ()
472- ) results
0473 end;
474475 if !show_skipped && skip_count > 0 then begin
476 Printf.printf "\nSkipped tests (no expected tree):\n";
477- List.iter (fun r ->
478- if r.status = `Skip then begin
479- Printf.printf " %s: %s\n" r.id r.name;
480- Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml)
481- (if String.length r.yaml <= 60 then r.yaml
482- else String.sub r.yaml 0 60 ^ "...")
483- end
484- ) results
0485 end;
486487 (match !html_output with
···491 | None -> ());
492493 (* Exit with non-zero code if any tests failed *)
494- if fail_count > 0 || json_fail_count > 0 then
495- exit 1
···14(* HTML escape function *)
15let html_escape s =
16 let buf = Buffer.create (String.length s) in
17+ String.iter
18+ (function
19+ | '<' -> Buffer.add_string buf "<"
20+ | '>' -> Buffer.add_string buf ">"
21+ | '&' -> Buffer.add_string buf "&"
22+ | '"' -> Buffer.add_string buf """
23+ | c -> Buffer.add_char buf c)
24+ s;
25 Buffer.contents buf
2627let normalize_tree s =
···34 name : string;
35 yaml : string;
36 is_error_test : bool;
37+ status : [ `Pass | `Fail of string | `Skip ];
38 output : string;
39+ json_status : [ `Pass | `Fail of string | `Skip ];
40 json_expected : string;
41 json_actual : string;
42}
···46 This handles formatting differences and object key ordering. *)
47 JC.compare_json_strings expected actual
4849+let run_json_test (test : TL.test_case) :
50+ [ `Pass | `Fail of string | `Skip ] * string =
51 match test.json with
52 | None -> (`Skip, "")
53+ | Some expected_json -> (
54 if test.fail then
55 (* Error tests shouldn't have JSON comparison *)
56 (`Skip, "")
···58 try
59 (* Handle multi-document YAML by using documents_of_string *)
60 let docs = Loader.documents_of_string test.yaml in
61+ let values =
62+ List.filter_map
63+ (fun doc ->
64+ match Document.root doc with
65+ | None -> None
66+ | Some yaml ->
67+ Some (Yaml.to_value ~resolve_aliases_first:true yaml))
68+ docs
69+ in
70+ let actual_json =
71+ match values with
72+ | [] -> "" (* Empty document produces empty JSON *)
73+ | [ v ] -> JF.to_json v
74 | vs -> JF.documents_to_json vs
75 in
76+ if compare_json expected_json actual_json then (`Pass, actual_json)
77+ else (`Fail "JSON mismatch", actual_json)
0078 with
79 | Yamlrw_error e ->
80 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
81 | exn ->
82 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "")
83+ )
8485let run_test (test : TL.test_case) : test_result =
86 let json_status, json_actual = run_json_test test in
87+ let base =
88+ {
89+ id = test.id;
90+ name = test.name;
91+ yaml = test.yaml;
92+ is_error_test = test.fail;
93+ status = `Skip;
94+ output = "";
95+ json_status;
96+ json_expected = Option.value ~default:"" test.json;
97+ json_actual;
98+ }
99+ in
100 if test.fail then begin
101 try
102 let parser = Parser.of_string test.yaml in
103 let events = Parser.to_list parser in
104 let tree = TF.of_spanned_events events in
105+ { base with status = `Fail "Expected parsing to fail"; output = tree }
000106 with
107 | Yamlrw_error e ->
108+ { base with status = `Pass; output = Format.asprintf "%a" Error.pp e }
109+ | exn -> { base with status = `Pass; output = Printexc.to_string exn }
0000000110 end
111 else begin
112 match test.tree with
113+ | None -> (
114 (* No expected tree - check if json indicates expected success *)
115+ match test.json with
116+ | Some _ -> (
117+ (* Has json output, so should parse successfully *)
118+ try
119+ let parser = Parser.of_string test.yaml in
120+ let events = Parser.to_list parser in
121+ let tree = TF.of_spanned_events events in
122+ { base with status = `Pass; output = tree }
123+ with exn ->
124+ {
125+ base with
126+ status =
127+ `Fail
128+ (Printf.sprintf "Should parse but got: %s"
129+ (Printexc.to_string exn));
130+ output = Printexc.to_string exn;
131+ })
132+ | None ->
133+ (* No tree, no json, no fail - ambiguous edge case, skip *)
134+ { base with status = `Skip; output = "(no expected tree or json)" })
135+ | Some expected -> (
136 try
137 let parser = Parser.of_string test.yaml in
138 let events = Parser.to_list parser in
···142 if expected_norm = actual_norm then
143 { base with status = `Pass; output = actual }
144 else
145+ {
146+ base with
147 status = `Fail (Printf.sprintf "Tree mismatch");
148+ output =
149+ Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm
150+ actual_norm;
151 }
152 with exn ->
153+ {
154+ base with
155+ status =
156+ `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
157 output = Printexc.to_string exn;
158+ })
159 end
160161let status_class = function
···170171let generate_html results output_file =
172 let oc = open_out output_file in
173+ let pass_count =
174+ List.length (List.filter (fun r -> r.status = `Pass) results)
175+ in
176+ let fail_count =
177+ List.length
178+ (List.filter
179+ (fun r -> match r.status with `Fail _ -> true | _ -> false)
180+ results)
181+ in
182+ let skip_count =
183+ List.length (List.filter (fun r -> r.status = `Skip) results)
184+ in
185 let total = List.length results in
186+ let json_pass_count =
187+ List.length (List.filter (fun r -> r.json_status = `Pass) results)
188+ in
189+ let json_fail_count =
190+ List.length
191+ (List.filter
192+ (fun r -> match r.json_status with `Fail _ -> true | _ -> false)
193+ results)
194+ in
195+ let json_skip_count =
196+ List.length (List.filter (fun r -> r.json_status = `Skip) results)
197+ in
198199+ Printf.fprintf oc
200+ {|<!DOCTYPE html>
201<html lang="en">
202<head>
203 <meta charset="UTF-8">
···361 <input type="text" class="search" placeholder="Search by ID or name...">
362 </div>
363 <div class="tests">
364+|}
365+ pass_count fail_count skip_count total json_pass_count json_fail_count
366+ json_skip_count;
367368+ List.iter
369+ (fun result ->
370+ let error_badge =
371+ if result.is_error_test then
372+ {|<span class="badge error-test">Error Test</span>|}
373+ else ""
374+ in
375+ let json_badge =
376+ Printf.sprintf
377+ {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
378+ (status_class result.json_status)
379+ (status_text result.json_status)
380+ in
381+ let json_section =
382+ if result.json_expected <> "" || result.json_actual <> "" then
383+ Printf.sprintf
384+ {|
385 <div class="section">
386 <div class="section-title">Expected JSON</div>
387 <pre>%s</pre>
···390 <div class="section-title">Actual JSON</div>
391 <pre>%s</pre>
392 </div>|}
393+ (html_escape result.json_expected)
394+ (html_escape result.json_actual)
395+ else ""
396+ in
397+ Printf.fprintf oc
398+ {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
399 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
400 <span class="expand-icon">▶</span>
401 <span class="badge %s">%s</span>
···416 </div>
417 </div>
418|}
419+ (status_class result.status)
420+ (status_class result.json_status)
421+ (html_escape result.id)
422+ (html_escape (String.lowercase_ascii result.name))
423+ (status_class result.status)
424+ (status_text result.status)
425+ json_badge (html_escape result.id) (html_escape result.name) error_badge
426+ (html_escape result.yaml)
427+ (html_escape result.output)
428+ json_section)
429+ results;
000430431+ Printf.fprintf oc
432+ {| </div>
433 </div>
434 <script>
435 document.querySelectorAll('.filter-btn').forEach(btn => {
···462 let html_output = ref None in
463 let show_skipped = ref false in
464 let test_suite_path_ref = ref test_suite_path in
465+ let args =
466+ [
467+ ( "--html",
468+ Arg.String (fun s -> html_output := Some s),
469+ "<file> Generate HTML report to file" );
470+ ("--show-skipped", Arg.Set show_skipped, " Show details of skipped tests");
471+ ( "--test-suite-path",
472+ Arg.Set_string test_suite_path_ref,
473+ "<path> Path to yaml-test-suite directory" );
474+ ]
475+ in
476+ Arg.parse args
477+ (fun _ -> ())
478+ "Usage: run_all_tests [--html <file>] [--show-skipped] [--test-suite-path \
479+ <path>]";
480481 let all_tests = TL.load_directory !test_suite_path_ref in
482 Printf.printf "Total tests loaded: %d\n%!" (List.length all_tests);
483484 let results = List.map run_test all_tests in
485486+ let pass_count =
487+ List.length (List.filter (fun r -> r.status = `Pass) results)
488+ in
489+ let fail_count =
490+ List.length
491+ (List.filter
492+ (fun r -> match r.status with `Fail _ -> true | _ -> false)
493+ results)
494+ in
495+ let skip_count =
496+ List.length (List.filter (fun r -> r.status = `Skip) results)
497+ in
498499+ let json_pass_count =
500+ List.length (List.filter (fun r -> r.json_status = `Pass) results)
501+ in
502+ let json_fail_count =
503+ List.length
504+ (List.filter
505+ (fun r -> match r.json_status with `Fail _ -> true | _ -> false)
506+ results)
507+ in
508+ let json_skip_count =
509+ List.length (List.filter (fun r -> r.json_status = `Skip) results)
510+ in
511512+ Printf.printf
513+ "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" pass_count
514+ fail_count skip_count
515+ (pass_count + fail_count + skip_count);
516517+ Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" json_pass_count
518+ json_fail_count json_skip_count;
519520 if fail_count > 0 then begin
521 Printf.printf "\nFailing event tree tests:\n";
522+ List.iter
523+ (fun r ->
524+ match r.status with
525+ | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
526+ | _ -> ())
527+ results
528 end;
529530 if json_fail_count > 0 then begin
531 Printf.printf "\nFailing JSON tests:\n";
532+ List.iter
533+ (fun r ->
534+ match r.json_status with
535+ | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
536+ | _ -> ())
537+ results
538 end;
539540 if !show_skipped && skip_count > 0 then begin
541 Printf.printf "\nSkipped tests (no expected tree):\n";
542+ List.iter
543+ (fun r ->
544+ if r.status = `Skip then begin
545+ Printf.printf " %s: %s\n" r.id r.name;
546+ Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml)
547+ (if String.length r.yaml <= 60 then r.yaml
548+ else String.sub r.yaml 0 60 ^ "...")
549+ end)
550+ results
551 end;
552553 (match !html_output with
···557 | None -> ());
558559 (* Exit with non-zero code if any tests failed *)
560+ if fail_count > 0 || json_fail_count > 0 then exit 1
0
+224-162
tests/run_all_tests_eio.ml
···16(* HTML escape function *)
17let html_escape s =
18 let buf = Buffer.create (String.length s) in
19- String.iter (function
20- | '<' -> Buffer.add_string buf "<"
21- | '>' -> Buffer.add_string buf ">"
22- | '&' -> Buffer.add_string buf "&"
23- | '"' -> Buffer.add_string buf """
24- | c -> Buffer.add_char buf c
25- ) s;
026 Buffer.contents buf
2728let normalize_tree s =
···35 name : string;
36 yaml : string;
37 is_error_test : bool;
38- status : [`Pass | `Fail of string | `Skip];
39 output : string;
40- json_status : [`Pass | `Fail of string | `Skip];
41 json_expected : string;
42 json_actual : string;
43}
4445-let compare_json expected actual =
46- JC.compare_json_strings expected actual
4748-let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string =
049 match test.json with
50 | None -> (`Skip, "")
51- | Some expected_json ->
52- if test.fail then
53- (`Skip, "")
54 else
55 try
56 let docs = Loader.documents_of_string test.yaml in
57- let values = List.filter_map (fun doc ->
58- match Document.root doc with
59- | None -> None
60- | Some yaml -> Some (Yaml.to_value ~resolve_aliases_first:true yaml)
61- ) docs in
62- let actual_json = match values with
0000063 | [] -> ""
64- | [v] -> JF.to_json v
65 | vs -> JF.documents_to_json vs
66 in
67- if compare_json expected_json actual_json then
68- (`Pass, actual_json)
69- else
70- (`Fail "JSON mismatch", actual_json)
71 with
72 | Yamlrw_error e ->
73 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
74 | exn ->
75 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "")
07677let run_test (test : TL.test_case) : test_result =
78 let json_status, json_actual = run_json_test test in
79- let base = {
80- id = test.id;
81- name = test.name;
82- yaml = test.yaml;
83- is_error_test = test.fail;
84- status = `Skip;
85- output = "";
86- json_status;
87- json_expected = Option.value ~default:"" test.json;
88- json_actual;
89- } in
0090 if test.fail then begin
91 try
92 let parser = Parser.of_string test.yaml in
93 let events = Parser.to_list parser in
94 let tree = TF.of_spanned_events events in
95- { base with
96- status = `Fail "Expected parsing to fail";
97- output = tree;
98- }
99 with
100 | Yamlrw_error e ->
101- { base with
102- status = `Pass;
103- output = Format.asprintf "%a" Error.pp e;
104- }
105- | exn ->
106- { base with
107- status = `Pass;
108- output = Printexc.to_string exn;
109- }
110 end
111 else begin
112 match test.tree with
113- | None ->
114- (match test.json with
115- | Some _ ->
116- (try
117- let parser = Parser.of_string test.yaml in
118- let events = Parser.to_list parser in
119- let tree = TF.of_spanned_events events in
120- { base with status = `Pass; output = tree }
121- with exn ->
122- { base with
123- status = `Fail (Printf.sprintf "Should parse but got: %s" (Printexc.to_string exn));
124- output = Printexc.to_string exn;
125- })
126- | None ->
127- { base with status = `Skip; output = "(no expected tree or json)" })
128- | Some expected ->
0000129 try
130 let parser = Parser.of_string test.yaml in
131 let events = Parser.to_list parser in
···135 if expected_norm = actual_norm then
136 { base with status = `Pass; output = actual }
137 else
138- { base with
0139 status = `Fail (Printf.sprintf "Tree mismatch");
140- output = Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm actual_norm;
00141 }
142 with exn ->
143- { base with
144- status = `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
00145 output = Printexc.to_string exn;
146- }
147 end
148149(* Run tests in parallel using Eio fibers *)
150-let run_tests_parallel tests =
151- Eio.Fiber.List.map run_test tests
152153let status_class = function
154 | `Pass -> "pass"
···161 | `Skip -> "SKIP"
162163let generate_html ~fs results output_file =
164- let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
165- let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
166- let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
000000000167 let total = List.length results in
168- let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
169- let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
170- let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in
000000000171172 let buf = Buffer.create 65536 in
173- Printf.bprintf buf {|<!DOCTYPE html>
0174<html lang="en">
175<head>
176 <meta charset="UTF-8">
···344 <input type="text" class="search" placeholder="Search by ID or name...">
345 </div>
346 <div class="tests">
347-|} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count;
00348349- List.iter (fun result ->
350- let error_badge = if result.is_error_test then
351- {|<span class="badge error-test">Error Test</span>|}
352- else "" in
353- let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
354- (status_class result.json_status) (status_text result.json_status) in
355- let json_section = if result.json_expected <> "" || result.json_actual <> "" then
356- Printf.sprintf {|
000000000357 <div class="section">
358 <div class="section-title">Expected JSON</div>
359 <pre>%s</pre>
···362 <div class="section-title">Actual JSON</div>
363 <pre>%s</pre>
364 </div>|}
365- (html_escape result.json_expected)
366- (html_escape result.json_actual)
367- else "" in
368- Printf.bprintf buf {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
00369 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
370 <span class="expand-icon">▶</span>
371 <span class="badge %s">%s</span>
···386 </div>
387 </div>
388|}
389- (status_class result.status)
390- (status_class result.json_status)
391- (html_escape result.id)
392- (html_escape (String.lowercase_ascii result.name))
393- (status_class result.status)
394- (status_text result.status)
395- json_badge
396- (html_escape result.id)
397- (html_escape result.name)
398- error_badge
399- (html_escape result.yaml)
400- (html_escape result.output)
401- json_section
402- ) results;
403404- Printf.bprintf buf {| </div>
0405 </div>
406 <script>
407 document.querySelectorAll('.filter-btn').forEach(btn => {
···438 let show_skipped = ref false in
439 let sequential = ref false in
440 let test_suite_path_ref = ref test_suite_path in
441- let args = [
442- "--html", Arg.String (fun s -> html_output := Some s),
443- "<file> Generate HTML report to file";
444- "--show-skipped", Arg.Set show_skipped,
445- " Show details of skipped tests";
446- "--sequential", Arg.Set sequential,
447- " Run tests sequentially instead of in parallel";
448- "--test-suite-path", Arg.Set_string test_suite_path_ref,
449- "<path> Path to yaml-test-suite directory";
450- ] in
451- Arg.parse args (fun _ -> ()) "Usage: run_all_tests_eio [--html <file>] [--show-skipped] [--sequential] [--test-suite-path <path>]";
0000000452453 Eio_main.run @@ fun env ->
454 (* Use fs (full filesystem) rather than cwd (sandboxed) to allow ".." navigation *)
455 let fs = Eio.Stdenv.fs env in
456 (* Get the absolute path to the test suite *)
457- let test_suite_abs = if Filename.is_relative !test_suite_path_ref then
458- Filename.concat (Sys.getcwd ()) !test_suite_path_ref
459- else
460- !test_suite_path_ref
461 in
462463 let start_time = Unix.gettimeofday () in
464465 (* Load tests using Eio (parallel by default) *)
466- let all_tests = if !sequential then
467- TL.load_directory ~fs test_suite_abs
468- else
469- TL.load_directory_parallel ~fs test_suite_abs
470 in
471 let load_time = Unix.gettimeofday () in
472- Printf.printf "Loaded %d tests in %.3fs\n%!" (List.length all_tests) (load_time -. start_time);
0473474 (* Run tests (parallel or sequential based on flag) *)
475- let results = if !sequential then
476- List.map run_test all_tests
477- else
478- run_tests_parallel all_tests
479 in
480 let run_time = Unix.gettimeofday () in
481 Printf.printf "Ran tests in %.3fs\n%!" (run_time -. load_time);
482483- let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
484- let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
485- let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
000000000486487- let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
488- let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
489- let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in
000000000490491- Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!"
492- pass_count fail_count skip_count (pass_count + fail_count + skip_count);
00493494- Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!"
495- json_pass_count json_fail_count json_skip_count;
496497 if fail_count > 0 then begin
498 Printf.printf "\nFailing event tree tests:\n";
499- List.iter (fun r ->
500- match r.status with
501- | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
502- | _ -> ()
503- ) results
0504 end;
505506 if json_fail_count > 0 then begin
507 Printf.printf "\nFailing JSON tests:\n";
508- List.iter (fun r ->
509- match r.json_status with
510- | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
511- | _ -> ()
512- ) results
0513 end;
514515 if !show_skipped && skip_count > 0 then begin
516 Printf.printf "\nSkipped tests (no expected tree):\n";
517- List.iter (fun r ->
518- if r.status = `Skip then begin
519- Printf.printf " %s: %s\n" r.id r.name;
520- Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml)
521- (if String.length r.yaml <= 60 then r.yaml
522- else String.sub r.yaml 0 60 ^ "...")
523- end
524- ) results
0525 end;
526527 let total_time = Unix.gettimeofday () in
···534 | None -> ());
535536 (* Exit with non-zero code if any tests failed *)
537- if fail_count > 0 || json_fail_count > 0 then
538- exit 1
···16(* HTML escape function *)
17let html_escape s =
18 let buf = Buffer.create (String.length s) in
19+ String.iter
20+ (function
21+ | '<' -> Buffer.add_string buf "<"
22+ | '>' -> Buffer.add_string buf ">"
23+ | '&' -> Buffer.add_string buf "&"
24+ | '"' -> Buffer.add_string buf """
25+ | c -> Buffer.add_char buf c)
26+ s;
27 Buffer.contents buf
2829let normalize_tree s =
···36 name : string;
37 yaml : string;
38 is_error_test : bool;
39+ status : [ `Pass | `Fail of string | `Skip ];
40 output : string;
41+ json_status : [ `Pass | `Fail of string | `Skip ];
42 json_expected : string;
43 json_actual : string;
44}
4546+let compare_json expected actual = JC.compare_json_strings expected actual
04748+let run_json_test (test : TL.test_case) :
49+ [ `Pass | `Fail of string | `Skip ] * string =
50 match test.json with
51 | None -> (`Skip, "")
52+ | Some expected_json -> (
53+ if test.fail then (`Skip, "")
054 else
55 try
56 let docs = Loader.documents_of_string test.yaml in
57+ let values =
58+ List.filter_map
59+ (fun doc ->
60+ match Document.root doc with
61+ | None -> None
62+ | Some yaml ->
63+ Some (Yaml.to_value ~resolve_aliases_first:true yaml))
64+ docs
65+ in
66+ let actual_json =
67+ match values with
68 | [] -> ""
69+ | [ v ] -> JF.to_json v
70 | vs -> JF.documents_to_json vs
71 in
72+ if compare_json expected_json actual_json then (`Pass, actual_json)
73+ else (`Fail "JSON mismatch", actual_json)
0074 with
75 | Yamlrw_error e ->
76 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
77 | exn ->
78 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "")
79+ )
8081let run_test (test : TL.test_case) : test_result =
82 let json_status, json_actual = run_json_test test in
83+ let base =
84+ {
85+ id = test.id;
86+ name = test.name;
87+ yaml = test.yaml;
88+ is_error_test = test.fail;
89+ status = `Skip;
90+ output = "";
91+ json_status;
92+ json_expected = Option.value ~default:"" test.json;
93+ json_actual;
94+ }
95+ in
96 if test.fail then begin
97 try
98 let parser = Parser.of_string test.yaml in
99 let events = Parser.to_list parser in
100 let tree = TF.of_spanned_events events in
101+ { base with status = `Fail "Expected parsing to fail"; output = tree }
000102 with
103 | Yamlrw_error e ->
104+ { base with status = `Pass; output = Format.asprintf "%a" Error.pp e }
105+ | exn -> { base with status = `Pass; output = Printexc.to_string exn }
0000000106 end
107 else begin
108 match test.tree with
109+ | None -> (
110+ match test.json with
111+ | Some _ -> (
112+ try
113+ let parser = Parser.of_string test.yaml in
114+ let events = Parser.to_list parser in
115+ let tree = TF.of_spanned_events events in
116+ { base with status = `Pass; output = tree }
117+ with exn ->
118+ {
119+ base with
120+ status =
121+ `Fail
122+ (Printf.sprintf "Should parse but got: %s"
123+ (Printexc.to_string exn));
124+ output = Printexc.to_string exn;
125+ })
126+ | None ->
127+ { base with status = `Skip; output = "(no expected tree or json)" })
128+ | Some expected -> (
129 try
130 let parser = Parser.of_string test.yaml in
131 let events = Parser.to_list parser in
···135 if expected_norm = actual_norm then
136 { base with status = `Pass; output = actual }
137 else
138+ {
139+ base with
140 status = `Fail (Printf.sprintf "Tree mismatch");
141+ output =
142+ Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm
143+ actual_norm;
144 }
145 with exn ->
146+ {
147+ base with
148+ status =
149+ `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
150 output = Printexc.to_string exn;
151+ })
152 end
153154(* Run tests in parallel using Eio fibers *)
155+let run_tests_parallel tests = Eio.Fiber.List.map run_test tests
0156157let status_class = function
158 | `Pass -> "pass"
···165 | `Skip -> "SKIP"
166167let generate_html ~fs results output_file =
168+ let pass_count =
169+ List.length (List.filter (fun r -> r.status = `Pass) results)
170+ in
171+ let fail_count =
172+ List.length
173+ (List.filter
174+ (fun r -> match r.status with `Fail _ -> true | _ -> false)
175+ results)
176+ in
177+ let skip_count =
178+ List.length (List.filter (fun r -> r.status = `Skip) results)
179+ in
180 let total = List.length results in
181+ let json_pass_count =
182+ List.length (List.filter (fun r -> r.json_status = `Pass) results)
183+ in
184+ let json_fail_count =
185+ List.length
186+ (List.filter
187+ (fun r -> match r.json_status with `Fail _ -> true | _ -> false)
188+ results)
189+ in
190+ let json_skip_count =
191+ List.length (List.filter (fun r -> r.json_status = `Skip) results)
192+ in
193194 let buf = Buffer.create 65536 in
195+ Printf.bprintf buf
196+ {|<!DOCTYPE html>
197<html lang="en">
198<head>
199 <meta charset="UTF-8">
···367 <input type="text" class="search" placeholder="Search by ID or name...">
368 </div>
369 <div class="tests">
370+|}
371+ pass_count fail_count skip_count total json_pass_count json_fail_count
372+ json_skip_count;
373374+ List.iter
375+ (fun result ->
376+ let error_badge =
377+ if result.is_error_test then
378+ {|<span class="badge error-test">Error Test</span>|}
379+ else ""
380+ in
381+ let json_badge =
382+ Printf.sprintf
383+ {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
384+ (status_class result.json_status)
385+ (status_text result.json_status)
386+ in
387+ let json_section =
388+ if result.json_expected <> "" || result.json_actual <> "" then
389+ Printf.sprintf
390+ {|
391 <div class="section">
392 <div class="section-title">Expected JSON</div>
393 <pre>%s</pre>
···396 <div class="section-title">Actual JSON</div>
397 <pre>%s</pre>
398 </div>|}
399+ (html_escape result.json_expected)
400+ (html_escape result.json_actual)
401+ else ""
402+ in
403+ Printf.bprintf buf
404+ {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
405 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
406 <span class="expand-icon">▶</span>
407 <span class="badge %s">%s</span>
···422 </div>
423 </div>
424|}
425+ (status_class result.status)
426+ (status_class result.json_status)
427+ (html_escape result.id)
428+ (html_escape (String.lowercase_ascii result.name))
429+ (status_class result.status)
430+ (status_text result.status)
431+ json_badge (html_escape result.id) (html_escape result.name) error_badge
432+ (html_escape result.yaml)
433+ (html_escape result.output)
434+ json_section)
435+ results;
000436437+ Printf.bprintf buf
438+ {| </div>
439 </div>
440 <script>
441 document.querySelectorAll('.filter-btn').forEach(btn => {
···472 let show_skipped = ref false in
473 let sequential = ref false in
474 let test_suite_path_ref = ref test_suite_path in
475+ let args =
476+ [
477+ ( "--html",
478+ Arg.String (fun s -> html_output := Some s),
479+ "<file> Generate HTML report to file" );
480+ ("--show-skipped", Arg.Set show_skipped, " Show details of skipped tests");
481+ ( "--sequential",
482+ Arg.Set sequential,
483+ " Run tests sequentially instead of in parallel" );
484+ ( "--test-suite-path",
485+ Arg.Set_string test_suite_path_ref,
486+ "<path> Path to yaml-test-suite directory" );
487+ ]
488+ in
489+ Arg.parse args
490+ (fun _ -> ())
491+ "Usage: run_all_tests_eio [--html <file>] [--show-skipped] [--sequential] \
492+ [--test-suite-path <path>]";
493494 Eio_main.run @@ fun env ->
495 (* Use fs (full filesystem) rather than cwd (sandboxed) to allow ".." navigation *)
496 let fs = Eio.Stdenv.fs env in
497 (* Get the absolute path to the test suite *)
498+ let test_suite_abs =
499+ if Filename.is_relative !test_suite_path_ref then
500+ Filename.concat (Sys.getcwd ()) !test_suite_path_ref
501+ else !test_suite_path_ref
502 in
503504 let start_time = Unix.gettimeofday () in
505506 (* Load tests using Eio (parallel by default) *)
507+ let all_tests =
508+ if !sequential then TL.load_directory ~fs test_suite_abs
509+ else TL.load_directory_parallel ~fs test_suite_abs
0510 in
511 let load_time = Unix.gettimeofday () in
512+ Printf.printf "Loaded %d tests in %.3fs\n%!" (List.length all_tests)
513+ (load_time -. start_time);
514515 (* Run tests (parallel or sequential based on flag) *)
516+ let results =
517+ if !sequential then List.map run_test all_tests
518+ else run_tests_parallel all_tests
0519 in
520 let run_time = Unix.gettimeofday () in
521 Printf.printf "Ran tests in %.3fs\n%!" (run_time -. load_time);
522523+ let pass_count =
524+ List.length (List.filter (fun r -> r.status = `Pass) results)
525+ in
526+ let fail_count =
527+ List.length
528+ (List.filter
529+ (fun r -> match r.status with `Fail _ -> true | _ -> false)
530+ results)
531+ in
532+ let skip_count =
533+ List.length (List.filter (fun r -> r.status = `Skip) results)
534+ in
535536+ let json_pass_count =
537+ List.length (List.filter (fun r -> r.json_status = `Pass) results)
538+ in
539+ let json_fail_count =
540+ List.length
541+ (List.filter
542+ (fun r -> match r.json_status with `Fail _ -> true | _ -> false)
543+ results)
544+ in
545+ let json_skip_count =
546+ List.length (List.filter (fun r -> r.json_status = `Skip) results)
547+ in
548549+ Printf.printf
550+ "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" pass_count
551+ fail_count skip_count
552+ (pass_count + fail_count + skip_count);
553554+ Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" json_pass_count
555+ json_fail_count json_skip_count;
556557 if fail_count > 0 then begin
558 Printf.printf "\nFailing event tree tests:\n";
559+ List.iter
560+ (fun r ->
561+ match r.status with
562+ | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
563+ | _ -> ())
564+ results
565 end;
566567 if json_fail_count > 0 then begin
568 Printf.printf "\nFailing JSON tests:\n";
569+ List.iter
570+ (fun r ->
571+ match r.json_status with
572+ | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
573+ | _ -> ())
574+ results
575 end;
576577 if !show_skipped && skip_count > 0 then begin
578 Printf.printf "\nSkipped tests (no expected tree):\n";
579+ List.iter
580+ (fun r ->
581+ if r.status = `Skip then begin
582+ Printf.printf " %s: %s\n" r.id r.name;
583+ Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml)
584+ (if String.length r.yaml <= 60 then r.yaml
585+ else String.sub r.yaml 0 60 ^ "...")
586+ end)
587+ results
588 end;
589590 let total_time = Unix.gettimeofday () in
···597 | None -> ());
598599 (* Exit with non-zero code if any tests failed *)
600+ if fail_count > 0 || json_fail_count > 0 then exit 1
0
···14 | Object of (string * json) list
1516let rec equal a b =
17- match a, b with
18 | Null, Null -> true
19 | Bool a, Bool b -> a = b
20 | Float a, Float b -> Float.equal a b
···22 | Array a, Array b -> List.equal equal a b
23 | Object a, Object b ->
24 (* Compare objects as sets of key-value pairs (order independent) *)
25- let sorted_a = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) a in
26- let sorted_b = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) b in
27- List.length sorted_a = List.length sorted_b &&
28- List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) sorted_a sorted_b
00000029 | _ -> false
3031(* Parse JSON string using jsonm *)
···46 and parse_array acc =
47 match Jsonm.decode decoder with
48 | `Lexeme `Ae -> Ok (Array (List.rev acc))
49- | `Lexeme _ as lex ->
50 (* Push back and parse value *)
51 let result = parse_value_with_lex lex in
52- (match result with
53- | Ok v -> parse_array (v :: acc)
54- | Error _ as e -> e)
55 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
56 | `End -> Error "unexpected end in array"
57 | `Await -> Error "unexpected await"
58 and parse_object acc =
59 match Jsonm.decode decoder with
60 | `Lexeme `Oe -> Ok (Object (List.rev acc))
61- | `Lexeme (`Name key) ->
62- (match parse_value () with
63- | Ok v -> parse_object ((key, v) :: acc)
64- | Error _ as e -> e)
65 | `Lexeme _ -> Error "expected object key"
66 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
67 | `End -> Error "unexpected end in object"
···99 and parse_array acc =
100 match Jsonm.decode decoder with
101 | `Lexeme `Ae -> Some (Array (List.rev acc))
102- | `Lexeme _ as lex ->
103- (match parse_value_with_lex lex with
104- | Some v -> parse_array (v :: acc)
105- | None -> None)
106 | _ -> None
107 and parse_object acc =
108 match Jsonm.decode decoder with
109 | `Lexeme `Oe -> Some (Object (List.rev acc))
110- | `Lexeme (`Name key) ->
111- (match parse_value () with
112- | Some v -> parse_object ((key, v) :: acc)
113- | None -> None)
114 | _ -> None
115 and parse_value_with_lex lex =
116 match lex with
···134 (* Handle empty strings *)
135 let expected_trimmed = String.trim expected in
136 let actual_trimmed = String.trim actual in
137- if expected_trimmed = "" && actual_trimmed = "" then
138- true
139- else if expected_trimmed = "" || actual_trimmed = "" then
140- false
141 else
142 (* Parse as potentially multiple JSON values *)
143 let expected_values = parse_json_multi expected in
144 let actual_values = parse_json_multi actual in
145- List.length expected_values = List.length actual_values &&
146- List.for_all2 equal expected_values actual_values
···14 | Object of (string * json) list
1516let rec equal a b =
17+ match (a, b) with
18 | Null, Null -> true
19 | Bool a, Bool b -> a = b
20 | Float a, Float b -> Float.equal a b
···22 | Array a, Array b -> List.equal equal a b
23 | Object a, Object b ->
24 (* Compare objects as sets of key-value pairs (order independent) *)
25+ let sorted_a =
26+ List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) a
27+ in
28+ let sorted_b =
29+ List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) b
30+ in
31+ List.length sorted_a = List.length sorted_b
32+ && List.for_all2
33+ (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2)
34+ sorted_a sorted_b
35 | _ -> false
3637(* Parse JSON string using jsonm *)
···52 and parse_array acc =
53 match Jsonm.decode decoder with
54 | `Lexeme `Ae -> Ok (Array (List.rev acc))
55+ | `Lexeme _ as lex -> (
56 (* Push back and parse value *)
57 let result = parse_value_with_lex lex in
58+ match result with Ok v -> parse_array (v :: acc) | Error _ as e -> e)
0059 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
60 | `End -> Error "unexpected end in array"
61 | `Await -> Error "unexpected await"
62 and parse_object acc =
63 match Jsonm.decode decoder with
64 | `Lexeme `Oe -> Ok (Object (List.rev acc))
65+ | `Lexeme (`Name key) -> (
66+ match parse_value () with
67+ | Ok v -> parse_object ((key, v) :: acc)
68+ | Error _ as e -> e)
69 | `Lexeme _ -> Error "expected object key"
70 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
71 | `End -> Error "unexpected end in object"
···103 and parse_array acc =
104 match Jsonm.decode decoder with
105 | `Lexeme `Ae -> Some (Array (List.rev acc))
106+ | `Lexeme _ as lex -> (
107+ match parse_value_with_lex lex with
108+ | Some v -> parse_array (v :: acc)
109+ | None -> None)
110 | _ -> None
111 and parse_object acc =
112 match Jsonm.decode decoder with
113 | `Lexeme `Oe -> Some (Object (List.rev acc))
114+ | `Lexeme (`Name key) -> (
115+ match parse_value () with
116+ | Some v -> parse_object ((key, v) :: acc)
117+ | None -> None)
118 | _ -> None
119 and parse_value_with_lex lex =
120 match lex with
···138 (* Handle empty strings *)
139 let expected_trimmed = String.trim expected in
140 let actual_trimmed = String.trim actual in
141+ if expected_trimmed = "" && actual_trimmed = "" then true
142+ else if expected_trimmed = "" || actual_trimmed = "" then false
00143 else
144 (* Parse as potentially multiple JSON values *)
145 let expected_values = parse_json_multi expected in
146 let actual_values = parse_json_multi actual in
147+ List.length expected_values = List.length actual_values
148+ && List.for_all2 equal expected_values actual_values
+37-27
tests/test_suite_lib/json_format.ml
···10let escape_string s =
11 let buf = Buffer.create (String.length s * 2) in
12 Buffer.add_char buf '"';
13- String.iter (fun c ->
14- match c with
15- | '"' -> Buffer.add_string buf "\\\""
16- | '\\' -> Buffer.add_string buf "\\\\"
17- | '\n' -> Buffer.add_string buf "\\n"
18- | '\r' -> Buffer.add_string buf "\\r"
19- | '\t' -> Buffer.add_string buf "\\t"
20- | '\x08' -> Buffer.add_string buf "\\b"
21- | '\x0c' -> Buffer.add_string buf "\\f"
22- | c when Char.code c < 32 ->
23- Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
24- | c -> Buffer.add_char buf c
25- ) s;
026 Buffer.add_char buf '"';
27 Buffer.contents buf
2829-let rec format_value ?(indent=0) (v : Value.t) =
30 let spaces n = String.make n ' ' in
31 match v with
32 | `Null -> "null"
33 | `Bool true -> "true"
34 | `Bool false -> "false"
35 | `Float f ->
36- if Float.is_nan f then "null" (* JSON doesn't support NaN *)
37- else if f = Float.infinity || f = Float.neg_infinity then "null" (* JSON doesn't support Inf *)
038 else if Float.is_integer f && Float.abs f < 1e15 then
39 Printf.sprintf "%.0f" f
40 else
41 (* Try to match yaml-test-suite's number formatting *)
42 let s = Printf.sprintf "%g" f in
43 (* Ensure we have a decimal point for floats *)
44- if String.contains s '.' || String.contains s 'e' || String.contains s 'E' then s
00045 else s ^ ".0"
46 | `String s -> escape_string s
47 | `A [] -> "[]"
48 | `A items ->
49 let inner_indent = indent + 2 in
50- let formatted_items = List.map (fun item ->
51- spaces inner_indent ^ format_value ~indent:inner_indent item
52- ) items in
00053 "[\n" ^ String.concat ",\n" formatted_items ^ "\n" ^ spaces indent ^ "]"
54 | `O [] -> "{}"
55 | `O pairs ->
56 let inner_indent = indent + 2 in
57- let formatted_pairs = List.map (fun (k, v) ->
58- let key = escape_string k in
59- let value = format_value ~indent:inner_indent v in
60- spaces inner_indent ^ key ^ ": " ^ value
61- ) pairs in
00062 "{\n" ^ String.concat ",\n" formatted_pairs ^ "\n" ^ spaces indent ^ "}"
6364-let to_json (v : Value.t) : string =
65- format_value v
6667(* Format multiple documents (for multi-doc YAML) *)
68let documents_to_json (docs : Value.t list) : string =
···10let escape_string s =
11 let buf = Buffer.create (String.length s * 2) in
12 Buffer.add_char buf '"';
13+ String.iter
14+ (fun c ->
15+ match c with
16+ | '"' -> Buffer.add_string buf "\\\""
17+ | '\\' -> Buffer.add_string buf "\\\\"
18+ | '\n' -> Buffer.add_string buf "\\n"
19+ | '\r' -> Buffer.add_string buf "\\r"
20+ | '\t' -> Buffer.add_string buf "\\t"
21+ | '\x08' -> Buffer.add_string buf "\\b"
22+ | '\x0c' -> Buffer.add_string buf "\\f"
23+ | c when Char.code c < 32 ->
24+ Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
25+ | c -> Buffer.add_char buf c)
26+ s;
27 Buffer.add_char buf '"';
28 Buffer.contents buf
2930+let rec format_value ?(indent = 0) (v : Value.t) =
31 let spaces n = String.make n ' ' in
32 match v with
33 | `Null -> "null"
34 | `Bool true -> "true"
35 | `Bool false -> "false"
36 | `Float f ->
37+ if Float.is_nan f then "null" (* JSON doesn't support NaN *)
38+ else if f = Float.infinity || f = Float.neg_infinity then "null"
39+ (* JSON doesn't support Inf *)
40 else if Float.is_integer f && Float.abs f < 1e15 then
41 Printf.sprintf "%.0f" f
42 else
43 (* Try to match yaml-test-suite's number formatting *)
44 let s = Printf.sprintf "%g" f in
45 (* Ensure we have a decimal point for floats *)
46+ if
47+ String.contains s '.' || String.contains s 'e'
48+ || String.contains s 'E'
49+ then s
50 else s ^ ".0"
51 | `String s -> escape_string s
52 | `A [] -> "[]"
53 | `A items ->
54 let inner_indent = indent + 2 in
55+ let formatted_items =
56+ List.map
57+ (fun item ->
58+ spaces inner_indent ^ format_value ~indent:inner_indent item)
59+ items
60+ in
61 "[\n" ^ String.concat ",\n" formatted_items ^ "\n" ^ spaces indent ^ "]"
62 | `O [] -> "{}"
63 | `O pairs ->
64 let inner_indent = indent + 2 in
65+ let formatted_pairs =
66+ List.map
67+ (fun (k, v) ->
68+ let key = escape_string k in
69+ let value = format_value ~indent:inner_indent v in
70+ spaces inner_indent ^ key ^ ": " ^ value)
71+ pairs
72+ in
73 "{\n" ^ String.concat ",\n" formatted_pairs ^ "\n" ^ spaces indent ^ "}"
7475+let to_json (v : Value.t) : string = format_value v
07677(* Format multiple documents (for multi-doc YAML) *)
78let documents_to_json (docs : Value.t list) : string =
+5-10
tests/test_suite_lib/test_suite_loader.ml
···18 Some s
19 with _ -> None
2021- let file_exists () path =
22- Sys.file_exists path
23-24- let is_directory () path =
25- Sys.file_exists path && Sys.is_directory path
26-27- let read_dir () path =
28- Array.to_list (Sys.readdir path)
29end
30031(** Internal loader module *)
32-module Loader = Test_suite_loader_generic.Make(Sync_io)
3334-(** Re-export test_case type from loader *)
35type test_case = Loader.test_case = {
36 id : string;
37 name : string;
···40 json : string option;
41 fail : bool;
42}
04344(** Load tests without needing to pass a context *)
45let load_directory path : test_case list = Loader.load_directory () path
···18 Some s
19 with _ -> None
2021+ let file_exists () path = Sys.file_exists path
22+ let is_directory () path = Sys.file_exists path && Sys.is_directory path
23+ let read_dir () path = Array.to_list (Sys.readdir path)
0000024end
2526+module Loader = Test_suite_loader_generic.Make (Sync_io)
27(** Internal loader module *)
028029type test_case = Loader.test_case = {
30 id : string;
31 name : string;
···34 json : string option;
35 fail : bool;
36}
37+(** Re-export test_case type from loader *)
3839(** Load tests without needing to pass a context *)
40let load_directory path : test_case list = Loader.load_directory () path
+15-15
tests/test_suite_lib/test_suite_loader_eio.ml
···8module Generic = Test_suite_lib.Test_suite_loader_generic
910(** Eio file I/O implementation *)
11-module Eio_io : Generic.FILE_IO with type ctx = Eio.Fs.dir_ty Eio.Path.t = struct
012 type ctx = Eio.Fs.dir_ty Eio.Path.t
1314 let read_file fs path =
15- try
16- Some (Eio.Path.load Eio.Path.(fs / path))
17- with _ -> None
1819 let file_exists fs path =
20 match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with
···28 | _ -> false
29 | exception _ -> false
3031- let read_dir fs path =
32- Eio.Path.read_dir Eio.Path.(fs / path)
33end
34035(** Internal loader module *)
36-module Loader = Generic.Make(Eio_io)
3738-(** Re-export test_case type from loader *)
39type test_case = Loader.test_case = {
40 id : string;
41 name : string;
···44 json : string option;
45 fail : bool;
46}
04748(** Load tests with Eio filesystem context *)
49let load_directory ~fs path : test_case list = Loader.load_directory fs path
···53 if not (Eio_io.is_directory fs test_suite_path) then []
54 else
55 let entries = Eio_io.read_dir fs test_suite_path in
56- let test_ids = entries
057 |> List.filter (fun e ->
58- Eio_io.is_directory fs (Filename.concat test_suite_path e) &&
59- String.length e >= 4 &&
60- e.[0] >= '0' && e.[0] <= 'Z')
061 |> List.sort String.compare
62 in
63 (* Load each test ID in parallel using fibers *)
64- Eio.Fiber.List.map (fun test_id ->
65- Loader.load_test_id fs test_suite_path test_id
66- ) test_ids
67 |> List.concat
···8module Generic = Test_suite_lib.Test_suite_loader_generic
910(** Eio file I/O implementation *)
11+module Eio_io : Generic.FILE_IO with type ctx = Eio.Fs.dir_ty Eio.Path.t =
12+struct
13 type ctx = Eio.Fs.dir_ty Eio.Path.t
1415 let read_file fs path =
16+ try Some (Eio.Path.load Eio.Path.(fs / path)) with _ -> None
001718 let file_exists fs path =
19 match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with
···27 | _ -> false
28 | exception _ -> false
2930+ let read_dir fs path = Eio.Path.read_dir Eio.Path.(fs / path)
031end
3233+module Loader = Generic.Make (Eio_io)
34(** Internal loader module *)
035036type test_case = Loader.test_case = {
37 id : string;
38 name : string;
···41 json : string option;
42 fail : bool;
43}
44+(** Re-export test_case type from loader *)
4546(** Load tests with Eio filesystem context *)
47let load_directory ~fs path : test_case list = Loader.load_directory fs path
···51 if not (Eio_io.is_directory fs test_suite_path) then []
52 else
53 let entries = Eio_io.read_dir fs test_suite_path in
54+ let test_ids =
55+ entries
56 |> List.filter (fun e ->
57+ Eio_io.is_directory fs (Filename.concat test_suite_path e)
58+ && String.length e >= 4
59+ && e.[0] >= '0'
60+ && e.[0] <= 'Z')
61 |> List.sort String.compare
62 in
63 (* Load each test ID in parallel using fibers *)
64+ Eio.Fiber.List.map
65+ (fun test_id -> Loader.load_test_id fs test_suite_path test_id)
66+ test_ids
67 |> List.concat
+40-31
tests/test_suite_lib/test_suite_loader_generic.ml
···56(** Generic test suite loader - parameterized by file I/O operations *)
78-(** Test case representation *)
9type test_case = {
10 id : string;
11 name : string;
···14 json : string option;
15 fail : bool;
16}
01718(** Module type for file I/O operations *)
19module type FILE_IO = sig
20- (** Context type for file operations (unit for sync, ~fs for Eio) *)
21 type ctx
022023 (** Read a file, returning None if it doesn't exist or can't be read *)
24- val read_file : ctx -> string -> string option
2526- (** Check if a path exists and is a regular file *)
27 val file_exists : ctx -> string -> bool
02829- (** Check if a path exists and is a directory *)
30 val is_directory : ctx -> string -> bool
031032 (** List directory entries *)
33- val read_dir : ctx -> string -> string list
34end
3536(** Create a test loader from file I/O operations *)
···45 }
4647 let read_file_required ctx path =
48- match IO.read_file ctx path with
49- | Some s -> s
50- | None -> ""
5152 (** Load a single test from a directory *)
53 let load_test_dir ctx base_id dir_path =
···60 (* Must have in.yaml to be a valid test *)
61 if not (IO.file_exists ctx yaml_file) then None
62 else
63- let name = match IO.read_file ctx name_file with
064 | Some s -> String.trim s
65 | None -> base_id
66 in
···70 let fail = IO.file_exists ctx error_file in
71 Some { id = base_id; name; yaml; tree; json; fail }
7273- (** Load tests from a test ID directory (may have subdirectories for variants) *)
074 let load_test_id ctx test_suite_path test_id =
75 let dir_path = Filename.concat test_suite_path test_id in
76 if not (IO.is_directory ctx dir_path) then []
77 else
78 let entries = IO.read_dir ctx dir_path in
79 (* Check if this directory has variant subdirectories (00, 01, etc.) *)
80- let has_variants = List.exists (fun e ->
81- let subdir = Filename.concat dir_path e in
82- IO.is_directory ctx subdir &&
83- String.length e >= 2 &&
84- e.[0] >= '0' && e.[0] <= '9'
85- ) entries in
00008687 if has_variants then
88 (* Load each variant subdirectory *)
89- let variants = entries
090 |> List.filter (fun e ->
91 let subdir = Filename.concat dir_path e in
92- IO.is_directory ctx subdir &&
93- String.length e >= 2 &&
94- e.[0] >= '0' && e.[0] <= '9')
095 |> List.sort String.compare
96 in
97- List.filter_map (fun variant ->
98- let variant_path = Filename.concat dir_path variant in
99- let variant_id = Printf.sprintf "%s:%s" test_id variant in
100- load_test_dir ctx variant_id variant_path
101- ) variants
0102 else
103 (* Single test in this directory *)
104 match load_test_dir ctx test_id dir_path with
105- | Some t -> [t]
106 | None -> []
107108 (** Load all tests from a test suite directory *)
···110 if not (IO.is_directory ctx test_suite_path) then []
111 else
112 let entries = IO.read_dir ctx test_suite_path in
113- let test_ids = entries
0114 |> List.filter (fun e ->
115- IO.is_directory ctx (Filename.concat test_suite_path e) &&
116- String.length e >= 4 &&
117- e.[0] >= '0' && e.[0] <= 'Z')
0118 |> List.sort String.compare
119 in
120 List.concat_map (load_test_id ctx test_suite_path) test_ids
···56(** Generic test suite loader - parameterized by file I/O operations *)
708type test_case = {
9 id : string;
10 name : string;
···13 json : string option;
14 fail : bool;
15}
16+(** Test case representation *)
1718(** Module type for file I/O operations *)
19module type FILE_IO = sig
020 type ctx
21+ (** Context type for file operations (unit for sync, ~fs for Eio) *)
2223+ val read_file : ctx -> string -> string option
24 (** Read a file, returning None if it doesn't exist or can't be read *)
025026 val file_exists : ctx -> string -> bool
27+ (** Check if a path exists and is a regular file *)
28029 val is_directory : ctx -> string -> bool
30+ (** Check if a path exists and is a directory *)
3132+ val read_dir : ctx -> string -> string list
33 (** List directory entries *)
034end
3536(** Create a test loader from file I/O operations *)
···45 }
4647 let read_file_required ctx path =
48+ match IO.read_file ctx path with Some s -> s | None -> ""
004950 (** Load a single test from a directory *)
51 let load_test_dir ctx base_id dir_path =
···58 (* Must have in.yaml to be a valid test *)
59 if not (IO.file_exists ctx yaml_file) then None
60 else
61+ let name =
62+ match IO.read_file ctx name_file with
63 | Some s -> String.trim s
64 | None -> base_id
65 in
···69 let fail = IO.file_exists ctx error_file in
70 Some { id = base_id; name; yaml; tree; json; fail }
7172+ (** Load tests from a test ID directory (may have subdirectories for variants)
73+ *)
74 let load_test_id ctx test_suite_path test_id =
75 let dir_path = Filename.concat test_suite_path test_id in
76 if not (IO.is_directory ctx dir_path) then []
77 else
78 let entries = IO.read_dir ctx dir_path in
79 (* Check if this directory has variant subdirectories (00, 01, etc.) *)
80+ let has_variants =
81+ List.exists
82+ (fun e ->
83+ let subdir = Filename.concat dir_path e in
84+ IO.is_directory ctx subdir
85+ && String.length e >= 2
86+ && e.[0] >= '0'
87+ && e.[0] <= '9')
88+ entries
89+ in
9091 if has_variants then
92 (* Load each variant subdirectory *)
93+ let variants =
94+ entries
95 |> List.filter (fun e ->
96 let subdir = Filename.concat dir_path e in
97+ IO.is_directory ctx subdir
98+ && String.length e >= 2
99+ && e.[0] >= '0'
100+ && e.[0] <= '9')
101 |> List.sort String.compare
102 in
103+ List.filter_map
104+ (fun variant ->
105+ let variant_path = Filename.concat dir_path variant in
106+ let variant_id = Printf.sprintf "%s:%s" test_id variant in
107+ load_test_dir ctx variant_id variant_path)
108+ variants
109 else
110 (* Single test in this directory *)
111 match load_test_dir ctx test_id dir_path with
112+ | Some t -> [ t ]
113 | None -> []
114115 (** Load all tests from a test suite directory *)
···117 if not (IO.is_directory ctx test_suite_path) then []
118 else
119 let entries = IO.read_dir ctx test_suite_path in
120+ let test_ids =
121+ entries
122 |> List.filter (fun e ->
123+ IO.is_directory ctx (Filename.concat test_suite_path e)
124+ && String.length e >= 4
125+ && e.[0] >= '0'
126+ && e.[0] <= 'Z')
127 |> List.sort String.compare
128 in
129 List.concat_map (load_test_id ctx test_suite_path) test_ids
+27-28
tests/test_suite_lib/tree_format.ml
···910let escape_string s =
11 let buf = Buffer.create (String.length s * 2) in
12- String.iter (fun c ->
13- match c with
14- | '\n' -> Buffer.add_string buf "\\n"
15- | '\t' -> Buffer.add_string buf "\\t"
16- | '\r' -> Buffer.add_string buf "\\r"
17- | '\\' -> Buffer.add_string buf "\\\\"
18- | '\x00' -> Buffer.add_string buf "\\0"
19- | '\x07' -> Buffer.add_string buf "\\a"
20- | '\x08' -> Buffer.add_string buf "\\b"
21- | '\x0b' -> Buffer.add_string buf "\\v"
22- | '\x0c' -> Buffer.add_string buf "\\f"
23- | '\x1b' -> Buffer.add_string buf "\\e"
24- | '\xa0' -> Buffer.add_string buf "\\_"
25- | c -> Buffer.add_char buf c
26- ) s;
027 Buffer.contents buf
2829let style_char = function
···39 | Event.Stream_start _ -> "+STR"
40 | Event.Stream_end -> "-STR"
41 | Event.Document_start { implicit; _ } ->
42- if implicit then "+DOC"
43- else "+DOC ---"
44- | Event.Document_end { implicit } ->
45- if implicit then "-DOC"
46- else "-DOC ..."
47 | Event.Mapping_start { anchor; tag; style; _ } ->
48 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
49 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
···60 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
61 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
62 let style_c = style_char style in
63- Printf.sprintf "=VAL%s%s %c%s" anchor_str tag_str style_c (escape_string value)
64- | Event.Alias { anchor } ->
65- Printf.sprintf "=ALI *%s" anchor
6667let of_spanned_events events =
68 let buf = Buffer.create 256 in
69- List.iter (fun (e : Event.spanned) ->
70- let line = format_event e in
71- Buffer.add_string buf line;
72- Buffer.add_char buf '\n'
73- ) events;
074 Buffer.contents buf
···910let escape_string s =
11 let buf = Buffer.create (String.length s * 2) in
12+ String.iter
13+ (fun c ->
14+ match c with
15+ | '\n' -> Buffer.add_string buf "\\n"
16+ | '\t' -> Buffer.add_string buf "\\t"
17+ | '\r' -> Buffer.add_string buf "\\r"
18+ | '\\' -> Buffer.add_string buf "\\\\"
19+ | '\x00' -> Buffer.add_string buf "\\0"
20+ | '\x07' -> Buffer.add_string buf "\\a"
21+ | '\x08' -> Buffer.add_string buf "\\b"
22+ | '\x0b' -> Buffer.add_string buf "\\v"
23+ | '\x0c' -> Buffer.add_string buf "\\f"
24+ | '\x1b' -> Buffer.add_string buf "\\e"
25+ | '\xa0' -> Buffer.add_string buf "\\_"
26+ | c -> Buffer.add_char buf c)
27+ s;
28 Buffer.contents buf
2930let style_char = function
···40 | Event.Stream_start _ -> "+STR"
41 | Event.Stream_end -> "-STR"
42 | Event.Document_start { implicit; _ } ->
43+ if implicit then "+DOC" else "+DOC ---"
44+ | Event.Document_end { implicit } -> if implicit then "-DOC" else "-DOC ..."
00045 | Event.Mapping_start { anchor; tag; style; _ } ->
46 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
47 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
···58 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
59 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
60 let style_c = style_char style in
61+ Printf.sprintf "=VAL%s%s %c%s" anchor_str tag_str style_c
62+ (escape_string value)
63+ | Event.Alias { anchor } -> Printf.sprintf "=ALI *%s" anchor
6465let of_spanned_events events =
66 let buf = Buffer.create 256 in
67+ List.iter
68+ (fun (e : Event.spanned) ->
69+ let line = format_event e in
70+ Buffer.add_string buf line;
71+ Buffer.add_char buf '\n')
72+ events;
73 Buffer.contents buf
+147-112
tests/test_yamlrw.ml
···24 Alcotest.(check int) "token count" 8 (List.length token_types);
25 (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *)
26 match token_types with
27- | Token.Stream_start _ :: Token.Block_mapping_start :: Token.Key ::
28- Token.Scalar { value = "hello"; _ } :: Token.Value ::
29- Token.Scalar { value = "world"; _ } :: Token.Block_end :: Token.Stream_end :: [] ->
000000030 ()
31- | _ ->
32- Alcotest.fail "unexpected token sequence"
3334let test_scanner_sequence () =
35 let scanner = Scanner.of_string "- one\n- two\n- three" in
···39let test_scanner_flow () =
40 let scanner = Scanner.of_string "[1, 2, 3]" in
41 let tokens = Scanner.to_list scanner in
42- let has_flow_start = List.exists (fun (t : Token.spanned) ->
43- match t.token with Token.Flow_sequence_start -> true | _ -> false
44- ) tokens in
00045 Alcotest.(check bool) "has flow sequence start" true has_flow_start
4647-let scanner_tests = [
48- "simple mapping", `Quick, test_scanner_simple;
49- "sequence", `Quick, test_scanner_sequence;
50- "flow sequence", `Quick, test_scanner_flow;
51-]
05253(** Parser tests *)
54···56 let parser = Parser.of_string "key: value" in
57 let events = Parser.to_list parser in
58 Alcotest.(check bool) "has events" true (List.length events > 0);
59- let has_stream_start = List.exists (fun (e : Event.spanned) ->
60- match e.event with Event.Stream_start _ -> true | _ -> false
61- ) events in
00062 Alcotest.(check bool) "has stream start" true has_stream_start
6364let test_parser_sequence_events () =
65 let parser = Parser.of_string "- a\n- b" in
66 let events = Parser.to_list parser in
67- let has_seq_start = List.exists (fun (e : Event.spanned) ->
68- match e.event with Event.Sequence_start _ -> true | _ -> false
69- ) events in
00070 Alcotest.(check bool) "has sequence start" true has_seq_start
7172-let parser_tests = [
73- "parse events", `Quick, test_parser_events;
74- "sequence events", `Quick, test_parser_sequence_events;
75-]
07677(** Value parsing tests *)
78···93 check_value "float" (`Float 3.14) (of_string "3.14")
9495let test_parse_string () =
96- check_value "plain" (`String "hello") (of_string "hello world" |> function `String s -> `String (String.sub s 0 5) | v -> v);
00097 check_value "quoted" (`String "hello") (of_string {|"hello"|})
9899let test_parse_sequence () =
100 let result = of_string "- one\n- two\n- three" in
101 match result with
102- | `A [_; _; _] -> ()
103 | _ -> Alcotest.fail "expected sequence with 3 elements"
104105let test_parse_mapping () =
···118|} in
119 let result = of_string yaml in
120 match result with
121- | `O [("person", `O _)] -> ()
122 | _ -> Alcotest.fail "expected nested structure"
123124let test_parse_flow_sequence () =
125 let result = of_string "[1, 2, 3]" in
126 match result with
127- | `A [`Float 1.0; `Float 2.0; `Float 3.0] -> ()
128 | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]"
129130let test_parse_flow_mapping () =
131 let result = of_string "{a: 1, b: 2}" in
132 match result with
133- | `O [("a", `Float 1.0); ("b", `Float 2.0)] -> ()
134 | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}"
135136let test_parse_flow_mapping_trailing_comma () =
137 let result = of_string "{ a: 1, }" in
138 match result with
139- | `O [("a", `Float 1.0)] -> ()
140 | `O pairs ->
141- Alcotest.failf "expected 1 pair but got %d pairs (trailing comma should not create empty entry)"
00142 (List.length pairs)
143 | _ -> Alcotest.fail "expected flow mapping with 1 pair"
144145-let value_tests = [
146- "parse null", `Quick, test_parse_null;
147- "parse bool", `Quick, test_parse_bool;
148- "parse number", `Quick, test_parse_number;
149- "parse string", `Quick, test_parse_string;
150- "parse sequence", `Quick, test_parse_sequence;
151- "parse mapping", `Quick, test_parse_mapping;
152- "parse nested", `Quick, test_parse_nested;
153- "parse flow sequence", `Quick, test_parse_flow_sequence;
154- "parse flow mapping", `Quick, test_parse_flow_mapping;
155- "flow mapping trailing comma", `Quick, test_parse_flow_mapping_trailing_comma;
156-]
000157158(** Emitter tests *)
159···162 Alcotest.(check bool) "contains null" true (String.length result > 0)
163164let starts_with prefix s =
165- String.length s >= String.length prefix &&
166- String.sub s 0 (String.length prefix) = prefix
167168let test_emit_mapping () =
169- let value = `O [("name", `String "Alice"); ("age", `Float 30.0)] in
170 let result = to_string value in
171 let trimmed = String.trim result in
172- Alcotest.(check bool) "contains name" true (starts_with "name" trimmed || starts_with "\"name\"" trimmed)
00173174let test_roundtrip_simple () =
175 let yaml = "name: Alice" in
···187 ()
188 | _ -> Alcotest.fail "roundtrip failed"
189190-let emitter_tests = [
191- "emit null", `Quick, test_emit_null;
192- "emit mapping", `Quick, test_emit_mapping;
193- "roundtrip simple", `Quick, test_roundtrip_simple;
194- "roundtrip sequence", `Quick, test_roundtrip_sequence;
195-]
0196197(** YAML-specific tests *)
198···204 | _ -> Alcotest.fail "expected scalar with anchor"
205206let test_yaml_alias () =
207- let yaml = {|
0208defaults: &defaults
209 timeout: 30
210production:
211 <<: *defaults
212 port: 8080
213-|} in
0214 (* Just check it parses without error *)
215 let _ = yaml_of_string yaml in
216 ()
217218-let yaml_tests = [
219- "yaml anchor", `Quick, test_yaml_anchor;
220- "yaml alias", `Quick, test_yaml_alias;
221-]
0222223(** Multiline scalar tests *)
224···230|} in
231 let result = of_string yaml in
232 match result with
233- | `O [("description", `String _)] -> ()
234 | _ -> Alcotest.fail "expected mapping with literal block"
235236let test_folded_block () =
···241|} in
242 let result = of_string yaml in
243 match result with
244- | `O [("description", `String _)] -> ()
245 | _ -> Alcotest.fail "expected mapping with folded block"
246247-let multiline_tests = [
248- "literal block", `Quick, test_literal_block;
249- "folded block", `Quick, test_folded_block;
250-]
0251252(** Error handling tests *)
253···255 try
256 let _ = of_string "key: [unclosed" in
257 Alcotest.fail "expected error"
258- with
259- | Yamlrw_error e ->
260- Alcotest.(check bool) "has span" true (e.span <> None)
261262-let error_tests = [
263- "error position", `Quick, test_error_position;
264-]
265266(** Alias expansion limit tests (billion laughs protection) *)
267268let test_node_limit () =
269 (* Small bomb that would expand to 9^4 = 6561 nodes *)
270- let yaml = {|
0271a: &a [1,2,3,4,5,6,7,8,9]
272b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
273c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
274d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
275-|} in
0276 (* Should fail with a small node limit *)
277 try
278 let _ = of_string ~max_nodes:100 yaml in
279 Alcotest.fail "expected node limit error"
280- with
281- | Yamlrw_error e ->
282- (match e.Error.kind with
283- | Error.Alias_expansion_node_limit _ -> ()
284- | _ -> Alcotest.fail "expected Alias_expansion_node_limit error")
285286let test_depth_limit () =
287 (* Create deeply nested alias chain:
288 *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z]
289 Each alias resolution increases depth by 1 *)
290- let yaml = {|
0291a: &a [x, y, z]
292b: &b [*a, *a]
293c: &c [*b, *b]
294d: &d [*c, *c]
295e: &e [*d, *d]
296result: *e
297-|} in
0298 (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *)
299 try
300 let _ = of_string ~max_depth:3 yaml in
301 Alcotest.fail "expected depth limit error"
302- with
303- | Yamlrw_error e ->
304- (match e.Error.kind with
305- | Error.Alias_expansion_depth_limit _ -> ()
306- | _ -> Alcotest.fail ("expected Alias_expansion_depth_limit error, got: " ^ Error.kind_to_string e.Error.kind))
00307308let test_normal_aliases_work () =
309 (* Normal alias usage should work fine *)
310- let yaml = {|
0311defaults: &defaults
312 timeout: 30
313 retries: 3
314production:
315 <<: *defaults
316 port: 8080
317-|} in
0318 let result = of_string yaml in
319- match result with
320- | `O _ -> ()
321- | _ -> Alcotest.fail "expected mapping"
322323let test_resolve_aliases_false () =
324 (* With resolve_aliases=false, aliases should remain unresolved *)
···329 let result = yaml_of_string ~resolve_aliases:false yaml in
330 (* Check that alias is preserved *)
331 match result with
332- | `O map ->
333 let pairs = Mapping.members map in
334- (match List.assoc_opt (`Scalar (Scalar.make "b")) pairs with
335- | Some (`Alias "anchor") -> ()
336- | _ -> Alcotest.fail "expected alias to be preserved")
337 | _ -> Alcotest.fail "expected mapping"
338339-let alias_limit_tests = [
340- "node limit", `Quick, test_node_limit;
341- "depth limit", `Quick, test_depth_limit;
342- "normal aliases work", `Quick, test_normal_aliases_work;
343- "resolve_aliases false", `Quick, test_resolve_aliases_false;
344-]
0345346(** Bug fix regression tests
347 These tests verify that issues fixed in ocaml-yaml don't occur in ocaml-yamlrw *)
···411let test_parse_special_floats () =
412 let inf_result = of_string ".inf" in
413 (match inf_result with
414- | `Float f when Float.is_inf f && f > 0.0 -> ()
415 | _ -> Alcotest.fail "expected positive infinity");
416 let neg_inf_result = of_string "-.inf" in
417 (match neg_inf_result with
418- | `Float f when Float.is_inf f && f < 0.0 -> ()
419 | _ -> Alcotest.fail "expected negative infinity");
420 let nan_result = of_string ".nan" in
421 (match nan_result with
···485(** Run all tests *)
486487let () =
488- Alcotest.run "yamlrw" [
489- "scanner", scanner_tests;
490- "parser", parser_tests;
491- "value", value_tests;
492- "emitter", emitter_tests;
493- "yaml", yaml_tests;
494- "multiline", multiline_tests;
495- "errors", error_tests;
496- "alias_limits", alias_limit_tests;
497- "bugfix_regression", bugfix_regression_tests;
498- ]
0
···24 Alcotest.(check int) "token count" 8 (List.length token_types);
25 (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *)
26 match token_types with
27+ | [
28+ Token.Stream_start _;
29+ Token.Block_mapping_start;
30+ Token.Key;
31+ Token.Scalar { value = "hello"; _ };
32+ Token.Value;
33+ Token.Scalar { value = "world"; _ };
34+ Token.Block_end;
35+ Token.Stream_end;
36+ ] ->
37 ()
38+ | _ -> Alcotest.fail "unexpected token sequence"
03940let test_scanner_sequence () =
41 let scanner = Scanner.of_string "- one\n- two\n- three" in
···45let test_scanner_flow () =
46 let scanner = Scanner.of_string "[1, 2, 3]" in
47 let tokens = Scanner.to_list scanner in
48+ let has_flow_start =
49+ List.exists
50+ (fun (t : Token.spanned) ->
51+ match t.token with Token.Flow_sequence_start -> true | _ -> false)
52+ tokens
53+ in
54 Alcotest.(check bool) "has flow sequence start" true has_flow_start
5556+let scanner_tests =
57+ [
58+ ("simple mapping", `Quick, test_scanner_simple);
59+ ("sequence", `Quick, test_scanner_sequence);
60+ ("flow sequence", `Quick, test_scanner_flow);
61+ ]
6263(** Parser tests *)
64···66 let parser = Parser.of_string "key: value" in
67 let events = Parser.to_list parser in
68 Alcotest.(check bool) "has events" true (List.length events > 0);
69+ let has_stream_start =
70+ List.exists
71+ (fun (e : Event.spanned) ->
72+ match e.event with Event.Stream_start _ -> true | _ -> false)
73+ events
74+ in
75 Alcotest.(check bool) "has stream start" true has_stream_start
7677let test_parser_sequence_events () =
78 let parser = Parser.of_string "- a\n- b" in
79 let events = Parser.to_list parser in
80+ let has_seq_start =
81+ List.exists
82+ (fun (e : Event.spanned) ->
83+ match e.event with Event.Sequence_start _ -> true | _ -> false)
84+ events
85+ in
86 Alcotest.(check bool) "has sequence start" true has_seq_start
8788+let parser_tests =
89+ [
90+ ("parse events", `Quick, test_parser_events);
91+ ("sequence events", `Quick, test_parser_sequence_events);
92+ ]
9394(** Value parsing tests *)
95···110 check_value "float" (`Float 3.14) (of_string "3.14")
111112let test_parse_string () =
113+ check_value "plain" (`String "hello")
114+ ( of_string "hello world" |> function
115+ | `String s -> `String (String.sub s 0 5)
116+ | v -> v );
117 check_value "quoted" (`String "hello") (of_string {|"hello"|})
118119let test_parse_sequence () =
120 let result = of_string "- one\n- two\n- three" in
121 match result with
122+ | `A [ _; _; _ ] -> ()
123 | _ -> Alcotest.fail "expected sequence with 3 elements"
124125let test_parse_mapping () =
···138|} in
139 let result = of_string yaml in
140 match result with
141+ | `O [ ("person", `O _) ] -> ()
142 | _ -> Alcotest.fail "expected nested structure"
143144let test_parse_flow_sequence () =
145 let result = of_string "[1, 2, 3]" in
146 match result with
147+ | `A [ `Float 1.0; `Float 2.0; `Float 3.0 ] -> ()
148 | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]"
149150let test_parse_flow_mapping () =
151 let result = of_string "{a: 1, b: 2}" in
152 match result with
153+ | `O [ ("a", `Float 1.0); ("b", `Float 2.0) ] -> ()
154 | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}"
155156let test_parse_flow_mapping_trailing_comma () =
157 let result = of_string "{ a: 1, }" in
158 match result with
159+ | `O [ ("a", `Float 1.0) ] -> ()
160 | `O pairs ->
161+ Alcotest.failf
162+ "expected 1 pair but got %d pairs (trailing comma should not create \
163+ empty entry)"
164 (List.length pairs)
165 | _ -> Alcotest.fail "expected flow mapping with 1 pair"
166167+let value_tests =
168+ [
169+ ("parse null", `Quick, test_parse_null);
170+ ("parse bool", `Quick, test_parse_bool);
171+ ("parse number", `Quick, test_parse_number);
172+ ("parse string", `Quick, test_parse_string);
173+ ("parse sequence", `Quick, test_parse_sequence);
174+ ("parse mapping", `Quick, test_parse_mapping);
175+ ("parse nested", `Quick, test_parse_nested);
176+ ("parse flow sequence", `Quick, test_parse_flow_sequence);
177+ ("parse flow mapping", `Quick, test_parse_flow_mapping);
178+ ( "flow mapping trailing comma",
179+ `Quick,
180+ test_parse_flow_mapping_trailing_comma );
181+ ]
182183(** Emitter tests *)
184···187 Alcotest.(check bool) "contains null" true (String.length result > 0)
188189let starts_with prefix s =
190+ String.length s >= String.length prefix
191+ && String.sub s 0 (String.length prefix) = prefix
192193let test_emit_mapping () =
194+ let value = `O [ ("name", `String "Alice"); ("age", `Float 30.0) ] in
195 let result = to_string value in
196 let trimmed = String.trim result in
197+ Alcotest.(check bool)
198+ "contains name" true
199+ (starts_with "name" trimmed || starts_with "\"name\"" trimmed)
200201let test_roundtrip_simple () =
202 let yaml = "name: Alice" in
···214 ()
215 | _ -> Alcotest.fail "roundtrip failed"
216217+let emitter_tests =
218+ [
219+ ("emit null", `Quick, test_emit_null);
220+ ("emit mapping", `Quick, test_emit_mapping);
221+ ("roundtrip simple", `Quick, test_roundtrip_simple);
222+ ("roundtrip sequence", `Quick, test_roundtrip_sequence);
223+ ]
224225(** YAML-specific tests *)
226···232 | _ -> Alcotest.fail "expected scalar with anchor"
233234let test_yaml_alias () =
235+ let yaml =
236+ {|
237defaults: &defaults
238 timeout: 30
239production:
240 <<: *defaults
241 port: 8080
242+|}
243+ in
244 (* Just check it parses without error *)
245 let _ = yaml_of_string yaml in
246 ()
247248+let yaml_tests =
249+ [
250+ ("yaml anchor", `Quick, test_yaml_anchor);
251+ ("yaml alias", `Quick, test_yaml_alias);
252+ ]
253254(** Multiline scalar tests *)
255···261|} in
262 let result = of_string yaml in
263 match result with
264+ | `O [ ("description", `String _) ] -> ()
265 | _ -> Alcotest.fail "expected mapping with literal block"
266267let test_folded_block () =
···272|} in
273 let result = of_string yaml in
274 match result with
275+ | `O [ ("description", `String _) ] -> ()
276 | _ -> Alcotest.fail "expected mapping with folded block"
277278+let multiline_tests =
279+ [
280+ ("literal block", `Quick, test_literal_block);
281+ ("folded block", `Quick, test_folded_block);
282+ ]
283284(** Error handling tests *)
285···287 try
288 let _ = of_string "key: [unclosed" in
289 Alcotest.fail "expected error"
290+ with Yamlrw_error e -> Alcotest.(check bool) "has span" true (e.span <> None)
00291292+let error_tests = [ ("error position", `Quick, test_error_position) ]
00293294(** Alias expansion limit tests (billion laughs protection) *)
295296let test_node_limit () =
297 (* Small bomb that would expand to 9^4 = 6561 nodes *)
298+ let yaml =
299+ {|
300a: &a [1,2,3,4,5,6,7,8,9]
301b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
302c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
303d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
304+|}
305+ in
306 (* Should fail with a small node limit *)
307 try
308 let _ = of_string ~max_nodes:100 yaml in
309 Alcotest.fail "expected node limit error"
310+ with Yamlrw_error e -> (
311+ match e.Error.kind with
312+ | Error.Alias_expansion_node_limit _ -> ()
313+ | _ -> Alcotest.fail "expected Alias_expansion_node_limit error")
0314315let test_depth_limit () =
316 (* Create deeply nested alias chain:
317 *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z]
318 Each alias resolution increases depth by 1 *)
319+ let yaml =
320+ {|
321a: &a [x, y, z]
322b: &b [*a, *a]
323c: &c [*b, *b]
324d: &d [*c, *c]
325e: &e [*d, *d]
326result: *e
327+|}
328+ in
329 (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *)
330 try
331 let _ = of_string ~max_depth:3 yaml in
332 Alcotest.fail "expected depth limit error"
333+ with Yamlrw_error e -> (
334+ match e.Error.kind with
335+ | Error.Alias_expansion_depth_limit _ -> ()
336+ | _ ->
337+ Alcotest.fail
338+ ("expected Alias_expansion_depth_limit error, got: "
339+ ^ Error.kind_to_string e.Error.kind))
340341let test_normal_aliases_work () =
342 (* Normal alias usage should work fine *)
343+ let yaml =
344+ {|
345defaults: &defaults
346 timeout: 30
347 retries: 3
348production:
349 <<: *defaults
350 port: 8080
351+|}
352+ in
353 let result = of_string yaml in
354+ match result with `O _ -> () | _ -> Alcotest.fail "expected mapping"
00355356let test_resolve_aliases_false () =
357 (* With resolve_aliases=false, aliases should remain unresolved *)
···362 let result = yaml_of_string ~resolve_aliases:false yaml in
363 (* Check that alias is preserved *)
364 match result with
365+ | `O map -> (
366 let pairs = Mapping.members map in
367+ match List.assoc_opt (`Scalar (Scalar.make "b")) pairs with
368+ | Some (`Alias "anchor") -> ()
369+ | _ -> Alcotest.fail "expected alias to be preserved")
370 | _ -> Alcotest.fail "expected mapping"
371372+let alias_limit_tests =
373+ [
374+ ("node limit", `Quick, test_node_limit);
375+ ("depth limit", `Quick, test_depth_limit);
376+ ("normal aliases work", `Quick, test_normal_aliases_work);
377+ ("resolve_aliases false", `Quick, test_resolve_aliases_false);
378+ ]
379380(** Bug fix regression tests
381 These tests verify that issues fixed in ocaml-yaml don't occur in ocaml-yamlrw *)
···445let test_parse_special_floats () =
446 let inf_result = of_string ".inf" in
447 (match inf_result with
448+ | `Float f when Float.is_infinite f && f > 0.0 -> ()
449 | _ -> Alcotest.fail "expected positive infinity");
450 let neg_inf_result = of_string "-.inf" in
451 (match neg_inf_result with
452+ | `Float f when Float.is_infinite f && f < 0.0 -> ()
453 | _ -> Alcotest.fail "expected negative infinity");
454 let nan_result = of_string ".nan" in
455 (match nan_result with
···519(** Run all tests *)
520521let () =
522+ Alcotest.run "yamlrw"
523+ [
524+ ("scanner", scanner_tests);
525+ ("parser", parser_tests);
526+ ("value", value_tests);
527+ ("emitter", emitter_tests);
528+ ("yaml", yaml_tests);
529+ ("multiline", multiline_tests);
530+ ("errors", error_tests);
531+ ("alias_limits", alias_limit_tests);
532+ ("bugfix_regression", bugfix_regression_tests);
533+ ]