Pure OCaml Yaml 1.2 reader and writer using Bytesrw

more

+3196 -3105
+120 -99
bin/yamlcat.ml
··· 16 | `Float f -> 17 if Float.is_integer f && Float.abs f < 1e15 then 18 Buffer.add_string buf (Printf.sprintf "%.0f" f) 19 - else 20 - Buffer.add_string buf (Printf.sprintf "%g" f) 21 | `String s -> Buffer.add_string buf (Printf.sprintf "%S" s) 22 | `A items -> 23 Buffer.add_char buf '['; 24 - List.iteri (fun i item -> 25 - if i > 0 then Buffer.add_string buf ", "; 26 - json_to_string buf item 27 - ) items; 28 Buffer.add_char buf ']' 29 | `O pairs -> 30 Buffer.add_char buf '{'; 31 - List.iteri (fun i (k, v) -> 32 - if i > 0 then Buffer.add_string buf ", "; 33 - Buffer.add_string buf (Printf.sprintf "%S: " k); 34 - json_to_string buf v 35 - ) pairs; 36 Buffer.add_char buf '}' 37 38 let value_to_json v = ··· 49 | Yaml -> 50 (* Convert through Value to apply tag-based type coercion *) 51 let first = ref true in 52 - List.iter (fun (doc : Yamlrw.document) -> 53 - if not !first then print_string "---\n"; 54 - first := false; 55 - match doc.root with 56 - | None -> print_endline "" 57 - | Some yaml -> 58 - let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in 59 - print_string (Yamlrw.to_string value) 60 - ) documents 61 | Flow -> 62 (* Convert through Value to apply tag-based type coercion *) 63 let first = ref true in 64 - List.iter (fun (doc : Yamlrw.document) -> 65 - if not !first then print_string "---\n"; 66 - first := false; 67 - match doc.root with 68 - | None -> print_endline "" 69 - | Some yaml -> 70 - let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in 71 - print_string (Yamlrw.to_string ~layout_style:`Flow value) 72 - ) documents 73 | Json -> 74 let first = ref true in 75 - List.iter (fun (doc : Yamlrw.document) -> 76 - match doc.root with 77 - | None -> () 78 - | Some yaml -> 79 - if not !first then print_endline "---"; 80 - first := false; 81 - let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in 82 - print_endline (value_to_json value) 83 - ) documents 84 | Debug -> 85 - List.iteri (fun i (doc : Yamlrw.document) -> 86 - Format.printf "Document %d:@." (i + 1); 87 - (* Convert back to Document.t for printing *) 88 - let doc' : Yamlrw.Document.t = { 89 - Yamlrw.Document.version = doc.version; 90 - Yamlrw.Document.tags = doc.tags; 91 - Yamlrw.Document.root = (doc.root :> Yamlrw.Yaml.t option); 92 - Yamlrw.Document.implicit_start = doc.implicit_start; 93 - Yamlrw.Document.implicit_end = doc.implicit_end; 94 - } in 95 - Format.printf "%a@." Yamlrw.Document.pp doc' 96 - ) documents 97 - with 98 - | Yamlrw.Yamlrw_error e -> 99 - Printf.eprintf "Error: %s\n" (Yamlrw.Error.to_string e); 100 - exit 1 101 102 let process_file ~format ~resolve_aliases ~max_nodes ~max_depth filename = 103 let content = 104 - if filename = "-" then 105 - In_channel.input_all In_channel.stdin 106 - else 107 - In_channel.with_open_text filename In_channel.input_all 108 in 109 process_string ~format ~resolve_aliases ~max_nodes ~max_depth content 110 111 let run format _all resolve_aliases max_nodes max_depth files = 112 - let files = if files = [] then ["-"] else files in 113 List.iter (process_file ~format ~resolve_aliases ~max_nodes ~max_depth) files; 114 `Ok () 115 ··· 117 118 let format_arg = 119 let doc = "Output format: yaml (default), json, flow, or debug." in 120 - let formats = [ 121 - ("yaml", Yaml); 122 - ("json", Json); 123 - ("flow", Flow); 124 - ("debug", Debug); 125 - ] in 126 - Arg.(value & opt (enum formats) Yaml & info ["format"; "f"] ~docv:"FORMAT" ~doc) 127 128 let json_arg = 129 let doc = "Output as JSON (shorthand for --format=json)." in 130 - Arg.(value & flag & info ["json"] ~doc) 131 132 let flow_arg = 133 let doc = "Output in flow style (shorthand for --format=flow)." in 134 - Arg.(value & flag & info ["flow"] ~doc) 135 136 let debug_arg = 137 let doc = "Output internal representation (shorthand for --format=debug)." in 138 - Arg.(value & flag & info ["debug"] ~doc) 139 140 let all_arg = 141 let doc = "Output all documents (for multi-document YAML)." in 142 - Arg.(value & flag & info ["all"; "a"] ~doc) 143 144 let no_resolve_aliases_arg = 145 let doc = "Don't resolve aliases (keep them as references)." in 146 - Arg.(value & flag & info ["no-resolve-aliases"] ~doc) 147 148 let max_nodes_arg = 149 - let doc = "Maximum number of nodes during alias expansion (default: 10000000). \ 150 - Protection against billion laughs attack." in 151 - Arg.(value & opt int Yamlrw.default_max_alias_nodes & info ["max-nodes"] ~docv:"N" ~doc) 152 153 let max_depth_arg = 154 - let doc = "Maximum alias nesting depth (default: 100). \ 155 - Protection against deeply nested alias chains." in 156 - Arg.(value & opt int Yamlrw.default_max_alias_depth & info ["max-depth"] ~docv:"N" ~doc) 157 158 let files_arg = 159 let doc = "YAML file(s) to process. Use '-' for stdin." in 160 Arg.(value & pos_all file [] & info [] ~docv:"FILE" ~doc) 161 162 let combined_format format json flow debug = 163 - if json then Json 164 - else if flow then Flow 165 - else if debug then Debug 166 - else format 167 168 let term = 169 let combine format json flow debug all no_resolve max_nodes max_depth files = ··· 171 let resolve_aliases = not no_resolve in 172 run format all resolve_aliases max_nodes max_depth files 173 in 174 - Term.(ret (const combine $ format_arg $ json_arg $ flow_arg $ debug_arg $ 175 - all_arg $ no_resolve_aliases_arg $ max_nodes_arg $ max_depth_arg $ files_arg)) 176 177 let info = 178 let doc = "Parse and reprint YAML files" in 179 - let man = [ 180 - `S Manpage.s_description; 181 - `P "$(tname) parses YAML files and reprints them in various formats. \ 182 - It can be used to validate YAML, convert between styles, or convert to JSON."; 183 - `S Manpage.s_examples; 184 - `P "Parse and reprint a YAML file:"; 185 - `Pre " $(tname) config.yaml"; 186 - `P "Convert YAML to JSON:"; 187 - `Pre " $(tname) --json config.yaml"; 188 - `P "Process multi-document YAML:"; 189 - `Pre " $(tname) --all multi.yaml"; 190 - `P "Limit alias expansion (protection against malicious YAML):"; 191 - `Pre " $(tname) --max-nodes 1000 --max-depth 10 untrusted.yaml"; 192 - `S Manpage.s_bugs; 193 - `P "Report bugs at https://github.com/avsm/ocaml-yaml/issues"; 194 - ] in 195 Cmd.info "yamlcat" ~version:"0.1.0" ~doc ~man 196 197 let () = exit (Cmd.eval (Cmd.v info term))
··· 16 | `Float f -> 17 if Float.is_integer f && Float.abs f < 1e15 then 18 Buffer.add_string buf (Printf.sprintf "%.0f" f) 19 + else Buffer.add_string buf (Printf.sprintf "%g" f) 20 | `String s -> Buffer.add_string buf (Printf.sprintf "%S" s) 21 | `A items -> 22 Buffer.add_char buf '['; 23 + List.iteri 24 + (fun i item -> 25 + if i > 0 then Buffer.add_string buf ", "; 26 + json_to_string buf item) 27 + items; 28 Buffer.add_char buf ']' 29 | `O pairs -> 30 Buffer.add_char buf '{'; 31 + List.iteri 32 + (fun i (k, v) -> 33 + if i > 0 then Buffer.add_string buf ", "; 34 + Buffer.add_string buf (Printf.sprintf "%S: " k); 35 + json_to_string buf v) 36 + pairs; 37 Buffer.add_char buf '}' 38 39 let value_to_json v = ··· 50 | Yaml -> 51 (* Convert through Value to apply tag-based type coercion *) 52 let first = ref true in 53 + List.iter 54 + (fun (doc : Yamlrw.document) -> 55 + if not !first then print_string "---\n"; 56 + first := false; 57 + match doc.root with 58 + | None -> print_endline "" 59 + | Some yaml -> 60 + let value = 61 + Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml 62 + in 63 + print_string (Yamlrw.to_string value)) 64 + documents 65 | Flow -> 66 (* Convert through Value to apply tag-based type coercion *) 67 let first = ref true in 68 + List.iter 69 + (fun (doc : Yamlrw.document) -> 70 + if not !first then print_string "---\n"; 71 + first := false; 72 + match doc.root with 73 + | None -> print_endline "" 74 + | Some yaml -> 75 + let value = 76 + Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml 77 + in 78 + print_string (Yamlrw.to_string ~layout_style:`Flow value)) 79 + documents 80 | Json -> 81 let first = ref true in 82 + List.iter 83 + (fun (doc : Yamlrw.document) -> 84 + match doc.root with 85 + | None -> () 86 + | Some yaml -> 87 + if not !first then print_endline "---"; 88 + first := false; 89 + let value = 90 + Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml 91 + in 92 + print_endline (value_to_json value)) 93 + documents 94 | Debug -> 95 + List.iteri 96 + (fun i (doc : Yamlrw.document) -> 97 + Format.printf "Document %d:@." (i + 1); 98 + (* Convert back to Document.t for printing *) 99 + let doc' : Yamlrw.Document.t = 100 + { 101 + Yamlrw.Document.version = doc.version; 102 + Yamlrw.Document.tags = doc.tags; 103 + Yamlrw.Document.root = (doc.root :> Yamlrw.Yaml.t option); 104 + Yamlrw.Document.implicit_start = doc.implicit_start; 105 + Yamlrw.Document.implicit_end = doc.implicit_end; 106 + } 107 + in 108 + Format.printf "%a@." Yamlrw.Document.pp doc') 109 + documents 110 + with Yamlrw.Yamlrw_error e -> 111 + Printf.eprintf "Error: %s\n" (Yamlrw.Error.to_string e); 112 + exit 1 113 114 let process_file ~format ~resolve_aliases ~max_nodes ~max_depth filename = 115 let content = 116 + if filename = "-" then In_channel.input_all In_channel.stdin 117 + else In_channel.with_open_text filename In_channel.input_all 118 in 119 process_string ~format ~resolve_aliases ~max_nodes ~max_depth content 120 121 let run format _all resolve_aliases max_nodes max_depth files = 122 + let files = if files = [] then [ "-" ] else files in 123 List.iter (process_file ~format ~resolve_aliases ~max_nodes ~max_depth) files; 124 `Ok () 125 ··· 127 128 let format_arg = 129 let doc = "Output format: yaml (default), json, flow, or debug." in 130 + let formats = 131 + [ ("yaml", Yaml); ("json", Json); ("flow", Flow); ("debug", Debug) ] 132 + in 133 + Arg.( 134 + value & opt (enum formats) Yaml & info [ "format"; "f" ] ~docv:"FORMAT" ~doc) 135 136 let json_arg = 137 let doc = "Output as JSON (shorthand for --format=json)." in 138 + Arg.(value & flag & info [ "json" ] ~doc) 139 140 let flow_arg = 141 let doc = "Output in flow style (shorthand for --format=flow)." in 142 + Arg.(value & flag & info [ "flow" ] ~doc) 143 144 let debug_arg = 145 let doc = "Output internal representation (shorthand for --format=debug)." in 146 + Arg.(value & flag & info [ "debug" ] ~doc) 147 148 let all_arg = 149 let doc = "Output all documents (for multi-document YAML)." in 150 + Arg.(value & flag & info [ "all"; "a" ] ~doc) 151 152 let no_resolve_aliases_arg = 153 let doc = "Don't resolve aliases (keep them as references)." in 154 + Arg.(value & flag & info [ "no-resolve-aliases" ] ~doc) 155 156 let max_nodes_arg = 157 + let doc = 158 + "Maximum number of nodes during alias expansion (default: 10000000). \ 159 + Protection against billion laughs attack." 160 + in 161 + Arg.( 162 + value 163 + & opt int Yamlrw.default_max_alias_nodes 164 + & info [ "max-nodes" ] ~docv:"N" ~doc) 165 166 let max_depth_arg = 167 + let doc = 168 + "Maximum alias nesting depth (default: 100). Protection against deeply \ 169 + nested alias chains." 170 + in 171 + Arg.( 172 + value 173 + & opt int Yamlrw.default_max_alias_depth 174 + & info [ "max-depth" ] ~docv:"N" ~doc) 175 176 let files_arg = 177 let doc = "YAML file(s) to process. Use '-' for stdin." in 178 Arg.(value & pos_all file [] & info [] ~docv:"FILE" ~doc) 179 180 let combined_format format json flow debug = 181 + if json then Json else if flow then Flow else if debug then Debug else format 182 183 let term = 184 let combine format json flow debug all no_resolve max_nodes max_depth files = ··· 186 let resolve_aliases = not no_resolve in 187 run format all resolve_aliases max_nodes max_depth files 188 in 189 + Term.( 190 + ret 191 + (const combine $ format_arg $ json_arg $ flow_arg $ debug_arg $ all_arg 192 + $ no_resolve_aliases_arg $ max_nodes_arg $ max_depth_arg $ files_arg)) 193 194 let info = 195 let doc = "Parse and reprint YAML files" in 196 + let man = 197 + [ 198 + `S Manpage.s_description; 199 + `P 200 + "$(tname) parses YAML files and reprints them in various formats. It \ 201 + can be used to validate YAML, convert between styles, or convert to \ 202 + JSON."; 203 + `S Manpage.s_examples; 204 + `P "Parse and reprint a YAML file:"; 205 + `Pre " $(tname) config.yaml"; 206 + `P "Convert YAML to JSON:"; 207 + `Pre " $(tname) --json config.yaml"; 208 + `P "Process multi-document YAML:"; 209 + `Pre " $(tname) --all multi.yaml"; 210 + `P "Limit alias expansion (protection against malicious YAML):"; 211 + `Pre " $(tname) --max-nodes 1000 --max-depth 10 untrusted.yaml"; 212 + `S Manpage.s_bugs; 213 + `P "Report bugs at https://github.com/avsm/ocaml-yaml/issues"; 214 + ] 215 + in 216 Cmd.info "yamlcat" ~version:"0.1.0" ~doc ~man 217 218 let () = exit (Cmd.eval (Cmd.v info term))
+1
dune
··· 1 ; Root dune file 2 3 ; Ignore third_party directory (for fetched dependency sources) 4 (data_only_dirs third_party)
··· 1 ; Root dune file 2 3 ; Ignore third_party directory (for fetched dependency sources) 4 + 5 (data_only_dirs third_party)
+6 -12
lib/char_class.ml
··· 19 20 (** Hexadecimal digit *) 21 let is_hex c = 22 - (c >= '0' && c <= '9') || 23 - (c >= 'a' && c <= 'f') || 24 - (c >= 'A' && c <= 'F') 25 26 (** Alphabetic character *) 27 - let is_alpha c = 28 - (c >= 'a' && c <= 'z') || 29 - (c >= 'A' && c <= 'Z') 30 31 (** Alphanumeric character *) 32 let is_alnum c = is_alpha c || is_digit c ··· 34 (** YAML indicator characters *) 35 let is_indicator c = 36 match c with 37 - | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}' 38 - | '#' | '&' | '*' | '!' | '|' | '>' | '\'' | '"' 39 - | '%' | '@' | '`' -> true 40 | _ -> false 41 42 (** Flow context indicator characters *) 43 let is_flow_indicator c = 44 - match c with 45 - | ',' | '[' | ']' | '{' | '}' -> true 46 - | _ -> false
··· 19 20 (** Hexadecimal digit *) 21 let is_hex c = 22 + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 23 24 (** Alphabetic character *) 25 + let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 26 27 (** Alphanumeric character *) 28 let is_alnum c = is_alpha c || is_digit c ··· 30 (** YAML indicator characters *) 31 let is_indicator c = 32 match c with 33 + | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}' | '#' | '&' | '*' | '!' | '|' 34 + | '>' | '\'' | '"' | '%' | '@' | '`' -> 35 + true 36 | _ -> false 37 38 (** Flow context indicator characters *) 39 let is_flow_indicator c = 40 + match c with ',' | '[' | ']' | '{' | '}' -> true | _ -> false
+5 -19
lib/chomping.ml
··· 6 (** Block scalar chomping indicators *) 7 8 type t = 9 - | Strip (** Remove final line break and trailing empty lines *) 10 | Clip (** Keep final line break, remove trailing empty lines (default) *) 11 | Keep (** Keep final line break and trailing empty lines *) 12 13 - let to_string = function 14 - | Strip -> "strip" 15 - | Clip -> "clip" 16 - | Keep -> "keep" 17 - 18 - let pp fmt t = 19 - Format.pp_print_string fmt (to_string t) 20 - 21 - let of_char = function 22 - | '-' -> Some Strip 23 - | '+' -> Some Keep 24 - | _ -> None 25 - 26 - let to_char = function 27 - | Strip -> Some '-' 28 - | Clip -> None 29 - | Keep -> Some '+' 30 - 31 let equal a b = a = b
··· 6 (** Block scalar chomping indicators *) 7 8 type t = 9 + | Strip (** Remove final line break and trailing empty lines *) 10 | Clip (** Keep final line break, remove trailing empty lines (default) *) 11 | Keep (** Keep final line break and trailing empty lines *) 12 13 + let to_string = function Strip -> "strip" | Clip -> "clip" | Keep -> "keep" 14 + let pp fmt t = Format.pp_print_string fmt (to_string t) 15 + let of_char = function '-' -> Some Strip | '+' -> Some Keep | _ -> None 16 + let to_char = function Strip -> Some '-' | Clip -> None | Keep -> Some '+' 17 let equal a b = a = b
+16 -20
lib/document.ml
··· 13 implicit_end : bool; 14 } 15 16 - let make 17 - ?(version : (int * int) option) 18 - ?(tags : (string * string) list = []) 19 - ?(implicit_start = true) 20 - ?(implicit_end = true) 21 - root = 22 { version; tags; root; implicit_start; implicit_end } 23 24 let version t = t.version ··· 26 let root t = t.root 27 let implicit_start t = t.implicit_start 28 let implicit_end t = t.implicit_end 29 - 30 let with_version version t = { t with version = Some version } 31 let with_tags tags t = { t with tags } 32 let with_root root t = { t with root = Some root } ··· 34 let pp fmt t = 35 Format.fprintf fmt "@[<v 2>document(@,"; 36 (match t.version with 37 - | Some (maj, min) -> Format.fprintf fmt "version=%d.%d,@ " maj min 38 - | None -> ()); 39 if t.tags <> [] then begin 40 Format.fprintf fmt "tags=["; 41 - List.iteri (fun i (h, p) -> 42 - if i > 0 then Format.fprintf fmt ", "; 43 - Format.fprintf fmt "%s -> %s" h p 44 - ) t.tags; 45 Format.fprintf fmt "],@ " 46 end; 47 Format.fprintf fmt "implicit_start=%b,@ " t.implicit_start; 48 Format.fprintf fmt "implicit_end=%b,@ " t.implicit_end; 49 (match t.root with 50 - | Some root -> Format.fprintf fmt "root=%a" Yaml.pp root 51 - | None -> Format.fprintf fmt "root=<empty>"); 52 Format.fprintf fmt "@]@,)" 53 54 let equal a b = 55 - Option.equal ( = ) a.version b.version && 56 - List.equal ( = ) a.tags b.tags && 57 - Option.equal Yaml.equal a.root b.root && 58 - a.implicit_start = b.implicit_start && 59 - a.implicit_end = b.implicit_end
··· 13 implicit_end : bool; 14 } 15 16 + let make ?(version : (int * int) option) ?(tags : (string * string) list = []) 17 + ?(implicit_start = true) ?(implicit_end = true) root = 18 { version; tags; root; implicit_start; implicit_end } 19 20 let version t = t.version ··· 22 let root t = t.root 23 let implicit_start t = t.implicit_start 24 let implicit_end t = t.implicit_end 25 let with_version version t = { t with version = Some version } 26 let with_tags tags t = { t with tags } 27 let with_root root t = { t with root = Some root } ··· 29 let pp fmt t = 30 Format.fprintf fmt "@[<v 2>document(@,"; 31 (match t.version with 32 + | Some (maj, min) -> Format.fprintf fmt "version=%d.%d,@ " maj min 33 + | None -> ()); 34 if t.tags <> [] then begin 35 Format.fprintf fmt "tags=["; 36 + List.iteri 37 + (fun i (h, p) -> 38 + if i > 0 then Format.fprintf fmt ", "; 39 + Format.fprintf fmt "%s -> %s" h p) 40 + t.tags; 41 Format.fprintf fmt "],@ " 42 end; 43 Format.fprintf fmt "implicit_start=%b,@ " t.implicit_start; 44 Format.fprintf fmt "implicit_end=%b,@ " t.implicit_end; 45 (match t.root with 46 + | Some root -> Format.fprintf fmt "root=%a" Yaml.pp root 47 + | None -> Format.fprintf fmt "root=<empty>"); 48 Format.fprintf fmt "@]@,)" 49 50 let equal a b = 51 + Option.equal ( = ) a.version b.version 52 + && List.equal ( = ) a.tags b.tags 53 + && Option.equal Yaml.equal a.root b.root 54 + && a.implicit_start = b.implicit_start 55 + && a.implicit_end = b.implicit_end
+63 -107
lib/eio/yamlrw_eio.ml
··· 5 6 (** Yamlrw Eio - Streaming YAML parsing and emitting with Eio 7 8 - This module provides Eio-compatible streaming YAML parsing and emitting. 9 - It uses bytesrw adapters to convert Eio sources/sinks to the standard 10 - YAML scanner/parser/emitter, eliminating code duplication. *) 11 12 open Yamlrw 13 ··· 30 Scanner.of_input input 31 32 (** Create a parser from an Eio flow *) 33 - let parser_of_flow flow = 34 - Parser.of_scanner (scanner_of_flow flow) 35 36 (** Parse a JSON-compatible value from an Eio flow. 37 38 @param resolve_aliases Whether to expand aliases (default: true) 39 @param max_nodes Maximum nodes during alias expansion (default: 10M) 40 @param max_depth Maximum alias nesting depth (default: 100) *) 41 - let value 42 - ?(resolve_aliases = true) 43 ?(max_nodes = Yaml.default_max_alias_nodes) 44 - ?(max_depth = Yaml.default_max_alias_depth) 45 - flow = 46 let parser = parser_of_flow flow in 47 - Loader.value_of_parser 48 - ~resolve_aliases ~max_nodes ~max_depth 49 - (fun () -> Parser.next parser) 50 51 (** Parse a full YAML value from an Eio flow. 52 ··· 55 @param resolve_aliases Whether to expand aliases (default: false) 56 @param max_nodes Maximum nodes during alias expansion (default: 10M) 57 @param max_depth Maximum alias nesting depth (default: 100) *) 58 - let yaml 59 - ?(resolve_aliases = false) 60 ?(max_nodes = Yaml.default_max_alias_nodes) 61 - ?(max_depth = Yaml.default_max_alias_depth) 62 - flow = 63 let parser = parser_of_flow flow in 64 - Loader.yaml_of_parser 65 - ~resolve_aliases ~max_nodes ~max_depth 66 - (fun () -> Parser.next parser) 67 68 (** Parse multiple YAML documents from an Eio flow. *) 69 let documents flow = ··· 72 73 (** {2 Event-Based Streaming} *) 74 75 (** A streaming event reader backed by a flow *) 76 - type event_reader = { 77 - parser : Parser.t; 78 - } 79 80 - (** Create an event reader from an Eio flow. 81 - This reads data incrementally as events are requested. *) 82 - let event_reader flow = 83 - { parser = parser_of_flow flow } 84 85 - (** Get the next event from an event reader. 86 - Returns [None] when parsing is complete. *) 87 - let next_event reader = 88 - Parser.next reader.parser 89 90 (** Iterate over all events from a flow. 91 ··· 127 @param encoding Output encoding (default: UTF-8) 128 @param scalar_style Preferred scalar style (default: Any) 129 @param layout_style Preferred layout style (default: Any) *) 130 - let value 131 - ?(encoding = `Utf8) 132 - ?(scalar_style = `Any) 133 - ?(layout_style = `Any) 134 - flow 135 - (v : value) = 136 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 137 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 138 Serialize.value_to_writer ~config writer v 139 ··· 142 @param encoding Output encoding (default: UTF-8) 143 @param scalar_style Preferred scalar style (default: Any) 144 @param layout_style Preferred layout style (default: Any) *) 145 - let yaml 146 - ?(encoding = `Utf8) 147 - ?(scalar_style = `Any) 148 - ?(layout_style = `Any) 149 - flow 150 - (v : yaml) = 151 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 152 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 153 Serialize.yaml_to_writer ~config writer v 154 ··· 158 @param scalar_style Preferred scalar style (default: Any) 159 @param layout_style Preferred layout style (default: Any) 160 @param resolve_aliases Whether to expand aliases (default: true) *) 161 - let documents 162 - ?(encoding = `Utf8) 163 - ?(scalar_style = `Any) 164 - ?(layout_style = `Any) 165 - ?(resolve_aliases = true) 166 - flow 167 - docs = 168 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 169 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 170 Serialize.documents_to_writer ~config ~resolve_aliases writer docs 171 172 (** {2 Event-Based Streaming} *) 173 174 (** A streaming event writer that writes directly to a flow *) 175 - type event_writer = { 176 - emitter : Emitter.t; 177 - } 178 179 - (** Create an event writer that writes directly to a flow. 180 - Events are written incrementally as they are emitted. 181 182 @param encoding Output encoding (default: UTF-8) 183 @param scalar_style Preferred scalar style (default: Any) 184 @param layout_style Preferred layout style (default: Any) *) 185 - let event_writer 186 - ?(encoding = `Utf8) 187 - ?(scalar_style = `Any) 188 - ?(layout_style = `Any) 189 - flow = 190 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 191 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 192 { emitter = Emitter.of_writer ~config writer } 193 194 (** Emit a single event to the writer. *) 195 - let emit ew ev = 196 - Emitter.emit ew.emitter ev 197 198 (** Flush the writer by sending end-of-data. *) 199 - let flush ew = 200 - Emitter.flush ew.emitter 201 202 (** Emit events from a list to a flow. *) 203 let emit_all flow events = ··· 209 (** {1 Convenience Functions} *) 210 211 (** Read a value from a file path *) 212 - let of_file 213 - ?(resolve_aliases = true) 214 ?(max_nodes = Yaml.default_max_alias_nodes) 215 - ?(max_depth = Yaml.default_max_alias_depth) 216 - ~fs 217 - path = 218 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> 219 Read.value ~resolve_aliases ~max_nodes ~max_depth flow 220 221 (** Read full YAML from a file path *) 222 - let yaml_of_file 223 - ?(resolve_aliases = false) 224 ?(max_nodes = Yaml.default_max_alias_nodes) 225 - ?(max_depth = Yaml.default_max_alias_depth) 226 - ~fs 227 - path = 228 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> 229 Read.yaml ~resolve_aliases ~max_nodes ~max_depth flow 230 231 (** Read documents from a file path *) 232 let documents_of_file ~fs path = 233 - Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> 234 - Read.documents flow 235 236 (** Write a value to a file path *) 237 - let to_file 238 - ?(encoding = `Utf8) 239 - ?(scalar_style = `Any) 240 - ?(layout_style = `Any) 241 - ~fs 242 - path 243 - v = 244 - Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow -> 245 - Write.value ~encoding ~scalar_style ~layout_style flow v 246 247 (** Write full YAML to a file path *) 248 - let yaml_to_file 249 - ?(encoding = `Utf8) 250 - ?(scalar_style = `Any) 251 - ?(layout_style = `Any) 252 - ~fs 253 - path 254 - v = 255 - Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow -> 256 - Write.yaml ~encoding ~scalar_style ~layout_style flow v 257 258 (** Write documents to a file path *) 259 - let documents_to_file 260 - ?(encoding = `Utf8) 261 - ?(scalar_style = `Any) 262 - ?(layout_style = `Any) 263 - ?(resolve_aliases = true) 264 - ~fs 265 - path 266 - docs = 267 - Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow -> 268 - Write.documents ~encoding ~scalar_style ~layout_style ~resolve_aliases flow docs
··· 5 6 (** Yamlrw Eio - Streaming YAML parsing and emitting with Eio 7 8 + This module provides Eio-compatible streaming YAML parsing and emitting. It 9 + uses bytesrw adapters to convert Eio sources/sinks to the standard YAML 10 + scanner/parser/emitter, eliminating code duplication. *) 11 12 open Yamlrw 13 ··· 30 Scanner.of_input input 31 32 (** Create a parser from an Eio flow *) 33 + let parser_of_flow flow = Parser.of_scanner (scanner_of_flow flow) 34 35 (** Parse a JSON-compatible value from an Eio flow. 36 37 @param resolve_aliases Whether to expand aliases (default: true) 38 @param max_nodes Maximum nodes during alias expansion (default: 10M) 39 @param max_depth Maximum alias nesting depth (default: 100) *) 40 + let value ?(resolve_aliases = true) 41 ?(max_nodes = Yaml.default_max_alias_nodes) 42 + ?(max_depth = Yaml.default_max_alias_depth) flow = 43 let parser = parser_of_flow flow in 44 + Loader.value_of_parser ~resolve_aliases ~max_nodes ~max_depth (fun () -> 45 + Parser.next parser) 46 47 (** Parse a full YAML value from an Eio flow. 48 ··· 51 @param resolve_aliases Whether to expand aliases (default: false) 52 @param max_nodes Maximum nodes during alias expansion (default: 10M) 53 @param max_depth Maximum alias nesting depth (default: 100) *) 54 + let yaml ?(resolve_aliases = false) 55 ?(max_nodes = Yaml.default_max_alias_nodes) 56 + ?(max_depth = Yaml.default_max_alias_depth) flow = 57 let parser = parser_of_flow flow in 58 + Loader.yaml_of_parser ~resolve_aliases ~max_nodes ~max_depth (fun () -> 59 + Parser.next parser) 60 61 (** Parse multiple YAML documents from an Eio flow. *) 62 let documents flow = ··· 65 66 (** {2 Event-Based Streaming} *) 67 68 + type event_reader = { parser : Parser.t } 69 (** A streaming event reader backed by a flow *) 70 71 + (** Create an event reader from an Eio flow. This reads data incrementally as 72 + events are requested. *) 73 + let event_reader flow = { parser = parser_of_flow flow } 74 75 + (** Get the next event from an event reader. Returns [None] when parsing is 76 + complete. *) 77 + let next_event reader = Parser.next reader.parser 78 79 (** Iterate over all events from a flow. 80 ··· 116 @param encoding Output encoding (default: UTF-8) 117 @param scalar_style Preferred scalar style (default: Any) 118 @param layout_style Preferred layout style (default: Any) *) 119 + let value ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any) 120 + flow (v : value) = 121 + let config = 122 + { Emitter.default_config with encoding; scalar_style; layout_style } 123 + in 124 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 125 Serialize.value_to_writer ~config writer v 126 ··· 129 @param encoding Output encoding (default: UTF-8) 130 @param scalar_style Preferred scalar style (default: Any) 131 @param layout_style Preferred layout style (default: Any) *) 132 + let yaml ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any) 133 + flow (v : yaml) = 134 + let config = 135 + { Emitter.default_config with encoding; scalar_style; layout_style } 136 + in 137 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 138 Serialize.yaml_to_writer ~config writer v 139 ··· 143 @param scalar_style Preferred scalar style (default: Any) 144 @param layout_style Preferred layout style (default: Any) 145 @param resolve_aliases Whether to expand aliases (default: true) *) 146 + let documents ?(encoding = `Utf8) ?(scalar_style = `Any) 147 + ?(layout_style = `Any) ?(resolve_aliases = true) flow docs = 148 + let config = 149 + { Emitter.default_config with encoding; scalar_style; layout_style } 150 + in 151 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 152 Serialize.documents_to_writer ~config ~resolve_aliases writer docs 153 154 (** {2 Event-Based Streaming} *) 155 156 + type event_writer = { emitter : Emitter.t } 157 (** A streaming event writer that writes directly to a flow *) 158 159 + (** Create an event writer that writes directly to a flow. Events are written 160 + incrementally as they are emitted. 161 162 @param encoding Output encoding (default: UTF-8) 163 @param scalar_style Preferred scalar style (default: Any) 164 @param layout_style Preferred layout style (default: Any) *) 165 + let event_writer ?(encoding = `Utf8) ?(scalar_style = `Any) 166 + ?(layout_style = `Any) flow = 167 + let config = 168 + { Emitter.default_config with encoding; scalar_style; layout_style } 169 + in 170 let writer = Bytesrw_eio.bytes_writer_of_flow flow in 171 { emitter = Emitter.of_writer ~config writer } 172 173 (** Emit a single event to the writer. *) 174 + let emit ew ev = Emitter.emit ew.emitter ev 175 176 (** Flush the writer by sending end-of-data. *) 177 + let flush ew = Emitter.flush ew.emitter 178 179 (** Emit events from a list to a flow. *) 180 let emit_all flow events = ··· 186 (** {1 Convenience Functions} *) 187 188 (** Read a value from a file path *) 189 + let of_file ?(resolve_aliases = true) 190 ?(max_nodes = Yaml.default_max_alias_nodes) 191 + ?(max_depth = Yaml.default_max_alias_depth) ~fs path = 192 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> 193 Read.value ~resolve_aliases ~max_nodes ~max_depth flow 194 195 (** Read full YAML from a file path *) 196 + let yaml_of_file ?(resolve_aliases = false) 197 ?(max_nodes = Yaml.default_max_alias_nodes) 198 + ?(max_depth = Yaml.default_max_alias_depth) ~fs path = 199 Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> 200 Read.yaml ~resolve_aliases ~max_nodes ~max_depth flow 201 202 (** Read documents from a file path *) 203 let documents_of_file ~fs path = 204 + Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow -> Read.documents flow 205 206 (** Write a value to a file path *) 207 + let to_file ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any) 208 + ~fs path v = 209 + Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) 210 + @@ fun flow -> Write.value ~encoding ~scalar_style ~layout_style flow v 211 212 (** Write full YAML to a file path *) 213 + let yaml_to_file ?(encoding = `Utf8) ?(scalar_style = `Any) 214 + ?(layout_style = `Any) ~fs path v = 215 + Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) 216 + @@ fun flow -> Write.yaml ~encoding ~scalar_style ~layout_style flow v 217 218 (** Write documents to a file path *) 219 + let documents_to_file ?(encoding = `Utf8) ?(scalar_style = `Any) 220 + ?(layout_style = `Any) ?(resolve_aliases = true) ~fs path docs = 221 + Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) 222 + @@ fun flow -> 223 + Write.documents ~encoding ~scalar_style ~layout_style ~resolve_aliases flow 224 + docs
+66 -52
lib/eio/yamlrw_eio.mli
··· 5 6 (** Yamlrw Eio - Streaming YAML parsing and emitting with Eio 7 8 - This library provides Eio-based streaming support for YAML parsing 9 - and emitting. It uses bytesrw adapters that read/write directly to 10 - Eio flows, with bytesrw handling internal buffering. 11 12 {2 Quick Start} 13 ··· 24 Eio_main.run @@ fun env -> 25 let fs = Eio.Stdenv.fs env in 26 Eio.Path.with_open_out Eio.Path.(fs / "output.yaml") @@ fun flow -> 27 - Yaml_eio.Write.value flow (`O [("name", `String "test")]) 28 ]} 29 30 Stream events incrementally: ··· 32 Eio_main.run @@ fun env -> 33 let fs = Eio.Stdenv.fs env in 34 Eio.Path.with_open_in Eio.Path.(fs / "data.yaml") @@ fun flow -> 35 - Yaml_eio.Read.iter_events (fun event span -> 36 - Format.printf "Event at %a@." Yamlrw.Span.pp span 37 - ) flow 38 ]} 39 40 {2 Streaming Architecture} 41 42 This library uses bytesrw for direct I/O with Eio flows: 43 44 - - {b Reading}: Data is read directly from the flow as the 45 - parser requests it. Bytesrw handles internal buffering. 46 47 - - {b Writing}: Output is written directly to the flow. 48 - Bytesrw handles chunking and buffering. *) 49 50 (** {1 Types} *) 51 ··· 66 module Read : sig 67 (** Parse YAML from Eio flows. 68 69 - All functions read data incrementally from the underlying flow, 70 - without loading the entire file into memory first. *) 71 72 (** {2 High-Level Parsing} *) 73 ··· 75 ?resolve_aliases:bool -> 76 ?max_nodes:int -> 77 ?max_depth:int -> 78 - _ Eio.Flow.source -> value 79 (** Parse a JSON-compatible value from an Eio flow. 80 81 @param resolve_aliases Whether to expand aliases (default: true) ··· 86 ?resolve_aliases:bool -> 87 ?max_nodes:int -> 88 ?max_depth:int -> 89 - _ Eio.Flow.source -> yaml 90 (** Parse a full YAML value from an Eio flow. 91 92 By default, aliases are NOT resolved, preserving the document structure. ··· 101 (** {2 Event-Based Streaming} *) 102 103 type event_reader 104 - (** A streaming event reader backed by a flow. 105 - Events are parsed incrementally as requested. *) 106 107 val event_reader : _ Eio.Flow.source -> event_reader 108 (** Create an event reader from an Eio flow. *) 109 110 val next_event : event_reader -> Yamlrw.Event.spanned option 111 - (** Get the next event from an event reader. 112 - Returns [None] when parsing is complete. *) 113 114 val iter_events : 115 - (event -> Yamlrw.Span.t -> unit) -> 116 - _ Eio.Flow.source -> unit 117 (** Iterate over all events from a flow. *) 118 119 - val fold_events : 120 - ('a -> event -> 'a) -> 'a -> 121 - _ Eio.Flow.source -> 'a 122 (** Fold over all events from a flow. *) 123 124 - val iter_documents : 125 - (document -> unit) -> 126 - _ Eio.Flow.source -> unit 127 (** Iterate over documents from a flow, calling [f] for each document. *) 128 129 - val fold_documents : 130 - ('a -> document -> 'a) -> 'a -> 131 - _ Eio.Flow.source -> 'a 132 (** Fold over documents from a flow. *) 133 end 134 ··· 145 ?encoding:Yamlrw.Encoding.t -> 146 ?scalar_style:Yamlrw.Scalar_style.t -> 147 ?layout_style:Yamlrw.Layout_style.t -> 148 - _ Eio.Flow.sink -> value -> unit 149 (** Write a JSON-compatible value to an Eio flow. 150 151 @param encoding Output encoding (default: UTF-8) ··· 156 ?encoding:Yamlrw.Encoding.t -> 157 ?scalar_style:Yamlrw.Scalar_style.t -> 158 ?layout_style:Yamlrw.Layout_style.t -> 159 - _ Eio.Flow.sink -> yaml -> unit 160 (** Write a full YAML value to an Eio flow. 161 162 @param encoding Output encoding (default: UTF-8) ··· 168 ?scalar_style:Yamlrw.Scalar_style.t -> 169 ?layout_style:Yamlrw.Layout_style.t -> 170 ?resolve_aliases:bool -> 171 - _ Eio.Flow.sink -> document list -> unit 172 (** Write multiple YAML documents to an Eio flow. 173 174 @param encoding Output encoding (default: UTF-8) ··· 179 (** {2 Event-Based Streaming} *) 180 181 type event_writer 182 - (** A streaming event writer backed by a flow. 183 - Events are written incrementally to the underlying flow. *) 184 185 val event_writer : 186 ?encoding:Yamlrw.Encoding.t -> 187 ?scalar_style:Yamlrw.Scalar_style.t -> 188 ?layout_style:Yamlrw.Layout_style.t -> 189 - _ Eio.Flow.sink -> event_writer 190 - (** Create an event writer that writes directly to a flow. 191 - Events are written incrementally as they are emitted. 192 193 @param encoding Output encoding (default: UTF-8) 194 @param scalar_style Preferred scalar style (default: Any) ··· 211 ?max_nodes:int -> 212 ?max_depth:int -> 213 fs:_ Eio.Path.t -> 214 - string -> value 215 (** Read a value from a file path. 216 217 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 218 219 val yaml_of_file : 220 ?resolve_aliases:bool -> 221 ?max_nodes:int -> 222 ?max_depth:int -> 223 fs:_ Eio.Path.t -> 224 - string -> yaml 225 (** Read full YAML from a file path. 226 227 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 228 229 - val documents_of_file : 230 - fs:_ Eio.Path.t -> 231 - string -> document list 232 (** Read documents from a file path. 233 234 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 235 236 val to_file : 237 ?encoding:Yamlrw.Encoding.t -> 238 ?scalar_style:Yamlrw.Scalar_style.t -> 239 ?layout_style:Yamlrw.Layout_style.t -> 240 fs:_ Eio.Path.t -> 241 - string -> value -> unit 242 (** Write a value to a file path. 243 244 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 245 246 val yaml_to_file : 247 ?encoding:Yamlrw.Encoding.t -> 248 ?scalar_style:Yamlrw.Scalar_style.t -> 249 ?layout_style:Yamlrw.Layout_style.t -> 250 fs:_ Eio.Path.t -> 251 - string -> yaml -> unit 252 (** Write full YAML to a file path. 253 254 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 255 256 val documents_to_file : 257 ?encoding:Yamlrw.Encoding.t -> ··· 259 ?layout_style:Yamlrw.Layout_style.t -> 260 ?resolve_aliases:bool -> 261 fs:_ Eio.Path.t -> 262 - string -> document list -> unit 263 (** Write documents to a file path. 264 265 - @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
··· 5 6 (** Yamlrw Eio - Streaming YAML parsing and emitting with Eio 7 8 + This library provides Eio-based streaming support for YAML parsing and 9 + emitting. It uses bytesrw adapters that read/write directly to Eio flows, 10 + with bytesrw handling internal buffering. 11 12 {2 Quick Start} 13 ··· 24 Eio_main.run @@ fun env -> 25 let fs = Eio.Stdenv.fs env in 26 Eio.Path.with_open_out Eio.Path.(fs / "output.yaml") @@ fun flow -> 27 + Yaml_eio.Write.value flow (`O [ ("name", `String "test") ]) 28 ]} 29 30 Stream events incrementally: ··· 32 Eio_main.run @@ fun env -> 33 let fs = Eio.Stdenv.fs env in 34 Eio.Path.with_open_in Eio.Path.(fs / "data.yaml") @@ fun flow -> 35 + Yaml_eio.Read.iter_events 36 + (fun event span -> Format.printf "Event at %a@." Yamlrw.Span.pp span) 37 + flow 38 ]} 39 40 {2 Streaming Architecture} 41 42 This library uses bytesrw for direct I/O with Eio flows: 43 44 + - {b Reading}: Data is read directly from the flow as the parser requests 45 + it. Bytesrw handles internal buffering. 46 47 + - {b Writing}: Output is written directly to the flow. Bytesrw handles 48 + chunking and buffering. *) 49 50 (** {1 Types} *) 51 ··· 66 module Read : sig 67 (** Parse YAML from Eio flows. 68 69 + All functions read data incrementally from the underlying flow, without 70 + loading the entire file into memory first. *) 71 72 (** {2 High-Level Parsing} *) 73 ··· 75 ?resolve_aliases:bool -> 76 ?max_nodes:int -> 77 ?max_depth:int -> 78 + _ Eio.Flow.source -> 79 + value 80 (** Parse a JSON-compatible value from an Eio flow. 81 82 @param resolve_aliases Whether to expand aliases (default: true) ··· 87 ?resolve_aliases:bool -> 88 ?max_nodes:int -> 89 ?max_depth:int -> 90 + _ Eio.Flow.source -> 91 + yaml 92 (** Parse a full YAML value from an Eio flow. 93 94 By default, aliases are NOT resolved, preserving the document structure. ··· 103 (** {2 Event-Based Streaming} *) 104 105 type event_reader 106 + (** A streaming event reader backed by a flow. Events are parsed incrementally 107 + as requested. *) 108 109 val event_reader : _ Eio.Flow.source -> event_reader 110 (** Create an event reader from an Eio flow. *) 111 112 val next_event : event_reader -> Yamlrw.Event.spanned option 113 + (** Get the next event from an event reader. Returns [None] when parsing is 114 + complete. *) 115 116 val iter_events : 117 + (event -> Yamlrw.Span.t -> unit) -> _ Eio.Flow.source -> unit 118 (** Iterate over all events from a flow. *) 119 120 + val fold_events : ('a -> event -> 'a) -> 'a -> _ Eio.Flow.source -> 'a 121 (** Fold over all events from a flow. *) 122 123 + val iter_documents : (document -> unit) -> _ Eio.Flow.source -> unit 124 (** Iterate over documents from a flow, calling [f] for each document. *) 125 126 + val fold_documents : ('a -> document -> 'a) -> 'a -> _ Eio.Flow.source -> 'a 127 (** Fold over documents from a flow. *) 128 end 129 ··· 140 ?encoding:Yamlrw.Encoding.t -> 141 ?scalar_style:Yamlrw.Scalar_style.t -> 142 ?layout_style:Yamlrw.Layout_style.t -> 143 + _ Eio.Flow.sink -> 144 + value -> 145 + unit 146 (** Write a JSON-compatible value to an Eio flow. 147 148 @param encoding Output encoding (default: UTF-8) ··· 153 ?encoding:Yamlrw.Encoding.t -> 154 ?scalar_style:Yamlrw.Scalar_style.t -> 155 ?layout_style:Yamlrw.Layout_style.t -> 156 + _ Eio.Flow.sink -> 157 + yaml -> 158 + unit 159 (** Write a full YAML value to an Eio flow. 160 161 @param encoding Output encoding (default: UTF-8) ··· 167 ?scalar_style:Yamlrw.Scalar_style.t -> 168 ?layout_style:Yamlrw.Layout_style.t -> 169 ?resolve_aliases:bool -> 170 + _ Eio.Flow.sink -> 171 + document list -> 172 + unit 173 (** Write multiple YAML documents to an Eio flow. 174 175 @param encoding Output encoding (default: UTF-8) ··· 180 (** {2 Event-Based Streaming} *) 181 182 type event_writer 183 + (** A streaming event writer backed by a flow. Events are written 184 + incrementally to the underlying flow. *) 185 186 val event_writer : 187 ?encoding:Yamlrw.Encoding.t -> 188 ?scalar_style:Yamlrw.Scalar_style.t -> 189 ?layout_style:Yamlrw.Layout_style.t -> 190 + _ Eio.Flow.sink -> 191 + event_writer 192 + (** Create an event writer that writes directly to a flow. Events are written 193 + incrementally as they are emitted. 194 195 @param encoding Output encoding (default: UTF-8) 196 @param scalar_style Preferred scalar style (default: Any) ··· 213 ?max_nodes:int -> 214 ?max_depth:int -> 215 fs:_ Eio.Path.t -> 216 + string -> 217 + value 218 (** Read a value from a file path. 219 220 + @param fs 221 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 222 223 val yaml_of_file : 224 ?resolve_aliases:bool -> 225 ?max_nodes:int -> 226 ?max_depth:int -> 227 fs:_ Eio.Path.t -> 228 + string -> 229 + yaml 230 (** Read full YAML from a file path. 231 232 + @param fs 233 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 234 235 + val documents_of_file : fs:_ Eio.Path.t -> string -> document list 236 (** Read documents from a file path. 237 238 + @param fs 239 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 240 241 val to_file : 242 ?encoding:Yamlrw.Encoding.t -> 243 ?scalar_style:Yamlrw.Scalar_style.t -> 244 ?layout_style:Yamlrw.Layout_style.t -> 245 fs:_ Eio.Path.t -> 246 + string -> 247 + value -> 248 + unit 249 (** Write a value to a file path. 250 251 + @param fs 252 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 253 254 val yaml_to_file : 255 ?encoding:Yamlrw.Encoding.t -> 256 ?scalar_style:Yamlrw.Scalar_style.t -> 257 ?layout_style:Yamlrw.Layout_style.t -> 258 fs:_ Eio.Path.t -> 259 + string -> 260 + yaml -> 261 + unit 262 (** Write full YAML to a file path. 263 264 + @param fs 265 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *) 266 267 val documents_to_file : 268 ?encoding:Yamlrw.Encoding.t -> ··· 270 ?layout_style:Yamlrw.Layout_style.t -> 271 ?resolve_aliases:bool -> 272 fs:_ Eio.Path.t -> 273 + string -> 274 + document list -> 275 + unit 276 (** Write documents to a file path. 277 278 + @param fs 279 + The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
+209 -211
lib/emitter.ml
··· 5 6 (** Emitter - converts YAML data structures to string output 7 8 - The emitter can write to either a Buffer (default) or directly to a 9 - bytesrw Bytes.Writer for streaming output. *) 10 11 type config = { 12 encoding : Encoding.t; ··· 17 canonical : bool; 18 } 19 20 - let default_config = { 21 - encoding = `Utf8; 22 - scalar_style = `Any; 23 - layout_style = `Any; 24 - indent = 2; 25 - width = 80; 26 - canonical = false; 27 - } 28 29 type state = 30 | Initial 31 | Stream_started 32 | Document_started 33 - | In_block_sequence of int (* indent level *) 34 | In_block_mapping_key of int 35 | In_block_mapping_value of int 36 - | In_block_mapping_first_key of int (* first key after "- ", no indent needed *) 37 | In_flow_sequence 38 | In_flow_mapping_key 39 | In_flow_mapping_value ··· 41 | Stream_ended 42 43 (** Output sink - either a Buffer or a bytesrw Writer *) 44 - type sink = 45 - | Buffer_sink of Buffer.t 46 - | Writer_sink of Bytesrw.Bytes.Writer.t 47 48 type t = { 49 config : config; ··· 55 mutable need_separator : bool; 56 } 57 58 - let create ?(config = default_config) () = { 59 - config; 60 - sink = Buffer_sink (Buffer.create 1024); 61 - state = Initial; 62 - states = []; 63 - indent = 0; 64 - flow_level = 0; 65 - need_separator = false; 66 - } 67 68 (** Create an emitter that writes directly to a Bytes.Writer *) 69 - let of_writer ?(config = default_config) writer = { 70 - config; 71 - sink = Writer_sink writer; 72 - state = Initial; 73 - states = []; 74 - indent = 0; 75 - flow_level = 0; 76 - need_separator = false; 77 - } 78 79 let contents t = 80 match t.sink with 81 | Buffer_sink buf -> Buffer.contents buf 82 - | Writer_sink _ -> "" (* No accumulated content for writer sink *) 83 84 let reset t = 85 (match t.sink with 86 - | Buffer_sink buf -> Buffer.clear buf 87 - | Writer_sink _ -> ()); 88 t.state <- Initial; 89 t.states <- []; 90 t.indent <- 0; ··· 107 108 let write_indent t = 109 if t.indent <= 8 then 110 - for _ = 1 to t.indent do write_char t ' ' done 111 - else 112 - write t (String.make t.indent ' ') 113 114 - let write_newline t = 115 - write_char t '\n' 116 117 let push_state t s = 118 t.states <- t.state :: t.states; ··· 123 | s :: rest -> 124 t.state <- s; 125 t.states <- rest 126 - | [] -> 127 - t.state <- Stream_ended 128 129 - (** Escape a string for double-quoted output. 130 - Uses a buffer to batch writes instead of character-by-character. *) 131 let escape_double_quoted value = 132 let len = String.length value in 133 (* Check if any escaping is needed *) ··· 140 done; 141 if not !needs_escape then value 142 else begin 143 - let buf = Buffer.create (len + len / 4) in 144 for i = 0 to len - 1 do 145 match value.[i] with 146 | '"' -> Buffer.add_string buf "\\\"" ··· 148 | '\n' -> Buffer.add_string buf "\\n" 149 | '\r' -> Buffer.add_string buf "\\r" 150 | '\t' -> Buffer.add_string buf "\\t" 151 - | c when c < ' ' -> Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c)) 152 | c -> Buffer.add_char buf c 153 done; 154 Buffer.contents buf ··· 159 if not (String.contains value '\'') then value 160 else begin 161 let len = String.length value in 162 - let buf = Buffer.create (len + len / 8) in 163 for i = 0 to len - 1 do 164 let c = value.[i] in 165 - if c = '\'' then Buffer.add_string buf "''" 166 - else Buffer.add_char buf c 167 done; 168 Buffer.contents buf 169 end 170 171 (** Write scalar with appropriate quoting *) 172 let write_scalar t ?(style = `Any) value = 173 - match (match style with `Any -> Quoting.choose_style value | s -> s) with 174 - | `Plain | `Any -> 175 - write t value 176 | `Single_quoted -> 177 write_char t '\''; 178 write t (escape_single_quoted value); ··· 184 | `Literal -> 185 write t "|"; 186 write_newline t; 187 - String.split_on_char '\n' value |> List.iter (fun line -> 188 - write_indent t; 189 - write t line; 190 - write_newline t 191 - ) 192 | `Folded -> 193 write t ">"; 194 write_newline t; 195 - String.split_on_char '\n' value |> List.iter (fun line -> 196 - write_indent t; 197 - write t line; 198 - write_newline t 199 - ) 200 201 (** Write anchor if present *) 202 let write_anchor t anchor = ··· 221 222 let emit t (ev : Event.t) = 223 match ev with 224 - | Event.Stream_start _ -> 225 - t.state <- Stream_started 226 - 227 - | Event.Stream_end -> 228 - t.state <- Stream_ended 229 - 230 | Event.Document_start { version; implicit } -> 231 if not implicit then begin 232 (match version with 233 - | Some (maj, min) -> 234 - write t (Printf.sprintf "%%YAML %d.%d\n" maj min) 235 - | None -> ()); 236 write t "---"; 237 write_newline t 238 end; 239 t.state <- Document_started 240 - 241 | Event.Document_end { implicit } -> 242 if not implicit then begin 243 write t "..."; 244 write_newline t 245 end; 246 t.state <- Document_ended 247 - 248 | Event.Alias { anchor } -> 249 if t.flow_level > 0 then begin 250 if t.need_separator then write t ", "; 251 t.need_separator <- true; 252 write_char t '*'; 253 write t anchor 254 - end else begin 255 - (match t.state with 256 - | In_block_sequence _ -> 257 - write_indent t; 258 - write t "- *"; 259 - write t anchor; 260 - write_newline t 261 - | In_block_mapping_key _ -> 262 - write_indent t; 263 - write_char t '*'; 264 - write t anchor; 265 - write t ": "; 266 - t.state <- In_block_mapping_value t.indent 267 - | In_block_mapping_value indent -> 268 - write_char t '*'; 269 - write t anchor; 270 - write_newline t; 271 - t.state <- In_block_mapping_key indent 272 - | _ -> 273 - write_char t '*'; 274 - write t anchor; 275 - write_newline t) 276 end 277 - 278 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } -> 279 if t.flow_level > 0 then begin 280 - (match t.state with 281 - | In_flow_mapping_key -> 282 - if t.need_separator then write t ", "; 283 - write_anchor t anchor; 284 - write_tag t ~implicit:plain_implicit tag; 285 - write_scalar t ~style value; 286 - write t ": "; 287 - t.need_separator <- false; 288 - t.state <- In_flow_mapping_value 289 - | In_flow_mapping_value -> 290 - if t.need_separator then begin 291 - (* We just finished a nested structure (array/mapping), 292 so this scalar is the next key, not a value *) 293 - write t ", "; 294 - write_anchor t anchor; 295 - write_tag t ~implicit:plain_implicit tag; 296 - write_scalar t ~style value; 297 - write t ": "; 298 - t.need_separator <- false; 299 - t.state <- In_flow_mapping_value 300 - end else begin 301 - (* Normal value scalar *) 302 - write_anchor t anchor; 303 - write_tag t ~implicit:plain_implicit tag; 304 - write_scalar t ~style value; 305 - t.need_separator <- true; 306 - t.state <- In_flow_mapping_key 307 - end 308 - | _ -> 309 - if t.need_separator then write t ", "; 310 - t.need_separator <- true; 311 - write_anchor t anchor; 312 - write_tag t ~implicit:plain_implicit tag; 313 - write_scalar t ~style value) 314 - end else begin 315 match t.state with 316 | In_block_sequence _ -> 317 write_indent t; ··· 347 write_scalar t ~style value; 348 write_newline t 349 end 350 - 351 | Event.Sequence_start { anchor; tag; implicit; style } -> 352 let use_flow = style = `Flow || t.flow_level > 0 in 353 if t.flow_level > 0 then begin 354 - (match t.state with 355 - | In_flow_mapping_key -> 356 - if t.need_separator then write t ", "; 357 - write_anchor t anchor; 358 - write_tag t ~implicit tag; 359 - write_char t '['; 360 - t.flow_level <- t.flow_level + 1; 361 - t.need_separator <- false; 362 - push_state t In_flow_mapping_value; (* After ] we'll be in value position but sequence handles it *) 363 - t.state <- In_flow_sequence 364 - | In_flow_mapping_value -> 365 - write_anchor t anchor; 366 - write_tag t ~implicit tag; 367 - write_char t '['; 368 - t.flow_level <- t.flow_level + 1; 369 - t.need_separator <- false; 370 - push_state t In_flow_mapping_key; 371 - t.state <- In_flow_sequence 372 - | _ -> 373 - if t.need_separator then write t ", "; 374 - write_anchor t anchor; 375 - write_tag t ~implicit tag; 376 - write_char t '['; 377 - t.flow_level <- t.flow_level + 1; 378 - t.need_separator <- false; 379 - push_state t In_flow_sequence) 380 - end else begin 381 match t.state with 382 | In_block_sequence _ -> 383 write_indent t; ··· 389 t.flow_level <- t.flow_level + 1; 390 t.need_separator <- false; 391 push_state t In_flow_sequence 392 - end else begin 393 write_newline t; 394 push_state t (In_block_sequence t.indent); 395 t.indent <- t.indent + t.config.indent ··· 423 (* Save key state to return to after flow sequence *) 424 t.state <- In_block_mapping_key indent; 425 push_state t In_flow_sequence 426 - end else begin 427 write_newline t; 428 (* Save key state to return to after nested sequence *) 429 t.state <- In_block_mapping_key indent; ··· 438 t.flow_level <- t.flow_level + 1; 439 t.need_separator <- false; 440 push_state t In_flow_sequence 441 - end else begin 442 push_state t (In_block_sequence t.indent); 443 t.state <- In_block_sequence t.indent 444 end 445 end 446 - 447 | Event.Sequence_end -> 448 if t.flow_level > 0 then begin 449 write_char t ']'; ··· 451 t.need_separator <- true; 452 pop_state t; 453 (* Write newline if returning to block context *) 454 - (match t.state with 455 - | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 456 - | _ -> ()) 457 - end else begin 458 t.indent <- t.indent - t.config.indent; 459 pop_state t 460 end 461 - 462 | Event.Mapping_start { anchor; tag; implicit; style } -> 463 let use_flow = style = `Flow || t.flow_level > 0 in 464 if t.flow_level > 0 then begin 465 - (match t.state with 466 - | In_flow_mapping_key -> 467 - if t.need_separator then write t ", "; 468 - write_anchor t anchor; 469 - write_tag t ~implicit tag; 470 - write_char t '{'; 471 - t.flow_level <- t.flow_level + 1; 472 - t.need_separator <- false; 473 - push_state t In_flow_mapping_value; 474 - t.state <- In_flow_mapping_key 475 - | In_flow_mapping_value -> 476 - write_anchor t anchor; 477 - write_tag t ~implicit tag; 478 - write_char t '{'; 479 - t.flow_level <- t.flow_level + 1; 480 - t.need_separator <- false; 481 - push_state t In_flow_mapping_key; 482 - t.state <- In_flow_mapping_key 483 - | _ -> 484 - if t.need_separator then write t ", "; 485 - write_anchor t anchor; 486 - write_tag t ~implicit tag; 487 - write_char t '{'; 488 - t.flow_level <- t.flow_level + 1; 489 - t.need_separator <- false; 490 - push_state t In_flow_mapping_key) 491 - end else begin 492 match t.state with 493 | In_block_sequence _ -> 494 write_indent t; ··· 500 t.flow_level <- t.flow_level + 1; 501 t.need_separator <- false; 502 push_state t In_flow_mapping_key 503 - end else begin 504 (* Don't write newline - first key goes on same line as "- " *) 505 push_state t (In_block_sequence t.indent); 506 t.indent <- t.indent + t.config.indent; ··· 535 (* Save key state to return to after flow mapping *) 536 t.state <- In_block_mapping_key indent; 537 push_state t In_flow_mapping_key 538 - end else begin 539 write_newline t; 540 (* Save key state to return to after nested mapping *) 541 t.state <- In_block_mapping_key indent; ··· 550 t.flow_level <- t.flow_level + 1; 551 t.need_separator <- false; 552 push_state t In_flow_mapping_key 553 - end else begin 554 push_state t (In_block_mapping_key t.indent); 555 t.state <- In_block_mapping_key t.indent 556 end 557 end 558 - 559 | Event.Mapping_end -> 560 if t.flow_level > 0 then begin 561 write_char t '}'; ··· 563 t.need_separator <- true; 564 pop_state t; 565 (* Write newline if returning to block context *) 566 - (match t.state with 567 - | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 568 - | _ -> ()) 569 - end else begin 570 t.indent <- t.indent - t.config.indent; 571 pop_state t 572 end 573 574 - (** Access to the underlying buffer for advanced use. 575 - Returns None if emitter is writing to a Writer instead of Buffer. *) 576 let buffer t = 577 - match t.sink with 578 - | Buffer_sink buf -> Some buf 579 - | Writer_sink _ -> None 580 581 (** Get config *) 582 let config t = t.config 583 584 (** Check if emitter is writing to a Writer *) 585 let is_streaming t = 586 - match t.sink with 587 - | Writer_sink _ -> true 588 - | Buffer_sink _ -> false 589 590 (** Flush the writer sink (no-op for buffer sink) *) 591 let flush t =
··· 5 6 (** Emitter - converts YAML data structures to string output 7 8 + The emitter can write to either a Buffer (default) or directly to a bytesrw 9 + Bytes.Writer for streaming output. *) 10 11 type config = { 12 encoding : Encoding.t; ··· 17 canonical : bool; 18 } 19 20 + let default_config = 21 + { 22 + encoding = `Utf8; 23 + scalar_style = `Any; 24 + layout_style = `Any; 25 + indent = 2; 26 + width = 80; 27 + canonical = false; 28 + } 29 30 type state = 31 | Initial 32 | Stream_started 33 | Document_started 34 + | In_block_sequence of int (* indent level *) 35 | In_block_mapping_key of int 36 | In_block_mapping_value of int 37 + | In_block_mapping_first_key of 38 + int (* first key after "- ", no indent needed *) 39 | In_flow_sequence 40 | In_flow_mapping_key 41 | In_flow_mapping_value ··· 43 | Stream_ended 44 45 (** Output sink - either a Buffer or a bytesrw Writer *) 46 + type sink = Buffer_sink of Buffer.t | Writer_sink of Bytesrw.Bytes.Writer.t 47 48 type t = { 49 config : config; ··· 55 mutable need_separator : bool; 56 } 57 58 + let create ?(config = default_config) () = 59 + { 60 + config; 61 + sink = Buffer_sink (Buffer.create 1024); 62 + state = Initial; 63 + states = []; 64 + indent = 0; 65 + flow_level = 0; 66 + need_separator = false; 67 + } 68 69 (** Create an emitter that writes directly to a Bytes.Writer *) 70 + let of_writer ?(config = default_config) writer = 71 + { 72 + config; 73 + sink = Writer_sink writer; 74 + state = Initial; 75 + states = []; 76 + indent = 0; 77 + flow_level = 0; 78 + need_separator = false; 79 + } 80 81 let contents t = 82 match t.sink with 83 | Buffer_sink buf -> Buffer.contents buf 84 + | Writer_sink _ -> "" (* No accumulated content for writer sink *) 85 86 let reset t = 87 (match t.sink with 88 + | Buffer_sink buf -> Buffer.clear buf 89 + | Writer_sink _ -> ()); 90 t.state <- Initial; 91 t.states <- []; 92 t.indent <- 0; ··· 109 110 let write_indent t = 111 if t.indent <= 8 then 112 + for _ = 1 to t.indent do 113 + write_char t ' ' 114 + done 115 + else write t (String.make t.indent ' ') 116 117 + let write_newline t = write_char t '\n' 118 119 let push_state t s = 120 t.states <- t.state :: t.states; ··· 125 | s :: rest -> 126 t.state <- s; 127 t.states <- rest 128 + | [] -> t.state <- Stream_ended 129 130 + (** Escape a string for double-quoted output. Uses a buffer to batch writes 131 + instead of character-by-character. *) 132 let escape_double_quoted value = 133 let len = String.length value in 134 (* Check if any escaping is needed *) ··· 141 done; 142 if not !needs_escape then value 143 else begin 144 + let buf = Buffer.create (len + (len / 4)) in 145 for i = 0 to len - 1 do 146 match value.[i] with 147 | '"' -> Buffer.add_string buf "\\\"" ··· 149 | '\n' -> Buffer.add_string buf "\\n" 150 | '\r' -> Buffer.add_string buf "\\r" 151 | '\t' -> Buffer.add_string buf "\\t" 152 + | c when c < ' ' -> 153 + Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c)) 154 | c -> Buffer.add_char buf c 155 done; 156 Buffer.contents buf ··· 161 if not (String.contains value '\'') then value 162 else begin 163 let len = String.length value in 164 + let buf = Buffer.create (len + (len / 8)) in 165 for i = 0 to len - 1 do 166 let c = value.[i] in 167 + if c = '\'' then Buffer.add_string buf "''" else Buffer.add_char buf c 168 done; 169 Buffer.contents buf 170 end 171 172 (** Write scalar with appropriate quoting *) 173 let write_scalar t ?(style = `Any) value = 174 + match match style with `Any -> Quoting.choose_style value | s -> s with 175 + | `Plain | `Any -> write t value 176 | `Single_quoted -> 177 write_char t '\''; 178 write t (escape_single_quoted value); ··· 184 | `Literal -> 185 write t "|"; 186 write_newline t; 187 + String.split_on_char '\n' value 188 + |> List.iter (fun line -> 189 + write_indent t; 190 + write t line; 191 + write_newline t) 192 | `Folded -> 193 write t ">"; 194 write_newline t; 195 + String.split_on_char '\n' value 196 + |> List.iter (fun line -> 197 + write_indent t; 198 + write t line; 199 + write_newline t) 200 201 (** Write anchor if present *) 202 let write_anchor t anchor = ··· 221 222 let emit t (ev : Event.t) = 223 match ev with 224 + | Event.Stream_start _ -> t.state <- Stream_started 225 + | Event.Stream_end -> t.state <- Stream_ended 226 | Event.Document_start { version; implicit } -> 227 if not implicit then begin 228 (match version with 229 + | Some (maj, min) -> write t (Printf.sprintf "%%YAML %d.%d\n" maj min) 230 + | None -> ()); 231 write t "---"; 232 write_newline t 233 end; 234 t.state <- Document_started 235 | Event.Document_end { implicit } -> 236 if not implicit then begin 237 write t "..."; 238 write_newline t 239 end; 240 t.state <- Document_ended 241 | Event.Alias { anchor } -> 242 if t.flow_level > 0 then begin 243 if t.need_separator then write t ", "; 244 t.need_separator <- true; 245 write_char t '*'; 246 write t anchor 247 + end 248 + else begin 249 + match t.state with 250 + | In_block_sequence _ -> 251 + write_indent t; 252 + write t "- *"; 253 + write t anchor; 254 + write_newline t 255 + | In_block_mapping_key _ -> 256 + write_indent t; 257 + write_char t '*'; 258 + write t anchor; 259 + write t ": "; 260 + t.state <- In_block_mapping_value t.indent 261 + | In_block_mapping_value indent -> 262 + write_char t '*'; 263 + write t anchor; 264 + write_newline t; 265 + t.state <- In_block_mapping_key indent 266 + | _ -> 267 + write_char t '*'; 268 + write t anchor; 269 + write_newline t 270 end 271 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } -> 272 if t.flow_level > 0 then begin 273 + match t.state with 274 + | In_flow_mapping_key -> 275 + if t.need_separator then write t ", "; 276 + write_anchor t anchor; 277 + write_tag t ~implicit:plain_implicit tag; 278 + write_scalar t ~style value; 279 + write t ": "; 280 + t.need_separator <- false; 281 + t.state <- In_flow_mapping_value 282 + | In_flow_mapping_value -> 283 + if t.need_separator then begin 284 + (* We just finished a nested structure (array/mapping), 285 so this scalar is the next key, not a value *) 286 + write t ", "; 287 + write_anchor t anchor; 288 + write_tag t ~implicit:plain_implicit tag; 289 + write_scalar t ~style value; 290 + write t ": "; 291 + t.need_separator <- false; 292 + t.state <- In_flow_mapping_value 293 + end 294 + else begin 295 + (* Normal value scalar *) 296 + write_anchor t anchor; 297 + write_tag t ~implicit:plain_implicit tag; 298 + write_scalar t ~style value; 299 + t.need_separator <- true; 300 + t.state <- In_flow_mapping_key 301 + end 302 + | _ -> 303 + if t.need_separator then write t ", "; 304 + t.need_separator <- true; 305 + write_anchor t anchor; 306 + write_tag t ~implicit:plain_implicit tag; 307 + write_scalar t ~style value 308 + end 309 + else begin 310 match t.state with 311 | In_block_sequence _ -> 312 write_indent t; ··· 342 write_scalar t ~style value; 343 write_newline t 344 end 345 | Event.Sequence_start { anchor; tag; implicit; style } -> 346 let use_flow = style = `Flow || t.flow_level > 0 in 347 if t.flow_level > 0 then begin 348 + match t.state with 349 + | In_flow_mapping_key -> 350 + if t.need_separator then write t ", "; 351 + write_anchor t anchor; 352 + write_tag t ~implicit tag; 353 + write_char t '['; 354 + t.flow_level <- t.flow_level + 1; 355 + t.need_separator <- false; 356 + push_state t In_flow_mapping_value; 357 + (* After ] we'll be in value position but sequence handles it *) 358 + t.state <- In_flow_sequence 359 + | In_flow_mapping_value -> 360 + write_anchor t anchor; 361 + write_tag t ~implicit tag; 362 + write_char t '['; 363 + t.flow_level <- t.flow_level + 1; 364 + t.need_separator <- false; 365 + push_state t In_flow_mapping_key; 366 + t.state <- In_flow_sequence 367 + | _ -> 368 + if t.need_separator then write t ", "; 369 + write_anchor t anchor; 370 + write_tag t ~implicit tag; 371 + write_char t '['; 372 + t.flow_level <- t.flow_level + 1; 373 + t.need_separator <- false; 374 + push_state t In_flow_sequence 375 + end 376 + else begin 377 match t.state with 378 | In_block_sequence _ -> 379 write_indent t; ··· 385 t.flow_level <- t.flow_level + 1; 386 t.need_separator <- false; 387 push_state t In_flow_sequence 388 + end 389 + else begin 390 write_newline t; 391 push_state t (In_block_sequence t.indent); 392 t.indent <- t.indent + t.config.indent ··· 420 (* Save key state to return to after flow sequence *) 421 t.state <- In_block_mapping_key indent; 422 push_state t In_flow_sequence 423 + end 424 + else begin 425 write_newline t; 426 (* Save key state to return to after nested sequence *) 427 t.state <- In_block_mapping_key indent; ··· 436 t.flow_level <- t.flow_level + 1; 437 t.need_separator <- false; 438 push_state t In_flow_sequence 439 + end 440 + else begin 441 push_state t (In_block_sequence t.indent); 442 t.state <- In_block_sequence t.indent 443 end 444 end 445 | Event.Sequence_end -> 446 if t.flow_level > 0 then begin 447 write_char t ']'; ··· 449 t.need_separator <- true; 450 pop_state t; 451 (* Write newline if returning to block context *) 452 + match t.state with 453 + | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 454 + | _ -> () 455 + end 456 + else begin 457 t.indent <- t.indent - t.config.indent; 458 pop_state t 459 end 460 | Event.Mapping_start { anchor; tag; implicit; style } -> 461 let use_flow = style = `Flow || t.flow_level > 0 in 462 if t.flow_level > 0 then begin 463 + match t.state with 464 + | In_flow_mapping_key -> 465 + if t.need_separator then write t ", "; 466 + write_anchor t anchor; 467 + write_tag t ~implicit tag; 468 + write_char t '{'; 469 + t.flow_level <- t.flow_level + 1; 470 + t.need_separator <- false; 471 + push_state t In_flow_mapping_value; 472 + t.state <- In_flow_mapping_key 473 + | In_flow_mapping_value -> 474 + write_anchor t anchor; 475 + write_tag t ~implicit tag; 476 + write_char t '{'; 477 + t.flow_level <- t.flow_level + 1; 478 + t.need_separator <- false; 479 + push_state t In_flow_mapping_key; 480 + t.state <- In_flow_mapping_key 481 + | _ -> 482 + if t.need_separator then write t ", "; 483 + write_anchor t anchor; 484 + write_tag t ~implicit tag; 485 + write_char t '{'; 486 + t.flow_level <- t.flow_level + 1; 487 + t.need_separator <- false; 488 + push_state t In_flow_mapping_key 489 + end 490 + else begin 491 match t.state with 492 | In_block_sequence _ -> 493 write_indent t; ··· 499 t.flow_level <- t.flow_level + 1; 500 t.need_separator <- false; 501 push_state t In_flow_mapping_key 502 + end 503 + else begin 504 (* Don't write newline - first key goes on same line as "- " *) 505 push_state t (In_block_sequence t.indent); 506 t.indent <- t.indent + t.config.indent; ··· 535 (* Save key state to return to after flow mapping *) 536 t.state <- In_block_mapping_key indent; 537 push_state t In_flow_mapping_key 538 + end 539 + else begin 540 write_newline t; 541 (* Save key state to return to after nested mapping *) 542 t.state <- In_block_mapping_key indent; ··· 551 t.flow_level <- t.flow_level + 1; 552 t.need_separator <- false; 553 push_state t In_flow_mapping_key 554 + end 555 + else begin 556 push_state t (In_block_mapping_key t.indent); 557 t.state <- In_block_mapping_key t.indent 558 end 559 end 560 | Event.Mapping_end -> 561 if t.flow_level > 0 then begin 562 write_char t '}'; ··· 564 t.need_separator <- true; 565 pop_state t; 566 (* Write newline if returning to block context *) 567 + match t.state with 568 + | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 569 + | _ -> () 570 + end 571 + else begin 572 t.indent <- t.indent - t.config.indent; 573 pop_state t 574 end 575 576 + (** Access to the underlying buffer for advanced use. Returns None if emitter is 577 + writing to a Writer instead of Buffer. *) 578 let buffer t = 579 + match t.sink with Buffer_sink buf -> Some buf | Writer_sink _ -> None 580 581 (** Get config *) 582 let config t = t.config 583 584 (** Check if emitter is writing to a Writer *) 585 let is_streaming t = 586 + match t.sink with Writer_sink _ -> true | Buffer_sink _ -> false 587 588 (** Flush the writer sink (no-op for buffer sink) *) 589 let flush t =
+12 -20
lib/encoding.ml
··· 5 6 (** Character encoding detection and handling *) 7 8 - type t = [ 9 - | `Utf8 10 - | `Utf16be 11 - | `Utf16le 12 - | `Utf32be 13 - | `Utf32le 14 - ] 15 16 let to_string = function 17 | `Utf8 -> "UTF-8" ··· 20 | `Utf32be -> "UTF-32BE" 21 | `Utf32le -> "UTF-32LE" 22 23 - let pp fmt t = 24 - Format.pp_print_string fmt (to_string t) 25 26 - (** Detect encoding from BOM or first bytes. 27 - Returns (encoding, bom_length) *) 28 let detect s = 29 let len = String.length s in 30 if len = 0 then (`Utf8, 0) ··· 35 let b3 = if len > 3 then Char.code s.[3] else 0 in 36 match (b0, b1, b2, b3) with 37 (* BOM patterns *) 38 - | (0xEF, 0xBB, 0xBF, _) -> (`Utf8, 3) 39 - | (0xFE, 0xFF, _, _) -> (`Utf16be, 2) 40 - | (0xFF, 0xFE, 0x00, 0x00) -> (`Utf32le, 4) 41 - | (0xFF, 0xFE, _, _) -> (`Utf16le, 2) 42 - | (0x00, 0x00, 0xFE, 0xFF) -> (`Utf32be, 4) 43 (* Content pattern detection (no BOM) *) 44 - | (0x00, 0x00, 0x00, b3) when b3 <> 0x00 -> (`Utf32be, 0) 45 - | (b0, 0x00, 0x00, 0x00) when b0 <> 0x00 -> (`Utf32le, 0) 46 - | (0x00, b1, _, _) when b1 <> 0x00 -> (`Utf16be, 0) 47 - | (b0, 0x00, _, _) when b0 <> 0x00 -> (`Utf16le, 0) 48 | _ -> (`Utf8, 0) 49 50 let equal a b = a = b
··· 5 6 (** Character encoding detection and handling *) 7 8 + type t = [ `Utf8 | `Utf16be | `Utf16le | `Utf32be | `Utf32le ] 9 10 let to_string = function 11 | `Utf8 -> "UTF-8" ··· 14 | `Utf32be -> "UTF-32BE" 15 | `Utf32le -> "UTF-32LE" 16 17 + let pp fmt t = Format.pp_print_string fmt (to_string t) 18 19 + (** Detect encoding from BOM or first bytes. Returns (encoding, bom_length) *) 20 let detect s = 21 let len = String.length s in 22 if len = 0 then (`Utf8, 0) ··· 27 let b3 = if len > 3 then Char.code s.[3] else 0 in 28 match (b0, b1, b2, b3) with 29 (* BOM patterns *) 30 + | 0xEF, 0xBB, 0xBF, _ -> (`Utf8, 3) 31 + | 0xFE, 0xFF, _, _ -> (`Utf16be, 2) 32 + | 0xFF, 0xFE, 0x00, 0x00 -> (`Utf32le, 4) 33 + | 0xFF, 0xFE, _, _ -> (`Utf16le, 2) 34 + | 0x00, 0x00, 0xFE, 0xFF -> (`Utf32be, 4) 35 (* Content pattern detection (no BOM) *) 36 + | 0x00, 0x00, 0x00, b3 when b3 <> 0x00 -> (`Utf32be, 0) 37 + | b0, 0x00, 0x00, 0x00 when b0 <> 0x00 -> (`Utf32le, 0) 38 + | 0x00, b1, _, _ when b1 <> 0x00 -> (`Utf16be, 0) 39 + | b0, 0x00, _, _ when b0 <> 0x00 -> (`Utf16le, 0) 40 | _ -> (`Utf8, 0) 41 42 let equal a b = a = b
+135 -105
lib/error.ml
··· 7 8 Comprehensive error reporting for YAML parsing and emission. 9 10 - This module provides detailed error types that correspond to various 11 - failure modes in YAML processing, as specified in the 12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}. 13 14 Each error includes: ··· 17 - A context stack showing where the error occurred 18 - Optional source text for error display 19 20 - See also {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} 21 - for background on the YAML processing model. *) 22 23 (** {2 Error Classification} 24 ··· 31 (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *) 32 | Unexpected_character of char 33 (** Invalid character in input. See 34 - {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *) 35 - | Unexpected_eof 36 - (** Premature end of input. *) 37 | Invalid_escape_sequence of string 38 (** Invalid escape in double-quoted string. See 39 - {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *) 40 | Invalid_unicode_escape of string 41 (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *) 42 | Invalid_hex_escape of string 43 (** Invalid hexadecimal escape sequence (\xXX). *) 44 | Invalid_tag of string 45 (** Malformed tag syntax. See 46 - {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node Tags)}. *) 47 | Invalid_anchor of string 48 (** Malformed anchor name. See 49 - {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 50 | Invalid_alias of string 51 (** Malformed alias reference. See 52 - {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 53 | Invalid_comment 54 (** Comment not properly separated from content. See 55 - {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}. *) 56 | Unclosed_single_quote 57 (** Unterminated single-quoted scalar. See 58 - {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2 (Single-Quoted Style)}. *) 59 | Unclosed_double_quote 60 (** Unterminated double-quoted scalar. See 61 - {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3 (Double-Quoted Style)}. *) 62 | Unclosed_flow_sequence 63 (** Missing closing bracket \] for flow sequence. See 64 - {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *) 65 | Unclosed_flow_mapping 66 (** Missing closing brace \} for flow mapping. See 67 - {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *) 68 | Invalid_indentation of int * int 69 (** Incorrect indentation level (expected, got). See 70 - {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *) 71 | Invalid_flow_indentation 72 (** Content in flow collection must be indented. See 73 - {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow Styles)}. *) 74 | Tab_in_indentation 75 (** Tab character used for indentation (only spaces allowed). See 76 - {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *) 77 | Invalid_block_scalar_header of string 78 (** Malformed block scalar header (| or >). See 79 - {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 (Block Scalar Styles)}. *) 80 | Invalid_quoted_scalar_indentation of string 81 (** Incorrect indentation in quoted scalar. *) 82 | Invalid_directive of string 83 (** Malformed directive. See 84 - {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *) 85 | Invalid_yaml_version of string 86 (** Unsupported YAML version in %YAML directive. See 87 - {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 (YAML Directives)}. *) 88 | Invalid_tag_directive of string 89 (** Malformed %TAG directive. See 90 - {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG Directives)}. *) 91 | Reserved_directive of string 92 (** Reserved directive name. See 93 - {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3 (Reserved Directives)}. *) 94 | Illegal_flow_key_line 95 (** Key and colon must be on same line in flow context. See 96 - {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *) 97 | Block_sequence_disallowed 98 (** Block sequence entries not allowed in this context. See 99 - {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 (Block Collection Styles)}. *) 100 - 101 (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *) 102 - | Unexpected_token of string 103 - (** Unexpected token in event stream. *) 104 | Expected_document_start 105 (** Expected document start marker (---). See 106 - {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *) 107 | Expected_document_end 108 (** Expected document end marker (...). See 109 - {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *) 110 | Expected_block_entry 111 (** Expected block sequence entry marker (-). See 112 - {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 (Block Sequences)}. *) 113 | Expected_key 114 (** Expected mapping key. See 115 - {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *) 116 | Expected_value 117 (** Expected mapping value after colon. See 118 - {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *) 119 - | Expected_node 120 - (** Expected a YAML node. *) 121 - | Expected_scalar 122 - (** Expected a scalar value. *) 123 | Expected_sequence_end 124 (** Expected closing bracket \] for flow sequence. See 125 - {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *) 126 | Expected_mapping_end 127 (** Expected closing brace \} for flow mapping. See 128 - {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *) 129 | Duplicate_anchor of string 130 (** Anchor name defined multiple times. See 131 - {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 132 | Undefined_alias of string 133 (** Alias references non-existent anchor. See 134 - {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 135 | Alias_cycle of string 136 (** Circular reference in alias chain. See 137 - {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 138 | Multiple_documents 139 (** Multiple documents found when single document expected. See 140 - {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *) 141 | Mapping_key_too_long 142 (** Mapping key exceeds maximum length (1024 characters). *) 143 - 144 (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *) 145 | Invalid_scalar_conversion of string * string 146 - (** Cannot convert scalar value to target type (value, target type). 147 - See {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core Schema)}. *) 148 | Type_mismatch of string * string 149 (** Value has wrong type for operation (expected, got). *) 150 | Unresolved_alias of string 151 - (** Alias encountered during conversion but not resolved. 152 - See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *) 153 - | Key_not_found of string 154 - (** Mapping key not found. *) 155 | Alias_expansion_node_limit of int 156 - (** Alias expansion exceeded maximum node count (protection against billion laughs attack). 157 - See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}. 158 159 - The "billion laughs attack" (also known as an XML bomb) is a denial-of-service 160 - attack where a small YAML document expands to enormous size through recursive 161 - alias expansion. This limit prevents such attacks. *) 162 | Alias_expansion_depth_limit of int 163 - (** Alias expansion exceeded maximum nesting depth (protection against deeply nested aliases). 164 - See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}. *) 165 - 166 (* Emitter errors *) 167 | Invalid_encoding of string 168 (** Invalid character encoding specified. See 169 - {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *) 170 | Scalar_contains_invalid_chars of string 171 (** Scalar contains characters invalid for chosen style. *) 172 - | Anchor_not_set 173 - (** Attempted to emit alias before anchor was defined. *) 174 | Invalid_state of string 175 (** Emitter in invalid state for requested operation. *) 176 - 177 (* Generic *) 178 - | Custom of string 179 - (** Custom error message. *) 180 181 - (** {2 Error Value} 182 - 183 - Full error information including classification, location, and context. *) 184 type t = { 185 - kind : kind; 186 - (** The specific error classification. *) 187 span : Span.t option; 188 (** Source location where the error occurred (if available). *) 189 context : string list; ··· 191 source : string option; 192 (** Source text for displaying the error in context. *) 193 } 194 195 (** {2 Exception} 196 197 The main exception type raised by all yamlrw operations. 198 199 - All parsing, loading, and emitting errors are reported by raising 200 - this exception with detailed error information. *) 201 - exception Yamlrw_error of t 202 203 let () = 204 Printexc.register_printer (function 205 | Yamlrw_error e -> 206 - let loc = match e.span with 207 | None -> "" 208 | Some span -> " at " ^ Span.to_string span 209 in 210 - Some (Printf.sprintf "Yamlrw_error: %s%s" 211 - (match e.kind with Custom s -> s | _ -> "error") loc) 212 | _ -> None) 213 214 (** {2 Error Construction} *) ··· 219 @param context Context stack (defaults to empty) 220 @param source Source text 221 @param kind Error classification *) 222 - let make ?span ?(context=[]) ?source kind = 223 - { kind; span; context; source } 224 225 (** [raise ?span ?context ?source kind] constructs and raises an error. 226 ··· 248 @param span Source span 249 @param kind Error classification 250 @raise Yamlrw_error *) 251 - let raise_span span kind = 252 - raise ~span kind 253 254 - (** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's context. 255 256 This is useful for tracking the processing path through nested structures. 257 258 @param ctx Context description (e.g., "parsing mapping key") 259 @param f Function to execute *) 260 let with_context ctx f = 261 - try f () with 262 - | Yamlrw_error e -> 263 - Stdlib.raise (Yamlrw_error { e with context = ctx :: e.context }) 264 265 (** {2 Error Formatting} *) 266 ··· 274 | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s 275 | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s 276 | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s 277 - | Invalid_comment -> "comments must be separated from other tokens by whitespace" 278 | Unclosed_single_quote -> "unclosed single quote" 279 | Unclosed_double_quote -> "unclosed double quote" 280 | Unclosed_flow_sequence -> "unclosed flow sequence '['" ··· 285 | Tab_in_indentation -> "tab character in indentation" 286 | Invalid_block_scalar_header s -> 287 Printf.sprintf "invalid block scalar header: %s" s 288 - | Invalid_quoted_scalar_indentation s -> 289 - Printf.sprintf "%s" s 290 | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s 291 | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s 292 | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s 293 | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s 294 - | Illegal_flow_key_line -> "key and ':' must be on the same line in flow context" 295 - | Block_sequence_disallowed -> "block sequence entries are not allowed in this context" 296 | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s 297 | Expected_document_start -> "expected document start '---'" 298 | Expected_document_end -> "expected document end '...'" ··· 329 330 Includes error kind, source location (if available), and context stack. *) 331 let to_string t = 332 - let loc = match t.span with 333 - | None -> "" 334 - | Some span -> " at " ^ Span.to_string span 335 in 336 - let ctx = match t.context with 337 | [] -> "" 338 | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")" 339 in 340 kind_to_string t.kind ^ loc ^ ctx 341 342 (** [pp fmt t] pretty-prints an error to a formatter. *) 343 - let pp fmt t = 344 - Format.fprintf fmt "Yamlrw error: %s" (to_string t) 345 346 (** [pp_with_source ~source fmt t] pretty-prints an error with source context. 347 348 - Shows the error message followed by the relevant source line with 349 - a caret (^) pointing to the error location. 350 351 @param source The source text 352 @param fmt Output formatter 353 @param t The error to display *) 354 let pp_with_source ~source fmt t = 355 - let extract_line source line_num = 356 - let lines = String.split_on_char '\n' source in 357 - if line_num >= 1 && line_num <= List.length lines then 358 - Some (List.nth lines (line_num - 1)) 359 - else 360 - None 361 in 362 363 pp fmt t; 364 match t.span with 365 | None -> () 366 - | Some span -> 367 match extract_line source span.start.line with 368 | None -> () 369 | Some line -> 370 Format.fprintf fmt "\n %d | %s\n" span.start.line line; 371 let padding = String.make (span.start.column - 1) ' ' in 372 - Format.fprintf fmt " | %s^" padding
··· 7 8 Comprehensive error reporting for YAML parsing and emission. 9 10 + This module provides detailed error types that correspond to various failure 11 + modes in YAML processing, as specified in the 12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}. 13 14 Each error includes: ··· 17 - A context stack showing where the error occurred 18 - Optional source text for error display 19 20 + See also 21 + {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} for 22 + background on the YAML processing model. *) 23 24 (** {2 Error Classification} 25 ··· 32 (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *) 33 | Unexpected_character of char 34 (** Invalid character in input. See 35 + {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 36 + (Character Set)}. *) 37 + | Unexpected_eof (** Premature end of input. *) 38 | Invalid_escape_sequence of string 39 (** Invalid escape in double-quoted string. See 40 + {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 41 + (Escaped Characters)}. *) 42 | Invalid_unicode_escape of string 43 (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *) 44 | Invalid_hex_escape of string 45 (** Invalid hexadecimal escape sequence (\xXX). *) 46 | Invalid_tag of string 47 (** Malformed tag syntax. See 48 + {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node 49 + Tags)}. *) 50 | Invalid_anchor of string 51 (** Malformed anchor name. See 52 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 53 + 3.2.2.2 (Anchors and Aliases)}. *) 54 | Invalid_alias of string 55 (** Malformed alias reference. See 56 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 57 + 3.2.2.2 (Anchors and Aliases)}. *) 58 | Invalid_comment 59 (** Comment not properly separated from content. See 60 + {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}. 61 + *) 62 | Unclosed_single_quote 63 (** Unterminated single-quoted scalar. See 64 + {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2 65 + (Single-Quoted Style)}. *) 66 | Unclosed_double_quote 67 (** Unterminated double-quoted scalar. See 68 + {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3 69 + (Double-Quoted Style)}. *) 70 | Unclosed_flow_sequence 71 (** Missing closing bracket \] for flow sequence. See 72 + {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow 73 + Sequences)}. *) 74 | Unclosed_flow_mapping 75 (** Missing closing brace \} for flow mapping. See 76 + {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow 77 + Mappings)}. *) 78 | Invalid_indentation of int * int 79 (** Incorrect indentation level (expected, got). See 80 + {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 81 + (Indentation Spaces)}. *) 82 | Invalid_flow_indentation 83 (** Content in flow collection must be indented. See 84 + {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow 85 + Styles)}. *) 86 | Tab_in_indentation 87 (** Tab character used for indentation (only spaces allowed). See 88 + {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 89 + (Indentation Spaces)}. *) 90 | Invalid_block_scalar_header of string 91 (** Malformed block scalar header (| or >). See 92 + {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 93 + (Block Scalar Styles)}. *) 94 | Invalid_quoted_scalar_indentation of string 95 (** Incorrect indentation in quoted scalar. *) 96 | Invalid_directive of string 97 (** Malformed directive. See 98 + {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 99 + (Directives)}. *) 100 | Invalid_yaml_version of string 101 (** Unsupported YAML version in %YAML directive. See 102 + {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 103 + (YAML Directives)}. *) 104 | Invalid_tag_directive of string 105 (** Malformed %TAG directive. See 106 + {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG 107 + Directives)}. *) 108 | Reserved_directive of string 109 (** Reserved directive name. See 110 + {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3 111 + (Reserved Directives)}. *) 112 | Illegal_flow_key_line 113 (** Key and colon must be on same line in flow context. See 114 + {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow 115 + Mappings)}. *) 116 | Block_sequence_disallowed 117 (** Block sequence entries not allowed in this context. See 118 + {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 119 + (Block Collection Styles)}. *) 120 (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *) 121 + | Unexpected_token of string (** Unexpected token in event stream. *) 122 | Expected_document_start 123 (** Expected document start marker (---). See 124 + {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 125 + (Document Markers)}. *) 126 | Expected_document_end 127 (** Expected document end marker (...). See 128 + {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 129 + (Document Markers)}. *) 130 | Expected_block_entry 131 (** Expected block sequence entry marker (-). See 132 + {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 133 + (Block Sequences)}. *) 134 | Expected_key 135 (** Expected mapping key. See 136 + {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 137 + (Block Mappings)}. *) 138 | Expected_value 139 (** Expected mapping value after colon. See 140 + {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 141 + (Block Mappings)}. *) 142 + | Expected_node (** Expected a YAML node. *) 143 + | Expected_scalar (** Expected a scalar value. *) 144 | Expected_sequence_end 145 (** Expected closing bracket \] for flow sequence. See 146 + {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow 147 + Sequences)}. *) 148 | Expected_mapping_end 149 (** Expected closing brace \} for flow mapping. See 150 + {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow 151 + Mappings)}. *) 152 | Duplicate_anchor of string 153 (** Anchor name defined multiple times. See 154 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 155 + 3.2.2.2 (Anchors and Aliases)}. *) 156 | Undefined_alias of string 157 (** Alias references non-existent anchor. See 158 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 159 + 3.2.2.2 (Anchors and Aliases)}. *) 160 | Alias_cycle of string 161 (** Circular reference in alias chain. See 162 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 163 + 3.2.2.2 (Anchors and Aliases)}. *) 164 | Multiple_documents 165 (** Multiple documents found when single document expected. See 166 + {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 167 + (Document Markers)}. *) 168 | Mapping_key_too_long 169 (** Mapping key exceeds maximum length (1024 characters). *) 170 (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *) 171 | Invalid_scalar_conversion of string * string 172 + (** Cannot convert scalar value to target type (value, target type). See 173 + {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core 174 + Schema)}. *) 175 | Type_mismatch of string * string 176 (** Value has wrong type for operation (expected, got). *) 177 | Unresolved_alias of string 178 + (** Alias encountered during conversion but not resolved. See 179 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 180 + 3.2.2.2 (Anchors and Aliases)}. *) 181 + | Key_not_found of string (** Mapping key not found. *) 182 | Alias_expansion_node_limit of int 183 + (** Alias expansion exceeded maximum node count (protection against 184 + billion laughs attack). See 185 + {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 186 + (Processes)}. 187 188 + The "billion laughs attack" (also known as an XML bomb) is a 189 + denial-of-service attack where a small YAML document expands to 190 + enormous size through recursive alias expansion. This limit prevents 191 + such attacks. *) 192 | Alias_expansion_depth_limit of int 193 + (** Alias expansion exceeded maximum nesting depth (protection against 194 + deeply nested aliases). See 195 + {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 196 + (Processes)}. *) 197 (* Emitter errors *) 198 | Invalid_encoding of string 199 (** Invalid character encoding specified. See 200 + {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 201 + (Character Set)}. *) 202 | Scalar_contains_invalid_chars of string 203 (** Scalar contains characters invalid for chosen style. *) 204 + | Anchor_not_set (** Attempted to emit alias before anchor was defined. *) 205 | Invalid_state of string 206 (** Emitter in invalid state for requested operation. *) 207 (* Generic *) 208 + | Custom of string (** Custom error message. *) 209 210 type t = { 211 + kind : kind; (** The specific error classification. *) 212 span : Span.t option; 213 (** Source location where the error occurred (if available). *) 214 context : string list; ··· 216 source : string option; 217 (** Source text for displaying the error in context. *) 218 } 219 + (** {2 Error Value} 220 221 + Full error information including classification, location, and context. *) 222 + 223 + exception Yamlrw_error of t 224 (** {2 Exception} 225 226 The main exception type raised by all yamlrw operations. 227 228 + All parsing, loading, and emitting errors are reported by raising this 229 + exception with detailed error information. *) 230 231 let () = 232 Printexc.register_printer (function 233 | Yamlrw_error e -> 234 + let loc = 235 + match e.span with 236 | None -> "" 237 | Some span -> " at " ^ Span.to_string span 238 in 239 + Some 240 + (Printf.sprintf "Yamlrw_error: %s%s" 241 + (match e.kind with Custom s -> s | _ -> "error") 242 + loc) 243 | _ -> None) 244 245 (** {2 Error Construction} *) ··· 250 @param context Context stack (defaults to empty) 251 @param source Source text 252 @param kind Error classification *) 253 + let make ?span ?(context = []) ?source kind = { kind; span; context; source } 254 255 (** [raise ?span ?context ?source kind] constructs and raises an error. 256 ··· 278 @param span Source span 279 @param kind Error classification 280 @raise Yamlrw_error *) 281 + let raise_span span kind = raise ~span kind 282 283 + (** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's 284 + context. 285 286 This is useful for tracking the processing path through nested structures. 287 288 @param ctx Context description (e.g., "parsing mapping key") 289 @param f Function to execute *) 290 let with_context ctx f = 291 + try f () 292 + with Yamlrw_error e -> 293 + Stdlib.raise (Yamlrw_error { e with context = ctx :: e.context }) 294 295 (** {2 Error Formatting} *) 296 ··· 304 | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s 305 | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s 306 | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s 307 + | Invalid_comment -> 308 + "comments must be separated from other tokens by whitespace" 309 | Unclosed_single_quote -> "unclosed single quote" 310 | Unclosed_double_quote -> "unclosed double quote" 311 | Unclosed_flow_sequence -> "unclosed flow sequence '['" ··· 316 | Tab_in_indentation -> "tab character in indentation" 317 | Invalid_block_scalar_header s -> 318 Printf.sprintf "invalid block scalar header: %s" s 319 + | Invalid_quoted_scalar_indentation s -> Printf.sprintf "%s" s 320 | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s 321 | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s 322 | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s 323 | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s 324 + | Illegal_flow_key_line -> 325 + "key and ':' must be on the same line in flow context" 326 + | Block_sequence_disallowed -> 327 + "block sequence entries are not allowed in this context" 328 | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s 329 | Expected_document_start -> "expected document start '---'" 330 | Expected_document_end -> "expected document end '...'" ··· 361 362 Includes error kind, source location (if available), and context stack. *) 363 let to_string t = 364 + let loc = 365 + match t.span with None -> "" | Some span -> " at " ^ Span.to_string span 366 in 367 + let ctx = 368 + match t.context with 369 | [] -> "" 370 | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")" 371 in 372 kind_to_string t.kind ^ loc ^ ctx 373 374 (** [pp fmt t] pretty-prints an error to a formatter. *) 375 + let pp fmt t = Format.fprintf fmt "Yamlrw error: %s" (to_string t) 376 377 (** [pp_with_source ~source fmt t] pretty-prints an error with source context. 378 379 + Shows the error message followed by the relevant source line with a caret 380 + (^) pointing to the error location. 381 382 @param source The source text 383 @param fmt Output formatter 384 @param t The error to display *) 385 let pp_with_source ~source fmt t = 386 + let extract_line source line_num = 387 + let lines = String.split_on_char '\n' source in 388 + if line_num >= 1 && line_num <= List.length lines then 389 + Some (List.nth lines (line_num - 1)) 390 + else None 391 in 392 393 pp fmt t; 394 match t.span with 395 | None -> () 396 + | Some span -> ( 397 match extract_line source span.start.line with 398 | None -> () 399 | Some line -> 400 Format.fprintf fmt "\n %d | %s\n" span.start.line line; 401 let padding = String.make (span.start.column - 1) ' ' in 402 + Format.fprintf fmt " | %s^" padding)
+10 -18
lib/event.ml
··· 8 type t = 9 | Stream_start of { encoding : Encoding.t } 10 | Stream_end 11 - | Document_start of { 12 - version : (int * int) option; 13 - implicit : bool; 14 - } 15 | Document_end of { implicit : bool } 16 | Alias of { anchor : string } 17 | Scalar of { ··· 37 } 38 | Mapping_end 39 40 - type spanned = { 41 - event : t; 42 - span : Span.t; 43 - } 44 45 let pp_opt_str = Option.value ~default:"none" 46 47 let pp fmt = function 48 | Stream_start { encoding } -> 49 Format.fprintf fmt "stream-start(%a)" Encoding.pp encoding 50 - | Stream_end -> 51 - Format.fprintf fmt "stream-end" 52 | Document_start { version; implicit } -> 53 - let version_str = match version with 54 | None -> "none" 55 | Some (maj, min) -> Printf.sprintf "%d.%d" maj min 56 in 57 - Format.fprintf fmt "document-start(version=%s, implicit=%b)" version_str implicit 58 | Document_end { implicit } -> 59 Format.fprintf fmt "document-end(implicit=%b)" implicit 60 - | Alias { anchor } -> 61 - Format.fprintf fmt "alias(%s)" anchor 62 | Scalar { anchor; tag; value; style; _ } -> 63 Format.fprintf fmt "scalar(anchor=%s, tag=%s, style=%a, value=%S)" 64 (pp_opt_str anchor) (pp_opt_str tag) Scalar_style.pp style value 65 | Sequence_start { anchor; tag; implicit; style } -> 66 Format.fprintf fmt "sequence-start(anchor=%s, tag=%s, implicit=%b, style=%a)" 67 (pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style 68 - | Sequence_end -> 69 - Format.fprintf fmt "sequence-end" 70 | Mapping_start { anchor; tag; implicit; style } -> 71 Format.fprintf fmt "mapping-start(anchor=%s, tag=%s, implicit=%b, style=%a)" 72 (pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style 73 - | Mapping_end -> 74 - Format.fprintf fmt "mapping-end" 75 76 let pp_spanned fmt { event; span } = 77 Format.fprintf fmt "%a at %a" pp event Span.pp span
··· 8 type t = 9 | Stream_start of { encoding : Encoding.t } 10 | Stream_end 11 + | Document_start of { version : (int * int) option; implicit : bool } 12 | Document_end of { implicit : bool } 13 | Alias of { anchor : string } 14 | Scalar of { ··· 34 } 35 | Mapping_end 36 37 + type spanned = { event : t; span : Span.t } 38 39 let pp_opt_str = Option.value ~default:"none" 40 41 let pp fmt = function 42 | Stream_start { encoding } -> 43 Format.fprintf fmt "stream-start(%a)" Encoding.pp encoding 44 + | Stream_end -> Format.fprintf fmt "stream-end" 45 | Document_start { version; implicit } -> 46 + let version_str = 47 + match version with 48 | None -> "none" 49 | Some (maj, min) -> Printf.sprintf "%d.%d" maj min 50 in 51 + Format.fprintf fmt "document-start(version=%s, implicit=%b)" version_str 52 + implicit 53 | Document_end { implicit } -> 54 Format.fprintf fmt "document-end(implicit=%b)" implicit 55 + | Alias { anchor } -> Format.fprintf fmt "alias(%s)" anchor 56 | Scalar { anchor; tag; value; style; _ } -> 57 Format.fprintf fmt "scalar(anchor=%s, tag=%s, style=%a, value=%S)" 58 (pp_opt_str anchor) (pp_opt_str tag) Scalar_style.pp style value 59 | Sequence_start { anchor; tag; implicit; style } -> 60 Format.fprintf fmt "sequence-start(anchor=%s, tag=%s, implicit=%b, style=%a)" 61 (pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style 62 + | Sequence_end -> Format.fprintf fmt "sequence-end" 63 | Mapping_start { anchor; tag; implicit; style } -> 64 Format.fprintf fmt "mapping-start(anchor=%s, tag=%s, implicit=%b, style=%a)" 65 (pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style 66 + | Mapping_end -> Format.fprintf fmt "mapping-end" 67 68 let pp_spanned fmt { event; span } = 69 Format.fprintf fmt "%a at %a" pp event Span.pp span
+50 -71
lib/input.ml
··· 5 6 (** Character input source with lookahead, based on Bytes.Reader.t 7 8 - This module wraps a bytesrw [Bytes.Reader.t] to provide character-by-character 9 - access with lookahead for the YAML scanner. Uses bytesrw's sniff and push_back 10 - for efficient lookahead without excessive copying. 11 12 The same input type works with any reader source: strings, files, channels, 13 or streaming sources like Eio. *) 14 15 open Bytesrw 16 17 - (** Re-export character classification *) 18 include Char_class 19 20 type t = { 21 reader : Bytes.Reader.t; 22 - mutable current_slice : Bytes.Slice.t option; (** Current slice being consumed *) 23 - mutable slice_pos : int; (** Position within current slice *) 24 - mutable position : Position.t; (** Line/column tracking *) 25 } 26 27 (** Ensure we have a current slice. Returns true if data available. *) ··· 33 if Bytes.Slice.is_eod slice then begin 34 t.current_slice <- None; 35 false 36 - end else begin 37 t.current_slice <- Some slice; 38 t.slice_pos <- 0; 39 true ··· 50 51 (** Create input from a Bytes.Reader.t *) 52 let of_reader ?(initial_position = Position.initial) reader = 53 - let t = { 54 - reader; 55 - current_slice = None; 56 - slice_pos = 0; 57 - position = initial_position; 58 - } in 59 (* Use sniff for BOM detection - this is exactly what sniff is for *) 60 let sample = Bytes.Reader.sniff 4 t.reader in 61 let bom_len = 62 - if String.length sample >= 3 && 63 - sample.[0] = '\xEF' && 64 - sample.[1] = '\xBB' && 65 - sample.[2] = '\xBF' 66 - then 3 (* UTF-8 BOM *) 67 else 0 68 in 69 (* Skip BOM if present *) 70 - if bom_len > 0 then 71 - Bytes.Reader.skip bom_len t.reader; 72 t 73 74 (** Create input from a string *) ··· 77 of_reader reader 78 79 let position t = t.position 80 - 81 - let is_eof t = 82 - not (ensure_slice t) 83 - 84 - let peek t = 85 - if ensure_slice t then 86 - peek_current t 87 - else 88 - None 89 90 let peek_exn t = 91 match peek t with ··· 112 let sample_offset = n - slice_remaining in 113 if sample_offset < String.length sample then 114 Some sample.[sample_offset] 115 - else 116 - None 117 end 118 - | None -> 119 - if n < String.length sample then 120 - Some sample.[n] 121 - else 122 - None 123 end 124 125 (** Peek at up to n characters as a string *) ··· 139 let needed_from_reader = n - slice_remaining in 140 let sample = Bytes.Reader.sniff needed_from_reader t.reader in 141 let buf = Buffer.create n in 142 - Buffer.add_subbytes buf slice_bytes (slice_first + t.slice_pos) slice_remaining; 143 Buffer.add_string buf sample; 144 Buffer.contents buf 145 end 146 - | None -> 147 - if ensure_slice t then 148 - peek_string t n 149 - else 150 - "" 151 end 152 153 (** Consume next character *) ··· 161 t.slice_pos <- t.slice_pos + 1; 162 t.position <- Position.advance_char c t.position; 163 (* Check if we've exhausted this slice *) 164 - if t.slice_pos >= Bytes.Slice.length slice then 165 - t.current_slice <- None; 166 Some c 167 | None -> None 168 - end else 169 - None 170 171 let next_exn t = 172 match next t with ··· 181 let skip_while t pred = 182 let rec loop () = 183 match peek t with 184 - | Some c when pred c -> ignore (next t); loop () 185 | _ -> () 186 in 187 loop () 188 189 (** Check if next char satisfies predicate *) 190 - let next_is pred t = 191 - match peek t with 192 - | None -> false 193 - | Some c -> pred c 194 195 let next_is_break t = next_is is_break t 196 let next_is_blank t = next_is is_blank t ··· 209 if len < 3 then false 210 else 211 let prefix = String.sub s 0 3 in 212 - (prefix = "---" || prefix = "...") && 213 - (len = 3 || is_whitespace s.[3]) 214 end 215 216 (** Consume line break, handling \r\n as single break *) 217 let consume_break t = 218 match peek t with 219 - | Some '\r' -> 220 ignore (next t); 221 - (match peek t with 222 - | Some '\n' -> ignore (next t) 223 - | _ -> ()) 224 - | Some '\n' -> 225 - ignore (next t) 226 | _ -> () 227 228 (** Get remaining content from current position *) ··· 230 let buf = Buffer.create 256 in 231 (* Add current slice remainder *) 232 (match t.current_slice with 233 - | Some slice -> 234 - let bytes = Bytes.Slice.bytes slice in 235 - let first = Bytes.Slice.first slice in 236 - let remaining = Bytes.Slice.length slice - t.slice_pos in 237 - if remaining > 0 then 238 - Buffer.add_subbytes buf bytes (first + t.slice_pos) remaining 239 - | None -> ()); 240 (* Add remaining from reader *) 241 Bytes.Reader.add_to_buffer buf t.reader; 242 Buffer.contents buf ··· 253 Some (Stdlib.Bytes.get bytes (first + t.slice_pos - 1)) 254 | _ -> None 255 256 - (** Get a sample of the source for encoding detection. 257 - Uses sniff to peek without consuming. *) 258 let source t = 259 (* First check current slice *) 260 match t.current_slice with ··· 268 Bytes.Reader.sniff 4 t.reader 269 270 (** Get the byte position in the underlying stream *) 271 - let byte_pos t = 272 - Bytes.Reader.pos t.reader
··· 5 6 (** Character input source with lookahead, based on Bytes.Reader.t 7 8 + This module wraps a bytesrw [Bytes.Reader.t] to provide 9 + character-by-character access with lookahead for the YAML scanner. Uses 10 + bytesrw's sniff and push_back for efficient lookahead without excessive 11 + copying. 12 13 The same input type works with any reader source: strings, files, channels, 14 or streaming sources like Eio. *) 15 16 open Bytesrw 17 18 include Char_class 19 + (** Re-export character classification *) 20 21 type t = { 22 reader : Bytes.Reader.t; 23 + mutable current_slice : Bytes.Slice.t option; 24 + (** Current slice being consumed *) 25 + mutable slice_pos : int; (** Position within current slice *) 26 + mutable position : Position.t; (** Line/column tracking *) 27 } 28 29 (** Ensure we have a current slice. Returns true if data available. *) ··· 35 if Bytes.Slice.is_eod slice then begin 36 t.current_slice <- None; 37 false 38 + end 39 + else begin 40 t.current_slice <- Some slice; 41 t.slice_pos <- 0; 42 true ··· 53 54 (** Create input from a Bytes.Reader.t *) 55 let of_reader ?(initial_position = Position.initial) reader = 56 + let t = 57 + { reader; current_slice = None; slice_pos = 0; position = initial_position } 58 + in 59 (* Use sniff for BOM detection - this is exactly what sniff is for *) 60 let sample = Bytes.Reader.sniff 4 t.reader in 61 let bom_len = 62 + if 63 + String.length sample >= 3 64 + && sample.[0] = '\xEF' 65 + && sample.[1] = '\xBB' 66 + && sample.[2] = '\xBF' 67 + then 3 (* UTF-8 BOM *) 68 else 0 69 in 70 (* Skip BOM if present *) 71 + if bom_len > 0 then Bytes.Reader.skip bom_len t.reader; 72 t 73 74 (** Create input from a string *) ··· 77 of_reader reader 78 79 let position t = t.position 80 + let is_eof t = not (ensure_slice t) 81 + let peek t = if ensure_slice t then peek_current t else None 82 83 let peek_exn t = 84 match peek t with ··· 105 let sample_offset = n - slice_remaining in 106 if sample_offset < String.length sample then 107 Some sample.[sample_offset] 108 + else None 109 end 110 + | None -> if n < String.length sample then Some sample.[n] else None 111 end 112 113 (** Peek at up to n characters as a string *) ··· 127 let needed_from_reader = n - slice_remaining in 128 let sample = Bytes.Reader.sniff needed_from_reader t.reader in 129 let buf = Buffer.create n in 130 + Buffer.add_subbytes buf slice_bytes 131 + (slice_first + t.slice_pos) 132 + slice_remaining; 133 Buffer.add_string buf sample; 134 Buffer.contents buf 135 end 136 + | None -> if ensure_slice t then peek_string t n else "" 137 end 138 139 (** Consume next character *) ··· 147 t.slice_pos <- t.slice_pos + 1; 148 t.position <- Position.advance_char c t.position; 149 (* Check if we've exhausted this slice *) 150 + if t.slice_pos >= Bytes.Slice.length slice then t.current_slice <- None; 151 Some c 152 | None -> None 153 + end 154 + else None 155 156 let next_exn t = 157 match next t with ··· 166 let skip_while t pred = 167 let rec loop () = 168 match peek t with 169 + | Some c when pred c -> 170 + ignore (next t); 171 + loop () 172 | _ -> () 173 in 174 loop () 175 176 (** Check if next char satisfies predicate *) 177 + let next_is pred t = match peek t with None -> false | Some c -> pred c 178 179 let next_is_break t = next_is is_break t 180 let next_is_blank t = next_is is_blank t ··· 193 if len < 3 then false 194 else 195 let prefix = String.sub s 0 3 in 196 + (prefix = "---" || prefix = "...") && (len = 3 || is_whitespace s.[3]) 197 end 198 199 (** Consume line break, handling \r\n as single break *) 200 let consume_break t = 201 match peek t with 202 + | Some '\r' -> ( 203 ignore (next t); 204 + match peek t with Some '\n' -> ignore (next t) | _ -> ()) 205 + | Some '\n' -> ignore (next t) 206 | _ -> () 207 208 (** Get remaining content from current position *) ··· 210 let buf = Buffer.create 256 in 211 (* Add current slice remainder *) 212 (match t.current_slice with 213 + | Some slice -> 214 + let bytes = Bytes.Slice.bytes slice in 215 + let first = Bytes.Slice.first slice in 216 + let remaining = Bytes.Slice.length slice - t.slice_pos in 217 + if remaining > 0 then 218 + Buffer.add_subbytes buf bytes (first + t.slice_pos) remaining 219 + | None -> ()); 220 (* Add remaining from reader *) 221 Bytes.Reader.add_to_buffer buf t.reader; 222 Buffer.contents buf ··· 233 Some (Stdlib.Bytes.get bytes (first + t.slice_pos - 1)) 234 | _ -> None 235 236 + (** Get a sample of the source for encoding detection. Uses sniff to peek 237 + without consuming. *) 238 let source t = 239 (* First check current slice *) 240 match t.current_slice with ··· 248 Bytes.Reader.sniff 4 t.reader 249 250 (** Get the byte position in the underlying stream *) 251 + let byte_pos t = Bytes.Reader.pos t.reader
+7 -18
lib/layout_style.ml
··· 5 6 (** Collection layout styles *) 7 8 - type t = [ 9 - | `Any (** Let emitter choose *) 10 - | `Block (** Indentation-based *) 11 - | `Flow (** Inline with brackets *) 12 - ] 13 14 - let to_string = function 15 - | `Any -> "any" 16 - | `Block -> "block" 17 - | `Flow -> "flow" 18 - 19 - let pp fmt t = 20 - Format.pp_print_string fmt (to_string t) 21 - 22 let equal a b = a = b 23 24 let compare a b = 25 - let to_int = function 26 - | `Any -> 0 27 - | `Block -> 1 28 - | `Flow -> 2 29 - in 30 Int.compare (to_int a) (to_int b)
··· 5 6 (** Collection layout styles *) 7 8 + type t = 9 + [ `Any (** Let emitter choose *) 10 + | `Block (** Indentation-based *) 11 + | `Flow (** Inline with brackets *) ] 12 13 + let to_string = function `Any -> "any" | `Block -> "block" | `Flow -> "flow" 14 + let pp fmt t = Format.pp_print_string fmt (to_string t) 15 let equal a b = a = b 16 17 let compare a b = 18 + let to_int = function `Any -> 0 | `Block -> 1 | `Flow -> 2 in 19 Int.compare (to_int a) (to_int b)
+138 -162
lib/loader.ml
··· 31 mutable doc_implicit_start : bool; 32 } 33 34 - let create_state () = { 35 - stack = []; 36 - current = None; 37 - documents = []; 38 - doc_version = None; 39 - doc_implicit_start = true; 40 - } 41 42 (** Process a single event *) 43 let rec process_event state (ev : Event.spanned) = 44 match ev.event with 45 | Event.Stream_start _ -> () 46 - 47 | Event.Stream_end -> () 48 - 49 | Event.Document_start { version; implicit } -> 50 state.doc_version <- version; 51 state.doc_implicit_start <- implicit 52 - 53 | Event.Document_end { implicit } -> 54 - let doc = Document.make 55 - ?version:state.doc_version 56 - ~implicit_start:state.doc_implicit_start 57 - ~implicit_end:implicit 58 - state.current 59 in 60 state.documents <- doc :: state.documents; 61 state.current <- None; 62 state.doc_version <- None; 63 state.doc_implicit_start <- true 64 - 65 | Event.Alias { anchor } -> 66 let node : Yaml.t = `Alias anchor in 67 add_node state node 68 - 69 - | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } -> 70 - let scalar = Scalar.make 71 - ?anchor ?tag 72 - ~plain_implicit ~quoted_implicit 73 - ~style value 74 in 75 let node : Yaml.t = `Scalar scalar in 76 add_node state node 77 - 78 | Event.Sequence_start { anchor; tag; implicit; style } -> 79 - let frame = Sequence_frame { 80 - anchor; tag; implicit; style; 81 - items = []; 82 - } in 83 state.stack <- frame :: state.stack 84 - 85 - | Event.Sequence_end -> 86 - (match state.stack with 87 - | Sequence_frame { anchor; tag; implicit; style; items } :: rest -> 88 - let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in 89 - let node : Yaml.t = `A seq in 90 - state.stack <- rest; 91 - add_node state node 92 - | _ -> Error.raise (Invalid_state "unexpected sequence end")) 93 - 94 | Event.Mapping_start { anchor; tag; implicit; style } -> 95 - let frame = Mapping_frame { 96 - anchor; tag; implicit; style; 97 - pairs = []; 98 - pending_key = None; 99 - } in 100 state.stack <- frame :: state.stack 101 - 102 - | Event.Mapping_end -> 103 - (match state.stack with 104 - | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest -> 105 - let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in 106 - let node : Yaml.t = `O map in 107 - state.stack <- rest; 108 - add_node state node 109 - | Mapping_frame { pending_key = Some _; _ } :: _ -> 110 - Error.raise (Invalid_state "mapping ended with pending key") 111 - | _ -> Error.raise (Invalid_state "unexpected mapping end")) 112 113 (** Add a node to current context *) 114 and add_node state node = 115 match state.stack with 116 - | [] -> 117 - state.current <- Some node 118 - 119 | Sequence_frame f :: rest -> 120 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest 121 - 122 - | Mapping_frame f :: rest -> 123 - (match f.pending_key with 124 - | None -> 125 - (* This is a key *) 126 - state.stack <- Mapping_frame { f with pending_key = Some node } :: rest 127 - | Some key -> 128 - (* This is a value *) 129 - state.stack <- Mapping_frame { 130 - f with 131 - pairs = (key, node) :: f.pairs; 132 - pending_key = None; 133 - } :: rest) 134 135 (** Internal: parse all documents from a parser *) 136 let parse_all_documents parser = ··· 149 150 @param resolve_aliases Whether to resolve aliases (default true) 151 @param max_nodes Maximum nodes during alias expansion (default 10M) 152 - @param max_depth Maximum alias nesting depth (default 100) 153 - *) 154 - let value_of_string 155 - ?(resolve_aliases = true) 156 ?(max_nodes = Yaml.default_max_alias_nodes) 157 - ?(max_depth = Yaml.default_max_alias_depth) 158 - s = 159 let docs = parse_all_documents (Parser.of_string s) in 160 let doc = single_document_or_error docs ~empty:(Document.make None) in 161 match Document.root doc with 162 | None -> `Null 163 | Some yaml -> 164 - Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml 165 166 (** Load single document as Yaml. 167 168 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t) 169 @param max_nodes Maximum nodes during alias expansion (default 10M) 170 - @param max_depth Maximum alias nesting depth (default 100) 171 - *) 172 - let yaml_of_string 173 - ?(resolve_aliases = false) 174 ?(max_nodes = Yaml.default_max_alias_nodes) 175 - ?(max_depth = Yaml.default_max_alias_depth) 176 - s = 177 let docs = parse_all_documents (Parser.of_string s) in 178 let doc = single_document_or_error docs ~empty:(Document.make None) in 179 match Document.root doc with 180 | None -> `Scalar (Scalar.make "") 181 | Some yaml -> 182 - if resolve_aliases then 183 - Yaml.resolve_aliases ~max_nodes ~max_depth yaml 184 - else 185 - yaml 186 187 (** Load all documents *) 188 let documents_of_string s = ··· 194 195 @param resolve_aliases Whether to resolve aliases (default true) 196 @param max_nodes Maximum nodes during alias expansion (default 10M) 197 - @param max_depth Maximum alias nesting depth (default 100) 198 - *) 199 - let value_of_reader 200 - ?(resolve_aliases = true) 201 ?(max_nodes = Yaml.default_max_alias_nodes) 202 - ?(max_depth = Yaml.default_max_alias_depth) 203 - reader = 204 let docs = parse_all_documents (Parser.of_reader reader) in 205 let doc = single_document_or_error docs ~empty:(Document.make None) in 206 match Document.root doc with 207 | None -> `Null 208 | Some yaml -> 209 - Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml 210 211 (** Load single document as Yaml from a Bytes.Reader. 212 213 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t) 214 @param max_nodes Maximum nodes during alias expansion (default 10M) 215 - @param max_depth Maximum alias nesting depth (default 100) 216 - *) 217 - let yaml_of_reader 218 - ?(resolve_aliases = false) 219 ?(max_nodes = Yaml.default_max_alias_nodes) 220 - ?(max_depth = Yaml.default_max_alias_depth) 221 - reader = 222 let docs = parse_all_documents (Parser.of_reader reader) in 223 let doc = single_document_or_error docs ~empty:(Document.make None) in 224 match Document.root doc with 225 | None -> `Scalar (Scalar.make "") 226 | Some yaml -> 227 - if resolve_aliases then 228 - Yaml.resolve_aliases ~max_nodes ~max_depth yaml 229 - else 230 - yaml 231 232 (** Load all documents from a Bytes.Reader *) 233 let documents_of_reader reader = ··· 245 let rec loop () = 246 match next_event () with 247 | None -> None 248 - | Some ev -> 249 process_event state ev; 250 match ev.event with 251 - | Event.Document_end _ -> 252 - (match state.documents with 253 - | doc :: _ -> 254 - state.documents <- []; 255 - Some (extract doc) 256 - | [] -> None) 257 | Event.Stream_end -> None 258 - | _ -> loop () 259 in 260 loop () 261 ··· 267 268 @param resolve_aliases Whether to resolve aliases (default true) 269 @param max_nodes Maximum nodes during alias expansion (default 10M) 270 - @param max_depth Maximum alias nesting depth (default 100) 271 - *) 272 - let load_value 273 - ?(resolve_aliases = true) 274 ?(max_nodes = Yaml.default_max_alias_nodes) 275 - ?(max_depth = Yaml.default_max_alias_depth) 276 - parser = 277 - load_generic (fun doc -> 278 - match Document.root doc with 279 - | None -> `Null 280 - | Some yaml -> 281 - Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml 282 - ) parser 283 284 (** Load single Yaml from parser *) 285 let load_yaml parser = 286 - load_generic (fun doc -> 287 - Document.root doc |> Option.value ~default:(`Scalar (Scalar.make "")) 288 - ) parser 289 290 (** Load single Document from parser *) 291 - let load_document parser = 292 - load_generic Fun.id parser 293 294 (** Iterate over documents *) 295 let iter_documents f parser = 296 let rec loop () = 297 match load_document parser with 298 | None -> () 299 - | Some doc -> f doc; loop () 300 in 301 loop () 302 303 (** Fold over documents *) 304 let fold_documents f init parser = 305 let rec loop acc = 306 - match load_document parser with 307 - | None -> acc 308 - | Some doc -> loop (f acc doc) 309 in 310 loop init 311 312 (** Load single Value from event source. 313 314 @param resolve_aliases Whether to resolve aliases (default true) 315 @param max_nodes Maximum nodes during alias expansion (default 10M) 316 - @param max_depth Maximum alias nesting depth (default 100) 317 - *) 318 - let value_of_parser 319 - ?(resolve_aliases = true) 320 ?(max_nodes = Yaml.default_max_alias_nodes) 321 - ?(max_depth = Yaml.default_max_alias_depth) 322 - next_event = 323 - match load_generic_fn (fun doc -> 324 - match Document.root doc with 325 - | None -> `Null 326 - | Some yaml -> 327 - Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml 328 - ) next_event with 329 | Some v -> v 330 | None -> `Null 331 ··· 333 334 @param resolve_aliases Whether to resolve aliases (default false) 335 @param max_nodes Maximum nodes during alias expansion (default 10M) 336 - @param max_depth Maximum alias nesting depth (default 100) 337 - *) 338 - let yaml_of_parser 339 - ?(resolve_aliases = false) 340 ?(max_nodes = Yaml.default_max_alias_nodes) 341 - ?(max_depth = Yaml.default_max_alias_depth) 342 - next_event = 343 - match load_generic_fn (fun doc -> 344 - match Document.root doc with 345 - | None -> `Scalar (Scalar.make "") 346 - | Some yaml -> 347 - if resolve_aliases then 348 - Yaml.resolve_aliases ~max_nodes ~max_depth yaml 349 - else 350 - yaml 351 - ) next_event with 352 | Some v -> v 353 | None -> `Scalar (Scalar.make "") 354 355 (** Load single Document from event source *) 356 - let document_of_parser next_event = 357 - load_generic_fn Fun.id next_event 358 359 (** Load all documents from event source *) 360 let documents_of_parser next_event = ··· 373 let rec loop () = 374 match document_of_parser next_event with 375 | None -> () 376 - | Some doc -> f doc; loop () 377 in 378 loop () 379
··· 31 mutable doc_implicit_start : bool; 32 } 33 34 + let create_state () = 35 + { 36 + stack = []; 37 + current = None; 38 + documents = []; 39 + doc_version = None; 40 + doc_implicit_start = true; 41 + } 42 43 (** Process a single event *) 44 let rec process_event state (ev : Event.spanned) = 45 match ev.event with 46 | Event.Stream_start _ -> () 47 | Event.Stream_end -> () 48 | Event.Document_start { version; implicit } -> 49 state.doc_version <- version; 50 state.doc_implicit_start <- implicit 51 | Event.Document_end { implicit } -> 52 + let doc = 53 + Document.make ?version:state.doc_version 54 + ~implicit_start:state.doc_implicit_start ~implicit_end:implicit 55 + state.current 56 in 57 state.documents <- doc :: state.documents; 58 state.current <- None; 59 state.doc_version <- None; 60 state.doc_implicit_start <- true 61 | Event.Alias { anchor } -> 62 let node : Yaml.t = `Alias anchor in 63 add_node state node 64 + | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } 65 + -> 66 + let scalar = 67 + Scalar.make ?anchor ?tag ~plain_implicit ~quoted_implicit ~style value 68 in 69 let node : Yaml.t = `Scalar scalar in 70 add_node state node 71 | Event.Sequence_start { anchor; tag; implicit; style } -> 72 + let frame = Sequence_frame { anchor; tag; implicit; style; items = [] } in 73 state.stack <- frame :: state.stack 74 + | Event.Sequence_end -> ( 75 + match state.stack with 76 + | Sequence_frame { anchor; tag; implicit; style; items } :: rest -> 77 + let seq = 78 + Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) 79 + in 80 + let node : Yaml.t = `A seq in 81 + state.stack <- rest; 82 + add_node state node 83 + | _ -> Error.raise (Invalid_state "unexpected sequence end")) 84 | Event.Mapping_start { anchor; tag; implicit; style } -> 85 + let frame = 86 + Mapping_frame 87 + { anchor; tag; implicit; style; pairs = []; pending_key = None } 88 + in 89 state.stack <- frame :: state.stack 90 + | Event.Mapping_end -> ( 91 + match state.stack with 92 + | Mapping_frame 93 + { anchor; tag; implicit; style; pairs; pending_key = None } 94 + :: rest -> 95 + let map = 96 + Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) 97 + in 98 + let node : Yaml.t = `O map in 99 + state.stack <- rest; 100 + add_node state node 101 + | Mapping_frame { pending_key = Some _; _ } :: _ -> 102 + Error.raise (Invalid_state "mapping ended with pending key") 103 + | _ -> Error.raise (Invalid_state "unexpected mapping end")) 104 105 (** Add a node to current context *) 106 and add_node state node = 107 match state.stack with 108 + | [] -> state.current <- Some node 109 | Sequence_frame f :: rest -> 110 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest 111 + | Mapping_frame f :: rest -> ( 112 + match f.pending_key with 113 + | None -> 114 + (* This is a key *) 115 + state.stack <- 116 + Mapping_frame { f with pending_key = Some node } :: rest 117 + | Some key -> 118 + (* This is a value *) 119 + state.stack <- 120 + Mapping_frame 121 + { f with pairs = (key, node) :: f.pairs; pending_key = None } 122 + :: rest) 123 124 (** Internal: parse all documents from a parser *) 125 let parse_all_documents parser = ··· 138 139 @param resolve_aliases Whether to resolve aliases (default true) 140 @param max_nodes Maximum nodes during alias expansion (default 10M) 141 + @param max_depth Maximum alias nesting depth (default 100) *) 142 + let value_of_string ?(resolve_aliases = true) 143 ?(max_nodes = Yaml.default_max_alias_nodes) 144 + ?(max_depth = Yaml.default_max_alias_depth) s = 145 let docs = parse_all_documents (Parser.of_string s) in 146 let doc = single_document_or_error docs ~empty:(Document.make None) in 147 match Document.root doc with 148 | None -> `Null 149 | Some yaml -> 150 + Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth 151 + yaml 152 153 (** Load single document as Yaml. 154 155 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t) 156 @param max_nodes Maximum nodes during alias expansion (default 10M) 157 + @param max_depth Maximum alias nesting depth (default 100) *) 158 + let yaml_of_string ?(resolve_aliases = false) 159 ?(max_nodes = Yaml.default_max_alias_nodes) 160 + ?(max_depth = Yaml.default_max_alias_depth) s = 161 let docs = parse_all_documents (Parser.of_string s) in 162 let doc = single_document_or_error docs ~empty:(Document.make None) in 163 match Document.root doc with 164 | None -> `Scalar (Scalar.make "") 165 | Some yaml -> 166 + if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml 167 + else yaml 168 169 (** Load all documents *) 170 let documents_of_string s = ··· 176 177 @param resolve_aliases Whether to resolve aliases (default true) 178 @param max_nodes Maximum nodes during alias expansion (default 10M) 179 + @param max_depth Maximum alias nesting depth (default 100) *) 180 + let value_of_reader ?(resolve_aliases = true) 181 ?(max_nodes = Yaml.default_max_alias_nodes) 182 + ?(max_depth = Yaml.default_max_alias_depth) reader = 183 let docs = parse_all_documents (Parser.of_reader reader) in 184 let doc = single_document_or_error docs ~empty:(Document.make None) in 185 match Document.root doc with 186 | None -> `Null 187 | Some yaml -> 188 + Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth 189 + yaml 190 191 (** Load single document as Yaml from a Bytes.Reader. 192 193 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t) 194 @param max_nodes Maximum nodes during alias expansion (default 10M) 195 + @param max_depth Maximum alias nesting depth (default 100) *) 196 + let yaml_of_reader ?(resolve_aliases = false) 197 ?(max_nodes = Yaml.default_max_alias_nodes) 198 + ?(max_depth = Yaml.default_max_alias_depth) reader = 199 let docs = parse_all_documents (Parser.of_reader reader) in 200 let doc = single_document_or_error docs ~empty:(Document.make None) in 201 match Document.root doc with 202 | None -> `Scalar (Scalar.make "") 203 | Some yaml -> 204 + if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml 205 + else yaml 206 207 (** Load all documents from a Bytes.Reader *) 208 let documents_of_reader reader = ··· 220 let rec loop () = 221 match next_event () with 222 | None -> None 223 + | Some ev -> ( 224 process_event state ev; 225 match ev.event with 226 + | Event.Document_end _ -> ( 227 + match state.documents with 228 + | doc :: _ -> 229 + state.documents <- []; 230 + Some (extract doc) 231 + | [] -> None) 232 | Event.Stream_end -> None 233 + | _ -> loop ()) 234 in 235 loop () 236 ··· 242 243 @param resolve_aliases Whether to resolve aliases (default true) 244 @param max_nodes Maximum nodes during alias expansion (default 10M) 245 + @param max_depth Maximum alias nesting depth (default 100) *) 246 + let load_value ?(resolve_aliases = true) 247 ?(max_nodes = Yaml.default_max_alias_nodes) 248 + ?(max_depth = Yaml.default_max_alias_depth) parser = 249 + load_generic 250 + (fun doc -> 251 + match Document.root doc with 252 + | None -> `Null 253 + | Some yaml -> 254 + Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes 255 + ~max_depth yaml) 256 + parser 257 258 (** Load single Yaml from parser *) 259 let load_yaml parser = 260 + load_generic 261 + (fun doc -> 262 + Document.root doc |> Option.value ~default:(`Scalar (Scalar.make ""))) 263 + parser 264 265 (** Load single Document from parser *) 266 + let load_document parser = load_generic Fun.id parser 267 268 (** Iterate over documents *) 269 let iter_documents f parser = 270 let rec loop () = 271 match load_document parser with 272 | None -> () 273 + | Some doc -> 274 + f doc; 275 + loop () 276 in 277 loop () 278 279 (** Fold over documents *) 280 let fold_documents f init parser = 281 let rec loop acc = 282 + match load_document parser with None -> acc | Some doc -> loop (f acc doc) 283 in 284 loop init 285 + 286 287 (** Load single Value from event source. 288 289 @param resolve_aliases Whether to resolve aliases (default true) 290 @param max_nodes Maximum nodes during alias expansion (default 10M) 291 + @param max_depth Maximum alias nesting depth (default 100) *) 292 + let value_of_parser ?(resolve_aliases = true) 293 ?(max_nodes = Yaml.default_max_alias_nodes) 294 + ?(max_depth = Yaml.default_max_alias_depth) next_event = 295 + match 296 + load_generic_fn 297 + (fun doc -> 298 + match Document.root doc with 299 + | None -> `Null 300 + | Some yaml -> 301 + Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes 302 + ~max_depth yaml) 303 + next_event 304 + with 305 | Some v -> v 306 | None -> `Null 307 ··· 309 310 @param resolve_aliases Whether to resolve aliases (default false) 311 @param max_nodes Maximum nodes during alias expansion (default 10M) 312 + @param max_depth Maximum alias nesting depth (default 100) *) 313 + let yaml_of_parser ?(resolve_aliases = false) 314 ?(max_nodes = Yaml.default_max_alias_nodes) 315 + ?(max_depth = Yaml.default_max_alias_depth) next_event = 316 + match 317 + load_generic_fn 318 + (fun doc -> 319 + match Document.root doc with 320 + | None -> `Scalar (Scalar.make "") 321 + | Some yaml -> 322 + if resolve_aliases then 323 + Yaml.resolve_aliases ~max_nodes ~max_depth yaml 324 + else yaml) 325 + next_event 326 + with 327 | Some v -> v 328 | None -> `Scalar (Scalar.make "") 329 330 (** Load single Document from event source *) 331 + let document_of_parser next_event = load_generic_fn Fun.id next_event 332 333 (** Load all documents from event source *) 334 let documents_of_parser next_event = ··· 347 let rec loop () = 348 match document_of_parser next_event with 349 | None -> () 350 + | Some doc -> 351 + f doc; 352 + loop () 353 in 354 loop () 355
+38 -41
lib/mapping.ml
··· 13 members : ('k * 'v) list; 14 } 15 16 - let make 17 - ?(anchor : string option) 18 - ?(tag : string option) 19 - ?(implicit = true) 20 - ?(style = `Any) 21 - members = 22 { anchor; tag; implicit; style; members } 23 24 let members t = t.members ··· 26 let tag t = t.tag 27 let implicit t = t.implicit 28 let style t = t.style 29 - 30 let with_anchor anchor t = { t with anchor = Some anchor } 31 let with_tag tag t = { t with tag = Some tag } 32 let with_style style t = { t with style } 33 34 - let map_keys f t = { t with members = List.map (fun (k, v) -> (f k, v)) t.members } 35 - let map_values f t = { t with members = List.map (fun (k, v) -> (k, f v)) t.members } 36 - let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members } 37 38 - let length t = List.length t.members 39 40 let is_empty t = t.members = [] 41 42 let find pred t = 43 List.find_opt (fun (k, _) -> pred k) t.members |> Option.map snd 44 45 - let find_key pred t = 46 - List.find_opt (fun (k, _) -> pred k) t.members 47 - 48 - let mem pred t = 49 - List.exists (fun (k, _) -> pred k) t.members 50 - 51 let keys t = List.map fst t.members 52 - 53 let values t = List.map snd t.members 54 - 55 let iter f t = List.iter (fun (k, v) -> f k v) t.members 56 - 57 let fold f init t = List.fold_left (fun acc (k, v) -> f acc k v) init t.members 58 59 let pp pp_key pp_val fmt t = ··· 62 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag; 63 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style; 64 Format.fprintf fmt "members={@,"; 65 - List.iteri (fun i (k, v) -> 66 - if i > 0 then Format.fprintf fmt ",@ "; 67 - Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v 68 - ) t.members; 69 Format.fprintf fmt "@]@,})" 70 71 let equal eq_k eq_v a b = 72 - Option.equal String.equal a.anchor b.anchor && 73 - Option.equal String.equal a.tag b.tag && 74 - a.implicit = b.implicit && 75 - Layout_style.equal a.style b.style && 76 - List.equal (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2) a.members b.members 77 78 let compare cmp_k cmp_v a b = 79 let c = Option.compare String.compare a.anchor b.anchor in 80 - if c <> 0 then c else 81 - let c = Option.compare String.compare a.tag b.tag in 82 - if c <> 0 then c else 83 - let c = Bool.compare a.implicit b.implicit in 84 - if c <> 0 then c else 85 - let c = Layout_style.compare a.style b.style in 86 - if c <> 0 then c else 87 - let cmp_pair (k1, v1) (k2, v2) = 88 - let c = cmp_k k1 k2 in 89 - if c <> 0 then c else cmp_v v1 v2 90 - in 91 - List.compare cmp_pair a.members b.members
··· 13 members : ('k * 'v) list; 14 } 15 16 + let make ?(anchor : string option) ?(tag : string option) ?(implicit = true) 17 + ?(style = `Any) members = 18 { anchor; tag; implicit; style; members } 19 20 let members t = t.members ··· 22 let tag t = t.tag 23 let implicit t = t.implicit 24 let style t = t.style 25 let with_anchor anchor t = { t with anchor = Some anchor } 26 let with_tag tag t = { t with tag = Some tag } 27 let with_style style t = { t with style } 28 29 + let map_keys f t = 30 + { t with members = List.map (fun (k, v) -> (f k, v)) t.members } 31 32 + let map_values f t = 33 + { t with members = List.map (fun (k, v) -> (k, f v)) t.members } 34 35 + let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members } 36 + let length t = List.length t.members 37 let is_empty t = t.members = [] 38 39 let find pred t = 40 List.find_opt (fun (k, _) -> pred k) t.members |> Option.map snd 41 42 + let find_key pred t = List.find_opt (fun (k, _) -> pred k) t.members 43 + let mem pred t = List.exists (fun (k, _) -> pred k) t.members 44 let keys t = List.map fst t.members 45 let values t = List.map snd t.members 46 let iter f t = List.iter (fun (k, v) -> f k v) t.members 47 let fold f init t = List.fold_left (fun acc (k, v) -> f acc k v) init t.members 48 49 let pp pp_key pp_val fmt t = ··· 52 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag; 53 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style; 54 Format.fprintf fmt "members={@,"; 55 + List.iteri 56 + (fun i (k, v) -> 57 + if i > 0 then Format.fprintf fmt ",@ "; 58 + Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v) 59 + t.members; 60 Format.fprintf fmt "@]@,})" 61 62 let equal eq_k eq_v a b = 63 + Option.equal String.equal a.anchor b.anchor 64 + && Option.equal String.equal a.tag b.tag 65 + && a.implicit = b.implicit 66 + && Layout_style.equal a.style b.style 67 + && List.equal 68 + (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2) 69 + a.members b.members 70 71 let compare cmp_k cmp_v a b = 72 let c = Option.compare String.compare a.anchor b.anchor in 73 + if c <> 0 then c 74 + else 75 + let c = Option.compare String.compare a.tag b.tag in 76 + if c <> 0 then c 77 + else 78 + let c = Bool.compare a.implicit b.implicit in 79 + if c <> 0 then c 80 + else 81 + let c = Layout_style.compare a.style b.style in 82 + if c <> 0 then c 83 + else 84 + let cmp_pair (k1, v1) (k2, v2) = 85 + let c = cmp_k k1 k2 in 86 + if c <> 0 then c else cmp_v v1 v2 87 + in 88 + List.compare cmp_pair a.members b.members
+266 -305
lib/parser.ml
··· 10 | Stream_start 11 | Implicit_document_start 12 | Document_content 13 - | Document_content_done (* After parsing a node, check for unexpected content *) 14 | Document_end 15 | Block_sequence_first_entry 16 | Block_sequence_entry ··· 36 mutable tag_directives : (string * string) list; 37 mutable current_token : Token.spanned option; 38 mutable finished : bool; 39 - mutable explicit_doc_end : bool; (** True if last doc ended with explicit ... *) 40 - mutable stream_start : bool; (** True if we haven't emitted any documents yet *) 41 } 42 43 - let create scanner = { 44 - scanner; 45 - state = Stream_start; 46 - states = []; 47 - version = None; 48 - tag_directives = [ 49 - ("!", "!"); 50 - ("!!", "tag:yaml.org,2002:"); 51 - ]; 52 - current_token = None; 53 - finished = false; 54 - explicit_doc_end = false; 55 - stream_start = true; 56 - } 57 58 let of_string s = create (Scanner.of_string s) 59 let of_scanner = create ··· 64 let current_token t = 65 match t.current_token with 66 | Some tok -> tok 67 - | None -> 68 let tok = Scanner.next t.scanner in 69 t.current_token <- tok; 70 - match tok with 71 - | Some tok -> tok 72 - | None -> Error.raise Unexpected_eof 73 74 (** Peek at current token *) 75 let peek_token t = ··· 80 t.current_token 81 82 (** Skip current token *) 83 - let skip_token t = 84 - t.current_token <- None 85 86 (** Check if current token matches predicate *) 87 let check t pred = 88 - match peek_token t with 89 - | Some tok -> pred tok.token 90 - | None -> false 91 92 (** Push state onto stack *) 93 - let push_state t s = 94 - t.states <- s :: t.states 95 96 (** Pop state from stack *) 97 let pop_state t = ··· 115 (** Process directives at document start *) 116 let process_directives t = 117 t.version <- None; 118 - t.tag_directives <- [("!", "!"); ("!!", "tag:yaml.org,2002:")]; 119 120 - while check t (function 121 - | Token.Version_directive _ | Token.Tag_directive _ -> true 122 - | _ -> false) 123 do 124 let tok = current_token t in 125 skip_token t; 126 match tok.token with 127 | Token.Version_directive { major; minor } -> 128 if t.version <> None then 129 - Error.raise_span tok.span (Invalid_yaml_version "duplicate YAML directive"); 130 t.version <- Some (major, minor) 131 | Token.Tag_directive { handle; prefix } -> 132 (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *) 133 - if handle = "" && prefix = "" then 134 - () (* Ignore reserved directives *) 135 else begin 136 - if List.mem_assoc handle t.tag_directives && 137 - handle <> "!" && handle <> "!!" then 138 - Error.raise_span tok.span (Invalid_tag_directive ("duplicate tag handle: " ^ handle)); 139 t.tag_directives <- (handle, prefix) :: t.tag_directives 140 end 141 | _ -> () ··· 146 let anchor = ref None in 147 let tag = ref None in 148 149 - while check t (function 150 - | Token.Anchor _ | Token.Tag _ -> true 151 - | _ -> false) 152 do 153 let tok = current_token t in 154 skip_token t; ··· 172 173 (** Empty scalar event *) 174 let empty_scalar_event ~anchor ~tag span = 175 - Event.Scalar { 176 - anchor; 177 - tag; 178 - value = ""; 179 - plain_implicit = tag = None; 180 - quoted_implicit = false; 181 - style = `Plain; 182 - }, span 183 184 (** Parse stream start *) 185 let parse_stream_start t = ··· 188 match tok.token with 189 | Token.Stream_start encoding -> 190 t.state <- Implicit_document_start; 191 - Event.Stream_start { encoding }, tok.span 192 - | _ -> 193 - Error.raise_span tok.span (Unexpected_token "expected stream start") 194 195 (** Parse document start (implicit or explicit) *) 196 let parse_document_start t ~implicit = ··· 199 if not implicit then begin 200 let tok = current_token t in 201 match tok.token with 202 - | Token.Document_start -> 203 - skip_token t 204 - | _ -> 205 - Error.raise_span tok.span Expected_document_start 206 end; 207 208 - let span = match peek_token t with 209 | Some tok -> tok.span 210 | None -> Span.point Position.initial 211 in ··· 214 t.stream_start <- false; 215 push_state t Document_end; 216 t.state <- Document_content; 217 - Event.Document_start { version = t.version; implicit }, span 218 219 (** Parse document end *) 220 let parse_document_end t = 221 - let implicit = not (check t (function Token.Document_end -> true | _ -> false)) in 222 - let span = match peek_token t with 223 | Some tok -> tok.span 224 | None -> Span.point Position.initial 225 in ··· 229 (* Track if this document ended explicitly with ... *) 230 t.explicit_doc_end <- not implicit; 231 t.state <- Implicit_document_start; 232 - Event.Document_end { implicit }, span 233 234 (** Parse node in various contexts *) 235 let parse_node t ~block ~indentless = ··· 238 | Token.Alias name -> 239 skip_token t; 240 t.state <- pop_state t; 241 - Event.Alias { anchor = name }, tok.span 242 - 243 - | Token.Anchor _ | Token.Tag _ -> 244 let anchor, tag = parse_properties t in 245 let tok = current_token t in 246 - (match tok.token with 247 - | Token.Block_entry when indentless -> 248 - t.state <- Indentless_sequence_entry; 249 - Event.Sequence_start { 250 - anchor; tag; 251 - implicit = tag = None; 252 - style = `Block; 253 - }, tok.span 254 - 255 - | Token.Block_sequence_start when block -> 256 - t.state <- Block_sequence_first_entry; 257 - skip_token t; 258 - Event.Sequence_start { 259 - anchor; tag; 260 - implicit = tag = None; 261 - style = `Block; 262 - }, tok.span 263 - 264 - | Token.Block_mapping_start when block -> 265 - t.state <- Block_mapping_first_key; 266 - skip_token t; 267 - Event.Mapping_start { 268 - anchor; tag; 269 - implicit = tag = None; 270 - style = `Block; 271 - }, tok.span 272 - 273 - | Token.Flow_sequence_start -> 274 - t.state <- Flow_sequence_first_entry; 275 - skip_token t; 276 - Event.Sequence_start { 277 - anchor; tag; 278 - implicit = tag = None; 279 - style = `Flow; 280 - }, tok.span 281 - 282 - | Token.Flow_mapping_start -> 283 - t.state <- Flow_mapping_first_key; 284 - skip_token t; 285 - Event.Mapping_start { 286 - anchor; tag; 287 - implicit = tag = None; 288 - style = `Flow; 289 - }, tok.span 290 - 291 - | Token.Scalar { style; value } -> 292 - skip_token t; 293 - t.state <- pop_state t; 294 - let plain_implicit = tag = None && style = `Plain in 295 - let quoted_implicit = tag = None && style <> `Plain in 296 - Event.Scalar { 297 - anchor; tag; value; 298 - plain_implicit; quoted_implicit; style; 299 - }, tok.span 300 - 301 - | _ -> 302 - (* Empty node *) 303 - t.state <- pop_state t; 304 - empty_scalar_event ~anchor ~tag tok.span) 305 - 306 | Token.Block_sequence_start when block -> 307 t.state <- Block_sequence_first_entry; 308 skip_token t; 309 - Event.Sequence_start { 310 - anchor = None; tag = None; 311 - implicit = true; 312 - style = `Block; 313 - }, tok.span 314 - 315 | Token.Block_mapping_start when block -> 316 t.state <- Block_mapping_first_key; 317 skip_token t; 318 - Event.Mapping_start { 319 - anchor = None; tag = None; 320 - implicit = true; 321 - style = `Block; 322 - }, tok.span 323 - 324 | Token.Flow_sequence_start -> 325 t.state <- Flow_sequence_first_entry; 326 skip_token t; 327 - Event.Sequence_start { 328 - anchor = None; tag = None; 329 - implicit = true; 330 - style = `Flow; 331 - }, tok.span 332 - 333 | Token.Flow_mapping_start -> 334 t.state <- Flow_mapping_first_key; 335 skip_token t; 336 - Event.Mapping_start { 337 - anchor = None; tag = None; 338 - implicit = true; 339 - style = `Flow; 340 - }, tok.span 341 - 342 | Token.Block_entry when indentless -> 343 t.state <- Indentless_sequence_entry; 344 - Event.Sequence_start { 345 - anchor = None; tag = None; 346 - implicit = true; 347 - style = `Block; 348 - }, tok.span 349 - 350 | Token.Scalar { style; value } -> 351 skip_token t; 352 t.state <- pop_state t; 353 let plain_implicit = style = `Plain in 354 let quoted_implicit = style <> `Plain in 355 - Event.Scalar { 356 - anchor = None; tag = None; value; 357 - plain_implicit; quoted_implicit; style; 358 - }, tok.span 359 - 360 | _ -> 361 (* Empty node *) 362 t.state <- pop_state t; ··· 368 match tok.token with 369 | Token.Block_entry -> 370 skip_token t; 371 - if check t (function 372 - | Token.Block_entry | Token.Block_end -> true 373 - | _ -> false) 374 then begin 375 t.state <- Block_sequence_entry; 376 empty_scalar_event ~anchor:None ~tag:None tok.span 377 - end else begin 378 push_state t Block_sequence_entry; 379 parse_node t ~block:true ~indentless:false 380 end 381 | Token.Block_end -> 382 skip_token t; 383 t.state <- pop_state t; 384 - Event.Sequence_end, tok.span 385 - | _ -> 386 - Error.raise_span tok.span Expected_block_entry 387 388 (** Parse block mapping key *) 389 let parse_block_mapping_key t = ··· 391 match tok.token with 392 | Token.Key -> 393 skip_token t; 394 - if check t (function 395 - | Token.Key | Token.Value | Token.Block_end -> true 396 - | _ -> false) 397 then begin 398 t.state <- Block_mapping_value; 399 empty_scalar_event ~anchor:None ~tag:None tok.span 400 - end else begin 401 push_state t Block_mapping_value; 402 parse_node t ~block:true ~indentless:true 403 end ··· 408 | Token.Block_end -> 409 skip_token t; 410 t.state <- pop_state t; 411 - Event.Mapping_end, tok.span 412 - | _ -> 413 - Error.raise_span tok.span Expected_key 414 415 (** Parse block mapping value *) 416 let parse_block_mapping_value t = ··· 418 match tok.token with 419 | Token.Value -> 420 skip_token t; 421 - if check t (function 422 - | Token.Key | Token.Value | Token.Block_end -> true 423 - | _ -> false) 424 then begin 425 t.state <- Block_mapping_key; 426 empty_scalar_event ~anchor:None ~tag:None tok.span 427 - end else begin 428 push_state t Block_mapping_key; 429 parse_node t ~block:true ~indentless:true 430 end ··· 439 match tok.token with 440 | Token.Block_entry -> 441 skip_token t; 442 - if check t (function 443 - | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> true 444 - | _ -> false) 445 then begin 446 t.state <- Indentless_sequence_entry; 447 empty_scalar_event ~anchor:None ~tag:None tok.span 448 - end else begin 449 push_state t Indentless_sequence_entry; 450 parse_node t ~block:true ~indentless:false 451 end 452 | _ -> 453 t.state <- pop_state t; 454 - Event.Sequence_end, tok.span 455 456 (** Parse flow sequence *) 457 let rec parse_flow_sequence_entry t ~first = ··· 460 | Token.Flow_sequence_end -> 461 skip_token t; 462 t.state <- pop_state t; 463 - Event.Sequence_end, tok.span 464 | Token.Flow_entry when not first -> 465 skip_token t; 466 parse_flow_sequence_entry_internal t 467 - | _ when first -> 468 - parse_flow_sequence_entry_internal t 469 - | _ -> 470 - Error.raise_span tok.span Expected_sequence_end 471 472 and parse_flow_sequence_entry_internal t = 473 let tok = current_token t in ··· 476 (* Trailing comma case - don't emit empty scalar, just go back to sequence entry state *) 477 skip_token t; 478 t.state <- pop_state t; 479 - Event.Sequence_end, tok.span 480 | Token.Flow_entry -> 481 (* Double comma or comma after comma - invalid *) 482 - Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow sequence") 483 | Token.Key -> 484 skip_token t; 485 t.state <- Flow_sequence_entry_mapping_key; 486 - Event.Mapping_start { 487 - anchor = None; tag = None; 488 - implicit = true; 489 - style = `Flow; 490 - }, tok.span 491 | Token.Value -> 492 (* Implicit empty key mapping: [ : value ] *) 493 t.state <- Flow_sequence_entry_mapping_key; 494 - Event.Mapping_start { 495 - anchor = None; tag = None; 496 - implicit = true; 497 - style = `Flow; 498 - }, tok.span 499 | _ -> 500 push_state t Flow_sequence_entry; 501 parse_node t ~block:false ~indentless:false ··· 503 (** Parse flow sequence entry mapping *) 504 let parse_flow_sequence_entry_mapping_key t = 505 let tok = current_token t in 506 - if check t (function 507 - | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true 508 - | _ -> false) 509 then begin 510 t.state <- Flow_sequence_entry_mapping_value; 511 empty_scalar_event ~anchor:None ~tag:None tok.span 512 - end else begin 513 push_state t Flow_sequence_entry_mapping_value; 514 parse_node t ~block:false ~indentless:false 515 end ··· 519 match tok.token with 520 | Token.Value -> 521 skip_token t; 522 - if check t (function 523 - | Token.Flow_entry | Token.Flow_sequence_end -> true 524 - | _ -> false) 525 then begin 526 t.state <- Flow_sequence_entry_mapping_end; 527 empty_scalar_event ~anchor:None ~tag:None tok.span 528 - end else begin 529 push_state t Flow_sequence_entry_mapping_end; 530 parse_node t ~block:false ~indentless:false 531 end ··· 536 let parse_flow_sequence_entry_mapping_end t = 537 let tok = current_token t in 538 t.state <- Flow_sequence_entry; 539 - Event.Mapping_end, tok.span 540 541 (** Parse flow mapping *) 542 let rec parse_flow_mapping_key t ~first = ··· 545 | Token.Flow_mapping_end -> 546 skip_token t; 547 t.state <- pop_state t; 548 - Event.Mapping_end, tok.span 549 | Token.Flow_entry when not first -> 550 skip_token t; 551 parse_flow_mapping_key_internal t 552 - | _ when first -> 553 - parse_flow_mapping_key_internal t 554 - | _ -> 555 - Error.raise_span tok.span Expected_mapping_end 556 557 and parse_flow_mapping_key_internal t = 558 let tok = current_token t in ··· 561 (* Trailing comma case - don't emit empty scalar, just return to key state *) 562 skip_token t; 563 t.state <- pop_state t; 564 - Event.Mapping_end, tok.span 565 | Token.Flow_entry -> 566 (* Double comma or comma after comma - invalid *) 567 - Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow mapping") 568 | Token.Key -> 569 skip_token t; 570 - if check t (function 571 - | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true 572 - | _ -> false) 573 then begin 574 t.state <- Flow_mapping_value; 575 empty_scalar_event ~anchor:None ~tag:None tok.span 576 - end else begin 577 push_state t Flow_mapping_value; 578 parse_node t ~block:false ~indentless:false 579 end ··· 586 if empty then begin 587 t.state <- Flow_mapping_key; 588 empty_scalar_event ~anchor:None ~tag:None tok.span 589 - end else 590 match tok.token with 591 | Token.Value -> 592 skip_token t; 593 - if check t (function 594 - | Token.Flow_entry | Token.Flow_mapping_end -> true 595 - | _ -> false) 596 then begin 597 t.state <- Flow_mapping_key; 598 empty_scalar_event ~anchor:None ~tag:None tok.span 599 - end else begin 600 push_state t Flow_mapping_key; 601 parse_node t ~block:false ~indentless:false 602 end ··· 607 (** Main state machine dispatcher *) 608 let rec parse t = 609 match t.state with 610 - | Stream_start -> 611 - parse_stream_start t 612 - 613 - | Implicit_document_start -> 614 (* Skip any document end markers before checking what's next *) 615 while check t (function Token.Document_end -> true | _ -> false) do 616 - t.explicit_doc_end <- true; (* Seeing ... counts as explicit end *) 617 skip_token t 618 done; 619 620 let tok = current_token t in 621 - (match tok.token with 622 - | Token.Stream_end -> 623 - skip_token t; 624 - t.state <- End; 625 - t.finished <- true; 626 - Event.Stream_end, tok.span 627 - | Token.Version_directive _ | Token.Tag_directive _ -> 628 - (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *) 629 - if not t.stream_start && not t.explicit_doc_end then 630 - Error.raise_span tok.span (Invalid_directive "directives require explicit document end '...' before them"); 631 - parse_document_start t ~implicit:false 632 - | Token.Document_start -> 633 - parse_document_start t ~implicit:false 634 - (* These tokens are invalid at document start - they indicate leftover junk *) 635 - | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry 636 - | Token.Block_end | Token.Value -> 637 - Error.raise_span tok.span (Unexpected_token "unexpected token at document start") 638 - | _ -> 639 - parse_document_start t ~implicit:true) 640 641 | Document_content -> 642 - if check t (function 643 - | Token.Version_directive _ | Token.Tag_directive _ 644 - | Token.Document_start | Token.Document_end | Token.Stream_end -> true 645 - | _ -> false) 646 then begin 647 let tok = current_token t in 648 t.state <- pop_state t; 649 empty_scalar_event ~anchor:None ~tag:None tok.span 650 - end else begin 651 (* Push Document_content_done so we return there after parsing the node. 652 This allows us to check for unexpected content after the node. *) 653 push_state t Document_content_done; 654 parse_node t ~block:true ~indentless:false 655 end 656 - 657 | Document_content_done -> 658 (* After parsing a node in document content, check for unexpected content *) 659 - if check t (function 660 - | Token.Version_directive _ | Token.Tag_directive _ 661 - | Token.Document_start | Token.Document_end | Token.Stream_end -> true 662 - | _ -> false) 663 then begin 664 (* Valid document boundary - continue to Document_end *) 665 t.state <- pop_state t; 666 - parse t (* Continue to emit the next event *) 667 - end else begin 668 (* Unexpected content after document value - this is an error (KS4U, BS4K) *) 669 let tok = current_token t in 670 Error.raise_span tok.span 671 (Unexpected_token "content not allowed after document value") 672 end 673 - 674 - | Document_end -> 675 - parse_document_end t 676 677 | Block_sequence_first_entry -> 678 t.state <- Block_sequence_entry; 679 parse_block_sequence_entry t 680 - 681 - | Block_sequence_entry -> 682 - parse_block_sequence_entry t 683 - 684 - | Indentless_sequence_entry -> 685 - parse_indentless_sequence_entry t 686 - 687 | Block_mapping_first_key -> 688 t.state <- Block_mapping_key; 689 parse_block_mapping_key t 690 - 691 - | Block_mapping_key -> 692 - parse_block_mapping_key t 693 - 694 - | Block_mapping_value -> 695 - parse_block_mapping_value t 696 - 697 - | Flow_sequence_first_entry -> 698 - parse_flow_sequence_entry t ~first:true 699 - 700 - | Flow_sequence_entry -> 701 - parse_flow_sequence_entry t ~first:false 702 - 703 - | Flow_sequence_entry_mapping_key -> 704 - parse_flow_sequence_entry_mapping_key t 705 - 706 | Flow_sequence_entry_mapping_value -> 707 parse_flow_sequence_entry_mapping_value t 708 - 709 - | Flow_sequence_entry_mapping_end -> 710 - parse_flow_sequence_entry_mapping_end t 711 - 712 - | Flow_mapping_first_key -> 713 - parse_flow_mapping_key t ~first:true 714 - 715 - | Flow_mapping_key -> 716 - parse_flow_mapping_key t ~first:false 717 - 718 - | Flow_mapping_value -> 719 - parse_flow_mapping_value t ~empty:false 720 721 | End -> 722 let span = Span.point Position.initial in 723 t.finished <- true; 724 - Event.Stream_end, span 725 726 (** Get next event *) 727 let next t = ··· 735 let rec loop () = 736 match next t with 737 | None -> () 738 - | Some ev -> f ev; loop () 739 in 740 loop () 741 742 (** Fold over all events *) 743 let fold f init t = 744 let rec loop acc = 745 - match next t with 746 - | None -> acc 747 - | Some ev -> loop (f acc ev) 748 in 749 loop init 750 751 (** Convert to list *) 752 - let to_list t = 753 - fold (fun acc ev -> ev :: acc) [] t |> List.rev
··· 10 | Stream_start 11 | Implicit_document_start 12 | Document_content 13 + | Document_content_done 14 + (* After parsing a node, check for unexpected content *) 15 | Document_end 16 | Block_sequence_first_entry 17 | Block_sequence_entry ··· 37 mutable tag_directives : (string * string) list; 38 mutable current_token : Token.spanned option; 39 mutable finished : bool; 40 + mutable explicit_doc_end : bool; 41 + (** True if last doc ended with explicit ... *) 42 + mutable stream_start : bool; 43 + (** True if we haven't emitted any documents yet *) 44 } 45 46 + let create scanner = 47 + { 48 + scanner; 49 + state = Stream_start; 50 + states = []; 51 + version = None; 52 + tag_directives = [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ]; 53 + current_token = None; 54 + finished = false; 55 + explicit_doc_end = false; 56 + stream_start = true; 57 + } 58 59 let of_string s = create (Scanner.of_string s) 60 let of_scanner = create ··· 65 let current_token t = 66 match t.current_token with 67 | Some tok -> tok 68 + | None -> ( 69 let tok = Scanner.next t.scanner in 70 t.current_token <- tok; 71 + match tok with Some tok -> tok | None -> Error.raise Unexpected_eof) 72 73 (** Peek at current token *) 74 let peek_token t = ··· 79 t.current_token 80 81 (** Skip current token *) 82 + let skip_token t = t.current_token <- None 83 84 (** Check if current token matches predicate *) 85 let check t pred = 86 + match peek_token t with Some tok -> pred tok.token | None -> false 87 + 88 89 (** Push state onto stack *) 90 + let push_state t s = t.states <- s :: t.states 91 92 (** Pop state from stack *) 93 let pop_state t = ··· 111 (** Process directives at document start *) 112 let process_directives t = 113 t.version <- None; 114 + t.tag_directives <- [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ]; 115 116 + while 117 + check t (function 118 + | Token.Version_directive _ | Token.Tag_directive _ -> true 119 + | _ -> false) 120 do 121 let tok = current_token t in 122 skip_token t; 123 match tok.token with 124 | Token.Version_directive { major; minor } -> 125 if t.version <> None then 126 + Error.raise_span tok.span 127 + (Invalid_yaml_version "duplicate YAML directive"); 128 t.version <- Some (major, minor) 129 | Token.Tag_directive { handle; prefix } -> 130 (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *) 131 + if handle = "" && prefix = "" then () (* Ignore reserved directives *) 132 else begin 133 + if 134 + List.mem_assoc handle t.tag_directives 135 + && handle <> "!" && handle <> "!!" 136 + then 137 + Error.raise_span tok.span 138 + (Invalid_tag_directive ("duplicate tag handle: " ^ handle)); 139 t.tag_directives <- (handle, prefix) :: t.tag_directives 140 end 141 | _ -> () ··· 146 let anchor = ref None in 147 let tag = ref None in 148 149 + while 150 + check t (function Token.Anchor _ | Token.Tag _ -> true | _ -> false) 151 do 152 let tok = current_token t in 153 skip_token t; ··· 171 172 (** Empty scalar event *) 173 let empty_scalar_event ~anchor ~tag span = 174 + ( Event.Scalar 175 + { 176 + anchor; 177 + tag; 178 + value = ""; 179 + plain_implicit = tag = None; 180 + quoted_implicit = false; 181 + style = `Plain; 182 + }, 183 + span ) 184 185 (** Parse stream start *) 186 let parse_stream_start t = ··· 189 match tok.token with 190 | Token.Stream_start encoding -> 191 t.state <- Implicit_document_start; 192 + (Event.Stream_start { encoding }, tok.span) 193 + | _ -> Error.raise_span tok.span (Unexpected_token "expected stream start") 194 195 (** Parse document start (implicit or explicit) *) 196 let parse_document_start t ~implicit = ··· 199 if not implicit then begin 200 let tok = current_token t in 201 match tok.token with 202 + | Token.Document_start -> skip_token t 203 + | _ -> Error.raise_span tok.span Expected_document_start 204 end; 205 206 + let span = 207 + match peek_token t with 208 | Some tok -> tok.span 209 | None -> Span.point Position.initial 210 in ··· 213 t.stream_start <- false; 214 push_state t Document_end; 215 t.state <- Document_content; 216 + (Event.Document_start { version = t.version; implicit }, span) 217 218 (** Parse document end *) 219 let parse_document_end t = 220 + let implicit = 221 + not (check t (function Token.Document_end -> true | _ -> false)) 222 + in 223 + let span = 224 + match peek_token t with 225 | Some tok -> tok.span 226 | None -> Span.point Position.initial 227 in ··· 231 (* Track if this document ended explicitly with ... *) 232 t.explicit_doc_end <- not implicit; 233 t.state <- Implicit_document_start; 234 + (Event.Document_end { implicit }, span) 235 236 (** Parse node in various contexts *) 237 let parse_node t ~block ~indentless = ··· 240 | Token.Alias name -> 241 skip_token t; 242 t.state <- pop_state t; 243 + (Event.Alias { anchor = name }, tok.span) 244 + | Token.Anchor _ | Token.Tag _ -> ( 245 let anchor, tag = parse_properties t in 246 let tok = current_token t in 247 + match tok.token with 248 + | Token.Block_entry when indentless -> 249 + t.state <- Indentless_sequence_entry; 250 + ( Event.Sequence_start 251 + { anchor; tag; implicit = tag = None; style = `Block }, 252 + tok.span ) 253 + | Token.Block_sequence_start when block -> 254 + t.state <- Block_sequence_first_entry; 255 + skip_token t; 256 + ( Event.Sequence_start 257 + { anchor; tag; implicit = tag = None; style = `Block }, 258 + tok.span ) 259 + | Token.Block_mapping_start when block -> 260 + t.state <- Block_mapping_first_key; 261 + skip_token t; 262 + ( Event.Mapping_start 263 + { anchor; tag; implicit = tag = None; style = `Block }, 264 + tok.span ) 265 + | Token.Flow_sequence_start -> 266 + t.state <- Flow_sequence_first_entry; 267 + skip_token t; 268 + ( Event.Sequence_start 269 + { anchor; tag; implicit = tag = None; style = `Flow }, 270 + tok.span ) 271 + | Token.Flow_mapping_start -> 272 + t.state <- Flow_mapping_first_key; 273 + skip_token t; 274 + ( Event.Mapping_start 275 + { anchor; tag; implicit = tag = None; style = `Flow }, 276 + tok.span ) 277 + | Token.Scalar { style; value } -> 278 + skip_token t; 279 + t.state <- pop_state t; 280 + let plain_implicit = tag = None && style = `Plain in 281 + let quoted_implicit = tag = None && style <> `Plain in 282 + ( Event.Scalar 283 + { anchor; tag; value; plain_implicit; quoted_implicit; style }, 284 + tok.span ) 285 + | _ -> 286 + (* Empty node *) 287 + t.state <- pop_state t; 288 + empty_scalar_event ~anchor ~tag tok.span) 289 | Token.Block_sequence_start when block -> 290 t.state <- Block_sequence_first_entry; 291 skip_token t; 292 + ( Event.Sequence_start 293 + { anchor = None; tag = None; implicit = true; style = `Block }, 294 + tok.span ) 295 | Token.Block_mapping_start when block -> 296 t.state <- Block_mapping_first_key; 297 skip_token t; 298 + ( Event.Mapping_start 299 + { anchor = None; tag = None; implicit = true; style = `Block }, 300 + tok.span ) 301 | Token.Flow_sequence_start -> 302 t.state <- Flow_sequence_first_entry; 303 skip_token t; 304 + ( Event.Sequence_start 305 + { anchor = None; tag = None; implicit = true; style = `Flow }, 306 + tok.span ) 307 | Token.Flow_mapping_start -> 308 t.state <- Flow_mapping_first_key; 309 skip_token t; 310 + ( Event.Mapping_start 311 + { anchor = None; tag = None; implicit = true; style = `Flow }, 312 + tok.span ) 313 | Token.Block_entry when indentless -> 314 t.state <- Indentless_sequence_entry; 315 + ( Event.Sequence_start 316 + { anchor = None; tag = None; implicit = true; style = `Block }, 317 + tok.span ) 318 | Token.Scalar { style; value } -> 319 skip_token t; 320 t.state <- pop_state t; 321 let plain_implicit = style = `Plain in 322 let quoted_implicit = style <> `Plain in 323 + ( Event.Scalar 324 + { 325 + anchor = None; 326 + tag = None; 327 + value; 328 + plain_implicit; 329 + quoted_implicit; 330 + style; 331 + }, 332 + tok.span ) 333 | _ -> 334 (* Empty node *) 335 t.state <- pop_state t; ··· 341 match tok.token with 342 | Token.Block_entry -> 343 skip_token t; 344 + if 345 + check t (function 346 + | Token.Block_entry | Token.Block_end -> true 347 + | _ -> false) 348 then begin 349 t.state <- Block_sequence_entry; 350 empty_scalar_event ~anchor:None ~tag:None tok.span 351 + end 352 + else begin 353 push_state t Block_sequence_entry; 354 parse_node t ~block:true ~indentless:false 355 end 356 | Token.Block_end -> 357 skip_token t; 358 t.state <- pop_state t; 359 + (Event.Sequence_end, tok.span) 360 + | _ -> Error.raise_span tok.span Expected_block_entry 361 362 (** Parse block mapping key *) 363 let parse_block_mapping_key t = ··· 365 match tok.token with 366 | Token.Key -> 367 skip_token t; 368 + if 369 + check t (function 370 + | Token.Key | Token.Value | Token.Block_end -> true 371 + | _ -> false) 372 then begin 373 t.state <- Block_mapping_value; 374 empty_scalar_event ~anchor:None ~tag:None tok.span 375 + end 376 + else begin 377 push_state t Block_mapping_value; 378 parse_node t ~block:true ~indentless:true 379 end ··· 384 | Token.Block_end -> 385 skip_token t; 386 t.state <- pop_state t; 387 + (Event.Mapping_end, tok.span) 388 + | _ -> Error.raise_span tok.span Expected_key 389 390 (** Parse block mapping value *) 391 let parse_block_mapping_value t = ··· 393 match tok.token with 394 | Token.Value -> 395 skip_token t; 396 + if 397 + check t (function 398 + | Token.Key | Token.Value | Token.Block_end -> true 399 + | _ -> false) 400 then begin 401 t.state <- Block_mapping_key; 402 empty_scalar_event ~anchor:None ~tag:None tok.span 403 + end 404 + else begin 405 push_state t Block_mapping_key; 406 parse_node t ~block:true ~indentless:true 407 end ··· 416 match tok.token with 417 | Token.Block_entry -> 418 skip_token t; 419 + if 420 + check t (function 421 + | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> 422 + true 423 + | _ -> false) 424 then begin 425 t.state <- Indentless_sequence_entry; 426 empty_scalar_event ~anchor:None ~tag:None tok.span 427 + end 428 + else begin 429 push_state t Indentless_sequence_entry; 430 parse_node t ~block:true ~indentless:false 431 end 432 | _ -> 433 t.state <- pop_state t; 434 + (Event.Sequence_end, tok.span) 435 436 (** Parse flow sequence *) 437 let rec parse_flow_sequence_entry t ~first = ··· 440 | Token.Flow_sequence_end -> 441 skip_token t; 442 t.state <- pop_state t; 443 + (Event.Sequence_end, tok.span) 444 | Token.Flow_entry when not first -> 445 skip_token t; 446 parse_flow_sequence_entry_internal t 447 + | _ when first -> parse_flow_sequence_entry_internal t 448 + | _ -> Error.raise_span tok.span Expected_sequence_end 449 450 and parse_flow_sequence_entry_internal t = 451 let tok = current_token t in ··· 454 (* Trailing comma case - don't emit empty scalar, just go back to sequence entry state *) 455 skip_token t; 456 t.state <- pop_state t; 457 + (Event.Sequence_end, tok.span) 458 | Token.Flow_entry -> 459 (* Double comma or comma after comma - invalid *) 460 + Error.raise_span tok.span 461 + (Unexpected_token "unexpected ',' in flow sequence") 462 | Token.Key -> 463 skip_token t; 464 t.state <- Flow_sequence_entry_mapping_key; 465 + ( Event.Mapping_start 466 + { anchor = None; tag = None; implicit = true; style = `Flow }, 467 + tok.span ) 468 | Token.Value -> 469 (* Implicit empty key mapping: [ : value ] *) 470 t.state <- Flow_sequence_entry_mapping_key; 471 + ( Event.Mapping_start 472 + { anchor = None; tag = None; implicit = true; style = `Flow }, 473 + tok.span ) 474 | _ -> 475 push_state t Flow_sequence_entry; 476 parse_node t ~block:false ~indentless:false ··· 478 (** Parse flow sequence entry mapping *) 479 let parse_flow_sequence_entry_mapping_key t = 480 let tok = current_token t in 481 + if 482 + check t (function 483 + | Token.Value | Token.Flow_entry | Token.Flow_sequence_end -> true 484 + | _ -> false) 485 then begin 486 t.state <- Flow_sequence_entry_mapping_value; 487 empty_scalar_event ~anchor:None ~tag:None tok.span 488 + end 489 + else begin 490 push_state t Flow_sequence_entry_mapping_value; 491 parse_node t ~block:false ~indentless:false 492 end ··· 496 match tok.token with 497 | Token.Value -> 498 skip_token t; 499 + if 500 + check t (function 501 + | Token.Flow_entry | Token.Flow_sequence_end -> true 502 + | _ -> false) 503 then begin 504 t.state <- Flow_sequence_entry_mapping_end; 505 empty_scalar_event ~anchor:None ~tag:None tok.span 506 + end 507 + else begin 508 push_state t Flow_sequence_entry_mapping_end; 509 parse_node t ~block:false ~indentless:false 510 end ··· 515 let parse_flow_sequence_entry_mapping_end t = 516 let tok = current_token t in 517 t.state <- Flow_sequence_entry; 518 + (Event.Mapping_end, tok.span) 519 520 (** Parse flow mapping *) 521 let rec parse_flow_mapping_key t ~first = ··· 524 | Token.Flow_mapping_end -> 525 skip_token t; 526 t.state <- pop_state t; 527 + (Event.Mapping_end, tok.span) 528 | Token.Flow_entry when not first -> 529 skip_token t; 530 parse_flow_mapping_key_internal t 531 + | _ when first -> parse_flow_mapping_key_internal t 532 + | _ -> Error.raise_span tok.span Expected_mapping_end 533 534 and parse_flow_mapping_key_internal t = 535 let tok = current_token t in ··· 538 (* Trailing comma case - don't emit empty scalar, just return to key state *) 539 skip_token t; 540 t.state <- pop_state t; 541 + (Event.Mapping_end, tok.span) 542 | Token.Flow_entry -> 543 (* Double comma or comma after comma - invalid *) 544 + Error.raise_span tok.span 545 + (Unexpected_token "unexpected ',' in flow mapping") 546 | Token.Key -> 547 skip_token t; 548 + if 549 + check t (function 550 + | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true 551 + | _ -> false) 552 then begin 553 t.state <- Flow_mapping_value; 554 empty_scalar_event ~anchor:None ~tag:None tok.span 555 + end 556 + else begin 557 push_state t Flow_mapping_value; 558 parse_node t ~block:false ~indentless:false 559 end ··· 566 if empty then begin 567 t.state <- Flow_mapping_key; 568 empty_scalar_event ~anchor:None ~tag:None tok.span 569 + end 570 + else 571 match tok.token with 572 | Token.Value -> 573 skip_token t; 574 + if 575 + check t (function 576 + | Token.Flow_entry | Token.Flow_mapping_end -> true 577 + | _ -> false) 578 then begin 579 t.state <- Flow_mapping_key; 580 empty_scalar_event ~anchor:None ~tag:None tok.span 581 + end 582 + else begin 583 push_state t Flow_mapping_key; 584 parse_node t ~block:false ~indentless:false 585 end ··· 590 (** Main state machine dispatcher *) 591 let rec parse t = 592 match t.state with 593 + | Stream_start -> parse_stream_start t 594 + | Implicit_document_start -> ( 595 (* Skip any document end markers before checking what's next *) 596 while check t (function Token.Document_end -> true | _ -> false) do 597 + t.explicit_doc_end <- true; 598 + (* Seeing ... counts as explicit end *) 599 skip_token t 600 done; 601 602 let tok = current_token t in 603 + match tok.token with 604 + | Token.Stream_end -> 605 + skip_token t; 606 + t.state <- End; 607 + t.finished <- true; 608 + (Event.Stream_end, tok.span) 609 + | Token.Version_directive _ | Token.Tag_directive _ -> 610 + (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *) 611 + if (not t.stream_start) && not t.explicit_doc_end then 612 + Error.raise_span tok.span 613 + (Invalid_directive 614 + "directives require explicit document end '...' before them"); 615 + parse_document_start t ~implicit:false 616 + | Token.Document_start -> parse_document_start t ~implicit:false 617 + (* These tokens are invalid at document start - they indicate leftover junk *) 618 + | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry 619 + | Token.Block_end | Token.Value -> 620 + Error.raise_span tok.span 621 + (Unexpected_token "unexpected token at document start") 622 + | _ -> parse_document_start t ~implicit:true) 623 624 | Document_content -> 625 + if 626 + check t (function 627 + | Token.Version_directive _ | Token.Tag_directive _ 628 + | Token.Document_start | Token.Document_end | Token.Stream_end -> 629 + true 630 + | _ -> false) 631 then begin 632 let tok = current_token t in 633 t.state <- pop_state t; 634 empty_scalar_event ~anchor:None ~tag:None tok.span 635 + end 636 + else begin 637 (* Push Document_content_done so we return there after parsing the node. 638 This allows us to check for unexpected content after the node. *) 639 push_state t Document_content_done; 640 parse_node t ~block:true ~indentless:false 641 end 642 | Document_content_done -> 643 (* After parsing a node in document content, check for unexpected content *) 644 + if 645 + check t (function 646 + | Token.Version_directive _ | Token.Tag_directive _ 647 + | Token.Document_start | Token.Document_end | Token.Stream_end -> 648 + true 649 + | _ -> false) 650 then begin 651 (* Valid document boundary - continue to Document_end *) 652 t.state <- pop_state t; 653 + parse t (* Continue to emit the next event *) 654 + end 655 + else begin 656 (* Unexpected content after document value - this is an error (KS4U, BS4K) *) 657 let tok = current_token t in 658 Error.raise_span tok.span 659 (Unexpected_token "content not allowed after document value") 660 end 661 + | Document_end -> parse_document_end t 662 663 | Block_sequence_first_entry -> 664 t.state <- Block_sequence_entry; 665 parse_block_sequence_entry t 666 + | Block_sequence_entry -> parse_block_sequence_entry t 667 + | Indentless_sequence_entry -> parse_indentless_sequence_entry t 668 | Block_mapping_first_key -> 669 t.state <- Block_mapping_key; 670 parse_block_mapping_key t 671 + | Block_mapping_key -> parse_block_mapping_key t 672 + | Block_mapping_value -> parse_block_mapping_value t 673 + | Flow_sequence_first_entry -> parse_flow_sequence_entry t ~first:true 674 + | Flow_sequence_entry -> parse_flow_sequence_entry t ~first:false 675 + | Flow_sequence_entry_mapping_key -> parse_flow_sequence_entry_mapping_key t 676 | Flow_sequence_entry_mapping_value -> 677 parse_flow_sequence_entry_mapping_value t 678 + | Flow_sequence_entry_mapping_end -> parse_flow_sequence_entry_mapping_end t 679 + | Flow_mapping_first_key -> parse_flow_mapping_key t ~first:true 680 + | Flow_mapping_key -> parse_flow_mapping_key t ~first:false 681 + | Flow_mapping_value -> parse_flow_mapping_value t ~empty:false 682 683 | End -> 684 let span = Span.point Position.initial in 685 t.finished <- true; 686 + (Event.Stream_end, span) 687 688 (** Get next event *) 689 let next t = ··· 697 let rec loop () = 698 match next t with 699 | None -> () 700 + | Some ev -> 701 + f ev; 702 + loop () 703 in 704 loop () 705 706 (** Fold over all events *) 707 let fold f init t = 708 let rec loop acc = 709 + match next t with None -> acc | Some ev -> loop (f acc ev) 710 in 711 loop init 712 713 (** Convert to list *) 714 + let to_list t = fold (fun acc ev -> ev :: acc) [] t |> List.rev
+11 -28
lib/position.ml
··· 6 (** Position tracking for source locations *) 7 8 type t = { 9 - index : int; (** Byte offset from start *) 10 - line : int; (** 1-indexed line number *) 11 column : int; (** 1-indexed column number *) 12 } 13 14 let initial = { index = 0; line = 1; column = 1 } 15 - 16 - let advance_byte t = 17 - { t with index = t.index + 1; column = t.column + 1 } 18 - 19 - let advance_line t = 20 - { index = t.index + 1; line = t.line + 1; column = 1 } 21 - 22 - let advance_char c t = 23 - if c = '\n' then advance_line t 24 - else advance_byte t 25 26 let advance_utf8 uchar t = 27 let len = Uchar.utf_8_byte_length uchar in 28 let code = Uchar.to_int uchar in 29 if code = 0x0A (* LF *) then 30 { index = t.index + len; line = t.line + 1; column = 1 } 31 - else 32 - { t with index = t.index + len; column = t.column + 1 } 33 34 - let advance_bytes n t = 35 - { t with index = t.index + n; column = t.column + n } 36 - 37 - let pp fmt t = 38 - Format.fprintf fmt "line %d, column %d" t.line t.column 39 - 40 - let to_string t = 41 - Format.asprintf "%a" pp t 42 - 43 - let compare a b = 44 - Int.compare a.index b.index 45 - 46 - let equal a b = 47 - a.index = b.index
··· 6 (** Position tracking for source locations *) 7 8 type t = { 9 + index : int; (** Byte offset from start *) 10 + line : int; (** 1-indexed line number *) 11 column : int; (** 1-indexed column number *) 12 } 13 14 let initial = { index = 0; line = 1; column = 1 } 15 + let advance_byte t = { t with index = t.index + 1; column = t.column + 1 } 16 + let advance_line t = { index = t.index + 1; line = t.line + 1; column = 1 } 17 + let advance_char c t = if c = '\n' then advance_line t else advance_byte t 18 19 let advance_utf8 uchar t = 20 let len = Uchar.utf_8_byte_length uchar in 21 let code = Uchar.to_int uchar in 22 if code = 0x0A (* LF *) then 23 { index = t.index + len; line = t.line + 1; column = 1 } 24 + else { t with index = t.index + len; column = t.column + 1 } 25 26 + let advance_bytes n t = { t with index = t.index + n; column = t.column + n } 27 + let pp fmt t = Format.fprintf fmt "line %d, column %d" t.line t.column 28 + let to_string t = Format.asprintf "%a" pp t 29 + let compare a b = Int.compare a.index b.index 30 + let equal a b = a.index = b.index
+31 -27
lib/quoting.ml
··· 5 6 (** YAML scalar quoting detection *) 7 8 - (** Check if a string value needs quoting in YAML output. 9 - Returns true if the string: 10 - Is empty 11 - Starts with an indicator character 12 - Is a reserved word (null, true, false, yes, no, etc.) ··· 17 else 18 let first = s.[0] in 19 (* Check first character for indicators *) 20 - if first = '-' || first = '?' || first = ':' || first = ',' || 21 - first = '[' || first = ']' || first = '{' || first = '}' || 22 - first = '#' || first = '&' || first = '*' || first = '!' || 23 - first = '|' || first = '>' || first = '\'' || first = '"' || 24 - first = '%' || first = '@' || first = '`' || first = ' ' then 25 - true 26 else 27 (* Check for reserved/special values *) 28 let lower = String.lowercase_ascii s in 29 - if lower = "null" || lower = "true" || lower = "false" || 30 - lower = "yes" || lower = "no" || lower = "on" || lower = "off" || 31 - lower = "~" || lower = ".inf" || lower = "-.inf" || lower = ".nan" then 32 - true 33 else 34 (* Check for problematic characters *) 35 try 36 - String.iter (fun c -> 37 - if c = ':' || c = '#' || c = '\n' || c = '\r' then 38 - raise Exit 39 - ) s; 40 (* Check if it looks like a number *) 41 - (try ignore (Float.of_string s); true with _ -> false) 42 with Exit -> true 43 44 - (** Check if a string requires double quotes (vs single quotes). 45 - Returns true if the string contains characters that need escape sequences. *) 46 let needs_double_quotes s = 47 try 48 - String.iter (fun c -> 49 - if c = '\n' || c = '\r' || c = '\t' || c = '\\' || 50 - c < ' ' || c = '"' then 51 - raise Exit 52 - ) s; 53 false 54 with Exit -> true 55 56 (** Choose the appropriate quoting style for a string value *) 57 let choose_style s = 58 match (needs_double_quotes s, needs_quoting s) with 59 - | (true, _) -> `Double_quoted 60 - | (_, true) -> `Single_quoted 61 | _ -> `Plain 62 -
··· 5 6 (** YAML scalar quoting detection *) 7 8 + (** Check if a string value needs quoting in YAML output. Returns true if the 9 + string: 10 - Is empty 11 - Starts with an indicator character 12 - Is a reserved word (null, true, false, yes, no, etc.) ··· 17 else 18 let first = s.[0] in 19 (* Check first character for indicators *) 20 + if 21 + first = '-' || first = '?' || first = ':' || first = ',' || first = '[' 22 + || first = ']' || first = '{' || first = '}' || first = '#' || first = '&' 23 + || first = '*' || first = '!' || first = '|' || first = '>' 24 + || first = '\'' || first = '"' || first = '%' || first = '@' 25 + || first = '`' || first = ' ' 26 + then true 27 else 28 (* Check for reserved/special values *) 29 let lower = String.lowercase_ascii s in 30 + if 31 + lower = "null" || lower = "true" || lower = "false" || lower = "yes" 32 + || lower = "no" || lower = "on" || lower = "off" || lower = "~" 33 + || lower = ".inf" || lower = "-.inf" || lower = ".nan" 34 + then true 35 else 36 (* Check for problematic characters *) 37 try 38 + String.iter 39 + (fun c -> 40 + if c = ':' || c = '#' || c = '\n' || c = '\r' then raise Exit) 41 + s; 42 (* Check if it looks like a number *) 43 + try 44 + ignore (Float.of_string s); 45 + true 46 + with _ -> false 47 with Exit -> true 48 49 + (** Check if a string requires double quotes (vs single quotes). Returns true if 50 + the string contains characters that need escape sequences. *) 51 let needs_double_quotes s = 52 try 53 + String.iter 54 + (fun c -> 55 + if c = '\n' || c = '\r' || c = '\t' || c = '\\' || c < ' ' || c = '"' 56 + then raise Exit) 57 + s; 58 false 59 with Exit -> true 60 61 (** Choose the appropriate quoting style for a string value *) 62 let choose_style s = 63 match (needs_double_quotes s, needs_quoting s) with 64 + | true, _ -> `Double_quoted 65 + | _, true -> `Single_quoted 66 | _ -> `Plain
+22 -24
lib/scalar.ml
··· 14 style : Scalar_style.t; 15 } 16 17 - let make 18 - ?(anchor : string option) 19 - ?(tag : string option) 20 - ?(plain_implicit = true) 21 - ?(quoted_implicit = false) 22 - ?(style = `Plain) 23 - value = 24 { anchor; tag; value; plain_implicit; quoted_implicit; style } 25 26 let value t = t.value ··· 29 let style t = t.style 30 let plain_implicit t = t.plain_implicit 31 let quoted_implicit t = t.quoted_implicit 32 - 33 let with_anchor anchor t = { t with anchor = Some anchor } 34 let with_tag tag t = { t with tag = Some tag } 35 let with_style style t = { t with style } ··· 41 Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style 42 43 let equal a b = 44 - Option.equal String.equal a.anchor b.anchor && 45 - Option.equal String.equal a.tag b.tag && 46 - String.equal a.value b.value && 47 - a.plain_implicit = b.plain_implicit && 48 - a.quoted_implicit = b.quoted_implicit && 49 - Scalar_style.equal a.style b.style 50 51 let compare a b = 52 let c = Option.compare String.compare a.anchor b.anchor in 53 - if c <> 0 then c else 54 - let c = Option.compare String.compare a.tag b.tag in 55 - if c <> 0 then c else 56 - let c = String.compare a.value b.value in 57 - if c <> 0 then c else 58 - let c = Bool.compare a.plain_implicit b.plain_implicit in 59 - if c <> 0 then c else 60 - let c = Bool.compare a.quoted_implicit b.quoted_implicit in 61 - if c <> 0 then c else 62 - Scalar_style.compare a.style b.style
··· 14 style : Scalar_style.t; 15 } 16 17 + let make ?(anchor : string option) ?(tag : string option) 18 + ?(plain_implicit = true) ?(quoted_implicit = false) ?(style = `Plain) value 19 + = 20 { anchor; tag; value; plain_implicit; quoted_implicit; style } 21 22 let value t = t.value ··· 25 let style t = t.style 26 let plain_implicit t = t.plain_implicit 27 let quoted_implicit t = t.quoted_implicit 28 let with_anchor anchor t = { t with anchor = Some anchor } 29 let with_tag tag t = { t with tag = Some tag } 30 let with_style style t = { t with style } ··· 36 Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style 37 38 let equal a b = 39 + Option.equal String.equal a.anchor b.anchor 40 + && Option.equal String.equal a.tag b.tag 41 + && String.equal a.value b.value 42 + && a.plain_implicit = b.plain_implicit 43 + && a.quoted_implicit = b.quoted_implicit 44 + && Scalar_style.equal a.style b.style 45 46 let compare a b = 47 let c = Option.compare String.compare a.anchor b.anchor in 48 + if c <> 0 then c 49 + else 50 + let c = Option.compare String.compare a.tag b.tag in 51 + if c <> 0 then c 52 + else 53 + let c = String.compare a.value b.value in 54 + if c <> 0 then c 55 + else 56 + let c = Bool.compare a.plain_implicit b.plain_implicit in 57 + if c <> 0 then c 58 + else 59 + let c = Bool.compare a.quoted_implicit b.quoted_implicit in 60 + if c <> 0 then c else Scalar_style.compare a.style b.style
+8 -11
lib/scalar_style.ml
··· 5 6 (** Scalar formatting styles *) 7 8 - type t = [ 9 - | `Any (** Let emitter choose *) 10 - | `Plain (** Unquoted: foo *) 11 - | `Single_quoted (** 'foo' *) 12 - | `Double_quoted (** "foo" *) 13 - | `Literal (** | block *) 14 - | `Folded (** > block *) 15 - ] 16 17 let to_string = function 18 | `Any -> "any" ··· 22 | `Literal -> "literal" 23 | `Folded -> "folded" 24 25 - let pp fmt t = 26 - Format.pp_print_string fmt (to_string t) 27 - 28 let equal a b = a = b 29 30 let compare a b =
··· 5 6 (** Scalar formatting styles *) 7 8 + type t = 9 + [ `Any (** Let emitter choose *) 10 + | `Plain (** Unquoted: foo *) 11 + | `Single_quoted (** 'foo' *) 12 + | `Double_quoted (** "foo" *) 13 + | `Literal (** | block *) 14 + | `Folded (** > block *) ] 15 16 let to_string = function 17 | `Any -> "any" ··· 21 | `Literal -> "literal" 22 | `Folded -> "folded" 23 24 + let pp fmt t = Format.pp_print_string fmt (to_string t) 25 let equal a b = a = b 26 27 let compare a b =
+536 -399
lib/scanner.ml
··· 5 6 (** YAML tokenizer/scanner with lookahead for ambiguity resolution *) 7 8 - (** Simple key tracking for mapping key disambiguation *) 9 type simple_key = { 10 sk_possible : bool; 11 sk_required : bool; 12 sk_token_number : int; 13 sk_position : Position.t; 14 } 15 16 - (** Indent level tracking *) 17 type indent = { 18 indent : int; 19 needs_block_end : bool; 20 } 21 22 type t = { 23 input : Input.t; ··· 27 mutable stream_started : bool; 28 mutable stream_ended : bool; 29 mutable indent_stack : indent list; (** Stack of indentation levels *) 30 - mutable flow_level : int; (** Nesting depth in \[\] or \{\} *) 31 - mutable flow_indent : int; (** Column where outermost flow collection started *) 32 - mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *) 33 mutable allow_simple_key : bool; 34 - mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *) 35 - mutable document_has_content : bool; (** True if we've emitted content tokens in current document *) 36 - mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *) 37 - mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *) 38 } 39 40 let create input = ··· 48 indent_stack = []; 49 flow_level = 0; 50 flow_indent = 0; 51 - simple_keys = [None]; (* One entry for the base level *) 52 allow_simple_key = true; 53 - leading_whitespace = true; (* Start at beginning of stream *) 54 document_has_content = false; 55 adjacent_value_allowed_at = None; 56 flow_mapping_stack = []; ··· 59 let of_string s = create (Input.of_string s) 60 let of_input = create 61 let of_reader r = create (Input.of_reader r) 62 - 63 let position t = Input.position t.input 64 65 (** Add a token to the queue *) ··· 72 73 (** Get current indent level *) 74 let current_indent t = 75 - match t.indent_stack with 76 - | [] -> -1 77 - | { indent; _ } :: _ -> indent 78 79 - (** Skip whitespace to end of line, checking for valid comments. 80 - Returns true if any whitespace (including tabs) was found before a comment. *) 81 let skip_whitespace_and_comment t = 82 let has_whitespace = ref false in 83 (* Skip blanks (spaces and tabs) *) ··· 98 Error.raise_at (Input.mark t.input) Invalid_comment 99 end; 100 (* Skip to end of line *) 101 - while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do 102 ignore (Input.next t.input) 103 done 104 end ··· 109 let found_space = ref false in 110 while Input.next_is_blank t.input do 111 (match Input.peek t.input with 112 - | Some '\t' -> found_tab := true 113 - | Some ' ' -> found_space := true 114 - | _ -> ()); 115 ignore (Input.next t.input) 116 done; 117 (!found_tab, !found_space) ··· 120 let rec skip_to_next_token t = 121 (* Check for tabs used as indentation in block context *) 122 (match Input.peek t.input with 123 - | Some '\t' when t.flow_level = 0 && t.leading_whitespace && 124 - (column t - 1) < current_indent t -> 125 - (* Tab found in indentation zone - this is invalid *) 126 - (* Skip to end of line to check if line has content *) 127 - let start_pos = Input.mark t.input in 128 - while Input.next_is_blank t.input do 129 - ignore (Input.next t.input) 130 - done; 131 - (* If we have content on this line with a tab, raise error *) 132 - if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then 133 - Error.raise_at start_pos Tab_in_indentation 134 - | _ -> ()); 135 136 (* Skip blanks and validate comments *) 137 skip_whitespace_and_comment t; ··· 158 ignore (Input.next t.input) 159 done; 160 (* If only tabs were used (no spaces) and column < flow_indent, error *) 161 - if not (Input.next_is_break t.input) && not (Input.is_eof t.input) && 162 - column t < t.flow_indent then 163 - Error.raise_at start_mark Invalid_flow_indentation 164 end; 165 skip_to_next_token t 166 - end else begin 167 ignore (Input.next t.input); 168 skip_to_next_token t 169 end ··· 174 if t.flow_level = 0 && col > current_indent t then begin 175 t.indent_stack <- { indent = col; needs_block_end = true } :: t.indent_stack; 176 true 177 - end else 178 - false 179 180 (** Unroll indentation to given column *) 181 let unroll_indent t col = 182 - while t.flow_level = 0 && 183 - match t.indent_stack with 184 - | { indent; needs_block_end = true; _ } :: _ when indent > col -> true 185 - | _ -> false 186 do 187 match t.indent_stack with 188 | { indent = _; needs_block_end = true; _ } :: rest -> ··· 199 (* A simple key is required only if we're in a block context, 200 at the current indentation level, AND the current indent needs a block end. 201 This matches saphyr's logic and prevents false positives for values. *) 202 - let required = t.flow_level = 0 && 203 - match t.indent_stack with 204 - | { indent; needs_block_end = true; _ } :: _ -> 205 - indent = column t 206 - | _ -> false 207 in 208 - let sk = { 209 - sk_possible = true; 210 - sk_required = required; 211 - sk_token_number = t.token_number; 212 - sk_position = Input.position t.input; 213 - } in 214 (* Remove any existing simple key at current level *) 215 - t.simple_keys <- ( 216 - match t.simple_keys with 217 | _ :: rest -> Some sk :: rest 218 - | [] -> [Some sk] 219 - ) 220 end 221 222 (** Remove simple key at current level *) ··· 229 230 (** Stale simple keys that span too many tokens *) 231 let stale_simple_keys t = 232 - t.simple_keys <- List.map (fun sk_opt -> 233 - match sk_opt with 234 - | Some sk when sk.sk_possible && 235 - (Input.position t.input).line > sk.sk_position.line && 236 - t.flow_level = 0 -> 237 - if sk.sk_required then 238 - Error.raise_at sk.sk_position Expected_key; 239 - None 240 - | _ -> sk_opt 241 - ) t.simple_keys 242 243 (** Read anchor or alias name *) 244 let scan_anchor_alias t = ··· 251 This matches the saphyr implementation: is_yaml_non_space && !is_flow *) 252 while 253 match Input.peek t.input with 254 - | Some c when not (Input.is_whitespace c) && 255 - not (Input.is_flow_indicator c) && 256 - c <> '\x00' -> 257 Buffer.add_char buf c; 258 ignore (Input.next t.input); 259 true 260 | _ -> false 261 - do () done; 262 let name = Buffer.contents buf in 263 if String.length name = 0 then 264 Error.raise_at start (Invalid_anchor "empty anchor name"); ··· 270 let buf = Buffer.create 16 in 271 (* Expect ! *) 272 (match Input.peek t.input with 273 - | Some '!' -> 274 - Buffer.add_char buf '!'; 275 - ignore (Input.next t.input) 276 - | _ -> Error.raise_at start (Invalid_tag "expected '!'")); 277 (* Read word chars *) 278 while 279 match Input.peek t.input with ··· 282 ignore (Input.next t.input); 283 true 284 | _ -> false 285 - do () done; 286 (* Check for secondary ! *) 287 (match Input.peek t.input with 288 - | Some '!' -> 289 - Buffer.add_char buf '!'; 290 - ignore (Input.next t.input) 291 - | _ -> ()); 292 Buffer.contents buf 293 294 (** Scan tag suffix (after handle) *) ··· 298 in 299 let hex_val c = 300 match c with 301 - | '0'..'9' -> Char.code c - Char.code '0' 302 - | 'A'..'F' -> Char.code c - Char.code 'A' + 10 303 - | 'a'..'f' -> Char.code c - Char.code 'a' + 10 304 | _ -> 0 305 in 306 let buf = Buffer.create 32 in 307 while 308 match Input.peek t.input with 309 - | Some '%' -> 310 (* Percent-encoded character *) 311 ignore (Input.next t.input); 312 - (match Input.peek t.input, Input.peek_nth t.input 1 with 313 - | Some c1, Some c2 when is_hex_digit c1 && is_hex_digit c2 -> 314 - ignore (Input.next t.input); 315 - ignore (Input.next t.input); 316 - let code = (hex_val c1) * 16 + (hex_val c2) in 317 - Buffer.add_char buf (Char.chr code); 318 - true 319 - | _ -> 320 - (* Invalid percent encoding - keep the % *) 321 - Buffer.add_char buf '%'; 322 - true) 323 - | Some c when not (Input.is_whitespace c) && 324 - not (Input.is_flow_indicator c) -> 325 Buffer.add_char buf c; 326 ignore (Input.next t.input); 327 true 328 | _ -> false 329 - do () done; 330 Buffer.contents buf 331 332 (** Scan a tag *) 333 let scan_tag t = 334 let start = Input.mark t.input in 335 - ignore (Input.next t.input); (* consume ! *) 336 let handle, suffix = 337 match Input.peek t.input with 338 | Some '<' -> ··· 346 Buffer.add_char buf c; 347 ignore (Input.next t.input); 348 true 349 - | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag") 350 - do () done; 351 - ignore (Input.next t.input); (* consume > *) 352 ("", Buffer.contents buf) 353 | Some c when Input.is_whitespace c || Input.is_flow_indicator c -> 354 (* Non-specific tag: ! *) 355 ("!", "") 356 | Some '!' -> 357 (* Secondary handle: !! *) 358 - ignore (Input.next t.input); (* consume second ! *) 359 let suffix = scan_tag_suffix t in 360 ("!!", suffix) 361 - | _ -> 362 (* Primary handle or just suffix: !foo or !e!foo *) 363 (* Read alphanumeric characters *) 364 let buf = Buffer.create 16 in ··· 369 ignore (Input.next t.input); 370 true 371 | _ -> false 372 - do () done; 373 (* Check if next character is ! - if so, this is a named handle *) 374 - (match Input.peek t.input with 375 - | Some '!' -> 376 - (* Named handle like !e! *) 377 - ignore (Input.next t.input); 378 - let handle_name = Buffer.contents buf in 379 - let suffix = scan_tag_suffix t in 380 - ("!" ^ handle_name ^ "!", suffix) 381 - | _ -> 382 - (* Just ! followed by suffix *) 383 - ("!", Buffer.contents buf ^ scan_tag_suffix t)) 384 in 385 (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *) 386 (match Input.peek t.input with 387 - | None -> () (* EOF is ok *) 388 - | Some c when Input.is_whitespace c || Input.is_break c -> () 389 - | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> () 390 - | _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag")); 391 let span = Span.make ~start ~stop:(Input.mark t.input) in 392 (handle, suffix, span) 393 394 (** Scan single-quoted scalar *) 395 let scan_single_quoted t = 396 let start = Input.mark t.input in 397 - ignore (Input.next t.input); (* consume opening single-quote *) 398 let buf = Buffer.create 64 in 399 - let whitespace = Buffer.create 16 in (* Track trailing whitespace *) 400 401 let flush_whitespace () = 402 if Buffer.length whitespace > 0 then begin ··· 408 let rec loop () = 409 match Input.peek t.input with 410 | None -> Error.raise_at start Unclosed_single_quote 411 - | Some '\'' -> 412 ignore (Input.next t.input); 413 (* Check for escaped quote ('') *) 414 - (match Input.peek t.input with 415 - | Some '\'' -> 416 - flush_whitespace (); 417 - Buffer.add_char buf '\''; 418 - ignore (Input.next t.input); 419 - loop () 420 - | _ -> 421 - (* End of string - flush any trailing whitespace *) 422 - flush_whitespace ()) 423 | Some ' ' | Some '\t' -> 424 (* Track whitespace - don't add to buf yet *) 425 Buffer.add_char whitespace (Option.get (Input.peek t.input)); ··· 439 (* Check indentation: continuation must be > block indent (QB6E, DK95) *) 440 let col = column t in 441 let indent = current_indent t in 442 - if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then 443 - Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar"); 444 (* Count empty lines (consecutive line breaks) *) 445 let empty_lines = ref 0 in 446 while Input.next_is_break t.input do ··· 454 (* Check indentation after each empty line too *) 455 let col = column t in 456 let indent = current_indent t in 457 - if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then 458 - Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar") 459 done; 460 (* Apply folding rules *) 461 if !empty_lines > 0 then begin ··· 463 for _ = 1 to !empty_lines do 464 Buffer.add_char buf '\n' 465 done 466 - end else 467 (* Single break: fold to space (even at start of string) *) 468 Buffer.add_char buf ' '; 469 loop () ··· 486 | Some c when Input.is_hex c -> 487 Buffer.add_char buf c; 488 ignore (Input.next t.input) 489 - | _ -> 490 - Error.raise_at start (Invalid_hex_escape (Buffer.contents buf)) 491 done; 492 let code = int_of_string ("0x" ^ Buffer.contents buf) in 493 - if code <= 0x7F then 494 - String.make 1 (Char.chr code) 495 else if code <= 0x7FF then 496 let b1 = 0xC0 lor (code lsr 6) in 497 let b2 = 0x80 lor (code land 0x3F) in ··· 500 let b1 = 0xE0 lor (code lsr 12) in 501 let b2 = 0x80 lor ((code lsr 6) land 0x3F) in 502 let b3 = 0x80 lor (code land 0x3F) in 503 - String.init 3 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3)) 504 else 505 let b1 = 0xF0 lor (code lsr 18) in 506 let b2 = 0x80 lor ((code lsr 12) land 0x3F) in 507 let b3 = 0x80 lor ((code lsr 6) land 0x3F) in 508 let b4 = 0x80 lor (code land 0x3F) in 509 - String.init 4 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4)) 510 511 (** Scan double-quoted scalar *) 512 let scan_double_quoted t = 513 let start = Input.mark t.input in 514 - ignore (Input.next t.input); (* consume opening double-quote *) 515 let buf = Buffer.create 64 in 516 - let whitespace = Buffer.create 16 in (* Track pending whitespace *) 517 518 let flush_whitespace () = 519 if Buffer.length whitespace > 0 then begin ··· 529 (* Flush trailing whitespace before closing quote to preserve it *) 530 flush_whitespace (); 531 ignore (Input.next t.input) 532 - | Some ' ' | Some '\t' as c_opt -> 533 (* Track whitespace - don't add to buf yet *) 534 let c = match c_opt with Some c -> c | None -> assert false in 535 Buffer.add_char whitespace c; ··· 537 loop () 538 | Some '\\' -> 539 (* Escape sequence - this is non-whitespace content *) 540 - flush_whitespace (); (* Commit any pending whitespace *) 541 ignore (Input.next t.input); 542 (match Input.peek t.input with 543 - | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>") 544 - | Some '0' -> Buffer.add_char buf '\x00'; ignore (Input.next t.input) 545 - | Some 'a' -> Buffer.add_char buf '\x07'; ignore (Input.next t.input) 546 - | Some 'b' -> Buffer.add_char buf '\x08'; ignore (Input.next t.input) 547 - | Some 't' | Some '\t' -> Buffer.add_char buf '\t'; ignore (Input.next t.input) 548 - | Some 'n' -> Buffer.add_char buf '\n'; ignore (Input.next t.input) 549 - | Some 'v' -> Buffer.add_char buf '\x0B'; ignore (Input.next t.input) 550 - | Some 'f' -> Buffer.add_char buf '\x0C'; ignore (Input.next t.input) 551 - | Some 'r' -> Buffer.add_char buf '\r'; ignore (Input.next t.input) 552 - | Some 'e' -> Buffer.add_char buf '\x1B'; ignore (Input.next t.input) 553 - | Some ' ' -> Buffer.add_char buf ' '; ignore (Input.next t.input) 554 - | Some '"' -> Buffer.add_char buf '"'; ignore (Input.next t.input) 555 - | Some '/' -> Buffer.add_char buf '/'; ignore (Input.next t.input) 556 - | Some '\\' -> Buffer.add_char buf '\\'; ignore (Input.next t.input) 557 - | Some 'N' -> Buffer.add_string buf "\xC2\x85"; ignore (Input.next t.input) (* NEL *) 558 - | Some '_' -> Buffer.add_string buf "\xC2\xA0"; ignore (Input.next t.input) (* NBSP *) 559 - | Some 'L' -> Buffer.add_string buf "\xE2\x80\xA8"; ignore (Input.next t.input) (* LS *) 560 - | Some 'P' -> Buffer.add_string buf "\xE2\x80\xA9"; ignore (Input.next t.input) (* PS *) 561 - | Some 'x' -> 562 - ignore (Input.next t.input); 563 - Buffer.add_string buf (decode_hex t 2) 564 - | Some 'u' -> 565 - ignore (Input.next t.input); 566 - Buffer.add_string buf (decode_hex t 4) 567 - | Some 'U' -> 568 - ignore (Input.next t.input); 569 - Buffer.add_string buf (decode_hex t 8) 570 - | Some '\n' | Some '\r' -> 571 - (* Line continuation escape *) 572 - Input.consume_break t.input; 573 - while Input.next_is_blank t.input do 574 - ignore (Input.next t.input) 575 - done 576 - | Some c -> 577 - Error.raise_at (Input.mark t.input) 578 - (Invalid_escape_sequence (Printf.sprintf "\\%c" c))); 579 loop () 580 | Some '\n' | Some '\r' -> 581 (* Line break: discard any pending trailing whitespace *) ··· 596 if Input.next_is_break t.input then begin 597 Input.consume_break t.input; 598 incr empty_lines; 599 - started_with_tab := false (* Reset for next line *) 600 - end else 601 - continue := false 602 done; 603 (* Check for document boundary - this terminates the quoted string *) 604 if Input.at_document_boundary t.input then ··· 609 let indent = current_indent t in 610 let start_col = start.column in 611 (* DK95/01: if continuation started with tabs and column < start column, error *) 612 - if not (Input.is_eof t.input) && !started_with_tab && col < start_col then 613 - Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar"); 614 - if not (Input.is_eof t.input) && col <= indent && indent >= 0 then 615 - Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar"); 616 (* Per YAML spec: single break = space, break + empty lines = newlines *) 617 if !empty_lines > 0 then begin 618 (* Empty lines: output N newlines where N = number of empty lines *) 619 for _ = 1 to !empty_lines do 620 Buffer.add_char buf '\n' 621 done 622 - end else 623 (* Single break folds to space *) 624 Buffer.add_char buf ' '; 625 loop () 626 | Some c -> 627 (* Non-whitespace character *) 628 - flush_whitespace (); (* Commit any pending whitespace *) 629 Buffer.add_char buf c; 630 ignore (Input.next t.input); 631 loop () ··· 637 (** Check if character can appear in plain scalar at this position *) 638 let can_continue_plain t c ~in_flow = 639 match c with 640 - | ':' -> 641 (* : is OK if not followed by whitespace or flow indicator *) 642 - (match Input.peek_nth t.input 1 with 643 - | None -> true 644 - | Some c2 when Input.is_whitespace c2 -> false 645 - | Some c2 when in_flow && Input.is_flow_indicator c2 -> false 646 - | _ -> true) 647 - | '#' -> 648 (* # is a comment indicator only if preceded by whitespace *) 649 (* Check the previous character to determine if this is a comment *) 650 - (match Input.peek_back t.input with 651 - | None -> true (* At start - can't be comment indicator, allow it *) 652 - | Some c when Input.is_whitespace c -> false (* Preceded by whitespace - comment *) 653 - | Some c when Input.is_break c -> false (* At start of line - comment *) 654 - | _ -> true) (* Not preceded by whitespace - part of scalar *) 655 | c when in_flow && Input.is_flow_indicator c -> false 656 | _ when Input.is_break c -> false 657 | _ -> true ··· 663 let indent = current_indent t in 664 (* In flow context, scalars must be indented more than the current block indent. 665 This ensures that content at block indent or less ends the flow context. *) 666 - if in_flow && (column t - 1) < indent then 667 Error.raise_at start Invalid_flow_indentation; 668 let buf = Buffer.create 64 in 669 let spaces = Buffer.create 16 in 670 - let whitespace = Buffer.create 16 in (* Track whitespace within a line *) 671 let leading_blanks = ref false in 672 673 let rec scan_line () = ··· 684 if Buffer.length spaces > 0 then begin 685 if !leading_blanks then begin 686 (* Fold line break *) 687 - if Buffer.contents spaces = "\n" then 688 - Buffer.add_char buf ' ' 689 else begin 690 (* Multiple breaks - preserve all but first *) 691 let s = Buffer.contents spaces in 692 Buffer.add_substring buf s 1 (String.length s - 1) 693 end 694 - end else 695 - Buffer.add_buffer buf spaces; 696 Buffer.clear spaces 697 end; 698 (* Add any pending whitespace from within the line *) ··· 719 if !leading_blanks then begin 720 (* We already had a break - this is an additional break (empty line) *) 721 Buffer.add_char spaces '\n' 722 - end else begin 723 (* First line break *) 724 Buffer.clear spaces; 725 Buffer.add_char spaces '\n'; ··· 739 (* However, allow empty lines (line breaks) to continue even if dedented *) 740 if Input.next_is_break t.input then 741 scan_lines () (* Empty line - continue *) 742 - else if not in_flow && col <= indent then 743 - () (* Stop - dedented or at parent level in block context *) 744 - else if Input.at_document_boundary t.input then 745 - () (* Stop - document boundary *) 746 - else 747 - scan_lines () 748 end 749 in 750 ··· 755 let len = String.length value in 756 let rec find_end i = 757 if i < 0 then 0 758 - else match value.[i] with 759 - | ' ' | '\t' -> find_end (i - 1) 760 - | _ -> i + 1 761 in 762 let end_pos = find_end (len - 1) in 763 String.sub value 0 end_pos ··· 769 (** Scan block scalar (literal | or folded >) *) 770 let scan_block_scalar t literal = 771 let start = Input.mark t.input in 772 - ignore (Input.next t.input); (* consume | or > *) 773 774 (* Parse header: optional indentation indicator and chomping *) 775 let explicit_indent = ref None in ··· 777 778 (* First character of header *) 779 (match Input.peek t.input with 780 - | Some c when Input.is_digit c && c <> '0' -> 781 - explicit_indent := Some (Char.code c - Char.code '0'); 782 - ignore (Input.next t.input) 783 - | Some '-' -> chomping := Chomping.Strip; ignore (Input.next t.input) 784 - | Some '+' -> chomping := Chomping.Keep; ignore (Input.next t.input) 785 - | _ -> ()); 786 787 (* Second character of header *) 788 (match Input.peek t.input with 789 - | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None -> 790 - explicit_indent := Some (Char.code c - Char.code '0'); 791 - ignore (Input.next t.input) 792 - | Some '-' when !chomping = Chomping.Clip -> 793 - chomping := Chomping.Strip; ignore (Input.next t.input) 794 - | Some '+' when !chomping = Chomping.Clip -> 795 - chomping := Chomping.Keep; ignore (Input.next t.input) 796 - | _ -> ()); 797 798 (* Skip whitespace and optional comment *) 799 skip_whitespace_and_comment t; 800 801 (* Consume line break *) 802 - if Input.next_is_break t.input then 803 - Input.consume_break t.input 804 else if not (Input.is_eof t.input) then 805 Error.raise_at (Input.mark t.input) 806 (Invalid_block_scalar_header "expected newline after header"); ··· 808 let base_indent = current_indent t in 809 (* base_indent is the indent level from the stack, -1 if empty. 810 It's used directly for comparisons in implicit indent case. *) 811 - let content_indent = ref ( 812 - match !explicit_indent with 813 - | Some n -> 814 - (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed. 815 content_indent = (base_indent - 1) + n, but at least n for document level. *) 816 - let base_level = max 0 (base_indent - 1) in 817 - base_level + n 818 - | None -> 0 (* Will be determined by first non-empty line *) 819 - ) in 820 821 let buf = Buffer.create 256 in 822 let trailing_breaks = Buffer.create 16 in 823 - let leading_blank = ref false in (* Was the previous line "more indented"? *) 824 - let max_empty_line_indent = ref 0 in (* Track max indent of empty lines before first content *) 825 826 (* Skip to content indentation, skipping empty lines. 827 Returns the number of spaces actually skipped (important for detecting dedentation). *) ··· 829 if !content_indent > 0 then begin 830 (* Explicit indent - skip up to content_indent spaces *) 831 let spaces_skipped = ref 0 in 832 - while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do 833 incr spaces_skipped; 834 ignore (Input.next t.input) 835 done; ··· 840 Buffer.add_char trailing_breaks '\n'; 841 Input.consume_break t.input; 842 skip_to_content_indent () 843 - end else if !spaces_skipped < !content_indent then begin 844 (* Line starts with fewer spaces than content_indent - dedented *) 845 !spaces_skipped 846 - end else if Input.next_is_blank t.input then begin 847 (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line. 848 For literal scalars, whitespace-only lines ARE content (not empty). 849 For folded scalars, whitespace-only lines that are "more indented" are preserved. *) ··· 853 else begin 854 (* Folded: check if rest is only blanks *) 855 let idx = ref 0 in 856 - while match Input.peek_nth t.input !idx with 857 - | Some c when Input.is_blank c -> incr idx; true 858 - | _ -> false 859 - do () done; 860 - match Input.peek_nth t.input (!idx) with 861 | None | Some '\n' | Some '\r' -> 862 (* Empty/whitespace-only line in folded - skip spaces *) 863 while Input.next_is_blank t.input do ··· 870 (* Has non-whitespace content *) 871 !content_indent 872 end 873 - end else 874 - !content_indent 875 - end else begin 876 (* Implicit indent - skip empty lines without consuming spaces. 877 Note: Only SPACES count as indentation. Tabs are content, not indentation. 878 So we only check for spaces when determining if a line is "empty". *) ··· 880 Buffer.add_char trailing_breaks '\n'; 881 Input.consume_break t.input; 882 skip_to_content_indent () 883 - end else if Input.next_is (( = ) ' ') t.input then begin 884 (* Check if line is empty (only spaces before break) *) 885 let idx = ref 0 in 886 - while match Input.peek_nth t.input !idx with 887 - | Some ' ' -> incr idx; true 888 - | _ -> false 889 - do () done; 890 - match Input.peek_nth t.input (!idx) with 891 | None | Some '\n' | Some '\r' -> 892 (* Line has only spaces - empty line *) 893 (* Track max indent of empty lines for later validation *) 894 - if !idx > !max_empty_line_indent then 895 - max_empty_line_indent := !idx; 896 while Input.next_is (( = ) ' ') t.input do 897 ignore (Input.next t.input) 898 done; ··· 902 | _ -> 903 (* Has content (including tabs which are content, not indentation) *) 904 0 905 - end else if Input.next_is (( = ) '\t') t.input then begin 906 (* Tab at start of line in implicit indent mode - this is an error (Y79Y) 907 because tabs cannot be used as indentation in YAML *) 908 Error.raise_at (Input.mark t.input) Tab_in_indentation 909 - end else 910 (* Not at break or space - other content character *) 911 0 912 end ··· 938 let should_process = 939 if !content_indent = 0 then begin 940 (* For implicit indent, content must be more indented than base_level. *) 941 - if line_indent <= base_level then 942 - false (* No content - first line not indented enough *) 943 else begin 944 (* Validate: first content line must be indented at least as much as 945 the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *) 946 - if line_indent < !max_empty_line_indent && line_indent > base_level then 947 Error.raise_at (Input.mark t.input) 948 - (Invalid_block_scalar_header "wrongly indented line in block scalar"); 949 content_indent := line_indent; 950 true 951 end 952 - end else if line_indent < !content_indent then 953 - false (* Dedented - done with content *) 954 - else 955 - true 956 in 957 958 if should_process then begin ··· 960 For folded scalars, lines that start with any whitespace (space or tab) after the 961 content indentation are "more indented" and preserve breaks. 962 Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *) 963 - let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in 964 965 (* Add trailing breaks to buffer *) 966 if Buffer.length buf > 0 then begin 967 if Buffer.length trailing_breaks > 0 then begin 968 - if literal then 969 - Buffer.add_buffer buf trailing_breaks 970 else begin 971 (* Folded scalar: fold only if both previous and current lines are not more-indented *) 972 - if not !leading_blank && not trailing_blank then begin 973 let breaks = Buffer.contents trailing_breaks in 974 - if String.length breaks = 1 then 975 - Buffer.add_char buf ' ' 976 - else 977 - Buffer.add_substring buf breaks 1 (String.length breaks - 1) 978 - end else begin 979 (* Preserve breaks for more-indented lines *) 980 Buffer.add_buffer buf trailing_breaks 981 end 982 end 983 - end else if not literal then 984 - Buffer.add_char buf ' ' 985 - end else 986 - Buffer.add_buffer buf trailing_breaks; 987 Buffer.clear trailing_breaks; 988 989 (* Add extra indentation for literal or more-indented folded lines *) 990 (* On the first line (when determining content_indent), we've already consumed all spaces, 991 so we should NOT add any back. On subsequent lines, we add only the spaces beyond content_indent. *) 992 - if not first_line && (literal || (!extra_spaces > 0 && not literal)) then begin 993 for _ = 1 to !extra_spaces do 994 Buffer.add_char buf ' ' 995 done 996 end; 997 998 (* Read line content *) 999 - while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do 1000 Buffer.add_char buf (Input.next_exn t.input) 1001 done; 1002 ··· 1023 | Chomping.Strip -> content 1024 | Chomping.Clip -> 1025 if String.length content > 0 then content ^ "\n" else content 1026 - | Chomping.Keep -> 1027 - content ^ Buffer.contents trailing_breaks 1028 in 1029 1030 let span = Span.make ~start ~stop:(Input.mark t.input) in ··· 1034 (** Scan directive (after %) *) 1035 let scan_directive t = 1036 let start = Input.mark t.input in 1037 - ignore (Input.next t.input); (* consume % *) 1038 1039 (* Read directive name *) 1040 let name_buf = Buffer.create 16 in ··· 1045 ignore (Input.next t.input); 1046 true 1047 | _ -> false 1048 - do () done; 1049 let name = Buffer.contents name_buf in 1050 1051 (* Skip blanks *) ··· 1060 let minor = ref 0 in 1061 (* Read major version *) 1062 while Input.next_is_digit t.input do 1063 - major := !major * 10 + (Char.code (Input.next_exn t.input) - Char.code '0') 1064 done; 1065 (* Expect . *) 1066 (match Input.peek t.input with 1067 - | Some '.' -> ignore (Input.next t.input) 1068 - | _ -> Error.raise_at (Input.mark t.input) (Invalid_yaml_version "expected '.'")); 1069 (* Read minor version *) 1070 while Input.next_is_digit t.input do 1071 - minor := !minor * 10 + (Char.code (Input.next_exn t.input) - Char.code '0') 1072 done; 1073 (* Validate: only whitespace and comments allowed before line break (MUS6) *) 1074 skip_whitespace_and_comment t; 1075 - if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then 1076 - Error.raise_at (Input.mark t.input) (Invalid_directive "expected comment or line break after version"); 1077 let span = Span.make ~start ~stop:(Input.mark t.input) in 1078 - Token.Version_directive { major = !major; minor = !minor }, span 1079 - 1080 | "TAG" -> 1081 (* Tag directive: %TAG !foo! tag:example.com,2000: *) 1082 let handle = scan_tag_handle t in ··· 1093 ignore (Input.next t.input); 1094 true 1095 | _ -> false 1096 - do () done; 1097 let prefix = Buffer.contents prefix_buf in 1098 let span = Span.make ~start ~stop:(Input.mark t.input) in 1099 - Token.Tag_directive { handle; prefix }, span 1100 - 1101 | _ -> 1102 (* Reserved/Unknown directive - skip to end of line and ignore *) 1103 (* Per YAML spec, reserved directives should be ignored with a warning *) 1104 - while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do 1105 ignore (Input.next t.input) 1106 done; 1107 let span = Span.make ~start ~stop:(Input.mark t.input) in 1108 (* Return an empty tag directive token to indicate directive was processed but ignored *) 1109 - Token.Tag_directive { handle = ""; prefix = "" }, span 1110 1111 (** Fetch the next token(s) into the queue *) 1112 let rec fetch_next_token t = ··· 1120 (* We're about to process actual content, not leading whitespace *) 1121 t.leading_whitespace <- false; 1122 1123 - if Input.is_eof t.input then 1124 - fetch_stream_end t 1125 - else if Input.at_document_boundary t.input then 1126 - fetch_document_indicator t 1127 else begin 1128 match Input.peek t.input with 1129 | None -> fetch_stream_end t 1130 - | Some '%' when (Input.position t.input).column = 1 -> 1131 - fetch_directive t 1132 | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start 1133 | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start 1134 | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end ··· 1136 | Some ',' -> fetch_flow_entry t 1137 | Some '-' when t.flow_level = 0 && check_block_entry t -> 1138 fetch_block_entry t 1139 - | Some '?' when check_key t -> 1140 - fetch_key t 1141 - | Some ':' when check_value t -> 1142 - fetch_value t 1143 | Some '*' -> fetch_alias t 1144 | Some '&' -> fetch_anchor t 1145 | Some '!' -> fetch_tag t ··· 1147 | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false 1148 | Some '\'' -> fetch_single_quoted t 1149 | Some '"' -> fetch_double_quoted t 1150 - | Some '-' when can_start_plain t -> 1151 - fetch_plain_scalar t 1152 - | Some '?' when can_start_plain t -> 1153 - fetch_plain_scalar t 1154 - | Some ':' when can_start_plain t -> 1155 - fetch_plain_scalar t 1156 - | Some c when can_start_plain_char c t -> 1157 - fetch_plain_scalar t 1158 - | Some c -> 1159 - Error.raise_at (Input.mark t.input) (Unexpected_character c) 1160 end 1161 1162 and fetch_stream_end t = ··· 1177 let indicator = Input.peek_string t.input 3 in 1178 Input.skip t.input 3; 1179 let span = Span.make ~start ~stop:(Input.mark t.input) in 1180 - let token = if indicator = "---" then Token.Document_start else Token.Document_end in 1181 (* Reset document content flag after document end marker *) 1182 if indicator = "..." then begin 1183 t.document_has_content <- false; 1184 (* After document end marker, skip whitespace and check for end of line or comment *) 1185 - while Input.next_is_blank t.input do ignore (Input.next t.input) done; 1186 - (match Input.peek t.input with 1187 - | None -> () (* EOF is ok *) 1188 - | Some c when Input.is_break c -> () 1189 - | Some '#' -> () (* Comment is ok *) 1190 - | _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line")) 1191 end; 1192 emit t span token 1193 ··· 1198 If we've emitted content in the current document, we need a document end marker first *) 1199 if t.document_has_content then 1200 Error.raise_at (Input.mark t.input) 1201 - (Unexpected_token "directives must be separated from document content by document end marker (...)"); 1202 unroll_indent t (-1); 1203 remove_simple_key t; 1204 t.allow_simple_key <- false; ··· 1208 and fetch_flow_collection_start t token_type = 1209 save_simple_key t; 1210 (* Record indent of outermost flow collection *) 1211 - if t.flow_level = 0 then 1212 - t.flow_indent <- column t; 1213 t.flow_level <- t.flow_level + 1; 1214 (* Track whether this is a mapping or sequence *) 1215 - let is_mapping = (token_type = Token.Flow_mapping_start) in 1216 t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack; 1217 t.allow_simple_key <- true; 1218 t.simple_keys <- None :: t.simple_keys; ··· 1225 and fetch_flow_collection_end t token_type = 1226 remove_simple_key t; 1227 t.flow_level <- t.flow_level - 1; 1228 - t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []); 1229 t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []); 1230 t.allow_simple_key <- false; 1231 let start = Input.mark t.input in ··· 1270 ignore (Input.next t.input); 1271 1272 (* Check for tabs after - : pattern like -\t- is invalid *) 1273 - let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in 1274 if found_tabs then begin 1275 (* If we found tabs and next char is - followed by whitespace, error *) 1276 match Input.peek t.input with 1277 - | Some '-' -> 1278 - (match Input.peek_nth t.input 1 with 1279 - | None -> Error.raise_at start Tab_in_indentation 1280 - | Some c when Input.is_whitespace c -> 1281 - Error.raise_at start Tab_in_indentation 1282 - | Some _ -> ()) 1283 | _ -> () 1284 end; 1285 ··· 1289 and check_key t = 1290 (* ? followed by whitespace or flow indicator in both block and flow *) 1291 match Input.peek_nth t.input 1 with 1292 - | None -> true 1293 - | Some c -> 1294 - Input.is_whitespace c || 1295 - (t.flow_level > 0 && Input.is_flow_indicator c) 1296 1297 and fetch_key t = 1298 if t.flow_level = 0 then begin ··· 1311 ignore (Input.next t.input); 1312 1313 (* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *) 1314 - let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in 1315 if found_tabs && t.flow_level = 0 then begin 1316 (* In block context, tabs after ? are not allowed *) 1317 Error.raise_at start Tab_in_indentation ··· 1324 (* : followed by whitespace in block, or whitespace/flow indicator in flow, or adjacent value *) 1325 match Input.peek_nth t.input 1 with 1326 | None -> true 1327 - | Some c -> 1328 - Input.is_whitespace c || 1329 - (t.flow_level > 0 && Input.is_flow_indicator c) || 1330 (* Allow adjacent values in flow context at designated positions *) 1331 - (t.flow_level > 0 && 1332 - match t.adjacent_value_allowed_at with 1333 - | Some pos -> pos.Position.line = (Input.position t.input).Position.line && 1334 - pos.Position.column = (Input.position t.input).Position.column 1335 - | None -> false) 1336 1337 and fetch_value t = 1338 let start = Input.mark t.input in ··· 1342 | Some sk :: _ when sk.sk_possible -> 1343 (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line. 1344 In explicit flow mapping { }, key and : can span lines. *) 1345 - let is_implicit_flow_mapping = match t.flow_mapping_stack with 1346 - | false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *) 1347 | _ -> false 1348 in 1349 - if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then 1350 - Error.raise_at start Illegal_flow_key_line; 1351 (* Insert KEY token before the simple key value *) 1352 let key_span = Span.point sk.sk_position in 1353 let key_token = { Token.token = Token.Key; span = key_span } in ··· 1355 let tokens = Queue.to_seq t.tokens |> Array.of_seq in 1356 Queue.clear t.tokens; 1357 let insert_pos = sk.sk_token_number - t.tokens_taken in 1358 - Array.iteri (fun i tok -> 1359 - if i = insert_pos then Queue.add key_token t.tokens; 1360 - Queue.add tok t.tokens 1361 - ) tokens; 1362 - if insert_pos >= Array.length tokens then 1363 - Queue.add key_token t.tokens; 1364 t.token_number <- t.token_number + 1; 1365 (* Roll indent for implicit block mapping *) 1366 if t.flow_level = 0 then begin ··· 1371 let bm_token = { Token.token = Token.Block_mapping_start; span } in 1372 let tokens = Queue.to_seq t.tokens |> Array.of_seq in 1373 Queue.clear t.tokens; 1374 - Array.iteri (fun i tok -> 1375 - if i = insert_pos then Queue.add bm_token t.tokens; 1376 - Queue.add tok t.tokens 1377 - ) tokens; 1378 if insert_pos >= Array.length tokens then 1379 Queue.add bm_token t.tokens; 1380 t.token_number <- t.token_number + 1 1381 end 1382 end; 1383 - t.simple_keys <- None :: (List.tl t.simple_keys); 1384 true 1385 | _ -> 1386 (* No simple key - this is a complex value (or empty key) *) ··· 1400 remove_simple_key t; 1401 (* In block context without simple key, allow simple keys for compact mappings like ": moon: white" 1402 In flow context or after using a simple key, disallow simple keys *) 1403 - t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0); 1404 t.document_has_content <- true; 1405 let start = Input.mark t.input in 1406 ignore (Input.next t.input); 1407 1408 (* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09) 1409 However, :\t bar (tab followed by space then content) is valid (6BCT) *) 1410 - let (found_tabs, found_spaces) = skip_blanks_check_tabs t in 1411 - if found_tabs && not found_spaces && t.flow_level = 0 then begin 1412 (* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *) 1413 match Input.peek t.input with 1414 - | Some ('-' | '?') -> 1415 - Error.raise_at start Tab_in_indentation 1416 - | Some c when (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') -> 1417 (* Tab-only followed by alphanumeric - likely a key, which is invalid *) 1418 Error.raise_at start Tab_in_indentation 1419 | _ -> () ··· 1430 t.allow_simple_key <- false; 1431 t.document_has_content <- true; 1432 let start = Input.mark t.input in 1433 - ignore (Input.next t.input); (* consume * or & *) 1434 let name, span = scan_anchor_alias t in 1435 let span = Span.make ~start ~stop:span.stop in 1436 let token = if is_alias then Token.Alias name else Token.Anchor name in ··· 1475 match Input.peek_nth t.input 1 with 1476 | None -> false 1477 | Some c -> 1478 - not (Input.is_whitespace c) && 1479 - (t.flow_level = 0 || not (Input.is_flow_indicator c)) 1480 1481 and can_start_plain_char c _t = 1482 (* Characters that can start a plain scalar *) ··· 1492 (* If the plain scalar ended after crossing a line break (leading_blanks = true), 1493 allow simple keys. This is important because the scanner already consumed the 1494 line break and leading whitespace when checking for continuation. *) 1495 - if ended_with_linebreak then 1496 - t.allow_simple_key <- true; 1497 emit t span (Token.Scalar { style = `Plain; value }) 1498 1499 (** Check if we need more tokens to resolve simple keys *) ··· 1502 else if Queue.is_empty t.tokens then true 1503 else 1504 (* Check if any simple key could affect the first queued token *) 1505 - List.exists (function 1506 - | Some sk when sk.sk_possible -> 1507 - sk.sk_token_number >= t.tokens_taken 1508 - | _ -> false 1509 - ) t.simple_keys 1510 1511 (** Ensure we have enough tokens to return one safely *) 1512 let ensure_tokens t = ··· 1523 (** Get next token *) 1524 let next t = 1525 ensure_tokens t; 1526 - if Queue.is_empty t.tokens then 1527 - None 1528 else begin 1529 t.tokens_taken <- t.tokens_taken + 1; 1530 Some (Queue.pop t.tokens) ··· 1540 let rec loop () = 1541 match next t with 1542 | None -> () 1543 - | Some tok -> f tok; loop () 1544 in 1545 loop () 1546 1547 (** Fold over all tokens *) 1548 let fold f init t = 1549 let rec loop acc = 1550 - match next t with 1551 - | None -> acc 1552 - | Some tok -> loop (f acc tok) 1553 in 1554 loop init 1555 1556 (** Convert to list *) 1557 - let to_list t = 1558 - fold (fun acc tok -> tok :: acc) [] t |> List.rev
··· 5 6 (** YAML tokenizer/scanner with lookahead for ambiguity resolution *) 7 8 type simple_key = { 9 sk_possible : bool; 10 sk_required : bool; 11 sk_token_number : int; 12 sk_position : Position.t; 13 } 14 + (** Simple key tracking for mapping key disambiguation *) 15 16 type indent = { 17 indent : int; 18 needs_block_end : bool; 19 } 20 + (** Indent level tracking *) 21 22 type t = { 23 input : Input.t; ··· 27 mutable stream_started : bool; 28 mutable stream_ended : bool; 29 mutable indent_stack : indent list; (** Stack of indentation levels *) 30 + mutable flow_level : int; (** Nesting depth in \[\] or \{\} *) 31 + mutable flow_indent : int; 32 + (** Column where outermost flow collection started *) 33 + mutable simple_keys : simple_key option list; 34 + (** Per flow-level simple key tracking *) 35 mutable allow_simple_key : bool; 36 + mutable leading_whitespace : bool; 37 + (** True when at start of line (only whitespace seen) *) 38 + mutable document_has_content : bool; 39 + (** True if we've emitted content tokens in current document *) 40 + mutable adjacent_value_allowed_at : Position.t option; 41 + (** Position where adjacent : is allowed *) 42 + mutable flow_mapping_stack : bool list; 43 + (** Stack of whether each flow level is a mapping *) 44 } 45 46 let create input = ··· 54 indent_stack = []; 55 flow_level = 0; 56 flow_indent = 0; 57 + simple_keys = [ None ]; 58 + (* One entry for the base level *) 59 allow_simple_key = true; 60 + leading_whitespace = true; 61 + (* Start at beginning of stream *) 62 document_has_content = false; 63 adjacent_value_allowed_at = None; 64 flow_mapping_stack = []; ··· 67 let of_string s = create (Input.of_string s) 68 let of_input = create 69 let of_reader r = create (Input.of_reader r) 70 let position t = Input.position t.input 71 72 (** Add a token to the queue *) ··· 79 80 (** Get current indent level *) 81 let current_indent t = 82 + match t.indent_stack with [] -> -1 | { indent; _ } :: _ -> indent 83 84 + (** Skip whitespace to end of line, checking for valid comments. Returns true if 85 + any whitespace (including tabs) was found before a comment. *) 86 let skip_whitespace_and_comment t = 87 let has_whitespace = ref false in 88 (* Skip blanks (spaces and tabs) *) ··· 103 Error.raise_at (Input.mark t.input) Invalid_comment 104 end; 105 (* Skip to end of line *) 106 + while (not (Input.is_eof t.input)) && not (Input.next_is_break t.input) do 107 ignore (Input.next t.input) 108 done 109 end ··· 114 let found_space = ref false in 115 while Input.next_is_blank t.input do 116 (match Input.peek t.input with 117 + | Some '\t' -> found_tab := true 118 + | Some ' ' -> found_space := true 119 + | _ -> ()); 120 ignore (Input.next t.input) 121 done; 122 (!found_tab, !found_space) ··· 125 let rec skip_to_next_token t = 126 (* Check for tabs used as indentation in block context *) 127 (match Input.peek t.input with 128 + | Some '\t' 129 + when t.flow_level = 0 && t.leading_whitespace 130 + && column t - 1 < current_indent t -> 131 + (* Tab found in indentation zone - this is invalid *) 132 + (* Skip to end of line to check if line has content *) 133 + let start_pos = Input.mark t.input in 134 + while Input.next_is_blank t.input do 135 + ignore (Input.next t.input) 136 + done; 137 + (* If we have content on this line with a tab, raise error *) 138 + if (not (Input.next_is_break t.input)) && not (Input.is_eof t.input) then 139 + Error.raise_at start_pos Tab_in_indentation 140 + | _ -> ()); 141 142 (* Skip blanks and validate comments *) 143 skip_whitespace_and_comment t; ··· 164 ignore (Input.next t.input) 165 done; 166 (* If only tabs were used (no spaces) and column < flow_indent, error *) 167 + if 168 + (not (Input.next_is_break t.input)) 169 + && (not (Input.is_eof t.input)) 170 + && column t < t.flow_indent 171 + then Error.raise_at start_mark Invalid_flow_indentation 172 end; 173 skip_to_next_token t 174 + end 175 + else begin 176 ignore (Input.next t.input); 177 skip_to_next_token t 178 end ··· 183 if t.flow_level = 0 && col > current_indent t then begin 184 t.indent_stack <- { indent = col; needs_block_end = true } :: t.indent_stack; 185 true 186 + end 187 + else false 188 189 (** Unroll indentation to given column *) 190 let unroll_indent t col = 191 + while 192 + t.flow_level = 0 193 + && 194 + match t.indent_stack with 195 + | { indent; needs_block_end = true; _ } :: _ when indent > col -> true 196 + | _ -> false 197 do 198 match t.indent_stack with 199 | { indent = _; needs_block_end = true; _ } :: rest -> ··· 210 (* A simple key is required only if we're in a block context, 211 at the current indentation level, AND the current indent needs a block end. 212 This matches saphyr's logic and prevents false positives for values. *) 213 + let required = 214 + t.flow_level = 0 215 + && 216 + match t.indent_stack with 217 + | { indent; needs_block_end = true; _ } :: _ -> indent = column t 218 + | _ -> false 219 in 220 + let sk = 221 + { 222 + sk_possible = true; 223 + sk_required = required; 224 + sk_token_number = t.token_number; 225 + sk_position = Input.position t.input; 226 + } 227 + in 228 (* Remove any existing simple key at current level *) 229 + t.simple_keys <- 230 + (match t.simple_keys with 231 | _ :: rest -> Some sk :: rest 232 + | [] -> [ Some sk ]) 233 end 234 235 (** Remove simple key at current level *) ··· 242 243 (** Stale simple keys that span too many tokens *) 244 let stale_simple_keys t = 245 + t.simple_keys <- 246 + List.map 247 + (fun sk_opt -> 248 + match sk_opt with 249 + | Some sk 250 + when sk.sk_possible 251 + && (Input.position t.input).line > sk.sk_position.line 252 + && t.flow_level = 0 -> 253 + if sk.sk_required then Error.raise_at sk.sk_position Expected_key; 254 + None 255 + | _ -> sk_opt) 256 + t.simple_keys 257 258 (** Read anchor or alias name *) 259 let scan_anchor_alias t = ··· 266 This matches the saphyr implementation: is_yaml_non_space && !is_flow *) 267 while 268 match Input.peek t.input with 269 + | Some c 270 + when (not (Input.is_whitespace c)) 271 + && (not (Input.is_flow_indicator c)) 272 + && c <> '\x00' -> 273 Buffer.add_char buf c; 274 ignore (Input.next t.input); 275 true 276 | _ -> false 277 + do 278 + () 279 + done; 280 let name = Buffer.contents buf in 281 if String.length name = 0 then 282 Error.raise_at start (Invalid_anchor "empty anchor name"); ··· 288 let buf = Buffer.create 16 in 289 (* Expect ! *) 290 (match Input.peek t.input with 291 + | Some '!' -> 292 + Buffer.add_char buf '!'; 293 + ignore (Input.next t.input) 294 + | _ -> Error.raise_at start (Invalid_tag "expected '!'")); 295 (* Read word chars *) 296 while 297 match Input.peek t.input with ··· 300 ignore (Input.next t.input); 301 true 302 | _ -> false 303 + do 304 + () 305 + done; 306 (* Check for secondary ! *) 307 (match Input.peek t.input with 308 + | Some '!' -> 309 + Buffer.add_char buf '!'; 310 + ignore (Input.next t.input) 311 + | _ -> ()); 312 Buffer.contents buf 313 314 (** Scan tag suffix (after handle) *) ··· 318 in 319 let hex_val c = 320 match c with 321 + | '0' .. '9' -> Char.code c - Char.code '0' 322 + | 'A' .. 'F' -> Char.code c - Char.code 'A' + 10 323 + | 'a' .. 'f' -> Char.code c - Char.code 'a' + 10 324 | _ -> 0 325 in 326 let buf = Buffer.create 32 in 327 while 328 match Input.peek t.input with 329 + | Some '%' -> ( 330 (* Percent-encoded character *) 331 ignore (Input.next t.input); 332 + match (Input.peek t.input, Input.peek_nth t.input 1) with 333 + | Some c1, Some c2 when is_hex_digit c1 && is_hex_digit c2 -> 334 + ignore (Input.next t.input); 335 + ignore (Input.next t.input); 336 + let code = (hex_val c1 * 16) + hex_val c2 in 337 + Buffer.add_char buf (Char.chr code); 338 + true 339 + | _ -> 340 + (* Invalid percent encoding - keep the % *) 341 + Buffer.add_char buf '%'; 342 + true) 343 + | Some c 344 + when (not (Input.is_whitespace c)) && not (Input.is_flow_indicator c) -> 345 Buffer.add_char buf c; 346 ignore (Input.next t.input); 347 true 348 | _ -> false 349 + do 350 + () 351 + done; 352 Buffer.contents buf 353 354 (** Scan a tag *) 355 let scan_tag t = 356 let start = Input.mark t.input in 357 + ignore (Input.next t.input); 358 + (* consume ! *) 359 let handle, suffix = 360 match Input.peek t.input with 361 | Some '<' -> ··· 369 Buffer.add_char buf c; 370 ignore (Input.next t.input); 371 true 372 + | None -> 373 + Error.raise_at (Input.mark t.input) 374 + (Invalid_tag "unclosed verbatim tag") 375 + do 376 + () 377 + done; 378 + ignore (Input.next t.input); 379 + (* consume > *) 380 ("", Buffer.contents buf) 381 | Some c when Input.is_whitespace c || Input.is_flow_indicator c -> 382 (* Non-specific tag: ! *) 383 ("!", "") 384 | Some '!' -> 385 (* Secondary handle: !! *) 386 + ignore (Input.next t.input); 387 + (* consume second ! *) 388 let suffix = scan_tag_suffix t in 389 ("!!", suffix) 390 + | _ -> ( 391 (* Primary handle or just suffix: !foo or !e!foo *) 392 (* Read alphanumeric characters *) 393 let buf = Buffer.create 16 in ··· 398 ignore (Input.next t.input); 399 true 400 | _ -> false 401 + do 402 + () 403 + done; 404 (* Check if next character is ! - if so, this is a named handle *) 405 + match Input.peek t.input with 406 + | Some '!' -> 407 + (* Named handle like !e! *) 408 + ignore (Input.next t.input); 409 + let handle_name = Buffer.contents buf in 410 + let suffix = scan_tag_suffix t in 411 + ("!" ^ handle_name ^ "!", suffix) 412 + | _ -> 413 + (* Just ! followed by suffix *) 414 + ("!", Buffer.contents buf ^ scan_tag_suffix t)) 415 in 416 (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *) 417 (match Input.peek t.input with 418 + | None -> () (* EOF is ok *) 419 + | Some c when Input.is_whitespace c || Input.is_break c -> () 420 + | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> () 421 + | _ -> 422 + Error.raise_at start 423 + (Invalid_tag "expected whitespace or line break after tag")); 424 let span = Span.make ~start ~stop:(Input.mark t.input) in 425 (handle, suffix, span) 426 427 (** Scan single-quoted scalar *) 428 let scan_single_quoted t = 429 let start = Input.mark t.input in 430 + ignore (Input.next t.input); 431 + (* consume opening single-quote *) 432 let buf = Buffer.create 64 in 433 + let whitespace = Buffer.create 16 in 434 + (* Track trailing whitespace *) 435 436 let flush_whitespace () = 437 if Buffer.length whitespace > 0 then begin ··· 443 let rec loop () = 444 match Input.peek t.input with 445 | None -> Error.raise_at start Unclosed_single_quote 446 + | Some '\'' -> ( 447 ignore (Input.next t.input); 448 (* Check for escaped quote ('') *) 449 + match Input.peek t.input with 450 + | Some '\'' -> 451 + flush_whitespace (); 452 + Buffer.add_char buf '\''; 453 + ignore (Input.next t.input); 454 + loop () 455 + | _ -> 456 + (* End of string - flush any trailing whitespace *) 457 + flush_whitespace ()) 458 | Some ' ' | Some '\t' -> 459 (* Track whitespace - don't add to buf yet *) 460 Buffer.add_char whitespace (Option.get (Input.peek t.input)); ··· 474 (* Check indentation: continuation must be > block indent (QB6E, DK95) *) 475 let col = column t in 476 let indent = current_indent t in 477 + if 478 + (not (Input.is_eof t.input)) 479 + && (not (Input.next_is_break t.input)) 480 + && col <= indent && indent >= 0 481 + then 482 + Error.raise_at (Input.mark t.input) 483 + (Invalid_quoted_scalar_indentation 484 + "invalid indentation in quoted scalar"); 485 (* Count empty lines (consecutive line breaks) *) 486 let empty_lines = ref 0 in 487 while Input.next_is_break t.input do ··· 495 (* Check indentation after each empty line too *) 496 let col = column t in 497 let indent = current_indent t in 498 + if 499 + (not (Input.is_eof t.input)) 500 + && (not (Input.next_is_break t.input)) 501 + && col <= indent && indent >= 0 502 + then 503 + Error.raise_at (Input.mark t.input) 504 + (Invalid_quoted_scalar_indentation 505 + "invalid indentation in quoted scalar") 506 done; 507 (* Apply folding rules *) 508 if !empty_lines > 0 then begin ··· 510 for _ = 1 to !empty_lines do 511 Buffer.add_char buf '\n' 512 done 513 + end 514 + else 515 (* Single break: fold to space (even at start of string) *) 516 Buffer.add_char buf ' '; 517 loop () ··· 534 | Some c when Input.is_hex c -> 535 Buffer.add_char buf c; 536 ignore (Input.next t.input) 537 + | _ -> Error.raise_at start (Invalid_hex_escape (Buffer.contents buf)) 538 done; 539 let code = int_of_string ("0x" ^ Buffer.contents buf) in 540 + if code <= 0x7F then String.make 1 (Char.chr code) 541 else if code <= 0x7FF then 542 let b1 = 0xC0 lor (code lsr 6) in 543 let b2 = 0x80 lor (code land 0x3F) in ··· 546 let b1 = 0xE0 lor (code lsr 12) in 547 let b2 = 0x80 lor ((code lsr 6) land 0x3F) in 548 let b3 = 0x80 lor (code land 0x3F) in 549 + String.init 3 (fun i -> 550 + Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3)) 551 else 552 let b1 = 0xF0 lor (code lsr 18) in 553 let b2 = 0x80 lor ((code lsr 12) land 0x3F) in 554 let b3 = 0x80 lor ((code lsr 6) land 0x3F) in 555 let b4 = 0x80 lor (code land 0x3F) in 556 + String.init 4 (fun i -> 557 + Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4)) 558 559 (** Scan double-quoted scalar *) 560 let scan_double_quoted t = 561 let start = Input.mark t.input in 562 + ignore (Input.next t.input); 563 + (* consume opening double-quote *) 564 let buf = Buffer.create 64 in 565 + let whitespace = Buffer.create 16 in 566 + (* Track pending whitespace *) 567 568 let flush_whitespace () = 569 if Buffer.length whitespace > 0 then begin ··· 579 (* Flush trailing whitespace before closing quote to preserve it *) 580 flush_whitespace (); 581 ignore (Input.next t.input) 582 + | (Some ' ' | Some '\t') as c_opt -> 583 (* Track whitespace - don't add to buf yet *) 584 let c = match c_opt with Some c -> c | None -> assert false in 585 Buffer.add_char whitespace c; ··· 587 loop () 588 | Some '\\' -> 589 (* Escape sequence - this is non-whitespace content *) 590 + flush_whitespace (); 591 + (* Commit any pending whitespace *) 592 ignore (Input.next t.input); 593 (match Input.peek t.input with 594 + | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>") 595 + | Some '0' -> 596 + Buffer.add_char buf '\x00'; 597 + ignore (Input.next t.input) 598 + | Some 'a' -> 599 + Buffer.add_char buf '\x07'; 600 + ignore (Input.next t.input) 601 + | Some 'b' -> 602 + Buffer.add_char buf '\x08'; 603 + ignore (Input.next t.input) 604 + | Some 't' | Some '\t' -> 605 + Buffer.add_char buf '\t'; 606 + ignore (Input.next t.input) 607 + | Some 'n' -> 608 + Buffer.add_char buf '\n'; 609 + ignore (Input.next t.input) 610 + | Some 'v' -> 611 + Buffer.add_char buf '\x0B'; 612 + ignore (Input.next t.input) 613 + | Some 'f' -> 614 + Buffer.add_char buf '\x0C'; 615 + ignore (Input.next t.input) 616 + | Some 'r' -> 617 + Buffer.add_char buf '\r'; 618 + ignore (Input.next t.input) 619 + | Some 'e' -> 620 + Buffer.add_char buf '\x1B'; 621 + ignore (Input.next t.input) 622 + | Some ' ' -> 623 + Buffer.add_char buf ' '; 624 + ignore (Input.next t.input) 625 + | Some '"' -> 626 + Buffer.add_char buf '"'; 627 + ignore (Input.next t.input) 628 + | Some '/' -> 629 + Buffer.add_char buf '/'; 630 + ignore (Input.next t.input) 631 + | Some '\\' -> 632 + Buffer.add_char buf '\\'; 633 + ignore (Input.next t.input) 634 + | Some 'N' -> 635 + Buffer.add_string buf "\xC2\x85"; 636 + ignore (Input.next t.input) (* NEL *) 637 + | Some '_' -> 638 + Buffer.add_string buf "\xC2\xA0"; 639 + ignore (Input.next t.input) (* NBSP *) 640 + | Some 'L' -> 641 + Buffer.add_string buf "\xE2\x80\xA8"; 642 + ignore (Input.next t.input) (* LS *) 643 + | Some 'P' -> 644 + Buffer.add_string buf "\xE2\x80\xA9"; 645 + ignore (Input.next t.input) (* PS *) 646 + | Some 'x' -> 647 + ignore (Input.next t.input); 648 + Buffer.add_string buf (decode_hex t 2) 649 + | Some 'u' -> 650 + ignore (Input.next t.input); 651 + Buffer.add_string buf (decode_hex t 4) 652 + | Some 'U' -> 653 + ignore (Input.next t.input); 654 + Buffer.add_string buf (decode_hex t 8) 655 + | Some '\n' | Some '\r' -> 656 + (* Line continuation escape *) 657 + Input.consume_break t.input; 658 + while Input.next_is_blank t.input do 659 + ignore (Input.next t.input) 660 + done 661 + | Some c -> 662 + Error.raise_at (Input.mark t.input) 663 + (Invalid_escape_sequence (Printf.sprintf "\\%c" c))); 664 loop () 665 | Some '\n' | Some '\r' -> 666 (* Line break: discard any pending trailing whitespace *) ··· 681 if Input.next_is_break t.input then begin 682 Input.consume_break t.input; 683 incr empty_lines; 684 + started_with_tab := false (* Reset for next line *) 685 + end 686 + else continue := false 687 done; 688 (* Check for document boundary - this terminates the quoted string *) 689 if Input.at_document_boundary t.input then ··· 694 let indent = current_indent t in 695 let start_col = start.column in 696 (* DK95/01: if continuation started with tabs and column < start column, error *) 697 + if (not (Input.is_eof t.input)) && !started_with_tab && col < start_col 698 + then 699 + Error.raise_at (Input.mark t.input) 700 + (Invalid_quoted_scalar_indentation 701 + "invalid indentation in quoted scalar"); 702 + if (not (Input.is_eof t.input)) && col <= indent && indent >= 0 then 703 + Error.raise_at (Input.mark t.input) 704 + (Invalid_quoted_scalar_indentation 705 + "invalid indentation in quoted scalar"); 706 (* Per YAML spec: single break = space, break + empty lines = newlines *) 707 if !empty_lines > 0 then begin 708 (* Empty lines: output N newlines where N = number of empty lines *) 709 for _ = 1 to !empty_lines do 710 Buffer.add_char buf '\n' 711 done 712 + end 713 + else 714 (* Single break folds to space *) 715 Buffer.add_char buf ' '; 716 loop () 717 | Some c -> 718 (* Non-whitespace character *) 719 + flush_whitespace (); 720 + (* Commit any pending whitespace *) 721 Buffer.add_char buf c; 722 ignore (Input.next t.input); 723 loop () ··· 729 (** Check if character can appear in plain scalar at this position *) 730 let can_continue_plain t c ~in_flow = 731 match c with 732 + | ':' -> ( 733 (* : is OK if not followed by whitespace or flow indicator *) 734 + match Input.peek_nth t.input 1 with 735 + | None -> true 736 + | Some c2 when Input.is_whitespace c2 -> false 737 + | Some c2 when in_flow && Input.is_flow_indicator c2 -> false 738 + | _ -> true) 739 + | '#' -> ( 740 (* # is a comment indicator only if preceded by whitespace *) 741 (* Check the previous character to determine if this is a comment *) 742 + match Input.peek_back t.input with 743 + | None -> true (* At start - can't be comment indicator, allow it *) 744 + | Some c when Input.is_whitespace c -> 745 + false (* Preceded by whitespace - comment *) 746 + | Some c when Input.is_break c -> false (* At start of line - comment *) 747 + | _ -> true (* Not preceded by whitespace - part of scalar *)) 748 | c when in_flow && Input.is_flow_indicator c -> false 749 | _ when Input.is_break c -> false 750 | _ -> true ··· 756 let indent = current_indent t in 757 (* In flow context, scalars must be indented more than the current block indent. 758 This ensures that content at block indent or less ends the flow context. *) 759 + if in_flow && column t - 1 < indent then 760 Error.raise_at start Invalid_flow_indentation; 761 let buf = Buffer.create 64 in 762 let spaces = Buffer.create 16 in 763 + let whitespace = Buffer.create 16 in 764 + (* Track whitespace within a line *) 765 let leading_blanks = ref false in 766 767 let rec scan_line () = ··· 778 if Buffer.length spaces > 0 then begin 779 if !leading_blanks then begin 780 (* Fold line break *) 781 + if Buffer.contents spaces = "\n" then Buffer.add_char buf ' ' 782 else begin 783 (* Multiple breaks - preserve all but first *) 784 let s = Buffer.contents spaces in 785 Buffer.add_substring buf s 1 (String.length s - 1) 786 end 787 + end 788 + else Buffer.add_buffer buf spaces; 789 Buffer.clear spaces 790 end; 791 (* Add any pending whitespace from within the line *) ··· 812 if !leading_blanks then begin 813 (* We already had a break - this is an additional break (empty line) *) 814 Buffer.add_char spaces '\n' 815 + end 816 + else begin 817 (* First line break *) 818 Buffer.clear spaces; 819 Buffer.add_char spaces '\n'; ··· 833 (* However, allow empty lines (line breaks) to continue even if dedented *) 834 if Input.next_is_break t.input then 835 scan_lines () (* Empty line - continue *) 836 + else if (not in_flow) && col <= indent then () 837 + (* Stop - dedented or at parent level in block context *) 838 + else if Input.at_document_boundary t.input then () 839 + (* Stop - document boundary *) 840 + else scan_lines () 841 end 842 in 843 ··· 848 let len = String.length value in 849 let rec find_end i = 850 if i < 0 then 0 851 + else match value.[i] with ' ' | '\t' -> find_end (i - 1) | _ -> i + 1 852 in 853 let end_pos = find_end (len - 1) in 854 String.sub value 0 end_pos ··· 860 (** Scan block scalar (literal | or folded >) *) 861 let scan_block_scalar t literal = 862 let start = Input.mark t.input in 863 + ignore (Input.next t.input); 864 + 865 + (* consume | or > *) 866 867 (* Parse header: optional indentation indicator and chomping *) 868 let explicit_indent = ref None in ··· 870 871 (* First character of header *) 872 (match Input.peek t.input with 873 + | Some c when Input.is_digit c && c <> '0' -> 874 + explicit_indent := Some (Char.code c - Char.code '0'); 875 + ignore (Input.next t.input) 876 + | Some '-' -> 877 + chomping := Chomping.Strip; 878 + ignore (Input.next t.input) 879 + | Some '+' -> 880 + chomping := Chomping.Keep; 881 + ignore (Input.next t.input) 882 + | _ -> ()); 883 884 (* Second character of header *) 885 (match Input.peek t.input with 886 + | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None -> 887 + explicit_indent := Some (Char.code c - Char.code '0'); 888 + ignore (Input.next t.input) 889 + | Some '-' when !chomping = Chomping.Clip -> 890 + chomping := Chomping.Strip; 891 + ignore (Input.next t.input) 892 + | Some '+' when !chomping = Chomping.Clip -> 893 + chomping := Chomping.Keep; 894 + ignore (Input.next t.input) 895 + | _ -> ()); 896 897 (* Skip whitespace and optional comment *) 898 skip_whitespace_and_comment t; 899 900 (* Consume line break *) 901 + if Input.next_is_break t.input then Input.consume_break t.input 902 else if not (Input.is_eof t.input) then 903 Error.raise_at (Input.mark t.input) 904 (Invalid_block_scalar_header "expected newline after header"); ··· 906 let base_indent = current_indent t in 907 (* base_indent is the indent level from the stack, -1 if empty. 908 It's used directly for comparisons in implicit indent case. *) 909 + let content_indent = 910 + ref 911 + (match !explicit_indent with 912 + | Some n -> 913 + (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed. 914 content_indent = (base_indent - 1) + n, but at least n for document level. *) 915 + let base_level = max 0 (base_indent - 1) in 916 + base_level + n 917 + | None -> 0 (* Will be determined by first non-empty line *)) 918 + in 919 920 let buf = Buffer.create 256 in 921 let trailing_breaks = Buffer.create 16 in 922 + let leading_blank = ref false in 923 + (* Was the previous line "more indented"? *) 924 + let max_empty_line_indent = ref 0 in 925 + (* Track max indent of empty lines before first content *) 926 927 (* Skip to content indentation, skipping empty lines. 928 Returns the number of spaces actually skipped (important for detecting dedentation). *) ··· 930 if !content_indent > 0 then begin 931 (* Explicit indent - skip up to content_indent spaces *) 932 let spaces_skipped = ref 0 in 933 + while 934 + !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input 935 + do 936 incr spaces_skipped; 937 ignore (Input.next t.input) 938 done; ··· 943 Buffer.add_char trailing_breaks '\n'; 944 Input.consume_break t.input; 945 skip_to_content_indent () 946 + end 947 + else if !spaces_skipped < !content_indent then begin 948 (* Line starts with fewer spaces than content_indent - dedented *) 949 !spaces_skipped 950 + end 951 + else if Input.next_is_blank t.input then begin 952 (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line. 953 For literal scalars, whitespace-only lines ARE content (not empty). 954 For folded scalars, whitespace-only lines that are "more indented" are preserved. *) ··· 958 else begin 959 (* Folded: check if rest is only blanks *) 960 let idx = ref 0 in 961 + while 962 + match Input.peek_nth t.input !idx with 963 + | Some c when Input.is_blank c -> 964 + incr idx; 965 + true 966 + | _ -> false 967 + do 968 + () 969 + done; 970 + match Input.peek_nth t.input !idx with 971 | None | Some '\n' | Some '\r' -> 972 (* Empty/whitespace-only line in folded - skip spaces *) 973 while Input.next_is_blank t.input do ··· 980 (* Has non-whitespace content *) 981 !content_indent 982 end 983 + end 984 + else !content_indent 985 + end 986 + else begin 987 (* Implicit indent - skip empty lines without consuming spaces. 988 Note: Only SPACES count as indentation. Tabs are content, not indentation. 989 So we only check for spaces when determining if a line is "empty". *) ··· 991 Buffer.add_char trailing_breaks '\n'; 992 Input.consume_break t.input; 993 skip_to_content_indent () 994 + end 995 + else if Input.next_is (( = ) ' ') t.input then begin 996 (* Check if line is empty (only spaces before break) *) 997 let idx = ref 0 in 998 + while 999 + match Input.peek_nth t.input !idx with 1000 + | Some ' ' -> 1001 + incr idx; 1002 + true 1003 + | _ -> false 1004 + do 1005 + () 1006 + done; 1007 + match Input.peek_nth t.input !idx with 1008 | None | Some '\n' | Some '\r' -> 1009 (* Line has only spaces - empty line *) 1010 (* Track max indent of empty lines for later validation *) 1011 + if !idx > !max_empty_line_indent then max_empty_line_indent := !idx; 1012 while Input.next_is (( = ) ' ') t.input do 1013 ignore (Input.next t.input) 1014 done; ··· 1018 | _ -> 1019 (* Has content (including tabs which are content, not indentation) *) 1020 0 1021 + end 1022 + else if Input.next_is (( = ) '\t') t.input then begin 1023 (* Tab at start of line in implicit indent mode - this is an error (Y79Y) 1024 because tabs cannot be used as indentation in YAML *) 1025 Error.raise_at (Input.mark t.input) Tab_in_indentation 1026 + end 1027 + else 1028 (* Not at break or space - other content character *) 1029 0 1030 end ··· 1056 let should_process = 1057 if !content_indent = 0 then begin 1058 (* For implicit indent, content must be more indented than base_level. *) 1059 + if line_indent <= base_level then false 1060 + (* No content - first line not indented enough *) 1061 else begin 1062 (* Validate: first content line must be indented at least as much as 1063 the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *) 1064 + if line_indent < !max_empty_line_indent && line_indent > base_level 1065 + then 1066 Error.raise_at (Input.mark t.input) 1067 + (Invalid_block_scalar_header 1068 + "wrongly indented line in block scalar"); 1069 content_indent := line_indent; 1070 true 1071 end 1072 + end 1073 + else if line_indent < !content_indent then false 1074 + (* Dedented - done with content *) 1075 + else true 1076 in 1077 1078 if should_process then begin ··· 1080 For folded scalars, lines that start with any whitespace (space or tab) after the 1081 content indentation are "more indented" and preserve breaks. 1082 Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *) 1083 + let trailing_blank = 1084 + line_indent > !content_indent || Input.next_is_blank t.input 1085 + in 1086 1087 (* Add trailing breaks to buffer *) 1088 if Buffer.length buf > 0 then begin 1089 if Buffer.length trailing_breaks > 0 then begin 1090 + if literal then Buffer.add_buffer buf trailing_breaks 1091 else begin 1092 (* Folded scalar: fold only if both previous and current lines are not more-indented *) 1093 + if (not !leading_blank) && not trailing_blank then begin 1094 let breaks = Buffer.contents trailing_breaks in 1095 + if String.length breaks = 1 then Buffer.add_char buf ' ' 1096 + else Buffer.add_substring buf breaks 1 (String.length breaks - 1) 1097 + end 1098 + else begin 1099 (* Preserve breaks for more-indented lines *) 1100 Buffer.add_buffer buf trailing_breaks 1101 end 1102 end 1103 + end 1104 + else if not literal then Buffer.add_char buf ' ' 1105 + end 1106 + else Buffer.add_buffer buf trailing_breaks; 1107 Buffer.clear trailing_breaks; 1108 1109 (* Add extra indentation for literal or more-indented folded lines *) 1110 (* On the first line (when determining content_indent), we've already consumed all spaces, 1111 so we should NOT add any back. On subsequent lines, we add only the spaces beyond content_indent. *) 1112 + if (not first_line) && (literal || (!extra_spaces > 0 && not literal)) 1113 + then begin 1114 for _ = 1 to !extra_spaces do 1115 Buffer.add_char buf ' ' 1116 done 1117 end; 1118 1119 (* Read line content *) 1120 + while 1121 + (not (Input.is_eof t.input)) && not (Input.next_is_break t.input) 1122 + do 1123 Buffer.add_char buf (Input.next_exn t.input) 1124 done; 1125 ··· 1146 | Chomping.Strip -> content 1147 | Chomping.Clip -> 1148 if String.length content > 0 then content ^ "\n" else content 1149 + | Chomping.Keep -> content ^ Buffer.contents trailing_breaks 1150 in 1151 1152 let span = Span.make ~start ~stop:(Input.mark t.input) in ··· 1156 (** Scan directive (after %) *) 1157 let scan_directive t = 1158 let start = Input.mark t.input in 1159 + ignore (Input.next t.input); 1160 + 1161 + (* consume % *) 1162 1163 (* Read directive name *) 1164 let name_buf = Buffer.create 16 in ··· 1169 ignore (Input.next t.input); 1170 true 1171 | _ -> false 1172 + do 1173 + () 1174 + done; 1175 let name = Buffer.contents name_buf in 1176 1177 (* Skip blanks *) ··· 1186 let minor = ref 0 in 1187 (* Read major version *) 1188 while Input.next_is_digit t.input do 1189 + major := 1190 + (!major * 10) + (Char.code (Input.next_exn t.input) - Char.code '0') 1191 done; 1192 (* Expect . *) 1193 (match Input.peek t.input with 1194 + | Some '.' -> ignore (Input.next t.input) 1195 + | _ -> 1196 + Error.raise_at (Input.mark t.input) 1197 + (Invalid_yaml_version "expected '.'")); 1198 (* Read minor version *) 1199 while Input.next_is_digit t.input do 1200 + minor := 1201 + (!minor * 10) + (Char.code (Input.next_exn t.input) - Char.code '0') 1202 done; 1203 (* Validate: only whitespace and comments allowed before line break (MUS6) *) 1204 skip_whitespace_and_comment t; 1205 + if (not (Input.next_is_break t.input)) && not (Input.is_eof t.input) then 1206 + Error.raise_at (Input.mark t.input) 1207 + (Invalid_directive "expected comment or line break after version"); 1208 let span = Span.make ~start ~stop:(Input.mark t.input) in 1209 + (Token.Version_directive { major = !major; minor = !minor }, span) 1210 | "TAG" -> 1211 (* Tag directive: %TAG !foo! tag:example.com,2000: *) 1212 let handle = scan_tag_handle t in ··· 1223 ignore (Input.next t.input); 1224 true 1225 | _ -> false 1226 + do 1227 + () 1228 + done; 1229 let prefix = Buffer.contents prefix_buf in 1230 let span = Span.make ~start ~stop:(Input.mark t.input) in 1231 + (Token.Tag_directive { handle; prefix }, span) 1232 | _ -> 1233 (* Reserved/Unknown directive - skip to end of line and ignore *) 1234 (* Per YAML spec, reserved directives should be ignored with a warning *) 1235 + while (not (Input.is_eof t.input)) && not (Input.next_is_break t.input) do 1236 ignore (Input.next t.input) 1237 done; 1238 let span = Span.make ~start ~stop:(Input.mark t.input) in 1239 (* Return an empty tag directive token to indicate directive was processed but ignored *) 1240 + (Token.Tag_directive { handle = ""; prefix = "" }, span) 1241 1242 (** Fetch the next token(s) into the queue *) 1243 let rec fetch_next_token t = ··· 1251 (* We're about to process actual content, not leading whitespace *) 1252 t.leading_whitespace <- false; 1253 1254 + if Input.is_eof t.input then fetch_stream_end t 1255 + else if Input.at_document_boundary t.input then fetch_document_indicator t 1256 else begin 1257 match Input.peek t.input with 1258 | None -> fetch_stream_end t 1259 + | Some '%' when (Input.position t.input).column = 1 -> fetch_directive t 1260 | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start 1261 | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start 1262 | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end ··· 1264 | Some ',' -> fetch_flow_entry t 1265 | Some '-' when t.flow_level = 0 && check_block_entry t -> 1266 fetch_block_entry t 1267 + | Some '?' when check_key t -> fetch_key t 1268 + | Some ':' when check_value t -> fetch_value t 1269 | Some '*' -> fetch_alias t 1270 | Some '&' -> fetch_anchor t 1271 | Some '!' -> fetch_tag t ··· 1273 | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false 1274 | Some '\'' -> fetch_single_quoted t 1275 | Some '"' -> fetch_double_quoted t 1276 + | Some '-' when can_start_plain t -> fetch_plain_scalar t 1277 + | Some '?' when can_start_plain t -> fetch_plain_scalar t 1278 + | Some ':' when can_start_plain t -> fetch_plain_scalar t 1279 + | Some c when can_start_plain_char c t -> fetch_plain_scalar t 1280 + | Some c -> Error.raise_at (Input.mark t.input) (Unexpected_character c) 1281 end 1282 1283 and fetch_stream_end t = ··· 1298 let indicator = Input.peek_string t.input 3 in 1299 Input.skip t.input 3; 1300 let span = Span.make ~start ~stop:(Input.mark t.input) in 1301 + let token = 1302 + if indicator = "---" then Token.Document_start else Token.Document_end 1303 + in 1304 (* Reset document content flag after document end marker *) 1305 if indicator = "..." then begin 1306 t.document_has_content <- false; 1307 (* After document end marker, skip whitespace and check for end of line or comment *) 1308 + while Input.next_is_blank t.input do 1309 + ignore (Input.next t.input) 1310 + done; 1311 + match Input.peek t.input with 1312 + | None -> () (* EOF is ok *) 1313 + | Some c when Input.is_break c -> () 1314 + | Some '#' -> () (* Comment is ok *) 1315 + | _ -> 1316 + Error.raise_at start 1317 + (Invalid_directive 1318 + "content not allowed after document end marker on same line") 1319 end; 1320 emit t span token 1321 ··· 1326 If we've emitted content in the current document, we need a document end marker first *) 1327 if t.document_has_content then 1328 Error.raise_at (Input.mark t.input) 1329 + (Unexpected_token 1330 + "directives must be separated from document content by document end \ 1331 + marker (...)"); 1332 unroll_indent t (-1); 1333 remove_simple_key t; 1334 t.allow_simple_key <- false; ··· 1338 and fetch_flow_collection_start t token_type = 1339 save_simple_key t; 1340 (* Record indent of outermost flow collection *) 1341 + if t.flow_level = 0 then t.flow_indent <- column t; 1342 t.flow_level <- t.flow_level + 1; 1343 (* Track whether this is a mapping or sequence *) 1344 + let is_mapping = token_type = Token.Flow_mapping_start in 1345 t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack; 1346 t.allow_simple_key <- true; 1347 t.simple_keys <- None :: t.simple_keys; ··· 1354 and fetch_flow_collection_end t token_type = 1355 remove_simple_key t; 1356 t.flow_level <- t.flow_level - 1; 1357 + t.flow_mapping_stack <- 1358 + (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []); 1359 t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []); 1360 t.allow_simple_key <- false; 1361 let start = Input.mark t.input in ··· 1400 ignore (Input.next t.input); 1401 1402 (* Check for tabs after - : pattern like -\t- is invalid *) 1403 + let found_tabs, _found_spaces = skip_blanks_check_tabs t in 1404 if found_tabs then begin 1405 (* If we found tabs and next char is - followed by whitespace, error *) 1406 match Input.peek t.input with 1407 + | Some '-' -> ( 1408 + match Input.peek_nth t.input 1 with 1409 + | None -> Error.raise_at start Tab_in_indentation 1410 + | Some c when Input.is_whitespace c -> 1411 + Error.raise_at start Tab_in_indentation 1412 + | Some _ -> ()) 1413 | _ -> () 1414 end; 1415 ··· 1419 and check_key t = 1420 (* ? followed by whitespace or flow indicator in both block and flow *) 1421 match Input.peek_nth t.input 1 with 1422 + | None -> true 1423 + | Some c -> 1424 + Input.is_whitespace c || (t.flow_level > 0 && Input.is_flow_indicator c) 1425 1426 and fetch_key t = 1427 if t.flow_level = 0 then begin ··· 1440 ignore (Input.next t.input); 1441 1442 (* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *) 1443 + let found_tabs, _found_spaces = skip_blanks_check_tabs t in 1444 if found_tabs && t.flow_level = 0 then begin 1445 (* In block context, tabs after ? are not allowed *) 1446 Error.raise_at start Tab_in_indentation ··· 1453 (* : followed by whitespace in block, or whitespace/flow indicator in flow, or adjacent value *) 1454 match Input.peek_nth t.input 1 with 1455 | None -> true 1456 + | Some c -> ( 1457 + Input.is_whitespace c 1458 + || (t.flow_level > 0 && Input.is_flow_indicator c) 1459 + || 1460 (* Allow adjacent values in flow context at designated positions *) 1461 + t.flow_level > 0 1462 + && 1463 + match t.adjacent_value_allowed_at with 1464 + | Some pos -> 1465 + pos.Position.line = (Input.position t.input).Position.line 1466 + && pos.Position.column = (Input.position t.input).Position.column 1467 + | None -> false) 1468 1469 and fetch_value t = 1470 let start = Input.mark t.input in ··· 1474 | Some sk :: _ when sk.sk_possible -> 1475 (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line. 1476 In explicit flow mapping { }, key and : can span lines. *) 1477 + let is_implicit_flow_mapping = 1478 + match t.flow_mapping_stack with 1479 + | false :: _ -> 1480 + true (* false = we're in a sequence, so any mapping is implicit *) 1481 | _ -> false 1482 in 1483 + if 1484 + is_implicit_flow_mapping 1485 + && sk.sk_position.line < (Input.position t.input).line 1486 + then Error.raise_at start Illegal_flow_key_line; 1487 (* Insert KEY token before the simple key value *) 1488 let key_span = Span.point sk.sk_position in 1489 let key_token = { Token.token = Token.Key; span = key_span } in ··· 1491 let tokens = Queue.to_seq t.tokens |> Array.of_seq in 1492 Queue.clear t.tokens; 1493 let insert_pos = sk.sk_token_number - t.tokens_taken in 1494 + Array.iteri 1495 + (fun i tok -> 1496 + if i = insert_pos then Queue.add key_token t.tokens; 1497 + Queue.add tok t.tokens) 1498 + tokens; 1499 + if insert_pos >= Array.length tokens then Queue.add key_token t.tokens; 1500 t.token_number <- t.token_number + 1; 1501 (* Roll indent for implicit block mapping *) 1502 if t.flow_level = 0 then begin ··· 1507 let bm_token = { Token.token = Token.Block_mapping_start; span } in 1508 let tokens = Queue.to_seq t.tokens |> Array.of_seq in 1509 Queue.clear t.tokens; 1510 + Array.iteri 1511 + (fun i tok -> 1512 + if i = insert_pos then Queue.add bm_token t.tokens; 1513 + Queue.add tok t.tokens) 1514 + tokens; 1515 if insert_pos >= Array.length tokens then 1516 Queue.add bm_token t.tokens; 1517 t.token_number <- t.token_number + 1 1518 end 1519 end; 1520 + t.simple_keys <- None :: List.tl t.simple_keys; 1521 true 1522 | _ -> 1523 (* No simple key - this is a complex value (or empty key) *) ··· 1537 remove_simple_key t; 1538 (* In block context without simple key, allow simple keys for compact mappings like ": moon: white" 1539 In flow context or after using a simple key, disallow simple keys *) 1540 + t.allow_simple_key <- (not used_simple_key) && t.flow_level = 0; 1541 t.document_has_content <- true; 1542 let start = Input.mark t.input in 1543 ignore (Input.next t.input); 1544 1545 (* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09) 1546 However, :\t bar (tab followed by space then content) is valid (6BCT) *) 1547 + let found_tabs, found_spaces = skip_blanks_check_tabs t in 1548 + if found_tabs && (not found_spaces) && t.flow_level = 0 then begin 1549 (* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *) 1550 match Input.peek t.input with 1551 + | Some ('-' | '?') -> Error.raise_at start Tab_in_indentation 1552 + | Some c 1553 + when (c >= 'a' && c <= 'z') 1554 + || (c >= 'A' && c <= 'Z') 1555 + || (c >= '0' && c <= '9') -> 1556 (* Tab-only followed by alphanumeric - likely a key, which is invalid *) 1557 Error.raise_at start Tab_in_indentation 1558 | _ -> () ··· 1569 t.allow_simple_key <- false; 1570 t.document_has_content <- true; 1571 let start = Input.mark t.input in 1572 + ignore (Input.next t.input); 1573 + (* consume * or & *) 1574 let name, span = scan_anchor_alias t in 1575 let span = Span.make ~start ~stop:span.stop in 1576 let token = if is_alias then Token.Alias name else Token.Anchor name in ··· 1615 match Input.peek_nth t.input 1 with 1616 | None -> false 1617 | Some c -> 1618 + (not (Input.is_whitespace c)) 1619 + && (t.flow_level = 0 || not (Input.is_flow_indicator c)) 1620 1621 and can_start_plain_char c _t = 1622 (* Characters that can start a plain scalar *) ··· 1632 (* If the plain scalar ended after crossing a line break (leading_blanks = true), 1633 allow simple keys. This is important because the scanner already consumed the 1634 line break and leading whitespace when checking for continuation. *) 1635 + if ended_with_linebreak then t.allow_simple_key <- true; 1636 emit t span (Token.Scalar { style = `Plain; value }) 1637 1638 (** Check if we need more tokens to resolve simple keys *) ··· 1641 else if Queue.is_empty t.tokens then true 1642 else 1643 (* Check if any simple key could affect the first queued token *) 1644 + List.exists 1645 + (function 1646 + | Some sk when sk.sk_possible -> sk.sk_token_number >= t.tokens_taken 1647 + | _ -> false) 1648 + t.simple_keys 1649 1650 (** Ensure we have enough tokens to return one safely *) 1651 let ensure_tokens t = ··· 1662 (** Get next token *) 1663 let next t = 1664 ensure_tokens t; 1665 + if Queue.is_empty t.tokens then None 1666 else begin 1667 t.tokens_taken <- t.tokens_taken + 1; 1668 Some (Queue.pop t.tokens) ··· 1678 let rec loop () = 1679 match next t with 1680 | None -> () 1681 + | Some tok -> 1682 + f tok; 1683 + loop () 1684 in 1685 loop () 1686 1687 (** Fold over all tokens *) 1688 let fold f init t = 1689 let rec loop acc = 1690 + match next t with None -> acc | Some tok -> loop (f acc tok) 1691 in 1692 loop init 1693 1694 (** Convert to list *) 1695 + let to_list t = fold (fun acc tok -> tok :: acc) [] t |> List.rev
+20 -28
lib/sequence.ml
··· 13 members : 'a list; 14 } 15 16 - let make 17 - ?(anchor : string option) 18 - ?(tag : string option) 19 - ?(implicit = true) 20 - ?(style = `Any) 21 - members = 22 { anchor; tag; implicit; style; members } 23 24 let members t = t.members ··· 26 let tag t = t.tag 27 let implicit t = t.implicit 28 let style t = t.style 29 - 30 let with_anchor anchor t = { t with anchor = Some anchor } 31 let with_tag tag t = { t with tag = Some tag } 32 let with_style style t = { t with style } 33 - 34 let map f t = { t with members = List.map f t.members } 35 - 36 let length t = List.length t.members 37 - 38 let is_empty t = t.members = [] 39 - 40 let nth t n = List.nth t.members n 41 - 42 let nth_opt t n = List.nth_opt t.members n 43 - 44 let iter f t = List.iter f t.members 45 - 46 let fold f init t = List.fold_left f init t.members 47 48 let pp pp_elem fmt t = ··· 51 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag; 52 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style; 53 Format.fprintf fmt "members=[@,%a@]@,)" 54 - (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp_elem) 55 t.members 56 57 let equal eq a b = 58 - Option.equal String.equal a.anchor b.anchor && 59 - Option.equal String.equal a.tag b.tag && 60 - a.implicit = b.implicit && 61 - Layout_style.equal a.style b.style && 62 - List.equal eq a.members b.members 63 64 let compare cmp a b = 65 let c = Option.compare String.compare a.anchor b.anchor in 66 - if c <> 0 then c else 67 - let c = Option.compare String.compare a.tag b.tag in 68 - if c <> 0 then c else 69 - let c = Bool.compare a.implicit b.implicit in 70 - if c <> 0 then c else 71 - let c = Layout_style.compare a.style b.style in 72 - if c <> 0 then c else 73 - List.compare cmp a.members b.members
··· 13 members : 'a list; 14 } 15 16 + let make ?(anchor : string option) ?(tag : string option) ?(implicit = true) 17 + ?(style = `Any) members = 18 { anchor; tag; implicit; style; members } 19 20 let members t = t.members ··· 22 let tag t = t.tag 23 let implicit t = t.implicit 24 let style t = t.style 25 let with_anchor anchor t = { t with anchor = Some anchor } 26 let with_tag tag t = { t with tag = Some tag } 27 let with_style style t = { t with style } 28 let map f t = { t with members = List.map f t.members } 29 let length t = List.length t.members 30 let is_empty t = t.members = [] 31 let nth t n = List.nth t.members n 32 let nth_opt t n = List.nth_opt t.members n 33 let iter f t = List.iter f t.members 34 let fold f init t = List.fold_left f init t.members 35 36 let pp pp_elem fmt t = ··· 39 Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag; 40 Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style; 41 Format.fprintf fmt "members=[@,%a@]@,)" 42 + (Format.pp_print_list 43 + ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") 44 + pp_elem) 45 t.members 46 47 let equal eq a b = 48 + Option.equal String.equal a.anchor b.anchor 49 + && Option.equal String.equal a.tag b.tag 50 + && a.implicit = b.implicit 51 + && Layout_style.equal a.style b.style 52 + && List.equal eq a.members b.members 53 54 let compare cmp a b = 55 let c = Option.compare String.compare a.anchor b.anchor in 56 + if c <> 0 then c 57 + else 58 + let c = Option.compare String.compare a.tag b.tag in 59 + if c <> 0 then c 60 + else 61 + let c = Bool.compare a.implicit b.implicit in 62 + if c <> 0 then c 63 + else 64 + let c = Layout_style.compare a.style b.style in 65 + if c <> 0 then c else List.compare cmp a.members b.members
+166 -139
lib/serialize.ml
··· 10 11 (** {1 Internal Helpers} *) 12 13 - (** Emit a YAML node using an emit function. 14 - This is the core implementation used by both Emitter.t and function-based APIs. *) 15 let rec emit_yaml_node_impl ~emit (yaml : Yaml.t) = 16 match yaml with 17 | `Scalar s -> 18 - emit (Event.Scalar { 19 - anchor = Scalar.anchor s; 20 - tag = Scalar.tag s; 21 - value = Scalar.value s; 22 - plain_implicit = Scalar.plain_implicit s; 23 - quoted_implicit = Scalar.quoted_implicit s; 24 - style = Scalar.style s; 25 - }) 26 - 27 - | `Alias name -> 28 - emit (Event.Alias { anchor = name }) 29 - 30 | `A seq -> 31 let members = Sequence.members seq in 32 (* Force flow style for empty sequences *) 33 let style = if members = [] then `Flow else Sequence.style seq in 34 - emit (Event.Sequence_start { 35 - anchor = Sequence.anchor seq; 36 - tag = Sequence.tag seq; 37 - implicit = Sequence.implicit seq; 38 - style; 39 - }); 40 List.iter (emit_yaml_node_impl ~emit) members; 41 emit Event.Sequence_end 42 - 43 | `O map -> 44 let members = Mapping.members map in 45 (* Force flow style for empty mappings *) 46 let style = if members = [] then `Flow else Mapping.style map in 47 - emit (Event.Mapping_start { 48 - anchor = Mapping.anchor map; 49 - tag = Mapping.tag map; 50 - implicit = Mapping.implicit map; 51 - style; 52 - }); 53 - List.iter (fun (k, v) -> 54 - emit_yaml_node_impl ~emit k; 55 - emit_yaml_node_impl ~emit v 56 - ) members; 57 emit Event.Mapping_end 58 59 - (** Emit a Value node using an emit function. 60 - This is the core implementation used by both Emitter.t and function-based APIs. *) 61 let rec emit_value_node_impl ~emit ~config (value : Value.t) = 62 match value with 63 | `Null -> 64 - emit (Event.Scalar { 65 - anchor = None; tag = None; 66 - value = "null"; 67 - plain_implicit = true; quoted_implicit = false; 68 - style = `Plain; 69 - }) 70 - 71 | `Bool b -> 72 - emit (Event.Scalar { 73 - anchor = None; tag = None; 74 - value = if b then "true" else "false"; 75 - plain_implicit = true; quoted_implicit = false; 76 - style = `Plain; 77 - }) 78 - 79 | `Float f -> 80 let value = 81 match Float.classify_float f with ··· 84 | _ -> 85 if Float.is_integer f && Float.abs f < 1e15 then 86 Printf.sprintf "%.0f" f 87 - else 88 - Printf.sprintf "%g" f 89 in 90 - emit (Event.Scalar { 91 - anchor = None; tag = None; 92 - value; 93 - plain_implicit = true; quoted_implicit = false; 94 - style = `Plain; 95 - }) 96 - 97 | `String s -> 98 let style = Quoting.choose_style s in 99 - emit (Event.Scalar { 100 - anchor = None; tag = None; 101 - value = s; 102 - plain_implicit = style = `Plain; 103 - quoted_implicit = style <> `Plain; 104 - style; 105 - }) 106 - 107 | `A items -> 108 (* Force flow style for empty sequences, otherwise use config *) 109 let style = 110 if items = [] || config.Emitter.layout_style = `Flow then `Flow else `Block 111 in 112 - emit (Event.Sequence_start { 113 - anchor = None; tag = None; 114 - implicit = true; 115 - style; 116 - }); 117 List.iter (emit_value_node_impl ~emit ~config) items; 118 emit Event.Sequence_end 119 - 120 | `O pairs -> 121 (* Force flow style for empty mappings, otherwise use config *) 122 let style = 123 if pairs = [] || config.Emitter.layout_style = `Flow then `Flow else `Block 124 in 125 - emit (Event.Mapping_start { 126 - anchor = None; tag = None; 127 - implicit = true; 128 - style; 129 - }); 130 - List.iter (fun (k, v) -> 131 - let style = Quoting.choose_style k in 132 - emit (Event.Scalar { 133 - anchor = None; tag = None; 134 - value = k; 135 - plain_implicit = style = `Plain; 136 - quoted_implicit = style <> `Plain; 137 - style; 138 - }); 139 - emit_value_node_impl ~emit ~config v 140 - ) pairs; 141 emit Event.Mapping_end 142 143 (** Strip anchors from a YAML tree (used when resolving aliases for output) *) ··· 146 | `Scalar s -> 147 if Option.is_none (Scalar.anchor s) then yaml 148 else 149 - `Scalar (Scalar.make 150 - ?tag:(Scalar.tag s) 151 - ~plain_implicit:(Scalar.plain_implicit s) 152 - ~quoted_implicit:(Scalar.quoted_implicit s) 153 - ~style:(Scalar.style s) 154 - (Scalar.value s)) 155 | `Alias _ -> yaml 156 | `A seq -> 157 - `A (Sequence.make 158 - ?tag:(Sequence.tag seq) 159 - ~implicit:(Sequence.implicit seq) 160 - ~style:(Sequence.style seq) 161 - (List.map strip_anchors (Sequence.members seq))) 162 | `O map -> 163 - `O (Mapping.make 164 - ?tag:(Mapping.tag map) 165 - ~implicit:(Mapping.implicit map) 166 - ~style:(Mapping.style map) 167 - (List.map (fun (k, v) -> (strip_anchors k, strip_anchors v)) (Mapping.members map))) 168 169 (** Emit a document using an emit function *) 170 let emit_document_impl ?(resolve_aliases = true) ~emit doc = 171 - emit (Event.Document_start { 172 - version = Document.version doc; 173 - implicit = Document.implicit_start doc; 174 - }); 175 (match Document.root doc with 176 - | Some yaml -> 177 - let yaml = if resolve_aliases then 178 - yaml |> Yaml.resolve_aliases |> strip_anchors 179 - else yaml in 180 - emit_yaml_node_impl ~emit yaml 181 - | None -> 182 - emit (Event.Scalar { 183 - anchor = None; tag = None; 184 - value = ""; 185 - plain_implicit = true; quoted_implicit = false; 186 - style = `Plain; 187 - })); 188 emit (Event.Document_end { implicit = Document.implicit_end doc }) 189 190 (** {1 Emitter.t-based API} *) 191 192 (** Emit a YAML node to an emitter *) 193 - let emit_yaml_node t yaml = 194 - emit_yaml_node_impl ~emit:(Emitter.emit t) yaml 195 196 (** Emit a complete YAML document to an emitter *) 197 let emit_yaml t yaml = ··· 249 (** Serialize documents to a buffer. 250 251 @param config Emitter configuration (default: {!Emitter.default_config}) 252 - @param resolve_aliases Whether to resolve aliases before emission (default: true) 253 @param buffer Optional buffer to append to; creates new one if not provided 254 @return The buffer containing serialized YAML *) 255 - let documents_to_buffer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?buffer documents = 256 let buf = Option.value buffer ~default:(Buffer.create 1024) in 257 let t = Emitter.create ~config () in 258 Emitter.emit t (Event.Stream_start { encoding = config.encoding }); ··· 278 (** Serialize documents to a string. 279 280 @param config Emitter configuration (default: {!Emitter.default_config}) 281 - @param resolve_aliases Whether to resolve aliases before emission (default: true) *) 282 - let documents_to_string ?(config = Emitter.default_config) ?(resolve_aliases = true) documents = 283 Buffer.contents (documents_to_buffer ~config ~resolve_aliases documents) 284 285 (** {1 Writer-based API} 286 287 - These functions write directly to a bytesrw [Bytes.Writer.t], 288 - enabling true streaming output without intermediate string allocation. 289 - Uses the emitter's native Writer support for efficiency. *) 290 291 (** Serialize a Value directly to a Bytes.Writer. 292 293 @param config Emitter configuration (default: {!Emitter.default_config}) 294 - @param eod Whether to write end-of-data after serialization (default: true) *) 295 - let value_to_writer ?(config = Emitter.default_config) ?(eod = true) writer value = 296 let t = Emitter.of_writer ~config writer in 297 emit_value t value; 298 if eod then Emitter.flush t ··· 300 (** Serialize a Yaml.t directly to a Bytes.Writer. 301 302 @param config Emitter configuration (default: {!Emitter.default_config}) 303 - @param eod Whether to write end-of-data after serialization (default: true) *) 304 - let yaml_to_writer ?(config = Emitter.default_config) ?(eod = true) writer yaml = 305 let t = Emitter.of_writer ~config writer in 306 emit_yaml t yaml; 307 if eod then Emitter.flush t ··· 309 (** Serialize documents directly to a Bytes.Writer. 310 311 @param config Emitter configuration (default: {!Emitter.default_config}) 312 - @param resolve_aliases Whether to resolve aliases before emission (default: true) 313 - @param eod Whether to write end-of-data after serialization (default: true) *) 314 - let documents_to_writer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?(eod = true) writer documents = 315 let t = Emitter.of_writer ~config writer in 316 Emitter.emit t (Event.Stream_start { encoding = config.encoding }); 317 List.iter (emit_document ~resolve_aliases t) documents; ··· 320 321 (** {1 Function-based API} 322 323 - These functions accept an emit function [Event.t -> unit] instead of 324 - an {!Emitter.t}, allowing them to work with any event sink 325 - (e.g., streaming writers, custom processors). *) 326 327 (** Emit a YAML node using an emitter function *) 328 - let emit_yaml_node_fn ~emitter yaml = 329 - emit_yaml_node_impl ~emit:emitter yaml 330 331 (** Emit a complete YAML stream using an emitter function *) 332 let emit_yaml_fn ~emitter ~config yaml =
··· 10 11 (** {1 Internal Helpers} *) 12 13 + (** Emit a YAML node using an emit function. This is the core implementation 14 + used by both Emitter.t and function-based APIs. *) 15 let rec emit_yaml_node_impl ~emit (yaml : Yaml.t) = 16 match yaml with 17 | `Scalar s -> 18 + emit 19 + (Event.Scalar 20 + { 21 + anchor = Scalar.anchor s; 22 + tag = Scalar.tag s; 23 + value = Scalar.value s; 24 + plain_implicit = Scalar.plain_implicit s; 25 + quoted_implicit = Scalar.quoted_implicit s; 26 + style = Scalar.style s; 27 + }) 28 + | `Alias name -> emit (Event.Alias { anchor = name }) 29 | `A seq -> 30 let members = Sequence.members seq in 31 (* Force flow style for empty sequences *) 32 let style = if members = [] then `Flow else Sequence.style seq in 33 + emit 34 + (Event.Sequence_start 35 + { 36 + anchor = Sequence.anchor seq; 37 + tag = Sequence.tag seq; 38 + implicit = Sequence.implicit seq; 39 + style; 40 + }); 41 List.iter (emit_yaml_node_impl ~emit) members; 42 emit Event.Sequence_end 43 | `O map -> 44 let members = Mapping.members map in 45 (* Force flow style for empty mappings *) 46 let style = if members = [] then `Flow else Mapping.style map in 47 + emit 48 + (Event.Mapping_start 49 + { 50 + anchor = Mapping.anchor map; 51 + tag = Mapping.tag map; 52 + implicit = Mapping.implicit map; 53 + style; 54 + }); 55 + List.iter 56 + (fun (k, v) -> 57 + emit_yaml_node_impl ~emit k; 58 + emit_yaml_node_impl ~emit v) 59 + members; 60 emit Event.Mapping_end 61 62 + (** Emit a Value node using an emit function. This is the core implementation 63 + used by both Emitter.t and function-based APIs. *) 64 let rec emit_value_node_impl ~emit ~config (value : Value.t) = 65 match value with 66 | `Null -> 67 + emit 68 + (Event.Scalar 69 + { 70 + anchor = None; 71 + tag = None; 72 + value = "null"; 73 + plain_implicit = true; 74 + quoted_implicit = false; 75 + style = `Plain; 76 + }) 77 | `Bool b -> 78 + emit 79 + (Event.Scalar 80 + { 81 + anchor = None; 82 + tag = None; 83 + value = (if b then "true" else "false"); 84 + plain_implicit = true; 85 + quoted_implicit = false; 86 + style = `Plain; 87 + }) 88 | `Float f -> 89 let value = 90 match Float.classify_float f with ··· 93 | _ -> 94 if Float.is_integer f && Float.abs f < 1e15 then 95 Printf.sprintf "%.0f" f 96 + else Printf.sprintf "%g" f 97 in 98 + emit 99 + (Event.Scalar 100 + { 101 + anchor = None; 102 + tag = None; 103 + value; 104 + plain_implicit = true; 105 + quoted_implicit = false; 106 + style = `Plain; 107 + }) 108 | `String s -> 109 let style = Quoting.choose_style s in 110 + emit 111 + (Event.Scalar 112 + { 113 + anchor = None; 114 + tag = None; 115 + value = s; 116 + plain_implicit = style = `Plain; 117 + quoted_implicit = style <> `Plain; 118 + style; 119 + }) 120 | `A items -> 121 (* Force flow style for empty sequences, otherwise use config *) 122 let style = 123 if items = [] || config.Emitter.layout_style = `Flow then `Flow else `Block 124 in 125 + emit 126 + (Event.Sequence_start 127 + { anchor = None; tag = None; implicit = true; style }); 128 List.iter (emit_value_node_impl ~emit ~config) items; 129 emit Event.Sequence_end 130 | `O pairs -> 131 (* Force flow style for empty mappings, otherwise use config *) 132 let style = 133 if pairs = [] || config.Emitter.layout_style = `Flow then `Flow else `Block 134 in 135 + emit 136 + (Event.Mapping_start 137 + { anchor = None; tag = None; implicit = true; style }); 138 + List.iter 139 + (fun (k, v) -> 140 + let style = Quoting.choose_style k in 141 + emit 142 + (Event.Scalar 143 + { 144 + anchor = None; 145 + tag = None; 146 + value = k; 147 + plain_implicit = style = `Plain; 148 + quoted_implicit = style <> `Plain; 149 + style; 150 + }); 151 + emit_value_node_impl ~emit ~config v) 152 + pairs; 153 emit Event.Mapping_end 154 155 (** Strip anchors from a YAML tree (used when resolving aliases for output) *) ··· 158 | `Scalar s -> 159 if Option.is_none (Scalar.anchor s) then yaml 160 else 161 + `Scalar 162 + (Scalar.make ?tag:(Scalar.tag s) 163 + ~plain_implicit:(Scalar.plain_implicit s) 164 + ~quoted_implicit:(Scalar.quoted_implicit s) ~style:(Scalar.style s) 165 + (Scalar.value s)) 166 | `Alias _ -> yaml 167 | `A seq -> 168 + `A 169 + (Sequence.make ?tag:(Sequence.tag seq) ~implicit:(Sequence.implicit seq) 170 + ~style:(Sequence.style seq) 171 + (List.map strip_anchors (Sequence.members seq))) 172 | `O map -> 173 + `O 174 + (Mapping.make ?tag:(Mapping.tag map) ~implicit:(Mapping.implicit map) 175 + ~style:(Mapping.style map) 176 + (List.map 177 + (fun (k, v) -> (strip_anchors k, strip_anchors v)) 178 + (Mapping.members map))) 179 180 (** Emit a document using an emit function *) 181 let emit_document_impl ?(resolve_aliases = true) ~emit doc = 182 + emit 183 + (Event.Document_start 184 + { 185 + version = Document.version doc; 186 + implicit = Document.implicit_start doc; 187 + }); 188 (match Document.root doc with 189 + | Some yaml -> 190 + let yaml = 191 + if resolve_aliases then yaml |> Yaml.resolve_aliases |> strip_anchors 192 + else yaml 193 + in 194 + emit_yaml_node_impl ~emit yaml 195 + | None -> 196 + emit 197 + (Event.Scalar 198 + { 199 + anchor = None; 200 + tag = None; 201 + value = ""; 202 + plain_implicit = true; 203 + quoted_implicit = false; 204 + style = `Plain; 205 + })); 206 emit (Event.Document_end { implicit = Document.implicit_end doc }) 207 208 (** {1 Emitter.t-based API} *) 209 210 (** Emit a YAML node to an emitter *) 211 + let emit_yaml_node t yaml = emit_yaml_node_impl ~emit:(Emitter.emit t) yaml 212 213 (** Emit a complete YAML document to an emitter *) 214 let emit_yaml t yaml = ··· 266 (** Serialize documents to a buffer. 267 268 @param config Emitter configuration (default: {!Emitter.default_config}) 269 + @param resolve_aliases 270 + Whether to resolve aliases before emission (default: true) 271 @param buffer Optional buffer to append to; creates new one if not provided 272 @return The buffer containing serialized YAML *) 273 + let documents_to_buffer ?(config = Emitter.default_config) 274 + ?(resolve_aliases = true) ?buffer documents = 275 let buf = Option.value buffer ~default:(Buffer.create 1024) in 276 let t = Emitter.create ~config () in 277 Emitter.emit t (Event.Stream_start { encoding = config.encoding }); ··· 297 (** Serialize documents to a string. 298 299 @param config Emitter configuration (default: {!Emitter.default_config}) 300 + @param resolve_aliases 301 + Whether to resolve aliases before emission (default: true) *) 302 + let documents_to_string ?(config = Emitter.default_config) 303 + ?(resolve_aliases = true) documents = 304 Buffer.contents (documents_to_buffer ~config ~resolve_aliases documents) 305 306 (** {1 Writer-based API} 307 308 + These functions write directly to a bytesrw [Bytes.Writer.t], enabling true 309 + streaming output without intermediate string allocation. Uses the emitter's 310 + native Writer support for efficiency. *) 311 312 (** Serialize a Value directly to a Bytes.Writer. 313 314 @param config Emitter configuration (default: {!Emitter.default_config}) 315 + @param eod Whether to write end-of-data after serialization (default: true) 316 + *) 317 + let value_to_writer ?(config = Emitter.default_config) ?(eod = true) writer 318 + value = 319 let t = Emitter.of_writer ~config writer in 320 emit_value t value; 321 if eod then Emitter.flush t ··· 323 (** Serialize a Yaml.t directly to a Bytes.Writer. 324 325 @param config Emitter configuration (default: {!Emitter.default_config}) 326 + @param eod Whether to write end-of-data after serialization (default: true) 327 + *) 328 + let yaml_to_writer ?(config = Emitter.default_config) ?(eod = true) writer yaml 329 + = 330 let t = Emitter.of_writer ~config writer in 331 emit_yaml t yaml; 332 if eod then Emitter.flush t ··· 334 (** Serialize documents directly to a Bytes.Writer. 335 336 @param config Emitter configuration (default: {!Emitter.default_config}) 337 + @param resolve_aliases 338 + Whether to resolve aliases before emission (default: true) 339 + @param eod Whether to write end-of-data after serialization (default: true) 340 + *) 341 + let documents_to_writer ?(config = Emitter.default_config) 342 + ?(resolve_aliases = true) ?(eod = true) writer documents = 343 let t = Emitter.of_writer ~config writer in 344 Emitter.emit t (Event.Stream_start { encoding = config.encoding }); 345 List.iter (emit_document ~resolve_aliases t) documents; ··· 348 349 (** {1 Function-based API} 350 351 + These functions accept an emit function [Event.t -> unit] instead of an 352 + {!Emitter.t}, allowing them to work with any event sink (e.g., streaming 353 + writers, custom processors). *) 354 355 (** Emit a YAML node using an emitter function *) 356 + let emit_yaml_node_fn ~emitter yaml = emit_yaml_node_impl ~emit:emitter yaml 357 358 (** Emit a complete YAML stream using an emitter function *) 359 let emit_yaml_fn ~emitter ~config yaml =
+10 -16
lib/span.ml
··· 5 6 (** Source spans representing ranges in input *) 7 8 - type t = { 9 - start : Position.t; 10 - stop : Position.t; 11 - } 12 13 let make ~start ~stop = { start; stop } 14 - 15 let point pos = { start = pos; stop = pos } 16 17 let merge a b = 18 - let start = if Position.compare a.start b.start <= 0 then a.start else b.start in 19 let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in 20 { start; stop } 21 22 - let extend span pos = 23 - { span with stop = pos } 24 25 let pp fmt t = 26 if t.start.line = t.stop.line then 27 - Format.fprintf fmt "line %d, columns %d-%d" 28 - t.start.line t.start.column t.stop.column 29 - else 30 - Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line 31 32 - let to_string t = 33 - Format.asprintf "%a" pp t 34 35 let compare a b = 36 let c = Position.compare a.start b.start in 37 if c <> 0 then c else Position.compare a.stop b.stop 38 39 - let equal a b = 40 - Position.equal a.start b.start && Position.equal a.stop b.stop
··· 5 6 (** Source spans representing ranges in input *) 7 8 + type t = { start : Position.t; stop : Position.t } 9 10 let make ~start ~stop = { start; stop } 11 let point pos = { start = pos; stop = pos } 12 13 let merge a b = 14 + let start = 15 + if Position.compare a.start b.start <= 0 then a.start else b.start 16 + in 17 let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in 18 { start; stop } 19 20 + let extend span pos = { span with stop = pos } 21 22 let pp fmt t = 23 if t.start.line = t.stop.line then 24 + Format.fprintf fmt "line %d, columns %d-%d" t.start.line t.start.column 25 + t.stop.column 26 + else Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line 27 28 + let to_string t = Format.asprintf "%a" pp t 29 30 let compare a b = 31 let c = Position.compare a.start b.start in 32 if c <> 0 then c else Position.compare a.stop b.stop 33 34 + let equal a b = Position.equal a.start b.start && Position.equal a.stop b.stop
+12 -14
lib/tag.ml
··· 18 | 0 -> None 19 | _ when s.[0] <> '!' -> None 20 | 1 -> Some { handle = "!"; suffix = "" } 21 - | _ -> 22 match s.[1] with 23 - | '!' -> (* !! handle *) 24 Some { handle = "!!"; suffix = String.sub s 2 (len - 2) } 25 - | '<' -> (* Verbatim tag !<...> *) 26 if len > 2 && s.[len - 1] = '>' then 27 Some { handle = "!"; suffix = String.sub s 2 (len - 3) } 28 - else 29 - None 30 - | _ -> (* Primary handle or local tag *) 31 - Some { handle = "!"; suffix = String.sub s 1 (len - 1) } 32 33 let to_string t = 34 - if t.handle = "!" && t.suffix = "" then "!" 35 - else t.handle ^ t.suffix 36 37 let to_uri t = 38 match t.handle with ··· 40 | "!" -> "!" ^ t.suffix 41 | h -> h ^ t.suffix 42 43 - let pp fmt t = 44 - Format.pp_print_string fmt (to_string t) 45 - 46 - let equal a b = 47 - String.equal a.handle b.handle && String.equal a.suffix b.suffix 48 49 let compare a b = 50 let c = String.compare a.handle b.handle in
··· 18 | 0 -> None 19 | _ when s.[0] <> '!' -> None 20 | 1 -> Some { handle = "!"; suffix = "" } 21 + | _ -> ( 22 match s.[1] with 23 + | '!' -> 24 + (* !! handle *) 25 Some { handle = "!!"; suffix = String.sub s 2 (len - 2) } 26 + | '<' -> 27 + (* Verbatim tag !<...> *) 28 if len > 2 && s.[len - 1] = '>' then 29 Some { handle = "!"; suffix = String.sub s 2 (len - 3) } 30 + else None 31 + | _ -> 32 + (* Primary handle or local tag *) 33 + Some { handle = "!"; suffix = String.sub s 1 (len - 1) }) 34 35 let to_string t = 36 + if t.handle = "!" && t.suffix = "" then "!" else t.handle ^ t.suffix 37 38 let to_uri t = 39 match t.handle with ··· 41 | "!" -> "!" ^ t.suffix 42 | h -> h ^ t.suffix 43 44 + let pp fmt t = Format.pp_print_string fmt (to_string t) 45 + let equal a b = String.equal a.handle b.handle && String.equal a.suffix b.suffix 46 47 let compare a b = 48 let c = String.compare a.handle b.handle in
+30 -51
lib/token.ml
··· 10 | Stream_end 11 | Version_directive of { major : int; minor : int } 12 | Tag_directive of { handle : string; prefix : string } 13 - | Document_start (** --- *) 14 - | Document_end (** ... *) 15 | Block_sequence_start 16 | Block_mapping_start 17 - | Block_entry (** [-] *) 18 - | Block_end (** implicit, from dedent *) 19 | Flow_sequence_start (** \[ *) 20 - | Flow_sequence_end (** \] *) 21 - | Flow_mapping_start (** \{ *) 22 - | Flow_mapping_end (** \} *) 23 - | Flow_entry (** [,] *) 24 - | Key (** ? or implicit key *) 25 - | Value (** : *) 26 | Anchor of string (** &name *) 27 - | Alias of string (** *name *) 28 | Tag of { handle : string; suffix : string } 29 | Scalar of { style : Scalar_style.t; value : string } 30 31 - type spanned = { 32 - token : t; 33 - span : Span.t; 34 - } 35 36 let pp_token fmt = function 37 - | Stream_start enc -> 38 - Format.fprintf fmt "STREAM-START(%a)" Encoding.pp enc 39 - | Stream_end -> 40 - Format.fprintf fmt "STREAM-END" 41 | Version_directive { major; minor } -> 42 Format.fprintf fmt "VERSION-DIRECTIVE(%d.%d)" major minor 43 | Tag_directive { handle; prefix } -> 44 Format.fprintf fmt "TAG-DIRECTIVE(%s, %s)" handle prefix 45 - | Document_start -> 46 - Format.fprintf fmt "DOCUMENT-START" 47 - | Document_end -> 48 - Format.fprintf fmt "DOCUMENT-END" 49 - | Block_sequence_start -> 50 - Format.fprintf fmt "BLOCK-SEQUENCE-START" 51 - | Block_mapping_start -> 52 - Format.fprintf fmt "BLOCK-MAPPING-START" 53 - | Block_entry -> 54 - Format.fprintf fmt "BLOCK-ENTRY" 55 - | Block_end -> 56 - Format.fprintf fmt "BLOCK-END" 57 - | Flow_sequence_start -> 58 - Format.fprintf fmt "FLOW-SEQUENCE-START" 59 - | Flow_sequence_end -> 60 - Format.fprintf fmt "FLOW-SEQUENCE-END" 61 - | Flow_mapping_start -> 62 - Format.fprintf fmt "FLOW-MAPPING-START" 63 - | Flow_mapping_end -> 64 - Format.fprintf fmt "FLOW-MAPPING-END" 65 - | Flow_entry -> 66 - Format.fprintf fmt "FLOW-ENTRY" 67 - | Key -> 68 - Format.fprintf fmt "KEY" 69 - | Value -> 70 - Format.fprintf fmt "VALUE" 71 - | Anchor name -> 72 - Format.fprintf fmt "ANCHOR(%s)" name 73 - | Alias name -> 74 - Format.fprintf fmt "ALIAS(%s)" name 75 - | Tag { handle; suffix } -> 76 - Format.fprintf fmt "TAG(%s, %s)" handle suffix 77 | Scalar { style; value } -> 78 Format.fprintf fmt "SCALAR(%a, %S)" Scalar_style.pp style value 79
··· 10 | Stream_end 11 | Version_directive of { major : int; minor : int } 12 | Tag_directive of { handle : string; prefix : string } 13 + | Document_start (** --- *) 14 + | Document_end (** ... *) 15 | Block_sequence_start 16 | Block_mapping_start 17 + | Block_entry (** [-] *) 18 + | Block_end (** implicit, from dedent *) 19 | Flow_sequence_start (** \[ *) 20 + | Flow_sequence_end (** \] *) 21 + | Flow_mapping_start (** \{ *) 22 + | Flow_mapping_end (** \} *) 23 + | Flow_entry (** [,] *) 24 + | Key (** ? or implicit key *) 25 + | Value (** : *) 26 | Anchor of string (** &name *) 27 + | Alias of string (** *name *) 28 | Tag of { handle : string; suffix : string } 29 | Scalar of { style : Scalar_style.t; value : string } 30 31 + type spanned = { token : t; span : Span.t } 32 33 let pp_token fmt = function 34 + | Stream_start enc -> Format.fprintf fmt "STREAM-START(%a)" Encoding.pp enc 35 + | Stream_end -> Format.fprintf fmt "STREAM-END" 36 | Version_directive { major; minor } -> 37 Format.fprintf fmt "VERSION-DIRECTIVE(%d.%d)" major minor 38 | Tag_directive { handle; prefix } -> 39 Format.fprintf fmt "TAG-DIRECTIVE(%s, %s)" handle prefix 40 + | Document_start -> Format.fprintf fmt "DOCUMENT-START" 41 + | Document_end -> Format.fprintf fmt "DOCUMENT-END" 42 + | Block_sequence_start -> Format.fprintf fmt "BLOCK-SEQUENCE-START" 43 + | Block_mapping_start -> Format.fprintf fmt "BLOCK-MAPPING-START" 44 + | Block_entry -> Format.fprintf fmt "BLOCK-ENTRY" 45 + | Block_end -> Format.fprintf fmt "BLOCK-END" 46 + | Flow_sequence_start -> Format.fprintf fmt "FLOW-SEQUENCE-START" 47 + | Flow_sequence_end -> Format.fprintf fmt "FLOW-SEQUENCE-END" 48 + | Flow_mapping_start -> Format.fprintf fmt "FLOW-MAPPING-START" 49 + | Flow_mapping_end -> Format.fprintf fmt "FLOW-MAPPING-END" 50 + | Flow_entry -> Format.fprintf fmt "FLOW-ENTRY" 51 + | Key -> Format.fprintf fmt "KEY" 52 + | Value -> Format.fprintf fmt "VALUE" 53 + | Anchor name -> Format.fprintf fmt "ANCHOR(%s)" name 54 + | Alias name -> Format.fprintf fmt "ALIAS(%s)" name 55 + | Tag { handle; suffix } -> Format.fprintf fmt "TAG(%s, %s)" handle suffix 56 | Scalar { style; value } -> 57 Format.fprintf fmt "SCALAR(%a, %S)" Scalar_style.pp style value 58
+38 -66
lib/unix/yamlrw_unix.ml
··· 5 6 (** Yamlrw Unix - Channel and file I/O for YAML 7 8 - This module provides channel and file operations for parsing 9 - and emitting YAML using bytesrw for efficient streaming I/O. *) 10 11 open Bytesrw 12 open Yamlrw ··· 19 20 (** {1 Channel Input} *) 21 22 - let value_of_channel 23 - ?(resolve_aliases = true) 24 ?(max_nodes = Yaml.default_max_alias_nodes) 25 - ?(max_depth = Yaml.default_max_alias_depth) 26 - ic = 27 let reader = Bytes.Reader.of_in_channel ic in 28 Loader.value_of_reader ~resolve_aliases ~max_nodes ~max_depth reader 29 30 - let yaml_of_channel 31 - ?(resolve_aliases = false) 32 ?(max_nodes = Yaml.default_max_alias_nodes) 33 - ?(max_depth = Yaml.default_max_alias_depth) 34 - ic = 35 let reader = Bytes.Reader.of_in_channel ic in 36 Loader.yaml_of_reader ~resolve_aliases ~max_nodes ~max_depth reader 37 ··· 41 42 (** {1 Channel Output} *) 43 44 - let value_to_channel 45 - ?(encoding = `Utf8) 46 - ?(scalar_style = `Any) 47 - ?(layout_style = `Any) 48 - oc 49 - (v : value) = 50 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 51 let writer = Bytes.Writer.of_out_channel oc in 52 Serialize.value_to_writer ~config writer v 53 54 - let yaml_to_channel 55 - ?(encoding = `Utf8) 56 - ?(scalar_style = `Any) 57 - ?(layout_style = `Any) 58 - oc 59 - (v : yaml) = 60 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 61 let writer = Bytes.Writer.of_out_channel oc in 62 Serialize.yaml_to_writer ~config writer v 63 64 - let documents_to_channel 65 - ?(encoding = `Utf8) 66 - ?(scalar_style = `Any) 67 - ?(layout_style = `Any) 68 - ?(resolve_aliases = true) 69 - oc 70 - docs = 71 - let config = { Emitter.default_config with encoding; scalar_style; layout_style } in 72 let writer = Bytes.Writer.of_out_channel oc in 73 Serialize.documents_to_writer ~config ~resolve_aliases writer docs 74 75 (** {1 File Input} *) 76 77 - let value_of_file 78 - ?(resolve_aliases = true) 79 ?(max_nodes = Yaml.default_max_alias_nodes) 80 - ?(max_depth = Yaml.default_max_alias_depth) 81 - path = 82 In_channel.with_open_bin path (fun ic -> 83 - value_of_channel ~resolve_aliases ~max_nodes ~max_depth ic) 84 85 - let yaml_of_file 86 - ?(resolve_aliases = false) 87 ?(max_nodes = Yaml.default_max_alias_nodes) 88 - ?(max_depth = Yaml.default_max_alias_depth) 89 - path = 90 In_channel.with_open_bin path (fun ic -> 91 - yaml_of_channel ~resolve_aliases ~max_nodes ~max_depth ic) 92 93 - let documents_of_file path = 94 - In_channel.with_open_bin path documents_of_channel 95 96 (** {1 File Output} *) 97 98 - let value_to_file 99 - ?(encoding = `Utf8) 100 - ?(scalar_style = `Any) 101 - ?(layout_style = `Any) 102 - path 103 - v = 104 Out_channel.with_open_bin path (fun oc -> 105 - value_to_channel ~encoding ~scalar_style ~layout_style oc v) 106 107 - let yaml_to_file 108 - ?(encoding = `Utf8) 109 - ?(scalar_style = `Any) 110 - ?(layout_style = `Any) 111 - path 112 - v = 113 Out_channel.with_open_bin path (fun oc -> 114 - yaml_to_channel ~encoding ~scalar_style ~layout_style oc v) 115 116 - let documents_to_file 117 - ?(encoding = `Utf8) 118 - ?(scalar_style = `Any) 119 - ?(layout_style = `Any) 120 - ?(resolve_aliases = true) 121 - path 122 - docs = 123 Out_channel.with_open_bin path (fun oc -> 124 - documents_to_channel ~encoding ~scalar_style ~layout_style ~resolve_aliases oc docs)
··· 5 6 (** Yamlrw Unix - Channel and file I/O for YAML 7 8 + This module provides channel and file operations for parsing and emitting 9 + YAML using bytesrw for efficient streaming I/O. *) 10 11 open Bytesrw 12 open Yamlrw ··· 19 20 (** {1 Channel Input} *) 21 22 + let value_of_channel ?(resolve_aliases = true) 23 ?(max_nodes = Yaml.default_max_alias_nodes) 24 + ?(max_depth = Yaml.default_max_alias_depth) ic = 25 let reader = Bytes.Reader.of_in_channel ic in 26 Loader.value_of_reader ~resolve_aliases ~max_nodes ~max_depth reader 27 28 + let yaml_of_channel ?(resolve_aliases = false) 29 ?(max_nodes = Yaml.default_max_alias_nodes) 30 + ?(max_depth = Yaml.default_max_alias_depth) ic = 31 let reader = Bytes.Reader.of_in_channel ic in 32 Loader.yaml_of_reader ~resolve_aliases ~max_nodes ~max_depth reader 33 ··· 37 38 (** {1 Channel Output} *) 39 40 + let value_to_channel ?(encoding = `Utf8) ?(scalar_style = `Any) 41 + ?(layout_style = `Any) oc (v : value) = 42 + let config = 43 + { Emitter.default_config with encoding; scalar_style; layout_style } 44 + in 45 let writer = Bytes.Writer.of_out_channel oc in 46 Serialize.value_to_writer ~config writer v 47 48 + let yaml_to_channel ?(encoding = `Utf8) ?(scalar_style = `Any) 49 + ?(layout_style = `Any) oc (v : yaml) = 50 + let config = 51 + { Emitter.default_config with encoding; scalar_style; layout_style } 52 + in 53 let writer = Bytes.Writer.of_out_channel oc in 54 Serialize.yaml_to_writer ~config writer v 55 56 + let documents_to_channel ?(encoding = `Utf8) ?(scalar_style = `Any) 57 + ?(layout_style = `Any) ?(resolve_aliases = true) oc docs = 58 + let config = 59 + { Emitter.default_config with encoding; scalar_style; layout_style } 60 + in 61 let writer = Bytes.Writer.of_out_channel oc in 62 Serialize.documents_to_writer ~config ~resolve_aliases writer docs 63 64 (** {1 File Input} *) 65 66 + let value_of_file ?(resolve_aliases = true) 67 ?(max_nodes = Yaml.default_max_alias_nodes) 68 + ?(max_depth = Yaml.default_max_alias_depth) path = 69 In_channel.with_open_bin path (fun ic -> 70 + value_of_channel ~resolve_aliases ~max_nodes ~max_depth ic) 71 72 + let yaml_of_file ?(resolve_aliases = false) 73 ?(max_nodes = Yaml.default_max_alias_nodes) 74 + ?(max_depth = Yaml.default_max_alias_depth) path = 75 In_channel.with_open_bin path (fun ic -> 76 + yaml_of_channel ~resolve_aliases ~max_nodes ~max_depth ic) 77 78 + let documents_of_file path = In_channel.with_open_bin path documents_of_channel 79 80 (** {1 File Output} *) 81 82 + let value_to_file ?(encoding = `Utf8) ?(scalar_style = `Any) 83 + ?(layout_style = `Any) path v = 84 Out_channel.with_open_bin path (fun oc -> 85 + value_to_channel ~encoding ~scalar_style ~layout_style oc v) 86 87 + let yaml_to_file ?(encoding = `Utf8) ?(scalar_style = `Any) 88 + ?(layout_style = `Any) path v = 89 Out_channel.with_open_bin path (fun oc -> 90 + yaml_to_channel ~encoding ~scalar_style ~layout_style oc v) 91 92 + let documents_to_file ?(encoding = `Utf8) ?(scalar_style = `Any) 93 + ?(layout_style = `Any) ?(resolve_aliases = true) path docs = 94 Out_channel.with_open_bin path (fun oc -> 95 + documents_to_channel ~encoding ~scalar_style ~layout_style 96 + ~resolve_aliases oc docs)
+4 -12
lib/unix/yamlrw_unix.mli
··· 5 6 (** Yamlrw Unix - Channel and file I/O for YAML 7 8 - This module provides channel and file operations for parsing 9 - and emitting YAML using bytesrw for efficient streaming I/O. *) 10 11 (** {1 Types} *) 12 ··· 76 (** {1 File Input} *) 77 78 val value_of_file : 79 - ?resolve_aliases:bool -> 80 - ?max_nodes:int -> 81 - ?max_depth:int -> 82 - string -> 83 - value 84 (** Parse a JSON-compatible value from a file. *) 85 86 val yaml_of_file : 87 - ?resolve_aliases:bool -> 88 - ?max_nodes:int -> 89 - ?max_depth:int -> 90 - string -> 91 - yaml 92 (** Parse a full YAML value from a file. *) 93 94 val documents_of_file : string -> document list
··· 5 6 (** Yamlrw Unix - Channel and file I/O for YAML 7 8 + This module provides channel and file operations for parsing and emitting 9 + YAML using bytesrw for efficient streaming I/O. *) 10 11 (** {1 Types} *) 12 ··· 76 (** {1 File Input} *) 77 78 val value_of_file : 79 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> value 80 (** Parse a JSON-compatible value from a file. *) 81 82 val yaml_of_file : 83 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> yaml 84 (** Parse a full YAML value from a file. *) 85 86 val documents_of_file : string -> document list
+16 -20
lib/value.ml
··· 5 6 (** JSON-compatible YAML value representation *) 7 8 - type t = [ 9 - | `Null 10 | `Bool of bool 11 | `Float of float 12 | `String of string 13 | `A of t list 14 - | `O of (string * t) list 15 - ] 16 17 (* Type equality is ensured by structural compatibility with Yamlrw.value *) 18 ··· 23 let int n : t = `Float (Float.of_int n) 24 let float f : t = `Float f 25 let string s : t = `String s 26 - 27 let list f xs : t = `A (List.map f xs) 28 let obj pairs : t = `O pairs 29 ··· 72 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs 73 | _ -> false 74 75 - let find key = function 76 - | `O pairs -> List.assoc_opt key pairs 77 - | _ -> None 78 79 let get key v = 80 - match find key v with 81 - | Some v -> v 82 - | None -> Error.raise (Key_not_found key) 83 84 let keys = function 85 | `O pairs -> List.map fst pairs ··· 92 (** Combinators *) 93 94 let combine v1 v2 = 95 - match v1, v2 with 96 | `O o1, `O o2 -> `O (o1 @ o2) 97 | v1, _ -> Error.raise (Type_mismatch ("object", type_name v1)) 98 ··· 113 | `Float f -> 114 if Float.is_integer f && Float.abs f < 1e15 then 115 Format.fprintf fmt "%.0f" f 116 - else 117 - Format.fprintf fmt "%g" f 118 | `String s -> Format.fprintf fmt "%S" s 119 | `A [] -> Format.pp_print_string fmt "[]" 120 | `A items -> 121 Format.fprintf fmt "@[<hv 2>[@,%a@]@,]" 122 - (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp) 123 items 124 | `O [] -> Format.pp_print_string fmt "{}" 125 | `O pairs -> 126 Format.fprintf fmt "@[<hv 2>{@,%a@]@,}" 127 - (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") 128 (fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v)) 129 pairs 130 131 (** Equality and comparison *) 132 133 let rec equal (a : t) (b : t) = 134 - match a, b with 135 | `Null, `Null -> true 136 | `Bool a, `Bool b -> a = b 137 | `Float a, `Float b -> Float.equal a b 138 | `String a, `String b -> String.equal a b 139 | `A a, `A b -> List.equal equal a b 140 | `O a, `O b -> 141 - List.length a = List.length b && 142 - List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b 143 | _ -> false 144 145 let rec compare (a : t) (b : t) = 146 - match a, b with 147 | `Null, `Null -> 0 148 | `Null, _ -> -1 149 | _, `Null -> 1
··· 5 6 (** JSON-compatible YAML value representation *) 7 8 + type t = 9 + [ `Null 10 | `Bool of bool 11 | `Float of float 12 | `String of string 13 | `A of t list 14 + | `O of (string * t) list ] 15 16 (* Type equality is ensured by structural compatibility with Yamlrw.value *) 17 ··· 22 let int n : t = `Float (Float.of_int n) 23 let float f : t = `Float f 24 let string s : t = `String s 25 let list f xs : t = `A (List.map f xs) 26 let obj pairs : t = `O pairs 27 ··· 70 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs 71 | _ -> false 72 73 + let find key = function `O pairs -> List.assoc_opt key pairs | _ -> None 74 75 let get key v = 76 + match find key v with Some v -> v | None -> Error.raise (Key_not_found key) 77 78 let keys = function 79 | `O pairs -> List.map fst pairs ··· 86 (** Combinators *) 87 88 let combine v1 v2 = 89 + match (v1, v2) with 90 | `O o1, `O o2 -> `O (o1 @ o2) 91 | v1, _ -> Error.raise (Type_mismatch ("object", type_name v1)) 92 ··· 107 | `Float f -> 108 if Float.is_integer f && Float.abs f < 1e15 then 109 Format.fprintf fmt "%.0f" f 110 + else Format.fprintf fmt "%g" f 111 | `String s -> Format.fprintf fmt "%S" s 112 | `A [] -> Format.pp_print_string fmt "[]" 113 | `A items -> 114 Format.fprintf fmt "@[<hv 2>[@,%a@]@,]" 115 + (Format.pp_print_list 116 + ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") 117 + pp) 118 items 119 | `O [] -> Format.pp_print_string fmt "{}" 120 | `O pairs -> 121 Format.fprintf fmt "@[<hv 2>{@,%a@]@,}" 122 + (Format.pp_print_list 123 + ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") 124 (fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v)) 125 pairs 126 127 (** Equality and comparison *) 128 129 let rec equal (a : t) (b : t) = 130 + match (a, b) with 131 | `Null, `Null -> true 132 | `Bool a, `Bool b -> a = b 133 | `Float a, `Float b -> Float.equal a b 134 | `String a, `String b -> String.equal a b 135 | `A a, `A b -> List.equal equal a b 136 | `O a, `O b -> 137 + List.length a = List.length b 138 + && List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b 139 | _ -> false 140 141 let rec compare (a : t) (b : t) = 142 + match (a, b) with 143 | `Null, `Null -> 0 144 | `Null, _ -> -1 145 | _, `Null -> 1
+115 -102
lib/yaml.ml
··· 5 6 (** Full YAML representation with anchors, tags, and aliases *) 7 8 - type t = [ 9 - | `Scalar of Scalar.t 10 | `Alias of string 11 | `A of t Sequence.t 12 - | `O of (t, t) Mapping.t 13 - ] 14 15 (** Pretty printing *) 16 ··· 24 (** Equality *) 25 26 let rec equal (a : t) (b : t) = 27 - match a, b with 28 | `Scalar a, `Scalar b -> Scalar.equal a b 29 | `Alias a, `Alias b -> String.equal a b 30 | `A a, `A b -> Sequence.equal equal a b ··· 40 | `Bool false -> `Scalar (Scalar.make "false") 41 | `Float f -> 42 let s = 43 - if Float.is_integer f && Float.abs f < 1e15 then 44 - Printf.sprintf "%.0f" f 45 - else 46 - Printf.sprintf "%g" f 47 in 48 `Scalar (Scalar.make s) 49 - | `String s -> 50 - `Scalar (Scalar.make s ~style:`Double_quoted) 51 - | `A items -> 52 - `A (Sequence.make (List.map of_value items)) 53 | `O pairs -> 54 - `O (Mapping.make (List.map (fun (k, v) -> 55 - (`Scalar (Scalar.make k), of_value v) 56 - ) pairs)) 57 58 - (** Default limits for alias expansion (protection against billion laughs attack) *) 59 let default_max_alias_nodes = 10_000_000 60 let default_max_alias_depth = 100 61 62 (** Resolve aliases by replacing them with referenced nodes. 63 64 - Processes the tree in document order so that aliases resolve to the 65 - anchor value that was defined at the point the alias was encountered. 66 67 - See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 68 - (Anchors and Aliases)} of the YAML 1.2.2 specification for details on 69 - how anchors and aliases work in YAML. 70 71 - This implements protection against the "billion laughs attack" 72 - (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) 73 - by limiting both the total number of nodes and the nesting depth during expansion. 74 75 - @param max_nodes Maximum number of nodes to create during expansion (default 10M) 76 - @param max_depth Maximum depth of alias-within-alias resolution (default 100) 77 - @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded 78 - @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded 79 - *) 80 - let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t = 81 let anchors = Hashtbl.create 16 in 82 let node_count = ref 0 in 83 ··· 103 need expansion if it was registered before those anchors existed *) 104 resolve ~depth:(depth + 1) target 105 | None -> Error.raise (Undefined_alias name) 106 - 107 (* Single pass: process in document order, registering anchors and resolving aliases *) 108 and resolve ~depth (v : t) : t = 109 check_node_limit (); ··· 112 (* Register anchor after we have the resolved node *) 113 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s); 114 v 115 - | `Alias name -> 116 - expand_alias ~depth name 117 | `A seq -> 118 (* First resolve all members in order *) 119 - let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in 120 - let resolved = `A (Sequence.make 121 - ?anchor:(Sequence.anchor seq) 122 - ?tag:(Sequence.tag seq) 123 - ~implicit:(Sequence.implicit seq) 124 - ~style:(Sequence.style seq) 125 - resolved_members) in 126 (* Register anchor with resolved node *) 127 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq); 128 resolved 129 | `O map -> 130 (* Process key-value pairs in document order *) 131 - let resolved_pairs = List.map (fun (k, v) -> 132 - let resolved_k = resolve ~depth k in 133 - let resolved_v = resolve ~depth v in 134 - (resolved_k, resolved_v) 135 - ) (Mapping.members map) in 136 - let resolved = `O (Mapping.make 137 - ?anchor:(Mapping.anchor map) 138 - ?tag:(Mapping.tag map) 139 - ~implicit:(Mapping.implicit map) 140 - ~style:(Mapping.style map) 141 - resolved_pairs) in 142 (* Register anchor with resolved node *) 143 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map); 144 resolved ··· 153 154 (* If explicitly tagged, respect the tag *) 155 match tag with 156 - | Some "tag:yaml.org,2002:null" | Some "!!null" -> 157 - `Null 158 - | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> 159 - (match String.lowercase_ascii value with 160 - | "true" | "yes" | "on" -> `Bool true 161 - | "false" | "no" | "off" -> `Bool false 162 - | _ -> Error.raise (Invalid_scalar_conversion (value, "bool"))) 163 - | Some "tag:yaml.org,2002:int" | Some "!!int" -> 164 - (try `Float (Float.of_string value) 165 - with _ -> Error.raise (Invalid_scalar_conversion (value, "int"))) 166 - | Some "tag:yaml.org,2002:float" | Some "!!float" -> 167 - (try `Float (Float.of_string value) 168 - with _ -> Error.raise (Invalid_scalar_conversion (value, "float"))) 169 - | Some "tag:yaml.org,2002:str" | Some "!!str" -> 170 - `String value 171 | Some _ -> 172 (* Unknown tag - treat as string *) 173 `String value 174 | None -> 175 (* Implicit type resolution for plain scalars *) 176 - if style <> `Plain then 177 - `String value 178 - else 179 - infer_scalar_type value 180 181 (** Infer type from plain scalar value *) 182 and infer_scalar_type value = ··· 208 else if (first = '-' || first = '+') && len >= 2 then 209 let second = value.[1] in 210 (* After sign, must be digit or dot-digit (for +.5, -.5) *) 211 - second >= '0' && second <= '9' || 212 - (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9') 213 else false 214 in 215 (* Try integer/float *) ··· 231 | _ -> 232 (* Decimal with leading zero or octal in YAML 1.1 *) 233 Some (`Float (Float.of_string value)) 234 - else 235 - Some (`Float (Float.of_string value)) 236 with _ -> None 237 else None 238 in ··· 244 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats. 245 YAML requires the leading dot: .nan, .inf, -.inf 246 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 247 - if String.length value >= 2 && value.[0] = '.' && 248 - value.[1] >= '0' && value.[1] <= '9' then 249 - try `Float (Float.of_string value) 250 - with _ -> `String value 251 - else 252 - `String value 253 254 (** Convert to JSON-compatible Value. 255 256 - Converts a full YAML representation to a simplified JSON-compatible value type. 257 - This process implements the representation graph to serialization tree conversion 258 - described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} 259 - of the YAML 1.2.2 specification. 260 261 - See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 262 - (JSON Schema)} for the tag resolution used during conversion. 263 264 - @param resolve_aliases_first Whether to resolve aliases before conversion (default true) 265 @param max_nodes Maximum nodes during alias expansion (default 10M) 266 @param max_depth Maximum alias nesting depth (default 100) 267 - @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered 268 - *) 269 - let to_value 270 - ?(resolve_aliases_first = true) 271 ?(max_nodes = default_max_alias_nodes) 272 - ?(max_depth = default_max_alias_depth) 273 - (v : t) : Value.t = 274 - let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in 275 let rec convert (v : t) : Value.t = 276 match v with 277 | `Scalar s -> scalar_to_value s 278 | `Alias name -> Error.raise (Unresolved_alias name) 279 | `A seq -> `A (List.map convert (Sequence.members seq)) 280 | `O map -> 281 - `O (List.map (fun (k, v) -> 282 - let key = match k with 283 - | `Scalar s -> Scalar.value s 284 - | _ -> Error.raise (Type_mismatch ("string key", "complex key")) 285 - in 286 - (key, convert v) 287 - ) (Mapping.members map)) 288 in 289 convert v 290
··· 5 6 (** Full YAML representation with anchors, tags, and aliases *) 7 8 + type t = 9 + [ `Scalar of Scalar.t 10 | `Alias of string 11 | `A of t Sequence.t 12 + | `O of (t, t) Mapping.t ] 13 14 (** Pretty printing *) 15 ··· 23 (** Equality *) 24 25 let rec equal (a : t) (b : t) = 26 + match (a, b) with 27 | `Scalar a, `Scalar b -> Scalar.equal a b 28 | `Alias a, `Alias b -> String.equal a b 29 | `A a, `A b -> Sequence.equal equal a b ··· 39 | `Bool false -> `Scalar (Scalar.make "false") 40 | `Float f -> 41 let s = 42 + if Float.is_integer f && Float.abs f < 1e15 then Printf.sprintf "%.0f" f 43 + else Printf.sprintf "%g" f 44 in 45 `Scalar (Scalar.make s) 46 + | `String s -> `Scalar (Scalar.make s ~style:`Double_quoted) 47 + | `A items -> `A (Sequence.make (List.map of_value items)) 48 | `O pairs -> 49 + `O 50 + (Mapping.make 51 + (List.map 52 + (fun (k, v) -> (`Scalar (Scalar.make k), of_value v)) 53 + pairs)) 54 55 + (** Default limits for alias expansion (protection against billion laughs 56 + attack) *) 57 let default_max_alias_nodes = 10_000_000 58 + 59 let default_max_alias_depth = 100 60 61 (** Resolve aliases by replacing them with referenced nodes. 62 63 + Processes the tree in document order so that aliases resolve to the anchor 64 + value that was defined at the point the alias was encountered. 65 66 + See 67 + {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 68 + (Anchors and Aliases)} of the YAML 1.2.2 specification for details on how 69 + anchors and aliases work in YAML. 70 71 + This implements protection against the "billion laughs attack" (see 72 + {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) by 73 + limiting both the total number of nodes and the nesting depth during 74 + expansion. 75 76 + @param max_nodes 77 + Maximum number of nodes to create during expansion (default 10M) 78 + @param max_depth 79 + Maximum depth of alias-within-alias resolution (default 100) 80 + @raise Error.Yamlrw_error 81 + with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is 82 + exceeded 83 + @raise Error.Yamlrw_error 84 + with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is 85 + exceeded *) 86 + let resolve_aliases ?(max_nodes = default_max_alias_nodes) 87 + ?(max_depth = default_max_alias_depth) (root : t) : t = 88 let anchors = Hashtbl.create 16 in 89 let node_count = ref 0 in 90 ··· 110 need expansion if it was registered before those anchors existed *) 111 resolve ~depth:(depth + 1) target 112 | None -> Error.raise (Undefined_alias name) 113 (* Single pass: process in document order, registering anchors and resolving aliases *) 114 and resolve ~depth (v : t) : t = 115 check_node_limit (); ··· 118 (* Register anchor after we have the resolved node *) 119 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s); 120 v 121 + | `Alias name -> expand_alias ~depth name 122 | `A seq -> 123 (* First resolve all members in order *) 124 + let resolved_members = 125 + List.map (resolve ~depth) (Sequence.members seq) 126 + in 127 + let resolved = 128 + `A 129 + (Sequence.make ?anchor:(Sequence.anchor seq) ?tag:(Sequence.tag seq) 130 + ~implicit:(Sequence.implicit seq) ~style:(Sequence.style seq) 131 + resolved_members) 132 + in 133 (* Register anchor with resolved node *) 134 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq); 135 resolved 136 | `O map -> 137 (* Process key-value pairs in document order *) 138 + let resolved_pairs = 139 + List.map 140 + (fun (k, v) -> 141 + let resolved_k = resolve ~depth k in 142 + let resolved_v = resolve ~depth v in 143 + (resolved_k, resolved_v)) 144 + (Mapping.members map) 145 + in 146 + let resolved = 147 + `O 148 + (Mapping.make ?anchor:(Mapping.anchor map) ?tag:(Mapping.tag map) 149 + ~implicit:(Mapping.implicit map) ~style:(Mapping.style map) 150 + resolved_pairs) 151 + in 152 (* Register anchor with resolved node *) 153 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map); 154 resolved ··· 163 164 (* If explicitly tagged, respect the tag *) 165 match tag with 166 + | Some "tag:yaml.org,2002:null" | Some "!!null" -> `Null 167 + | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> ( 168 + match String.lowercase_ascii value with 169 + | "true" | "yes" | "on" -> `Bool true 170 + | "false" | "no" | "off" -> `Bool false 171 + | _ -> Error.raise (Invalid_scalar_conversion (value, "bool"))) 172 + | Some "tag:yaml.org,2002:int" | Some "!!int" -> ( 173 + try `Float (Float.of_string value) 174 + with _ -> Error.raise (Invalid_scalar_conversion (value, "int"))) 175 + | Some "tag:yaml.org,2002:float" | Some "!!float" -> ( 176 + try `Float (Float.of_string value) 177 + with _ -> Error.raise (Invalid_scalar_conversion (value, "float"))) 178 + | Some "tag:yaml.org,2002:str" | Some "!!str" -> `String value 179 | Some _ -> 180 (* Unknown tag - treat as string *) 181 `String value 182 | None -> 183 (* Implicit type resolution for plain scalars *) 184 + if style <> `Plain then `String value else infer_scalar_type value 185 186 (** Infer type from plain scalar value *) 187 and infer_scalar_type value = ··· 213 else if (first = '-' || first = '+') && len >= 2 then 214 let second = value.[1] in 215 (* After sign, must be digit or dot-digit (for +.5, -.5) *) 216 + (second >= '0' && second <= '9') 217 + || (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9') 218 else false 219 in 220 (* Try integer/float *) ··· 236 | _ -> 237 (* Decimal with leading zero or octal in YAML 1.1 *) 238 Some (`Float (Float.of_string value)) 239 + else Some (`Float (Float.of_string value)) 240 with _ -> None 241 else None 242 in ··· 248 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats. 249 YAML requires the leading dot: .nan, .inf, -.inf 250 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 251 + if 252 + String.length value >= 2 253 + && value.[0] = '.' 254 + && value.[1] >= '0' 255 + && value.[1] <= '9' 256 + then try `Float (Float.of_string value) with _ -> `String value 257 + else `String value 258 259 (** Convert to JSON-compatible Value. 260 261 + Converts a full YAML representation to a simplified JSON-compatible value 262 + type. This process implements the representation graph to serialization tree 263 + conversion described in 264 + {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} of the 265 + YAML 1.2.2 specification. 266 267 + See also 268 + {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 (JSON 269 + Schema)} for the tag resolution used during conversion. 270 271 + @param resolve_aliases_first 272 + Whether to resolve aliases before conversion (default true) 273 @param max_nodes Maximum nodes during alias expansion (default 10M) 274 @param max_depth Maximum alias nesting depth (default 100) 275 + @raise Error.Yamlrw_error 276 + with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is 277 + false and an alias is encountered *) 278 + let to_value ?(resolve_aliases_first = true) 279 ?(max_nodes = default_max_alias_nodes) 280 + ?(max_depth = default_max_alias_depth) (v : t) : Value.t = 281 + let v = 282 + if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v 283 + in 284 let rec convert (v : t) : Value.t = 285 match v with 286 | `Scalar s -> scalar_to_value s 287 | `Alias name -> Error.raise (Unresolved_alias name) 288 | `A seq -> `A (List.map convert (Sequence.members seq)) 289 | `O map -> 290 + `O 291 + (List.map 292 + (fun (k, v) -> 293 + let key = 294 + match k with 295 + | `Scalar s -> Scalar.value s 296 + | _ -> 297 + Error.raise (Type_mismatch ("string key", "complex key")) 298 + in 299 + (key, convert v)) 300 + (Mapping.members map)) 301 in 302 convert v 303
+183 -253
lib/yamlrw.ml
··· 11 12 exception Yamlrw_error = Error.Yamlrw_error 13 14 - 15 (** {2 Core Types} *) 16 17 (** JSON-compatible YAML representation. Use this for simple data interchange. 18 19 This type is structurally equivalent to {!Value.t} and compatible with the 20 - ezjsonm representation. For additional operations, see {!Value} and {!Util}. *) 21 - type value = [ 22 - | `Null (** YAML null, ~, or empty values *) 23 - | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *) 24 - | `Float of float (** All YAML numbers (integers stored as floats) *) 25 - | `String of string (** YAML strings *) 26 - | `A of value list (** YAML sequences/arrays *) 27 - | `O of (string * value) list (** YAML mappings/objects with string keys *) 28 - ] 29 30 (** Full YAML representation preserving anchors, tags, and aliases. 31 32 This type is structurally equivalent to {!Yaml.t}. Use this when you need ··· 34 type tags for custom types, scalar styles (plain, quoted, literal, folded), 35 and collection styles (block vs flow). 36 37 - For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *) 38 - type yaml = [ 39 - | `Scalar of Scalar.t (** YAML scalar value with style and metadata *) 40 - | `Alias of string (** Alias reference to an anchored node *) 41 - | `A of yaml Sequence.t (** YAML sequence with style and metadata *) 42 - | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *) 43 - ] 44 45 (** A YAML document with directives and metadata. 46 47 This type is structurally equivalent to {!Document.t}. A YAML stream can 48 contain multiple documents, each separated by document markers. 49 50 For additional operations, see {!Document}. *) 51 - type document = { 52 - version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *) 53 - tags : (string * string) list; (** TAG directives mapping handles to prefixes *) 54 - root : yaml option; (** Root content of the document *) 55 - implicit_start : bool; (** Whether the document start marker (---) is implicit *) 56 - implicit_end : bool; (** Whether the document end marker (...) is implicit *) 57 - } 58 - 59 60 (** {2 Character Encoding} *) 61 62 module Encoding = Encoding 63 - 64 65 (** {2 Parsing} *) 66 ··· 72 (** Default maximum alias nesting depth (100). *) 73 let default_max_alias_depth = Yaml.default_max_alias_depth 74 75 - let of_string 76 - ?(resolve_aliases = true) 77 - ?(max_nodes = default_max_alias_nodes) 78 - ?(max_depth = default_max_alias_depth) 79 - s : value = 80 - (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value) 81 (** Parse a YAML string into a JSON-compatible value. 82 83 @param resolve_aliases Whether to expand aliases (default: true) 84 @param max_nodes Maximum nodes during alias expansion (default: 10M) 85 @param max_depth Maximum alias nesting depth (default: 100) 86 @raise Yamlrw_error on parse error or if multiple documents found *) 87 88 - let yaml_of_string 89 - ?(resolve_aliases = false) 90 - ?(max_nodes = default_max_alias_nodes) 91 - ?(max_depth = default_max_alias_depth) 92 - s : yaml = 93 - (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml) 94 (** Parse a YAML string preserving full YAML metadata (anchors, tags, etc). 95 96 By default, aliases are NOT resolved, preserving the document structure. ··· 99 @param max_nodes Maximum nodes during alias expansion (default: 10M) 100 @param max_depth Maximum alias nesting depth (default: 100) 101 @raise Yamlrw_error on parse error or if multiple documents found *) 102 103 - let documents_of_string s : document list = 104 - let docs = Loader.documents_of_string s in 105 - List.map (fun (d : Document.t) : document -> { 106 - version = d.version; 107 - tags = d.tags; 108 - root = (d.root :> yaml option); 109 - implicit_start = d.implicit_start; 110 - implicit_end = d.implicit_end; 111 - }) docs 112 (** Parse a multi-document YAML stream. 113 114 - Use this when your YAML input contains multiple documents separated 115 - by document markers (---). 116 117 @raise Yamlrw_error on parse error *) 118 - 119 120 (** {2 Formatting Styles} *) 121 122 module Scalar_style = Scalar_style 123 - 124 module Layout_style = Layout_style 125 126 - 127 (** {2 Serialization} *) 128 129 let make_config ~encoding ~scalar_style ~layout_style = 130 { Emitter.default_config with encoding; scalar_style; layout_style } 131 132 - let to_buffer 133 - ?(encoding = `Utf8) 134 - ?(scalar_style = `Any) 135 - ?(layout_style = `Any) 136 - ?buffer 137 - (value : value) = 138 - let config = make_config ~encoding ~scalar_style ~layout_style in 139 - Serialize.value_to_buffer ~config ?buffer (value :> Value.t) 140 (** Serialize a value to a buffer. 141 142 @param encoding Output encoding (default: UTF-8) 143 @param scalar_style Preferred scalar style (default: Any) 144 @param layout_style Preferred layout style (default: Any) 145 - @param buffer Optional buffer to append to (allocates new one if not provided) 146 @return The buffer containing the serialized YAML *) 147 148 - let to_string 149 - ?(encoding = `Utf8) 150 - ?(scalar_style = `Any) 151 - ?(layout_style = `Any) 152 - (value : value) = 153 - Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value) 154 (** Serialize a value to a YAML string. 155 156 @param encoding Output encoding (default: UTF-8) 157 @param scalar_style Preferred scalar style (default: Any) 158 @param layout_style Preferred layout style (default: Any) *) 159 160 - let yaml_to_buffer 161 - ?(encoding = `Utf8) 162 - ?(scalar_style = `Any) 163 - ?(layout_style = `Any) 164 - ?buffer 165 - (yaml : yaml) = 166 - let config = make_config ~encoding ~scalar_style ~layout_style in 167 - Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t) 168 (** Serialize a full YAML value to a buffer. 169 170 @param encoding Output encoding (default: UTF-8) 171 @param scalar_style Preferred scalar style (default: Any) 172 @param layout_style Preferred layout style (default: Any) 173 - @param buffer Optional buffer to append to (allocates new one if not provided) 174 @return The buffer containing the serialized YAML *) 175 176 - let yaml_to_string 177 - ?(encoding = `Utf8) 178 - ?(scalar_style = `Any) 179 - ?(layout_style = `Any) 180 - (yaml : yaml) = 181 - Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml) 182 (** Serialize a full YAML value to a string. 183 184 @param encoding Output encoding (default: UTF-8) 185 @param scalar_style Preferred scalar style (default: Any) 186 @param layout_style Preferred layout style (default: Any) *) 187 188 - let documents_to_buffer 189 - ?(encoding = `Utf8) 190 - ?(scalar_style = `Any) 191 - ?(layout_style = `Any) 192 - ?(resolve_aliases = true) 193 - ?buffer 194 - (documents : document list) = 195 - let config = make_config ~encoding ~scalar_style ~layout_style in 196 - let docs' = List.map (fun (d : document) : Document.t -> { 197 - Document.version = d.version; 198 - Document.tags = d.tags; 199 - Document.root = (d.root :> Yaml.t option); 200 - Document.implicit_start = d.implicit_start; 201 - Document.implicit_end = d.implicit_end; 202 - }) documents in 203 - Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs' 204 (** Serialize multiple documents to a buffer. 205 206 @param encoding Output encoding (default: UTF-8) 207 @param scalar_style Preferred scalar style (default: Any) 208 @param layout_style Preferred layout style (default: Any) 209 @param resolve_aliases Whether to expand aliases (default: true) 210 - @param buffer Optional buffer to append to (allocates new one if not provided) 211 @return The buffer containing the serialized YAML *) 212 - 213 - let documents_to_string 214 - ?(encoding = `Utf8) 215 - ?(scalar_style = `Any) 216 - ?(layout_style = `Any) 217 - ?(resolve_aliases = true) 218 (documents : document list) = 219 - Buffer.contents (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases documents) 220 (** Serialize multiple documents to a YAML stream. 221 222 @param encoding Output encoding (default: UTF-8) 223 @param scalar_style Preferred scalar style (default: Any) 224 @param layout_style Preferred layout style (default: Any) 225 @param resolve_aliases Whether to expand aliases (default: true) *) 226 227 (** {2 Buffer Parsing} *) 228 229 - let of_buffer 230 - ?(resolve_aliases = true) 231 - ?(max_nodes = default_max_alias_nodes) 232 - ?(max_depth = default_max_alias_depth) 233 - buffer : value = 234 - of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 235 (** Parse YAML from a buffer into a JSON-compatible value. 236 237 @param resolve_aliases Whether to expand aliases (default: true) 238 @param max_nodes Maximum nodes during alias expansion (default: 10M) 239 @param max_depth Maximum alias nesting depth (default: 100) 240 @raise Yamlrw_error on parse error or if multiple documents found *) 241 242 - let yaml_of_buffer 243 - ?(resolve_aliases = false) 244 - ?(max_nodes = default_max_alias_nodes) 245 - ?(max_depth = default_max_alias_depth) 246 - buffer : yaml = 247 - yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 248 (** Parse YAML from a buffer preserving full YAML metadata. 249 250 @param resolve_aliases Whether to expand aliases (default: false) 251 @param max_nodes Maximum nodes during alias expansion (default: 10M) 252 @param max_depth Maximum alias nesting depth (default: 100) 253 @raise Yamlrw_error on parse error or if multiple documents found *) 254 255 - let documents_of_buffer buffer : document list = 256 - documents_of_string (Buffer.contents buffer) 257 (** Parse a multi-document YAML stream from a buffer. 258 259 @raise Yamlrw_error on parse error *) 260 - 261 262 (** {2 Conversion} *) 263 264 - let to_json 265 - ?(resolve_aliases = true) 266 - ?(max_nodes = default_max_alias_nodes) 267 - ?(max_depth = default_max_alias_depth) 268 - (yaml : yaml) : value = 269 - (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth (yaml :> Yaml.t) :> value) 270 (** Convert full YAML to JSON-compatible value. 271 272 @param resolve_aliases Whether to expand aliases (default: true) 273 @param max_nodes Maximum nodes during alias expansion (default: 10M) 274 @param max_depth Maximum alias nesting depth (default: 100) 275 @raise Yamlrw_error if alias limits exceeded or complex keys found *) 276 277 - let of_json (value : value) : yaml = 278 - (Yaml.of_value (value :> Value.t) :> yaml) 279 (** Convert JSON-compatible value to full YAML representation. *) 280 - 281 282 (** {2 Pretty Printing & Equality} *) 283 284 let pp = Value.pp 285 - (** Pretty-print a value. *) 286 287 - let equal = Value.equal 288 (** Test equality of two values. *) 289 - 290 291 (** {2 Util - Value Combinators} *) 292 293 module Util = struct 294 (** Combinators for working with {!type:value} values. 295 296 - This module provides constructors, accessors, and transformations 297 - for JSON-compatible YAML values. *) 298 299 type t = Value.t 300 ··· 349 let get_string v = match v with `String s -> s | _ -> type_error "string" v 350 let get_list v = match v with `A l -> l | _ -> type_error "list" v 351 let get_obj v = match v with `O o -> o | _ -> type_error "object" v 352 - 353 - let get_int v = 354 - match as_int v with 355 - | Some i -> i 356 - | None -> type_error "int" v 357 358 (** {3 Object Operations} *) 359 ··· 361 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs 362 | _ -> false 363 364 - let find key = function 365 - | `O pairs -> List.assoc_opt key pairs 366 - | _ -> None 367 368 - let get key v = 369 - match find key v with 370 - | Some v -> v 371 - | None -> raise Not_found 372 373 - let keys v = match v with 374 - | `O pairs -> List.map fst pairs 375 - | _ -> type_error "object" v 376 - 377 - let values v = match v with 378 - | `O pairs -> List.map snd pairs 379 - | _ -> type_error "object" v 380 381 let update key value = function 382 | `O pairs -> 383 let rec go = function 384 - | [] -> [(key, value)] 385 | (k, _) :: rest when k = key -> (key, value) :: rest 386 | kv :: rest -> kv :: go rest 387 in ··· 393 | v -> type_error "object" v 394 395 let combine v1 v2 = 396 - match v1, v2 with 397 | `O o1, `O o2 -> `O (o1 @ o2) 398 | `O _, _ -> type_error "object" v2 399 | _, _ -> type_error "object" v1 400 401 (** {3 List Operations} *) 402 403 - let map f = function 404 - | `A l -> `A (List.map f l) 405 - | v -> type_error "list" v 406 - 407 - let mapi f = function 408 - | `A l -> `A (List.mapi f l) 409 - | v -> type_error "list" v 410 411 let filter pred = function 412 | `A l -> `A (List.filter pred l) ··· 416 | `A l -> List.fold_left f init l 417 | v -> type_error "list" v 418 419 - let nth n = function 420 - | `A l -> List.nth_opt l n 421 - | _ -> None 422 - 423 - let length = function 424 - | `A l -> List.length l 425 - | `O o -> List.length o 426 - | _ -> 0 427 428 let flatten = function 429 - | `A l -> 430 - `A (List.concat_map (function `A inner -> inner | v -> [v]) l) 431 | v -> type_error "list" v 432 433 (** {3 Path Operations} *) ··· 435 let rec get_path path v = 436 match path with 437 | [] -> Some v 438 - | key :: rest -> 439 - match find key v with 440 - | Some child -> get_path rest child 441 - | None -> None 442 443 let get_path_exn path v = 444 - match get_path path v with 445 - | Some v -> v 446 - | None -> raise Not_found 447 448 (** {3 Iteration} *) 449 ··· 451 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs 452 | v -> type_error "object" v 453 454 - let iter_list f = function 455 - | `A l -> List.iter f l 456 - | v -> type_error "list" v 457 458 let fold_obj f init = function 459 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs ··· 472 (** {3 Conversion Helpers} *) 473 474 let to_bool ?default v = 475 - match as_bool v, default with 476 | Some b, _ -> b 477 | None, Some d -> d 478 | None, None -> type_error "bool" v 479 480 let to_int ?default v = 481 - match as_int v, default with 482 | Some i, _ -> i 483 | None, Some d -> d 484 | None, None -> type_error "int" v 485 486 let to_float ?default v = 487 - match as_float v, default with 488 | Some f, _ -> f 489 | None, Some d -> d 490 | None, None -> type_error "float" v 491 492 let to_string ?default v = 493 - match as_string v, default with 494 | Some s, _ -> s 495 | None, Some d -> d 496 | None, None -> type_error "string" v 497 498 let to_list ?default v = 499 - match as_list v, default with 500 | Some l, _ -> l 501 | None, Some d -> d 502 | None, None -> type_error "list" v 503 end 504 - 505 506 (** {2 Stream - Low-Level Event API} *) 507 ··· 521 type position = Position.t 522 (** A position in the source (line, column, byte offset). *) 523 524 - (** Result of parsing an event. *) 525 type event_result = { 526 event : event; 527 start_pos : position; 528 end_pos : position; 529 } 530 531 (** {3 Parsing} *) 532 533 type parser = Parser.t 534 (** A streaming YAML parser. *) 535 536 - let parser s = Parser.of_string s 537 (** Create a parser from a string. *) 538 539 let next p = 540 match Parser.next p with 541 | Some { event; span } -> 542 - Some { 543 - event; 544 - start_pos = span.start; 545 - end_pos = span.stop; 546 - } 547 | None -> None 548 - (** Get the next event from the parser. 549 - Returns [None] when parsing is complete. *) 550 551 let iter f p = 552 let rec go () = 553 match next p with ··· 557 | None -> () 558 in 559 go () 560 - (** Iterate over all events from the parser. *) 561 562 let fold f init p = 563 let rec go acc = 564 match Parser.next p with ··· 566 | None -> acc 567 in 568 go init 569 - (** Fold over all events from the parser. *) 570 571 (** {3 Emitting} *) 572 573 type emitter = Emitter.t 574 (** A streaming YAML emitter. *) 575 576 - let emitter ?len:_ () = Emitter.create () 577 (** Create a new emitter. *) 578 579 - let contents e = Emitter.contents e 580 (** Get the emitted YAML string. *) 581 582 - let emit e ev = Emitter.emit e ev 583 (** Emit an event. *) 584 585 (** {3 Event Emission Helpers} *) 586 587 let stream_start e enc = 588 Emitter.emit e (Event.Stream_start { encoding = enc }) 589 590 - let stream_end e = 591 - Emitter.emit e Event.Stream_end 592 593 let document_start e ?version ?(implicit = true) () = 594 - let version = match version with 595 | Some `V1_1 -> Some (1, 1) 596 | Some `V1_2 -> Some (1, 2) 597 | None -> None ··· 602 Emitter.emit e (Event.Document_end { implicit }) 603 604 let scalar e ?anchor ?tag ?(style = `Any) value = 605 - Emitter.emit e (Event.Scalar { 606 - anchor; 607 - tag; 608 - value; 609 - plain_implicit = true; 610 - quoted_implicit = true; 611 - style; 612 - }) 613 614 - let alias e name = 615 - Emitter.emit e (Event.Alias { anchor = name }) 616 617 let sequence_start e ?anchor ?tag ?(style = `Any) () = 618 - Emitter.emit e (Event.Sequence_start { 619 - anchor; 620 - tag; 621 - implicit = true; 622 - style; 623 - }) 624 625 - let sequence_end e = 626 - Emitter.emit e Event.Sequence_end 627 628 let mapping_start e ?anchor ?tag ?(style = `Any) () = 629 - Emitter.emit e (Event.Mapping_start { 630 - anchor; 631 - tag; 632 - implicit = true; 633 - style; 634 - }) 635 636 - let mapping_end e = 637 - Emitter.emit e Event.Mapping_end 638 end 639 - 640 641 (** {2 Internal Modules} *) 642 643 - (** These modules are exposed for advanced use cases requiring 644 - fine-grained control over parsing, emission, or data structures. 645 646 For typical usage, prefer the top-level functions and {!Util}. *) 647 648 - (** Source position tracking. *) 649 module Position = Position 650 651 (** Source span (range of positions). *) 652 - module Span = Span 653 654 (** Block scalar chomping modes. *) 655 - module Chomping = Chomping 656 657 - (** YAML type tags. *) 658 module Tag = Tag 659 660 - (** JSON-compatible value type and operations. *) 661 module Value = Value 662 663 - (** YAML scalar with metadata. *) 664 module Scalar = Scalar 665 666 (** YAML sequence with metadata. *) 667 - module Sequence = Sequence 668 669 (** YAML mapping with metadata. *) 670 - module Mapping = Mapping 671 672 - (** Full YAML value type. *) 673 module Yaml = Yaml 674 675 - (** YAML document with directives. *) 676 module Document = Document 677 678 - (** Lexical tokens. *) 679 module Token = Token 680 681 (** Lexical scanner. *) 682 - module Scanner = Scanner 683 684 - (** Parser events. *) 685 module Event = Event 686 687 (** Event-based parser. *) 688 - module Parser = Parser 689 690 (** Document loader. *) 691 - module Loader = Loader 692 693 - (** Event-based emitter. *) 694 module Emitter = Emitter 695 696 - (** Input stream utilities. *) 697 module Input = Input 698 699 (** Buffer serialization utilities. *) 700 - module Serialize = Serialize
··· 11 12 exception Yamlrw_error = Error.Yamlrw_error 13 14 (** {2 Core Types} *) 15 16 + type value = 17 + [ `Null (** YAML null, ~, or empty values *) 18 + | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *) 19 + | `Float of float (** All YAML numbers (integers stored as floats) *) 20 + | `String of string (** YAML strings *) 21 + | `A of value list (** YAML sequences/arrays *) 22 + | `O of (string * value) list (** YAML mappings/objects with string keys *) 23 + ] 24 (** JSON-compatible YAML representation. Use this for simple data interchange. 25 26 This type is structurally equivalent to {!Value.t} and compatible with the 27 + ezjsonm representation. For additional operations, see {!Value} and {!Util}. 28 + *) 29 30 + type yaml = 31 + [ `Scalar of Scalar.t (** YAML scalar value with style and metadata *) 32 + | `Alias of string (** Alias reference to an anchored node *) 33 + | `A of yaml Sequence.t (** YAML sequence with style and metadata *) 34 + | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *) 35 + ] 36 (** Full YAML representation preserving anchors, tags, and aliases. 37 38 This type is structurally equivalent to {!Yaml.t}. Use this when you need ··· 40 type tags for custom types, scalar styles (plain, quoted, literal, folded), 41 and collection styles (block vs flow). 42 43 + For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and 44 + {!Mapping}. *) 45 46 + type document = { 47 + version : (int * int) option; 48 + (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *) 49 + tags : (string * string) list; 50 + (** TAG directives mapping handles to prefixes *) 51 + root : yaml option; (** Root content of the document *) 52 + implicit_start : bool; 53 + (** Whether the document start marker (---) is implicit *) 54 + implicit_end : bool; (** Whether the document end marker (...) is implicit *) 55 + } 56 (** A YAML document with directives and metadata. 57 58 This type is structurally equivalent to {!Document.t}. A YAML stream can 59 contain multiple documents, each separated by document markers. 60 61 For additional operations, see {!Document}. *) 62 63 (** {2 Character Encoding} *) 64 65 module Encoding = Encoding 66 67 (** {2 Parsing} *) 68 ··· 74 (** Default maximum alias nesting depth (100). *) 75 let default_max_alias_depth = Yaml.default_max_alias_depth 76 77 (** Parse a YAML string into a JSON-compatible value. 78 79 @param resolve_aliases Whether to expand aliases (default: true) 80 @param max_nodes Maximum nodes during alias expansion (default: 10M) 81 @param max_depth Maximum alias nesting depth (default: 100) 82 @raise Yamlrw_error on parse error or if multiple documents found *) 83 + let of_string ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes) 84 + ?(max_depth = default_max_alias_depth) s : value = 85 + (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value) 86 87 (** Parse a YAML string preserving full YAML metadata (anchors, tags, etc). 88 89 By default, aliases are NOT resolved, preserving the document structure. ··· 92 @param max_nodes Maximum nodes during alias expansion (default: 10M) 93 @param max_depth Maximum alias nesting depth (default: 100) 94 @raise Yamlrw_error on parse error or if multiple documents found *) 95 + let yaml_of_string ?(resolve_aliases = false) 96 + ?(max_nodes = default_max_alias_nodes) 97 + ?(max_depth = default_max_alias_depth) s : yaml = 98 + (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml) 99 100 (** Parse a multi-document YAML stream. 101 102 + Use this when your YAML input contains multiple documents separated by 103 + document markers (---). 104 105 @raise Yamlrw_error on parse error *) 106 + let documents_of_string s : document list = 107 + let docs = Loader.documents_of_string s in 108 + List.map 109 + (fun (d : Document.t) : document -> 110 + { 111 + version = d.version; 112 + tags = d.tags; 113 + root = (d.root :> yaml option); 114 + implicit_start = d.implicit_start; 115 + implicit_end = d.implicit_end; 116 + }) 117 + docs 118 119 (** {2 Formatting Styles} *) 120 121 module Scalar_style = Scalar_style 122 module Layout_style = Layout_style 123 124 (** {2 Serialization} *) 125 126 let make_config ~encoding ~scalar_style ~layout_style = 127 { Emitter.default_config with encoding; scalar_style; layout_style } 128 129 (** Serialize a value to a buffer. 130 131 @param encoding Output encoding (default: UTF-8) 132 @param scalar_style Preferred scalar style (default: Any) 133 @param layout_style Preferred layout style (default: Any) 134 + @param buffer 135 + Optional buffer to append to (allocates new one if not provided) 136 @return The buffer containing the serialized YAML *) 137 + let to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any) 138 + ?buffer (value : value) = 139 + let config = make_config ~encoding ~scalar_style ~layout_style in 140 + Serialize.value_to_buffer ~config ?buffer (value :> Value.t) 141 142 (** Serialize a value to a YAML string. 143 144 @param encoding Output encoding (default: UTF-8) 145 @param scalar_style Preferred scalar style (default: Any) 146 @param layout_style Preferred layout style (default: Any) *) 147 + let to_string ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any) 148 + (value : value) = 149 + Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value) 150 151 (** Serialize a full YAML value to a buffer. 152 153 @param encoding Output encoding (default: UTF-8) 154 @param scalar_style Preferred scalar style (default: Any) 155 @param layout_style Preferred layout style (default: Any) 156 + @param buffer 157 + Optional buffer to append to (allocates new one if not provided) 158 @return The buffer containing the serialized YAML *) 159 + let yaml_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any) 160 + ?(layout_style = `Any) ?buffer (yaml : yaml) = 161 + let config = make_config ~encoding ~scalar_style ~layout_style in 162 + Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t) 163 164 (** Serialize a full YAML value to a string. 165 166 @param encoding Output encoding (default: UTF-8) 167 @param scalar_style Preferred scalar style (default: Any) 168 @param layout_style Preferred layout style (default: Any) *) 169 + let yaml_to_string ?(encoding = `Utf8) ?(scalar_style = `Any) 170 + ?(layout_style = `Any) (yaml : yaml) = 171 + Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml) 172 173 (** Serialize multiple documents to a buffer. 174 175 @param encoding Output encoding (default: UTF-8) 176 @param scalar_style Preferred scalar style (default: Any) 177 @param layout_style Preferred layout style (default: Any) 178 @param resolve_aliases Whether to expand aliases (default: true) 179 + @param buffer 180 + Optional buffer to append to (allocates new one if not provided) 181 @return The buffer containing the serialized YAML *) 182 + let documents_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any) 183 + ?(layout_style = `Any) ?(resolve_aliases = true) ?buffer 184 (documents : document list) = 185 + let config = make_config ~encoding ~scalar_style ~layout_style in 186 + let docs' = 187 + List.map 188 + (fun (d : document) : Document.t -> 189 + { 190 + Document.version = d.version; 191 + Document.tags = d.tags; 192 + Document.root = (d.root :> Yaml.t option); 193 + Document.implicit_start = d.implicit_start; 194 + Document.implicit_end = d.implicit_end; 195 + }) 196 + documents 197 + in 198 + Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs' 199 + 200 (** Serialize multiple documents to a YAML stream. 201 202 @param encoding Output encoding (default: UTF-8) 203 @param scalar_style Preferred scalar style (default: Any) 204 @param layout_style Preferred layout style (default: Any) 205 @param resolve_aliases Whether to expand aliases (default: true) *) 206 + let documents_to_string ?(encoding = `Utf8) ?(scalar_style = `Any) 207 + ?(layout_style = `Any) ?(resolve_aliases = true) (documents : document list) 208 + = 209 + Buffer.contents 210 + (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases 211 + documents) 212 213 (** {2 Buffer Parsing} *) 214 215 (** Parse YAML from a buffer into a JSON-compatible value. 216 217 @param resolve_aliases Whether to expand aliases (default: true) 218 @param max_nodes Maximum nodes during alias expansion (default: 10M) 219 @param max_depth Maximum alias nesting depth (default: 100) 220 @raise Yamlrw_error on parse error or if multiple documents found *) 221 + let of_buffer ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes) 222 + ?(max_depth = default_max_alias_depth) buffer : value = 223 + of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 224 225 (** Parse YAML from a buffer preserving full YAML metadata. 226 227 @param resolve_aliases Whether to expand aliases (default: false) 228 @param max_nodes Maximum nodes during alias expansion (default: 10M) 229 @param max_depth Maximum alias nesting depth (default: 100) 230 @raise Yamlrw_error on parse error or if multiple documents found *) 231 + let yaml_of_buffer ?(resolve_aliases = false) 232 + ?(max_nodes = default_max_alias_nodes) 233 + ?(max_depth = default_max_alias_depth) buffer : yaml = 234 + yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 235 236 (** Parse a multi-document YAML stream from a buffer. 237 238 @raise Yamlrw_error on parse error *) 239 + let documents_of_buffer buffer : document list = 240 + documents_of_string (Buffer.contents buffer) 241 242 (** {2 Conversion} *) 243 244 (** Convert full YAML to JSON-compatible value. 245 246 @param resolve_aliases Whether to expand aliases (default: true) 247 @param max_nodes Maximum nodes during alias expansion (default: 10M) 248 @param max_depth Maximum alias nesting depth (default: 100) 249 @raise Yamlrw_error if alias limits exceeded or complex keys found *) 250 + let to_json ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes) 251 + ?(max_depth = default_max_alias_depth) (yaml : yaml) : value = 252 + (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth 253 + (yaml :> Yaml.t) 254 + :> value) 255 256 (** Convert JSON-compatible value to full YAML representation. *) 257 + let of_json (value : value) : yaml = (Yaml.of_value (value :> Value.t) :> yaml) 258 259 (** {2 Pretty Printing & Equality} *) 260 261 + (** Pretty-print a value. *) 262 let pp = Value.pp 263 264 (** Test equality of two values. *) 265 + let equal = Value.equal 266 267 (** {2 Util - Value Combinators} *) 268 269 module Util = struct 270 (** Combinators for working with {!type:value} values. 271 272 + This module provides constructors, accessors, and transformations for 273 + JSON-compatible YAML values. *) 274 275 type t = Value.t 276 ··· 325 let get_string v = match v with `String s -> s | _ -> type_error "string" v 326 let get_list v = match v with `A l -> l | _ -> type_error "list" v 327 let get_obj v = match v with `O o -> o | _ -> type_error "object" v 328 + let get_int v = match as_int v with Some i -> i | None -> type_error "int" v 329 330 (** {3 Object Operations} *) 331 ··· 333 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs 334 | _ -> false 335 336 + let find key = function `O pairs -> List.assoc_opt key pairs | _ -> None 337 + let get key v = match find key v with Some v -> v | None -> raise Not_found 338 339 + let keys v = 340 + match v with `O pairs -> List.map fst pairs | _ -> type_error "object" v 341 342 + let values v = 343 + match v with `O pairs -> List.map snd pairs | _ -> type_error "object" v 344 345 let update key value = function 346 | `O pairs -> 347 let rec go = function 348 + | [] -> [ (key, value) ] 349 | (k, _) :: rest when k = key -> (key, value) :: rest 350 | kv :: rest -> kv :: go rest 351 in ··· 357 | v -> type_error "object" v 358 359 let combine v1 v2 = 360 + match (v1, v2) with 361 | `O o1, `O o2 -> `O (o1 @ o2) 362 | `O _, _ -> type_error "object" v2 363 | _, _ -> type_error "object" v1 364 365 (** {3 List Operations} *) 366 367 + let map f = function `A l -> `A (List.map f l) | v -> type_error "list" v 368 + let mapi f = function `A l -> `A (List.mapi f l) | v -> type_error "list" v 369 370 let filter pred = function 371 | `A l -> `A (List.filter pred l) ··· 375 | `A l -> List.fold_left f init l 376 | v -> type_error "list" v 377 378 + let nth n = function `A l -> List.nth_opt l n | _ -> None 379 + let length = function `A l -> List.length l | `O o -> List.length o | _ -> 0 380 381 let flatten = function 382 + | `A l -> `A (List.concat_map (function `A inner -> inner | v -> [ v ]) l) 383 | v -> type_error "list" v 384 385 (** {3 Path Operations} *) ··· 387 let rec get_path path v = 388 match path with 389 | [] -> Some v 390 + | key :: rest -> ( 391 + match find key v with Some child -> get_path rest child | None -> None) 392 393 let get_path_exn path v = 394 + match get_path path v with Some v -> v | None -> raise Not_found 395 396 (** {3 Iteration} *) 397 ··· 399 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs 400 | v -> type_error "object" v 401 402 + let iter_list f = function `A l -> List.iter f l | v -> type_error "list" v 403 404 let fold_obj f init = function 405 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs ··· 418 (** {3 Conversion Helpers} *) 419 420 let to_bool ?default v = 421 + match (as_bool v, default) with 422 | Some b, _ -> b 423 | None, Some d -> d 424 | None, None -> type_error "bool" v 425 426 let to_int ?default v = 427 + match (as_int v, default) with 428 | Some i, _ -> i 429 | None, Some d -> d 430 | None, None -> type_error "int" v 431 432 let to_float ?default v = 433 + match (as_float v, default) with 434 | Some f, _ -> f 435 | None, Some d -> d 436 | None, None -> type_error "float" v 437 438 let to_string ?default v = 439 + match (as_string v, default) with 440 | Some s, _ -> s 441 | None, Some d -> d 442 | None, None -> type_error "string" v 443 444 let to_list ?default v = 445 + match (as_list v, default) with 446 | Some l, _ -> l 447 | None, Some d -> d 448 | None, None -> type_error "list" v 449 end 450 451 (** {2 Stream - Low-Level Event API} *) 452 ··· 466 type position = Position.t 467 (** A position in the source (line, column, byte offset). *) 468 469 type event_result = { 470 event : event; 471 start_pos : position; 472 end_pos : position; 473 } 474 + (** Result of parsing an event. *) 475 476 (** {3 Parsing} *) 477 478 type parser = Parser.t 479 (** A streaming YAML parser. *) 480 481 (** Create a parser from a string. *) 482 + let parser s = Parser.of_string s 483 484 + (** Get the next event from the parser. Returns [None] when parsing is 485 + complete. *) 486 let next p = 487 match Parser.next p with 488 | Some { event; span } -> 489 + Some { event; start_pos = span.start; end_pos = span.stop } 490 | None -> None 491 492 + (** Iterate over all events from the parser. *) 493 let iter f p = 494 let rec go () = 495 match next p with ··· 499 | None -> () 500 in 501 go () 502 503 + (** Fold over all events from the parser. *) 504 let fold f init p = 505 let rec go acc = 506 match Parser.next p with ··· 508 | None -> acc 509 in 510 go init 511 512 (** {3 Emitting} *) 513 514 type emitter = Emitter.t 515 (** A streaming YAML emitter. *) 516 517 (** Create a new emitter. *) 518 + let emitter ?len:_ () = Emitter.create () 519 520 (** Get the emitted YAML string. *) 521 + let contents e = Emitter.contents e 522 523 (** Emit an event. *) 524 + let emit e ev = Emitter.emit e ev 525 526 (** {3 Event Emission Helpers} *) 527 528 let stream_start e enc = 529 Emitter.emit e (Event.Stream_start { encoding = enc }) 530 531 + let stream_end e = Emitter.emit e Event.Stream_end 532 533 let document_start e ?version ?(implicit = true) () = 534 + let version = 535 + match version with 536 | Some `V1_1 -> Some (1, 1) 537 | Some `V1_2 -> Some (1, 2) 538 | None -> None ··· 543 Emitter.emit e (Event.Document_end { implicit }) 544 545 let scalar e ?anchor ?tag ?(style = `Any) value = 546 + Emitter.emit e 547 + (Event.Scalar 548 + { 549 + anchor; 550 + tag; 551 + value; 552 + plain_implicit = true; 553 + quoted_implicit = true; 554 + style; 555 + }) 556 557 + let alias e name = Emitter.emit e (Event.Alias { anchor = name }) 558 559 let sequence_start e ?anchor ?tag ?(style = `Any) () = 560 + Emitter.emit e 561 + (Event.Sequence_start { anchor; tag; implicit = true; style }) 562 563 + let sequence_end e = Emitter.emit e Event.Sequence_end 564 565 let mapping_start e ?anchor ?tag ?(style = `Any) () = 566 + Emitter.emit e (Event.Mapping_start { anchor; tag; implicit = true; style }) 567 568 + let mapping_end e = Emitter.emit e Event.Mapping_end 569 end 570 571 (** {2 Internal Modules} *) 572 573 + (** These modules are exposed for advanced use cases requiring fine-grained 574 + control over parsing, emission, or data structures. 575 576 For typical usage, prefer the top-level functions and {!Util}. *) 577 578 module Position = Position 579 + (** Source position tracking. *) 580 581 + module Span = Span 582 (** Source span (range of positions). *) 583 584 + module Chomping = Chomping 585 (** Block scalar chomping modes. *) 586 587 module Tag = Tag 588 + (** YAML type tags. *) 589 590 module Value = Value 591 + (** JSON-compatible value type and operations. *) 592 593 module Scalar = Scalar 594 + (** YAML scalar with metadata. *) 595 596 + module Sequence = Sequence 597 (** YAML sequence with metadata. *) 598 599 + module Mapping = Mapping 600 (** YAML mapping with metadata. *) 601 602 module Yaml = Yaml 603 + (** Full YAML value type. *) 604 605 module Document = Document 606 + (** YAML document with directives. *) 607 608 module Token = Token 609 + (** Lexical tokens. *) 610 611 + module Scanner = Scanner 612 (** Lexical scanner. *) 613 614 module Event = Event 615 + (** Parser events. *) 616 617 + module Parser = Parser 618 (** Event-based parser. *) 619 620 + module Loader = Loader 621 (** Document loader. *) 622 623 module Emitter = Emitter 624 + (** Event-based emitter. *) 625 626 module Input = Input 627 + (** Input stream utilities. *) 628 629 + module Serialize = Serialize 630 (** Buffer serialization utilities. *)
+96 -90
lib/yamlrw.mli
··· 32 let age = Yamlrw.Util.(get_int (get "age" value)) in 33 ]} *) 34 35 - 36 (** {2 Error Handling} *) 37 38 module Error = Error ··· 40 exception Yamlrw_error of Error.t 41 (** Raised on parse or emit errors. *) 42 43 - 44 (** {2 Core Types} *) 45 46 - type value = [ 47 - | `Null (** YAML null, ~, or empty values *) 48 - | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *) 49 - | `Float of float (** All YAML numbers (integers stored as floats) *) 50 - | `String of string (** YAML strings *) 51 - | `A of value list (** YAML sequences/arrays *) 52 - | `O of (string * value) list (** YAML mappings/objects with string keys *) 53 - ] 54 (** JSON-compatible YAML representation. Use this for simple data interchange. 55 56 This type is structurally equivalent to {!Value.t} and compatible with the 57 - ezjsonm representation. For additional operations, see {!Value} and {!Util}. *) 58 59 - type yaml = [ 60 - | `Scalar of Scalar.t (** YAML scalar value with style and metadata *) 61 - | `Alias of string (** Alias reference to an anchored node *) 62 - | `A of yaml Sequence.t (** YAML sequence with style and metadata *) 63 - | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *) 64 - ] 65 (** Full YAML representation preserving anchors, tags, and aliases. 66 67 This type is structurally equivalent to {!Yaml.t}. Use this when you need ··· 69 type tags for custom types, scalar styles (plain, quoted, literal, folded), 70 and collection styles (block vs flow). 71 72 - For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *) 73 74 type document = { 75 - version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *) 76 - tags : (string * string) list; (** TAG directives mapping handles to prefixes *) 77 - root : yaml option; (** Root content of the document *) 78 - implicit_start : bool; (** Whether the document start marker (---) is implicit *) 79 - implicit_end : bool; (** Whether the document end marker (...) is implicit *) 80 } 81 (** A YAML document with directives and metadata. 82 ··· 84 contain multiple documents, each separated by document markers. 85 86 For additional operations, see {!Document}. *) 87 - 88 89 (** {2 Character Encoding} *) 90 91 module Encoding = Encoding 92 93 - 94 (** {2 Parsing} *) 95 96 type version = [ `V1_1 | `V1_2 ] ··· 103 (** Default maximum alias nesting depth (100). *) 104 105 val of_string : 106 - ?resolve_aliases:bool -> 107 - ?max_nodes:int -> 108 - ?max_depth:int -> 109 - string -> value 110 (** Parse a YAML string into a JSON-compatible value. 111 112 @param resolve_aliases Whether to expand aliases (default: true) ··· 115 @raise Yamlrw_error on parse error or if multiple documents found *) 116 117 val yaml_of_string : 118 - ?resolve_aliases:bool -> 119 - ?max_nodes:int -> 120 - ?max_depth:int -> 121 - string -> yaml 122 (** Parse a YAML string preserving full YAML metadata (anchors, tags, etc). 123 124 By default, aliases are NOT resolved, preserving the document structure. ··· 131 val documents_of_string : string -> document list 132 (** Parse a multi-document YAML stream. 133 134 - Use this when your YAML input contains multiple documents separated 135 - by document markers (---). 136 137 @raise Yamlrw_error on parse error *) 138 139 - 140 (** {2 Formatting Styles} *) 141 142 module Scalar_style = Scalar_style 143 - 144 module Layout_style = Layout_style 145 - 146 147 (** {2 Serialization} *) 148 ··· 151 ?scalar_style:Scalar_style.t -> 152 ?layout_style:Layout_style.t -> 153 ?buffer:Buffer.t -> 154 - value -> Buffer.t 155 (** Serialize a value to a buffer. 156 157 @param encoding Output encoding (default: UTF-8) 158 @param scalar_style Preferred scalar style (default: Any) 159 @param layout_style Preferred layout style (default: Any) 160 - @param buffer Optional buffer to append to (allocates new one if not provided) 161 @return The buffer containing the serialized YAML *) 162 163 val to_string : 164 ?encoding:Encoding.t -> 165 ?scalar_style:Scalar_style.t -> 166 ?layout_style:Layout_style.t -> 167 - value -> string 168 (** Serialize a value to a YAML string. 169 170 @param encoding Output encoding (default: UTF-8) ··· 176 ?scalar_style:Scalar_style.t -> 177 ?layout_style:Layout_style.t -> 178 ?buffer:Buffer.t -> 179 - yaml -> Buffer.t 180 (** Serialize a full YAML value to a buffer. 181 182 @param encoding Output encoding (default: UTF-8) 183 @param scalar_style Preferred scalar style (default: Any) 184 @param layout_style Preferred layout style (default: Any) 185 - @param buffer Optional buffer to append to (allocates new one if not provided) 186 @return The buffer containing the serialized YAML *) 187 188 val yaml_to_string : 189 ?encoding:Encoding.t -> 190 ?scalar_style:Scalar_style.t -> 191 ?layout_style:Layout_style.t -> 192 - yaml -> string 193 (** Serialize a full YAML value to a string. 194 195 @param encoding Output encoding (default: UTF-8) ··· 202 ?layout_style:Layout_style.t -> 203 ?resolve_aliases:bool -> 204 ?buffer:Buffer.t -> 205 - document list -> Buffer.t 206 (** Serialize multiple documents to a buffer. 207 208 @param encoding Output encoding (default: UTF-8) 209 @param scalar_style Preferred scalar style (default: Any) 210 @param layout_style Preferred layout style (default: Any) 211 @param resolve_aliases Whether to expand aliases (default: true) 212 - @param buffer Optional buffer to append to (allocates new one if not provided) 213 @return The buffer containing the serialized YAML *) 214 215 val documents_to_string : ··· 217 ?scalar_style:Scalar_style.t -> 218 ?layout_style:Layout_style.t -> 219 ?resolve_aliases:bool -> 220 - document list -> string 221 (** Serialize multiple documents to a YAML stream. 222 223 @param encoding Output encoding (default: UTF-8) ··· 228 (** {2 Buffer Parsing} *) 229 230 val of_buffer : 231 - ?resolve_aliases:bool -> 232 - ?max_nodes:int -> 233 - ?max_depth:int -> 234 - Buffer.t -> value 235 (** Parse YAML from a buffer into a JSON-compatible value. 236 237 @param resolve_aliases Whether to expand aliases (default: true) ··· 240 @raise Yamlrw_error on parse error or if multiple documents found *) 241 242 val yaml_of_buffer : 243 - ?resolve_aliases:bool -> 244 - ?max_nodes:int -> 245 - ?max_depth:int -> 246 - Buffer.t -> yaml 247 (** Parse YAML from a buffer preserving full YAML metadata. 248 249 @param resolve_aliases Whether to expand aliases (default: false) ··· 256 257 @raise Yamlrw_error on parse error *) 258 259 - 260 (** {2 Conversion} *) 261 262 val to_json : 263 - ?resolve_aliases:bool -> 264 - ?max_nodes:int -> 265 - ?max_depth:int -> 266 - yaml -> value 267 (** Convert full YAML to JSON-compatible value. 268 269 @param resolve_aliases Whether to expand aliases (default: true) ··· 274 val of_json : value -> yaml 275 (** Convert JSON-compatible value to full YAML representation. *) 276 277 - 278 (** {2 Pretty Printing & Equality} *) 279 280 val pp : Format.formatter -> value -> unit ··· 283 val equal : value -> value -> bool 284 (** Test equality of two values. *) 285 286 - 287 (** {2 Util - Value Combinators} 288 289 Combinators for working with {!type:value} values. 290 291 - This module provides constructors, accessors, and transformations 292 - for JSON-compatible YAML values. *) 293 294 module Util : sig 295 type t = Value.t ··· 400 (** {3 Object Operations} *) 401 402 val mem : string -> t -> bool 403 - (** [mem key obj] checks if [key] exists in object [obj]. 404 - Returns [false] if [obj] is not an object. *) 405 406 val find : string -> t -> t option 407 - (** [find key obj] looks up [key] in object [obj]. 408 - Returns [None] if key not found or if [obj] is not an object. *) 409 410 val get : string -> t -> t 411 - (** [get key obj] looks up [key] in object [obj]. 412 - Raises [Not_found] if key not found. *) 413 414 val keys : t -> string list 415 (** Get all keys from an object. ··· 420 @raise Type_error if not an object *) 421 422 val update : string -> t -> t -> t 423 - (** [update key value obj] sets [key] to [value] in [obj]. 424 - Adds the key if it doesn't exist. 425 @raise Type_error if [obj] is not an object *) 426 427 val remove : string -> t -> t ··· 429 @raise Type_error if [obj] is not an object *) 430 431 val combine : t -> t -> t 432 - (** [combine obj1 obj2] merges two objects, with [obj2] values taking precedence. 433 @raise Type_error if either argument is not an object *) 434 435 (** {3 List Operations} *) ··· 451 @raise Type_error if [lst] is not a list *) 452 453 val nth : int -> t -> t option 454 - (** [nth n lst] gets element at index [n]. 455 - Returns [None] if [lst] is not a list or index out of bounds. *) 456 457 val length : t -> int 458 (** Get the length of a list or object. Returns 0 for other types. *) 459 460 val flatten : t -> t 461 - (** Flatten a list of lists into a single list. 462 - Non-list elements are kept as-is. 463 @raise Type_error if not a list *) 464 465 (** {3 Path Operations} *) 466 467 val get_path : string list -> t -> t option 468 - (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c]. 469 - Returns [None] if any key is not found. *) 470 471 val get_path_exn : string list -> t -> t 472 (** Like {!get_path} but raises [Not_found] if path not found. *) ··· 521 @raise Type_error if type doesn't match and no default provided *) 522 end 523 524 - 525 (** {2 Stream - Low-Level Event API} 526 527 Low-level streaming API for event-based YAML processing. ··· 532 - Fine-grained control over YAML emission *) 533 534 module Stream : sig 535 - 536 (** {3 Event Types} *) 537 538 type event = Event.t ··· 557 (** Create a parser from a string. *) 558 559 val next : parser -> event_result option 560 - (** Get the next event from the parser. 561 - Returns [None] when parsing is complete. *) 562 563 val iter : (event -> position -> position -> unit) -> parser -> unit 564 (** [iter f parser] calls [f event start_pos end_pos] for each event. *) ··· 589 val stream_end : emitter -> unit 590 (** Emit a stream end event. *) 591 592 - val document_start : emitter -> ?version:version -> ?implicit:bool -> unit -> unit 593 (** Emit a document start event. 594 @param version YAML version directive 595 @param implicit Whether start marker is implicit (default: true) *) ··· 598 (** Emit a document end event. 599 @param implicit Whether end marker is implicit (default: true) *) 600 601 - val scalar : emitter -> ?anchor:string -> ?tag:string -> ?style:Scalar_style.t -> string -> unit 602 (** Emit a scalar value. 603 @param anchor Optional anchor name 604 @param tag Optional type tag ··· 607 val alias : emitter -> string -> unit 608 (** Emit an alias reference. *) 609 610 - val sequence_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit 611 (** Emit a sequence start event. 612 @param anchor Optional anchor name 613 @param tag Optional type tag ··· 616 val sequence_end : emitter -> unit 617 (** Emit a sequence end event. *) 618 619 - val mapping_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit 620 (** Emit a mapping start event. 621 @param anchor Optional anchor name 622 @param tag Optional type tag ··· 626 (** Emit a mapping end event. *) 627 end 628 629 - 630 (** {2 Internal Modules} 631 632 - These modules are exposed for advanced use cases requiring 633 - fine-grained control over parsing, emission, or data structures. 634 635 For typical usage, prefer the top-level functions and {!Util}. *) 636
··· 32 let age = Yamlrw.Util.(get_int (get "age" value)) in 33 ]} *) 34 35 (** {2 Error Handling} *) 36 37 module Error = Error ··· 39 exception Yamlrw_error of Error.t 40 (** Raised on parse or emit errors. *) 41 42 (** {2 Core Types} *) 43 44 + type value = 45 + [ `Null (** YAML null, ~, or empty values *) 46 + | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *) 47 + | `Float of float (** All YAML numbers (integers stored as floats) *) 48 + | `String of string (** YAML strings *) 49 + | `A of value list (** YAML sequences/arrays *) 50 + | `O of (string * value) list (** YAML mappings/objects with string keys *) 51 + ] 52 (** JSON-compatible YAML representation. Use this for simple data interchange. 53 54 This type is structurally equivalent to {!Value.t} and compatible with the 55 + ezjsonm representation. For additional operations, see {!Value} and {!Util}. 56 + *) 57 58 + type yaml = 59 + [ `Scalar of Scalar.t (** YAML scalar value with style and metadata *) 60 + | `Alias of string (** Alias reference to an anchored node *) 61 + | `A of yaml Sequence.t (** YAML sequence with style and metadata *) 62 + | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *) 63 + ] 64 (** Full YAML representation preserving anchors, tags, and aliases. 65 66 This type is structurally equivalent to {!Yaml.t}. Use this when you need ··· 68 type tags for custom types, scalar styles (plain, quoted, literal, folded), 69 and collection styles (block vs flow). 70 71 + For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and 72 + {!Mapping}. *) 73 74 type document = { 75 + version : (int * int) option; 76 + (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *) 77 + tags : (string * string) list; 78 + (** TAG directives mapping handles to prefixes *) 79 + root : yaml option; (** Root content of the document *) 80 + implicit_start : bool; 81 + (** Whether the document start marker (---) is implicit *) 82 + implicit_end : bool; (** Whether the document end marker (...) is implicit *) 83 } 84 (** A YAML document with directives and metadata. 85 ··· 87 contain multiple documents, each separated by document markers. 88 89 For additional operations, see {!Document}. *) 90 91 (** {2 Character Encoding} *) 92 93 module Encoding = Encoding 94 95 (** {2 Parsing} *) 96 97 type version = [ `V1_1 | `V1_2 ] ··· 104 (** Default maximum alias nesting depth (100). *) 105 106 val of_string : 107 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> value 108 (** Parse a YAML string into a JSON-compatible value. 109 110 @param resolve_aliases Whether to expand aliases (default: true) ··· 113 @raise Yamlrw_error on parse error or if multiple documents found *) 114 115 val yaml_of_string : 116 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> string -> yaml 117 (** Parse a YAML string preserving full YAML metadata (anchors, tags, etc). 118 119 By default, aliases are NOT resolved, preserving the document structure. ··· 126 val documents_of_string : string -> document list 127 (** Parse a multi-document YAML stream. 128 129 + Use this when your YAML input contains multiple documents separated by 130 + document markers (---). 131 132 @raise Yamlrw_error on parse error *) 133 134 (** {2 Formatting Styles} *) 135 136 module Scalar_style = Scalar_style 137 module Layout_style = Layout_style 138 139 (** {2 Serialization} *) 140 ··· 143 ?scalar_style:Scalar_style.t -> 144 ?layout_style:Layout_style.t -> 145 ?buffer:Buffer.t -> 146 + value -> 147 + Buffer.t 148 (** Serialize a value to a buffer. 149 150 @param encoding Output encoding (default: UTF-8) 151 @param scalar_style Preferred scalar style (default: Any) 152 @param layout_style Preferred layout style (default: Any) 153 + @param buffer 154 + Optional buffer to append to (allocates new one if not provided) 155 @return The buffer containing the serialized YAML *) 156 157 val to_string : 158 ?encoding:Encoding.t -> 159 ?scalar_style:Scalar_style.t -> 160 ?layout_style:Layout_style.t -> 161 + value -> 162 + string 163 (** Serialize a value to a YAML string. 164 165 @param encoding Output encoding (default: UTF-8) ··· 171 ?scalar_style:Scalar_style.t -> 172 ?layout_style:Layout_style.t -> 173 ?buffer:Buffer.t -> 174 + yaml -> 175 + Buffer.t 176 (** Serialize a full YAML value to a buffer. 177 178 @param encoding Output encoding (default: UTF-8) 179 @param scalar_style Preferred scalar style (default: Any) 180 @param layout_style Preferred layout style (default: Any) 181 + @param buffer 182 + Optional buffer to append to (allocates new one if not provided) 183 @return The buffer containing the serialized YAML *) 184 185 val yaml_to_string : 186 ?encoding:Encoding.t -> 187 ?scalar_style:Scalar_style.t -> 188 ?layout_style:Layout_style.t -> 189 + yaml -> 190 + string 191 (** Serialize a full YAML value to a string. 192 193 @param encoding Output encoding (default: UTF-8) ··· 200 ?layout_style:Layout_style.t -> 201 ?resolve_aliases:bool -> 202 ?buffer:Buffer.t -> 203 + document list -> 204 + Buffer.t 205 (** Serialize multiple documents to a buffer. 206 207 @param encoding Output encoding (default: UTF-8) 208 @param scalar_style Preferred scalar style (default: Any) 209 @param layout_style Preferred layout style (default: Any) 210 @param resolve_aliases Whether to expand aliases (default: true) 211 + @param buffer 212 + Optional buffer to append to (allocates new one if not provided) 213 @return The buffer containing the serialized YAML *) 214 215 val documents_to_string : ··· 217 ?scalar_style:Scalar_style.t -> 218 ?layout_style:Layout_style.t -> 219 ?resolve_aliases:bool -> 220 + document list -> 221 + string 222 (** Serialize multiple documents to a YAML stream. 223 224 @param encoding Output encoding (default: UTF-8) ··· 229 (** {2 Buffer Parsing} *) 230 231 val of_buffer : 232 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> Buffer.t -> value 233 (** Parse YAML from a buffer into a JSON-compatible value. 234 235 @param resolve_aliases Whether to expand aliases (default: true) ··· 238 @raise Yamlrw_error on parse error or if multiple documents found *) 239 240 val yaml_of_buffer : 241 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> Buffer.t -> yaml 242 (** Parse YAML from a buffer preserving full YAML metadata. 243 244 @param resolve_aliases Whether to expand aliases (default: false) ··· 251 252 @raise Yamlrw_error on parse error *) 253 254 (** {2 Conversion} *) 255 256 val to_json : 257 + ?resolve_aliases:bool -> ?max_nodes:int -> ?max_depth:int -> yaml -> value 258 (** Convert full YAML to JSON-compatible value. 259 260 @param resolve_aliases Whether to expand aliases (default: true) ··· 265 val of_json : value -> yaml 266 (** Convert JSON-compatible value to full YAML representation. *) 267 268 (** {2 Pretty Printing & Equality} *) 269 270 val pp : Format.formatter -> value -> unit ··· 273 val equal : value -> value -> bool 274 (** Test equality of two values. *) 275 276 (** {2 Util - Value Combinators} 277 278 Combinators for working with {!type:value} values. 279 280 + This module provides constructors, accessors, and transformations for 281 + JSON-compatible YAML values. *) 282 283 module Util : sig 284 type t = Value.t ··· 389 (** {3 Object Operations} *) 390 391 val mem : string -> t -> bool 392 + (** [mem key obj] checks if [key] exists in object [obj]. Returns [false] if 393 + [obj] is not an object. *) 394 395 val find : string -> t -> t option 396 + (** [find key obj] looks up [key] in object [obj]. Returns [None] if key not 397 + found or if [obj] is not an object. *) 398 399 val get : string -> t -> t 400 + (** [get key obj] looks up [key] in object [obj]. Raises [Not_found] if key 401 + not found. *) 402 403 val keys : t -> string list 404 (** Get all keys from an object. ··· 409 @raise Type_error if not an object *) 410 411 val update : string -> t -> t -> t 412 + (** [update key value obj] sets [key] to [value] in [obj]. Adds the key if it 413 + doesn't exist. 414 @raise Type_error if [obj] is not an object *) 415 416 val remove : string -> t -> t ··· 418 @raise Type_error if [obj] is not an object *) 419 420 val combine : t -> t -> t 421 + (** [combine obj1 obj2] merges two objects, with [obj2] values taking 422 + precedence. 423 @raise Type_error if either argument is not an object *) 424 425 (** {3 List Operations} *) ··· 441 @raise Type_error if [lst] is not a list *) 442 443 val nth : int -> t -> t option 444 + (** [nth n lst] gets element at index [n]. Returns [None] if [lst] is not a 445 + list or index out of bounds. *) 446 447 val length : t -> int 448 (** Get the length of a list or object. Returns 0 for other types. *) 449 450 val flatten : t -> t 451 + (** Flatten a list of lists into a single list. Non-list elements are kept 452 + as-is. 453 @raise Type_error if not a list *) 454 455 (** {3 Path Operations} *) 456 457 val get_path : string list -> t -> t option 458 + (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c]. Returns 459 + [None] if any key is not found. *) 460 461 val get_path_exn : string list -> t -> t 462 (** Like {!get_path} but raises [Not_found] if path not found. *) ··· 511 @raise Type_error if type doesn't match and no default provided *) 512 end 513 514 (** {2 Stream - Low-Level Event API} 515 516 Low-level streaming API for event-based YAML processing. ··· 521 - Fine-grained control over YAML emission *) 522 523 module Stream : sig 524 (** {3 Event Types} *) 525 526 type event = Event.t ··· 545 (** Create a parser from a string. *) 546 547 val next : parser -> event_result option 548 + (** Get the next event from the parser. Returns [None] when parsing is 549 + complete. *) 550 551 val iter : (event -> position -> position -> unit) -> parser -> unit 552 (** [iter f parser] calls [f event start_pos end_pos] for each event. *) ··· 577 val stream_end : emitter -> unit 578 (** Emit a stream end event. *) 579 580 + val document_start : 581 + emitter -> ?version:version -> ?implicit:bool -> unit -> unit 582 (** Emit a document start event. 583 @param version YAML version directive 584 @param implicit Whether start marker is implicit (default: true) *) ··· 587 (** Emit a document end event. 588 @param implicit Whether end marker is implicit (default: true) *) 589 590 + val scalar : 591 + emitter -> 592 + ?anchor:string -> 593 + ?tag:string -> 594 + ?style:Scalar_style.t -> 595 + string -> 596 + unit 597 (** Emit a scalar value. 598 @param anchor Optional anchor name 599 @param tag Optional type tag ··· 602 val alias : emitter -> string -> unit 603 (** Emit an alias reference. *) 604 605 + val sequence_start : 606 + emitter -> 607 + ?anchor:string -> 608 + ?tag:string -> 609 + ?style:Layout_style.t -> 610 + unit -> 611 + unit 612 (** Emit a sequence start event. 613 @param anchor Optional anchor name 614 @param tag Optional type tag ··· 617 val sequence_end : emitter -> unit 618 (** Emit a sequence end event. *) 619 620 + val mapping_start : 621 + emitter -> 622 + ?anchor:string -> 623 + ?tag:string -> 624 + ?style:Layout_style.t -> 625 + unit -> 626 + unit 627 (** Emit a mapping start event. 628 @param anchor Optional anchor name 629 @param tag Optional type tag ··· 633 (** Emit a mapping end event. *) 634 end 635 636 (** {2 Internal Modules} 637 638 + These modules are exposed for advanced use cases requiring fine-grained 639 + control over parsing, emission, or data structures. 640 641 For typical usage, prefer the top-level functions and {!Util}. *) 642
+17 -8
tests/dune
··· 12 13 ; Alias to run the full YAML test suite and generate HTML report 14 ; Requires yaml-test-suite to be cloned to tests/yaml-test-suite 15 (rule 16 (alias yaml-test-suite) 17 - (deps (source_tree yaml-test-suite)) 18 (targets yaml-test-results.html) 19 (action 20 - (run %{exe:run_all_tests.exe} 21 - --test-suite-path %{workspace_root}/tests/yaml-test-suite 22 - --html yaml-test-results.html))) 23 24 (rule 25 (alias yaml-test-suite-eio) 26 - (deps (source_tree yaml-test-suite)) 27 (targets yaml-test-results-eio.html) 28 (action 29 - (run %{exe:run_all_tests_eio.exe} 30 - --test-suite-path %{workspace_root}/tests/yaml-test-suite 31 - --html yaml-test-results-eio.html)))
··· 12 13 ; Alias to run the full YAML test suite and generate HTML report 14 ; Requires yaml-test-suite to be cloned to tests/yaml-test-suite 15 + 16 (rule 17 (alias yaml-test-suite) 18 + (deps 19 + (source_tree yaml-test-suite)) 20 (targets yaml-test-results.html) 21 (action 22 + (run 23 + %{exe:run_all_tests.exe} 24 + --test-suite-path 25 + %{workspace_root}/tests/yaml-test-suite 26 + --html 27 + yaml-test-results.html))) 28 29 (rule 30 (alias yaml-test-suite-eio) 31 + (deps 32 + (source_tree yaml-test-suite)) 33 (targets yaml-test-results-eio.html) 34 (action 35 + (run 36 + %{exe:run_all_tests_eio.exe} 37 + --test-suite-path 38 + %{workspace_root}/tests/yaml-test-suite 39 + --html 40 + yaml-test-results-eio.html)))
+209 -144
tests/run_all_tests.ml
··· 14 (* HTML escape function *) 15 let html_escape s = 16 let buf = Buffer.create (String.length s) in 17 - String.iter (function 18 - | '<' -> Buffer.add_string buf "&lt;" 19 - | '>' -> Buffer.add_string buf "&gt;" 20 - | '&' -> Buffer.add_string buf "&amp;" 21 - | '"' -> Buffer.add_string buf "&quot;" 22 - | c -> Buffer.add_char buf c 23 - ) s; 24 Buffer.contents buf 25 26 let normalize_tree s = ··· 33 name : string; 34 yaml : string; 35 is_error_test : bool; 36 - status : [`Pass | `Fail of string | `Skip]; 37 output : string; 38 - json_status : [`Pass | `Fail of string | `Skip]; 39 json_expected : string; 40 json_actual : string; 41 } ··· 45 This handles formatting differences and object key ordering. *) 46 JC.compare_json_strings expected actual 47 48 - let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string = 49 match test.json with 50 | None -> (`Skip, "") 51 - | Some expected_json -> 52 if test.fail then 53 (* Error tests shouldn't have JSON comparison *) 54 (`Skip, "") ··· 56 try 57 (* Handle multi-document YAML by using documents_of_string *) 58 let docs = Loader.documents_of_string test.yaml in 59 - let values = List.filter_map (fun doc -> 60 - match Document.root doc with 61 - | None -> None 62 - | Some yaml -> Some (Yaml.to_value ~resolve_aliases_first:true yaml) 63 - ) docs in 64 - let actual_json = match values with 65 - | [] -> "" (* Empty document produces empty JSON *) 66 - | [v] -> JF.to_json v 67 | vs -> JF.documents_to_json vs 68 in 69 - if compare_json expected_json actual_json then 70 - (`Pass, actual_json) 71 - else 72 - (`Fail "JSON mismatch", actual_json) 73 with 74 | Yamlrw_error e -> 75 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "") 76 | exn -> 77 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "") 78 79 let run_test (test : TL.test_case) : test_result = 80 let json_status, json_actual = run_json_test test in 81 - let base = { 82 - id = test.id; 83 - name = test.name; 84 - yaml = test.yaml; 85 - is_error_test = test.fail; 86 - status = `Skip; 87 - output = ""; 88 - json_status; 89 - json_expected = Option.value ~default:"" test.json; 90 - json_actual; 91 - } in 92 if test.fail then begin 93 try 94 let parser = Parser.of_string test.yaml in 95 let events = Parser.to_list parser in 96 let tree = TF.of_spanned_events events in 97 - { base with 98 - status = `Fail "Expected parsing to fail"; 99 - output = tree; 100 - } 101 with 102 | Yamlrw_error e -> 103 - { base with 104 - status = `Pass; 105 - output = Format.asprintf "%a" Error.pp e; 106 - } 107 - | exn -> 108 - { base with 109 - status = `Pass; 110 - output = Printexc.to_string exn; 111 - } 112 end 113 else begin 114 match test.tree with 115 - | None -> 116 (* No expected tree - check if json indicates expected success *) 117 - (match test.json with 118 - | Some _ -> 119 - (* Has json output, so should parse successfully *) 120 - (try 121 - let parser = Parser.of_string test.yaml in 122 - let events = Parser.to_list parser in 123 - let tree = TF.of_spanned_events events in 124 - { base with status = `Pass; output = tree } 125 - with exn -> 126 - { base with 127 - status = `Fail (Printf.sprintf "Should parse but got: %s" (Printexc.to_string exn)); 128 - output = Printexc.to_string exn; 129 - }) 130 - | None -> 131 - (* No tree, no json, no fail - ambiguous edge case, skip *) 132 - { base with status = `Skip; output = "(no expected tree or json)" }) 133 - | Some expected -> 134 try 135 let parser = Parser.of_string test.yaml in 136 let events = Parser.to_list parser in ··· 140 if expected_norm = actual_norm then 141 { base with status = `Pass; output = actual } 142 else 143 - { base with 144 status = `Fail (Printf.sprintf "Tree mismatch"); 145 - output = Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm actual_norm; 146 } 147 with exn -> 148 - { base with 149 - status = `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)); 150 output = Printexc.to_string exn; 151 - } 152 end 153 154 let status_class = function ··· 163 164 let generate_html results output_file = 165 let oc = open_out output_file in 166 - let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in 167 - let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in 168 - let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in 169 let total = List.length results in 170 - let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in 171 - let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in 172 - let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in 173 174 - Printf.fprintf oc {|<!DOCTYPE html> 175 <html lang="en"> 176 <head> 177 <meta charset="UTF-8"> ··· 335 <input type="text" class="search" placeholder="Search by ID or name..."> 336 </div> 337 <div class="tests"> 338 - |} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count; 339 340 - List.iter (fun result -> 341 - let error_badge = if result.is_error_test then 342 - {|<span class="badge error-test">Error Test</span>|} 343 - else "" in 344 - let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|} 345 - (status_class result.json_status) (status_text result.json_status) in 346 - let json_section = if result.json_expected <> "" || result.json_actual <> "" then 347 - Printf.sprintf {| 348 <div class="section"> 349 <div class="section-title">Expected JSON</div> 350 <pre>%s</pre> ··· 353 <div class="section-title">Actual JSON</div> 354 <pre>%s</pre> 355 </div>|} 356 - (html_escape result.json_expected) 357 - (html_escape result.json_actual) 358 - else "" in 359 - Printf.fprintf oc {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s"> 360 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')"> 361 <span class="expand-icon">▶</span> 362 <span class="badge %s">%s</span> ··· 377 </div> 378 </div> 379 |} 380 - (status_class result.status) 381 - (status_class result.json_status) 382 - (html_escape result.id) 383 - (html_escape (String.lowercase_ascii result.name)) 384 - (status_class result.status) 385 - (status_text result.status) 386 - json_badge 387 - (html_escape result.id) 388 - (html_escape result.name) 389 - error_badge 390 - (html_escape result.yaml) 391 - (html_escape result.output) 392 - json_section 393 - ) results; 394 395 - Printf.fprintf oc {| </div> 396 </div> 397 <script> 398 document.querySelectorAll('.filter-btn').forEach(btn => { ··· 425 let html_output = ref None in 426 let show_skipped = ref false in 427 let test_suite_path_ref = ref test_suite_path in 428 - let args = [ 429 - "--html", Arg.String (fun s -> html_output := Some s), 430 - "<file> Generate HTML report to file"; 431 - "--show-skipped", Arg.Set show_skipped, 432 - " Show details of skipped tests"; 433 - "--test-suite-path", Arg.Set_string test_suite_path_ref, 434 - "<path> Path to yaml-test-suite directory"; 435 - ] in 436 - Arg.parse args (fun _ -> ()) "Usage: run_all_tests [--html <file>] [--show-skipped] [--test-suite-path <path>]"; 437 438 let all_tests = TL.load_directory !test_suite_path_ref in 439 Printf.printf "Total tests loaded: %d\n%!" (List.length all_tests); 440 441 let results = List.map run_test all_tests in 442 443 - let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in 444 - let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in 445 - let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in 446 447 - let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in 448 - let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in 449 - let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in 450 451 - Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" 452 - pass_count fail_count skip_count (pass_count + fail_count + skip_count); 453 454 - Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" 455 - json_pass_count json_fail_count json_skip_count; 456 457 if fail_count > 0 then begin 458 Printf.printf "\nFailing event tree tests:\n"; 459 - List.iter (fun r -> 460 - match r.status with 461 - | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 462 - | _ -> () 463 - ) results 464 end; 465 466 if json_fail_count > 0 then begin 467 Printf.printf "\nFailing JSON tests:\n"; 468 - List.iter (fun r -> 469 - match r.json_status with 470 - | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 471 - | _ -> () 472 - ) results 473 end; 474 475 if !show_skipped && skip_count > 0 then begin 476 Printf.printf "\nSkipped tests (no expected tree):\n"; 477 - List.iter (fun r -> 478 - if r.status = `Skip then begin 479 - Printf.printf " %s: %s\n" r.id r.name; 480 - Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml) 481 - (if String.length r.yaml <= 60 then r.yaml 482 - else String.sub r.yaml 0 60 ^ "...") 483 - end 484 - ) results 485 end; 486 487 (match !html_output with ··· 491 | None -> ()); 492 493 (* Exit with non-zero code if any tests failed *) 494 - if fail_count > 0 || json_fail_count > 0 then 495 - exit 1
··· 14 (* HTML escape function *) 15 let html_escape s = 16 let buf = Buffer.create (String.length s) in 17 + String.iter 18 + (function 19 + | '<' -> Buffer.add_string buf "&lt;" 20 + | '>' -> Buffer.add_string buf "&gt;" 21 + | '&' -> Buffer.add_string buf "&amp;" 22 + | '"' -> Buffer.add_string buf "&quot;" 23 + | c -> Buffer.add_char buf c) 24 + s; 25 Buffer.contents buf 26 27 let normalize_tree s = ··· 34 name : string; 35 yaml : string; 36 is_error_test : bool; 37 + status : [ `Pass | `Fail of string | `Skip ]; 38 output : string; 39 + json_status : [ `Pass | `Fail of string | `Skip ]; 40 json_expected : string; 41 json_actual : string; 42 } ··· 46 This handles formatting differences and object key ordering. *) 47 JC.compare_json_strings expected actual 48 49 + let run_json_test (test : TL.test_case) : 50 + [ `Pass | `Fail of string | `Skip ] * string = 51 match test.json with 52 | None -> (`Skip, "") 53 + | Some expected_json -> ( 54 if test.fail then 55 (* Error tests shouldn't have JSON comparison *) 56 (`Skip, "") ··· 58 try 59 (* Handle multi-document YAML by using documents_of_string *) 60 let docs = Loader.documents_of_string test.yaml in 61 + let values = 62 + List.filter_map 63 + (fun doc -> 64 + match Document.root doc with 65 + | None -> None 66 + | Some yaml -> 67 + Some (Yaml.to_value ~resolve_aliases_first:true yaml)) 68 + docs 69 + in 70 + let actual_json = 71 + match values with 72 + | [] -> "" (* Empty document produces empty JSON *) 73 + | [ v ] -> JF.to_json v 74 | vs -> JF.documents_to_json vs 75 in 76 + if compare_json expected_json actual_json then (`Pass, actual_json) 77 + else (`Fail "JSON mismatch", actual_json) 78 with 79 | Yamlrw_error e -> 80 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "") 81 | exn -> 82 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "") 83 + ) 84 85 let run_test (test : TL.test_case) : test_result = 86 let json_status, json_actual = run_json_test test in 87 + let base = 88 + { 89 + id = test.id; 90 + name = test.name; 91 + yaml = test.yaml; 92 + is_error_test = test.fail; 93 + status = `Skip; 94 + output = ""; 95 + json_status; 96 + json_expected = Option.value ~default:"" test.json; 97 + json_actual; 98 + } 99 + in 100 if test.fail then begin 101 try 102 let parser = Parser.of_string test.yaml in 103 let events = Parser.to_list parser in 104 let tree = TF.of_spanned_events events in 105 + { base with status = `Fail "Expected parsing to fail"; output = tree } 106 with 107 | Yamlrw_error e -> 108 + { base with status = `Pass; output = Format.asprintf "%a" Error.pp e } 109 + | exn -> { base with status = `Pass; output = Printexc.to_string exn } 110 end 111 else begin 112 match test.tree with 113 + | None -> ( 114 (* No expected tree - check if json indicates expected success *) 115 + match test.json with 116 + | Some _ -> ( 117 + (* Has json output, so should parse successfully *) 118 + try 119 + let parser = Parser.of_string test.yaml in 120 + let events = Parser.to_list parser in 121 + let tree = TF.of_spanned_events events in 122 + { base with status = `Pass; output = tree } 123 + with exn -> 124 + { 125 + base with 126 + status = 127 + `Fail 128 + (Printf.sprintf "Should parse but got: %s" 129 + (Printexc.to_string exn)); 130 + output = Printexc.to_string exn; 131 + }) 132 + | None -> 133 + (* No tree, no json, no fail - ambiguous edge case, skip *) 134 + { base with status = `Skip; output = "(no expected tree or json)" }) 135 + | Some expected -> ( 136 try 137 let parser = Parser.of_string test.yaml in 138 let events = Parser.to_list parser in ··· 142 if expected_norm = actual_norm then 143 { base with status = `Pass; output = actual } 144 else 145 + { 146 + base with 147 status = `Fail (Printf.sprintf "Tree mismatch"); 148 + output = 149 + Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm 150 + actual_norm; 151 } 152 with exn -> 153 + { 154 + base with 155 + status = 156 + `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)); 157 output = Printexc.to_string exn; 158 + }) 159 end 160 161 let status_class = function ··· 170 171 let generate_html results output_file = 172 let oc = open_out output_file in 173 + let pass_count = 174 + List.length (List.filter (fun r -> r.status = `Pass) results) 175 + in 176 + let fail_count = 177 + List.length 178 + (List.filter 179 + (fun r -> match r.status with `Fail _ -> true | _ -> false) 180 + results) 181 + in 182 + let skip_count = 183 + List.length (List.filter (fun r -> r.status = `Skip) results) 184 + in 185 let total = List.length results in 186 + let json_pass_count = 187 + List.length (List.filter (fun r -> r.json_status = `Pass) results) 188 + in 189 + let json_fail_count = 190 + List.length 191 + (List.filter 192 + (fun r -> match r.json_status with `Fail _ -> true | _ -> false) 193 + results) 194 + in 195 + let json_skip_count = 196 + List.length (List.filter (fun r -> r.json_status = `Skip) results) 197 + in 198 199 + Printf.fprintf oc 200 + {|<!DOCTYPE html> 201 <html lang="en"> 202 <head> 203 <meta charset="UTF-8"> ··· 361 <input type="text" class="search" placeholder="Search by ID or name..."> 362 </div> 363 <div class="tests"> 364 + |} 365 + pass_count fail_count skip_count total json_pass_count json_fail_count 366 + json_skip_count; 367 368 + List.iter 369 + (fun result -> 370 + let error_badge = 371 + if result.is_error_test then 372 + {|<span class="badge error-test">Error Test</span>|} 373 + else "" 374 + in 375 + let json_badge = 376 + Printf.sprintf 377 + {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|} 378 + (status_class result.json_status) 379 + (status_text result.json_status) 380 + in 381 + let json_section = 382 + if result.json_expected <> "" || result.json_actual <> "" then 383 + Printf.sprintf 384 + {| 385 <div class="section"> 386 <div class="section-title">Expected JSON</div> 387 <pre>%s</pre> ··· 390 <div class="section-title">Actual JSON</div> 391 <pre>%s</pre> 392 </div>|} 393 + (html_escape result.json_expected) 394 + (html_escape result.json_actual) 395 + else "" 396 + in 397 + Printf.fprintf oc 398 + {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s"> 399 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')"> 400 <span class="expand-icon">▶</span> 401 <span class="badge %s">%s</span> ··· 416 </div> 417 </div> 418 |} 419 + (status_class result.status) 420 + (status_class result.json_status) 421 + (html_escape result.id) 422 + (html_escape (String.lowercase_ascii result.name)) 423 + (status_class result.status) 424 + (status_text result.status) 425 + json_badge (html_escape result.id) (html_escape result.name) error_badge 426 + (html_escape result.yaml) 427 + (html_escape result.output) 428 + json_section) 429 + results; 430 431 + Printf.fprintf oc 432 + {| </div> 433 </div> 434 <script> 435 document.querySelectorAll('.filter-btn').forEach(btn => { ··· 462 let html_output = ref None in 463 let show_skipped = ref false in 464 let test_suite_path_ref = ref test_suite_path in 465 + let args = 466 + [ 467 + ( "--html", 468 + Arg.String (fun s -> html_output := Some s), 469 + "<file> Generate HTML report to file" ); 470 + ("--show-skipped", Arg.Set show_skipped, " Show details of skipped tests"); 471 + ( "--test-suite-path", 472 + Arg.Set_string test_suite_path_ref, 473 + "<path> Path to yaml-test-suite directory" ); 474 + ] 475 + in 476 + Arg.parse args 477 + (fun _ -> ()) 478 + "Usage: run_all_tests [--html <file>] [--show-skipped] [--test-suite-path \ 479 + <path>]"; 480 481 let all_tests = TL.load_directory !test_suite_path_ref in 482 Printf.printf "Total tests loaded: %d\n%!" (List.length all_tests); 483 484 let results = List.map run_test all_tests in 485 486 + let pass_count = 487 + List.length (List.filter (fun r -> r.status = `Pass) results) 488 + in 489 + let fail_count = 490 + List.length 491 + (List.filter 492 + (fun r -> match r.status with `Fail _ -> true | _ -> false) 493 + results) 494 + in 495 + let skip_count = 496 + List.length (List.filter (fun r -> r.status = `Skip) results) 497 + in 498 499 + let json_pass_count = 500 + List.length (List.filter (fun r -> r.json_status = `Pass) results) 501 + in 502 + let json_fail_count = 503 + List.length 504 + (List.filter 505 + (fun r -> match r.json_status with `Fail _ -> true | _ -> false) 506 + results) 507 + in 508 + let json_skip_count = 509 + List.length (List.filter (fun r -> r.json_status = `Skip) results) 510 + in 511 512 + Printf.printf 513 + "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" pass_count 514 + fail_count skip_count 515 + (pass_count + fail_count + skip_count); 516 517 + Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" json_pass_count 518 + json_fail_count json_skip_count; 519 520 if fail_count > 0 then begin 521 Printf.printf "\nFailing event tree tests:\n"; 522 + List.iter 523 + (fun r -> 524 + match r.status with 525 + | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 526 + | _ -> ()) 527 + results 528 end; 529 530 if json_fail_count > 0 then begin 531 Printf.printf "\nFailing JSON tests:\n"; 532 + List.iter 533 + (fun r -> 534 + match r.json_status with 535 + | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 536 + | _ -> ()) 537 + results 538 end; 539 540 if !show_skipped && skip_count > 0 then begin 541 Printf.printf "\nSkipped tests (no expected tree):\n"; 542 + List.iter 543 + (fun r -> 544 + if r.status = `Skip then begin 545 + Printf.printf " %s: %s\n" r.id r.name; 546 + Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml) 547 + (if String.length r.yaml <= 60 then r.yaml 548 + else String.sub r.yaml 0 60 ^ "...") 549 + end) 550 + results 551 end; 552 553 (match !html_output with ··· 557 | None -> ()); 558 559 (* Exit with non-zero code if any tests failed *) 560 + if fail_count > 0 || json_fail_count > 0 then exit 1
+224 -162
tests/run_all_tests_eio.ml
··· 16 (* HTML escape function *) 17 let html_escape s = 18 let buf = Buffer.create (String.length s) in 19 - String.iter (function 20 - | '<' -> Buffer.add_string buf "&lt;" 21 - | '>' -> Buffer.add_string buf "&gt;" 22 - | '&' -> Buffer.add_string buf "&amp;" 23 - | '"' -> Buffer.add_string buf "&quot;" 24 - | c -> Buffer.add_char buf c 25 - ) s; 26 Buffer.contents buf 27 28 let normalize_tree s = ··· 35 name : string; 36 yaml : string; 37 is_error_test : bool; 38 - status : [`Pass | `Fail of string | `Skip]; 39 output : string; 40 - json_status : [`Pass | `Fail of string | `Skip]; 41 json_expected : string; 42 json_actual : string; 43 } 44 45 - let compare_json expected actual = 46 - JC.compare_json_strings expected actual 47 48 - let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string = 49 match test.json with 50 | None -> (`Skip, "") 51 - | Some expected_json -> 52 - if test.fail then 53 - (`Skip, "") 54 else 55 try 56 let docs = Loader.documents_of_string test.yaml in 57 - let values = List.filter_map (fun doc -> 58 - match Document.root doc with 59 - | None -> None 60 - | Some yaml -> Some (Yaml.to_value ~resolve_aliases_first:true yaml) 61 - ) docs in 62 - let actual_json = match values with 63 | [] -> "" 64 - | [v] -> JF.to_json v 65 | vs -> JF.documents_to_json vs 66 in 67 - if compare_json expected_json actual_json then 68 - (`Pass, actual_json) 69 - else 70 - (`Fail "JSON mismatch", actual_json) 71 with 72 | Yamlrw_error e -> 73 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "") 74 | exn -> 75 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "") 76 77 let run_test (test : TL.test_case) : test_result = 78 let json_status, json_actual = run_json_test test in 79 - let base = { 80 - id = test.id; 81 - name = test.name; 82 - yaml = test.yaml; 83 - is_error_test = test.fail; 84 - status = `Skip; 85 - output = ""; 86 - json_status; 87 - json_expected = Option.value ~default:"" test.json; 88 - json_actual; 89 - } in 90 if test.fail then begin 91 try 92 let parser = Parser.of_string test.yaml in 93 let events = Parser.to_list parser in 94 let tree = TF.of_spanned_events events in 95 - { base with 96 - status = `Fail "Expected parsing to fail"; 97 - output = tree; 98 - } 99 with 100 | Yamlrw_error e -> 101 - { base with 102 - status = `Pass; 103 - output = Format.asprintf "%a" Error.pp e; 104 - } 105 - | exn -> 106 - { base with 107 - status = `Pass; 108 - output = Printexc.to_string exn; 109 - } 110 end 111 else begin 112 match test.tree with 113 - | None -> 114 - (match test.json with 115 - | Some _ -> 116 - (try 117 - let parser = Parser.of_string test.yaml in 118 - let events = Parser.to_list parser in 119 - let tree = TF.of_spanned_events events in 120 - { base with status = `Pass; output = tree } 121 - with exn -> 122 - { base with 123 - status = `Fail (Printf.sprintf "Should parse but got: %s" (Printexc.to_string exn)); 124 - output = Printexc.to_string exn; 125 - }) 126 - | None -> 127 - { base with status = `Skip; output = "(no expected tree or json)" }) 128 - | Some expected -> 129 try 130 let parser = Parser.of_string test.yaml in 131 let events = Parser.to_list parser in ··· 135 if expected_norm = actual_norm then 136 { base with status = `Pass; output = actual } 137 else 138 - { base with 139 status = `Fail (Printf.sprintf "Tree mismatch"); 140 - output = Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm actual_norm; 141 } 142 with exn -> 143 - { base with 144 - status = `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)); 145 output = Printexc.to_string exn; 146 - } 147 end 148 149 (* Run tests in parallel using Eio fibers *) 150 - let run_tests_parallel tests = 151 - Eio.Fiber.List.map run_test tests 152 153 let status_class = function 154 | `Pass -> "pass" ··· 161 | `Skip -> "SKIP" 162 163 let generate_html ~fs results output_file = 164 - let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in 165 - let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in 166 - let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in 167 let total = List.length results in 168 - let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in 169 - let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in 170 - let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in 171 172 let buf = Buffer.create 65536 in 173 - Printf.bprintf buf {|<!DOCTYPE html> 174 <html lang="en"> 175 <head> 176 <meta charset="UTF-8"> ··· 344 <input type="text" class="search" placeholder="Search by ID or name..."> 345 </div> 346 <div class="tests"> 347 - |} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count; 348 349 - List.iter (fun result -> 350 - let error_badge = if result.is_error_test then 351 - {|<span class="badge error-test">Error Test</span>|} 352 - else "" in 353 - let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|} 354 - (status_class result.json_status) (status_text result.json_status) in 355 - let json_section = if result.json_expected <> "" || result.json_actual <> "" then 356 - Printf.sprintf {| 357 <div class="section"> 358 <div class="section-title">Expected JSON</div> 359 <pre>%s</pre> ··· 362 <div class="section-title">Actual JSON</div> 363 <pre>%s</pre> 364 </div>|} 365 - (html_escape result.json_expected) 366 - (html_escape result.json_actual) 367 - else "" in 368 - Printf.bprintf buf {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s"> 369 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')"> 370 <span class="expand-icon">▶</span> 371 <span class="badge %s">%s</span> ··· 386 </div> 387 </div> 388 |} 389 - (status_class result.status) 390 - (status_class result.json_status) 391 - (html_escape result.id) 392 - (html_escape (String.lowercase_ascii result.name)) 393 - (status_class result.status) 394 - (status_text result.status) 395 - json_badge 396 - (html_escape result.id) 397 - (html_escape result.name) 398 - error_badge 399 - (html_escape result.yaml) 400 - (html_escape result.output) 401 - json_section 402 - ) results; 403 404 - Printf.bprintf buf {| </div> 405 </div> 406 <script> 407 document.querySelectorAll('.filter-btn').forEach(btn => { ··· 438 let show_skipped = ref false in 439 let sequential = ref false in 440 let test_suite_path_ref = ref test_suite_path in 441 - let args = [ 442 - "--html", Arg.String (fun s -> html_output := Some s), 443 - "<file> Generate HTML report to file"; 444 - "--show-skipped", Arg.Set show_skipped, 445 - " Show details of skipped tests"; 446 - "--sequential", Arg.Set sequential, 447 - " Run tests sequentially instead of in parallel"; 448 - "--test-suite-path", Arg.Set_string test_suite_path_ref, 449 - "<path> Path to yaml-test-suite directory"; 450 - ] in 451 - Arg.parse args (fun _ -> ()) "Usage: run_all_tests_eio [--html <file>] [--show-skipped] [--sequential] [--test-suite-path <path>]"; 452 453 Eio_main.run @@ fun env -> 454 (* Use fs (full filesystem) rather than cwd (sandboxed) to allow ".." navigation *) 455 let fs = Eio.Stdenv.fs env in 456 (* Get the absolute path to the test suite *) 457 - let test_suite_abs = if Filename.is_relative !test_suite_path_ref then 458 - Filename.concat (Sys.getcwd ()) !test_suite_path_ref 459 - else 460 - !test_suite_path_ref 461 in 462 463 let start_time = Unix.gettimeofday () in 464 465 (* Load tests using Eio (parallel by default) *) 466 - let all_tests = if !sequential then 467 - TL.load_directory ~fs test_suite_abs 468 - else 469 - TL.load_directory_parallel ~fs test_suite_abs 470 in 471 let load_time = Unix.gettimeofday () in 472 - Printf.printf "Loaded %d tests in %.3fs\n%!" (List.length all_tests) (load_time -. start_time); 473 474 (* Run tests (parallel or sequential based on flag) *) 475 - let results = if !sequential then 476 - List.map run_test all_tests 477 - else 478 - run_tests_parallel all_tests 479 in 480 let run_time = Unix.gettimeofday () in 481 Printf.printf "Ran tests in %.3fs\n%!" (run_time -. load_time); 482 483 - let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in 484 - let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in 485 - let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in 486 487 - let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in 488 - let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in 489 - let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in 490 491 - Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" 492 - pass_count fail_count skip_count (pass_count + fail_count + skip_count); 493 494 - Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" 495 - json_pass_count json_fail_count json_skip_count; 496 497 if fail_count > 0 then begin 498 Printf.printf "\nFailing event tree tests:\n"; 499 - List.iter (fun r -> 500 - match r.status with 501 - | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 502 - | _ -> () 503 - ) results 504 end; 505 506 if json_fail_count > 0 then begin 507 Printf.printf "\nFailing JSON tests:\n"; 508 - List.iter (fun r -> 509 - match r.json_status with 510 - | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 511 - | _ -> () 512 - ) results 513 end; 514 515 if !show_skipped && skip_count > 0 then begin 516 Printf.printf "\nSkipped tests (no expected tree):\n"; 517 - List.iter (fun r -> 518 - if r.status = `Skip then begin 519 - Printf.printf " %s: %s\n" r.id r.name; 520 - Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml) 521 - (if String.length r.yaml <= 60 then r.yaml 522 - else String.sub r.yaml 0 60 ^ "...") 523 - end 524 - ) results 525 end; 526 527 let total_time = Unix.gettimeofday () in ··· 534 | None -> ()); 535 536 (* Exit with non-zero code if any tests failed *) 537 - if fail_count > 0 || json_fail_count > 0 then 538 - exit 1
··· 16 (* HTML escape function *) 17 let html_escape s = 18 let buf = Buffer.create (String.length s) in 19 + String.iter 20 + (function 21 + | '<' -> Buffer.add_string buf "&lt;" 22 + | '>' -> Buffer.add_string buf "&gt;" 23 + | '&' -> Buffer.add_string buf "&amp;" 24 + | '"' -> Buffer.add_string buf "&quot;" 25 + | c -> Buffer.add_char buf c) 26 + s; 27 Buffer.contents buf 28 29 let normalize_tree s = ··· 36 name : string; 37 yaml : string; 38 is_error_test : bool; 39 + status : [ `Pass | `Fail of string | `Skip ]; 40 output : string; 41 + json_status : [ `Pass | `Fail of string | `Skip ]; 42 json_expected : string; 43 json_actual : string; 44 } 45 46 + let compare_json expected actual = JC.compare_json_strings expected actual 47 48 + let run_json_test (test : TL.test_case) : 49 + [ `Pass | `Fail of string | `Skip ] * string = 50 match test.json with 51 | None -> (`Skip, "") 52 + | Some expected_json -> ( 53 + if test.fail then (`Skip, "") 54 else 55 try 56 let docs = Loader.documents_of_string test.yaml in 57 + let values = 58 + List.filter_map 59 + (fun doc -> 60 + match Document.root doc with 61 + | None -> None 62 + | Some yaml -> 63 + Some (Yaml.to_value ~resolve_aliases_first:true yaml)) 64 + docs 65 + in 66 + let actual_json = 67 + match values with 68 | [] -> "" 69 + | [ v ] -> JF.to_json v 70 | vs -> JF.documents_to_json vs 71 in 72 + if compare_json expected_json actual_json then (`Pass, actual_json) 73 + else (`Fail "JSON mismatch", actual_json) 74 with 75 | Yamlrw_error e -> 76 (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "") 77 | exn -> 78 (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "") 79 + ) 80 81 let run_test (test : TL.test_case) : test_result = 82 let json_status, json_actual = run_json_test test in 83 + let base = 84 + { 85 + id = test.id; 86 + name = test.name; 87 + yaml = test.yaml; 88 + is_error_test = test.fail; 89 + status = `Skip; 90 + output = ""; 91 + json_status; 92 + json_expected = Option.value ~default:"" test.json; 93 + json_actual; 94 + } 95 + in 96 if test.fail then begin 97 try 98 let parser = Parser.of_string test.yaml in 99 let events = Parser.to_list parser in 100 let tree = TF.of_spanned_events events in 101 + { base with status = `Fail "Expected parsing to fail"; output = tree } 102 with 103 | Yamlrw_error e -> 104 + { base with status = `Pass; output = Format.asprintf "%a" Error.pp e } 105 + | exn -> { base with status = `Pass; output = Printexc.to_string exn } 106 end 107 else begin 108 match test.tree with 109 + | None -> ( 110 + match test.json with 111 + | Some _ -> ( 112 + try 113 + let parser = Parser.of_string test.yaml in 114 + let events = Parser.to_list parser in 115 + let tree = TF.of_spanned_events events in 116 + { base with status = `Pass; output = tree } 117 + with exn -> 118 + { 119 + base with 120 + status = 121 + `Fail 122 + (Printf.sprintf "Should parse but got: %s" 123 + (Printexc.to_string exn)); 124 + output = Printexc.to_string exn; 125 + }) 126 + | None -> 127 + { base with status = `Skip; output = "(no expected tree or json)" }) 128 + | Some expected -> ( 129 try 130 let parser = Parser.of_string test.yaml in 131 let events = Parser.to_list parser in ··· 135 if expected_norm = actual_norm then 136 { base with status = `Pass; output = actual } 137 else 138 + { 139 + base with 140 status = `Fail (Printf.sprintf "Tree mismatch"); 141 + output = 142 + Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm 143 + actual_norm; 144 } 145 with exn -> 146 + { 147 + base with 148 + status = 149 + `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)); 150 output = Printexc.to_string exn; 151 + }) 152 end 153 154 (* Run tests in parallel using Eio fibers *) 155 + let run_tests_parallel tests = Eio.Fiber.List.map run_test tests 156 157 let status_class = function 158 | `Pass -> "pass" ··· 165 | `Skip -> "SKIP" 166 167 let generate_html ~fs results output_file = 168 + let pass_count = 169 + List.length (List.filter (fun r -> r.status = `Pass) results) 170 + in 171 + let fail_count = 172 + List.length 173 + (List.filter 174 + (fun r -> match r.status with `Fail _ -> true | _ -> false) 175 + results) 176 + in 177 + let skip_count = 178 + List.length (List.filter (fun r -> r.status = `Skip) results) 179 + in 180 let total = List.length results in 181 + let json_pass_count = 182 + List.length (List.filter (fun r -> r.json_status = `Pass) results) 183 + in 184 + let json_fail_count = 185 + List.length 186 + (List.filter 187 + (fun r -> match r.json_status with `Fail _ -> true | _ -> false) 188 + results) 189 + in 190 + let json_skip_count = 191 + List.length (List.filter (fun r -> r.json_status = `Skip) results) 192 + in 193 194 let buf = Buffer.create 65536 in 195 + Printf.bprintf buf 196 + {|<!DOCTYPE html> 197 <html lang="en"> 198 <head> 199 <meta charset="UTF-8"> ··· 367 <input type="text" class="search" placeholder="Search by ID or name..."> 368 </div> 369 <div class="tests"> 370 + |} 371 + pass_count fail_count skip_count total json_pass_count json_fail_count 372 + json_skip_count; 373 374 + List.iter 375 + (fun result -> 376 + let error_badge = 377 + if result.is_error_test then 378 + {|<span class="badge error-test">Error Test</span>|} 379 + else "" 380 + in 381 + let json_badge = 382 + Printf.sprintf 383 + {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|} 384 + (status_class result.json_status) 385 + (status_text result.json_status) 386 + in 387 + let json_section = 388 + if result.json_expected <> "" || result.json_actual <> "" then 389 + Printf.sprintf 390 + {| 391 <div class="section"> 392 <div class="section-title">Expected JSON</div> 393 <pre>%s</pre> ··· 396 <div class="section-title">Actual JSON</div> 397 <pre>%s</pre> 398 </div>|} 399 + (html_escape result.json_expected) 400 + (html_escape result.json_actual) 401 + else "" 402 + in 403 + Printf.bprintf buf 404 + {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s"> 405 <div class="test-header" onclick="this.parentElement.classList.toggle('expanded')"> 406 <span class="expand-icon">▶</span> 407 <span class="badge %s">%s</span> ··· 422 </div> 423 </div> 424 |} 425 + (status_class result.status) 426 + (status_class result.json_status) 427 + (html_escape result.id) 428 + (html_escape (String.lowercase_ascii result.name)) 429 + (status_class result.status) 430 + (status_text result.status) 431 + json_badge (html_escape result.id) (html_escape result.name) error_badge 432 + (html_escape result.yaml) 433 + (html_escape result.output) 434 + json_section) 435 + results; 436 437 + Printf.bprintf buf 438 + {| </div> 439 </div> 440 <script> 441 document.querySelectorAll('.filter-btn').forEach(btn => { ··· 472 let show_skipped = ref false in 473 let sequential = ref false in 474 let test_suite_path_ref = ref test_suite_path in 475 + let args = 476 + [ 477 + ( "--html", 478 + Arg.String (fun s -> html_output := Some s), 479 + "<file> Generate HTML report to file" ); 480 + ("--show-skipped", Arg.Set show_skipped, " Show details of skipped tests"); 481 + ( "--sequential", 482 + Arg.Set sequential, 483 + " Run tests sequentially instead of in parallel" ); 484 + ( "--test-suite-path", 485 + Arg.Set_string test_suite_path_ref, 486 + "<path> Path to yaml-test-suite directory" ); 487 + ] 488 + in 489 + Arg.parse args 490 + (fun _ -> ()) 491 + "Usage: run_all_tests_eio [--html <file>] [--show-skipped] [--sequential] \ 492 + [--test-suite-path <path>]"; 493 494 Eio_main.run @@ fun env -> 495 (* Use fs (full filesystem) rather than cwd (sandboxed) to allow ".." navigation *) 496 let fs = Eio.Stdenv.fs env in 497 (* Get the absolute path to the test suite *) 498 + let test_suite_abs = 499 + if Filename.is_relative !test_suite_path_ref then 500 + Filename.concat (Sys.getcwd ()) !test_suite_path_ref 501 + else !test_suite_path_ref 502 in 503 504 let start_time = Unix.gettimeofday () in 505 506 (* Load tests using Eio (parallel by default) *) 507 + let all_tests = 508 + if !sequential then TL.load_directory ~fs test_suite_abs 509 + else TL.load_directory_parallel ~fs test_suite_abs 510 in 511 let load_time = Unix.gettimeofday () in 512 + Printf.printf "Loaded %d tests in %.3fs\n%!" (List.length all_tests) 513 + (load_time -. start_time); 514 515 (* Run tests (parallel or sequential based on flag) *) 516 + let results = 517 + if !sequential then List.map run_test all_tests 518 + else run_tests_parallel all_tests 519 in 520 let run_time = Unix.gettimeofday () in 521 Printf.printf "Ran tests in %.3fs\n%!" (run_time -. load_time); 522 523 + let pass_count = 524 + List.length (List.filter (fun r -> r.status = `Pass) results) 525 + in 526 + let fail_count = 527 + List.length 528 + (List.filter 529 + (fun r -> match r.status with `Fail _ -> true | _ -> false) 530 + results) 531 + in 532 + let skip_count = 533 + List.length (List.filter (fun r -> r.status = `Skip) results) 534 + in 535 536 + let json_pass_count = 537 + List.length (List.filter (fun r -> r.json_status = `Pass) results) 538 + in 539 + let json_fail_count = 540 + List.length 541 + (List.filter 542 + (fun r -> match r.json_status with `Fail _ -> true | _ -> false) 543 + results) 544 + in 545 + let json_skip_count = 546 + List.length (List.filter (fun r -> r.json_status = `Skip) results) 547 + in 548 549 + Printf.printf 550 + "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!" pass_count 551 + fail_count skip_count 552 + (pass_count + fail_count + skip_count); 553 554 + Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!" json_pass_count 555 + json_fail_count json_skip_count; 556 557 if fail_count > 0 then begin 558 Printf.printf "\nFailing event tree tests:\n"; 559 + List.iter 560 + (fun r -> 561 + match r.status with 562 + | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 563 + | _ -> ()) 564 + results 565 end; 566 567 if json_fail_count > 0 then begin 568 Printf.printf "\nFailing JSON tests:\n"; 569 + List.iter 570 + (fun r -> 571 + match r.json_status with 572 + | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg 573 + | _ -> ()) 574 + results 575 end; 576 577 if !show_skipped && skip_count > 0 then begin 578 Printf.printf "\nSkipped tests (no expected tree):\n"; 579 + List.iter 580 + (fun r -> 581 + if r.status = `Skip then begin 582 + Printf.printf " %s: %s\n" r.id r.name; 583 + Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml) 584 + (if String.length r.yaml <= 60 then r.yaml 585 + else String.sub r.yaml 0 60 ^ "...") 586 + end) 587 + results 588 end; 589 590 let total_time = Unix.gettimeofday () in ··· 597 | None -> ()); 598 599 (* Exit with non-zero code if any tests failed *) 600 + if fail_count > 0 || json_fail_count > 0 then exit 1
+6 -1
tests/test_suite_lib/dune
··· 1 (library 2 (name test_suite_lib) 3 - (modules test_suite_loader_generic test_suite_loader tree_format json_format json_compare) 4 (libraries yamlrw jsonm)) 5 6 (library
··· 1 (library 2 (name test_suite_lib) 3 + (modules 4 + test_suite_loader_generic 5 + test_suite_loader 6 + tree_format 7 + json_format 8 + json_compare) 9 (libraries yamlrw jsonm)) 10 11 (library
+29 -27
tests/test_suite_lib/json_compare.ml
··· 14 | Object of (string * json) list 15 16 let rec equal a b = 17 - match a, b with 18 | Null, Null -> true 19 | Bool a, Bool b -> a = b 20 | Float a, Float b -> Float.equal a b ··· 22 | Array a, Array b -> List.equal equal a b 23 | Object a, Object b -> 24 (* Compare objects as sets of key-value pairs (order independent) *) 25 - let sorted_a = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) a in 26 - let sorted_b = List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) b in 27 - List.length sorted_a = List.length sorted_b && 28 - List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) sorted_a sorted_b 29 | _ -> false 30 31 (* Parse JSON string using jsonm *) ··· 46 and parse_array acc = 47 match Jsonm.decode decoder with 48 | `Lexeme `Ae -> Ok (Array (List.rev acc)) 49 - | `Lexeme _ as lex -> 50 (* Push back and parse value *) 51 let result = parse_value_with_lex lex in 52 - (match result with 53 - | Ok v -> parse_array (v :: acc) 54 - | Error _ as e -> e) 55 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e) 56 | `End -> Error "unexpected end in array" 57 | `Await -> Error "unexpected await" 58 and parse_object acc = 59 match Jsonm.decode decoder with 60 | `Lexeme `Oe -> Ok (Object (List.rev acc)) 61 - | `Lexeme (`Name key) -> 62 - (match parse_value () with 63 - | Ok v -> parse_object ((key, v) :: acc) 64 - | Error _ as e -> e) 65 | `Lexeme _ -> Error "expected object key" 66 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e) 67 | `End -> Error "unexpected end in object" ··· 99 and parse_array acc = 100 match Jsonm.decode decoder with 101 | `Lexeme `Ae -> Some (Array (List.rev acc)) 102 - | `Lexeme _ as lex -> 103 - (match parse_value_with_lex lex with 104 - | Some v -> parse_array (v :: acc) 105 - | None -> None) 106 | _ -> None 107 and parse_object acc = 108 match Jsonm.decode decoder with 109 | `Lexeme `Oe -> Some (Object (List.rev acc)) 110 - | `Lexeme (`Name key) -> 111 - (match parse_value () with 112 - | Some v -> parse_object ((key, v) :: acc) 113 - | None -> None) 114 | _ -> None 115 and parse_value_with_lex lex = 116 match lex with ··· 134 (* Handle empty strings *) 135 let expected_trimmed = String.trim expected in 136 let actual_trimmed = String.trim actual in 137 - if expected_trimmed = "" && actual_trimmed = "" then 138 - true 139 - else if expected_trimmed = "" || actual_trimmed = "" then 140 - false 141 else 142 (* Parse as potentially multiple JSON values *) 143 let expected_values = parse_json_multi expected in 144 let actual_values = parse_json_multi actual in 145 - List.length expected_values = List.length actual_values && 146 - List.for_all2 equal expected_values actual_values
··· 14 | Object of (string * json) list 15 16 let rec equal a b = 17 + match (a, b) with 18 | Null, Null -> true 19 | Bool a, Bool b -> a = b 20 | Float a, Float b -> Float.equal a b ··· 22 | Array a, Array b -> List.equal equal a b 23 | Object a, Object b -> 24 (* Compare objects as sets of key-value pairs (order independent) *) 25 + let sorted_a = 26 + List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) a 27 + in 28 + let sorted_b = 29 + List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) b 30 + in 31 + List.length sorted_a = List.length sorted_b 32 + && List.for_all2 33 + (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) 34 + sorted_a sorted_b 35 | _ -> false 36 37 (* Parse JSON string using jsonm *) ··· 52 and parse_array acc = 53 match Jsonm.decode decoder with 54 | `Lexeme `Ae -> Ok (Array (List.rev acc)) 55 + | `Lexeme _ as lex -> ( 56 (* Push back and parse value *) 57 let result = parse_value_with_lex lex in 58 + match result with Ok v -> parse_array (v :: acc) | Error _ as e -> e) 59 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e) 60 | `End -> Error "unexpected end in array" 61 | `Await -> Error "unexpected await" 62 and parse_object acc = 63 match Jsonm.decode decoder with 64 | `Lexeme `Oe -> Ok (Object (List.rev acc)) 65 + | `Lexeme (`Name key) -> ( 66 + match parse_value () with 67 + | Ok v -> parse_object ((key, v) :: acc) 68 + | Error _ as e -> e) 69 | `Lexeme _ -> Error "expected object key" 70 | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e) 71 | `End -> Error "unexpected end in object" ··· 103 and parse_array acc = 104 match Jsonm.decode decoder with 105 | `Lexeme `Ae -> Some (Array (List.rev acc)) 106 + | `Lexeme _ as lex -> ( 107 + match parse_value_with_lex lex with 108 + | Some v -> parse_array (v :: acc) 109 + | None -> None) 110 | _ -> None 111 and parse_object acc = 112 match Jsonm.decode decoder with 113 | `Lexeme `Oe -> Some (Object (List.rev acc)) 114 + | `Lexeme (`Name key) -> ( 115 + match parse_value () with 116 + | Some v -> parse_object ((key, v) :: acc) 117 + | None -> None) 118 | _ -> None 119 and parse_value_with_lex lex = 120 match lex with ··· 138 (* Handle empty strings *) 139 let expected_trimmed = String.trim expected in 140 let actual_trimmed = String.trim actual in 141 + if expected_trimmed = "" && actual_trimmed = "" then true 142 + else if expected_trimmed = "" || actual_trimmed = "" then false 143 else 144 (* Parse as potentially multiple JSON values *) 145 let expected_values = parse_json_multi expected in 146 let actual_values = parse_json_multi actual in 147 + List.length expected_values = List.length actual_values 148 + && List.for_all2 equal expected_values actual_values
+37 -27
tests/test_suite_lib/json_format.ml
··· 10 let escape_string s = 11 let buf = Buffer.create (String.length s * 2) in 12 Buffer.add_char buf '"'; 13 - String.iter (fun c -> 14 - match c with 15 - | '"' -> Buffer.add_string buf "\\\"" 16 - | '\\' -> Buffer.add_string buf "\\\\" 17 - | '\n' -> Buffer.add_string buf "\\n" 18 - | '\r' -> Buffer.add_string buf "\\r" 19 - | '\t' -> Buffer.add_string buf "\\t" 20 - | '\x08' -> Buffer.add_string buf "\\b" 21 - | '\x0c' -> Buffer.add_string buf "\\f" 22 - | c when Char.code c < 32 -> 23 - Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) 24 - | c -> Buffer.add_char buf c 25 - ) s; 26 Buffer.add_char buf '"'; 27 Buffer.contents buf 28 29 - let rec format_value ?(indent=0) (v : Value.t) = 30 let spaces n = String.make n ' ' in 31 match v with 32 | `Null -> "null" 33 | `Bool true -> "true" 34 | `Bool false -> "false" 35 | `Float f -> 36 - if Float.is_nan f then "null" (* JSON doesn't support NaN *) 37 - else if f = Float.infinity || f = Float.neg_infinity then "null" (* JSON doesn't support Inf *) 38 else if Float.is_integer f && Float.abs f < 1e15 then 39 Printf.sprintf "%.0f" f 40 else 41 (* Try to match yaml-test-suite's number formatting *) 42 let s = Printf.sprintf "%g" f in 43 (* Ensure we have a decimal point for floats *) 44 - if String.contains s '.' || String.contains s 'e' || String.contains s 'E' then s 45 else s ^ ".0" 46 | `String s -> escape_string s 47 | `A [] -> "[]" 48 | `A items -> 49 let inner_indent = indent + 2 in 50 - let formatted_items = List.map (fun item -> 51 - spaces inner_indent ^ format_value ~indent:inner_indent item 52 - ) items in 53 "[\n" ^ String.concat ",\n" formatted_items ^ "\n" ^ spaces indent ^ "]" 54 | `O [] -> "{}" 55 | `O pairs -> 56 let inner_indent = indent + 2 in 57 - let formatted_pairs = List.map (fun (k, v) -> 58 - let key = escape_string k in 59 - let value = format_value ~indent:inner_indent v in 60 - spaces inner_indent ^ key ^ ": " ^ value 61 - ) pairs in 62 "{\n" ^ String.concat ",\n" formatted_pairs ^ "\n" ^ spaces indent ^ "}" 63 64 - let to_json (v : Value.t) : string = 65 - format_value v 66 67 (* Format multiple documents (for multi-doc YAML) *) 68 let documents_to_json (docs : Value.t list) : string =
··· 10 let escape_string s = 11 let buf = Buffer.create (String.length s * 2) in 12 Buffer.add_char buf '"'; 13 + String.iter 14 + (fun c -> 15 + match c with 16 + | '"' -> Buffer.add_string buf "\\\"" 17 + | '\\' -> Buffer.add_string buf "\\\\" 18 + | '\n' -> Buffer.add_string buf "\\n" 19 + | '\r' -> Buffer.add_string buf "\\r" 20 + | '\t' -> Buffer.add_string buf "\\t" 21 + | '\x08' -> Buffer.add_string buf "\\b" 22 + | '\x0c' -> Buffer.add_string buf "\\f" 23 + | c when Char.code c < 32 -> 24 + Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) 25 + | c -> Buffer.add_char buf c) 26 + s; 27 Buffer.add_char buf '"'; 28 Buffer.contents buf 29 30 + let rec format_value ?(indent = 0) (v : Value.t) = 31 let spaces n = String.make n ' ' in 32 match v with 33 | `Null -> "null" 34 | `Bool true -> "true" 35 | `Bool false -> "false" 36 | `Float f -> 37 + if Float.is_nan f then "null" (* JSON doesn't support NaN *) 38 + else if f = Float.infinity || f = Float.neg_infinity then "null" 39 + (* JSON doesn't support Inf *) 40 else if Float.is_integer f && Float.abs f < 1e15 then 41 Printf.sprintf "%.0f" f 42 else 43 (* Try to match yaml-test-suite's number formatting *) 44 let s = Printf.sprintf "%g" f in 45 (* Ensure we have a decimal point for floats *) 46 + if 47 + String.contains s '.' || String.contains s 'e' 48 + || String.contains s 'E' 49 + then s 50 else s ^ ".0" 51 | `String s -> escape_string s 52 | `A [] -> "[]" 53 | `A items -> 54 let inner_indent = indent + 2 in 55 + let formatted_items = 56 + List.map 57 + (fun item -> 58 + spaces inner_indent ^ format_value ~indent:inner_indent item) 59 + items 60 + in 61 "[\n" ^ String.concat ",\n" formatted_items ^ "\n" ^ spaces indent ^ "]" 62 | `O [] -> "{}" 63 | `O pairs -> 64 let inner_indent = indent + 2 in 65 + let formatted_pairs = 66 + List.map 67 + (fun (k, v) -> 68 + let key = escape_string k in 69 + let value = format_value ~indent:inner_indent v in 70 + spaces inner_indent ^ key ^ ": " ^ value) 71 + pairs 72 + in 73 "{\n" ^ String.concat ",\n" formatted_pairs ^ "\n" ^ spaces indent ^ "}" 74 75 + let to_json (v : Value.t) : string = format_value v 76 77 (* Format multiple documents (for multi-doc YAML) *) 78 let documents_to_json (docs : Value.t list) : string =
+5 -10
tests/test_suite_lib/test_suite_loader.ml
··· 18 Some s 19 with _ -> None 20 21 - let file_exists () path = 22 - Sys.file_exists path 23 - 24 - let is_directory () path = 25 - Sys.file_exists path && Sys.is_directory path 26 - 27 - let read_dir () path = 28 - Array.to_list (Sys.readdir path) 29 end 30 31 (** Internal loader module *) 32 - module Loader = Test_suite_loader_generic.Make(Sync_io) 33 34 - (** Re-export test_case type from loader *) 35 type test_case = Loader.test_case = { 36 id : string; 37 name : string; ··· 40 json : string option; 41 fail : bool; 42 } 43 44 (** Load tests without needing to pass a context *) 45 let load_directory path : test_case list = Loader.load_directory () path
··· 18 Some s 19 with _ -> None 20 21 + let file_exists () path = Sys.file_exists path 22 + let is_directory () path = Sys.file_exists path && Sys.is_directory path 23 + let read_dir () path = Array.to_list (Sys.readdir path) 24 end 25 26 + module Loader = Test_suite_loader_generic.Make (Sync_io) 27 (** Internal loader module *) 28 29 type test_case = Loader.test_case = { 30 id : string; 31 name : string; ··· 34 json : string option; 35 fail : bool; 36 } 37 + (** Re-export test_case type from loader *) 38 39 (** Load tests without needing to pass a context *) 40 let load_directory path : test_case list = Loader.load_directory () path
+15 -15
tests/test_suite_lib/test_suite_loader_eio.ml
··· 8 module Generic = Test_suite_lib.Test_suite_loader_generic 9 10 (** Eio file I/O implementation *) 11 - module Eio_io : Generic.FILE_IO with type ctx = Eio.Fs.dir_ty Eio.Path.t = struct 12 type ctx = Eio.Fs.dir_ty Eio.Path.t 13 14 let read_file fs path = 15 - try 16 - Some (Eio.Path.load Eio.Path.(fs / path)) 17 - with _ -> None 18 19 let file_exists fs path = 20 match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with ··· 28 | _ -> false 29 | exception _ -> false 30 31 - let read_dir fs path = 32 - Eio.Path.read_dir Eio.Path.(fs / path) 33 end 34 35 (** Internal loader module *) 36 - module Loader = Generic.Make(Eio_io) 37 38 - (** Re-export test_case type from loader *) 39 type test_case = Loader.test_case = { 40 id : string; 41 name : string; ··· 44 json : string option; 45 fail : bool; 46 } 47 48 (** Load tests with Eio filesystem context *) 49 let load_directory ~fs path : test_case list = Loader.load_directory fs path ··· 53 if not (Eio_io.is_directory fs test_suite_path) then [] 54 else 55 let entries = Eio_io.read_dir fs test_suite_path in 56 - let test_ids = entries 57 |> List.filter (fun e -> 58 - Eio_io.is_directory fs (Filename.concat test_suite_path e) && 59 - String.length e >= 4 && 60 - e.[0] >= '0' && e.[0] <= 'Z') 61 |> List.sort String.compare 62 in 63 (* Load each test ID in parallel using fibers *) 64 - Eio.Fiber.List.map (fun test_id -> 65 - Loader.load_test_id fs test_suite_path test_id 66 - ) test_ids 67 |> List.concat
··· 8 module Generic = Test_suite_lib.Test_suite_loader_generic 9 10 (** Eio file I/O implementation *) 11 + module Eio_io : Generic.FILE_IO with type ctx = Eio.Fs.dir_ty Eio.Path.t = 12 + struct 13 type ctx = Eio.Fs.dir_ty Eio.Path.t 14 15 let read_file fs path = 16 + try Some (Eio.Path.load Eio.Path.(fs / path)) with _ -> None 17 18 let file_exists fs path = 19 match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with ··· 27 | _ -> false 28 | exception _ -> false 29 30 + let read_dir fs path = Eio.Path.read_dir Eio.Path.(fs / path) 31 end 32 33 + module Loader = Generic.Make (Eio_io) 34 (** Internal loader module *) 35 36 type test_case = Loader.test_case = { 37 id : string; 38 name : string; ··· 41 json : string option; 42 fail : bool; 43 } 44 + (** Re-export test_case type from loader *) 45 46 (** Load tests with Eio filesystem context *) 47 let load_directory ~fs path : test_case list = Loader.load_directory fs path ··· 51 if not (Eio_io.is_directory fs test_suite_path) then [] 52 else 53 let entries = Eio_io.read_dir fs test_suite_path in 54 + let test_ids = 55 + entries 56 |> List.filter (fun e -> 57 + Eio_io.is_directory fs (Filename.concat test_suite_path e) 58 + && String.length e >= 4 59 + && e.[0] >= '0' 60 + && e.[0] <= 'Z') 61 |> List.sort String.compare 62 in 63 (* Load each test ID in parallel using fibers *) 64 + Eio.Fiber.List.map 65 + (fun test_id -> Loader.load_test_id fs test_suite_path test_id) 66 + test_ids 67 |> List.concat
+40 -31
tests/test_suite_lib/test_suite_loader_generic.ml
··· 5 6 (** Generic test suite loader - parameterized by file I/O operations *) 7 8 - (** Test case representation *) 9 type test_case = { 10 id : string; 11 name : string; ··· 14 json : string option; 15 fail : bool; 16 } 17 18 (** Module type for file I/O operations *) 19 module type FILE_IO = sig 20 - (** Context type for file operations (unit for sync, ~fs for Eio) *) 21 type ctx 22 23 (** Read a file, returning None if it doesn't exist or can't be read *) 24 - val read_file : ctx -> string -> string option 25 26 - (** Check if a path exists and is a regular file *) 27 val file_exists : ctx -> string -> bool 28 29 - (** Check if a path exists and is a directory *) 30 val is_directory : ctx -> string -> bool 31 32 (** List directory entries *) 33 - val read_dir : ctx -> string -> string list 34 end 35 36 (** Create a test loader from file I/O operations *) ··· 45 } 46 47 let read_file_required ctx path = 48 - match IO.read_file ctx path with 49 - | Some s -> s 50 - | None -> "" 51 52 (** Load a single test from a directory *) 53 let load_test_dir ctx base_id dir_path = ··· 60 (* Must have in.yaml to be a valid test *) 61 if not (IO.file_exists ctx yaml_file) then None 62 else 63 - let name = match IO.read_file ctx name_file with 64 | Some s -> String.trim s 65 | None -> base_id 66 in ··· 70 let fail = IO.file_exists ctx error_file in 71 Some { id = base_id; name; yaml; tree; json; fail } 72 73 - (** Load tests from a test ID directory (may have subdirectories for variants) *) 74 let load_test_id ctx test_suite_path test_id = 75 let dir_path = Filename.concat test_suite_path test_id in 76 if not (IO.is_directory ctx dir_path) then [] 77 else 78 let entries = IO.read_dir ctx dir_path in 79 (* Check if this directory has variant subdirectories (00, 01, etc.) *) 80 - let has_variants = List.exists (fun e -> 81 - let subdir = Filename.concat dir_path e in 82 - IO.is_directory ctx subdir && 83 - String.length e >= 2 && 84 - e.[0] >= '0' && e.[0] <= '9' 85 - ) entries in 86 87 if has_variants then 88 (* Load each variant subdirectory *) 89 - let variants = entries 90 |> List.filter (fun e -> 91 let subdir = Filename.concat dir_path e in 92 - IO.is_directory ctx subdir && 93 - String.length e >= 2 && 94 - e.[0] >= '0' && e.[0] <= '9') 95 |> List.sort String.compare 96 in 97 - List.filter_map (fun variant -> 98 - let variant_path = Filename.concat dir_path variant in 99 - let variant_id = Printf.sprintf "%s:%s" test_id variant in 100 - load_test_dir ctx variant_id variant_path 101 - ) variants 102 else 103 (* Single test in this directory *) 104 match load_test_dir ctx test_id dir_path with 105 - | Some t -> [t] 106 | None -> [] 107 108 (** Load all tests from a test suite directory *) ··· 110 if not (IO.is_directory ctx test_suite_path) then [] 111 else 112 let entries = IO.read_dir ctx test_suite_path in 113 - let test_ids = entries 114 |> List.filter (fun e -> 115 - IO.is_directory ctx (Filename.concat test_suite_path e) && 116 - String.length e >= 4 && 117 - e.[0] >= '0' && e.[0] <= 'Z') 118 |> List.sort String.compare 119 in 120 List.concat_map (load_test_id ctx test_suite_path) test_ids
··· 5 6 (** Generic test suite loader - parameterized by file I/O operations *) 7 8 type test_case = { 9 id : string; 10 name : string; ··· 13 json : string option; 14 fail : bool; 15 } 16 + (** Test case representation *) 17 18 (** Module type for file I/O operations *) 19 module type FILE_IO = sig 20 type ctx 21 + (** Context type for file operations (unit for sync, ~fs for Eio) *) 22 23 + val read_file : ctx -> string -> string option 24 (** Read a file, returning None if it doesn't exist or can't be read *) 25 26 val file_exists : ctx -> string -> bool 27 + (** Check if a path exists and is a regular file *) 28 29 val is_directory : ctx -> string -> bool 30 + (** Check if a path exists and is a directory *) 31 32 + val read_dir : ctx -> string -> string list 33 (** List directory entries *) 34 end 35 36 (** Create a test loader from file I/O operations *) ··· 45 } 46 47 let read_file_required ctx path = 48 + match IO.read_file ctx path with Some s -> s | None -> "" 49 50 (** Load a single test from a directory *) 51 let load_test_dir ctx base_id dir_path = ··· 58 (* Must have in.yaml to be a valid test *) 59 if not (IO.file_exists ctx yaml_file) then None 60 else 61 + let name = 62 + match IO.read_file ctx name_file with 63 | Some s -> String.trim s 64 | None -> base_id 65 in ··· 69 let fail = IO.file_exists ctx error_file in 70 Some { id = base_id; name; yaml; tree; json; fail } 71 72 + (** Load tests from a test ID directory (may have subdirectories for variants) 73 + *) 74 let load_test_id ctx test_suite_path test_id = 75 let dir_path = Filename.concat test_suite_path test_id in 76 if not (IO.is_directory ctx dir_path) then [] 77 else 78 let entries = IO.read_dir ctx dir_path in 79 (* Check if this directory has variant subdirectories (00, 01, etc.) *) 80 + let has_variants = 81 + List.exists 82 + (fun e -> 83 + let subdir = Filename.concat dir_path e in 84 + IO.is_directory ctx subdir 85 + && String.length e >= 2 86 + && e.[0] >= '0' 87 + && e.[0] <= '9') 88 + entries 89 + in 90 91 if has_variants then 92 (* Load each variant subdirectory *) 93 + let variants = 94 + entries 95 |> List.filter (fun e -> 96 let subdir = Filename.concat dir_path e in 97 + IO.is_directory ctx subdir 98 + && String.length e >= 2 99 + && e.[0] >= '0' 100 + && e.[0] <= '9') 101 |> List.sort String.compare 102 in 103 + List.filter_map 104 + (fun variant -> 105 + let variant_path = Filename.concat dir_path variant in 106 + let variant_id = Printf.sprintf "%s:%s" test_id variant in 107 + load_test_dir ctx variant_id variant_path) 108 + variants 109 else 110 (* Single test in this directory *) 111 match load_test_dir ctx test_id dir_path with 112 + | Some t -> [ t ] 113 | None -> [] 114 115 (** Load all tests from a test suite directory *) ··· 117 if not (IO.is_directory ctx test_suite_path) then [] 118 else 119 let entries = IO.read_dir ctx test_suite_path in 120 + let test_ids = 121 + entries 122 |> List.filter (fun e -> 123 + IO.is_directory ctx (Filename.concat test_suite_path e) 124 + && String.length e >= 4 125 + && e.[0] >= '0' 126 + && e.[0] <= 'Z') 127 |> List.sort String.compare 128 in 129 List.concat_map (load_test_id ctx test_suite_path) test_ids
+27 -28
tests/test_suite_lib/tree_format.ml
··· 9 10 let escape_string s = 11 let buf = Buffer.create (String.length s * 2) in 12 - String.iter (fun c -> 13 - match c with 14 - | '\n' -> Buffer.add_string buf "\\n" 15 - | '\t' -> Buffer.add_string buf "\\t" 16 - | '\r' -> Buffer.add_string buf "\\r" 17 - | '\\' -> Buffer.add_string buf "\\\\" 18 - | '\x00' -> Buffer.add_string buf "\\0" 19 - | '\x07' -> Buffer.add_string buf "\\a" 20 - | '\x08' -> Buffer.add_string buf "\\b" 21 - | '\x0b' -> Buffer.add_string buf "\\v" 22 - | '\x0c' -> Buffer.add_string buf "\\f" 23 - | '\x1b' -> Buffer.add_string buf "\\e" 24 - | '\xa0' -> Buffer.add_string buf "\\_" 25 - | c -> Buffer.add_char buf c 26 - ) s; 27 Buffer.contents buf 28 29 let style_char = function ··· 39 | Event.Stream_start _ -> "+STR" 40 | Event.Stream_end -> "-STR" 41 | Event.Document_start { implicit; _ } -> 42 - if implicit then "+DOC" 43 - else "+DOC ---" 44 - | Event.Document_end { implicit } -> 45 - if implicit then "-DOC" 46 - else "-DOC ..." 47 | Event.Mapping_start { anchor; tag; style; _ } -> 48 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in 49 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in ··· 60 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in 61 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in 62 let style_c = style_char style in 63 - Printf.sprintf "=VAL%s%s %c%s" anchor_str tag_str style_c (escape_string value) 64 - | Event.Alias { anchor } -> 65 - Printf.sprintf "=ALI *%s" anchor 66 67 let of_spanned_events events = 68 let buf = Buffer.create 256 in 69 - List.iter (fun (e : Event.spanned) -> 70 - let line = format_event e in 71 - Buffer.add_string buf line; 72 - Buffer.add_char buf '\n' 73 - ) events; 74 Buffer.contents buf
··· 9 10 let escape_string s = 11 let buf = Buffer.create (String.length s * 2) in 12 + String.iter 13 + (fun c -> 14 + match c with 15 + | '\n' -> Buffer.add_string buf "\\n" 16 + | '\t' -> Buffer.add_string buf "\\t" 17 + | '\r' -> Buffer.add_string buf "\\r" 18 + | '\\' -> Buffer.add_string buf "\\\\" 19 + | '\x00' -> Buffer.add_string buf "\\0" 20 + | '\x07' -> Buffer.add_string buf "\\a" 21 + | '\x08' -> Buffer.add_string buf "\\b" 22 + | '\x0b' -> Buffer.add_string buf "\\v" 23 + | '\x0c' -> Buffer.add_string buf "\\f" 24 + | '\x1b' -> Buffer.add_string buf "\\e" 25 + | '\xa0' -> Buffer.add_string buf "\\_" 26 + | c -> Buffer.add_char buf c) 27 + s; 28 Buffer.contents buf 29 30 let style_char = function ··· 40 | Event.Stream_start _ -> "+STR" 41 | Event.Stream_end -> "-STR" 42 | Event.Document_start { implicit; _ } -> 43 + if implicit then "+DOC" else "+DOC ---" 44 + | Event.Document_end { implicit } -> if implicit then "-DOC" else "-DOC ..." 45 | Event.Mapping_start { anchor; tag; style; _ } -> 46 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in 47 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in ··· 58 let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in 59 let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in 60 let style_c = style_char style in 61 + Printf.sprintf "=VAL%s%s %c%s" anchor_str tag_str style_c 62 + (escape_string value) 63 + | Event.Alias { anchor } -> Printf.sprintf "=ALI *%s" anchor 64 65 let of_spanned_events events = 66 let buf = Buffer.create 256 in 67 + List.iter 68 + (fun (e : Event.spanned) -> 69 + let line = format_event e in 70 + Buffer.add_string buf line; 71 + Buffer.add_char buf '\n') 72 + events; 73 Buffer.contents buf
+147 -112
tests/test_yamlrw.ml
··· 24 Alcotest.(check int) "token count" 8 (List.length token_types); 25 (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *) 26 match token_types with 27 - | Token.Stream_start _ :: Token.Block_mapping_start :: Token.Key :: 28 - Token.Scalar { value = "hello"; _ } :: Token.Value :: 29 - Token.Scalar { value = "world"; _ } :: Token.Block_end :: Token.Stream_end :: [] -> 30 () 31 - | _ -> 32 - Alcotest.fail "unexpected token sequence" 33 34 let test_scanner_sequence () = 35 let scanner = Scanner.of_string "- one\n- two\n- three" in ··· 39 let test_scanner_flow () = 40 let scanner = Scanner.of_string "[1, 2, 3]" in 41 let tokens = Scanner.to_list scanner in 42 - let has_flow_start = List.exists (fun (t : Token.spanned) -> 43 - match t.token with Token.Flow_sequence_start -> true | _ -> false 44 - ) tokens in 45 Alcotest.(check bool) "has flow sequence start" true has_flow_start 46 47 - let scanner_tests = [ 48 - "simple mapping", `Quick, test_scanner_simple; 49 - "sequence", `Quick, test_scanner_sequence; 50 - "flow sequence", `Quick, test_scanner_flow; 51 - ] 52 53 (** Parser tests *) 54 ··· 56 let parser = Parser.of_string "key: value" in 57 let events = Parser.to_list parser in 58 Alcotest.(check bool) "has events" true (List.length events > 0); 59 - let has_stream_start = List.exists (fun (e : Event.spanned) -> 60 - match e.event with Event.Stream_start _ -> true | _ -> false 61 - ) events in 62 Alcotest.(check bool) "has stream start" true has_stream_start 63 64 let test_parser_sequence_events () = 65 let parser = Parser.of_string "- a\n- b" in 66 let events = Parser.to_list parser in 67 - let has_seq_start = List.exists (fun (e : Event.spanned) -> 68 - match e.event with Event.Sequence_start _ -> true | _ -> false 69 - ) events in 70 Alcotest.(check bool) "has sequence start" true has_seq_start 71 72 - let parser_tests = [ 73 - "parse events", `Quick, test_parser_events; 74 - "sequence events", `Quick, test_parser_sequence_events; 75 - ] 76 77 (** Value parsing tests *) 78 ··· 93 check_value "float" (`Float 3.14) (of_string "3.14") 94 95 let test_parse_string () = 96 - check_value "plain" (`String "hello") (of_string "hello world" |> function `String s -> `String (String.sub s 0 5) | v -> v); 97 check_value "quoted" (`String "hello") (of_string {|"hello"|}) 98 99 let test_parse_sequence () = 100 let result = of_string "- one\n- two\n- three" in 101 match result with 102 - | `A [_; _; _] -> () 103 | _ -> Alcotest.fail "expected sequence with 3 elements" 104 105 let test_parse_mapping () = ··· 118 |} in 119 let result = of_string yaml in 120 match result with 121 - | `O [("person", `O _)] -> () 122 | _ -> Alcotest.fail "expected nested structure" 123 124 let test_parse_flow_sequence () = 125 let result = of_string "[1, 2, 3]" in 126 match result with 127 - | `A [`Float 1.0; `Float 2.0; `Float 3.0] -> () 128 | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]" 129 130 let test_parse_flow_mapping () = 131 let result = of_string "{a: 1, b: 2}" in 132 match result with 133 - | `O [("a", `Float 1.0); ("b", `Float 2.0)] -> () 134 | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}" 135 136 let test_parse_flow_mapping_trailing_comma () = 137 let result = of_string "{ a: 1, }" in 138 match result with 139 - | `O [("a", `Float 1.0)] -> () 140 | `O pairs -> 141 - Alcotest.failf "expected 1 pair but got %d pairs (trailing comma should not create empty entry)" 142 (List.length pairs) 143 | _ -> Alcotest.fail "expected flow mapping with 1 pair" 144 145 - let value_tests = [ 146 - "parse null", `Quick, test_parse_null; 147 - "parse bool", `Quick, test_parse_bool; 148 - "parse number", `Quick, test_parse_number; 149 - "parse string", `Quick, test_parse_string; 150 - "parse sequence", `Quick, test_parse_sequence; 151 - "parse mapping", `Quick, test_parse_mapping; 152 - "parse nested", `Quick, test_parse_nested; 153 - "parse flow sequence", `Quick, test_parse_flow_sequence; 154 - "parse flow mapping", `Quick, test_parse_flow_mapping; 155 - "flow mapping trailing comma", `Quick, test_parse_flow_mapping_trailing_comma; 156 - ] 157 158 (** Emitter tests *) 159 ··· 162 Alcotest.(check bool) "contains null" true (String.length result > 0) 163 164 let starts_with prefix s = 165 - String.length s >= String.length prefix && 166 - String.sub s 0 (String.length prefix) = prefix 167 168 let test_emit_mapping () = 169 - let value = `O [("name", `String "Alice"); ("age", `Float 30.0)] in 170 let result = to_string value in 171 let trimmed = String.trim result in 172 - Alcotest.(check bool) "contains name" true (starts_with "name" trimmed || starts_with "\"name\"" trimmed) 173 174 let test_roundtrip_simple () = 175 let yaml = "name: Alice" in ··· 187 () 188 | _ -> Alcotest.fail "roundtrip failed" 189 190 - let emitter_tests = [ 191 - "emit null", `Quick, test_emit_null; 192 - "emit mapping", `Quick, test_emit_mapping; 193 - "roundtrip simple", `Quick, test_roundtrip_simple; 194 - "roundtrip sequence", `Quick, test_roundtrip_sequence; 195 - ] 196 197 (** YAML-specific tests *) 198 ··· 204 | _ -> Alcotest.fail "expected scalar with anchor" 205 206 let test_yaml_alias () = 207 - let yaml = {| 208 defaults: &defaults 209 timeout: 30 210 production: 211 <<: *defaults 212 port: 8080 213 - |} in 214 (* Just check it parses without error *) 215 let _ = yaml_of_string yaml in 216 () 217 218 - let yaml_tests = [ 219 - "yaml anchor", `Quick, test_yaml_anchor; 220 - "yaml alias", `Quick, test_yaml_alias; 221 - ] 222 223 (** Multiline scalar tests *) 224 ··· 230 |} in 231 let result = of_string yaml in 232 match result with 233 - | `O [("description", `String _)] -> () 234 | _ -> Alcotest.fail "expected mapping with literal block" 235 236 let test_folded_block () = ··· 241 |} in 242 let result = of_string yaml in 243 match result with 244 - | `O [("description", `String _)] -> () 245 | _ -> Alcotest.fail "expected mapping with folded block" 246 247 - let multiline_tests = [ 248 - "literal block", `Quick, test_literal_block; 249 - "folded block", `Quick, test_folded_block; 250 - ] 251 252 (** Error handling tests *) 253 ··· 255 try 256 let _ = of_string "key: [unclosed" in 257 Alcotest.fail "expected error" 258 - with 259 - | Yamlrw_error e -> 260 - Alcotest.(check bool) "has span" true (e.span <> None) 261 262 - let error_tests = [ 263 - "error position", `Quick, test_error_position; 264 - ] 265 266 (** Alias expansion limit tests (billion laughs protection) *) 267 268 let test_node_limit () = 269 (* Small bomb that would expand to 9^4 = 6561 nodes *) 270 - let yaml = {| 271 a: &a [1,2,3,4,5,6,7,8,9] 272 b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a] 273 c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b] 274 d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c] 275 - |} in 276 (* Should fail with a small node limit *) 277 try 278 let _ = of_string ~max_nodes:100 yaml in 279 Alcotest.fail "expected node limit error" 280 - with 281 - | Yamlrw_error e -> 282 - (match e.Error.kind with 283 - | Error.Alias_expansion_node_limit _ -> () 284 - | _ -> Alcotest.fail "expected Alias_expansion_node_limit error") 285 286 let test_depth_limit () = 287 (* Create deeply nested alias chain: 288 *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z] 289 Each alias resolution increases depth by 1 *) 290 - let yaml = {| 291 a: &a [x, y, z] 292 b: &b [*a, *a] 293 c: &c [*b, *b] 294 d: &d [*c, *c] 295 e: &e [*d, *d] 296 result: *e 297 - |} in 298 (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *) 299 try 300 let _ = of_string ~max_depth:3 yaml in 301 Alcotest.fail "expected depth limit error" 302 - with 303 - | Yamlrw_error e -> 304 - (match e.Error.kind with 305 - | Error.Alias_expansion_depth_limit _ -> () 306 - | _ -> Alcotest.fail ("expected Alias_expansion_depth_limit error, got: " ^ Error.kind_to_string e.Error.kind)) 307 308 let test_normal_aliases_work () = 309 (* Normal alias usage should work fine *) 310 - let yaml = {| 311 defaults: &defaults 312 timeout: 30 313 retries: 3 314 production: 315 <<: *defaults 316 port: 8080 317 - |} in 318 let result = of_string yaml in 319 - match result with 320 - | `O _ -> () 321 - | _ -> Alcotest.fail "expected mapping" 322 323 let test_resolve_aliases_false () = 324 (* With resolve_aliases=false, aliases should remain unresolved *) ··· 329 let result = yaml_of_string ~resolve_aliases:false yaml in 330 (* Check that alias is preserved *) 331 match result with 332 - | `O map -> 333 let pairs = Mapping.members map in 334 - (match List.assoc_opt (`Scalar (Scalar.make "b")) pairs with 335 - | Some (`Alias "anchor") -> () 336 - | _ -> Alcotest.fail "expected alias to be preserved") 337 | _ -> Alcotest.fail "expected mapping" 338 339 - let alias_limit_tests = [ 340 - "node limit", `Quick, test_node_limit; 341 - "depth limit", `Quick, test_depth_limit; 342 - "normal aliases work", `Quick, test_normal_aliases_work; 343 - "resolve_aliases false", `Quick, test_resolve_aliases_false; 344 - ] 345 346 (** Bug fix regression tests 347 These tests verify that issues fixed in ocaml-yaml don't occur in ocaml-yamlrw *) ··· 411 let test_parse_special_floats () = 412 let inf_result = of_string ".inf" in 413 (match inf_result with 414 - | `Float f when Float.is_inf f && f > 0.0 -> () 415 | _ -> Alcotest.fail "expected positive infinity"); 416 let neg_inf_result = of_string "-.inf" in 417 (match neg_inf_result with 418 - | `Float f when Float.is_inf f && f < 0.0 -> () 419 | _ -> Alcotest.fail "expected negative infinity"); 420 let nan_result = of_string ".nan" in 421 (match nan_result with ··· 485 (** Run all tests *) 486 487 let () = 488 - Alcotest.run "yamlrw" [ 489 - "scanner", scanner_tests; 490 - "parser", parser_tests; 491 - "value", value_tests; 492 - "emitter", emitter_tests; 493 - "yaml", yaml_tests; 494 - "multiline", multiline_tests; 495 - "errors", error_tests; 496 - "alias_limits", alias_limit_tests; 497 - "bugfix_regression", bugfix_regression_tests; 498 - ]
··· 24 Alcotest.(check int) "token count" 8 (List.length token_types); 25 (* Stream_start, Block_mapping_start, Key, Scalar, Value, Scalar, Block_end, Stream_end *) 26 match token_types with 27 + | [ 28 + Token.Stream_start _; 29 + Token.Block_mapping_start; 30 + Token.Key; 31 + Token.Scalar { value = "hello"; _ }; 32 + Token.Value; 33 + Token.Scalar { value = "world"; _ }; 34 + Token.Block_end; 35 + Token.Stream_end; 36 + ] -> 37 () 38 + | _ -> Alcotest.fail "unexpected token sequence" 39 40 let test_scanner_sequence () = 41 let scanner = Scanner.of_string "- one\n- two\n- three" in ··· 45 let test_scanner_flow () = 46 let scanner = Scanner.of_string "[1, 2, 3]" in 47 let tokens = Scanner.to_list scanner in 48 + let has_flow_start = 49 + List.exists 50 + (fun (t : Token.spanned) -> 51 + match t.token with Token.Flow_sequence_start -> true | _ -> false) 52 + tokens 53 + in 54 Alcotest.(check bool) "has flow sequence start" true has_flow_start 55 56 + let scanner_tests = 57 + [ 58 + ("simple mapping", `Quick, test_scanner_simple); 59 + ("sequence", `Quick, test_scanner_sequence); 60 + ("flow sequence", `Quick, test_scanner_flow); 61 + ] 62 63 (** Parser tests *) 64 ··· 66 let parser = Parser.of_string "key: value" in 67 let events = Parser.to_list parser in 68 Alcotest.(check bool) "has events" true (List.length events > 0); 69 + let has_stream_start = 70 + List.exists 71 + (fun (e : Event.spanned) -> 72 + match e.event with Event.Stream_start _ -> true | _ -> false) 73 + events 74 + in 75 Alcotest.(check bool) "has stream start" true has_stream_start 76 77 let test_parser_sequence_events () = 78 let parser = Parser.of_string "- a\n- b" in 79 let events = Parser.to_list parser in 80 + let has_seq_start = 81 + List.exists 82 + (fun (e : Event.spanned) -> 83 + match e.event with Event.Sequence_start _ -> true | _ -> false) 84 + events 85 + in 86 Alcotest.(check bool) "has sequence start" true has_seq_start 87 88 + let parser_tests = 89 + [ 90 + ("parse events", `Quick, test_parser_events); 91 + ("sequence events", `Quick, test_parser_sequence_events); 92 + ] 93 94 (** Value parsing tests *) 95 ··· 110 check_value "float" (`Float 3.14) (of_string "3.14") 111 112 let test_parse_string () = 113 + check_value "plain" (`String "hello") 114 + ( of_string "hello world" |> function 115 + | `String s -> `String (String.sub s 0 5) 116 + | v -> v ); 117 check_value "quoted" (`String "hello") (of_string {|"hello"|}) 118 119 let test_parse_sequence () = 120 let result = of_string "- one\n- two\n- three" in 121 match result with 122 + | `A [ _; _; _ ] -> () 123 | _ -> Alcotest.fail "expected sequence with 3 elements" 124 125 let test_parse_mapping () = ··· 138 |} in 139 let result = of_string yaml in 140 match result with 141 + | `O [ ("person", `O _) ] -> () 142 | _ -> Alcotest.fail "expected nested structure" 143 144 let test_parse_flow_sequence () = 145 let result = of_string "[1, 2, 3]" in 146 match result with 147 + | `A [ `Float 1.0; `Float 2.0; `Float 3.0 ] -> () 148 | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]" 149 150 let test_parse_flow_mapping () = 151 let result = of_string "{a: 1, b: 2}" in 152 match result with 153 + | `O [ ("a", `Float 1.0); ("b", `Float 2.0) ] -> () 154 | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}" 155 156 let test_parse_flow_mapping_trailing_comma () = 157 let result = of_string "{ a: 1, }" in 158 match result with 159 + | `O [ ("a", `Float 1.0) ] -> () 160 | `O pairs -> 161 + Alcotest.failf 162 + "expected 1 pair but got %d pairs (trailing comma should not create \ 163 + empty entry)" 164 (List.length pairs) 165 | _ -> Alcotest.fail "expected flow mapping with 1 pair" 166 167 + let value_tests = 168 + [ 169 + ("parse null", `Quick, test_parse_null); 170 + ("parse bool", `Quick, test_parse_bool); 171 + ("parse number", `Quick, test_parse_number); 172 + ("parse string", `Quick, test_parse_string); 173 + ("parse sequence", `Quick, test_parse_sequence); 174 + ("parse mapping", `Quick, test_parse_mapping); 175 + ("parse nested", `Quick, test_parse_nested); 176 + ("parse flow sequence", `Quick, test_parse_flow_sequence); 177 + ("parse flow mapping", `Quick, test_parse_flow_mapping); 178 + ( "flow mapping trailing comma", 179 + `Quick, 180 + test_parse_flow_mapping_trailing_comma ); 181 + ] 182 183 (** Emitter tests *) 184 ··· 187 Alcotest.(check bool) "contains null" true (String.length result > 0) 188 189 let starts_with prefix s = 190 + String.length s >= String.length prefix 191 + && String.sub s 0 (String.length prefix) = prefix 192 193 let test_emit_mapping () = 194 + let value = `O [ ("name", `String "Alice"); ("age", `Float 30.0) ] in 195 let result = to_string value in 196 let trimmed = String.trim result in 197 + Alcotest.(check bool) 198 + "contains name" true 199 + (starts_with "name" trimmed || starts_with "\"name\"" trimmed) 200 201 let test_roundtrip_simple () = 202 let yaml = "name: Alice" in ··· 214 () 215 | _ -> Alcotest.fail "roundtrip failed" 216 217 + let emitter_tests = 218 + [ 219 + ("emit null", `Quick, test_emit_null); 220 + ("emit mapping", `Quick, test_emit_mapping); 221 + ("roundtrip simple", `Quick, test_roundtrip_simple); 222 + ("roundtrip sequence", `Quick, test_roundtrip_sequence); 223 + ] 224 225 (** YAML-specific tests *) 226 ··· 232 | _ -> Alcotest.fail "expected scalar with anchor" 233 234 let test_yaml_alias () = 235 + let yaml = 236 + {| 237 defaults: &defaults 238 timeout: 30 239 production: 240 <<: *defaults 241 port: 8080 242 + |} 243 + in 244 (* Just check it parses without error *) 245 let _ = yaml_of_string yaml in 246 () 247 248 + let yaml_tests = 249 + [ 250 + ("yaml anchor", `Quick, test_yaml_anchor); 251 + ("yaml alias", `Quick, test_yaml_alias); 252 + ] 253 254 (** Multiline scalar tests *) 255 ··· 261 |} in 262 let result = of_string yaml in 263 match result with 264 + | `O [ ("description", `String _) ] -> () 265 | _ -> Alcotest.fail "expected mapping with literal block" 266 267 let test_folded_block () = ··· 272 |} in 273 let result = of_string yaml in 274 match result with 275 + | `O [ ("description", `String _) ] -> () 276 | _ -> Alcotest.fail "expected mapping with folded block" 277 278 + let multiline_tests = 279 + [ 280 + ("literal block", `Quick, test_literal_block); 281 + ("folded block", `Quick, test_folded_block); 282 + ] 283 284 (** Error handling tests *) 285 ··· 287 try 288 let _ = of_string "key: [unclosed" in 289 Alcotest.fail "expected error" 290 + with Yamlrw_error e -> Alcotest.(check bool) "has span" true (e.span <> None) 291 292 + let error_tests = [ ("error position", `Quick, test_error_position) ] 293 294 (** Alias expansion limit tests (billion laughs protection) *) 295 296 let test_node_limit () = 297 (* Small bomb that would expand to 9^4 = 6561 nodes *) 298 + let yaml = 299 + {| 300 a: &a [1,2,3,4,5,6,7,8,9] 301 b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a] 302 c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b] 303 d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c] 304 + |} 305 + in 306 (* Should fail with a small node limit *) 307 try 308 let _ = of_string ~max_nodes:100 yaml in 309 Alcotest.fail "expected node limit error" 310 + with Yamlrw_error e -> ( 311 + match e.Error.kind with 312 + | Error.Alias_expansion_node_limit _ -> () 313 + | _ -> Alcotest.fail "expected Alias_expansion_node_limit error") 314 315 let test_depth_limit () = 316 (* Create deeply nested alias chain: 317 *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z] 318 Each alias resolution increases depth by 1 *) 319 + let yaml = 320 + {| 321 a: &a [x, y, z] 322 b: &b [*a, *a] 323 c: &c [*b, *b] 324 d: &d [*c, *c] 325 e: &e [*d, *d] 326 result: *e 327 + |} 328 + in 329 (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *) 330 try 331 let _ = of_string ~max_depth:3 yaml in 332 Alcotest.fail "expected depth limit error" 333 + with Yamlrw_error e -> ( 334 + match e.Error.kind with 335 + | Error.Alias_expansion_depth_limit _ -> () 336 + | _ -> 337 + Alcotest.fail 338 + ("expected Alias_expansion_depth_limit error, got: " 339 + ^ Error.kind_to_string e.Error.kind)) 340 341 let test_normal_aliases_work () = 342 (* Normal alias usage should work fine *) 343 + let yaml = 344 + {| 345 defaults: &defaults 346 timeout: 30 347 retries: 3 348 production: 349 <<: *defaults 350 port: 8080 351 + |} 352 + in 353 let result = of_string yaml in 354 + match result with `O _ -> () | _ -> Alcotest.fail "expected mapping" 355 356 let test_resolve_aliases_false () = 357 (* With resolve_aliases=false, aliases should remain unresolved *) ··· 362 let result = yaml_of_string ~resolve_aliases:false yaml in 363 (* Check that alias is preserved *) 364 match result with 365 + | `O map -> ( 366 let pairs = Mapping.members map in 367 + match List.assoc_opt (`Scalar (Scalar.make "b")) pairs with 368 + | Some (`Alias "anchor") -> () 369 + | _ -> Alcotest.fail "expected alias to be preserved") 370 | _ -> Alcotest.fail "expected mapping" 371 372 + let alias_limit_tests = 373 + [ 374 + ("node limit", `Quick, test_node_limit); 375 + ("depth limit", `Quick, test_depth_limit); 376 + ("normal aliases work", `Quick, test_normal_aliases_work); 377 + ("resolve_aliases false", `Quick, test_resolve_aliases_false); 378 + ] 379 380 (** Bug fix regression tests 381 These tests verify that issues fixed in ocaml-yaml don't occur in ocaml-yamlrw *) ··· 445 let test_parse_special_floats () = 446 let inf_result = of_string ".inf" in 447 (match inf_result with 448 + | `Float f when Float.is_infinite f && f > 0.0 -> () 449 | _ -> Alcotest.fail "expected positive infinity"); 450 let neg_inf_result = of_string "-.inf" in 451 (match neg_inf_result with 452 + | `Float f when Float.is_infinite f && f < 0.0 -> () 453 | _ -> Alcotest.fail "expected negative infinity"); 454 let nan_result = of_string ".nan" in 455 (match nan_result with ··· 519 (** Run all tests *) 520 521 let () = 522 + Alcotest.run "yamlrw" 523 + [ 524 + ("scanner", scanner_tests); 525 + ("parser", parser_tests); 526 + ("value", value_tests); 527 + ("emitter", emitter_tests); 528 + ("yaml", yaml_tests); 529 + ("multiline", multiline_tests); 530 + ("errors", error_tests); 531 + ("alias_limits", alias_limit_tests); 532 + ("bugfix_regression", bugfix_regression_tests); 533 + ]