RFC6901 JSON Pointer implementation in OCaml using jsont
at ff0fffb172600f6b50dfbe3e404008d2f3e88337 526 lines 20 kB view raw
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** RFC 6901 JSON Pointer implementation for jsont.

    This module provides {{:https://www.rfc-editor.org/rfc/rfc6901}RFC 6901}
    JSON Pointer parsing, serialization, and evaluation compatible with
    {!Jsont} codecs.

    {1 JSON Pointer vs JSON Path}

    JSON Pointer (RFC 6901) and {!Jsont.Path} serve similar purposes but
    have important differences:

    {ul
    {- {b JSON Pointer} is a {e string syntax} for addressing JSON values,
       designed for use in URIs and JSON documents (like JSON Patch).
       It uses [/] as separator and has escape sequences ([~0], [~1]).}
    {- {b Jsont.Path} is an {e OCaml data structure} for programmatic
       navigation, with no string representation defined.}}

    A key difference is the [-] token: JSON Pointer's [-] refers to the
    (nonexistent) element {e after} the last array element. This is used
    for append operations in JSON Patch but is meaningless for retrieval.
    {!Jsont.Path} has no equivalent concept.

    This library uses phantom types to enforce this distinction at compile
    time: pointers that may contain [-] ({!append} pointers) cannot be
    passed to retrieval functions like {!get}.

    {2 Example}

    Given the JSON document:
    {v
    {
      "foo": ["bar", "baz"],
      "": 0,
      "a/b": 1,
      "m~n": 2
    }
    v}

    The following JSON Pointers evaluate to:
    {ul
    {- [""] - the whole document}
    {- ["/foo"] - the array [\["bar", "baz"\]]}
    {- ["/foo/0"] - the string ["bar"]}
    {- ["/"] - the integer [0] (empty string key)}
    {- ["/a~1b"] - the integer [1] ([~1] escapes [/])}
    {- ["/m~0n"] - the integer [2] ([~0] escapes [~])}
    {- ["/foo/-"] - nonexistent; only valid for mutations}}

    {1:tokens Reference Tokens}

    JSON Pointer uses escape sequences for special characters in reference
    tokens. The character [~] must be encoded as [~0] and [/] as [~1].
    When unescaping, [~1] is processed before [~0] to correctly handle
    sequences like [~01] which should become [~1], not [/]. *)

(** {1 Reference tokens}

    Reference tokens are the individual segments between [/] characters
    in a JSON Pointer string. They require escaping of [~] and [/]. *)
module Token : sig

  type t = string
  (** The type for unescaped reference tokens. These are plain strings
      representing object member names or array index strings. *)

  val escape : t -> string
  (** [escape s] escapes special characters in [s] for use in a JSON Pointer.
      Specifically, [~] becomes [~0] and [/] becomes [~1]. *)

  val unescape : string -> t
  (** [unescape s] unescapes a JSON Pointer reference token.
      Specifically, [~1] becomes [/] and [~0] becomes [~].

      @raise Jsont.Error if [s] contains invalid escape sequences
      (a [~] not followed by [0] or [1]). *)
end

(** {1 Indices}

    Indices are the individual navigation steps in a JSON Pointer.
    This library reuses {!Jsont.Path.index} directly - the JSON Pointer
    specific [-] token is handled separately via phantom types on the
    pointer type itself. *)

type index = Jsont.Path.index
(** The type for navigation indices. This is exactly {!Jsont.Path.index}:
    either [Jsont.Path.Mem (name, meta)] for object member access or
    [Jsont.Path.Nth (n, meta)] for array index access. *)

val mem : ?meta:Jsont.Meta.t -> string -> index
(** [mem ?meta s] is [Jsont.Path.Mem (s, meta)].
    Convenience constructor for object member access.
    [meta] defaults to {!Jsont.Meta.none}. *)

val nth : ?meta:Jsont.Meta.t -> int -> index
(** [nth ?meta n] is [Jsont.Path.Nth (n, meta)].
    Convenience constructor for array index access.
    [meta] defaults to {!Jsont.Meta.none}. *)

val pp_index : Format.formatter -> index -> unit
(** [pp_index] formats an index in JSON Pointer string notation. *)

val equal_index : index -> index -> bool
(** [equal_index i1 i2] is [true] iff [i1] and [i2] are the same index. *)

val compare_index : index -> index -> int
(** [compare_index i1 i2] is a total order on indices. *)

(** {1 Pointers}

    JSON Pointers use phantom types to distinguish between:
    {ul
    {- {!nav} pointers that reference existing elements (safe for all
       operations)}
    {- {!append} pointers that end with [-]. These are only accepted where
       a new position makes sense: {!add}, {!set}, {!set_path}, and the
       [path] destination of {!move} and {!copy}.}}

    This ensures at compile time that you cannot accidentally try to
    retrieve a nonexistent "end of array" position. *)

type 'a t
(** The type for JSON Pointers. The phantom type ['a] indicates whether
    the pointer can be used for navigation ([nav]) or only for append
    operations ([append]). *)

type nav
(** Phantom type for pointers that reference existing elements.
    These can be used with all operations including {!get} and {!find}. *)

type append
(** Phantom type for pointers ending with [-] (the "after last element"
    position). These can only be passed to functions whose pointer
    argument has type [_ t]: {!add}, {!set}, {!set_path}, and the [path]
    destination of {!move} and {!copy}. *)

val root : nav t
(** [root] is the empty pointer that references the whole document.
    In string form this is [""]. *)

val is_root : _ t -> bool
(** [is_root p] is [true] iff [p] is the {!root} pointer. *)

val make : index list -> nav t
(** [make indices] creates a navigation pointer from a list of indices.
    The list is ordered from root to target (i.e., the first element
    is the first step from the root). *)

val ( / ) : nav t -> index -> nav t
(** [p / idx] appends [idx] to pointer [p]. Operator form of {!append_index}. *)

val append_index : nav t -> index -> nav t
(** [append_index p idx] appends [idx] to the end of pointer [p]. *)

val at_end : nav t -> append t
(** [at_end p] creates an append pointer by adding [-] to [p].
    The resulting pointer refers to the position after the last element
    of the array at [p]. The result is only accepted by functions taking
    a [_ t] pointer: {!add}, {!set}, {!set_path}, and the [path]
    destination of {!move} and {!copy}. *)

val concat : nav t -> nav t -> nav t
(** [concat p1 p2] appends all indices of [p2] to [p1]. *)

val parent : nav t -> nav t option
(** [parent p] returns the parent pointer of [p], or [None] if [p]
    is the {!root}. *)

val last : nav t -> index option
(** [last p] returns the last index of [p], or [None] if [p] is
    the {!root}. *)

val indices : _ t -> index list
(** [indices p] returns the indices of [p] from root to target.
    Note: for append pointers, this returns the indices of the path
    portion; the [-] (append position) is not represented as an index. *)

(** {2:parsing Parsing} *)

val of_string : string -> [ `Nav of nav t | `Append of append t ]
(** [of_string s] parses a JSON Pointer from its string representation.

    Returns [`Nav p] for pointers without [-], or [`Append p] for
    pointers ending with [-].

    The string must be either empty (representing the root) or start
    with [/]. Each segment between [/] characters is unescaped as a
    reference token.

    @raise Jsont.Error if [s] has invalid syntax:
    - Non-empty string not starting with [/]
    - Invalid escape sequence ([~] not followed by [0] or [1])
    - [-] appears in non-final position *)

val of_string_nav : string -> nav t
(** [of_string_nav s] parses a JSON Pointer that must not contain [-].

    @raise Jsont.Error if [s] has invalid syntax or contains [-]. *)

val of_string_result : string -> ([ `Nav of nav t | `Append of append t ], string) result
(** [of_string_result s] is like {!of_string} but returns a result
    instead of raising. *)

val of_uri_fragment : string -> [ `Nav of nav t | `Append of append t ]
(** [of_uri_fragment s] parses a JSON Pointer from URI fragment form.

    This is like {!of_string} but first percent-decodes the string
    according to {{:https://www.rfc-editor.org/rfc/rfc3986}RFC 3986}.
    The leading [#] should {b not} be included in [s].

    @raise Jsont.Error on invalid syntax or invalid percent-encoding. *)

val of_uri_fragment_nav : string -> nav t
(** [of_uri_fragment_nav s] is like {!of_uri_fragment} but requires
    the pointer to not contain [-].

    @raise Jsont.Error if invalid or contains [-]. *)

val of_uri_fragment_result : string -> ([ `Nav of nav t | `Append of append t ], string) result
(** [of_uri_fragment_result s] is like {!of_uri_fragment} but returns
    a result instead of raising. *)

(** {2:serializing Serializing} *)

val to_string : _ t -> string
(** [to_string p] serializes [p] to its JSON Pointer string representation.

    Returns [""] for the root pointer, otherwise [/] followed by
    escaped reference tokens joined by [/]. Append pointers include
    the trailing [/-]. *)

val to_uri_fragment : _ t -> string
(** [to_uri_fragment p] serializes [p] to URI fragment form.

    This is like {!to_string} but additionally percent-encodes
    characters that are not allowed in URI fragments per RFC 3986.
    The leading [#] is {b not} included in the result. *)

val pp : Format.formatter -> _ t -> unit
(** [pp] formats a pointer using {!to_string}. *)

val pp_verbose : Format.formatter -> _ t -> unit
(** [pp_verbose] formats a pointer showing its index structure.
    For example, [/foo/0] is formatted as [[Mem "foo"; Nth 0]].
    Append pointers show [/-] at the end.
    Useful for debugging and understanding pointer structure. *)

(** {2:comparison Comparison} *)

val equal : _ t -> _ t -> bool
(** [equal p1 p2] is [true] iff [p1] and [p2] have the same indices
    and the same append status. *)

val compare : _ t -> _ t -> int
(** [compare p1 p2] is a total order on pointers, comparing indices
    lexicographically. Append pointers sort after nav pointers with
    the same prefix. *)

(** {2:jsont_path Conversion with Jsont.Path} *)

val of_path : Jsont.Path.t -> nav t
(** [of_path p] converts a {!Jsont.Path.t} to a JSON Pointer.
    Always returns a {!nav} pointer since {!Jsont.Path} has no [-] concept. *)

val to_path : nav t -> Jsont.Path.t
(** [to_path p] converts a navigation pointer to a {!Jsont.Path.t}. *)

(** {1 Evaluation}

    These functions evaluate a JSON Pointer against a {!Jsont.json} value
    to retrieve the referenced value. They only accept {!nav} pointers
    since {!append} pointers refer to nonexistent positions. *)

val get : nav t -> Jsont.json -> Jsont.json
(** [get p json] retrieves the value at pointer [p] in [json].

    @raise Jsont.Error if:
    - The pointer references a nonexistent object member
    - The pointer references an out-of-bounds array index
    - An index type doesn't match the JSON value (e.g., [Nth]
      on an object) *)

val get_result : nav t -> Jsont.json -> (Jsont.json, Jsont.Error.t) result
(** [get_result p json] is like {!get} but returns a result. *)

val find : nav t -> Jsont.json -> Jsont.json option
(** [find p json] is like {!get} but returns [None] instead of
    raising when the pointer doesn't resolve to a value. *)

(** {1 Mutation}

    These functions modify a {!Jsont.json} value at a location specified
    by a JSON Pointer. They are designed to support
    {{:https://www.rfc-editor.org/rfc/rfc6902}RFC 6902 JSON Patch}
    operations.

    All mutation functions return a new JSON value with the modification
    applied; they do not mutate the input.

    Functions that support the [-] token ({!add}, {!set}, and the [path]
    destination of {!move} and {!copy}) accept any pointer type ([_ t]).
    Functions that require an existing element ({!remove}, {!replace},
    and the [from] source of {!move} and {!copy}) only accept {!nav}
    pointers. *)

val set : _ t -> Jsont.json -> value:Jsont.json -> Jsont.json
(** [set p json ~value] replaces the value at pointer [p] with [value].

    For {!append} pointers, appends [value] to the end of the array.

    @raise Jsont.Error if the pointer doesn't resolve to an existing
    location (except for {!append} pointers on arrays). *)

val add : _ t -> Jsont.json -> value:Jsont.json -> Jsont.json
(** [add p json ~value] adds [value] at the location specified by [p].

    The behavior depends on the target:
    {ul
    {- For objects: If the member exists, it is replaced. If it doesn't
       exist, a new member is added.}
    {- For arrays with [Nth]: Inserts [value] {e before} the
       specified index, shifting subsequent elements. The index must be
       valid (0 to length inclusive).}
    {- For {!append} pointers: Appends [value] to the array.}}

    @raise Jsont.Error if:
    - The parent of the target location doesn't exist
    - An array index is out of bounds (except for {!append} pointers)
    - The parent is not an object or array *)

val remove : nav t -> Jsont.json -> Jsont.json
(** [remove p json] removes the value at pointer [p].

    For objects, removes the member. For arrays, removes the element
    and shifts subsequent elements.

    @raise Jsont.Error if:
    - [p] is the root (cannot remove the root)
    - The pointer doesn't resolve to an existing value *)

val replace : nav t -> Jsont.json -> value:Jsont.json -> Jsont.json
(** [replace p json ~value] replaces the value at pointer [p] with [value].

    Unlike {!add}, this requires the target to exist.

    @raise Jsont.Error if the pointer doesn't resolve to an existing value. *)

val move : from:nav t -> path:_ t -> Jsont.json -> Jsont.json
(** [move ~from ~path json] moves the value from [from] to [path].

    This is equivalent to {!remove} at [from] followed by {!add}
    at [path] with the removed value.

    @raise Jsont.Error if:
    - [from] doesn't resolve to a value
    - [from] is a proper prefix of [path], i.e. the value would be moved
      into one of its own children
      ({{:https://www.rfc-editor.org/rfc/rfc6902#section-4.4}RFC 6902
      Section 4.4})

    NOTE(review): an earlier revision of this comment stated the prefix
    condition the other way round ([path] a proper prefix of [from]).
    Confirm that the implementation rejects the RFC 6902 case. *)

val copy : from:nav t -> path:_ t -> Jsont.json -> Jsont.json
(** [copy ~from ~path json] copies the value from [from] to [path].

    This is equivalent to {!get} at [from] followed by {!add}
    at [path] with the retrieved value.

    @raise Jsont.Error if [from] doesn't resolve to a value. *)

val test : nav t -> Jsont.json -> expected:Jsont.json -> bool
(** [test p json ~expected] tests if the value at [p] equals [expected].

    Returns [true] if the values are equal according to {!Jsont.Json.equal},
    [false] otherwise. Also returns [false] (rather than raising) if the
    pointer doesn't resolve.

    Note: This implements the semantics of the JSON Patch "test" operation. *)

(** {1 Jsont Integration}

    These types and functions integrate JSON Pointers with the {!Jsont}
    codec system. *)

val jsont : [ `Nav of nav t | `Append of append t ] Jsont.t
(** [jsont] is a {!Jsont.t} codec for JSON Pointers.

    On decode, parses a JSON string as a JSON Pointer using {!of_string}.
    On encode, serializes a pointer to a JSON string using {!to_string}. *)

val jsont_nav : nav t Jsont.t
(** [jsont_nav] is a {!Jsont.t} codec for navigation JSON Pointers.

    On decode, parses using {!of_string_nav} (fails on [-]).
    On encode, serializes using {!to_string}. *)

val jsont_uri_fragment : [ `Nav of nav t | `Append of append t ] Jsont.t
(** [jsont_uri_fragment] is like {!jsont} but uses URI fragment encoding.

    On decode, parses using {!of_uri_fragment}.
    On encode, serializes using {!to_uri_fragment}. *)

(** {2:query Query combinators}

    These combinators integrate with jsont's query system, allowing
    JSON Pointers to be used with jsont codecs for typed access. *)

val path : ?absent:'a -> nav t -> 'a Jsont.t -> 'a Jsont.t
(** [path p t] decodes the value at pointer [p] using codec [t].

    If [absent] is provided and the pointer doesn't resolve, returns
    [absent] instead of raising.

    This is similar to {!Jsont.path} but uses JSON Pointer syntax. *)

val set_path : ?allow_absent:bool -> 'a Jsont.t -> _ t -> 'a -> Jsont.json Jsont.t
(** [set_path t p v] sets the value at pointer [p] to [v] encoded with [t].

    If [allow_absent] is [true] (default [false]), creates missing
    intermediate structure as needed.

    This is similar to {!Jsont.set_path} but uses JSON Pointer syntax. *)

val update_path : ?absent:'a -> nav t -> 'a Jsont.t -> Jsont.json Jsont.t
(** [update_path p t] recodes the value at pointer [p] with codec [t].

    This is similar to {!Jsont.update_path} but uses JSON Pointer syntax.

    NOTE(review): the role of [absent] is not described here. Presumably
    it is the value used when [p] doesn't resolve, mirroring
    {!Jsont.update_path} - confirm against the implementation. *)

val delete_path : ?allow_absent:bool -> nav t -> Jsont.json Jsont.t
(** [delete_path p] removes the value at pointer [p].

    If [allow_absent] is [true] (default [false]), does nothing if
    the pointer doesn't resolve instead of raising. *)

(** {1:jmap JMAP Extended Pointers}

    {{:https://www.rfc-editor.org/rfc/rfc8620#section-3.7}RFC 8620 Section 3.7}
    extends JSON Pointer with a wildcard token [*] for mapping through arrays.
    This is used in JMAP result references.

    The wildcard semantics are:
    {ul
    {- When the current value is an array and the token is [*], apply the rest
       of the pointer to each element, collecting results into a new array.}
    {- If a mapped result is itself an array, its contents are flattened into
       the output (i.e., array of arrays becomes a single array).}}

    Example: Given [{"list": \[{"id": "a"}, {"id": "b"}\]}], the extended
    pointer [/list/*/id] evaluates to [["a", "b"]].

    {b Note}: These extended pointers are {e not} valid RFC 6901 JSON Pointers.
    They should only be used for JMAP result reference resolution. *)

module Jmap : sig
  (** JMAP extended JSON Pointer with wildcard support. *)

  type t
  (** The type for JMAP extended pointers. Unlike standard pointers, these
      may contain [*] tokens for array mapping. *)

  val of_string : string -> t
  (** [of_string s] parses a JMAP extended pointer.

      The syntax is the same as RFC 6901 JSON Pointer, except [*] is allowed
      as a reference token for array mapping.

      @raise Jsont.Error if [s] has invalid syntax. *)

  val of_string_result : string -> (t, string) result
  (** [of_string_result s] is like {!of_string} but returns a result. *)

  val to_string : t -> string
  (** [to_string p] serializes [p] to string form. *)

  val pp : Format.formatter -> t -> unit
  (** [pp] formats a pointer using {!to_string}. *)

  val eval : t -> Jsont.json -> Jsont.json
  (** [eval p json] evaluates the extended pointer [p] against [json].

      For [*] tokens on arrays, maps through all elements and collects results.
      Results that are arrays are flattened into the output.

      @raise Jsont.Error if:
      - A standard token doesn't resolve (member not found, index out of bounds)
      - [*] is used on a non-array value
      - [-] appears in the pointer (not supported in JMAP extended pointers) *)

  val eval_result : t -> Jsont.json -> (Jsont.json, Jsont.Error.t) result
  (** [eval_result p json] is like {!eval} but returns a result. *)

  val find : t -> Jsont.json -> Jsont.json option
  (** [find p json] is like {!eval} but returns [None] on errors. *)

  val jsont : t Jsont.t
  (** [jsont] is a {!Jsont.t} codec for JMAP extended pointers. *)

  (** {2:combinators Query combinators}

      These combinators integrate JMAP extended pointers with jsont codecs,
      enabling typed extraction from JSON using pointer paths. *)

  val path : ?absent:'a -> t -> 'a Jsont.t -> 'a Jsont.t
  (** [path p codec] extracts the value at pointer [p] and decodes it with [codec].

      If [absent] is provided and the pointer doesn't resolve, returns [absent].
      Otherwise raises on pointer resolution failure.

      Example: Extract all thread IDs from an Email/get response:
      {[
        let thread_ids =
          Jmap.path
            (Jmap.of_string "/list/*/threadId")
            (Jsont.list Jsont.string)
      ]}

      @raise Jsont.Error if the pointer fails to resolve (and no [absent])
      or if decoding with [codec] fails. *)

  val path_list : t -> 'a Jsont.t -> 'a list Jsont.t
  (** [path_list p codec] extracts the array at pointer [p] and decodes each
      element with [codec].

      This is a convenience for the common JMAP pattern where wildcards produce
      arrays that need element-wise decoding:
      {[
        (* These are equivalent: *)
        Jmap.path_list (Jmap.of_string "/list/*/id") Jsont.string
        Jmap.path (Jmap.of_string "/list/*/id") (Jsont.list Jsont.string)
      ]}

      @raise Jsont.Error if pointer resolution fails, the result is not an array,
      or any element fails to decode. *)
end