OCaml wire format DSL with EverParse 3D output for verified parsers
at main 2445 lines 86 kB view raw
1(* Wire: Dependent Data Descriptions for EverParse 3D *) 2 3open Result.Syntax 4 5(** Staged computations, following the pattern from Jane Street's Base library. 6 Forces users to explicitly unstage functions to make specialization visible. 7 See also Irmin's repr which uses the same pattern. *) 8module Staged = struct 9 type +'a t = { unstage : 'a } [@@unboxed] 10 11 let stage x = { unstage = x } 12 let unstage { unstage } = unstage 13end 14 15(* UInt32: unboxed on 64-bit (uses int), boxed on 32-bit (uses int32) *) 16module UInt32 = struct 17 type t = int (* On 64-bit, int is 63 bits - enough for uint32 *) 18 19 let () = 20 if Sys.int_size < 32 then 21 failwith "Wire.UInt32 requires 64-bit OCaml (int must be >= 32 bits)" 22 23 let get_le buf off = 24 let b0 = Bytes.get_uint8 buf off in 25 let b1 = Bytes.get_uint8 buf (off + 1) in 26 let b2 = Bytes.get_uint8 buf (off + 2) in 27 let b3 = Bytes.get_uint8 buf (off + 3) in 28 b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) 29 30 let get_be buf off = 31 let b0 = Bytes.get_uint8 buf off in 32 let b1 = Bytes.get_uint8 buf (off + 1) in 33 let b2 = Bytes.get_uint8 buf (off + 2) in 34 let b3 = Bytes.get_uint8 buf (off + 3) in 35 (b0 lsl 24) lor (b1 lsl 16) lor (b2 lsl 8) lor b3 36 37 let mask v = v land ((1 lsl 32) - 1) 38 39 let set_le buf off v = 40 let v = mask v in 41 Bytes.set_uint8 buf off (v land 0xFF); 42 Bytes.set_uint8 buf (off + 1) ((v lsr 8) land 0xFF); 43 Bytes.set_uint8 buf (off + 2) ((v lsr 16) land 0xFF); 44 Bytes.set_uint8 buf (off + 3) ((v lsr 24) land 0xFF) 45 46 let set_be buf off v = 47 let v = mask v in 48 Bytes.set_uint8 buf off ((v lsr 24) land 0xFF); 49 Bytes.set_uint8 buf (off + 1) ((v lsr 16) land 0xFF); 50 Bytes.set_uint8 buf (off + 2) ((v lsr 8) land 0xFF); 51 Bytes.set_uint8 buf (off + 3) (v land 0xFF) 52 53 let to_int t = t 54 let of_int t = mask t 55end 56 57(* UInt63: unboxed on 64-bit (uses int), reads 8 bytes but masks to 63 bits *) 58module UInt63 = struct 59 type t = int (* 63-bit int on 64-bit platforms *) 60 61 let () = 62 if Sys.int_size < 63 then 63 failwith "Wire.UInt63 requires 64-bit OCaml (int must be 63 bits)" 64 65 let get_le buf off = 66 let b0 = Bytes.get_uint8 buf off in 67 let b1 = Bytes.get_uint8 buf (off + 1) in 68 let b2 = Bytes.get_uint8 buf (off + 2) in 69 let b3 = Bytes.get_uint8 buf (off + 3) in 70 let b4 = Bytes.get_uint8 buf (off + 4) in 71 let b5 = Bytes.get_uint8 buf (off + 5) in 72 let b6 = Bytes.get_uint8 buf (off + 6) in 73 let b7 = Bytes.get_uint8 buf (off + 7) in 74 b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) lor (b4 lsl 32) 75 lor (b5 lsl 40) lor (b6 lsl 48) 76 lor ((b7 land 0x7F) lsl 56) 77 78 let get_be buf off = 79 let b0 = Bytes.get_uint8 buf off in 80 let b1 = Bytes.get_uint8 buf (off + 1) in 81 let b2 = Bytes.get_uint8 buf (off + 2) in 82 let b3 = Bytes.get_uint8 buf (off + 3) in 83 let b4 = Bytes.get_uint8 buf (off + 4) in 84 let b5 = Bytes.get_uint8 buf (off + 5) in 85 let b6 = Bytes.get_uint8 buf (off + 6) in 86 let b7 = Bytes.get_uint8 buf (off + 7) in 87 ((b0 land 0x7F) lsl 56) 88 lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24) 89 lor (b5 lsl 16) lor (b6 lsl 8) lor b7 90 91 let set_le buf off v = 92 Bytes.set_uint8 buf off (v land 0xFF); 93 Bytes.set_uint8 buf (off + 1) ((v lsr 8) land 0xFF); 94 Bytes.set_uint8 buf (off + 2) ((v lsr 16) land 0xFF); 95 Bytes.set_uint8 buf (off + 3) ((v lsr 24) land 0xFF); 96 Bytes.set_uint8 buf (off + 4) ((v lsr 32) land 0xFF); 97 Bytes.set_uint8 buf (off + 5) ((v lsr 40) land 0xFF); 98 Bytes.set_uint8 buf (off + 6) ((v lsr 48) land 0xFF); 99 Bytes.set_uint8 buf (off + 7) ((v lsr 56) land 0x7F) 100 101 let set_be buf off v = 102 Bytes.set_uint8 buf off ((v lsr 56) land 0x7F); 103 Bytes.set_uint8 buf (off + 1) ((v lsr 48) land 0xFF); 104 Bytes.set_uint8 buf (off + 2) ((v lsr 40) land 0xFF); 105 Bytes.set_uint8 buf (off + 3) ((v lsr 32) land 0xFF); 106 Bytes.set_uint8 buf (off + 4) ((v lsr 24) land 0xFF); 107 Bytes.set_uint8 buf (off + 5) ((v lsr 16) land 0xFF); 108 Bytes.set_uint8 buf (off + 6) ((v lsr 8) land 0xFF); 109 Bytes.set_uint8 buf (off + 7) (v land 0xFF) 110 111 let to_int t = t 112 let of_int t = t 113end 114 115type endian = Little | Big 116 117(* Expressions *) 118type _ expr = 119 | Int : int -> int expr 120 | Int64 : int64 -> int64 expr 121 | Bool : bool -> bool expr 122 | Ref : string -> int expr 123 | Sizeof : 'a typ -> int expr 124 | Sizeof_this : int expr 125 | Field_pos : int expr 126 | Add : int expr * int expr -> int expr 127 | Sub : int expr * int expr -> int expr 128 | Mul : int expr * int expr -> int expr 129 | Div : int expr * int expr -> int expr 130 | Mod : int expr * int expr -> int expr 131 | Land : int expr * int expr -> int expr 132 | Lor : int expr * int expr -> int expr 133 | Lxor : int expr * int expr -> int expr 134 | Lnot : int expr -> int expr 135 | Lsl : int expr * int expr -> int expr 136 | Lsr : int expr * int expr -> int expr 137 | Eq : 'a expr * 'a expr -> bool expr 138 | Ne : 'a expr * 'a expr -> bool expr 139 | Lt : int expr * int expr -> bool expr 140 | Le : int expr * int expr -> bool expr 141 | Gt : int expr * int expr -> bool expr 142 | Ge : int expr * int expr -> bool expr 143 | And : bool expr * bool expr -> bool expr 144 | Or : bool expr * bool expr -> bool expr 145 | Not : bool expr -> bool expr 146 | Cast : [ `U8 | `U16 | `U32 | `U64 ] * int expr -> int expr 147 148(* Bitfield base types - standalone, not mutually recursive *) 149and bitfield_base = BF_U8 | BF_U16 of endian | BF_U32 of endian 150 151(* Types *) 152and _ typ = 153 | Uint8 : int typ 154 | Uint16 : endian -> int typ 155 | Uint32 : endian -> UInt32.t typ 156 | Uint63 : endian -> UInt63.t typ 157 | Uint64 : endian -> int64 typ (* boxed, for full 64-bit *) 158 | Bits : { width : int; base : bitfield_base } -> int typ 159 | Unit : unit typ 160 | All_bytes : string typ 161 | All_zeros : string typ 162 | Where : { cond : bool expr; inner : 'a typ } -> 'a typ 163 | Array : { len : int expr; elem : 'a typ } -> 'a list typ 164 | Byte_array : { size : int expr } -> string typ 165 | Single_elem : { size : int expr; elem : 'a typ; at_most : bool } -> 'a typ 166 | Enum : { 167 name : string; 168 cases : (string * int) list; 169 base : int typ; 170 } 171 -> int typ 172 | Casetype : { 173 name : string; 174 tag : 'tag typ; 175 cases : ('tag option * 'a typ) list; 176 } 177 -> 'a typ 178 | Struct : struct_ -> unit typ 179 | Type_ref : string -> 'a typ 180 | Qualified_ref : { module_ : string; name : string } -> 'a typ 181 | Map : { inner : 'w typ; decode : 'w -> 'a; encode : 'a -> 'w } -> 'a typ 182 | Apply : { typ : 'a typ; args : packed_expr list } -> 'a typ 183 184and packed_expr = Pack_expr : 'a expr -> packed_expr 185 186(* Structs *) 187and struct_ = { 188 name : string; 189 params : param list; 190 where : bool expr option; 191 fields : field list; 192} 193 194and field = 195 | Field : { 196 field_name : string option; 197 field_typ : 'a typ; 198 constraint_ : bool expr option; 199 action : action option; 200 } 201 -> field 202 203and param = { param_name : string; param_typ : packed_typ; mutable_ : bool } 204and packed_typ = Pack_typ : 'a typ -> packed_typ 205 206(* Actions *) 207and action = On_success of action_stmt list | On_act of action_stmt list 208 209and action_stmt = 210 | Assign of string * int expr 211 | Return of bool expr 212 | Abort 213 | If of bool expr * action_stmt list * action_stmt list option 214 | Var of string * int expr 215 216(* Expression constructors *) 217let int n = Int n 218let int64 n = Int64 n 219let true_ = Bool true 220let false_ = Bool false 221let ref name = Ref name 222let sizeof t = Sizeof t 223let sizeof_this = Sizeof_this 224let field_pos = Field_pos 225 226module Expr = struct 227 let ( + ) a b = Add (a, b) 228 let ( - ) a b = Sub (a, b) 229 let ( * ) a b = Mul (a, b) 230 let ( / ) a b = Div (a, b) 231 let ( mod ) a b = Mod (a, b) 232 let ( land ) a b = Land (a, b) 233 let ( lor ) a b = Lor (a, b) 234 let ( lxor ) a b = Lxor (a, b) 235 let lnot a = Lnot a 236 let ( lsl ) a b = Lsl (a, b) 237 let ( lsr ) a b = Lsr (a, b) 238 let ( = ) a b = Eq (a, b) 239 let ( <> ) a b = Ne (a, b) 240 let ( < ) a b = Lt (a, b) 241 let ( <= ) a b = Le (a, b) 242 let ( > ) a b = Gt (a, b) 243 let ( >= ) a b = Ge (a, b) 244 let ( && ) a b = And (a, b) 245 let ( || ) a b = Or (a, b) 246 let not a = Not a 247 let to_uint8 e = Cast (`U8, e) 248 let to_uint16 e = Cast (`U16, e) 249 let to_uint32 e = Cast (`U32, e) 250 let to_uint64 e = Cast (`U64, e) 251end 252 253(* Type constructors *) 254let uint8 = Uint8 255let uint16 = Uint16 Little 256let uint16be = Uint16 Big 257let uint32 = Uint32 Little 258let uint32be = Uint32 Big 259let uint63 = Uint63 Little 260let uint63be = Uint63 Big 261let uint64 = Uint64 Little 262let uint64be = Uint64 Big 263 264(* Bitfield bases *) 265let bf_uint8 = BF_U8 266let bf_uint16 = BF_U16 Little 267let bf_uint16be = BF_U16 Big 268let bf_uint32 = BF_U32 Little 269let bf_uint32be = BF_U32 Big 270let bits ~width base = Bits { width; base } 271let bit b = Bool.to_int b 272let is_set n = n <> 0 273let map decode encode inner = Map { inner; decode; encode } 274let bool inner = Map { inner; decode = is_set; encode = bit } 275 276let cases variants inner = 277 let arr = Array.of_list variants in 278 let decode n = 279 if n >= 0 && n < Array.length arr then arr.(n) 280 else Fmt.invalid_arg "Wire.cases: unknown value %d" n 281 in 282 let encode v = 283 let rec go i = 284 if i >= Array.length arr then invalid_arg "Wire.cases: unknown variant" 285 else if arr.(i) = v then i 286 else go (i + 1) 287 in 288 go 0 289 in 290 Map { inner; decode; encode } 291 292let unit = Unit 293let all_bytes = All_bytes 294let all_zeros = All_zeros 295let where cond inner = Where { cond; inner } 296let array ~len elem = Array { len; elem } 297let byte_array ~size = Byte_array { size } 298let single_elem_array ~size elem = Single_elem { size; elem; at_most = false } 299 300let single_elem_array_at_most ~size elem = 301 Single_elem { size; elem; at_most = true } 302 303let enum name cases base = Enum { name; cases; base } 304 305(* Casetype *) 306type ('tag, 'a) case = 'tag option * 'a typ 307 308let case tag typ = (Some tag, typ) 309let default typ = (None, typ) 310let casetype name tag cases = Casetype { name; tag; cases } 311 312(* Struct fields *) 313let field name ?constraint_ ?action typ = 314 Field { field_name = Some name; field_typ = typ; constraint_; action } 315 316let anon_field typ = 317 Field 318 { field_name = None; field_typ = typ; constraint_ = None; action = None } 319 320(* Struct constructors *) 321let struct_ name fields = { name; params = []; where = None; fields } 322let struct_name s = s.name 323let struct_typ s = Struct s 324 325(* Parameters *) 326let param name typ = 327 { param_name = name; param_typ = Pack_typ typ; mutable_ = false } 328 329let mutable_param name typ = 330 { param_name = name; param_typ = Pack_typ typ; mutable_ = true } 331 332let param_struct name params ?where fields = { name; params; where; fields } 333let apply typ args = Apply { typ; args = List.map (fun e -> Pack_expr e) args } 334 335(* Type references *) 336let type_ref name = Type_ref name 337let qualified_ref module_ name = Qualified_ref { module_; name } 338 339(* Actions *) 340let on_success stmts = On_success stmts 341let on_act stmts = On_act stmts 342let assign ptr e = Assign (ptr, e) 343let return_bool e = Return e 344let abort = Abort 345let action_if cond then_ else_ = If (cond, then_, else_) 346let var name e = Var (name, e) 347 348(* Declarations *) 349type decl = 350 | Typedef of { 351 entrypoint : bool; 352 export : bool; 353 doc : string option; 354 struct_ : struct_; 355 } 356 | Define of { name : string; value : int } 357 | Extern_fn of { name : string; params : param list; ret : packed_typ } 358 | Extern_probe of { init : bool; name : string } 359 | Enum_decl of { 360 name : string; 361 cases : (string * int) list; 362 base : packed_typ; 363 } 364 | Casetype_decl of { 365 name : string; 366 params : param list; 367 tag : packed_typ; 368 cases : (packed_expr option * packed_typ) list; 369 } 370 371let typedef ?(entrypoint = false) ?(export = false) ?doc struct_ = 372 Typedef { entrypoint; export; doc; struct_ } 373 374let define name value = Define { name; value } 375let extern_fn name params ret = Extern_fn { name; params; ret = Pack_typ ret } 376let extern_probe ?(init = false) name = Extern_probe { init; name } 377let enum_decl name cases base = Enum_decl { name; cases; base = Pack_typ base } 378 379type decl_case = packed_expr option * packed_typ 380 381let decl_case tag typ = (Some (Pack_expr (Int tag)), Pack_typ typ) 382let decl_default typ = (None, Pack_typ typ) 383 384let casetype_decl name params tag cases = 385 Casetype_decl { name; params; tag = Pack_typ tag; cases } 386 387(* Module *) 388type module_ = { doc : string option; name : string; decls : decl list } 389 390let module_ ?doc name decls = { doc; name; decls } 391 392(* Pretty printing using Fmt *) 393 394let pp_endian ppf = function Little -> () | Big -> Fmt.string ppf "BE" 395 396let pp_bitfield_base ppf = function 397 | BF_U8 -> Fmt.string ppf "UINT8" 398 | BF_U16 e -> Fmt.pf ppf "UINT16%a" pp_endian e 399 | BF_U32 e -> Fmt.pf ppf "UINT32%a" pp_endian e 400 401let pp_cast_type ppf = function 402 | `U8 -> Fmt.string ppf "UINT8" 403 | `U16 -> Fmt.string ppf "UINT16" 404 | `U32 -> Fmt.string ppf "UINT32" 405 | `U64 -> Fmt.string ppf "UINT64" 406 407let rec pp_expr : type a. a expr Fmt.t = 408 fun ppf expr -> 409 match expr with 410 | Int n when n < 0 -> Fmt.pf ppf "(%d)" n 411 | Int n -> Fmt.int ppf n 412 | Int64 n -> Fmt.pf ppf "%LduL" n 413 | Bool true -> Fmt.string ppf "true" 414 | Bool false -> Fmt.string ppf "false" 415 | Ref name -> Fmt.string ppf name 416 | Sizeof t -> Fmt.pf ppf "sizeof (%a)" pp_typ t 417 | Sizeof_this -> Fmt.string ppf "sizeof (this)" 418 | Field_pos -> Fmt.string ppf "field_pos" 419 | Add (a, b) -> Fmt.pf ppf "(%a + %a)" pp_expr a pp_expr b 420 | Sub (a, b) -> Fmt.pf ppf "(%a - %a)" pp_expr a pp_expr b 421 | Mul (a, b) -> Fmt.pf ppf "(%a * %a)" pp_expr a pp_expr b 422 | Div (a, b) -> Fmt.pf ppf "(%a / %a)" pp_expr a pp_expr b 423 | Mod (a, b) -> Fmt.pf ppf "(%a %% %a)" pp_expr a pp_expr b 424 | Land (a, b) -> Fmt.pf ppf "(%a & %a)" pp_expr a pp_expr b 425 | Lor (a, b) -> Fmt.pf ppf "(%a | %a)" pp_expr a pp_expr b 426 | Lxor (a, b) -> Fmt.pf ppf "(%a ^ %a)" pp_expr a pp_expr b 427 | Lnot a -> Fmt.pf ppf "(~%a)" pp_expr a 428 | Lsl (a, b) -> Fmt.pf ppf "(%a << %a)" pp_expr a pp_expr b 429 | Lsr (a, b) -> Fmt.pf ppf "(%a >> %a)" pp_expr a pp_expr b 430 | Eq (a, b) -> Fmt.pf ppf "(%a == %a)" pp_expr a pp_expr b 431 | Ne (a, b) -> Fmt.pf ppf "(%a != %a)" pp_expr a pp_expr b 432 | Lt (a, b) -> Fmt.pf ppf "(%a < %a)" pp_expr a pp_expr b 433 | Le (a, b) -> Fmt.pf ppf "(%a <= %a)" pp_expr a pp_expr b 434 | Gt (a, b) -> Fmt.pf ppf "(%a > %a)" pp_expr a pp_expr b 435 | Ge (a, b) -> Fmt.pf ppf "(%a >= %a)" pp_expr a pp_expr b 436 | And (a, b) -> Fmt.pf ppf "(%a && %a)" pp_expr a pp_expr b 437 | Or (a, b) -> Fmt.pf ppf "(%a || %a)" pp_expr a pp_expr b 438 | Not a -> Fmt.pf ppf "(!%a)" pp_expr a 439 | Cast (t, e) -> Fmt.pf ppf "((%a) %a)" pp_cast_type t pp_expr e 440 441and pp_typ : type a. a typ Fmt.t = 442 fun ppf typ -> 443 match typ with 444 | Uint8 -> Fmt.string ppf "UINT8" 445 | Uint16 e -> Fmt.pf ppf "UINT16%a" pp_endian e 446 | Uint32 e -> Fmt.pf ppf "UINT32%a" pp_endian e 447 | Uint63 e -> Fmt.pf ppf "UINT63%a" pp_endian e 448 | Uint64 e -> Fmt.pf ppf "UINT64%a" pp_endian e 449 | Bits { base; _ } -> pp_bitfield_base ppf base 450 | Unit -> Fmt.string ppf "unit" 451 | All_bytes -> Fmt.string ppf "all_bytes" 452 | All_zeros -> Fmt.string ppf "all_zeros" 453 | Where { cond; inner } -> Fmt.pf ppf "%a { %a }" pp_typ inner pp_expr cond 454 | Array { len; elem } -> Fmt.pf ppf "%a[%a]" pp_typ elem pp_expr len 455 | Byte_array { size } -> Fmt.pf ppf "UINT8[:byte-size %a]" pp_expr size 456 | Single_elem { size; elem; at_most = false } -> 457 Fmt.pf ppf "%a[:byte-size-single-element-array %a]" pp_typ elem pp_expr 458 size 459 | Single_elem { size; elem; at_most = true } -> 460 Fmt.pf ppf "%a[:byte-size-single-element-array-at-most %a]" pp_typ elem 461 pp_expr size 462 | Enum { name; _ } -> Fmt.string ppf name 463 | Casetype { name; _ } -> Fmt.string ppf name 464 | Struct { name; _ } -> Fmt.string ppf name 465 | Type_ref name -> Fmt.string ppf name 466 | Qualified_ref { module_; name } -> Fmt.pf ppf "%s::%s" module_ name 467 | Apply { typ; args } -> 468 Fmt.pf ppf "%a(%a)" pp_typ typ Fmt.(list ~sep:comma pp_packed_expr) args 469 | Map { inner; _ } -> pp_typ ppf inner 470 471and pp_packed_expr ppf (Pack_expr e) = pp_expr ppf e 472 473let rec pp_action_stmt ppf = function 474 | Assign (ptr, e) -> Fmt.pf ppf "*%s = %a;" ptr pp_expr e 475 | Return e -> Fmt.pf ppf "return %a;" pp_expr e 476 | Abort -> Fmt.string ppf "abort;" 477 | If (cond, then_, None) -> 478 Fmt.pf ppf "if (%a) { %a }" pp_expr cond 479 Fmt.(list ~sep:sp pp_action_stmt) 480 then_ 481 | If (cond, then_, Some else_) -> 482 Fmt.pf ppf "if (%a) { %a } else { %a }" pp_expr cond 483 Fmt.(list ~sep:sp pp_action_stmt) 484 then_ 485 Fmt.(list ~sep:sp pp_action_stmt) 486 else_ 487 | Var (name, e) -> Fmt.pf ppf "var %s = %a;" name pp_expr e 488 489let pp_action ppf = function 490 | On_success stmts -> 491 Fmt.pf ppf "{:on-success %a }" Fmt.(list ~sep:sp pp_action_stmt) stmts 492 | On_act stmts -> 493 Fmt.pf ppf "{:act %a }" Fmt.(list ~sep:sp pp_action_stmt) stmts 494 495let pp_bitwidth : type a. a typ -> int option = function 496 | Bits { width; _ } -> Some width 497 | _ -> None 498 499(* Extract field suffix for arrays - the modifier goes after the field name *) 500type field_suffix = 501 | No_suffix 502 | Bitwidth of int 503 | Byte_array of int expr 504 | Single_elem of { size : int expr; at_most : bool } 505 | Array of int expr 506 507let field_suffix : type a. a typ -> field_suffix * (Format.formatter -> unit) = 508 fun typ -> 509 match typ with 510 | Bits { width; base } -> 511 (Bitwidth width, fun ppf -> pp_bitfield_base ppf base) 512 | Byte_array { size } -> (Byte_array size, fun ppf -> Fmt.string ppf "UINT8") 513 | Single_elem { size; elem; at_most } -> 514 (Single_elem { size; at_most }, fun ppf -> pp_typ ppf elem) 515 | Array { len; elem } -> (Array len, fun ppf -> pp_typ ppf elem) 516 | _ -> (No_suffix, fun ppf -> pp_typ ppf typ) 517 518let pp_field ppf (Field f) = 519 match f.field_name with 520 | Some name -> 521 let suffix, pp_base = field_suffix f.field_typ in 522 Fmt.pf ppf "@,%t %s" pp_base name; 523 (* Print suffix after field name *) 524 (match suffix with 525 | No_suffix -> () 526 | Bitwidth w -> Fmt.pf ppf " : %d" w 527 | Byte_array size -> Fmt.pf ppf "[:byte-size %a]" pp_expr size 528 | Single_elem { size; at_most = false } -> 529 Fmt.pf ppf "[:byte-size-single-element-array %a]" pp_expr size 530 | Single_elem { size; at_most = true } -> 531 Fmt.pf ppf "[:byte-size-single-element-array-at-most %a]" pp_expr size 532 | Array len -> Fmt.pf ppf "[%a]" pp_expr len); 533 Option.iter (Fmt.pf ppf " { %a }" pp_expr) f.constraint_; 534 Option.iter (Fmt.pf ppf " %a" pp_action) f.action; 535 Fmt.string ppf ";" 536 | None -> Fmt.pf ppf "@,%a;" pp_typ f.field_typ 537 538let pp_param ppf p = 539 let (Pack_typ t) = p.param_typ in 540 if p.mutable_ then Fmt.pf ppf "mutable %a *%s" pp_typ t p.param_name 541 else Fmt.pf ppf "%a %s" pp_typ t p.param_name 542 543let pp_params ppf params = 544 if not (List.is_empty params) then 545 Fmt.pf ppf "(%a)" Fmt.(list ~sep:comma pp_param) params 546 547let pp_struct ppf (s : struct_) = 548 Fmt.pf ppf "typedef struct _%s%a" s.name pp_params s.params; 549 Option.iter (Fmt.pf ppf "@,where (%a)" pp_expr) s.where; 550 Fmt.pf ppf "@,{@[<v 2>"; 551 List.iter (pp_field ppf) s.fields; 552 Fmt.pf ppf "@]@,} %s" s.name 553 554let pp_decl ppf = function 555 | Typedef { entrypoint; export; doc; struct_ = st } -> 556 Option.iter (Fmt.pf ppf "/*++ %s --*/@,") doc; 557 if export then Fmt.pf ppf "export@,"; 558 if entrypoint then Fmt.pf ppf "entrypoint@,"; 559 Fmt.pf ppf "%a;@,@," pp_struct st 560 | Define { name; value } -> 561 if value < 0 then Fmt.pf ppf "#define %s (%d)@," name value 562 else Fmt.pf ppf "#define %s 0x%x@," name value 563 | Extern_fn { name; params; ret = Pack_typ ret } -> 564 Fmt.pf ppf "extern %a %s(%a);@,@," pp_typ ret name 565 Fmt.(list ~sep:comma pp_param) 566 params 567 | Extern_probe { init; name } -> 568 if init then Fmt.pf ppf "extern probe (INIT) %s;@,@," name 569 else Fmt.pf ppf "extern probe %s;@,@," name 570 | Enum_decl { name; cases; base = Pack_typ base } -> 571 Fmt.pf ppf "%a enum %s {@[<v 2>" pp_typ base name; 572 List.iteri 573 (fun i (cname, value) -> 574 if not (Int.equal i 0) then Fmt.string ppf ","; 575 Fmt.pf ppf "@,%s = %d" cname value) 576 cases; 577 Fmt.pf ppf "@]@,}@,@," 578 | Casetype_decl { name; params; tag = Pack_typ _; cases } -> 579 (* First param is the switch discriminant *) 580 let disc_name = 581 match params with p :: _ -> p.param_name | [] -> "tag" 582 in 583 (* Internal name has underscore prefix, public name doesn't *) 584 let internal_name, public_name = 585 if String.length name > 0 && name.[0] = '_' then 586 (name, String.sub name 1 (String.length name - 1)) 587 else ("_" ^ name, name) 588 in 589 Fmt.pf ppf "casetype %s%a {@[<v 2>@,switch (%s) {" internal_name pp_params 590 params disc_name; 591 List.iteri 592 (fun i (tag_opt, Pack_typ typ) -> 593 let field_name = Fmt.str "v%d" i in 594 match tag_opt with 595 | Some e -> 596 Fmt.pf ppf "@,case %a: %a %s;" pp_packed_expr e pp_typ typ 597 field_name 598 | None -> Fmt.pf ppf "@,default: %a %s;" pp_typ typ field_name) 599 cases; 600 Fmt.pf ppf "@,}@]@,} %s;@,@," public_name 601 602let pp_module ppf m = 603 Option.iter (Fmt.pf ppf "/*++ %s --*/@,@,") m.doc; 604 List.iter (pp_decl ppf) m.decls 605 606let to_3d m = Fmt.str "@[<v>%a@]" pp_module m 607 608let to_3d_file path m = 609 let oc = open_out path in 610 let ppf = Format.formatter_of_out_channel oc in 611 Fmt.pf ppf "@[<v>%a@]@." pp_module m; 612 close_out oc 613 614(* Binary parsing with bytesrw *) 615 616module Br = Bytesrw.Bytes.Reader 617module Bs = Bytesrw.Bytes.Slice 618 619type parse_error = 620 | Unexpected_eof of { expected : int; got : int } 621 | Constraint_failed of string 622 | Invalid_enum of { value : int; valid : int list } 623 | Invalid_tag of int 624 | All_zeros_failed of { offset : int } 625 626exception Parse_error of parse_error 627 628let raise_eof ~expected ~got = 629 raise (Parse_error (Unexpected_eof { expected; got })) 630 631let pp_parse_error ppf = function 632 | Unexpected_eof { expected; got } -> 633 Fmt.pf ppf "unexpected EOF: expected %d bytes, got %d" expected got 634 | Constraint_failed msg -> Fmt.pf ppf "constraint failed: %s" msg 635 | Invalid_enum { value; valid } -> 636 Fmt.pf ppf "invalid enum value %d, valid: [%a]" value 637 Fmt.(list ~sep:comma int) 638 valid 639 | Invalid_tag tag -> Fmt.pf ppf "invalid tag: %d" tag 640 | All_zeros_failed { offset } -> 641 Fmt.pf ppf "non-zero byte at offset %d" offset 642 643(* Parsing context - tracks field values for dependent types. 644 645 All field values are stored as [int] after conversion via [val_to_int]. 646 This is sound because constraint expressions (the only consumers of 647 context values) operate on integers. *) 648module Ctx = Map.Make (String) 649 650type ctx = int Ctx.t 651 652let empty_ctx = Ctx.empty 653 654(* Convert a typed value to [int] for context storage. All types that 655 appear in constraint expressions are numeric, so this conversion is 656 lossless for practical schemas. Non-numeric types store 0. *) 657let rec val_to_int : type a. a typ -> a -> int = 658 fun typ v -> 659 match typ with 660 | Uint8 -> v 661 | Uint16 _ -> v 662 | Uint32 _ -> UInt32.to_int v 663 | Uint63 _ -> UInt63.to_int v 664 | Uint64 _ -> 665 (* Unsigned interpretation — values >= 2^62 don't fit in OCaml int, 666 return max_int so constraints [value <= K] fail correctly *) 667 Int64.unsigned_to_int v |> Option.value ~default:max_int 668 | Bits _ -> v 669 | Enum { base; _ } -> val_to_int base v 670 | Where { inner; _ } -> val_to_int inner v 671 | Single_elem { elem; _ } -> val_to_int elem v 672 | Apply { typ; _ } -> val_to_int typ v 673 | Map { inner; encode; _ } -> val_to_int inner (encode v) 674 | Unit | All_bytes | All_zeros | Array _ | Byte_array _ | Casetype _ 675 | Struct _ | Type_ref _ | Qualified_ref _ -> 676 0 677 678let ctx_get ctx name = 679 match Ctx.find_opt name ctx with 680 | Some v -> v 681 | None -> failwith ("unbound field: " ^ name) 682 683(* Decoder state - tracks position within slices *) 684type decoder = { 685 reader : Br.t; 686 mutable slice : Bs.t; 687 mutable slice_pos : int; 688 mutable position : int; 689} 690 691let decoder reader = { reader; slice = Bs.eod; slice_pos = 0; position = 0 } 692 693let refill dec = 694 dec.slice <- Br.read dec.reader; 695 dec.slice_pos <- 0 696 697let slice_get_byte slice pos = 698 Bytes.get_uint8 (Bs.bytes slice) (Bs.first slice + pos) 699 700let read_byte dec = 701 if dec.slice_pos >= Bs.length dec.slice then begin 702 refill dec; 703 if Bs.is_eod dec.slice then None 704 else begin 705 let b = slice_get_byte dec.slice dec.slice_pos in 706 dec.slice_pos <- dec.slice_pos + 1; 707 dec.position <- dec.position + 1; 708 Some b 709 end 710 end 711 else begin 712 let b = slice_get_byte dec.slice dec.slice_pos in 713 dec.slice_pos <- dec.slice_pos + 1; 714 dec.position <- dec.position + 1; 715 Some b 716 end 717 718(* Read exactly n bytes *) 719let read_bytes dec n = 720 if n = 0 then Ok Bytes.empty 721 else 722 let buf = Bytes.create n in 723 let rec loop off remaining = 724 if remaining = 0 then Ok buf 725 else begin 726 (* Refill if needed *) 727 if dec.slice_pos >= Bs.length dec.slice then begin 728 refill dec; 729 if Bs.is_eod dec.slice then 730 Error (Unexpected_eof { expected = n; got = off }) 731 else loop off remaining 732 end 733 else 734 let available = Bs.length dec.slice - dec.slice_pos in 735 let to_copy = min available remaining in 736 Bytes.blit (Bs.bytes dec.slice) 737 (Bs.first dec.slice + dec.slice_pos) 738 buf off to_copy; 739 dec.slice_pos <- dec.slice_pos + to_copy; 740 dec.position <- dec.position + to_copy; 741 loop (off + to_copy) (remaining - to_copy) 742 end 743 in 744 loop 0 n 745 746(* Read all remaining bytes *) 747let read_all dec = 748 let buf = Buffer.create 256 in 749 let rec loop () = 750 if dec.slice_pos >= Bs.length dec.slice then begin 751 refill dec; 752 if Bs.is_eod dec.slice then Buffer.contents buf else loop () 753 end 754 else begin 755 let slice_bytes = Bs.bytes dec.slice in 756 let first = Bs.first dec.slice + dec.slice_pos in 757 let len = Bs.length dec.slice - dec.slice_pos in 758 Buffer.add_subbytes buf slice_bytes first len; 759 dec.position <- dec.position + len; 760 dec.slice_pos <- Bs.length dec.slice; 761 loop () 762 end 763 in 764 loop () 765 766(* Evaluate an expression in context *) 767let rec eval_expr : type a. ctx -> a expr -> a = 768 fun ctx expr -> 769 match expr with 770 | Int n -> n 771 | Int64 n -> n 772 | Bool b -> b 773 | Ref name -> ctx_get ctx name 774 | Sizeof _ -> 0 (* TODO: compute actual size *) 775 | Sizeof_this -> 0 776 | Field_pos -> 0 777 | Add (a, b) -> eval_expr ctx a + eval_expr ctx b 778 | Sub (a, b) -> eval_expr ctx a - eval_expr ctx b 779 | Mul (a, b) -> eval_expr ctx a * eval_expr ctx b 780 | Div (a, b) -> eval_expr ctx a / eval_expr ctx b 781 | Mod (a, b) -> eval_expr ctx a mod eval_expr ctx b 782 | Land (a, b) -> eval_expr ctx a land eval_expr ctx b 783 | Lor (a, b) -> eval_expr ctx a lor eval_expr ctx b 784 | Lxor (a, b) -> eval_expr ctx a lxor eval_expr ctx b 785 | Lnot a -> lnot (eval_expr ctx a) 786 | Lsl (a, b) -> eval_expr ctx a lsl eval_expr ctx b 787 | Lsr (a, b) -> eval_expr ctx a lsr eval_expr ctx b 788 | Eq (a, b) -> eval_expr ctx a = eval_expr ctx b 789 | Ne (a, b) -> eval_expr ctx a <> eval_expr ctx b 790 | Lt (a, b) -> eval_expr ctx a < eval_expr ctx b 791 | Le (a, b) -> eval_expr ctx a <= eval_expr ctx b 792 | Gt (a, b) -> eval_expr ctx a > eval_expr ctx b 793 | Ge (a, b) -> eval_expr ctx a >= eval_expr ctx b 794 | And (a, b) -> eval_expr ctx a && eval_expr ctx b 795 | Or (a, b) -> eval_expr ctx a || eval_expr ctx b 796 | Not a -> not (eval_expr ctx a) 797 | Cast (_, e) -> eval_expr ctx e (* TODO: proper casting *) 798 799(* Bitfield accumulator for packed struct parsing. 800 Consecutive bitfields sharing the same base type are packed together. 801 Bits are extracted from MSB to LSB (big-endian style) per EverParse 3D. *) 802type bf_accum = { 803 bf_base : bitfield_base; 804 bf_word : int; 805 bf_bits_used : int; 806 bf_total_bits : int; 807} 808 809let bf_total_bits = function BF_U8 -> 8 | BF_U16 _ -> 16 | BF_U32 _ -> 32 810let bf_base_size = function BF_U8 -> 1 | BF_U16 _ -> 2 | BF_U32 _ -> 4 811 812let bf_compatible base1 base2 = 813 match (base1, base2) with 814 | BF_U8, BF_U8 -> true 815 | BF_U16 e1, BF_U16 e2 -> e1 = e2 816 | BF_U32 e1, BF_U32 e2 -> e1 = e2 817 | _ -> false 818 819let bf_read_word dec base = 820 let size = bf_base_size base in 821 match read_bytes dec size with 822 | Error e -> Error e 823 | Ok buf -> 824 let v = 825 match base with 826 | BF_U8 -> Bytes.get_uint8 buf 0 827 | BF_U16 Little -> Bytes.get_uint16_le buf 0 828 | BF_U16 Big -> Bytes.get_uint16_be buf 0 829 | BF_U32 Little -> Int32.to_int (Bytes.get_int32_le buf 0) 830 | BF_U32 Big -> Int32.to_int (Bytes.get_int32_be buf 0) 831 in 832 Ok v 833 834(* Extract bits from accumulated word (MSB first, big-endian style) *) 835let bf_extract accum width = 836 let shift = accum.bf_total_bits - accum.bf_bits_used - width in 837 let mask = (1 lsl width) - 1 in 838 let value = (accum.bf_word lsr shift) land mask in 839 let new_accum = { accum with bf_bits_used = accum.bf_bits_used + width } in 840 (value, new_accum) 841 842(* Check if accumulator can provide more bits *) 843let bf_has_room accum width = accum.bf_bits_used + width <= accum.bf_total_bits 844 845(* Legacy parse_bits for standalone bitfield parsing (non-struct context) *) 846let parse_bits dec base width = 847 match bf_read_word dec base with 848 | Error e -> Error e 849 | Ok v -> Ok (v land ((1 lsl width) - 1)) 850 851(* Helper: parse fixed-size integer from decoder *) 852let parse_int dec n get ctx = 853 match read_bytes dec n with 854 | Ok buf -> Ok (get buf 0, ctx) 855 | Error e -> Error e 856 857(** Read a bitfield from a decoder, reusing the accumulator when possible. *) 858let parse_bf_field dec (accum_opt : bf_accum option) (base : bitfield_base) 859 width : (int * bf_accum option, parse_error) result = 860 match accum_opt with 861 | Some accum when bf_compatible accum.bf_base base && bf_has_room accum width 862 -> 863 let v, new_accum = bf_extract accum width in 864 let accum_opt' = 865 if new_accum.bf_bits_used = new_accum.bf_total_bits then None 866 else Some new_accum 867 in 868 Ok (v, accum_opt') 869 | _ -> 870 let* word = bf_read_word dec base in 871 let total = bf_total_bits base in 872 let accum = 873 { 874 bf_base = base; 875 bf_word = word; 876 bf_bits_used = 0; 877 bf_total_bits = total; 878 } 879 in 880 let v, new_accum = bf_extract accum width in 881 let accum_opt' = 882 if new_accum.bf_bits_used = new_accum.bf_total_bits then None 883 else Some new_accum 884 in 885 Ok (v, accum_opt') 886 887(** Verify that all bytes in [s] are zero. *) 888let check_all_zeros s = 889 let rec go i = 890 if i >= String.length s then Ok s 891 else if s.[i] <> '\000' then Error (All_zeros_failed { offset = i }) 892 else go (i + 1) 893 in 894 go 0 895 896(* Parse a type from a decoder *) 897let rec parse_with_ctx : type a. 898 ctx -> a typ -> decoder -> (a * ctx, parse_error) result = 899 fun ctx typ dec -> 900 match typ with 901 | Uint8 -> parse_int dec 1 Bytes.get_uint8 ctx 902 | Uint16 Little -> parse_int dec 2 Bytes.get_uint16_le ctx 903 | Uint16 Big -> parse_int dec 2 Bytes.get_uint16_be ctx 904 | Uint32 Little -> parse_int dec 4 UInt32.get_le ctx 905 | Uint32 Big -> parse_int dec 4 UInt32.get_be ctx 906 | Uint63 Little -> parse_int dec 8 UInt63.get_le ctx 907 | Uint63 Big -> parse_int dec 8 UInt63.get_be ctx 908 | Uint64 Little -> parse_int dec 8 Bytes.get_int64_le ctx 909 | Uint64 Big -> parse_int dec 8 Bytes.get_int64_be ctx 910 | Bits { width; base } -> ( 911 match parse_bits dec base width with 912 | Ok v -> Ok (v, ctx) 913 | Error e -> Error e) 914 | Unit -> Ok ((), ctx) 915 | All_bytes -> Ok (read_all dec, ctx) 916 | All_zeros -> 917 let s = read_all dec in 918 check_all_zeros s |> Result.map (fun s -> (s, ctx)) 919 | Where { cond; inner } -> ( 920 match parse_with_ctx ctx inner dec with 921 | Ok (v, ctx') -> 922 if eval_expr ctx' cond then Ok (v, ctx') 923 else Error (Constraint_failed "where clause") 924 | Error e -> Error e) 925 | Array { len; elem } -> 926 let n = eval_expr ctx len in 927 let rec loop acc i ctx' = 928 if i >= n then Ok (List.rev acc, ctx') 929 else 930 match parse_with_ctx ctx' elem dec with 931 | Ok (v, ctx'') -> loop (v :: acc) (i + 1) ctx'' 932 | Error e -> Error e 933 in 934 loop [] 0 ctx 935 | Byte_array { size } -> ( 936 let n = eval_expr ctx size in 937 match read_bytes dec n with 938 | Ok buf -> Ok (Bytes.to_string buf, ctx) 939 | Error e -> Error e) 940 | Single_elem { size = _; elem; at_most = _ } -> parse_with_ctx ctx elem dec 941 | Enum { cases; base; _ } -> ( 942 match parse_with_ctx ctx base dec with 943 | Ok (v, ctx') -> 944 let valid = List.map snd cases in 945 if List.mem v valid then Ok (v, ctx') 946 else Error (Invalid_enum { value = v; valid }) 947 | Error e -> Error e) 948 | Casetype { cases; tag; _ } -> ( 949 match parse_with_ctx ctx tag dec with 950 | Error e -> Error e 951 | Ok (tag_val, ctx') -> 952 let rec find_case = function 953 | [] -> Error (Invalid_tag (val_to_int tag tag_val)) 954 | (Some expected, case_typ) :: rest -> 955 if expected = tag_val then parse_with_ctx ctx' case_typ dec 956 else find_case rest 957 | (None, case_typ) :: _ -> parse_with_ctx ctx' case_typ dec 958 in 959 find_case cases) 960 | Struct { fields; _ } -> 961 let rec go ctx' accum_opt = function 962 | [] -> Ok ((), ctx') 963 | Field { field_name; field_typ = Bits { width; base }; constraint_; _ } 964 :: rest -> ( 965 let* v, accum_opt' = parse_bf_field dec accum_opt base width in 966 let ctx'' = 967 match field_name with Some n -> Ctx.add n v ctx' | None -> ctx' 968 in 969 match constraint_ with 970 | Some cond when not (eval_expr ctx'' cond) -> 971 Error (Constraint_failed "field constraint") 972 | _ -> go ctx'' accum_opt' rest) 973 | Field { field_name; field_typ; constraint_; _ } :: rest -> ( 974 let* v, ctx'' = parse_with_ctx ctx' field_typ dec in 975 let ctx'' = 976 match field_name with 977 | Some n -> Ctx.add n (val_to_int field_typ v) ctx'' 978 | None -> ctx'' 979 in 980 match constraint_ with 981 | Some cond when not (eval_expr ctx'' cond) -> 982 Error (Constraint_failed "field constraint") 983 | _ -> go ctx'' None rest) 984 in 985 go ctx None fields 986 | Map { inner; decode; _ } -> 987 parse_with_ctx ctx inner dec 988 |> Result.map (fun (v, ctx') -> (decode v, ctx')) 989 | Type_ref _ -> failwith "type_ref requires a type registry" 990 | Qualified_ref _ -> failwith "qualified_ref requires a type registry" 991 | Apply _ -> failwith "apply requires a type registry" 992 993let parse typ reader = 994 let dec = decoder reader in 995 match parse_with_ctx empty_ctx typ dec with 996 | Ok (v, _) -> Ok v 997 | Error e -> Error e 998 999let parse_string typ s = 1000 let reader = Br.of_string s in 1001 parse typ reader 1002 1003let parse_bytes typ b = 1004 let reader = Br.of_bytes b in 1005 parse typ reader 1006 1007(* Binary encoding with Bytesrw.Bytes.Writer *) 1008 1009module Bw = Bytesrw.Bytes.Writer 1010 1011(* Encoder state *) 1012type encoder = { writer : Bw.t; buf : bytes } 1013 1014let encoder writer = { writer; buf = Bytes.create 8 } 1015 1016let write_slice enc len = 1017 let slice = Bs.make enc.buf ~first:0 ~length:len in 1018 Bw.write enc.writer slice 1019 1020let write_byte enc b = 1021 Bytes.set_uint8 enc.buf 0 b; 1022 write_slice enc 1 1023 1024let write_uint16_le enc v = 1025 Bytes.set_uint16_le enc.buf 0 v; 1026 write_slice enc 2 1027 1028let write_uint16_be enc v = 1029 Bytes.set_uint16_be enc.buf 0 v; 1030 write_slice enc 2 1031 1032let write_int32_le enc v = 1033 Bytes.set_int32_le enc.buf 0 v; 1034 write_slice enc 4 1035 1036let write_int32_be enc v = 1037 Bytes.set_int32_be enc.buf 0 v; 1038 write_slice enc 4 1039 1040let write_uint32_le enc v = 1041 UInt32.set_le enc.buf 0 v; 1042 write_slice enc 4 1043 1044let write_uint32_be enc v = 1045 UInt32.set_be enc.buf 0 v; 1046 write_slice enc 4 1047 1048let write_int64_le enc v = 1049 Bytes.set_int64_le enc.buf 0 v; 1050 write_slice enc 8 1051 1052let write_int64_be enc v = 1053 Bytes.set_int64_be enc.buf 0 v; 1054 write_slice enc 8 1055 1056let write_uint63_le enc v = 1057 UInt63.set_le enc.buf 0 v; 1058 write_slice enc 8 1059 1060let write_uint63_be enc v = 1061 UInt63.set_be enc.buf 0 v; 1062 write_slice enc 8 1063 1064let write_string enc s = Bw.write_string enc.writer s 1065 1066let rec encode_with_ctx : type a. ctx -> a typ -> a -> encoder -> ctx = 1067 fun ctx typ v enc -> 1068 match typ with 1069 | Uint8 -> 1070 write_byte enc v; 1071 ctx 1072 | Uint16 Little -> 1073 write_uint16_le enc v; 1074 ctx 1075 | Uint16 Big -> 1076 write_uint16_be enc v; 1077 ctx 1078 | Uint32 Little -> 1079 write_uint32_le enc v; 1080 ctx 1081 | Uint32 Big -> 1082 write_uint32_be enc v; 1083 ctx 1084 | Uint63 Little -> 1085 write_uint63_le enc v; 1086 ctx 1087 | Uint63 Big -> 1088 write_uint63_be enc v; 1089 ctx 1090 | Uint64 Little -> 1091 write_int64_le enc v; 1092 ctx 1093 | Uint64 Big -> 1094 write_int64_be enc v; 1095 ctx 1096 | Bits { width; base } -> 1097 let mask = (1 lsl width) - 1 in 1098 let masked = v land mask in 1099 (match base with 1100 | BF_U8 -> write_byte enc masked 1101 | BF_U16 Little -> write_uint16_le enc masked 1102 | BF_U16 Big -> write_uint16_be enc masked 1103 | BF_U32 Little -> write_int32_le enc (Int32.of_int masked) 1104 | BF_U32 Big -> write_int32_be enc (Int32.of_int masked)); 1105 ctx 1106 | Unit -> ctx 1107 | All_bytes -> 1108 write_string enc v; 1109 ctx 1110 | All_zeros -> 1111 write_string enc v; 1112 ctx 1113 | Where { inner; _ } -> encode_with_ctx ctx inner v enc 1114 | Array { elem; _ } -> 1115 List.fold_left 1116 (fun ctx' elem_v -> encode_with_ctx ctx' elem elem_v enc) 1117 ctx v 1118 | Byte_array _ -> 1119 write_string enc v; 1120 ctx 1121 | Single_elem { elem; _ } -> encode_with_ctx ctx elem v enc 1122 | Enum { base; _ } -> encode_with_ctx ctx base v enc 1123 | Map { inner; encode; _ } -> encode_with_ctx ctx inner (encode v) enc 1124 | Casetype _ -> failwith "casetype encoding: use Record module" 1125 | Struct _ -> failwith "struct encoding: use Record module" 1126 | Type_ref _ -> failwith "type_ref requires a type registry" 1127 | Qualified_ref _ -> failwith "qualified_ref requires a type registry" 1128 | Apply _ -> failwith "apply requires a type registry" 1129 1130let encode typ v writer = 1131 let enc = encoder writer in 1132 ignore (encode_with_ctx empty_ctx typ v enc) 1133 1134let encode_to_bytes typ v = 1135 let buf = Buffer.create 64 in 1136 let writer = Bw.of_buffer buf in 1137 encode typ v writer; 1138 Buffer.to_bytes buf 1139 1140let encode_to_string typ v = 1141 let buf = Buffer.create 64 in 1142 let writer = Bw.of_buffer buf in 1143 encode typ v writer; 1144 Buffer.contents buf 1145 1146(* ==================== Typed Record DSL (ctypes-like) ==================== *) 1147 1148type ('a, 'r) field_codec = { 1149 name : string; 1150 constraint_ : bool expr option; 1151 typ : 'a typ; 1152 get : 'r -> 'a; 1153 set : 'a -> 'r -> 'r; 1154} 1155(** A field codec for field of type ['a] in record of type ['r] *) 1156 1157(** Specialized encode/decode functions built at codec construction time. This 1158 avoids interpretation overhead by generating direct operations on 1159 Bytesrw.Slice, similar to hand-written codecs but integrated with the 1160 streaming API. *) 1161 1162type 'r record_codec = { 1163 record_name : string; 1164 fields : 'r field_codec_packed list; 1165 default : 'r; 1166 wire_size : int option; (* Pre-computed fixed wire size, None if variable *) 1167} 1168(** A record codec for type ['r]. Contains only the schema description. 1169 Specialized encode/decode functions are built by 1170 [Record.encode]/[Record.decode]. *) 1171 1172and 'r field_codec_packed = 1173 | Field_codec : ('a, 'r) field_codec -> 'r field_codec_packed 1174 1175(** Compute wire size of a field codec's type *) 1176let rec field_wire_size : type a. a typ -> int option = function 1177 | Uint8 -> Some 1 1178 | Uint16 _ -> Some 2 1179 | Uint32 _ -> Some 4 1180 | Uint64 _ -> Some 8 1181 | Bits { base; _ } -> ( 1182 match base with 1183 | BF_U8 -> Some 1 1184 | BF_U16 _ -> Some 2 1185 | BF_U32 _ -> Some 4) 1186 | Unit -> Some 0 1187 | Byte_array { size = Int n } -> Some n 1188 | Where { inner; _ } -> field_wire_size inner 1189 | Enum { base; _ } -> field_wire_size base 1190 | Map { inner; _ } -> field_wire_size inner 1191 | _ -> None 1192 1193(** Build a specialized field encoder: writes field value to bytes at offset. 1194 Returns the new offset. Works directly on the slice's underlying bytes. *) 1195let rec build_field_encoder : type a. a typ -> bytes -> int -> a -> int = 1196 fun typ -> 1197 match typ with 1198 | Uint8 -> 1199 fun buf off v -> 1200 Bytes.set_uint8 buf off v; 1201 off + 1 1202 | Uint16 Little -> 1203 fun buf off v -> 1204 Bytes.set_uint16_le buf off v; 1205 off + 2 1206 | Uint16 Big -> 1207 fun buf off v -> 1208 Bytes.set_uint16_be buf off v; 1209 off + 2 1210 | Uint32 Little -> 1211 fun buf off v -> 1212 UInt32.set_le buf off v; 1213 off + 4 1214 | Uint32 Big -> 1215 fun buf off v -> 1216 UInt32.set_be buf off v; 1217 off + 4 1218 | Uint63 Little -> 1219 fun buf off v -> 1220 UInt63.set_le buf off v; 1221 off + 8 1222 | Uint63 Big -> 1223 fun buf off v -> 1224 UInt63.set_be buf off v; 1225 off + 8 1226 | Uint64 Little -> 1227 fun buf off v -> 1228 Bytes.set_int64_le buf off v; 1229 off + 8 1230 | Uint64 Big -> 1231 fun buf off v -> 1232 Bytes.set_int64_be buf off v; 1233 off + 8 1234 | Byte_array { size = Int n } -> 1235 fun buf off v -> 1236 let len = min n (String.length v) in 1237 Bytes.blit_string v 0 buf off len; 1238 if len < n then Bytes.fill buf (off + len) (n - len) '\x00'; 1239 off + n 1240 | Where { inner; _ } -> build_field_encoder inner 1241 | Enum { base; _ } -> build_field_encoder base 1242 | Map { inner; encode; _ } -> 1243 let enc = build_field_encoder inner in 1244 fun buf off v -> enc buf off (encode v) 1245 | Unit -> fun _buf off () -> off 1246 | _ -> 1247 (* Fallback for complex types - not specialized *) 1248 fun _buf _off _v -> failwith "build_field_encoder: unsupported type" 1249 1250(** Build a specialized field decoder: reads field value from slice's bytes. 1251 Takes the slice's bytes, first offset, and field offset within the record. 1252 Returns the value and new field offset. *) 1253let rec build_field_decoder : type a. a typ -> bytes -> int -> int -> a * int = 1254 fun typ -> 1255 match typ with 1256 | Uint8 -> fun buf base off -> (Bytes.get_uint8 buf (base + off), off + 1) 1257 | Uint16 Little -> 1258 fun buf base off -> (Bytes.get_uint16_le buf (base + off), off + 2) 1259 | Uint16 Big -> 1260 fun buf base off -> (Bytes.get_uint16_be buf (base + off), off + 2) 1261 | Uint32 Little -> 1262 fun buf base off -> (UInt32.get_le buf (base + off), off + 4) 1263 | Uint32 Big -> fun buf base off -> (UInt32.get_be buf (base + off), off + 4) 1264 | Uint63 Little -> 1265 fun buf base off -> (UInt63.get_le buf (base + off), off + 8) 1266 | Uint63 Big -> fun buf base off -> (UInt63.get_be buf (base + off), off + 8) 1267 | Uint64 Little -> 1268 fun buf base off -> (Bytes.get_int64_le buf (base + off), off + 8) 1269 | Uint64 Big -> 1270 fun buf base off -> (Bytes.get_int64_be buf (base + off), off + 8) 1271 | Byte_array { size = Int n } -> 1272 fun buf base off -> (Bytes.sub_string buf (base + off) n, off + n) 1273 | Where { inner; _ } -> build_field_decoder inner 1274 | Enum { base; _ } -> build_field_decoder base 1275 | Map { inner; decode; _ } -> 1276 let dec = build_field_decoder inner in 1277 fun buf base off -> 1278 let v, off' = dec buf base off in 1279 (decode v, off') 1280 | Unit -> fun _buf _base off -> ((), off) 1281 | _ -> 1282 (* Fallback for complex types *) 1283 fun _buf _base _off -> failwith "build_field_decoder: unsupported type" 1284 1285(** Build a mutable-offset field decoder (avoids tuple allocation). Takes bytes, 1286 base, mutable offset ref. Returns value, mutates offset. *) 1287let rec build_field_decoder_mut : type a. a typ -> bytes -> int -> int ref -> a 1288 = 1289 fun typ -> 1290 match typ with 1291 | Uint8 -> 1292 fun buf base off -> 1293 let v = Bytes.get_uint8 buf (base + !off) in 1294 off := !off + 1; 1295 v 1296 | Uint16 Little -> 1297 fun buf base off -> 1298 let v = Bytes.get_uint16_le buf (base + !off) in 1299 off := !off + 2; 1300 v 1301 | Uint16 Big -> 1302 fun buf base off -> 1303 let v = Bytes.get_uint16_be buf (base + !off) in 1304 off := !off + 2; 1305 v 1306 | Uint32 Little -> 1307 fun buf base off -> 1308 let v = UInt32.get_le buf (base + !off) in 1309 off := !off + 4; 1310 v 1311 | Uint32 Big -> 1312 fun buf base off -> 1313 let v = UInt32.get_be buf (base + !off) in 1314 off := !off + 4; 1315 v 1316 | Uint63 Little -> 1317 fun buf base off -> 1318 let v = UInt63.get_le buf (base + !off) in 1319 off := !off + 8; 1320 v 1321 | Uint63 Big -> 1322 fun buf base off -> 1323 let v = UInt63.get_be buf (base + !off) in 1324 off := !off + 8; 1325 v 1326 | Uint64 Little -> 1327 fun buf base off -> 1328 let v = Bytes.get_int64_le buf (base + !off) in 1329 off := !off + 8; 1330 v 1331 | Uint64 Big -> 1332 fun buf base off -> 1333 let v = Bytes.get_int64_be buf (base + !off) in 1334 off := !off + 8; 1335 v 1336 | Byte_array { size = Int n } -> 1337 fun buf base off -> 1338 let v = Bytes.sub_string buf (base + !off) n in 1339 off := !off + n; 1340 v 1341 | Where { inner; _ } -> build_field_decoder_mut inner 1342 | Enum { base; _ } -> build_field_decoder_mut base 1343 | Map { inner; decode; _ } -> 1344 let dec = build_field_decoder_mut inner in 1345 fun buf base off -> decode (dec buf base off) 1346 | Unit -> fun _buf _base _off -> () 1347 | _ -> 1348 fun _buf _base _off -> 1349 failwith "build_field_decoder_mut: unsupported type" 1350 1351(** CPS-style field decoder: threads constructor through decode chain. This is 1352 the repr pattern that avoids intermediate record allocations. Type: bytes -> 1353 int -> int ref -> ('a -> 'b) -> 'b *) 1354let rec build_field_decoder_cps : type a. 1355 a typ -> bytes -> int -> int ref -> (a -> 'k) -> 'k = 1356 fun typ -> 1357 match typ with 1358 | Uint8 -> 1359 fun buf base off k -> 1360 let v = Bytes.get_uint8 buf (base + !off) in 1361 off := !off + 1; 1362 k v 1363 | Uint16 Little -> 1364 fun buf base off k -> 1365 let v = Bytes.get_uint16_le buf (base + !off) in 1366 off := !off + 2; 1367 k v 1368 | Uint16 Big -> 1369 fun buf base off k -> 1370 let v = Bytes.get_uint16_be buf (base + !off) in 1371 off := !off + 2; 1372 k v 1373 | Uint32 Little -> 1374 fun buf base off k -> 1375 let v = UInt32.get_le buf (base + !off) in 1376 off := !off + 4; 1377 k v 1378 | Uint32 Big -> 1379 fun buf base off k -> 1380 let v = UInt32.get_be buf (base + !off) in 1381 off := !off + 4; 1382 k v 1383 | Uint63 Little -> 1384 fun buf base off k -> 1385 let v = UInt63.get_le buf (base + !off) in 1386 off := !off + 8; 1387 k v 1388 | Uint63 Big -> 1389 fun buf base off k -> 1390 let v = UInt63.get_be buf (base + !off) in 1391 off := !off + 8; 1392 k v 1393 | Uint64 Little -> 1394 fun buf base off k -> 1395 let v = Bytes.get_int64_le buf (base + !off) in 1396 off := !off + 8; 1397 k v 1398 | Uint64 Big -> 1399 fun buf base off k -> 1400 let v = Bytes.get_int64_be buf (base + !off) in 1401 off := !off + 8; 1402 k v 1403 | Byte_array { size = Int n } -> 1404 fun buf base off k -> 1405 let v = Bytes.sub_string buf (base + !off) n in 1406 off := !off + n; 1407 k v 1408 | Where { inner; _ } -> build_field_decoder_cps inner 1409 | Enum { base; _ } -> build_field_decoder_cps base 1410 | Map { inner; decode; _ } -> 1411 let dec = build_field_decoder_cps inner in 1412 fun buf base off k -> dec buf base off (fun v -> k (decode v)) 1413 | Unit -> fun _buf _base _off k -> k () 1414 | _ -> 1415 fun _buf _base _off _k -> 1416 failwith "build_field_decoder_cps: unsupported type" 1417 1418(** Create a field codec *) 1419let field_codec name ?constraint_ typ ~get ~set = 1420 { name; constraint_; typ; get; set } 1421 1422(** Create a record codec schema. This is just data - no specialization yet. 1423 Specialization happens when you call [Record.encode]/[Record.decode]. *) 1424let record_codec name ~default fields = 1425 let wire_size = 1426 List.fold_left 1427 (fun acc (Field_codec fc) -> 1428 match (acc, field_wire_size fc.typ) with 1429 | Some a, Some b -> Some (a + b) 1430 | _ -> None) 1431 (Some 0) fields 1432 in 1433 { record_name = name; fields; default; wire_size } 1434 1435(** [record_wire_size codec] returns the fixed wire size of the codec, or [None] 1436 if the codec has variable-length fields. Callers should check buffer length 1437 before calling decode to avoid index-out-of-bounds errors. *) 1438let record_wire_size codec = codec.wire_size 1439 1440(** Pack a field codec for storage in record *) 1441let pack_field fc = Field_codec fc 1442 1443(* Bitfield encoder accumulator *) 1444type bf_enc_accum = { 1445 bfe_base : bitfield_base; 1446 bfe_word : int; 1447 bfe_bits_used : int; 1448 bfe_total_bits : int; 1449} 1450 1451let bf_write_word enc base word = 1452 match base with 1453 | BF_U8 -> write_byte enc word 1454 | BF_U16 Little -> write_uint16_le enc word 1455 | BF_U16 Big -> write_uint16_be enc word 1456 | BF_U32 Little -> write_int32_le enc (Int32.of_int word) 1457 | BF_U32 Big -> write_int32_be enc (Int32.of_int word) 1458 1459(* Insert bits into accumulator at current position (MSB first) *) 1460let bf_insert accum width value = 1461 let shift = accum.bfe_total_bits - accum.bfe_bits_used - width in 1462 let mask = (1 lsl width) - 1 in 1463 let masked = value land mask in 1464 let word' = accum.bfe_word lor (masked lsl shift) in 1465 { accum with bfe_word = word'; bfe_bits_used = accum.bfe_bits_used + width } 1466 1467(** Insert a bitfield value into the accumulator, flushing and resetting if the 1468 base type changed or there is no room. *) 1469let encode_bf_accum enc flush_accum accum_opt base width field_val = 1470 let accum_opt' = 1471 match accum_opt with 1472 | Some accum 1473 when bf_compatible accum.bfe_base base 1474 && accum.bfe_bits_used + width <= accum.bfe_total_bits -> 1475 Some (bf_insert accum width field_val) 1476 | _ -> 1477 flush_accum accum_opt; 1478 let total = bf_total_bits base in 1479 let accum = 1480 { 1481 bfe_base = base; 1482 bfe_word = 0; 1483 bfe_bits_used = 0; 1484 bfe_total_bits = total; 1485 } 1486 in 1487 Some (bf_insert accum width field_val) 1488 in 1489 (* Flush accumulator if full *) 1490 match accum_opt' with 1491 | Some a when a.bfe_bits_used = a.bfe_total_bits -> 1492 bf_write_word enc a.bfe_base a.bfe_word; 1493 None 1494 | other -> other 1495 1496(** Encode a record value to a writer with bitfield packing *) 1497let encode_record : type r. 1498 r record_codec -> r -> Bw.t -> (unit, parse_error) result = 1499 fun codec v writer -> 1500 let enc = encoder writer in 1501 let flush_accum = function 1502 | None -> () 1503 | Some accum -> bf_write_word enc accum.bfe_base accum.bfe_word 1504 in 1505 let rec encode_fields (ctx : int Ctx.t) accum_opt = function 1506 | [] -> 1507 flush_accum accum_opt; 1508 Ok () 1509 | Field_codec fc :: rest -> ( 1510 let field_val = fc.get v in 1511 match fc.typ with 1512 | Bits { width; base } -> ( 1513 let accum_opt' = 1514 encode_bf_accum enc flush_accum accum_opt base width field_val 1515 in 1516 let ctx' = Ctx.add fc.name field_val ctx in 1517 match fc.constraint_ with 1518 | Some cond when not (eval_expr ctx' cond) -> 1519 Error (Constraint_failed "field constraint") 1520 | _ -> encode_fields ctx' accum_opt' rest) 1521 | _ -> ( 1522 flush_accum accum_opt; 1523 let ctx' = encode_with_ctx ctx fc.typ field_val enc in 1524 let ctx'' = Ctx.add fc.name (val_to_int fc.typ field_val) ctx' in 1525 match fc.constraint_ with 1526 | Some cond when not (eval_expr ctx'' cond) -> 1527 Error (Constraint_failed "field constraint") 1528 | _ -> encode_fields ctx'' None rest)) 1529 in 1530 encode_fields empty_ctx None codec.fields 1531 1532(** Build a staged record encoder: returns a slice with encoded data. Following 1533 repr's pattern: iteration over fields happens once at staging time, building 1534 closures that are fast to execute. 1535 1536 WARNING: The returned slice's underlying buffer may be reused between calls. 1537 Copy the slice data before the next encode if you need to keep it. *) 1538let encode_record_to_slice : type r. r record_codec -> (r -> Bs.t) Staged.t = 1539 fun codec -> 1540 match codec.wire_size with 1541 | Some wire_size -> 1542 (* Build field encoders at staging time - this is the repr pattern *) 1543 let buf = Bytes.create wire_size in 1544 let field_encoders = 1545 List.filter_map 1546 (fun (Field_codec fc) -> 1547 match field_wire_size fc.typ with 1548 | Some _ -> 1549 let encoder = build_field_encoder fc.typ in 1550 Some (fun b off v -> encoder b off (fc.get v)) 1551 | None -> None) 1552 codec.fields 1553 in 1554 if List.length field_encoders <> List.length codec.fields then 1555 (* Not all fields can be specialized - return empty slice *) 1556 Staged.stage (fun _v -> Bs.eod) 1557 else 1558 (* All fields specialized - this closure captures pre-built encoders *) 1559 Staged.stage (fun v -> 1560 let _ = 1561 List.fold_left (fun off enc -> enc buf off v) 0 field_encoders 1562 in 1563 Bs.make buf ~first:0 ~length:wire_size) 1564 | None -> 1565 (* Variable-size: can't return slice *) 1566 Staged.stage (fun _v -> Bs.eod) 1567 1568(** Build a staged record decoder: reads from a slice. Following repr's pattern: 1569 iteration over fields happens once at staging time, building closures that 1570 are fast to execute. *) 1571let decode_record_from_slice : type r. r record_codec -> (Bs.t -> r) Staged.t = 1572 fun codec -> 1573 (* Build field decoders using mutable offset to avoid tuple allocation *) 1574 let field_decoders = 1575 List.filter_map 1576 (fun (Field_codec fc) -> 1577 match field_wire_size fc.typ with 1578 | Some _ -> 1579 let decoder = build_field_decoder_mut fc.typ in 1580 let set = fc.set in 1581 Some (fun buf base off acc -> set (decoder buf base off) acc) 1582 | None -> None) 1583 codec.fields 1584 in 1585 let default = codec.default in 1586 if List.length field_decoders <> List.length codec.fields then 1587 Staged.stage (fun _slice -> default) 1588 else 1589 (* Convert to array for faster iteration *) 1590 let decoders = Array.of_list field_decoders in 1591 let n = Array.length decoders in 1592 Staged.stage (fun slice -> 1593 let buf = Bs.bytes slice in 1594 let base = Bs.first slice in 1595 let off = Stdlib.ref 0 in 1596 let acc = Stdlib.ref default in 1597 for i = 0 to n - 1 do 1598 acc := decoders.(i) buf base off !acc 1599 done; 1600 !acc) 1601 1602(** {2 Zero-allocation encode/decode} 1603 1604 These functions provide direct bytes access without slice allocation. Use 1605 when you need maximum performance and can manage buffer lifetime. *) 1606 1607type 'r encode_context = { 1608 buffer : bytes; 1609 wire_size : int; 1610 encode : 'r -> unit; 1611} 1612(** Context for zero-allocation encoding. The [buffer] is shared and reused 1613 between calls to [encode]. Copy the bytes before the next [encode] if you 1614 need to keep them. *) 1615 1616let encode_record_to_bytes : type r. r record_codec -> r encode_context option = 1617 fun codec -> 1618 match codec.wire_size with 1619 | Some wire_size -> 1620 let buf = Bytes.create wire_size in 1621 (* Build field encoders at staging time - each captures its getter *) 1622 let field_encoders = 1623 List.filter_map 1624 (fun (Field_codec fc) -> 1625 match field_wire_size fc.typ with 1626 | Some _ -> 1627 let encoder = build_field_encoder fc.typ in 1628 let get = fc.get in 1629 (* This function is built once, captures encoder and get *) 1630 Some (fun v b off -> encoder b off (get v)) 1631 | None -> None) 1632 codec.fields 1633 in 1634 if List.length field_encoders <> List.length codec.fields then None 1635 else 1636 (* Convert to array for faster iteration without closure allocation *) 1637 let encoders = Array.of_list field_encoders in 1638 let n = Array.length encoders in 1639 let encode v = 1640 let off = Stdlib.ref 0 in 1641 for i = 0 to n - 1 do 1642 off := encoders.(i) v buf !off 1643 done 1644 in 1645 Some { buffer = buf; wire_size; encode } 1646 | None -> None 1647 1648(** Build a direct field reader that reads at a fixed offset. No tuples, no refs 1649 \- just pure value read. *) 1650let rec build_field_reader : type a. a typ -> int -> bytes -> int -> a = 1651 fun typ field_off -> 1652 match typ with 1653 | Uint8 -> fun buf base -> Bytes.get_uint8 buf (base + field_off) 1654 | Uint16 Little -> fun buf base -> Bytes.get_uint16_le buf (base + field_off) 1655 | Uint16 Big -> fun buf base -> Bytes.get_uint16_be buf (base + field_off) 1656 | Uint32 Little -> fun buf base -> UInt32.get_le buf (base + field_off) 1657 | Uint32 Big -> fun buf base -> UInt32.get_be buf (base + field_off) 1658 | Uint63 Little -> fun buf base -> UInt63.get_le buf (base + field_off) 1659 | Uint63 Big -> fun buf base -> UInt63.get_be buf (base + field_off) 1660 | Uint64 Little -> fun buf base -> Bytes.get_int64_le buf (base + field_off) 1661 | Uint64 Big -> fun buf base -> Bytes.get_int64_be buf (base + field_off) 1662 | Byte_array { size = Int n } -> 1663 fun buf base -> Bytes.sub_string buf (base + field_off) n 1664 | Where { inner; _ } -> build_field_reader inner field_off 1665 | Enum { base; _ } -> build_field_reader base field_off 1666 | Map { inner; decode; _ } -> 1667 let read = build_field_reader inner field_off in 1668 fun buf base -> decode (read buf base) 1669 | Unit -> fun _buf _base -> () 1670 | _ -> fun _buf _base -> failwith "build_field_reader: unsupported type" 1671 1672let decode_record_from_bytes : type r. 1673 r record_codec -> (bytes -> int -> r) Staged.t = 1674 fun codec -> 1675 (* Precompute field offsets and build readers at staging time *) 1676 let field_info = 1677 let current_off = Stdlib.ref 0 in 1678 List.filter_map 1679 (fun (Field_codec fc) -> 1680 match field_wire_size fc.typ with 1681 | Some size -> 1682 let off = !current_off in 1683 current_off := !current_off + size; 1684 (* Build a direct reader at this fixed offset *) 1685 let reader = build_field_reader fc.typ off in 1686 let set = fc.set in 1687 Some (fun buf base acc -> set (reader buf base) acc) 1688 | None -> None) 1689 codec.fields 1690 in 1691 let default = codec.default in 1692 if List.length field_info <> List.length codec.fields then 1693 Staged.stage (fun _buf _base -> default) 1694 else 1695 (* Convert to array for iteration *) 1696 let decoders = Array.of_list field_info in 1697 let n = Array.length decoders in 1698 Staged.stage (fun buf base -> 1699 let acc = Stdlib.ref default in 1700 for i = 0 to n - 1 do 1701 acc := decoders.(i) buf base !acc 1702 done; 1703 !acc) 1704 1705(** {2 Zero-alloc decode with explicit types} 1706 1707 For truly zero intermediate allocations, use these typed decode functions 1708 that build readers at staging time and call a make function directly. Only 1709 the final record is allocated - no intermediate records or refs. *) 1710 1711(** Build zero-alloc decoder for 1-field record *) 1712let decode_make1 : type a1 r. 1713 a1 typ -> make:(a1 -> r) -> (bytes -> int -> r) Staged.t = 1714 fun t1 ~make -> 1715 let read1 = build_field_reader t1 0 in 1716 Staged.stage (fun buf base -> make (read1 buf base)) 1717 1718(** Build zero-alloc decoder for 2-field record *) 1719let decode_make2 : type a1 a2 r. 1720 a1 typ -> a2 typ -> make:(a1 -> a2 -> r) -> (bytes -> int -> r) Staged.t = 1721 fun t1 t2 ~make -> 1722 match (field_wire_size t1, field_wire_size t2) with 1723 | Some s1, Some _ -> 1724 let read1 = build_field_reader t1 0 in 1725 let read2 = build_field_reader t2 s1 in 1726 Staged.stage (fun buf base -> make (read1 buf base) (read2 buf base)) 1727 | _ -> failwith "decode_make2: variable-size fields not supported" 1728 1729(** Build zero-alloc decoder for 3-field record *) 1730let decode_make3 : type a1 a2 a3 r. 1731 a1 typ -> 1732 a2 typ -> 1733 a3 typ -> 1734 make:(a1 -> a2 -> a3 -> r) -> 1735 (bytes -> int -> r) Staged.t = 1736 fun t1 t2 t3 ~make -> 1737 match (field_wire_size t1, field_wire_size t2, field_wire_size t3) with 1738 | Some s1, Some s2, Some _ -> 1739 let read1 = build_field_reader t1 0 in 1740 let read2 = build_field_reader t2 s1 in 1741 let read3 = build_field_reader t3 (s1 + s2) in 1742 Staged.stage (fun buf base -> 1743 make (read1 buf base) (read2 buf base) (read3 buf base)) 1744 | _ -> failwith "decode_make3: variable-size fields not supported" 1745 1746(** Build zero-alloc decoder for 4-field record *) 1747let decode_make4 : type a1 a2 a3 a4 r. 1748 a1 typ -> 1749 a2 typ -> 1750 a3 typ -> 1751 a4 typ -> 1752 make:(a1 -> a2 -> a3 -> a4 -> r) -> 1753 (bytes -> int -> r) Staged.t = 1754 fun t1 t2 t3 t4 ~make -> 1755 match 1756 ( field_wire_size t1, 1757 field_wire_size t2, 1758 field_wire_size t3, 1759 field_wire_size t4 ) 1760 with 1761 | Some s1, Some s2, Some s3, Some _ -> 1762 let read1 = build_field_reader t1 0 in 1763 let read2 = build_field_reader t2 s1 in 1764 let read3 = build_field_reader t3 (s1 + s2) in 1765 let read4 = build_field_reader t4 (s1 + s2 + s3) in 1766 Staged.stage (fun buf base -> 1767 make (read1 buf base) (read2 buf base) (read3 buf base) 1768 (read4 buf base)) 1769 | _ -> failwith "decode_make4: variable-size fields not supported" 1770 1771(** {2 Bounds-checked decode with exceptions} 1772 1773 Same as decode_make* but with bounds checking that raises Parse_error. *) 1774 1775(** Build bounds-checked decoder for 1-field record *) 1776let decode_make1_exn : type a1 r. 1777 a1 typ -> make:(a1 -> r) -> (bytes -> int -> r) Staged.t = 1778 fun t1 ~make -> 1779 match field_wire_size t1 with 1780 | Some total_size -> 1781 let read1 = build_field_reader t1 0 in 1782 Staged.stage (fun buf base -> 1783 let len = Bytes.length buf in 1784 if base + total_size > len then 1785 raise_eof ~expected:total_size ~got:(len - base); 1786 make (read1 buf base)) 1787 | None -> failwith "decode_make1_exn: variable-size fields not supported" 1788 1789(** Build bounds-checked decoder for 2-field record *) 1790let decode_make2_exn : type a1 a2 r. 1791 a1 typ -> a2 typ -> make:(a1 -> a2 -> r) -> (bytes -> int -> r) Staged.t = 1792 fun t1 t2 ~make -> 1793 match (field_wire_size t1, field_wire_size t2) with 1794 | Some s1, Some s2 -> 1795 let total_size = s1 + s2 in 1796 let read1 = build_field_reader t1 0 in 1797 let read2 = build_field_reader t2 s1 in 1798 Staged.stage (fun buf base -> 1799 let len = Bytes.length buf in 1800 if base + total_size > len then 1801 raise_eof ~expected:total_size ~got:(len - base); 1802 make (read1 buf base) (read2 buf base)) 1803 | _ -> failwith "decode_make2_exn: variable-size fields not supported" 1804 1805(** Build bounds-checked decoder for 3-field record *) 1806let decode_make3_exn : type a1 a2 a3 r. 1807 a1 typ -> 1808 a2 typ -> 1809 a3 typ -> 1810 make:(a1 -> a2 -> a3 -> r) -> 1811 (bytes -> int -> r) Staged.t = 1812 fun t1 t2 t3 ~make -> 1813 match (field_wire_size t1, field_wire_size t2, field_wire_size t3) with 1814 | Some s1, Some s2, Some s3 -> 1815 let total_size = s1 + s2 + s3 in 1816 let read1 = build_field_reader t1 0 in 1817 let read2 = build_field_reader t2 s1 in 1818 let read3 = build_field_reader t3 (s1 + s2) in 1819 Staged.stage (fun buf base -> 1820 let len = Bytes.length buf in 1821 if base + total_size > len then 1822 raise_eof ~expected:total_size ~got:(len - base); 1823 make (read1 buf base) (read2 buf base) (read3 buf base)) 1824 | _ -> failwith "decode_make3_exn: variable-size fields not supported" 1825 1826(** Build bounds-checked decoder for 4-field record *) 1827let decode_make4_exn : type a1 a2 a3 a4 r. 1828 a1 typ -> 1829 a2 typ -> 1830 a3 typ -> 1831 a4 typ -> 1832 make:(a1 -> a2 -> a3 -> a4 -> r) -> 1833 (bytes -> int -> r) Staged.t = 1834 fun t1 t2 t3 t4 ~make -> 1835 match 1836 ( field_wire_size t1, 1837 field_wire_size t2, 1838 field_wire_size t3, 1839 field_wire_size t4 ) 1840 with 1841 | Some s1, Some s2, Some s3, Some s4 -> 1842 let total_size = s1 + s2 + s3 + s4 in 1843 let read1 = build_field_reader t1 0 in 1844 let read2 = build_field_reader t2 s1 in 1845 let read3 = build_field_reader t3 (s1 + s2) in 1846 let read4 = build_field_reader t4 (s1 + s2 + s3) in 1847 Staged.stage (fun buf base -> 1848 let len = Bytes.length buf in 1849 if base + total_size > len then 1850 raise_eof ~expected:total_size ~got:(len - base); 1851 make (read1 buf base) (read2 buf base) (read3 buf base) 1852 (read4 buf base)) 1853 | _ -> failwith "decode_make4_exn: variable-size fields not supported" 1854 1855(** Convert record codec to struct_ for 3D generation *) 1856let record_to_struct codec = 1857 let fields = 1858 List.map 1859 (fun (Field_codec fc) -> field fc.name ?constraint_:fc.constraint_ fc.typ) 1860 codec.fields 1861 in 1862 struct_ codec.record_name fields 1863 1864(* ==================== EverParse FFI Helpers ==================== *) 1865 1866(* NOTE: wire does NOT generate C parsing code. C parsers come from EverParse. 1867 This section provides helpers for generating OCaml FFI stubs that call 1868 EverParse-generated C code. 1869 1870 Workflow: 1871 1. Define schema in OCaml using wire 1872 2. Generate .3d file with to_3d 1873 3. Run EverParse to generate C parser (.h with struct + read/write) 1874 4. Use to_c_stubs to generate OCaml FFI bindings to call EverParse C *) 1875 1876(** Compute the fixed wire size of a struct (None if variable-length) *) 1877let rec size_of_typ : type a. a typ -> int option = function 1878 | Uint8 -> Some 1 1879 | Uint16 _ -> Some 2 1880 | Uint32 _ -> Some 4 1881 | Uint64 _ -> Some 8 1882 | Byte_array { size = Int n } -> Some n 1883 | Enum { base; _ } -> wire_size_of_int_typ base 1884 | Where { inner; _ } -> size_of_typ inner 1885 | Map { inner; _ } -> size_of_typ inner 1886 | _ -> None 1887 1888and wire_size_of_int_typ : int typ -> int option = function 1889 | Uint8 -> Some 1 1890 | Uint16 _ -> Some 2 1891 | Enum { base; _ } -> wire_size_of_int_typ base 1892 | Where { inner; _ } -> size_base inner 1893 | _ -> None 1894 1895and size_base : type a. a typ -> int option = function 1896 | Uint8 -> Some 1 1897 | Uint16 _ -> Some 2 1898 | _ -> None 1899 1900let size_of_struct (s : struct_) = 1901 List.fold_left 1902 (fun acc (Field f) -> 1903 match (acc, size_of_typ f.field_typ) with 1904 | Some a, Some b -> Some (a + b) 1905 | _ -> None) 1906 (Some 0) s.fields 1907 1908(** OCaml type name for a wire type (for generated external declarations). *) 1909let rec ml_type_of : type a. a typ -> string = function 1910 | Uint8 -> "int" 1911 | Uint16 _ -> "int" 1912 | Uint32 _ -> "int32" 1913 | Uint64 _ -> "int64" 1914 | Enum { base; _ } -> ml_type_of_int base 1915 | Where { inner; _ } -> ml_type_of inner 1916 | Map { inner; _ } -> ml_type_of inner 1917 | _ -> failwith "ml_type_of: unsupported type" 1918 1919and ml_type_of_int : int typ -> string = function 1920 | Uint8 -> "int" 1921 | Uint16 _ -> "int" 1922 | Enum { base; _ } -> ml_type_of_int base 1923 | Where { inner; _ } -> ml_type_of_int inner 1924 | Map { inner; _ } -> ml_type_of inner 1925 | _ -> failwith "ml_type_of_int: unsupported type" 1926 1927(** C expression to store a C struct field into an OCaml value. *) 1928let c_to_ml : type a. a typ -> string -> string = 1929 fun typ c_expr -> 1930 match ml_type_of typ with 1931 | "int" -> Fmt.str "Val_int(%s)" c_expr 1932 | "int32" -> Fmt.str "caml_copy_int32(%s)" c_expr 1933 | "int64" -> Fmt.str "caml_copy_int64(%s)" c_expr 1934 | _ -> failwith "c_to_ml: unsupported type" 1935 1936(** C expression to extract an OCaml value into a C value. *) 1937let ml_to_c : type a. a typ -> string -> string = 1938 fun typ ml_expr -> 1939 match ml_type_of typ with 1940 | "int" -> Fmt.str "Int_val(%s)" ml_expr 1941 | "int32" -> Fmt.str "Int32_val(%s)" ml_expr 1942 | "int64" -> Fmt.str "Int64_val(%s)" ml_expr 1943 | _ -> failwith "ml_to_c: unsupported type" 1944 1945(** Does this type require a boxed OCaml allocation (int32/int64)? *) 1946let is_boxed : type a. a typ -> bool = 1947 fun typ -> match ml_type_of typ with "int32" | "int64" -> true | _ -> false 1948 1949(** Named fields with existential type hidden. *) 1950type named_field = Named : string * 'a typ -> named_field 1951 1952let named_fields (s : struct_) = 1953 List.filter_map 1954 (fun (Field f) -> 1955 match f.field_name with 1956 | Some name -> Some (Named (name, f.field_typ)) 1957 | None -> None) 1958 s.fields 1959 1960(** Generate C read stub: [string -> (t1 * t2 * ...) option]. Calls 1961 EverParse-generated [Name_read] function. *) 1962let c_stub_read ppf (s : struct_) fields = 1963 let n = List.length fields in 1964 let has_boxed = List.exists (fun (Named (_, typ)) -> is_boxed typ) fields in 1965 Fmt.pf ppf "CAMLprim value caml_wire_%s_read(value v_buf) {@\n" s.name; 1966 Fmt.pf ppf " CAMLparam1(v_buf);@\n"; 1967 if has_boxed then Fmt.pf ppf " CAMLlocal3(v_some, v_tuple, v_tmp);@\n" 1968 else Fmt.pf ppf " CAMLlocal2(v_some, v_tuple);@\n"; 1969 Fmt.pf ppf " const uint8_t *buf = (const uint8_t *)String_val(v_buf);@\n"; 1970 Fmt.pf ppf " uint32_t len = caml_string_length(v_buf);@\n"; 1971 Fmt.pf ppf " %s val;@\n" s.name; 1972 Fmt.pf ppf " int32_t rc = %s_read(buf, len, &val);@\n" s.name; 1973 Fmt.pf ppf " if (rc <= 0) { CAMLreturn(Val_none); }@\n"; 1974 Fmt.pf ppf " v_tuple = caml_alloc_tuple(%d);@\n" n; 1975 List.iteri 1976 (fun i (Named (name, typ)) -> 1977 if is_boxed typ then begin 1978 Fmt.pf ppf " v_tmp = %s;@\n" (c_to_ml typ ("val." ^ name)); 1979 Fmt.pf ppf " Store_field(v_tuple, %d, v_tmp);@\n" i 1980 end 1981 else 1982 Fmt.pf ppf " Store_field(v_tuple, %d, %s);@\n" i 1983 (c_to_ml typ ("val." ^ name))) 1984 fields; 1985 Fmt.pf ppf " v_some = caml_alloc(1, 0);@\n"; 1986 Fmt.pf ppf " Store_field(v_some, 0, v_tuple);@\n"; 1987 Fmt.pf ppf " CAMLreturn(v_some);@\n"; 1988 Fmt.pf ppf "}@\n@\n" 1989 1990(** Generate C write stub: [(t1 * t2 * ...) -> string option]. Calls 1991 EverParse-generated [Name_write] function. *) 1992let c_stub_write ppf (s : struct_) fields = 1993 let sz = match size_of_struct s with Some n -> n | None -> 4096 in 1994 Fmt.pf ppf "CAMLprim value caml_wire_%s_write(value v_tuple) {@\n" s.name; 1995 Fmt.pf ppf " CAMLparam1(v_tuple);@\n"; 1996 Fmt.pf ppf " CAMLlocal2(v_some, v_str);@\n"; 1997 Fmt.pf ppf " %s val;@\n" s.name; 1998 List.iteri 1999 (fun i (Named (name, typ)) -> 2000 Fmt.pf ppf " val.%s = %s;@\n" name 2001 (ml_to_c typ (Fmt.str "Field(v_tuple, %d)" i))) 2002 fields; 2003 Fmt.pf ppf " uint8_t out[%d];@\n" sz; 2004 Fmt.pf ppf " int32_t wc = %s_write(&val, out, sizeof(out));@\n" s.name; 2005 Fmt.pf ppf " if (wc <= 0) { CAMLreturn(Val_none); }@\n"; 2006 Fmt.pf ppf " v_str = caml_alloc_string(wc);@\n"; 2007 Fmt.pf ppf " memcpy((char *)String_val(v_str), out, wc);@\n"; 2008 Fmt.pf ppf " v_some = caml_alloc(1, 0);@\n"; 2009 Fmt.pf ppf " Store_field(v_some, 0, v_str);@\n"; 2010 Fmt.pf ppf " CAMLreturn(v_some);@\n"; 2011 Fmt.pf ppf "}@\n@\n" 2012 2013(** Generate C FFI stubs that call EverParse-generated [Name_read] and 2014 [Name_write]. 2015 2016 For each struct [Foo] with fields [a : t1, b : t2, ...], generates: 2017 - [caml_wire_Foo_read(v_buf)] returning [(t1 * t2 * ...) option] 2018 - [caml_wire_Foo_write(v_tuple)] taking [(t1 * t2 * ...)] and returning 2019 [string option]. 2020 2021 The generated code expects EverParse headers to be available: 2022 - [Foo.h] with struct typedef and [Foo_read]/[Foo_write] functions *) 2023let to_c_stubs (structs : struct_ list) = 2024 let buf = Buffer.create 4096 in 2025 let ppf = Format.formatter_of_buffer buf in 2026 Fmt.pf ppf 2027 "/* wire_stubs.c - OCaml FFI stubs for EverParse-generated C */@\n@\n"; 2028 Fmt.pf ppf "#include <caml/mlvalues.h>@\n"; 2029 Fmt.pf ppf "#include <caml/memory.h>@\n"; 2030 Fmt.pf ppf "#include <caml/alloc.h>@\n"; 2031 Fmt.pf ppf "#include <string.h>@\n@\n"; 2032 Fmt.pf ppf "/* Include EverParse-generated headers */@\n"; 2033 List.iter 2034 (fun (s : struct_) -> Fmt.pf ppf "#include \"%s.h\"@\n" s.name) 2035 structs; 2036 Fmt.pf ppf "@\n"; 2037 List.iter 2038 (fun (s : struct_) -> 2039 let fields = named_fields s in 2040 c_stub_read ppf s fields; 2041 c_stub_write ppf s fields) 2042 structs; 2043 Format.pp_print_flush ppf (); 2044 Buffer.contents buf 2045 2046(** Generate OCaml [external] declarations matching the C stubs from 2047 {!to_c_stubs}. For each struct [Foo] with fields [a : t1, b : t2, ...], 2048 generates a module: 2049 {[ 2050 module Foo : sig 2051 val read : string -> (t1 * t2 * ...) option 2052 val write : (t1 * t2 * ...) -> string option 2053 end 2054 ]} *) 2055let to_ml_stubs (structs : struct_ list) = 2056 let buf = Buffer.create 256 in 2057 let ppf = Format.formatter_of_buffer buf in 2058 Fmt.pf ppf "(* Generated by wire (do not edit) *)@\n@\n"; 2059 List.iter 2060 (fun (s : struct_) -> 2061 let fields = named_fields s in 2062 let tuple_type = 2063 String.concat " * " 2064 (List.map (fun (Named (_, typ)) -> ml_type_of typ) fields) 2065 in 2066 Fmt.pf ppf "module %s = struct@\n" s.name; 2067 Fmt.pf ppf " external read : string -> (%s) option@\n" tuple_type; 2068 Fmt.pf ppf " = \"caml_wire_%s_read\"@\n" s.name; 2069 Fmt.pf ppf " external write : (%s) -> string option@\n" tuple_type; 2070 Fmt.pf ppf " = \"caml_wire_%s_write\"@\n" s.name; 2071 Fmt.pf ppf "end@\n@\n") 2072 structs; 2073 Format.pp_print_flush ppf (); 2074 Buffer.contents buf 2075 2076(** Module name for a generated per-struct OCaml stub file. Converts CamelCase 2077 to snake_case, e.g., [SimpleHeader] maps to ["simple_header"]. *) 2078let to_ml_stub_name (s : struct_) = 2079 let name = s.name in 2080 let buf = Buffer.create (String.length name + 4) in 2081 String.iteri 2082 (fun i c -> 2083 if i > 0 && Char.uppercase_ascii c = c && Char.lowercase_ascii c <> c then 2084 Buffer.add_char buf '_'; 2085 Buffer.add_char buf (Char.lowercase_ascii c)) 2086 name; 2087 Buffer.contents buf 2088 2089(** Generate a flat OCaml stub module for a single struct. Produces a file with 2090 [type t] and [external read/write] declarations: 2091 {[ 2092 (* Generated by wire *) 2093 type t = int * int * int32 2094 2095 external read : string -> t option = "caml_wire_Foo_read" 2096 external write : t -> string option = "caml_wire_Foo_write" 2097 ]} *) 2098let to_ml_stub (s : struct_) = 2099 let buf = Buffer.create 256 in 2100 let ppf = Format.formatter_of_buffer buf in 2101 let fields = named_fields s in 2102 let tuple_type = 2103 String.concat " * " 2104 (List.map (fun (Named (_, typ)) -> ml_type_of typ) fields) 2105 in 2106 Fmt.pf ppf "(* Generated by wire (do not edit) *)@\n@\n"; 2107 Fmt.pf ppf "type t = %s@\n@\n" tuple_type; 2108 Fmt.pf ppf "external read : string -> t option@\n"; 2109 Fmt.pf ppf " = \"caml_wire_%s_read\"@\n@\n" s.name; 2110 Fmt.pf ppf "external write : t -> string option@\n"; 2111 Fmt.pf ppf " = \"caml_wire_%s_write\"@\n" s.name; 2112 Format.pp_print_flush ppf (); 2113 Buffer.contents buf 2114 2115(* ==================== Struct-level read/write ==================== *) 2116 2117type parsed_value = Parsed : 'a typ * 'a -> parsed_value 2118type parsed_struct = (string option * parsed_value) list 2119 2120let read_struct (s : struct_) buf = 2121 let reader = Br.of_string buf in 2122 let dec = decoder reader in 2123 let rec go ctx acc = function 2124 | [] -> Ok (List.rev acc) 2125 | Field { field_name; field_typ; constraint_; _ } :: rest -> ( 2126 let* v, ctx' = parse_with_ctx ctx field_typ dec in 2127 let ctx' = 2128 match field_name with 2129 | Some n -> Ctx.add n (val_to_int field_typ v) ctx' 2130 | None -> ctx' 2131 in 2132 match constraint_ with 2133 | Some cond when not (eval_expr ctx' cond) -> 2134 Error (Constraint_failed "field constraint") 2135 | _ -> go ctx' ((field_name, Parsed (field_typ, v)) :: acc) rest) 2136 in 2137 go empty_ctx [] s.fields 2138 2139let write_struct (s : struct_) (ps : parsed_struct) = 2140 let buf = Buffer.create 64 in 2141 let writer = Bw.of_buffer buf in 2142 let enc = encoder writer in 2143 let fields_with_vals = List.combine s.fields ps in 2144 let rec go ctx = function 2145 | [] -> Ok (Buffer.contents buf) 2146 | (Field { field_name; constraint_; _ }, (_, Parsed (typ, v))) :: rest -> ( 2147 let ctx' = encode_with_ctx ctx typ v enc in 2148 let ctx' = 2149 match field_name with 2150 | Some n -> Ctx.add n (val_to_int typ v) ctx' 2151 | None -> ctx' 2152 in 2153 match constraint_ with 2154 | Some cond when not (eval_expr ctx' cond) -> 2155 Error (Constraint_failed "field constraint") 2156 | _ -> go ctx' rest) 2157 in 2158 go empty_ctx fields_with_vals 2159 2160(* Capture top-level names before Codec shadows them *) 2161type struct_field = field 2162 2163let struct_field = field 2164let struct' = struct_ 2165 2166module Codec = struct 2167 type ('a, 'r) field = { name : string; typ : 'a typ; get : 'r -> 'a } 2168 2169 (* GADT snoc-list of typed field readers, built in forward order by |+. 2170 ('full, 'remaining) readers tracks: 2171 - 'full: the original constructor type 2172 - 'remaining: what's left after consuming the readers in this list 2173 2174 Snoc appends at the end, so readers are in field order. 2175 At seal time, pattern-matching reconstructs the full application 2176 without partial application closures (for up to 6 fields). *) 2177 type (_, _) readers = 2178 | Nil : ('f, 'f) readers 2179 | Snoc : 2180 ('full, 'a -> 'rest) readers * (bytes -> int -> 'a) 2181 -> ('full, 'rest) readers 2182 2183 (* Bitfield group state: tracks the current base word being packed. *) 2184 type bf_codec_state = { 2185 bfc_base : bitfield_base; 2186 bfc_base_off : int; (* byte offset of base word within record *) 2187 bfc_bits_used : int; (* bits consumed so far in current group *) 2188 bfc_total_bits : int; (* 8, 16, or 32 *) 2189 } 2190 2191 type ('f, 'r) record = 2192 | Record : { 2193 r_name : string; 2194 r_make : 'full; 2195 r_readers : ('full, 'f) readers; 2196 r_writers_rev : ('r -> bytes -> int -> unit) list; 2197 r_wire_size : int; 2198 r_fields_rev : struct_field list; 2199 r_bf : bf_codec_state option; 2200 } 2201 -> ('f, 'r) record 2202 2203 type 'r t = { 2204 t_name : string; 2205 t_decode : bytes -> int -> 'r; 2206 t_encode : 'r -> bytes -> int -> unit; 2207 t_wire_size : int; 2208 t_struct_fields : struct_field list; 2209 } 2210 2211 let record name make = 2212 Record 2213 { 2214 r_name = name; 2215 r_make = make; 2216 r_readers = Nil; 2217 r_writers_rev = []; 2218 r_wire_size = 0; 2219 r_fields_rev = []; 2220 r_bf = None; 2221 } 2222 2223 let field name typ get = { name; typ; get } 2224 2225 (* Bitfield helpers *) 2226 2227 let bf_base_byte_size = function BF_U8 -> 1 | BF_U16 _ -> 2 | BF_U32 _ -> 4 2228 2229 let bf_base_total_bits = function 2230 | BF_U8 -> 8 2231 | BF_U16 _ -> 16 2232 | BF_U32 _ -> 32 2233 2234 let bf_base_equal a b = 2235 match (a, b) with 2236 | BF_U8, BF_U8 -> true 2237 | BF_U16 e1, BF_U16 e2 -> e1 = e2 2238 | BF_U32 e1, BF_U32 e2 -> e1 = e2 2239 | _ -> false 2240 2241 let bf_read_base base buf off = 2242 match base with 2243 | BF_U8 -> Bytes.get_uint8 buf off 2244 | BF_U16 Little -> Bytes.get_uint16_le buf off 2245 | BF_U16 Big -> Bytes.get_uint16_be buf off 2246 | BF_U32 Little -> UInt32.get_le buf off 2247 | BF_U32 Big -> UInt32.get_be buf off 2248 2249 let bf_write_base base buf off v = 2250 match base with 2251 | BF_U8 -> Bytes.set_uint8 buf off v 2252 | BF_U16 Little -> Bytes.set_uint16_le buf off v 2253 | BF_U16 Big -> Bytes.set_uint16_be buf off v 2254 | BF_U32 Little -> UInt32.set_le buf off v 2255 | BF_U32 Big -> UInt32.set_be buf off v 2256 2257 let build_bf_reader base byte_off shift width = 2258 let mask = (1 lsl width) - 1 in 2259 fun buf off -> (bf_read_base base buf (off + byte_off) lsr shift) land mask 2260 2261 let build_bf_writer base byte_off shift width = 2262 let mask = (1 lsl width) - 1 in 2263 fun buf off value -> 2264 let cur = bf_read_base base buf (off + byte_off) in 2265 bf_write_base base buf (off + byte_off) 2266 (cur lor ((value land mask) lsl shift)) 2267 2268 let build_bf_clear base byte_off = 2269 fun buf off -> bf_write_base base buf (off + byte_off) 0 2270 2271 let ( |+ ) : type a f r. (a -> f, r) record -> (a, r) field -> (f, r) record = 2272 fun (Record r) { name; typ; get } -> 2273 (* Recursively unwrap Map layers to reach the wire-level type, composing 2274 encode/decode conversions along the way. *) 2275 let rec add : type w. 2276 w typ -> 2277 (r -> w) -> 2278 ((bytes -> int -> w) -> bytes -> int -> a) -> 2279 (f, r) record = 2280 fun typ get_wire wrap_reader -> 2281 match typ with 2282 | Map { inner; decode; encode } -> 2283 add inner 2284 (fun v -> encode (get_wire v)) 2285 (fun reader -> wrap_reader (fun buf off -> decode (reader buf off))) 2286 | Bits { width; base } -> 2287 let total = bf_base_total_bits base in 2288 let need_new_group = 2289 match r.r_bf with 2290 | None -> true 2291 | Some bf -> 2292 (not (bf_base_equal bf.bfc_base base)) 2293 || bf.bfc_bits_used + width > bf.bfc_total_bits 2294 in 2295 let base_off, bits_used, size_delta, extra_writers = 2296 if need_new_group then 2297 let base_off = r.r_wire_size in 2298 let clear = build_bf_clear base base_off in 2299 ( base_off, 2300 0, 2301 bf_base_byte_size base, 2302 [ (fun _v buf off -> clear buf off) ] ) 2303 else 2304 let bf = Option.get r.r_bf in 2305 (bf.bfc_base_off, bf.bfc_bits_used, 0, []) 2306 in 2307 let shift = total - bits_used - width in 2308 let raw_reader = build_bf_reader base base_off shift width in 2309 let raw_writer = build_bf_writer base base_off shift width in 2310 let new_bf = 2311 { 2312 bfc_base = base; 2313 bfc_base_off = base_off; 2314 bfc_bits_used = bits_used + width; 2315 bfc_total_bits = total; 2316 } 2317 in 2318 Record 2319 { 2320 r_name = r.r_name; 2321 r_make = r.r_make; 2322 r_readers = Snoc (r.r_readers, wrap_reader raw_reader); 2323 r_writers_rev = 2324 (fun v buf off -> raw_writer buf off (get_wire v)) 2325 :: (extra_writers @ r.r_writers_rev); 2326 r_wire_size = r.r_wire_size + size_delta; 2327 r_fields_rev = struct_field name typ :: r.r_fields_rev; 2328 r_bf = Some new_bf; 2329 } 2330 | _ -> 2331 let fsize = 2332 match field_wire_size typ with 2333 | Some s -> s 2334 | None -> failwith "Codec.(|+): variable-size fields not supported" 2335 in 2336 let field_off = r.r_wire_size in 2337 let raw_reader = build_field_reader typ field_off in 2338 let raw_encoder = build_field_encoder typ in 2339 Record 2340 { 2341 r_name = r.r_name; 2342 r_make = r.r_make; 2343 r_readers = Snoc (r.r_readers, wrap_reader raw_reader); 2344 r_writers_rev = 2345 (fun v buf off -> 2346 let _ = raw_encoder buf (off + field_off) (get_wire v) in 2347 ()) 2348 :: r.r_writers_rev; 2349 r_wire_size = r.r_wire_size + fsize; 2350 r_fields_rev = struct_field name typ :: r.r_fields_rev; 2351 r_bf = None; 2352 } 2353 in 2354 add typ get (fun reader -> reader) 2355 2356 (* Chunked application: peels off up to 6 fields per recursion step. 2357 Cost: ceil(n/6) - 1 partial applications instead of n - 1. *) 2358 let rec apply_readers : type full current. 2359 full -> (full, current) readers -> bytes -> int -> current = 2360 fun make readers buf off -> 2361 match readers with 2362 | Nil -> make 2363 | Snoc (Nil, r1) -> make (r1 buf off) 2364 | Snoc (Snoc (Nil, r1), r2) -> make (r1 buf off) (r2 buf off) 2365 | Snoc (Snoc (Snoc (Nil, r1), r2), r3) -> 2366 make (r1 buf off) (r2 buf off) (r3 buf off) 2367 | Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4) -> 2368 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) 2369 | Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5) -> 2370 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) (r5 buf off) 2371 | Snoc (Snoc (Snoc (Snoc (Snoc (Snoc (rest, r1), r2), r3), r4), r5), r6) -> 2372 let f = apply_readers make rest buf off in 2373 f (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) (r5 buf off) 2374 (r6 buf off) 2375 2376 let seal : type r. (r, r) record -> r t = 2377 fun (Record r) -> 2378 let total = r.r_wire_size in 2379 let writers = Array.of_list (List.rev r.r_writers_rev) in 2380 let n_writers = Array.length writers in 2381 let build_decode : type full. full -> (full, r) readers -> bytes -> int -> r 2382 = 2383 fun make readers -> 2384 match readers with 2385 | Nil -> fun _buf _off -> make 2386 | Snoc (Nil, r1) -> fun buf off -> make (r1 buf off) 2387 | Snoc (Snoc (Nil, r1), r2) -> 2388 fun buf off -> make (r1 buf off) (r2 buf off) 2389 | Snoc (Snoc (Snoc (Nil, r1), r2), r3) -> 2390 fun buf off -> make (r1 buf off) (r2 buf off) (r3 buf off) 2391 | Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4) -> 2392 fun buf off -> 2393 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) 2394 | Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5) -> 2395 fun buf off -> 2396 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) 2397 (r5 buf off) 2398 | Snoc (Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5), r6) -> 2399 fun buf off -> 2400 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) 2401 (r5 buf off) (r6 buf off) 2402 | readers -> fun buf off -> apply_readers make readers buf off 2403 in 2404 let raw_decode = build_decode r.r_make r.r_readers in 2405 { 2406 t_name = r.r_name; 2407 t_decode = 2408 (fun buf off -> 2409 if off + total > Bytes.length buf then 2410 raise_eof ~expected:total ~got:(Bytes.length buf - off); 2411 raw_decode buf off); 2412 t_encode = 2413 (fun v buf off -> 2414 for i = 0 to n_writers - 1 do 2415 writers.(i) v buf off 2416 done); 2417 t_wire_size = total; 2418 t_struct_fields = List.rev r.r_fields_rev; 2419 } 2420 2421 let wire_size t = t.t_wire_size 2422 let decode t buf off = t.t_decode buf off 2423 let encode t v buf off = t.t_encode v buf off 2424 let to_struct t = struct' t.t_name t.t_struct_fields 2425end 2426 2427module Record = struct 2428 type ('a, 'r) field = ('a, 'r) field_codec 2429 type 'r t = 'r record_codec 2430 2431 let field = field_codec 2432 let ( @: ) name (typ, get, set) = field_codec name typ ~get ~set 2433 2434 let ( @:? ) name (constraint_, typ, get, set) = 2435 field_codec name ~constraint_ typ ~get ~set 2436 2437 let record name ~default fields = 2438 record_codec name ~default (List.map pack_field fields) 2439 2440 let encode codec = encode_record_to_slice codec 2441 let decode codec = decode_record_from_slice codec 2442 let encode_bytes codec = encode_record_to_bytes codec 2443 let decode_bytes codec = decode_record_from_bytes codec 2444 let to_struct = record_to_struct 2445end