OCaml wire format DSL with EverParse 3D output for verified parsers
1(* Wire: Dependent Data Descriptions for EverParse 3D *)
2
3open Result.Syntax
4
5(** Staged computations, following the pattern from Jane Street's Base library.
6 Forces users to explicitly unstage functions to make specialization visible.
7 See also Irmin's repr which uses the same pattern. *)
8module Staged = struct
9 type +'a t = { unstage : 'a } [@@unboxed]
10
11 let stage x = { unstage = x }
12 let unstage { unstage } = unstage
13end
14
15(* UInt32: unboxed on 64-bit (uses int), boxed on 32-bit (uses int32) *)
16module UInt32 = struct
17 type t = int (* On 64-bit, int is 63 bits - enough for uint32 *)
18
19 let () =
20 if Sys.int_size < 32 then
21 failwith "Wire.UInt32 requires 64-bit OCaml (int must be >= 32 bits)"
22
23 let get_le buf off =
24 let b0 = Bytes.get_uint8 buf off in
25 let b1 = Bytes.get_uint8 buf (off + 1) in
26 let b2 = Bytes.get_uint8 buf (off + 2) in
27 let b3 = Bytes.get_uint8 buf (off + 3) in
28 b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24)
29
30 let get_be buf off =
31 let b0 = Bytes.get_uint8 buf off in
32 let b1 = Bytes.get_uint8 buf (off + 1) in
33 let b2 = Bytes.get_uint8 buf (off + 2) in
34 let b3 = Bytes.get_uint8 buf (off + 3) in
35 (b0 lsl 24) lor (b1 lsl 16) lor (b2 lsl 8) lor b3
36
37 let mask v = v land ((1 lsl 32) - 1)
38
39 let set_le buf off v =
40 let v = mask v in
41 Bytes.set_uint8 buf off (v land 0xFF);
42 Bytes.set_uint8 buf (off + 1) ((v lsr 8) land 0xFF);
43 Bytes.set_uint8 buf (off + 2) ((v lsr 16) land 0xFF);
44 Bytes.set_uint8 buf (off + 3) ((v lsr 24) land 0xFF)
45
46 let set_be buf off v =
47 let v = mask v in
48 Bytes.set_uint8 buf off ((v lsr 24) land 0xFF);
49 Bytes.set_uint8 buf (off + 1) ((v lsr 16) land 0xFF);
50 Bytes.set_uint8 buf (off + 2) ((v lsr 8) land 0xFF);
51 Bytes.set_uint8 buf (off + 3) (v land 0xFF)
52
53 let to_int t = t
54 let of_int t = mask t
55end
56
57(* UInt63: unboxed on 64-bit (uses int), reads 8 bytes but masks to 63 bits *)
58module UInt63 = struct
59 type t = int (* 63-bit int on 64-bit platforms *)
60
61 let () =
62 if Sys.int_size < 63 then
63 failwith "Wire.UInt63 requires 64-bit OCaml (int must be 63 bits)"
64
65 let get_le buf off =
66 let b0 = Bytes.get_uint8 buf off in
67 let b1 = Bytes.get_uint8 buf (off + 1) in
68 let b2 = Bytes.get_uint8 buf (off + 2) in
69 let b3 = Bytes.get_uint8 buf (off + 3) in
70 let b4 = Bytes.get_uint8 buf (off + 4) in
71 let b5 = Bytes.get_uint8 buf (off + 5) in
72 let b6 = Bytes.get_uint8 buf (off + 6) in
73 let b7 = Bytes.get_uint8 buf (off + 7) in
74 b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) lor (b4 lsl 32)
75 lor (b5 lsl 40) lor (b6 lsl 48)
76 lor ((b7 land 0x7F) lsl 56)
77
78 let get_be buf off =
79 let b0 = Bytes.get_uint8 buf off in
80 let b1 = Bytes.get_uint8 buf (off + 1) in
81 let b2 = Bytes.get_uint8 buf (off + 2) in
82 let b3 = Bytes.get_uint8 buf (off + 3) in
83 let b4 = Bytes.get_uint8 buf (off + 4) in
84 let b5 = Bytes.get_uint8 buf (off + 5) in
85 let b6 = Bytes.get_uint8 buf (off + 6) in
86 let b7 = Bytes.get_uint8 buf (off + 7) in
87 ((b0 land 0x7F) lsl 56)
88 lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24)
89 lor (b5 lsl 16) lor (b6 lsl 8) lor b7
90
91 let set_le buf off v =
92 Bytes.set_uint8 buf off (v land 0xFF);
93 Bytes.set_uint8 buf (off + 1) ((v lsr 8) land 0xFF);
94 Bytes.set_uint8 buf (off + 2) ((v lsr 16) land 0xFF);
95 Bytes.set_uint8 buf (off + 3) ((v lsr 24) land 0xFF);
96 Bytes.set_uint8 buf (off + 4) ((v lsr 32) land 0xFF);
97 Bytes.set_uint8 buf (off + 5) ((v lsr 40) land 0xFF);
98 Bytes.set_uint8 buf (off + 6) ((v lsr 48) land 0xFF);
99 Bytes.set_uint8 buf (off + 7) ((v lsr 56) land 0x7F)
100
101 let set_be buf off v =
102 Bytes.set_uint8 buf off ((v lsr 56) land 0x7F);
103 Bytes.set_uint8 buf (off + 1) ((v lsr 48) land 0xFF);
104 Bytes.set_uint8 buf (off + 2) ((v lsr 40) land 0xFF);
105 Bytes.set_uint8 buf (off + 3) ((v lsr 32) land 0xFF);
106 Bytes.set_uint8 buf (off + 4) ((v lsr 24) land 0xFF);
107 Bytes.set_uint8 buf (off + 5) ((v lsr 16) land 0xFF);
108 Bytes.set_uint8 buf (off + 6) ((v lsr 8) land 0xFF);
109 Bytes.set_uint8 buf (off + 7) (v land 0xFF)
110
111 let to_int t = t
112 let of_int t = t
113end
114
115type endian = Little | Big
116
117(* Expressions *)
118type _ expr =
119 | Int : int -> int expr
120 | Int64 : int64 -> int64 expr
121 | Bool : bool -> bool expr
122 | Ref : string -> int expr
123 | Sizeof : 'a typ -> int expr
124 | Sizeof_this : int expr
125 | Field_pos : int expr
126 | Add : int expr * int expr -> int expr
127 | Sub : int expr * int expr -> int expr
128 | Mul : int expr * int expr -> int expr
129 | Div : int expr * int expr -> int expr
130 | Mod : int expr * int expr -> int expr
131 | Land : int expr * int expr -> int expr
132 | Lor : int expr * int expr -> int expr
133 | Lxor : int expr * int expr -> int expr
134 | Lnot : int expr -> int expr
135 | Lsl : int expr * int expr -> int expr
136 | Lsr : int expr * int expr -> int expr
137 | Eq : 'a expr * 'a expr -> bool expr
138 | Ne : 'a expr * 'a expr -> bool expr
139 | Lt : int expr * int expr -> bool expr
140 | Le : int expr * int expr -> bool expr
141 | Gt : int expr * int expr -> bool expr
142 | Ge : int expr * int expr -> bool expr
143 | And : bool expr * bool expr -> bool expr
144 | Or : bool expr * bool expr -> bool expr
145 | Not : bool expr -> bool expr
146 | Cast : [ `U8 | `U16 | `U32 | `U64 ] * int expr -> int expr
147
148(* Bitfield base types - standalone, not mutually recursive *)
149and bitfield_base = BF_U8 | BF_U16 of endian | BF_U32 of endian
150
151(* Types *)
152and _ typ =
153 | Uint8 : int typ
154 | Uint16 : endian -> int typ
155 | Uint32 : endian -> UInt32.t typ
156 | Uint63 : endian -> UInt63.t typ
157 | Uint64 : endian -> int64 typ (* boxed, for full 64-bit *)
158 | Bits : { width : int; base : bitfield_base } -> int typ
159 | Unit : unit typ
160 | All_bytes : string typ
161 | All_zeros : string typ
162 | Where : { cond : bool expr; inner : 'a typ } -> 'a typ
163 | Array : { len : int expr; elem : 'a typ } -> 'a list typ
164 | Byte_array : { size : int expr } -> string typ
165 | Single_elem : { size : int expr; elem : 'a typ; at_most : bool } -> 'a typ
166 | Enum : {
167 name : string;
168 cases : (string * int) list;
169 base : int typ;
170 }
171 -> int typ
172 | Casetype : {
173 name : string;
174 tag : 'tag typ;
175 cases : ('tag option * 'a typ) list;
176 }
177 -> 'a typ
178 | Struct : struct_ -> unit typ
179 | Type_ref : string -> 'a typ
180 | Qualified_ref : { module_ : string; name : string } -> 'a typ
181 | Map : { inner : 'w typ; decode : 'w -> 'a; encode : 'a -> 'w } -> 'a typ
182 | Apply : { typ : 'a typ; args : packed_expr list } -> 'a typ
183
184and packed_expr = Pack_expr : 'a expr -> packed_expr
185
186(* Structs *)
187and struct_ = {
188 name : string;
189 params : param list;
190 where : bool expr option;
191 fields : field list;
192}
193
194and field =
195 | Field : {
196 field_name : string option;
197 field_typ : 'a typ;
198 constraint_ : bool expr option;
199 action : action option;
200 }
201 -> field
202
203and param = { param_name : string; param_typ : packed_typ; mutable_ : bool }
204and packed_typ = Pack_typ : 'a typ -> packed_typ
205
206(* Actions *)
207and action = On_success of action_stmt list | On_act of action_stmt list
208
209and action_stmt =
210 | Assign of string * int expr
211 | Return of bool expr
212 | Abort
213 | If of bool expr * action_stmt list * action_stmt list option
214 | Var of string * int expr
215
216(* Expression constructors *)
217let int n = Int n
218let int64 n = Int64 n
219let true_ = Bool true
220let false_ = Bool false
221let ref name = Ref name
222let sizeof t = Sizeof t
223let sizeof_this = Sizeof_this
224let field_pos = Field_pos
225
226module Expr = struct
227 let ( + ) a b = Add (a, b)
228 let ( - ) a b = Sub (a, b)
229 let ( * ) a b = Mul (a, b)
230 let ( / ) a b = Div (a, b)
231 let ( mod ) a b = Mod (a, b)
232 let ( land ) a b = Land (a, b)
233 let ( lor ) a b = Lor (a, b)
234 let ( lxor ) a b = Lxor (a, b)
235 let lnot a = Lnot a
236 let ( lsl ) a b = Lsl (a, b)
237 let ( lsr ) a b = Lsr (a, b)
238 let ( = ) a b = Eq (a, b)
239 let ( <> ) a b = Ne (a, b)
240 let ( < ) a b = Lt (a, b)
241 let ( <= ) a b = Le (a, b)
242 let ( > ) a b = Gt (a, b)
243 let ( >= ) a b = Ge (a, b)
244 let ( && ) a b = And (a, b)
245 let ( || ) a b = Or (a, b)
246 let not a = Not a
247 let to_uint8 e = Cast (`U8, e)
248 let to_uint16 e = Cast (`U16, e)
249 let to_uint32 e = Cast (`U32, e)
250 let to_uint64 e = Cast (`U64, e)
251end
252
253(* Type constructors *)
254let uint8 = Uint8
255let uint16 = Uint16 Little
256let uint16be = Uint16 Big
257let uint32 = Uint32 Little
258let uint32be = Uint32 Big
259let uint63 = Uint63 Little
260let uint63be = Uint63 Big
261let uint64 = Uint64 Little
262let uint64be = Uint64 Big
263
264(* Bitfield bases *)
265let bf_uint8 = BF_U8
266let bf_uint16 = BF_U16 Little
267let bf_uint16be = BF_U16 Big
268let bf_uint32 = BF_U32 Little
269let bf_uint32be = BF_U32 Big
270let bits ~width base = Bits { width; base }
271let bit b = Bool.to_int b
272let is_set n = n <> 0
273let map decode encode inner = Map { inner; decode; encode }
274let bool inner = Map { inner; decode = is_set; encode = bit }
275
276let cases variants inner =
277 let arr = Array.of_list variants in
278 let decode n =
279 if n >= 0 && n < Array.length arr then arr.(n)
280 else Fmt.invalid_arg "Wire.cases: unknown value %d" n
281 in
282 let encode v =
283 let rec go i =
284 if i >= Array.length arr then invalid_arg "Wire.cases: unknown variant"
285 else if arr.(i) = v then i
286 else go (i + 1)
287 in
288 go 0
289 in
290 Map { inner; decode; encode }
291
292let unit = Unit
293let all_bytes = All_bytes
294let all_zeros = All_zeros
295let where cond inner = Where { cond; inner }
296let array ~len elem = Array { len; elem }
297let byte_array ~size = Byte_array { size }
298let single_elem_array ~size elem = Single_elem { size; elem; at_most = false }
299
300let single_elem_array_at_most ~size elem =
301 Single_elem { size; elem; at_most = true }
302
303let enum name cases base = Enum { name; cases; base }
304
305(* Casetype *)
306type ('tag, 'a) case = 'tag option * 'a typ
307
308let case tag typ = (Some tag, typ)
309let default typ = (None, typ)
310let casetype name tag cases = Casetype { name; tag; cases }
311
312(* Struct fields *)
313let field name ?constraint_ ?action typ =
314 Field { field_name = Some name; field_typ = typ; constraint_; action }
315
316let anon_field typ =
317 Field
318 { field_name = None; field_typ = typ; constraint_ = None; action = None }
319
320(* Struct constructors *)
321let struct_ name fields = { name; params = []; where = None; fields }
322let struct_name s = s.name
323let struct_typ s = Struct s
324
325(* Parameters *)
326let param name typ =
327 { param_name = name; param_typ = Pack_typ typ; mutable_ = false }
328
329let mutable_param name typ =
330 { param_name = name; param_typ = Pack_typ typ; mutable_ = true }
331
332let param_struct name params ?where fields = { name; params; where; fields }
333let apply typ args = Apply { typ; args = List.map (fun e -> Pack_expr e) args }
334
335(* Type references *)
336let type_ref name = Type_ref name
337let qualified_ref module_ name = Qualified_ref { module_; name }
338
339(* Actions *)
340let on_success stmts = On_success stmts
341let on_act stmts = On_act stmts
342let assign ptr e = Assign (ptr, e)
343let return_bool e = Return e
344let abort = Abort
345let action_if cond then_ else_ = If (cond, then_, else_)
346let var name e = Var (name, e)
347
348(* Declarations *)
349type decl =
350 | Typedef of {
351 entrypoint : bool;
352 export : bool;
353 doc : string option;
354 struct_ : struct_;
355 }
356 | Define of { name : string; value : int }
357 | Extern_fn of { name : string; params : param list; ret : packed_typ }
358 | Extern_probe of { init : bool; name : string }
359 | Enum_decl of {
360 name : string;
361 cases : (string * int) list;
362 base : packed_typ;
363 }
364 | Casetype_decl of {
365 name : string;
366 params : param list;
367 tag : packed_typ;
368 cases : (packed_expr option * packed_typ) list;
369 }
370
371let typedef ?(entrypoint = false) ?(export = false) ?doc struct_ =
372 Typedef { entrypoint; export; doc; struct_ }
373
374let define name value = Define { name; value }
375let extern_fn name params ret = Extern_fn { name; params; ret = Pack_typ ret }
376let extern_probe ?(init = false) name = Extern_probe { init; name }
377let enum_decl name cases base = Enum_decl { name; cases; base = Pack_typ base }
378
379type decl_case = packed_expr option * packed_typ
380
381let decl_case tag typ = (Some (Pack_expr (Int tag)), Pack_typ typ)
382let decl_default typ = (None, Pack_typ typ)
383
384let casetype_decl name params tag cases =
385 Casetype_decl { name; params; tag = Pack_typ tag; cases }
386
387(* Module *)
388type module_ = { doc : string option; name : string; decls : decl list }
389
390let module_ ?doc name decls = { doc; name; decls }
391
392(* Pretty printing using Fmt *)
393
394let pp_endian ppf = function Little -> () | Big -> Fmt.string ppf "BE"
395
396let pp_bitfield_base ppf = function
397 | BF_U8 -> Fmt.string ppf "UINT8"
398 | BF_U16 e -> Fmt.pf ppf "UINT16%a" pp_endian e
399 | BF_U32 e -> Fmt.pf ppf "UINT32%a" pp_endian e
400
401let pp_cast_type ppf = function
402 | `U8 -> Fmt.string ppf "UINT8"
403 | `U16 -> Fmt.string ppf "UINT16"
404 | `U32 -> Fmt.string ppf "UINT32"
405 | `U64 -> Fmt.string ppf "UINT64"
406
407let rec pp_expr : type a. a expr Fmt.t =
408 fun ppf expr ->
409 match expr with
410 | Int n when n < 0 -> Fmt.pf ppf "(%d)" n
411 | Int n -> Fmt.int ppf n
412 | Int64 n -> Fmt.pf ppf "%LduL" n
413 | Bool true -> Fmt.string ppf "true"
414 | Bool false -> Fmt.string ppf "false"
415 | Ref name -> Fmt.string ppf name
416 | Sizeof t -> Fmt.pf ppf "sizeof (%a)" pp_typ t
417 | Sizeof_this -> Fmt.string ppf "sizeof (this)"
418 | Field_pos -> Fmt.string ppf "field_pos"
419 | Add (a, b) -> Fmt.pf ppf "(%a + %a)" pp_expr a pp_expr b
420 | Sub (a, b) -> Fmt.pf ppf "(%a - %a)" pp_expr a pp_expr b
421 | Mul (a, b) -> Fmt.pf ppf "(%a * %a)" pp_expr a pp_expr b
422 | Div (a, b) -> Fmt.pf ppf "(%a / %a)" pp_expr a pp_expr b
423 | Mod (a, b) -> Fmt.pf ppf "(%a %% %a)" pp_expr a pp_expr b
424 | Land (a, b) -> Fmt.pf ppf "(%a & %a)" pp_expr a pp_expr b
425 | Lor (a, b) -> Fmt.pf ppf "(%a | %a)" pp_expr a pp_expr b
426 | Lxor (a, b) -> Fmt.pf ppf "(%a ^ %a)" pp_expr a pp_expr b
427 | Lnot a -> Fmt.pf ppf "(~%a)" pp_expr a
428 | Lsl (a, b) -> Fmt.pf ppf "(%a << %a)" pp_expr a pp_expr b
429 | Lsr (a, b) -> Fmt.pf ppf "(%a >> %a)" pp_expr a pp_expr b
430 | Eq (a, b) -> Fmt.pf ppf "(%a == %a)" pp_expr a pp_expr b
431 | Ne (a, b) -> Fmt.pf ppf "(%a != %a)" pp_expr a pp_expr b
432 | Lt (a, b) -> Fmt.pf ppf "(%a < %a)" pp_expr a pp_expr b
433 | Le (a, b) -> Fmt.pf ppf "(%a <= %a)" pp_expr a pp_expr b
434 | Gt (a, b) -> Fmt.pf ppf "(%a > %a)" pp_expr a pp_expr b
435 | Ge (a, b) -> Fmt.pf ppf "(%a >= %a)" pp_expr a pp_expr b
436 | And (a, b) -> Fmt.pf ppf "(%a && %a)" pp_expr a pp_expr b
437 | Or (a, b) -> Fmt.pf ppf "(%a || %a)" pp_expr a pp_expr b
438 | Not a -> Fmt.pf ppf "(!%a)" pp_expr a
439 | Cast (t, e) -> Fmt.pf ppf "((%a) %a)" pp_cast_type t pp_expr e
440
441and pp_typ : type a. a typ Fmt.t =
442 fun ppf typ ->
443 match typ with
444 | Uint8 -> Fmt.string ppf "UINT8"
445 | Uint16 e -> Fmt.pf ppf "UINT16%a" pp_endian e
446 | Uint32 e -> Fmt.pf ppf "UINT32%a" pp_endian e
447 | Uint63 e -> Fmt.pf ppf "UINT63%a" pp_endian e
448 | Uint64 e -> Fmt.pf ppf "UINT64%a" pp_endian e
449 | Bits { base; _ } -> pp_bitfield_base ppf base
450 | Unit -> Fmt.string ppf "unit"
451 | All_bytes -> Fmt.string ppf "all_bytes"
452 | All_zeros -> Fmt.string ppf "all_zeros"
453 | Where { cond; inner } -> Fmt.pf ppf "%a { %a }" pp_typ inner pp_expr cond
454 | Array { len; elem } -> Fmt.pf ppf "%a[%a]" pp_typ elem pp_expr len
455 | Byte_array { size } -> Fmt.pf ppf "UINT8[:byte-size %a]" pp_expr size
456 | Single_elem { size; elem; at_most = false } ->
457 Fmt.pf ppf "%a[:byte-size-single-element-array %a]" pp_typ elem pp_expr
458 size
459 | Single_elem { size; elem; at_most = true } ->
460 Fmt.pf ppf "%a[:byte-size-single-element-array-at-most %a]" pp_typ elem
461 pp_expr size
462 | Enum { name; _ } -> Fmt.string ppf name
463 | Casetype { name; _ } -> Fmt.string ppf name
464 | Struct { name; _ } -> Fmt.string ppf name
465 | Type_ref name -> Fmt.string ppf name
466 | Qualified_ref { module_; name } -> Fmt.pf ppf "%s::%s" module_ name
467 | Apply { typ; args } ->
468 Fmt.pf ppf "%a(%a)" pp_typ typ Fmt.(list ~sep:comma pp_packed_expr) args
469 | Map { inner; _ } -> pp_typ ppf inner
470
471and pp_packed_expr ppf (Pack_expr e) = pp_expr ppf e
472
473let rec pp_action_stmt ppf = function
474 | Assign (ptr, e) -> Fmt.pf ppf "*%s = %a;" ptr pp_expr e
475 | Return e -> Fmt.pf ppf "return %a;" pp_expr e
476 | Abort -> Fmt.string ppf "abort;"
477 | If (cond, then_, None) ->
478 Fmt.pf ppf "if (%a) { %a }" pp_expr cond
479 Fmt.(list ~sep:sp pp_action_stmt)
480 then_
481 | If (cond, then_, Some else_) ->
482 Fmt.pf ppf "if (%a) { %a } else { %a }" pp_expr cond
483 Fmt.(list ~sep:sp pp_action_stmt)
484 then_
485 Fmt.(list ~sep:sp pp_action_stmt)
486 else_
487 | Var (name, e) -> Fmt.pf ppf "var %s = %a;" name pp_expr e
488
489let pp_action ppf = function
490 | On_success stmts ->
491 Fmt.pf ppf "{:on-success %a }" Fmt.(list ~sep:sp pp_action_stmt) stmts
492 | On_act stmts ->
493 Fmt.pf ppf "{:act %a }" Fmt.(list ~sep:sp pp_action_stmt) stmts
494
495let pp_bitwidth : type a. a typ -> int option = function
496 | Bits { width; _ } -> Some width
497 | _ -> None
498
499(* Extract field suffix for arrays - the modifier goes after the field name *)
500type field_suffix =
501 | No_suffix
502 | Bitwidth of int
503 | Byte_array of int expr
504 | Single_elem of { size : int expr; at_most : bool }
505 | Array of int expr
506
507let field_suffix : type a. a typ -> field_suffix * (Format.formatter -> unit) =
508 fun typ ->
509 match typ with
510 | Bits { width; base } ->
511 (Bitwidth width, fun ppf -> pp_bitfield_base ppf base)
512 | Byte_array { size } -> (Byte_array size, fun ppf -> Fmt.string ppf "UINT8")
513 | Single_elem { size; elem; at_most } ->
514 (Single_elem { size; at_most }, fun ppf -> pp_typ ppf elem)
515 | Array { len; elem } -> (Array len, fun ppf -> pp_typ ppf elem)
516 | _ -> (No_suffix, fun ppf -> pp_typ ppf typ)
517
518let pp_field ppf (Field f) =
519 match f.field_name with
520 | Some name ->
521 let suffix, pp_base = field_suffix f.field_typ in
522 Fmt.pf ppf "@,%t %s" pp_base name;
523 (* Print suffix after field name *)
524 (match suffix with
525 | No_suffix -> ()
526 | Bitwidth w -> Fmt.pf ppf " : %d" w
527 | Byte_array size -> Fmt.pf ppf "[:byte-size %a]" pp_expr size
528 | Single_elem { size; at_most = false } ->
529 Fmt.pf ppf "[:byte-size-single-element-array %a]" pp_expr size
530 | Single_elem { size; at_most = true } ->
531 Fmt.pf ppf "[:byte-size-single-element-array-at-most %a]" pp_expr size
532 | Array len -> Fmt.pf ppf "[%a]" pp_expr len);
533 Option.iter (Fmt.pf ppf " { %a }" pp_expr) f.constraint_;
534 Option.iter (Fmt.pf ppf " %a" pp_action) f.action;
535 Fmt.string ppf ";"
536 | None -> Fmt.pf ppf "@,%a;" pp_typ f.field_typ
537
538let pp_param ppf p =
539 let (Pack_typ t) = p.param_typ in
540 if p.mutable_ then Fmt.pf ppf "mutable %a *%s" pp_typ t p.param_name
541 else Fmt.pf ppf "%a %s" pp_typ t p.param_name
542
543let pp_params ppf params =
544 if not (List.is_empty params) then
545 Fmt.pf ppf "(%a)" Fmt.(list ~sep:comma pp_param) params
546
547let pp_struct ppf (s : struct_) =
548 Fmt.pf ppf "typedef struct _%s%a" s.name pp_params s.params;
549 Option.iter (Fmt.pf ppf "@,where (%a)" pp_expr) s.where;
550 Fmt.pf ppf "@,{@[<v 2>";
551 List.iter (pp_field ppf) s.fields;
552 Fmt.pf ppf "@]@,} %s" s.name
553
554let pp_decl ppf = function
555 | Typedef { entrypoint; export; doc; struct_ = st } ->
556 Option.iter (Fmt.pf ppf "/*++ %s --*/@,") doc;
557 if export then Fmt.pf ppf "export@,";
558 if entrypoint then Fmt.pf ppf "entrypoint@,";
559 Fmt.pf ppf "%a;@,@," pp_struct st
560 | Define { name; value } ->
561 if value < 0 then Fmt.pf ppf "#define %s (%d)@," name value
562 else Fmt.pf ppf "#define %s 0x%x@," name value
563 | Extern_fn { name; params; ret = Pack_typ ret } ->
564 Fmt.pf ppf "extern %a %s(%a);@,@," pp_typ ret name
565 Fmt.(list ~sep:comma pp_param)
566 params
567 | Extern_probe { init; name } ->
568 if init then Fmt.pf ppf "extern probe (INIT) %s;@,@," name
569 else Fmt.pf ppf "extern probe %s;@,@," name
570 | Enum_decl { name; cases; base = Pack_typ base } ->
571 Fmt.pf ppf "%a enum %s {@[<v 2>" pp_typ base name;
572 List.iteri
573 (fun i (cname, value) ->
574 if not (Int.equal i 0) then Fmt.string ppf ",";
575 Fmt.pf ppf "@,%s = %d" cname value)
576 cases;
577 Fmt.pf ppf "@]@,}@,@,"
578 | Casetype_decl { name; params; tag = Pack_typ _; cases } ->
579 (* First param is the switch discriminant *)
580 let disc_name =
581 match params with p :: _ -> p.param_name | [] -> "tag"
582 in
583 (* Internal name has underscore prefix, public name doesn't *)
584 let internal_name, public_name =
585 if String.length name > 0 && name.[0] = '_' then
586 (name, String.sub name 1 (String.length name - 1))
587 else ("_" ^ name, name)
588 in
589 Fmt.pf ppf "casetype %s%a {@[<v 2>@,switch (%s) {" internal_name pp_params
590 params disc_name;
591 List.iteri
592 (fun i (tag_opt, Pack_typ typ) ->
593 let field_name = Fmt.str "v%d" i in
594 match tag_opt with
595 | Some e ->
596 Fmt.pf ppf "@,case %a: %a %s;" pp_packed_expr e pp_typ typ
597 field_name
598 | None -> Fmt.pf ppf "@,default: %a %s;" pp_typ typ field_name)
599 cases;
600 Fmt.pf ppf "@,}@]@,} %s;@,@," public_name
601
602let pp_module ppf m =
603 Option.iter (Fmt.pf ppf "/*++ %s --*/@,@,") m.doc;
604 List.iter (pp_decl ppf) m.decls
605
606let to_3d m = Fmt.str "@[<v>%a@]" pp_module m
607
608let to_3d_file path m =
609 let oc = open_out path in
610 let ppf = Format.formatter_of_out_channel oc in
611 Fmt.pf ppf "@[<v>%a@]@." pp_module m;
612 close_out oc
613
614(* Binary parsing with bytesrw *)
615
616module Br = Bytesrw.Bytes.Reader
617module Bs = Bytesrw.Bytes.Slice
618
619type parse_error =
620 | Unexpected_eof of { expected : int; got : int }
621 | Constraint_failed of string
622 | Invalid_enum of { value : int; valid : int list }
623 | Invalid_tag of int
624 | All_zeros_failed of { offset : int }
625
626exception Parse_error of parse_error
627
628let raise_eof ~expected ~got =
629 raise (Parse_error (Unexpected_eof { expected; got }))
630
631let pp_parse_error ppf = function
632 | Unexpected_eof { expected; got } ->
633 Fmt.pf ppf "unexpected EOF: expected %d bytes, got %d" expected got
634 | Constraint_failed msg -> Fmt.pf ppf "constraint failed: %s" msg
635 | Invalid_enum { value; valid } ->
636 Fmt.pf ppf "invalid enum value %d, valid: [%a]" value
637 Fmt.(list ~sep:comma int)
638 valid
639 | Invalid_tag tag -> Fmt.pf ppf "invalid tag: %d" tag
640 | All_zeros_failed { offset } ->
641 Fmt.pf ppf "non-zero byte at offset %d" offset
642
643(* Parsing context - tracks field values for dependent types.
644
645 All field values are stored as [int] after conversion via [val_to_int].
646 This is sound because constraint expressions (the only consumers of
647 context values) operate on integers. *)
648module Ctx = Map.Make (String)
649
650type ctx = int Ctx.t
651
652let empty_ctx = Ctx.empty
653
654(* Convert a typed value to [int] for context storage. All types that
655 appear in constraint expressions are numeric, so this conversion is
656 lossless for practical schemas. Non-numeric types store 0. *)
657let rec val_to_int : type a. a typ -> a -> int =
658 fun typ v ->
659 match typ with
660 | Uint8 -> v
661 | Uint16 _ -> v
662 | Uint32 _ -> UInt32.to_int v
663 | Uint63 _ -> UInt63.to_int v
664 | Uint64 _ ->
665 (* Unsigned interpretation — values >= 2^62 don't fit in OCaml int,
666 return max_int so constraints [value <= K] fail correctly *)
667 Int64.unsigned_to_int v |> Option.value ~default:max_int
668 | Bits _ -> v
669 | Enum { base; _ } -> val_to_int base v
670 | Where { inner; _ } -> val_to_int inner v
671 | Single_elem { elem; _ } -> val_to_int elem v
672 | Apply { typ; _ } -> val_to_int typ v
673 | Map { inner; encode; _ } -> val_to_int inner (encode v)
674 | Unit | All_bytes | All_zeros | Array _ | Byte_array _ | Casetype _
675 | Struct _ | Type_ref _ | Qualified_ref _ ->
676 0
677
678let ctx_get ctx name =
679 match Ctx.find_opt name ctx with
680 | Some v -> v
681 | None -> failwith ("unbound field: " ^ name)
682
683(* Decoder state - tracks position within slices *)
684type decoder = {
685 reader : Br.t;
686 mutable slice : Bs.t;
687 mutable slice_pos : int;
688 mutable position : int;
689}
690
691let decoder reader = { reader; slice = Bs.eod; slice_pos = 0; position = 0 }
692
693let refill dec =
694 dec.slice <- Br.read dec.reader;
695 dec.slice_pos <- 0
696
697let slice_get_byte slice pos =
698 Bytes.get_uint8 (Bs.bytes slice) (Bs.first slice + pos)
699
700let read_byte dec =
701 if dec.slice_pos >= Bs.length dec.slice then begin
702 refill dec;
703 if Bs.is_eod dec.slice then None
704 else begin
705 let b = slice_get_byte dec.slice dec.slice_pos in
706 dec.slice_pos <- dec.slice_pos + 1;
707 dec.position <- dec.position + 1;
708 Some b
709 end
710 end
711 else begin
712 let b = slice_get_byte dec.slice dec.slice_pos in
713 dec.slice_pos <- dec.slice_pos + 1;
714 dec.position <- dec.position + 1;
715 Some b
716 end
717
718(* Read exactly n bytes *)
719let read_bytes dec n =
720 if n = 0 then Ok Bytes.empty
721 else
722 let buf = Bytes.create n in
723 let rec loop off remaining =
724 if remaining = 0 then Ok buf
725 else begin
726 (* Refill if needed *)
727 if dec.slice_pos >= Bs.length dec.slice then begin
728 refill dec;
729 if Bs.is_eod dec.slice then
730 Error (Unexpected_eof { expected = n; got = off })
731 else loop off remaining
732 end
733 else
734 let available = Bs.length dec.slice - dec.slice_pos in
735 let to_copy = min available remaining in
736 Bytes.blit (Bs.bytes dec.slice)
737 (Bs.first dec.slice + dec.slice_pos)
738 buf off to_copy;
739 dec.slice_pos <- dec.slice_pos + to_copy;
740 dec.position <- dec.position + to_copy;
741 loop (off + to_copy) (remaining - to_copy)
742 end
743 in
744 loop 0 n
745
746(* Read all remaining bytes *)
747let read_all dec =
748 let buf = Buffer.create 256 in
749 let rec loop () =
750 if dec.slice_pos >= Bs.length dec.slice then begin
751 refill dec;
752 if Bs.is_eod dec.slice then Buffer.contents buf else loop ()
753 end
754 else begin
755 let slice_bytes = Bs.bytes dec.slice in
756 let first = Bs.first dec.slice + dec.slice_pos in
757 let len = Bs.length dec.slice - dec.slice_pos in
758 Buffer.add_subbytes buf slice_bytes first len;
759 dec.position <- dec.position + len;
760 dec.slice_pos <- Bs.length dec.slice;
761 loop ()
762 end
763 in
764 loop ()
765
766(* Evaluate an expression in context *)
767let rec eval_expr : type a. ctx -> a expr -> a =
768 fun ctx expr ->
769 match expr with
770 | Int n -> n
771 | Int64 n -> n
772 | Bool b -> b
773 | Ref name -> ctx_get ctx name
774 | Sizeof _ -> 0 (* TODO: compute actual size *)
775 | Sizeof_this -> 0
776 | Field_pos -> 0
777 | Add (a, b) -> eval_expr ctx a + eval_expr ctx b
778 | Sub (a, b) -> eval_expr ctx a - eval_expr ctx b
779 | Mul (a, b) -> eval_expr ctx a * eval_expr ctx b
780 | Div (a, b) -> eval_expr ctx a / eval_expr ctx b
781 | Mod (a, b) -> eval_expr ctx a mod eval_expr ctx b
782 | Land (a, b) -> eval_expr ctx a land eval_expr ctx b
783 | Lor (a, b) -> eval_expr ctx a lor eval_expr ctx b
784 | Lxor (a, b) -> eval_expr ctx a lxor eval_expr ctx b
785 | Lnot a -> lnot (eval_expr ctx a)
786 | Lsl (a, b) -> eval_expr ctx a lsl eval_expr ctx b
787 | Lsr (a, b) -> eval_expr ctx a lsr eval_expr ctx b
788 | Eq (a, b) -> eval_expr ctx a = eval_expr ctx b
789 | Ne (a, b) -> eval_expr ctx a <> eval_expr ctx b
790 | Lt (a, b) -> eval_expr ctx a < eval_expr ctx b
791 | Le (a, b) -> eval_expr ctx a <= eval_expr ctx b
792 | Gt (a, b) -> eval_expr ctx a > eval_expr ctx b
793 | Ge (a, b) -> eval_expr ctx a >= eval_expr ctx b
794 | And (a, b) -> eval_expr ctx a && eval_expr ctx b
795 | Or (a, b) -> eval_expr ctx a || eval_expr ctx b
796 | Not a -> not (eval_expr ctx a)
797 | Cast (_, e) -> eval_expr ctx e (* TODO: proper casting *)
798
799(* Bitfield accumulator for packed struct parsing.
800 Consecutive bitfields sharing the same base type are packed together.
801 Bits are extracted from MSB to LSB (big-endian style) per EverParse 3D. *)
802type bf_accum = {
803 bf_base : bitfield_base;
804 bf_word : int;
805 bf_bits_used : int;
806 bf_total_bits : int;
807}
808
809let bf_total_bits = function BF_U8 -> 8 | BF_U16 _ -> 16 | BF_U32 _ -> 32
810let bf_base_size = function BF_U8 -> 1 | BF_U16 _ -> 2 | BF_U32 _ -> 4
811
812let bf_compatible base1 base2 =
813 match (base1, base2) with
814 | BF_U8, BF_U8 -> true
815 | BF_U16 e1, BF_U16 e2 -> e1 = e2
816 | BF_U32 e1, BF_U32 e2 -> e1 = e2
817 | _ -> false
818
819let bf_read_word dec base =
820 let size = bf_base_size base in
821 match read_bytes dec size with
822 | Error e -> Error e
823 | Ok buf ->
824 let v =
825 match base with
826 | BF_U8 -> Bytes.get_uint8 buf 0
827 | BF_U16 Little -> Bytes.get_uint16_le buf 0
828 | BF_U16 Big -> Bytes.get_uint16_be buf 0
829 | BF_U32 Little -> Int32.to_int (Bytes.get_int32_le buf 0)
830 | BF_U32 Big -> Int32.to_int (Bytes.get_int32_be buf 0)
831 in
832 Ok v
833
834(* Extract bits from accumulated word (MSB first, big-endian style) *)
835let bf_extract accum width =
836 let shift = accum.bf_total_bits - accum.bf_bits_used - width in
837 let mask = (1 lsl width) - 1 in
838 let value = (accum.bf_word lsr shift) land mask in
839 let new_accum = { accum with bf_bits_used = accum.bf_bits_used + width } in
840 (value, new_accum)
841
842(* Check if accumulator can provide more bits *)
843let bf_has_room accum width = accum.bf_bits_used + width <= accum.bf_total_bits
844
845(* Legacy parse_bits for standalone bitfield parsing (non-struct context) *)
846let parse_bits dec base width =
847 match bf_read_word dec base with
848 | Error e -> Error e
849 | Ok v -> Ok (v land ((1 lsl width) - 1))
850
851(* Helper: parse fixed-size integer from decoder *)
852let parse_int dec n get ctx =
853 match read_bytes dec n with
854 | Ok buf -> Ok (get buf 0, ctx)
855 | Error e -> Error e
856
857(** Read a bitfield from a decoder, reusing the accumulator when possible. *)
858let parse_bf_field dec (accum_opt : bf_accum option) (base : bitfield_base)
859 width : (int * bf_accum option, parse_error) result =
860 match accum_opt with
861 | Some accum when bf_compatible accum.bf_base base && bf_has_room accum width
862 ->
863 let v, new_accum = bf_extract accum width in
864 let accum_opt' =
865 if new_accum.bf_bits_used = new_accum.bf_total_bits then None
866 else Some new_accum
867 in
868 Ok (v, accum_opt')
869 | _ ->
870 let* word = bf_read_word dec base in
871 let total = bf_total_bits base in
872 let accum =
873 {
874 bf_base = base;
875 bf_word = word;
876 bf_bits_used = 0;
877 bf_total_bits = total;
878 }
879 in
880 let v, new_accum = bf_extract accum width in
881 let accum_opt' =
882 if new_accum.bf_bits_used = new_accum.bf_total_bits then None
883 else Some new_accum
884 in
885 Ok (v, accum_opt')
886
887(** Verify that all bytes in [s] are zero. *)
888let check_all_zeros s =
889 let rec go i =
890 if i >= String.length s then Ok s
891 else if s.[i] <> '\000' then Error (All_zeros_failed { offset = i })
892 else go (i + 1)
893 in
894 go 0
895
896(* Parse a type from a decoder *)
897let rec parse_with_ctx : type a.
898 ctx -> a typ -> decoder -> (a * ctx, parse_error) result =
899 fun ctx typ dec ->
900 match typ with
901 | Uint8 -> parse_int dec 1 Bytes.get_uint8 ctx
902 | Uint16 Little -> parse_int dec 2 Bytes.get_uint16_le ctx
903 | Uint16 Big -> parse_int dec 2 Bytes.get_uint16_be ctx
904 | Uint32 Little -> parse_int dec 4 UInt32.get_le ctx
905 | Uint32 Big -> parse_int dec 4 UInt32.get_be ctx
906 | Uint63 Little -> parse_int dec 8 UInt63.get_le ctx
907 | Uint63 Big -> parse_int dec 8 UInt63.get_be ctx
908 | Uint64 Little -> parse_int dec 8 Bytes.get_int64_le ctx
909 | Uint64 Big -> parse_int dec 8 Bytes.get_int64_be ctx
910 | Bits { width; base } -> (
911 match parse_bits dec base width with
912 | Ok v -> Ok (v, ctx)
913 | Error e -> Error e)
914 | Unit -> Ok ((), ctx)
915 | All_bytes -> Ok (read_all dec, ctx)
916 | All_zeros ->
917 let s = read_all dec in
918 check_all_zeros s |> Result.map (fun s -> (s, ctx))
919 | Where { cond; inner } -> (
920 match parse_with_ctx ctx inner dec with
921 | Ok (v, ctx') ->
922 if eval_expr ctx' cond then Ok (v, ctx')
923 else Error (Constraint_failed "where clause")
924 | Error e -> Error e)
925 | Array { len; elem } ->
926 let n = eval_expr ctx len in
927 let rec loop acc i ctx' =
928 if i >= n then Ok (List.rev acc, ctx')
929 else
930 match parse_with_ctx ctx' elem dec with
931 | Ok (v, ctx'') -> loop (v :: acc) (i + 1) ctx''
932 | Error e -> Error e
933 in
934 loop [] 0 ctx
935 | Byte_array { size } -> (
936 let n = eval_expr ctx size in
937 match read_bytes dec n with
938 | Ok buf -> Ok (Bytes.to_string buf, ctx)
939 | Error e -> Error e)
940 | Single_elem { size = _; elem; at_most = _ } -> parse_with_ctx ctx elem dec
941 | Enum { cases; base; _ } -> (
942 match parse_with_ctx ctx base dec with
943 | Ok (v, ctx') ->
944 let valid = List.map snd cases in
945 if List.mem v valid then Ok (v, ctx')
946 else Error (Invalid_enum { value = v; valid })
947 | Error e -> Error e)
948 | Casetype { cases; tag; _ } -> (
949 match parse_with_ctx ctx tag dec with
950 | Error e -> Error e
951 | Ok (tag_val, ctx') ->
952 let rec find_case = function
953 | [] -> Error (Invalid_tag (val_to_int tag tag_val))
954 | (Some expected, case_typ) :: rest ->
955 if expected = tag_val then parse_with_ctx ctx' case_typ dec
956 else find_case rest
957 | (None, case_typ) :: _ -> parse_with_ctx ctx' case_typ dec
958 in
959 find_case cases)
960 | Struct { fields; _ } ->
961 let rec go ctx' accum_opt = function
962 | [] -> Ok ((), ctx')
963 | Field { field_name; field_typ = Bits { width; base }; constraint_; _ }
964 :: rest -> (
965 let* v, accum_opt' = parse_bf_field dec accum_opt base width in
966 let ctx'' =
967 match field_name with Some n -> Ctx.add n v ctx' | None -> ctx'
968 in
969 match constraint_ with
970 | Some cond when not (eval_expr ctx'' cond) ->
971 Error (Constraint_failed "field constraint")
972 | _ -> go ctx'' accum_opt' rest)
973 | Field { field_name; field_typ; constraint_; _ } :: rest -> (
974 let* v, ctx'' = parse_with_ctx ctx' field_typ dec in
975 let ctx'' =
976 match field_name with
977 | Some n -> Ctx.add n (val_to_int field_typ v) ctx''
978 | None -> ctx''
979 in
980 match constraint_ with
981 | Some cond when not (eval_expr ctx'' cond) ->
982 Error (Constraint_failed "field constraint")
983 | _ -> go ctx'' None rest)
984 in
985 go ctx None fields
986 | Map { inner; decode; _ } ->
987 parse_with_ctx ctx inner dec
988 |> Result.map (fun (v, ctx') -> (decode v, ctx'))
989 | Type_ref _ -> failwith "type_ref requires a type registry"
990 | Qualified_ref _ -> failwith "qualified_ref requires a type registry"
991 | Apply _ -> failwith "apply requires a type registry"
992
993let parse typ reader =
994 let dec = decoder reader in
995 match parse_with_ctx empty_ctx typ dec with
996 | Ok (v, _) -> Ok v
997 | Error e -> Error e
998
999let parse_string typ s =
1000 let reader = Br.of_string s in
1001 parse typ reader
1002
1003let parse_bytes typ b =
1004 let reader = Br.of_bytes b in
1005 parse typ reader
1006
1007(* Binary encoding with Bytesrw.Bytes.Writer *)
1008
1009module Bw = Bytesrw.Bytes.Writer
1010
1011(* Encoder state *)
1012type encoder = { writer : Bw.t; buf : bytes }
1013
1014let encoder writer = { writer; buf = Bytes.create 8 }
1015
1016let write_slice enc len =
1017 let slice = Bs.make enc.buf ~first:0 ~length:len in
1018 Bw.write enc.writer slice
1019
1020let write_byte enc b =
1021 Bytes.set_uint8 enc.buf 0 b;
1022 write_slice enc 1
1023
1024let write_uint16_le enc v =
1025 Bytes.set_uint16_le enc.buf 0 v;
1026 write_slice enc 2
1027
1028let write_uint16_be enc v =
1029 Bytes.set_uint16_be enc.buf 0 v;
1030 write_slice enc 2
1031
1032let write_int32_le enc v =
1033 Bytes.set_int32_le enc.buf 0 v;
1034 write_slice enc 4
1035
1036let write_int32_be enc v =
1037 Bytes.set_int32_be enc.buf 0 v;
1038 write_slice enc 4
1039
1040let write_uint32_le enc v =
1041 UInt32.set_le enc.buf 0 v;
1042 write_slice enc 4
1043
1044let write_uint32_be enc v =
1045 UInt32.set_be enc.buf 0 v;
1046 write_slice enc 4
1047
1048let write_int64_le enc v =
1049 Bytes.set_int64_le enc.buf 0 v;
1050 write_slice enc 8
1051
1052let write_int64_be enc v =
1053 Bytes.set_int64_be enc.buf 0 v;
1054 write_slice enc 8
1055
1056let write_uint63_le enc v =
1057 UInt63.set_le enc.buf 0 v;
1058 write_slice enc 8
1059
1060let write_uint63_be enc v =
1061 UInt63.set_be enc.buf 0 v;
1062 write_slice enc 8
1063
1064let write_string enc s = Bw.write_string enc.writer s
1065
1066let rec encode_with_ctx : type a. ctx -> a typ -> a -> encoder -> ctx =
1067 fun ctx typ v enc ->
1068 match typ with
1069 | Uint8 ->
1070 write_byte enc v;
1071 ctx
1072 | Uint16 Little ->
1073 write_uint16_le enc v;
1074 ctx
1075 | Uint16 Big ->
1076 write_uint16_be enc v;
1077 ctx
1078 | Uint32 Little ->
1079 write_uint32_le enc v;
1080 ctx
1081 | Uint32 Big ->
1082 write_uint32_be enc v;
1083 ctx
1084 | Uint63 Little ->
1085 write_uint63_le enc v;
1086 ctx
1087 | Uint63 Big ->
1088 write_uint63_be enc v;
1089 ctx
1090 | Uint64 Little ->
1091 write_int64_le enc v;
1092 ctx
1093 | Uint64 Big ->
1094 write_int64_be enc v;
1095 ctx
1096 | Bits { width; base } ->
1097 let mask = (1 lsl width) - 1 in
1098 let masked = v land mask in
1099 (match base with
1100 | BF_U8 -> write_byte enc masked
1101 | BF_U16 Little -> write_uint16_le enc masked
1102 | BF_U16 Big -> write_uint16_be enc masked
1103 | BF_U32 Little -> write_int32_le enc (Int32.of_int masked)
1104 | BF_U32 Big -> write_int32_be enc (Int32.of_int masked));
1105 ctx
1106 | Unit -> ctx
1107 | All_bytes ->
1108 write_string enc v;
1109 ctx
1110 | All_zeros ->
1111 write_string enc v;
1112 ctx
1113 | Where { inner; _ } -> encode_with_ctx ctx inner v enc
1114 | Array { elem; _ } ->
1115 List.fold_left
1116 (fun ctx' elem_v -> encode_with_ctx ctx' elem elem_v enc)
1117 ctx v
1118 | Byte_array _ ->
1119 write_string enc v;
1120 ctx
1121 | Single_elem { elem; _ } -> encode_with_ctx ctx elem v enc
1122 | Enum { base; _ } -> encode_with_ctx ctx base v enc
1123 | Map { inner; encode; _ } -> encode_with_ctx ctx inner (encode v) enc
1124 | Casetype _ -> failwith "casetype encoding: use Record module"
1125 | Struct _ -> failwith "struct encoding: use Record module"
1126 | Type_ref _ -> failwith "type_ref requires a type registry"
1127 | Qualified_ref _ -> failwith "qualified_ref requires a type registry"
1128 | Apply _ -> failwith "apply requires a type registry"
1129
1130let encode typ v writer =
1131 let enc = encoder writer in
1132 ignore (encode_with_ctx empty_ctx typ v enc)
1133
1134let encode_to_bytes typ v =
1135 let buf = Buffer.create 64 in
1136 let writer = Bw.of_buffer buf in
1137 encode typ v writer;
1138 Buffer.to_bytes buf
1139
1140let encode_to_string typ v =
1141 let buf = Buffer.create 64 in
1142 let writer = Bw.of_buffer buf in
1143 encode typ v writer;
1144 Buffer.contents buf
1145
1146(* ==================== Typed Record DSL (ctypes-like) ==================== *)
1147
1148type ('a, 'r) field_codec = {
1149 name : string;
1150 constraint_ : bool expr option;
1151 typ : 'a typ;
1152 get : 'r -> 'a;
1153 set : 'a -> 'r -> 'r;
1154}
1155(** A field codec for field of type ['a] in record of type ['r] *)
1156
1157(** Specialized encode/decode functions built at codec construction time. This
1158 avoids interpretation overhead by generating direct operations on
1159 Bytesrw.Slice, similar to hand-written codecs but integrated with the
1160 streaming API. *)
1161
1162type 'r record_codec = {
1163 record_name : string;
1164 fields : 'r field_codec_packed list;
1165 default : 'r;
1166 wire_size : int option; (* Pre-computed fixed wire size, None if variable *)
1167}
1168(** A record codec for type ['r]. Contains only the schema description.
1169 Specialized encode/decode functions are built by
1170 [Record.encode]/[Record.decode]. *)
1171
1172and 'r field_codec_packed =
1173 | Field_codec : ('a, 'r) field_codec -> 'r field_codec_packed
1174
1175(** Compute wire size of a field codec's type *)
1176let rec field_wire_size : type a. a typ -> int option = function
1177 | Uint8 -> Some 1
1178 | Uint16 _ -> Some 2
1179 | Uint32 _ -> Some 4
1180 | Uint64 _ -> Some 8
1181 | Bits { base; _ } -> (
1182 match base with
1183 | BF_U8 -> Some 1
1184 | BF_U16 _ -> Some 2
1185 | BF_U32 _ -> Some 4)
1186 | Unit -> Some 0
1187 | Byte_array { size = Int n } -> Some n
1188 | Where { inner; _ } -> field_wire_size inner
1189 | Enum { base; _ } -> field_wire_size base
1190 | Map { inner; _ } -> field_wire_size inner
1191 | _ -> None
1192
1193(** Build a specialized field encoder: writes field value to bytes at offset.
1194 Returns the new offset. Works directly on the slice's underlying bytes. *)
1195let rec build_field_encoder : type a. a typ -> bytes -> int -> a -> int =
1196 fun typ ->
1197 match typ with
1198 | Uint8 ->
1199 fun buf off v ->
1200 Bytes.set_uint8 buf off v;
1201 off + 1
1202 | Uint16 Little ->
1203 fun buf off v ->
1204 Bytes.set_uint16_le buf off v;
1205 off + 2
1206 | Uint16 Big ->
1207 fun buf off v ->
1208 Bytes.set_uint16_be buf off v;
1209 off + 2
1210 | Uint32 Little ->
1211 fun buf off v ->
1212 UInt32.set_le buf off v;
1213 off + 4
1214 | Uint32 Big ->
1215 fun buf off v ->
1216 UInt32.set_be buf off v;
1217 off + 4
1218 | Uint63 Little ->
1219 fun buf off v ->
1220 UInt63.set_le buf off v;
1221 off + 8
1222 | Uint63 Big ->
1223 fun buf off v ->
1224 UInt63.set_be buf off v;
1225 off + 8
1226 | Uint64 Little ->
1227 fun buf off v ->
1228 Bytes.set_int64_le buf off v;
1229 off + 8
1230 | Uint64 Big ->
1231 fun buf off v ->
1232 Bytes.set_int64_be buf off v;
1233 off + 8
1234 | Byte_array { size = Int n } ->
1235 fun buf off v ->
1236 let len = min n (String.length v) in
1237 Bytes.blit_string v 0 buf off len;
1238 if len < n then Bytes.fill buf (off + len) (n - len) '\x00';
1239 off + n
1240 | Where { inner; _ } -> build_field_encoder inner
1241 | Enum { base; _ } -> build_field_encoder base
1242 | Map { inner; encode; _ } ->
1243 let enc = build_field_encoder inner in
1244 fun buf off v -> enc buf off (encode v)
1245 | Unit -> fun _buf off () -> off
1246 | _ ->
1247 (* Fallback for complex types - not specialized *)
1248 fun _buf _off _v -> failwith "build_field_encoder: unsupported type"
1249
1250(** Build a specialized field decoder: reads field value from slice's bytes.
1251 Takes the slice's bytes, first offset, and field offset within the record.
1252 Returns the value and new field offset. *)
1253let rec build_field_decoder : type a. a typ -> bytes -> int -> int -> a * int =
1254 fun typ ->
1255 match typ with
1256 | Uint8 -> fun buf base off -> (Bytes.get_uint8 buf (base + off), off + 1)
1257 | Uint16 Little ->
1258 fun buf base off -> (Bytes.get_uint16_le buf (base + off), off + 2)
1259 | Uint16 Big ->
1260 fun buf base off -> (Bytes.get_uint16_be buf (base + off), off + 2)
1261 | Uint32 Little ->
1262 fun buf base off -> (UInt32.get_le buf (base + off), off + 4)
1263 | Uint32 Big -> fun buf base off -> (UInt32.get_be buf (base + off), off + 4)
1264 | Uint63 Little ->
1265 fun buf base off -> (UInt63.get_le buf (base + off), off + 8)
1266 | Uint63 Big -> fun buf base off -> (UInt63.get_be buf (base + off), off + 8)
1267 | Uint64 Little ->
1268 fun buf base off -> (Bytes.get_int64_le buf (base + off), off + 8)
1269 | Uint64 Big ->
1270 fun buf base off -> (Bytes.get_int64_be buf (base + off), off + 8)
1271 | Byte_array { size = Int n } ->
1272 fun buf base off -> (Bytes.sub_string buf (base + off) n, off + n)
1273 | Where { inner; _ } -> build_field_decoder inner
1274 | Enum { base; _ } -> build_field_decoder base
1275 | Map { inner; decode; _ } ->
1276 let dec = build_field_decoder inner in
1277 fun buf base off ->
1278 let v, off' = dec buf base off in
1279 (decode v, off')
1280 | Unit -> fun _buf _base off -> ((), off)
1281 | _ ->
1282 (* Fallback for complex types *)
1283 fun _buf _base _off -> failwith "build_field_decoder: unsupported type"
1284
1285(** Build a mutable-offset field decoder (avoids tuple allocation). Takes bytes,
1286 base, mutable offset ref. Returns value, mutates offset. *)
1287let rec build_field_decoder_mut : type a. a typ -> bytes -> int -> int ref -> a
1288 =
1289 fun typ ->
1290 match typ with
1291 | Uint8 ->
1292 fun buf base off ->
1293 let v = Bytes.get_uint8 buf (base + !off) in
1294 off := !off + 1;
1295 v
1296 | Uint16 Little ->
1297 fun buf base off ->
1298 let v = Bytes.get_uint16_le buf (base + !off) in
1299 off := !off + 2;
1300 v
1301 | Uint16 Big ->
1302 fun buf base off ->
1303 let v = Bytes.get_uint16_be buf (base + !off) in
1304 off := !off + 2;
1305 v
1306 | Uint32 Little ->
1307 fun buf base off ->
1308 let v = UInt32.get_le buf (base + !off) in
1309 off := !off + 4;
1310 v
1311 | Uint32 Big ->
1312 fun buf base off ->
1313 let v = UInt32.get_be buf (base + !off) in
1314 off := !off + 4;
1315 v
1316 | Uint63 Little ->
1317 fun buf base off ->
1318 let v = UInt63.get_le buf (base + !off) in
1319 off := !off + 8;
1320 v
1321 | Uint63 Big ->
1322 fun buf base off ->
1323 let v = UInt63.get_be buf (base + !off) in
1324 off := !off + 8;
1325 v
1326 | Uint64 Little ->
1327 fun buf base off ->
1328 let v = Bytes.get_int64_le buf (base + !off) in
1329 off := !off + 8;
1330 v
1331 | Uint64 Big ->
1332 fun buf base off ->
1333 let v = Bytes.get_int64_be buf (base + !off) in
1334 off := !off + 8;
1335 v
1336 | Byte_array { size = Int n } ->
1337 fun buf base off ->
1338 let v = Bytes.sub_string buf (base + !off) n in
1339 off := !off + n;
1340 v
1341 | Where { inner; _ } -> build_field_decoder_mut inner
1342 | Enum { base; _ } -> build_field_decoder_mut base
1343 | Map { inner; decode; _ } ->
1344 let dec = build_field_decoder_mut inner in
1345 fun buf base off -> decode (dec buf base off)
1346 | Unit -> fun _buf _base _off -> ()
1347 | _ ->
1348 fun _buf _base _off ->
1349 failwith "build_field_decoder_mut: unsupported type"
1350
1351(** CPS-style field decoder: threads constructor through decode chain. This is
1352 the repr pattern that avoids intermediate record allocations. Type: bytes ->
1353 int -> int ref -> ('a -> 'b) -> 'b *)
1354let rec build_field_decoder_cps : type a.
1355 a typ -> bytes -> int -> int ref -> (a -> 'k) -> 'k =
1356 fun typ ->
1357 match typ with
1358 | Uint8 ->
1359 fun buf base off k ->
1360 let v = Bytes.get_uint8 buf (base + !off) in
1361 off := !off + 1;
1362 k v
1363 | Uint16 Little ->
1364 fun buf base off k ->
1365 let v = Bytes.get_uint16_le buf (base + !off) in
1366 off := !off + 2;
1367 k v
1368 | Uint16 Big ->
1369 fun buf base off k ->
1370 let v = Bytes.get_uint16_be buf (base + !off) in
1371 off := !off + 2;
1372 k v
1373 | Uint32 Little ->
1374 fun buf base off k ->
1375 let v = UInt32.get_le buf (base + !off) in
1376 off := !off + 4;
1377 k v
1378 | Uint32 Big ->
1379 fun buf base off k ->
1380 let v = UInt32.get_be buf (base + !off) in
1381 off := !off + 4;
1382 k v
1383 | Uint63 Little ->
1384 fun buf base off k ->
1385 let v = UInt63.get_le buf (base + !off) in
1386 off := !off + 8;
1387 k v
1388 | Uint63 Big ->
1389 fun buf base off k ->
1390 let v = UInt63.get_be buf (base + !off) in
1391 off := !off + 8;
1392 k v
1393 | Uint64 Little ->
1394 fun buf base off k ->
1395 let v = Bytes.get_int64_le buf (base + !off) in
1396 off := !off + 8;
1397 k v
1398 | Uint64 Big ->
1399 fun buf base off k ->
1400 let v = Bytes.get_int64_be buf (base + !off) in
1401 off := !off + 8;
1402 k v
1403 | Byte_array { size = Int n } ->
1404 fun buf base off k ->
1405 let v = Bytes.sub_string buf (base + !off) n in
1406 off := !off + n;
1407 k v
1408 | Where { inner; _ } -> build_field_decoder_cps inner
1409 | Enum { base; _ } -> build_field_decoder_cps base
1410 | Map { inner; decode; _ } ->
1411 let dec = build_field_decoder_cps inner in
1412 fun buf base off k -> dec buf base off (fun v -> k (decode v))
1413 | Unit -> fun _buf _base _off k -> k ()
1414 | _ ->
1415 fun _buf _base _off _k ->
1416 failwith "build_field_decoder_cps: unsupported type"
1417
1418(** Create a field codec *)
1419let field_codec name ?constraint_ typ ~get ~set =
1420 { name; constraint_; typ; get; set }
1421
1422(** Create a record codec schema. This is just data - no specialization yet.
1423 Specialization happens when you call [Record.encode]/[Record.decode]. *)
1424let record_codec name ~default fields =
1425 let wire_size =
1426 List.fold_left
1427 (fun acc (Field_codec fc) ->
1428 match (acc, field_wire_size fc.typ) with
1429 | Some a, Some b -> Some (a + b)
1430 | _ -> None)
1431 (Some 0) fields
1432 in
1433 { record_name = name; fields; default; wire_size }
1434
1435(** [record_wire_size codec] returns the fixed wire size of the codec, or [None]
1436 if the codec has variable-length fields. Callers should check buffer length
1437 before calling decode to avoid index-out-of-bounds errors. *)
1438let record_wire_size codec = codec.wire_size
1439
1440(** Pack a field codec for storage in record *)
1441let pack_field fc = Field_codec fc
1442
1443(* Bitfield encoder accumulator *)
1444type bf_enc_accum = {
1445 bfe_base : bitfield_base;
1446 bfe_word : int;
1447 bfe_bits_used : int;
1448 bfe_total_bits : int;
1449}
1450
1451let bf_write_word enc base word =
1452 match base with
1453 | BF_U8 -> write_byte enc word
1454 | BF_U16 Little -> write_uint16_le enc word
1455 | BF_U16 Big -> write_uint16_be enc word
1456 | BF_U32 Little -> write_int32_le enc (Int32.of_int word)
1457 | BF_U32 Big -> write_int32_be enc (Int32.of_int word)
1458
1459(* Insert bits into accumulator at current position (MSB first) *)
1460let bf_insert accum width value =
1461 let shift = accum.bfe_total_bits - accum.bfe_bits_used - width in
1462 let mask = (1 lsl width) - 1 in
1463 let masked = value land mask in
1464 let word' = accum.bfe_word lor (masked lsl shift) in
1465 { accum with bfe_word = word'; bfe_bits_used = accum.bfe_bits_used + width }
1466
1467(** Insert a bitfield value into the accumulator, flushing and resetting if the
1468 base type changed or there is no room. *)
1469let encode_bf_accum enc flush_accum accum_opt base width field_val =
1470 let accum_opt' =
1471 match accum_opt with
1472 | Some accum
1473 when bf_compatible accum.bfe_base base
1474 && accum.bfe_bits_used + width <= accum.bfe_total_bits ->
1475 Some (bf_insert accum width field_val)
1476 | _ ->
1477 flush_accum accum_opt;
1478 let total = bf_total_bits base in
1479 let accum =
1480 {
1481 bfe_base = base;
1482 bfe_word = 0;
1483 bfe_bits_used = 0;
1484 bfe_total_bits = total;
1485 }
1486 in
1487 Some (bf_insert accum width field_val)
1488 in
1489 (* Flush accumulator if full *)
1490 match accum_opt' with
1491 | Some a when a.bfe_bits_used = a.bfe_total_bits ->
1492 bf_write_word enc a.bfe_base a.bfe_word;
1493 None
1494 | other -> other
1495
1496(** Encode a record value to a writer with bitfield packing *)
1497let encode_record : type r.
1498 r record_codec -> r -> Bw.t -> (unit, parse_error) result =
1499 fun codec v writer ->
1500 let enc = encoder writer in
1501 let flush_accum = function
1502 | None -> ()
1503 | Some accum -> bf_write_word enc accum.bfe_base accum.bfe_word
1504 in
1505 let rec encode_fields (ctx : int Ctx.t) accum_opt = function
1506 | [] ->
1507 flush_accum accum_opt;
1508 Ok ()
1509 | Field_codec fc :: rest -> (
1510 let field_val = fc.get v in
1511 match fc.typ with
1512 | Bits { width; base } -> (
1513 let accum_opt' =
1514 encode_bf_accum enc flush_accum accum_opt base width field_val
1515 in
1516 let ctx' = Ctx.add fc.name field_val ctx in
1517 match fc.constraint_ with
1518 | Some cond when not (eval_expr ctx' cond) ->
1519 Error (Constraint_failed "field constraint")
1520 | _ -> encode_fields ctx' accum_opt' rest)
1521 | _ -> (
1522 flush_accum accum_opt;
1523 let ctx' = encode_with_ctx ctx fc.typ field_val enc in
1524 let ctx'' = Ctx.add fc.name (val_to_int fc.typ field_val) ctx' in
1525 match fc.constraint_ with
1526 | Some cond when not (eval_expr ctx'' cond) ->
1527 Error (Constraint_failed "field constraint")
1528 | _ -> encode_fields ctx'' None rest))
1529 in
1530 encode_fields empty_ctx None codec.fields
1531
1532(** Build a staged record encoder: returns a slice with encoded data. Following
1533 repr's pattern: iteration over fields happens once at staging time, building
1534 closures that are fast to execute.
1535
1536 WARNING: The returned slice's underlying buffer may be reused between calls.
1537 Copy the slice data before the next encode if you need to keep it. *)
1538let encode_record_to_slice : type r. r record_codec -> (r -> Bs.t) Staged.t =
1539 fun codec ->
1540 match codec.wire_size with
1541 | Some wire_size ->
1542 (* Build field encoders at staging time - this is the repr pattern *)
1543 let buf = Bytes.create wire_size in
1544 let field_encoders =
1545 List.filter_map
1546 (fun (Field_codec fc) ->
1547 match field_wire_size fc.typ with
1548 | Some _ ->
1549 let encoder = build_field_encoder fc.typ in
1550 Some (fun b off v -> encoder b off (fc.get v))
1551 | None -> None)
1552 codec.fields
1553 in
1554 if List.length field_encoders <> List.length codec.fields then
1555 (* Not all fields can be specialized - return empty slice *)
1556 Staged.stage (fun _v -> Bs.eod)
1557 else
1558 (* All fields specialized - this closure captures pre-built encoders *)
1559 Staged.stage (fun v ->
1560 let _ =
1561 List.fold_left (fun off enc -> enc buf off v) 0 field_encoders
1562 in
1563 Bs.make buf ~first:0 ~length:wire_size)
1564 | None ->
1565 (* Variable-size: can't return slice *)
1566 Staged.stage (fun _v -> Bs.eod)
1567
1568(** Build a staged record decoder: reads from a slice. Following repr's pattern:
1569 iteration over fields happens once at staging time, building closures that
1570 are fast to execute. *)
1571let decode_record_from_slice : type r. r record_codec -> (Bs.t -> r) Staged.t =
1572 fun codec ->
1573 (* Build field decoders using mutable offset to avoid tuple allocation *)
1574 let field_decoders =
1575 List.filter_map
1576 (fun (Field_codec fc) ->
1577 match field_wire_size fc.typ with
1578 | Some _ ->
1579 let decoder = build_field_decoder_mut fc.typ in
1580 let set = fc.set in
1581 Some (fun buf base off acc -> set (decoder buf base off) acc)
1582 | None -> None)
1583 codec.fields
1584 in
1585 let default = codec.default in
1586 if List.length field_decoders <> List.length codec.fields then
1587 Staged.stage (fun _slice -> default)
1588 else
1589 (* Convert to array for faster iteration *)
1590 let decoders = Array.of_list field_decoders in
1591 let n = Array.length decoders in
1592 Staged.stage (fun slice ->
1593 let buf = Bs.bytes slice in
1594 let base = Bs.first slice in
1595 let off = Stdlib.ref 0 in
1596 let acc = Stdlib.ref default in
1597 for i = 0 to n - 1 do
1598 acc := decoders.(i) buf base off !acc
1599 done;
1600 !acc)
1601
1602(** {2 Zero-allocation encode/decode}
1603
1604 These functions provide direct bytes access without slice allocation. Use
1605 when you need maximum performance and can manage buffer lifetime. *)
1606
1607type 'r encode_context = {
1608 buffer : bytes;
1609 wire_size : int;
1610 encode : 'r -> unit;
1611}
1612(** Context for zero-allocation encoding. The [buffer] is shared and reused
1613 between calls to [encode]. Copy the bytes before the next [encode] if you
1614 need to keep them. *)
1615
1616let encode_record_to_bytes : type r. r record_codec -> r encode_context option =
1617 fun codec ->
1618 match codec.wire_size with
1619 | Some wire_size ->
1620 let buf = Bytes.create wire_size in
1621 (* Build field encoders at staging time - each captures its getter *)
1622 let field_encoders =
1623 List.filter_map
1624 (fun (Field_codec fc) ->
1625 match field_wire_size fc.typ with
1626 | Some _ ->
1627 let encoder = build_field_encoder fc.typ in
1628 let get = fc.get in
1629 (* This function is built once, captures encoder and get *)
1630 Some (fun v b off -> encoder b off (get v))
1631 | None -> None)
1632 codec.fields
1633 in
1634 if List.length field_encoders <> List.length codec.fields then None
1635 else
1636 (* Convert to array for faster iteration without closure allocation *)
1637 let encoders = Array.of_list field_encoders in
1638 let n = Array.length encoders in
1639 let encode v =
1640 let off = Stdlib.ref 0 in
1641 for i = 0 to n - 1 do
1642 off := encoders.(i) v buf !off
1643 done
1644 in
1645 Some { buffer = buf; wire_size; encode }
1646 | None -> None
1647
1648(** Build a direct field reader that reads at a fixed offset. No tuples, no refs
1649 \- just pure value read. *)
1650let rec build_field_reader : type a. a typ -> int -> bytes -> int -> a =
1651 fun typ field_off ->
1652 match typ with
1653 | Uint8 -> fun buf base -> Bytes.get_uint8 buf (base + field_off)
1654 | Uint16 Little -> fun buf base -> Bytes.get_uint16_le buf (base + field_off)
1655 | Uint16 Big -> fun buf base -> Bytes.get_uint16_be buf (base + field_off)
1656 | Uint32 Little -> fun buf base -> UInt32.get_le buf (base + field_off)
1657 | Uint32 Big -> fun buf base -> UInt32.get_be buf (base + field_off)
1658 | Uint63 Little -> fun buf base -> UInt63.get_le buf (base + field_off)
1659 | Uint63 Big -> fun buf base -> UInt63.get_be buf (base + field_off)
1660 | Uint64 Little -> fun buf base -> Bytes.get_int64_le buf (base + field_off)
1661 | Uint64 Big -> fun buf base -> Bytes.get_int64_be buf (base + field_off)
1662 | Byte_array { size = Int n } ->
1663 fun buf base -> Bytes.sub_string buf (base + field_off) n
1664 | Where { inner; _ } -> build_field_reader inner field_off
1665 | Enum { base; _ } -> build_field_reader base field_off
1666 | Map { inner; decode; _ } ->
1667 let read = build_field_reader inner field_off in
1668 fun buf base -> decode (read buf base)
1669 | Unit -> fun _buf _base -> ()
1670 | _ -> fun _buf _base -> failwith "build_field_reader: unsupported type"
1671
1672let decode_record_from_bytes : type r.
1673 r record_codec -> (bytes -> int -> r) Staged.t =
1674 fun codec ->
1675 (* Precompute field offsets and build readers at staging time *)
1676 let field_info =
1677 let current_off = Stdlib.ref 0 in
1678 List.filter_map
1679 (fun (Field_codec fc) ->
1680 match field_wire_size fc.typ with
1681 | Some size ->
1682 let off = !current_off in
1683 current_off := !current_off + size;
1684 (* Build a direct reader at this fixed offset *)
1685 let reader = build_field_reader fc.typ off in
1686 let set = fc.set in
1687 Some (fun buf base acc -> set (reader buf base) acc)
1688 | None -> None)
1689 codec.fields
1690 in
1691 let default = codec.default in
1692 if List.length field_info <> List.length codec.fields then
1693 Staged.stage (fun _buf _base -> default)
1694 else
1695 (* Convert to array for iteration *)
1696 let decoders = Array.of_list field_info in
1697 let n = Array.length decoders in
1698 Staged.stage (fun buf base ->
1699 let acc = Stdlib.ref default in
1700 for i = 0 to n - 1 do
1701 acc := decoders.(i) buf base !acc
1702 done;
1703 !acc)
1704
1705(** {2 Zero-alloc decode with explicit types}
1706
1707 For truly zero intermediate allocations, use these typed decode functions
1708 that build readers at staging time and call a make function directly. Only
1709 the final record is allocated - no intermediate records or refs. *)
1710
1711(** Build zero-alloc decoder for 1-field record *)
1712let decode_make1 : type a1 r.
1713 a1 typ -> make:(a1 -> r) -> (bytes -> int -> r) Staged.t =
1714 fun t1 ~make ->
1715 let read1 = build_field_reader t1 0 in
1716 Staged.stage (fun buf base -> make (read1 buf base))
1717
1718(** Build zero-alloc decoder for 2-field record *)
1719let decode_make2 : type a1 a2 r.
1720 a1 typ -> a2 typ -> make:(a1 -> a2 -> r) -> (bytes -> int -> r) Staged.t =
1721 fun t1 t2 ~make ->
1722 match (field_wire_size t1, field_wire_size t2) with
1723 | Some s1, Some _ ->
1724 let read1 = build_field_reader t1 0 in
1725 let read2 = build_field_reader t2 s1 in
1726 Staged.stage (fun buf base -> make (read1 buf base) (read2 buf base))
1727 | _ -> failwith "decode_make2: variable-size fields not supported"
1728
1729(** Build zero-alloc decoder for 3-field record *)
1730let decode_make3 : type a1 a2 a3 r.
1731 a1 typ ->
1732 a2 typ ->
1733 a3 typ ->
1734 make:(a1 -> a2 -> a3 -> r) ->
1735 (bytes -> int -> r) Staged.t =
1736 fun t1 t2 t3 ~make ->
1737 match (field_wire_size t1, field_wire_size t2, field_wire_size t3) with
1738 | Some s1, Some s2, Some _ ->
1739 let read1 = build_field_reader t1 0 in
1740 let read2 = build_field_reader t2 s1 in
1741 let read3 = build_field_reader t3 (s1 + s2) in
1742 Staged.stage (fun buf base ->
1743 make (read1 buf base) (read2 buf base) (read3 buf base))
1744 | _ -> failwith "decode_make3: variable-size fields not supported"
1745
1746(** Build zero-alloc decoder for 4-field record *)
1747let decode_make4 : type a1 a2 a3 a4 r.
1748 a1 typ ->
1749 a2 typ ->
1750 a3 typ ->
1751 a4 typ ->
1752 make:(a1 -> a2 -> a3 -> a4 -> r) ->
1753 (bytes -> int -> r) Staged.t =
1754 fun t1 t2 t3 t4 ~make ->
1755 match
1756 ( field_wire_size t1,
1757 field_wire_size t2,
1758 field_wire_size t3,
1759 field_wire_size t4 )
1760 with
1761 | Some s1, Some s2, Some s3, Some _ ->
1762 let read1 = build_field_reader t1 0 in
1763 let read2 = build_field_reader t2 s1 in
1764 let read3 = build_field_reader t3 (s1 + s2) in
1765 let read4 = build_field_reader t4 (s1 + s2 + s3) in
1766 Staged.stage (fun buf base ->
1767 make (read1 buf base) (read2 buf base) (read3 buf base)
1768 (read4 buf base))
1769 | _ -> failwith "decode_make4: variable-size fields not supported"
1770
1771(** {2 Bounds-checked decode with exceptions}
1772
1773 Same as decode_make* but with bounds checking that raises Parse_error. *)
1774
1775(** Build bounds-checked decoder for 1-field record *)
1776let decode_make1_exn : type a1 r.
1777 a1 typ -> make:(a1 -> r) -> (bytes -> int -> r) Staged.t =
1778 fun t1 ~make ->
1779 match field_wire_size t1 with
1780 | Some total_size ->
1781 let read1 = build_field_reader t1 0 in
1782 Staged.stage (fun buf base ->
1783 let len = Bytes.length buf in
1784 if base + total_size > len then
1785 raise_eof ~expected:total_size ~got:(len - base);
1786 make (read1 buf base))
1787 | None -> failwith "decode_make1_exn: variable-size fields not supported"
1788
1789(** Build bounds-checked decoder for 2-field record *)
1790let decode_make2_exn : type a1 a2 r.
1791 a1 typ -> a2 typ -> make:(a1 -> a2 -> r) -> (bytes -> int -> r) Staged.t =
1792 fun t1 t2 ~make ->
1793 match (field_wire_size t1, field_wire_size t2) with
1794 | Some s1, Some s2 ->
1795 let total_size = s1 + s2 in
1796 let read1 = build_field_reader t1 0 in
1797 let read2 = build_field_reader t2 s1 in
1798 Staged.stage (fun buf base ->
1799 let len = Bytes.length buf in
1800 if base + total_size > len then
1801 raise_eof ~expected:total_size ~got:(len - base);
1802 make (read1 buf base) (read2 buf base))
1803 | _ -> failwith "decode_make2_exn: variable-size fields not supported"
1804
1805(** Build bounds-checked decoder for 3-field record *)
1806let decode_make3_exn : type a1 a2 a3 r.
1807 a1 typ ->
1808 a2 typ ->
1809 a3 typ ->
1810 make:(a1 -> a2 -> a3 -> r) ->
1811 (bytes -> int -> r) Staged.t =
1812 fun t1 t2 t3 ~make ->
1813 match (field_wire_size t1, field_wire_size t2, field_wire_size t3) with
1814 | Some s1, Some s2, Some s3 ->
1815 let total_size = s1 + s2 + s3 in
1816 let read1 = build_field_reader t1 0 in
1817 let read2 = build_field_reader t2 s1 in
1818 let read3 = build_field_reader t3 (s1 + s2) in
1819 Staged.stage (fun buf base ->
1820 let len = Bytes.length buf in
1821 if base + total_size > len then
1822 raise_eof ~expected:total_size ~got:(len - base);
1823 make (read1 buf base) (read2 buf base) (read3 buf base))
1824 | _ -> failwith "decode_make3_exn: variable-size fields not supported"
1825
1826(** Build bounds-checked decoder for 4-field record *)
1827let decode_make4_exn : type a1 a2 a3 a4 r.
1828 a1 typ ->
1829 a2 typ ->
1830 a3 typ ->
1831 a4 typ ->
1832 make:(a1 -> a2 -> a3 -> a4 -> r) ->
1833 (bytes -> int -> r) Staged.t =
1834 fun t1 t2 t3 t4 ~make ->
1835 match
1836 ( field_wire_size t1,
1837 field_wire_size t2,
1838 field_wire_size t3,
1839 field_wire_size t4 )
1840 with
1841 | Some s1, Some s2, Some s3, Some s4 ->
1842 let total_size = s1 + s2 + s3 + s4 in
1843 let read1 = build_field_reader t1 0 in
1844 let read2 = build_field_reader t2 s1 in
1845 let read3 = build_field_reader t3 (s1 + s2) in
1846 let read4 = build_field_reader t4 (s1 + s2 + s3) in
1847 Staged.stage (fun buf base ->
1848 let len = Bytes.length buf in
1849 if base + total_size > len then
1850 raise_eof ~expected:total_size ~got:(len - base);
1851 make (read1 buf base) (read2 buf base) (read3 buf base)
1852 (read4 buf base))
1853 | _ -> failwith "decode_make4_exn: variable-size fields not supported"
1854
1855(** Convert record codec to struct_ for 3D generation *)
1856let record_to_struct codec =
1857 let fields =
1858 List.map
1859 (fun (Field_codec fc) -> field fc.name ?constraint_:fc.constraint_ fc.typ)
1860 codec.fields
1861 in
1862 struct_ codec.record_name fields
1863
1864(* ==================== EverParse FFI Helpers ==================== *)
1865
1866(* NOTE: wire does NOT generate C parsing code. C parsers come from EverParse.
1867 This section provides helpers for generating OCaml FFI stubs that call
1868 EverParse-generated C code.
1869
1870 Workflow:
1871 1. Define schema in OCaml using wire
1872 2. Generate .3d file with to_3d
1873 3. Run EverParse to generate C parser (.h with struct + read/write)
1874 4. Use to_c_stubs to generate OCaml FFI bindings to call EverParse C *)
1875
1876(** Compute the fixed wire size of a struct (None if variable-length) *)
1877let rec size_of_typ : type a. a typ -> int option = function
1878 | Uint8 -> Some 1
1879 | Uint16 _ -> Some 2
1880 | Uint32 _ -> Some 4
1881 | Uint64 _ -> Some 8
1882 | Byte_array { size = Int n } -> Some n
1883 | Enum { base; _ } -> wire_size_of_int_typ base
1884 | Where { inner; _ } -> size_of_typ inner
1885 | Map { inner; _ } -> size_of_typ inner
1886 | _ -> None
1887
1888and wire_size_of_int_typ : int typ -> int option = function
1889 | Uint8 -> Some 1
1890 | Uint16 _ -> Some 2
1891 | Enum { base; _ } -> wire_size_of_int_typ base
1892 | Where { inner; _ } -> size_base inner
1893 | _ -> None
1894
1895and size_base : type a. a typ -> int option = function
1896 | Uint8 -> Some 1
1897 | Uint16 _ -> Some 2
1898 | _ -> None
1899
1900let size_of_struct (s : struct_) =
1901 List.fold_left
1902 (fun acc (Field f) ->
1903 match (acc, size_of_typ f.field_typ) with
1904 | Some a, Some b -> Some (a + b)
1905 | _ -> None)
1906 (Some 0) s.fields
1907
1908(** OCaml type name for a wire type (for generated external declarations). *)
1909let rec ml_type_of : type a. a typ -> string = function
1910 | Uint8 -> "int"
1911 | Uint16 _ -> "int"
1912 | Uint32 _ -> "int32"
1913 | Uint64 _ -> "int64"
1914 | Enum { base; _ } -> ml_type_of_int base
1915 | Where { inner; _ } -> ml_type_of inner
1916 | Map { inner; _ } -> ml_type_of inner
1917 | _ -> failwith "ml_type_of: unsupported type"
1918
1919and ml_type_of_int : int typ -> string = function
1920 | Uint8 -> "int"
1921 | Uint16 _ -> "int"
1922 | Enum { base; _ } -> ml_type_of_int base
1923 | Where { inner; _ } -> ml_type_of_int inner
1924 | Map { inner; _ } -> ml_type_of inner
1925 | _ -> failwith "ml_type_of_int: unsupported type"
1926
1927(** C expression to store a C struct field into an OCaml value. *)
1928let c_to_ml : type a. a typ -> string -> string =
1929 fun typ c_expr ->
1930 match ml_type_of typ with
1931 | "int" -> Fmt.str "Val_int(%s)" c_expr
1932 | "int32" -> Fmt.str "caml_copy_int32(%s)" c_expr
1933 | "int64" -> Fmt.str "caml_copy_int64(%s)" c_expr
1934 | _ -> failwith "c_to_ml: unsupported type"
1935
1936(** C expression to extract an OCaml value into a C value. *)
1937let ml_to_c : type a. a typ -> string -> string =
1938 fun typ ml_expr ->
1939 match ml_type_of typ with
1940 | "int" -> Fmt.str "Int_val(%s)" ml_expr
1941 | "int32" -> Fmt.str "Int32_val(%s)" ml_expr
1942 | "int64" -> Fmt.str "Int64_val(%s)" ml_expr
1943 | _ -> failwith "ml_to_c: unsupported type"
1944
1945(** Does this type require a boxed OCaml allocation (int32/int64)? *)
1946let is_boxed : type a. a typ -> bool =
1947 fun typ -> match ml_type_of typ with "int32" | "int64" -> true | _ -> false
1948
1949(** Named fields with existential type hidden. *)
1950type named_field = Named : string * 'a typ -> named_field
1951
1952let named_fields (s : struct_) =
1953 List.filter_map
1954 (fun (Field f) ->
1955 match f.field_name with
1956 | Some name -> Some (Named (name, f.field_typ))
1957 | None -> None)
1958 s.fields
1959
1960(** Generate C read stub: [string -> (t1 * t2 * ...) option]. Calls
1961 EverParse-generated [Name_read] function. *)
1962let c_stub_read ppf (s : struct_) fields =
1963 let n = List.length fields in
1964 let has_boxed = List.exists (fun (Named (_, typ)) -> is_boxed typ) fields in
1965 Fmt.pf ppf "CAMLprim value caml_wire_%s_read(value v_buf) {@\n" s.name;
1966 Fmt.pf ppf " CAMLparam1(v_buf);@\n";
1967 if has_boxed then Fmt.pf ppf " CAMLlocal3(v_some, v_tuple, v_tmp);@\n"
1968 else Fmt.pf ppf " CAMLlocal2(v_some, v_tuple);@\n";
1969 Fmt.pf ppf " const uint8_t *buf = (const uint8_t *)String_val(v_buf);@\n";
1970 Fmt.pf ppf " uint32_t len = caml_string_length(v_buf);@\n";
1971 Fmt.pf ppf " %s val;@\n" s.name;
1972 Fmt.pf ppf " int32_t rc = %s_read(buf, len, &val);@\n" s.name;
1973 Fmt.pf ppf " if (rc <= 0) { CAMLreturn(Val_none); }@\n";
1974 Fmt.pf ppf " v_tuple = caml_alloc_tuple(%d);@\n" n;
1975 List.iteri
1976 (fun i (Named (name, typ)) ->
1977 if is_boxed typ then begin
1978 Fmt.pf ppf " v_tmp = %s;@\n" (c_to_ml typ ("val." ^ name));
1979 Fmt.pf ppf " Store_field(v_tuple, %d, v_tmp);@\n" i
1980 end
1981 else
1982 Fmt.pf ppf " Store_field(v_tuple, %d, %s);@\n" i
1983 (c_to_ml typ ("val." ^ name)))
1984 fields;
1985 Fmt.pf ppf " v_some = caml_alloc(1, 0);@\n";
1986 Fmt.pf ppf " Store_field(v_some, 0, v_tuple);@\n";
1987 Fmt.pf ppf " CAMLreturn(v_some);@\n";
1988 Fmt.pf ppf "}@\n@\n"
1989
1990(** Generate C write stub: [(t1 * t2 * ...) -> string option]. Calls
1991 EverParse-generated [Name_write] function. *)
1992let c_stub_write ppf (s : struct_) fields =
1993 let sz = match size_of_struct s with Some n -> n | None -> 4096 in
1994 Fmt.pf ppf "CAMLprim value caml_wire_%s_write(value v_tuple) {@\n" s.name;
1995 Fmt.pf ppf " CAMLparam1(v_tuple);@\n";
1996 Fmt.pf ppf " CAMLlocal2(v_some, v_str);@\n";
1997 Fmt.pf ppf " %s val;@\n" s.name;
1998 List.iteri
1999 (fun i (Named (name, typ)) ->
2000 Fmt.pf ppf " val.%s = %s;@\n" name
2001 (ml_to_c typ (Fmt.str "Field(v_tuple, %d)" i)))
2002 fields;
2003 Fmt.pf ppf " uint8_t out[%d];@\n" sz;
2004 Fmt.pf ppf " int32_t wc = %s_write(&val, out, sizeof(out));@\n" s.name;
2005 Fmt.pf ppf " if (wc <= 0) { CAMLreturn(Val_none); }@\n";
2006 Fmt.pf ppf " v_str = caml_alloc_string(wc);@\n";
2007 Fmt.pf ppf " memcpy((char *)String_val(v_str), out, wc);@\n";
2008 Fmt.pf ppf " v_some = caml_alloc(1, 0);@\n";
2009 Fmt.pf ppf " Store_field(v_some, 0, v_str);@\n";
2010 Fmt.pf ppf " CAMLreturn(v_some);@\n";
2011 Fmt.pf ppf "}@\n@\n"
2012
2013(** Generate C FFI stubs that call EverParse-generated [Name_read] and
2014 [Name_write].
2015
2016 For each struct [Foo] with fields [a : t1, b : t2, ...], generates:
2017 - [caml_wire_Foo_read(v_buf)] returning [(t1 * t2 * ...) option]
2018 - [caml_wire_Foo_write(v_tuple)] taking [(t1 * t2 * ...)] and returning
2019 [string option].
2020
2021 The generated code expects EverParse headers to be available:
2022 - [Foo.h] with struct typedef and [Foo_read]/[Foo_write] functions *)
2023let to_c_stubs (structs : struct_ list) =
2024 let buf = Buffer.create 4096 in
2025 let ppf = Format.formatter_of_buffer buf in
2026 Fmt.pf ppf
2027 "/* wire_stubs.c - OCaml FFI stubs for EverParse-generated C */@\n@\n";
2028 Fmt.pf ppf "#include <caml/mlvalues.h>@\n";
2029 Fmt.pf ppf "#include <caml/memory.h>@\n";
2030 Fmt.pf ppf "#include <caml/alloc.h>@\n";
2031 Fmt.pf ppf "#include <string.h>@\n@\n";
2032 Fmt.pf ppf "/* Include EverParse-generated headers */@\n";
2033 List.iter
2034 (fun (s : struct_) -> Fmt.pf ppf "#include \"%s.h\"@\n" s.name)
2035 structs;
2036 Fmt.pf ppf "@\n";
2037 List.iter
2038 (fun (s : struct_) ->
2039 let fields = named_fields s in
2040 c_stub_read ppf s fields;
2041 c_stub_write ppf s fields)
2042 structs;
2043 Format.pp_print_flush ppf ();
2044 Buffer.contents buf
2045
2046(** Generate OCaml [external] declarations matching the C stubs from
2047 {!to_c_stubs}. For each struct [Foo] with fields [a : t1, b : t2, ...],
2048 generates a module:
2049 {[
2050 module Foo : sig
2051 val read : string -> (t1 * t2 * ...) option
2052 val write : (t1 * t2 * ...) -> string option
2053 end
2054 ]} *)
2055let to_ml_stubs (structs : struct_ list) =
2056 let buf = Buffer.create 256 in
2057 let ppf = Format.formatter_of_buffer buf in
2058 Fmt.pf ppf "(* Generated by wire (do not edit) *)@\n@\n";
2059 List.iter
2060 (fun (s : struct_) ->
2061 let fields = named_fields s in
2062 let tuple_type =
2063 String.concat " * "
2064 (List.map (fun (Named (_, typ)) -> ml_type_of typ) fields)
2065 in
2066 Fmt.pf ppf "module %s = struct@\n" s.name;
2067 Fmt.pf ppf " external read : string -> (%s) option@\n" tuple_type;
2068 Fmt.pf ppf " = \"caml_wire_%s_read\"@\n" s.name;
2069 Fmt.pf ppf " external write : (%s) -> string option@\n" tuple_type;
2070 Fmt.pf ppf " = \"caml_wire_%s_write\"@\n" s.name;
2071 Fmt.pf ppf "end@\n@\n")
2072 structs;
2073 Format.pp_print_flush ppf ();
2074 Buffer.contents buf
2075
2076(** Module name for a generated per-struct OCaml stub file. Converts CamelCase
2077 to snake_case, e.g., [SimpleHeader] maps to ["simple_header"]. *)
2078let to_ml_stub_name (s : struct_) =
2079 let name = s.name in
2080 let buf = Buffer.create (String.length name + 4) in
2081 String.iteri
2082 (fun i c ->
2083 if i > 0 && Char.uppercase_ascii c = c && Char.lowercase_ascii c <> c then
2084 Buffer.add_char buf '_';
2085 Buffer.add_char buf (Char.lowercase_ascii c))
2086 name;
2087 Buffer.contents buf
2088
2089(** Generate a flat OCaml stub module for a single struct. Produces a file with
2090 [type t] and [external read/write] declarations:
2091 {[
2092 (* Generated by wire *)
2093 type t = int * int * int32
2094
2095 external read : string -> t option = "caml_wire_Foo_read"
2096 external write : t -> string option = "caml_wire_Foo_write"
2097 ]} *)
2098let to_ml_stub (s : struct_) =
2099 let buf = Buffer.create 256 in
2100 let ppf = Format.formatter_of_buffer buf in
2101 let fields = named_fields s in
2102 let tuple_type =
2103 String.concat " * "
2104 (List.map (fun (Named (_, typ)) -> ml_type_of typ) fields)
2105 in
2106 Fmt.pf ppf "(* Generated by wire (do not edit) *)@\n@\n";
2107 Fmt.pf ppf "type t = %s@\n@\n" tuple_type;
2108 Fmt.pf ppf "external read : string -> t option@\n";
2109 Fmt.pf ppf " = \"caml_wire_%s_read\"@\n@\n" s.name;
2110 Fmt.pf ppf "external write : t -> string option@\n";
2111 Fmt.pf ppf " = \"caml_wire_%s_write\"@\n" s.name;
2112 Format.pp_print_flush ppf ();
2113 Buffer.contents buf
2114
2115(* ==================== Struct-level read/write ==================== *)
2116
2117type parsed_value = Parsed : 'a typ * 'a -> parsed_value
2118type parsed_struct = (string option * parsed_value) list
2119
2120let read_struct (s : struct_) buf =
2121 let reader = Br.of_string buf in
2122 let dec = decoder reader in
2123 let rec go ctx acc = function
2124 | [] -> Ok (List.rev acc)
2125 | Field { field_name; field_typ; constraint_; _ } :: rest -> (
2126 let* v, ctx' = parse_with_ctx ctx field_typ dec in
2127 let ctx' =
2128 match field_name with
2129 | Some n -> Ctx.add n (val_to_int field_typ v) ctx'
2130 | None -> ctx'
2131 in
2132 match constraint_ with
2133 | Some cond when not (eval_expr ctx' cond) ->
2134 Error (Constraint_failed "field constraint")
2135 | _ -> go ctx' ((field_name, Parsed (field_typ, v)) :: acc) rest)
2136 in
2137 go empty_ctx [] s.fields
2138
2139let write_struct (s : struct_) (ps : parsed_struct) =
2140 let buf = Buffer.create 64 in
2141 let writer = Bw.of_buffer buf in
2142 let enc = encoder writer in
2143 let fields_with_vals = List.combine s.fields ps in
2144 let rec go ctx = function
2145 | [] -> Ok (Buffer.contents buf)
2146 | (Field { field_name; constraint_; _ }, (_, Parsed (typ, v))) :: rest -> (
2147 let ctx' = encode_with_ctx ctx typ v enc in
2148 let ctx' =
2149 match field_name with
2150 | Some n -> Ctx.add n (val_to_int typ v) ctx'
2151 | None -> ctx'
2152 in
2153 match constraint_ with
2154 | Some cond when not (eval_expr ctx' cond) ->
2155 Error (Constraint_failed "field constraint")
2156 | _ -> go ctx' rest)
2157 in
2158 go empty_ctx fields_with_vals
2159
2160(* Capture top-level names before Codec shadows them *)
2161type struct_field = field
2162
2163let struct_field = field
2164let struct' = struct_
2165
2166module Codec = struct
2167 type ('a, 'r) field = { name : string; typ : 'a typ; get : 'r -> 'a }
2168
2169 (* GADT snoc-list of typed field readers, built in forward order by |+.
2170 ('full, 'remaining) readers tracks:
2171 - 'full: the original constructor type
2172 - 'remaining: what's left after consuming the readers in this list
2173
2174 Snoc appends at the end, so readers are in field order.
2175 At seal time, pattern-matching reconstructs the full application
2176 without partial application closures (for up to 6 fields). *)
2177 type (_, _) readers =
2178 | Nil : ('f, 'f) readers
2179 | Snoc :
2180 ('full, 'a -> 'rest) readers * (bytes -> int -> 'a)
2181 -> ('full, 'rest) readers
2182
2183 (* Bitfield group state: tracks the current base word being packed. *)
2184 type bf_codec_state = {
2185 bfc_base : bitfield_base;
2186 bfc_base_off : int; (* byte offset of base word within record *)
2187 bfc_bits_used : int; (* bits consumed so far in current group *)
2188 bfc_total_bits : int; (* 8, 16, or 32 *)
2189 }
2190
2191 type ('f, 'r) record =
2192 | Record : {
2193 r_name : string;
2194 r_make : 'full;
2195 r_readers : ('full, 'f) readers;
2196 r_writers_rev : ('r -> bytes -> int -> unit) list;
2197 r_wire_size : int;
2198 r_fields_rev : struct_field list;
2199 r_bf : bf_codec_state option;
2200 }
2201 -> ('f, 'r) record
2202
2203 type 'r t = {
2204 t_name : string;
2205 t_decode : bytes -> int -> 'r;
2206 t_encode : 'r -> bytes -> int -> unit;
2207 t_wire_size : int;
2208 t_struct_fields : struct_field list;
2209 }
2210
2211 let record name make =
2212 Record
2213 {
2214 r_name = name;
2215 r_make = make;
2216 r_readers = Nil;
2217 r_writers_rev = [];
2218 r_wire_size = 0;
2219 r_fields_rev = [];
2220 r_bf = None;
2221 }
2222
2223 let field name typ get = { name; typ; get }
2224
2225 (* Bitfield helpers *)
2226
2227 let bf_base_byte_size = function BF_U8 -> 1 | BF_U16 _ -> 2 | BF_U32 _ -> 4
2228
2229 let bf_base_total_bits = function
2230 | BF_U8 -> 8
2231 | BF_U16 _ -> 16
2232 | BF_U32 _ -> 32
2233
2234 let bf_base_equal a b =
2235 match (a, b) with
2236 | BF_U8, BF_U8 -> true
2237 | BF_U16 e1, BF_U16 e2 -> e1 = e2
2238 | BF_U32 e1, BF_U32 e2 -> e1 = e2
2239 | _ -> false
2240
2241 let bf_read_base base buf off =
2242 match base with
2243 | BF_U8 -> Bytes.get_uint8 buf off
2244 | BF_U16 Little -> Bytes.get_uint16_le buf off
2245 | BF_U16 Big -> Bytes.get_uint16_be buf off
2246 | BF_U32 Little -> UInt32.get_le buf off
2247 | BF_U32 Big -> UInt32.get_be buf off
2248
2249 let bf_write_base base buf off v =
2250 match base with
2251 | BF_U8 -> Bytes.set_uint8 buf off v
2252 | BF_U16 Little -> Bytes.set_uint16_le buf off v
2253 | BF_U16 Big -> Bytes.set_uint16_be buf off v
2254 | BF_U32 Little -> UInt32.set_le buf off v
2255 | BF_U32 Big -> UInt32.set_be buf off v
2256
2257 let build_bf_reader base byte_off shift width =
2258 let mask = (1 lsl width) - 1 in
2259 fun buf off -> (bf_read_base base buf (off + byte_off) lsr shift) land mask
2260
2261 let build_bf_writer base byte_off shift width =
2262 let mask = (1 lsl width) - 1 in
2263 fun buf off value ->
2264 let cur = bf_read_base base buf (off + byte_off) in
2265 bf_write_base base buf (off + byte_off)
2266 (cur lor ((value land mask) lsl shift))
2267
2268 let build_bf_clear base byte_off =
2269 fun buf off -> bf_write_base base buf (off + byte_off) 0
2270
2271 let ( |+ ) : type a f r. (a -> f, r) record -> (a, r) field -> (f, r) record =
2272 fun (Record r) { name; typ; get } ->
2273 (* Recursively unwrap Map layers to reach the wire-level type, composing
2274 encode/decode conversions along the way. *)
2275 let rec add : type w.
2276 w typ ->
2277 (r -> w) ->
2278 ((bytes -> int -> w) -> bytes -> int -> a) ->
2279 (f, r) record =
2280 fun typ get_wire wrap_reader ->
2281 match typ with
2282 | Map { inner; decode; encode } ->
2283 add inner
2284 (fun v -> encode (get_wire v))
2285 (fun reader -> wrap_reader (fun buf off -> decode (reader buf off)))
2286 | Bits { width; base } ->
2287 let total = bf_base_total_bits base in
2288 let need_new_group =
2289 match r.r_bf with
2290 | None -> true
2291 | Some bf ->
2292 (not (bf_base_equal bf.bfc_base base))
2293 || bf.bfc_bits_used + width > bf.bfc_total_bits
2294 in
2295 let base_off, bits_used, size_delta, extra_writers =
2296 if need_new_group then
2297 let base_off = r.r_wire_size in
2298 let clear = build_bf_clear base base_off in
2299 ( base_off,
2300 0,
2301 bf_base_byte_size base,
2302 [ (fun _v buf off -> clear buf off) ] )
2303 else
2304 let bf = Option.get r.r_bf in
2305 (bf.bfc_base_off, bf.bfc_bits_used, 0, [])
2306 in
2307 let shift = total - bits_used - width in
2308 let raw_reader = build_bf_reader base base_off shift width in
2309 let raw_writer = build_bf_writer base base_off shift width in
2310 let new_bf =
2311 {
2312 bfc_base = base;
2313 bfc_base_off = base_off;
2314 bfc_bits_used = bits_used + width;
2315 bfc_total_bits = total;
2316 }
2317 in
2318 Record
2319 {
2320 r_name = r.r_name;
2321 r_make = r.r_make;
2322 r_readers = Snoc (r.r_readers, wrap_reader raw_reader);
2323 r_writers_rev =
2324 (fun v buf off -> raw_writer buf off (get_wire v))
2325 :: (extra_writers @ r.r_writers_rev);
2326 r_wire_size = r.r_wire_size + size_delta;
2327 r_fields_rev = struct_field name typ :: r.r_fields_rev;
2328 r_bf = Some new_bf;
2329 }
2330 | _ ->
2331 let fsize =
2332 match field_wire_size typ with
2333 | Some s -> s
2334 | None -> failwith "Codec.(|+): variable-size fields not supported"
2335 in
2336 let field_off = r.r_wire_size in
2337 let raw_reader = build_field_reader typ field_off in
2338 let raw_encoder = build_field_encoder typ in
2339 Record
2340 {
2341 r_name = r.r_name;
2342 r_make = r.r_make;
2343 r_readers = Snoc (r.r_readers, wrap_reader raw_reader);
2344 r_writers_rev =
2345 (fun v buf off ->
2346 let _ = raw_encoder buf (off + field_off) (get_wire v) in
2347 ())
2348 :: r.r_writers_rev;
2349 r_wire_size = r.r_wire_size + fsize;
2350 r_fields_rev = struct_field name typ :: r.r_fields_rev;
2351 r_bf = None;
2352 }
2353 in
2354 add typ get (fun reader -> reader)
2355
2356 (* Chunked application: peels off up to 6 fields per recursion step.
2357 Cost: ceil(n/6) - 1 partial applications instead of n - 1. *)
2358 let rec apply_readers : type full current.
2359 full -> (full, current) readers -> bytes -> int -> current =
2360 fun make readers buf off ->
2361 match readers with
2362 | Nil -> make
2363 | Snoc (Nil, r1) -> make (r1 buf off)
2364 | Snoc (Snoc (Nil, r1), r2) -> make (r1 buf off) (r2 buf off)
2365 | Snoc (Snoc (Snoc (Nil, r1), r2), r3) ->
2366 make (r1 buf off) (r2 buf off) (r3 buf off)
2367 | Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4) ->
2368 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off)
2369 | Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5) ->
2370 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) (r5 buf off)
2371 | Snoc (Snoc (Snoc (Snoc (Snoc (Snoc (rest, r1), r2), r3), r4), r5), r6) ->
2372 let f = apply_readers make rest buf off in
2373 f (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off) (r5 buf off)
2374 (r6 buf off)
2375
2376 let seal : type r. (r, r) record -> r t =
2377 fun (Record r) ->
2378 let total = r.r_wire_size in
2379 let writers = Array.of_list (List.rev r.r_writers_rev) in
2380 let n_writers = Array.length writers in
2381 let build_decode : type full. full -> (full, r) readers -> bytes -> int -> r
2382 =
2383 fun make readers ->
2384 match readers with
2385 | Nil -> fun _buf _off -> make
2386 | Snoc (Nil, r1) -> fun buf off -> make (r1 buf off)
2387 | Snoc (Snoc (Nil, r1), r2) ->
2388 fun buf off -> make (r1 buf off) (r2 buf off)
2389 | Snoc (Snoc (Snoc (Nil, r1), r2), r3) ->
2390 fun buf off -> make (r1 buf off) (r2 buf off) (r3 buf off)
2391 | Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4) ->
2392 fun buf off ->
2393 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off)
2394 | Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5) ->
2395 fun buf off ->
2396 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off)
2397 (r5 buf off)
2398 | Snoc (Snoc (Snoc (Snoc (Snoc (Snoc (Nil, r1), r2), r3), r4), r5), r6) ->
2399 fun buf off ->
2400 make (r1 buf off) (r2 buf off) (r3 buf off) (r4 buf off)
2401 (r5 buf off) (r6 buf off)
2402 | readers -> fun buf off -> apply_readers make readers buf off
2403 in
2404 let raw_decode = build_decode r.r_make r.r_readers in
2405 {
2406 t_name = r.r_name;
2407 t_decode =
2408 (fun buf off ->
2409 if off + total > Bytes.length buf then
2410 raise_eof ~expected:total ~got:(Bytes.length buf - off);
2411 raw_decode buf off);
2412 t_encode =
2413 (fun v buf off ->
2414 for i = 0 to n_writers - 1 do
2415 writers.(i) v buf off
2416 done);
2417 t_wire_size = total;
2418 t_struct_fields = List.rev r.r_fields_rev;
2419 }
2420
2421 let wire_size t = t.t_wire_size
2422 let decode t buf off = t.t_decode buf off
2423 let encode t v buf off = t.t_encode v buf off
2424 let to_struct t = struct' t.t_name t.t_struct_fields
2425end
2426
2427module Record = struct
2428 type ('a, 'r) field = ('a, 'r) field_codec
2429 type 'r t = 'r record_codec
2430
2431 let field = field_codec
2432 let ( @: ) name (typ, get, set) = field_codec name typ ~get ~set
2433
2434 let ( @:? ) name (constraint_, typ, get, set) =
2435 field_codec name ~constraint_ typ ~get ~set
2436
2437 let record name ~default fields =
2438 record_codec name ~default (List.map pack_field fields)
2439
2440 let encode codec = encode_record_to_slice codec
2441 let decode codec = decode_record_from_slice codec
2442 let encode_bytes codec = encode_record_to_bytes codec
2443 let decode_bytes codec = decode_record_from_bytes codec
2444 let to_struct = record_to_struct
2445end