OCaml wire format DSL with EverParse 3D output for verified parsers
1(* Generate .3d files from random wire schemas for EverParse.
2
3 All schemas are randomly generated with deterministic seeds. Fields of any
4 type may get constraints (~25% probability per field). *)
5
6(* ---- Field type metadata ---- *)
7
8type ft = {
9 make_field : string -> bool Wire.expr option -> Wire.field;
10 wire_size : int;
11 gen_constraint : Random.State.t -> int;
12 big_endian : bool;
13}
14
15let gen_uint8 rng = Random.State.int rng 256
16let gen_uint16 rng = Random.State.int rng 65536
17
18let gen_uint32 rng =
19 Int32.unsigned_to_int (Random.State.bits32 rng) |> Option.get
20
21let gen_uint64 rng =
22 Int64.to_int (Int64.logand (Random.State.bits64 rng) 0x3FFF_FFFF_FFFF_FFFFL)
23
24let field_types =
25 [|
26 {
27 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint8);
28 wire_size = 1;
29 gen_constraint = gen_uint8;
30 big_endian = false;
31 };
32 {
33 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint16);
34 wire_size = 2;
35 gen_constraint = gen_uint16;
36 big_endian = false;
37 };
38 {
39 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint16be);
40 wire_size = 2;
41 gen_constraint = gen_uint16;
42 big_endian = true;
43 };
44 {
45 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint32);
46 wire_size = 4;
47 gen_constraint = gen_uint32;
48 big_endian = false;
49 };
50 {
51 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint32be);
52 wire_size = 4;
53 gen_constraint = gen_uint32;
54 big_endian = true;
55 };
56 {
57 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint64);
58 wire_size = 8;
59 gen_constraint = gen_uint64;
60 big_endian = false;
61 };
62 {
63 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint64be);
64 wire_size = 8;
65 gen_constraint = gen_uint64;
66 big_endian = true;
67 };
68 |]
69
70(* ---- Random schema generation ---- *)
71
72type random_field = {
73 name : string;
74 ft : ft;
75 constraint_val : int option;
76 big_endian : bool;
77}
78
79type random_schema = {
80 struct_ : Wire.struct_;
81 fields : random_field list;
82 total_wire_size : int;
83}
84
85let random_struct seed =
86 let rng = Random.State.make [| seed |] in
87 let n = 1 + Random.State.int rng 6 in
88 let fields =
89 List.init n (fun i ->
90 let ft =
91 field_types.(Random.State.int rng (Array.length field_types))
92 in
93 let name = Fmt.str "f%d" i in
94 let constraint_val =
95 if Random.State.int rng 4 = 0 then Some (ft.gen_constraint rng)
96 else None
97 in
98 { name; ft; constraint_val; big_endian = ft.big_endian })
99 in
100 let struct_name = Fmt.str "Random%d" seed in
101 let wire_fields =
102 List.map
103 (fun rf ->
104 let constraint_ =
105 Option.map
106 (fun k -> Wire.Expr.(Wire.ref rf.name <= Wire.int k))
107 rf.constraint_val
108 in
109 rf.ft.make_field rf.name constraint_)
110 fields
111 in
112 let total_wire_size =
113 List.fold_left (fun acc rf -> acc + rf.ft.wire_size) 0 fields
114 in
115 { struct_ = Wire.struct_ struct_name wire_fields; fields; total_wire_size }
116
117(* ---- Code generation for differential testing ---- *)
118
119let generate_c_stubs ~schema_dir outdir schemas =
120 let oc = open_out (Filename.concat outdir "stubs.c") in
121 let pr fmt = Printf.fprintf oc fmt in
122 pr "#include <caml/mlvalues.h>\n";
123 pr "#include <caml/memory.h>\n";
124 pr "#include <caml/alloc.h>\n";
125 pr "#include <stdint.h>\n\n";
126 (* Include all wrapper headers - they declare the check functions *)
127 List.iter
128 (fun rs ->
129 let name = Wire.struct_name rs.struct_ in
130 pr "#include \"%s/%sWrapper.h\"\n" schema_dir name)
131 schemas;
132 pr "\n";
133 (* Include wrapper implementations with unique error handlers *)
134 List.iteri
135 (fun i rs ->
136 let name = Wire.struct_name rs.struct_ in
137 (* Include EverParse.h and parser *)
138 if i = 0 then pr "#include \"%s/EverParse.h\"\n" schema_dir;
139 pr "#include \"%s/%s.h\"\n" schema_dir name;
140 pr "#include \"%s/%s.c\"\n" schema_dir name;
141 (* Inline wrapper with renamed error handler *)
142 pr
143 "void %sEverParseError(const char *s, const char *f, const char *r) { \
144 (void)s; (void)f; (void)r; }\n"
145 name;
146 pr "static void %s_ErrorHandler(\n" name;
147 pr " const char *t, const char *f, const char *r,\n";
148 pr " uint64_t c, uint8_t *ctx, EVERPARSE_INPUT_BUFFER i, uint64_t p) {\n";
149 pr " (void)t; (void)f; (void)r; (void)c; (void)ctx; (void)i; (void)p;\n";
150 pr "}\n";
151 pr "BOOLEAN %sCheck%s(uint8_t *base, uint32_t len) {\n" name name;
152 pr
153 " uint64_t result = %sValidate%s(NULL, %s_ErrorHandler, base, len, 0);\n"
154 name name name;
155 pr " return EverParseIsSuccess(result);\n";
156 pr "}\n\n")
157 schemas;
158 (* Generate OCaml stubs *)
159 List.iter
160 (fun rs ->
161 let name = Wire.struct_name rs.struct_ in
162 pr "CAMLprim value caml_%s_check(value v_bytes) {\n"
163 (String.lowercase_ascii name);
164 pr " CAMLparam1(v_bytes);\n";
165 pr " uint8_t *data = (uint8_t *)Bytes_val(v_bytes);\n";
166 pr " uint32_t len = caml_string_length(v_bytes);\n";
167 pr " BOOLEAN result = %sCheck%s(data, len);\n" name name;
168 pr " CAMLreturn(Val_bool(result));\n";
169 pr "}\n\n")
170 schemas;
171 close_out oc
172
173let generate_ml_stubs outdir schemas =
174 let oc = open_out (Filename.concat outdir "stubs.ml") in
175 let pr fmt = Printf.fprintf oc fmt in
176 List.iter
177 (fun rs ->
178 let name = Wire.struct_name rs.struct_ in
179 let lower = String.lowercase_ascii name in
180 pr "external %s_check : bytes -> bool = \"caml_%s_check\"\n" lower lower)
181 schemas;
182 close_out oc
183
184(** Emit OCaml code that reads one constrained field and checks its value. *)
185let emit_field_constraint pr rf offset k =
186 let endian = if rf.big_endian then "be" else "le" in
187 match rf.ft.wire_size with
188 | 1 ->
189 pr " let %s = Bytes.get_uint8 buf %d in\n" rf.name offset;
190 pr " if %s > %d then false else\n" rf.name k
191 | 2 ->
192 pr " let %s = Bytes.get_uint16_%s buf %d in\n" rf.name endian offset;
193 pr " if %s > %d then false else\n" rf.name k
194 | 4 ->
195 pr " let %s = Bytes.get_int32_%s buf %d in\n" rf.name endian offset;
196 pr " if Int32.unsigned_compare %s (%ldl) > 0 then false else\n" rf.name
197 (Int32.of_int k)
198 | 8 ->
199 pr " let %s = Bytes.get_int64_%s buf %d in\n" rf.name endian offset;
200 pr " if Int64.unsigned_compare %s (%LdL) > 0 then false else\n" rf.name
201 (Int64.of_int k)
202 | _ ->
203 pr " let %s = Bytes.get_uint8 buf %d in\n" rf.name offset;
204 pr " if %s > %d then false else\n" rf.name k
205
206(** Emit constraint checks for all fields in a schema, with computed offsets. *)
207let emit_schema_constraints pr fields =
208 let rec add_offsets offset = function
209 | [] -> []
210 | rf :: rest -> (rf, offset) :: add_offsets (offset + rf.ft.wire_size) rest
211 in
212 let fields_with_offsets = add_offsets 0 fields in
213 List.iter
214 (fun (rf, offset) ->
215 match rf.constraint_val with
216 | Some k -> emit_field_constraint pr rf offset k
217 | None -> ())
218 fields_with_offsets
219
220(** Emit the wire_check function for one schema. *)
221let emit_wire_check pr rs =
222 let name = Wire.struct_name rs.struct_ in
223 let lower = String.lowercase_ascii name in
224 pr "(* %s: wire_size=%d *)\n" name rs.total_wire_size;
225 pr "let %s_wire_check (buf : bytes) : bool =\n" lower;
226 pr " if Bytes.length buf < %d then false else\n" rs.total_wire_size;
227 let has_constraints =
228 List.exists (fun rf -> rf.constraint_val <> None) rs.fields
229 in
230 if has_constraints then begin
231 emit_schema_constraints pr rs.fields;
232 pr " true\n\n"
233 end
234 else pr " true\n\n"
235
236let generate_test_runner outdir schemas =
237 let oc = open_out (Filename.concat outdir "diff_test.ml") in
238 let pr fmt = Printf.fprintf oc fmt in
239 pr "(* Auto-generated differential test runner *)\n\n";
240 pr "let num_values = 100\n\n";
241 pr "type schema = {\n";
242 pr " name : string;\n";
243 pr " wire_size : int;\n";
244 pr " wire_check : bytes -> bool;\n";
245 pr " c_check : bytes -> bool;\n";
246 pr "}\n\n";
247 List.iter (emit_wire_check pr) schemas;
248 (* Generate schema list *)
249 pr "let schemas = [\n";
250 List.iter
251 (fun rs ->
252 let name = Wire.struct_name rs.struct_ in
253 let lower = String.lowercase_ascii name in
254 pr
255 " { name = %S; wire_size = %d; wire_check = %s_wire_check; c_check = \
256 Stubs.%s_check };\n"
257 name rs.total_wire_size lower lower)
258 schemas;
259 pr "]\n\n";
260 (* Test runner *)
261 pr "let () =\n";
262 pr " let seed = 42 in\n";
263 pr " let rng = Random.State.make [| seed |] in\n";
264 pr " let total_tests = ref 0 in\n";
265 pr " let mismatches = ref 0 in\n";
266 pr " List.iter (fun schema ->\n";
267 pr " for _ = 1 to num_values do\n";
268 pr " let buf = Bytes.create schema.wire_size in\n";
269 pr " for i = 0 to schema.wire_size - 1 do\n";
270 pr " Bytes.set buf i (Char.chr (Random.State.int rng 256))\n";
271 pr " done;\n";
272 pr " let wire_ok = schema.wire_check buf in\n";
273 pr " let c_ok = schema.c_check buf in\n";
274 pr " incr total_tests;\n";
275 pr " if wire_ok <> c_ok then begin\n";
276 pr " incr mismatches;\n";
277 pr
278 " Printf.printf \"MISMATCH %%s: wire=%%b c=%%b\\n\" schema.name \
279 wire_ok c_ok\n";
280 pr " end\n";
281 pr " done\n";
282 pr " ) schemas;\n";
283 pr
284 " Printf.printf \"Tested %%d values across %%d schemas, %%d mismatches\\n\"\n";
285 pr " !total_tests (List.length schemas) !mismatches;\n";
286 pr " if !mismatches > 0 then exit 1\n";
287 close_out oc
288
289let run_everparse schema_dir =
290 let cmd =
291 Fmt.str
292 "cd %s && for f in *.3d; do ~/.local/everparse/bin/3d.exe --batch \"$f\" \
293 || exit 1; done"
294 schema_dir
295 in
296 let ret = Sys.command cmd in
297 if ret <> 0 then Fmt.failwith "EverParse failed with code %d" ret
298
299(* ---- Main ---- *)
300
301let () =
302 let schema_dir =
303 if Array.length Sys.argv > 1 then Sys.argv.(1) else "schemas"
304 in
305 let num_random =
306 if Array.length Sys.argv > 2 then int_of_string Sys.argv.(2) else 20
307 in
308 (try Unix.mkdir schema_dir 0o755
309 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
310 let schemas = List.init num_random (fun i -> random_struct i) in
311 (* Generate .3d files for EverParse into schema_dir *)
312 List.iter
313 (fun rs ->
314 let name = Wire.struct_name rs.struct_ in
315 let m = Wire.module_ name [ Wire.typedef ~entrypoint:true rs.struct_ ] in
316 Wire.to_3d_file (Filename.concat schema_dir (name ^ ".3d")) m)
317 schemas;
318 (* Run EverParse to generate C parsers *)
319 run_everparse schema_dir;
320 (* Generate FFI stubs and test runner in current dir *)
321 generate_c_stubs ~schema_dir "." schemas;
322 generate_ml_stubs "." schemas;
323 generate_test_runner "." schemas