OCaml wire format DSL with EverParse 3D output for verified parsers
at main 323 lines 11 kB view raw
1(* Generate .3d files from random wire schemas for EverParse. 2 3 All schemas are randomly generated with deterministic seeds. Fields of any 4 type may get constraints (~25% probability per field). *) 5 6(* ---- Field type metadata ---- *) 7 8type ft = { 9 make_field : string -> bool Wire.expr option -> Wire.field; 10 wire_size : int; 11 gen_constraint : Random.State.t -> int; 12 big_endian : bool; 13} 14 15let gen_uint8 rng = Random.State.int rng 256 16let gen_uint16 rng = Random.State.int rng 65536 17 18let gen_uint32 rng = 19 Int32.unsigned_to_int (Random.State.bits32 rng) |> Option.get 20 21let gen_uint64 rng = 22 Int64.to_int (Int64.logand (Random.State.bits64 rng) 0x3FFF_FFFF_FFFF_FFFFL) 23 24let field_types = 25 [| 26 { 27 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint8); 28 wire_size = 1; 29 gen_constraint = gen_uint8; 30 big_endian = false; 31 }; 32 { 33 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint16); 34 wire_size = 2; 35 gen_constraint = gen_uint16; 36 big_endian = false; 37 }; 38 { 39 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint16be); 40 wire_size = 2; 41 gen_constraint = gen_uint16; 42 big_endian = true; 43 }; 44 { 45 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint32); 46 wire_size = 4; 47 gen_constraint = gen_uint32; 48 big_endian = false; 49 }; 50 { 51 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint32be); 52 wire_size = 4; 53 gen_constraint = gen_uint32; 54 big_endian = true; 55 }; 56 { 57 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint64); 58 wire_size = 8; 59 gen_constraint = gen_uint64; 60 big_endian = false; 61 }; 62 { 63 make_field = (fun n c -> Wire.field n ?constraint_:c Wire.uint64be); 64 wire_size = 8; 65 gen_constraint = gen_uint64; 66 big_endian = true; 67 }; 68 |] 69 70(* ---- Random schema generation ---- *) 71 72type random_field = { 73 name : string; 74 ft : ft; 75 constraint_val : int option; 76 big_endian : bool; 77} 78 79type random_schema = { 80 struct_ : Wire.struct_; 81 fields : random_field list; 82 total_wire_size : int; 83} 84 85let random_struct seed = 86 let rng = Random.State.make [| seed |] in 87 let n = 1 + Random.State.int rng 6 in 88 let fields = 89 List.init n (fun i -> 90 let ft = 91 field_types.(Random.State.int rng (Array.length field_types)) 92 in 93 let name = Fmt.str "f%d" i in 94 let constraint_val = 95 if Random.State.int rng 4 = 0 then Some (ft.gen_constraint rng) 96 else None 97 in 98 { name; ft; constraint_val; big_endian = ft.big_endian }) 99 in 100 let struct_name = Fmt.str "Random%d" seed in 101 let wire_fields = 102 List.map 103 (fun rf -> 104 let constraint_ = 105 Option.map 106 (fun k -> Wire.Expr.(Wire.ref rf.name <= Wire.int k)) 107 rf.constraint_val 108 in 109 rf.ft.make_field rf.name constraint_) 110 fields 111 in 112 let total_wire_size = 113 List.fold_left (fun acc rf -> acc + rf.ft.wire_size) 0 fields 114 in 115 { struct_ = Wire.struct_ struct_name wire_fields; fields; total_wire_size } 116 117(* ---- Code generation for differential testing ---- *) 118 119let generate_c_stubs ~schema_dir outdir schemas = 120 let oc = open_out (Filename.concat outdir "stubs.c") in 121 let pr fmt = Printf.fprintf oc fmt in 122 pr "#include <caml/mlvalues.h>\n"; 123 pr "#include <caml/memory.h>\n"; 124 pr "#include <caml/alloc.h>\n"; 125 pr "#include <stdint.h>\n\n"; 126 (* Include all wrapper headers - they declare the check functions *) 127 List.iter 128 (fun rs -> 129 let name = Wire.struct_name rs.struct_ in 130 pr "#include \"%s/%sWrapper.h\"\n" schema_dir name) 131 schemas; 132 pr "\n"; 133 (* Include wrapper implementations with unique error handlers *) 134 List.iteri 135 (fun i rs -> 136 let name = Wire.struct_name rs.struct_ in 137 (* Include EverParse.h and parser *) 138 if i = 0 then pr "#include \"%s/EverParse.h\"\n" schema_dir; 139 pr "#include \"%s/%s.h\"\n" schema_dir name; 140 pr "#include \"%s/%s.c\"\n" schema_dir name; 141 (* Inline wrapper with renamed error handler *) 142 pr 143 "void %sEverParseError(const char *s, const char *f, const char *r) { \ 144 (void)s; (void)f; (void)r; }\n" 145 name; 146 pr "static void %s_ErrorHandler(\n" name; 147 pr " const char *t, const char *f, const char *r,\n"; 148 pr " uint64_t c, uint8_t *ctx, EVERPARSE_INPUT_BUFFER i, uint64_t p) {\n"; 149 pr " (void)t; (void)f; (void)r; (void)c; (void)ctx; (void)i; (void)p;\n"; 150 pr "}\n"; 151 pr "BOOLEAN %sCheck%s(uint8_t *base, uint32_t len) {\n" name name; 152 pr 153 " uint64_t result = %sValidate%s(NULL, %s_ErrorHandler, base, len, 0);\n" 154 name name name; 155 pr " return EverParseIsSuccess(result);\n"; 156 pr "}\n\n") 157 schemas; 158 (* Generate OCaml stubs *) 159 List.iter 160 (fun rs -> 161 let name = Wire.struct_name rs.struct_ in 162 pr "CAMLprim value caml_%s_check(value v_bytes) {\n" 163 (String.lowercase_ascii name); 164 pr " CAMLparam1(v_bytes);\n"; 165 pr " uint8_t *data = (uint8_t *)Bytes_val(v_bytes);\n"; 166 pr " uint32_t len = caml_string_length(v_bytes);\n"; 167 pr " BOOLEAN result = %sCheck%s(data, len);\n" name name; 168 pr " CAMLreturn(Val_bool(result));\n"; 169 pr "}\n\n") 170 schemas; 171 close_out oc 172 173let generate_ml_stubs outdir schemas = 174 let oc = open_out (Filename.concat outdir "stubs.ml") in 175 let pr fmt = Printf.fprintf oc fmt in 176 List.iter 177 (fun rs -> 178 let name = Wire.struct_name rs.struct_ in 179 let lower = String.lowercase_ascii name in 180 pr "external %s_check : bytes -> bool = \"caml_%s_check\"\n" lower lower) 181 schemas; 182 close_out oc 183 184(** Emit OCaml code that reads one constrained field and checks its value. *) 185let emit_field_constraint pr rf offset k = 186 let endian = if rf.big_endian then "be" else "le" in 187 match rf.ft.wire_size with 188 | 1 -> 189 pr " let %s = Bytes.get_uint8 buf %d in\n" rf.name offset; 190 pr " if %s > %d then false else\n" rf.name k 191 | 2 -> 192 pr " let %s = Bytes.get_uint16_%s buf %d in\n" rf.name endian offset; 193 pr " if %s > %d then false else\n" rf.name k 194 | 4 -> 195 pr " let %s = Bytes.get_int32_%s buf %d in\n" rf.name endian offset; 196 pr " if Int32.unsigned_compare %s (%ldl) > 0 then false else\n" rf.name 197 (Int32.of_int k) 198 | 8 -> 199 pr " let %s = Bytes.get_int64_%s buf %d in\n" rf.name endian offset; 200 pr " if Int64.unsigned_compare %s (%LdL) > 0 then false else\n" rf.name 201 (Int64.of_int k) 202 | _ -> 203 pr " let %s = Bytes.get_uint8 buf %d in\n" rf.name offset; 204 pr " if %s > %d then false else\n" rf.name k 205 206(** Emit constraint checks for all fields in a schema, with computed offsets. *) 207let emit_schema_constraints pr fields = 208 let rec add_offsets offset = function 209 | [] -> [] 210 | rf :: rest -> (rf, offset) :: add_offsets (offset + rf.ft.wire_size) rest 211 in 212 let fields_with_offsets = add_offsets 0 fields in 213 List.iter 214 (fun (rf, offset) -> 215 match rf.constraint_val with 216 | Some k -> emit_field_constraint pr rf offset k 217 | None -> ()) 218 fields_with_offsets 219 220(** Emit the wire_check function for one schema. *) 221let emit_wire_check pr rs = 222 let name = Wire.struct_name rs.struct_ in 223 let lower = String.lowercase_ascii name in 224 pr "(* %s: wire_size=%d *)\n" name rs.total_wire_size; 225 pr "let %s_wire_check (buf : bytes) : bool =\n" lower; 226 pr " if Bytes.length buf < %d then false else\n" rs.total_wire_size; 227 let has_constraints = 228 List.exists (fun rf -> rf.constraint_val <> None) rs.fields 229 in 230 if has_constraints then begin 231 emit_schema_constraints pr rs.fields; 232 pr " true\n\n" 233 end 234 else pr " true\n\n" 235 236let generate_test_runner outdir schemas = 237 let oc = open_out (Filename.concat outdir "diff_test.ml") in 238 let pr fmt = Printf.fprintf oc fmt in 239 pr "(* Auto-generated differential test runner *)\n\n"; 240 pr "let num_values = 100\n\n"; 241 pr "type schema = {\n"; 242 pr " name : string;\n"; 243 pr " wire_size : int;\n"; 244 pr " wire_check : bytes -> bool;\n"; 245 pr " c_check : bytes -> bool;\n"; 246 pr "}\n\n"; 247 List.iter (emit_wire_check pr) schemas; 248 (* Generate schema list *) 249 pr "let schemas = [\n"; 250 List.iter 251 (fun rs -> 252 let name = Wire.struct_name rs.struct_ in 253 let lower = String.lowercase_ascii name in 254 pr 255 " { name = %S; wire_size = %d; wire_check = %s_wire_check; c_check = \ 256 Stubs.%s_check };\n" 257 name rs.total_wire_size lower lower) 258 schemas; 259 pr "]\n\n"; 260 (* Test runner *) 261 pr "let () =\n"; 262 pr " let seed = 42 in\n"; 263 pr " let rng = Random.State.make [| seed |] in\n"; 264 pr " let total_tests = ref 0 in\n"; 265 pr " let mismatches = ref 0 in\n"; 266 pr " List.iter (fun schema ->\n"; 267 pr " for _ = 1 to num_values do\n"; 268 pr " let buf = Bytes.create schema.wire_size in\n"; 269 pr " for i = 0 to schema.wire_size - 1 do\n"; 270 pr " Bytes.set buf i (Char.chr (Random.State.int rng 256))\n"; 271 pr " done;\n"; 272 pr " let wire_ok = schema.wire_check buf in\n"; 273 pr " let c_ok = schema.c_check buf in\n"; 274 pr " incr total_tests;\n"; 275 pr " if wire_ok <> c_ok then begin\n"; 276 pr " incr mismatches;\n"; 277 pr 278 " Printf.printf \"MISMATCH %%s: wire=%%b c=%%b\\n\" schema.name \ 279 wire_ok c_ok\n"; 280 pr " end\n"; 281 pr " done\n"; 282 pr " ) schemas;\n"; 283 pr 284 " Printf.printf \"Tested %%d values across %%d schemas, %%d mismatches\\n\"\n"; 285 pr " !total_tests (List.length schemas) !mismatches;\n"; 286 pr " if !mismatches > 0 then exit 1\n"; 287 close_out oc 288 289let run_everparse schema_dir = 290 let cmd = 291 Fmt.str 292 "cd %s && for f in *.3d; do ~/.local/everparse/bin/3d.exe --batch \"$f\" \ 293 || exit 1; done" 294 schema_dir 295 in 296 let ret = Sys.command cmd in 297 if ret <> 0 then Fmt.failwith "EverParse failed with code %d" ret 298 299(* ---- Main ---- *) 300 301let () = 302 let schema_dir = 303 if Array.length Sys.argv > 1 then Sys.argv.(1) else "schemas" 304 in 305 let num_random = 306 if Array.length Sys.argv > 2 then int_of_string Sys.argv.(2) else 20 307 in 308 (try Unix.mkdir schema_dir 0o755 309 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()); 310 let schemas = List.init num_random (fun i -> random_struct i) in 311 (* Generate .3d files for EverParse into schema_dir *) 312 List.iter 313 (fun rs -> 314 let name = Wire.struct_name rs.struct_ in 315 let m = Wire.module_ name [ Wire.typedef ~entrypoint:true rs.struct_ ] in 316 Wire.to_3d_file (Filename.concat schema_dir (name ^ ".3d")) m) 317 schemas; 318 (* Run EverParse to generate C parsers *) 319 run_everparse schema_dir; 320 (* Generate FFI stubs and test runner in current dir *) 321 generate_c_stubs ~schema_dir "." schemas; 322 generate_ml_stubs "." schemas; 323 generate_test_runner "." schemas