OCaml Fuzzing with Crowbar#
This guide covers setting up property-based fuzz testing for OCaml libraries using Crowbar. Crowbar supports two modes:
- QuickCheck mode - Fast, deterministic testing without AFL (good for CI)
- AFL mode - Coverage-guided fuzzing with American Fuzzy Lop
Quick Start#
1. Add Dependencies#
Add crowbar to your opam dependencies:
opam install crowbar
2. Create Fuzz Test Directory#
Create a fuzz/ directory at the root of your project:
project/
├── lib/
├── fuzz/
│ ├── dune
│ └── fuzz_mylib.ml
└── dune-project
3. Add Dune Configuration#
Create fuzz/dune:
(executable
(name fuzz_mylib)
(libraries mylib crowbar))
For multiple fuzz targets:
(executable
(name fuzz_parser)
(libraries mylib crowbar))
(executable
(name fuzz_encoder)
(libraries mylib crowbar))
4. Write Fuzz Tests#
See Test Structure below for patterns and examples.
5. Run Tests#
QuickCheck mode (no AFL required):
dune exec ./fuzz/fuzz_mylib.exe
AFL mode (requires AFL instrumentation):
# Build with AFL instrumentation
dune build --context afl
# Run AFL fuzzer
mkdir -p input_corpus output
echo "seed" > input_corpus/seed
afl-fuzz -i input_corpus -o output -- _build/afl/fuzz/fuzz_mylib.exe @@
Test Structure#
Basic Test Pattern#
open Crowbar
(* Property: operation shouldn't crash on arbitrary input *)
let () =
add_test ~name:"mylib_no_crash" [ bytes ] @@ fun input ->
match MyLib.parse input with
| Ok _ | Error _ -> check true
(* Property: roundtrip encode/decode *)
let () =
add_test ~name:"mylib_roundtrip" [ int ] @@ fun n ->
let encoded = MyLib.encode n in
match MyLib.decode encoded with
| Error e -> failwith ("roundtrip failed: " ^ e)
| Ok decoded -> check (decoded = n)
Crowbar Generators#
Crowbar provides generators for creating test inputs:
open Crowbar
(* Primitive generators *)
let _ = bool (* bool *)
let _ = int (* int (small range in quickcheck mode) *)
let _ = int8 (* 0-255 *)
let _ = int32 (* int32 *)
let _ = int64 (* int64 *)
let _ = float (* float *)
let _ = bytes (* arbitrary byte string *)
let _ = bytes_fixed 10 (* exactly 10 bytes *)
(* Range generator *)
let _ = range 100 (* 0 to 99 *)
(* Choice from constants *)
let _ = choose [
const "hello";
const "world";
const "";
]
(* Mapping over generators *)
let positive_int = map [int] (fun n -> abs n)
(* Optional values *)
let maybe_int = option int (* int option *)
(* Lists *)
let int_list = list int (* int list *)
(* Pairs and tuples *)
let pair = pair int bytes (* int * bytes *)
Common Test Patterns#
1. Crash Resistance (Robustness)#
Test that arbitrary input doesn't cause crashes:
let () =
add_test ~name:"parser_no_crash" [ bytes ] @@ fun input ->
(* Should never raise an exception *)
let _ = try Parser.parse input with _ -> [] in
check true
2. Roundtrip Property#
Test encode/decode symmetry:
let () =
add_test ~name:"codec_roundtrip" [ int ] @@ fun value ->
match encode value with
| Error e -> failwith ("encode failed: " ^ e)
| Ok encoded ->
match decode encoded with
| Error e -> failwith ("decode failed: " ^ e)
| Ok decoded -> check (decoded = value)
3. Idempotence#
Test that applying operation twice equals applying once:
let () =
add_test ~name:"normalize_idempotent" [ bytes ] @@ fun input ->
let once = normalize input in
let twice = normalize once in
check (once = twice)
4. Known Edge Cases#
Test specific problematic inputs:
let () =
add_test ~name:"empty_input" [ const () ] @@ fun () ->
match parse "" with
| Ok _ | Error _ -> check true
let () =
add_test ~name:"boundary_values"
[ choose [
const 0;
const (-1);
const max_int;
const min_int;
] ]
@@ fun n ->
match process n with
| Ok _ | Error _ -> check true
5. Type Mismatch Handling#
Test graceful handling of wrong types:
let () =
add_test ~name:"type_mismatch" [ bytes ] @@ fun input ->
(* Parsing arbitrary bytes as int should fail gracefully *)
match decode_int input with
| Ok _ -> check true (* unexpected success is fine *)
| Error _ -> check true (* expected failure *)
6. Comparison with Reference Implementation#
let () =
add_test ~name:"matches_reference" [ int; int ] @@ fun a b ->
let result = MyLib.add a b in
let expected = a + b in
check (result = expected)
Handling Expected Failures#
When some inputs are expected to fail:
(* Large integers may exceed safe precision *)
let max_safe_int = 9007199254740991
let () =
add_test ~name:"large_int_handling" [ range max_int ] @@ fun n ->
match encode n with
| Error _ ->
(* Large ints beyond safe precision correctly rejected *)
check (n > max_safe_int)
| Ok encoded ->
match decode encoded with
| Error _ -> check (n > max_safe_int)
| Ok decoded -> check (decoded = n)
Float Comparison#
Use epsilon comparison for floating-point roundtrips:
let () =
add_test ~name:"float_roundtrip"
[ choose [ const 0.0; const 3.14; const (-2.5) ] ]
@@ fun f ->
match encode f |> Result.bind decode with
| Error e -> failwith e
| Ok decoded ->
(* Use epsilon comparison for floats *)
check (Float.abs (decoded -. f) < 0.0001)
AFL Fuzzing Setup#
Prerequisites#
- OCaml compiler with AFL support
- AFL or AFL++ installed
- Dune 3.0+
Dune Workspace Configuration#
Create or update dune-workspace at project root:
(lang dune 3.20)
(context default)
(context
(default
(name afl)
(profile afl)))
(env
(afl
(ocamlopt_flags (:standard -afl-instrument))))
This creates an afl build context that instruments binaries for coverage-guided fuzzing.
Building for AFL#
dune build --context afl
Instrumented binaries are output to _build/afl/.
Running AFL#
# Create input corpus with seed files
mkdir -p input_corpus
echo '{"key": "value"}' > input_corpus/json1
echo 'key: value' > input_corpus/yaml1
# Run AFL fuzzer
afl-fuzz -i input_corpus -o findings -- _build/afl/fuzz/fuzz_mylib.exe @@
Common AFL Issues#
Core Pattern Error#
AFL: Oops, your system is configured to send core dump notifications to an external utility...
Fix:
sudo su -c 'echo core > /proc/sys/kernel/core_pattern'
CPU Scaling Error#
AFL: The CPU frequency scaling governor is set to "powersave"...
Fix:
sudo cpufreq-set -g performance
# Or use AFL_SKIP_CPUFREQ=1 to ignore (not recommended)
Complete Example#
Here's a complete fuzz test file for a YAML library:
(** Property-based tests for MyYaml using Crowbar *)
open Crowbar
(* Helper functions *)
let decode codec s =
let reader = Bytesrw.Bytes.Reader.of_string s in
MyYaml.decode codec reader
let encode codec v =
let buf = Buffer.create 256 in
let writer = Bytesrw.Bytes.Writer.of_buffer buf in
match MyYaml.encode codec v writer with
| Ok () -> Ok (Buffer.contents buf)
| Error e -> Error e
(* Test: Arbitrary input shouldn't crash *)
let () =
add_test ~name:"yaml_no_crash" [ bytes ] @@ fun input ->
match decode Codec.string input with
| Ok _ | Error _ -> check true
(* Test: String roundtrip *)
let () =
add_test ~name:"yaml_string_roundtrip"
[ choose [ const "hello"; const ""; const "with\nnewline" ] ]
@@ fun s ->
match encode Codec.string s with
| Error e -> failwith ("encode: " ^ e)
| Ok yaml ->
match decode Codec.string yaml with
| Error e -> failwith ("decode: " ^ e)
| Ok decoded -> check (decoded = s)
(* Test: Int roundtrip *)
let () =
add_test ~name:"yaml_int_roundtrip" [ range 1000000 ] @@ fun n ->
match encode Codec.int n with
| Error e -> failwith ("encode: " ^ e)
| Ok yaml ->
match decode Codec.int yaml with
| Error e -> failwith ("decode: " ^ e)
| Ok decoded -> check (decoded = n)
(* Test: Bool roundtrip *)
let () =
add_test ~name:"yaml_bool_roundtrip" [ bool ] @@ fun b ->
match encode Codec.bool b with
| Error e -> failwith ("encode: " ^ e)
| Ok yaml ->
match decode Codec.bool yaml with
| Error e -> failwith ("decode: " ^ e)
| Ok decoded -> check (decoded = b)
(* Test: List roundtrip *)
let () =
add_test ~name:"yaml_list_roundtrip"
[ choose [ const []; const [1;2;3]; const [0] ] ]
@@ fun lst ->
let codec = Codec.list Codec.int in
match encode codec lst with
| Error e -> failwith ("encode: " ^ e)
| Ok yaml ->
match decode codec yaml with
| Error e -> failwith ("decode: " ^ e)
| Ok decoded -> check (decoded = lst)
(* Test: Empty input *)
let () =
add_test ~name:"yaml_empty" [ const () ] @@ fun () ->
match decode Codec.string "" with
| Ok _ | Error _ -> check true
(* Test: Nested structures *)
let () =
add_test ~name:"yaml_nested"
[ choose [ const [[]]; const [[1;2];[3;4]] ] ]
@@ fun nested ->
let codec = Codec.list (Codec.list Codec.int) in
match encode codec nested with
| Error e -> failwith ("encode: " ^ e)
| Ok yaml ->
match decode codec yaml with
| Error e -> failwith ("decode: " ^ e)
| Ok decoded -> check (decoded = nested)
Tips for Writing Effective Fuzz Tests#
-
Start with crash resistance - The most basic test ensures arbitrary input doesn't crash your code.
-
Test roundtrips - If you have encode/decode pairs, roundtrip tests catch many bugs.
-
Use
choosefor edge cases - Combine random generation with known problematic values. -
Keep generators focused - Generate only valid inputs for properties that require them.
-
Handle expected failures gracefully - Use pattern matching to distinguish expected vs unexpected failures.
-
Test at boundaries - Empty inputs, maximum values, and type boundaries often reveal bugs.
-
Run QuickCheck mode in CI - It's fast and catches many issues without AFL setup.
-
Use AFL for deep testing - AFL's coverage guidance finds edge cases that random testing misses.
Bugs Found by This Approach#
Example bugs discovered through fuzz testing:
- Empty string encoding - Empty strings weren't properly quoted, causing decode failures
- Empty array encoding - Empty arrays
[]weren't using flow style, breaking roundtrips - Large integer precision - Integers > 2^53-1 lost precision in float-based formats
- Option type decoding - Nested type combinators weren't being unwrapped during scalar decoding
- Special character handling - Strings with newlines or special YAML characters needed escaping
HTML Parser Bugs (html5rw)#
Fuzz testing html5rw discovered these serialization bugs:
- Raw text element double-escaping - Elements like
<script>,<style>,<xmp>,<iframe>,<noembed>,<noframes>,<noscript>were having their content HTML-escaped during serialization, which broke roundtrips (e.g.,<becoming<inside script tags) - Escapable raw text handling - Elements
<textarea>and<title>need special treatment: only&needs escaping, not<or> - Plaintext element serialization - The
<plaintext>element cannot be closed in HTML5, so serialization must stop after emitting its content
AFL++ Compatibility Notes#
When using AFL++ (modern fork of AFL) with OCaml:
Crowbar Persistent Mode Issues#
Crowbar's built-in AFL support may not work with AFL++. The @@ file-based mode with Crowbar often results in "No instrumentation detected" errors even when instrumentation is present.
Solution: Create a separate AFL fuzzer using afl-persistent directly:
(* fuzz_afl.ml - Simple AFL-compatible fuzzer *)
let fuzz_input input =
try
(* Your fuzzing logic here *)
let result = MyLib.parse input in
let serialized = MyLib.serialize result in
(* Check properties, assert on failures *)
if not (some_property serialized) then
assert false;
true
with
| Assert_failure _ -> raise (Assert_failure ("", 0, 0))
| _ -> true (* Expected failures for malformed input *)
let read_file filename =
let ic = open_in_bin filename in
let n = in_channel_length ic in
let s = really_input_string ic n in
close_in ic;
s
let () =
AflPersistent.run (fun () ->
if Array.length Sys.argv < 2 then begin
Printf.eprintf "Usage: %s <input_file>\n" Sys.argv.(0);
exit 1
end;
let input = read_file Sys.argv.(1) in
ignore (fuzz_input input)
)
Add afl-persistent to dependencies in dune:
(executable
(name fuzz_afl)
(libraries mylib afl-persistent))
Building with AFL Instrumentation#
Use -x afl flag (not --context afl):
dune build -x afl ./fuzz/fuzz_afl.exe
Environment Variables for AFL++#
# Suppress core_pattern warning (when you can't modify /proc/sys/kernel/core_pattern)
export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
# Skip CPU frequency scaling check
export AFL_SKIP_CPUFREQ=1
# Run AFL
afl-fuzz -i input_corpus -o output -- _build/afl/fuzz/fuzz_afl.exe @@
Verifying Instrumentation#
Use afl-showmap to verify instrumentation is detected:
afl-showmap -o /tmp/map -- _build/afl/fuzz/fuzz_afl.exe /tmp/test_input
cat /tmp/map | head -20 # Should show tuples like "000062:3"
A healthy instrumentation should capture 200+ tuples for a non-trivial program.
Creating Effective Seed Corpus#
For HTML parsers, include diverse constructs:
mkdir -p fuzz/input_corpus
# Basic structure
echo '<html><head><title>Test</title></head><body><p>Hello</p></body></html>' > fuzz/input_corpus/basic.html
# Raw text elements (critical for escaping bugs)
echo '<script>if (a < b && c > d) {}</script>' > fuzz/input_corpus/script.html
echo '<style>.foo { content: "<bar>" }</style>' > fuzz/input_corpus/style.html
# Escapable raw text elements
echo '<textarea>Text with <html> tags & entities</textarea>' > fuzz/input_corpus/textarea.html
# Tables (complex nesting rules)
echo '<table><tr><td>Cell</td></tr></table>' > fuzz/input_corpus/table.html
# Forms
echo '<form><input type="text"><button>Submit</button></form>' > fuzz/input_corpus/form.html
# SVG/MathML (foreign content)
echo '<svg><circle r="50"/></svg>' > fuzz/input_corpus/svg.html
echo '<math><mrow><mi>x</mi></mrow></math>' > fuzz/input_corpus/math.html
# Entities
echo '<p>& < > </p>' > fuzz/input_corpus/entities.html
# Template elements
echo '<template><div>Content</div></template>' > fuzz/input_corpus/template.html
# Malformed HTML (parser recovery)
echo '<p>Unclosed<div>Mixed</p></div>' > fuzz/input_corpus/malformed.html
# Comments
echo '<!--comment-->Text<!---->More' > fuzz/input_corpus/comment.html
Recommended Test Properties for Parsers#
1. Roundtrip Stabilization#
The most effective parser test: parse → serialize → parse → serialize should produce identical output on the second iteration:
let () =
add_test ~name:"roundtrip_stabilizes" [ bytes ] @@ fun input ->
let r1 = parse input in
let s1 = serialize r1 in
let r2 = parse s1 in
let s2 = serialize r2 in
let r3 = parse s2 in
let s3 = serialize r3 in
(* First roundtrip may differ, but second must stabilize *)
check (s2 = s3)
2. Clone Consistency#
For DOM-like structures with clone operations:
let () =
add_test ~name:"clone_identical" [ bytes ] @@ fun input ->
let doc = parse input in
let root = get_root doc in
let cloned = clone ~deep:true root in
check (to_html root = to_html cloned)
3. Error Consistency#
Parse errors should be deterministic:
let () =
add_test ~name:"errors_consistent" [ bytes ] @@ fun input ->
let r1 = parse input in
let r2 = parse input in
check (get_errors r1 = get_errors r2)
Workflow for Fuzz-Driven Bug Fixing#
-
Run QuickCheck mode first - Fast iteration, catches obvious bugs
dune exec ./fuzz/fuzz_mylib.exe -
Fix bugs, re-run - Iterate until QuickCheck passes
-
Run AFL for deeper coverage - 5-10 minutes initially
dune build -x afl ./fuzz/fuzz_afl.exe AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_SKIP_CPUFREQ=1 \ timeout 600 afl-fuzz -i fuzz/input_corpus -o fuzz/output -- \ _build/afl/fuzz/fuzz_afl.exe @@ -
Check for crashes - Examine
fuzz/output/default/crashes/ls fuzz/output/default/crashes/ # Reproduce crash: _build/afl/fuzz/fuzz_afl.exe fuzz/output/default/crashes/id:000000,... -
Fix and repeat - Continue until no crashes found after extended runs
-
Measure coverage - Track corpus growth and edge coverage
cat fuzz/output/default/fuzzer_stats | grep -E "(execs_done|corpus_count|saved_crashes)"