forked from
anil.recoil.org/ocaml-punycode
Punycode (RFC3492) in OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6(* Crowbar-based fuzz testing for Punycode encoding/decoding *)
7
8open Crowbar
9
10(* Test that encode_utf8 never crashes on arbitrary input *)
11let test_encode_no_crash input =
12 (try ignore (Punycode.encode_utf8 input) with Punycode.Error _ -> ());
13 check true
14
15(* Test that decode_utf8 never crashes on arbitrary input *)
16let test_decode_no_crash input =
17 (try ignore (Punycode.decode_utf8 input) with Punycode.Error _ -> ());
18 check true
19
20(* Test roundtrip: encode then decode should give back original (case-insensitive)
21 IDNA/Punycode lowercases ASCII characters during encoding per RFC 5891 *)
22let test_roundtrip input =
23 try
24 let encoded = Punycode.encode_utf8 input in
25 let decoded = Punycode.decode_utf8 encoded in
26 (* Compare lowercase versions since IDNA lowercases ASCII *)
27 check_eq ~pp:Format.pp_print_string
28 (String.lowercase_ascii input)
29 (String.lowercase_ascii decoded)
30 with Punycode.Error _ ->
31 (* Some inputs might not encode/decode, that's ok *)
32 check true
33
34(* Test ASCII-only strings (should pass through mostly unchanged) *)
35let test_ascii_string input =
36 if String.length input > 0 then begin
37 let ascii_only =
38 String.init
39 (String.length input mod 64)
40 (fun i ->
41 Char.chr (Char.code input.[i mod String.length input] mod 128))
42 in
43 if String.length ascii_only > 0 then
44 try ignore (Punycode.encode_utf8 ascii_only) with Punycode.Error _ -> ()
45 end;
46 check true
47
48(* Test inputs starting with ACE prefix "xn--" *)
49let test_ace_prefix input =
50 let ace_input = "xn--" ^ input in
51 (try ignore (Punycode.decode_utf8 ace_input) with Punycode.Error _ -> ());
52 check true
53
54let suite =
55 ( "punycode",
56 [
57 test_case "encode no crash" [ bytes ] test_encode_no_crash;
58 test_case "decode no crash" [ bytes ] test_decode_no_crash;
59 test_case "roundtrip" [ bytes ] test_roundtrip;
60 test_case "ascii string" [ bytes ] test_ascii_string;
61 test_case "ace prefix" [ bytes ] test_ace_prefix;
62 ] )