(*--------------------------------------------------------------------------- Copyright (c) 2025 Anil Madhavapeddy . All rights reserved. SPDX-License-Identifier: ISC ---------------------------------------------------------------------------*) (* Comprehensive tests for Punycode_idna (RFC 5891 IDNA) implementation *) open Alcotest (* {1 Test Helpers} *) let check_to_ascii ~msg expected input = try let result = Punycode_idna.to_ascii input in check string msg expected result with Punycode_idna.Error e -> failf "%s: to_ascii failed: %a" msg Punycode_idna.pp_error_reason e let check_to_unicode ~msg expected input = try let result = Punycode_idna.to_unicode input in check string msg expected result with Punycode_idna.Error e -> fail (Fmt.str "%s: to_unicode failed: %a" msg Punycode_idna.pp_error_reason e) let check_label_to_ascii ~msg expected input = try let result = Punycode_idna.label_to_ascii input in check string msg expected result with Punycode_idna.Error e -> fail (Fmt.str "%s: label_to_ascii failed: %a" msg Punycode_idna.pp_error_reason e) let check_label_to_unicode ~msg expected input = try let result = Punycode_idna.label_to_unicode input in check string msg expected result with Punycode_idna.Error e -> fail (Fmt.str "%s: label_to_unicode failed: %a" msg Punycode_idna.pp_error_reason e) let check_roundtrip ~msg input = try let ascii = Punycode_idna.to_ascii input in let unicode = Punycode_idna.to_unicode ascii in check string msg input unicode with Punycode_idna.Error e -> fail (Fmt.str "%s: roundtrip failed: %a" msg Punycode_idna.pp_error_reason e) let check_raises_error ~msg f = try ignore (f ()); failf "%s: expected Error but succeeded" msg with Punycode_idna.Error _ -> () (* {1 to_ascii Test Vectors} *) let test_to_ascii_german () = check_to_ascii ~msg:"German domain" "xn--mnchen-3ya.de" "m\xc3\xbcnchen.de" let test_to_ascii_japanese () = check_to_ascii ~msg:"Japanese domain" "xn--r8jz45g.jp" "\xe4\xbe\x8b\xe3\x81\x88.jp" let test_to_ascii_ascii_passthrough () = check_to_ascii ~msg:"ASCII passthrough" "example.com" "example.com" let test_to_ascii_mixed_labels () = check_to_ascii ~msg:"mixed labels" "www.xn--mnchen-3ya.de" "www.m\xc3\xbcnchen.de" let test_to_ascii_multi_idn () = (* Both labels are non-ASCII *) let input = "\xe4\xbe\x8b\xe3\x81\x88.\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88" in try let result = Punycode_idna.to_ascii input in let labels = String.split_on_char '.' result in List.iter (fun l -> check bool "each label has ACE prefix or is ASCII" true (Punycode_idna.is_ace_label l || Punycode.is_ascii_string l)) labels with Punycode_idna.Error e -> fail (Fmt.str "multiple IDN labels: to_ascii failed: %a" Punycode_idna.pp_error_reason e) let test_to_ascii_chinese () = check_to_ascii ~msg:"Chinese domain" "xn--fiq228c.cn" "\xe4\xb8\xad\xe6\x96\x87.cn" let test_to_ascii_russian () = (* "example" in Russian Cyrillic + .ru *) let input = "\xd0\xbf\xd1\x80\xd0\xb8\xd0\xbc\xd0\xb5\xd1\x80.ru" in try let result = Punycode_idna.to_ascii input in let labels = String.split_on_char '.' result in check bool "first label has ACE prefix" true (Punycode_idna.is_ace_label (List.hd labels)); check string "TLD preserved" "ru" (List.nth labels 1) with Punycode_idna.Error e -> fail (Fmt.str "Russian domain: to_ascii failed: %a" Punycode_idna.pp_error_reason e) (* {1 to_unicode Test Vectors} *) let test_to_unicode_german () = check_to_unicode ~msg:"German domain" "m\xc3\xbcnchen.de" "xn--mnchen-3ya.de" let test_to_unicode_japanese () = check_to_unicode ~msg:"Japanese domain" "\xe4\xbe\x8b\xe3\x81\x88.jp" "xn--r8jz45g.jp" let test_to_unicode_ascii_passthrough () = check_to_unicode ~msg:"ASCII passthrough" "example.com" "example.com" let test_to_unicode_mixed () = check_to_unicode ~msg:"mixed domain" "www.m\xc3\xbcnchen.de" "www.xn--mnchen-3ya.de" let test_to_unicode_chinese () = check_to_unicode ~msg:"Chinese domain" "\xe4\xb8\xad\xe6\x96\x87.cn" "xn--fiq228c.cn" (* {1 Roundtrip Tests} *) let test_roundtrip_german () = check_roundtrip ~msg:"German roundtrip" "m\xc3\xbcnchen.de" let test_roundtrip_japanese () = check_roundtrip ~msg:"Japanese roundtrip" "\xe4\xbe\x8b\xe3\x81\x88.jp" let test_roundtrip_chinese () = check_roundtrip ~msg:"Chinese roundtrip" "\xe4\xb8\xad\xe6\x96\x87.cn" let test_roundtrip_mixed () = check_roundtrip ~msg:"mixed roundtrip" "www.m\xc3\xbcnchen.de" let test_roundtrip_russian () = check_roundtrip ~msg:"Russian roundtrip" "\xd0\xbf\xd1\x80\xd0\xb8\xd0\xbc\xd0\xb5\xd1\x80.ru" let test_roundtrip_multi_idn () = check_roundtrip ~msg:"multi-IDN roundtrip" "\xe4\xbe\x8b\xe3\x81\x88.\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88" let test_roundtrip_ascii () = check_roundtrip ~msg:"ASCII roundtrip" "www.example.com" (* {1 label_to_ascii / label_to_unicode Tests} *) let test_label_to_ascii_german () = check_label_to_ascii ~msg:"German label" "xn--mnchen-3ya" "m\xc3\xbcnchen" let test_label_to_ascii_ascii () = check_label_to_ascii ~msg:"ASCII label passthrough" "example" "example" let test_label_to_ascii_japanese () = check_label_to_ascii ~msg:"Japanese label" "xn--r8jz45g" "\xe4\xbe\x8b\xe3\x81\x88" let test_label_to_unicode_german () = check_label_to_unicode ~msg:"German label" "m\xc3\xbcnchen" "xn--mnchen-3ya" let test_label_to_unicode_ascii () = check_label_to_unicode ~msg:"ASCII label passthrough" "example" "example" let test_label_to_unicode_japanese () = check_label_to_unicode ~msg:"Japanese label" "\xe4\xbe\x8b\xe3\x81\x88" "xn--r8jz45g" let test_label_roundtrip () = let label = "m\xc3\xbcnchen" in let ascii = Punycode_idna.label_to_ascii label in let unicode = Punycode_idna.label_to_unicode ascii in check string "label roundtrip" label unicode (* {1 is_ace_label Tests} *) let test_is_ace_label_valid () = check bool "xn-- prefix" true (Punycode_idna.is_ace_label "xn--mnchen-3ya") let test_is_ace_label_uppercase () = check bool "XN-- prefix (case insensitive)" true (Punycode_idna.is_ace_label "XN--mnchen-3ya") let test_ace_label_mixed_case () = check bool "Xn-- prefix (mixed case)" true (Punycode_idna.is_ace_label "Xn--mnchen-3ya") let test_is_ace_label_plain () = check bool "plain label" false (Punycode_idna.is_ace_label "example") let test_is_ace_label_short () = check bool "too short (xn-)" false (Punycode_idna.is_ace_label "xn-") let test_ace_label_single_dash () = check bool "single dash (xn-notvalid)" false (Punycode_idna.is_ace_label "xn-notvalid") let test_is_ace_label_empty () = check bool "empty string" false (Punycode_idna.is_ace_label "") (* {1 is_idna_valid Tests} *) let test_is_idna_valid_ascii () = check bool "ASCII domain valid" true (Punycode_idna.is_idna_valid "example.com") let test_is_idna_valid_idn () = check bool "IDN domain valid" true (Punycode_idna.is_idna_valid "m\xc3\xbcnchen.de") let test_is_idna_valid_ace () = check bool "ACE domain valid" true (Punycode_idna.is_idna_valid "xn--mnchen-3ya.de") let test_idna_valid_empty_label () = (* Empty label (double dot) should be invalid *) check bool "empty label invalid" false (Punycode_idna.is_idna_valid "example..com") (* {1 normalize_nfc Tests} *) let test_normalize_nfc_composed () = (* e followed by combining acute accent (U+0065 U+0301) should become precomposed e-acute (U+00E9) *) let decomposed = "\x65\xcc\x81" in let expected = "\xc3\xa9" in let result = Punycode_idna.normalize_nfc decomposed in check string "NFC normalization: decomposed to composed" expected result let test_normalize_nfc_already_composed () = let composed = "\xc3\xa9" in let result = Punycode_idna.normalize_nfc composed in check string "NFC normalization: already composed" composed result let test_normalize_nfc_ascii () = let ascii = "hello" in let result = Punycode_idna.normalize_nfc ascii in check string "NFC normalization: ASCII unchanged" ascii result let test_normalize_nfc_hangul () = (* Hangul syllable composition: U+1100 U+1161 -> U+AC00 *) let decomposed = "\xe1\x84\x80\xe1\x85\xa1" in let composed = "\xea\xb0\x80" in let result = Punycode_idna.normalize_nfc decomposed in check string "NFC normalization: Hangul composition" composed result (* {1 max_domain_length Tests} *) let test_max_domain_length_value () = check int "max_domain_length is 253" 253 Punycode_idna.max_domain_length let test_domain_too_long () = (* Create a domain that exceeds 253 bytes *) let long_label = String.make 60 'a' in let domain = String.concat "." [ long_label; long_label; long_label; long_label; long_label ] in (* 60*5 + 4 dots = 304 bytes > 253 *) check_raises_error ~msg:"domain too long" (fun () -> Punycode_idna.to_ascii domain) (* {1 Edge Case Tests} *) let test_empty_label_error () = check_raises_error ~msg:"empty label" (fun () -> Punycode_idna.label_to_ascii "") let test_single_label_domain () = check_to_ascii ~msg:"single label domain" "example" "example" let test_trailing_dot () = (* A trailing dot produces an empty final label after splitting on '.'. The implementation raises Error on empty labels. *) check_raises_error ~msg:"trailing dot" (fun () -> Punycode_idna.to_ascii "example.com.") let test_leading_hyphen_std3 () = (* With use_std3_rules, leading hyphens should be rejected *) try ignore (Punycode_idna.to_ascii ~use_std3_rules:true "-example.com"); fail "leading hyphen with STD3 rules: expected Error" with Punycode_idna.Error _ -> () let test_trailing_hyphen_std3 () = try ignore (Punycode_idna.to_ascii ~use_std3_rules:true "example-.com"); fail "trailing hyphen with STD3 rules: expected Error" with Punycode_idna.Error _ -> () let test_ascii_no_ace_prefix () = (* ASCII labels should not get an xn-- prefix *) let result = Punycode_idna.label_to_ascii "example" in check bool "no ACE prefix for ASCII" false (Punycode_idna.is_ace_label result) let test_error_reason_to_string () = let s = Punycode_idna.error_reason_to_string (Punycode_idna.Invalid_label "test") in check bool "error string is non-empty" true (String.length s > 0) let test_pp_error_reason () = let buf = Buffer.create 64 in let fmt = Format.formatter_of_buffer buf in Punycode_idna.pp_error_reason fmt (Punycode_idna.Domain_too_long 300); Format.pp_print_flush fmt (); let s = Buffer.contents buf in check bool "pp_error_reason produces output" true (String.length s > 0) (* {1 Test Suites} *) let to_ascii_tests = [ ("German domain", `Quick, test_to_ascii_german); ("Japanese domain", `Quick, test_to_ascii_japanese); ("ASCII passthrough", `Quick, test_to_ascii_ascii_passthrough); ("mixed labels", `Quick, test_to_ascii_mixed_labels); ("multiple IDN labels", `Quick, test_to_ascii_multi_idn); ("Chinese domain", `Quick, test_to_ascii_chinese); ("Russian domain", `Quick, test_to_ascii_russian); ] let to_unicode_tests = [ ("German domain", `Quick, test_to_unicode_german); ("Japanese domain", `Quick, test_to_unicode_japanese); ("ASCII passthrough", `Quick, test_to_unicode_ascii_passthrough); ("mixed domain", `Quick, test_to_unicode_mixed); ("Chinese domain", `Quick, test_to_unicode_chinese); ] let roundtrip_tests = [ ("German", `Quick, test_roundtrip_german); ("Japanese", `Quick, test_roundtrip_japanese); ("Chinese", `Quick, test_roundtrip_chinese); ("mixed", `Quick, test_roundtrip_mixed); ("Russian", `Quick, test_roundtrip_russian); ("multi-IDN", `Quick, test_roundtrip_multi_idn); ("ASCII", `Quick, test_roundtrip_ascii); ] let label_tests = [ ("label_to_ascii German", `Quick, test_label_to_ascii_german); ("label_to_ascii ASCII", `Quick, test_label_to_ascii_ascii); ("label_to_ascii Japanese", `Quick, test_label_to_ascii_japanese); ("label_to_unicode German", `Quick, test_label_to_unicode_german); ("label_to_unicode ASCII", `Quick, test_label_to_unicode_ascii); ("label_to_unicode Japanese", `Quick, test_label_to_unicode_japanese); ("label roundtrip", `Quick, test_label_roundtrip); ] let is_ace_label_tests = [ ("valid ACE prefix", `Quick, test_is_ace_label_valid); ("uppercase prefix", `Quick, test_is_ace_label_uppercase); ("mixed case prefix", `Quick, test_ace_label_mixed_case); ("plain label", `Quick, test_is_ace_label_plain); ("too short", `Quick, test_is_ace_label_short); ("single dash", `Quick, test_ace_label_single_dash); ("empty string", `Quick, test_is_ace_label_empty); ] let is_idna_valid_tests = [ ("ASCII domain", `Quick, test_is_idna_valid_ascii); ("IDN domain", `Quick, test_is_idna_valid_idn); ("ACE domain", `Quick, test_is_idna_valid_ace); ("empty label", `Quick, test_idna_valid_empty_label); ] let normalize_nfc_tests = [ ("decomposed to composed", `Quick, test_normalize_nfc_composed); ("already composed", `Quick, test_normalize_nfc_already_composed); ("ASCII unchanged", `Quick, test_normalize_nfc_ascii); ("Hangul composition", `Quick, test_normalize_nfc_hangul); ] let edge_case_tests = [ ("max_domain_length value", `Quick, test_max_domain_length_value); ("domain too long", `Quick, test_domain_too_long); ("empty label error", `Quick, test_empty_label_error); ("single label domain", `Quick, test_single_label_domain); ("trailing dot", `Quick, test_trailing_dot); ("leading hyphen STD3", `Quick, test_leading_hyphen_std3); ("trailing hyphen STD3", `Quick, test_trailing_hyphen_std3); ("ASCII no ACE prefix", `Quick, test_ascii_no_ace_prefix); ("error_reason_to_string", `Quick, test_error_reason_to_string); ("pp_error_reason", `Quick, test_pp_error_reason); ] let suite = ( "punycode_idna", to_ascii_tests @ to_unicode_tests @ roundtrip_tests @ label_tests @ is_ace_label_tests @ is_idna_valid_tests @ normalize_nfc_tests @ edge_case_tests )