Punycode (RFC3492) in OCaml
at main 873 lines 23 kB view raw
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Comprehensive tests for Punycode (RFC 3492) implementation *)

open Alcotest
module Punycode = Punycode
module Punycode_idna = Punycode_idna

(* ------------------------------------------------------------------------ *)
(* Helpers                                                                  *)
(* ------------------------------------------------------------------------ *)

(* [or_punycode_failure what f] evaluates [f ()]. A [Punycode.Error] raised
   by [f] is reported as an Alcotest failure whose message is
   "<what> failed: <reason>", with the reason printed by
   [Punycode.pp_error_reason]. Any other exception propagates unchanged. *)
let or_punycode_failure what f =
  try f ()
  with Punycode.Error e ->
    fail (Format.asprintf "%s failed: %a" what Punycode.pp_error_reason e)

(* Same as [or_punycode_failure], for [Punycode_idna.Error]. *)
let or_idna_failure what f =
  try f ()
  with Punycode_idna.Error e ->
    fail (Format.asprintf "%s failed: %a" what Punycode_idna.pp_error_reason e)

(* Helper to convert a list of integer code points (written in hex in the
   vectors below) to a Uchar array. *)
let codepoints_of_hex_list hex_list =
  hex_list |> List.map Uchar.of_int |> Array.of_list

(* Helper to convert a UTF-8 string to its code points. The cursor advances
   by the decoded byte length reported for each position, so the whole
   string is consumed left to right. *)
let codepoints_of_string s =
  let len = String.length s in
  let rec go pos acc =
    if pos >= len then Array.of_list (List.rev acc)
    else
      let dec = String.get_utf_8_uchar s pos in
      go (pos + Uchar.utf_decode_length dec) (Uchar.utf_decode_uchar dec :: acc)
  in
  go 0 []

(* [check_encode_ok expected input] asserts that encoding the code point
   array [input] yields exactly the string [expected]. *)
let check_encode_ok expected input =
  or_punycode_failure "encode" (fun () ->
      check string "encode" expected (Punycode.encode input))

(* [check_decode_ok expected input] asserts that decoding the string [input]
   yields the code points in the int list [expected]. The length is checked
   first, so the per-character loop never indexes past [expected_arr]. *)
let check_decode_ok expected input =
  or_punycode_failure "decode" (fun () ->
      let result = Punycode.decode input in
      let expected_arr = codepoints_of_hex_list expected in
      check int "length" (Array.length expected_arr) (Array.length result);
      Array.iteri
        (fun i u ->
          check int
            (Printf.sprintf "char %d" i)
            (Uchar.to_int expected_arr.(i))
            (Uchar.to_int u))
        result)

(* [check_utf8_roundtrip s] asserts that [decode_utf8 (encode_utf8 s)] gives
   back [s]. *)
let check_utf8_roundtrip s =
  or_punycode_failure "roundtrip" (fun () ->
      let encoded = Punycode.encode_utf8 s in
      let decoded = Punycode.decode_utf8 encoded in
      check string "roundtrip" s decoded)

(* ------------------------------------------------------------------------ *)
(* RFC 3492 Section 7.1 Test Vectors                                        *)
(* ------------------------------------------------------------------------ *)

(* (A) Arabic (Egyptian) *)
let arabic_codepoints =
  [
    0x0644; 0x064A; 0x0647; 0x0645; 0x0627; 0x0628; 0x062A; 0x0643;
    0x0644; 0x0645; 0x0648; 0x0634; 0x0639; 0x0631; 0x0628; 0x064A;
    0x061F;
  ]

let arabic_punycode = "egbpdaj6bu4bxfgehfvwxn"

(* (B) Chinese (simplified) *)
let chinese_simplified_codepoints =
  [ 0x4ED6; 0x4EEC; 0x4E3A; 0x4EC0; 0x4E48; 0x4E0D; 0x8BF4; 0x4E2D; 0x6587 ]

let chinese_simplified_punycode = "ihqwcrb4cv8a8dqg056pqjye"

(* (C) Chinese (traditional) *)
let chinese_traditional_codepoints =
  [ 0x4ED6; 0x5011; 0x7232; 0x4EC0; 0x9EBD; 0x4E0D; 0x8AAA; 0x4E2D; 0x6587 ]

let chinese_traditional_punycode = "ihqwctvzc91f659drss3x8bo0yb"

(* (D) Czech *)
let czech_codepoints =
  [
    0x0050; 0x0072; 0x006F; 0x010D; 0x0070; 0x0072; 0x006F; 0x0073;
    0x0074; 0x011B; 0x006E; 0x0065; 0x006D; 0x006C; 0x0075; 0x0076;
    0x00ED; 0x010D; 0x0065; 0x0073; 0x006B; 0x0079;
  ]

let czech_punycode = "Proprostnemluvesky-uyb24dma41a"

(* (E) Hebrew *)
let hebrew_codepoints =
  [
    0x05DC; 0x05DE; 0x05D4; 0x05D4; 0x05DD; 0x05E4; 0x05E9; 0x05D5;
    0x05D8; 0x05DC; 0x05D0; 0x05DE; 0x05D3; 0x05D1; 0x05E8; 0x05D9;
    0x05DD; 0x05E2; 0x05D1; 0x05E8; 0x05D9; 0x05EA;
  ]

let hebrew_punycode = "4dbcagdahymbxekheh6e0a7fei0b"

(* (F) Hindi (Devanagari) *)
let hindi_codepoints =
  [
    0x092F; 0x0939; 0x0932; 0x094B; 0x0917; 0x0939; 0x093F; 0x0928;
    0x094D; 0x0926; 0x0940; 0x0915; 0x094D; 0x092F; 0x094B; 0x0902;
    0x0928; 0x0939; 0x0940; 0x0902; 0x092C; 0x094B; 0x0932; 0x0938;
    0x0915; 0x0924; 0x0947; 0x0939; 0x0948; 0x0902;
  ]

let hindi_punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"

(* (G) Japanese (kanji and hiragana) *)
let japanese_codepoints =
  [
    0x306A; 0x305C; 0x307F; 0x3093; 0x306A; 0x65E5; 0x672C; 0x8A9E;
    0x3092; 0x8A71; 0x3057; 0x3066; 0x304F; 0x308C; 0x306A; 0x3044;
    0x306E; 0x304B;
  ]

let japanese_punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"

(* (H) Korean (Hangul syllables) *)
let korean_codepoints =
  [
    0xC138; 0xACC4; 0xC758; 0xBAA8; 0xB4E0; 0xC0AC; 0xB78C; 0xB4E4;
    0xC774; 0xD55C; 0xAD6D; 0xC5B4; 0xB97C; 0xC774; 0xD574; 0xD55C;
    0xB2E4; 0xBA74; 0xC5BC; 0xB9C8; 0xB098; 0xC88B; 0xC744; 0xAE4C;
  ]

let korean_punycode =
  "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"

(* (I) Russian (Cyrillic) *)
let russian_codepoints =
  [
    0x043F; 0x043E; 0x0447; 0x0435; 0x043C; 0x0443; 0x0436; 0x0435;
    0x043E; 0x043D; 0x0438; 0x043D; 0x0435; 0x0433; 0x043E; 0x0432;
    0x043E; 0x0440; 0x044F; 0x0442; 0x043F; 0x043E; 0x0440; 0x0443;
    0x0441; 0x0441; 0x043A; 0x0438;
  ]

let russian_punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l"

(* (J) Spanish *)
let spanish_codepoints =
  [
    0x0050; 0x006F; 0x0072; 0x0071; 0x0075; 0x00E9; 0x006E; 0x006F;
    0x0070; 0x0075; 0x0065; 0x0064; 0x0065; 0x006E; 0x0073; 0x0069;
    0x006D; 0x0070; 0x006C; 0x0065; 0x006D; 0x0065; 0x006E; 0x0074;
    0x0065; 0x0068; 0x0061; 0x0062; 0x006C; 0x0061; 0x0072; 0x0065;
    0x006E; 0x0045; 0x0073; 0x0070; 0x0061; 0x00F1; 0x006F; 0x006C;
  ]

let spanish_punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a"

(* (K) Vietnamese *)
let vietnamese_codepoints =
  [
    0x0054; 0x1EA1; 0x0069; 0x0073; 0x0061; 0x006F; 0x0068; 0x1ECD;
    0x006B; 0x0068; 0x00F4; 0x006E; 0x0067; 0x0074; 0x0068; 0x1EC3;
    0x0063; 0x0068; 0x1EC9; 0x006E; 0x00F3; 0x0069; 0x0074; 0x0069;
    0x1EBF; 0x006E; 0x0067; 0x0056; 0x0069; 0x1EC7; 0x0074;
  ]

let vietnamese_punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"

(* (L) 3年B組金八先生 - Japanese with ASCII *)
let example_l_codepoints =
  [ 0x0033; 0x5E74; 0x0042; 0x7D44; 0x91D1; 0x516B; 0x5148; 0x751F ]

let example_l_punycode = "3B-ww4c5e180e575a65lsy2b"

(* (M) 安室奈美恵-with-SUPER-MONKEYS *)
let example_m_codepoints =
  [
    0x5B89; 0x5BA4; 0x5948; 0x7F8E; 0x6075; 0x002D; 0x0077; 0x0069;
    0x0074; 0x0068; 0x002D; 0x0053; 0x0055; 0x0050; 0x0045; 0x0052;
    0x002D; 0x004D; 0x004F; 0x004E; 0x004B; 0x0045; 0x0059; 0x0053;
  ]

let example_m_punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"

(* (N) Hello-Another-Way-それぞれの場所 *)
let example_n_codepoints =
  [
    0x0048; 0x0065; 0x006C; 0x006C; 0x006F; 0x002D; 0x0041; 0x006E;
    0x006F; 0x0074; 0x0068; 0x0065; 0x0072; 0x002D; 0x0057; 0x0061;
    0x0079; 0x002D; 0x305D; 0x308C; 0x305E; 0x308C; 0x306E; 0x5834;
    0x6240;
  ]

let example_n_punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b"

(* (O) ひとつ屋根の下2 *)
let example_o_codepoints =
  [ 0x3072; 0x3068; 0x3064; 0x5C4B; 0x6839; 0x306E; 0x4E0B; 0x0032 ]

let example_o_punycode = "2-u9tlzr9756bt3uc0v"

(* (P) MajiでKoiする5秒前 *)
let example_p_codepoints =
  [
    0x004D; 0x0061; 0x006A; 0x0069; 0x3067; 0x004B; 0x006F; 0x0069;
    0x3059; 0x308B; 0x0035; 0x79D2; 0x524D;
  ]

let example_p_punycode = "MajiKoi5-783gue6qz075azm5e"

(* (Q) パフィーdeルンバ *)
let example_q_codepoints =
  [ 0x30D1; 0x30D5; 0x30A3; 0x30FC; 0x0064; 0x0065; 0x30EB; 0x30F3; 0x30D0 ]

let example_q_punycode = "de-jg4avhby1noc0d"

(* (R) そのスピードで *)
let example_r_codepoints =
  [ 0x305D; 0x306E; 0x30B9; 0x30D4; 0x30FC; 0x30C9; 0x3067 ]

let example_r_punycode = "d9juau41awczczp"

(* (S) -> $1.00 <- (pure ASCII) *)
let example_s_codepoints =
  [
    0x002D; 0x003E; 0x0020; 0x0024; 0x0031; 0x002E; 0x0030; 0x0030;
    0x0020; 0x003C; 0x002D;
  ]

let example_s_punycode = "-> $1.00 <--"

(* ------------------------------------------------------------------------ *)
(* Decode tests: every RFC vector (A) to (S)                                *)
(* ------------------------------------------------------------------------ *)

let test_decode_arabic () = check_decode_ok arabic_codepoints arabic_punycode

let test_decode_chinese_simplified () =
  check_decode_ok chinese_simplified_codepoints chinese_simplified_punycode

let test_decode_chinese_traditional () =
  check_decode_ok chinese_traditional_codepoints chinese_traditional_punycode

let test_decode_czech () = check_decode_ok czech_codepoints czech_punycode
let test_decode_hebrew () = check_decode_ok hebrew_codepoints hebrew_punycode
let test_decode_hindi () = check_decode_ok hindi_codepoints hindi_punycode

let test_decode_japanese () =
  check_decode_ok japanese_codepoints japanese_punycode

let test_decode_korean () = check_decode_ok korean_codepoints korean_punycode

(* [russian_punycode] is stored all-lowercase above, so the lowercasing here
   leaves it unchanged.
   NOTE(review): RFC 3492 prints this vector with one uppercase letter, which
   is presumably why the call exists - confirm before removing it. *)
let test_decode_russian () =
  check_decode_ok russian_codepoints (String.lowercase_ascii russian_punycode)

let test_decode_spanish () = check_decode_ok spanish_codepoints spanish_punycode

let test_decode_vietnamese () =
  check_decode_ok vietnamese_codepoints vietnamese_punycode

let test_decode_example_l () =
  check_decode_ok example_l_codepoints example_l_punycode

let test_decode_example_m () =
  check_decode_ok example_m_codepoints example_m_punycode

let test_decode_example_n () =
  check_decode_ok example_n_codepoints example_n_punycode

let test_decode_example_o () =
  check_decode_ok example_o_codepoints example_o_punycode

let test_decode_example_p () =
  check_decode_ok example_p_codepoints example_p_punycode

let test_decode_example_q () =
  check_decode_ok example_q_codepoints example_q_punycode

let test_decode_example_r () =
  check_decode_ok example_r_codepoints example_r_punycode

let test_decode_example_s () =
  check_decode_ok example_s_codepoints example_s_punycode

(* ------------------------------------------------------------------------ *)
(* Encode tests                                                             *)
(* ------------------------------------------------------------------------ *)

(* Only a subset of the vectors is exercised for encoding. For the vectors
   with uppercase basic code points that are covered (L, M, N, O) the RFC
   string is lowercased before comparison.
   NOTE(review): presumably [Punycode.encode] emits lowercase basic code
   points - confirm against the library before adding the remaining
   mixed-case vectors. *)

let test_encode_arabic () =
  check_encode_ok arabic_punycode (codepoints_of_hex_list arabic_codepoints)

let test_encode_chinese_simplified () =
  check_encode_ok chinese_simplified_punycode
    (codepoints_of_hex_list chinese_simplified_codepoints)

let test_encode_chinese_traditional () =
  check_encode_ok chinese_traditional_punycode
    (codepoints_of_hex_list chinese_traditional_codepoints)

let test_encode_hebrew () =
  check_encode_ok hebrew_punycode (codepoints_of_hex_list hebrew_codepoints)

let test_encode_hindi () =
  check_encode_ok hindi_punycode (codepoints_of_hex_list hindi_codepoints)

let test_encode_japanese () =
  check_encode_ok japanese_punycode (codepoints_of_hex_list japanese_codepoints)

let test_encode_korean () =
  check_encode_ok korean_punycode (codepoints_of_hex_list korean_codepoints)

let test_encode_example_l () =
  check_encode_ok
    (String.lowercase_ascii example_l_punycode)
    (codepoints_of_hex_list example_l_codepoints)

let test_encode_example_m () =
  check_encode_ok
    (String.lowercase_ascii example_m_punycode)
    (codepoints_of_hex_list example_m_codepoints)

let test_encode_example_n () =
  check_encode_ok
    (String.lowercase_ascii example_n_punycode)
    (codepoints_of_hex_list example_n_codepoints)

let test_encode_example_o () =
  check_encode_ok
    (String.lowercase_ascii example_o_punycode)
    (codepoints_of_hex_list example_o_codepoints)

let test_encode_example_q () =
  check_encode_ok example_q_punycode
    (codepoints_of_hex_list example_q_codepoints)

let test_encode_example_r () =
  check_encode_ok example_r_punycode
    (codepoints_of_hex_list example_r_codepoints)

(* ------------------------------------------------------------------------ *)
(* UTF-8 roundtrip tests                                                    *)
(* ------------------------------------------------------------------------ *)

let test_utf8_roundtrip_german () = check_utf8_roundtrip "münchen"
let test_utf8_roundtrip_chinese () = check_utf8_roundtrip "中文"
let test_utf8_roundtrip_japanese () = check_utf8_roundtrip "日本語"
let test_utf8_roundtrip_arabic () = check_utf8_roundtrip "العربية"
let test_utf8_roundtrip_russian () = check_utf8_roundtrip "русский"
let test_utf8_roundtrip_greek () = check_utf8_roundtrip "ελληνικά"
let test_utf8_roundtrip_korean () = check_utf8_roundtrip "한국어"
let test_utf8_roundtrip_emoji () = check_utf8_roundtrip "hello👋world"

(* ------------------------------------------------------------------------ *)
(* Label encoding tests                                                     *)
(* ------------------------------------------------------------------------ *)

let test_label_encode_ascii () =
  or_punycode_failure "encode_label" (fun () ->
      check string "ascii passthrough" "example"
        (Punycode.encode_label "example"))

let test_label_encode_german () =
  or_punycode_failure "encode_label" (fun () ->
      check string "german label" "xn--mnchen-3ya"
        (Punycode.encode_label "münchen"))

let test_label_decode_german () =
  or_punycode_failure "decode_label" (fun () ->
      check string "german decode" "münchen"
        (Punycode.decode_label "xn--mnchen-3ya"))

(* ------------------------------------------------------------------------ *)
(* IDNA tests                                                               *)
(* ------------------------------------------------------------------------ *)

let test_idna_to_ascii_simple () =
  or_idna_failure "to_ascii" (fun () ->
      check string "idna to_ascii" "xn--mnchen-3ya.example.com"
        (Punycode_idna.to_ascii "münchen.example.com"))

let test_idna_to_unicode_simple () =
  or_idna_failure "to_unicode" (fun () ->
      check string "idna to_unicode" "münchen.example.com"
        (Punycode_idna.to_unicode "xn--mnchen-3ya.example.com"))

let test_idna_roundtrip () =
  let original = "münchen.example.com" in
  or_idna_failure "roundtrip" (fun () ->
      let ascii = Punycode_idna.to_ascii original in
      let unicode = Punycode_idna.to_unicode ascii in
      check string "idna roundtrip" original unicode)

let test_idna_all_ascii () =
  or_idna_failure "to_ascii" (fun () ->
      check string "all ascii passthrough" "www.example.com"
        (Punycode_idna.to_ascii "www.example.com"))

let test_idna_mixed_labels () =
  or_idna_failure "to_ascii" (fun () ->
      let result = Punycode_idna.to_ascii "日本語.example.com" in
      (* Check that result starts with xn-- and ends with .example.com *)
      check bool "has ace prefix" true (Punycode.has_ace_prefix result);
      check bool "ends with example.com" true
        (String.ends_with ~suffix:".example.com" result))

(* ------------------------------------------------------------------------ *)
(* Case annotation tests                                                    *)
(* ------------------------------------------------------------------------ *)

let test_case_annotation_decode () =
  (* RFC example (P): uppercase letters indicate case flags *)
  or_punycode_failure "decode_with_case" (fun () ->
      let codepoints, case_flags =
        Punycode.decode_with_case example_p_punycode
      in
      check int "codepoints length"
        (List.length example_p_codepoints)
        (Array.length codepoints);
      check int "case_flags length" (Array.length codepoints)
        (Array.length case_flags);
      (* M should be uppercase *)
      check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase);
      (* a should be lowercase *)
      check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase))

let test_case_annotation_encode () =
  (* "abc" *)
  let codepoints = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in
  let case_flags =
    [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |]
  in
  or_punycode_failure "encode_with_case" (fun () ->
      (* Should encode as "AbC-" (basic code points with case annotation) *)
      check string "case encoded" "AbC-"
        (Punycode.encode_with_case codepoints case_flags))

(* ------------------------------------------------------------------------ *)
(* Edge case tests                                                          *)
(* ------------------------------------------------------------------------ *)

let test_empty_input () =
  match Punycode.encode [||] with
  | result -> check string "empty encode" "" result
  | exception Punycode.Error _ -> fail "empty encode should succeed"

let test_empty_decode () =
  match Punycode.decode "" with
  | result -> check int "empty decode length" 0 (Array.length result)
  | exception Punycode.Error _ -> fail "empty decode should succeed"

let test_pure_ascii () =
  let input = codepoints_of_string "hello" in
  or_punycode_failure "encode" (fun () ->
      check string "pure ascii" "hello-" (Punycode.encode input))

(* The three tests below expect one specific [Punycode.Error] reason. A
   successful call and any other reason are both reported as failures. *)

let test_invalid_digit () =
  match Punycode.decode "hello!" with
  | _ -> fail "should fail on invalid digit"
  | exception Punycode.Error (Punycode.Invalid_digit _) -> ()
  | exception Punycode.Error e ->
      fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)

let test_label_too_long () =
  let long_label = String.make 100 'a' in
  match Punycode.encode_label long_label with
  | _ -> fail "should fail on long label"
  | exception Punycode.Error (Punycode.Label_too_long _) -> ()
  | exception Punycode.Error e ->
      fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)

let test_empty_label () =
  match Punycode.encode_label "" with
  | _ -> fail "should fail on empty label"
  | exception Punycode.Error Punycode.Empty_label -> ()
  | exception Punycode.Error e ->
      fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)

(* ------------------------------------------------------------------------ *)
(* Validation tests                                                         *)
(* ------------------------------------------------------------------------ *)

let test_is_basic () =
  check bool "space is basic" true (Punycode.is_basic (Uchar.of_int 0x20));
  check bool "A is basic" true (Punycode.is_basic (Uchar.of_int 0x41));
  check bool "DEL is basic" true (Punycode.is_basic (Uchar.of_int 0x7F));
  check bool "0x80 not basic" false (Punycode.is_basic (Uchar.of_int 0x80));
  check bool "ü not basic" false (Punycode.is_basic (Uchar.of_int 0xFC))

let test_is_ascii_string () =
  check bool "ascii string" true (Punycode.is_ascii_string "hello");
  check bool "non-ascii string" false (Punycode.is_ascii_string "héllo");
  check bool "empty string" true (Punycode.is_ascii_string "")

let test_has_ace_prefix () =
  check bool "has xn--" true (Punycode.has_ace_prefix "xn--mnchen-3ya");
  check bool "has XN--" true (Punycode.has_ace_prefix "XN--mnchen-3ya");
  check bool "no prefix" false (Punycode.has_ace_prefix "example");
  check bool "too short" false (Punycode.has_ace_prefix "xn-")

(* ------------------------------------------------------------------------ *)
(* Test suites                                                              *)
(* ------------------------------------------------------------------------ *)

let decode_tests =
  [
    ("Arabic", `Quick, test_decode_arabic);
    ("Chinese simplified", `Quick, test_decode_chinese_simplified);
    ("Chinese traditional", `Quick, test_decode_chinese_traditional);
    ("Czech", `Quick, test_decode_czech);
    ("Hebrew", `Quick, test_decode_hebrew);
    ("Hindi", `Quick, test_decode_hindi);
    ("Japanese", `Quick, test_decode_japanese);
    ("Korean", `Quick, test_decode_korean);
    ("Russian", `Quick, test_decode_russian);
    ("Spanish", `Quick, test_decode_spanish);
    ("Vietnamese", `Quick, test_decode_vietnamese);
    ("Example L (mixed)", `Quick, test_decode_example_l);
    ("Example M (mixed)", `Quick, test_decode_example_m);
    ("Example N (mixed)", `Quick, test_decode_example_n);
    ("Example O (mixed)", `Quick, test_decode_example_o);
    ("Example P (mixed)", `Quick, test_decode_example_p);
    ("Example Q (mixed)", `Quick, test_decode_example_q);
    ("Example R", `Quick, test_decode_example_r);
    ("Example S (ASCII)", `Quick, test_decode_example_s);
  ]

let encode_tests =
  [
    ("Arabic", `Quick, test_encode_arabic);
    ("Chinese simplified", `Quick, test_encode_chinese_simplified);
    ("Chinese traditional", `Quick, test_encode_chinese_traditional);
    ("Hebrew", `Quick, test_encode_hebrew);
    ("Hindi", `Quick, test_encode_hindi);
    ("Japanese", `Quick, test_encode_japanese);
    ("Korean", `Quick, test_encode_korean);
    ("Example L (mixed)", `Quick, test_encode_example_l);
    ("Example M (mixed)", `Quick, test_encode_example_m);
    ("Example N (mixed)", `Quick, test_encode_example_n);
    ("Example O (mixed)", `Quick, test_encode_example_o);
    ("Example Q (mixed)", `Quick, test_encode_example_q);
    ("Example R", `Quick, test_encode_example_r);
  ]

let utf8_tests =
  [
    ("German roundtrip", `Quick, test_utf8_roundtrip_german);
    ("Chinese roundtrip", `Quick, test_utf8_roundtrip_chinese);
    ("Japanese roundtrip", `Quick, test_utf8_roundtrip_japanese);
    ("Arabic roundtrip", `Quick, test_utf8_roundtrip_arabic);
    ("Russian roundtrip", `Quick, test_utf8_roundtrip_russian);
    ("Greek roundtrip", `Quick, test_utf8_roundtrip_greek);
    ("Korean roundtrip", `Quick, test_utf8_roundtrip_korean);
    ("Emoji roundtrip", `Quick, test_utf8_roundtrip_emoji);
  ]

let label_tests =
  [
    ("ASCII passthrough", `Quick, test_label_encode_ascii);
    ("German encode", `Quick, test_label_encode_german);
    ("German decode", `Quick, test_label_decode_german);
  ]

let idna_tests =
  [
    ("to_ascii simple", `Quick, test_idna_to_ascii_simple);
    ("to_unicode simple", `Quick, test_idna_to_unicode_simple);
    ("roundtrip", `Quick, test_idna_roundtrip);
    ("all ASCII", `Quick, test_idna_all_ascii);
    ("mixed labels", `Quick, test_idna_mixed_labels);
  ]

let case_tests =
  [
    ("decode with case", `Quick, test_case_annotation_decode);
    ("encode with case", `Quick, test_case_annotation_encode);
  ]

let edge_case_tests =
  [
    ("empty encode", `Quick, test_empty_input);
    ("empty decode", `Quick, test_empty_decode);
    ("pure ASCII", `Quick, test_pure_ascii);
    ("invalid digit", `Quick, test_invalid_digit);
    ("label too long", `Quick, test_label_too_long);
    ("empty label", `Quick, test_empty_label);
  ]

let validation_tests =
  [
    ("is_basic", `Quick, test_is_basic);
    ("is_ascii_string", `Quick, test_is_ascii_string);
    ("has_ace_prefix", `Quick, test_has_ace_prefix);
  ]

let () =
  run "Punycode"
    [
      ("decode RFC vectors", decode_tests);
      ("encode RFC vectors", encode_tests);
      ("UTF-8 roundtrip", utf8_tests);
      ("label operations", label_tests);
      ("IDNA operations", idna_tests);
      ("case annotation", case_tests);
      ("edge cases", edge_case_tests);
      ("validation", validation_tests);
    ]