Punycode (RFC3492) in OCaml
at main 861 lines 22 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(* Comprehensive tests for Punycode (RFC 3492) implementation *) 7 8open Alcotest 9module Punycode = Punycode 10module Punycode_idna = Punycode_idna 11 12(* Helper to convert hex code points to Uchar array *) 13let codepoints_of_hex_list hex_list = 14 Array.of_list (List.map Uchar.of_int hex_list) 15 16(* Helper to convert string to code points *) 17let codepoints_of_string s = 18 let acc = ref [] in 19 let i = ref 0 in 20 while !i < String.length s do 21 let dec = String.get_utf_8_uchar s !i in 22 acc := Uchar.utf_decode_uchar dec :: !acc; 23 i := !i + Uchar.utf_decode_length dec 24 done; 25 Array.of_list (List.rev !acc) 26 27(* Test result helper *) 28let check_encode_ok expected input = 29 try 30 let result = Punycode.encode input in 31 check string "encode" expected result 32 with Punycode.Error e -> 33 failf "encode failed: %a" Punycode.pp_error_reason e 34 35let check_decode_ok expected input = 36 try 37 let result = Punycode.decode input in 38 let expected_arr = codepoints_of_hex_list expected in 39 check int "length" (Array.length expected_arr) (Array.length result); 40 Array.iteri 41 (fun i u -> 42 check int (Fmt.str "char %d" i) 43 (Uchar.to_int expected_arr.(i)) 44 (Uchar.to_int u)) 45 result 46 with Punycode.Error e -> 47 failf "decode failed: %a" Punycode.pp_error_reason e 48 49let check_utf8_roundtrip s = 50 try 51 let encoded = Punycode.encode_utf8 s in 52 let decoded = Punycode.decode_utf8 encoded in 53 check string "roundtrip" s decoded 54 with Punycode.Error e -> 55 failf "roundtrip failed: %a" Punycode.pp_error_reason e 56 57(* RFC 3492 Section 7.1 Test Vectors *) 58 59(* (A) Arabic (Egyptian) *) 60let arabic_codepoints = 61 [ 62 0x0644; 63 0x064A; 64 0x0647; 65 0x0645; 66 0x0627; 67 0x0628; 68 0x062A; 69 0x0643; 70 0x0644; 71 0x0645; 72 0x0648; 73 0x0634; 74 0x0639; 75 0x0631; 76 0x0628; 77 0x064A; 78 0x061F; 79 ] 80 81let arabic_punycode = "egbpdaj6bu4bxfgehfvwxn" 82 83(* (B) Chinese (simplified) *) 84let chinese_simplified_codepoints = 85 [ 0x4ED6; 0x4EEC; 0x4E3A; 0x4EC0; 0x4E48; 0x4E0D; 0x8BF4; 0x4E2D; 0x6587 ] 86 87let chinese_simplified_punycode = "ihqwcrb4cv8a8dqg056pqjye" 88 89(* (C) Chinese (traditional) *) 90let chinese_traditional_codepoints = 91 [ 0x4ED6; 0x5011; 0x7232; 0x4EC0; 0x9EBD; 0x4E0D; 0x8AAA; 0x4E2D; 0x6587 ] 92 93let chinese_traditional_punycode = "ihqwctvzc91f659drss3x8bo0yb" 94 95(* (D) Czech *) 96let czech_codepoints = 97 [ 98 0x0050; 99 0x0072; 100 0x006F; 101 0x010D; 102 0x0070; 103 0x0072; 104 0x006F; 105 0x0073; 106 0x0074; 107 0x011B; 108 0x006E; 109 0x0065; 110 0x006D; 111 0x006C; 112 0x0075; 113 0x0076; 114 0x00ED; 115 0x010D; 116 0x0065; 117 0x0073; 118 0x006B; 119 0x0079; 120 ] 121 122let czech_punycode = "Proprostnemluvesky-uyb24dma41a" 123 124(* (E) Hebrew *) 125let hebrew_codepoints = 126 [ 127 0x05DC; 128 0x05DE; 129 0x05D4; 130 0x05D4; 131 0x05DD; 132 0x05E4; 133 0x05E9; 134 0x05D5; 135 0x05D8; 136 0x05DC; 137 0x05D0; 138 0x05DE; 139 0x05D3; 140 0x05D1; 141 0x05E8; 142 0x05D9; 143 0x05DD; 144 0x05E2; 145 0x05D1; 146 0x05E8; 147 0x05D9; 148 0x05EA; 149 ] 150 151let hebrew_punycode = "4dbcagdahymbxekheh6e0a7fei0b" 152 153(* (F) Hindi (Devanagari) *) 154let hindi_codepoints = 155 [ 156 0x092F; 157 0x0939; 158 0x0932; 159 0x094B; 160 0x0917; 161 0x0939; 162 0x093F; 163 0x0928; 164 0x094D; 165 0x0926; 166 0x0940; 167 0x0915; 168 0x094D; 169 0x092F; 170 0x094B; 171 0x0902; 172 0x0928; 173 0x0939; 174 0x0940; 175 0x0902; 176 0x092C; 177 0x094B; 178 0x0932; 179 0x0938; 180 0x0915; 181 0x0924; 182 0x0947; 183 0x0939; 184 0x0948; 185 0x0902; 186 ] 187 188let hindi_punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" 189 190(* (G) Japanese (kanji and hiragana) *) 191let japanese_codepoints = 192 [ 193 0x306A; 194 0x305C; 195 0x307F; 196 0x3093; 197 0x306A; 198 0x65E5; 199 0x672C; 200 0x8A9E; 201 0x3092; 202 0x8A71; 203 0x3057; 204 0x3066; 205 0x304F; 206 0x308C; 207 0x306A; 208 0x3044; 209 0x306E; 210 0x304B; 211 ] 212 213let japanese_punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" 214 215(* (H) Korean (Hangul syllables) *) 216let korean_codepoints = 217 [ 218 0xC138; 219 0xACC4; 220 0xC758; 221 0xBAA8; 222 0xB4E0; 223 0xC0AC; 224 0xB78C; 225 0xB4E4; 226 0xC774; 227 0xD55C; 228 0xAD6D; 229 0xC5B4; 230 0xB97C; 231 0xC774; 232 0xD574; 233 0xD55C; 234 0xB2E4; 235 0xBA74; 236 0xC5BC; 237 0xB9C8; 238 0xB098; 239 0xC88B; 240 0xC744; 241 0xAE4C; 242 ] 243 244let korean_punycode = 245 "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" 246 247(* (I) Russian (Cyrillic) *) 248let russian_codepoints = 249 [ 250 0x043F; 251 0x043E; 252 0x0447; 253 0x0435; 254 0x043C; 255 0x0443; 256 0x0436; 257 0x0435; 258 0x043E; 259 0x043D; 260 0x0438; 261 0x043D; 262 0x0435; 263 0x0433; 264 0x043E; 265 0x0432; 266 0x043E; 267 0x0440; 268 0x044F; 269 0x0442; 270 0x043F; 271 0x043E; 272 0x0440; 273 0x0443; 274 0x0441; 275 0x0441; 276 0x043A; 277 0x0438; 278 ] 279 280let russian_punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l" 281 282(* (J) Spanish *) 283let spanish_codepoints = 284 [ 285 0x0050; 286 0x006F; 287 0x0072; 288 0x0071; 289 0x0075; 290 0x00E9; 291 0x006E; 292 0x006F; 293 0x0070; 294 0x0075; 295 0x0065; 296 0x0064; 297 0x0065; 298 0x006E; 299 0x0073; 300 0x0069; 301 0x006D; 302 0x0070; 303 0x006C; 304 0x0065; 305 0x006D; 306 0x0065; 307 0x006E; 308 0x0074; 309 0x0065; 310 0x0068; 311 0x0061; 312 0x0062; 313 0x006C; 314 0x0061; 315 0x0072; 316 0x0065; 317 0x006E; 318 0x0045; 319 0x0073; 320 0x0070; 321 0x0061; 322 0x00F1; 323 0x006F; 324 0x006C; 325 ] 326 327let spanish_punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a" 328 329(* (K) Vietnamese *) 330let vietnamese_codepoints = 331 [ 332 0x0054; 333 0x1EA1; 334 0x0069; 335 0x0073; 336 0x0061; 337 0x006F; 338 0x0068; 339 0x1ECD; 340 0x006B; 341 0x0068; 342 0x00F4; 343 0x006E; 344 0x0067; 345 0x0074; 346 0x0068; 347 0x1EC3; 348 0x0063; 349 0x0068; 350 0x1EC9; 351 0x006E; 352 0x00F3; 353 0x0069; 354 0x0074; 355 0x0069; 356 0x1EBF; 357 0x006E; 358 0x0067; 359 0x0056; 360 0x0069; 361 0x1EC7; 362 0x0074; 363 ] 364 365let vietnamese_punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" 366 367(* (L) 3年B組金八先生 - Japanese with ASCII *) 368let example_l_codepoints = 369 [ 0x0033; 0x5E74; 0x0042; 0x7D44; 0x91D1; 0x516B; 0x5148; 0x751F ] 370 371let example_l_punycode = "3B-ww4c5e180e575a65lsy2b" 372 373(* (M) 安室奈美恵-with-SUPER-MONKEYS *) 374let example_m_codepoints = 375 [ 376 0x5B89; 377 0x5BA4; 378 0x5948; 379 0x7F8E; 380 0x6075; 381 0x002D; 382 0x0077; 383 0x0069; 384 0x0074; 385 0x0068; 386 0x002D; 387 0x0053; 388 0x0055; 389 0x0050; 390 0x0045; 391 0x0052; 392 0x002D; 393 0x004D; 394 0x004F; 395 0x004E; 396 0x004B; 397 0x0045; 398 0x0059; 399 0x0053; 400 ] 401 402let example_m_punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n" 403 404(* (N) Hello-Another-Way-それぞれの場所 *) 405let example_n_codepoints = 406 [ 407 0x0048; 408 0x0065; 409 0x006C; 410 0x006C; 411 0x006F; 412 0x002D; 413 0x0041; 414 0x006E; 415 0x006F; 416 0x0074; 417 0x0068; 418 0x0065; 419 0x0072; 420 0x002D; 421 0x0057; 422 0x0061; 423 0x0079; 424 0x002D; 425 0x305D; 426 0x308C; 427 0x305E; 428 0x308C; 429 0x306E; 430 0x5834; 431 0x6240; 432 ] 433 434let example_n_punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b" 435 436(* (O) ひとつ屋根の下2 *) 437let example_o_codepoints = 438 [ 0x3072; 0x3068; 0x3064; 0x5C4B; 0x6839; 0x306E; 0x4E0B; 0x0032 ] 439 440let example_o_punycode = "2-u9tlzr9756bt3uc0v" 441 442(* (P) MaijでKoiする5秒前 *) 443let example_p_codepoints = 444 [ 445 0x004D; 446 0x0061; 447 0x006A; 448 0x0069; 449 0x3067; 450 0x004B; 451 0x006F; 452 0x0069; 453 0x3059; 454 0x308B; 455 0x0035; 456 0x79D2; 457 0x524D; 458 ] 459 460let example_p_punycode = "MajiKoi5-783gue6qz075azm5e" 461 462(* (Q) パフィーdeルンバ *) 463let example_q_codepoints = 464 [ 0x30D1; 0x30D5; 0x30A3; 0x30FC; 0x0064; 0x0065; 0x30EB; 0x30F3; 0x30D0 ] 465 466let example_q_punycode = "de-jg4avhby1noc0d" 467 468(* (R) そのスピードで *) 469let example_r_codepoints = 470 [ 0x305D; 0x306E; 0x30B9; 0x30D4; 0x30FC; 0x30C9; 0x3067 ] 471 472let example_r_punycode = "d9juau41awczczp" 473 474(* (S) -> $1.00 <- (pure ASCII) *) 475let example_s_codepoints = 476 [ 477 0x002D; 478 0x003E; 479 0x0020; 480 0x0024; 481 0x0031; 482 0x002E; 483 0x0030; 484 0x0030; 485 0x0020; 486 0x003C; 487 0x002D; 488 ] 489 490let example_s_punycode = "-> $1.00 <--" 491 492(* Test functions *) 493 494let test_decode_arabic () = check_decode_ok arabic_codepoints arabic_punycode 495 496let test_decode_chinese_simplified () = 497 check_decode_ok chinese_simplified_codepoints chinese_simplified_punycode 498 499let test_decode_chinese_traditional () = 500 check_decode_ok chinese_traditional_codepoints chinese_traditional_punycode 501 502let test_decode_hebrew () = check_decode_ok hebrew_codepoints hebrew_punycode 503let test_decode_hindi () = check_decode_ok hindi_codepoints hindi_punycode 504 505let test_decode_japanese () = 506 check_decode_ok japanese_codepoints japanese_punycode 507 508let test_decode_korean () = check_decode_ok korean_codepoints korean_punycode 509 510let test_decode_example_l () = 511 check_decode_ok example_l_codepoints example_l_punycode 512 513let test_decode_example_m () = 514 check_decode_ok example_m_codepoints example_m_punycode 515 516let test_decode_example_n () = 517 check_decode_ok example_n_codepoints example_n_punycode 518 519let test_decode_example_o () = 520 check_decode_ok example_o_codepoints example_o_punycode 521 522let test_decode_example_q () = 523 check_decode_ok example_q_codepoints example_q_punycode 524 525let test_decode_example_r () = 526 check_decode_ok example_r_codepoints example_r_punycode 527 528let test_decode_czech () = check_decode_ok czech_codepoints czech_punycode 529 530let test_decode_russian () = 531 check_decode_ok russian_codepoints (String.lowercase_ascii russian_punycode) 532 533let test_decode_spanish () = check_decode_ok spanish_codepoints spanish_punycode 534 535let test_decode_vietnamese () = 536 check_decode_ok vietnamese_codepoints vietnamese_punycode 537 538let test_decode_example_p () = 539 check_decode_ok example_p_codepoints example_p_punycode 540 541let test_decode_example_s () = 542 check_decode_ok example_s_codepoints example_s_punycode 543 544let test_encode_arabic () = 545 check_encode_ok arabic_punycode (codepoints_of_hex_list arabic_codepoints) 546 547let test_encode_chinese_simplified () = 548 check_encode_ok chinese_simplified_punycode 549 (codepoints_of_hex_list chinese_simplified_codepoints) 550 551let test_encode_chinese_traditional () = 552 check_encode_ok chinese_traditional_punycode 553 (codepoints_of_hex_list chinese_traditional_codepoints) 554 555let test_encode_hebrew () = 556 check_encode_ok hebrew_punycode (codepoints_of_hex_list hebrew_codepoints) 557 558let test_encode_hindi () = 559 check_encode_ok hindi_punycode (codepoints_of_hex_list hindi_codepoints) 560 561let test_encode_japanese () = 562 check_encode_ok japanese_punycode (codepoints_of_hex_list japanese_codepoints) 563 564let test_encode_korean () = 565 check_encode_ok korean_punycode (codepoints_of_hex_list korean_codepoints) 566 567let test_encode_example_l () = 568 check_encode_ok 569 (String.lowercase_ascii example_l_punycode) 570 (codepoints_of_hex_list example_l_codepoints) 571 572let test_encode_example_m () = 573 check_encode_ok 574 (String.lowercase_ascii example_m_punycode) 575 (codepoints_of_hex_list example_m_codepoints) 576 577let test_encode_example_n () = 578 check_encode_ok 579 (String.lowercase_ascii example_n_punycode) 580 (codepoints_of_hex_list example_n_codepoints) 581 582let test_encode_example_o () = 583 check_encode_ok 584 (String.lowercase_ascii example_o_punycode) 585 (codepoints_of_hex_list example_o_codepoints) 586 587let test_encode_example_q () = 588 check_encode_ok example_q_punycode 589 (codepoints_of_hex_list example_q_codepoints) 590 591let test_encode_example_r () = 592 check_encode_ok example_r_punycode 593 (codepoints_of_hex_list example_r_codepoints) 594 595(* UTF-8 roundtrip tests *) 596let test_utf8_roundtrip_german () = check_utf8_roundtrip "münchen" 597let test_utf8_roundtrip_chinese () = check_utf8_roundtrip "中文" 598let test_utf8_roundtrip_japanese () = check_utf8_roundtrip "日本語" 599let test_utf8_roundtrip_arabic () = check_utf8_roundtrip "العربية" 600let test_utf8_roundtrip_russian () = check_utf8_roundtrip "русский" 601let test_utf8_roundtrip_greek () = check_utf8_roundtrip "ελληνικά" 602let test_utf8_roundtrip_korean () = check_utf8_roundtrip "한국어" 603let test_utf8_roundtrip_emoji () = check_utf8_roundtrip "hello👋world" 604 605(* Label encoding tests *) 606let test_label_encode_ascii () = 607 try 608 let result = Punycode.encode_label "example" in 609 check string "ascii passthrough" "example" result 610 with Punycode.Error e -> 611 failf "encode_label failed: %a" Punycode.pp_error_reason e 612 613let test_label_encode_german () = 614 try 615 let result = Punycode.encode_label "münchen" in 616 check string "german label" "xn--mnchen-3ya" result 617 with Punycode.Error e -> 618 failf "encode_label failed: %a" Punycode.pp_error_reason e 619 620let test_label_decode_german () = 621 try 622 let result = Punycode.decode_label "xn--mnchen-3ya" in 623 check string "german decode" "münchen" result 624 with Punycode.Error e -> 625 failf "decode_label failed: %a" Punycode.pp_error_reason e 626 627(* IDNA tests *) 628let test_idna_to_ascii_simple () = 629 try 630 let result = Punycode_idna.to_ascii "münchen.example.com" in 631 check string "idna to_ascii" "xn--mnchen-3ya.example.com" result 632 with Punycode_idna.Error e -> 633 failf "to_ascii failed: %a" Punycode_idna.pp_error_reason e 634 635let test_idna_to_unicode_simple () = 636 try 637 let result = Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" in 638 check string "idna to_unicode" "münchen.example.com" result 639 with Punycode_idna.Error e -> 640 failf "to_unicode failed: %a" Punycode_idna.pp_error_reason e 641 642let test_idna_roundtrip () = 643 let original = "münchen.example.com" in 644 try 645 let ascii = Punycode_idna.to_ascii original in 646 let unicode = Punycode_idna.to_unicode ascii in 647 check string "idna roundtrip" original unicode 648 with Punycode_idna.Error e -> 649 failf "roundtrip failed: %a" Punycode_idna.pp_error_reason e 650 651let test_idna_all_ascii () = 652 try 653 let result = Punycode_idna.to_ascii "www.example.com" in 654 check string "all ascii passthrough" "www.example.com" result 655 with Punycode_idna.Error e -> 656 failf "to_ascii failed: %a" Punycode_idna.pp_error_reason e 657 658let test_idna_mixed_labels () = 659 try 660 let result = Punycode_idna.to_ascii "日本語.example.com" in 661 (* Check that result starts with xn-- and ends with .example.com *) 662 check bool "has ace prefix" true (Punycode.has_ace_prefix result); 663 check bool "ends with example.com" true 664 (String.length result > 12 665 && String.sub result (String.length result - 12) 12 = ".example.com") 666 with Punycode_idna.Error e -> 667 failf "to_ascii failed: %a" Punycode_idna.pp_error_reason e 668 669(* Case annotation tests *) 670let test_case_annotation_decode () = 671 (* RFC example: uppercase letters indicate case flags *) 672 try 673 let codepoints, case_flags = 674 Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" 675 in 676 check int "codepoints length" 677 (List.length example_p_codepoints) 678 (Array.length codepoints); 679 check int "case_flags length" (Array.length codepoints) 680 (Array.length case_flags); 681 (* M should be uppercase *) 682 check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase); 683 (* a should be lowercase *) 684 check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase) 685 with Punycode.Error e -> 686 failf "decode_with_case failed: %a" Punycode.pp_error_reason e 687 688let test_case_annotation_encode () = 689 let codepoints = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in 690 (* "abc" *) 691 let case_flags = 692 [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |] 693 in 694 try 695 let result = Punycode.encode_with_case codepoints case_flags in 696 (* Should encode as "AbC-" (basic code points with case annotation) *) 697 check string "case encoded" "AbC-" result 698 with Punycode.Error e -> 699 failf "encode_with_case failed: %a" Punycode.pp_error_reason e 700 701(* Edge case tests *) 702let test_empty_input () = 703 try 704 let result = Punycode.encode [||] in 705 check string "empty encode" "" result 706 with Punycode.Error _ -> fail "empty encode should succeed" 707 708let test_empty_decode () = 709 try 710 let result = Punycode.decode "" in 711 check int "empty decode length" 0 (Array.length result) 712 with Punycode.Error _ -> fail "empty decode should succeed" 713 714let test_pure_ascii () = 715 let input = codepoints_of_string "hello" in 716 try 717 let result = Punycode.encode input in 718 check string "pure ascii" "hello-" result 719 with Punycode.Error e -> 720 failf "encode failed: %a" Punycode.pp_error_reason e 721 722let test_invalid_digit () = 723 try 724 ignore (Punycode.decode "hello!"); 725 fail "should fail on invalid digit" 726 with 727 | Punycode.Error (Punycode.Invalid_digit _) -> () 728 | Punycode.Error e -> failf "wrong error type: %a" Punycode.pp_error_reason e 729 730let test_label_too_long () = 731 let long_label = String.make 100 'a' in 732 try 733 ignore (Punycode.encode_label long_label); 734 fail "should fail on long label" 735 with 736 | Punycode.Error (Punycode.Label_too_long _) -> () 737 | Punycode.Error e -> failf "wrong error type: %a" Punycode.pp_error_reason e 738 739let test_empty_label () = 740 try 741 ignore (Punycode.encode_label ""); 742 fail "should fail on empty label" 743 with 744 | Punycode.Error Punycode.Empty_label -> () 745 | Punycode.Error e -> failf "wrong error type: %a" Punycode.pp_error_reason e 746 747(* Validation tests *) 748let test_is_basic () = 749 check bool "space is basic" true (Punycode.is_basic (Uchar.of_int 0x20)); 750 check bool "A is basic" true (Punycode.is_basic (Uchar.of_int 0x41)); 751 check bool "DEL is basic" true (Punycode.is_basic (Uchar.of_int 0x7F)); 752 check bool "0x80 not basic" false (Punycode.is_basic (Uchar.of_int 0x80)); 753 check bool "ü not basic" false (Punycode.is_basic (Uchar.of_int 0xFC)) 754 755let test_is_ascii_string () = 756 check bool "ascii string" true (Punycode.is_ascii_string "hello"); 757 check bool "non-ascii string" false (Punycode.is_ascii_string "héllo"); 758 check bool "empty string" true (Punycode.is_ascii_string "") 759 760let test_has_ace_prefix () = 761 check bool "has xn--" true (Punycode.has_ace_prefix "xn--mnchen-3ya"); 762 check bool "has XN--" true (Punycode.has_ace_prefix "XN--mnchen-3ya"); 763 check bool "no prefix" false (Punycode.has_ace_prefix "example"); 764 check bool "too short" false (Punycode.has_ace_prefix "xn-") 765 766(* Test suites *) 767let decode_tests = 768 [ 769 ("Arabic", `Quick, test_decode_arabic); 770 ("Chinese simplified", `Quick, test_decode_chinese_simplified); 771 ("Chinese traditional", `Quick, test_decode_chinese_traditional); 772 ("Czech", `Quick, test_decode_czech); 773 ("Hebrew", `Quick, test_decode_hebrew); 774 ("Hindi", `Quick, test_decode_hindi); 775 ("Japanese", `Quick, test_decode_japanese); 776 ("Korean", `Quick, test_decode_korean); 777 ("Russian", `Quick, test_decode_russian); 778 ("Spanish", `Quick, test_decode_spanish); 779 ("Vietnamese", `Quick, test_decode_vietnamese); 780 ("Example L (mixed)", `Quick, test_decode_example_l); 781 ("Example M (mixed)", `Quick, test_decode_example_m); 782 ("Example N (mixed)", `Quick, test_decode_example_n); 783 ("Example O (mixed)", `Quick, test_decode_example_o); 784 ("Example P (mixed)", `Quick, test_decode_example_p); 785 ("Example Q (mixed)", `Quick, test_decode_example_q); 786 ("Example R", `Quick, test_decode_example_r); 787 ("Example S (ASCII)", `Quick, test_decode_example_s); 788 ] 789 790let encode_tests = 791 [ 792 ("Arabic", `Quick, test_encode_arabic); 793 ("Chinese simplified", `Quick, test_encode_chinese_simplified); 794 ("Chinese traditional", `Quick, test_encode_chinese_traditional); 795 ("Hebrew", `Quick, test_encode_hebrew); 796 ("Hindi", `Quick, test_encode_hindi); 797 ("Japanese", `Quick, test_encode_japanese); 798 ("Korean", `Quick, test_encode_korean); 799 ("Example L (mixed)", `Quick, test_encode_example_l); 800 ("Example M (mixed)", `Quick, test_encode_example_m); 801 ("Example N (mixed)", `Quick, test_encode_example_n); 802 ("Example O (mixed)", `Quick, test_encode_example_o); 803 ("Example Q (mixed)", `Quick, test_encode_example_q); 804 ("Example R", `Quick, test_encode_example_r); 805 ] 806 807let utf8_tests = 808 [ 809 ("German roundtrip", `Quick, test_utf8_roundtrip_german); 810 ("Chinese roundtrip", `Quick, test_utf8_roundtrip_chinese); 811 ("Japanese roundtrip", `Quick, test_utf8_roundtrip_japanese); 812 ("Arabic roundtrip", `Quick, test_utf8_roundtrip_arabic); 813 ("Russian roundtrip", `Quick, test_utf8_roundtrip_russian); 814 ("Greek roundtrip", `Quick, test_utf8_roundtrip_greek); 815 ("Korean roundtrip", `Quick, test_utf8_roundtrip_korean); 816 ("Emoji roundtrip", `Quick, test_utf8_roundtrip_emoji); 817 ] 818 819let label_tests = 820 [ 821 ("ASCII passthrough", `Quick, test_label_encode_ascii); 822 ("German encode", `Quick, test_label_encode_german); 823 ("German decode", `Quick, test_label_decode_german); 824 ] 825 826let idna_tests = 827 [ 828 ("to_ascii simple", `Quick, test_idna_to_ascii_simple); 829 ("to_unicode simple", `Quick, test_idna_to_unicode_simple); 830 ("roundtrip", `Quick, test_idna_roundtrip); 831 ("all ASCII", `Quick, test_idna_all_ascii); 832 ("mixed labels", `Quick, test_idna_mixed_labels); 833 ] 834 835let case_tests = 836 [ 837 ("decode with case", `Quick, test_case_annotation_decode); 838 ("encode with case", `Quick, test_case_annotation_encode); 839 ] 840 841let edge_case_tests = 842 [ 843 ("empty encode", `Quick, test_empty_input); 844 ("empty decode", `Quick, test_empty_decode); 845 ("pure ASCII", `Quick, test_pure_ascii); 846 ("invalid digit", `Quick, test_invalid_digit); 847 ("label too long", `Quick, test_label_too_long); 848 ("empty label", `Quick, test_empty_label); 849 ] 850 851let validation_tests = 852 [ 853 ("is_basic", `Quick, test_is_basic); 854 ("is_ascii_string", `Quick, test_is_ascii_string); 855 ("has_ace_prefix", `Quick, test_has_ace_prefix); 856 ] 857 858let suite = 859 ( "punycode", 860 decode_tests @ encode_tests @ utf8_tests @ label_tests @ idna_tests 861 @ case_tests @ edge_case_tests @ validation_tests )