(* Punycode (RFC3492) in OCaml
   Snapshot at 4825af1dd20dce1f31dcdc8b5c6ea7cf720fc994 (855 lines, 22 kB). *)
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(* Comprehensive tests for Punycode (RFC 3492) implementation *)

open Alcotest
module Punycode = Punycode
module Punycode_idna = Punycode_idna

(* Turn a list of integer code points (written in hex by the callers) into
   a Uchar array. Uchar.of_int rejects integers that are not Unicode scalar
   values. *)
let codepoints_of_hex_list hex_list =
  hex_list |> List.map Uchar.of_int |> Array.of_list

(* Decode a UTF-8 string into its array of code points, walking the string
   one decoded character at a time. *)
let codepoints_of_string s =
  let len = String.length s in
  let rec collect pos rev_acc =
    if pos >= len then Array.of_list (List.rev rev_acc)
    else
      let dec = String.get_utf_8_uchar s pos in
      collect
        (pos + Uchar.utf_decode_length dec)
        (Uchar.utf_decode_uchar dec :: rev_acc)
  in
  collect 0 []

(* Assert that Punycode.encode succeeds on [input] and yields [expected]. *)
let check_encode_ok expected input =
  Result.fold (Punycode.encode input)
    ~ok:(fun encoded -> check string "encode" expected encoded)
    ~error:(fun e ->
      fail (Format.asprintf "encode failed: %a" Punycode.pp_error e))

(* Assert that Punycode.decode succeeds on [input] and yields exactly the
   code points listed in [expected] (a list of integers). The length is
   checked first, so the per-character comparison never indexes past the end
   of the expected array. *)
let check_decode_ok expected input =
  match Punycode.decode input with
  | Error e -> fail (Format.asprintf "decode failed: %a" Punycode.pp_error e)
  | Ok decoded ->
      let wanted = codepoints_of_hex_list expected in
      check int "length" (Array.length wanted) (Array.length decoded);
      decoded
      |> Array.iteri (fun idx got ->
             let label = Printf.sprintf "char %d" idx in
             check int label (Uchar.to_int wanted.(idx)) (Uchar.to_int got))

(* Assert that encode_utf8 followed by decode_utf8 returns the original
   string [s]; a failure at either stage is reported with its own message. *)
let check_utf8_roundtrip s =
  match Punycode.encode_utf8 s with
  | Ok encoded ->
      Result.fold
        (Punycode.decode_utf8 encoded)
        ~ok:(fun decoded -> check string "roundtrip" s decoded)
        ~error:(fun e ->
          fail (Format.asprintf "decode_utf8 failed: %a" Punycode.pp_error e))
  | Error e ->
      fail (Format.asprintf "encode_utf8 failed: %a" Punycode.pp_error e)

(* RFC 3492 Section 7.1 Test Vectors *)

(* Each vector below is a pair: the code points of the Unicode string (as
   integers) and the Punycode string they are expected to map to. *)

(* (A) Arabic (Egyptian) *)
let arabic_codepoints =
  [
    0x0644; 0x064A; 0x0647; 0x0645; 0x0627; 0x0628; 0x062A; 0x0643;
    0x0644; 0x0645; 0x0648; 0x0634; 0x0639; 0x0631; 0x0628; 0x064A;
    0x061F;
  ]

let arabic_punycode = "egbpdaj6bu4bxfgehfvwxn"

(* (B) Chinese (simplified) *)
let chinese_simplified_codepoints =
  [ 0x4ED6; 0x4EEC; 0x4E3A; 0x4EC0; 0x4E48; 0x4E0D; 0x8BF4; 0x4E2D; 0x6587 ]

let chinese_simplified_punycode = "ihqwcrb4cv8a8dqg056pqjye"

(* (C) Chinese (traditional) *)
let chinese_traditional_codepoints =
  [ 0x4ED6; 0x5011; 0x7232; 0x4EC0; 0x9EBD; 0x4E0D; 0x8AAA; 0x4E2D; 0x6587 ]

let chinese_traditional_punycode = "ihqwctvzc91f659drss3x8bo0yb"

(* (D) Czech *)
let czech_codepoints =
  [
    0x0050; 0x0072; 0x006F; 0x010D; 0x0070; 0x0072; 0x006F; 0x0073;
    0x0074; 0x011B; 0x006E; 0x0065; 0x006D; 0x006C; 0x0075; 0x0076;
    0x00ED; 0x010D; 0x0065; 0x0073; 0x006B; 0x0079;
  ]

let czech_punycode = "Proprostnemluvesky-uyb24dma41a"

(* (E) Hebrew *)
let hebrew_codepoints =
  [
    0x05DC; 0x05DE; 0x05D4; 0x05D4; 0x05DD; 0x05E4; 0x05E9; 0x05D5;
    0x05D8; 0x05DC; 0x05D0; 0x05DE; 0x05D3; 0x05D1; 0x05E8; 0x05D9;
    0x05DD; 0x05E2; 0x05D1; 0x05E8; 0x05D9; 0x05EA;
  ]

let hebrew_punycode = "4dbcagdahymbxekheh6e0a7fei0b"

(* (F) Hindi (Devanagari) *)
let hindi_codepoints =
  [
    0x092F; 0x0939; 0x0932; 0x094B; 0x0917; 0x0939; 0x093F; 0x0928;
    0x094D; 0x0926; 0x0940; 0x0915; 0x094D; 0x092F; 0x094B; 0x0902;
    0x0928; 0x0939; 0x0940; 0x0902; 0x092C; 0x094B; 0x0932; 0x0938;
    0x0915; 0x0924; 0x0947; 0x0939; 0x0948; 0x0902;
  ]

let hindi_punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"

(* (G) Japanese (kanji and hiragana) *)
let japanese_codepoints =
  [
    0x306A; 0x305C; 0x307F; 0x3093; 0x306A; 0x65E5; 0x672C; 0x8A9E;
    0x3092; 0x8A71; 0x3057; 0x3066; 0x304F; 0x308C; 0x306A; 0x3044;
    0x306E; 0x304B;
  ]

let japanese_punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"

(* (H) Korean (Hangul syllables) *)
let korean_codepoints =
  [
    0xC138; 0xACC4; 0xC758; 0xBAA8; 0xB4E0; 0xC0AC; 0xB78C; 0xB4E4;
    0xC774; 0xD55C; 0xAD6D; 0xC5B4; 0xB97C; 0xC774; 0xD574; 0xD55C;
    0xB2E4; 0xBA74; 0xC5BC; 0xB9C8; 0xB098; 0xC88B; 0xC744; 0xAE4C;
  ]

let korean_punycode =
  "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"

(* (I) Russian (Cyrillic) *)
let russian_codepoints =
  [
    0x043F; 0x043E; 0x0447; 0x0435; 0x043C; 0x0443; 0x0436; 0x0435;
    0x043E; 0x043D; 0x0438; 0x043D; 0x0435; 0x0433; 0x043E; 0x0432;
    0x043E; 0x0440; 0x044F; 0x0442; 0x043F; 0x043E; 0x0440; 0x0443;
    0x0441; 0x0441; 0x043A; 0x0438;
  ]

let russian_punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l"

(* (J) Spanish *)
let spanish_codepoints =
  [
    0x0050; 0x006F; 0x0072; 0x0071; 0x0075; 0x00E9; 0x006E; 0x006F;
    0x0070; 0x0075; 0x0065; 0x0064; 0x0065; 0x006E; 0x0073; 0x0069;
    0x006D; 0x0070; 0x006C; 0x0065; 0x006D; 0x0065; 0x006E; 0x0074;
    0x0065; 0x0068; 0x0061; 0x0062; 0x006C; 0x0061; 0x0072; 0x0065;
    0x006E; 0x0045; 0x0073; 0x0070; 0x0061; 0x00F1; 0x006F; 0x006C;
  ]

let spanish_punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a"

(* (K) Vietnamese *)
let vietnamese_codepoints =
  [
    0x0054; 0x1EA1; 0x0069; 0x0073; 0x0061; 0x006F; 0x0068; 0x1ECD;
    0x006B; 0x0068; 0x00F4; 0x006E; 0x0067; 0x0074; 0x0068; 0x1EC3;
    0x0063; 0x0068; 0x1EC9; 0x006E; 0x00F3; 0x0069; 0x0074; 0x0069;
    0x1EBF; 0x006E; 0x0067; 0x0056; 0x0069; 0x1EC7; 0x0074;
  ]

let vietnamese_punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"

(* (L) 3年B組金八先生 - Japanese with ASCII *)
let example_l_codepoints =
  [ 0x0033; 0x5E74; 0x0042; 0x7D44; 0x91D1; 0x516B; 0x5148; 0x751F ]

let example_l_punycode = "3B-ww4c5e180e575a65lsy2b"

(* (M) 安室奈美恵-with-SUPER-MONKEYS *)
let example_m_codepoints =
  [
    0x5B89; 0x5BA4; 0x5948; 0x7F8E; 0x6075; 0x002D; 0x0077; 0x0069;
    0x0074; 0x0068; 0x002D; 0x0053; 0x0055; 0x0050; 0x0045; 0x0052;
    0x002D; 0x004D; 0x004F; 0x004E; 0x004B; 0x0045; 0x0059; 0x0053;
  ]

let example_m_punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"

(* (N) Hello-Another-Way-それぞれの場所 *)
let example_n_codepoints =
  [
    0x0048; 0x0065; 0x006C; 0x006C; 0x006F; 0x002D; 0x0041; 0x006E;
    0x006F; 0x0074; 0x0068; 0x0065; 0x0072; 0x002D; 0x0057; 0x0061;
    0x0079; 0x002D; 0x305D; 0x308C; 0x305E; 0x308C; 0x306E; 0x5834;
    0x6240;
  ]

let example_n_punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b"

(* (O) ひとつ屋根の下2 *)
let example_o_codepoints =
  [ 0x3072; 0x3068; 0x3064; 0x5C4B; 0x6839; 0x306E; 0x4E0B; 0x0032 ]

let example_o_punycode = "2-u9tlzr9756bt3uc0v"

(* (P) MajiでKoiする5秒前 *)
let example_p_codepoints =
  [
    0x004D; 0x0061; 0x006A; 0x0069; 0x3067; 0x004B; 0x006F; 0x0069;
    0x3059; 0x308B; 0x0035; 0x79D2; 0x524D;
  ]

let example_p_punycode = "MajiKoi5-783gue6qz075azm5e"

(* (Q) パフィーdeルンバ *)
let example_q_codepoints =
  [ 0x30D1; 0x30D5; 0x30A3; 0x30FC; 0x0064; 0x0065; 0x30EB; 0x30F3; 0x30D0 ]

let example_q_punycode = "de-jg4avhby1noc0d"

(* (R) そのスピードで *)
let example_r_codepoints =
  [ 0x305D; 0x306E; 0x30B9; 0x30D4; 0x30FC; 0x30C9; 0x3067 ]

let example_r_punycode = "d9juau41awczczp"

(* (S) -> $1.00 <- (pure ASCII) *)
let example_s_codepoints =
  [
    0x002D; 0x003E; 0x0020; 0x0024; 0x0031; 0x002E; 0x0030; 0x0030;
    0x0020; 0x003C; 0x002D;
  ]

let example_s_punycode = "-> $1.00 <--"

(* Test functions *)

(* Decode tests: every vector (A)-(S) is decoded and compared code point by
   code point against its expected list. *)

let test_decode_arabic () = check_decode_ok arabic_codepoints arabic_punycode

let test_decode_chinese_simplified () =
  check_decode_ok chinese_simplified_codepoints chinese_simplified_punycode

let test_decode_chinese_traditional () =
  check_decode_ok chinese_traditional_codepoints chinese_traditional_punycode

let test_decode_hebrew () = check_decode_ok hebrew_codepoints hebrew_punycode
let test_decode_hindi () = check_decode_ok hindi_codepoints hindi_punycode

let test_decode_japanese () =
  check_decode_ok japanese_codepoints japanese_punycode

let test_decode_korean () = check_decode_ok korean_codepoints korean_punycode

let test_decode_example_l () =
  check_decode_ok example_l_codepoints example_l_punycode

let test_decode_example_m () =
  check_decode_ok example_m_codepoints example_m_punycode

let test_decode_example_n () =
  check_decode_ok example_n_codepoints example_n_punycode

let test_decode_example_o () =
  check_decode_ok example_o_codepoints example_o_punycode

let test_decode_example_q () =
  check_decode_ok example_q_codepoints example_q_punycode

let test_decode_example_r () =
  check_decode_ok example_r_codepoints example_r_punycode

let test_decode_czech () = check_decode_ok czech_codepoints czech_punycode

(* NOTE(review): [russian_punycode] as written in this file is already all
   lower case, so the lowercase_ascii call below is a no-op here. RFC 3492
   prints this vector with one upper-case letter - confirm which spelling the
   constant is meant to carry. *)
let test_decode_russian () =
  check_decode_ok russian_codepoints (String.lowercase_ascii russian_punycode)

let test_decode_spanish () = check_decode_ok spanish_codepoints spanish_punycode

let test_decode_vietnamese () =
  check_decode_ok vietnamese_codepoints vietnamese_punycode

let test_decode_example_p () =
  check_decode_ok example_p_codepoints example_p_punycode

let test_decode_example_s () =
  check_decode_ok example_s_codepoints example_s_punycode

(* Encode tests. Only a subset of the vectors is exercised in this direction:
   (D), (I), (J), (K), (P) and (S) have no encode test; the reason is not
   visible in this file. *)

let test_encode_arabic () =
  check_encode_ok arabic_punycode (codepoints_of_hex_list arabic_codepoints)

let test_encode_chinese_simplified () =
  check_encode_ok chinese_simplified_punycode
    (codepoints_of_hex_list chinese_simplified_codepoints)

let test_encode_chinese_traditional () =
  check_encode_ok chinese_traditional_punycode
    (codepoints_of_hex_list chinese_traditional_codepoints)

let test_encode_hebrew () =
  check_encode_ok hebrew_punycode (codepoints_of_hex_list hebrew_codepoints)

let test_encode_hindi () =
  check_encode_ok hindi_punycode (codepoints_of_hex_list hindi_codepoints)

let test_encode_japanese () =
  check_encode_ok japanese_punycode (codepoints_of_hex_list japanese_codepoints)

let test_encode_korean () =
  check_encode_ok korean_punycode (codepoints_of_hex_list korean_codepoints)

(* NOTE(review): examples L-O compare against the lower-cased expected
   string, which presumes Punycode.encode emits ASCII letters in lower case -
   confirm against the implementation. *)
let test_encode_example_l () =
  check_encode_ok
    (String.lowercase_ascii example_l_punycode)
    (codepoints_of_hex_list example_l_codepoints)

let test_encode_example_m () =
  check_encode_ok
    (String.lowercase_ascii example_m_punycode)
    (codepoints_of_hex_list example_m_codepoints)

let test_encode_example_n () =
  check_encode_ok
    (String.lowercase_ascii example_n_punycode)
    (codepoints_of_hex_list example_n_codepoints)

let test_encode_example_o () =
  check_encode_ok
    (String.lowercase_ascii example_o_punycode)
    (codepoints_of_hex_list example_o_codepoints)

let test_encode_example_q () =
  check_encode_ok example_q_punycode
    (codepoints_of_hex_list example_q_codepoints)

let test_encode_example_r () =
  check_encode_ok example_r_punycode
    (codepoints_of_hex_list example_r_codepoints)

(* UTF-8 roundtrip tests *)
let test_utf8_roundtrip_german () = check_utf8_roundtrip "münchen"
let test_utf8_roundtrip_chinese () = check_utf8_roundtrip "中文"
let test_utf8_roundtrip_japanese () = check_utf8_roundtrip "日本語"
let test_utf8_roundtrip_arabic () = check_utf8_roundtrip "العربية"
let test_utf8_roundtrip_russian () = check_utf8_roundtrip "русский"
let test_utf8_roundtrip_greek () = check_utf8_roundtrip "ελληνικά"
let test_utf8_roundtrip_korean () = check_utf8_roundtrip "한국어"
let test_utf8_roundtrip_emoji () = check_utf8_roundtrip "hello👋world"

(* Label encoding tests *)

(* An all-ASCII label is expected to come back unchanged. *)
let test_label_encode_ascii () =
  match Punycode.encode_label "example" with
  | Ok result -> check string "ascii passthrough" "example" result
  | Error e ->
      fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e)

(* A non-ASCII label is expected to gain the xn-- prefix. *)
let test_label_encode_german () =
  match Punycode.encode_label "münchen" with
  | Ok result -> check string "german label" "xn--mnchen-3ya" result
  | Error e ->
      fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e)

let test_label_decode_german () =
  match Punycode.decode_label "xn--mnchen-3ya" with
  | Ok result -> check string "german decode" "münchen" result
  | Error e ->
      fail (Format.asprintf "decode_label failed: %a" Punycode.pp_error e)

(* IDNA tests *)
let test_idna_to_ascii_simple () =
  match Punycode_idna.to_ascii "münchen.example.com" with
  | Ok result ->
      check string "idna to_ascii" "xn--mnchen-3ya.example.com" result
  | Error e ->
      fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)

let test_idna_to_unicode_simple () =
  match Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" with
  | Ok result -> check string "idna to_unicode" "münchen.example.com" result
  | Error e ->
      fail (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e)

(* to_ascii followed by to_unicode must give back the original domain. *)
let test_idna_roundtrip () =
  let original = "münchen.example.com" in
  match Punycode_idna.to_ascii original with
  | Error e ->
      fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)
  | Ok ascii -> (
      match Punycode_idna.to_unicode ascii with
      | Error e ->
          fail
            (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e)
      | Ok unicode -> check string "idna roundtrip" original unicode)

let test_idna_all_ascii () =
  match Punycode_idna.to_ascii "www.example.com" with
  | Ok result -> check string "all ascii passthrough" "www.example.com" result
  | Error e ->
      fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)

(* Only the shape of the result is checked here, not the exact encoding of
   the first label. *)
let test_idna_mixed_labels () =
  match Punycode_idna.to_ascii "日本語.example.com" with
  | Ok result ->
      (* Check that result starts with xn-- and ends with .example.com *)
      check bool "has ace prefix" true (Punycode.has_ace_prefix result);
      check bool "ends with example.com" true
        (String.ends_with ~suffix:".example.com" result)
  | Error e ->
      fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)

(* Case annotation tests *)

(* Decodes Example P with case information and checks that one flag is
   returned per code point, and that the flags of the first two characters
   follow the case of the letters M and a in the input. *)
let test_case_annotation_decode () =
  (* RFC example: uppercase letters indicate case flags *)
  match Punycode.decode_with_case example_p_punycode with
  | Ok (codepoints, case_flags) ->
      check int "codepoints length"
        (List.length example_p_codepoints)
        (Array.length codepoints);
      check int "case_flags length" (Array.length codepoints)
        (Array.length case_flags);
      (* M should be uppercase *)
      check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase);
      (* a should be lowercase *)
      check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase)
  | Error e ->
      fail (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error e)

let test_case_annotation_encode () =
  let codepoints = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in
  (* "abc" *)
  let case_flags =
    [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |]
  in
  match Punycode.encode_with_case codepoints case_flags with
  | Ok result ->
      (* Should encode as "AbC-" (basic code points with case annotation) *)
      check string "case encoded" "AbC-" result
  | Error e ->
      fail (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error e)

(* Edge case tests *)

(* Encoding an empty array must succeed with an empty string; on failure the
   underlying error is printed rather than discarded. *)
let test_empty_input () =
  match Punycode.encode [||] with
  | Ok result -> check string "empty encode" "" result
  | Error e ->
      fail
        (Format.asprintf "empty encode should succeed: %a" Punycode.pp_error e)

(* Decoding an empty string must succeed with an empty array; on failure the
   underlying error is printed rather than discarded. *)
let test_empty_decode () =
  match Punycode.decode "" with
  | Ok result -> check int "empty decode length" 0 (Array.length result)
  | Error e ->
      fail
        (Format.asprintf "empty decode should succeed: %a" Punycode.pp_error e)

(* A pure-ASCII input is expected to be followed by the delimiter. *)
let test_pure_ascii () =
  let input = codepoints_of_string "hello" in
  match Punycode.encode input with
  | Ok result -> check string "pure ascii" "hello-" result
  | Error e -> fail (Format.asprintf "encode failed: %a" Punycode.pp_error e)

let test_invalid_digit () =
  match Punycode.decode "hello!" with
  | Ok _ -> fail "should fail on invalid digit"
  | Error (Punycode.Invalid_digit _) -> ()
  | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)

(* NOTE(review): 100 bytes is presumably well above the label length limit
   enforced by Punycode.encode_label - the limit itself is not visible here. *)
let test_label_too_long () =
  let long_label = String.make 100 'a' in
  match Punycode.encode_label long_label with
  | Ok _ -> fail "should fail on long label"
  | Error (Punycode.Label_too_long _) -> ()
  | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)

let test_empty_label () =
  match Punycode.encode_label "" with
  | Ok _ -> fail "should fail on empty label"
  | Error Punycode.Empty_label -> ()
  | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)

(* Validation tests *)

(* Probes both sides of the 0x7F / 0x80 boundary. *)
let test_is_basic () =
  check bool "space is basic" true (Punycode.is_basic (Uchar.of_int 0x20));
  check bool "A is basic" true (Punycode.is_basic (Uchar.of_int 0x41));
  check bool "DEL is basic" true (Punycode.is_basic (Uchar.of_int 0x7F));
  check bool "0x80 not basic" false (Punycode.is_basic (Uchar.of_int 0x80));
  check bool "ü not basic" false (Punycode.is_basic (Uchar.of_int 0xFC))

let test_is_ascii_string () =
  check bool "ascii string" true (Punycode.is_ascii_string "hello");
  check bool "non-ascii string" false (Punycode.is_ascii_string "héllo");
  check bool "empty string" true (Punycode.is_ascii_string "")

(* The prefix is expected to match in either case, and a string shorter than
   the prefix must not match. *)
let test_has_ace_prefix () =
  check bool "has xn--" true (Punycode.has_ace_prefix "xn--mnchen-3ya");
  check bool "has XN--" true (Punycode.has_ace_prefix "XN--mnchen-3ya");
  check bool "no prefix" false (Punycode.has_ace_prefix "example");
  check bool "too short" false (Punycode.has_ace_prefix "xn-")

(* Test suites *)
let decode_tests =
  [
    ("Arabic", `Quick, test_decode_arabic);
    ("Chinese simplified", `Quick, test_decode_chinese_simplified);
    ("Chinese traditional", `Quick, test_decode_chinese_traditional);
    ("Czech", `Quick, test_decode_czech);
    ("Hebrew", `Quick, test_decode_hebrew);
    ("Hindi", `Quick, test_decode_hindi);
    ("Japanese", `Quick, test_decode_japanese);
    ("Korean", `Quick, test_decode_korean);
    ("Russian", `Quick, test_decode_russian);
    ("Spanish", `Quick, test_decode_spanish);
    ("Vietnamese", `Quick, test_decode_vietnamese);
    ("Example L (mixed)", `Quick, test_decode_example_l);
    ("Example M (mixed)", `Quick, test_decode_example_m);
    ("Example N (mixed)", `Quick, test_decode_example_n);
    ("Example O (mixed)", `Quick, test_decode_example_o);
    ("Example P (mixed)", `Quick, test_decode_example_p);
    ("Example Q (mixed)", `Quick, test_decode_example_q);
    ("Example R", `Quick, test_decode_example_r);
    ("Example S (ASCII)", `Quick, test_decode_example_s);
  ]

let encode_tests =
  [
    ("Arabic", `Quick, test_encode_arabic);
    ("Chinese simplified", `Quick, test_encode_chinese_simplified);
    ("Chinese traditional", `Quick, test_encode_chinese_traditional);
    ("Hebrew", `Quick, test_encode_hebrew);
    ("Hindi", `Quick, test_encode_hindi);
    ("Japanese", `Quick, test_encode_japanese);
    ("Korean", `Quick, test_encode_korean);
    ("Example L (mixed)", `Quick, test_encode_example_l);
    ("Example M (mixed)", `Quick, test_encode_example_m);
    ("Example N (mixed)", `Quick, test_encode_example_n);
    ("Example O (mixed)", `Quick, test_encode_example_o);
    ("Example Q (mixed)", `Quick, test_encode_example_q);
    ("Example R", `Quick, test_encode_example_r);
  ]

let utf8_tests =
  [
    ("German roundtrip", `Quick, test_utf8_roundtrip_german);
    ("Chinese roundtrip", `Quick, test_utf8_roundtrip_chinese);
    ("Japanese roundtrip", `Quick, test_utf8_roundtrip_japanese);
    ("Arabic roundtrip", `Quick, test_utf8_roundtrip_arabic);
    ("Russian roundtrip", `Quick, test_utf8_roundtrip_russian);
    ("Greek roundtrip", `Quick, test_utf8_roundtrip_greek);
    ("Korean roundtrip", `Quick, test_utf8_roundtrip_korean);
    ("Emoji roundtrip", `Quick, test_utf8_roundtrip_emoji);
  ]

let label_tests =
  [
    ("ASCII passthrough", `Quick, test_label_encode_ascii);
    ("German encode", `Quick, test_label_encode_german);
    ("German decode", `Quick, test_label_decode_german);
  ]

let idna_tests =
  [
    ("to_ascii simple", `Quick, test_idna_to_ascii_simple);
    ("to_unicode simple", `Quick, test_idna_to_unicode_simple);
    ("roundtrip", `Quick, test_idna_roundtrip);
    ("all ASCII", `Quick, test_idna_all_ascii);
    ("mixed labels", `Quick, test_idna_mixed_labels);
  ]

let case_tests =
  [
    ("decode with case", `Quick, test_case_annotation_decode);
    ("encode with case", `Quick, test_case_annotation_encode);
  ]

let edge_case_tests =
  [
    ("empty encode", `Quick, test_empty_input);
    ("empty decode", `Quick, test_empty_decode);
    ("pure ASCII", `Quick, test_pure_ascii);
    ("invalid digit", `Quick, test_invalid_digit);
    ("label too long", `Quick, test_label_too_long);
    ("empty label", `Quick, test_empty_label);
  ]

let validation_tests =
  [
    ("is_basic", `Quick, test_is_basic);
    ("is_ascii_string", `Quick, test_is_ascii_string);
    ("has_ace_prefix", `Quick, test_has_ace_prefix);
  ]

(* Entry point: registers every suite with the Alcotest runner. *)
let () =
  run "Punycode"
    [
      ("decode RFC vectors", decode_tests);
      ("encode RFC vectors", encode_tests);
      ("UTF-8 roundtrip", utf8_tests);
      ("label operations", label_tests);
      ("IDNA operations", idna_tests);
      ("case annotation", case_tests);
      ("edge cases", edge_case_tests);
      ("validation", validation_tests);
    ]