(* Punycode (RFC 3492) in OCaml *)
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(* Comprehensive tests for Punycode (RFC 3492) implementation *)
7
8open Alcotest
9module Punycode = Punycode
10module Punycode_idna = Punycode_idna
11
(* [codepoints_of_hex_list hex_list] turns a list of integer code point
   values into an array of [Uchar.t], preserving order. *)
let codepoints_of_hex_list hex_list =
  hex_list |> List.map Uchar.of_int |> Array.of_list
15
(* [codepoints_of_string s] decodes [s] as UTF-8 and returns the decoded
   code points, in order, as an array. Decoding advances by the byte length
   reported for each decode step. *)
let codepoints_of_string s =
  let len = String.length s in
  let rec collect pos rev_acc =
    if pos >= len then Array.of_list (List.rev rev_acc)
    else
      let dec = String.get_utf_8_uchar s pos in
      collect
        (pos + Uchar.utf_decode_length dec)
        (Uchar.utf_decode_uchar dec :: rev_acc)
  in
  collect 0 []
26
(* [check_encode_ok expected input] encodes [input] with [Punycode.encode]
   and checks the result equals [expected]; a [Punycode.Error] is reported
   as a test failure together with its printed reason. *)
let check_encode_ok expected input =
  match Punycode.encode input with
  | encoded -> check string "encode" expected encoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "encode failed: %a" Punycode.pp_error_reason reason)
34
(* [check_decode_ok expected input] decodes [input] with [Punycode.decode]
   and compares the result against the integer code points in [expected]:
   first the lengths, then each position in turn. A [Punycode.Error] is
   reported as a test failure together with its printed reason. *)
let check_decode_ok expected input =
  match Punycode.decode input with
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "decode failed: %a" Punycode.pp_error_reason reason)
  | decoded ->
      let wanted = codepoints_of_hex_list expected in
      (* The length check fails the test before any out-of-range index. *)
      check int "length" (Array.length wanted) (Array.length decoded);
      decoded
      |> Array.iteri (fun idx got ->
             check int
               (Printf.sprintf "char %d" idx)
               (Uchar.to_int wanted.(idx))
               (Uchar.to_int got))
49
(* [check_utf8_roundtrip s] checks that passing [s] through
   [Punycode.encode_utf8] and then [Punycode.decode_utf8] gives back [s]. *)
let check_utf8_roundtrip s =
  match Punycode.decode_utf8 (Punycode.encode_utf8 s) with
  | decoded -> check string "roundtrip" s decoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "roundtrip failed: %a" Punycode.pp_error_reason reason)
57
58(* RFC 3492 Section 7.1 Test Vectors *)
59
(* (A) Arabic (Egyptian): input code points and expected Punycode. *)
let arabic_codepoints =
  [
    0x0644; 0x064A; 0x0647; 0x0645; 0x0627; 0x0628;
    0x062A; 0x0643; 0x0644; 0x0645; 0x0648; 0x0634;
    0x0639; 0x0631; 0x0628; 0x064A; 0x061F;
  ]

let arabic_punycode = "egbpdaj6bu4bxfgehfvwxn"
83
(* (B) Chinese (simplified): input code points and expected Punycode. *)
let chinese_simplified_codepoints =
  [
    0x4ED6; 0x4EEC; 0x4E3A;
    0x4EC0; 0x4E48; 0x4E0D;
    0x8BF4; 0x4E2D; 0x6587;
  ]

let chinese_simplified_punycode = "ihqwcrb4cv8a8dqg056pqjye"
89
(* (C) Chinese (traditional): input code points and expected Punycode. *)
let chinese_traditional_codepoints =
  [
    0x4ED6; 0x5011; 0x7232;
    0x4EC0; 0x9EBD; 0x4E0D;
    0x8AAA; 0x4E2D; 0x6587;
  ]

let chinese_traditional_punycode = "ihqwctvzc91f659drss3x8bo0yb"
95
(* (D) Czech: input code points and expected Punycode. *)
let czech_codepoints =
  [
    0x0050; 0x0072; 0x006F; 0x010D; 0x0070; 0x0072; 0x006F; 0x0073;
    0x0074; 0x011B; 0x006E; 0x0065; 0x006D; 0x006C; 0x0075; 0x0076;
    0x00ED; 0x010D; 0x0065; 0x0073; 0x006B; 0x0079;
  ]

let czech_punycode = "Proprostnemluvesky-uyb24dma41a"
124
(* (E) Hebrew: input code points and expected Punycode. *)
let hebrew_codepoints =
  [
    0x05DC; 0x05DE; 0x05D4; 0x05D4; 0x05DD; 0x05E4; 0x05E9; 0x05D5;
    0x05D8; 0x05DC; 0x05D0; 0x05DE; 0x05D3; 0x05D1; 0x05E8; 0x05D9;
    0x05DD; 0x05E2; 0x05D1; 0x05E8; 0x05D9; 0x05EA;
  ]

let hebrew_punycode = "4dbcagdahymbxekheh6e0a7fei0b"
153
(* (F) Hindi (Devanagari): input code points and expected Punycode. *)
let hindi_codepoints =
  [
    0x092F; 0x0939; 0x0932; 0x094B; 0x0917; 0x0939; 0x093F; 0x0928;
    0x094D; 0x0926; 0x0940; 0x0915; 0x094D; 0x092F; 0x094B; 0x0902;
    0x0928; 0x0939; 0x0940; 0x0902; 0x092C; 0x094B; 0x0932; 0x0938;
    0x0915; 0x0924; 0x0947; 0x0939; 0x0948; 0x0902;
  ]

let hindi_punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
190
(* (G) Japanese (kanji and hiragana): input code points and expected
   Punycode. *)
let japanese_codepoints =
  [
    0x306A; 0x305C; 0x307F; 0x3093; 0x306A; 0x65E5; 0x672C; 0x8A9E;
    0x3092; 0x8A71; 0x3057; 0x3066; 0x304F; 0x308C; 0x306A; 0x3044;
    0x306E; 0x304B;
  ]

let japanese_punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
215
(* (H) Korean (Hangul syllables): input code points and expected Punycode. *)
let korean_codepoints =
  [
    0xC138; 0xACC4; 0xC758; 0xBAA8; 0xB4E0; 0xC0AC; 0xB78C; 0xB4E4;
    0xC774; 0xD55C; 0xAD6D; 0xC5B4; 0xB97C; 0xC774; 0xD574; 0xD55C;
    0xB2E4; 0xBA74; 0xC5BC; 0xB9C8; 0xB098; 0xC88B; 0xC744; 0xAE4C;
  ]

let korean_punycode =
  "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
247
(* (I) Russian (Cyrillic): input code points and expected Punycode. *)
let russian_codepoints =
  [
    0x043F; 0x043E; 0x0447; 0x0435; 0x043C; 0x0443; 0x0436; 0x0435;
    0x043E; 0x043D; 0x0438; 0x043D; 0x0435; 0x0433; 0x043E; 0x0432;
    0x043E; 0x0440; 0x044F; 0x0442; 0x043F; 0x043E; 0x0440; 0x0443;
    0x0441; 0x0441; 0x043A; 0x0438;
  ]

let russian_punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l"
282
(* (J) Spanish: input code points and expected Punycode. *)
let spanish_codepoints =
  [
    0x0050; 0x006F; 0x0072; 0x0071; 0x0075; 0x00E9; 0x006E; 0x006F;
    0x0070; 0x0075; 0x0065; 0x0064; 0x0065; 0x006E; 0x0073; 0x0069;
    0x006D; 0x0070; 0x006C; 0x0065; 0x006D; 0x0065; 0x006E; 0x0074;
    0x0065; 0x0068; 0x0061; 0x0062; 0x006C; 0x0061; 0x0072; 0x0065;
    0x006E; 0x0045; 0x0073; 0x0070; 0x0061; 0x00F1; 0x006F; 0x006C;
  ]

let spanish_punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a"
329
(* (K) Vietnamese: input code points and expected Punycode. *)
let vietnamese_codepoints =
  [
    0x0054; 0x1EA1; 0x0069; 0x0073; 0x0061; 0x006F; 0x0068; 0x1ECD;
    0x006B; 0x0068; 0x00F4; 0x006E; 0x0067; 0x0074; 0x0068; 0x1EC3;
    0x0063; 0x0068; 0x1EC9; 0x006E; 0x00F3; 0x0069; 0x0074; 0x0069;
    0x1EBF; 0x006E; 0x0067; 0x0056; 0x0069; 0x1EC7; 0x0074;
  ]

let vietnamese_punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
367
(* (L) 3年B組金八先生 - Japanese with ASCII: input code points and expected
   Punycode. *)
let example_l_codepoints =
  [
    0x0033; 0x5E74; 0x0042; 0x7D44;
    0x91D1; 0x516B; 0x5148; 0x751F;
  ]

let example_l_punycode = "3B-ww4c5e180e575a65lsy2b"
373
(* (M) 安室奈美恵-with-SUPER-MONKEYS: input code points and expected
   Punycode. *)
let example_m_codepoints =
  [
    0x5B89; 0x5BA4; 0x5948; 0x7F8E; 0x6075; 0x002D; 0x0077; 0x0069;
    0x0074; 0x0068; 0x002D; 0x0053; 0x0055; 0x0050; 0x0045; 0x0052;
    0x002D; 0x004D; 0x004F; 0x004E; 0x004B; 0x0045; 0x0059; 0x0053;
  ]

let example_m_punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
404
(* (N) Hello-Another-Way-それぞれの場所: input code points and expected
   Punycode. *)
let example_n_codepoints =
  [
    0x0048; 0x0065; 0x006C; 0x006C; 0x006F; 0x002D; 0x0041; 0x006E;
    0x006F; 0x0074; 0x0068; 0x0065; 0x0072; 0x002D; 0x0057; 0x0061;
    0x0079; 0x002D; 0x305D; 0x308C; 0x305E; 0x308C; 0x306E; 0x5834;
    0x6240;
  ]

let example_n_punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b"
436
(* (O) ひとつ屋根の下2: input code points and expected Punycode. *)
let example_o_codepoints =
  [
    0x3072; 0x3068; 0x3064; 0x5C4B;
    0x6839; 0x306E; 0x4E0B; 0x0032;
  ]

let example_o_punycode = "2-u9tlzr9756bt3uc0v"
442
(* (P) MajiでKoiする5秒前 *)
(* Example (P) input code points (mixed ASCII and Japanese) and expected
   Punycode. *)
let example_p_codepoints =
  [
    0x004D; 0x0061; 0x006A; 0x0069; 0x3067; 0x004B; 0x006F; 0x0069;
    0x3059; 0x308B; 0x0035; 0x79D2; 0x524D;
  ]

let example_p_punycode = "MajiKoi5-783gue6qz075azm5e"
462
(* (Q) パフィーdeルンバ: input code points and expected Punycode. *)
let example_q_codepoints =
  [
    0x30D1; 0x30D5; 0x30A3;
    0x30FC; 0x0064; 0x0065;
    0x30EB; 0x30F3; 0x30D0;
  ]

let example_q_punycode = "de-jg4avhby1noc0d"
468
(* (R) そのスピードで: input code points and expected Punycode. *)
let example_r_codepoints =
  [
    0x305D; 0x306E; 0x30B9; 0x30D4;
    0x30FC; 0x30C9; 0x3067;
  ]

let example_r_punycode = "d9juau41awczczp"
474
(* (S) -> $1.00 <- (pure ASCII): input code points and expected Punycode. *)
let example_s_codepoints =
  [
    0x002D; 0x003E; 0x0020; 0x0024; 0x0031; 0x002E; 0x0030; 0x0030;
    0x0020; 0x003C; 0x002D;
  ]

let example_s_punycode = "-> $1.00 <--"
492
493(* Test functions *)
494
(* Decode tests: each feeds one expected Punycode string to
   [check_decode_ok] together with the matching code point list.
   Listed in vector order (A) through (S). *)

let test_decode_arabic () =
  arabic_punycode |> check_decode_ok arabic_codepoints

let test_decode_chinese_simplified () =
  chinese_simplified_punycode |> check_decode_ok chinese_simplified_codepoints

let test_decode_chinese_traditional () =
  chinese_traditional_punycode
  |> check_decode_ok chinese_traditional_codepoints

let test_decode_czech () = czech_punycode |> check_decode_ok czech_codepoints

let test_decode_hebrew () =
  hebrew_punycode |> check_decode_ok hebrew_codepoints

let test_decode_hindi () = hindi_punycode |> check_decode_ok hindi_codepoints

let test_decode_japanese () =
  japanese_punycode |> check_decode_ok japanese_codepoints

let test_decode_korean () =
  korean_punycode |> check_decode_ok korean_codepoints

(* The Russian string is passed through [String.lowercase_ascii] first. *)
let test_decode_russian () =
  String.lowercase_ascii russian_punycode
  |> check_decode_ok russian_codepoints

let test_decode_spanish () =
  spanish_punycode |> check_decode_ok spanish_codepoints

let test_decode_vietnamese () =
  vietnamese_punycode |> check_decode_ok vietnamese_codepoints

let test_decode_example_l () =
  example_l_punycode |> check_decode_ok example_l_codepoints

let test_decode_example_m () =
  example_m_punycode |> check_decode_ok example_m_codepoints

let test_decode_example_n () =
  example_n_punycode |> check_decode_ok example_n_codepoints

let test_decode_example_o () =
  example_o_punycode |> check_decode_ok example_o_codepoints

let test_decode_example_p () =
  example_p_punycode |> check_decode_ok example_p_codepoints

let test_decode_example_q () =
  example_q_punycode |> check_decode_ok example_q_codepoints

let test_decode_example_r () =
  example_r_punycode |> check_decode_ok example_r_codepoints

let test_decode_example_s () =
  example_s_punycode |> check_decode_ok example_s_codepoints
544
(* Encode tests: each converts a code point list to a [Uchar.t] array and
   checks the encoder output against the expected string. Examples L, M, N
   and O compare against the ASCII-lowercased expected string. *)

let test_encode_arabic () =
  arabic_codepoints |> codepoints_of_hex_list
  |> check_encode_ok arabic_punycode

let test_encode_chinese_simplified () =
  chinese_simplified_codepoints |> codepoints_of_hex_list
  |> check_encode_ok chinese_simplified_punycode

let test_encode_chinese_traditional () =
  chinese_traditional_codepoints |> codepoints_of_hex_list
  |> check_encode_ok chinese_traditional_punycode

let test_encode_hebrew () =
  hebrew_codepoints |> codepoints_of_hex_list
  |> check_encode_ok hebrew_punycode

let test_encode_hindi () =
  hindi_codepoints |> codepoints_of_hex_list |> check_encode_ok hindi_punycode

let test_encode_japanese () =
  japanese_codepoints |> codepoints_of_hex_list
  |> check_encode_ok japanese_punycode

let test_encode_korean () =
  korean_codepoints |> codepoints_of_hex_list
  |> check_encode_ok korean_punycode

let test_encode_example_l () =
  example_l_codepoints |> codepoints_of_hex_list
  |> check_encode_ok (String.lowercase_ascii example_l_punycode)

let test_encode_example_m () =
  example_m_codepoints |> codepoints_of_hex_list
  |> check_encode_ok (String.lowercase_ascii example_m_punycode)

let test_encode_example_n () =
  example_n_codepoints |> codepoints_of_hex_list
  |> check_encode_ok (String.lowercase_ascii example_n_punycode)

let test_encode_example_o () =
  example_o_codepoints |> codepoints_of_hex_list
  |> check_encode_ok (String.lowercase_ascii example_o_punycode)

let test_encode_example_q () =
  example_q_codepoints |> codepoints_of_hex_list
  |> check_encode_ok example_q_punycode

let test_encode_example_r () =
  example_r_codepoints |> codepoints_of_hex_list
  |> check_encode_ok example_r_punycode
595
596(* UTF-8 roundtrip tests *)
(* Each test passes one UTF-8 sample string to [check_utf8_roundtrip]. *)
let test_utf8_roundtrip_german () = "münchen" |> check_utf8_roundtrip
let test_utf8_roundtrip_chinese () = "中文" |> check_utf8_roundtrip
let test_utf8_roundtrip_japanese () = "日本語" |> check_utf8_roundtrip
let test_utf8_roundtrip_arabic () = "العربية" |> check_utf8_roundtrip
let test_utf8_roundtrip_russian () = "русский" |> check_utf8_roundtrip
let test_utf8_roundtrip_greek () = "ελληνικά" |> check_utf8_roundtrip
let test_utf8_roundtrip_korean () = "한국어" |> check_utf8_roundtrip
let test_utf8_roundtrip_emoji () = "hello👋world" |> check_utf8_roundtrip
605
606(* Label encoding tests *)
(* Checks that [Punycode.encode_label "example"] returns "example". *)
let test_label_encode_ascii () =
  match Punycode.encode_label "example" with
  | encoded -> check string "ascii passthrough" "example" encoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason
           reason)
613
(* Checks that the label "münchen" encodes to "xn--mnchen-3ya". *)
let test_label_encode_german () =
  match Punycode.encode_label "münchen" with
  | encoded -> check string "german label" "xn--mnchen-3ya" encoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason
           reason)
620
(* Checks that the label "xn--mnchen-3ya" decodes to "münchen". *)
let test_label_decode_german () =
  match Punycode.decode_label "xn--mnchen-3ya" with
  | decoded -> check string "german decode" "münchen" decoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "decode_label failed: %a" Punycode.pp_error_reason
           reason)
627
628(* IDNA tests *)
(* Checks [Punycode_idna.to_ascii] on a domain whose first label is
   non-ASCII. *)
let test_idna_to_ascii_simple () =
  match Punycode_idna.to_ascii "münchen.example.com" with
  | ascii -> check string "idna to_ascii" "xn--mnchen-3ya.example.com" ascii
  | exception Punycode_idna.Error reason ->
      fail
        (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason
           reason)
635
(* Checks [Punycode_idna.to_unicode] on a domain whose first label carries
   the "xn--" prefix. *)
let test_idna_to_unicode_simple () =
  match Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" with
  | unicode -> check string "idna to_unicode" "münchen.example.com" unicode
  | exception Punycode_idna.Error reason ->
      fail
        (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error_reason
           reason)
642
(* Checks that [to_ascii] followed by [to_unicode] returns the original
   domain name. *)
let test_idna_roundtrip () =
  let original = "münchen.example.com" in
  match Punycode_idna.to_unicode (Punycode_idna.to_ascii original) with
  | restored -> check string "idna roundtrip" original restored
  | exception Punycode_idna.Error reason ->
      fail
        (Format.asprintf "roundtrip failed: %a" Punycode_idna.pp_error_reason
           reason)
651
(* Checks that [to_ascii] returns an all-ASCII domain name unchanged. *)
let test_idna_all_ascii () =
  match Punycode_idna.to_ascii "www.example.com" with
  | ascii -> check string "all ascii passthrough" "www.example.com" ascii
  | exception Punycode_idna.Error reason ->
      fail
        (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason
           reason)
658
(* Checks [to_ascii] on a domain with one non-ASCII label followed by ASCII
   labels: the result must carry the ACE prefix and keep the ASCII labels
   as its suffix. The exact encoding of the first label is not asserted. *)
let test_idna_mixed_labels () =
  try
    let result = Punycode_idna.to_ascii "日本語.example.com" in
    check bool "has ace prefix" true (Punycode.has_ace_prefix result);
    (* [String.ends_with] replaces a hand-written length/sub comparison, so
       the suffix is written once and cannot drift from a hard-coded length. *)
    check bool "ends with example.com" true
      (String.ends_with ~suffix:".example.com" result)
  with Punycode_idna.Error e ->
    fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e)
669
670(* Case annotation tests *)
(* Decodes example (P) with [Punycode.decode_with_case] and checks that the
   code point and case-flag arrays have the expected lengths and that the
   first two flags are Uppercase then Lowercase. *)
let test_case_annotation_decode () =
  match Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" with
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error_reason
           reason)
  | codepoints, case_flags ->
      let decoded_len = Array.length codepoints in
      check int "codepoints length"
        (List.length example_p_codepoints)
        decoded_len;
      check int "case_flags length" decoded_len (Array.length case_flags);
      (* Leading 'M' is flagged uppercase, the following 'a' lowercase. *)
      check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase);
      check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase)
688
(* Encodes "abc" with case flags Upper/Lower/Upper and expects "AbC-". *)
let test_case_annotation_encode () =
  let abc = [ 0x0061; 0x0062; 0x0063 ] |> codepoints_of_hex_list in
  let flags =
    [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |]
  in
  match Punycode.encode_with_case abc flags with
  | encoded -> check string "case encoded" "AbC-" encoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error_reason
           reason)
701
702(* Edge case tests *)
(* Encoding an empty code point array must succeed and produce "". *)
let test_empty_input () =
  match Punycode.encode [||] with
  | result -> check string "empty encode" "" result
  | exception Punycode.Error e ->
      (* Include the reason, as the other tests do, so an unexpected
         failure can be diagnosed from the test output. *)
      fail
        (Format.asprintf "empty encode should succeed: %a"
           Punycode.pp_error_reason e)
708
(* Decoding the empty string must succeed and produce an empty array. *)
let test_empty_decode () =
  match Punycode.decode "" with
  | result -> check int "empty decode length" 0 (Array.length result)
  | exception Punycode.Error e ->
      (* Include the reason, as the other tests do, so an unexpected
         failure can be diagnosed from the test output. *)
      fail
        (Format.asprintf "empty decode should succeed: %a"
           Punycode.pp_error_reason e)
714
(* Encoding the ASCII-only input "hello" is expected to give "hello-". *)
let test_pure_ascii () =
  match Punycode.encode (codepoints_of_string "hello") with
  | encoded -> check string "pure ascii" "hello-" encoded
  | exception Punycode.Error reason ->
      fail
        (Format.asprintf "encode failed: %a" Punycode.pp_error_reason reason)
722
(* Decoding "hello!" must raise [Punycode.Error (Invalid_digit _)]; success
   or any other Punycode error fails the test. *)
let test_invalid_digit () =
  match Punycode.decode "hello!" with
  | _ -> fail "should fail on invalid digit"
  | exception Punycode.Error (Punycode.Invalid_digit _) -> ()
  | exception Punycode.Error other ->
      fail
        (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason other)
731
(* Encoding a 100-character label must raise
   [Punycode.Error (Label_too_long _)]; success or any other Punycode error
   fails the test. *)
let test_label_too_long () =
  match Punycode.encode_label (String.make 100 'a') with
  | _ -> fail "should fail on long label"
  | exception Punycode.Error (Punycode.Label_too_long _) -> ()
  | exception Punycode.Error other ->
      fail
        (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason other)
741
(* Encoding the empty label must raise [Punycode.Error Empty_label];
   success or any other Punycode error fails the test. *)
let test_empty_label () =
  match Punycode.encode_label "" with
  | _ -> fail "should fail on empty label"
  | exception Punycode.Error Punycode.Empty_label -> ()
  | exception Punycode.Error other ->
      fail
        (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason other)
750
751(* Validation tests *)
(* Table-driven check of [Punycode.is_basic]: values up to 0x7F are expected
   to be basic, 0x80 and 0xFC are not. *)
let test_is_basic () =
  [
    ("space is basic", true, 0x20);
    ("A is basic", true, 0x41);
    ("DEL is basic", true, 0x7F);
    ("0x80 not basic", false, 0x80);
    ("ü not basic", false, 0xFC);
  ]
  |> List.iter (fun (label, expected, cp) ->
         check bool label expected (Punycode.is_basic (Uchar.of_int cp)))
758
(* Table-driven check of [Punycode.is_ascii_string], including the empty
   string. *)
let test_is_ascii_string () =
  [
    ("ascii string", true, "hello");
    ("non-ascii string", false, "héllo");
    ("empty string", true, "");
  ]
  |> List.iter (fun (label, expected, input) ->
         check bool label expected (Punycode.is_ascii_string input))
763
(* Table-driven check of [Punycode.has_ace_prefix]: both "xn--" and "XN--"
   are expected to match; a plain label and the truncated "xn-" are not. *)
let test_has_ace_prefix () =
  [
    ("has xn--", true, "xn--mnchen-3ya");
    ("has XN--", true, "XN--mnchen-3ya");
    ("no prefix", false, "example");
    ("too short", false, "xn-");
  ]
  |> List.iter (fun (label, expected, input) ->
         check bool label expected (Punycode.has_ace_prefix input))
769
770(* Test suites *)
(* Decode test cases, one per test vector (A) through (S). *)
let decode_tests =
  [
    test_case "Arabic" `Quick test_decode_arabic;
    test_case "Chinese simplified" `Quick test_decode_chinese_simplified;
    test_case "Chinese traditional" `Quick test_decode_chinese_traditional;
    test_case "Czech" `Quick test_decode_czech;
    test_case "Hebrew" `Quick test_decode_hebrew;
    test_case "Hindi" `Quick test_decode_hindi;
    test_case "Japanese" `Quick test_decode_japanese;
    test_case "Korean" `Quick test_decode_korean;
    test_case "Russian" `Quick test_decode_russian;
    test_case "Spanish" `Quick test_decode_spanish;
    test_case "Vietnamese" `Quick test_decode_vietnamese;
    test_case "Example L (mixed)" `Quick test_decode_example_l;
    test_case "Example M (mixed)" `Quick test_decode_example_m;
    test_case "Example N (mixed)" `Quick test_decode_example_n;
    test_case "Example O (mixed)" `Quick test_decode_example_o;
    test_case "Example P (mixed)" `Quick test_decode_example_p;
    test_case "Example Q (mixed)" `Quick test_decode_example_q;
    test_case "Example R" `Quick test_decode_example_r;
    test_case "Example S (ASCII)" `Quick test_decode_example_s;
  ]
793
(* Encode test cases for the vectors that have an encode test. *)
let encode_tests =
  [
    test_case "Arabic" `Quick test_encode_arabic;
    test_case "Chinese simplified" `Quick test_encode_chinese_simplified;
    test_case "Chinese traditional" `Quick test_encode_chinese_traditional;
    test_case "Hebrew" `Quick test_encode_hebrew;
    test_case "Hindi" `Quick test_encode_hindi;
    test_case "Japanese" `Quick test_encode_japanese;
    test_case "Korean" `Quick test_encode_korean;
    test_case "Example L (mixed)" `Quick test_encode_example_l;
    test_case "Example M (mixed)" `Quick test_encode_example_m;
    test_case "Example N (mixed)" `Quick test_encode_example_n;
    test_case "Example O (mixed)" `Quick test_encode_example_o;
    test_case "Example Q (mixed)" `Quick test_encode_example_q;
    test_case "Example R" `Quick test_encode_example_r;
  ]
810
(* UTF-8 encode/decode round-trip test cases. *)
let utf8_tests =
  [
    test_case "German roundtrip" `Quick test_utf8_roundtrip_german;
    test_case "Chinese roundtrip" `Quick test_utf8_roundtrip_chinese;
    test_case "Japanese roundtrip" `Quick test_utf8_roundtrip_japanese;
    test_case "Arabic roundtrip" `Quick test_utf8_roundtrip_arabic;
    test_case "Russian roundtrip" `Quick test_utf8_roundtrip_russian;
    test_case "Greek roundtrip" `Quick test_utf8_roundtrip_greek;
    test_case "Korean roundtrip" `Quick test_utf8_roundtrip_korean;
    test_case "Emoji roundtrip" `Quick test_utf8_roundtrip_emoji;
  ]
822
(* Label encode/decode test cases. *)
let label_tests =
  [
    test_case "ASCII passthrough" `Quick test_label_encode_ascii;
    test_case "German encode" `Quick test_label_encode_german;
    test_case "German decode" `Quick test_label_decode_german;
  ]
829
(* Domain-name level test cases for [Punycode_idna]. *)
let idna_tests =
  [
    test_case "to_ascii simple" `Quick test_idna_to_ascii_simple;
    test_case "to_unicode simple" `Quick test_idna_to_unicode_simple;
    test_case "roundtrip" `Quick test_idna_roundtrip;
    test_case "all ASCII" `Quick test_idna_all_ascii;
    test_case "mixed labels" `Quick test_idna_mixed_labels;
  ]
838
(* Case-annotation test cases. *)
let case_tests =
  [
    test_case "decode with case" `Quick test_case_annotation_decode;
    test_case "encode with case" `Quick test_case_annotation_encode;
  ]
844
(* Edge-case test cases: empty inputs, pure ASCII and expected errors. *)
let edge_case_tests =
  [
    test_case "empty encode" `Quick test_empty_input;
    test_case "empty decode" `Quick test_empty_decode;
    test_case "pure ASCII" `Quick test_pure_ascii;
    test_case "invalid digit" `Quick test_invalid_digit;
    test_case "label too long" `Quick test_label_too_long;
    test_case "empty label" `Quick test_empty_label;
  ]
854
(* Test cases for the predicate helpers. *)
let validation_tests =
  [
    test_case "is_basic" `Quick test_is_basic;
    test_case "is_ascii_string" `Quick test_is_ascii_string;
    test_case "has_ace_prefix" `Quick test_has_ace_prefix;
  ]
861
(* Entry point: registers every suite under the "Punycode" runner name. *)
let () =
  let suites =
    [
      ("decode RFC vectors", decode_tests);
      ("encode RFC vectors", encode_tests);
      ("UTF-8 roundtrip", utf8_tests);
      ("label operations", label_tests);
      ("IDNA operations", idna_tests);
      ("case annotation", case_tests);
      ("edge cases", edge_case_tests);
      ("validation", validation_tests);
    ]
  in
  run "Punycode" suites
873 ]