Punycode (RFC3492) in OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(* Comprehensive tests for Punycode (RFC 3492) implementation *)
7
8open Alcotest
9module Punycode = Punycode
10module Punycode_idna = Punycode_idna
11
(* Turn a list of integer code points into a [Uchar.t array]. *)
let codepoints_of_hex_list hex_list =
  hex_list |> List.map Uchar.of_int |> Array.of_list
15
(* Decode the UTF-8 string [s] into an array of code points, one entry per
   decode step of [String.get_utf_8_uchar]. *)
let codepoints_of_string s =
  let len = String.length s in
  let rec collect pos rev_chars =
    if pos >= len then Array.of_list (List.rev rev_chars)
    else
      let decoded = String.get_utf_8_uchar s pos in
      collect
        (pos + Uchar.utf_decode_length decoded)
        (Uchar.utf_decode_uchar decoded :: rev_chars)
  in
  collect 0 []
26
(* Encode [input] and require the output to equal [expected]; an encoder
   error fails the test with the pretty-printed error. *)
let check_encode_ok expected input =
  match Punycode.encode input with
  | Error err ->
      Format.kasprintf fail "encode failed: %a" Punycode.pp_error err
  | Ok actual -> check string "encode" expected actual
32
(* Decode the Punycode string [input] and compare it with [expected], given
   as a list of integer code points: the length is checked first, then every
   position in order. A decoder error fails the test. *)
let check_decode_ok expected input =
  match Punycode.decode input with
  | Error err ->
      Format.kasprintf fail "decode failed: %a" Punycode.pp_error err
  | Ok actual ->
      let wanted = codepoints_of_hex_list expected in
      check int "length" (Array.length wanted) (Array.length actual);
      for idx = 0 to Array.length actual - 1 do
        check int
          (Printf.sprintf "char %d" idx)
          (Uchar.to_int wanted.(idx))
          (Uchar.to_int actual.(idx))
      done
46
(* Encode the UTF-8 string [s], decode the result again, and require the
   decoded string to equal [s]. An error at either step fails the test. *)
let check_utf8_roundtrip s =
  match Punycode.encode_utf8 s with
  | Ok encoded -> (
      match Punycode.decode_utf8 encoded with
      | Ok decoded -> check string "roundtrip" s decoded
      | Error err ->
          Format.kasprintf fail "decode_utf8 failed: %a" Punycode.pp_error err)
  | Error err ->
      Format.kasprintf fail "encode_utf8 failed: %a" Punycode.pp_error err
56
57(* RFC 3492 Section 7.1 Test Vectors *)
58
59(* (A) Arabic (Egyptian) *)
let arabic_codepoints =
  (* 17 code points. *)
  [
    0x0644; 0x064A; 0x0647; 0x0645; 0x0627; 0x0628; 0x062A; 0x0643;
    0x0644; 0x0645; 0x0648; 0x0634; 0x0639; 0x0631; 0x0628; 0x064A;
    0x061F;
  ]

(* Punycode form paired with the code points above. *)
let arabic_punycode = "egbpdaj6bu4bxfgehfvwxn"
82
83(* (B) Chinese (simplified) *)
let chinese_simplified_codepoints =
  (* 9 code points. *)
  [
    0x4ED6; 0x4EEC; 0x4E3A;
    0x4EC0; 0x4E48; 0x4E0D;
    0x8BF4; 0x4E2D; 0x6587;
  ]

(* Punycode form paired with the code points above. *)
let chinese_simplified_punycode = "ihqwcrb4cv8a8dqg056pqjye"
88
89(* (C) Chinese (traditional) *)
let chinese_traditional_codepoints =
  (* 9 code points. *)
  [
    0x4ED6; 0x5011; 0x7232;
    0x4EC0; 0x9EBD; 0x4E0D;
    0x8AAA; 0x4E2D; 0x6587;
  ]

(* Punycode form paired with the code points above. *)
let chinese_traditional_punycode = "ihqwctvzc91f659drss3x8bo0yb"
94
95(* (D) Czech *)
let czech_codepoints =
  (* 22 code points. *)
  [
    0x0050; 0x0072; 0x006F; 0x010D; 0x0070; 0x0072; 0x006F; 0x0073;
    0x0074; 0x011B; 0x006E; 0x0065; 0x006D; 0x006C; 0x0075; 0x0076;
    0x00ED; 0x010D; 0x0065; 0x0073; 0x006B; 0x0079;
  ]

(* Punycode form paired with the code points above. *)
let czech_punycode = "Proprostnemluvesky-uyb24dma41a"
123
124(* (E) Hebrew *)
let hebrew_codepoints =
  (* 22 code points. *)
  [
    0x05DC; 0x05DE; 0x05D4; 0x05D4; 0x05DD; 0x05E4; 0x05E9; 0x05D5;
    0x05D8; 0x05DC; 0x05D0; 0x05DE; 0x05D3; 0x05D1; 0x05E8; 0x05D9;
    0x05DD; 0x05E2; 0x05D1; 0x05E8; 0x05D9; 0x05EA;
  ]

(* Punycode form paired with the code points above. *)
let hebrew_punycode = "4dbcagdahymbxekheh6e0a7fei0b"
152
153(* (F) Hindi (Devanagari) *)
let hindi_codepoints =
  (* 30 code points. *)
  [
    0x092F; 0x0939; 0x0932; 0x094B; 0x0917; 0x0939; 0x093F; 0x0928;
    0x094D; 0x0926; 0x0940; 0x0915; 0x094D; 0x092F; 0x094B; 0x0902;
    0x0928; 0x0939; 0x0940; 0x0902; 0x092C; 0x094B; 0x0932; 0x0938;
    0x0915; 0x0924; 0x0947; 0x0939; 0x0948; 0x0902;
  ]

(* Punycode form paired with the code points above. *)
let hindi_punycode = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
189
190(* (G) Japanese (kanji and hiragana) *)
let japanese_codepoints =
  (* 18 code points. *)
  [
    0x306A; 0x305C; 0x307F; 0x3093; 0x306A; 0x65E5; 0x672C; 0x8A9E;
    0x3092; 0x8A71; 0x3057; 0x3066; 0x304F; 0x308C; 0x306A; 0x3044;
    0x306E; 0x304B;
  ]

(* Punycode form paired with the code points above. *)
let japanese_punycode = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
214
215(* (H) Korean (Hangul syllables) *)
let korean_codepoints =
  (* 24 code points. *)
  [
    0xC138; 0xACC4; 0xC758; 0xBAA8; 0xB4E0; 0xC0AC; 0xB78C; 0xB4E4;
    0xC774; 0xD55C; 0xAD6D; 0xC5B4; 0xB97C; 0xC774; 0xD574; 0xD55C;
    0xB2E4; 0xBA74; 0xC5BC; 0xB9C8; 0xB098; 0xC88B; 0xC744; 0xAE4C;
  ]

(* Punycode form paired with the code points above. *)
let korean_punycode =
  "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
246
247(* (I) Russian (Cyrillic) *)
let russian_codepoints =
  (* 28 code points. *)
  [
    0x043F; 0x043E; 0x0447; 0x0435; 0x043C; 0x0443; 0x0436; 0x0435;
    0x043E; 0x043D; 0x0438; 0x043D; 0x0435; 0x0433; 0x043E; 0x0432;
    0x043E; 0x0440; 0x044F; 0x0442; 0x043F; 0x043E; 0x0440; 0x0443;
    0x0441; 0x0441; 0x043A; 0x0438;
  ]

(* Punycode form paired with the code points above. *)
let russian_punycode = "b1abfaaepdrnnbgefbadotcwatmq2g4l"
281
282(* (J) Spanish *)
let spanish_codepoints =
  (* 40 code points. *)
  [
    0x0050; 0x006F; 0x0072; 0x0071; 0x0075; 0x00E9; 0x006E; 0x006F;
    0x0070; 0x0075; 0x0065; 0x0064; 0x0065; 0x006E; 0x0073; 0x0069;
    0x006D; 0x0070; 0x006C; 0x0065; 0x006D; 0x0065; 0x006E; 0x0074;
    0x0065; 0x0068; 0x0061; 0x0062; 0x006C; 0x0061; 0x0072; 0x0065;
    0x006E; 0x0045; 0x0073; 0x0070; 0x0061; 0x00F1; 0x006F; 0x006C;
  ]

(* Punycode form paired with the code points above. *)
let spanish_punycode = "PorqunopuedensimplementehablarenEspaol-fmd56a"
328
329(* (K) Vietnamese *)
let vietnamese_codepoints =
  (* 31 code points. *)
  [
    0x0054; 0x1EA1; 0x0069; 0x0073; 0x0061; 0x006F; 0x0068; 0x1ECD;
    0x006B; 0x0068; 0x00F4; 0x006E; 0x0067; 0x0074; 0x0068; 0x1EC3;
    0x0063; 0x0068; 0x1EC9; 0x006E; 0x00F3; 0x0069; 0x0074; 0x0069;
    0x1EBF; 0x006E; 0x0067; 0x0056; 0x0069; 0x1EC7; 0x0074;
  ]

(* Punycode form paired with the code points above. *)
let vietnamese_punycode = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
366
367(* (L) 3年B組金八先生 - Japanese with ASCII *)
let example_l_codepoints =
  (* 8 code points; 0x0033 and 0x0042 are the ASCII characters '3' and 'B'. *)
  [
    0x0033; 0x5E74; 0x0042; 0x7D44;
    0x91D1; 0x516B; 0x5148; 0x751F;
  ]

(* Punycode form paired with the code points above. *)
let example_l_punycode = "3B-ww4c5e180e575a65lsy2b"
372
373(* (M) 安室奈美恵-with-SUPER-MONKEYS *)
let example_m_codepoints =
  (* 24 code points: 5 non-ASCII ones followed by "-with-SUPER-MONKEYS". *)
  [
    0x5B89; 0x5BA4; 0x5948; 0x7F8E; 0x6075; 0x002D; 0x0077; 0x0069;
    0x0074; 0x0068; 0x002D; 0x0053; 0x0055; 0x0050; 0x0045; 0x0052;
    0x002D; 0x004D; 0x004F; 0x004E; 0x004B; 0x0045; 0x0059; 0x0053;
  ]

(* Punycode form paired with the code points above. *)
let example_m_punycode = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
403
404(* (N) Hello-Another-Way-それぞれの場所 *)
let example_n_codepoints =
  (* 25 code points: "Hello-Another-Way-" followed by 7 non-ASCII ones. *)
  [
    0x0048; 0x0065; 0x006C; 0x006C; 0x006F; 0x002D; 0x0041; 0x006E;
    0x006F; 0x0074; 0x0068; 0x0065; 0x0072; 0x002D; 0x0057; 0x0061;
    0x0079; 0x002D; 0x305D; 0x308C; 0x305E; 0x308C; 0x306E; 0x5834;
    0x6240;
  ]

(* Punycode form paired with the code points above. *)
let example_n_punycode = "Hello-Another-Way--fc4qua05auwb3674vfr0b"
435
436(* (O) ひとつ屋根の下2 *)
let example_o_codepoints =
  (* 8 code points; the trailing 0x0032 is the ASCII digit '2'. *)
  [
    0x3072; 0x3068; 0x3064; 0x5C4B;
    0x6839; 0x306E; 0x4E0B; 0x0032;
  ]

(* Punycode form paired with the code points above. *)
let example_o_punycode = "2-u9tlzr9756bt3uc0v"
441
(* (P) MajiでKoiする5秒前 *)
let example_p_codepoints =
  (* 13 code points; the ASCII ones spell "Maji", "Koi" and "5". *)
  [
    0x004D; 0x0061; 0x006A; 0x0069; 0x3067; 0x004B; 0x006F; 0x0069;
    0x3059; 0x308B; 0x0035; 0x79D2; 0x524D;
  ]

(* Punycode form paired with the code points above. *)
let example_p_punycode = "MajiKoi5-783gue6qz075azm5e"
461
462(* (Q) パフィーdeルンバ *)
let example_q_codepoints =
  (* 9 code points; 0x0064 0x0065 is the ASCII pair "de". *)
  [
    0x30D1; 0x30D5; 0x30A3;
    0x30FC; 0x0064; 0x0065;
    0x30EB; 0x30F3; 0x30D0;
  ]

(* Punycode form paired with the code points above. *)
let example_q_punycode = "de-jg4avhby1noc0d"
467
468(* (R) そのスピードで *)
let example_r_codepoints =
  (* 7 code points. *)
  [
    0x305D; 0x306E; 0x30B9; 0x30D4;
    0x30FC; 0x30C9; 0x3067;
  ]

(* Punycode form paired with the code points above. *)
let example_r_punycode = "d9juau41awczczp"
473
474(* (S) -> $1.00 <- (pure ASCII) *)
let example_s_codepoints =
  (* 11 code points, all ASCII. *)
  [
    0x002D; 0x003E; 0x0020; 0x0024; 0x0031; 0x002E; 0x0030; 0x0030;
    0x0020; 0x003C; 0x002D;
  ]

(* Punycode form paired with the code points above. *)
let example_s_punycode = "-> $1.00 <--"
491
492(* Test functions *)
493
(* Decode tests, one per test vector (A)-(S): each feeds the Punycode string
   to [check_decode_ok] together with the expected code points. *)
let test_decode_arabic () =
  arabic_punycode |> check_decode_ok arabic_codepoints

let test_decode_chinese_simplified () =
  chinese_simplified_punycode |> check_decode_ok chinese_simplified_codepoints

let test_decode_chinese_traditional () =
  chinese_traditional_punycode
  |> check_decode_ok chinese_traditional_codepoints

let test_decode_czech () = czech_punycode |> check_decode_ok czech_codepoints

let test_decode_hebrew () =
  hebrew_punycode |> check_decode_ok hebrew_codepoints

let test_decode_hindi () = hindi_punycode |> check_decode_ok hindi_codepoints

let test_decode_japanese () =
  japanese_punycode |> check_decode_ok japanese_codepoints

let test_decode_korean () =
  korean_punycode |> check_decode_ok korean_codepoints

(* The Russian input is lower-cased before it is decoded. *)
let test_decode_russian () =
  russian_punycode |> String.lowercase_ascii
  |> check_decode_ok russian_codepoints

let test_decode_spanish () =
  spanish_punycode |> check_decode_ok spanish_codepoints

let test_decode_vietnamese () =
  vietnamese_punycode |> check_decode_ok vietnamese_codepoints

let test_decode_example_l () =
  example_l_punycode |> check_decode_ok example_l_codepoints

let test_decode_example_m () =
  example_m_punycode |> check_decode_ok example_m_codepoints

let test_decode_example_n () =
  example_n_punycode |> check_decode_ok example_n_codepoints

let test_decode_example_o () =
  example_o_punycode |> check_decode_ok example_o_codepoints

let test_decode_example_p () =
  example_p_punycode |> check_decode_ok example_p_codepoints

let test_decode_example_q () =
  example_q_punycode |> check_decode_ok example_q_codepoints

let test_decode_example_r () =
  example_r_punycode |> check_decode_ok example_r_codepoints

let test_decode_example_s () =
  example_s_punycode |> check_decode_ok example_s_codepoints
543
(* Encode tests: each converts a vector's code points to a [Uchar.t array]
   and hands it to [check_encode_ok] with the expected Punycode string. *)
let test_encode_arabic () =
  arabic_codepoints |> codepoints_of_hex_list
  |> check_encode_ok arabic_punycode

let test_encode_chinese_simplified () =
  chinese_simplified_codepoints |> codepoints_of_hex_list
  |> check_encode_ok chinese_simplified_punycode

let test_encode_chinese_traditional () =
  chinese_traditional_codepoints |> codepoints_of_hex_list
  |> check_encode_ok chinese_traditional_punycode

let test_encode_hebrew () =
  hebrew_codepoints |> codepoints_of_hex_list
  |> check_encode_ok hebrew_punycode

let test_encode_hindi () =
  hindi_codepoints |> codepoints_of_hex_list |> check_encode_ok hindi_punycode

let test_encode_japanese () =
  japanese_codepoints |> codepoints_of_hex_list
  |> check_encode_ok japanese_punycode

let test_encode_korean () =
  korean_codepoints |> codepoints_of_hex_list
  |> check_encode_ok korean_punycode

(* Examples L-O compare against the lower-cased form of the stored string. *)
let test_encode_example_l () =
  let expected = String.lowercase_ascii example_l_punycode in
  example_l_codepoints |> codepoints_of_hex_list |> check_encode_ok expected

let test_encode_example_m () =
  let expected = String.lowercase_ascii example_m_punycode in
  example_m_codepoints |> codepoints_of_hex_list |> check_encode_ok expected

let test_encode_example_n () =
  let expected = String.lowercase_ascii example_n_punycode in
  example_n_codepoints |> codepoints_of_hex_list |> check_encode_ok expected

let test_encode_example_o () =
  let expected = String.lowercase_ascii example_o_punycode in
  example_o_codepoints |> codepoints_of_hex_list |> check_encode_ok expected

let test_encode_example_q () =
  example_q_codepoints |> codepoints_of_hex_list
  |> check_encode_ok example_q_punycode

let test_encode_example_r () =
  example_r_codepoints |> codepoints_of_hex_list
  |> check_encode_ok example_r_punycode
594
595(* UTF-8 roundtrip tests *)
(* Each test pushes one UTF-8 sample through [check_utf8_roundtrip]. *)
let test_utf8_roundtrip_german () =
  let sample = "münchen" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_chinese () =
  let sample = "中文" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_japanese () =
  let sample = "日本語" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_arabic () =
  let sample = "العربية" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_russian () =
  let sample = "русский" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_greek () =
  let sample = "ελληνικά" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_korean () =
  let sample = "한국어" in
  check_utf8_roundtrip sample

let test_utf8_roundtrip_emoji () =
  let sample = "hello👋world" in
  check_utf8_roundtrip sample
604
605(* Label encoding tests *)
(* An all-ASCII label is expected to come back from [encode_label]
   unchanged. *)
let test_label_encode_ascii () =
  match Punycode.encode_label "example" with
  | Error err ->
      Format.kasprintf fail "encode_label failed: %a" Punycode.pp_error err
  | Ok encoded -> check string "ascii passthrough" "example" encoded
611
(* [encode_label "münchen"] is expected to yield "xn--mnchen-3ya". *)
let test_label_encode_german () =
  match Punycode.encode_label "münchen" with
  | Error err ->
      Format.kasprintf fail "encode_label failed: %a" Punycode.pp_error err
  | Ok encoded -> check string "german label" "xn--mnchen-3ya" encoded
617
(* [decode_label "xn--mnchen-3ya"] is expected to yield "münchen". *)
let test_label_decode_german () =
  match Punycode.decode_label "xn--mnchen-3ya" with
  | Error err ->
      Format.kasprintf fail "decode_label failed: %a" Punycode.pp_error err
  | Ok decoded -> check string "german decode" "münchen" decoded
623
624(* IDNA tests *)
(* [to_ascii] on a domain whose first label is non-ASCII: the expected
   output has that label in "xn--" form and the other labels unchanged. *)
let test_idna_to_ascii_simple () =
  match Punycode_idna.to_ascii "münchen.example.com" with
  | Error err ->
      Format.kasprintf fail "to_ascii failed: %a" Punycode_idna.pp_error err
  | Ok ascii -> check string "idna to_ascii" "xn--mnchen-3ya.example.com" ascii
631
(* [to_unicode] on "xn--mnchen-3ya.example.com" is expected to give back
   "münchen.example.com". *)
let test_idna_to_unicode_simple () =
  match Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" with
  | Error err ->
      Format.kasprintf fail "to_unicode failed: %a" Punycode_idna.pp_error err
  | Ok unicode -> check string "idna to_unicode" "münchen.example.com" unicode
637
(* [to_ascii] followed by [to_unicode] must return the original domain. *)
let test_idna_roundtrip () =
  let original = "münchen.example.com" in
  match Punycode_idna.to_ascii original with
  | Ok ascii -> (
      match Punycode_idna.to_unicode ascii with
      | Ok unicode -> check string "idna roundtrip" original unicode
      | Error err ->
          Format.kasprintf fail "to_unicode failed: %a" Punycode_idna.pp_error
            err)
  | Error err ->
      Format.kasprintf fail "to_ascii failed: %a" Punycode_idna.pp_error err
649
(* A domain made only of ASCII labels is expected to pass through
   [to_ascii] unchanged. *)
let test_idna_all_ascii () =
  match Punycode_idna.to_ascii "www.example.com" with
  | Error err ->
      Format.kasprintf fail "to_ascii failed: %a" Punycode_idna.pp_error err
  | Ok ascii -> check string "all ascii passthrough" "www.example.com" ascii
655
(* [to_ascii] on a domain with one non-ASCII label: the result must carry the
   ACE prefix and must be strictly longer than, and end with,
   ".example.com". The exact encoded label is not checked. *)
let test_idna_mixed_labels () =
  let suffix = ".example.com" in
  match Punycode_idna.to_ascii "日本語.example.com" with
  | Error err ->
      Format.kasprintf fail "to_ascii failed: %a" Punycode_idna.pp_error err
  | Ok ascii ->
      check bool "has ace prefix" true (Punycode.has_ace_prefix ascii);
      let keeps_suffix =
        String.length ascii > String.length suffix
        && String.ends_with ~suffix ascii
      in
      check bool "ends with example.com" true keeps_suffix
666
667(* Case annotation tests *)
(* [decode_with_case] on the example (P) string: the code point count must
   match [example_p_codepoints], there must be one case flag per code point,
   and the first two flags must be uppercase ('M') then lowercase ('a'). *)
let test_case_annotation_decode () =
  match Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" with
  | Error err ->
      Format.kasprintf fail "decode_with_case failed: %a" Punycode.pp_error err
  | Ok (decoded, flags) ->
      let decoded_len = Array.length decoded in
      check int "codepoints length"
        (List.length example_p_codepoints)
        decoded_len;
      check int "case_flags length" decoded_len (Array.length flags);
      check bool "M uppercase" true (flags.(0) = Punycode.Uppercase);
      check bool "a lowercase" true (flags.(1) = Punycode.Lowercase)
683
(* [encode_with_case] on "abc" with flags upper/lower/upper is expected to
   produce "AbC-": the basic code points with the case annotation applied. *)
let test_case_annotation_encode () =
  let abc = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in
  let flags = Punycode.[| Uppercase; Lowercase; Uppercase |] in
  match Punycode.encode_with_case abc flags with
  | Error err ->
      Format.kasprintf fail "encode_with_case failed: %a" Punycode.pp_error err
  | Ok encoded -> check string "case encoded" "AbC-" encoded
696
697(* Edge case tests *)
(* Encoding an empty array must succeed and give the empty string. *)
let test_empty_input () =
  match Punycode.encode [||] with
  | Error _ -> fail "empty encode should succeed"
  | Ok encoded -> check string "empty encode" "" encoded
702
(* Decoding the empty string must succeed and give an empty array. *)
let test_empty_decode () =
  match Punycode.decode "" with
  | Error _ -> fail "empty decode should succeed"
  | Ok decoded -> check int "empty decode length" 0 (Array.length decoded)
707
(* Encoding the all-ASCII input "hello" is expected to give "hello-". *)
let test_pure_ascii () =
  match Punycode.encode (codepoints_of_string "hello") with
  | Error err ->
      Format.kasprintf fail "encode failed: %a" Punycode.pp_error err
  | Ok encoded -> check string "pure ascii" "hello-" encoded
713
(* Decoding "hello!" must fail with [Invalid_digit]; success or any other
   error fails the test. *)
let test_invalid_digit () =
  match Punycode.decode "hello!" with
  | Error (Punycode.Invalid_digit _) -> ()
  | Error other ->
      Format.kasprintf fail "wrong error type: %a" Punycode.pp_error other
  | Ok _ -> fail "should fail on invalid digit"
719
(* [encode_label] on a label of 100 'a' characters must fail with
   [Label_too_long]; success or any other error fails the test. *)
let test_label_too_long () =
  match Punycode.encode_label (String.make 100 'a') with
  | Error (Punycode.Label_too_long _) -> ()
  | Error other ->
      Format.kasprintf fail "wrong error type: %a" Punycode.pp_error other
  | Ok _ -> fail "should fail on long label"
726
(* [encode_label ""] must fail with [Empty_label]; success or any other
   error fails the test. *)
let test_empty_label () =
  match Punycode.encode_label "" with
  | Error Punycode.Empty_label -> ()
  | Error other ->
      Format.kasprintf fail "wrong error type: %a" Punycode.pp_error other
  | Ok _ -> fail "should fail on empty label"
732
733(* Validation tests *)
(* Table-driven check of [Punycode.is_basic]: code points 0x20, 0x41 and
   0x7F are expected to be basic, 0x80 and 0xFC are not. *)
let test_is_basic () =
  List.iter
    (fun (label, expected, cp) ->
      check bool label expected (Punycode.is_basic (Uchar.of_int cp)))
    [
      ("space is basic", true, 0x20);
      ("A is basic", true, 0x41);
      ("DEL is basic", true, 0x7F);
      ("0x80 not basic", false, 0x80);
      ("ü not basic", false, 0xFC);
    ]
740
(* Table-driven check of [Punycode.is_ascii_string], including the empty
   string, which is expected to count as ASCII. *)
let test_is_ascii_string () =
  List.iter
    (fun (label, expected, input) ->
      check bool label expected (Punycode.is_ascii_string input))
    [
      ("ascii string", true, "hello");
      ("non-ascii string", false, "héllo");
      ("empty string", true, "");
    ]
745
(* Table-driven check of [Punycode.has_ace_prefix]: both "xn--" and "XN--"
   are expected to match; an unprefixed label and the too-short "xn-" are
   not. *)
let test_has_ace_prefix () =
  List.iter
    (fun (label, expected, input) ->
      check bool label expected (Punycode.has_ace_prefix input))
    [
      ("has xn--", true, "xn--mnchen-3ya");
      ("has XN--", true, "XN--mnchen-3ya");
      ("no prefix", false, "example");
      ("too short", false, "xn-");
    ]
751
752(* Test suites *)
(* Decode suite: every entry is a [`Quick] test case. *)
let decode_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("Arabic", test_decode_arabic);
      ("Chinese simplified", test_decode_chinese_simplified);
      ("Chinese traditional", test_decode_chinese_traditional);
      ("Czech", test_decode_czech);
      ("Hebrew", test_decode_hebrew);
      ("Hindi", test_decode_hindi);
      ("Japanese", test_decode_japanese);
      ("Korean", test_decode_korean);
      ("Russian", test_decode_russian);
      ("Spanish", test_decode_spanish);
      ("Vietnamese", test_decode_vietnamese);
      ("Example L (mixed)", test_decode_example_l);
      ("Example M (mixed)", test_decode_example_m);
      ("Example N (mixed)", test_decode_example_n);
      ("Example O (mixed)", test_decode_example_o);
      ("Example P (mixed)", test_decode_example_p);
      ("Example Q (mixed)", test_decode_example_q);
      ("Example R", test_decode_example_r);
      ("Example S (ASCII)", test_decode_example_s);
    ]
775
(* Encode suite: every entry is a [`Quick] test case. *)
let encode_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("Arabic", test_encode_arabic);
      ("Chinese simplified", test_encode_chinese_simplified);
      ("Chinese traditional", test_encode_chinese_traditional);
      ("Hebrew", test_encode_hebrew);
      ("Hindi", test_encode_hindi);
      ("Japanese", test_encode_japanese);
      ("Korean", test_encode_korean);
      ("Example L (mixed)", test_encode_example_l);
      ("Example M (mixed)", test_encode_example_m);
      ("Example N (mixed)", test_encode_example_n);
      ("Example O (mixed)", test_encode_example_o);
      ("Example Q (mixed)", test_encode_example_q);
      ("Example R", test_encode_example_r);
    ]
792
(* UTF-8 roundtrip suite: every entry is a [`Quick] test case. *)
let utf8_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("German roundtrip", test_utf8_roundtrip_german);
      ("Chinese roundtrip", test_utf8_roundtrip_chinese);
      ("Japanese roundtrip", test_utf8_roundtrip_japanese);
      ("Arabic roundtrip", test_utf8_roundtrip_arabic);
      ("Russian roundtrip", test_utf8_roundtrip_russian);
      ("Greek roundtrip", test_utf8_roundtrip_greek);
      ("Korean roundtrip", test_utf8_roundtrip_korean);
      ("Emoji roundtrip", test_utf8_roundtrip_emoji);
    ]
804
(* Label suite: every entry is a [`Quick] test case. *)
let label_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("ASCII passthrough", test_label_encode_ascii);
      ("German encode", test_label_encode_german);
      ("German decode", test_label_decode_german);
    ]
811
(* IDNA suite: every entry is a [`Quick] test case. *)
let idna_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("to_ascii simple", test_idna_to_ascii_simple);
      ("to_unicode simple", test_idna_to_unicode_simple);
      ("roundtrip", test_idna_roundtrip);
      ("all ASCII", test_idna_all_ascii);
      ("mixed labels", test_idna_mixed_labels);
    ]
820
(* Case-annotation suite: every entry is a [`Quick] test case. *)
let case_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("decode with case", test_case_annotation_decode);
      ("encode with case", test_case_annotation_encode);
    ]
826
(* Edge-case suite: every entry is a [`Quick] test case. *)
let edge_case_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("empty encode", test_empty_input);
      ("empty decode", test_empty_decode);
      ("pure ASCII", test_pure_ascii);
      ("invalid digit", test_invalid_digit);
      ("label too long", test_label_too_long);
      ("empty label", test_empty_label);
    ]
836
(* Validation suite: every entry is a [`Quick] test case. *)
let validation_tests =
  List.map
    (fun (label, body) -> (label, `Quick, body))
    [
      ("is_basic", test_is_basic);
      ("is_ascii_string", test_is_ascii_string);
      ("has_ace_prefix", test_has_ace_prefix);
    ]
843
(* Entry point: hand all suites to the Alcotest runner under the name
   "Punycode". *)
let () =
  let suites =
    [
      ("decode RFC vectors", decode_tests);
      ("encode RFC vectors", encode_tests);
      ("UTF-8 roundtrip", utf8_tests);
      ("label operations", label_tests);
      ("IDNA operations", idna_tests);
      ("case annotation", case_tests);
      ("edge cases", edge_case_tests);
      ("validation", validation_tests);
    ]
  in
  run "Punycode" suites
855 ]