···991010(* Test that encode_utf8 never crashes on arbitrary input *)
1111let test_encode_no_crash input =
1212- ignore (Punycode.encode_utf8 input);
1212+ (try ignore (Punycode.encode_utf8 input) with Punycode.Error _ -> ());
1313 check true
14141515(* Test that decode_utf8 never crashes on arbitrary input *)
1616let test_decode_no_crash input =
1717- ignore (Punycode.decode_utf8 input);
1717+ (try ignore (Punycode.decode_utf8 input) with Punycode.Error _ -> ());
1818 check true
19192020(* Test roundtrip: encode then decode should give back original (case-insensitive)
2121 IDNA/Punycode lowercases ASCII characters during encoding per RFC 5891 *)
2222let test_roundtrip input =
2323- match Punycode.encode_utf8 input with
2424- | Ok encoded -> (
2525- match Punycode.decode_utf8 encoded with
2626- | Ok decoded ->
2727- (* Compare lowercase versions since IDNA lowercases ASCII *)
2828- check_eq ~pp:Format.pp_print_string
2929- (String.lowercase_ascii input)
3030- (String.lowercase_ascii decoded)
3131- | Error _ ->
3232- (* Some encoded values might not decode, that's ok for fuzz testing *)
3333- check true)
3434- | Error _ ->
3535- (* Some inputs might not encode, that's ok *)
3636- check true
2323+ (try
2424+ let encoded = Punycode.encode_utf8 input in
2525+ let decoded = Punycode.decode_utf8 encoded in
2626+ (* Compare lowercase versions since IDNA lowercases ASCII *)
2727+ check_eq ~pp:Format.pp_print_string
2828+ (String.lowercase_ascii input)
2929+ (String.lowercase_ascii decoded)
3030+ with Punycode.Error _ ->
3131+ (* Some inputs might not encode/decode, that's ok *)
3232+ check true)
37333834(* Test ASCII-only strings (should pass through mostly unchanged) *)
3935let test_ascii_string input =
···4541 Char.chr (Char.code input.[i mod String.length input] mod 128))
4642 in
4743 if String.length ascii_only > 0 then
4848- ignore (Punycode.encode_utf8 ascii_only)
4444+ (try ignore (Punycode.encode_utf8 ascii_only) with Punycode.Error _ -> ())
4945 end;
5046 check true
51475248(* Test inputs starting with ACE prefix "xn--" *)
5349let test_ace_prefix input =
5450 let ace_input = "xn--" ^ input in
5555- ignore (Punycode.decode_utf8 ace_input);
5151+ (try ignore (Punycode.decode_utf8 ace_input) with Punycode.Error _ -> ());
5652 check true
57535854let () =
+178-211
lib/punycode.ml
···30303131(* {1 Error Types} *)
32323333-type error =
3333+type error_reason =
3434 | Overflow of position
3535 | Invalid_character of position * Uchar.t
3636 | Invalid_digit of position * char
···3939 | Label_too_long of int
4040 | Empty_label
41414242-let pp_error fmt = function
4242+let pp_error_reason fmt = function
4343 | Overflow pos ->
4444 Format.fprintf fmt "arithmetic overflow at %a" pp_position pos
4545 | Invalid_character (pos, u) ->
···5757 max_label_length
5858 | Empty_label -> Format.fprintf fmt "empty label"
59596060-let error_to_string err = Format.asprintf "%a" pp_error err
6060+exception Error of error_reason
6161+6262+let () = Printexc.register_printer (function
6363+ | Error reason -> Some (Format.asprintf "Punycode.Error: %a" pp_error_reason reason)
6464+ | _ -> None)
6565+6666+let error_reason_to_string reason = Format.asprintf "%a" pp_error_reason reason
61676268(* {1 Error Constructors} *)
63696464-let overflow pos = Error (Overflow pos)
6565-let invalid_character pos u = Error (Invalid_character (pos, u))
6666-let invalid_digit pos c = Error (Invalid_digit (pos, c))
6767-let unexpected_end pos = Error (Unexpected_end pos)
6868-let _invalid_utf8 pos = Error (Invalid_utf8 pos)
6969-let label_too_long len = Error (Label_too_long len)
7070-let empty_label = Error Empty_label
7070+let overflow pos = raise (Error (Overflow pos))
7171+let invalid_character pos u = raise (Error (Invalid_character (pos, u)))
7272+let invalid_digit pos c = raise (Error (Invalid_digit (pos, c)))
7373+let unexpected_end pos = raise (Error (Unexpected_end pos))
7474+let invalid_utf8 pos = raise (Error (Invalid_utf8 pos))
7575+let label_too_long len = raise (Error (Label_too_long len))
7676+let empty_label () = raise (Error Empty_label)
71777278(* {1 Case Flags} *)
7379···134140let max_int_value = max_int
135141136142let safe_mul_add a b c pos =
137137- if c = 0 then Ok a
143143+ if c = 0 then a
138144 else if b > (max_int_value - a) / c then overflow pos
139139- else Ok (a + (b * c))
145145+ else a + (b * c)
140146141147(* {1 UTF-8 to Code Points Conversion} *)
142148···145151 let acc = ref [] in
146152 let byte_offset = ref 0 in
147153 let char_index = ref 0 in
148148- let error = ref None in
149149- while !byte_offset < len && !error = None do
154154+ while !byte_offset < len do
150155 let pos = { byte_offset = !byte_offset; char_index = !char_index } in
151156 let dec = String.get_utf_8_uchar s !byte_offset in
152157 if Uchar.utf_decode_is_valid dec then begin
···154159 byte_offset := !byte_offset + Uchar.utf_decode_length dec;
155160 incr char_index
156161 end
157157- else begin
158158- error := Some (Invalid_utf8 pos)
159159- end
162162+ else invalid_utf8 pos
160163 done;
161161- match !error with
162162- | Some e -> Error e
163163- | None -> Ok (Array.of_list (List.rev !acc))
164164+ Array.of_list (List.rev !acc)
164165165166(* {1 Code Points to UTF-8 Conversion} *)
166167···173174174175let encode_impl codepoints case_flags =
175176 let input_length = Array.length codepoints in
176176- if input_length = 0 then Ok ""
177177+ if input_length = 0 then ""
177178 else begin
178179 let output = Buffer.create (input_length * 2) in
179180···210211 let delta = ref 0 in
211212 let bias = ref initial_bias in
212213213213- let result = ref (Ok ()) in
214214-215215- while !h < input_length && !result = Ok () do
214214+ while !h < input_length do
216215 (* Find minimum code point >= n *)
217216 let m =
218217 Array.fold_left
···224223225224 (* Increase delta to advance state to <m, 0> *)
226225 let pos = { byte_offset = 0; char_index = !h } in
227227- match safe_mul_add !delta (m - !n) (!h + 1) pos with
228228- | Error e -> result := Error e
229229- | Ok new_delta ->
230230- delta := new_delta;
231231- n := m;
226226+ delta := safe_mul_add !delta (m - !n) (!h + 1) pos;
227227+ n := m;
228228+229229+ (* Process each code point *)
230230+ for j = 0 to input_length - 1 do
231231+ let cp = Uchar.to_int codepoints.(j) in
232232+ let pos = { byte_offset = 0; char_index = j } in
232233233233- (* Process each code point *)
234234- let j = ref 0 in
235235- while !j < input_length && !result = Ok () do
236236- let cp = Uchar.to_int codepoints.(!j) in
237237- let pos = { byte_offset = 0; char_index = !j } in
234234+ if cp < !n then begin
235235+ incr delta;
236236+ if !delta = 0 then (* Overflow *)
237237+ overflow pos
238238+ end
239239+ else if cp = !n then begin
240240+ (* Encode delta as variable-length integer *)
241241+ let q = ref !delta in
242242+ let k = ref base in
243243+ let done_encoding = ref false in
238244239239- if cp < !n then begin
240240- incr delta;
241241- if !delta = 0 then (* Overflow *)
242242- result := overflow pos
245245+ while not !done_encoding do
246246+ let t =
247247+ if !k <= !bias then tmin
248248+ else if !k >= !bias + tmax then tmax
249249+ else !k - !bias
250250+ in
251251+ if !q < t then begin
252252+ (* Output final digit *)
253253+ let case =
254254+ match case_flags with
255255+ | Some flags -> flags.(j)
256256+ | None -> Lowercase
257257+ in
258258+ Buffer.add_char output (encode_digit !q case);
259259+ done_encoding := true
260260+ end
261261+ else begin
262262+ (* Output intermediate digit and continue *)
263263+ let digit = t + ((!q - t) mod (base - t)) in
264264+ Buffer.add_char output (encode_digit digit Lowercase);
265265+ q := (!q - t) / (base - t);
266266+ k := !k + base
243267 end
244244- else if cp = !n then begin
245245- (* Encode delta as variable-length integer *)
246246- let q = ref !delta in
247247- let k = ref base in
248248- let done_encoding = ref false in
268268+ done;
249269250250- while not !done_encoding do
251251- let t =
252252- if !k <= !bias then tmin
253253- else if !k >= !bias + tmax then tmax
254254- else !k - !bias
255255- in
256256- if !q < t then begin
257257- (* Output final digit *)
258258- let case =
259259- match case_flags with
260260- | Some flags -> flags.(!j)
261261- | None -> Lowercase
262262- in
263263- Buffer.add_char output (encode_digit !q case);
264264- done_encoding := true
265265- end
266266- else begin
267267- (* Output intermediate digit and continue *)
268268- let digit = t + ((!q - t) mod (base - t)) in
269269- Buffer.add_char output (encode_digit digit Lowercase);
270270- q := (!q - t) / (base - t);
271271- k := !k + base
272272- end
273273- done;
270270+ bias := adapt ~delta:!delta ~numpoints:(!h + 1) ~firsttime:(!h = b);
271271+ delta := 0;
272272+ incr h
273273+ end
274274+ done;
274275275275- bias := adapt ~delta:!delta ~numpoints:(!h + 1) ~firsttime:(!h = b);
276276- delta := 0;
277277- incr h
278278- end;
279279- incr j
280280- done;
281281-282282- incr delta;
283283- incr n
276276+ incr delta;
277277+ incr n
284278 done;
285279286286- match !result with
287287- | Error e -> Error e
288288- | Ok () -> Ok (Buffer.contents output)
280280+ Buffer.contents output
289281 end
290282291283let encode codepoints = encode_impl codepoints None
···299291300292let decode_impl input =
301293 let input_length = String.length input in
302302- if input_length = 0 then Ok ([||], [||])
294294+ if input_length = 0 then ([||], [||])
303295 else begin
304296 (* Find last delimiter *)
305297 let b = Option.value ~default:0 (String.rindex_opt input delimiter) in
···307299 (* Copy basic code points and extract case flags *)
308300 let output = ref [] in
309301 let case_output = ref [] in
310310- let error = ref None in
311302312303 for j = 0 to b - 1 do
313313- if !error = None then begin
314314- let c = input.[j] in
315315- let pos = { byte_offset = j; char_index = j } in
316316- let code = Char.code c in
317317- if code >= 0x80 then
318318- error := Some (Invalid_character (pos, Uchar.of_int code))
319319- else begin
320320- output := Uchar.of_int code :: !output;
321321- case_output :=
322322- (if is_flagged c then Uppercase else Lowercase) :: !case_output
323323- end
304304+ let c = input.[j] in
305305+ let pos = { byte_offset = j; char_index = j } in
306306+ let code = Char.code c in
307307+ if code >= 0x80 then
308308+ invalid_character pos (Uchar.of_int code)
309309+ else begin
310310+ output := Uchar.of_int code :: !output;
311311+ case_output :=
312312+ (if is_flagged c then Uppercase else Lowercase) :: !case_output
324313 end
325314 done;
326315327327- match !error with
328328- | Some e -> Error e
329329- | None -> (
330330- let output = ref (Array.of_list (List.rev !output)) in
331331- let case_output = ref (Array.of_list (List.rev !case_output)) in
316316+ let output = ref (Array.of_list (List.rev !output)) in
317317+ let case_output = ref (Array.of_list (List.rev !case_output)) in
332318333333- (* Main decoding loop *)
334334- let n = ref initial_n in
335335- let i = ref 0 in
336336- let bias = ref initial_bias in
337337- let in_pos = ref (if b > 0 then b + 1 else 0) in
338338- let result = ref (Ok ()) in
319319+ (* Main decoding loop *)
320320+ let n = ref initial_n in
321321+ let i = ref 0 in
322322+ let bias = ref initial_bias in
323323+ let in_pos = ref (if b > 0 then b + 1 else 0) in
339324340340- while !in_pos < input_length && !result = Ok () do
341341- let oldi = !i in
342342- let w = ref 1 in
343343- let k = ref base in
344344- let done_decoding = ref false in
325325+ while !in_pos < input_length do
326326+ let oldi = !i in
327327+ let w = ref 1 in
328328+ let k = ref base in
329329+ let done_decoding = ref false in
345330346346- while (not !done_decoding) && !result = Ok () do
347347- let pos =
348348- { byte_offset = !in_pos; char_index = Array.length !output }
349349- in
331331+ while not !done_decoding do
332332+ let pos =
333333+ { byte_offset = !in_pos; char_index = Array.length !output }
334334+ in
350335351351- if !in_pos >= input_length then begin
352352- result := unexpected_end pos;
353353- done_decoding := true
354354- end
355355- else begin
356356- let c = input.[!in_pos] in
357357- incr in_pos;
336336+ if !in_pos >= input_length then
337337+ unexpected_end pos
338338+ else begin
339339+ let c = input.[!in_pos] in
340340+ incr in_pos;
358341359359- match decode_digit c with
360360- | None ->
361361- result := invalid_digit pos c;
362362- done_decoding := true
363363- | Some digit -> (
364364- (* i = i + digit * w, with overflow check *)
365365- match safe_mul_add !i digit !w pos with
366366- | Error e ->
367367- result := Error e;
368368- done_decoding := true
369369- | Ok new_i ->
370370- i := new_i;
342342+ match decode_digit c with
343343+ | None -> invalid_digit pos c
344344+ | Some digit ->
345345+ (* i = i + digit * w, with overflow check *)
346346+ i := safe_mul_add !i digit !w pos;
371347372372- let t =
373373- if !k <= !bias then tmin
374374- else if !k >= !bias + tmax then tmax
375375- else !k - !bias
376376- in
348348+ let t =
349349+ if !k <= !bias then tmin
350350+ else if !k >= !bias + tmax then tmax
351351+ else !k - !bias
352352+ in
377353378378- if digit < t then begin
379379- (* Record case flag from this final digit *)
380380- done_decoding := true
381381- end
382382- else begin
383383- (* w = w * (base - t), with overflow check *)
384384- let base_minus_t = base - t in
385385- if !w > max_int_value / base_minus_t then begin
386386- result := overflow pos;
387387- done_decoding := true
388388- end
389389- else begin
390390- w := !w * base_minus_t;
391391- k := !k + base
392392- end
393393- end)
394394- end
395395- done;
354354+ if digit < t then
355355+ (* Record case flag from this final digit *)
356356+ done_decoding := true
357357+ else begin
358358+ (* w = w * (base - t), with overflow check *)
359359+ let base_minus_t = base - t in
360360+ if !w > max_int_value / base_minus_t then
361361+ overflow pos
362362+ else begin
363363+ w := !w * base_minus_t;
364364+ k := !k + base
365365+ end
366366+ end
367367+ end
368368+ done;
396369397397- if !result = Ok () then begin
398398- let out_len = Array.length !output in
399399- bias :=
400400- adapt ~delta:(!i - oldi) ~numpoints:(out_len + 1)
401401- ~firsttime:(oldi = 0);
370370+ let out_len = Array.length !output in
371371+ bias :=
372372+ adapt ~delta:(!i - oldi) ~numpoints:(out_len + 1)
373373+ ~firsttime:(oldi = 0);
402374403403- let pos = { byte_offset = !in_pos - 1; char_index = out_len } in
375375+ let pos = { byte_offset = !in_pos - 1; char_index = out_len } in
404376405405- (* n = n + i / (out_len + 1), with overflow check *)
406406- let increment = !i / (out_len + 1) in
407407- if increment > max_int_value - !n then result := overflow pos
408408- else begin
409409- n := !n + increment;
410410- i := !i mod (out_len + 1);
377377+ (* n = n + i / (out_len + 1), with overflow check *)
378378+ let increment = !i / (out_len + 1) in
379379+ if increment > max_int_value - !n then overflow pos
380380+ else begin
381381+ n := !n + increment;
382382+ i := !i mod (out_len + 1);
411383412412- (* Validate that n is a valid Unicode scalar value *)
413413- if not (Uchar.is_valid !n) then
414414- result := invalid_character pos Uchar.rep
415415- else begin
416416- (* Insert n at position i *)
417417- let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in
418418- let new_case = Array.make (out_len + 1) Lowercase in
384384+ (* Validate that n is a valid Unicode scalar value *)
385385+ if not (Uchar.is_valid !n) then
386386+ invalid_character pos Uchar.rep
387387+ else begin
388388+ (* Insert n at position i *)
389389+ let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in
390390+ let new_case = Array.make (out_len + 1) Lowercase in
419391420420- for j = 0 to !i - 1 do
421421- new_output.(j) <- !output.(j);
422422- new_case.(j) <- !case_output.(j)
423423- done;
424424- new_output.(!i) <- Uchar.of_int !n;
425425- (* Case flag from final digit of this delta *)
426426- new_case.(!i) <-
427427- (if !in_pos > 0 && is_flagged input.[!in_pos - 1] then
428428- Uppercase
429429- else Lowercase);
430430- for j = !i to out_len - 1 do
431431- new_output.(j + 1) <- !output.(j);
432432- new_case.(j + 1) <- !case_output.(j)
433433- done;
392392+ for j = 0 to !i - 1 do
393393+ new_output.(j) <- !output.(j);
394394+ new_case.(j) <- !case_output.(j)
395395+ done;
396396+ new_output.(!i) <- Uchar.of_int !n;
397397+ (* Case flag from final digit of this delta *)
398398+ new_case.(!i) <-
399399+ (if !in_pos > 0 && is_flagged input.[!in_pos - 1] then
400400+ Uppercase
401401+ else Lowercase);
402402+ for j = !i to out_len - 1 do
403403+ new_output.(j + 1) <- !output.(j);
404404+ new_case.(j + 1) <- !case_output.(j)
405405+ done;
434406435435- output := new_output;
436436- case_output := new_case;
437437- incr i
438438- end
439439- end
440440- end
441441- done;
407407+ output := new_output;
408408+ case_output := new_case;
409409+ incr i
410410+ end
411411+ end
412412+ done;
442413443443- match !result with
444444- | Error e -> Error e
445445- | Ok () -> Ok (!output, !case_output))
414414+ (!output, !case_output)
446415 end
447416448448-let decode input = Result.map fst (decode_impl input)
417417+let decode input = fst (decode_impl input)
449418let decode_with_case input = decode_impl input
450419451420(* {1 UTF-8 String Operations} *)
452421453422let encode_utf8 s =
454454- let open Result.Syntax in
455455- let* codepoints = utf8_to_codepoints s in
423423+ let codepoints = utf8_to_codepoints s in
456424 encode codepoints
457425458426let decode_utf8 punycode =
459459- let open Result.Syntax in
460460- let+ codepoints = decode punycode in
427427+ let codepoints = decode punycode in
461428 codepoints_to_utf8 codepoints
462429463430(* {1 Domain Label Operations} *)
464431465432let encode_label label =
466466- if String.length label = 0 then empty_label
433433+ if String.length label = 0 then empty_label ()
467434 else if is_ascii_string label then begin
468435 (* All ASCII - return as-is, but check length *)
469436 let len = String.length label in
470470- if len > max_label_length then label_too_long len else Ok label
437437+ if len > max_label_length then label_too_long len else label
471438 end
472472- else
439439+ else begin
473440 (* Has non-ASCII - encode with Punycode *)
474474- let open Result.Syntax in
475475- let* encoded = encode_utf8 label in
441441+ let encoded = encode_utf8 label in
476442 let result = ace_prefix ^ encoded in
477443 let len = String.length result in
478478- if len > max_label_length then label_too_long len else Ok result
444444+ if len > max_label_length then label_too_long len else result
445445+ end
479446480447let decode_label label =
481481- if String.length label = 0 then empty_label
448448+ if String.length label = 0 then empty_label ()
482449 else if has_ace_prefix label then begin
483450 (* Remove ACE prefix and decode *)
484451 let punycode = String.sub label 4 (String.length label - 4) in
···486453 end
487454 else begin
488455 (* No ACE prefix - validate and return *)
489489- if is_ascii_string label then Ok label
456456+ if is_ascii_string label then label
490457 else
491458 (* Has non-ASCII but no ACE prefix - return as-is *)
492492- Ok label
459459+ label
493460 end
+41-25
lib/punycode.mli
···38383939(** {1 Error Types} *)
40404141-type error =
4141+type error_reason =
4242 | Overflow of position
4343 (** Arithmetic overflow during encode/decode. This can occur with very
4444 long strings or extreme Unicode code point values. See
···6666 is the actual length. *)
6767 | Empty_label (** Empty label is not valid for encoding. *)
68686969-val pp_error : Format.formatter -> error -> unit
7070-(** [pp_error fmt e] pretty-prints an error with position information. *)
6969+exception Error of error_reason
7070+(** Exception raised for all Punycode encoding/decoding errors. *)
7171+7272+val pp_error_reason : Format.formatter -> error_reason -> unit
7373+(** [pp_error_reason fmt e] pretty-prints an error with position information. *)
71747272-val error_to_string : error -> string
7373-(** [error_to_string e] converts an error to a human-readable string. *)
7575+val error_reason_to_string : error_reason -> string
7676+(** [error_reason_to_string e] converts an error to a human-readable string. *)
74777578(** {1 Constants}
7679···108111 6}. They operate on arrays of Unicode code points ([Uchar.t array]). The
109112 encoded output is a plain ASCII string without the ACE prefix. *)
110113111111-val encode : Uchar.t array -> (string, error) result
114114+val encode : Uchar.t array -> string
112115(** [encode codepoints] encodes an array of Unicode code points to a Punycode
113116 ASCII string.
114117···123126 are any basic code points 3. Non-basic code points are encoded as deltas
124127 using the generalized variable-length integer representation from
125128 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-3.3}Section 3.3}
129129+130130+ @raise Error on encoding failure (overflow, etc.)
126131127132 Example:
128133 {[
129134 encode [| Uchar.of_int 0x4ED6; Uchar.of_int 0x4EEC; ... |]
130130- (* = Ok "ihqwcrb4cv8a8dqg056pqjye" *)
135135+ (* = "ihqwcrb4cv8a8dqg056pqjye" *)
131136 ]} *)
132137133133-val decode : string -> (Uchar.t array, error) result
138138+val decode : string -> Uchar.t array
134139(** [decode punycode] decodes a Punycode ASCII string to an array of Unicode
135140 code points.
136141···144149 5}: "A decoder MUST recognize the letters in both uppercase and lowercase
145150 forms".
146151152152+ @raise Error on decoding failure (invalid digit, unexpected end, etc.)
153153+147154 Example:
148155 {[
149156 decode "ihqwcrb4cv8a8dqg056pqjye"
150150- (* = Ok [| U+4ED6; U+4EEC; U+4E3A; ... |] (Chinese simplified) *)
157157+ (* = [| U+4ED6; U+4EEC; U+4E3A; ... |] (Chinese simplified) *)
151158 ]} *)
152159153160(** {1 Mixed-Case Annotation}
···156163 {{:https://datatracker.ietf.org/doc/html/rfc3492#appendix-A}RFC 3492
157164 Appendix A}. *)
158165159159-val encode_with_case :
160160- Uchar.t array -> case_flag array -> (string, error) result
166166+val encode_with_case : Uchar.t array -> case_flag array -> string
161167(** [encode_with_case codepoints case_flags] encodes with case annotation.
162168163169 Per
···169175170176 The [case_flags] array must have the same length as [codepoints].
171177172172- @raise Invalid_argument if array lengths don't match. *)
178178+ @raise Invalid_argument if array lengths don't match.
179179+ @raise Error on encoding failure. *)
173180174174-val decode_with_case : string -> (Uchar.t array * case_flag array, error) result
181181+val decode_with_case : string -> Uchar.t array * case_flag array
175182(** [decode_with_case punycode] decodes and extracts case annotations.
176183177184 Per
178185 {{:https://datatracker.ietf.org/doc/html/rfc3492#appendix-A}RFC 3492
179186 Appendix A}, returns both the decoded code points and an array of case
180187 flags indicating the suggested case for each character based on the
181181- uppercase/lowercase form of the encoding digits. *)
188188+ uppercase/lowercase form of the encoding digits.
189189+190190+ @raise Error on decoding failure. *)
182191183192(** {1 UTF-8 String Operations}
184193185194 Convenience functions that work directly with UTF-8 encoded OCaml strings.
186195 These combine UTF-8 decoding/encoding with the core Punycode operations. *)
187196188188-val encode_utf8 : string -> (string, error) result
197197+val encode_utf8 : string -> string
189198(** [encode_utf8 s] encodes a UTF-8 string to Punycode (no ACE prefix).
190199191200 This is equivalent to decoding [s] from UTF-8 to code points, then calling
192201 {!encode}.
193202203203+ @raise Error on encoding failure.
204204+194205 Example:
195206 {[
196207 encode_utf8 "münchen"
197197- (* = Ok "mnchen-3ya" *)
208208+ (* = "mnchen-3ya" *)
198209 ]} *)
199210200200-val decode_utf8 : string -> (string, error) result
211211+val decode_utf8 : string -> string
201212(** [decode_utf8 punycode] decodes Punycode to a UTF-8 string (no ACE prefix).
202213203214 This is equivalent to calling {!decode} then encoding the result as UTF-8.
204215216216+ @raise Error on decoding failure.
217217+205218 Example:
206219 {[
207220 decode_utf8 "mnchen-3ya"
208208- (* = Ok "münchen" *)
221221+ (* = "münchen" *)
209222 ]} *)
210223211224(** {1 Domain Label Operations}
···214227 length limits per
215228 {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}. *)
216229217217-val encode_label : string -> (string, error) result
230230+val encode_label : string -> string
218231(** [encode_label label] encodes a domain label for use in DNS.
219232220233 If the label contains only ASCII characters, it is returned unchanged.
···223236 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-5} RFC 3492 Section
224237 5}.
225238226226- Returns {!Error} {!Label_too_long} if the result exceeds 63 bytes.
239239+ @raise Error with {!Label_too_long} if the result exceeds 63 bytes.
240240+ @raise Error with {!Empty_label} if the label is empty.
227241228242 Example:
229243 {[
230244 encode_label "münchen"
231231- (* = Ok "xn--mnchen-3ya" *)
245245+ (* = "xn--mnchen-3ya" *)
232246 encode_label "example"
233233- (* = Ok "example" *)
247247+ (* = "example" *)
234248 ]} *)
235249236236-val decode_label : string -> (string, error) result
250250+val decode_label : string -> string
237251(** [decode_label label] decodes a domain label.
238252239253 If the label starts with the ACE prefix ("xn--", case-insensitive), it is
240254 Punycode-decoded. Otherwise, it is returned unchanged.
255255+256256+ @raise Error on decoding failure.
241257242258 Example:
243259 {[
244260 decode_label "xn--mnchen-3ya"
245245- (* = Ok "münchen" *)
261261+ (* = "münchen" *)
246262 decode_label "example"
247247- (* = Ok "example" *)
263263+ (* = "example" *)
248264 ]} *)
249265250266(** {1 Validation}
+47-53
lib/punycode_idna.ml
···991010(* {1 Error Types} *)
11111212-type error =
1313- | Punycode_error of Punycode.error
1212+type error_reason =
1313+ | Punycode_error of Punycode.error_reason
1414 | Invalid_label of string
1515 | Domain_too_long of int
1616 | Normalization_failed
1717 | Verification_failed
18181919-let pp_error fmt = function
1919+let pp_error_reason fmt = function
2020 | Punycode_error e ->
2121- Format.fprintf fmt "Punycode error: %a" Punycode.pp_error e
2121+ Format.fprintf fmt "Punycode error: %a" Punycode.pp_error_reason e
2222 | Invalid_label msg -> Format.fprintf fmt "invalid label: %s" msg
2323 | Domain_too_long len ->
2424 Format.fprintf fmt "domain too long: %d bytes (max %d)" len
···2727 | Verification_failed ->
2828 Format.fprintf fmt "IDNA verification failed (round-trip mismatch)"
29293030-let error_to_string err = Format.asprintf "%a" pp_error err
3030+exception Error of error_reason
3131+3232+let () = Printexc.register_printer (function
3333+ | Error reason -> Some (Format.asprintf "Punycode_idna.Error: %a" pp_error_reason reason)
3434+ | _ -> None)
3535+3636+let error_reason_to_string reason = Format.asprintf "%a" pp_error_reason reason
31373238(* {1 Error Constructors} *)
33393434-let punycode_error e = Error (Punycode_error e)
3535-let invalid_label msg = Error (Invalid_label msg)
3636-let domain_too_long len = Error (Domain_too_long len)
3737-let _normalization_failed = Error Normalization_failed
3838-let verification_failed = Error Verification_failed
4040+let punycode_error e = raise (Error (Punycode_error e))
4141+let invalid_label msg = raise (Error (Invalid_label msg))
4242+let domain_too_long len = raise (Error (Domain_too_long len))
4343+let verification_failed () = raise (Error Verification_failed)
39444045(* {1 Unicode Normalization} *)
4146···8287 invalid_label "STD3 rules violation"
8388 else if check_hyphens && not (check_hyphen_rules label) then
8489 invalid_label "invalid hyphen placement"
8585- else Ok label
9090+ else label
8691 end
8792 else begin
8893 (* Has non-ASCII - normalize and encode *)
8994 let normalized = normalize_nfc label in
90959196 (* Encode to Punycode *)
9292- match Punycode.encode_utf8 normalized with
9393- | Error e -> punycode_error e
9494- | Ok encoded -> (
9595- let result = Punycode.ace_prefix ^ encoded in
9696- let result_len = String.length result in
9797- if result_len > Punycode.max_label_length then
9898- punycode_error (Punycode.Label_too_long result_len)
9999- else if check_hyphens && not (check_hyphen_rules result) then
100100- invalid_label "invalid hyphen placement in encoded label"
101101- else
102102- (* Verification: decode and compare to original normalized form *)
103103- match Punycode.decode_utf8 encoded with
104104- | Error _ -> verification_failed
105105- | Ok decoded ->
106106- if decoded <> normalized then verification_failed else Ok result)
9797+ let encoded =
9898+ try Punycode.encode_utf8 normalized
9999+ with Punycode.Error e -> punycode_error e
100100+ in
101101+ let result = Punycode.ace_prefix ^ encoded in
102102+ let result_len = String.length result in
103103+ if result_len > Punycode.max_label_length then
104104+ punycode_error (Punycode.Label_too_long result_len)
105105+ else if check_hyphens && not (check_hyphen_rules result) then
106106+ invalid_label "invalid hyphen placement in encoded label"
107107+ else
108108+ (* Verification: decode and compare to original normalized form *)
109109+ let decoded =
110110+ try Punycode.decode_utf8 encoded
111111+ with Punycode.Error _ -> verification_failed ()
112112+ in
113113+ if decoded <> normalized then verification_failed () else result
107114 end
108115109116let label_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) label =
···112119let label_to_unicode label =
113120 if is_ace_label label then begin
114121 let encoded = String.sub label 4 (String.length label - 4) in
115115- match Punycode.decode_utf8 encoded with
116116- | Error e -> punycode_error e
117117- | Ok decoded -> Ok decoded
122122+ try Punycode.decode_utf8 encoded
123123+ with Punycode.Error e -> punycode_error e
118124 end
119119- else Ok label
125125+ else label
120126121127(* {1 Domain Operations} *)
122128···126132(* Join labels into domain *)
127133let join_labels labels = String.concat "." labels
128134129129-(* Map a function returning Result over a list, short-circuiting on first Error *)
130130-let map_result f lst =
131131- List.fold_right
132132- (fun x acc ->
133133- let open Result.Syntax in
134134- let* y = f x in
135135- let+ ys = acc in
136136- y :: ys)
137137- lst (Ok [])
138138-139135let to_ascii ?(check_hyphens = true) ?(check_bidi = false)
140136 ?(check_joiners = false) ?(use_std3_rules = false) ?(transitional = false)
141137 domain =
···145141 let _ = check_joiners in
146142 let _ = transitional in
147143148148- let open Result.Syntax in
149144 let labels = split_domain domain in
150150- let* encoded_labels =
151151- map_result (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels
145145+ let encoded_labels =
146146+ List.map (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels
152147 in
153148 let result = join_labels encoded_labels in
154149 let len = String.length result in
155155- if len > max_domain_length then domain_too_long len else Ok result
150150+ if len > max_domain_length then domain_too_long len else result
156151157152let to_unicode domain =
158158- let open Result.Syntax in
159153 let labels = split_domain domain in
160160- let+ decoded_labels = map_result label_to_unicode labels in
154154+ let decoded_labels = List.map label_to_unicode labels in
161155 join_labels decoded_labels
162156163157(* {1 Domain Name Library Integration} *)
164158165159let domain_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) domain =
166166- let open Result.Syntax in
167160 let s = Domain_name.to_string domain in
168168- let* ascii = to_ascii ~check_hyphens ~use_std3_rules s in
161161+ let ascii = to_ascii ~check_hyphens ~use_std3_rules s in
169162 match Domain_name.of_string ascii with
170163 | Error (`Msg msg) -> invalid_label msg
171171- | Ok d -> Ok d
164164+ | Ok d -> d
172165173166let domain_to_unicode domain =
174174- let open Result.Syntax in
175167 let s = Domain_name.to_string domain in
176176- let* unicode = to_unicode s in
168168+ let unicode = to_unicode s in
177169 match Domain_name.of_string unicode with
178170 | Error (`Msg msg) -> invalid_label msg
179179- | Ok d -> Ok d
171171+ | Ok d -> d
180172181173(* {1 Validation} *)
182174183183-let is_idna_valid domain = Result.is_ok (to_ascii domain)
175175+let is_idna_valid domain =
176176+ try ignore (to_ascii domain); true
177177+ with Error _ -> false
+35-25
lib/punycode_idna.mli
···26262727(** {1 Error Types} *)
28282929-type error =
3030- | Punycode_error of Punycode.error
3131- (** Error during Punycode encoding/decoding. See {!Punycode.error} for
3232- details. *)
2929+type error_reason =
3030+ | Punycode_error of Punycode.error_reason
3131+ (** Error during Punycode encoding/decoding. See {!Punycode.error_reason}
3232+ for details. *)
3333 | Invalid_label of string
3434 (** Label violates IDNA constraints. The string describes the violation.
3535 See
···4949 Section 4.2}, the result of encoding must decode back to the original
5050 input. *)
51515252-val pp_error : Format.formatter -> error -> unit
5353-(** [pp_error fmt e] pretty-prints an error. *)
5252+exception Error of error_reason
5353+(** Exception raised for all IDNA processing errors. *)
54545555-val error_to_string : error -> string
5656-(** [error_to_string e] converts an error to a human-readable string. *)
5555+val pp_error_reason : Format.formatter -> error_reason -> unit
5656+(** [pp_error_reason fmt e] pretty-prints an error. *)
5757+5858+val error_reason_to_string : error_reason -> string
5959+(** [error_reason_to_string e] converts an error to a human-readable string. *)
57605861(** {1 Constants} *)
5962···7780 ?use_std3_rules:bool ->
7881 ?transitional:bool ->
7982 string ->
8080- (string, error) result
8383+ string
8184(** [to_ascii domain] converts an internationalized domain name to ASCII.
82858386 Implements the ToASCII operation from
···105108 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2}Section
106109 4.2.3.2} (default: false)
107110 - [transitional]: Use IDNA 2003 transitional processing (default: false)
111111+112112+ @raise Error on conversion failure.
108113109114 Example:
110115 {[
111116 to_ascii "münchen.example.com"
112112- (* = Ok "xn--mnchen-3ya.example.com" *)
117117+ (* = "xn--mnchen-3ya.example.com" *)
113118 ]} *)
114119115115-val label_to_ascii :
116116- ?check_hyphens:bool ->
117117- ?use_std3_rules:bool ->
118118- string ->
119119- (string, error) result
120120+val label_to_ascii : ?check_hyphens:bool -> ?use_std3_rules:bool -> string -> string
120121(** [label_to_ascii label] converts a single label to ASCII.
121122122123 This implements the core ToASCII operation for one label, as described in
123124 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.1}RFC 5891
124124- Section 4.1}. *)
125125+ Section 4.1}.
126126+127127+ @raise Error on conversion failure. *)
125128126129(** {1 ToUnicode Operation}
127130···131134 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2} RFC 5891
132135 Section 4.2} for the complete ToUnicode specification. *)
133136134134-val to_unicode : string -> (string, error) result
137137+val to_unicode : string -> string
135138(** [to_unicode domain] converts an ACE domain name to Unicode.
136139137140 Implements the ToUnicode operation from
···143146 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-6.2}RFC 3492
144147 Section 6.2} 2. Otherwise, pass through unchanged
145148149149+ @raise Error on decoding failure.
150150+146151 Example:
147152 {[
148153 to_unicode "xn--mnchen-3ya.example.com"
149149- (* = Ok "münchen.example.com" *)
154154+ (* = "münchen.example.com" *)
150155 ]} *)
151156152152-val label_to_unicode : string -> (string, error) result
157157+val label_to_unicode : string -> string
153158(** [label_to_unicode label] converts a single ACE label to Unicode.
154159155160 This implements the core ToUnicode operation for one label, as described in
156161 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2}RFC 5891
157157- Section 4.2}. *)
162162+ Section 4.2}.
163163+164164+ @raise Error on decoding failure. *)
158165159166(** {1 Domain Name Integration}
160167···168175 ?check_hyphens:bool ->
169176 ?use_std3_rules:bool ->
170177 [ `raw ] Domain_name.t ->
171171- ([ `raw ] Domain_name.t, error) result
178178+ [ `raw ] Domain_name.t
172179(** [domain_to_ascii domain] converts a domain name to ASCII form.
173180174181 Applies {!to_ascii} to the string representation and returns the result as a
175182 [Domain_name.t].
176183184184+ @raise Error on conversion failure.
185185+177186 Example:
178187 {[
179188 let d = Domain_name.of_string_exn "münchen.example.com" in
180189 domain_to_ascii d
181181- (* = Ok (Domain_name.of_string_exn "xn--mnchen-3ya.example.com") *)
190190+ (* = Domain_name.of_string_exn "xn--mnchen-3ya.example.com" *)
182191 ]} *)
183192184184-val domain_to_unicode :
185185- [ `raw ] Domain_name.t -> ([ `raw ] Domain_name.t, error) result
193193+val domain_to_unicode : [ `raw ] Domain_name.t -> [ `raw ] Domain_name.t
186194(** [domain_to_unicode domain] converts a domain name to Unicode form.
187195188196 Applies {!to_unicode} to the string representation and returns the result as
189189- a [Domain_name.t]. *)
197197+ a [Domain_name.t].
198198+199199+ @raise Error on decoding failure. *)
190200191201(** {1 Validation} *)
192202
+124-106
test/test_punycode.ml
···26262727(* Test result helper *)
2828let check_encode_ok expected input =
2929- match Punycode.encode input with
3030- | Ok result -> check string "encode" expected result
3131- | Error e -> fail (Format.asprintf "encode failed: %a" Punycode.pp_error e)
2929+ try
3030+ let result = Punycode.encode input in
3131+ check string "encode" expected result
3232+ with Punycode.Error e ->
3333+ fail (Format.asprintf "encode failed: %a" Punycode.pp_error_reason e)
32343335let check_decode_ok expected input =
3434- match Punycode.decode input with
3535- | Ok result ->
3636- let expected_arr = codepoints_of_hex_list expected in
3737- check int "length" (Array.length expected_arr) (Array.length result);
3838- Array.iteri
3939- (fun i u ->
4040- check int
4141- (Printf.sprintf "char %d" i)
4242- (Uchar.to_int expected_arr.(i))
4343- (Uchar.to_int u))
4444- result
4545- | Error e -> fail (Format.asprintf "decode failed: %a" Punycode.pp_error e)
3636+ try
3737+ let result = Punycode.decode input in
3838+ let expected_arr = codepoints_of_hex_list expected in
3939+ check int "length" (Array.length expected_arr) (Array.length result);
4040+ Array.iteri
4141+ (fun i u ->
4242+ check int
4343+ (Printf.sprintf "char %d" i)
4444+ (Uchar.to_int expected_arr.(i))
4545+ (Uchar.to_int u))
4646+ result
4747+ with Punycode.Error e ->
4848+ fail (Format.asprintf "decode failed: %a" Punycode.pp_error_reason e)
46494750let check_utf8_roundtrip s =
4848- match Punycode.encode_utf8 s with
4949- | Error e ->
5050- fail (Format.asprintf "encode_utf8 failed: %a" Punycode.pp_error e)
5151- | Ok encoded -> (
5252- match Punycode.decode_utf8 encoded with
5353- | Error e ->
5454- fail (Format.asprintf "decode_utf8 failed: %a" Punycode.pp_error e)
5555- | Ok decoded -> check string "roundtrip" s decoded)
5151+ try
5252+ let encoded = Punycode.encode_utf8 s in
5353+ let decoded = Punycode.decode_utf8 encoded in
5454+ check string "roundtrip" s decoded
5555+ with Punycode.Error e ->
5656+ fail (Format.asprintf "roundtrip failed: %a" Punycode.pp_error_reason e)
56575758(* RFC 3492 Section 7.1 Test Vectors *)
5859···604605605606(* Label encoding tests *)
606607let test_label_encode_ascii () =
607607- match Punycode.encode_label "example" with
608608- | Ok result -> check string "ascii passthrough" "example" result
609609- | Error e ->
610610- fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e)
608608+ try
609609+ let result = Punycode.encode_label "example" in
610610+ check string "ascii passthrough" "example" result
611611+ with Punycode.Error e ->
612612+ fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason e)
611613612614let test_label_encode_german () =
613613- match Punycode.encode_label "münchen" with
614614- | Ok result -> check string "german label" "xn--mnchen-3ya" result
615615- | Error e ->
616616- fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e)
615615+ try
616616+ let result = Punycode.encode_label "münchen" in
617617+ check string "german label" "xn--mnchen-3ya" result
618618+ with Punycode.Error e ->
619619+ fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason e)
617620618621let test_label_decode_german () =
619619- match Punycode.decode_label "xn--mnchen-3ya" with
620620- | Ok result -> check string "german decode" "münchen" result
621621- | Error e ->
622622- fail (Format.asprintf "decode_label failed: %a" Punycode.pp_error e)
622622+ try
623623+ let result = Punycode.decode_label "xn--mnchen-3ya" in
624624+ check string "german decode" "münchen" result
625625+ with Punycode.Error e ->
626626+ fail (Format.asprintf "decode_label failed: %a" Punycode.pp_error_reason e)
623627624628(* IDNA tests *)
625629let test_idna_to_ascii_simple () =
626626- match Punycode_idna.to_ascii "münchen.example.com" with
627627- | Ok result ->
628628- check string "idna to_ascii" "xn--mnchen-3ya.example.com" result
629629- | Error e ->
630630- fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)
630630+ try
631631+ let result = Punycode_idna.to_ascii "münchen.example.com" in
632632+ check string "idna to_ascii" "xn--mnchen-3ya.example.com" result
633633+ with Punycode_idna.Error e ->
634634+ fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e)
631635632636let test_idna_to_unicode_simple () =
633633- match Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" with
634634- | Ok result -> check string "idna to_unicode" "münchen.example.com" result
635635- | Error e ->
636636- fail (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e)
637637+ try
638638+ let result = Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" in
639639+ check string "idna to_unicode" "münchen.example.com" result
640640+ with Punycode_idna.Error e ->
641641+ fail (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error_reason e)
637642638643let test_idna_roundtrip () =
639644 let original = "münchen.example.com" in
640640- match Punycode_idna.to_ascii original with
641641- | Error e ->
642642- fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)
643643- | Ok ascii -> (
644644- match Punycode_idna.to_unicode ascii with
645645- | Error e ->
646646- fail
647647- (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e)
648648- | Ok unicode -> check string "idna roundtrip" original unicode)
645645+ try
646646+ let ascii = Punycode_idna.to_ascii original in
647647+ let unicode = Punycode_idna.to_unicode ascii in
648648+ check string "idna roundtrip" original unicode
649649+ with Punycode_idna.Error e ->
650650+ fail (Format.asprintf "roundtrip failed: %a" Punycode_idna.pp_error_reason e)
649651650652let test_idna_all_ascii () =
651651- match Punycode_idna.to_ascii "www.example.com" with
652652- | Ok result -> check string "all ascii passthrough" "www.example.com" result
653653- | Error e ->
654654- fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)
653653+ try
654654+ let result = Punycode_idna.to_ascii "www.example.com" in
655655+ check string "all ascii passthrough" "www.example.com" result
656656+ with Punycode_idna.Error e ->
657657+ fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e)
655658656659let test_idna_mixed_labels () =
657657- match Punycode_idna.to_ascii "日本語.example.com" with
658658- | Ok result ->
659659- (* Check that result starts with xn-- and ends with .example.com *)
660660- check bool "has ace prefix" true (Punycode.has_ace_prefix result);
661661- check bool "ends with example.com" true
662662- (String.length result > 12
663663- && String.sub result (String.length result - 12) 12 = ".example.com")
664664- | Error e ->
665665- fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e)
660660+ try
661661+ let result = Punycode_idna.to_ascii "日本語.example.com" in
662662+ (* Check that result starts with xn-- and ends with .example.com *)
663663+ check bool "has ace prefix" true (Punycode.has_ace_prefix result);
664664+ check bool "ends with example.com" true
665665+ (String.length result > 12
666666+ && String.sub result (String.length result - 12) 12 = ".example.com")
667667+ with Punycode_idna.Error e ->
668668+ fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e)
666669667670(* Case annotation tests *)
668671let test_case_annotation_decode () =
669672 (* RFC example: uppercase letters indicate case flags *)
670670- match Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" with
671671- | Ok (codepoints, case_flags) ->
672672- check int "codepoints length"
673673- (List.length example_p_codepoints)
674674- (Array.length codepoints);
675675- check int "case_flags length" (Array.length codepoints)
676676- (Array.length case_flags);
677677- (* M should be uppercase *)
678678- check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase);
679679- (* a should be lowercase *)
680680- check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase)
681681- | Error e ->
682682- fail (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error e)
673673+ try
674674+ let codepoints, case_flags =
675675+ Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e"
676676+ in
677677+ check int "codepoints length"
678678+ (List.length example_p_codepoints)
679679+ (Array.length codepoints);
680680+ check int "case_flags length" (Array.length codepoints)
681681+ (Array.length case_flags);
682682+ (* M should be uppercase *)
683683+ check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase);
684684+ (* a should be lowercase *)
685685+ check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase)
686686+ with Punycode.Error e ->
687687+ fail (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error_reason e)
683688684689let test_case_annotation_encode () =
685690 let codepoints = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in
···687692 let case_flags =
688693 [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |]
689694 in
690690- match Punycode.encode_with_case codepoints case_flags with
691691- | Ok result ->
692692- (* Should encode as "AbC-" (basic code points with case annotation) *)
693693- check string "case encoded" "AbC-" result
694694- | Error e ->
695695- fail (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error e)
695695+ try
696696+ let result = Punycode.encode_with_case codepoints case_flags in
697697+ (* Should encode as "AbC-" (basic code points with case annotation) *)
698698+ check string "case encoded" "AbC-" result
699699+ with Punycode.Error e ->
700700+ fail (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error_reason e)
696701697702(* Edge case tests *)
698703let test_empty_input () =
699699- match Punycode.encode [||] with
700700- | Ok result -> check string "empty encode" "" result
701701- | Error _ -> fail "empty encode should succeed"
704704+ try
705705+ let result = Punycode.encode [||] in
706706+ check string "empty encode" "" result
707707+ with Punycode.Error _ -> fail "empty encode should succeed"
702708703709let test_empty_decode () =
704704- match Punycode.decode "" with
705705- | Ok result -> check int "empty decode length" 0 (Array.length result)
706706- | Error _ -> fail "empty decode should succeed"
710710+ try
711711+ let result = Punycode.decode "" in
712712+ check int "empty decode length" 0 (Array.length result)
713713+ with Punycode.Error _ -> fail "empty decode should succeed"
707714708715let test_pure_ascii () =
709716 let input = codepoints_of_string "hello" in
710710- match Punycode.encode input with
711711- | Ok result -> check string "pure ascii" "hello-" result
712712- | Error e -> fail (Format.asprintf "encode failed: %a" Punycode.pp_error e)
717717+ try
718718+ let result = Punycode.encode input in
719719+ check string "pure ascii" "hello-" result
720720+ with Punycode.Error e ->
721721+ fail (Format.asprintf "encode failed: %a" Punycode.pp_error_reason e)
713722714723let test_invalid_digit () =
715715- match Punycode.decode "hello!" with
716716- | Ok _ -> fail "should fail on invalid digit"
717717- | Error (Punycode.Invalid_digit _) -> ()
718718- | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)
724724+ try
725725+ ignore (Punycode.decode "hello!");
726726+ fail "should fail on invalid digit"
727727+ with
728728+ | Punycode.Error (Punycode.Invalid_digit _) -> ()
729729+ | Punycode.Error e ->
730730+ fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)
719731720732let test_label_too_long () =
721733 let long_label = String.make 100 'a' in
722722- match Punycode.encode_label long_label with
723723- | Ok _ -> fail "should fail on long label"
724724- | Error (Punycode.Label_too_long _) -> ()
725725- | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)
734734+ try
735735+ ignore (Punycode.encode_label long_label);
736736+ fail "should fail on long label"
737737+ with
738738+ | Punycode.Error (Punycode.Label_too_long _) -> ()
739739+ | Punycode.Error e ->
740740+ fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)
726741727742let test_empty_label () =
728728- match Punycode.encode_label "" with
729729- | Ok _ -> fail "should fail on empty label"
730730- | Error Punycode.Empty_label -> ()
731731- | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e)
743743+ try
744744+ ignore (Punycode.encode_label "");
745745+ fail "should fail on empty label"
746746+ with
747747+ | Punycode.Error Punycode.Empty_label -> ()
748748+ | Punycode.Error e ->
749749+ fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e)
732750733751(* Validation tests *)
734752let test_is_basic () =