Punycode (RFC3492) in OCaml

remove result and just use exceptions

+439 -438
+14 -18
fuzz/fuzz_punycode.ml
··· 9 9 10 10 (* Test that encode_utf8 never crashes on arbitrary input *) 11 11 let test_encode_no_crash input = 12 - ignore (Punycode.encode_utf8 input); 12 + (try ignore (Punycode.encode_utf8 input) with Punycode.Error _ -> ()); 13 13 check true 14 14 15 15 (* Test that decode_utf8 never crashes on arbitrary input *) 16 16 let test_decode_no_crash input = 17 - ignore (Punycode.decode_utf8 input); 17 + (try ignore (Punycode.decode_utf8 input) with Punycode.Error _ -> ()); 18 18 check true 19 19 20 20 (* Test roundtrip: encode then decode should give back original (case-insensitive) 21 21 IDNA/Punycode lowercases ASCII characters during encoding per RFC 5891 *) 22 22 let test_roundtrip input = 23 - match Punycode.encode_utf8 input with 24 - | Ok encoded -> ( 25 - match Punycode.decode_utf8 encoded with 26 - | Ok decoded -> 27 - (* Compare lowercase versions since IDNA lowercases ASCII *) 28 - check_eq ~pp:Format.pp_print_string 29 - (String.lowercase_ascii input) 30 - (String.lowercase_ascii decoded) 31 - | Error _ -> 32 - (* Some encoded values might not decode, that's ok for fuzz testing *) 33 - check true) 34 - | Error _ -> 35 - (* Some inputs might not encode, that's ok *) 36 - check true 23 + (try 24 + let encoded = Punycode.encode_utf8 input in 25 + let decoded = Punycode.decode_utf8 encoded in 26 + (* Compare lowercase versions since IDNA lowercases ASCII *) 27 + check_eq ~pp:Format.pp_print_string 28 + (String.lowercase_ascii input) 29 + (String.lowercase_ascii decoded) 30 + with Punycode.Error _ -> 31 + (* Some inputs might not encode/decode, that's ok *) 32 + check true) 37 33 38 34 (* Test ASCII-only strings (should pass through mostly unchanged) *) 39 35 let test_ascii_string input = ··· 45 41 Char.chr (Char.code input.[i mod String.length input] mod 128)) 46 42 in 47 43 if String.length ascii_only > 0 then 48 - ignore (Punycode.encode_utf8 ascii_only) 44 + (try ignore (Punycode.encode_utf8 ascii_only) with Punycode.Error _ -> ()) 49 45 end; 50 46 check true 51 47 52 48 (* Test inputs starting with ACE prefix "xn--" *) 53 49 let test_ace_prefix input = 54 50 let ace_input = "xn--" ^ input in 55 - ignore (Punycode.decode_utf8 ace_input); 51 + (try ignore (Punycode.decode_utf8 ace_input) with Punycode.Error _ -> ()); 56 52 check true 57 53 58 54 let () =
+178 -211
lib/punycode.ml
··· 30 30 31 31 (* {1 Error Types} *) 32 32 33 - type error = 33 + type error_reason = 34 34 | Overflow of position 35 35 | Invalid_character of position * Uchar.t 36 36 | Invalid_digit of position * char ··· 39 39 | Label_too_long of int 40 40 | Empty_label 41 41 42 - let pp_error fmt = function 42 + let pp_error_reason fmt = function 43 43 | Overflow pos -> 44 44 Format.fprintf fmt "arithmetic overflow at %a" pp_position pos 45 45 | Invalid_character (pos, u) -> ··· 57 57 max_label_length 58 58 | Empty_label -> Format.fprintf fmt "empty label" 59 59 60 - let error_to_string err = Format.asprintf "%a" pp_error err 60 + exception Error of error_reason 61 + 62 + let () = Printexc.register_printer (function 63 + | Error reason -> Some (Format.asprintf "Punycode.Error: %a" pp_error_reason reason) 64 + | _ -> None) 65 + 66 + let error_reason_to_string reason = Format.asprintf "%a" pp_error_reason reason 61 67 62 68 (* {1 Error Constructors} *) 63 69 64 - let overflow pos = Error (Overflow pos) 65 - let invalid_character pos u = Error (Invalid_character (pos, u)) 66 - let invalid_digit pos c = Error (Invalid_digit (pos, c)) 67 - let unexpected_end pos = Error (Unexpected_end pos) 68 - let _invalid_utf8 pos = Error (Invalid_utf8 pos) 69 - let label_too_long len = Error (Label_too_long len) 70 - let empty_label = Error Empty_label 70 + let overflow pos = raise (Error (Overflow pos)) 71 + let invalid_character pos u = raise (Error (Invalid_character (pos, u))) 72 + let invalid_digit pos c = raise (Error (Invalid_digit (pos, c))) 73 + let unexpected_end pos = raise (Error (Unexpected_end pos)) 74 + let invalid_utf8 pos = raise (Error (Invalid_utf8 pos)) 75 + let label_too_long len = raise (Error (Label_too_long len)) 76 + let empty_label () = raise (Error Empty_label) 71 77 72 78 (* {1 Case Flags} *) 73 79 ··· 134 140 let max_int_value = max_int 135 141 136 142 let safe_mul_add a b c pos = 137 - if c = 0 then Ok a 143 + if c = 0 then a 138 144 else if b > (max_int_value - a) / c then overflow pos 139 - else Ok (a + (b * c)) 145 + else a + (b * c) 140 146 141 147 (* {1 UTF-8 to Code Points Conversion} *) 142 148 ··· 145 151 let acc = ref [] in 146 152 let byte_offset = ref 0 in 147 153 let char_index = ref 0 in 148 - let error = ref None in 149 - while !byte_offset < len && !error = None do 154 + while !byte_offset < len do 150 155 let pos = { byte_offset = !byte_offset; char_index = !char_index } in 151 156 let dec = String.get_utf_8_uchar s !byte_offset in 152 157 if Uchar.utf_decode_is_valid dec then begin ··· 154 159 byte_offset := !byte_offset + Uchar.utf_decode_length dec; 155 160 incr char_index 156 161 end 157 - else begin 158 - error := Some (Invalid_utf8 pos) 159 - end 162 + else invalid_utf8 pos 160 163 done; 161 - match !error with 162 - | Some e -> Error e 163 - | None -> Ok (Array.of_list (List.rev !acc)) 164 + Array.of_list (List.rev !acc) 164 165 165 166 (* {1 Code Points to UTF-8 Conversion} *) 166 167 ··· 173 174 174 175 let encode_impl codepoints case_flags = 175 176 let input_length = Array.length codepoints in 176 - if input_length = 0 then Ok "" 177 + if input_length = 0 then "" 177 178 else begin 178 179 let output = Buffer.create (input_length * 2) in 179 180 ··· 210 211 let delta = ref 0 in 211 212 let bias = ref initial_bias in 212 213 213 - let result = ref (Ok ()) in 214 - 215 - while !h < input_length && !result = Ok () do 214 + while !h < input_length do 216 215 (* Find minimum code point >= n *) 217 216 let m = 218 217 Array.fold_left ··· 224 223 225 224 (* Increase delta to advance state to <m, 0> *) 226 225 let pos = { byte_offset = 0; char_index = !h } in 227 - match safe_mul_add !delta (m - !n) (!h + 1) pos with 228 - | Error e -> result := Error e 229 - | Ok new_delta -> 230 - delta := new_delta; 231 - n := m; 226 + delta := safe_mul_add !delta (m - !n) (!h + 1) pos; 227 + n := m; 228 + 229 + (* Process each code point *) 230 + for j = 0 to input_length - 1 do 231 + let cp = Uchar.to_int codepoints.(j) in 232 + let pos = { byte_offset = 0; char_index = j } in 232 233 233 - (* Process each code point *) 234 - let j = ref 0 in 235 - while !j < input_length && !result = Ok () do 236 - let cp = Uchar.to_int codepoints.(!j) in 237 - let pos = { byte_offset = 0; char_index = !j } in 234 + if cp < !n then begin 235 + incr delta; 236 + if !delta = 0 then (* Overflow *) 237 + overflow pos 238 + end 239 + else if cp = !n then begin 240 + (* Encode delta as variable-length integer *) 241 + let q = ref !delta in 242 + let k = ref base in 243 + let done_encoding = ref false in 238 244 239 - if cp < !n then begin 240 - incr delta; 241 - if !delta = 0 then (* Overflow *) 242 - result := overflow pos 245 + while not !done_encoding do 246 + let t = 247 + if !k <= !bias then tmin 248 + else if !k >= !bias + tmax then tmax 249 + else !k - !bias 250 + in 251 + if !q < t then begin 252 + (* Output final digit *) 253 + let case = 254 + match case_flags with 255 + | Some flags -> flags.(j) 256 + | None -> Lowercase 257 + in 258 + Buffer.add_char output (encode_digit !q case); 259 + done_encoding := true 260 + end 261 + else begin 262 + (* Output intermediate digit and continue *) 263 + let digit = t + ((!q - t) mod (base - t)) in 264 + Buffer.add_char output (encode_digit digit Lowercase); 265 + q := (!q - t) / (base - t); 266 + k := !k + base 243 267 end 244 - else if cp = !n then begin 245 - (* Encode delta as variable-length integer *) 246 - let q = ref !delta in 247 - let k = ref base in 248 - let done_encoding = ref false in 268 + done; 249 269 250 - while not !done_encoding do 251 - let t = 252 - if !k <= !bias then tmin 253 - else if !k >= !bias + tmax then tmax 254 - else !k - !bias 255 - in 256 - if !q < t then begin 257 - (* Output final digit *) 258 - let case = 259 - match case_flags with 260 - | Some flags -> flags.(!j) 261 - | None -> Lowercase 262 - in 263 - Buffer.add_char output (encode_digit !q case); 264 - done_encoding := true 265 - end 266 - else begin 267 - (* Output intermediate digit and continue *) 268 - let digit = t + ((!q - t) mod (base - t)) in 269 - Buffer.add_char output (encode_digit digit Lowercase); 270 - q := (!q - t) / (base - t); 271 - k := !k + base 272 - end 273 - done; 270 + bias := adapt ~delta:!delta ~numpoints:(!h + 1) ~firsttime:(!h = b); 271 + delta := 0; 272 + incr h 273 + end 274 + done; 274 275 275 - bias := adapt ~delta:!delta ~numpoints:(!h + 1) ~firsttime:(!h = b); 276 - delta := 0; 277 - incr h 278 - end; 279 - incr j 280 - done; 281 - 282 - incr delta; 283 - incr n 276 + incr delta; 277 + incr n 284 278 done; 285 279 286 - match !result with 287 - | Error e -> Error e 288 - | Ok () -> Ok (Buffer.contents output) 280 + Buffer.contents output 289 281 end 290 282 291 283 let encode codepoints = encode_impl codepoints None ··· 299 291 300 292 let decode_impl input = 301 293 let input_length = String.length input in 302 - if input_length = 0 then Ok ([||], [||]) 294 + if input_length = 0 then ([||], [||]) 303 295 else begin 304 296 (* Find last delimiter *) 305 297 let b = Option.value ~default:0 (String.rindex_opt input delimiter) in ··· 307 299 (* Copy basic code points and extract case flags *) 308 300 let output = ref [] in 309 301 let case_output = ref [] in 310 - let error = ref None in 311 302 312 303 for j = 0 to b - 1 do 313 - if !error = None then begin 314 - let c = input.[j] in 315 - let pos = { byte_offset = j; char_index = j } in 316 - let code = Char.code c in 317 - if code >= 0x80 then 318 - error := Some (Invalid_character (pos, Uchar.of_int code)) 319 - else begin 320 - output := Uchar.of_int code :: !output; 321 - case_output := 322 - (if is_flagged c then Uppercase else Lowercase) :: !case_output 323 - end 304 + let c = input.[j] in 305 + let pos = { byte_offset = j; char_index = j } in 306 + let code = Char.code c in 307 + if code >= 0x80 then 308 + invalid_character pos (Uchar.of_int code) 309 + else begin 310 + output := Uchar.of_int code :: !output; 311 + case_output := 312 + (if is_flagged c then Uppercase else Lowercase) :: !case_output 324 313 end 325 314 done; 326 315 327 - match !error with 328 - | Some e -> Error e 329 - | None -> ( 330 - let output = ref (Array.of_list (List.rev !output)) in 331 - let case_output = ref (Array.of_list (List.rev !case_output)) in 316 + let output = ref (Array.of_list (List.rev !output)) in 317 + let case_output = ref (Array.of_list (List.rev !case_output)) in 332 318 333 - (* Main decoding loop *) 334 - let n = ref initial_n in 335 - let i = ref 0 in 336 - let bias = ref initial_bias in 337 - let in_pos = ref (if b > 0 then b + 1 else 0) in 338 - let result = ref (Ok ()) in 319 + (* Main decoding loop *) 320 + let n = ref initial_n in 321 + let i = ref 0 in 322 + let bias = ref initial_bias in 323 + let in_pos = ref (if b > 0 then b + 1 else 0) in 339 324 340 - while !in_pos < input_length && !result = Ok () do 341 - let oldi = !i in 342 - let w = ref 1 in 343 - let k = ref base in 344 - let done_decoding = ref false in 325 + while !in_pos < input_length do 326 + let oldi = !i in 327 + let w = ref 1 in 328 + let k = ref base in 329 + let done_decoding = ref false in 345 330 346 - while (not !done_decoding) && !result = Ok () do 347 - let pos = 348 - { byte_offset = !in_pos; char_index = Array.length !output } 349 - in 331 + while not !done_decoding do 332 + let pos = 333 + { byte_offset = !in_pos; char_index = Array.length !output } 334 + in 350 335 351 - if !in_pos >= input_length then begin 352 - result := unexpected_end pos; 353 - done_decoding := true 354 - end 355 - else begin 356 - let c = input.[!in_pos] in 357 - incr in_pos; 336 + if !in_pos >= input_length then 337 + unexpected_end pos 338 + else begin 339 + let c = input.[!in_pos] in 340 + incr in_pos; 358 341 359 - match decode_digit c with 360 - | None -> 361 - result := invalid_digit pos c; 362 - done_decoding := true 363 - | Some digit -> ( 364 - (* i = i + digit * w, with overflow check *) 365 - match safe_mul_add !i digit !w pos with 366 - | Error e -> 367 - result := Error e; 368 - done_decoding := true 369 - | Ok new_i -> 370 - i := new_i; 342 + match decode_digit c with 343 + | None -> invalid_digit pos c 344 + | Some digit -> 345 + (* i = i + digit * w, with overflow check *) 346 + i := safe_mul_add !i digit !w pos; 371 347 372 - let t = 373 - if !k <= !bias then tmin 374 - else if !k >= !bias + tmax then tmax 375 - else !k - !bias 376 - in 348 + let t = 349 + if !k <= !bias then tmin 350 + else if !k >= !bias + tmax then tmax 351 + else !k - !bias 352 + in 377 353 378 - if digit < t then begin 379 - (* Record case flag from this final digit *) 380 - done_decoding := true 381 - end 382 - else begin 383 - (* w = w * (base - t), with overflow check *) 384 - let base_minus_t = base - t in 385 - if !w > max_int_value / base_minus_t then begin 386 - result := overflow pos; 387 - done_decoding := true 388 - end 389 - else begin 390 - w := !w * base_minus_t; 391 - k := !k + base 392 - end 393 - end) 394 - end 395 - done; 354 + if digit < t then 355 + (* Record case flag from this final digit *) 356 + done_decoding := true 357 + else begin 358 + (* w = w * (base - t), with overflow check *) 359 + let base_minus_t = base - t in 360 + if !w > max_int_value / base_minus_t then 361 + overflow pos 362 + else begin 363 + w := !w * base_minus_t; 364 + k := !k + base 365 + end 366 + end 367 + end 368 + done; 396 369 397 - if !result = Ok () then begin 398 - let out_len = Array.length !output in 399 - bias := 400 - adapt ~delta:(!i - oldi) ~numpoints:(out_len + 1) 401 - ~firsttime:(oldi = 0); 370 + let out_len = Array.length !output in 371 + bias := 372 + adapt ~delta:(!i - oldi) ~numpoints:(out_len + 1) 373 + ~firsttime:(oldi = 0); 402 374 403 - let pos = { byte_offset = !in_pos - 1; char_index = out_len } in 375 + let pos = { byte_offset = !in_pos - 1; char_index = out_len } in 404 376 405 - (* n = n + i / (out_len + 1), with overflow check *) 406 - let increment = !i / (out_len + 1) in 407 - if increment > max_int_value - !n then result := overflow pos 408 - else begin 409 - n := !n + increment; 410 - i := !i mod (out_len + 1); 377 + (* n = n + i / (out_len + 1), with overflow check *) 378 + let increment = !i / (out_len + 1) in 379 + if increment > max_int_value - !n then overflow pos 380 + else begin 381 + n := !n + increment; 382 + i := !i mod (out_len + 1); 411 383 412 - (* Validate that n is a valid Unicode scalar value *) 413 - if not (Uchar.is_valid !n) then 414 - result := invalid_character pos Uchar.rep 415 - else begin 416 - (* Insert n at position i *) 417 - let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in 418 - let new_case = Array.make (out_len + 1) Lowercase in 384 + (* Validate that n is a valid Unicode scalar value *) 385 + if not (Uchar.is_valid !n) then 386 + invalid_character pos Uchar.rep 387 + else begin 388 + (* Insert n at position i *) 389 + let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in 390 + let new_case = Array.make (out_len + 1) Lowercase in 419 391 420 - for j = 0 to !i - 1 do 421 - new_output.(j) <- !output.(j); 422 - new_case.(j) <- !case_output.(j) 423 - done; 424 - new_output.(!i) <- Uchar.of_int !n; 425 - (* Case flag from final digit of this delta *) 426 - new_case.(!i) <- 427 - (if !in_pos > 0 && is_flagged input.[!in_pos - 1] then 428 - Uppercase 429 - else Lowercase); 430 - for j = !i to out_len - 1 do 431 - new_output.(j + 1) <- !output.(j); 432 - new_case.(j + 1) <- !case_output.(j) 433 - done; 392 + for j = 0 to !i - 1 do 393 + new_output.(j) <- !output.(j); 394 + new_case.(j) <- !case_output.(j) 395 + done; 396 + new_output.(!i) <- Uchar.of_int !n; 397 + (* Case flag from final digit of this delta *) 398 + new_case.(!i) <- 399 + (if !in_pos > 0 && is_flagged input.[!in_pos - 1] then 400 + Uppercase 401 + else Lowercase); 402 + for j = !i to out_len - 1 do 403 + new_output.(j + 1) <- !output.(j); 404 + new_case.(j + 1) <- !case_output.(j) 405 + done; 434 406 435 - output := new_output; 436 - case_output := new_case; 437 - incr i 438 - end 439 - end 440 - end 441 - done; 407 + output := new_output; 408 + case_output := new_case; 409 + incr i 410 + end 411 + end 412 + done; 442 413 443 - match !result with 444 - | Error e -> Error e 445 - | Ok () -> Ok (!output, !case_output)) 414 + (!output, !case_output) 446 415 end 447 416 448 - let decode input = Result.map fst (decode_impl input) 417 + let decode input = fst (decode_impl input) 449 418 let decode_with_case input = decode_impl input 450 419 451 420 (* {1 UTF-8 String Operations} *) 452 421 453 422 let encode_utf8 s = 454 - let open Result.Syntax in 455 - let* codepoints = utf8_to_codepoints s in 423 + let codepoints = utf8_to_codepoints s in 456 424 encode codepoints 457 425 458 426 let decode_utf8 punycode = 459 - let open Result.Syntax in 460 - let+ codepoints = decode punycode in 427 + let codepoints = decode punycode in 461 428 codepoints_to_utf8 codepoints 462 429 463 430 (* {1 Domain Label Operations} *) 464 431 465 432 let encode_label label = 466 - if String.length label = 0 then empty_label 433 + if String.length label = 0 then empty_label () 467 434 else if is_ascii_string label then begin 468 435 (* All ASCII - return as-is, but check length *) 469 436 let len = String.length label in 470 - if len > max_label_length then label_too_long len else Ok label 437 + if len > max_label_length then label_too_long len else label 471 438 end 472 - else 439 + else begin 473 440 (* Has non-ASCII - encode with Punycode *) 474 - let open Result.Syntax in 475 - let* encoded = encode_utf8 label in 441 + let encoded = encode_utf8 label in 476 442 let result = ace_prefix ^ encoded in 477 443 let len = String.length result in 478 - if len > max_label_length then label_too_long len else Ok result 444 + if len > max_label_length then label_too_long len else result 445 + end 479 446 480 447 let decode_label label = 481 - if String.length label = 0 then empty_label 448 + if String.length label = 0 then empty_label () 482 449 else if has_ace_prefix label then begin 483 450 (* Remove ACE prefix and decode *) 484 451 let punycode = String.sub label 4 (String.length label - 4) in ··· 486 453 end 487 454 else begin 488 455 (* No ACE prefix - validate and return *) 489 - if is_ascii_string label then Ok label 456 + if is_ascii_string label then label 490 457 else 491 458 (* Has non-ASCII but no ACE prefix - return as-is *) 492 - Ok label 459 + label 493 460 end
+41 -25
lib/punycode.mli
··· 38 38 39 39 (** {1 Error Types} *) 40 40 41 - type error = 41 + type error_reason = 42 42 | Overflow of position 43 43 (** Arithmetic overflow during encode/decode. This can occur with very 44 44 long strings or extreme Unicode code point values. See ··· 66 66 is the actual length. *) 67 67 | Empty_label (** Empty label is not valid for encoding. *) 68 68 69 - val pp_error : Format.formatter -> error -> unit 70 - (** [pp_error fmt e] pretty-prints an error with position information. *) 69 + exception Error of error_reason 70 + (** Exception raised for all Punycode encoding/decoding errors. *) 71 + 72 + val pp_error_reason : Format.formatter -> error_reason -> unit 73 + (** [pp_error_reason fmt e] pretty-prints an error with position information. *) 71 74 72 - val error_to_string : error -> string 73 - (** [error_to_string e] converts an error to a human-readable string. *) 75 + val error_reason_to_string : error_reason -> string 76 + (** [error_reason_to_string e] converts an error to a human-readable string. *) 74 77 75 78 (** {1 Constants} 76 79 ··· 108 111 6}. They operate on arrays of Unicode code points ([Uchar.t array]). The 109 112 encoded output is a plain ASCII string without the ACE prefix. *) 110 113 111 - val encode : Uchar.t array -> (string, error) result 114 + val encode : Uchar.t array -> string 112 115 (** [encode codepoints] encodes an array of Unicode code points to a Punycode 113 116 ASCII string. 114 117 ··· 123 126 are any basic code points 3. Non-basic code points are encoded as deltas 124 127 using the generalized variable-length integer representation from 125 128 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-3.3}Section 3.3} 129 + 130 + @raise Error on encoding failure (overflow, etc.) 126 131 127 132 Example: 128 133 {[ 129 134 encode [| Uchar.of_int 0x4ED6; Uchar.of_int 0x4EEC; ... |] 130 - (* = Ok "ihqwcrb4cv8a8dqg056pqjye" *) 135 + (* = "ihqwcrb4cv8a8dqg056pqjye" *) 131 136 ]} *) 132 137 133 - val decode : string -> (Uchar.t array, error) result 138 + val decode : string -> Uchar.t array 134 139 (** [decode punycode] decodes a Punycode ASCII string to an array of Unicode 135 140 code points. 136 141 ··· 144 149 5}: "A decoder MUST recognize the letters in both uppercase and lowercase 145 150 forms". 146 151 152 + @raise Error on decoding failure (invalid digit, unexpected end, etc.) 153 + 147 154 Example: 148 155 {[ 149 156 decode "ihqwcrb4cv8a8dqg056pqjye" 150 - (* = Ok [| U+4ED6; U+4EEC; U+4E3A; ... |] (Chinese simplified) *) 157 + (* = [| U+4ED6; U+4EEC; U+4E3A; ... |] (Chinese simplified) *) 151 158 ]} *) 152 159 153 160 (** {1 Mixed-Case Annotation} ··· 156 163 {{:https://datatracker.ietf.org/doc/html/rfc3492#appendix-A}RFC 3492 157 164 Appendix A}. *) 158 165 159 - val encode_with_case : 160 - Uchar.t array -> case_flag array -> (string, error) result 166 + val encode_with_case : Uchar.t array -> case_flag array -> string 161 167 (** [encode_with_case codepoints case_flags] encodes with case annotation. 162 168 163 169 Per ··· 169 175 170 176 The [case_flags] array must have the same length as [codepoints]. 171 177 172 - @raise Invalid_argument if array lengths don't match. *) 178 + @raise Invalid_argument if array lengths don't match. 179 + @raise Error on encoding failure. *) 173 180 174 - val decode_with_case : string -> (Uchar.t array * case_flag array, error) result 181 + val decode_with_case : string -> Uchar.t array * case_flag array 175 182 (** [decode_with_case punycode] decodes and extracts case annotations. 176 183 177 184 Per 178 185 {{:https://datatracker.ietf.org/doc/html/rfc3492#appendix-A}RFC 3492 179 186 Appendix A}, returns both the decoded code points and an array of case 180 187 flags indicating the suggested case for each character based on the 181 - uppercase/lowercase form of the encoding digits. *) 188 + uppercase/lowercase form of the encoding digits. 189 + 190 + @raise Error on decoding failure. *) 182 191 183 192 (** {1 UTF-8 String Operations} 184 193 185 194 Convenience functions that work directly with UTF-8 encoded OCaml strings. 186 195 These combine UTF-8 decoding/encoding with the core Punycode operations. *) 187 196 188 - val encode_utf8 : string -> (string, error) result 197 + val encode_utf8 : string -> string 189 198 (** [encode_utf8 s] encodes a UTF-8 string to Punycode (no ACE prefix). 190 199 191 200 This is equivalent to decoding [s] from UTF-8 to code points, then calling 192 201 {!encode}. 193 202 203 + @raise Error on encoding failure. 204 + 194 205 Example: 195 206 {[ 196 207 encode_utf8 "münchen" 197 - (* = Ok "mnchen-3ya" *) 208 + (* = "mnchen-3ya" *) 198 209 ]} *) 199 210 200 - val decode_utf8 : string -> (string, error) result 211 + val decode_utf8 : string -> string 201 212 (** [decode_utf8 punycode] decodes Punycode to a UTF-8 string (no ACE prefix). 202 213 203 214 This is equivalent to calling {!decode} then encoding the result as UTF-8. 204 215 216 + @raise Error on decoding failure. 217 + 205 218 Example: 206 219 {[ 207 220 decode_utf8 "mnchen-3ya" 208 - (* = Ok "münchen" *) 221 + (* = "münchen" *) 209 222 ]} *) 210 223 211 224 (** {1 Domain Label Operations} ··· 214 227 length limits per 215 228 {{:https://datatracker.ietf.org/doc/html/rfc1035}RFC 1035}. *) 216 229 217 - val encode_label : string -> (string, error) result 230 + val encode_label : string -> string 218 231 (** [encode_label label] encodes a domain label for use in DNS. 219 232 220 233 If the label contains only ASCII characters, it is returned unchanged. ··· 223 236 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-5} RFC 3492 Section 224 237 5}. 225 238 226 - Returns {!Error} {!Label_too_long} if the result exceeds 63 bytes. 239 + @raise Error with {!Label_too_long} if the result exceeds 63 bytes. 240 + @raise Error with {!Empty_label} if the label is empty. 227 241 228 242 Example: 229 243 {[ 230 244 encode_label "münchen" 231 - (* = Ok "xn--mnchen-3ya" *) 245 + (* = "xn--mnchen-3ya" *) 232 246 encode_label "example" 233 - (* = Ok "example" *) 247 + (* = "example" *) 234 248 ]} *) 235 249 236 - val decode_label : string -> (string, error) result 250 + val decode_label : string -> string 237 251 (** [decode_label label] decodes a domain label. 238 252 239 253 If the label starts with the ACE prefix ("xn--", case-insensitive), it is 240 254 Punycode-decoded. Otherwise, it is returned unchanged. 255 + 256 + @raise Error on decoding failure. 241 257 242 258 Example: 243 259 {[ 244 260 decode_label "xn--mnchen-3ya" 245 - (* = Ok "münchen" *) 261 + (* = "münchen" *) 246 262 decode_label "example" 247 - (* = Ok "example" *) 263 + (* = "example" *) 248 264 ]} *) 249 265 250 266 (** {1 Validation}
+47 -53
lib/punycode_idna.ml
··· 9 9 10 10 (* {1 Error Types} *) 11 11 12 - type error = 13 - | Punycode_error of Punycode.error 12 + type error_reason = 13 + | Punycode_error of Punycode.error_reason 14 14 | Invalid_label of string 15 15 | Domain_too_long of int 16 16 | Normalization_failed 17 17 | Verification_failed 18 18 19 - let pp_error fmt = function 19 + let pp_error_reason fmt = function 20 20 | Punycode_error e -> 21 - Format.fprintf fmt "Punycode error: %a" Punycode.pp_error e 21 + Format.fprintf fmt "Punycode error: %a" Punycode.pp_error_reason e 22 22 | Invalid_label msg -> Format.fprintf fmt "invalid label: %s" msg 23 23 | Domain_too_long len -> 24 24 Format.fprintf fmt "domain too long: %d bytes (max %d)" len ··· 27 27 | Verification_failed -> 28 28 Format.fprintf fmt "IDNA verification failed (round-trip mismatch)" 29 29 30 - let error_to_string err = Format.asprintf "%a" pp_error err 30 + exception Error of error_reason 31 + 32 + let () = Printexc.register_printer (function 33 + | Error reason -> Some (Format.asprintf "Punycode_idna.Error: %a" pp_error_reason reason) 34 + | _ -> None) 35 + 36 + let error_reason_to_string reason = Format.asprintf "%a" pp_error_reason reason 31 37 32 38 (* {1 Error Constructors} *) 33 39 34 - let punycode_error e = Error (Punycode_error e) 35 - let invalid_label msg = Error (Invalid_label msg) 36 - let domain_too_long len = Error (Domain_too_long len) 37 - let _normalization_failed = Error Normalization_failed 38 - let verification_failed = Error Verification_failed 40 + let punycode_error e = raise (Error (Punycode_error e)) 41 + let invalid_label msg = raise (Error (Invalid_label msg)) 42 + let domain_too_long len = raise (Error (Domain_too_long len)) 43 + let verification_failed () = raise (Error Verification_failed) 39 44 40 45 (* {1 Unicode Normalization} *) 41 46 ··· 82 87 invalid_label "STD3 rules violation" 83 88 else if check_hyphens && not (check_hyphen_rules label) then 84 89 invalid_label "invalid hyphen placement" 85 - else Ok label 90 + else label 86 91 end 87 92 else begin 88 93 (* Has non-ASCII - normalize and encode *) 89 94 let normalized = normalize_nfc label in 90 95 91 96 (* Encode to Punycode *) 92 - match Punycode.encode_utf8 normalized with 93 - | Error e -> punycode_error e 94 - | Ok encoded -> ( 95 - let result = Punycode.ace_prefix ^ encoded in 96 - let result_len = String.length result in 97 - if result_len > Punycode.max_label_length then 98 - punycode_error (Punycode.Label_too_long result_len) 99 - else if check_hyphens && not (check_hyphen_rules result) then 100 - invalid_label "invalid hyphen placement in encoded label" 101 - else 102 - (* Verification: decode and compare to original normalized form *) 103 - match Punycode.decode_utf8 encoded with 104 - | Error _ -> verification_failed 105 - | Ok decoded -> 106 - if decoded <> normalized then verification_failed else Ok result) 97 + let encoded = 98 + try Punycode.encode_utf8 normalized 99 + with Punycode.Error e -> punycode_error e 100 + in 101 + let result = Punycode.ace_prefix ^ encoded in 102 + let result_len = String.length result in 103 + if result_len > Punycode.max_label_length then 104 + punycode_error (Punycode.Label_too_long result_len) 105 + else if check_hyphens && not (check_hyphen_rules result) then 106 + invalid_label "invalid hyphen placement in encoded label" 107 + else 108 + (* Verification: decode and compare to original normalized form *) 109 + let decoded = 110 + try Punycode.decode_utf8 encoded 111 + with Punycode.Error _ -> verification_failed () 112 + in 113 + if decoded <> normalized then verification_failed () else result 107 114 end 108 115 109 116 let label_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) label = ··· 112 119 let label_to_unicode label = 113 120 if is_ace_label label then begin 114 121 let encoded = String.sub label 4 (String.length label - 4) in 115 - match Punycode.decode_utf8 encoded with 116 - | Error e -> punycode_error e 117 - | Ok decoded -> Ok decoded 122 + try Punycode.decode_utf8 encoded 123 + with Punycode.Error e -> punycode_error e 118 124 end 119 - else Ok label 125 + else label 120 126 121 127 (* {1 Domain Operations} *) 122 128 ··· 126 132 (* Join labels into domain *) 127 133 let join_labels labels = String.concat "." labels 128 134 129 - (* Map a function returning Result over a list, short-circuiting on first Error *) 130 - let map_result f lst = 131 - List.fold_right 132 - (fun x acc -> 133 - let open Result.Syntax in 134 - let* y = f x in 135 - let+ ys = acc in 136 - y :: ys) 137 - lst (Ok []) 138 - 139 135 let to_ascii ?(check_hyphens = true) ?(check_bidi = false) 140 136 ?(check_joiners = false) ?(use_std3_rules = false) ?(transitional = false) 141 137 domain = ··· 145 141 let _ = check_joiners in 146 142 let _ = transitional in 147 143 148 - let open Result.Syntax in 149 144 let labels = split_domain domain in 150 - let* encoded_labels = 151 - map_result (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels 145 + let encoded_labels = 146 + List.map (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels 152 147 in 153 148 let result = join_labels encoded_labels in 154 149 let len = String.length result in 155 - if len > max_domain_length then domain_too_long len else Ok result 150 + if len > max_domain_length then domain_too_long len else result 156 151 157 152 let to_unicode domain = 158 - let open Result.Syntax in 159 153 let labels = split_domain domain in 160 - let+ decoded_labels = map_result label_to_unicode labels in 154 + let decoded_labels = List.map label_to_unicode labels in 161 155 join_labels decoded_labels 162 156 163 157 (* {1 Domain Name Library Integration} *) 164 158 165 159 let domain_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) domain = 166 - let open Result.Syntax in 167 160 let s = Domain_name.to_string domain in 168 - let* ascii = to_ascii ~check_hyphens ~use_std3_rules s in 161 + let ascii = to_ascii ~check_hyphens ~use_std3_rules s in 169 162 match Domain_name.of_string ascii with 170 163 | Error (`Msg msg) -> invalid_label msg 171 - | Ok d -> Ok d 164 + | Ok d -> d 172 165 173 166 let domain_to_unicode domain = 174 - let open Result.Syntax in 175 167 let s = Domain_name.to_string domain in 176 - let* unicode = to_unicode s in 168 + let unicode = to_unicode s in 177 169 match Domain_name.of_string unicode with 178 170 | Error (`Msg msg) -> invalid_label msg 179 - | Ok d -> Ok d 171 + | Ok d -> d 180 172 181 173 (* {1 Validation} *) 182 174 183 - let is_idna_valid domain = Result.is_ok (to_ascii domain) 175 + let is_idna_valid domain = 176 + try ignore (to_ascii domain); true 177 + with Error _ -> false
+35 -25
lib/punycode_idna.mli
··· 26 26 27 27 (** {1 Error Types} *) 28 28 29 - type error = 30 - | Punycode_error of Punycode.error 31 - (** Error during Punycode encoding/decoding. See {!Punycode.error} for 32 - details. *) 29 + type error_reason = 30 + | Punycode_error of Punycode.error_reason 31 + (** Error during Punycode encoding/decoding. See {!Punycode.error_reason} 32 + for details. *) 33 33 | Invalid_label of string 34 34 (** Label violates IDNA constraints. The string describes the violation. 35 35 See ··· 49 49 Section 4.2}, the result of encoding must decode back to the original 50 50 input. *) 51 51 52 - val pp_error : Format.formatter -> error -> unit 53 - (** [pp_error fmt e] pretty-prints an error. *) 52 + exception Error of error_reason 53 + (** Exception raised for all IDNA processing errors. *) 54 54 55 - val error_to_string : error -> string 56 - (** [error_to_string e] converts an error to a human-readable string. *) 55 + val pp_error_reason : Format.formatter -> error_reason -> unit 56 + (** [pp_error_reason fmt e] pretty-prints an error. *) 57 + 58 + val error_reason_to_string : error_reason -> string 59 + (** [error_reason_to_string e] converts an error to a human-readable string. *) 57 60 58 61 (** {1 Constants} *) 59 62 ··· 77 80 ?use_std3_rules:bool -> 78 81 ?transitional:bool -> 79 82 string -> 80 - (string, error) result 83 + string 81 84 (** [to_ascii domain] converts an internationalized domain name to ASCII. 82 85 83 86 Implements the ToASCII operation from ··· 105 108 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2}Section 106 109 4.2.3.2} (default: false) 107 110 - [transitional]: Use IDNA 2003 transitional processing (default: false) 111 + 112 + @raise Error on conversion failure. 108 113 109 114 Example: 110 115 {[ 111 116 to_ascii "münchen.example.com" 112 - (* = Ok "xn--mnchen-3ya.example.com" *) 117 + (* = "xn--mnchen-3ya.example.com" *) 113 118 ]} *) 114 119 115 - val label_to_ascii : 116 - ?check_hyphens:bool -> 117 - ?use_std3_rules:bool -> 118 - string -> 119 - (string, error) result 120 + val label_to_ascii : ?check_hyphens:bool -> ?use_std3_rules:bool -> string -> string 120 121 (** [label_to_ascii label] converts a single label to ASCII. 121 122 122 123 This implements the core ToASCII operation for one label, as described in 123 124 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.1}RFC 5891 124 - Section 4.1}. *) 125 + Section 4.1}. 126 + 127 + @raise Error on conversion failure. *) 125 128 126 129 (** {1 ToUnicode Operation} 127 130 ··· 131 134 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2} RFC 5891 132 135 Section 4.2} for the complete ToUnicode specification. *) 133 136 134 - val to_unicode : string -> (string, error) result 137 + val to_unicode : string -> string 135 138 (** [to_unicode domain] converts an ACE domain name to Unicode. 136 139 137 140 Implements the ToUnicode operation from ··· 143 146 {{:https://datatracker.ietf.org/doc/html/rfc3492#section-6.2}RFC 3492 144 147 Section 6.2} 2. Otherwise, pass through unchanged 145 148 149 + @raise Error on decoding failure. 150 + 146 151 Example: 147 152 {[ 148 153 to_unicode "xn--mnchen-3ya.example.com" 149 - (* = Ok "münchen.example.com" *) 154 + (* = "münchen.example.com" *) 150 155 ]} *) 151 156 152 - val label_to_unicode : string -> (string, error) result 157 + val label_to_unicode : string -> string 153 158 (** [label_to_unicode label] converts a single ACE label to Unicode. 154 159 155 160 This implements the core ToUnicode operation for one label, as described in 156 161 {{:https://datatracker.ietf.org/doc/html/rfc5891#section-4.2}RFC 5891 157 - Section 4.2}. *) 162 + Section 4.2}. 163 + 164 + @raise Error on decoding failure. *) 158 165 159 166 (** {1 Domain Name Integration} 160 167 ··· 168 175 ?check_hyphens:bool -> 169 176 ?use_std3_rules:bool -> 170 177 [ `raw ] Domain_name.t -> 171 - ([ `raw ] Domain_name.t, error) result 178 + [ `raw ] Domain_name.t 172 179 (** [domain_to_ascii domain] converts a domain name to ASCII form. 173 180 174 181 Applies {!to_ascii} to the string representation and returns the result as a 175 182 [Domain_name.t]. 176 183 184 + @raise Error on conversion failure. 185 + 177 186 Example: 178 187 {[ 179 188 let d = Domain_name.of_string_exn "münchen.example.com" in 180 189 domain_to_ascii d 181 - (* = Ok (Domain_name.of_string_exn "xn--mnchen-3ya.example.com") *) 190 + (* = Domain_name.of_string_exn "xn--mnchen-3ya.example.com" *) 182 191 ]} *) 183 192 184 - val domain_to_unicode : 185 - [ `raw ] Domain_name.t -> ([ `raw ] Domain_name.t, error) result 193 + val domain_to_unicode : [ `raw ] Domain_name.t -> [ `raw ] Domain_name.t 186 194 (** [domain_to_unicode domain] converts a domain name to Unicode form. 187 195 188 196 Applies {!to_unicode} to the string representation and returns the result as 189 - a [Domain_name.t]. *) 197 + a [Domain_name.t]. 198 + 199 + @raise Error on decoding failure. *) 190 200 191 201 (** {1 Validation} *) 192 202
+124 -106
test/test_punycode.ml
··· 26 26 27 27 (* Test result helper *) 28 28 let check_encode_ok expected input = 29 - match Punycode.encode input with 30 - | Ok result -> check string "encode" expected result 31 - | Error e -> fail (Format.asprintf "encode failed: %a" Punycode.pp_error e) 29 + try 30 + let result = Punycode.encode input in 31 + check string "encode" expected result 32 + with Punycode.Error e -> 33 + fail (Format.asprintf "encode failed: %a" Punycode.pp_error_reason e) 32 34 33 35 let check_decode_ok expected input = 34 - match Punycode.decode input with 35 - | Ok result -> 36 - let expected_arr = codepoints_of_hex_list expected in 37 - check int "length" (Array.length expected_arr) (Array.length result); 38 - Array.iteri 39 - (fun i u -> 40 - check int 41 - (Printf.sprintf "char %d" i) 42 - (Uchar.to_int expected_arr.(i)) 43 - (Uchar.to_int u)) 44 - result 45 - | Error e -> fail (Format.asprintf "decode failed: %a" Punycode.pp_error e) 36 + try 37 + let result = Punycode.decode input in 38 + let expected_arr = codepoints_of_hex_list expected in 39 + check int "length" (Array.length expected_arr) (Array.length result); 40 + Array.iteri 41 + (fun i u -> 42 + check int 43 + (Printf.sprintf "char %d" i) 44 + (Uchar.to_int expected_arr.(i)) 45 + (Uchar.to_int u)) 46 + result 47 + with Punycode.Error e -> 48 + fail (Format.asprintf "decode failed: %a" Punycode.pp_error_reason e) 46 49 47 50 let check_utf8_roundtrip s = 48 - match Punycode.encode_utf8 s with 49 - | Error e -> 50 - fail (Format.asprintf "encode_utf8 failed: %a" Punycode.pp_error e) 51 - | Ok encoded -> ( 52 - match Punycode.decode_utf8 encoded with 53 - | Error e -> 54 - fail (Format.asprintf "decode_utf8 failed: %a" Punycode.pp_error e) 55 - | Ok decoded -> check string "roundtrip" s decoded) 51 + try 52 + let encoded = Punycode.encode_utf8 s in 53 + let decoded = Punycode.decode_utf8 encoded in 54 + check string "roundtrip" s decoded 55 + with Punycode.Error e -> 56 + fail (Format.asprintf "roundtrip failed: %a" Punycode.pp_error_reason e) 56 57 57 58 (* RFC 3492 Section 7.1 Test Vectors *) 58 59 ··· 604 605 605 606 (* Label encoding tests *) 606 607 let test_label_encode_ascii () = 607 - match Punycode.encode_label "example" with 608 - | Ok result -> check string "ascii passthrough" "example" result 609 - | Error e -> 610 - fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e) 608 + try 609 + let result = Punycode.encode_label "example" in 610 + check string "ascii passthrough" "example" result 611 + with Punycode.Error e -> 612 + fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason e) 611 613 612 614 let test_label_encode_german () = 613 - match Punycode.encode_label "münchen" with 614 - | Ok result -> check string "german label" "xn--mnchen-3ya" result 615 - | Error e -> 616 - fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error e) 615 + try 616 + let result = Punycode.encode_label "münchen" in 617 + check string "german label" "xn--mnchen-3ya" result 618 + with Punycode.Error e -> 619 + fail (Format.asprintf "encode_label failed: %a" Punycode.pp_error_reason e) 617 620 618 621 let test_label_decode_german () = 619 - match Punycode.decode_label "xn--mnchen-3ya" with 620 - | Ok result -> check string "german decode" "münchen" result 621 - | Error e -> 622 - fail (Format.asprintf "decode_label failed: %a" Punycode.pp_error e) 622 + try 623 + let result = Punycode.decode_label "xn--mnchen-3ya" in 624 + check string "german decode" "münchen" result 625 + with Punycode.Error e -> 626 + fail (Format.asprintf "decode_label failed: %a" Punycode.pp_error_reason e) 623 627 624 628 (* IDNA tests *) 625 629 let test_idna_to_ascii_simple () = 626 - match Punycode_idna.to_ascii "münchen.example.com" with 627 - | Ok result -> 628 - check string "idna to_ascii" "xn--mnchen-3ya.example.com" result 629 - | Error e -> 630 - fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e) 630 + try 631 + let result = Punycode_idna.to_ascii "münchen.example.com" in 632 + check string "idna to_ascii" "xn--mnchen-3ya.example.com" result 633 + with Punycode_idna.Error e -> 634 + fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e) 631 635 632 636 let test_idna_to_unicode_simple () = 633 - match Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" with 634 - | Ok result -> check string "idna to_unicode" "münchen.example.com" result 635 - | Error e -> 636 - fail (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e) 637 + try 638 + let result = Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" in 639 + check string "idna to_unicode" "münchen.example.com" result 640 + with Punycode_idna.Error e -> 641 + fail (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error_reason e) 637 642 638 643 let test_idna_roundtrip () = 639 644 let original = "münchen.example.com" in 640 - match Punycode_idna.to_ascii original with 641 - | Error e -> 642 - fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e) 643 - | Ok ascii -> ( 644 - match Punycode_idna.to_unicode ascii with 645 - | Error e -> 646 - fail 647 - (Format.asprintf "to_unicode failed: %a" Punycode_idna.pp_error e) 648 - | Ok unicode -> check string "idna roundtrip" original unicode) 645 + try 646 + let ascii = Punycode_idna.to_ascii original in 647 + let unicode = Punycode_idna.to_unicode ascii in 648 + check string "idna roundtrip" original unicode 649 + with Punycode_idna.Error e -> 650 + fail (Format.asprintf "roundtrip failed: %a" Punycode_idna.pp_error_reason e) 649 651 650 652 let test_idna_all_ascii () = 651 - match Punycode_idna.to_ascii "www.example.com" with 652 - | Ok result -> check string "all ascii passthrough" "www.example.com" result 653 - | Error e -> 654 - fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e) 653 + try 654 + let result = Punycode_idna.to_ascii "www.example.com" in 655 + check string "all ascii passthrough" "www.example.com" result 656 + with Punycode_idna.Error e -> 657 + fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e) 655 658 656 659 let test_idna_mixed_labels () = 657 - match Punycode_idna.to_ascii "日本語.example.com" with 658 - | Ok result -> 659 - (* Check that result starts with xn-- and ends with .example.com *) 660 - check bool "has ace prefix" true (Punycode.has_ace_prefix result); 661 - check bool "ends with example.com" true 662 - (String.length result > 12 663 - && String.sub result (String.length result - 12) 12 = ".example.com") 664 - | Error e -> 665 - fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error e) 660 + try 661 + let result = Punycode_idna.to_ascii "日本語.example.com" in 662 + (* Check that result starts with xn-- and ends with .example.com *) 663 + check bool "has ace prefix" true (Punycode.has_ace_prefix result); 664 + check bool "ends with example.com" true 665 + (String.length result > 12 666 + && String.sub result (String.length result - 12) 12 = ".example.com") 667 + with Punycode_idna.Error e -> 668 + fail (Format.asprintf "to_ascii failed: %a" Punycode_idna.pp_error_reason e) 666 669 667 670 (* Case annotation tests *) 668 671 let test_case_annotation_decode () = 669 672 (* RFC example: uppercase letters indicate case flags *) 670 - match Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" with 671 - | Ok (codepoints, case_flags) -> 672 - check int "codepoints length" 673 - (List.length example_p_codepoints) 674 - (Array.length codepoints); 675 - check int "case_flags length" (Array.length codepoints) 676 - (Array.length case_flags); 677 - (* M should be uppercase *) 678 - check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase); 679 - (* a should be lowercase *) 680 - check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase) 681 - | Error e -> 682 - fail (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error e) 673 + try 674 + let codepoints, case_flags = 675 + Punycode.decode_with_case "MajiKoi5-783gue6qz075azm5e" 676 + in 677 + check int "codepoints length" 678 + (List.length example_p_codepoints) 679 + (Array.length codepoints); 680 + check int "case_flags length" (Array.length codepoints) 681 + (Array.length case_flags); 682 + (* M should be uppercase *) 683 + check bool "M uppercase" true (case_flags.(0) = Punycode.Uppercase); 684 + (* a should be lowercase *) 685 + check bool "a lowercase" true (case_flags.(1) = Punycode.Lowercase) 686 + with Punycode.Error e -> 687 + fail (Format.asprintf "decode_with_case failed: %a" Punycode.pp_error_reason e) 683 688 684 689 let test_case_annotation_encode () = 685 690 let codepoints = codepoints_of_hex_list [ 0x0061; 0x0062; 0x0063 ] in ··· 687 692 let case_flags = 688 693 [| Punycode.Uppercase; Punycode.Lowercase; Punycode.Uppercase |] 689 694 in 690 - match Punycode.encode_with_case codepoints case_flags with 691 - | Ok result -> 692 - (* Should encode as "AbC-" (basic code points with case annotation) *) 693 - check string "case encoded" "AbC-" result 694 - | Error e -> 695 - fail (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error e) 695 + try 696 + let result = Punycode.encode_with_case codepoints case_flags in 697 + (* Should encode as "AbC-" (basic code points with case annotation) *) 698 + check string "case encoded" "AbC-" result 699 + with Punycode.Error e -> 700 + fail (Format.asprintf "encode_with_case failed: %a" Punycode.pp_error_reason e) 696 701 697 702 (* Edge case tests *) 698 703 let test_empty_input () = 699 - match Punycode.encode [||] with 700 - | Ok result -> check string "empty encode" "" result 701 - | Error _ -> fail "empty encode should succeed" 704 + try 705 + let result = Punycode.encode [||] in 706 + check string "empty encode" "" result 707 + with Punycode.Error _ -> fail "empty encode should succeed" 702 708 703 709 let test_empty_decode () = 704 - match Punycode.decode "" with 705 - | Ok result -> check int "empty decode length" 0 (Array.length result) 706 - | Error _ -> fail "empty decode should succeed" 710 + try 711 + let result = Punycode.decode "" in 712 + check int "empty decode length" 0 (Array.length result) 713 + with Punycode.Error _ -> fail "empty decode should succeed" 707 714 708 715 let test_pure_ascii () = 709 716 let input = codepoints_of_string "hello" in 710 - match Punycode.encode input with 711 - | Ok result -> check string "pure ascii" "hello-" result 712 - | Error e -> fail (Format.asprintf "encode failed: %a" Punycode.pp_error e) 717 + try 718 + let result = Punycode.encode input in 719 + check string "pure ascii" "hello-" result 720 + with Punycode.Error e -> 721 + fail (Format.asprintf "encode failed: %a" Punycode.pp_error_reason e) 713 722 714 723 let test_invalid_digit () = 715 - match Punycode.decode "hello!" with 716 - | Ok _ -> fail "should fail on invalid digit" 717 - | Error (Punycode.Invalid_digit _) -> () 718 - | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e) 724 + try 725 + ignore (Punycode.decode "hello!"); 726 + fail "should fail on invalid digit" 727 + with 728 + | Punycode.Error (Punycode.Invalid_digit _) -> () 729 + | Punycode.Error e -> 730 + fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e) 719 731 720 732 let test_label_too_long () = 721 733 let long_label = String.make 100 'a' in 722 - match Punycode.encode_label long_label with 723 - | Ok _ -> fail "should fail on long label" 724 - | Error (Punycode.Label_too_long _) -> () 725 - | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e) 734 + try 735 + ignore (Punycode.encode_label long_label); 736 + fail "should fail on long label" 737 + with 738 + | Punycode.Error (Punycode.Label_too_long _) -> () 739 + | Punycode.Error e -> 740 + fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e) 726 741 727 742 let test_empty_label () = 728 - match Punycode.encode_label "" with 729 - | Ok _ -> fail "should fail on empty label" 730 - | Error Punycode.Empty_label -> () 731 - | Error e -> fail (Format.asprintf "wrong error type: %a" Punycode.pp_error e) 743 + try 744 + ignore (Punycode.encode_label ""); 745 + fail "should fail on empty label" 746 + with 747 + | Punycode.Error Punycode.Empty_label -> () 748 + | Punycode.Error e -> 749 + fail (Format.asprintf "wrong error type: %a" Punycode.pp_error_reason e) 732 750 733 751 (* Validation tests *) 734 752 let test_is_basic () =