···6161 | Empty_label ->
6262 Format.fprintf fmt "empty label"
63636464+(* {1 Error Constructors} *)
6565+6666+let overflow pos = Error (Overflow pos)
6767+let invalid_character pos u = Error (Invalid_character (pos, u))
6868+let invalid_digit pos c = Error (Invalid_digit (pos, c))
6969+let unexpected_end pos = Error (Unexpected_end pos)
7070+let _invalid_utf8 pos = Error (Invalid_utf8 pos)
7171+let label_too_long len = Error (Label_too_long len)
7272+let empty_label = Error Empty_label
64736574(* {1 Case Flags} *)
6675···70797180let is_basic u =
7281 Uchar.to_int u < 0x80
7373-7474-7575-let is_delimiter c = c = delimiter
76827783let is_ascii_string s =
7878- let rec loop i =
7979- if i >= String.length s then true
8080- else if Char.code s.[i] >= 0x80 then false
8181- else loop (i + 1)
8282- in
8383- loop 0
8484+ String.for_all (fun c -> Char.code c < 0x80) s
84858586let has_ace_prefix s =
8687 let len = String.length s in
···144145let safe_mul_add a b c pos =
145146 if c = 0 then Ok a
146147 else if b > (max_int_value - a) / c then
147147- Error (Overflow pos)
148148+ overflow pos
148149 else
149150 Ok (a + b * c)
150151···228229229230 while !h < input_length && !result = Ok () do
230231 (* Find minimum code point >= n *)
231231- let m = ref max_int_value in
232232- for j = 0 to input_length - 1 do
233233- let cp = Uchar.to_int codepoints.(j) in
234234- if cp >= !n && cp < !m then
235235- m := cp
236236- done;
232232+ let m = Array.fold_left (fun acc cp ->
233233+ let cp_val = Uchar.to_int cp in
234234+ if cp_val >= !n && cp_val < acc then cp_val else acc
235235+ ) max_int_value codepoints in
237236238237 (* Increase delta to advance state to <m, 0> *)
239238 let pos = { byte_offset = 0; char_index = !h } in
240240- (match safe_mul_add !delta (!m - !n) (!h + 1) pos with
239239+ (match safe_mul_add !delta (m - !n) (!h + 1) pos with
241240 | Error e -> result := Error e
242241 | Ok new_delta ->
243242 delta := new_delta;
244244- n := !m;
243243+ n := m;
245244246245 (* Process each code point *)
247246 let j = ref 0 in
···252251 if cp < !n then begin
253252 incr delta;
254253 if !delta = 0 then (* Overflow *)
255255- result := Error (Overflow pos)
254254+ result := overflow pos
256255 end
257256 else if cp = !n then begin
258257 (* Encode delta as variable-length integer *)
···316315 Ok ([||], [||])
317316 else begin
318317 (* Find last delimiter *)
319319- let last_delim = ref (-1) in
320320- for j = 0 to input_length - 1 do
321321- if is_delimiter input.[j] then
322322- last_delim := j
323323- done;
324324- let b = if !last_delim < 0 then 0 else !last_delim in
318318+ let b = Option.value ~default:0 (String.rindex_opt input delimiter) in
325319326320 (* Copy basic code points and extract case flags *)
327321 let output = ref [] in
···365359 let pos = { byte_offset = !in_pos; char_index = Array.length !output } in
366360367361 if !in_pos >= input_length then begin
368368- result := Error (Unexpected_end pos);
362362+ result := unexpected_end pos;
369363 done_decoding := true
370364 end else begin
371365 let c = input.[!in_pos] in
···373367374368 match decode_digit c with
375369 | None ->
376376- result := Error (Invalid_digit (pos, c));
370370+ result := invalid_digit pos c;
377371 done_decoding := true
378372 | Some digit ->
379373 (* i = i + digit * w, with overflow check *)
···397391 (* w = w * (base - t), with overflow check *)
398392 let base_minus_t = base - t in
399393 if !w > max_int_value / base_minus_t then begin
400400- result := Error (Overflow pos);
394394+ result := overflow pos;
401395 done_decoding := true
402396 end else begin
403397 w := !w * base_minus_t;
···416410 (* n = n + i / (out_len + 1), with overflow check *)
417411 let increment = !i / (out_len + 1) in
418412 if increment > max_int_value - !n then
419419- result := Error (Overflow pos)
413413+ result := overflow pos
420414 else begin
421415 n := !n + increment;
422416 i := !i mod (out_len + 1);
423417424418 (* Validate that n is a valid Unicode scalar value *)
425419 if not (Uchar.is_valid !n) then
426426- result := Error (Invalid_character (pos, Uchar.rep))
420420+ result := invalid_character pos Uchar.rep
427421 else begin
428422 (* Insert n at position i *)
429423 let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in
···456450 end
457451458452let decode input =
459459- match decode_impl input with
460460- | Error e -> Error e
461461- | Ok (codepoints, _) -> Ok codepoints
453453+ Result.map fst (decode_impl input)
462454463455let decode_with_case input =
464456 decode_impl input
···466458(* {1 UTF-8 String Operations} *)
467459468460let encode_utf8 s =
469469- match utf8_to_codepoints s with
470470- | Error e -> Error e
471471- | Ok codepoints -> encode codepoints
461461+ let open Result.Syntax in
462462+ let* codepoints = utf8_to_codepoints s in
463463+ encode codepoints
472464473465let decode_utf8 punycode =
474474- match decode punycode with
475475- | Error e -> Error e
476476- | Ok codepoints -> Ok (codepoints_to_utf8 codepoints)
466466+ let open Result.Syntax in
467467+ let+ codepoints = decode punycode in
468468+ codepoints_to_utf8 codepoints
477469478470(* {1 Domain Label Operations} *)
479471480472let encode_label label =
481473 if String.length label = 0 then
482482- Error Empty_label
474474+ empty_label
483475 else if is_ascii_string label then begin
484476 (* All ASCII - return as-is, but check length *)
485477 let len = String.length label in
486478 if len > max_label_length then
487487- Error (Label_too_long len)
479479+ label_too_long len
488480 else
489481 Ok label
490490- end else begin
482482+ end else
491483 (* Has non-ASCII - encode with Punycode *)
492492- match encode_utf8 label with
493493- | Error e -> Error e
494494- | Ok encoded ->
495495- let result = ace_prefix ^ encoded in
496496- let len = String.length result in
497497- if len > max_label_length then
498498- Error (Label_too_long len)
499499- else
500500- Ok result
501501- end
484484+ let open Result.Syntax in
485485+ let* encoded = encode_utf8 label in
486486+ let result = ace_prefix ^ encoded in
487487+ let len = String.length result in
488488+ if len > max_label_length then
489489+ label_too_long len
490490+ else
491491+ Ok result
502492503493let decode_label label =
504494 if String.length label = 0 then
505505- Error Empty_label
495495+ empty_label
506496 else if has_ace_prefix label then begin
507497 (* Remove ACE prefix and decode *)
508498 let punycode = String.sub label 4 (String.length label - 4) in
+58-63
lib/punycode_idna.ml
···2828 | Verification_failed ->
2929 Format.fprintf fmt "IDNA verification failed (round-trip mismatch)"
30303131+(* {1 Error Constructors} *)
3232+3333+let punycode_error e = Error (Punycode_error e)
3434+let invalid_label msg = Error (Invalid_label msg)
3535+let domain_too_long len = Error (Domain_too_long len)
3636+let _normalization_failed = Error Normalization_failed
3737+let verification_failed = Error Verification_failed
31383239(* {1 Unicode Normalization} *)
3340···4451 - Cannot start or end with hyphen *)
4552let is_std3_valid label =
4653 let len = String.length label in
4747- if len = 0 then false
4848- else if label.[0] = '-' || label.[len - 1] = '-' then false
4949- else
5050- let rec check i =
5151- if i >= len then true
5252- else
5353- let c = label.[i] in
5454- let valid =
5555- (c >= 'a' && c <= 'z') ||
5656- (c >= 'A' && c <= 'Z') ||
5757- (c >= '0' && c <= '9') ||
5858- c = '-'
5959- in
6060- if valid then check (i + 1) else false
6161- in
6262- check 0
5454+ let is_ldh c =
5555+ (c >= 'a' && c <= 'z') ||
5656+ (c >= 'A' && c <= 'Z') ||
5757+ (c >= '0' && c <= '9') ||
5858+ c = '-'
5959+ in
6060+ len > 0 &&
6161+ label.[0] <> '-' &&
6262+ label.[len - 1] <> '-' &&
6363+ String.for_all is_ldh label
63646465(* Check hyphen placement: hyphens not in positions 3 and 4 (except for ACE) *)
6566let check_hyphen_rules label =
···7576let label_to_ascii_impl ~check_hyphens ~use_std3_rules label =
7677 let len = String.length label in
7778 if len = 0 then
7878- Error (Invalid_label "empty label")
7979+ invalid_label "empty label"
7980 else if len > Punycode.max_label_length then
8080- Error (Punycode_error (Punycode.Label_too_long len))
8181+ punycode_error (Punycode.Label_too_long len)
8182 else if Punycode.is_ascii_string label then begin
8283 (* All ASCII - validate and pass through *)
8384 if use_std3_rules && not (is_std3_valid label) then
8484- Error (Invalid_label "STD3 rules violation")
8585+ invalid_label "STD3 rules violation"
8586 else if check_hyphens && not (check_hyphen_rules label) then
8686- Error (Invalid_label "invalid hyphen placement")
8787+ invalid_label "invalid hyphen placement"
8788 else
8889 Ok label
8990 end else begin
···92939394 (* Encode to Punycode *)
9495 match Punycode.encode_utf8 normalized with
9595- | Error e -> Error (Punycode_error e)
9696+ | Error e -> punycode_error e
9697 | Ok encoded ->
9798 let result = Punycode.ace_prefix ^ encoded in
9899 let result_len = String.length result in
99100 if result_len > Punycode.max_label_length then
100100- Error (Punycode_error (Punycode.Label_too_long result_len))
101101+ punycode_error (Punycode.Label_too_long result_len)
101102 else if check_hyphens && not (check_hyphen_rules result) then
102102- Error (Invalid_label "invalid hyphen placement in encoded label")
103103+ invalid_label "invalid hyphen placement in encoded label"
103104 else
104105 (* Verification: decode and compare to original normalized form *)
105106 match Punycode.decode_utf8 encoded with
106106- | Error _ -> Error Verification_failed
107107+ | Error _ -> verification_failed
107108 | Ok decoded ->
108109 if decoded <> normalized then
109109- Error Verification_failed
110110+ verification_failed
110111 else
111112 Ok result
112113 end
···118119 if is_ace_label label then begin
119120 let encoded = String.sub label 4 (String.length label - 4) in
120121 match Punycode.decode_utf8 encoded with
121121- | Error e -> Error (Punycode_error e)
122122+ | Error e -> punycode_error e
122123 | Ok decoded -> Ok decoded
123124 end else
124125 Ok label
···133134let join_labels labels =
134135 String.concat "." labels
135136137137+(* Map a function returning Result over a list, short-circuiting on first Error *)
138138+let map_result f lst =
139139+ List.fold_right (fun x acc ->
140140+ let open Result.Syntax in
141141+ let* y = f x in
142142+ let+ ys = acc in
143143+ y :: ys
144144+ ) lst (Ok [])
145145+136146let to_ascii ?(check_hyphens = true) ?(check_bidi = false)
137147 ?(check_joiners = false) ?(use_std3_rules = false)
138148 ?(transitional = false) domain =
···142152 let _ = check_joiners in
143153 let _ = transitional in
144154155155+ let open Result.Syntax in
145156 let labels = split_domain domain in
146146- let rec process acc = function
147147- | [] ->
148148- let result = join_labels (List.rev acc) in
149149- let len = String.length result in
150150- if len > max_domain_length then
151151- Error (Domain_too_long len)
152152- else
153153- Ok result
154154- | label :: rest ->
155155- match label_to_ascii_impl ~check_hyphens ~use_std3_rules label with
156156- | Error e -> Error e
157157- | Ok encoded -> process (encoded :: acc) rest
158158- in
159159- process [] labels
157157+ let* encoded_labels = map_result (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels in
158158+ let result = join_labels encoded_labels in
159159+ let len = String.length result in
160160+ if len > max_domain_length then
161161+ domain_too_long len
162162+ else
163163+ Ok result
160164161165let to_unicode domain =
166166+ let open Result.Syntax in
162167 let labels = split_domain domain in
163163- let rec process acc = function
164164- | [] -> Ok (join_labels (List.rev acc))
165165- | label :: rest ->
166166- match label_to_unicode label with
167167- | Error e -> Error e
168168- | Ok decoded -> process (decoded :: acc) rest
169169- in
170170- process [] labels
168168+ let+ decoded_labels = map_result label_to_unicode labels in
169169+ join_labels decoded_labels
171170172171(* {1 Domain Name Library Integration} *)
173172174173let domain_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) domain =
174174+ let open Result.Syntax in
175175 let s = Domain_name.to_string domain in
176176- match to_ascii ~check_hyphens ~use_std3_rules s with
177177- | Error e -> Error e
178178- | Ok ascii ->
179179- match Domain_name.of_string ascii with
180180- | Error (`Msg msg) -> Error (Invalid_label msg)
181181- | Ok d -> Ok d
176176+ let* ascii = to_ascii ~check_hyphens ~use_std3_rules s in
177177+ match Domain_name.of_string ascii with
178178+ | Error (`Msg msg) -> invalid_label msg
179179+ | Ok d -> Ok d
182180183181let domain_to_unicode domain =
182182+ let open Result.Syntax in
184183 let s = Domain_name.to_string domain in
185185- match to_unicode s with
186186- | Error e -> Error e
187187- | Ok unicode ->
188188- match Domain_name.of_string unicode with
189189- | Error (`Msg msg) -> Error (Invalid_label msg)
190190- | Ok d -> Ok d
184184+ let* unicode = to_unicode s in
185185+ match Domain_name.of_string unicode with
186186+ | Error (`Msg msg) -> invalid_label msg
187187+ | Ok d -> Ok d
191188192189(* {1 Validation} *)
193190194191let is_idna_valid domain =
195195- match to_ascii domain with
196196- | Ok _ -> true
197197- | Error _ -> false
192192+ Result.is_ok (to_ascii domain)