···61 | Empty_label ->
62 Format.fprintf fmt "empty label"
630000000006465(* {1 Case Flags} *)
66···7071let is_basic u =
72 Uchar.to_int u < 0x80
73-74-75-let is_delimiter c = c = delimiter
7677let is_ascii_string s =
78- let rec loop i =
79- if i >= String.length s then true
80- else if Char.code s.[i] >= 0x80 then false
81- else loop (i + 1)
82- in
83- loop 0
8485let has_ace_prefix s =
86 let len = String.length s in
···144let safe_mul_add a b c pos =
145 if c = 0 then Ok a
146 else if b > (max_int_value - a) / c then
147- Error (Overflow pos)
148 else
149 Ok (a + b * c)
150···228229 while !h < input_length && !result = Ok () do
230 (* Find minimum code point >= n *)
231- let m = ref max_int_value in
232- for j = 0 to input_length - 1 do
233- let cp = Uchar.to_int codepoints.(j) in
234- if cp >= !n && cp < !m then
235- m := cp
236- done;
237238 (* Increase delta to advance state to <m, 0> *)
239 let pos = { byte_offset = 0; char_index = !h } in
240- (match safe_mul_add !delta (!m - !n) (!h + 1) pos with
241 | Error e -> result := Error e
242 | Ok new_delta ->
243 delta := new_delta;
244- n := !m;
245246 (* Process each code point *)
247 let j = ref 0 in
···252 if cp < !n then begin
253 incr delta;
254 if !delta = 0 then (* Overflow *)
255- result := Error (Overflow pos)
256 end
257 else if cp = !n then begin
258 (* Encode delta as variable-length integer *)
···316 Ok ([||], [||])
317 else begin
318 (* Find last delimiter *)
319- let last_delim = ref (-1) in
320- for j = 0 to input_length - 1 do
321- if is_delimiter input.[j] then
322- last_delim := j
323- done;
324- let b = if !last_delim < 0 then 0 else !last_delim in
325326 (* Copy basic code points and extract case flags *)
327 let output = ref [] in
···365 let pos = { byte_offset = !in_pos; char_index = Array.length !output } in
366367 if !in_pos >= input_length then begin
368- result := Error (Unexpected_end pos);
369 done_decoding := true
370 end else begin
371 let c = input.[!in_pos] in
···373374 match decode_digit c with
375 | None ->
376- result := Error (Invalid_digit (pos, c));
377 done_decoding := true
378 | Some digit ->
379 (* i = i + digit * w, with overflow check *)
···397 (* w = w * (base - t), with overflow check *)
398 let base_minus_t = base - t in
399 if !w > max_int_value / base_minus_t then begin
400- result := Error (Overflow pos);
401 done_decoding := true
402 end else begin
403 w := !w * base_minus_t;
···416 (* n = n + i / (out_len + 1), with overflow check *)
417 let increment = !i / (out_len + 1) in
418 if increment > max_int_value - !n then
419- result := Error (Overflow pos)
420 else begin
421 n := !n + increment;
422 i := !i mod (out_len + 1);
423424 (* Validate that n is a valid Unicode scalar value *)
425 if not (Uchar.is_valid !n) then
426- result := Error (Invalid_character (pos, Uchar.rep))
427 else begin
428 (* Insert n at position i *)
429 let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in
···456 end
457458let decode input =
459- match decode_impl input with
460- | Error e -> Error e
461- | Ok (codepoints, _) -> Ok codepoints
462463let decode_with_case input =
464 decode_impl input
···466(* {1 UTF-8 String Operations} *)
467468let encode_utf8 s =
469- match utf8_to_codepoints s with
470- | Error e -> Error e
471- | Ok codepoints -> encode codepoints
472473let decode_utf8 punycode =
474- match decode punycode with
475- | Error e -> Error e
476- | Ok codepoints -> Ok (codepoints_to_utf8 codepoints)
477478(* {1 Domain Label Operations} *)
479480let encode_label label =
481 if String.length label = 0 then
482- Error Empty_label
483 else if is_ascii_string label then begin
484 (* All ASCII - return as-is, but check length *)
485 let len = String.length label in
486 if len > max_label_length then
487- Error (Label_too_long len)
488 else
489 Ok label
490- end else begin
491 (* Has non-ASCII - encode with Punycode *)
492- match encode_utf8 label with
493- | Error e -> Error e
494- | Ok encoded ->
495- let result = ace_prefix ^ encoded in
496- let len = String.length result in
497- if len > max_label_length then
498- Error (Label_too_long len)
499- else
500- Ok result
501- end
502503let decode_label label =
504 if String.length label = 0 then
505- Error Empty_label
506 else if has_ace_prefix label then begin
507 (* Remove ACE prefix and decode *)
508 let punycode = String.sub label 4 (String.length label - 4) in
···61 | Empty_label ->
62 Format.fprintf fmt "empty label"
6364+(* {1 Error Constructors} *)
65+66+let overflow pos = Error (Overflow pos)
67+let invalid_character pos u = Error (Invalid_character (pos, u))
68+let invalid_digit pos c = Error (Invalid_digit (pos, c))
69+let unexpected_end pos = Error (Unexpected_end pos)
70+let _invalid_utf8 pos = Error (Invalid_utf8 pos)
71+let label_too_long len = Error (Label_too_long len)
72+let empty_label = Error Empty_label
7374(* {1 Case Flags} *)
75···7980let is_basic u =
81 Uchar.to_int u < 0x80
0008283let is_ascii_string s =
84+ String.for_all (fun c -> Char.code c < 0x80) s
000008586let has_ace_prefix s =
87 let len = String.length s in
···145let safe_mul_add a b c pos =
146 if c = 0 then Ok a
147 else if b > (max_int_value - a) / c then
148+ overflow pos
149 else
150 Ok (a + b * c)
151···229230 while !h < input_length && !result = Ok () do
231 (* Find minimum code point >= n *)
232+ let m = Array.fold_left (fun acc cp ->
233+ let cp_val = Uchar.to_int cp in
234+ if cp_val >= !n && cp_val < acc then cp_val else acc
235+ ) max_int_value codepoints in
00236237 (* Increase delta to advance state to <m, 0> *)
238 let pos = { byte_offset = 0; char_index = !h } in
239+ (match safe_mul_add !delta (m - !n) (!h + 1) pos with
240 | Error e -> result := Error e
241 | Ok new_delta ->
242 delta := new_delta;
243+ n := m;
244245 (* Process each code point *)
246 let j = ref 0 in
···251 if cp < !n then begin
252 incr delta;
253 if !delta = 0 then (* Overflow *)
254+ result := overflow pos
255 end
256 else if cp = !n then begin
257 (* Encode delta as variable-length integer *)
···315 Ok ([||], [||])
316 else begin
317 (* Find last delimiter *)
318+ let b = Option.value ~default:0 (String.rindex_opt input delimiter) in
00000319320 (* Copy basic code points and extract case flags *)
321 let output = ref [] in
···359 let pos = { byte_offset = !in_pos; char_index = Array.length !output } in
360361 if !in_pos >= input_length then begin
362+ result := unexpected_end pos;
363 done_decoding := true
364 end else begin
365 let c = input.[!in_pos] in
···367368 match decode_digit c with
369 | None ->
370+ result := invalid_digit pos c;
371 done_decoding := true
372 | Some digit ->
373 (* i = i + digit * w, with overflow check *)
···391 (* w = w * (base - t), with overflow check *)
392 let base_minus_t = base - t in
393 if !w > max_int_value / base_minus_t then begin
394+ result := overflow pos;
395 done_decoding := true
396 end else begin
397 w := !w * base_minus_t;
···410 (* n = n + i / (out_len + 1), with overflow check *)
411 let increment = !i / (out_len + 1) in
412 if increment > max_int_value - !n then
413+ result := overflow pos
414 else begin
415 n := !n + increment;
416 i := !i mod (out_len + 1);
417418 (* Validate that n is a valid Unicode scalar value *)
419 if not (Uchar.is_valid !n) then
420+ result := invalid_character pos Uchar.rep
421 else begin
422 (* Insert n at position i *)
423 let new_output = Array.make (out_len + 1) (Uchar.of_int 0) in
···450 end
451452let decode input =
453+ Result.map fst (decode_impl input)
00454455let decode_with_case input =
456 decode_impl input
···458(* {1 UTF-8 String Operations} *)
459460let encode_utf8 s =
461+ let open Result.Syntax in
462+ let* codepoints = utf8_to_codepoints s in
463+ encode codepoints
464465let decode_utf8 punycode =
466+ let open Result.Syntax in
467+ let+ codepoints = decode punycode in
468+ codepoints_to_utf8 codepoints
469470(* {1 Domain Label Operations} *)
471472let encode_label label =
473 if String.length label = 0 then
474+ empty_label
475 else if is_ascii_string label then begin
476 (* All ASCII - return as-is, but check length *)
477 let len = String.length label in
478 if len > max_label_length then
479+ label_too_long len
480 else
481 Ok label
482+ end else
483 (* Has non-ASCII - encode with Punycode *)
484+ let open Result.Syntax in
485+ let* encoded = encode_utf8 label in
486+ let result = ace_prefix ^ encoded in
487+ let len = String.length result in
488+ if len > max_label_length then
489+ label_too_long len
490+ else
491+ Ok result
00492493let decode_label label =
494 if String.length label = 0 then
495+ empty_label
496 else if has_ace_prefix label then begin
497 (* Remove ACE prefix and decode *)
498 let punycode = String.sub label 4 (String.length label - 4) in
+58-63
lib/punycode_idna.ml
···28 | Verification_failed ->
29 Format.fprintf fmt "IDNA verification failed (round-trip mismatch)"
3000000003132(* {1 Unicode Normalization} *)
33···44 - Cannot start or end with hyphen *)
45let is_std3_valid label =
46 let len = String.length label in
47- if len = 0 then false
48- else if label.[0] = '-' || label.[len - 1] = '-' then false
49- else
50- let rec check i =
51- if i >= len then true
52- else
53- let c = label.[i] in
54- let valid =
55- (c >= 'a' && c <= 'z') ||
56- (c >= 'A' && c <= 'Z') ||
57- (c >= '0' && c <= '9') ||
58- c = '-'
59- in
60- if valid then check (i + 1) else false
61- in
62- check 0
6364(* Check hyphen placement: hyphens not in positions 3 and 4 (except for ACE) *)
65let check_hyphen_rules label =
···75let label_to_ascii_impl ~check_hyphens ~use_std3_rules label =
76 let len = String.length label in
77 if len = 0 then
78- Error (Invalid_label "empty label")
79 else if len > Punycode.max_label_length then
80- Error (Punycode_error (Punycode.Label_too_long len))
81 else if Punycode.is_ascii_string label then begin
82 (* All ASCII - validate and pass through *)
83 if use_std3_rules && not (is_std3_valid label) then
84- Error (Invalid_label "STD3 rules violation")
85 else if check_hyphens && not (check_hyphen_rules label) then
86- Error (Invalid_label "invalid hyphen placement")
87 else
88 Ok label
89 end else begin
···9293 (* Encode to Punycode *)
94 match Punycode.encode_utf8 normalized with
95- | Error e -> Error (Punycode_error e)
96 | Ok encoded ->
97 let result = Punycode.ace_prefix ^ encoded in
98 let result_len = String.length result in
99 if result_len > Punycode.max_label_length then
100- Error (Punycode_error (Punycode.Label_too_long result_len))
101 else if check_hyphens && not (check_hyphen_rules result) then
102- Error (Invalid_label "invalid hyphen placement in encoded label")
103 else
104 (* Verification: decode and compare to original normalized form *)
105 match Punycode.decode_utf8 encoded with
106- | Error _ -> Error Verification_failed
107 | Ok decoded ->
108 if decoded <> normalized then
109- Error Verification_failed
110 else
111 Ok result
112 end
···118 if is_ace_label label then begin
119 let encoded = String.sub label 4 (String.length label - 4) in
120 match Punycode.decode_utf8 encoded with
121- | Error e -> Error (Punycode_error e)
122 | Ok decoded -> Ok decoded
123 end else
124 Ok label
···133let join_labels labels =
134 String.concat "." labels
135000000000136let to_ascii ?(check_hyphens = true) ?(check_bidi = false)
137 ?(check_joiners = false) ?(use_std3_rules = false)
138 ?(transitional = false) domain =
···142 let _ = check_joiners in
143 let _ = transitional in
1440145 let labels = split_domain domain in
146- let rec process acc = function
147- | [] ->
148- let result = join_labels (List.rev acc) in
149- let len = String.length result in
150- if len > max_domain_length then
151- Error (Domain_too_long len)
152- else
153- Ok result
154- | label :: rest ->
155- match label_to_ascii_impl ~check_hyphens ~use_std3_rules label with
156- | Error e -> Error e
157- | Ok encoded -> process (encoded :: acc) rest
158- in
159- process [] labels
160161let to_unicode domain =
0162 let labels = split_domain domain in
163- let rec process acc = function
164- | [] -> Ok (join_labels (List.rev acc))
165- | label :: rest ->
166- match label_to_unicode label with
167- | Error e -> Error e
168- | Ok decoded -> process (decoded :: acc) rest
169- in
170- process [] labels
171172(* {1 Domain Name Library Integration} *)
173174let domain_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) domain =
0175 let s = Domain_name.to_string domain in
176- match to_ascii ~check_hyphens ~use_std3_rules s with
177- | Error e -> Error e
178- | Ok ascii ->
179- match Domain_name.of_string ascii with
180- | Error (`Msg msg) -> Error (Invalid_label msg)
181- | Ok d -> Ok d
182183let domain_to_unicode domain =
0184 let s = Domain_name.to_string domain in
185- match to_unicode s with
186- | Error e -> Error e
187- | Ok unicode ->
188- match Domain_name.of_string unicode with
189- | Error (`Msg msg) -> Error (Invalid_label msg)
190- | Ok d -> Ok d
191192(* {1 Validation} *)
193194let is_idna_valid domain =
195- match to_ascii domain with
196- | Ok _ -> true
197- | Error _ -> false
···28 | Verification_failed ->
29 Format.fprintf fmt "IDNA verification failed (round-trip mismatch)"
3031+(* {1 Error Constructors} *)
32+33+let punycode_error e = Error (Punycode_error e)
34+let invalid_label msg = Error (Invalid_label msg)
35+let domain_too_long len = Error (Domain_too_long len)
36+let _normalization_failed = Error Normalization_failed
37+let verification_failed = Error Verification_failed
3839(* {1 Unicode Normalization} *)
40···51 - Cannot start or end with hyphen *)
52let is_std3_valid label =
53 let len = String.length label in
54+ let is_ldh c =
55+ (c >= 'a' && c <= 'z') ||
56+ (c >= 'A' && c <= 'Z') ||
57+ (c >= '0' && c <= '9') ||
58+ c = '-'
59+ in
60+ len > 0 &&
61+ label.[0] <> '-' &&
62+ label.[len - 1] <> '-' &&
63+ String.for_all is_ldh label
0000006465(* Check hyphen placement: hyphens not in positions 3 and 4 (except for ACE) *)
66let check_hyphen_rules label =
···76let label_to_ascii_impl ~check_hyphens ~use_std3_rules label =
77 let len = String.length label in
78 if len = 0 then
79+ invalid_label "empty label"
80 else if len > Punycode.max_label_length then
81+ punycode_error (Punycode.Label_too_long len)
82 else if Punycode.is_ascii_string label then begin
83 (* All ASCII - validate and pass through *)
84 if use_std3_rules && not (is_std3_valid label) then
85+ invalid_label "STD3 rules violation"
86 else if check_hyphens && not (check_hyphen_rules label) then
87+ invalid_label "invalid hyphen placement"
88 else
89 Ok label
90 end else begin
···9394 (* Encode to Punycode *)
95 match Punycode.encode_utf8 normalized with
96+ | Error e -> punycode_error e
97 | Ok encoded ->
98 let result = Punycode.ace_prefix ^ encoded in
99 let result_len = String.length result in
100 if result_len > Punycode.max_label_length then
101+ punycode_error (Punycode.Label_too_long result_len)
102 else if check_hyphens && not (check_hyphen_rules result) then
103+ invalid_label "invalid hyphen placement in encoded label"
104 else
105 (* Verification: decode and compare to original normalized form *)
106 match Punycode.decode_utf8 encoded with
107+ | Error _ -> verification_failed
108 | Ok decoded ->
109 if decoded <> normalized then
110+ verification_failed
111 else
112 Ok result
113 end
···119 if is_ace_label label then begin
120 let encoded = String.sub label 4 (String.length label - 4) in
121 match Punycode.decode_utf8 encoded with
122+ | Error e -> punycode_error e
123 | Ok decoded -> Ok decoded
124 end else
125 Ok label
···134let join_labels labels =
135 String.concat "." labels
136137+(* Map a function returning Result over a list, short-circuiting on first Error *)
138+let map_result f lst =
139+ List.fold_right (fun x acc ->
140+ let open Result.Syntax in
141+ let* y = f x in
142+ let+ ys = acc in
143+ y :: ys
144+ ) lst (Ok [])
145+146let to_ascii ?(check_hyphens = true) ?(check_bidi = false)
147 ?(check_joiners = false) ?(use_std3_rules = false)
148 ?(transitional = false) domain =
···152 let _ = check_joiners in
153 let _ = transitional in
154155+ let open Result.Syntax in
156 let labels = split_domain domain in
157+ let* encoded_labels = map_result (label_to_ascii_impl ~check_hyphens ~use_std3_rules) labels in
158+ let result = join_labels encoded_labels in
159+ let len = String.length result in
160+ if len > max_domain_length then
161+ domain_too_long len
162+ else
163+ Ok result
0000000164165let to_unicode domain =
166+ let open Result.Syntax in
167 let labels = split_domain domain in
168+ let+ decoded_labels = map_result label_to_unicode labels in
169+ join_labels decoded_labels
000000170171(* {1 Domain Name Library Integration} *)
172173let domain_to_ascii ?(check_hyphens = true) ?(use_std3_rules = false) domain =
174+ let open Result.Syntax in
175 let s = Domain_name.to_string domain in
176+ let* ascii = to_ascii ~check_hyphens ~use_std3_rules s in
177+ match Domain_name.of_string ascii with
178+ | Error (`Msg msg) -> invalid_label msg
179+ | Ok d -> Ok d
00180181let domain_to_unicode domain =
182+ let open Result.Syntax in
183 let s = Domain_name.to_string domain in
184+ let* unicode = to_unicode s in
185+ match Domain_name.of_string unicode with
186+ | Error (`Msg msg) -> invalid_label msg
187+ | Ok d -> Ok d
00188189(* {1 Validation} *)
190191let is_idna_valid domain =
192+ Result.is_ok (to_ascii domain)
00