Punycode (RFC3492) in OCaml

metadata

+247 -23
+17 -1
.gitignore
··· 1 - _build
··· 1 + # OCaml build artifacts 2 + _build/ 3 + *.install 4 + *.merlin 5 + 6 + # Third-party sources (fetch locally with opam source) 7 + third_party/ 8 + 9 + # Editor and OS files 10 + .DS_Store 11 + *.swp 12 + *~ 13 + .vscode/ 14 + .idea/ 15 + 16 + # Opam local switch 17 + _opam/
+1
.ocamlformat
···
··· 1 + version=0.28.1
+53
.tangled/workflows/build.yml
···
··· 1 + when: 2 + - event: ["push", "pull_request"] 3 + branch: ["main"] 4 + 5 + engine: nixery 6 + 7 + dependencies: 8 + nixpkgs: 9 + - shell 10 + - stdenv 11 + - findutils 12 + - binutils 13 + - libunwind 14 + - ncurses 15 + - opam 16 + - git 17 + - gawk 18 + - gnupatch 19 + - gnum4 20 + - gnumake 21 + - gnutar 22 + - gnused 23 + - gnugrep 24 + - diffutils 25 + - gzip 26 + - bzip2 27 + - gcc 28 + - ocaml 29 + - pkg-config 30 + 31 + steps: 32 + - name: opam 33 + command: | 34 + opam init --disable-sandboxing -a -y 35 + - name: repo 36 + command: | 37 + opam repo add aoah https://tangled.org/anil.recoil.org/aoah-opam-repo.git 38 + - name: switch 39 + command: | 40 + opam install . --confirm-level=unsafe-yes --deps-only 41 + - name: build 42 + command: | 43 + opam exec -- dune build -p punycode 44 + - name: switch-test 45 + command: | 46 + opam install . --confirm-level=unsafe-yes --deps-only --with-test 47 + - name: test 48 + command: | 49 + opam exec -- dune runtest --verbose 50 + - name: doc 51 + command: | 52 + opam install -y odoc 53 + opam exec -- dune build @doc
+15
LICENSE.md
···
··· 1 + ISC License 2 + 3 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org> 4 + 5 + Permission to use, copy, modify, and distribute this software for any 6 + purpose with or without fee is hereby granted, provided that the above 7 + copyright notice and this permission notice appear in all copies. 8 + 9 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+102
README.md
···
··· 1 + # punycode - RFC 3492 Punycode and IDNA for OCaml 2 + 3 + High-quality implementation of RFC 3492 (Punycode) with IDNA (Internationalized Domain Names in Applications) support for OCaml. Enables encoding and decoding of internationalized domain names with proper Unicode normalization. 4 + 5 + ## Key Features 6 + 7 + - **RFC 3492 Punycode**: Complete implementation of the Bootstring algorithm for encoding Unicode in ASCII-compatible form 8 + - **IDNA Support**: ToASCII and ToUnicode operations per RFC 5891 (IDNA 2008) for internationalized domain names 9 + - **Unicode Normalization**: Automatic NFC normalization using `uunf` for proper IDNA compliance 10 + - **Mixed-Case Annotation**: Optional case preservation through Punycode encoding round-trips 11 + - **Domain Integration**: Native support for the `domain-name` library 12 + - **Comprehensive Error Handling**: Detailed position tracking and RFC-compliant error reporting 13 + 14 + ## Usage 15 + 16 + ### Basic Punycode Encoding/Decoding 17 + 18 + ```ocaml 19 + (* Encode a UTF-8 string to Punycode *) 20 + let encoded = Punycode.encode_utf8 "münchen" 21 + (* = Ok "mnchen-3ya" *) 22 + 23 + (* Decode Punycode back to UTF-8 *) 24 + let decoded = Punycode.decode_utf8 "mnchen-3ya" 25 + (* = Ok "münchen" *) 26 + ``` 27 + 28 + ### Domain Label Operations 29 + 30 + ```ocaml 31 + (* Encode a domain label with ACE prefix *) 32 + let label = Punycode.encode_label "münchen" 33 + (* = Ok "xn--mnchen-3ya" *) 34 + 35 + (* Decode an ACE-prefixed label *) 36 + let original = Punycode.decode_label "xn--mnchen-3ya" 37 + (* = Ok "münchen" *) 38 + ``` 39 + 40 + ### IDNA Domain Name Conversion 41 + 42 + ```ocaml 43 + (* Convert internationalized domain to ASCII for DNS lookup *) 44 + let ascii_domain = Punycode_idna.to_ascii "münchen.example.com" 45 + (* = Ok "xn--mnchen-3ya.example.com" *) 46 + 47 + (* Convert ASCII domain back to Unicode for display *) 48 + let unicode_domain = Punycode_idna.to_unicode "xn--mnchen-3ya.example.com" 
49 + (* = Ok "münchen.example.com" *) 50 + ``` 51 + 52 + ### Working with Unicode Code Points 53 + 54 + ```ocaml 55 + (* Encode an array of Unicode code points *) 56 + let codepoints = [| Uchar.of_int 0x4ED6; Uchar.of_int 0x4EEC |] 57 + let encoded = Punycode.encode codepoints 58 + (* Result is Punycode string *) 59 + 60 + (* Decode to code points *) 61 + let decoded = Punycode.decode "ihqwcrb4cv8a8dqg056pqjye" 62 + (* Result is Uchar.t array *) 63 + ``` 64 + 65 + ### Integration with domain-name Library 66 + 67 + ```ocaml 68 + (* Convert a Domain_name.t to ASCII *) 69 + let domain = Domain_name.of_string_exn "münchen.example.com" 70 + let ascii = Punycode_idna.domain_to_ascii domain 71 + (* = Ok (Domain_name for "xn--mnchen-3ya.example.com") *) 72 + 73 + (* Convert back to Unicode *) 74 + let unicode = Punycode_idna.domain_to_unicode ascii 75 + (* = Ok (original domain) *) 76 + ``` 77 + 78 + ## Installation 79 + 80 + ``` 81 + opam install punycode 82 + ``` 83 + 84 + ## Documentation 85 + 86 + API documentation is available at https://tangled.org/@anil.recoil.org/ocaml-punycode or via: 87 + 88 + ``` 89 + opam install punycode 90 + odig doc punycode 91 + ``` 92 + 93 + ## References 94 + 95 + - [RFC 3492](https://datatracker.ietf.org/doc/html/rfc3492) - Punycode: A Bootstring encoding of Unicode for IDNA 96 + - [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) - Internationalized Domain Names in Applications (IDNA): Protocol 97 + - [RFC 5892](https://datatracker.ietf.org/doc/html/rfc5892) - Unicode Code Points and IDNA 98 + - [RFC 1035](https://datatracker.ietf.org/doc/html/rfc1035) - Domain Names Implementation and Specification 99 + 100 + ## License 101 + 102 + ISC
+4
dune
···
··· 1 + ; Root dune file 2 + 3 + ; Ignore third_party directory (for fetched dependency sources) 4 + (data_only_dirs third_party)
+10 -7
dune-project
··· 1 - (lang dune 3.0) 2 - (name puny) 3 - (version 0.1.0) 4 5 (generate_opam_files true) 6 7 - (source (github username/puny)) 8 (license ISC) 9 - (authors "Author Name") 10 - (maintainers "maintainer@example.com") 11 12 (package 13 - (name puny) 14 (synopsis "RFC 3492 Punycode and IDNA implementation for OCaml") 15 (description 16 "A high-quality implementation of RFC 3492 (Punycode) with IDNA support. ··· 22 (uutf (>= 1.0.0)) 23 (uunf (>= 15.0.0)) 24 (domain-name (>= 0.4.0)) 25 (alcotest :with-test)))
··· 1 + (lang dune 3.20) 2 + 3 + (name punycode) 4 5 (generate_opam_files true) 6 7 (license ISC) 8 + (authors "Anil Madhavapeddy") 9 + (homepage "https://tangled.org/@anil.recoil.org/ocaml-punycode") 10 + (maintainers "Anil Madhavapeddy <anil@recoil.org>") 11 + (bug_reports "https://tangled.org/@anil.recoil.org/ocaml-punycode/issues") 12 + (maintenance_intent "(latest)") 13 14 (package 15 + (name punycode) 16 (synopsis "RFC 3492 Punycode and IDNA implementation for OCaml") 17 (description 18 "A high-quality implementation of RFC 3492 (Punycode) with IDNA support. ··· 24 (uutf (>= 1.0.0)) 25 (uunf (>= 15.0.0)) 26 (domain-name (>= 0.4.0)) 27 + (odoc :with-doc) 28 (alcotest :with-test)))
+10 -4
lib/dune
··· 1 (library 2 - (name puny) 3 - (public_name puny) 4 - (modules punycode punycode_idna) 5 - (libraries uutf uunf uunf.string domain-name))
··· 1 (library 2 + (name punycode) 3 + (public_name punycode) 4 + (modules punycode) 5 + (libraries uutf)) 6 + 7 + (library 8 + (name punycode_idna) 9 + (public_name punycode.idna) 10 + (modules punycode_idna) 11 + (libraries punycode uunf domain-name))
+5
lib/punycode.ml
··· 1 (* RFC 3492 Punycode Implementation *) 2 3 (* {1 Bootstring Parameters for Punycode (RFC 3492 Section 5)} *)
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 (* RFC 3492 Punycode Implementation *) 7 8 (* {1 Bootstring Parameters for Punycode (RFC 3492 Section 5)} *)
+5
lib/punycode.mli
··· 1 (** RFC 3492 Punycode: A Bootstring encoding of Unicode for IDNA. 2 3 This module implements the Punycode algorithm as specified in
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 (** RFC 3492 Punycode: A Bootstring encoding of Unicode for IDNA. 7 8 This module implements the Punycode algorithm as specified in
+5
lib/punycode_idna.ml
··· 1 (* IDNA (Internationalized Domain Names in Applications) Implementation *) 2 3 let max_domain_length = 253
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 (* IDNA (Internationalized Domain Names in Applications) Implementation *) 7 8 let max_domain_length = 253
+5
lib/punycode_idna.mli
··· 1 (** IDNA (Internationalized Domain Names in Applications) support. 2 3 This module provides ToASCII and ToUnicode operations as specified
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 (** IDNA (Internationalized Domain Names in Applications) support. 7 8 This module provides ToASCII and ToUnicode operations as specified
+7 -8
puny.opam punycode.opam
··· 1 # This file is generated by dune, edit dune-project instead 2 opam-version: "2.0" 3 - version: "0.1.0" 4 synopsis: "RFC 3492 Punycode and IDNA implementation for OCaml" 5 description: """ 6 A high-quality implementation of RFC 3492 (Punycode) with IDNA support. 7 Provides encoding and decoding of internationalized domain names, 8 with proper Unicode normalization and mixed-case annotation support.""" 9 - maintainer: ["maintainer@example.com"] 10 - authors: ["Author Name"] 11 license: "ISC" 12 - homepage: "https://github.com/username/puny" 13 - bug-reports: "https://github.com/username/puny/issues" 14 depends: [ 15 "ocaml" {>= "4.14.0"} 16 - "dune" {>= "3.0" & >= "3.0"} 17 "uutf" {>= "1.0.0"} 18 "uunf" {>= "15.0.0"} 19 "domain-name" {>= "0.4.0"} 20 - "alcotest" {with-test} 21 "odoc" {with-doc} 22 ] 23 build: [ 24 ["dune" "subst"] {dev} ··· 34 "@doc" {with-doc} 35 ] 36 ] 37 - dev-repo: "git+https://github.com/username/puny.git"
··· 1 # This file is generated by dune, edit dune-project instead 2 opam-version: "2.0" 3 synopsis: "RFC 3492 Punycode and IDNA implementation for OCaml" 4 description: """ 5 A high-quality implementation of RFC 3492 (Punycode) with IDNA support. 6 Provides encoding and decoding of internationalized domain names, 7 with proper Unicode normalization and mixed-case annotation support.""" 8 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 9 + authors: ["Anil Madhavapeddy"] 10 license: "ISC" 11 + homepage: "https://tangled.org/@anil.recoil.org/ocaml-punycode" 12 + bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-punycode/issues" 13 depends: [ 14 "ocaml" {>= "4.14.0"} 15 + "dune" {>= "3.20" & >= "3.0"} 16 "uutf" {>= "1.0.0"} 17 "uunf" {>= "15.0.0"} 18 "domain-name" {>= "0.4.0"} 19 "odoc" {with-doc} 20 + "alcotest" {with-test} 21 ] 22 build: [ 23 ["dune" "subst"] {dev} ··· 33 "@doc" {with-doc} 34 ] 35 ] 36 + x-maintenance-intent: ["(latest)"]
+1 -1
test/dune
··· 1 (test 2 (name test_punycode) 3 - (libraries puny alcotest) 4 (modules test_punycode))
··· 1 (test 2 (name test_punycode) 3 + (libraries punycode punycode.idna alcotest) 4 (modules test_punycode))
+7 -2
test/test_punycode.ml
··· 1 (* Comprehensive tests for Punycode (RFC 3492) implementation *) 2 3 open Alcotest 4 - module Punycode = Puny.Punycode 5 - module Punycode_idna = Puny.Punycode_idna 6 7 (* Helper to convert hex code points to Uchar array *) 8 let codepoints_of_hex_list hex_list =
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 (* Comprehensive tests for Punycode (RFC 3492) implementation *) 7 8 open Alcotest 9 + module Punycode = Punycode 10 + module Punycode_idna = Punycode_idna 11 12 (* Helper to convert hex code points to Uchar array *) 13 let codepoints_of_hex_list hex_list =