···11+MIT License
22+33+Copyright (c) 2024-2025 Front Matter
44+Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
55+66+Permission is hereby granted, free of charge, to any person obtaining a copy
77+of this software and associated documentation files (the "Software"), to deal
88+in the Software without restriction, including without limitation the rights
99+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1010+copies of the Software, and to permit persons to whom the Software is
1111+furnished to do so, subject to the following conditions:
1212+1313+The above copyright notice and this permission notice shall be included in all
1414+copies or substantial portions of the Software.
1515+1616+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1717+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1818+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1919+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2020+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2121+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222+SOFTWARE.
+95
README.md
···11+# Crockford Base32 Encoding for OCaml
22+33+An OCaml implementation of [Douglas Crockford's
44+Base32](https://www.crockford.com/base32.html) encoding with ISO 7064 checksum
55+support. Provides encoding and decoding of int64 values to URI-friendly base32
66+strings, with optional checksum validation, padding, splitting, and random ID
77+generation. Ported from <https://github.com/front-matter/commonmeta>.
88+99+## Installation
1010+1111+```bash
1212+opam install crockford
1313+```
1414+1515+Or add to your `dune-project`:
1616+1717+```scheme
1818+(package
1919+ (depends
2020+ (crockford)))
2121+```
2222+2323+## Usage
2424+2525+### Basic Encoding and Decoding
2626+2727+```ocaml
(* Encode a number *)
let encoded = Crockford.encode 1234567890L in
(* "14sc0pj" *)

(* Decode back to number *)
let decoded = Crockford.decode "14sc0pj" in
(* 1234567890L *)
3535+```
3636+3737+### With Checksum
3838+3939+```ocaml
(* Encode with checksum (two decimal digits are appended) *)
let encoded = Crockford.encode ~checksum:true 1234567890L in
(* "14sc0pj92" *)

(* Decode and validate checksum *)
let decoded = Crockford.decode ~checksum:true "14sc0pj92" in
(* 1234567890L - or raises Decode_error (Checksum_mismatch _) if invalid *)
4747+```
4848+4949+### Formatted Output
5050+5151+```ocaml
(* Split with dashes for readability *)
let encoded = Crockford.encode ~split_every:4 1234567890L in
(* "14sc-0pj" *)
5555+5656+(* With minimum length (zero-padded) *)
5757+let encoded = Crockford.encode ~min_length:10 1234L in
5858+(* "000000016j" *)
5959+```
6060+6161+### Random ID Generation
6262+6363+```ocaml
6464+Random.self_init ();
(* Generate random IDs *)
let id = Crockford.generate ~length:8 ~checksum:true () in
(* e.g., "a3x7m944" (6 base32 characters followed by 2 checksum digits) *)

(* Generate formatted IDs *)
let id = Crockford.generate ~length:12 ~split_every:4 ~checksum:true () in
(* e.g., "7n2q-8xkm-5p09" *)
7373+```
7474+7575+### Normalization
7676+7777+```ocaml
7878+(* Handles common character confusions *)
7979+let decoded = Crockford.decode "ILO" in (* Treated as "110" *)
8080+let decoded = Crockford.decode "16-JK-PA" in (* Dashes ignored *)
8181+```
8282+8383+## License
8484+8585+MIT License
8686+8787+## Author
8888+8989+Anil Madhavapeddy <anil@recoil.org>
9090+(based on code from https://github.com/front-matter/commonmeta)
9191+9292+## Links
9393+9494+- [Homepage](https://tangled.org/@anil.recoil.org/ocaml-crockford)
9595+- [Crockford Base32 Specification](https://www.crockford.com/base32.html)
···11+(* roguedoi.ml - Generate random DOI identifiers with Crockford base32 encoding *)
(* Prints one DOI URL of the form https://doi.org/<prefix>/<suffix> on
   standard output. The suffix is a fresh checksummed Crockford identifier of
   [length] characters, split with '-' every [split] characters. The global
   PRNG is re-seeded on every call. *)
let generate_doi prefix length split =
  let () = Random.self_init () in
  let suffix =
    Crockford.generate ~checksum:true ~split_every:split ~length ()
  in
  Printf.printf "https://doi.org/%s/%s\n%!" prefix suffix
(* Command-line entry point: declares the three options, wires them to
   [generate_doi] and exits with the status Cmdliner computes. *)
let () =
  let open Cmdliner in
  (* -p / --prefix : DOI prefix, defaults to 10.59350. *)
  let prefix =
    Arg.(
      value
      & opt string "10.59350"
      & info [ "p"; "prefix" ] ~docv:"PREFIX"
          ~doc:"DOI prefix to use (e.g., 10.59350)")
  in
  (* -l / --length : suffix length, defaults to 10. *)
  let length =
    Arg.(
      value
      & opt int 10
      & info [ "l"; "length" ] ~docv:"LENGTH"
          ~doc:"Total length of the generated suffix (including checksum)")
  in
  (* -s / --split : hyphenation interval, defaults to 5. *)
  let split =
    Arg.(
      value
      & opt int 5
      & info [ "s"; "split" ] ~docv:"SPLIT"
          ~doc:
            "Split the suffix every N characters with hyphens (0 = no splitting)")
  in
  let term = Term.(const generate_doi $ prefix $ length $ split) in
  let cmd_info =
    Cmd.info "roguedoi" ~version:"0.1.0"
      ~doc:"Generate a random DOI with Crockford base32 encoding"
  in
  exit (Cmd.eval (Cmd.v cmd_info term))
+32
crockford.opam
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Crockford Base32 encoding for OCaml"
description:
  "An OCaml implementation of Douglas Crockford's Base32 encoding with ISO 7064 checksum support. Provides encoding and decoding of int64 values to URI-friendly base32 strings, with optional checksum validation, padding, splitting, and random ID generation."
maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
authors: ["Anil Madhavapeddy"]
license: "MIT"
homepage: "https://tangled.org/@anil.recoil.org/ocaml-crockford"
bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-crockford/issues"
depends: [
  "dune" {>= "3.20"}
  "ocaml" {>= "4.14.1"}
  "odoc" {with-doc}
  "alcotest" {with-test & >= "1.9.0"}
  "cmdliner" {>= "1.1.0"}
]
build: [
  ["dune" "subst"] {dev}
  [
    "dune"
    "build"
    "-p"
    name
    "-j"
    jobs
    "@install"
    "@runtest" {with-test}
    "@doc" {with-doc}
  ]
]
x-maintenance-intent: ["(latest)"]
+26
dune-project
(lang dune 3.20)

(name crockford)

(generate_opam_files true)

; SPDX identifier of the licence text shipped in the LICENSE file (MIT).
(license MIT)
(authors "Anil Madhavapeddy")
(homepage "https://tangled.org/@anil.recoil.org/ocaml-crockford")
(maintainers "Anil Madhavapeddy <anil@recoil.org>")
(bug_reports "https://tangled.org/@anil.recoil.org/ocaml-crockford/issues")
(maintenance_intent "(latest)")

(package
 (name crockford)
 (synopsis "Crockford Base32 encoding for OCaml")
 (description
  "An OCaml implementation of Douglas Crockford's Base32 encoding with \
   ISO 7064 checksum support. Provides encoding and decoding of int64 values \
   to URI-friendly base32 strings, with optional checksum validation, padding, \
   splitting, and random ID generation.")
 (depends
  (ocaml (>= 4.14.1))
  (odoc :with-doc)
  (alcotest (and :with-test (>= 1.9.0)))
  (cmdliner (>= 1.1.0))))
+177
lib/crockford.ml
(* Payload reported when a requested identifier length is rejected. *)
type invalid_length = {
  length : int;
  message : string;
}

(* Payload reported when a character is not part of the alphabet. *)
type invalid_character = {
  char : char;
  message : string;
}

(* Payload reported when the checksum suffix is missing or unparsable. *)
type invalid_checksum = {
  checksum : string;
  message : string;
}

(* Payload reported when the parsed checksum and the computed one differ. *)
type checksum_mismatch = {
  expected : int64;
  got : int64;
  identifier : string;
}

(* Every failure this module reports, one constructor per payload type. *)
type decode_error =
  | Invalid_length of invalid_length
  | Invalid_character of invalid_character
  | Invalid_checksum of invalid_checksum
  | Checksum_mismatch of checksum_mismatch

(* The single exception used to carry a [decode_error]. *)
exception Decode_error of decode_error
(* Formatters for the individual error payloads. Each one writes a
   single-line rendering of its argument to the given formatter. *)
let pp_invalid_length fmt (e : invalid_length) =
  Format.fprintf fmt "Invalid_length: length=%d, %s" e.length e.message

let pp_invalid_character fmt (e : invalid_character) =
  Format.fprintf fmt "Invalid_character: char='%c', %s" e.char e.message

let pp_invalid_checksum fmt (e : invalid_checksum) =
  Format.fprintf fmt "Invalid_checksum: checksum=%s, %s" e.checksum e.message

let pp_checksum_mismatch fmt (e : checksum_mismatch) =
  Format.fprintf fmt "Checksum_mismatch: expected=%Ld, got=%Ld, identifier=%s"
    e.expected e.got e.identifier

(* Picks the payload formatter that matches the error's constructor. *)
let pp_decode_error fmt err =
  match err with
  | Invalid_length payload -> pp_invalid_length fmt payload
  | Invalid_character payload -> pp_invalid_character fmt payload
  | Invalid_checksum payload -> pp_invalid_checksum fmt payload
  | Checksum_mismatch payload -> pp_checksum_mismatch fmt payload
(* Crockford alphabet: the character at index [i] encodes the 5-bit value
   [i]. The letters i, l, o and u do not appear in it. *)
let encoding_chars = "0123456789abcdefghjkmnpqrstvwxyz"
(* [generate_checksum number] returns [98 - ((100 * number) mod 97)].

   [number] is reduced modulo 97 before the multiplication by 100. The
   reduced product is congruent to the full one modulo 97, so results are
   unchanged wherever [100 * number] used to fit in an [int64]; but the
   reduced form cannot overflow, so every non-negative input (including
   those above [Int64.max_int / 100]) yields a value in [2, 98], i.e. one
   that always prints as two decimal digits. *)
let generate_checksum number =
  let residue = Int64.rem (Int64.mul 100L (Int64.rem number 97L)) 97L in
  Int64.sub 98L residue
(* [validate number ~checksum] is [true] exactly when [checksum] equals the
   value [generate_checksum] computes for [number]. *)
let validate number ~checksum =
  let expected = generate_checksum number in
  Int64.compare expected checksum = 0
(* [normalize str] prepares user input for decoding: every character is
   lower-cased (ASCII), '-' separators are dropped, 'i' and 'l' become '1'
   and 'o' becomes '0'. All other characters are kept as they are. *)
let normalize str =
  let out = Buffer.create (String.length str) in
  String.iter
    (fun raw ->
      match Char.lowercase_ascii raw with
      | '-' -> ()
      | 'i' | 'l' -> Buffer.add_char out '1'
      | 'o' -> Buffer.add_char out '0'
      | other -> Buffer.add_char out other)
    str;
  Buffer.contents out
(* [encode ?split_every ?min_length ?checksum number] renders [number] in
   Crockford base32.

   - [min_length]: the digits are left-padded with '0' up to this width; when
     [checksum] is set and [min_length > 2], two of those characters are
     reserved for the checksum.
   - [checksum]: appends [generate_checksum number] as two decimal digits.
   - [split_every]: when positive, a '-' is inserted after every
     [split_every] characters of the final string (checksum included).

   A negative [number] produces a negative alphabet index, so the string
   lookup raises [Invalid_argument]. *)
let encode ?(split_every=0) ?(min_length=0) ?(checksum=false) number =
  (* Base32 digits, most significant first. *)
  let digits =
    if Int64.equal number 0L then "0"
    else begin
      let buf = Buffer.create 13 in
      let rec emit n =
        if not (Int64.equal n 0L) then begin
          (* Recurse on the higher-order digits first so they are written
             before the current one. *)
          emit (Int64.div n 32L);
          Buffer.add_char buf encoding_chars.[Int64.to_int (Int64.rem n 32L)]
        end
      in
      emit number;
      Buffer.contents buf
    end
  in
  (* Width available to the digits once the checksum has its two slots. *)
  let body_width =
    if checksum && min_length > 2 then min_length - 2 else min_length
  in
  let missing = body_width - String.length digits in
  let body = if missing > 0 then String.make missing '0' ^ digits else digits in
  let full =
    if checksum then body ^ Printf.sprintf "%02Ld" (generate_checksum number)
    else body
  in
  if split_every <= 0 then full
  else begin
    let total = String.length full in
    let out = Buffer.create (total + (total / split_every)) in
    String.iteri
      (fun i c ->
        (* A separator goes in front of every group except the first. *)
        if i > 0 && i mod split_every = 0 then Buffer.add_char out '-';
        Buffer.add_char out c)
      full;
    Buffer.contents out
  end
(* [decode ?checksum str] converts a Crockford base32 string back to an
   [int64].

   The input first goes through [normalize]. With [~checksum:true] the last
   two characters are read as a decimal checksum and compared with the one
   computed from the decoded value.

   Raises [Decode_error] with
   - [Invalid_checksum] when the normalized input has fewer than 3
     characters or its last two characters are not a number,
   - [Invalid_character] when a character is not in [encoding_chars],
   - [Checksum_mismatch] when the two checksums differ.

   The accumulation uses plain [Int64] arithmetic, with no overflow check. *)
let decode ?(checksum=false) str =
  let cleaned = normalize str in
  let total = String.length cleaned in
  (* Separate the base32 digits from the optional checksum suffix. *)
  let body, parsed_checksum =
    if not checksum then (cleaned, None)
    else if total < 3 then
      raise (Decode_error (Invalid_checksum {
        checksum = cleaned;
        message = "encoded string too short for checksum"
      }))
    else begin
      let suffix = String.sub cleaned (total - 2) 2 in
      match Int64.of_string_opt suffix with
      | Some cs -> (String.sub cleaned 0 (total - 2), Some cs)
      | None ->
        raise (Decode_error (Invalid_checksum {
          checksum = suffix;
          message = "invalid checksum format"
        }))
    end
  in
  (* Left-to-right accumulation: value = value * 32 + digit. *)
  let body_len = String.length body in
  let rec accumulate acc i =
    if i >= body_len then acc
    else begin
      let c = body.[i] in
      match String.index_opt encoding_chars c with
      | Some digit ->
        accumulate (Int64.add (Int64.mul acc 32L) (Int64.of_int digit)) (i + 1)
      | None ->
        raise (Decode_error (Invalid_character {
          char = c;
          message = Printf.sprintf "character '%c' not in base32 alphabet" c
        }))
    end
  in
  let value = accumulate 0L 0 in
  (* The checksum is only compared when one was requested and parsed. *)
  begin match parsed_checksum with
  | None -> ()
  | Some cs ->
    if not (validate value ~checksum:cs) then
      raise (Decode_error (Checksum_mismatch {
        expected = generate_checksum value;
        got = cs;
        identifier = str
      }))
  end;
  value
(* [generate ~length ?split_every ?checksum ()] returns a random Crockford
   base32 identifier of [length] characters, not counting the '-' separators
   added by [split_every]. With [~checksum:true] the last two of those
   characters are the checksum digits.

   Raises [Decode_error (Invalid_length _)] when [checksum] is set and
   [length < 3].

   The identifier encodes a single non-negative [int64], so at most 63 random
   bits are available: when more than 12 random digits are requested the
   leading ones are '0'. The caller is expected to seed [Random] first. *)
let generate ~length ?(split_every=0) ?(checksum=false) () =
  if checksum && length < 3 then
    raise (Decode_error (Invalid_length {
      length;
      message = "length must be >= 3 if checksum is enabled"
    }));

  (* Number of random base32 digits once the checksum has its two slots. *)
  let digits = if checksum then length - 2 else length in

  (* Exclusive upper bound 32^digits = 2^(5 * digits), built with integer
     arithmetic so that it can neither lose precision nor leave the int64
     range: 13 digits would already need 65 bits, so the bound is capped. *)
  let bound =
    if digits <= 0 then 1L
    else if digits > 12 then Int64.max_int
    else Int64.shift_left 1L (5 * digits)
  in
  let random_num = Random.int64 bound in

  (* [encode] itself reserves the two checksum characters out of
     [min_length], so it must receive the full requested length; passing the
     reduced digit count would shorten the padding by two. *)
  encode ~split_every ~min_length:length ~checksum random_num
+89
lib/crockford.mli
(** Crockford Base32 encoding for OCaml *)

(** {1 Error Types} *)

type invalid_length = { length: int; message: string }
(** Error for invalid length parameters *)

type invalid_character = { char: char; message: string }
(** Error for a character that is not part of the base32 alphabet *)

type invalid_checksum = { checksum: string; message: string }
(** Error for a checksum suffix that is missing or is not a number *)

type checksum_mismatch = { expected: int64; got: int64; identifier: string }
(** Error for checksum validation failures: [expected] is the checksum
    computed from the decoded number, [got] the checksum read from the input
    and [identifier] the string that was passed to {!decode} *)

type decode_error =
  | Invalid_length of invalid_length
  | Invalid_character of invalid_character
  | Invalid_checksum of invalid_checksum
  | Checksum_mismatch of checksum_mismatch
(** Union of all errors reported by this module *)

exception Decode_error of decode_error
(** Exception carrying a {!decode_error}; raised by {!decode} and {!generate} *)

val pp_invalid_length : Format.formatter -> invalid_length -> unit
(** Pretty-print an invalid_length error *)

val pp_invalid_character : Format.formatter -> invalid_character -> unit
(** Pretty-print an invalid_character error *)

val pp_invalid_checksum : Format.formatter -> invalid_checksum -> unit
(** Pretty-print an invalid_checksum error *)

val pp_checksum_mismatch : Format.formatter -> checksum_mismatch -> unit
(** Pretty-print a checksum_mismatch error *)

val pp_decode_error : Format.formatter -> decode_error -> unit
(** Pretty-print a decode_error *)

(** {1 Constants} *)

val encoding_chars : string
(** The Crockford base32 encoding alphabet (excludes i, l, o, u) *)

(** {1 Encoding and Decoding} *)

val encode :
  ?split_every:int ->
  ?min_length:int ->
  ?checksum:bool ->
  int64 -> string
(** [encode ?split_every ?min_length ?checksum n] encodes an int64 to a Crockford base32 string.
    @param split_every Split the output with '-' every n characters (default: no splitting)
    @param min_length Pad with zeros to this minimum length (default: no padding);
      when [checksum] is true and [min_length > 2], the two checksum digits
      count towards this length
    @param checksum Append ISO 7064 checksum as 2 digits (default: false)
    @raise Invalid_argument if [n] is negative *)

val decode : ?checksum:bool -> string -> int64
(** [decode ?checksum str] decodes a Crockford base32 string to int64.
    The input is passed through {!normalize} before decoding. The value is
    accumulated with plain [Int64] arithmetic, without an overflow check.
    @param checksum Expect and validate ISO 7064 checksum (default: false)
    @raise Decode_error if decoding fails (invalid characters, invalid checksum format, or checksum mismatch) *)

(** {1 ID Generation} *)

val generate :
  length:int ->
  ?split_every:int ->
  ?checksum:bool ->
  unit -> string
(** [generate ~length ?split_every ?checksum ()] generates a random Crockford base32 string.
    @param length The requested length of the generated string, not counting
      '-' separators; when [checksum] is true this length includes the two
      checksum digits
    @param split_every Split the output with '-' every n characters (default: no splitting)
    @param checksum Append ISO 7064 checksum as 2 digits (default: false)
    @raise Decode_error (carrying [Invalid_length]) if checksum is true and length < 3

    Note: Caller must initialize Random module with {!Random.self_init} before use *)

(** {1 Utility Functions} *)

val normalize : string -> string
(** [normalize str] normalizes a string for decoding by converting to lowercase,
    removing dashes, and mapping confusable characters (i→1, l→1, o→0) *)

val validate : int64 -> checksum:int64 -> bool
(** [validate n ~checksum] is [true] when [checksum] equals
    [generate_checksum n] *)

val generate_checksum : int64 -> int64
(** [generate_checksum n] generates an ISO 7064 (mod 97-10) checksum for a number,
    computed as [98 - ((100 * n) mod 97)] *)