···66(** Citation File Format (CFF) codec for OCaml. *)
7788(* Module aliases *)
99-module Config = Cff_config
109module Date = Cff_date
1110module Country = Cff_country
1211module License = Cff_license
+55-115
lib/cff.mli
···1010 version 1.2.0, a human- and machine-readable format for software and
1111 dataset citation metadata.
12121313- CFF files are plain text files named [CITATION.cff] written in
1414- {{:https://yaml.org/}YAML 1.2}. They provide citation metadata for
1515- software and datasets, enabling proper academic credit for research
1616- software.
1313+ CFF files are named [CITATION.cff] and written in the {{:https://yaml.org/}YAML 1.2}
1414+ format. They provide citation metadata for software and datasets, enabling
1515+proper academic credit for research software.
17161817 {1 Overview}
1918···3433 ~title:"My Research Software"
3534 ~authors:[author]
3635 ~version:"1.0.0"
3737- ~doi:"10.5281/zenodo.1234567"
3636+ ~doi:"10.5281/zenodo.1234567"
3837 ()
3938 ]}
40394140 {2 File I/O}
42414342 For file operations, use the backend-specific subpackages:
4444- - [cff.unix] - Unix file I/O using [In_channel]/[Out_channel]
4545- - [cff.eio] - Eio-based I/O using [bytesrw-eio]
4343+ - [cff.unix] - Unix file I/O using {!In_channel}/{!Out_channel}
4444+ - [cff.eio] - Eio-based I/O using {!Bytesrw_eio} to serialise to flows
46454746 Example with [cff.unix]:
4847 {[
4948 match Cff_unix.of_file "CITATION.cff" with
5050- | Ok cff -> Printf.printf "Title: %s\n" (Cff.title cff)
5151- | Error msg -> Printf.eprintf "Error: %s\n" msg
4949+ | Ok cff -> Printf.printf "Title: %s\n%!" (Cff.title cff)
5050+ | Error msg -> Printf.eprintf "Error: %s\n%!" msg
5251 ]}
53525454- {1 Module Structure}
5555-5656- The library uses a flat internal structure ([Cff_author], [Cff_date], etc.)
5757- but exposes a convenient nested API through module aliases:
5858-5959- - {!module:Author} - Person and entity types for authorship
6060- - {!module:Reference} - Bibliographic reference with 60+ fields
6161- - {!module:Identifier} - DOI, URL, SWH, and other identifiers
6262- - {!module:License} - SPDX license identifiers
6363- - {!module:Date} - ISO 8601 date handling
6464-6553 {1 CFF Specification}
66546755 This implementation follows the
6856 {{:https://github.com/citation-file-format/citation-file-format}CFF 1.2.0 specification}.
6969- Key concepts:
5757+ Key concepts include:
70587171- - {b Authors}: Can be persons (with family/given names) or entities
5959+ - {!module:Author}: Can be persons (with family/given names) or entities
7260 (organizations, identified by a [name] field)
7373- - {b References}: Bibliography entries that the work cites or depends on
7474- - {b Preferred citation}: An alternate work to cite instead of the
7575- software itself (e.g., a journal article about the software)
7676- - {b Identifiers}: Typed identifiers including DOIs, URLs, and
6161+ - {!module:Reference}: Bibliography entries that the work cites or depends on
6262+ - {!module:Identifier}: Typed identifiers including DOIs, URLs, and
7763 Software Heritage IDs (SWH)
7878- - {b Licenses}: SPDX license identifiers; multiple licenses imply OR
6464+ - {!module:License}: SPDX license identifiers where multiple licenses imply "OR"
79658066 {1 Core Types} *)
81678282-(** Configuration for validation strictness. *)
8383-module Config = Cff_config
6868+(** The main [t] type represents a complete [CITATION.cff] file with all
6969+ required and optional fields from the CFF 1.2.0 specification.
7070+7171+ Every valid CFF file must include:
7272+ - {!cff_version}: Schema version (defaults to ["1.2.0"])
7373+ - {!message}: Instructions for citing the work (has a sensible default)
7474+ - {!title}: Name of the software or dataset
7575+ - {!authors}: List of persons and/or entities
7676+7777+ Optional fields are:
7878+ - {!version}: Software version string
7979+ - {!doi}: Digital Object Identifier
8080+ - {!date_released}: Publication/release date
8181+ - {!license}: SPDX license identifier(s)
8282+ - {!keywords}: Descriptive keywords
8383+ - {!abstract}: Description of the work
8484+8585+ The {!preferred_citation} field allows redirecting citations to
8686+ a related work (e.g., a journal article describing the software).
8787+ The {!val-references} field lists works that the software cites or
8888+ depends upon. *)
8989+9090+(** The abstract type representing a complete CFF document. *)
9191+type t
84928593(** Date representation as [(year, month, day)] tuple.
8694···99107 (the user may choose any of the listed licenses). *)
100108module License = Cff_license
101109102102-(** {1 Enumeration Types} *)
103103-104104-(** Identifier types for the [identifiers] field.
105105-106106- - [`Doi] - Digital Object Identifier
107107- - [`Url] - Web URL
108108- - [`Swh] - Software Heritage identifier
109109- - [`Other] - Other identifier type *)
110110-module Identifier_type = Cff_enums.Identifier_type
111111-112112-(** Reference types for bibliographic entries.
113113-114114- CFF supports 40+ reference types including [`Article], [`Book],
115115- [`Software], [`Conference_paper], [`Thesis], [`Dataset], and more.
116116- See {!Cff_enums.Reference_type} for the complete list. *)
117117-module Reference_type = Cff_enums.Reference_type
118118-119119-(** Publication status for works in progress.
120120-121121- - [`Preprint] - Available as preprint
122122- - [`Submitted] - Submitted for publication
123123- - [`In_press] - Accepted, awaiting publication
124124- - [`Advance_online] - Published online ahead of print *)
125125-module Status = Cff_enums.Status
126126-127110(** CFF file type: [`Software] (default) or [`Dataset]. *)
128111module Cff_type = Cff_enums.Cff_type
129112130130-(** {1 Address and Contact Information} *)
131131-132132-(** Physical address with street, city, region, postal code, and country. *)
133133-module Address = Cff_address.Address
134134-135135-(** Contact information: email, telephone, fax, website, and ORCID. *)
136136-module Contact = Cff_address.Contact
137137-138113(** {1 Authors and Entities} *)
139114140115(** Authors as a discriminated union of {!Person} or {!Entity}.
···146121 When parsing, the presence of a [name] field indicates an entity;
147122 otherwise, the entry is treated as a person. *)
148123module Author = Cff_author
149149-150150-(** Person name components: family names, given names, particle, suffix, alias. *)
151151-module Name = Cff_author.Name
152124153125(** A person (individual author or contributor). *)
154126module Person = Cff_author.Person
···186158 - {!Reference.Technical} - Commit, version, format *)
187159module Reference = Cff_reference
188160189189-(** {1 Root CFF Type}
190190-191191- The main [t] type represents a complete [CITATION.cff] file with all
192192- required and optional fields from the CFF 1.2.0 specification.
193193-194194- {2 Required Fields}
195195-196196- Every valid CFF file must include:
197197- - {!cff_version}: Schema version (defaults to ["1.2.0"])
198198- - {!message}: Instructions for citing the work (has sensible default)
199199- - {!title}: Name of the software or dataset
200200- - {!authors}: List of persons and/or entities
201201-202202- {2 Common Optional Fields}
203203-204204- - {!version}: Software version string
205205- - {!doi}: Digital Object Identifier
206206- - {!date_released}: Publication/release date
207207- - {!license}: SPDX license identifier(s)
208208- - {!keywords}: Descriptive keywords
209209- - {!abstract}: Description of the work
210210-211211- {2 Citation Redirection}
212212-213213- The {!preferred_citation} field allows redirecting citations to
214214- a related work (e.g., a journal article describing the software).
215215- The {!references} field lists works that the software cites or
216216- depends upon. *)
217217-218218-(** The abstract type representing a complete CFF document. *)
219219-type t
220220-221161(** {1 Construction} *)
222162223163val default_cff_version : string
···231171 ?cff_version:string ->
232172 ?message:string ->
233173 title:string ->
234234- authors:Cff_author.t list ->
174174+ authors:Author.t list ->
235175 ?abstract:string ->
236176 ?commit:string ->
237237- ?contact:Cff_author.t list ->
238238- ?date_released:Cff_date.t ->
177177+ ?contact:Author.t list ->
178178+ ?date_released:Date.t ->
239179 ?doi:string ->
240240- ?identifiers:Cff_identifier.t list ->
180180+ ?identifiers:Identifier.t list ->
241181 ?keywords:string list ->
242242- ?license:Cff_license.t ->
182182+ ?license:License.t ->
243183 ?license_url:string ->
244244- ?preferred_citation:Cff_reference.t ->
245245- ?references:Cff_reference.t list ->
184184+ ?preferred_citation:Reference.t ->
185185+ ?references:Reference.t list ->
246186 ?repository:string ->
247187 ?repository_artifact:string ->
248188 ?repository_code:string ->
249249- ?type_:Cff_enums.Cff_type.t ->
189189+ ?type_:Cff_type.t ->
250190 ?url:string ->
251191 ?version:string ->
252192 unit -> t
···257197 @param title The name of the software or dataset
258198 @param authors List of persons and/or entities who created the work *)
259199260260-(** {1 Required Fields} *)
200200+(** {2 Required Fields} *)
261201262202val cff_version : t -> string
263203(** The CFF schema version that this file adheres to.
···280220 This is the title that should appear in citations. For software, it's
281221 typically the project name; for datasets, the dataset title. *)
282222283283-val authors : t -> Cff_author.t list
223223+val authors : t -> Author.t list
284224(** The creators of the software or dataset.
285225286226 Authors can be persons (individuals) or entities (organizations).
287227 At least one author is required for a valid CFF file. The order
288228 typically reflects contribution significance. *)
289229290290-(** {1 Optional Fields} *)
230230+(** {2 Optional Fields} *)
291231292232val abstract : t -> string option
293233(** A description of the software or dataset.
···300240 Useful for precise version identification beyond semantic versioning.
301241 Example: ["1ff847d81f29c45a3a1a5ce73d38e45c2f319bba"] *)
302242303303-val contact : t -> Cff_author.t list option
243243+val contact : t -> Author.t list option
304244(** Contact persons or entities for the software or dataset.
305245306246 May differ from authors; useful when the primary contact is a
307247 project maintainer rather than the original author. *)
308248309309-val date_released : t -> Cff_date.t option
249249+val date_released : t -> Date.t option
310250(** The date when the software or dataset was released.
311251312252 Format is [(year, month, day)], corresponding to ISO 8601 [YYYY-MM-DD]. *)
···318258 for a single DOI; use {!identifiers} for multiple DOIs or other
319259 identifier types. Example: ["10.5281/zenodo.1234567"] *)
320260321321-val identifiers : t -> Cff_identifier.t list option
261261+val identifiers : t -> Identifier.t list option
322262(** Additional identifiers beyond the primary DOI.
323263324264 Each identifier has a type (DOI, URL, SWH, other), value, and
···331271 Help with discoverability and categorization. Example:
332272 [["machine learning"; "image processing"; "python"]] *)
333273334334-val license : t -> Cff_license.t option
274274+val license : t -> License.t option
335275(** The SPDX license identifier(s) for the work.
336276337277 Uses {{:https://spdx.org/licenses/}SPDX identifiers}. Multiple
···344284 Only needed for licenses not in the SPDX list. Standard SPDX
345285 licenses have well-known URLs. *)
346286347347-val preferred_citation : t -> Cff_reference.t option
287287+val preferred_citation : t -> Reference.t option
348288(** A reference to cite instead of the software itself.
349289350290 Used for "credit redirection" when authors prefer citation of
···352292 Note: Software citation principles recommend citing software
353293 directly; use this field judiciously. *)
354294355355-val references : t -> Cff_reference.t list option
295295+val references : t -> Reference.t list option
356296(** Works that this software cites or depends upon.
357297358298 Functions like a bibliography, listing dependencies, foundational
···377317 Typically a GitHub, GitLab, or similar URL where the source
378318 code is publicly accessible. *)
379319380380-val type_ : t -> Cff_enums.Cff_type.t option
320320+val type_ : t -> Cff_type.t option
381321(** The type of work: [`Software] (default) or [`Dataset].
382322383323 Most CFF files describe software; use [`Dataset] for data packages. *)
···11-(*---------------------------------------------------------------------------
22- Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33- SPDX-License-Identifier: ISC
44- ---------------------------------------------------------------------------*)
55-66-(** Configuration for CFF parsing and validation.
77-88- CFF files in the wild may contain non-standard or deprecated values.
99- This module provides configuration options to control validation
1010- strictness during parsing.
1111-1212- {1 Validation Modes}
1313-1414- {2 Strict Mode}
1515-1616- Validates all fields according to their specifications:
1717-1818- - URLs must be well-formed
1919- - Dates must be valid ISO 8601 dates
2020- - DOIs must match the DOI pattern
2121- - ORCIDs must be valid ORCID URLs
2222- - License IDs must be valid SPDX identifiers
2323-2424- Use strict mode for validating CFF files or when you control the input.
2525-2626- {2 Lenient Mode}
2727-2828- Accepts any string value without validation. Use lenient mode when:
2929-3030- - Parsing CFF files from unknown sources
3131- - Handling legacy files with deprecated license IDs
3232- - Round-tripping files without data loss
3333-3434- {2 Default Mode}
3535-3636- A balanced approach that:
3737- - Keeps unknown fields (for round-tripping)
3838- - Uses lenient validation for most fields
3939-4040- {1 Unknown Fields}
4141-4242- The [keep_unknown] option controls handling of unrecognized fields:
4343-4444- - [true]: Preserve unknown fields in the parsed structure
4545- - [false]: Silently ignore unknown fields
4646-4747- Keeping unknown fields allows round-tripping CFF files that contain
4848- extensions or newer fields not yet supported by this library. *)
4949-5050-type t
5151-(** Configuration type. *)
5252-5353-val default : t
5454-(** Default configuration.
5555-5656- Uses lenient validation and keeps unknown fields. Suitable for
5757- general parsing where round-tripping is desired. *)
5858-5959-val strict : t
6060-(** Strict configuration.
6161-6262- Validates all fields according to CFF 1.2.0 specification.
6363- Fails on invalid URLs, dates, DOIs, ORCIDs, and license IDs.
6464-6565- Keeps unknown fields for compatibility. *)
6666-6767-val lenient : t
6868-(** Fully lenient configuration.
6969-7070- Accepts any string values without validation. Useful for parsing
7171- malformed or non-standard CFF files. *)
7272-7373-val make :
7474- ?strict_urls:bool ->
7575- ?strict_dates:bool ->
7676- ?strict_dois:bool ->
7777- ?strict_orcids:bool ->
7878- ?strict_licenses:bool ->
7979- ?keep_unknown:bool ->
8080- unit -> t
8181-(** Create a custom configuration.
8282-8383- All strictness options default to [false] (lenient).
8484- [keep_unknown] defaults to [true].
8585-8686- @param strict_urls Validate URL format
8787- @param strict_dates Validate date format and values
8888- @param strict_dois Validate DOI pattern
8989- @param strict_orcids Validate ORCID format
9090- @param strict_licenses Validate SPDX license identifiers
9191- @param keep_unknown Preserve unrecognized fields *)
9292-9393-val strict_urls : t -> bool
9494-(** Whether URL fields are validated. *)
9595-9696-val strict_dates : t -> bool
9797-(** Whether date fields are validated. *)
9898-9999-val strict_dois : t -> bool
100100-(** Whether DOI fields are validated. *)
101101-102102-val strict_orcids : t -> bool
103103-(** Whether ORCID fields are validated. *)
104104-105105-val strict_licenses : t -> bool
106106-(** Whether license identifiers are validated against SPDX. *)
107107-108108-val keep_unknown : t -> bool
109109-(** Whether unknown fields are preserved in the parsed structure. *)