(* OCaml codecs for the Citation File Format (CFF) *)
(*---------------------------------------------------------------------------
   Copyright (c) 2026 The ocaml-cff programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)
5
(** Citation File Format (CFF) codec for OCaml.

    This library provides codecs for the
    {{:https://citation-file-format.github.io/}Citation File Format (CFF)}
    version 1.2.0, a human- and machine-readable format for software and
    dataset citation metadata.

    CFF files are named [CITATION.cff] and are written in the
    {{:https://yaml.org/}YAML 1.2} format. They provide citation metadata for
    software and datasets, enabling proper academic credit for research
    software.

    {1 Overview}

    A minimal [CITATION.cff] file requires four fields:
    - [cff-version]: The CFF schema version (currently ["1.2.0"])
    - [message]: Instructions for citing the work
    - [title]: The name of the software or dataset
    - [authors]: A list of persons and/or entities

    {2 Creating a CFF record}

    {[
      let author = Cff.Author.person
        ~family_names:"Smith" ~given_names:"Jane" () in
      let cff = Cff.make
        ~title:"My Research Software"
        ~authors:[author]
        ~version:"1.0.0"
        ~doi:"10.12345/zenodo.1234567"
        ()
    ]}

    {2 I/O}

    For file operations, use the backend-specific subpackages:
    - [cff.unix] - Unix file I/O using {!In_channel}/{!Out_channel}
    - [cff.eio] - Eio-based I/O using [Bytesrw_eio] to serialise to flows

    Example with [cff.unix]:
    {[
      match Cff_unix.of_file "CITATION.cff" with
      | Ok cff -> Printf.printf "Title: %s\n%!" (Cff.title cff)
      | Error msg -> Printf.eprintf "Error: %s\n%!" msg
    ]}

    {1 CFF Specification}

    This implementation follows the
    {{:https://github.com/citation-file-format/citation-file-format}CFF 1.2.0 specification}.
    Useful modules include:

    - {!module:Author}: Can be persons (with family/given names) or entities
      (organizations, identified by a [name] field)
    - {!module:Reference}: Bibliography entries that the work cites or depends on
    - {!module:Identifier}: Typed identifiers including DOIs, URLs, and
      Software Heritage IDs (SWH)
    - {!module:License}: SPDX license identifiers where multiple licenses imply "OR"

    {1 Core Types} *)
65
(** The main [t] type represents a complete [CITATION.cff] file with all
    required and optional fields from the CFF 1.2.0 specification.

    Every valid CFF file must include:
    - {!cff_version}: Schema version (defaults to ["1.2.0"])
    - {!message}: Instructions for citing the work (has sensible default)
    - {!title}: Name of the software or dataset
    - {!authors}: List of persons and/or entities

    Optional fields are:
    - {!version}: Software version string
    - {!doi}: Digital Object Identifier
    - {!date_released}: Publication/release date
    - {!license}: SPDX license identifier(s)
    - {!keywords}: Descriptive keywords
    - {!abstract}: Description of the work

    The {!preferred_citation} field allows redirecting citations to
    a related work (e.g., a journal article describing the software).
    The {!references} field lists works that the software cites or
    depends upon. *)
87
(** The abstract type of a complete CFF document, i.e. the contents of a
    [CITATION.cff] file. *)
type t
90
(** Dates, represented as a [(year, month, day)] tuple.

    CFF uses ISO 8601 dates in [YYYY-MM-DD] format (e.g., ["2024-01-15"]). *)
module Date = Cff_date
95
(** ISO 3166-1 alpha-2 country codes (e.g., ["US"], ["DE"], ["GB"]).

    These codes are used in author and entity addresses. *)
module Country = Cff_country
100
(** Physical address information.

    The address fields used for persons and entities: street address, city,
    region (state/province), postal code, and country code. *)
module Address = Cff_address.Address
106
(** Contact information.

    The contact fields used for persons and entities: email, telephone, fax,
    website URL, and ORCID identifier. *)
module Contact = Cff_address.Contact
112
(** SPDX license identifiers.

    CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers} for
    the [license] field. Listing several licenses expresses an OR
    relationship (the user may choose any of the listed licenses). *)
module License = Cff_license
119
(** The kind of work a CFF file describes: [`Software] (default) or
    [`Dataset]. *)
module Cff_type = Cff_enums.Cff_type
122
(** {1 Authors and Entities} *)
124
(** Authors, as a discriminated union of {!Person} or {!Entity}.

    CFF distinguishes between:
    - {b Persons}: Individual humans with family names, given names, etc.
    - {b Entities}: Organizations, projects, or groups with a [name] field

    When parsing, the presence of a [name] field indicates an entity;
    otherwise, the entry is treated as a person. *)
module Author = Cff_author
134
(** A person: an individual author or contributor. *)
module Person = Cff_author.Person
137
(** An entity: an organization, institution, project, or conference. *)
module Entity = Cff_author.Entity
140
(** {1 Identifiers and References} *)
142
(** Typed identifiers for DOI, URL, SWH, or other schemes.

    Each identifier has a type, a value, and an optional description.
    Example:
    {[
      let id = Cff.Identifier.make
        ~type_:`Doi
        ~value:"10.5281/zenodo.1234567"
        ~description:"The concept DOI for all versions"
        ()
    ]} *)
module Identifier = Cff_identifier
154
(** Bibliographic references with comprehensive metadata.

    References can represent any citable work: articles, books, software,
    datasets, conference papers, theses, etc. The {!Reference} module
    provides 60+ fields organized into logical sub-records:

    - {!Reference.Core} - Type, title, authors, abstract
    - {!Reference.Publication} - Journal, volume, issue, pages
    - {!Reference.Collection} - Proceedings, book series
    - {!Reference.Dates} - Various date fields and year
    - {!Reference.Identifiers} - DOI, URL, ISBN, ISSN, etc.
    - {!Reference.Entities} - Editors, publisher, institution
    - {!Reference.Metadata} - Keywords, license, notes
    - {!Reference.Technical} - Commit, version, format *)
module Reference = Cff_reference
170
(** {1 Construction} *)
172
(** The CFF version used when none is specified: ["1.2.0"]. *)
val default_cff_version : string
175
(** The citation message used when none is specified:
    ["If you use this software, please cite it using the metadata from this file."] *)
val default_message : string
179
(** [make ~title ~authors ... ()] constructs a CFF value.

    Only [title] and [authors] must be supplied; every other argument is
    optional. The trailing [unit] argument lets the optional arguments be
    omitted.

    @param cff_version The CFF schema version (default: {!default_cff_version})
    @param message Instructions for users on how to cite (default: {!default_message})
    @param title The name of the software or dataset
    @param authors List of persons and/or entities who created the work
    @param abstract A description of the software or dataset
    @param commit The commit hash or revision number of the software version
    @param contact Contact persons or entities for the work
    @param date_released The date when the work was released
    @param doi The Digital Object Identifier of the work
    @param identifiers Additional identifiers beyond the primary DOI
    @param keywords Descriptive keywords for the work
    @param license The SPDX license identifier(s) for the work
    @param preferred_citation A reference to cite instead of the software itself
    @param references Works that the software cites or depends upon
    @param repository URL of the repository where the software is developed
    @param repository_artifact URL of the built/compiled artifact repository
    @param repository_code URL of the source code repository
    @param type_ The type of work: [`Software] or [`Dataset]
    @param url The URL of the software or dataset homepage
    @param version The version string of the software or dataset *)
val make
  : ?cff_version:string
  -> ?message:string
  -> title:string
  -> authors:Author.t list
  -> ?abstract:string
  -> ?commit:string
  -> ?contact:Author.t list
  -> ?date_released:Date.t
  -> ?doi:string
  -> ?identifiers:Identifier.t list
  -> ?keywords:string list
  -> ?license:License.t
  -> ?preferred_citation:Reference.t
  -> ?references:Reference.t list
  -> ?repository:string
  -> ?repository_artifact:string
  -> ?repository_code:string
  -> ?type_:Cff_type.t
  -> ?url:string
  -> ?version:string
  -> unit
  -> t
209
(** {2 Required Fields} *)
211
(** [cff_version t] is the CFF schema version that [t] adheres to.

    For CFF 1.2.0 files, this should be ["1.2.0"]. The version determines
    which keys are valid and how they should be interpreted. *)
val cff_version : t -> string
217
(** [message t] is the message telling readers how to cite the work.

    Common examples:
    - ["If you use this software, please cite it using the metadata from this file."]
    - ["Please cite this software using the metadata from 'preferred-citation'."]

    The message should guide users toward the preferred citation method. *)
val message : t -> string
226
(** [title t] is the name of the software or dataset.

    This is the title that should appear in citations. For software, it's
    typically the project name; for datasets, the dataset title. *)
val title : t -> string
232
(** [authors t] is the list of creators of the software or dataset.

    Authors can be persons (individuals) or entities (organizations).
    At least one author is required for a valid CFF file. The order
    typically reflects contribution significance. *)
val authors : t -> Author.t list
239
(** {2 Optional Fields} *)
241
(** [abstract t] is the description of the software or dataset, if any.

    Provides context about what the work does, its purpose, and scope. *)
val abstract : t -> string option
246
(** [commit t] is the commit hash or revision number of the software
    version, if any.

    Useful for precise version identification beyond semantic versioning.
    Example: ["1ff847d81f29c45a3a1a5ce73d38e45c2f319bba"] *)
val commit : t -> string option
252
(** [contact t] is the list of contact persons or entities for the software
    or dataset, if any.

    May differ from authors; useful when the primary contact is a
    project maintainer rather than the original author. *)
val contact : t -> Author.t list option
258
(** [date_released t] is the date when the software or dataset was
    released, if any.

    Format is [(year, month, day)], corresponding to ISO 8601 [YYYY-MM-DD]. *)
val date_released : t -> Date.t option
263
(** [doi t] is the Digital Object Identifier for the software or dataset,
    if any.

    DOIs provide persistent, citable identifiers. This is a shorthand
    for a single DOI; use {!identifiers} for multiple DOIs or other
    identifier types. Example: ["10.5281/zenodo.1234567"] *)
val doi : t -> string option
270
(** [identifiers t] is the list of additional identifiers beyond the primary
    DOI, if any.

    Each identifier has a type (DOI, URL, SWH, other), value, and
    optional description. Useful for versioned DOIs, Software Heritage
    identifiers, or repository URLs. *)
val identifiers : t -> Identifier.t list option
277
(** [keywords t] is the list of descriptive keywords for the work, if any.

    Keywords help with discoverability and categorization. Example:
    [["machine learning"; "image processing"; "python"]] *)
val keywords : t -> string list option
283
(** [license t] is the SPDX license identifier(s) for the work, if any.

    Uses {{:https://spdx.org/licenses/}SPDX identifiers}. Multiple
    licenses imply an OR relationship (user may choose any).
    Example: ["MIT"], ["Apache-2.0"], or [["GPL-3.0-only"; "MIT"]]. *)
val license : t -> License.t option
290
(** [preferred_citation t] is the reference to cite instead of the software
    itself, if any.

    Used for "credit redirection" when authors prefer citation of
    a related publication (e.g., a methods paper) over the software.
    Note: Software citation principles recommend citing software
    directly; use this field judiciously. *)
val preferred_citation : t -> Reference.t option
298
(** [references t] is the list of works that this software cites or depends
    upon, if any.

    Functions like a bibliography, listing dependencies, foundational
    works, or related publications. Each reference includes full
    bibliographic metadata. *)
val references : t -> Reference.t list option
305
(** [repository t] is the URL of the repository where the software is
    developed, if any.

    Typically a version control system URL. For source code repositories,
    prefer {!repository_code}. *)
val repository : t -> string option
311
(** [repository_artifact t] is the URL of the built/compiled artifact
    repository, if any.

    For binary distributions, package registries (npm, PyPI, CRAN),
    or container registries. *)
val repository_artifact : t -> string option
317
(** [repository_code t] is the URL of the source code repository, if any.

    Typically a GitHub, GitLab, or similar URL where the source
    code is publicly accessible. *)
val repository_code : t -> string option
323
(** [type_ t] is the type of work, if specified: [`Software] (default) or
    [`Dataset].

    Most CFF files describe software; use [`Dataset] for data packages. *)
val type_ : t -> Cff_type.t option
328
(** [url t] is the URL of the software or dataset homepage, if any.

    A general landing page, documentation site, or project website. *)
val url : t -> string option
333
(** [version t] is the version string of the software or dataset, if any.

    Can be any version format: semantic versioning (["1.2.3"]),
    date-based (["2024.01"]), or other schemes. *)
val version : t -> string option
339
(** {1 Formatting and Codec} *)
341
(** [pp ppf t] pretty-prints the CFF value [t] on [ppf] in a human-readable
    YAML-like format. *)
val pp : Format.formatter -> t -> unit
344
(** JSON/YAML codec for serializing and deserializing CFF values.

    Used internally by the YAML codec functions. *)
val jsont : t Jsont.t