(* OCaml codecs for the Citation File Format (CFF) *)
(*---------------------------------------------------------------------------
  Copyright (c) 2026 The ocaml-cff programmers. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Bibliographic reference type for CFF.

    References represent citable works in the [references] and
    [preferred-citation] fields of a CFF file. They can describe any
    type of scholarly output: journal articles, books, conference papers,
    software, datasets, theses, patents, and many more.

    {1 Structure}

    CFF references have 60+ possible fields. This module organizes them
    into logical sub-records for easier manipulation:

    - {!Core} - Identity fields: type, title, authors (required), plus
      optional abstract and abbreviation
    - {!Publication} - Journal articles: journal, volume, issue, pages
    - {!Collection} - Book chapters, proceedings: collection title, DOI
    - {!Dates} - When the work was published, accessed, etc.
    - {!Identifiers} - DOI, URL, ISBN, ISSN, repository links
    - {!Entities} - Editors, publisher, institution, conference
    - {!Metadata} - Keywords, license, languages, copyright
    - {!Technical} - Software-specific: commit, version, format

    {1 Reference Types}

    The [type] field determines what kind of work is being referenced.
    CFF 1.2.0 supports 40+ types including:

    - Academic: [`Article], [`Book], [`Conference_paper], [`Thesis]
    - Software: [`Software], [`Software_code], [`Software_container]
    - Data: [`Data], [`Database], [`Dataset]
    - Legal: [`Patent], [`Legal_case], [`Statute]
    - Media: [`Video], [`Sound_recording], [`Film_broadcast]

    {1 Example}

    {[
      (* A journal article reference *)
      let article = Cff_reference.make_simple
        ~type_:`Article
        ~title:"The Software Citation Principles"
        ~authors:[
          Cff_author.Person (Cff_author.Person.make
            ~family_names:"Smith"
            ~given_names:"Arfon M."
            ());
        ]
        ~doi:"10.7717/peerj-cs.86"
        ~year:2016
        ~journal:"PeerJ Computer Science"
        ()

      (* A software reference with more details, assembled from sub-records *)
      let software =
        let core = Cff_reference.Core.make
          ~type_:`Software
          ~title:"NumPy"
          ~authors:[...]
          () in
        let dates = Cff_reference.Dates.make ~year:2020 () in
        let ids = Cff_reference.Identifiers.make
          ~doi:"10.1038/s41586-020-2649-2"
          ~url:"https://numpy.org"
          () in
        Cff_reference.make ~core ~dates ~identifiers:ids ()
    ]}

    {1 Sub-records} *)

(** Core identity fields (required for all references).

    Every reference must have a type, title, and at least one author.
    The type determines what additional fields are relevant.

    NOTE(review): {!make} accepts any [Cff_author.t list], so the
    "at least one author" requirement is not enforced by this signature;
    confirm whether it is checked elsewhere. *)
module Core : sig
  (** Abstract core record. Build one with {!make} and read it back with
      the accessors below. *)
  type t

  (** Create a core record.

      The trailing [unit] argument allows the optional arguments to be
      omitted.

      @param type_ The reference type (article, book, software, etc.)
      @param title The title of the work
      @param authors List of persons and/or entities
      @param abstract Optional description or abstract of the work
      @param abbreviation Optional abbreviated form of the title *)
  val make
    : type_:Cff_enums.Reference_type.t
    -> title:string
    -> authors:Cff_author.t list
    -> ?abstract:string
    -> ?abbreviation:string
    -> unit
    -> t

  (** The reference type. Determines which other fields are applicable. *)
  val type_ : t -> Cff_enums.Reference_type.t

  (** The title of the referenced work. *)
  val title : t -> string

  (** The authors/creators of the work. *)
  val authors : t -> Cff_author.t list

  (** A description or abstract of the work. *)
  val abstract : t -> string option

  (** Abbreviated form of the title (e.g., for journal names). *)
  val abbreviation : t -> string option

  (** Formatter for core records, usable with [Format]'s [%a]. *)
  val pp : Format.formatter -> t -> unit
end

(** Publication metadata for journal articles and periodicals.

    Fields for works published in journals, magazines, or other
    serial publications. Page numbers can be specified as a range
    ([pages]) or as separate [start] and [end_] values. *)
module Publication : sig
  (** Abstract publication record. Every field is optional. *)
  type t

  (** Empty publication record with all fields as [None]. *)
  val empty : t

  (** Create a publication record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?journal:string
    -> ?volume:string
    -> ?issue:string
    -> ?pages:string
    -> ?start:string
    -> ?end_:string
    -> ?edition:string
    -> ?section:string
    -> ?status:Cff_enums.Status.t
    -> unit
    -> t

  (** The name of the journal or magazine. *)
  val journal : t -> string option

  (** The volume number of the journal. *)
  val volume : t -> string option

  (** The issue number within the volume. *)
  val issue : t -> string option

  (** Page range (e.g., ["123-145"]). Alternative to [start]/[end_]. *)
  val pages : t -> string option

  (** Starting page number. *)
  val start : t -> string option

  (** Ending page number. *)
  val end_ : t -> string option

  (** The edition of the work (e.g., ["2nd edition"]). *)
  val edition : t -> string option

  (** The section of a work (e.g., newspaper section). *)
  val section : t -> string option

  (** Publication status: preprint, in-press, submitted, etc. *)
  val status : t -> Cff_enums.Status.t option

  (** [true] if all fields are [None]. *)
  val is_empty : t -> bool
end

(** Collection metadata for works in edited volumes.

    Used for book chapters, conference proceedings, and other works
    that appear within a larger collection. *)
module Collection : sig
  (** Abstract collection record. Every field is optional. *)
  type t

  (** Collection record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create a collection record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?collection_title:string
    -> ?collection_type:string
    -> ?collection_doi:string
    -> ?volume_title:string
    -> ?number_volumes:string
    -> unit
    -> t

  (** Title of the collection (proceedings, book series, etc.). *)
  val collection_title : t -> string option

  (** Type of collection (e.g., ["proceedings"], ["book series"]). *)
  val collection_type : t -> string option

  (** DOI of the collection itself (not the individual work). *)
  val collection_doi : t -> string option

  (** Title of the specific volume within a multi-volume collection. *)
  val volume_title : t -> string option

  (** Total number of volumes in the collection. *)
  val number_volumes : t -> string option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm. *)
  val is_empty : t -> bool
end

(** Date-related fields.

    CFF distinguishes between several date types:
    - {b date-released}: When the software/dataset was released
    - {b date-published}: When the work was formally published
    - {b date-accessed}: When an online resource was last accessed
    - {b date-downloaded}: When a resource was downloaded

    For older works or when only the year is known, use [year] instead
    of a full date. *)
module Dates : sig
  (** Abstract dates record. Every field is optional. *)
  type t

  (** Dates record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create a dates record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?date_accessed:Cff_date.t
    -> ?date_downloaded:Cff_date.t
    -> ?date_published:Cff_date.t
    -> ?date_released:Cff_date.t
    -> ?year:int
    -> ?year_original:int
    -> ?month:int
    -> ?issue_date:string
    -> unit
    -> t

  (** Date when an online resource was accessed for citation. *)
  val date_accessed : t -> Cff_date.t option

  (** Date when a resource was downloaded. *)
  val date_downloaded : t -> Cff_date.t option

  (** Formal publication date. *)
  val date_published : t -> Cff_date.t option

  (** Release date (typically for software). *)
  val date_released : t -> Cff_date.t option

  (** Publication year when full date is unknown. *)
  val year : t -> int option

  (** Year of original publication (for reprints, translations). *)
  val year_original : t -> int option

  (** Publication month (1-12) when only month/year is known. *)
  val month : t -> int option

  (** Issue date as a string (for periodicals with specific dates). *)
  val issue_date : t -> string option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm. *)
  val is_empty : t -> bool
end

(** Identifiers and repository links.

    Various identifier schemes for locating and citing works:
    - DOI: Digital Object Identifier (preferred for academic works)
    - URL: Web address
    - ISBN: International Standard Book Number
    - ISSN: International Standard Serial Number (journals)
    - PMCID: PubMed Central ID
    - NIHMSID: NIH Manuscript Submission ID *)
module Identifiers : sig
  (** Abstract identifiers record. Every field is optional. *)
  type t

  (** Identifiers record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create an identifiers record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?doi:string
    -> ?url:string
    -> ?repository:string
    -> ?repository_code:string
    -> ?repository_artifact:string
    -> ?isbn:string
    -> ?issn:string
    -> ?pmcid:string
    -> ?nihmsid:string
    -> ?identifiers:Cff_identifier.t list
    -> unit
    -> t

  (** Digital Object Identifier (e.g., ["10.1234/example"]). *)
  val doi : t -> string option

  (** URL where the work can be accessed. *)
  val url : t -> string option

  (** General repository URL. *)
  val repository : t -> string option

  (** Source code repository (GitHub, GitLab, etc.). *)
  val repository_code : t -> string option

  (** Built artifact repository (npm, PyPI, Docker Hub, etc.). *)
  val repository_artifact : t -> string option

  (** International Standard Book Number. *)
  val isbn : t -> string option

  (** International Standard Serial Number (for journals). *)
  val issn : t -> string option

  (** PubMed Central identifier. *)
  val pmcid : t -> string option

  (** NIH Manuscript Submission System identifier. *)
  val nihmsid : t -> string option

  (** Additional typed identifiers (DOI, URL, SWH, other). *)
  val identifiers : t -> Cff_identifier.t list option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm (in particular how
      an empty [identifiers] list is treated). *)
  val is_empty : t -> bool
end

(** Related entities: editors, publishers, institutions.

    Persons and organizations involved in the work beyond the authors:
    - Editors of collections or journals
    - Publishers and their locations
    - Academic institutions (for theses, reports)
    - Conferences (for proceedings, presentations) *)
module Entities : sig
  (** Abstract entities record. Every field is optional. *)
  type t

  (** Entities record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create an entities record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?editors:Cff_author.t list
    -> ?editors_series:Cff_author.t list
    -> ?translators:Cff_author.t list
    -> ?recipients:Cff_author.t list
    -> ?senders:Cff_author.t list
    -> ?contact:Cff_author.t list
    -> ?publisher:Cff_author.Entity.t
    -> ?institution:Cff_author.Entity.t
    -> ?conference:Cff_author.Entity.t
    -> ?database_provider:Cff_author.Entity.t
    -> ?location:Cff_author.Entity.t
    -> unit
    -> t

  (** Editors of the work (for edited volumes). *)
  val editors : t -> Cff_author.t list option

  (** Series editors (for book series). *)
  val editors_series : t -> Cff_author.t list option

  (** Translators of the work. *)
  val translators : t -> Cff_author.t list option

  (** Recipients (for personal communications). *)
  val recipients : t -> Cff_author.t list option

  (** Senders (for personal communications). *)
  val senders : t -> Cff_author.t list option

  (** Contact persons for the work. *)
  val contact : t -> Cff_author.t list option

  (** Publishing organization. *)
  val publisher : t -> Cff_author.Entity.t option

  (** Academic/research institution (for theses, reports). *)
  val institution : t -> Cff_author.Entity.t option

  (** Conference where the work was presented. *)
  val conference : t -> Cff_author.Entity.t option

  (** Provider of a database (for data references). *)
  val database_provider : t -> Cff_author.Entity.t option

  (** Location entity (city, venue for conferences). *)
  val location : t -> Cff_author.Entity.t option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm. *)
  val is_empty : t -> bool
end

(** Descriptive metadata: keywords, license, notes.

    Additional information about the work for discovery and rights. *)
module Metadata : sig
  (** Abstract metadata record. Every field is optional. *)
  type t

  (** Metadata record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create a metadata record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?keywords:string list
    -> ?languages:string list
    -> ?license:Cff_license.t
    -> ?copyright:string
    -> ?scope:string
    -> ?notes:string
    -> unit
    -> t

  (** Descriptive keywords for the work. *)
  val keywords : t -> string list option

  (** Languages the work is available in (ISO 639 codes). *)
  val languages : t -> string list option

  (** SPDX license identifier(s), or unknown license with optional URL. *)
  val license : t -> Cff_license.t option

  (** Copyright statement. *)
  val copyright : t -> string option

  (** Scope of the reference (what aspect it covers). *)
  val scope : t -> string option

  (** Additional notes or comments. *)
  val notes : t -> string option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm. *)
  val is_empty : t -> bool
end

(** Technical and domain-specific fields.

    Fields for software, data, and specialized reference types:
    - Software: commit hash, version, filename
    - Theses: thesis type, department
    - Data: data type, database, format
    - Patents: patent states
    - Dictionaries/encyclopedias: term, entry *)
module Technical : sig
  (** Abstract technical record. Every field is optional. *)
  type t

  (** Technical record with no fields set.

      NOTE(review): presumably all fields are [None], as documented for
      {!Publication.empty}; confirm in the implementation. *)
  val empty : t

  (** Create a technical record from any subset of its fields.

      All arguments are optional; the trailing [unit] argument allows
      them to be omitted. Each labelled argument corresponds to the
      accessor of the same name below. *)
  val make
    : ?commit:string
    -> ?version:string
    -> ?filename:string
    -> ?format:string
    -> ?medium:string
    -> ?data_type:string
    -> ?database:string
    -> ?number:string
    -> ?patent_states:string list
    -> ?thesis_type:string
    -> ?term:string
    -> ?entry:string
    -> ?department:string
    -> ?loc_start:string
    -> ?loc_end:string
    -> unit
    -> t

  (** Git commit hash or VCS revision. *)
  val commit : t -> string option

  (** Version string of the software/data. *)
  val version : t -> string option

  (** Name of the file being referenced. *)
  val filename : t -> string option

  (** Format of the work (e.g., ["PDF"], ["HTML"]). *)
  val format : t -> string option

  (** Physical medium (e.g., ["CD-ROM"], ["print"]). *)
  val medium : t -> string option

  (** Type of data (for datasets). *)
  val data_type : t -> string option

  (** Name of the database. *)
  val database : t -> string option

  (** Report/patent/standard number. *)
  val number : t -> string option

  (** Countries where a patent is held. *)
  val patent_states : t -> string list option

  (** Type of thesis (["PhD"], ["Master's"], etc.). *)
  val thesis_type : t -> string option

  (** Dictionary/encyclopedia term being referenced. *)
  val term : t -> string option

  (** Encyclopedia entry name. *)
  val entry : t -> string option

  (** Academic department (for theses). *)
  val department : t -> string option

  (** Starting line/location in source code. *)
  val loc_start : t -> string option

  (** Ending line/location in source code. *)
  val loc_end : t -> string option

  (** Whether the record carries no data.

      NOTE(review): presumably [true] iff every field is [None], as
      documented for {!Publication.is_empty}; confirm. *)
  val is_empty : t -> bool
end

(** {1 Reference Type} *)

(** The complete reference type combining all sub-records.

    The type is abstract: build values with {!make} or {!make_simple}
    and inspect them through the accessors in this module. *)
type t

(** Construct a reference from sub-records.

    Only [core] is required; other sub-records default to empty.
    The trailing [unit] argument allows the optional sub-records to be
    omitted.

    @param core Identity fields (type, title, authors)
    @param publication Journal/periodical metadata
    @param collection Enclosing collection metadata
    @param dates Date-related fields
    @param identifiers Identifiers and repository links
    @param entities Editors, publisher, institution, etc.
    @param metadata Keywords, license, notes, etc.
    @param technical Software/data/thesis/patent-specific fields *)
val make
  : core:Core.t
  -> ?publication:Publication.t
  -> ?collection:Collection.t
  -> ?dates:Dates.t
  -> ?identifiers:Identifiers.t
  -> ?entities:Entities.t
  -> ?metadata:Metadata.t
  -> ?technical:Technical.t
  -> unit
  -> t

(** Convenience constructor for simple references.

    Creates a reference with just the most common fields. Suitable
    for quick article or software references. Use {!make} when any
    other field is needed.

    NOTE(review): [doi], [year] and [journal] are presumably stored in
    the {!Identifiers}, {!Dates} and {!Publication} sub-records
    respectively; confirm in the implementation.

    @param type_ The reference type (article, book, software, etc.)
    @param title The title of the work
    @param authors List of persons and/or entities
    @param doi Optional Digital Object Identifier
    @param year Optional publication year
    @param journal Optional journal or magazine name *)
val make_simple
  : type_:Cff_enums.Reference_type.t
  -> title:string
  -> authors:Cff_author.t list
  -> ?doi:string
  -> ?year:int
  -> ?journal:string
  -> unit
  -> t

(** {2 Sub-record Accessors}

    One accessor per sub-record; each returns the sub-record itself,
    never an [option]. *)

(** The core identity fields. *)
val core : t -> Core.t

(** Publication metadata (journal, volume, pages). *)
val publication : t -> Publication.t

(** Collection metadata (proceedings, book series). *)
val collection : t -> Collection.t

(** Date-related fields. *)
val dates : t -> Dates.t

(** Identifiers and links. *)
val identifiers : t -> Identifiers.t

(** Related entities (editors, publisher). *)
val entities : t -> Entities.t

(** Descriptive metadata (keywords, license). *)
val metadata : t -> Metadata.t

(** Technical fields (commit, version, format). *)
val technical : t -> Technical.t

(** {2 Direct Accessors for Common Fields}

    Convenience accessors that delegate to sub-records. *)

(** Shortcut for [Core.type_ (core t)]. *)
val type_ : t -> Cff_enums.Reference_type.t

(** Shortcut for [Core.title (core t)]. *)
val title : t -> string

(** Shortcut for [Core.authors (core t)]. *)
val authors : t -> Cff_author.t list

(** Shortcut for [Identifiers.doi (identifiers t)]. *)
val doi : t -> string option

(** Shortcut for [Dates.year (dates t)]. *)
val year : t -> int option

(** {1 Formatting and Codec} *)

(** Pretty-print a reference in a human-readable format.
    Usable with [Format]'s [%a]. *)
val pp : Format.formatter -> t -> unit

(** JSON/YAML codec for serialization, as a [Jsont.t] value for {!t}. *)
val jsont : t Jsont.t