OCaml codecs for the Citation File Format (CFF)
at main 584 lines 16 kB view raw
(*---------------------------------------------------------------------------
  Copyright (c) 2026 The ocaml-cff programmers. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** Bibliographic reference type for CFF.

    References represent citable works in the [references] and
    [preferred-citation] fields of a CFF file. They can describe any
    type of scholarly output: journal articles, books, conference papers,
    software, datasets, theses, patents, and many more.

    {1 Structure}

    CFF references have 60+ possible fields. This module organizes them
    into logical sub-records for easier manipulation:

    - {!Core} - Required fields: type, title, authors
    - {!Publication} - Journal articles: journal, volume, issue, pages
    - {!Collection} - Book chapters, proceedings: collection title, DOI
    - {!Dates} - When the work was published, accessed, etc.
    - {!Identifiers} - DOI, URL, ISBN, ISSN, repository links
    - {!Entities} - Editors, publisher, institution, conference
    - {!Metadata} - Keywords, license, languages, copyright
    - {!Technical} - Software-specific: commit, version, format

    {1 Reference Types}

    The [type] field determines what kind of work is being referenced.
    CFF 1.2.0 supports 40+ types including:

    - Academic: [`Article], [`Book], [`Conference_paper], [`Thesis]
    - Software: [`Software], [`Software_code], [`Software_container]
    - Data: [`Data], [`Database], [`Dataset]
    - Legal: [`Patent], [`Legal_case], [`Statute]
    - Media: [`Video], [`Sound_recording], [`Film_broadcast]

    {1 Example}

    {[
      (* A journal article reference *)
      let article = Cff_reference.make_simple
        ~type_:`Article
        ~title:"The Software Citation Principles"
        ~authors:[
          Cff_author.Person (Cff_author.Person.make
            ~family_names:"Smith"
            ~given_names:"Arfon M."
            ());
        ]
        ~doi:"10.7717/peerj-cs.86"
        ~year:2016
        ~journal:"PeerJ Computer Science"
        ()

      (* A software reference with more details *)
      let core = Cff_reference.Core.make
        ~type_:`Software
        ~title:"NumPy"
        ~authors:[...]
        () in
      let dates = Cff_reference.Dates.make ~year:2020 () in
      let ids = Cff_reference.Identifiers.make
        ~doi:"10.1038/s41586-020-2649-2"
        ~url:"https://numpy.org"
        () in
      let software = Cff_reference.make ~core ~dates ~identifiers:ids ()
    ]}

    {1 Sub-records} *)

(** Core identity fields (required for all references).

    Every reference must have a type, title, and at least one author.
    The type determines what additional fields are relevant. *)
module Core : sig
  (** Abstract core record; build it with {!make} and read it with the
      accessors below. *)
  type t

  (** Create a core record.

      @param type_ The reference type (article, book, software, etc.)
      @param title The title of the work
      @param authors List of persons and/or entities
      @param abstract Optional description or abstract (see {!abstract})
      @param abbreviation Optional abbreviated title (see {!abbreviation}) *)
  val make
    : type_:Cff_enums.Reference_type.t
    -> title:string
    -> authors:Cff_author.t list
    -> ?abstract:string
    -> ?abbreviation:string
    -> unit
    -> t

  (** The reference type. Determines which other fields are applicable. *)
  val type_ : t -> Cff_enums.Reference_type.t

  (** The title of the referenced work. *)
  val title : t -> string

  (** The authors/creators of the work. *)
  val authors : t -> Cff_author.t list

  (** A description or abstract of the work. *)
  val abstract : t -> string option

  (** Abbreviated form of the title (e.g., for journal names). *)
  val abbreviation : t -> string option

  (** Printer for core records. It has the standard [Format] printer
      signature, so it can be used with the [%a] conversion. *)
  val pp : Format.formatter -> t -> unit
end

(** Publication metadata for journal articles and periodicals.

    Fields for works published in journals, magazines, or other
    serial publications. Page numbers can be specified as a range
    ([pages]) or as separate [start] and [end_] values. *)
module Publication : sig
  (** Abstract publication record. *)
  type t

  (** Empty publication record with all fields as [None]. *)
  val empty : t

  (** Create a publication record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?journal:string
    -> ?volume:string
    -> ?issue:string
    -> ?pages:string
    -> ?start:string
    -> ?end_:string
    -> ?edition:string
    -> ?section:string
    -> ?status:Cff_enums.Status.t
    -> unit
    -> t

  (** The name of the journal or magazine. *)
  val journal : t -> string option

  (** The volume number of the journal. *)
  val volume : t -> string option

  (** The issue number within the volume. *)
  val issue : t -> string option

  (** Page range (e.g., ["123-145"]). Alternative to [start]/[end_]. *)
  val pages : t -> string option

  (** Starting page number. *)
  val start : t -> string option

  (** Ending page number. *)
  val end_ : t -> string option

  (** The edition of the work (e.g., ["2nd edition"]). *)
  val edition : t -> string option

  (** The section of a work (e.g., newspaper section). *)
  val section : t -> string option

  (** Publication status: preprint, in-press, submitted, etc. *)
  val status : t -> Cff_enums.Status.t option

  (** [true] if all fields are [None]. *)
  val is_empty : t -> bool
end

(** Collection metadata for works in edited volumes.

    Used for book chapters, conference proceedings, and other works
    that appear within a larger collection. *)
module Collection : sig
  (** Abstract collection record. *)
  type t

  (** Empty collection record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create a collection record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?collection_title:string
    -> ?collection_type:string
    -> ?collection_doi:string
    -> ?volume_title:string
    -> ?number_volumes:string
    -> unit
    -> t

  (** Title of the collection (proceedings, book series, etc.). *)
  val collection_title : t -> string option

  (** Type of collection (e.g., ["proceedings"], ["book series"]). *)
  val collection_type : t -> string option

  (** DOI of the collection itself (not the individual work). *)
  val collection_doi : t -> string option

  (** Title of the specific volume within a multi-volume collection. *)
  val volume_title : t -> string option

  (** Total number of volumes in the collection. *)
  val number_volumes : t -> string option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** Date-related fields.

    CFF distinguishes between several date types:
    - {b date-released}: When the software/dataset was released
    - {b date-published}: When the work was formally published
    - {b date-accessed}: When an online resource was last accessed
    - {b date-downloaded}: When a resource was downloaded

    For older works or when only the year is known, use [year] instead
    of a full date. *)
module Dates : sig
  (** Abstract dates record. *)
  type t

  (** Empty dates record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create a dates record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?date_accessed:Cff_date.t
    -> ?date_downloaded:Cff_date.t
    -> ?date_published:Cff_date.t
    -> ?date_released:Cff_date.t
    -> ?year:int
    -> ?year_original:int
    -> ?month:int
    -> ?issue_date:string
    -> unit
    -> t

  (** Date when an online resource was accessed for citation. *)
  val date_accessed : t -> Cff_date.t option

  (** Date when a resource was downloaded. *)
  val date_downloaded : t -> Cff_date.t option

  (** Formal publication date. *)
  val date_published : t -> Cff_date.t option

  (** Release date (typically for software). *)
  val date_released : t -> Cff_date.t option

  (** Publication year when full date is unknown. *)
  val year : t -> int option

  (** Year of original publication (for reprints, translations). *)
  val year_original : t -> int option

  (** Publication month (1-12) when only month/year is known. *)
  val month : t -> int option

  (** Issue date as a string (for periodicals with specific dates). *)
  val issue_date : t -> string option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** Identifiers and repository links.

    Various identifier schemes for locating and citing works:
    - DOI: Digital Object Identifier (preferred for academic works)
    - URL: Web address
    - ISBN: International Standard Book Number
    - ISSN: International Standard Serial Number (journals)
    - PMCID: PubMed Central ID
    - NIHMSID: NIH Manuscript Submission ID *)
module Identifiers : sig
  (** Abstract identifiers record. *)
  type t

  (** Empty identifiers record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create an identifiers record. Every field is optional; each
      labelled argument corresponds to the accessor of the same name. *)
  val make
    : ?doi:string
    -> ?url:string
    -> ?repository:string
    -> ?repository_code:string
    -> ?repository_artifact:string
    -> ?isbn:string
    -> ?issn:string
    -> ?pmcid:string
    -> ?nihmsid:string
    -> ?identifiers:Cff_identifier.t list
    -> unit
    -> t

  (** Digital Object Identifier (e.g., ["10.1234/example"]). *)
  val doi : t -> string option

  (** URL where the work can be accessed. *)
  val url : t -> string option

  (** General repository URL. *)
  val repository : t -> string option

  (** Source code repository (GitHub, GitLab, etc.). *)
  val repository_code : t -> string option

  (** Built artifact repository (npm, PyPI, Docker Hub, etc.). *)
  val repository_artifact : t -> string option

  (** International Standard Book Number. *)
  val isbn : t -> string option

  (** International Standard Serial Number (for journals). *)
  val issn : t -> string option

  (** PubMed Central identifier. *)
  val pmcid : t -> string option

  (** NIH Manuscript Submission System identifier. *)
  val nihmsid : t -> string option

  (** Additional typed identifiers (DOI, URL, SWH, other). *)
  val identifiers : t -> Cff_identifier.t list option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** Related entities: editors, publishers, institutions.

    Persons and organizations involved in the work beyond the authors:
    - Editors of collections or journals
    - Publishers and their locations
    - Academic institutions (for theses, reports)
    - Conferences (for proceedings, presentations) *)
module Entities : sig
  (** Abstract entities record. *)
  type t

  (** Empty entities record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create an entities record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?editors:Cff_author.t list
    -> ?editors_series:Cff_author.t list
    -> ?translators:Cff_author.t list
    -> ?recipients:Cff_author.t list
    -> ?senders:Cff_author.t list
    -> ?contact:Cff_author.t list
    -> ?publisher:Cff_author.Entity.t
    -> ?institution:Cff_author.Entity.t
    -> ?conference:Cff_author.Entity.t
    -> ?database_provider:Cff_author.Entity.t
    -> ?location:Cff_author.Entity.t
    -> unit
    -> t

  (** Editors of the work (for edited volumes). *)
  val editors : t -> Cff_author.t list option

  (** Series editors (for book series). *)
  val editors_series : t -> Cff_author.t list option

  (** Translators of the work. *)
  val translators : t -> Cff_author.t list option

  (** Recipients (for personal communications). *)
  val recipients : t -> Cff_author.t list option

  (** Senders (for personal communications). *)
  val senders : t -> Cff_author.t list option

  (** Contact persons for the work. *)
  val contact : t -> Cff_author.t list option

  (** Publishing organization. *)
  val publisher : t -> Cff_author.Entity.t option

  (** Academic/research institution (for theses, reports). *)
  val institution : t -> Cff_author.Entity.t option

  (** Conference where the work was presented. *)
  val conference : t -> Cff_author.Entity.t option

  (** Provider of a database (for data references). *)
  val database_provider : t -> Cff_author.Entity.t option

  (** Location entity (city, venue for conferences). *)
  val location : t -> Cff_author.Entity.t option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** Descriptive metadata: keywords, license, notes.

    Additional information about the work for discovery and rights. *)
module Metadata : sig
  (** Abstract metadata record. *)
  type t

  (** Empty metadata record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create a metadata record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?keywords:string list
    -> ?languages:string list
    -> ?license:Cff_license.t
    -> ?copyright:string
    -> ?scope:string
    -> ?notes:string
    -> unit
    -> t

  (** Descriptive keywords for the work. *)
  val keywords : t -> string list option

  (** Languages the work is available in (ISO 639 codes). *)
  val languages : t -> string list option

  (** SPDX license identifier(s), or unknown license with optional URL. *)
  val license : t -> Cff_license.t option

  (** Copyright statement. *)
  val copyright : t -> string option

  (** Scope of the reference (what aspect it covers). *)
  val scope : t -> string option

  (** Additional notes or comments. *)
  val notes : t -> string option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** Technical and domain-specific fields.

    Fields for software, data, and specialized reference types:
    - Software: commit hash, version, filename
    - Theses: thesis type, department
    - Data: data type, database, format
    - Patents: patent states
    - Dictionaries/encyclopedias: term, entry *)
module Technical : sig
  (** Abstract technical record. *)
  type t

  (** Empty technical record. Presumably every field is [None], as
      documented for {!Publication.empty}; to be confirmed against the
      implementation. *)
  val empty : t

  (** Create a technical record. Every field is optional; each labelled
      argument corresponds to the accessor of the same name. *)
  val make
    : ?commit:string
    -> ?version:string
    -> ?filename:string
    -> ?format:string
    -> ?medium:string
    -> ?data_type:string
    -> ?database:string
    -> ?number:string
    -> ?patent_states:string list
    -> ?thesis_type:string
    -> ?term:string
    -> ?entry:string
    -> ?department:string
    -> ?loc_start:string
    -> ?loc_end:string
    -> unit
    -> t

  (** Git commit hash or VCS revision. *)
  val commit : t -> string option

  (** Version string of the software/data. *)
  val version : t -> string option

  (** Name of the file being referenced. *)
  val filename : t -> string option

  (** Format of the work (e.g., ["PDF"], ["HTML"]). *)
  val format : t -> string option

  (** Physical medium (e.g., ["CD-ROM"], ["print"]). *)
  val medium : t -> string option

  (** Type of data (for datasets). *)
  val data_type : t -> string option

  (** Name of the database. *)
  val database : t -> string option

  (** Report/patent/standard number. *)
  val number : t -> string option

  (** Countries where a patent is held. *)
  val patent_states : t -> string list option

  (** Type of thesis (["PhD"], ["Master's"], etc.). *)
  val thesis_type : t -> string option

  (** Dictionary/encyclopedia term being referenced. *)
  val term : t -> string option

  (** Encyclopedia entry name. *)
  val entry : t -> string option

  (** Academic department (for theses). *)
  val department : t -> string option

  (** Starting line/location in source code. *)
  val loc_start : t -> string option

  (** Ending line/location in source code. *)
  val loc_end : t -> string option

  (** Emptiness test. Presumably [true] exactly when all fields are
      [None], as documented for {!Publication.is_empty}; to be confirmed
      against the implementation. *)
  val is_empty : t -> bool
end

(** {1 Reference Type} *)

(** The complete reference type combining all sub-records. *)
type t

(** Construct a reference from sub-records.

    Only [core] is required; other sub-records default to empty. *)
val make
  : core:Core.t
  -> ?publication:Publication.t
  -> ?collection:Collection.t
  -> ?dates:Dates.t
  -> ?identifiers:Identifiers.t
  -> ?entities:Entities.t
  -> ?metadata:Metadata.t
  -> ?technical:Technical.t
  -> unit
  -> t

(** Convenience constructor for simple references.

    Creates a reference with just the most common fields. Suitable
    for quick article or software references.

    @param type_ The reference type (article, book, software, etc.)
    @param title The title of the work
    @param authors List of persons and/or entities
    @param doi Optional Digital Object Identifier of the work
    @param year Optional publication year
    @param journal Optional name of the journal or magazine *)
val make_simple
  : type_:Cff_enums.Reference_type.t
  -> title:string
  -> authors:Cff_author.t list
  -> ?doi:string
  -> ?year:int
  -> ?journal:string
  -> unit
  -> t

(** {2 Sub-record Accessors} *)

(** The core identity fields. *)
val core : t -> Core.t

(** Publication metadata (journal, volume, pages). *)
val publication : t -> Publication.t

(** Collection metadata (proceedings, book series). *)
val collection : t -> Collection.t

(** Date-related fields. *)
val dates : t -> Dates.t

(** Identifiers and links. *)
val identifiers : t -> Identifiers.t

(** Related entities (editors, publisher). *)
val entities : t -> Entities.t

(** Descriptive metadata (keywords, license). *)
val metadata : t -> Metadata.t

(** Technical fields (commit, version, format). *)
val technical : t -> Technical.t

(** {2 Direct Accessors for Common Fields}

    Convenience accessors that delegate to sub-records. *)

(** Shortcut for [Core.type_ (core t)]. *)
val type_ : t -> Cff_enums.Reference_type.t

(** Shortcut for [Core.title (core t)]. *)
val title : t -> string

(** Shortcut for [Core.authors (core t)]. *)
val authors : t -> Cff_author.t list

(** Shortcut for [Identifiers.doi (identifiers t)]. *)
val doi : t -> string option

(** Shortcut for [Dates.year (dates t)]. *)
val year : t -> int option

(** {1 Formatting and Codec} *)

(** Pretty-print a reference in a human-readable format. *)
val pp : Format.formatter -> t -> unit

(** JSON/YAML codec for serialization. *)
val jsont : t Jsont.t