(*---------------------------------------------------------------------------
  Copyright (c) 2026 The ocaml-cff programmers. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** Citation File Format (CFF) codec for OCaml.

    This library provides codecs for the
    {{:https://citation-file-format.github.io/}Citation File Format (CFF)}
    version 1.2.0, a human- and machine-readable format for software and
    dataset citation metadata.

    CFF files are named [CITATION.cff] and are written in the
    {{:https://yaml.org/}YAML 1.2} format. They provide citation metadata
    for software and datasets, enabling proper academic credit for research
    software.

    {1 Overview}

    A minimal [CITATION.cff] file requires four fields:

    - [cff-version]: The CFF schema version (currently ["1.2.0"])
    - [message]: Instructions for citing the work
    - [title]: The name of the software or dataset
    - [authors]: A list of persons and/or entities

    {2 Creating a CFF record}

    {[
      let author =
        Cff.Author.person ~family_names:"Smith" ~given_names:"Jane" ()

      let cff =
        Cff.make
          ~title:"My Research Software"
          ~authors:[author]
          ~version:"1.0.0"
          ~doi:"10.12345/zenodo.1234567"
          ()
    ]}

    {2 I/O}

    For file operations, use the backend-specific subpackages:

    - [cff.unix] - Unix file I/O using {!In_channel}/{!Out_channel}
    - [cff.eio] - Eio-based I/O using [Bytesrw_eio] to serialise to flows

    Example with [cff.unix]:

    {[
      match Cff_unix.of_file "CITATION.cff" with
      | Ok cff -> Printf.printf "Title: %s\n%!" (Cff.title cff)
      | Error msg -> Printf.eprintf "Error: %s\n%!" msg
    ]}

    {1 CFF Specification}

    This implementation follows the
    {{:https://github.com/citation-file-format/citation-file-format}CFF 1.2.0
    specification}. Useful modules include:

    - {!module:Author}: Can be persons (with family/given names) or entities
      (organizations, identified by a [name] field)
    - {!module:Reference}: Bibliography entries that the work cites or
      depends on
    - {!module:Identifier}: Typed identifiers including DOIs, URLs, and
      Software Heritage IDs (SWH)
    - {!module:License}: SPDX license identifiers where multiple licenses
      imply "OR"

    {1 Core Types} *)

(** The main [t] type represents a complete [CITATION.cff] file with all
    required and optional fields from the CFF 1.2.0 specification.

    Every valid CFF file must include:

    - {!cff_version}: Schema version (defaults to ["1.2.0"])
    - {!message}: Instructions for citing the work (has sensible default)
    - {!title}: Name of the software or dataset
    - {!authors}: List of persons and/or entities

    Optional fields are:

    - {!version}: Software version string
    - {!doi}: Digital Object Identifier
    - {!date_released}: Publication/release date
    - {!license}: SPDX license identifier(s)
    - {!keywords}: Descriptive keywords
    - {!abstract}: Description of the work

    The {!preferred_citation} field allows redirecting citations to a
    related work (e.g., a journal article describing the software). The
    {!references} field lists works that the software cites or depends
    upon. *)

(** The abstract type representing a complete CFF document. *)
type t

(** Date representation as a [(year, month, day)] tuple.

    CFF uses ISO 8601 dates in [YYYY-MM-DD] format (e.g., ["2024-01-15"]). *)
module Date = Cff_date

(** ISO 3166-1 alpha-2 country codes (e.g., ["US"], ["DE"], ["GB"]).

    Used for author and entity addresses. *)
module Country = Cff_country

(** Physical address information.

    Address fields used for persons and entities: street address, city,
    region (state/province), postal code, and country code. *)
module Address = Cff_address.Address

(** Contact information.

    Contact fields used for persons and entities: email, telephone, fax,
    website URL, and ORCID identifier. *)
module Contact = Cff_address.Contact

(** SPDX license identifiers.

    CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers} for the
    [license] field. Multiple licenses indicate an OR relationship (the user
    may choose any of the listed licenses). *)
module License = Cff_license

(** CFF file type: [`Software] (default) or [`Dataset]. *)
module Cff_type = Cff_enums.Cff_type

(** {1 Authors and Entities} *)

(** Authors as a discriminated union of {!Person} or {!Entity}.

    CFF distinguishes between:

    - {b Persons}: Individual humans with family names, given names, etc.
    - {b Entities}: Organizations, projects, or groups with a [name] field

    When parsing, the presence of a [name] field indicates an entity;
    otherwise, the entry is treated as a person. *)
module Author = Cff_author

(** A person (individual author or contributor). *)
module Person = Cff_author.Person

(** An entity (organization, institution, project, conference). *)
module Entity = Cff_author.Entity

(** {1 Identifiers and References} *)

(** Typed identifiers for DOI, URL, SWH, or other schemes.

    Each identifier has a type, value, and optional description. Example:

    {[
      let id =
        Cff.Identifier.make
          ~type_:`Doi
          ~value:"10.5281/zenodo.1234567"
          ~description:"The concept DOI for all versions"
          ()
    ]} *)
module Identifier = Cff_identifier

(** Bibliographic references with comprehensive metadata.

    References can represent any citable work: articles, books, software,
    datasets, conference papers, theses, etc. The {!Reference} module
    provides 60+ fields organized into logical sub-records:

    - {!Reference.Core} - Type, title, authors, abstract
    - {!Reference.Publication} - Journal, volume, issue, pages
    - {!Reference.Collection} - Proceedings, book series
    - {!Reference.Dates} - Various date fields and year
    - {!Reference.Identifiers} - DOI, URL, ISBN, ISSN, etc.
    - {!Reference.Entities} - Editors, publisher, institution
    - {!Reference.Metadata} - Keywords, license, notes
    - {!Reference.Technical} - Commit, version, format *)
module Reference = Cff_reference

(** {1 Construction} *)

(** The default CFF version used when not specified: ["1.2.0"]. *)
val default_cff_version : string

(** The default citation message:
    ["If you use this software, please cite it using the metadata from this file."] *)
val default_message : string

(** [make ~title ~authors ...] constructs a CFF value.

    The remaining optional arguments carry the same names as the accessors
    listed under {e Optional Fields} below (for example {!abstract}, {!doi}
    and {!version}); see each accessor for a description of the
    corresponding field.

    @param cff_version
      The CFF schema version (default: {!default_cff_version})
    @param message
      Instructions for users on how to cite (default: {!default_message})
    @param title The name of the software or dataset
    @param authors List of persons and/or entities who created the work *)
val make :
  ?cff_version:string ->
  ?message:string ->
  title:string ->
  authors:Author.t list ->
  ?abstract:string ->
  ?commit:string ->
  ?contact:Author.t list ->
  ?date_released:Date.t ->
  ?doi:string ->
  ?identifiers:Identifier.t list ->
  ?keywords:string list ->
  ?license:License.t ->
  ?preferred_citation:Reference.t ->
  ?references:Reference.t list ->
  ?repository:string ->
  ?repository_artifact:string ->
  ?repository_code:string ->
  ?type_:Cff_type.t ->
  ?url:string ->
  ?version:string ->
  unit ->
  t

(** {2 Required Fields} *)

(** The CFF schema version that this file adheres to.

    For CFF 1.2.0 files, this should be ["1.2.0"]. The version determines
    which keys are valid and how they should be interpreted. *)
val cff_version : t -> string

(** A message to readers explaining how to cite the work.

    Common examples:

    - ["If you use this software, please cite it using the metadata from this file."]
    - ["Please cite this software using the metadata from 'preferred-citation'."]

    The message should guide users toward the preferred citation method. *)
val message : t -> string

(** The name of the software or dataset.

    This is the title that should appear in citations. For software, it's
    typically the project name; for datasets, the dataset title. *)
val title : t -> string

(** The creators of the software or dataset.

    Authors can be persons (individuals) or entities (organizations). At
    least one author is required for a valid CFF file. The order typically
    reflects contribution significance. *)
val authors : t -> Author.t list

(** {2 Optional Fields} *)

(** A description of the software or dataset.

    Provides context about what the work does, its purpose, and scope. *)
val abstract : t -> string option

(** The commit hash or revision number of the software version.

    Useful for precise version identification beyond semantic versioning.
    Example: ["1ff847d81f29c45a3a1a5ce73d38e45c2f319bba"] *)
val commit : t -> string option

(** Contact persons or entities for the software or dataset.

    May differ from authors; useful when the primary contact is a project
    maintainer rather than the original author. *)
val contact : t -> Author.t list option

(** The date when the software or dataset was released.

    Format is [(year, month, day)], corresponding to ISO 8601
    [YYYY-MM-DD]. *)
val date_released : t -> Date.t option

(** The Digital Object Identifier for the software or dataset.

    DOIs provide persistent, citable identifiers. This is a shorthand for a
    single DOI; use {!identifiers} for multiple DOIs or other identifier
    types. Example: ["10.5281/zenodo.1234567"] *)
val doi : t -> string option

(** Additional identifiers beyond the primary DOI.

    Each identifier has a type (DOI, URL, SWH, other), value, and optional
    description. Useful for versioned DOIs, Software Heritage identifiers,
    or repository URLs. *)
val identifiers : t -> Identifier.t list option

(** Descriptive keywords for the work.

    Help with discoverability and categorization. Example:
    [["machine learning"; "image processing"; "python"]] *)
val keywords : t -> string list option

(** The SPDX license identifier(s) for the work.

    Uses {{:https://spdx.org/licenses/}SPDX identifiers}. Multiple licenses
    imply an OR relationship (user may choose any). Example: ["MIT"],
    ["Apache-2.0"], or [["GPL-3.0-only"; "MIT"]]. *)
val license : t -> License.t option

(** A reference to cite instead of the software itself.

    Used for "credit redirection" when authors prefer citation of a related
    publication (e.g., a methods paper) over the software. Note: Software
    citation principles recommend citing software directly; use this field
    judiciously. *)
val preferred_citation : t -> Reference.t option

(** Works that this software cites or depends upon.

    Functions like a bibliography, listing dependencies, foundational works,
    or related publications. Each reference includes full bibliographic
    metadata. *)
val references : t -> Reference.t list option

(** URL to the repository where the software is developed.

    Typically a version control system URL. For source code repositories,
    prefer {!repository_code}. *)
val repository : t -> string option

(** URL to the built/compiled artifact repository.

    For binary distributions, package registries (npm, PyPI, CRAN), or
    container registries. *)
val repository_artifact : t -> string option

(** URL to the source code repository.

    Typically a GitHub, GitLab, or similar URL where the source code is
    publicly accessible. *)
val repository_code : t -> string option

(** The type of work: [`Software] (default) or [`Dataset].

    Most CFF files describe software; use [`Dataset] for data packages. *)
val type_ : t -> Cff_type.t option

(** The URL of the software or dataset homepage.

    A general landing page, documentation site, or project website. *)
val url : t -> string option

(** The version string of the software or dataset.

    Can be any version format: semantic versioning (["1.2.3"]), date-based
    (["2024.01"]), or other schemes. *)
val version : t -> string option

(** {1 Formatting and Codec} *)

(** Pretty-print a CFF value in a human-readable YAML-like format. *)
val pp : Format.formatter -> t -> unit

(** JSON/YAML codec for serialization and deserialization.

    Used internally by the YAML codec functions. *)
val jsont : t Jsont.t