My personal data management layer
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** DOI entries resolved from external sources via Zotero Translation Server *)
7
(** Outcome of resolving a DOI. *)
type status =
  | Resolved  (** The entry was decoded without an ["error"] key. *)
  | Failed of string
      (** Resolution failed; the payload is the string stored under the
          entry's ["error"] key. *)
11
(** One DOI entry decoded from a YAML mapping. *)
type t = {
  doi : string;  (** Value of the ["doi"] key; [""] when absent. *)
  title : string;  (** Value of ["title"]; left as [""] on failed entries. *)
  authors : string list;
      (** String items of ["authors"]; left as [[]] on failed entries. *)
  year : int;  (** Value of ["year"]; [0] when absent or on failed entries. *)
  bibtype : string;
      (** Value of ["bibtype"]; left as [""] on failed entries. *)
  publisher : string;
      (** Value of ["publisher"]; left as [""] on failed entries. *)
  resolved_at : string;
      (** Value of ["resolved_at"], kept as an uninterpreted string
          (presumably a timestamp; format to be confirmed). *)
  source_urls : string list;
      (** String items of ["source_urls"], or the single ["source_url"]
          string when that list yields nothing. *)
  status : status;  (** Whether the entry resolved or failed. *)
  ignore : bool;
      (** Value of ["ignore"]; entries with [true] are skipped by the
          lookup functions. *)
}
24
(** A list of DOI entries. *)
type ts = t list
26
(** [get_string key fields] returns the string bound to the first occurrence
    of [key] in the association list [fields]. Yields [""] when [key] is
    absent or bound to anything other than a [`String]. *)
let get_string key fields =
  List.assoc_opt key fields
  |> Option.fold ~none:"" ~some:(function `String text -> text | _ -> "")
31
(** [get_string_opt key fields] is [Some s] when the first binding of [key]
    in [fields] is [`String s], and [None] when [key] is absent or bound to a
    value of any other shape. *)
let get_string_opt key fields =
  let as_string = function `String text -> Some text | _ -> None in
  Option.bind (List.assoc_opt key fields) as_string
36
(** [get_int key fields] returns the integer value of the [`Float] bound to
    the first occurrence of [key] in [fields], truncated toward zero.

    Yields [0] when [key] is absent, when it is bound to a value that is not
    a [`Float], or when the float cannot be represented as an [int] (NaN,
    infinities, or magnitudes outside the [int] range). *)
let get_int key fields =
  (* [min_int] is a power of two, so it converts to a float exactly; its
     negation is the first float strictly above [max_int]. NaN fails both
     comparisons and therefore falls through to the default. *)
  let lowest = float_of_int min_int in
  match List.assoc_opt key fields with
  | Some (`Float f) when f >= lowest && f < -.lowest -> int_of_float f
  | _ -> 0
41
(** [get_bool key fields] is [true] exactly when the first binding of [key]
    in [fields] is [`Bool true]. A missing key, a [`Bool false], or a value
    of any other shape all give [false]. *)
let get_bool key fields =
  match List.assoc_opt key fields with
  | Some (`Bool true) -> true
  | Some _ | None -> false
46
(** [get_strings key fields] collects, in order, the [`String] items of the
    [`A] sequence bound to the first occurrence of [key] in [fields].
    Non-string items are dropped. Yields [[]] when [key] is absent or is not
    bound to an [`A] sequence. *)
let get_strings key fields =
  let only_string = function `String text -> Some text | _ -> None in
  let strings_of = function
    | `A items -> List.filter_map only_string items
    | _ -> []
  in
  Option.fold ~none:[] ~some:strings_of (List.assoc_opt key fields)
52
(** [of_yaml_value v] decodes one YAML value into an entry.

    Returns [None] when [v] is not a mapping ([`O]). A mapping with a string
    ["error"] key becomes a [Failed] entry whose title, authors, year,
    bibtype and publisher are left blank. Any other mapping becomes a
    [Resolved] entry, with absent or mistyped keys falling back to the
    defaults of the [get_*] helpers. *)
let of_yaml_value = function
  | `O fields ->
    let text key = get_string key fields in
    (* Prefer the "source_urls" list; use the single "source_url" string
       only when that list produced no strings. *)
    let source_urls =
      match
        (get_strings "source_urls" fields, get_string_opt "source_url" fields)
      with
      | [], Some single -> [ single ]
      | urls, _ -> urls
    in
    (* Fields shared by both outcomes; the bibliographic ones start blank. *)
    let blank =
      {
        doi = text "doi";
        title = "";
        authors = [];
        year = 0;
        bibtype = "";
        publisher = "";
        resolved_at = text "resolved_at";
        source_urls;
        status = Resolved;
        ignore = get_bool "ignore" fields;
      }
    in
    let entry =
      match get_string_opt "error" fields with
      | Some reason -> { blank with status = Failed reason }
      | None ->
        {
          blank with
          title = text "title";
          authors = get_strings "authors" fields;
          year = get_int "year" fields;
          bibtype = text "bibtype";
          publisher = text "publisher";
        }
    in
    Some entry
  | _ -> None
81
(** [of_yaml_string str] parses [str] with [Yamlrw.of_string] and decodes
    each element of a top-level sequence with [of_yaml_value], dropping
    elements that do not decode.

    Returns [[]] when the document is not a sequence, and also when
    [Yamlrw.Yamlrw_error] is raised; the error itself is discarded. *)
let of_yaml_string str =
  let decode () =
    match Yamlrw.of_string str with
    | `A entries -> List.filter_map of_yaml_value entries
    | _ -> []
  in
  try decode () with Yamlrw.Yamlrw_error _ -> []
89
(** [find_by_doi entries doi] returns the first entry of [entries] that is
    not flagged [ignore] and whose DOI matches [doi], or [None] when there is
    no such entry.

    DOI names are case-insensitive, so both sides are folded to lowercase
    ASCII before comparison; an exact match still matches as before. *)
let find_by_doi entries doi =
  let wanted = String.lowercase_ascii doi in
  List.find_opt
    (fun entry ->
      (not entry.ignore)
      && String.equal (String.lowercase_ascii entry.doi) wanted)
    entries
93
(** [find_by_url entries url] returns the first entry of [entries] that is
    not flagged [ignore] and lists [url] among its [source_urls], or [None]
    when there is no such entry. URLs are compared for exact equality. *)
let find_by_url entries url =
  let cites_url entry = List.exists (String.equal url) entry.source_urls in
  entries
  |> List.find_opt (fun entry -> (not entry.ignore) && cites_url entry)