My personal data management layer
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** External link tracking for Bushel *)
7
8type karakeep_data = {
9 remote_url : string;
10 id : string;
11 tags : string list;
12 metadata : (string * string) list;
13}
14
15type bushel_data = {
16 slugs : string list;
17 tags : string list;
18}
19
20type t = {
21 url : string;
22 date : Ptime.date;
23 description : string;
24 karakeep : karakeep_data option;
25 bushel : bushel_data option;
26}
27
28type ts = t list
29
30(** {1 Accessors} *)
31
32let url { url; _ } = url
33let date { date; _ } = date
34let description { description; _ } = description
35let datetime v = Bushel_types.ptime_of_date_exn (date v)
36
37(** {1 Comparison} *)
38
39let compare a b = Ptime.compare (datetime b) (datetime a)
40
41(** {1 YAML Parsing} *)
42
43let t_of_yaml = function
44 | `O fields ->
45 let url =
46 match List.assoc_opt "url" fields with
47 | Some (`String v) -> v
48 | _ -> failwith "link: missing or invalid url"
49 in
50 let date =
51 match List.assoc_opt "date" fields with
52 | Some (`String v) ->
53 (try
54 match String.split_on_char '-' v with
55 | [y; m; d] -> (int_of_string y, int_of_string m, int_of_string d)
56 | _ ->
57 v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a
58 with _ ->
59 v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a)
60 | _ -> failwith "link: missing or invalid date"
61 in
62 let description =
63 match List.assoc_opt "description" fields with
64 | Some (`String v) -> v
65 | _ -> ""
66 in
67 let karakeep =
68 match List.assoc_opt "karakeep" fields with
69 | Some (`O k_fields) ->
70 let remote_url =
71 match List.assoc_opt "remote_url" k_fields with
72 | Some (`String v) -> v
73 | _ -> failwith "link: invalid karakeep.remote_url"
74 in
75 let id =
76 match List.assoc_opt "id" k_fields with
77 | Some (`String v) -> v
78 | _ -> failwith "link: invalid karakeep.id"
79 in
80 let tags =
81 match List.assoc_opt "tags" k_fields with
82 | Some (`A tag_list) ->
83 List.filter_map (function `String t -> Some t | _ -> None) tag_list
84 | _ -> []
85 in
86 let metadata =
87 match List.assoc_opt "metadata" k_fields with
88 | Some (`O meta_fields) ->
89 List.filter_map (fun (k, v) ->
90 match v with `String value -> Some (k, value) | _ -> None
91 ) meta_fields
92 | _ -> []
93 in
94 Some { remote_url; id; tags; metadata }
95 | _ -> None
96 in
97 let bushel =
98 match List.assoc_opt "bushel" fields with
99 | Some (`O b_fields) ->
100 let slugs =
101 match List.assoc_opt "slugs" b_fields with
102 | Some (`A slug_list) ->
103 List.filter_map (function `String s -> Some s | _ -> None) slug_list
104 | _ -> []
105 in
106 let tags =
107 match List.assoc_opt "tags" b_fields with
108 | Some (`A tag_list) ->
109 List.filter_map (function `String t -> Some t | _ -> None) tag_list
110 | _ -> []
111 in
112 Some { slugs; tags }
113 | _ -> None
114 in
115 { url; date; description; karakeep; bushel }
116 | _ -> failwith "link: invalid yaml"
117
118(** {1 YAML Serialization} *)
119
120let to_yaml t =
121 let (year, month, day) = t.date in
122 let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in
123
124 let base_fields = [
125 ("url", `String t.url);
126 ("date", `String date_str);
127 ] @
128 (if t.description = "" then [] else [("description", `String t.description)])
129 in
130
131 let karakeep_fields =
132 match t.karakeep with
133 | Some { remote_url; id; tags; metadata } ->
134 let karakeep_obj = [
135 ("remote_url", `String remote_url);
136 ("id", `String id);
137 ] in
138 let karakeep_obj =
139 if tags = [] then karakeep_obj
140 else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj
141 in
142 let karakeep_obj =
143 if metadata = [] then karakeep_obj
144 else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj
145 in
146 [("karakeep", `O karakeep_obj)]
147 | None -> []
148 in
149
150 let bushel_fields =
151 match t.bushel with
152 | Some { slugs; tags } ->
153 let bushel_obj = [] in
154 let bushel_obj =
155 if slugs = [] then bushel_obj
156 else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj
157 in
158 let bushel_obj =
159 if tags = [] then bushel_obj
160 else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj
161 in
162 if bushel_obj = [] then [] else [("bushel", `O bushel_obj)]
163 | None -> []
164 in
165
166 `O (base_fields @ karakeep_fields @ bushel_fields)
167
168(** {1 File Operations} *)
169
170let load_links_file path =
171 try
172 let yaml_str = In_channel.(with_open_bin path input_all) in
173 match Yamlrw.of_string yaml_str with
174 | `A links -> List.map t_of_yaml links
175 | _ -> []
176 with _ -> []
177
178let save_links_file path links =
179 let yaml = `A (List.map to_yaml links) in
180 let yaml_str = Yamlrw.to_string yaml in
181 let oc = open_out path in
182 output_string oc yaml_str;
183 close_out oc
184
185(** {1 Merging} *)
186
187let merge_links ?(prefer_new_date=false) existing new_links =
188 let links_by_url = Hashtbl.create (List.length existing) in
189
190 List.iter (fun link -> Hashtbl.replace links_by_url link.url link) existing;
191
192 List.iter (fun new_link ->
193 match Hashtbl.find_opt links_by_url new_link.url with
194 | None ->
195 Hashtbl.add links_by_url new_link.url new_link
196 | Some old_link ->
197 let description =
198 if new_link.description <> "" then new_link.description
199 else old_link.description
200 in
201 let karakeep =
202 match new_link.karakeep, old_link.karakeep with
203 | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url ->
204 let merged_metadata =
205 let meta_tbl = Hashtbl.create (List.length old_k.metadata) in
206 List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata;
207 List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata;
208 Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl []
209 in
210 let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in
211 Some { new_k with metadata = merged_metadata; tags = merged_tags }
212 | Some new_k, _ -> Some new_k
213 | None, old_k -> old_k
214 in
215 let bushel =
216 match new_link.bushel, old_link.bushel with
217 | Some new_b, Some old_b ->
218 let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in
219 let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in
220 Some { slugs = merged_slugs; tags = merged_tags }
221 | Some new_b, _ -> Some new_b
222 | None, old_b -> old_b
223 in
224 let date =
225 if prefer_new_date then new_link.date
226 else if compare new_link old_link > 0 then new_link.date
227 else old_link.date
228 in
229 let merged_link = { url = new_link.url; date; description; karakeep; bushel } in
230 Hashtbl.replace links_by_url new_link.url merged_link
231 ) new_links;
232
233 Hashtbl.to_seq_values links_by_url
234 |> List.of_seq
235 |> List.sort compare