My personal data management layer
at main 235 lines 7.5 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** External link tracking for Bushel *) 7 8type karakeep_data = { 9 remote_url : string; 10 id : string; 11 tags : string list; 12 metadata : (string * string) list; 13} 14 15type bushel_data = { 16 slugs : string list; 17 tags : string list; 18} 19 20type t = { 21 url : string; 22 date : Ptime.date; 23 description : string; 24 karakeep : karakeep_data option; 25 bushel : bushel_data option; 26} 27 28type ts = t list 29 30(** {1 Accessors} *) 31 32let url { url; _ } = url 33let date { date; _ } = date 34let description { description; _ } = description 35let datetime v = Bushel_types.ptime_of_date_exn (date v) 36 37(** {1 Comparison} *) 38 39let compare a b = Ptime.compare (datetime b) (datetime a) 40 41(** {1 YAML Parsing} *) 42 43let t_of_yaml = function 44 | `O fields -> 45 let url = 46 match List.assoc_opt "url" fields with 47 | Some (`String v) -> v 48 | _ -> failwith "link: missing or invalid url" 49 in 50 let date = 51 match List.assoc_opt "date" fields with 52 | Some (`String v) -> 53 (try 54 match String.split_on_char '-' v with 55 | [y; m; d] -> (int_of_string y, int_of_string m, int_of_string d) 56 | _ -> 57 v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a 58 with _ -> 59 v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a) 60 | _ -> failwith "link: missing or invalid date" 61 in 62 let description = 63 match List.assoc_opt "description" fields with 64 | Some (`String v) -> v 65 | _ -> "" 66 in 67 let karakeep = 68 match List.assoc_opt "karakeep" fields with 69 | Some (`O k_fields) -> 70 let remote_url = 71 match List.assoc_opt "remote_url" k_fields with 72 | Some (`String v) -> v 73 | _ -> failwith "link: invalid karakeep.remote_url" 74 in 75 let id = 76 match List.assoc_opt "id" k_fields with 77 | Some (`String v) -> v 78 | _ -> failwith "link: invalid karakeep.id" 79 in 80 let tags = 81 match List.assoc_opt "tags" k_fields with 82 | Some (`A tag_list) -> 83 List.filter_map (function `String t -> Some t | _ -> None) tag_list 84 | _ -> [] 85 in 86 let metadata = 87 match List.assoc_opt "metadata" k_fields with 88 | Some (`O meta_fields) -> 89 List.filter_map (fun (k, v) -> 90 match v with `String value -> Some (k, value) | _ -> None 91 ) meta_fields 92 | _ -> [] 93 in 94 Some { remote_url; id; tags; metadata } 95 | _ -> None 96 in 97 let bushel = 98 match List.assoc_opt "bushel" fields with 99 | Some (`O b_fields) -> 100 let slugs = 101 match List.assoc_opt "slugs" b_fields with 102 | Some (`A slug_list) -> 103 List.filter_map (function `String s -> Some s | _ -> None) slug_list 104 | _ -> [] 105 in 106 let tags = 107 match List.assoc_opt "tags" b_fields with 108 | Some (`A tag_list) -> 109 List.filter_map (function `String t -> Some t | _ -> None) tag_list 110 | _ -> [] 111 in 112 Some { slugs; tags } 113 | _ -> None 114 in 115 { url; date; description; karakeep; bushel } 116 | _ -> failwith "link: invalid yaml" 117 118(** {1 YAML Serialization} *) 119 120let to_yaml t = 121 let (year, month, day) = t.date in 122 let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in 123 124 let base_fields = [ 125 ("url", `String t.url); 126 ("date", `String date_str); 127 ] @ 128 (if t.description = "" then [] else [("description", `String t.description)]) 129 in 130 131 let karakeep_fields = 132 match t.karakeep with 133 | Some { remote_url; id; tags; metadata } -> 134 let karakeep_obj = [ 135 ("remote_url", `String remote_url); 136 ("id", `String id); 137 ] in 138 let karakeep_obj = 139 if tags = [] then karakeep_obj 140 else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj 141 in 142 let karakeep_obj = 143 if metadata = [] then karakeep_obj 144 else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj 145 in 146 [("karakeep", `O karakeep_obj)] 147 | None -> [] 148 in 149 150 let bushel_fields = 151 match t.bushel with 152 | Some { slugs; tags } -> 153 let bushel_obj = [] in 154 let bushel_obj = 155 if slugs = [] then bushel_obj 156 else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj 157 in 158 let bushel_obj = 159 if tags = [] then bushel_obj 160 else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj 161 in 162 if bushel_obj = [] then [] else [("bushel", `O bushel_obj)] 163 | None -> [] 164 in 165 166 `O (base_fields @ karakeep_fields @ bushel_fields) 167 168(** {1 File Operations} *) 169 170let load_links_file path = 171 try 172 let yaml_str = In_channel.(with_open_bin path input_all) in 173 match Yamlrw.of_string yaml_str with 174 | `A links -> List.map t_of_yaml links 175 | _ -> [] 176 with _ -> [] 177 178let save_links_file path links = 179 let yaml = `A (List.map to_yaml links) in 180 let yaml_str = Yamlrw.to_string yaml in 181 let oc = open_out path in 182 output_string oc yaml_str; 183 close_out oc 184 185(** {1 Merging} *) 186 187let merge_links ?(prefer_new_date=false) existing new_links = 188 let links_by_url = Hashtbl.create (List.length existing) in 189 190 List.iter (fun link -> Hashtbl.replace links_by_url link.url link) existing; 191 192 List.iter (fun new_link -> 193 match Hashtbl.find_opt links_by_url new_link.url with 194 | None -> 195 Hashtbl.add links_by_url new_link.url new_link 196 | Some old_link -> 197 let description = 198 if new_link.description <> "" then new_link.description 199 else old_link.description 200 in 201 let karakeep = 202 match new_link.karakeep, old_link.karakeep with 203 | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url -> 204 let merged_metadata = 205 let meta_tbl = Hashtbl.create (List.length old_k.metadata) in 206 List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata; 207 List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata; 208 Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl [] 209 in 210 let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in 211 Some { new_k with metadata = merged_metadata; tags = merged_tags } 212 | Some new_k, _ -> Some new_k 213 | None, old_k -> old_k 214 in 215 let bushel = 216 match new_link.bushel, old_link.bushel with 217 | Some new_b, Some old_b -> 218 let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in 219 let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in 220 Some { slugs = merged_slugs; tags = merged_tags } 221 | Some new_b, _ -> Some new_b 222 | None, old_b -> old_b 223 in 224 let date = 225 if prefer_new_date then new_link.date 226 else if compare new_link old_link > 0 then new_link.date 227 else old_link.date 228 in 229 let merged_link = { url = new_link.url; date; description; karakeep; bushel } in 230 Hashtbl.replace links_by_url new_link.url merged_link 231 ) new_links; 232 233 Hashtbl.to_seq_values links_by_url 234 |> List.of_seq 235 |> List.sort compare