My personal data management layer

wip import

+5699
+17
.gitignore
··· 1 + # OCaml build artifacts 2 + _build/ 3 + *.install 4 + *.merlin 5 + 6 + # Third-party sources (fetch locally with opam source) 7 + third_party/ 8 + 9 + # Editor and OS files 10 + .DS_Store 11 + *.swp 12 + *~ 13 + .vscode/ 14 + .idea/ 15 + 16 + # Opam local switch 17 + _opam/
+1
.ocamlformat
··· 1 + version=0.28.1
+53
.tangled/workflows/build.yml
··· 1 + when: 2 + - event: ["push", "pull_request"] 3 + branch: ["main"] 4 + 5 + engine: nixery 6 + 7 + dependencies: 8 + nixpkgs: 9 + - shell 10 + - stdenv 11 + - findutils 12 + - binutils 13 + - libunwind 14 + - ncurses 15 + - opam 16 + - git 17 + - gawk 18 + - gnupatch 19 + - gnum4 20 + - gnumake 21 + - gnutar 22 + - gnused 23 + - gnugrep 24 + - diffutils 25 + - gzip 26 + - bzip2 27 + - gcc 28 + - ocaml 29 + - pkg-config 30 + 31 + steps: 32 + - name: opam 33 + command: | 34 + opam init --disable-sandboxing -a -y 35 + - name: repo 36 + command: | 37 + opam repo add aoah https://tangled.org/anil.recoil.org/aoah-opam-repo.git 38 + - name: switch 39 + command: | 40 + opam install . --confirm-level=unsafe-yes --deps-only 41 + - name: build 42 + command: | 43 + opam exec -- dune build 44 + - name: switch-test 45 + command: | 46 + opam install . --confirm-level=unsafe-yes --deps-only --with-test 47 + - name: test 48 + command: | 49 + opam exec -- dune runtest --verbose 50 + - name: doc 51 + command: | 52 + opam install -y odoc 53 + opam exec -- dune build @doc
+15
LICENSE.md
··· 1 + ISC License 2 + 3 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org> 4 + 5 + Permission to use, copy, modify, and/or distribute this software for any 6 + purpose with or without fee is hereby granted, provided that the above 7 + copyright notice and this permission notice appear in all copies. 8 + 9 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 + PERFORMANCE OF THIS SOFTWARE.
+65
README.md
··· 1 + # Bushel 2 + 3 + Personal knowledge base and research entry management for OCaml. 4 + 5 + Bushel is a library for managing structured research entries including notes, 6 + papers, projects, ideas, videos, and contacts. It provides typed access to 7 + markdown files with YAML frontmatter and supports link graphs, markdown 8 + processing with custom extensions, and search integration. 9 + 10 + ## Features 11 + 12 + - **Entry Types**: Papers, notes, projects, ideas, videos, and contacts 13 + - **Frontmatter Parsing**: YAML metadata extraction using `frontmatter` 14 + - **Markdown Extensions**: Custom `:slug`, `@handle`, and `##tag` link syntax 15 + - **Link Graph**: Bidirectional link tracking between entries 16 + - **Typesense Integration**: Full-text search indexing 17 + - **Eio-based I/O**: Async directory loading with Eio 18 + 19 + ## Subpackages 20 + 21 + - `bushel`: Core library with entry types and utilities 22 + - `bushel.eio`: Eio-based directory loading 23 + - `bushel.config`: XDG-compliant TOML configuration 24 + - `bushel.sync`: Sync pipeline for images and thumbnails 25 + - `bushel.typesense`: Typesense search schema definitions 26 + 27 + ## Installation 28 + 29 + ```bash 30 + opam install bushel 31 + ``` 32 + 33 + ## Usage 34 + 35 + ```ocaml 36 + (* Load entries using Eio *) 37 + Eio_main.run @@ fun env -> 38 + let fs = Eio.Stdenv.fs env in 39 + let entries = Bushel_loader.load fs "/path/to/data" in 40 + 41 + (* Look up entries by slug *) 42 + match Bushel.Entry.lookup entries "my-note" with 43 + | Some (`Note n) -> Printf.printf "Title: %s\n" (Bushel.Note.title n) 44 + | _ -> () 45 + 46 + (* Get backlinks *) 47 + let backlinks = Bushel.Link_graph.get_backlinks_for_slug "my-note" in 48 + List.iter print_endline backlinks 49 + ``` 50 + 51 + ## CLI 52 + 53 + The `bushel` binary provides commands for: 54 + 55 + - `bushel list` - List all entries 56 + - `bushel show <slug>` - Show entry details 57 + - `bushel stats` - Show knowledge base statistics 58 + - `bushel sync` - Sync images and thumbnails 59 + - `bushel paper <doi>` - Add paper from DOI 60 + - `bushel config` - Show configuration 61 + - `bushel init` - Initialize configuration 62 + 63 + ## License 64 + 65 + ISC License. See [LICENSE.md](LICENSE.md).
+15
bin/dune
··· 1 + (executable 2 + (name main) 3 + (public_name bushel) 4 + (libraries 5 + bushel 6 + bushel.eio 7 + bushel.config 8 + bushel.sync 9 + cmdliner 10 + eio_main 11 + logs 12 + logs.cli 13 + logs.fmt 14 + fmt.tty 15 + fmt.cli))
+588
bin/main.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel CLI - knowledge base management tool *) 7 + 8 + open Cmdliner 9 + 10 + (** Simple table formatting *) 11 + module Table = struct 12 + type row = string list 13 + type t = { headers : string list; rows : row list } 14 + 15 + let make ~headers rows = { headers; rows } 16 + 17 + let column_widths t = 18 + let num_cols = List.length t.headers in 19 + let widths = Array.make num_cols 0 in 20 + (* Headers *) 21 + List.iteri (fun i h -> widths.(i) <- String.length h) t.headers; 22 + (* Rows *) 23 + List.iter (fun row -> 24 + List.iteri (fun i cell -> 25 + if i < num_cols then 26 + widths.(i) <- max widths.(i) (String.length cell) 27 + ) row 28 + ) t.rows; 29 + Array.to_list widths 30 + 31 + let pad s width = 32 + let len = String.length s in 33 + if len >= width then s 34 + else s ^ String.make (width - len) ' ' 35 + 36 + let print t = 37 + let widths = column_widths t in 38 + let print_row row = 39 + List.iter2 (fun cell width -> 40 + Printf.printf "%s " (pad cell width) 41 + ) row widths; 42 + print_newline () 43 + in 44 + (* Print header *) 45 + print_row t.headers; 46 + (* Print separator *) 47 + List.iter (fun w -> Printf.printf "%s " (String.make w '-')) widths; 48 + print_newline (); 49 + (* Print rows *) 50 + List.iter print_row t.rows 51 + end 52 + 53 + (** Truncate string to max length with ellipsis *) 54 + let truncate max_len s = 55 + if String.length s <= max_len then s 56 + else String.sub s 0 (max_len - 3) ^ "..." 57 + 58 + (** Format date tuple *) 59 + let format_date (year, month, day) = 60 + Printf.sprintf "%04d-%02d-%02d" year month day 61 + 62 + (** Entry type to string *) 63 + let type_string = function 64 + | `Paper _ -> "paper" 65 + | `Project _ -> "project" 66 + | `Idea _ -> "idea" 67 + | `Video _ -> "video" 68 + | `Note _ -> "note" 69 + 70 + (** {1 Common Options} *) 71 + 72 + let data_dir = 73 + let doc = "Path to the bushel data repository." in 74 + let env = Cmd.Env.info "BUSHEL_DATA" in 75 + Arg.(value & opt (some string) None & info ["d"; "data-dir"] ~env ~docv:"DIR" ~doc) 76 + 77 + let config_file = 78 + let doc = "Path to config file (default: ~/.config/bushel/config.toml)." in 79 + Arg.(value & opt (some string) None & info ["c"; "config"] ~docv:"FILE" ~doc) 80 + 81 + (** Setup logging *) 82 + let setup_log style_renderer level = 83 + Fmt_tty.setup_std_outputs ?style_renderer (); 84 + Logs.set_level level; 85 + Logs.set_reporter (Logs_fmt.reporter ()) 86 + 87 + let logging_t = 88 + Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ()) 89 + 90 + (** Load config *) 91 + let load_config config_file = 92 + match config_file with 93 + | Some path -> Bushel_config.load_file path 94 + | None -> Bushel_config.load () 95 + 96 + (** Get data directory from config or CLI *) 97 + let get_data_dir config data_dir_opt = 98 + match data_dir_opt with 99 + | Some d -> d 100 + | None -> config.Bushel_config.data_dir 101 + 102 + (** Load entries using Eio *) 103 + let with_entries data_dir f = 104 + Eio_main.run @@ fun env -> 105 + let fs = Eio.Stdenv.fs env in 106 + let entries = Bushel_eio.Bushel_loader.load fs data_dir in 107 + f env entries 108 + 109 + (** {1 List Command} *) 110 + 111 + let list_cmd = 112 + let type_filter = 113 + let doc = "Filter by entry type (paper, project, idea, video, note)." in 114 + Arg.(value & opt (some string) None & info ["t"; "type"] ~docv:"TYPE" ~doc) 115 + in 116 + let limit = 117 + let doc = "Maximum number of entries to show." in 118 + Arg.(value & opt (some int) None & info ["n"; "limit"] ~docv:"N" ~doc) 119 + in 120 + let sort_by = 121 + let doc = "Sort by field (date, title, type). Default: date." in 122 + Arg.(value & opt string "date" & info ["s"; "sort"] ~docv:"FIELD" ~doc) 123 + in 124 + let run () config_file data_dir type_filter limit sort_by = 125 + match load_config config_file with 126 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 127 + | Ok config -> 128 + let data_dir = get_data_dir config data_dir in 129 + with_entries data_dir @@ fun _env entries -> 130 + let all = Bushel.Entry.all_entries entries in 131 + (* Filter by type *) 132 + let filtered = match type_filter with 133 + | None -> all 134 + | Some t -> 135 + List.filter (fun e -> 136 + String.lowercase_ascii (type_string e) = String.lowercase_ascii t 137 + ) all 138 + in 139 + (* Sort *) 140 + let sorted = match sort_by with 141 + | "title" -> 142 + List.sort (fun a b -> 143 + String.compare (Bushel.Entry.title a) (Bushel.Entry.title b) 144 + ) filtered 145 + | "type" -> 146 + List.sort (fun a b -> 147 + let cmp = String.compare (type_string a) (type_string b) in 148 + if cmp <> 0 then cmp 149 + else Bushel.Entry.compare a b 150 + ) filtered 151 + | _ -> (* date, default *) 152 + List.sort (fun a b -> Bushel.Entry.compare b a) filtered (* newest first *) 153 + in 154 + (* Limit *) 155 + let limited = match limit with 156 + | None -> sorted 157 + | Some n -> List.filteri (fun i _ -> i < n) sorted 158 + in 159 + (* Build table *) 160 + let rows = List.map (fun e -> 161 + [ type_string e 162 + ; Bushel.Entry.slug e 163 + ; truncate 50 (Bushel.Entry.title e) 164 + ; format_date (Bushel.Entry.date e) 165 + ] 166 + ) limited in 167 + let table = Table.make 168 + ~headers:["TYPE"; "SLUG"; "TITLE"; "DATE"] 169 + rows 170 + in 171 + Table.print table; 172 + Printf.printf "\nTotal: %d entries\n" (List.length limited); 173 + 0 174 + in 175 + let doc = "List all entries in the knowledge base." in 176 + let info = Cmd.info "list" ~doc in 177 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ type_filter $ limit $ sort_by) 178 + 179 + (** {1 Stats Command} *) 180 + 181 + let stats_cmd = 182 + let run () config_file data_dir = 183 + match load_config config_file with 184 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 185 + | Ok config -> 186 + let data_dir = get_data_dir config data_dir in 187 + with_entries data_dir @@ fun _env entries -> 188 + let papers = List.length (Bushel.Entry.papers entries) in 189 + let notes = List.length (Bushel.Entry.notes entries) in 190 + let projects = List.length (Bushel.Entry.projects entries) in 191 + let ideas = List.length (Bushel.Entry.ideas entries) in 192 + let videos = List.length (Bushel.Entry.videos entries) in 193 + let contacts = List.length (Bushel.Entry.contacts entries) in 194 + Printf.printf "Bushel Statistics\n"; 195 + Printf.printf "=================\n"; 196 + Printf.printf "Papers: %4d\n" papers; 197 + Printf.printf "Notes: %4d\n" notes; 198 + Printf.printf "Projects: %4d\n" projects; 199 + Printf.printf "Ideas: %4d\n" ideas; 200 + Printf.printf "Videos: %4d\n" videos; 201 + Printf.printf "Contacts: %4d\n" contacts; 202 + Printf.printf "-----------------\n"; 203 + Printf.printf "Total: %4d\n" (papers + notes + projects + ideas + videos); 204 + 0 205 + in 206 + let doc = "Show statistics about the knowledge base." in 207 + let info = Cmd.info "stats" ~doc in 208 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir) 209 + 210 + (** {1 Show Command} *) 211 + 212 + let show_cmd = 213 + let slug_arg = 214 + let doc = "The slug of the entry to show." in 215 + Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc) 216 + in 217 + let run () config_file data_dir slug = 218 + match load_config config_file with 219 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 220 + | Ok config -> 221 + let data_dir = get_data_dir config data_dir in 222 + with_entries data_dir @@ fun _env entries -> 223 + match Bushel.Entry.lookup entries slug with 224 + | None -> 225 + Printf.eprintf "Entry not found: %s\n" slug; 226 + 1 227 + | Some entry -> 228 + Printf.printf "Type: %s\n" (type_string entry); 229 + Printf.printf "Slug: %s\n" (Bushel.Entry.slug entry); 230 + Printf.printf "Title: %s\n" (Bushel.Entry.title entry); 231 + Printf.printf "Date: %s\n" (format_date (Bushel.Entry.date entry)); 232 + Printf.printf "URL: %s\n" (Bushel.Entry.site_url entry); 233 + (match Bushel.Entry.synopsis entry with 234 + | Some s -> Printf.printf "Synopsis: %s\n" s 235 + | None -> ()); 236 + Printf.printf "\n--- Body ---\n%s\n" (Bushel.Entry.body entry); 237 + 0 238 + in 239 + let doc = "Show details of a specific entry." in 240 + let info = Cmd.info "show" ~doc in 241 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ slug_arg) 242 + 243 + (** {1 Sync Command} *) 244 + 245 + let sync_cmd = 246 + let remote = 247 + let doc = "Also upload to Typesense (remote sync)." in 248 + Arg.(value & flag & info ["remote"] ~doc) 249 + in 250 + let only = 251 + let doc = "Only run specific step (images, srcsetter, thumbs, faces, videos, typesense)." in 252 + Arg.(value & opt (some string) None & info ["only"] ~docv:"STEP" ~doc) 253 + in 254 + let run () config_file data_dir remote only = 255 + match load_config config_file with 256 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 257 + | Ok config -> 258 + let data_dir = get_data_dir config data_dir in 259 + (* Determine which steps to run *) 260 + let steps = match only with 261 + | Some step_name -> 262 + (match Bushel_sync.step_of_string step_name with 263 + | Some step -> [step] 264 + | None -> 265 + Printf.eprintf "Unknown step: %s\n" step_name; 266 + Printf.eprintf "Valid steps: images, srcsetter, thumbs, faces, videos, typesense\n"; 267 + exit 1) 268 + | None -> 269 + if remote then Bushel_sync.all_steps_with_remote 270 + else Bushel_sync.all_steps 271 + in 272 + 273 + Eio_main.run @@ fun env -> 274 + let fs = Eio.Stdenv.fs env in 275 + let entries = Bushel_eio.Bushel_loader.load fs data_dir in 276 + 277 + Printf.printf "Running sync pipeline...\n"; 278 + List.iter (fun step -> 279 + Printf.printf " - %s\n" (Bushel_sync.string_of_step step) 280 + ) steps; 281 + Printf.printf "\n"; 282 + 283 + let results = Bushel_sync.run ~env ~config ~steps ~entries in 284 + 285 + Printf.printf "\nResults:\n"; 286 + List.iter (fun r -> 287 + let status = if r.Bushel_sync.success then "OK" else "FAIL" in 288 + Printf.printf " [%s] %s: %s\n" 289 + status 290 + (Bushel_sync.string_of_step r.step) 291 + r.message 292 + ) results; 293 + 294 + let failures = List.filter (fun r -> not r.Bushel_sync.success) results in 295 + if failures = [] then 0 else 1 296 + in 297 + let doc = "Sync images, thumbnails, and optionally upload to Typesense." in 298 + let man = [ 299 + `S Manpage.s_description; 300 + `P "The sync command runs a pipeline to synchronize images and thumbnails:"; 301 + `P "1. $(b,images) - Rsync images from remote server"; 302 + `P "2. $(b,srcsetter) - Convert images to WebP srcset variants"; 303 + `P "3. $(b,thumbs) - Generate paper thumbnails from PDFs"; 304 + `P "4. $(b,faces) - Fetch contact face thumbnails from Immich"; 305 + `P "5. $(b,videos) - Fetch video thumbnails from PeerTube"; 306 + `P "6. $(b,typesense) - Upload to Typesense (with --remote)"; 307 + ] in 308 + let info = Cmd.info "sync" ~doc ~man in 309 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ only) 310 + 311 + (** {1 Paper Add Command} *) 312 + 313 + let paper_add_cmd = 314 + let doi_arg = 315 + let doc = "The DOI to resolve." in 316 + Arg.(required & pos 0 (some string) None & info [] ~docv:"DOI" ~doc) 317 + in 318 + let slug = 319 + let doc = "Slug for the paper (e.g., 2024-venue-name)." in 320 + Arg.(required & opt (some string) None & info ["slug"] ~docv:"SLUG" ~doc) 321 + in 322 + let version = 323 + let doc = "Paper version (e.g., v1, v2). Auto-increments if not specified." in 324 + Arg.(value & opt (some string) None & info ["ver"] ~docv:"VER" ~doc) 325 + in 326 + let run () config_file data_dir doi slug version = 327 + match load_config config_file with 328 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 329 + | Ok config -> 330 + let data_dir = get_data_dir config data_dir in 331 + 332 + Eio_main.run @@ fun env -> 333 + let fs = Eio.Stdenv.fs env in 334 + let proc_mgr = Eio.Stdenv.process_mgr env in 335 + let entries = Bushel_eio.Bushel_loader.load fs data_dir in 336 + 337 + (* Determine version *) 338 + let papers_dir = Filename.concat data_dir ("data/papers/" ^ slug) in 339 + let version = match version with 340 + | Some v -> v 341 + | None -> 342 + (* Auto-increment: find highest existing version *) 343 + if Sys.file_exists papers_dir then begin 344 + let files = Sys.readdir papers_dir |> Array.to_list in 345 + let versions = List.filter_map (fun f -> 346 + if Filename.check_suffix f ".md" then 347 + Some (Filename.chop_extension f) 348 + else None 349 + ) files in 350 + let max_ver = List.fold_left (fun acc v -> 351 + try 352 + let n = Scanf.sscanf v "v%d" Fun.id in 353 + max acc n 354 + with _ -> acc 355 + ) 0 versions in 356 + Printf.sprintf "v%d" (max_ver + 1) 357 + end else "v1" 358 + in 359 + 360 + Printf.printf "Resolving DOI: %s\n" doi; 361 + Printf.printf "Slug: %s, Version: %s\n" slug version; 362 + 363 + match Bushel_sync.Zotero.resolve ~proc_mgr 364 + ~server_url:config.zotero_translation_server 365 + ~slug doi with 366 + | Error e -> 367 + Printf.eprintf "Error resolving DOI: %s\n" e; 368 + 1 369 + | Ok metadata -> 370 + Printf.printf "Title: %s\n" metadata.title; 371 + Printf.printf "Authors: %s\n" (String.concat ", " metadata.authors); 372 + Printf.printf "Year: %d\n" metadata.year; 373 + 374 + (* Check for existing versions and merge *) 375 + let metadata = 376 + let existing_papers = Bushel.Entry.papers entries in 377 + match Bushel.Paper.lookup existing_papers slug with 378 + | Some existing -> 379 + Printf.printf "Merging with existing paper data...\n"; 380 + Bushel_sync.Zotero.merge_with_existing ~existing metadata 381 + | None -> metadata 382 + in 383 + 384 + (* Generate file content *) 385 + let content = Bushel_sync.Zotero.to_yaml_frontmatter ~slug ~ver:version metadata in 386 + 387 + (* Create directory if needed *) 388 + if not (Sys.file_exists papers_dir) then 389 + Unix.mkdir papers_dir 0o755; 390 + 391 + (* Write file *) 392 + let filepath = Filename.concat papers_dir (version ^ ".md") in 393 + let oc = open_out filepath in 394 + output_string oc content; 395 + close_out oc; 396 + 397 + Printf.printf "Created: %s\n" filepath; 398 + 0 399 + in 400 + let doc = "Add a paper from DOI, merging with existing versions." in 401 + let man = [ 402 + `S Manpage.s_description; 403 + `P "Resolves a DOI using the Zotero Translation Server and creates a paper entry."; 404 + `P "If older versions of the paper exist, preserves abstract, tags, projects, \ 405 + selected flag, and slides from the existing paper."; 406 + ] in 407 + let info = Cmd.info "paper" ~doc ~man in 408 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ doi_arg $ slug $ version) 409 + 410 + (** {1 Video Fetch Command} *) 411 + 412 + let video_fetch_cmd = 413 + let server = 414 + let doc = "PeerTube server name from config." in 415 + Arg.(required & opt (some string) None & info ["server"; "s"] ~docv:"NAME" ~doc) 416 + in 417 + let channel = 418 + let doc = "Channel name to fetch videos from." in 419 + Arg.(required & opt (some string) None & info ["channel"] ~docv:"CHANNEL" ~doc) 420 + in 421 + let run () config_file data_dir server channel = 422 + match load_config config_file with 423 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 424 + | Ok config -> 425 + let data_dir = get_data_dir config data_dir in 426 + 427 + (* Find server endpoint *) 428 + let endpoint = List.find_map (fun (s : Bushel_config.peertube_server) -> 429 + if s.name = server then Some s.endpoint else None 430 + ) config.peertube_servers in 431 + 432 + match endpoint with 433 + | None -> 434 + Printf.eprintf "Unknown server: %s\n" server; 435 + Printf.eprintf "Available servers:\n"; 436 + List.iter (fun (s : Bushel_config.peertube_server) -> 437 + Printf.eprintf " - %s (%s)\n" s.name s.endpoint 438 + ) config.peertube_servers; 439 + 1 440 + | Some endpoint -> 441 + Eio_main.run @@ fun env -> 442 + let proc_mgr = Eio.Stdenv.process_mgr env in 443 + 444 + Printf.printf "Fetching videos from %s channel %s...\n" server channel; 445 + 446 + let videos = Bushel_sync.Peertube.fetch_all_channel_videos 447 + ~proc_mgr ~endpoint ~channel () in 448 + 449 + Printf.printf "Found %d videos\n" (List.length videos); 450 + 451 + (* Load or create videos index *) 452 + let index_path = Filename.concat data_dir "data/videos.yml" in 453 + let index = Bushel_sync.Peertube.VideoIndex.load_file index_path in 454 + 455 + (* Create video files and update index *) 456 + let videos_dir = Filename.concat data_dir "data/videos" in 457 + if not (Sys.file_exists videos_dir) then 458 + Unix.mkdir videos_dir 0o755; 459 + 460 + let new_count = ref 0 in 461 + List.iter (fun (video : Bushel_sync.Peertube.video) -> 462 + let video_path = Filename.concat videos_dir (video.uuid ^ ".md") in 463 + 464 + if Sys.file_exists video_path then 465 + Printf.printf " Skipping %s (exists)\n" video.uuid 466 + else begin 467 + Printf.printf " Creating %s: %s\n" video.uuid video.name; 468 + 469 + (* Generate markdown file *) 470 + let content = Printf.sprintf {|--- 471 + title: %s 472 + published_date: %s 473 + uuid: %s 474 + url: %s 475 + talk: false 476 + tags: [] 477 + --- 478 + 479 + %s 480 + |} 481 + video.name 482 + (Ptime.to_rfc3339 video.published_at) 483 + video.uuid 484 + video.url 485 + (Option.value ~default:"" video.description) 486 + in 487 + 488 + let oc = open_out video_path in 489 + output_string oc content; 490 + close_out oc; 491 + 492 + (* Update index *) 493 + Bushel_sync.Peertube.VideoIndex.add index ~uuid:video.uuid ~server; 494 + incr new_count 495 + end 496 + ) videos; 497 + 498 + (* Save updated index *) 499 + Bushel_sync.Peertube.VideoIndex.save_file index_path index; 500 + 501 + Printf.printf "\nCreated %d new video entries\n" !new_count; 502 + Printf.printf "Updated index: %s\n" index_path; 503 + 0 504 + in 505 + let doc = "Fetch videos from a PeerTube channel." in 506 + let info = Cmd.info "video" ~doc in 507 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ server $ channel) 508 + 509 + (** {1 Config Command} *) 510 + 511 + let config_cmd = 512 + let run () config_file = 513 + match load_config config_file with 514 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 515 + | Ok config -> 516 + Printf.printf "Config file: %s\n" (Bushel_config.config_file ()); 517 + Printf.printf "\n"; 518 + Fmt.pr "%a\n" Bushel_config.pp config; 519 + 0 520 + in 521 + let doc = "Show current configuration." in 522 + let info = Cmd.info "config" ~doc in 523 + Cmd.v info Term.(const run $ logging_t $ config_file) 524 + 525 + (** {1 Init Command} *) 526 + 527 + let init_cmd = 528 + let force = 529 + let doc = "Overwrite existing config file." in 530 + Arg.(value & flag & info ["force"; "f"] ~doc) 531 + in 532 + let run () force = 533 + match Bushel_config.write_default_config ~force () with 534 + | Error e -> 535 + Printf.eprintf "%s\n" e; 536 + 1 537 + | Ok path -> 538 + Printf.printf "Created config file: %s\n" path; 539 + Printf.printf "\nEdit this file to configure:\n"; 540 + Printf.printf " - Remote server for image sync\n"; 541 + Printf.printf " - Local data and image directories\n"; 542 + Printf.printf " - Immich endpoint and API key\n"; 543 + Printf.printf " - PeerTube servers\n"; 544 + Printf.printf " - Typesense and OpenAI API keys\n"; 545 + Printf.printf " - Zotero Translation Server URL\n"; 546 + 0 547 + in 548 + let doc = "Initialize a default configuration file." in 549 + let man = [ 550 + `S Manpage.s_description; 551 + `P "Creates a default config.toml file at ~/.config/bushel/config.toml"; 552 + `P "The generated file includes comments explaining each option."; 553 + `P "Use --force to overwrite an existing config file."; 554 + ] in 555 + let info = Cmd.info "init" ~doc ~man in 556 + Cmd.v info Term.(const run $ logging_t $ force) 557 + 558 + (** {1 Main Command Group} *) 559 + 560 + let main_cmd = 561 + let doc = "Bushel knowledge base CLI" in 562 + let man = [ 563 + `S Manpage.s_description; 564 + `P "Bushel is a CLI tool for managing and querying a knowledge base \ 565 + containing papers, notes, projects, ideas, and videos."; 566 + `S Manpage.s_commands; 567 + `P "Use $(b,bushel COMMAND --help) for help on a specific command."; 568 + `S "CONFIGURATION"; 569 + `P "Configuration is read from ~/.config/bushel/config.toml"; 570 + `P "See $(b,bushel config) for current settings."; 571 + ] in 572 + let info = Cmd.info "bushel" ~version:"0.2.0" ~doc ~man in 573 + Cmd.group info [ 574 + init_cmd; 575 + list_cmd; 576 + stats_cmd; 577 + show_cmd; 578 + sync_cmd; 579 + paper_add_cmd; 580 + video_fetch_cmd; 581 + config_cmd; 582 + ] 583 + 584 + let () = 585 + match Cmd.eval_value main_cmd with 586 + | Ok (`Ok exit_code) -> exit exit_code 587 + | Ok (`Help | `Version) -> exit 0 588 + | Error _ -> exit 1
+56
bushel.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Personal knowledge base and research entry management" 4 + description: """ 5 + Bushel is a library for managing structured research entries including 6 + notes, papers, projects, ideas, videos, and contacts. It provides typed 7 + access to markdown files with YAML frontmatter and supports link graphs, 8 + markdown processing with custom extensions, and search integration. 9 + Right now this is a fairly specific workflow used by Anil Madhavapeddy, 10 + but it may generalise in the future. 11 + 12 + Subpackages: 13 + - bushel.eio: Eio-based directory loading 14 + - bushel.config: XDG-compliant TOML configuration 15 + - bushel.sync: Sync pipeline for images and thumbnails 16 + - bushel.typesense: Typesense search integration""" 17 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 18 + authors: ["Anil Madhavapeddy <anil@recoil.org>"] 19 + license: "ISC" 20 + depends: [ 21 + "dune" {>= "3.18"} 22 + "ocaml" {>= "5.2"} 23 + "frontmatter" {>= "0.1"} 24 + "frontmatter-eio" {>= "0.1"} 25 + "cmarkit" {>= "0.3"} 26 + "jsont" {>= "0.1"} 27 + "bytesrw" 28 + "ptime" {>= "1.2"} 29 + "re" {>= "1.11"} 30 + "uri" {>= "4.4"} 31 + "fmt" {>= "0.9"} 32 + "eio" {>= "1.2"} 33 + "tomlt" {>= "0.1"} 34 + "typesense" {>= "0.1"} 35 + "astring" {>= "0.8"} 36 + "logs" {>= "0.7"} 37 + "yamlrw" 38 + "cmdliner" 39 + "eio_main" 40 + "odoc" {with-doc} 41 + ] 42 + build: [ 43 + ["dune" "subst"] {dev} 44 + [ 45 + "dune" 46 + "build" 47 + "-p" 48 + name 49 + "-j" 50 + jobs 51 + "@install" 52 + "@runtest" {with-test} 53 + "@doc" {with-doc} 54 + ] 55 + ] 56 + x-maintenance-intent: ["(latest)"]
+46
dune-project
··· 1 + (lang dune 3.18) 2 + (name bushel) 3 + 4 + (generate_opam_files true) 5 + (maintenance_intent "(latest)") 6 + 7 + (license ISC) 8 + (authors "Anil Madhavapeddy <anil@recoil.org>") 9 + (maintainers "Anil Madhavapeddy <anil@recoil.org>") 10 + 11 + (package 12 + (name bushel) 13 + (synopsis "Personal knowledge base and research entry management") 14 + (description 15 + "Bushel is a library for managing structured research entries including 16 + notes, papers, projects, ideas, videos, and contacts. It provides typed 17 + access to markdown files with YAML frontmatter and supports link graphs, 18 + markdown processing with custom extensions, and search integration. 19 + Right now this is a fairly specific workflow used by Anil Madhavapeddy, 20 + but it may generalise in the future. 21 + 22 + Subpackages: 23 + - bushel.eio: Eio-based directory loading 24 + - bushel.config: XDG-compliant TOML configuration 25 + - bushel.sync: Sync pipeline for images and thumbnails 26 + - bushel.typesense: Typesense search integration") 27 + (depends 28 + (ocaml (>= 5.2)) 29 + (frontmatter (>= 0.1)) 30 + (frontmatter-eio (>= 0.1)) 31 + (cmarkit (>= 0.3)) 32 + (jsont (>= 0.1)) 33 + bytesrw 34 + (ptime (>= 1.2)) 35 + (re (>= 1.11)) 36 + (uri (>= 4.4)) 37 + (fmt (>= 0.9)) 38 + (eio (>= 1.2)) 39 + (tomlt (>= 0.1)) 40 + (typesense (>= 0.1)) 41 + (astring (>= 0.8)) 42 + (logs (>= 0.7)) 43 + yamlrw 44 + cmdliner 45 + eio_main 46 + (odoc :with-doc)))
+92
lib/bushel.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel - Personal knowledge base and research entry management 7 + 8 + Bushel is a library for managing structured research entries including 9 + notes, papers, projects, ideas, videos, and contacts. It provides typed 10 + access to markdown files with YAML frontmatter and supports link graphs, 11 + markdown processing with custom extensions, and search integration. 12 + 13 + {1 Entry Types} 14 + 15 + - {!Contact} - People/researchers with social links 16 + - {!Note} - Blog posts and research notes 17 + - {!Paper} - Academic papers with BibTeX metadata 18 + - {!Project} - Research projects 19 + - {!Idea} - Research ideas/proposals 20 + - {!Video} - Talk videos and recordings 21 + 22 + {1 Core Modules} 23 + 24 + - {!Entry} - Union type for all entry types with common operations 25 + - {!Tags} - Tag parsing and filtering 26 + - {!Md} - Markdown processing with Bushel link extensions 27 + - {!Link_graph} - Bidirectional link tracking between entries 28 + 29 + {1 Quick Start} 30 + 31 + {[ 32 + (* Load entries using bushel-eio *) 33 + let entries = Bushel_loader.load fs "/path/to/data" in 34 + 35 + (* Look up entries by slug *) 36 + match Bushel.Entry.lookup entries "my-note" with 37 + | Some (`Note n) -> Printf.printf "Title: %s\n" (Bushel.Note.title n) 38 + | _ -> () 39 + 40 + (* Get backlinks *) 41 + let backlinks = Bushel.Link_graph.get_backlinks_for_slug "my-note" in 42 + List.iter print_endline backlinks 43 + ]} 44 + *) 45 + 46 + (** {1 Entry Types} *) 47 + 48 + module Contact = Bushel_contact 49 + (** Contact/person entries. *) 50 + 51 + module Note = Bushel_note 52 + (** Blog post and research note entries. *) 53 + 54 + module Paper = Bushel_paper 55 + (** Academic paper entries with BibTeX-style metadata. *) 56 + 57 + module Project = Bushel_project 58 + (** Research project entries. *) 59 + 60 + module Idea = Bushel_idea 61 + (** Research idea/proposal entries. *) 62 + 63 + module Video = Bushel_video 64 + (** Video/talk recording entries. *) 65 + 66 + (** {1 Core Modules} *) 67 + 68 + module Entry = Bushel_entry 69 + (** Union type for all entry types with common accessors. *) 70 + 71 + module Tags = Bushel_tags 72 + (** Tag parsing, filtering, and counting. *) 73 + 74 + module Md = Bushel_md 75 + (** Markdown processing with Bushel link extensions. *) 76 + 77 + module Link = Bushel_link 78 + (** External link tracking and merging. *) 79 + 80 + module Link_graph = Bushel_link_graph 81 + (** Bidirectional link graph for entry relationships. *) 82 + 83 + module Description = Bushel_description 84 + (** Generate descriptive text for entries. *) 85 + 86 + (** {1 Utilities} *) 87 + 88 + module Types = Bushel_types 89 + (** Common types and Jsont codecs. *) 90 + 91 + module Util = Bushel_util 92 + (** Utility functions (word counting, text processing). *)
+154
lib/bushel_contact.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Contact/person entry type for Bushel *) 7 + 8 + type t = { 9 + names : string list; 10 + handle : string; 11 + email : string option; 12 + icon : string option; 13 + github : string option; 14 + twitter : string option; 15 + bluesky : string option; 16 + mastodon : string option; 17 + orcid : string option; 18 + url : string option; 19 + atom : string list option; 20 + } 21 + 22 + type ts = t list 23 + 24 + (** {1 Constructors} *) 25 + 26 + let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names = 27 + { names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom } 28 + 29 + let make names email icon github twitter bluesky mastodon orcid url atom = 30 + v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names 31 + 32 + (** {1 Accessors} *) 33 + 34 + let names { names; _ } = names 35 + 36 + let name c = 37 + match c.names with 38 + | n :: _ -> n 39 + | [] -> failwith (Printf.sprintf "Contact with handle '%s' has empty names list" c.handle) 40 + 41 + let handle { handle; _ } = handle 42 + let email { email; _ } = email 43 + let icon { icon; _ } = icon 44 + let github { github; _ } = github 45 + let twitter { twitter; _ } = twitter 46 + let bluesky { bluesky; _ } = bluesky 47 + let mastodon { mastodon; _ } = mastodon 48 + let orcid { orcid; _ } = orcid 49 + let url { url; _ } = url 50 + let atom { atom; _ } = atom 51 + 52 + (** {1 Jsont Codec} *) 53 + 54 + let jsont : t Jsont.t = 55 + let open Jsont in 56 + let open Jsont.Object in 57 + let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in 58 + map ~kind:"Contact" make 59 + |> mem "names" (list string) ~dec_absent:[] ~enc:names 60 + |> mem_opt "email" (some string) ~enc:email 61 + |> mem_opt "icon" (some string) ~enc:icon 62 + |> mem_opt "github" (some string) ~enc:github 63 + |> mem_opt "twitter" (some string) ~enc:twitter 64 + |> mem_opt "bluesky" (some string) ~enc:bluesky 65 + |> mem_opt "mastodon" (some string) ~enc:mastodon 66 + |> mem_opt "orcid" (some string) ~enc:orcid 67 + |> mem_opt "url" (some string) ~enc:url 68 + |> mem_opt "atom" (some (list string)) ~enc:atom 69 + |> finish 70 + 71 + (** {1 Parsing} *) 72 + 73 + let of_frontmatter ~handle (fm : Frontmatter.t) : (t, string) result = 74 + match Frontmatter.decode jsont fm with 75 + | Ok c -> Ok { c with handle } 76 + | Error e -> Error e 77 + 78 + (** {1 Lookup Functions} *) 79 + 80 + let compare a b = String.compare a.handle b.handle 81 + 82 + let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts 83 + 84 + let best_url c = 85 + match c.url with 86 + | Some _ as url -> url 87 + | None -> 88 + match c.github with 89 + | Some g -> Some ("https://github.com/" ^ g) 90 + | None -> Option.map (fun e -> "mailto:" ^ e) c.email 91 + 92 + (** Given a name, turn it lowercase and return the concatenation of the 93 + initials of all the words in the name and the full last name. *) 94 + let handle_of_name name = 95 + let name = String.lowercase_ascii name in 96 + let words = String.split_on_char ' ' name in 97 + let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in 98 + initials ^ List.hd (List.rev words) 99 + 100 + (** Fuzzy lookup for an author by name. *) 101 + let lookup_by_name ts a = 102 + let a = String.lowercase_ascii a in 103 + let rec aux acc = function 104 + | [] -> acc 105 + | t :: ts -> 106 + if List.exists (fun n -> String.lowercase_ascii n = a) t.names 107 + then aux (t :: acc) ts 108 + else aux acc ts 109 + in 110 + match aux [] ts with 111 + | [ a ] -> a 112 + | [] -> raise (Failure ("Contact not found: " ^ a)) 113 + | _ -> raise (Failure ("Ambiguous contact: " ^ a)) 114 + 115 + (** {1 Pretty Printing} *) 116 + 117 + let pp ppf c = 118 + let open Fmt in 119 + pf ppf "@[<v>"; 120 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact"; 121 + pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c); 122 + pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c); 123 + let ns = names c in 124 + if List.length ns > 1 then 125 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns); 126 + (match email c with 127 + | Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e 128 + | None -> ()); 129 + (match github c with 130 + | Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g 131 + | None -> ()); 132 + (match twitter c with 133 + | Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t 134 + | None -> ()); 135 + (match bluesky c with 136 + | Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b 137 + | None -> ()); 138 + (match mastodon c with 139 + | Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m 140 + | None -> ()); 141 + (match orcid c with 142 + | Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o 143 + | None -> ()); 144 + (match url c with 145 + | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u 146 + | None -> ()); 147 + (match icon c with 148 + | Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i 149 + | None -> ()); 150 + (match atom c with 151 + | Some atoms when atoms <> [] -> 152 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms 153 + | _ -> ()); 154 + pf ppf "@]"
+86
lib/bushel_description.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Generate descriptive text for Bushel entries *) 7 + 8 + (** Format a date as "Month Year" *) 9 + let format_date (year, month, _day) = 10 + Printf.sprintf "%s %d" (Bushel_types.month_name month) year 11 + 12 + (** Generate a descriptive sentence for a paper *) 13 + let paper_description (p : Bushel_paper.t) ~date_str = 14 + let venue = match String.lowercase_ascii (Bushel_paper.bibtype p) with 15 + | "inproceedings" -> Bushel_paper.booktitle p 16 + | "article" -> Bushel_paper.journal p 17 + | "book" -> 18 + let pub = Bushel_paper.publisher p in 19 + if pub = "" then "Book" else "Book by " ^ pub 20 + | "techreport" -> 21 + let inst = Bushel_paper.institution p in 22 + if inst = "" then "Technical report" else "Technical report at " ^ inst 23 + | "misc" -> 24 + let pub = Bushel_paper.publisher p in 25 + if pub = "" then "Working paper" else "Working paper at " ^ pub 26 + | _ -> "Publication" 27 + in 28 + Printf.sprintf "Paper in %s (%s)" venue date_str 29 + 30 + (** Generate a descriptive sentence for a note *) 31 + let note_description (n : Bushel_note.t) ~date_str ~lookup_fn = 32 + match Bushel_note.slug_ent n with 33 + | Some slug_ent -> 34 + (match lookup_fn slug_ent with 35 + | Some related_title -> 36 + Printf.sprintf "Note about %s (%s)" related_title date_str 37 + | None -> Printf.sprintf "Research note (%s)" date_str) 38 + | None -> Printf.sprintf "Research note (%s)" date_str 39 + 40 + (** Generate a descriptive sentence for an idea *) 41 + let idea_description (i : Bushel_idea.t) ~date_str = 42 + let status_str = String.lowercase_ascii (Bushel_idea.status_to_string (Bushel_idea.status i)) in 43 + let level_str = Bushel_idea.level_to_string (Bushel_idea.level i) in 44 + Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str 45 + 46 + (** Generate a descriptive sentence for a video *) 47 + let video_description (v : Bushel_video.t) ~date_str ~lookup_fn = 48 + let video_type = if Bushel_video.talk v then "Talk video" else "Video" in 49 + let context = match Bushel_video.paper v with 50 + | Some paper_slug -> 51 + (match lookup_fn paper_slug with 52 + | Some title -> Printf.sprintf " about %s" title 53 + | None -> "") 54 + | None -> 55 + (match Bushel_video.project v with 56 + | Some project_slug -> 57 + (match lookup_fn project_slug with 58 + | Some title -> Printf.sprintf " about %s" title 59 + | None -> "") 60 + | None -> "") 61 + in 62 + Printf.sprintf "%s%s (%s)" video_type context date_str 63 + 64 + (** Generate a descriptive sentence for a project *) 65 + let project_description (pr : Bushel_project.t) = 66 + let end_str = match Bushel_project.finish pr with 67 + | Some year -> string_of_int year 68 + | None -> "present" 69 + in 70 + Printf.sprintf "Project (%d–%s)" (Bushel_project.start pr) end_str 71 + 72 + (** Generate description for any entry type *) 73 + let entry_description entries entry = 74 + let lookup_fn slug = 75 + match Bushel_entry.lookup entries slug with 76 + | Some e -> Some (Bushel_entry.title e) 77 + | None -> None 78 + in 79 + let date = Bushel_entry.date entry in 80 + let date_str = format_date date in 81 + match entry with 82 + | `Paper p -> paper_description p ~date_str 83 + | `Note n -> note_description n ~date_str ~lookup_fn 84 + | `Idea i -> idea_description i ~date_str 85 + | `Video v -> video_description v ~date_str ~lookup_fn 86 + | `Project p -> project_description p
+180
lib/bushel_entry.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Union entry type for all Bushel content *) 7 + 8 + type entry = 9 + [ `Paper of Bushel_paper.t 10 + | `Project of Bushel_project.t 11 + | `Idea of Bushel_idea.t 12 + | `Video of Bushel_video.t 13 + | `Note of Bushel_note.t 14 + ] 15 + 16 + type slugs = (string, entry) Hashtbl.t 17 + 18 + type t = { 19 + slugs : slugs; 20 + papers : Bushel_paper.ts; 21 + old_papers : Bushel_paper.ts; 22 + notes : Bushel_note.ts; 23 + projects : Bushel_project.ts; 24 + ideas : Bushel_idea.ts; 25 + videos : Bushel_video.ts; 26 + contacts : Bushel_contact.ts; 27 + data_dir : string; 28 + } 29 + 30 + (** {1 Constructors} *) 31 + 32 + let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir = 33 + let slugs : slugs = Hashtbl.create 42 in 34 + let papers, old_papers = List.partition (fun p -> p.Bushel_paper.latest) papers in 35 + List.iter (fun n -> Hashtbl.add slugs n.Bushel_note.slug (`Note n)) notes; 36 + List.iter (fun p -> Hashtbl.add slugs p.Bushel_project.slug (`Project p)) projects; 37 + List.iter (fun i -> Hashtbl.add slugs i.Bushel_idea.slug (`Idea i)) ideas; 38 + List.iter (fun v -> Hashtbl.add slugs v.Bushel_video.slug (`Video v)) videos; 39 + List.iter (fun p -> Hashtbl.add slugs p.Bushel_paper.slug (`Paper p)) papers; 40 + { slugs; papers; old_papers; notes; projects; ideas; videos; contacts; data_dir } 41 + 42 + (** {1 Accessors} *) 43 + 44 + let contacts { contacts; _ } = contacts 45 + let videos { videos; _ } = videos 46 + let ideas { ideas; _ } = ideas 47 + let papers { papers; _ } = papers 48 + let notes { notes; _ } = notes 49 + let projects { projects; _ } = projects 50 + let old_papers { old_papers; _ } = old_papers 51 + let data_dir { data_dir; _ } = data_dir 52 + 53 + (** {1 Lookup Functions} *) 54 + 55 + let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug 56 + let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug 57 + 58 + (** {1 Entry Properties} *) 59 + 60 + let to_type_string = function 61 + | `Paper _ -> "paper" 62 + | `Note _ -> "note" 63 + | `Project _ -> "project" 64 + | `Idea _ -> "idea" 65 + | `Video _ -> "video" 66 + 67 + let slug = function 68 + | `Paper p -> Bushel_paper.slug p 69 + | `Note n -> Bushel_note.slug n 70 + | `Project p -> Bushel_project.slug p 71 + | `Idea i -> Bushel_idea.slug i 72 + | `Video v -> Bushel_video.slug v 73 + 74 + let title = function 75 + | `Paper p -> Bushel_paper.title p 76 + | `Note n -> Bushel_note.title n 77 + | `Project p -> Bushel_project.title p 78 + | `Idea i -> Bushel_idea.title i 79 + | `Video v -> Bushel_video.title v 80 + 81 + let body = function 82 + | `Paper _ -> "" 83 + | `Note n -> Bushel_note.body n 84 + | `Project p -> Bushel_project.body p 85 + | `Idea i -> Bushel_idea.body i 86 + | `Video _ -> "" 87 + 88 + let sidebar = function 89 + | `Note { Bushel_note.sidebar = Some s; _ } -> Some s 90 + | _ -> None 91 + 92 + let synopsis = function 93 + | `Note n -> Bushel_note.synopsis n 94 + | _ -> None 95 + 96 + let site_url = function 97 + | `Paper p -> "/papers/" ^ Bushel_paper.slug p 98 + | `Note n -> "/notes/" ^ Bushel_note.slug n 99 + | `Project p -> "/projects/" ^ Bushel_project.slug p 100 + | `Idea i -> "/ideas/" ^ Bushel_idea.slug i 101 + | `Video v -> "/videos/" ^ Bushel_video.slug v 102 + 103 + let date (x : entry) = 104 + match x with 105 + | `Paper p -> Bushel_paper.date p 106 + | `Note n -> Bushel_note.date n 107 + | `Project p -> (Bushel_project.start p, 1, 1) 108 + | `Idea i -> (Bushel_idea.year i, Bushel_idea.month i, 1) 109 + | `Video v -> Bushel_video.date v 110 + 111 + let datetime v = Bushel_types.ptime_of_date_exn (date v) 112 + 113 + let year x = 114 + let (y, _, _) = date x in y 115 + 116 + let is_index_entry = function 117 + | `Note n -> n.Bushel_note.index_page 118 + | _ -> false 119 + 120 + (** {1 Derived Lookups} *) 121 + 122 + let lookup_site_url t slug = 123 + match lookup t slug with 124 + | Some ent -> site_url ent 125 + | None -> "" 126 + 127 + let lookup_title t slug = 128 + match lookup t slug with 129 + | Some ent -> title ent 130 + | None -> "" 131 + 132 + let notes_for_slug { notes; _ } slug = 133 + List.filter (fun n -> 134 + match Bushel_note.slug_ent n with 135 + | Some s -> s = slug 136 + | None -> false 137 + ) notes 138 + 139 + let all_entries { slugs; _ } = 140 + Hashtbl.fold (fun _ v acc -> v :: acc) slugs [] 141 + 142 + let all_papers { papers; old_papers; _ } = 143 + List.map (fun x -> `Paper x) (papers @ old_papers) 144 + 145 + (** {1 Comparison} *) 146 + 147 + let compare a b = 148 + let da = datetime a in 149 + let db = datetime b in 150 + if Ptime.equal da db then String.compare (title a) (title b) 151 + else Ptime.compare da db 152 + 153 + (** {1 Contact Lookups} *) 154 + 155 + let lookup_by_name { contacts; _ } n = 156 + match Bushel_contact.lookup_by_name contacts n with 157 + | v -> Some v 158 + | exception _ -> None 159 + 160 + (** {1 Tag Functions} *) 161 + 162 + let tags_of_ent _entries ent : Bushel_tags.t list = 163 + match ent with 164 + | `Paper p -> Bushel_tags.of_string_list @@ Bushel_paper.tags p 165 + | `Video v -> Bushel_tags.of_string_list @@ Bushel_video.tags v 166 + | `Project p -> Bushel_tags.of_string_list @@ Bushel_project.tags p 167 + | `Note n -> Bushel_tags.of_string_list @@ Bushel_note.tags n 168 + | `Idea i -> Bushel_tags.of_string_list @@ Bushel_idea.tags i 169 + 170 + let mention_entries entries tags = 171 + let lk t = 172 + try Some (lookup_exn entries t) 173 + with Not_found -> 174 + Printf.eprintf "mention_entries not found: %s\n%!" t; 175 + None 176 + in 177 + List.filter_map (function 178 + | `Slug t -> lk t 179 + | _ -> None 180 + ) tags
+123
lib/bushel_entry.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Union entry type for all Bushel content *) 7 + 8 + (** A single entry in the knowledge base. *) 9 + type entry = 10 + [ `Paper of Bushel_paper.t 11 + | `Project of Bushel_project.t 12 + | `Idea of Bushel_idea.t 13 + | `Video of Bushel_video.t 14 + | `Note of Bushel_note.t 15 + ] 16 + 17 + (** Slug-to-entry lookup table. *) 18 + type slugs = (string, entry) Hashtbl.t 19 + 20 + (** The complete entry collection. *) 21 + type t 22 + 23 + (** {1 Constructors} *) 24 + 25 + val v : 26 + papers:Bushel_paper.t list -> 27 + notes:Bushel_note.t list -> 28 + projects:Bushel_project.t list -> 29 + ideas:Bushel_idea.t list -> 30 + videos:Bushel_video.t list -> 31 + contacts:Bushel_contact.t list -> 32 + data_dir:string -> 33 + t 34 + (** Create an entry collection from lists of each entry type. *) 35 + 36 + (** {1 Accessors} *) 37 + 38 + val contacts : t -> Bushel_contact.ts 39 + val videos : t -> Bushel_video.ts 40 + val ideas : t -> Bushel_idea.ts 41 + val papers : t -> Bushel_paper.ts 42 + val notes : t -> Bushel_note.ts 43 + val projects : t -> Bushel_project.ts 44 + val old_papers : t -> Bushel_paper.ts 45 + val data_dir : t -> string 46 + 47 + (** {1 Lookup Functions} *) 48 + 49 + val lookup : t -> string -> entry option 50 + (** [lookup entries slug] finds an entry by its slug. *) 51 + 52 + val lookup_exn : t -> string -> entry 53 + (** Like {!lookup} but raises [Not_found] if the slug doesn't exist. *) 54 + 55 + (** {1 Entry Properties} *) 56 + 57 + val to_type_string : entry -> string 58 + (** [to_type_string entry] returns the type name as a string. *) 59 + 60 + val slug : entry -> string 61 + (** [slug entry] returns the entry's slug. *) 62 + 63 + val title : entry -> string 64 + (** [title entry] returns the entry's title. *) 65 + 66 + val body : entry -> string 67 + (** [body entry] returns the entry's body content. *) 68 + 69 + val sidebar : entry -> string option 70 + (** [sidebar entry] returns the entry's sidebar content if present. *) 71 + 72 + val synopsis : entry -> string option 73 + (** [synopsis entry] returns the entry's synopsis if present. *) 74 + 75 + val site_url : entry -> string 76 + (** [site_url entry] returns the site URL path for the entry. *) 77 + 78 + val date : entry -> int * int * int 79 + (** [date entry] returns the entry's date as (year, month, day). *) 80 + 81 + val datetime : entry -> Ptime.t 82 + (** [datetime entry] returns the entry's date as a timestamp. *) 83 + 84 + val year : entry -> int 85 + (** [year entry] returns the entry's year. *) 86 + 87 + val is_index_entry : entry -> bool 88 + (** [is_index_entry entry] returns true if this is an index page. *) 89 + 90 + (** {1 Derived Lookups} *) 91 + 92 + val lookup_site_url : t -> string -> string 93 + (** [lookup_site_url entries slug] returns the site URL for a slug. *) 94 + 95 + val lookup_title : t -> string -> string 96 + (** [lookup_title entries slug] returns the title for a slug. *) 97 + 98 + val notes_for_slug : t -> string -> Bushel_note.t list 99 + (** [notes_for_slug entries slug] returns notes that reference the given slug. *) 100 + 101 + val all_entries : t -> entry list 102 + (** [all_entries entries] returns all entries as a list. *) 103 + 104 + val all_papers : t -> entry list 105 + (** [all_papers entries] returns all papers including old versions. *) 106 + 107 + (** {1 Comparison} *) 108 + 109 + val compare : entry -> entry -> int 110 + (** Compare entries by date, then by title. *) 111 + 112 + (** {1 Contact Lookups} *) 113 + 114 + val lookup_by_name : t -> string -> Bushel_contact.t option 115 + (** [lookup_by_name entries name] finds a contact by name. *) 116 + 117 + (** {1 Tag Functions} *) 118 + 119 + val tags_of_ent : t -> entry -> Bushel_tags.t list 120 + (** [tags_of_ent entries entry] returns the entry's tags. *) 121 + 122 + val mention_entries : t -> Bushel_tags.t list -> entry list 123 + (** [mention_entries entries tags] returns entries mentioned in the tags. *)
+214
lib/bushel_idea.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Idea entry type for Bushel *) 7 + 8 + (** Academic level for research ideas *) 9 + type level = 10 + | Any 11 + | PartII 12 + | MPhil 13 + | PhD 14 + | Postdoc 15 + 16 + let level_of_string = function 17 + | "Any" | "any" -> Any 18 + | "PartII" | "partii" -> PartII 19 + | "MPhil" | "mphil" -> MPhil 20 + | "PhD" | "phd" -> PhD 21 + | "postdoc" | "Postdoc" -> Postdoc 22 + | _ -> Any 23 + 24 + let level_to_string = function 25 + | Any -> "Any" 26 + | PartII -> "PartII" 27 + | MPhil -> "MPhil" 28 + | PhD -> "PhD" 29 + | Postdoc -> "postdoctoral" 30 + 31 + let level_to_tag = function 32 + | Any -> "idea-beginner" 33 + | PartII -> "idea-medium" 34 + | MPhil -> "idea-hard" 35 + | PhD -> "idea-phd" 36 + | Postdoc -> "idea-postdoc" 37 + 38 + (** Status of research idea *) 39 + type status = 40 + | Available 41 + | Discussion 42 + | Ongoing 43 + | Completed 44 + | Expired 45 + 46 + let status_of_string = function 47 + | "Available" | "available" -> Available 48 + | "Discussion" | "discussion" -> Discussion 49 + | "Ongoing" | "ongoing" -> Ongoing 50 + | "Completed" | "completed" -> Completed 51 + | "Expired" | "expired" -> Expired 52 + | _ -> Available 53 + 54 + let status_to_string = function 55 + | Available -> "Available" 56 + | Discussion -> "Discussion" 57 + | Ongoing -> "Ongoing" 58 + | Completed -> "Completed" 59 + | Expired -> "Expired" 60 + 61 + let status_to_tag = function 62 + | Available -> "idea-available" 63 + | Discussion -> "idea-discuss" 64 + | Ongoing -> "idea-ongoing" 65 + | Completed -> "idea-done" 66 + | Expired -> "idea-expired" 67 + 68 + type t = { 69 + slug : string; 70 + title : string; 71 + level : level; 72 + project : string; 73 + status : status; 74 + month : int; 75 + year : int; 76 + supervisors : string list; 77 + students : string list; 78 + reading : string; 79 + body : string; 80 + url : string option; 81 + tags : string list; 82 + } 83 + 84 + type ts = t list 85 + 86 + (** {1 Accessors} *) 87 + 88 + let slug { slug; _ } = slug 89 + let title { title; _ } = title 90 + let level { level; _ } = level 91 + let project { project; _ } = project 92 + let status { status; _ } = status 93 + let year { year; _ } = year 94 + let month { month; _ } = month 95 + let supervisors { supervisors; _ } = supervisors 96 + let students { students; _ } = students 97 + let reading { reading; _ } = reading 98 + let body { body; _ } = body 99 + let url { url; _ } = url 100 + let tags { tags; _ } = tags 101 + 102 + (** {1 Comparison} *) 103 + 104 + let compare a b = 105 + match Stdlib.compare a.status b.status with 106 + | 0 -> 107 + (match a.status with 108 + | Completed -> Int.compare b.year a.year 109 + | _ -> 110 + match Stdlib.compare a.level b.level with 111 + | 0 -> 112 + (match Int.compare b.year a.year with 113 + | 0 -> Int.compare b.month a.month 114 + | n -> n) 115 + | n -> n) 116 + | n -> n 117 + 118 + (** {1 Lookup} *) 119 + 120 + let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas 121 + 122 + (** {1 Jsont Codec} *) 123 + 124 + let level_jsont : level Jsont.t = 125 + Jsont.of_of_string ~kind:"level" 126 + (fun s -> Ok (level_of_string s)) 127 + ~enc:level_to_string 128 + 129 + let status_jsont : status Jsont.t = 130 + Jsont.of_of_string ~kind:"status" 131 + (fun s -> Ok (status_of_string s)) 132 + ~enc:status_to_string 133 + 134 + let jsont : t Jsont.t = 135 + let open Jsont in 136 + let open Jsont.Object in 137 + let make title level project status supervisors students tags reading url = 138 + { slug = ""; title; level; project; status; 139 + month = 1; year = 2000; supervisors; students; reading; 140 + body = ""; url; tags } 141 + in 142 + map ~kind:"Idea" make 143 + |> mem "title" string ~enc:(fun i -> i.title) 144 + |> mem "level" level_jsont ~enc:(fun i -> i.level) 145 + |> mem "project" string ~enc:(fun i -> i.project) 146 + |> mem "status" status_jsont ~enc:(fun i -> i.status) 147 + |> mem "supervisors" (list string) ~dec_absent:[] ~enc:(fun i -> i.supervisors) 148 + |> mem "students" (list string) ~dec_absent:[] ~enc:(fun i -> i.students) 149 + |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun i -> i.tags) 150 + |> mem "reading" string ~dec_absent:"" ~enc:(fun i -> i.reading) 151 + |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None 152 + ~enc_omit:Option.is_none ~enc:(fun i -> i.url) 153 + |> finish 154 + 155 + (** {1 Parsing} *) 156 + 157 + let of_frontmatter (fm : Frontmatter.t) : (t, string) result = 158 + (* Extract slug and date from filename *) 159 + let slug, date_opt = 160 + match Frontmatter.fname fm with 161 + | Some fname -> 162 + (match Frontmatter.slug_of_fname fname with 163 + | Ok (s, d) -> (s, d) 164 + | Error _ -> ("", None)) 165 + | None -> ("", None) 166 + in 167 + let year, month = 168 + match date_opt with 169 + | Some d -> let (y, m, _) = Ptime.to_date d in (y, m) 170 + | None -> (2000, 1) 171 + in 172 + match Frontmatter.decode jsont fm with 173 + | Error e -> Error e 174 + | Ok i -> 175 + Ok { i with 176 + slug; 177 + year; 178 + month; 179 + body = Frontmatter.body fm } 180 + 181 + (** {1 Pretty Printing} *) 182 + 183 + let pp ppf i = 184 + let open Fmt in 185 + pf ppf "@[<v>"; 186 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea"; 187 + pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug; 188 + pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i); 189 + pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i)); 190 + pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i)); 191 + pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i); 192 + pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month; 193 + let sups = supervisors i in 194 + if sups <> [] then 195 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups; 196 + let studs = students i in 197 + if studs <> [] then 198 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs; 199 + (match i.url with 200 + | Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url 201 + | None -> ()); 202 + let t = i.tags in 203 + if t <> [] then 204 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 205 + let r = reading i in 206 + if r <> "" then begin 207 + pf ppf "@,"; 208 + pf ppf "%a:@," (styled `Bold string) "Reading"; 209 + pf ppf "%a@," string r; 210 + end; 211 + pf ppf "@,"; 212 + pf ppf "%a:@," (styled `Bold string) "Body"; 213 + pf ppf "%a@," string (body i); 214 + pf ppf "@]"
+235
lib/bushel_link.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** External link tracking for Bushel *) 7 + 8 + type karakeep_data = { 9 + remote_url : string; 10 + id : string; 11 + tags : string list; 12 + metadata : (string * string) list; 13 + } 14 + 15 + type bushel_data = { 16 + slugs : string list; 17 + tags : string list; 18 + } 19 + 20 + type t = { 21 + url : string; 22 + date : Ptime.date; 23 + description : string; 24 + karakeep : karakeep_data option; 25 + bushel : bushel_data option; 26 + } 27 + 28 + type ts = t list 29 + 30 + (** {1 Accessors} *) 31 + 32 + let url { url; _ } = url 33 + let date { date; _ } = date 34 + let description { description; _ } = description 35 + let datetime v = Bushel_types.ptime_of_date_exn (date v) 36 + 37 + (** {1 Comparison} *) 38 + 39 + let compare a b = Ptime.compare (datetime b) (datetime a) 40 + 41 + (** {1 YAML Parsing} *) 42 + 43 + let t_of_yaml = function 44 + | `O fields -> 45 + let url = 46 + match List.assoc_opt "url" fields with 47 + | Some (`String v) -> v 48 + | _ -> failwith "link: missing or invalid url" 49 + in 50 + let date = 51 + match List.assoc_opt "date" fields with 52 + | Some (`String v) -> 53 + (try 54 + match String.split_on_char '-' v with 55 + | [y; m; d] -> (int_of_string y, int_of_string m, int_of_string d) 56 + | _ -> 57 + v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a 58 + with _ -> 59 + v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a) 60 + | _ -> failwith "link: missing or invalid date" 61 + in 62 + let description = 63 + match List.assoc_opt "description" fields with 64 + | Some (`String v) -> v 65 + | _ -> "" 66 + in 67 + let karakeep = 68 + match List.assoc_opt "karakeep" fields with 69 + | Some (`O k_fields) -> 70 + let remote_url = 71 + match List.assoc_opt "remote_url" k_fields with 72 + | Some (`String v) -> v 73 + | _ -> failwith "link: invalid karakeep.remote_url" 74 + in 75 + let id = 76 + match List.assoc_opt "id" k_fields with 77 + | Some (`String v) -> v 78 + | _ -> failwith "link: invalid karakeep.id" 79 + in 80 + let tags = 81 + match List.assoc_opt "tags" k_fields with 82 + | Some (`A tag_list) -> 83 + List.filter_map (function `String t -> Some t | _ -> None) tag_list 84 + | _ -> [] 85 + in 86 + let metadata = 87 + match List.assoc_opt "metadata" k_fields with 88 + | Some (`O meta_fields) -> 89 + List.filter_map (fun (k, v) -> 90 + match v with `String value -> Some (k, value) | _ -> None 91 + ) meta_fields 92 + | _ -> [] 93 + in 94 + Some { remote_url; id; tags; metadata } 95 + | _ -> None 96 + in 97 + let bushel = 98 + match List.assoc_opt "bushel" fields with 99 + | Some (`O b_fields) -> 100 + let slugs = 101 + match List.assoc_opt "slugs" b_fields with 102 + | Some (`A slug_list) -> 103 + List.filter_map (function `String s -> Some s | _ -> None) slug_list 104 + | _ -> [] 105 + in 106 + let tags = 107 + match List.assoc_opt "tags" b_fields with 108 + | Some (`A tag_list) -> 109 + List.filter_map (function `String t -> Some t | _ -> None) tag_list 110 + | _ -> [] 111 + in 112 + Some { slugs; tags } 113 + | _ -> None 114 + in 115 + { url; date; description; karakeep; bushel } 116 + | _ -> failwith "link: invalid yaml" 117 + 118 + (** {1 YAML Serialization} *) 119 + 120 + let to_yaml t = 121 + let (year, month, day) = t.date in 122 + let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in 123 + 124 + let base_fields = [ 125 + ("url", `String t.url); 126 + ("date", `String date_str); 127 + ] @ 128 + (if t.description = "" then [] else [("description", `String t.description)]) 129 + in 130 + 131 + let karakeep_fields = 132 + match t.karakeep with 133 + | Some { remote_url; id; tags; metadata } -> 134 + let karakeep_obj = [ 135 + ("remote_url", `String remote_url); 136 + ("id", `String id); 137 + ] in 138 + let karakeep_obj = 139 + if tags = [] then karakeep_obj 140 + else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj 141 + in 142 + let karakeep_obj = 143 + if metadata = [] then karakeep_obj 144 + else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj 145 + in 146 + [("karakeep", `O karakeep_obj)] 147 + | None -> [] 148 + in 149 + 150 + let bushel_fields = 151 + match t.bushel with 152 + | Some { slugs; tags } -> 153 + let bushel_obj = [] in 154 + let bushel_obj = 155 + if slugs = [] then bushel_obj 156 + else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj 157 + in 158 + let bushel_obj = 159 + if tags = [] then bushel_obj 160 + else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj 161 + in 162 + if bushel_obj = [] then [] else [("bushel", `O bushel_obj)] 163 + | None -> [] 164 + in 165 + 166 + `O (base_fields @ karakeep_fields @ bushel_fields) 167 + 168 + (** {1 File Operations} *) 169 + 170 + let load_links_file path = 171 + try 172 + let yaml_str = In_channel.(with_open_bin path input_all) in 173 + match Yamlrw.of_string yaml_str with 174 + | `A links -> List.map t_of_yaml links 175 + | _ -> [] 176 + with _ -> [] 177 + 178 + let save_links_file path links = 179 + let yaml = `A (List.map to_yaml links) in 180 + let yaml_str = Yamlrw.to_string yaml in 181 + let oc = open_out path in 182 + output_string oc yaml_str; 183 + close_out oc 184 + 185 + (** {1 Merging} *) 186 + 187 + let merge_links ?(prefer_new_date=false) existing new_links = 188 + let links_by_url = Hashtbl.create (List.length existing) in 189 + 190 + List.iter (fun link -> Hashtbl.replace links_by_url link.url link) existing; 191 + 192 + List.iter (fun new_link -> 193 + match Hashtbl.find_opt links_by_url new_link.url with 194 + | None -> 195 + Hashtbl.add links_by_url new_link.url new_link 196 + | Some old_link -> 197 + let description = 198 + if new_link.description <> "" then new_link.description 199 + else old_link.description 200 + in 201 + let karakeep = 202 + match new_link.karakeep, old_link.karakeep with 203 + | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url -> 204 + let merged_metadata = 205 + let meta_tbl = Hashtbl.create (List.length old_k.metadata) in 206 + List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata; 207 + List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata; 208 + Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl [] 209 + in 210 + let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in 211 + Some { new_k with metadata = merged_metadata; tags = merged_tags } 212 + | Some new_k, _ -> Some new_k 213 + | None, old_k -> old_k 214 + in 215 + let bushel = 216 + match new_link.bushel, old_link.bushel with 217 + | Some new_b, Some old_b -> 218 + let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in 219 + let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in 220 + Some { slugs = merged_slugs; tags = merged_tags } 221 + | Some new_b, _ -> Some new_b 222 + | None, old_b -> old_b 223 + in 224 + let date = 225 + if prefer_new_date then new_link.date 226 + else if compare new_link old_link > 0 then new_link.date 227 + else old_link.date 228 + in 229 + let merged_link = { url = new_link.url; date; description; karakeep; bushel } in 230 + Hashtbl.replace links_by_url new_link.url merged_link 231 + ) new_links; 232 + 233 + Hashtbl.to_seq_values links_by_url 234 + |> List.of_seq 235 + |> List.sort compare
+418
lib/bushel_md.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel Markdown extensions and utilities 7 + 8 + This module provides mappers to convert Bushel markdown extensions to different 9 + output formats. Bushel extends standard markdown with: 10 + 11 + - [:slug] - Links to bushel entries by slug 12 + - [@@handle] - Links to contacts by handle 13 + - [##tag] - Tag references 14 + 15 + Two main mapper modes: 16 + - Sidenote mode for the main website (with previews) 17 + - Plain HTML mode for feeds and simple output 18 + *) 19 + 20 + (** {1 Link Detection} *) 21 + 22 + let is_bushel_slug = String.starts_with ~prefix:":" 23 + let is_tag_slug link = 24 + String.starts_with ~prefix:"##" link && 25 + not (String.starts_with ~prefix:"###" link) 26 + let is_type_filter_slug = String.starts_with ~prefix:"###" 27 + let is_contact_slug = String.starts_with ~prefix:"@" 28 + 29 + let strip_handle s = 30 + if String.length s = 0 then s 31 + else if s.[0] = '@' || s.[0] = ':' then 32 + String.sub s 1 (String.length s - 1) 33 + else if String.length s > 1 && s.[0] = '#' && s.[1] = '#' then 34 + String.sub s 2 (String.length s - 2) 35 + else s 36 + 37 + (** {1 Custom Link Resolution} *) 38 + 39 + let authorlink = Cmarkit.Meta.key () 40 + let sluglink = Cmarkit.Meta.key () 41 + 42 + let make_authorlink label = 43 + let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in 44 + Cmarkit.Label.with_meta meta label 45 + 46 + let make_sluglink label = 47 + let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in 48 + Cmarkit.Label.with_meta meta label 49 + 50 + (** Custom label resolver for Bushel links *) 51 + let with_bushel_links = function 52 + | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx 53 + | `Ref (_, _, (Some _ as def)) -> def 54 + | `Ref (_, ref, None) -> 55 + let txt = Cmarkit.Label.key ref in 56 + if String.length txt = 0 then None 57 + else match txt.[0] with 58 + | '@' -> Some (make_authorlink ref) 59 + | ':' -> Some (make_sluglink ref) 60 + | '#' -> if String.length txt > 1 && txt.[1] = '#' then Some (make_sluglink ref) else None 61 + | _ -> None 62 + 63 + (** {1 Text Extraction} *) 64 + 65 + let text_of_inline lb = 66 + Cmarkit.Inline.to_plain_text ~break_on_soft:false lb 67 + |> fun r -> String.concat "\n" (List.map (String.concat "") r) 68 + 69 + (** {1 Link Target Detection} *) 70 + 71 + let link_target_is_bushel ?slugs lb = 72 + let open Cmarkit in 73 + let ref = Inline.Link.reference lb in 74 + match ref with 75 + | `Inline (ld, _) -> 76 + let dest = Link_definition.dest ld in 77 + (match dest with 78 + | Some (url, _) when is_bushel_slug url -> 79 + (match slugs with Some s -> Hashtbl.replace s url () | _ -> ()); 80 + Some (url, Inline.Link.text lb |> text_of_inline) 81 + | Some (url, _) when is_tag_slug url -> 82 + Some (url, Inline.Link.text lb |> text_of_inline) 83 + | Some (url, _) when is_contact_slug url -> 84 + Some (url, Inline.Link.text lb |> text_of_inline) 85 + | _ -> None) 86 + | _ -> None 87 + 88 + let image_target_is_bushel lb = 89 + let open Cmarkit in 90 + let ref = Inline.Link.reference lb in 91 + match ref with 92 + | `Inline (ld, _) -> 93 + let dest = Link_definition.dest ld in 94 + (match dest with 95 + | Some (url, _) when is_bushel_slug url -> 96 + let alt = Link_definition.title ld in 97 + let dir = 98 + Inline.Link.text lb 99 + |> Inline.to_plain_text ~break_on_soft:false 100 + |> fun r -> String.concat "\n" (List.map (String.concat "") r) 101 + in 102 + Some (url, alt, dir) 103 + | _ -> None) 104 + | _ -> None 105 + 106 + (** {1 Link-Only Mapper} 107 + 108 + Converts Bushel links to regular HTML links without sidenotes. 109 + Used for Atom feeds, RSS, search indexing. *) 110 + 111 + let make_link_only_mapper entries = 112 + let open Cmarkit in 113 + fun _m -> 114 + function 115 + | Inline.Link (lb, meta) -> 116 + (match link_target_is_bushel lb with 117 + | Some (url, title) -> 118 + let s = strip_handle url in 119 + let dest = Bushel_entry.lookup_site_url entries s in 120 + let link_text = 121 + if is_bushel_slug title then 122 + match Bushel_entry.lookup entries (strip_handle title) with 123 + | Some ent -> Bushel_entry.title ent 124 + | None -> title 125 + else title 126 + in 127 + let txt = Inline.Text (link_text, meta) in 128 + let ld = Link_definition.make ~dest:(dest, meta) () in 129 + let ll = `Inline (ld, meta) in 130 + let ld = Inline.Link.make txt ll in 131 + Mapper.ret (Inline.Link (ld, meta)) 132 + | None -> 133 + (match Inline.Link.referenced_label lb with 134 + | Some l -> 135 + let m = Label.meta l in 136 + (match Meta.find authorlink m with 137 + | Some () -> 138 + let slug = Label.key l in 139 + let s = strip_handle slug in 140 + (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with 141 + | Some c -> 142 + let name = Bushel_contact.name c in 143 + (match Bushel_contact.best_url c with 144 + | Some dest -> 145 + let txt = Inline.Text (name, meta) in 146 + let ld = Link_definition.make ~dest:(dest, meta) () in 147 + let ll = `Inline (ld, meta) in 148 + let ld = Inline.Link.make txt ll in 149 + Mapper.ret (Inline.Link (ld, meta)) 150 + | None -> 151 + let txt = Inline.Text (name, meta) in 152 + Mapper.ret txt) 153 + | None -> 154 + let title = Inline.Link.text lb |> text_of_inline in 155 + let txt = Inline.Text (title, meta) in 156 + Mapper.ret txt) 157 + | None -> 158 + (match Meta.find sluglink m with 159 + | Some () -> 160 + let slug = Label.key l in 161 + if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug then 162 + let s = strip_handle slug in 163 + let dest = Bushel_entry.lookup_site_url entries s in 164 + let title = Inline.Link.text lb |> text_of_inline in 165 + let link_text = 166 + let trimmed = String.trim title in 167 + if is_bushel_slug trimmed then 168 + match Bushel_entry.lookup entries (strip_handle trimmed) with 169 + | Some ent -> Bushel_entry.title ent 170 + | None -> title 171 + else title 172 + in 173 + let txt = Inline.Text (link_text, meta) in 174 + let ld = Link_definition.make ~dest:(dest, meta) () in 175 + let ll = `Inline (ld, meta) in 176 + let ld = Inline.Link.make txt ll in 177 + Mapper.ret (Inline.Link (ld, meta)) 178 + else Mapper.default 179 + | None -> Mapper.default)) 180 + | None -> Mapper.default)) 181 + | _ -> Mapper.default 182 + 183 + (** {1 Slug Scanning} *) 184 + 185 + let scan_for_slugs entries md = 186 + let open Cmarkit in 187 + let slugs = Hashtbl.create 7 in 188 + let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in 189 + let inline_mapper _m = function 190 + | Inline.Link (lb, _meta) -> 191 + (match link_target_is_bushel ~slugs lb with 192 + | Some _ -> Mapper.default 193 + | None -> 194 + (match Inline.Link.referenced_label lb with 195 + | Some l -> 196 + let m = Label.meta l in 197 + (match Meta.find sluglink m with 198 + | Some () -> 199 + let slug = Label.key l in 200 + if is_bushel_slug slug then 201 + Hashtbl.replace slugs slug (); 202 + Mapper.default 203 + | None -> Mapper.default) 204 + | None -> Mapper.default)) 205 + | _ -> Mapper.default 206 + in 207 + let mapper = Mapper.make ~inline:inline_mapper () in 208 + let _ = Mapper.map_doc mapper doc in 209 + ignore entries; 210 + Hashtbl.fold (fun k () a -> k :: a) slugs [] 211 + 212 + (** {1 Link Extraction} *) 213 + 214 + (** Extract all links from markdown text, including from images *) 215 + let extract_all_links text = 216 + let open Cmarkit in 217 + let doc = Doc.of_string ~resolver:with_bushel_links text in 218 + let links = ref [] in 219 + 220 + let find_links_in_inline _mapper = function 221 + | Inline.Link (lb, _) | Inline.Image (lb, _) -> 222 + (match Inline.Link.reference lb with 223 + | `Inline (ld, _) -> 224 + (match Link_definition.dest ld with 225 + | Some (url, _) -> 226 + links := url :: !links; 227 + Mapper.default 228 + | None -> Mapper.default) 229 + | `Ref _ -> 230 + (match Inline.Link.referenced_label lb with 231 + | Some l -> 232 + let key = Label.key l in 233 + if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' || 234 + (String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then 235 + links := key :: !links; 236 + Mapper.default 237 + | None -> Mapper.default)) 238 + | _ -> Mapper.default 239 + in 240 + 241 + let mapper = Mapper.make ~inline:find_links_in_inline () in 242 + let _ = Mapper.map_doc mapper doc in 243 + 244 + let module StringSet = Set.Make(String) in 245 + StringSet.elements (StringSet.of_list !links) 246 + 247 + (** Extract external URLs from markdown content *) 248 + let extract_external_links md = 249 + let open Cmarkit in 250 + let urls = ref [] in 251 + 252 + let is_external_url url = 253 + if is_bushel_slug url || is_tag_slug url then false 254 + else 255 + try 256 + let uri = Uri.of_string url in 257 + match Uri.scheme uri with 258 + | Some s when s = "http" || s = "https" -> true 259 + | Some _ -> true 260 + | None -> false 261 + with _ -> false 262 + in 263 + 264 + let inline_mapper _ = function 265 + | Inline.Link (lb, _) | Inline.Image (lb, _) -> 266 + let ref = Inline.Link.reference lb in 267 + (match ref with 268 + | `Inline (ld, _) -> 269 + (match Link_definition.dest ld with 270 + | Some (url, _) when is_external_url url -> 271 + urls := url :: !urls; 272 + Mapper.default 273 + | _ -> Mapper.default) 274 + | `Ref (_, _, l) -> 275 + let defs = Doc.defs (Doc.of_string ~strict:false md) in 276 + (match Label.Map.find_opt (Label.key l) defs with 277 + | Some (Link_definition.Def (ld, _)) -> 278 + (match Link_definition.dest ld with 279 + | Some (url, _) when is_external_url url -> 280 + urls := url :: !urls 281 + | _ -> ()) 282 + | _ -> ()); 283 + Mapper.default) 284 + | Inline.Autolink (autolink, _) -> 285 + let url = Inline.Autolink.link autolink |> fst in 286 + if not (Inline.Autolink.is_email autolink) && is_external_url url then 287 + urls := url :: !urls; 288 + Mapper.default 289 + | _ -> Mapper.default 290 + in 291 + 292 + let mapper = Mapper.make ~inline:inline_mapper () in 293 + let doc = Doc.of_string ~strict:false md in 294 + let _ = Mapper.map_doc mapper doc in 295 + List.sort_uniq String.compare !urls 296 + 297 + (** {1 First Image Extraction} *) 298 + 299 + let extract_first_image md = 300 + let open Cmarkit in 301 + let doc = Doc.of_string md in 302 + let found_image = ref None in 303 + 304 + let find_image_in_inline _mapper = function 305 + | Inline.Image (img, _) -> 306 + (match Inline.Link.reference img with 307 + | `Inline (ld, _) -> 308 + (match Link_definition.dest ld with 309 + | Some (url, _) when !found_image = None -> 310 + found_image := Some url; 311 + Mapper.default 312 + | _ -> Mapper.default) 313 + | _ -> Mapper.default) 314 + | _ -> Mapper.default 315 + in 316 + 317 + let mapper = Mapper.make ~inline:find_image_in_inline () in 318 + let _ = Mapper.map_doc mapper doc in 319 + !found_image 320 + 321 + (** {1 Plaintext Conversion} *) 322 + 323 + (** Convert markdown text to plain text, resolving bushel links to just their text *) 324 + let markdown_to_plaintext _entries text = 325 + let open Cmarkit in 326 + let doc = Doc.of_string ~resolver:with_bushel_links text in 327 + 328 + let rec block_to_text = function 329 + | Block.Blank_line _ -> "" 330 + | Block.Thematic_break _ -> "\n---\n" 331 + | Block.Paragraph (p, _) -> 332 + let inline = Block.Paragraph.inline p in 333 + Inline.to_plain_text ~break_on_soft:false inline 334 + |> List.map (String.concat "") |> String.concat "\n" 335 + | Block.Heading (h, _) -> 336 + let inline = Block.Heading.inline h in 337 + Inline.to_plain_text ~break_on_soft:false inline 338 + |> List.map (String.concat "") |> String.concat "\n" 339 + | Block.Block_quote (bq, _) -> 340 + let blocks = Block.Block_quote.block bq in 341 + block_to_text blocks 342 + | Block.List (l, _) -> 343 + let items = Block.List'.items l in 344 + List.map (fun (item, _) -> 345 + let blocks = Block.List_item.block item in 346 + block_to_text blocks 347 + ) items |> String.concat "\n" 348 + | Block.Code_block (cb, _) -> 349 + let code = Block.Code_block.code cb in 350 + String.concat "\n" (List.map Block_line.to_string code) 351 + | Block.Html_block _ -> "" 352 + | Block.Link_reference_definition _ -> "" 353 + | Block.Ext_footnote_definition _ -> "" 354 + | Block.Blocks (blocks, _) -> 355 + List.map block_to_text blocks |> String.concat "\n" 356 + | _ -> "" 357 + in 358 + let blocks = Doc.block doc in 359 + block_to_text blocks 360 + 361 + (** {1 Validation} *) 362 + 363 + (** Validation mapper that collects broken references *) 364 + let make_validation_mapper entries broken_slugs broken_contacts = 365 + let open Cmarkit in 366 + fun _m -> 367 + function 368 + | Inline.Link (lb, _meta) -> 369 + (match link_target_is_bushel lb with 370 + | Some (url, _title) -> 371 + let s = strip_handle url in 372 + if is_contact_slug url then 373 + (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with 374 + | None -> Hashtbl.replace broken_contacts url () 375 + | Some _ -> ()) 376 + else if is_bushel_slug url then 377 + (match Bushel_entry.lookup entries s with 378 + | None -> Hashtbl.replace broken_slugs url () 379 + | Some _ -> ()); 380 + Mapper.default 381 + | None -> 382 + (match Inline.Link.referenced_label lb with 383 + | Some l -> 384 + let m = Label.meta l in 385 + (match Meta.find authorlink m with 386 + | Some () -> 387 + let slug = Label.key l in 388 + let handle = strip_handle slug in 389 + (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) handle with 390 + | None -> Hashtbl.replace broken_contacts slug () 391 + | Some _ -> ()); 392 + Mapper.default 393 + | None -> 394 + (match Meta.find sluglink m with 395 + | None -> Mapper.default 396 + | Some () -> 397 + let slug = Label.key l in 398 + if is_bushel_slug slug then begin 399 + let s = strip_handle slug in 400 + match Bushel_entry.lookup entries s with 401 + | None -> Hashtbl.replace broken_slugs slug () 402 + | Some _ -> () 403 + end; 404 + Mapper.default)) 405 + | None -> Mapper.default)) 406 + | _ -> Mapper.default 407 + 408 + (** Validate all bushel references in markdown and return broken ones *) 409 + let validate_references entries md = 410 + let open Cmarkit in 411 + let broken_slugs = Hashtbl.create 7 in 412 + let broken_contacts = Hashtbl.create 7 in 413 + let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in 414 + let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in 415 + let _ = Mapper.map_doc mapper doc in 416 + let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in 417 + let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in 418 + (slugs, contacts)
+195
lib/bushel_note.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Note entry type for Bushel *) 7 + 8 + type t = { 9 + title : string; 10 + date : Ptime.date; 11 + slug : string; 12 + body : string; 13 + tags : string list; 14 + draft : bool; 15 + updated : Ptime.date option; 16 + sidebar : string option; 17 + index_page : bool; 18 + perma : bool; (** Permanent article that will receive a DOI *) 19 + doi : string option; (** DOI identifier for permanent articles *) 20 + synopsis : string option; 21 + titleimage : string option; 22 + via : (string * string) option; (** (label, url) for link-style notes *) 23 + slug_ent : string option; (** Reference to another entry *) 24 + source : string option; (** Source for news-style notes *) 25 + url : string option; (** External URL for news-style notes *) 26 + author : string option; (** Author for news-style notes *) 27 + category : string option; (** Category for news-style notes *) 28 + standardsite : string option; (** Standards body site reference *) 29 + } 30 + 31 + type ts = t list 32 + 33 + (** {1 Accessors} *) 34 + 35 + let title { title; _ } = title 36 + let slug { slug; _ } = slug 37 + let body { body; _ } = body 38 + let tags { tags; _ } = tags 39 + let draft { draft; _ } = draft 40 + let sidebar { sidebar; _ } = sidebar 41 + let synopsis { synopsis; _ } = synopsis 42 + let perma { perma; _ } = perma 43 + let doi { doi; _ } = doi 44 + let titleimage { titleimage; _ } = titleimage 45 + let slug_ent { slug_ent; _ } = slug_ent 46 + let source { source; _ } = source 47 + let url { url; _ } = url 48 + let author { author; _ } = author 49 + let category { category; _ } = category 50 + let standardsite { standardsite; _ } = standardsite 51 + 52 + let origdate { date; _ } = Bushel_types.ptime_of_date_exn date 53 + 54 + let date { date; updated; _ } = 55 + match updated with 56 + | None -> date 57 + | Some v -> v 58 + 59 + let datetime v = Bushel_types.ptime_of_date_exn (date v) 60 + 61 + let link { body; via; slug; _ } = 62 + match body, via with 63 + | "", Some (l, u) -> `Ext (l, u) 64 + | "", None -> failwith (slug ^ ": note external without via, via-url") 65 + | _, _ -> `Local slug 66 + 67 + let words { body; _ } = Bushel_util.count_words body 68 + 69 + (** {1 Comparison} *) 70 + 71 + let compare a b = Ptime.compare (datetime b) (datetime a) 72 + 73 + (** {1 Lookup} *) 74 + 75 + let lookup slug notes = List.find_opt (fun n -> n.slug = slug) notes 76 + 77 + (** {1 Jsont Codec} *) 78 + 79 + let via_jsont : (string * string) option Jsont.t = 80 + (* via is encoded as two separate fields: via and via-url *) 81 + Jsont.null None (* Handled specially in of_frontmatter *) 82 + 83 + let jsont ~default_date ~default_slug : t Jsont.t = 84 + let open Jsont in 85 + let open Jsont.Object in 86 + let make title date slug tags draft updated index_page perma doi synopsis titleimage 87 + slug_ent source url author category standardsite = 88 + { title; date; slug; body = ""; tags; draft; updated; sidebar = None; 89 + index_page; perma; doi; synopsis; titleimage; via = None; 90 + slug_ent; source; url; author; category; standardsite } 91 + in 92 + map ~kind:"Note" make 93 + |> mem "title" string ~enc:(fun n -> n.title) 94 + |> mem "date" Bushel_types.ptime_date_jsont ~dec_absent:default_date ~enc:(fun n -> n.date) 95 + |> mem "slug" string ~dec_absent:default_slug ~enc:(fun n -> n.slug) 96 + |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun n -> n.tags) 97 + |> mem "draft" bool ~dec_absent:false ~enc:(fun n -> n.draft) 98 + |> mem "updated" (option Bushel_types.ptime_date_jsont) ~dec_absent:None 99 + ~enc_omit:Option.is_none ~enc:(fun n -> n.updated) 100 + |> mem "index_page" bool ~dec_absent:false ~enc:(fun n -> n.index_page) 101 + |> mem "perma" bool ~dec_absent:false ~enc:(fun n -> n.perma) 102 + |> mem "doi" Bushel_types.string_option_jsont ~dec_absent:None 103 + ~enc_omit:Option.is_none ~enc:(fun n -> n.doi) 104 + |> mem "synopsis" Bushel_types.string_option_jsont ~dec_absent:None 105 + ~enc_omit:Option.is_none ~enc:(fun n -> n.synopsis) 106 + |> mem "titleimage" Bushel_types.string_option_jsont ~dec_absent:None 107 + ~enc_omit:Option.is_none ~enc:(fun n -> n.titleimage) 108 + |> mem "slug_ent" Bushel_types.string_option_jsont ~dec_absent:None 109 + ~enc_omit:Option.is_none ~enc:(fun n -> n.slug_ent) 110 + |> mem "source" Bushel_types.string_option_jsont ~dec_absent:None 111 + ~enc_omit:Option.is_none ~enc:(fun n -> n.source) 112 + |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None 113 + ~enc_omit:Option.is_none ~enc:(fun n -> n.url) 114 + |> mem "author" Bushel_types.string_option_jsont ~dec_absent:None 115 + ~enc_omit:Option.is_none ~enc:(fun n -> n.author) 116 + |> mem "category" Bushel_types.string_option_jsont ~dec_absent:None 117 + ~enc_omit:Option.is_none ~enc:(fun n -> n.category) 118 + |> mem "standardsite" Bushel_types.string_option_jsont ~dec_absent:None 119 + ~enc_omit:Option.is_none ~enc:(fun n -> n.standardsite) 120 + |> finish 121 + 122 + (** {1 Parsing} *) 123 + 124 + let of_frontmatter (fm : Frontmatter.t) : (t, string) result = 125 + (* Extract slug and date from filename to use as defaults *) 126 + let default_slug, default_date = 127 + match Frontmatter.fname fm with 128 + | Some fname -> 129 + (match Frontmatter.slug_of_fname fname with 130 + | Ok (s, d) -> (s, Option.fold ~none:(1, 1, 1) ~some:Ptime.to_date d) 131 + | Error _ -> ("", (1, 1, 1))) 132 + | None -> ("", (1, 1, 1)) 133 + in 134 + (* Get via fields manually since they're two separate fields *) 135 + let via = 136 + match Frontmatter.find_string "via" fm, Frontmatter.find_string "via-url" fm with 137 + | Some a, Some b -> Some (a, b) 138 + | None, Some b -> Some ("", b) 139 + | _ -> None 140 + in 141 + match Frontmatter.decode (jsont ~default_date ~default_slug) fm with 142 + | Error e -> Error e 143 + | Ok n -> Ok { n with body = Frontmatter.body fm; via } 144 + 145 + (** {1 Pretty Printing} *) 146 + 147 + let pp ppf n = 148 + let open Fmt in 149 + pf ppf "@[<v>"; 150 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note"; 151 + pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n); 152 + pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n); 153 + let (year, month, day) = date n in 154 + pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day; 155 + (match n.updated with 156 + | Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d 157 + | None -> ()); 158 + pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n); 159 + pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page; 160 + pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n); 161 + (match doi n with 162 + | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d 163 + | None -> ()); 164 + (match synopsis n with 165 + | Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn 166 + | None -> ()); 167 + (match titleimage n with 168 + | Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img 169 + | None -> ()); 170 + (match n.via with 171 + | Some (label, url) -> 172 + if label <> "" then 173 + pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url 174 + else 175 + pf ppf "%a: %a@," (styled `Bold string) "Via" string url 176 + | None -> ()); 177 + (match standardsite n with 178 + | Some site -> pf ppf "%a: %a@," (styled `Bold string) "Standard Site" string site 179 + | None -> ()); 180 + let t = tags n in 181 + if t <> [] then 182 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 183 + (match sidebar n with 184 + | Some sb -> 185 + pf ppf "@,"; 186 + pf ppf "%a:@," (styled `Bold string) "Sidebar"; 187 + pf ppf "%a@," string sb 188 + | None -> ()); 189 + let bd = body n in 190 + if bd <> "" then begin 191 + pf ppf "@,"; 192 + pf ppf "%a:@," (styled `Bold string) "Body"; 193 + pf ppf "%a@," string bd; 194 + end; 195 + pf ppf "@]"
+287
lib/bushel_paper.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Paper entry type for Bushel *) 7 + 8 + (** Classification of paper type *) 9 + type classification = Full | Short | Preprint 10 + 11 + let string_of_classification = function 12 + | Full -> "full" 13 + | Short -> "short" 14 + | Preprint -> "preprint" 15 + 16 + let classification_of_string = function 17 + | "full" -> Full 18 + | "short" -> Short 19 + | "preprint" -> Preprint 20 + | _ -> Full 21 + 22 + type t = { 23 + slug : string; 24 + ver : string; 25 + title : string; 26 + authors : string list; 27 + year : int; 28 + month : int; 29 + bibtype : string; 30 + publisher : string; 31 + booktitle : string; 32 + journal : string; 33 + institution : string; 34 + pages : string; 35 + volume : string option; 36 + number : string option; 37 + doi : string option; 38 + url : string option; 39 + video : string option; 40 + isbn : string; 41 + editor : string; 42 + bib : string; 43 + tags : string list; 44 + projects : string list; 45 + slides : string list; 46 + abstract : string; 47 + latest : bool; 48 + selected : bool; 49 + classification : classification option; 50 + note : string option; 51 + } 52 + 53 + type ts = t list 54 + 55 + (** {1 Accessors} *) 56 + 57 + let slug { slug; _ } = slug 58 + let title { title; _ } = title 59 + let authors { authors; _ } = authors 60 + let year { year; _ } = year 61 + let bibtype { bibtype; _ } = bibtype 62 + let publisher { publisher; _ } = publisher 63 + let booktitle { booktitle; _ } = booktitle 64 + let journal { journal; _ } = journal 65 + let institution { institution; _ } = institution 66 + let pages { pages; _ } = pages 67 + let volume { volume; _ } = volume 68 + let number { number; _ } = number 69 + let doi { doi; _ } = doi 70 + let url { url; _ } = url 71 + let video { video; _ } = video 72 + let isbn { isbn; _ } = isbn 73 + let editor { editor; _ } = editor 74 + let bib { bib; _ } = bib 75 + let tags { tags; _ } = tags 76 + let project_slugs { projects; _ } = projects 77 + let slides { slides; _ } = slides 78 + let abstract { abstract; _ } = abstract 79 + let selected { selected; _ } = selected 80 + let note { note; _ } = note 81 + let classification { classification; bibtype; journal; booktitle; title; _ } = 82 + match classification with 83 + | Some c -> c 84 + | None -> 85 + (* Heuristic classification based on metadata *) 86 + let bibtype_lower = String.lowercase_ascii bibtype in 87 + let journal_lower = String.lowercase_ascii journal in 88 + let booktitle_lower = String.lowercase_ascii booktitle in 89 + let title_lower = String.lowercase_ascii title in 90 + let contains_any text patterns = 91 + List.exists (fun p -> 92 + try 93 + let re = Re.Perl.compile_pat ~opts:[`Caseless] p in 94 + Re.execp re text 95 + with _ -> false 96 + ) patterns 97 + in 98 + if contains_any journal_lower ["arxiv"] || 99 + contains_any booktitle_lower ["arxiv"] || 100 + bibtype_lower = "misc" || bibtype_lower = "techreport" 101 + then Preprint 102 + else if contains_any journal_lower ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] || 103 + contains_any booktitle_lower ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] || 104 + contains_any title_lower ["poster"] 105 + then Short 106 + else Full 107 + 108 + let date { year; month; _ } = (year, month, 1) 109 + let datetime p = Bushel_types.ptime_of_date_exn (date p) 110 + 111 + (** {1 Comparison} *) 112 + 113 + let compare p2 p1 = 114 + let d1 = try datetime p1 with _ -> Bushel_types.ptime_of_date_exn (1977, 1, 1) in 115 + let d2 = try datetime p2 with _ -> Bushel_types.ptime_of_date_exn (1977, 1, 1) in 116 + Ptime.compare d1 d2 117 + 118 + (** {1 Lookup} *) 119 + 120 + let slugs ts = 121 + List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts 122 + 123 + let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts 124 + 125 + let get_papers ~slug ts = 126 + List.filter (fun p -> p.slug = slug && p.latest <> true) ts |> List.sort compare 127 + 128 + (** Convert bibtype to tag *) 129 + let tag_of_bibtype bt = 130 + match String.lowercase_ascii bt with 131 + | "article" -> "journal" 132 + | "inproceedings" -> "conference" 133 + | "techreport" -> "report" 134 + | "misc" -> "preprint" 135 + | "book" -> "book" 136 + | x -> x 137 + 138 + (** Compute version tracking *) 139 + let tv (l : t list) = 140 + let h = Hashtbl.create 7 in 141 + List.iter (fun { slug; ver; _ } -> 142 + match Hashtbl.find_opt h slug with 143 + | None -> Hashtbl.add h slug [ ver ] 144 + | Some l -> 145 + let l = ver :: l in 146 + let l = List.sort String.compare l in 147 + Hashtbl.replace h slug l 148 + ) l; 149 + List.map (fun p -> 150 + let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in 151 + let latest = p.ver = latest in 152 + { p with latest } 153 + ) l 154 + 155 + let best_url p = url p 156 + 157 + (** {1 Jsont Codec} *) 158 + 159 + let month_of_string s = 160 + match String.lowercase_ascii s with 161 + | "jan" -> 1 | "feb" -> 2 | "mar" -> 3 | "apr" -> 4 162 + | "may" -> 5 | "jun" -> 6 | "jul" -> 7 | "aug" -> 8 163 + | "sep" -> 9 | "oct" -> 10 | "nov" -> 11 | "dec" -> 12 164 + | _ -> 1 165 + 166 + let jsont : t Jsont.t = 167 + let open Jsont in 168 + let open Jsont.Object in 169 + let make title authors year month bibtype publisher booktitle journal institution 170 + pages volume number doi url video isbn editor bib tags projects slides 171 + selected classification note = 172 + { slug = ""; ver = ""; title; authors; year; month; bibtype; publisher; booktitle; 173 + journal; institution; pages; volume; number; doi; url; video; isbn; editor; bib; 174 + tags; projects; slides; abstract = ""; latest = false; selected; 175 + classification; note } 176 + in 177 + map ~kind:"Paper" make 178 + |> mem "title" string ~enc:(fun p -> p.title) 179 + |> mem "author" (list string) ~dec_absent:[] ~enc:(fun p -> p.authors) 180 + |> mem "year" (of_of_string ~kind:"year" (fun s -> Ok (int_of_string s)) ~enc:string_of_int) 181 + ~enc:(fun p -> p.year) 182 + |> mem "month" (of_of_string ~kind:"month" (fun s -> Ok (month_of_string s)) ~enc:(fun m -> 183 + match m with 1 -> "jan" | 2 -> "feb" | 3 -> "mar" | 4 -> "apr" 184 + | 5 -> "may" | 6 -> "jun" | 7 -> "jul" | 8 -> "aug" 185 + | 9 -> "sep" | 10 -> "oct" | 11 -> "nov" | 12 -> "dec" | _ -> "jan")) 186 + ~dec_absent:1 ~enc:(fun p -> p.month) 187 + |> mem "bibtype" string ~enc:(fun p -> p.bibtype) 188 + |> mem "publisher" string ~dec_absent:"" ~enc:(fun p -> p.publisher) 189 + |> mem "booktitle" string ~dec_absent:"" ~enc:(fun p -> p.booktitle) 190 + |> mem "journal" string ~dec_absent:"" ~enc:(fun p -> p.journal) 191 + |> mem "institution" string ~dec_absent:"" ~enc:(fun p -> p.institution) 192 + |> mem "pages" string ~dec_absent:"" ~enc:(fun p -> p.pages) 193 + |> mem "volume" Bushel_types.string_option_jsont ~dec_absent:None 194 + ~enc_omit:Option.is_none ~enc:(fun p -> p.volume) 195 + |> mem "number" Bushel_types.string_option_jsont ~dec_absent:None 196 + ~enc_omit:Option.is_none ~enc:(fun p -> p.number) 197 + |> mem "doi" Bushel_types.string_option_jsont ~dec_absent:None 198 + ~enc_omit:Option.is_none ~enc:(fun p -> p.doi) 199 + |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None 200 + ~enc_omit:Option.is_none ~enc:(fun p -> p.url) 201 + |> mem "video" Bushel_types.string_option_jsont ~dec_absent:None 202 + ~enc_omit:Option.is_none ~enc:(fun p -> p.video) 203 + |> mem "isbn" string ~dec_absent:"" ~enc:(fun p -> p.isbn) 204 + |> mem "editor" string ~dec_absent:"" ~enc:(fun p -> p.editor) 205 + |> mem "bib" string ~dec_absent:"" ~enc:(fun p -> p.bib) 206 + |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun p -> p.tags) 207 + |> mem "projects" (list string) ~dec_absent:[] ~enc:(fun p -> p.projects) 208 + |> mem "slides" (list string) ~dec_absent:[] ~enc:(fun p -> p.slides) 209 + |> mem "selected" bool ~dec_absent:false ~enc:(fun p -> p.selected) 210 + |> mem "classification" (option (of_of_string ~kind:"classification" 211 + (fun s -> Ok (classification_of_string s)) ~enc:string_of_classification)) 212 + ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun p -> p.classification) 213 + |> mem "note" Bushel_types.string_option_jsont ~dec_absent:None 214 + ~enc_omit:Option.is_none ~enc:(fun p -> p.note) 215 + |> finish 216 + 217 + (** {1 Parsing} *) 218 + 219 + let of_frontmatter ~slug ~ver (fm : Frontmatter.t) : (t, string) result = 220 + match Frontmatter.decode jsont fm with 221 + | Error e -> Error e 222 + | Ok p -> 223 + (* Compute full tags including bibtype and projects *) 224 + let keywords = Frontmatter.find_strings "keywords" fm in 225 + let all_tags = 226 + List.flatten [p.tags; keywords; [tag_of_bibtype p.bibtype]; p.projects] 227 + in 228 + Ok { p with 229 + slug; 230 + ver; 231 + abstract = Frontmatter.body fm; 232 + tags = all_tags } 233 + 234 + (** {1 Pretty Printing} *) 235 + 236 + let pp ppf p = 237 + let open Fmt in 238 + pf ppf "@[<v>"; 239 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper"; 240 + pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p); 241 + pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver; 242 + pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p); 243 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p); 244 + pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p); 245 + pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p); 246 + (match doi p with 247 + | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d 248 + | None -> ()); 249 + (match url p with 250 + | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u 251 + | None -> ()); 252 + (match video p with 253 + | Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v 254 + | None -> ()); 255 + let projs = project_slugs p in 256 + if projs <> [] then 257 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs; 258 + let sl = slides p in 259 + if sl <> [] then 260 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl; 261 + (match bibtype p with 262 + | "article" -> 263 + pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p); 264 + (match volume p with 265 + | Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol 266 + | None -> ()); 267 + (match number p with 268 + | Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss 269 + | None -> ()); 270 + let pgs = pages p in 271 + if pgs <> "" then 272 + pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs; 273 + | "inproceedings" -> 274 + pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p); 275 + let pgs = pages p in 276 + if pgs <> "" then 277 + pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs; 278 + | "techreport" -> 279 + pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p); 280 + (match number p with 281 + | Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num 282 + | None -> ()); 283 + | _ -> ()); 284 + pf ppf "@,"; 285 + pf ppf "%a:@," (styled `Bold string) "Abstract"; 286 + pf ppf "%a@," (styled `Faint string) (abstract p); 287 + pf ppf "@]"
+100
lib/bushel_project.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Project entry type for Bushel *) 7 + 8 + type t = { 9 + slug : string; 10 + title : string; 11 + start : int; (** Start year *) 12 + finish : int option; (** End year, None if ongoing *) 13 + tags : string list; 14 + ideas : string; (** Ideas page reference *) 15 + body : string; 16 + } 17 + 18 + type ts = t list 19 + 20 + (** {1 Accessors} *) 21 + 22 + let slug { slug; _ } = slug 23 + let title { title; _ } = title 24 + let start { start; _ } = start 25 + let finish { finish; _ } = finish 26 + let tags { tags; _ } = tags 27 + let ideas { ideas; _ } = ideas 28 + let body { body; _ } = body 29 + 30 + (** {1 Comparison} *) 31 + 32 + let compare a b = 33 + match Int.compare a.start b.start with 34 + | 0 -> Int.compare (Option.value ~default:9999 b.finish) (Option.value ~default:9999 a.finish) 35 + | n -> n 36 + 37 + (** {1 Lookup} *) 38 + 39 + let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects 40 + 41 + (** {1 Parsing} *) 42 + 43 + let of_frontmatter (fm : Frontmatter.t) : (t, string) result = 44 + (* Extract slug from filename *) 45 + let slug = 46 + match Frontmatter.fname fm with 47 + | Some fname -> 48 + (match Frontmatter.slug_of_fname fname with 49 + | Ok (s, _) -> s 50 + | Error _ -> "") 51 + | None -> "" 52 + in 53 + (* Extract date to get start year *) 54 + let start = 55 + match Frontmatter.find "date" fm with 56 + | Some (`String s) -> 57 + (try 58 + match String.split_on_char '-' s with 59 + | y :: _ -> int_of_string y 60 + | _ -> 2000 61 + with _ -> 2000) 62 + | _ -> 2000 63 + in 64 + (* Extract finish year *) 65 + let finish = 66 + match Frontmatter.find_string "finish" fm with 67 + | Some s -> 68 + (try 69 + match String.split_on_char '-' s with 70 + | y :: _ -> Some (int_of_string y) 71 + | _ -> None 72 + with _ -> None) 73 + | None -> None 74 + in 75 + let title = Frontmatter.find_string "title" fm |> Option.value ~default:"" in 76 + let tags = Frontmatter.find_strings "tags" fm in 77 + let ideas = Frontmatter.find_string "ideas" fm |> Option.value ~default:"" in 78 + let body = Frontmatter.body fm in 79 + Ok { slug; title; start; finish; tags; ideas; body } 80 + 81 + (** {1 Pretty Printing} *) 82 + 83 + let pp ppf p = 84 + let open Fmt in 85 + pf ppf "@[<v>"; 86 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project"; 87 + pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug; 88 + pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p); 89 + pf ppf "%a: %d@," (styled `Bold string) "Start" p.start; 90 + (match p.finish with 91 + | Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year 92 + | None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish"); 93 + let t = tags p in 94 + if t <> [] then 95 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 96 + pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p); 97 + pf ppf "@,"; 98 + pf ppf "%a:@," (styled `Bold string) "Body"; 99 + pf ppf "%a@," string (body p); 100 + pf ppf "@]"
+88
lib/bushel_tags.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Tag system for Bushel entries *) 7 + 8 + type t = 9 + [ `Slug of string (** :foo points to the specific slug foo *) 10 + | `Contact of string (** \@foo points to contact foo *) 11 + | `Set of string (** #papers points to all Paper entries *) 12 + | `Text of string (** foo points to a free text "foo" *) 13 + | `Year of int (** a number between 1900--2100 is interpreted as a year *) 14 + ] 15 + 16 + (** {1 Predicates} *) 17 + 18 + let is_text = function `Text _ -> true | _ -> false 19 + let is_slug = function `Slug _ -> true | _ -> false 20 + let is_contact = function `Contact _ -> true | _ -> false 21 + let is_set = function `Set _ -> true | _ -> false 22 + let is_year = function `Year _ -> true | _ -> false 23 + 24 + (** {1 Parsing} *) 25 + 26 + let of_string s : t = 27 + if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s); 28 + match s.[0] with 29 + | ':' -> 30 + let slug = String.sub s 1 (String.length s - 1) in 31 + `Slug slug 32 + | '@' -> 33 + let handle = String.sub s 1 (String.length s - 1) in 34 + `Contact handle 35 + | '#' -> 36 + let cl = String.sub s 1 (String.length s - 1) in 37 + `Set cl 38 + | _ -> 39 + (try 40 + let x = int_of_string s in 41 + if x > 1900 && x < 2100 then `Year x else `Text s 42 + with _ -> `Text s) 43 + 44 + let of_string_list l = List.map of_string l 45 + 46 + (** {1 Serialization} *) 47 + 48 + let to_string = function 49 + | `Slug t -> ":" ^ t 50 + | `Contact c -> "@" ^ c 51 + | `Set s -> "#" ^ s 52 + | `Text t -> t 53 + | `Year y -> string_of_int y 54 + 55 + let to_raw_string = function 56 + | `Slug t -> t 57 + | `Contact c -> c 58 + | `Set s -> s 59 + | `Text t -> t 60 + | `Year y -> string_of_int y 61 + 62 + (** {1 Pretty Printing} *) 63 + 64 + let pp ppf t = Fmt.string ppf (to_string t) 65 + 66 + (** {1 Tag Filtering} *) 67 + 68 + let mentions tags = 69 + List.filter (function 70 + | `Contact _ | `Slug _ -> true 71 + | _ -> false 72 + ) tags 73 + 74 + (** {1 Tag Counting} *) 75 + 76 + let count_tags ?h fn vs = 77 + let h = match h with 78 + | Some h -> h 79 + | None -> Hashtbl.create 42 80 + in 81 + List.iter (fun ent -> 82 + List.iter (fun tag -> 83 + match Hashtbl.find_opt h tag with 84 + | Some num -> Hashtbl.replace h tag (num + 1) 85 + | None -> Hashtbl.add h tag 1 86 + ) (fn ent) 87 + ) vs; 88 + h
+96
lib/bushel_types.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Common types and Jsont codecs for Bushel *) 7 + 8 + (** {1 Date Types} *) 9 + 10 + type date = Ptime.date 11 + (** A calendar date (year, month, day). *) 12 + 13 + (** {1 Jsont Codecs} *) 14 + 15 + let ptime_date_jsont : Ptime.date Jsont.t = 16 + let dec s = 17 + try 18 + match String.split_on_char '-' s with 19 + | [y; m; d] -> 20 + let year = int_of_string y in 21 + let month = int_of_string m in 22 + let day = int_of_string d in 23 + Ok (year, month, day) 24 + | _ -> 25 + Error (Printf.sprintf "Invalid date format: %s (expected YYYY-MM-DD)" s) 26 + with _ -> 27 + Error (Printf.sprintf "Invalid date: %s" s) 28 + in 29 + let enc (y, m, d) = Printf.sprintf "%04d-%02d-%02d" y m d in 30 + Jsont.of_of_string ~kind:"Ptime.date" dec ~enc 31 + 32 + let ptime_jsont : Ptime.t Jsont.t = 33 + let dec s = 34 + (* Try RFC3339 first *) 35 + match Ptime.of_rfc3339 s with 36 + | Ok (t, _, _) -> Ok t 37 + | Error _ -> 38 + (* Try date-only format *) 39 + try 40 + match String.split_on_char '-' s with 41 + | [y; m; d] -> 42 + let year = int_of_string y in 43 + let month = int_of_string m in 44 + let day = int_of_string d in 45 + (match Ptime.of_date (year, month, day) with 46 + | Some t -> Ok t 47 + | None -> Error (Printf.sprintf "Invalid date: %s" s)) 48 + | _ -> 49 + Error (Printf.sprintf "Invalid timestamp: %s" s) 50 + with _ -> 51 + Error (Printf.sprintf "Invalid timestamp: %s" s) 52 + in 53 + let enc t = 54 + let (y, m, d), ((hh, mm, ss), _) = Ptime.to_date_time t in 55 + if hh = 0 && mm = 0 && ss = 0 then 56 + Printf.sprintf "%04d-%02d-%02d" y m d 57 + else 58 + Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" y m d hh mm ss 59 + in 60 + Jsont.of_of_string ~kind:"Ptime.t" dec ~enc 61 + 62 + let ptime_option_jsont : Ptime.t option Jsont.t = 63 + let null = Jsont.null None in 64 + let some = Jsont.map ~dec:(fun t -> Some t) ~enc:(function Some t -> t | None -> assert false) ptime_jsont in 65 + Jsont.any ~dec_null:null ~dec_string:some ~enc:(function None -> null | Some _ -> some) () 66 + 67 + let string_option_jsont : string option Jsont.t = 68 + Jsont.option Jsont.string 69 + 70 + (** {1 Helper Functions} *) 71 + 72 + let ptime_of_date_exn date = 73 + match Ptime.of_date date with 74 + | Some t -> t 75 + | None -> 76 + let (y, m, d) = date in 77 + failwith (Printf.sprintf "Invalid date: %04d-%02d-%02d" y m d) 78 + 79 + let date_of_ptime t = Ptime.to_date t 80 + 81 + let compare_dates (d1 : date) (d2 : date) = 82 + let t1 = ptime_of_date_exn d1 in 83 + let t2 = ptime_of_date_exn d2 in 84 + Ptime.compare t1 t2 85 + 86 + let format_date (y, m, d) = 87 + Printf.sprintf "%04d-%02d-%02d" y m d 88 + 89 + let month_name = function 90 + | 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April" 91 + | 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August" 92 + | 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December" 93 + | _ -> "Unknown" 94 + 95 + let format_date_human (y, m, _d) = 96 + Printf.sprintf "%s %d" (month_name m) y
+101
lib/bushel_util.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Utility functions for Bushel *) 7 + 8 + (** Count words in a string. *) 9 + let count_words (text : string) : int = 10 + let len = String.length text in 11 + let rec count_words_helper (index : int) (in_word : bool) (count : int) : int = 12 + if index >= len then 13 + if in_word then count + 1 else count 14 + else 15 + let char = String.get text index in 16 + let is_whitespace = 17 + Char.equal char ' ' 18 + || Char.equal char '\t' 19 + || Char.equal char '\n' 20 + || Char.equal char '\r' 21 + in 22 + if is_whitespace then 23 + if in_word then count_words_helper (index + 1) false (count + 1) 24 + else count_words_helper (index + 1) false count 25 + else count_words_helper (index + 1) true count 26 + in 27 + count_words_helper 0 false 0 28 + 29 + (** Get the first paragraph/hunk from text (up to double newline). *) 30 + let first_hunk s = 31 + let lines = String.split_on_char '\n' s in 32 + let rec aux acc = function 33 + | [] -> String.concat "\n" (List.rev acc) 34 + | "" :: "" :: _ -> String.concat "\n" (List.rev acc) 35 + | line :: rest -> aux (line :: acc) rest 36 + in 37 + aux [] lines 38 + 39 + (** Get first and last hunks from text. *) 40 + let first_and_last_hunks s = 41 + let lines = String.split_on_char '\n' s in 42 + let rec aux acc = function 43 + | [] -> String.concat "\n" (List.rev acc), "" 44 + | "" :: "" :: rest -> 45 + String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest) 46 + | line :: rest -> aux (line :: acc) rest 47 + in 48 + aux [] lines 49 + 50 + (** Find all footnote definition lines in text. *) 51 + let find_footnote_lines s = 52 + let lines = String.split_on_char '\n' s in 53 + let is_footnote_def line = 54 + String.length line > 3 && 55 + line.[0] = '[' && 56 + line.[1] = '^' && 57 + String.contains line ':' && 58 + let colon_pos = String.index line ':' in 59 + colon_pos > 2 && line.[colon_pos - 1] = ']' 60 + in 61 + let is_continuation line = 62 + String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t') 63 + in 64 + let rec collect_footnotes acc in_footnote = function 65 + | [] -> List.rev acc 66 + | line :: rest -> 67 + if is_footnote_def line then 68 + collect_footnotes (line :: acc) true rest 69 + else if in_footnote && is_continuation line then 70 + collect_footnotes (line :: acc) true rest 71 + else 72 + collect_footnotes acc false rest 73 + in 74 + collect_footnotes [] false lines 75 + 76 + (** Augment first hunk with footnote definitions from last hunk. *) 77 + let first_hunk_with_footnotes s = 78 + let first, last = first_and_last_hunks s in 79 + let footnote_lines = find_footnote_lines last in 80 + if footnote_lines = [] then first 81 + else first ^ "\n\n" ^ String.concat "\n" footnote_lines 82 + 83 + (** Trim leading/trailing whitespace and normalize multiple blank lines. *) 84 + let normalize_body s = 85 + let trimmed = String.trim s in 86 + (* Replace 3+ consecutive newlines with exactly 2 newlines *) 87 + let re = Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')]) in 88 + Re.replace_string re ~by:"\n\n" trimmed 89 + 90 + (** Extract domain from URL. *) 91 + let extract_domain url = 92 + try 93 + let uri = Uri.of_string url in 94 + match Uri.host uri with 95 + | Some host -> host 96 + | None -> "unknown" 97 + with _ -> "unknown" 98 + 99 + (** Check if a string is a valid URL. *) 100 + let is_url s = 101 + String.starts_with ~prefix:"http://" s || String.starts_with ~prefix:"https://" s
+129
lib/bushel_video.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Video entry type for Bushel *) 7 + 8 + type t = { 9 + slug : string; 10 + title : string; 11 + published_date : Ptime.t; 12 + uuid : string; 13 + description : string; 14 + url : string; 15 + talk : bool; 16 + paper : string option; 17 + project : string option; 18 + tags : string list; 19 + } 20 + 21 + type ts = t list 22 + 23 + (** {1 Accessors} *) 24 + 25 + let slug { slug; _ } = slug 26 + let title { title; _ } = title 27 + let uuid { uuid; _ } = uuid 28 + let url { url; _ } = url 29 + let description { description; _ } = description 30 + let body = description (* Alias for consistency *) 31 + let talk { talk; _ } = talk 32 + let paper { paper; _ } = paper 33 + let project { project; _ } = project 34 + let tags { tags; _ } = tags 35 + 36 + let date { published_date; _ } = Ptime.to_date published_date 37 + let datetime { published_date; _ } = published_date 38 + 39 + (** {1 Comparison} *) 40 + 41 + let compare a b = Ptime.compare b.published_date a.published_date 42 + 43 + (** {1 Lookup} *) 44 + 45 + let lookup videos uuid = List.find_opt (fun v -> v.uuid = uuid) videos 46 + let lookup_by_slug videos slug = List.find_opt (fun v -> v.slug = slug) videos 47 + 48 + (** {1 Jsont Codec} *) 49 + 50 + let jsont : t Jsont.t = 51 + let open Jsont in 52 + let open Jsont.Object in 53 + let make title published_date uuid url talk tags paper project = 54 + { slug = uuid; title; published_date; uuid; description = ""; url; 55 + talk; paper; project; tags } 56 + in 57 + map ~kind:"Video" make 58 + |> mem "title" string ~enc:(fun v -> v.title) 59 + |> mem "published_date" Bushel_types.ptime_jsont ~enc:(fun v -> v.published_date) 60 + |> mem "uuid" string ~enc:(fun v -> v.uuid) 61 + |> mem "url" string ~enc:(fun v -> v.url) 62 + |> mem "talk" bool ~dec_absent:false ~enc:(fun v -> v.talk) 63 + |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags) 64 + |> mem "paper" Bushel_types.string_option_jsont ~dec_absent:None 65 + ~enc_omit:Option.is_none ~enc:(fun v -> v.paper) 66 + |> mem "project" Bushel_types.string_option_jsont ~dec_absent:None 67 + ~enc_omit:Option.is_none ~enc:(fun v -> v.project) 68 + |> finish 69 + 70 + (** {1 Parsing} *) 71 + 72 + let of_frontmatter (fm : Frontmatter.t) : (t, string) result = 73 + match Frontmatter.decode jsont fm with 74 + | Error e -> Error e 75 + | Ok v -> 76 + Ok { v with 77 + slug = v.uuid; 78 + description = Frontmatter.body fm } 79 + 80 + (** {1 YAML Serialization} *) 81 + 82 + let to_yaml t = 83 + let open Yamlrw.Util in 84 + let fields = [ 85 + ("title", string t.title); 86 + ("description", string t.description); 87 + ("url", string t.url); 88 + ("uuid", string t.uuid); 89 + ("slug", string t.slug); 90 + ("published_date", string (Ptime.to_rfc3339 t.published_date)); 91 + ("talk", bool t.talk); 92 + ("tags", strings t.tags); 93 + ] in 94 + let fields = match t.paper with 95 + | None -> fields 96 + | Some p -> ("paper", string p) :: fields 97 + in 98 + let fields = match t.project with 99 + | None -> fields 100 + | Some p -> ("project", string p) :: fields 101 + in 102 + obj fields 103 + 104 + (** {1 Pretty Printing} *) 105 + 106 + let pp ppf v = 107 + let open Fmt in 108 + pf ppf "@[<v>"; 109 + pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video"; 110 + pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v); 111 + pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v); 112 + pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v); 113 + let (year, month, day) = date v in 114 + pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day; 115 + pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v); 116 + pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v); 117 + (match paper v with 118 + | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p 119 + | None -> ()); 120 + (match project v with 121 + | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p 122 + | None -> ()); 123 + let t = tags v in 124 + if t <> [] then 125 + pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t; 126 + pf ppf "@,"; 127 + pf ppf "%a:@," (styled `Bold string) "Description"; 128 + pf ppf "%a@," string v.description; 129 + pf ppf "@]"
+13
lib/dune
··· 1 + (library 2 + (name bushel) 3 + (public_name bushel) 4 + (libraries 5 + frontmatter 6 + cmarkit 7 + jsont 8 + jsont.bytesrw 9 + ptime 10 + re 11 + uri 12 + fmt 13 + yamlrw))
+390
lib_config/bushel_config.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel configuration management with XDG paths *) 7 + 8 + (** {1 Types} *) 9 + 10 + type peertube_server = { 11 + name : string; 12 + endpoint : string; 13 + } 14 + 15 + type t = { 16 + (* Data paths *) 17 + data_dir : string; 18 + 19 + (* Image configuration *) 20 + remote_host : string; 21 + remote_user : string; 22 + remote_source_dir : string; 23 + local_source_dir : string; 24 + local_output_dir : string; 25 + paper_thumbs_subdir : string; 26 + contact_faces_subdir : string; 27 + video_thumbs_subdir : string; 28 + 29 + (* Paper PDFs *) 30 + paper_pdfs_dir : string; 31 + 32 + (* Immich *) 33 + immich_endpoint : string; 34 + immich_api_key_file : string; 35 + 36 + (* PeerTube *) 37 + peertube_servers : peertube_server list; 38 + 39 + (* Typesense *) 40 + typesense_endpoint : string; 41 + typesense_api_key_file : string; 42 + openai_api_key_file : string; 43 + 44 + (* Zotero *) 45 + zotero_translation_server : string; 46 + } 47 + 48 + (** {1 XDG Paths} *) 49 + 50 + let xdg_config_home () = 51 + match Sys.getenv_opt "XDG_CONFIG_HOME" with 52 + | Some dir -> dir 53 + | None -> 54 + match Sys.getenv_opt "HOME" with 55 + | Some home -> Filename.concat home ".config" 56 + | None -> ".config" 57 + 58 + let config_dir () = Filename.concat (xdg_config_home ()) "bushel" 59 + let config_file () = Filename.concat (config_dir ()) "config.toml" 60 + 61 + (** {1 Default Configuration} *) 62 + 63 + let default () = 64 + let home = Sys.getenv_opt "HOME" |> Option.value ~default:"." in 65 + { 66 + data_dir = Filename.concat home "bushel/data"; 67 + remote_host = "localhost"; 68 + remote_user = Sys.getenv_opt "USER" |> Option.value ~default:"user"; 69 + remote_source_dir = "/var/www/images/originals"; 70 + local_source_dir = Filename.concat home "bushel/images/originals"; 71 + local_output_dir = Filename.concat home "bushel/images/web"; 72 + paper_thumbs_subdir = "papers"; 73 + contact_faces_subdir = "faces"; 74 + video_thumbs_subdir = "videos"; 75 + paper_pdfs_dir = Filename.concat home "bushel/pdfs"; 76 + immich_endpoint = "http://localhost:2283"; 77 + immich_api_key_file = Filename.concat (config_dir ()) "immich-key"; 78 + peertube_servers = []; 79 + typesense_endpoint = "http://localhost:8108"; 80 + typesense_api_key_file = Filename.concat (config_dir ()) "typesense-key"; 81 + openai_api_key_file = Filename.concat (config_dir ()) "openai-key"; 82 + zotero_translation_server = "http://localhost:1969"; 83 + } 84 + 85 + (** {1 Path Helpers} *) 86 + 87 + let expand_path path = 88 + if String.length path > 0 && path.[0] = '~' then 89 + match Sys.getenv_opt "HOME" with 90 + | Some home -> home ^ String.sub path 1 (String.length path - 1) 91 + | None -> path 92 + else path 93 + 94 + let paper_thumbs_dir t = Filename.concat t.local_output_dir t.paper_thumbs_subdir 95 + let contact_faces_dir t = Filename.concat t.local_output_dir t.contact_faces_subdir 96 + let video_thumbs_dir t = Filename.concat t.local_output_dir t.video_thumbs_subdir 97 + 98 + (** {1 Tomlt Codecs} *) 99 + 100 + let peertube_server_codec = 101 + let open Tomlt in 102 + let open Tomlt.Table in 103 + obj (fun name endpoint -> { name; endpoint }) 104 + |> mem "name" string ~enc:(fun s -> s.name) 105 + |> mem "endpoint" string ~enc:(fun s -> s.endpoint) 106 + |> finish 107 + 108 + let data_codec ~default = 109 + let open Tomlt in 110 + let open Tomlt.Table in 111 + obj (fun local_dir -> local_dir) 112 + |> mem "local_dir" string ~dec_absent:default.data_dir ~enc:Fun.id 113 + |> finish 114 + 115 + let images_codec ~default = 116 + let open Tomlt in 117 + let open Tomlt.Table in 118 + obj (fun remote_host remote_user remote_source_dir local_source_dir 119 + local_output_dir paper_thumbs contact_faces video_thumbs -> 120 + (remote_host, remote_user, remote_source_dir, local_source_dir, 121 + local_output_dir, paper_thumbs, contact_faces, video_thumbs)) 122 + |> mem "remote_host" string ~dec_absent:default.remote_host 123 + ~enc:(fun (h,_,_,_,_,_,_,_) -> h) 124 + |> mem "remote_user" string ~dec_absent:default.remote_user 125 + ~enc:(fun (_,u,_,_,_,_,_,_) -> u) 126 + |> mem "remote_source_dir" string ~dec_absent:default.remote_source_dir 127 + ~enc:(fun (_,_,r,_,_,_,_,_) -> r) 128 + |> mem "local_source_dir" string ~dec_absent:default.local_source_dir 129 + ~enc:(fun (_,_,_,l,_,_,_,_) -> l) 130 + |> mem "local_output_dir" string ~dec_absent:default.local_output_dir 131 + ~enc:(fun (_,_,_,_,o,_,_,_) -> o) 132 + |> mem "paper_thumbs" string ~dec_absent:default.paper_thumbs_subdir 133 + ~enc:(fun (_,_,_,_,_,p,_,_) -> p) 134 + |> mem "contact_faces" string ~dec_absent:default.contact_faces_subdir 135 + ~enc:(fun (_,_,_,_,_,_,c,_) -> c) 136 + |> mem "video_thumbs" string ~dec_absent:default.video_thumbs_subdir 137 + ~enc:(fun (_,_,_,_,_,_,_,v) -> v) 138 + |> finish 139 + 140 + let papers_codec ~default = 141 + let open Tomlt in 142 + let open Tomlt.Table in 143 + obj Fun.id 144 + |> mem "pdfs_dir" string ~dec_absent:default.paper_pdfs_dir ~enc:Fun.id 145 + |> finish 146 + 147 + let immich_codec ~default = 148 + let open Tomlt in 149 + let open Tomlt.Table in 150 + obj (fun endpoint api_key_file -> (endpoint, api_key_file)) 151 + |> mem "endpoint" string ~dec_absent:default.immich_endpoint 152 + ~enc:(fun (e, _) -> e) 153 + |> mem "api_key_file" string ~dec_absent:default.immich_api_key_file 154 + ~enc:(fun (_, k) -> k) 155 + |> finish 156 + 157 + let peertube_codec = 158 + let open Tomlt in 159 + let open Tomlt.Table in 160 + obj Fun.id 161 + |> mem "servers" (list peertube_server_codec) ~dec_absent:[] ~enc:Fun.id 162 + |> finish 163 + 164 + let typesense_codec ~default = 165 + let open Tomlt in 166 + let open Tomlt.Table in 167 + obj (fun endpoint api_key_file openai_key_file -> 168 + (endpoint, api_key_file, openai_key_file)) 169 + |> mem "endpoint" string ~dec_absent:default.typesense_endpoint 170 + ~enc:(fun (e, _, _) -> e) 171 + |> mem "api_key_file" string ~dec_absent:default.typesense_api_key_file 172 + ~enc:(fun (_, k, _) -> k) 173 + |> mem "openai_key_file" string ~dec_absent:default.openai_api_key_file 174 + ~enc:(fun (_, _, o) -> o) 175 + |> finish 176 + 177 + let zotero_codec ~default = 178 + let open Tomlt in 179 + let open Tomlt.Table in 180 + obj Fun.id 181 + |> mem "translation_server" string ~dec_absent:default.zotero_translation_server 182 + ~enc:Fun.id 183 + |> finish 184 + 185 + let config_codec = 186 + let default = default () in 187 + let open Tomlt.Table in 188 + obj (fun data_dir images papers immich peertube typesense zotero -> 189 + let (remote_host, remote_user, remote_source_dir, local_source_dir, 190 + local_output_dir, paper_thumbs_subdir, contact_faces_subdir, 191 + video_thumbs_subdir) = images in 192 + let (immich_endpoint, immich_api_key_file) = immich in 193 + let (typesense_endpoint, typesense_api_key_file, openai_api_key_file) = typesense in 194 + { 195 + data_dir = expand_path data_dir; 196 + remote_host; 197 + remote_user; 198 + remote_source_dir = expand_path remote_source_dir; 199 + local_source_dir = expand_path local_source_dir; 200 + local_output_dir = expand_path local_output_dir; 201 + paper_thumbs_subdir; 202 + contact_faces_subdir; 203 + video_thumbs_subdir; 204 + paper_pdfs_dir = expand_path papers; 205 + immich_endpoint; 206 + immich_api_key_file = expand_path immich_api_key_file; 207 + peertube_servers = peertube; 208 + typesense_endpoint; 209 + typesense_api_key_file = expand_path typesense_api_key_file; 210 + openai_api_key_file = expand_path openai_api_key_file; 211 + zotero_translation_server = zotero; 212 + }) 213 + |> mem "data" (data_codec ~default) ~dec_absent:default.data_dir 214 + ~enc:(fun c -> c.data_dir) 215 + |> mem "images" (images_codec ~default) 216 + ~dec_absent:(default.remote_host, default.remote_user, 217 + default.remote_source_dir, default.local_source_dir, 218 + default.local_output_dir, default.paper_thumbs_subdir, 219 + default.contact_faces_subdir, default.video_thumbs_subdir) 220 + ~enc:(fun c -> (c.remote_host, c.remote_user, c.remote_source_dir, 221 + c.local_source_dir, c.local_output_dir, 222 + c.paper_thumbs_subdir, c.contact_faces_subdir, 223 + c.video_thumbs_subdir)) 224 + |> mem "papers" (papers_codec ~default) ~dec_absent:default.paper_pdfs_dir 225 + ~enc:(fun c -> c.paper_pdfs_dir) 226 + |> mem "immich" (immich_codec ~default) 227 + ~dec_absent:(default.immich_endpoint, default.immich_api_key_file) 228 + ~enc:(fun c -> (c.immich_endpoint, c.immich_api_key_file)) 229 + |> mem "peertube" peertube_codec ~dec_absent:[] 230 + ~enc:(fun c -> c.peertube_servers) 231 + |> mem "typesense" (typesense_codec ~default) 232 + ~dec_absent:(default.typesense_endpoint, default.typesense_api_key_file, 233 + default.openai_api_key_file) 234 + ~enc:(fun c -> (c.typesense_endpoint, c.typesense_api_key_file, 235 + c.openai_api_key_file)) 236 + |> mem "zotero" (zotero_codec ~default) 237 + ~dec_absent:default.zotero_translation_server 238 + ~enc:(fun c -> c.zotero_translation_server) 239 + |> finish 240 + 241 + (** {1 Loading} *) 242 + 243 + let of_string s = 244 + match Tomlt_bytesrw.decode_string config_codec s with 245 + | Ok config -> Ok config 246 + | Error e -> Error (Tomlt.Toml.Error.to_string e) 247 + 248 + let load_file path = 249 + try 250 + let ic = open_in path in 251 + let content = really_input_string ic (in_channel_length ic) in 252 + close_in ic; 253 + of_string content 254 + with 255 + | Sys_error msg -> Error (Printf.sprintf "Failed to read config: %s" msg) 256 + 257 + let load () = 258 + let path = config_file () in 259 + if Sys.file_exists path then 260 + load_file path 261 + else 262 + Ok (default ()) 263 + 264 + (** {1 API Key Loading} *) 265 + 266 + let read_api_key path = 267 + let path = expand_path path in 268 + try 269 + let ic = open_in path in 270 + let key = input_line ic |> String.trim in 271 + close_in ic; 272 + Ok key 273 + with 274 + | Sys_error msg -> Error (Printf.sprintf "Failed to read API key from %s: %s" path msg) 275 + | End_of_file -> Error (Printf.sprintf "API key file %s is empty" path) 276 + 277 + let immich_api_key t = read_api_key t.immich_api_key_file 278 + let typesense_api_key t = read_api_key t.typesense_api_key_file 279 + let openai_api_key t = read_api_key t.openai_api_key_file 280 + 281 + (** {1 Rsync Command} *) 282 + 283 + let rsync_source t = 284 + Printf.sprintf "%s@%s:%s" t.remote_user t.remote_host t.remote_source_dir 285 + 286 + let rsync_command t = 287 + Printf.sprintf "rsync -avz %s/ %s/" (rsync_source t) t.local_source_dir 288 + 289 + (** {1 Pretty Printing} *) 290 + 291 + let pp ppf t = 292 + let open Fmt in 293 + pf ppf "@[<v>"; 294 + pf ppf "%a:@," (styled `Bold string) "Bushel Configuration"; 295 + pf ppf " data_dir: %s@," t.data_dir; 296 + pf ppf " @[<v 2>images:@,"; 297 + pf ppf "remote: %s@%s:%s@," t.remote_user t.remote_host t.remote_source_dir; 298 + pf ppf "local_source: %s@," t.local_source_dir; 299 + pf ppf "local_output: %s@," t.local_output_dir; 300 + pf ppf "@]"; 301 + pf ppf " paper_pdfs: %s@," t.paper_pdfs_dir; 302 + pf ppf " immich: %s@," t.immich_endpoint; 303 + pf ppf " peertube servers: %d@," (List.length t.peertube_servers); 304 + pf ppf " typesense: %s@," t.typesense_endpoint; 305 + pf ppf " zotero: %s@," t.zotero_translation_server; 306 + pf ppf "@]" 307 + 308 + (** {1 Default Config Generation} *) 309 + 310 + let default_config_toml () = 311 + let home = Sys.getenv_opt "HOME" |> Option.value ~default:"~" in 312 + let user = Sys.getenv_opt "USER" |> Option.value ~default:"user" in 313 + Printf.sprintf {|# Bushel Configuration 314 + # Generated by: bushel init 315 + 316 + # Data directory containing your bushel entries 317 + [data] 318 + local_dir = "%s/bushel/data" 319 + 320 + # Image sync configuration 321 + # Images are rsynced from a remote server and processed locally 322 + [images] 323 + # Remote server settings (for rsync) 324 + remote_host = "example.com" 325 + remote_user = "%s" 326 + remote_source_dir = "/var/www/images/originals" 327 + 328 + # Local directories 329 + local_source_dir = "%s/bushel/images/originals" 330 + local_output_dir = "%s/bushel/images/web" 331 + 332 + # Subdirectories within local_output_dir for generated thumbnails 333 + paper_thumbs = "papers" 334 + contact_faces = "faces" 335 + video_thumbs = "videos" 336 + 337 + # Paper PDFs directory (for thumbnail generation) 338 + [papers] 339 + pdfs_dir = "%s/bushel/pdfs" 340 + 341 + # Immich integration for contact face thumbnails 342 + # Get your API key from Immich web UI -> Account Settings -> API Keys 343 + [immich] 344 + endpoint = "http://localhost:2283" 345 + api_key_file = "%s/.config/bushel/immich-key" 346 + 347 + # PeerTube servers for video thumbnails 348 + # Add servers as [[peertube.servers]] entries 349 + [peertube] 350 + # Example: 351 + # [[peertube.servers]] 352 + # name = "tilvids" 353 + # endpoint = "https://tilvids.com" 354 + # 355 + # [[peertube.servers]] 356 + # name = "spectra" 357 + # endpoint = "https://spectra.video" 358 + 359 + # Typesense search integration 360 + [typesense] 361 + endpoint = "http://localhost:8108" 362 + api_key_file = "%s/.config/bushel/typesense-key" 363 + openai_key_file = "%s/.config/bushel/openai-key" 364 + 365 + # Zotero Translation Server for DOI resolution 366 + # Run locally: docker run -p 1969:1969 zotero/translation-server 367 + [zotero] 368 + translation_server = "http://localhost:1969" 369 + |} home user home home home home home home 370 + 371 + let write_default_config ?(force=false) () = 372 + let dir = config_dir () in 373 + let path = config_file () in 374 + 375 + (* Check if config already exists *) 376 + if Sys.file_exists path && not force then 377 + Error (Printf.sprintf "Config file already exists: %s\nUse --force to overwrite." path) 378 + else begin 379 + (* Create directory if needed *) 380 + if not (Sys.file_exists dir) then begin 381 + Unix.mkdir dir 0o755 382 + end; 383 + 384 + (* Write config file *) 385 + let content = default_config_toml () in 386 + let oc = open_out path in 387 + output_string oc content; 388 + close_out oc; 389 + Ok path 390 + end
+157
lib_config/bushel_config.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel configuration management with XDG paths 7 + 8 + Configuration is loaded from [~/.config/bushel/config.toml] by default, 9 + with support for environment variable overrides via [XDG_CONFIG_HOME]. 10 + 11 + {1 Example config.toml} 12 + 13 + {v 14 + [data] 15 + local_dir = "/path/to/bushel/data" 16 + 17 + [images] 18 + remote_host = "example.com" 19 + remote_user = "anil" 20 + remote_source_dir = "/var/www/images/originals" 21 + local_source_dir = "/path/to/images/originals" 22 + local_output_dir = "/path/to/images/web" 23 + paper_thumbs = "papers" 24 + contact_faces = "faces" 25 + video_thumbs = "videos" 26 + 27 + [papers] 28 + pdfs_dir = "/path/to/paper-pdfs" 29 + 30 + [immich] 31 + endpoint = "https://photos.example.com" 32 + api_key_file = "~/.config/bushel/immich-key" 33 + 34 + [peertube] 35 + [[peertube.servers]] 36 + name = "crank" 37 + endpoint = "https://crank.recoil.org" 38 + 39 + [[peertube.servers]] 40 + name = "talks" 41 + endpoint = "https://talks.example.com" 42 + 43 + [typesense] 44 + endpoint = "https://search.example.com" 45 + api_key_file = "~/.config/bushel/typesense-key" 46 + openai_key_file = "~/.config/bushel/openai-key" 47 + 48 + [zotero] 49 + translation_server = "http://localhost:1969" 50 + v} 51 + *) 52 + 53 + (** {1 Types} *) 54 + 55 + type peertube_server = { 56 + name : string; 57 + endpoint : string; 58 + } 59 + (** A PeerTube server configuration. *) 60 + 61 + type t = { 62 + data_dir : string; 63 + remote_host : string; 64 + remote_user : string; 65 + remote_source_dir : string; 66 + local_source_dir : string; 67 + local_output_dir : string; 68 + paper_thumbs_subdir : string; 69 + contact_faces_subdir : string; 70 + video_thumbs_subdir : string; 71 + paper_pdfs_dir : string; 72 + immich_endpoint : string; 73 + immich_api_key_file : string; 74 + peertube_servers : peertube_server list; 75 + typesense_endpoint : string; 76 + typesense_api_key_file : string; 77 + openai_api_key_file : string; 78 + zotero_translation_server : string; 79 + } 80 + (** Complete bushel configuration. *) 81 + 82 + (** {1 XDG Paths} *) 83 + 84 + val xdg_config_home : unit -> string 85 + (** Return the XDG config home directory. *) 86 + 87 + val config_dir : unit -> string 88 + (** Return the bushel config directory ([~/.config/bushel]). *) 89 + 90 + val config_file : unit -> string 91 + (** Return the path to the config file ([~/.config/bushel/config.toml]). *) 92 + 93 + (** {1 Loading} *) 94 + 95 + val default : unit -> t 96 + (** Return the default configuration. *) 97 + 98 + val load : unit -> (t, string) result 99 + (** Load configuration from the default config file. 100 + Returns default config if file doesn't exist. *) 101 + 102 + val load_file : string -> (t, string) result 103 + (** Load configuration from a specific file path. *) 104 + 105 + val of_string : string -> (t, string) result 106 + (** Parse configuration from a TOML string. *) 107 + 108 + (** {1 Path Helpers} *) 109 + 110 + val expand_path : string -> string 111 + (** Expand [~] in paths to the home directory. *) 112 + 113 + val paper_thumbs_dir : t -> string 114 + (** Full path to paper thumbnails directory. *) 115 + 116 + val contact_faces_dir : t -> string 117 + (** Full path to contact faces directory. *) 118 + 119 + val video_thumbs_dir : t -> string 120 + (** Full path to video thumbnails directory. *) 121 + 122 + (** {1 API Keys} *) 123 + 124 + val read_api_key : string -> (string, string) result 125 + (** Read an API key from a file. *) 126 + 127 + val immich_api_key : t -> (string, string) result 128 + (** Read the Immich API key. *) 129 + 130 + val typesense_api_key : t -> (string, string) result 131 + (** Read the Typesense API key. *) 132 + 133 + val openai_api_key : t -> (string, string) result 134 + (** Read the OpenAI API key. *) 135 + 136 + (** {1 Rsync} *) 137 + 138 + val rsync_source : t -> string 139 + (** Return the rsync source string ([user@host:path]). *) 140 + 141 + val rsync_command : t -> string 142 + (** Return the full rsync command. *) 143 + 144 + (** {1 Pretty Printing} *) 145 + 146 + val pp : t Fmt.t 147 + (** Pretty-print the configuration. *) 148 + 149 + (** {1 Initialization} *) 150 + 151 + val default_config_toml : unit -> string 152 + (** Generate a default config.toml content with comments. *) 153 + 154 + val write_default_config : ?force:bool -> unit -> (string, string) result 155 + (** Write a default config file to the config directory. 156 + Returns [Ok path] on success, or [Error msg] if the file exists 157 + and [force] is not set, or if writing fails. *)
+4
lib_config/dune
··· 1 + (library 2 + (name bushel_config) 3 + (public_name bushel.config) 4 + (libraries tomlt tomlt.bytesrw unix fmt))
+224
lib_eio/bushel_loader.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Eio-based directory scanner and file loader for Bushel entries *) 7 + 8 + let src = Logs.Src.create "bushel.loader" ~doc:"Bushel loader" 9 + module Log = (val Logs.src_log src : Logs.LOG) 10 + 11 + (** List markdown files in a directory *) 12 + let list_md_files fs dir = 13 + let path = Eio.Path.(fs / dir) in 14 + try 15 + Eio.Path.read_dir path 16 + |> List.filter (fun f -> Filename.check_suffix f ".md") 17 + |> List.map (fun f -> Filename.concat dir f) 18 + with 19 + | Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> 20 + Log.warn (fun m -> m "Directory not found: %s" dir); 21 + [] 22 + 23 + (** Load and map files from a directory *) 24 + let map_category fs base subdir parse_fn = 25 + let dir = Filename.concat base ("data/" ^ subdir) in 26 + Log.debug (fun m -> m "Loading %s" subdir); 27 + let files = list_md_files fs dir in 28 + List.filter_map (fun path -> 29 + match Frontmatter_eio.of_file fs path with 30 + | Ok fm -> 31 + (match parse_fn fm with 32 + | Ok entry -> Some entry 33 + | Error e -> 34 + Log.err (fun m -> m "Error parsing %s: %s" path e); 35 + None) 36 + | Error e -> 37 + Log.err (fun m -> m "Error reading %s: %s" path e); 38 + None 39 + ) files 40 + 41 + (** Load contacts from data/contacts/ *) 42 + let load_contacts fs base = 43 + map_category fs base "contacts" (fun fm -> 44 + let handle = 45 + match Frontmatter.fname fm with 46 + | Some fname -> Filename.basename fname |> Filename.chop_extension 47 + | None -> "" 48 + in 49 + Bushel.Contact.of_frontmatter ~handle fm 50 + ) 51 + 52 + (** Load projects from data/projects/ *) 53 + let load_projects fs base = 54 + map_category fs base "projects" Bushel.Project.of_frontmatter 55 + 56 + (** Load notes from data/notes/ and data/news/ *) 57 + let load_notes fs base = 58 + let notes_dir = map_category fs base "notes" Bushel.Note.of_frontmatter in 59 + let news_dir = map_category fs base "news" Bushel.Note.of_frontmatter in 60 + notes_dir @ news_dir 61 + 62 + (** Load ideas from data/ideas/ *) 63 + let load_ideas fs base = 64 + map_category fs base "ideas" Bushel.Idea.of_frontmatter 65 + 66 + (** Load videos from data/videos/ *) 67 + let load_videos fs base = 68 + map_category fs base "videos" Bushel.Video.of_frontmatter 69 + 70 + (** Load papers from data/papers/ (nested directory structure) *) 71 + let load_papers fs base = 72 + let papers_dir = Filename.concat base "data/papers" in 73 + Log.debug (fun m -> m "Loading papers from %s" papers_dir); 74 + let path = Eio.Path.(fs / papers_dir) in 75 + let slug_dirs = 76 + try 77 + Eio.Path.read_dir path 78 + |> List.filter (fun slug -> 79 + try 80 + let stat = Eio.Path.stat ~follow:true Eio.Path.(fs / papers_dir / slug) in 81 + stat.kind = `Directory 82 + with _ -> false) 83 + with _ -> [] 84 + in 85 + let papers = List.concat_map (fun slug -> 86 + let slug_path = Filename.concat papers_dir slug in 87 + let ver_files = 88 + try 89 + Eio.Path.(read_dir (fs / slug_path)) 90 + |> List.filter (fun f -> Filename.check_suffix f ".md") 91 + with _ -> [] 92 + in 93 + List.filter_map (fun ver_file -> 94 + let ver = Filename.chop_extension ver_file in 95 + let file_path = Filename.concat slug_path ver_file in 96 + match Frontmatter_eio.of_file fs file_path with 97 + | Ok fm -> 98 + (match Bushel.Paper.of_frontmatter ~slug ~ver fm with 99 + | Ok paper -> Some paper 100 + | Error e -> 101 + Log.err (fun m -> m "Error parsing paper %s/%s: %s" slug ver e); 102 + None) 103 + | Error e -> 104 + Log.err (fun m -> m "Error reading paper %s/%s: %s" slug ver e); 105 + None 106 + ) ver_files 107 + ) slug_dirs in 108 + Bushel.Paper.tv papers 109 + 110 + (** Load all entries from a base directory *) 111 + let rec load fs base = 112 + Log.info (fun m -> m "Loading bushel data from %s" base); 113 + let contacts = load_contacts fs base in 114 + Log.info (fun m -> m "Loaded %d contacts" (List.length contacts)); 115 + let projects = load_projects fs base in 116 + Log.info (fun m -> m "Loaded %d projects" (List.length projects)); 117 + let notes = load_notes fs base in 118 + Log.info (fun m -> m "Loaded %d notes" (List.length notes)); 119 + let ideas = load_ideas fs base in 120 + Log.info (fun m -> m "Loaded %d ideas" (List.length ideas)); 121 + let videos = load_videos fs base in 122 + Log.info (fun m -> m "Loaded %d videos" (List.length videos)); 123 + let papers = load_papers fs base in 124 + Log.info (fun m -> m "Loaded %d papers" (List.length papers)); 125 + let data_dir = Filename.concat base "data" in 126 + let entries = Bushel.Entry.v ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir in 127 + Log.info (fun m -> m "Building link graph"); 128 + let graph = build_link_graph entries in 129 + Bushel.Link_graph.set_graph graph; 130 + Log.info (fun m -> m "Load complete: %a" Bushel.Link_graph.pp graph); 131 + entries 132 + 133 + (** Build link graph from entries *) 134 + and build_link_graph entries = 135 + let graph = Bushel.Link_graph.empty () in 136 + 137 + let add_internal_link source target target_type = 138 + let link = { Bushel.Link_graph.source; target; target_type } in 139 + graph.internal_links <- link :: graph.internal_links; 140 + Bushel.Link_graph.add_to_set_hashtbl graph.outbound source target; 141 + Bushel.Link_graph.add_to_set_hashtbl graph.backlinks target source 142 + in 143 + 144 + let add_external_link source url = 145 + let domain = Bushel.Util.extract_domain url in 146 + let link = { Bushel.Link_graph.source; domain; url } in 147 + graph.external_links <- link :: graph.external_links; 148 + Bushel.Link_graph.add_to_set_hashtbl graph.external_by_entry source url; 149 + Bushel.Link_graph.add_to_set_hashtbl graph.external_by_domain domain source 150 + in 151 + 152 + (* Process each entry *) 153 + List.iter (fun entry -> 154 + let source_slug = Bushel.Entry.slug entry in 155 + let md_content = Bushel.Entry.body entry in 156 + let all_links = Bushel.Md.extract_all_links md_content in 157 + 158 + List.iter (fun link -> 159 + if Bushel.Md.is_bushel_slug link then 160 + let target_slug = Bushel.Md.strip_handle link in 161 + (match Bushel.Entry.lookup entries target_slug with 162 + | Some target_entry -> 163 + let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in 164 + add_internal_link source_slug target_slug target_type 165 + | None -> ()) 166 + else if Bushel.Md.is_contact_slug link then 167 + let handle = Bushel.Md.strip_handle link in 168 + (match Bushel.Contact.find_by_handle (Bushel.Entry.contacts entries) handle with 169 + | Some c -> 170 + add_internal_link source_slug (Bushel.Contact.handle c) `Contact 171 + | None -> ()) 172 + else if Bushel.Md.is_tag_slug link || Bushel.Md.is_type_filter_slug link then 173 + () (* Skip tag links *) 174 + else if String.starts_with ~prefix:"http://" link || 175 + String.starts_with ~prefix:"https://" link then 176 + add_external_link source_slug link 177 + ) all_links 178 + ) (Bushel.Entry.all_entries entries); 179 + 180 + (* Process slug_ent references from notes *) 181 + List.iter (fun note -> 182 + match Bushel.Note.slug_ent note with 183 + | Some target_slug -> 184 + let source_slug = Bushel.Note.slug note in 185 + (match Bushel.Entry.lookup entries target_slug with 186 + | Some target_entry -> 187 + let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in 188 + add_internal_link source_slug target_slug target_type 189 + | None -> ()) 190 + | None -> () 191 + ) (Bushel.Entry.notes entries); 192 + 193 + (* Process project references from papers *) 194 + List.iter (fun paper -> 195 + let source_slug = Bushel.Paper.slug paper in 196 + List.iter (fun project_slug -> 197 + match Bushel.Entry.lookup entries project_slug with 198 + | Some (`Project _) -> 199 + add_internal_link source_slug project_slug `Project 200 + | _ -> () 201 + ) (Bushel.Paper.project_slugs paper) 202 + ) (Bushel.Entry.papers entries); 203 + 204 + (* Deduplicate links *) 205 + let module LinkSet = Set.Make(struct 206 + type t = Bushel.Link_graph.internal_link 207 + let compare (a : t) (b : t) = 208 + match String.compare a.source b.source with 209 + | 0 -> String.compare a.target b.target 210 + | n -> n 211 + end) in 212 + 213 + let module ExtLinkSet = Set.Make(struct 214 + type t = Bushel.Link_graph.external_link 215 + let compare (a : t) (b : t) = 216 + match String.compare a.source b.source with 217 + | 0 -> String.compare a.url b.url 218 + | n -> n 219 + end) in 220 + 221 + graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links); 222 + graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links); 223 + 224 + graph
+8
lib_eio/dune
··· 1 + (library 2 + (name bushel_eio) 3 + (public_name bushel.eio) 4 + (libraries 5 + bushel 6 + frontmatter-eio 7 + eio 8 + logs))
+47
lib_sync/bushel_http.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Simple HTTP client using curl via Eio.Process *) 7 + 8 + let src = Logs.Src.create "bushel.http" ~doc:"HTTP client" 9 + module Log = (val Logs.src_log src : Logs.LOG) 10 + 11 + (** Run curl and capture stdout *) 12 + let get ~proc_mgr url = 13 + Log.debug (fun m -> m "GET %s" url); 14 + let stdout = Buffer.create 4096 in 15 + try 16 + Eio.Process.run proc_mgr 17 + ~stdout:(Eio.Flow.buffer_sink stdout) 18 + ["curl"; "-s"; "-L"; url]; 19 + Ok (Buffer.contents stdout) 20 + with e -> 21 + Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e)) 22 + 23 + let get_with_header ~proc_mgr ~header url = 24 + Log.debug (fun m -> m "GET %s (with header)" url); 25 + let stdout = Buffer.create 4096 in 26 + try 27 + Eio.Process.run proc_mgr 28 + ~stdout:(Eio.Flow.buffer_sink stdout) 29 + ["curl"; "-s"; "-L"; "-H"; header; url]; 30 + Ok (Buffer.contents stdout) 31 + with e -> 32 + Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e)) 33 + 34 + let post ~proc_mgr ~content_type ~body url = 35 + Log.debug (fun m -> m "POST %s" url); 36 + let stdout = Buffer.create 4096 in 37 + try 38 + Eio.Process.run proc_mgr 39 + ~stdout:(Eio.Flow.buffer_sink stdout) 40 + ["curl"; "-s"; "-L"; 41 + "-X"; "POST"; 42 + "-H"; "Content-Type: " ^ content_type; 43 + "-d"; body; 44 + url]; 45 + Ok (Buffer.contents stdout) 46 + with e -> 47 + Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e))
+130
lib_sync/bushel_immich.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Immich API client for contact face thumbnails *) 7 + 8 + let src = Logs.Src.create "bushel.immich" ~doc:"Immich face thumbnails" 9 + module Log = (val Logs.src_log src : Logs.LOG) 10 + 11 + (** {1 Types} *) 12 + 13 + type person = { 14 + id : string; 15 + name : string; 16 + thumbnail_path : string option; 17 + } 18 + 19 + type fetch_result = 20 + | Ok of string (** Saved to path *) 21 + | Skipped of string (** Already exists *) 22 + | NotFound of string (** No match found *) 23 + | Error of string (** Error message *) 24 + 25 + (** {1 Jsont Codecs} *) 26 + 27 + let person_jsont : person Jsont.t = 28 + let open Jsont in 29 + let open Object in 30 + map ~kind:"person" (fun id name thumbnail_path -> { id; name; thumbnail_path }) 31 + |> mem "id" string ~enc:(fun p -> p.id) 32 + |> mem "name" string ~enc:(fun p -> p.name) 33 + |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun p -> p.thumbnail_path) 34 + |> finish 35 + 36 + let people_jsont = Jsont.list person_jsont 37 + 38 + let decode_people json_str = 39 + match Jsont_bytesrw.decode_string people_jsont json_str with 40 + | Ok people -> Result.Ok people 41 + | Error e -> Result.Error e 42 + 43 + (** {1 Immich API} *) 44 + 45 + let search_person ~proc_mgr ~endpoint ~api_key name = 46 + let encoded_name = Uri.pct_encode name in 47 + let url = Printf.sprintf "%s/api/search/person?name=%s" endpoint encoded_name in 48 + let header = "X-Api-Key: " ^ api_key in 49 + 50 + match Bushel_http.get_with_header ~proc_mgr ~header url with 51 + | Result.Error e -> Result.Error e 52 + | Result.Ok body -> decode_people body 53 + 54 + let download_thumbnail ~proc_mgr ~endpoint ~api_key person_id output_path = 55 + let url = Printf.sprintf "%s/api/people/%s/thumbnail" endpoint person_id in 56 + let header = "X-Api-Key: " ^ api_key in 57 + 58 + match Bushel_http.get_with_header ~proc_mgr ~header url with 59 + | Result.Error e -> Result.Error e 60 + | Result.Ok body -> 61 + try 62 + (* Ensure output directory exists *) 63 + let dir = Filename.dirname output_path in 64 + if not (Sys.file_exists dir) then 65 + Unix.mkdir dir 0o755; 66 + let oc = open_out_bin output_path in 67 + output_string oc body; 68 + close_out oc; 69 + Result.Ok output_path 70 + with e -> 71 + Result.Error (Printf.sprintf "Failed to write file: %s" (Printexc.to_string e)) 72 + 73 + (** {1 Contact Face Fetching} *) 74 + 75 + let fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact = 76 + let names = Bushel.Contact.names contact in 77 + let handle = Bushel.Contact.handle contact in 78 + let output_path = Filename.concat output_dir (handle ^ ".jpg") in 79 + 80 + (* Skip if already exists *) 81 + if Sys.file_exists output_path then begin 82 + Log.debug (fun m -> m "Skipping %s: thumbnail already exists" handle); 83 + Skipped output_path 84 + end else begin 85 + Log.info (fun m -> m "Fetching face for contact: %s" handle); 86 + 87 + (* Try each name until we find a match *) 88 + let rec try_names = function 89 + | [] -> 90 + Log.warn (fun m -> m "No person found for contact %s" handle); 91 + NotFound handle 92 + | name :: rest -> 93 + Log.debug (fun m -> m "Trying name: %s" name); 94 + match search_person ~proc_mgr ~endpoint ~api_key name with 95 + | Result.Error e -> 96 + Log.err (fun m -> m "Search error for %s: %s" name e); 97 + Error e 98 + | Result.Ok [] -> 99 + Log.debug (fun m -> m "No results for %s, trying next name" name); 100 + try_names rest 101 + | Result.Ok (person :: _) -> 102 + Log.info (fun m -> m "Found match for %s: %s" name person.name); 103 + match download_thumbnail ~proc_mgr ~endpoint ~api_key person.id output_path with 104 + | Result.Ok path -> Ok path 105 + | Result.Error e -> Error e 106 + in 107 + try_names names 108 + end 109 + 110 + let fetch_all_faces ~proc_mgr ~endpoint ~api_key ~output_dir contacts = 111 + (* Ensure output directory exists *) 112 + if not (Sys.file_exists output_dir) then 113 + Unix.mkdir output_dir 0o755; 114 + 115 + let results = List.map (fun contact -> 116 + let handle = Bushel.Contact.handle contact in 117 + let result = fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact in 118 + (handle, result) 119 + ) contacts in 120 + 121 + (* Summary *) 122 + let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in 123 + let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in 124 + let not_found_count = List.length (List.filter (fun (_, r) -> match r with NotFound _ -> true | _ -> false) results) in 125 + let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in 126 + 127 + Log.info (fun m -> m "Face sync complete: %d ok, %d skipped, %d not found, %d errors" 128 + ok_count skipped_count not_found_count error_count); 129 + 130 + results
+246
lib_sync/bushel_peertube.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** PeerTube API client for video metadata and thumbnails *) 7 + 8 + let src = Logs.Src.create "bushel.peertube" ~doc:"PeerTube video sync" 9 + module Log = (val Logs.src_log src : Logs.LOG) 10 + 11 + (** {1 Types} *) 12 + 13 + type video = { 14 + id : int; 15 + uuid : string; 16 + name : string; 17 + description : string option; 18 + url : string; 19 + embed_path : string; 20 + published_at : Ptime.t; 21 + originally_published_at : Ptime.t option; 22 + thumbnail_path : string option; 23 + tags : string list; 24 + } 25 + 26 + type fetch_result = 27 + | Ok of string 28 + | Skipped of string 29 + | Error of string 30 + 31 + (** {1 Date Parsing} *) 32 + 33 + let parse_date str = 34 + match Ptime.of_rfc3339 str with 35 + | Ok (date, _, _) -> date 36 + | Error _ -> 37 + Log.warn (fun m -> m "Could not parse date: %s" str); 38 + Ptime.epoch 39 + 40 + (** {1 Jsont Codecs} *) 41 + 42 + let ptime_jsont = 43 + Jsont.string |> Jsont.map ~dec:parse_date ~enc:(fun t -> 44 + match Ptime.to_rfc3339 ~frac_s:0 t with 45 + | s -> s) 46 + 47 + let make_video ~id ~uuid ~name ~description ~url ~embed_path 48 + ~published_at ~originally_published_at ~thumbnail_path ~tags = 49 + { id; uuid; name; description; url; embed_path; 50 + published_at; originally_published_at; thumbnail_path; tags } 51 + 52 + let video_jsont : video Jsont.t = 53 + let open Jsont in 54 + let open Object in 55 + map ~kind:"video" (fun id uuid name description url embed_path 56 + published_at originally_published_at thumbnail_path tags -> 57 + make_video ~id ~uuid ~name ~description ~url ~embed_path 58 + ~published_at ~originally_published_at ~thumbnail_path ~tags) 59 + |> mem "id" int ~enc:(fun v -> v.id) 60 + |> mem "uuid" string ~enc:(fun v -> v.uuid) 61 + |> mem "name" string ~enc:(fun v -> v.name) 62 + |> mem "description" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.description) 63 + |> mem "url" string ~enc:(fun v -> v.url) 64 + |> mem "embedPath" string ~enc:(fun v -> v.embed_path) 65 + |> mem "publishedAt" ptime_jsont ~enc:(fun v -> v.published_at) 66 + |> mem "originallyPublishedAt" (some ptime_jsont) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.originally_published_at) 67 + |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.thumbnail_path) 68 + |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags) 69 + |> finish 70 + 71 + type channel_response = { 72 + total : int; 73 + data : video list; 74 + } 75 + 76 + let channel_response_jsont : channel_response Jsont.t = 77 + let open Jsont in 78 + let open Object in 79 + map ~kind:"channel_response" (fun total data -> { total; data }) 80 + |> mem "total" int ~enc:(fun r -> r.total) 81 + |> mem "data" (list video_jsont) ~enc:(fun r -> r.data) 82 + |> finish 83 + 84 + (** {1 JSON decoding helpers} *) 85 + 86 + let decode_video json_str = 87 + match Jsont_bytesrw.decode_string video_jsont json_str with 88 + | Ok v -> Result.Ok v 89 + | Error e -> Result.Error e 90 + 91 + let decode_channel_response json_str = 92 + match Jsont_bytesrw.decode_string channel_response_jsont json_str with 93 + | Ok r -> Result.Ok r 94 + | Error e -> Result.Error e 95 + 96 + (** {1 PeerTube API} *) 97 + 98 + let fetch_video_details ~proc_mgr ~endpoint uuid = 99 + let url = Printf.sprintf "%s/api/v1/videos/%s" endpoint uuid in 100 + match Bushel_http.get ~proc_mgr url with 101 + | Result.Error e -> Result.Error e 102 + | Result.Ok body -> decode_video body 103 + 104 + let fetch_channel_videos ~proc_mgr ~endpoint ~channel ?(count=20) ?(start=0) () = 105 + let url = Printf.sprintf "%s/api/v1/video-channels/%s/videos?count=%d&start=%d" 106 + endpoint channel count start in 107 + match Bushel_http.get ~proc_mgr url with 108 + | Result.Error _ -> (0, []) 109 + | Result.Ok body -> 110 + match decode_channel_response body with 111 + | Result.Ok r -> (r.total, r.data) 112 + | Result.Error _ -> (0, []) 113 + 114 + let fetch_all_channel_videos ~proc_mgr ~endpoint ~channel ?(page_size=20) () = 115 + let rec fetch_pages start acc = 116 + let (total, videos) = fetch_channel_videos ~proc_mgr ~endpoint ~channel ~count:page_size ~start () in 117 + let all = acc @ videos in 118 + let fetched = start + List.length videos in 119 + if fetched < total && List.length videos > 0 then 120 + fetch_pages fetched all 121 + else 122 + all 123 + in 124 + fetch_pages 0 [] 125 + 126 + (** {1 Thumbnail Download} *) 127 + 128 + let thumbnail_url endpoint video = 129 + match video.thumbnail_path with 130 + | Some path -> Some (endpoint ^ path) 131 + | None -> None 132 + 133 + let download_thumbnail ~proc_mgr ~endpoint video output_path = 134 + match thumbnail_url endpoint video with 135 + | None -> 136 + Log.warn (fun m -> m "No thumbnail for video %s" video.uuid); 137 + Error "No thumbnail available" 138 + | Some url -> 139 + match Bushel_http.get ~proc_mgr url with 140 + | Result.Error e -> Error e 141 + | Result.Ok body -> 142 + try 143 + let dir = Filename.dirname output_path in 144 + if not (Sys.file_exists dir) then Unix.mkdir dir 0o755; 145 + let oc = open_out_bin output_path in 146 + output_string oc body; 147 + close_out oc; 148 + Ok output_path 149 + with e -> 150 + Error (Printf.sprintf "Failed to write: %s" (Printexc.to_string e)) 151 + 152 + (** {1 Videos Index (YAML)} *) 153 + 154 + module VideoIndex = struct 155 + (** Mapping of UUID -> server name *) 156 + type t = (string, string) Hashtbl.t 157 + 158 + let empty () = Hashtbl.create 64 159 + 160 + let load_file path = 161 + let index = empty () in 162 + if Sys.file_exists path then begin 163 + try 164 + let ic = open_in path in 165 + let rec read_lines () = 166 + match input_line ic with 167 + | line -> 168 + (match Astring.String.cut ~sep:":" line with 169 + | Some (uuid, server) -> 170 + Hashtbl.add index (String.trim uuid) (String.trim server) 171 + | None -> ()); 172 + read_lines () 173 + | exception End_of_file -> close_in ic 174 + in 175 + read_lines () 176 + with _ -> () 177 + end; 178 + index 179 + 180 + let save_file path index = 181 + let oc = open_out path in 182 + output_string oc "# UUID -> PeerTube server name mapping\n"; 183 + Hashtbl.iter (fun uuid server -> 184 + output_string oc (Printf.sprintf "%s: %s\n" uuid server) 185 + ) index; 186 + close_out oc 187 + 188 + let add index ~uuid ~server = 189 + Hashtbl.replace index uuid server 190 + 191 + let find index uuid = 192 + Hashtbl.find_opt index uuid 193 + 194 + let mem index uuid = 195 + Hashtbl.mem index uuid 196 + 197 + let to_list index = 198 + Hashtbl.fold (fun k v acc -> (k, v) :: acc) index [] 199 + end 200 + 201 + (** {1 Fetch Thumbnails from Index} *) 202 + 203 + let fetch_thumbnails_from_index ~proc_mgr ~servers ~output_dir index = 204 + (* Ensure output dir exists *) 205 + if not (Sys.file_exists output_dir) then 206 + Unix.mkdir output_dir 0o755; 207 + 208 + let server_map = 209 + List.fold_left (fun acc (s : Bushel_config.peertube_server) -> 210 + (s.name, s.endpoint) :: acc 211 + ) [] servers 212 + in 213 + 214 + let results = List.filter_map (fun (uuid, server_name) -> 215 + let output_path = Filename.concat output_dir (uuid ^ ".jpg") in 216 + 217 + (* Skip if exists *) 218 + if Sys.file_exists output_path then begin 219 + Log.debug (fun m -> m "Skipping %s: thumbnail exists" uuid); 220 + Some (uuid, Skipped output_path) 221 + end else begin 222 + match List.assoc_opt server_name server_map with 223 + | None -> 224 + Log.warn (fun m -> m "Unknown server %s for video %s" server_name uuid); 225 + Some (uuid, Error (Printf.sprintf "Unknown server: %s" server_name)) 226 + | Some endpoint -> 227 + Log.info (fun m -> m "Fetching thumbnail for %s from %s" uuid server_name); 228 + match fetch_video_details ~proc_mgr ~endpoint uuid with 229 + | Result.Error e -> 230 + Some (uuid, Error e) 231 + | Result.Ok video -> 232 + match download_thumbnail ~proc_mgr ~endpoint video output_path with 233 + | Ok path -> Some (uuid, Ok path) 234 + | Skipped path -> Some (uuid, Skipped path) 235 + | Error e -> Some (uuid, Error e) 236 + end 237 + ) (VideoIndex.to_list index) in 238 + 239 + let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in 240 + let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in 241 + let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in 242 + 243 + Log.info (fun m -> m "Video thumbnails: %d ok, %d skipped, %d errors" 244 + ok_count skipped_count error_count); 245 + 246 + results
+299
lib_sync/bushel_sync.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bushel sync orchestration 7 + 8 + {1 Re-exported Modules} 9 + 10 + - {!Zotero} - DOI resolution via Zotero Translation Server 11 + - {!Immich} - Contact face thumbnails from Immich 12 + - {!Peertube} - Video thumbnails from PeerTube 13 + - {!Http} - Simple HTTP client using curl 14 + *) 15 + 16 + (** DOI resolution via Zotero Translation Server *) 17 + module Zotero = Bushel_zotero 18 + 19 + (** Contact face thumbnails from Immich *) 20 + module Immich = Bushel_immich 21 + 22 + (** Video metadata and thumbnails from PeerTube *) 23 + module Peertube = Bushel_peertube 24 + 25 + (** Simple HTTP client using curl via Eio.Process *) 26 + module Http = Bushel_http 27 + 28 + let src = Logs.Src.create "bushel.sync" ~doc:"Bushel sync pipeline" 29 + module Log = (val Logs.src_log src : Logs.LOG) 30 + 31 + (** {1 Sync Steps} *) 32 + 33 + type step = 34 + | Images (** Rsync images from remote *) 35 + | Srcsetter (** Run srcsetter on images *) 36 + | Thumbs (** Generate paper thumbnails from PDFs *) 37 + | Faces (** Fetch contact faces from Immich *) 38 + | Videos (** Fetch video thumbnails from PeerTube *) 39 + | Typesense (** Upload to Typesense *) 40 + 41 + let string_of_step = function 42 + | Images -> "images" 43 + | Srcsetter -> "srcsetter" 44 + | Thumbs -> "thumbs" 45 + | Faces -> "faces" 46 + | Videos -> "videos" 47 + | Typesense -> "typesense" 48 + 49 + let step_of_string = function 50 + | "images" -> Some Images 51 + | "srcsetter" -> Some Srcsetter 52 + | "thumbs" -> Some Thumbs 53 + | "faces" -> Some Faces 54 + | "videos" -> Some Videos 55 + | "typesense" -> Some Typesense 56 + | _ -> None 57 + 58 + let all_steps = [Images; Srcsetter; Thumbs; Faces; Videos] 59 + let all_steps_with_remote = all_steps @ [Typesense] 60 + 61 + (** {1 Step Results} *) 62 + 63 + type step_result = { 64 + step : step; 65 + success : bool; 66 + message : string; 67 + details : string list; 68 + } 69 + 70 + let pp_result ppf r = 71 + let status = if r.success then "OK" else "FAILED" in 72 + Fmt.pf ppf "[%s] %s: %s" status (string_of_step r.step) r.message; 73 + if r.details <> [] then begin 74 + Fmt.pf ppf "@,"; 75 + List.iter (fun d -> Fmt.pf ppf " - %s@," d) r.details 76 + end 77 + 78 + (** {1 Rsync Images} *) 79 + 80 + let sync_images ~proc_mgr config = 81 + Log.info (fun m -> m "Syncing images from remote..."); 82 + let cmd = Bushel_config.rsync_command config in 83 + Log.debug (fun m -> m "Running: %s" cmd); 84 + 85 + (* Ensure local directory exists *) 86 + let local_dir = config.Bushel_config.local_source_dir in 87 + if not (Sys.file_exists local_dir) then begin 88 + Log.info (fun m -> m "Creating directory: %s" local_dir); 89 + Unix.mkdir local_dir 0o755 90 + end; 91 + 92 + try 93 + let args = ["rsync"; "-avz"; 94 + Bushel_config.rsync_source config ^ "/"; 95 + local_dir ^ "/"] in 96 + Eio.Process.run proc_mgr args; 97 + { step = Images; success = true; 98 + message = "Images synced from remote"; 99 + details = [] } 100 + with e -> 101 + { step = Images; success = false; 102 + message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e); 103 + details = [] } 104 + 105 + (** {1 Srcsetter} *) 106 + 107 + let run_srcsetter ~proc_mgr config = 108 + Log.info (fun m -> m "Running srcsetter..."); 109 + let src_dir = config.Bushel_config.local_source_dir in 110 + let dst_dir = config.Bushel_config.local_output_dir in 111 + 112 + (* Ensure output directory exists *) 113 + if not (Sys.file_exists dst_dir) then begin 114 + Log.info (fun m -> m "Creating directory: %s" dst_dir); 115 + Unix.mkdir dst_dir 0o755 116 + end; 117 + 118 + try 119 + let args = ["srcsetter"; src_dir; dst_dir] in 120 + Eio.Process.run proc_mgr args; 121 + { step = Srcsetter; success = true; 122 + message = "Srcsetter completed"; 123 + details = [] } 124 + with e -> 125 + { step = Srcsetter; success = false; 126 + message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e); 127 + details = [] } 128 + 129 + (** {1 Paper Thumbnails} *) 130 + 131 + let generate_paper_thumbnails ~proc_mgr config = 132 + Log.info (fun m -> m "Generating paper thumbnails..."); 133 + let pdfs_dir = config.Bushel_config.paper_pdfs_dir in 134 + let output_dir = Bushel_config.paper_thumbs_dir config in 135 + 136 + if not (Sys.file_exists pdfs_dir) then begin 137 + Log.warn (fun m -> m "PDFs directory does not exist: %s" pdfs_dir); 138 + { step = Thumbs; success = true; 139 + message = "No PDFs directory"; 140 + details = [] } 141 + end else begin 142 + (* Ensure output directory exists *) 143 + if not (Sys.file_exists output_dir) then 144 + Unix.mkdir output_dir 0o755; 145 + 146 + let pdfs = Sys.readdir pdfs_dir |> Array.to_list 147 + |> List.filter (fun f -> Filename.check_suffix f ".pdf") in 148 + 149 + let results = List.map (fun pdf_file -> 150 + let slug = Filename.chop_extension pdf_file in 151 + let pdf_path = Filename.concat pdfs_dir pdf_file in 152 + let output_path = Filename.concat output_dir (slug ^ ".webp") in 153 + 154 + if Sys.file_exists output_path then begin 155 + Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug); 156 + `Skipped slug 157 + end else begin 158 + Log.info (fun m -> m "Generating thumbnail for %s" slug); 159 + try 160 + (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *) 161 + let args = [ 162 + "magick"; 163 + "-density"; "600"; 164 + "-quality"; "100"; 165 + pdf_path ^ "[0]"; (* First page only *) 166 + "-gravity"; "North"; 167 + "-crop"; "100%x50%+0+0"; 168 + "-resize"; "2048x"; 169 + output_path 170 + ] in 171 + Eio.Process.run proc_mgr args; 172 + `Ok slug 173 + with e -> 174 + Log.err (fun m -> m "Failed to generate thumbnail for %s: %s" 175 + slug (Printexc.to_string e)); 176 + `Error slug 177 + end 178 + ) pdfs in 179 + 180 + let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in 181 + let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in 182 + let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in 183 + 184 + { step = Thumbs; success = error_count = 0; 185 + message = Printf.sprintf "%d generated, %d skipped, %d errors" 186 + ok_count skipped_count error_count; 187 + details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results } 188 + end 189 + 190 + (** {1 Contact Faces} *) 191 + 192 + let sync_faces ~proc_mgr config entries = 193 + Log.info (fun m -> m "Syncing contact faces from Immich..."); 194 + let output_dir = Bushel_config.contact_faces_dir config in 195 + 196 + match Bushel_config.immich_api_key config with 197 + | Error e -> 198 + Log.warn (fun m -> m "Cannot read Immich API key: %s" e); 199 + { step = Faces; success = false; 200 + message = "Missing Immich API key"; 201 + details = [e] } 202 + | Ok api_key -> 203 + let contacts = Bushel.Entry.contacts entries in 204 + let results = Bushel_immich.fetch_all_faces 205 + ~proc_mgr 206 + ~endpoint:config.immich_endpoint 207 + ~api_key 208 + ~output_dir 209 + contacts in 210 + 211 + let ok_count = List.length (List.filter (fun (_, r) -> 212 + match r with Bushel_immich.Ok _ -> true | _ -> false) results) in 213 + let skipped_count = List.length (List.filter (fun (_, r) -> 214 + match r with Bushel_immich.Skipped _ -> true | _ -> false) results) in 215 + let error_count = List.length (List.filter (fun (_, r) -> 216 + match r with Bushel_immich.Error _ -> true | _ -> false) results) in 217 + 218 + { step = Faces; success = true; 219 + message = Printf.sprintf "%d fetched, %d skipped, %d errors" 220 + ok_count skipped_count error_count; 221 + details = List.filter_map (fun (h, r) -> 222 + match r with Bushel_immich.Error e -> Some (h ^ ": " ^ e) | _ -> None 223 + ) results } 224 + 225 + (** {1 Video Thumbnails} *) 226 + 227 + let sync_video_thumbnails ~proc_mgr config = 228 + Log.info (fun m -> m "Syncing video thumbnails from PeerTube..."); 229 + let output_dir = Bushel_config.video_thumbs_dir config in 230 + let videos_yml = Filename.concat config.data_dir "videos.yml" in 231 + 232 + let index = Bushel_peertube.VideoIndex.load_file videos_yml in 233 + let count = List.length (Bushel_peertube.VideoIndex.to_list index) in 234 + 235 + if count = 0 then begin 236 + Log.info (fun m -> m "No videos in index"); 237 + { step = Videos; success = true; 238 + message = "No videos in index"; 239 + details = [] } 240 + end else begin 241 + let results = Bushel_peertube.fetch_thumbnails_from_index 242 + ~proc_mgr 243 + ~servers:config.peertube_servers 244 + ~output_dir 245 + index in 246 + 247 + let ok_count = List.length (List.filter (fun (_, r) -> 248 + match r with Bushel_peertube.Ok _ -> true | _ -> false) results) in 249 + let skipped_count = List.length (List.filter (fun (_, r) -> 250 + match r with Bushel_peertube.Skipped _ -> true | _ -> false) results) in 251 + let error_count = List.length (List.filter (fun (_, r) -> 252 + match r with Bushel_peertube.Error _ -> true | _ -> false) results) in 253 + 254 + { step = Videos; success = true; 255 + message = Printf.sprintf "%d fetched, %d skipped, %d errors" 256 + ok_count skipped_count error_count; 257 + details = List.filter_map (fun (uuid, r) -> 258 + match r with Bushel_peertube.Error e -> Some (uuid ^ ": " ^ e) | _ -> None 259 + ) results } 260 + end 261 + 262 + (** {1 Typesense Upload} *) 263 + 264 + let upload_typesense config _entries = 265 + Log.info (fun m -> m "Uploading to Typesense..."); 266 + 267 + match Bushel_config.typesense_api_key config with 268 + | Error e -> 269 + { step = Typesense; success = false; 270 + message = "Missing Typesense API key"; 271 + details = [e] } 272 + | Ok _api_key -> 273 + (* TODO: Implement actual Typesense upload using bushel-typesense *) 274 + { step = Typesense; success = true; 275 + message = "Typesense upload (not yet implemented)"; 276 + details = [] } 277 + 278 + (** {1 Run Pipeline} *) 279 + 280 + let run ~env ~config ~steps ~entries = 281 + let proc_mgr = Eio.Stdenv.process_mgr env in 282 + 283 + let results = List.map (fun step -> 284 + Log.info (fun m -> m "Running step: %s" (string_of_step step)); 285 + match step with 286 + | Images -> sync_images ~proc_mgr config 287 + | Srcsetter -> run_srcsetter ~proc_mgr config 288 + | Thumbs -> generate_paper_thumbnails ~proc_mgr config 289 + | Faces -> sync_faces ~proc_mgr config entries 290 + | Videos -> sync_video_thumbnails ~proc_mgr config 291 + | Typesense -> upload_typesense config entries 292 + ) steps in 293 + 294 + (* Summary *) 295 + let success_count = List.length (List.filter (fun r -> r.success) results) in 296 + let total = List.length results in 297 + Log.info (fun m -> m "Sync complete: %d/%d steps succeeded" success_count total); 298 + 299 + results
+330
lib_sync/bushel_zotero.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Zotero Translation Server client for DOI resolution *) 7 + 8 + let src = Logs.Src.create "bushel.zotero" ~doc:"Zotero DOI resolution" 9 + module Log = (val Logs.src_log src : Logs.LOG) 10 + 11 + (** {1 Types} *) 12 + 13 + type paper_metadata = { 14 + title : string; 15 + authors : string list; 16 + year : int; 17 + month : int; 18 + bibtype : string; 19 + publisher : string; 20 + booktitle : string; 21 + journal : string; 22 + institution : string; 23 + pages : string; 24 + volume : string option; 25 + number : string option; 26 + doi : string option; 27 + url : string option; 28 + abstract : string option; 29 + bib : string; 30 + } 31 + 32 + (** {1 Month Parsing} *) 33 + 34 + let month_of_string s = 35 + match String.lowercase_ascii s with 36 + | "jan" | "january" -> 1 37 + | "feb" | "february" -> 2 38 + | "mar" | "march" -> 3 39 + | "apr" | "april" -> 4 40 + | "may" -> 5 41 + | "jun" | "june" -> 6 42 + | "jul" | "july" -> 7 43 + | "aug" | "august" -> 8 44 + | "sep" | "september" -> 9 45 + | "oct" | "october" -> 10 46 + | "nov" | "november" -> 11 47 + | "dec" | "december" -> 12 48 + | _ -> 1 49 + 50 + let string_of_month = function 51 + | 1 -> "jan" | 2 -> "feb" | 3 -> "mar" | 4 -> "apr" 52 + | 5 -> "may" | 6 -> "jun" | 7 -> "jul" | 8 -> "aug" 53 + | 9 -> "sep" | 10 -> "oct" | 11 -> "nov" | 12 -> "dec" 54 + | _ -> "jan" 55 + 56 + (** {1 JSON Helpers for Zotero JSON} 57 + 58 + Zotero returns complex JSON with varying structure. 59 + We use pattern matching on the generic json type. *) 60 + 61 + type creator = { 62 + first_name : string; 63 + last_name : string; 64 + } 65 + 66 + let creator_jsont : creator Jsont.t = 67 + let open Jsont in 68 + let open Object in 69 + map ~kind:"creator" (fun first_name last_name -> { first_name; last_name }) 70 + |> mem "firstName" string ~dec_absent:"" ~enc:(fun c -> c.first_name) 71 + |> mem "lastName" string ~dec_absent:"" ~enc:(fun c -> c.last_name) 72 + |> finish 73 + 74 + (** Extract string from generic JSON, returning None if missing or wrong type *) 75 + let rec find_in_json json path = 76 + match path with 77 + | [] -> Some json 78 + | key :: path_rest -> 79 + match json with 80 + | Jsont.Object (mems, _) -> 81 + let rec find_mem = function 82 + | [] -> None 83 + | ((name, _), value) :: mems_rest -> 84 + if name = key then find_in_json value path_rest 85 + else find_mem mems_rest 86 + in 87 + find_mem mems 88 + | _ -> None 89 + 90 + let get_string json path = 91 + match find_in_json json path with 92 + | Some (Jsont.String (s, _)) -> Some s 93 + | _ -> None 94 + 95 + let get_string_exn json path ~default = 96 + get_string json path |> Option.value ~default 97 + 98 + let get_int json path ~default = 99 + match find_in_json json path with 100 + | Some (Jsont.Number (f, _)) -> int_of_float f 101 + | Some (Jsont.String (s, _)) -> (try int_of_string s with _ -> default) 102 + | _ -> default 103 + 104 + let get_creators json = 105 + match find_in_json json ["creators"] with 106 + | Some (Jsont.Array (items, _)) -> 107 + List.filter_map (fun item -> 108 + match Jsont.Json.decode creator_jsont item with 109 + | Ok c -> Some c 110 + | Error _ -> None 111 + ) items 112 + | _ -> [] 113 + 114 + (** {1 BibTeX Parsing} *) 115 + 116 + (** Simple BibTeX field extraction *) 117 + let extract_bibtex_field bib field = 118 + let pattern = Printf.sprintf "%s\\s*=\\s*[{\"](.*?)[}\"]" field in 119 + try 120 + let re = Re.Pcre.regexp ~flags:[`CASELESS] pattern in 121 + let groups = Re.exec re bib in 122 + Some (Re.Group.get groups 1) 123 + with _ -> None 124 + 125 + let extract_bibtex_type bib = 126 + try 127 + let re = Re.Pcre.regexp "@(\\w+)\\s*\\{" in 128 + let groups = Re.exec re bib in 129 + String.lowercase_ascii (Re.Group.get groups 1) 130 + with _ -> "misc" 131 + 132 + (** {1 Author Parsing} *) 133 + 134 + (** Split "Last, First and Last2, First2" into list of names *) 135 + let parse_authors author_str = 136 + let parts = String.split_on_char '&' author_str in 137 + let parts = List.concat_map (fun s -> 138 + Astring.String.cuts ~empty:false ~sep:" and " s 139 + ) parts in 140 + List.map (fun name -> 141 + let name = String.trim name in 142 + (* Handle "Last, First" format *) 143 + match Astring.String.cut ~sep:"," name with 144 + | Some (last, first) -> 145 + Printf.sprintf "%s %s" (String.trim first) (String.trim last) 146 + | None -> name 147 + ) parts 148 + 149 + (** {1 Zotero Translation Server API} *) 150 + 151 + let web_endpoint base_url = 152 + if String.ends_with ~suffix:"/" base_url then base_url ^ "web" 153 + else base_url ^ "/web" 154 + 155 + let export_endpoint base_url = 156 + if String.ends_with ~suffix:"/" base_url then base_url ^ "export" 157 + else base_url ^ "/export" 158 + 159 + let resolve_doi ~proc_mgr ~server_url doi = 160 + Log.info (fun m -> m "Resolving DOI: %s" doi); 161 + let url = web_endpoint server_url in 162 + let body = "https://doi.org/" ^ doi in 163 + match Bushel_http.post ~proc_mgr ~content_type:"text/plain" ~body url with 164 + | Error e -> Error e 165 + | Ok json_str -> 166 + match Jsont_bytesrw.decode_string Jsont.json json_str with 167 + | Ok json -> Ok json 168 + | Error e -> Error (Printf.sprintf "JSON parse error: %s" e) 169 + 170 + let export_bibtex ~proc_mgr ~server_url json = 171 + let url = export_endpoint server_url ^ "?format=bibtex" in 172 + match Jsont_bytesrw.encode_string Jsont.json json with 173 + | Error e -> Error e 174 + | Ok body -> Bushel_http.post ~proc_mgr ~content_type:"application/json" ~body url 175 + 176 + (** {1 DOI Resolution} *) 177 + 178 + let resolve ~proc_mgr ~server_url ~slug doi = 179 + match resolve_doi ~proc_mgr ~server_url doi with 180 + | Error e -> Error e 181 + | Ok json -> 182 + (* Export to BibTeX *) 183 + match export_bibtex ~proc_mgr ~server_url json with 184 + | Error e -> Error (Printf.sprintf "BibTeX export failed: %s" e) 185 + | Ok bib -> 186 + Log.debug (fun m -> m "Got BibTeX: %s" bib); 187 + (* Parse the JSON response for metadata *) 188 + let item = 189 + match json with 190 + | Jsont.Array (first :: _, _) -> first 191 + | _ -> json 192 + in 193 + 194 + (* Extract fields from JSON and BibTeX *) 195 + let title = get_string_exn item ["title"] ~default:"Untitled" in 196 + let authors = 197 + let creators = get_creators item in 198 + List.filter_map (fun c -> 199 + let first = c.first_name in 200 + let last = c.last_name in 201 + if first = "" && last = "" then None 202 + else Some (String.trim (first ^ " " ^ last)) 203 + ) creators 204 + in 205 + let authors = if authors = [] then 206 + (* Fallback to BibTeX author field *) 207 + match extract_bibtex_field bib "author" with 208 + | Some a -> parse_authors a 209 + | None -> [] 210 + else authors in 211 + 212 + let year = get_int item ["date"] ~default:( 213 + match extract_bibtex_field bib "year" with 214 + | Some y -> (try int_of_string y with _ -> 2024) 215 + | None -> 2024 216 + ) in 217 + let month = match extract_bibtex_field bib "month" with 218 + | Some m -> month_of_string m 219 + | None -> 1 220 + in 221 + 222 + let bibtype = extract_bibtex_type bib in 223 + let publisher = get_string_exn item ["publisher"] ~default:( 224 + extract_bibtex_field bib "publisher" |> Option.value ~default:"" 225 + ) in 226 + let booktitle = extract_bibtex_field bib "booktitle" |> Option.value ~default:"" in 227 + let journal = get_string_exn item ["publicationTitle"] ~default:( 228 + extract_bibtex_field bib "journal" |> Option.value ~default:"" 229 + ) in 230 + let institution = extract_bibtex_field bib "institution" |> Option.value ~default:"" in 231 + let pages = extract_bibtex_field bib "pages" |> Option.value ~default:"" in 232 + let volume = extract_bibtex_field bib "volume" in 233 + let number = extract_bibtex_field bib "number" in 234 + let url = get_string item ["url"] in 235 + let abstract = get_string item ["abstractNote"] in 236 + 237 + (* Generate clean BibTeX with slug as cite key *) 238 + let cite_key = Astring.String.map (function '-' -> '_' | x -> x) slug in 239 + let bib = Re.replace_string (Re.Pcre.regexp "@\\w+\\{[^,]+,") 240 + ~by:(Printf.sprintf "@%s{%s," bibtype cite_key) bib in 241 + 242 + Ok { 243 + title; 244 + authors; 245 + year; 246 + month; 247 + bibtype; 248 + publisher; 249 + booktitle; 250 + journal; 251 + institution; 252 + pages; 253 + volume; 254 + number; 255 + doi = Some doi; 256 + url; 257 + abstract; 258 + bib = String.trim bib; 259 + } 260 + 261 + (** {1 Paper File Generation} *) 262 + 263 + let to_yaml_frontmatter ~slug:_ ~ver:_ metadata = 264 + let buf = Buffer.create 1024 in 265 + let add key value = 266 + if value <> "" then 267 + Buffer.add_string buf (Printf.sprintf "%s: %s\n" key value) 268 + in 269 + let add_opt key = function 270 + | Some v when v <> "" -> add key v 271 + | _ -> () 272 + in 273 + let add_quoted key value = 274 + if value <> "" then 275 + Buffer.add_string buf (Printf.sprintf "%s: \"%s\"\n" key value) 276 + in 277 + 278 + Buffer.add_string buf "---\n"; 279 + add "title" metadata.title; 280 + 281 + (* Authors as list *) 282 + Buffer.add_string buf "author:\n"; 283 + List.iter (fun a -> 284 + Buffer.add_string buf (Printf.sprintf " - %s\n" a) 285 + ) metadata.authors; 286 + 287 + add_quoted "year" (string_of_int metadata.year); 288 + add "month" (string_of_month metadata.month); 289 + add "bibtype" metadata.bibtype; 290 + 291 + if metadata.publisher <> "" then add "publisher" metadata.publisher; 292 + if metadata.booktitle <> "" then add "booktitle" metadata.booktitle; 293 + if metadata.journal <> "" then add "journal" metadata.journal; 294 + if metadata.institution <> "" then add "institution" metadata.institution; 295 + if metadata.pages <> "" then add "pages" metadata.pages; 296 + add_opt "volume" metadata.volume; 297 + add_opt "number" metadata.number; 298 + add_opt "doi" metadata.doi; 299 + add_opt "url" metadata.url; 300 + 301 + (* BibTeX entry *) 302 + Buffer.add_string buf "bib: |\n"; 303 + String.split_on_char '\n' metadata.bib |> List.iter (fun line -> 304 + Buffer.add_string buf (Printf.sprintf " %s\n" line) 305 + ); 306 + 307 + Buffer.add_string buf "---\n"; 308 + 309 + (* Abstract as body *) 310 + (match metadata.abstract with 311 + | Some abstract when abstract <> "" -> 312 + Buffer.add_string buf "\n"; 313 + Buffer.add_string buf abstract; 314 + Buffer.add_string buf "\n" 315 + | _ -> ()); 316 + 317 + Buffer.contents buf 318 + 319 + (** {1 Merging with Existing Papers} *) 320 + 321 + let merge_with_existing ~existing metadata = 322 + (* Preserve fields from existing paper if new ones are empty *) 323 + { 324 + metadata with 325 + abstract = (match metadata.abstract with 326 + | Some a when a <> "" -> Some a 327 + | _ -> if Bushel.Paper.abstract existing <> "" then Some (Bushel.Paper.abstract existing) else None); 328 + } 329 + (* Note: tags, projects, selected, slides, video are preserved at a higher level 330 + when writing the file - they're not part of paper_metadata *)
+16
lib_sync/dune
··· 1 + (library 2 + (name bushel_sync) 3 + (public_name bushel.sync) 4 + (libraries 5 + bushel 6 + bushel.config 7 + eio 8 + unix 9 + jsont 10 + jsont.bytesrw 11 + astring 12 + re 13 + uri 14 + ptime 15 + logs 16 + fmt))
+282
lib_typesense/bushel_typesense.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Typesense search integration for Bushel entries *) 7 + 8 + (** {1 Schema Definitions} *) 9 + 10 + let field name type_ ?(facet=false) ?(optional=false) () = 11 + let fields = [ 12 + ("name", `String name); 13 + ("type", `String type_); 14 + ] in 15 + let fields = if facet then ("facet", `Bool true) :: fields else fields in 16 + let fields = if optional then ("optional", `Bool true) :: fields else fields in 17 + `O fields 18 + 19 + let notes_schema = 20 + `O [ 21 + ("name", `String "notes"); 22 + ("fields", `A [ 23 + field "id" "string" (); 24 + field "title" "string" (); 25 + field "content" "string" (); 26 + field "date" "string" (); 27 + field "date_timestamp" "int64" (); 28 + field "tags" "string[]" ~facet:true (); 29 + field "body" "string" ~optional:true (); 30 + field "draft" "bool" (); 31 + field "synopsis" "string[]" ~optional:true (); 32 + field "thumbnail_url" "string" ~optional:true (); 33 + field "type" "string" ~facet:true ~optional:true (); 34 + field "status" "string" ~facet:true ~optional:true (); 35 + field "related_papers" "string[]" ~optional:true (); 36 + field "related_projects" "string[]" ~optional:true (); 37 + field "related_contacts" "string[]" ~optional:true (); 38 + field "attachments" "string[]" ~optional:true (); 39 + field "source" "string" ~facet:true ~optional:true (); 40 + field "url" "string" ~optional:true (); 41 + field "author" "string" ~optional:true (); 42 + field "category" "string" ~facet:true ~optional:true (); 43 + field "slug_ent" "string" ~optional:true (); 44 + field "words" "int32" ~optional:true (); 45 + ]); 46 + ("default_sorting_field", `String "date_timestamp"); 47 + ] 48 + 49 + let papers_schema = 50 + `O [ 51 + ("name", `String "papers"); 52 + ("fields", `A [ 53 + field "id" "string" (); 54 + field "title" "string" (); 55 + field "authors" "string[]" (); 56 + field "abstract" "string" (); 57 + field "date" "string" (); 58 + field "date_timestamp" "int64" (); 59 + field "tags" "string[]" ~facet:true (); 60 + field "doi" "string[]" ~optional:true (); 61 + field "arxiv_id" "string" ~optional:true (); 62 + field "pdf_url" "string[]" ~optional:true (); 63 + field "thumbnail_url" "string" ~optional:true (); 64 + field "journal" "string[]" ~optional:true (); 65 + field "related_projects" "string[]" ~optional:true (); 66 + field "related_talks" "string[]" ~optional:true (); 67 + ]); 68 + ("default_sorting_field", `String "date_timestamp"); 69 + ] 70 + 71 + let projects_schema = 72 + `O [ 73 + ("name", `String "projects"); 74 + ("fields", `A [ 75 + field "id" "string" (); 76 + field "title" "string" (); 77 + field "description" "string" (); 78 + field "start_year" "int32" (); 79 + field "finish_year" "int32" ~optional:true (); 80 + field "date" "string" (); 81 + field "date_timestamp" "int64" (); 82 + field "tags" "string[]" ~facet:true (); 83 + field "repository_url" "string" ~optional:true (); 84 + field "homepage_url" "string" ~optional:true (); 85 + field "languages" "string[]" ~facet:true ~optional:true (); 86 + field "license" "string" ~facet:true ~optional:true (); 87 + field "status" "string" ~facet:true ~optional:true (); 88 + field "related_papers" "string[]" ~optional:true (); 89 + field "related_talks" "string[]" ~optional:true (); 90 + field "body" "string" ~optional:true (); 91 + field "ideas" "string" ~optional:true (); 92 + ]); 93 + ("default_sorting_field", `String "date_timestamp"); 94 + ] 95 + 96 + let ideas_schema = 97 + `O [ 98 + ("name", `String "ideas"); 99 + ("fields", `A [ 100 + field "id" "string" (); 101 + field "title" "string" (); 102 + field "description" "string" (); 103 + field "year" "int32" (); 104 + field "date" "string" (); 105 + field "date_timestamp" "int64" (); 106 + field "tags" "string[]" ~facet:true (); 107 + field "level" "string" ~facet:true (); 108 + field "status" "string" ~facet:true (); 109 + field "project" "string" ~facet:true (); 110 + field "supervisors" "string[]" ~optional:true (); 111 + field "body" "string" ~optional:true (); 112 + field "students" "string[]" ~optional:true (); 113 + field "reading" "string" ~optional:true (); 114 + field "url" "string" ~optional:true (); 115 + ]); 116 + ("default_sorting_field", `String "date_timestamp"); 117 + ] 118 + 119 + let videos_schema = 120 + `O [ 121 + ("name", `String "videos"); 122 + ("fields", `A [ 123 + field "id" "string" (); 124 + field "title" "string" (); 125 + field "description" "string" (); 126 + field "published_date" "string" (); 127 + field "date" "string" (); 128 + field "date_timestamp" "int64" (); 129 + field "tags" "string[]" ~facet:true (); 130 + field "url" "string" (); 131 + field "uuid" "string" (); 132 + field "is_talk" "bool" (); 133 + field "paper" "string[]" ~optional:true (); 134 + field "project" "string[]" ~optional:true (); 135 + field "video_url" "string" ~optional:true (); 136 + field "embed_url" "string" ~optional:true (); 137 + field "duration" "int32" ~optional:true (); 138 + field "channel" "string" ~facet:true ~optional:true (); 139 + field "platform" "string" ~facet:true ~optional:true (); 140 + field "views" "int32" ~optional:true (); 141 + field "related_papers" "string[]" ~optional:true (); 142 + field "related_talks" "string[]" ~optional:true (); 143 + ]); 144 + ("default_sorting_field", `String "date_timestamp"); 145 + ] 146 + 147 + let contacts_schema = 148 + `O [ 149 + ("name", `String "contacts"); 150 + ("fields", `A [ 151 + field "id" "string" (); 152 + field "handle" "string" (); 153 + field "name" "string" (); 154 + field "names" "string[]" ~optional:true (); 155 + field "email" "string[]" ~optional:true (); 156 + field "icon" "string[]" ~optional:true (); 157 + field "github" "string[]" ~optional:true (); 158 + field "twitter" "string[]" ~optional:true (); 159 + field "bluesky" "string[]" ~optional:true (); 160 + field "mastodon" "string[]" ~optional:true (); 161 + field "orcid" "string[]" ~optional:true (); 162 + field "url" "string[]" ~optional:true (); 163 + field "atom" "string[]" ~optional:true (); 164 + ]); 165 + ] 166 + 167 + (** {1 Document Conversion} *) 168 + 169 + let ptime_to_timestamp t = 170 + let span = Ptime.to_span t in 171 + Int64.of_float (Ptime.Span.to_float_s span) 172 + 173 + let date_to_timestamp (y, m, d) = 174 + match Ptime.of_date (y, m, d) with 175 + | Some t -> ptime_to_timestamp t 176 + | None -> 0L 177 + 178 + let note_to_document (n : Bushel.Note.t) = 179 + let date = Bushel.Note.date n in 180 + let (y, m, d) = date in 181 + `O ([ 182 + ("id", `String (Bushel.Note.slug n)); 183 + ("title", `String (Bushel.Note.title n)); 184 + ("content", `String (Bushel.Note.body n)); 185 + ("date", `String (Printf.sprintf "%04d-%02d-%02d" y m d)); 186 + ("date_timestamp", `Float (Int64.to_float (date_to_timestamp date))); 187 + ("tags", `A (List.map (fun t -> `String t) (Bushel.Note.tags n))); 188 + ("draft", `Bool (Bushel.Note.draft n)); 189 + ("words", `Float (Float.of_int (Bushel.Note.words n))); 190 + ] @ 191 + (match Bushel.Note.synopsis n with Some s -> [("synopsis", `A [`String s])] | None -> []) @ 192 + (match Bushel.Note.source n with Some s -> [("source", `String s)] | None -> []) @ 193 + (match Bushel.Note.url n with Some u -> [("url", `String u)] | None -> []) @ 194 + (match Bushel.Note.author n with Some a -> [("author", `String a)] | None -> []) @ 195 + (match Bushel.Note.category n with Some c -> [("category", `String c)] | None -> []) @ 196 + (match Bushel.Note.slug_ent n with Some s -> [("slug_ent", `String s)] | None -> [])) 197 + 198 + let paper_to_document (p : Bushel.Paper.t) = 199 + let date = Bushel.Paper.date p in 200 + let (y, m, d) = date in 201 + `O ([ 202 + ("id", `String (Bushel.Paper.slug p)); 203 + ("title", `String (Bushel.Paper.title p)); 204 + ("authors", `A (List.map (fun a -> `String a) (Bushel.Paper.authors p))); 205 + ("abstract", `String (Bushel.Paper.abstract p)); 206 + ("date", `String (Printf.sprintf "%04d-%02d-%02d" y m d)); 207 + ("date_timestamp", `Float (Int64.to_float (date_to_timestamp date))); 208 + ("tags", `A (List.map (fun t -> `String t) (Bushel.Paper.tags p))); 209 + ] @ 210 + (match Bushel.Paper.doi p with Some d -> [("doi", `A [`String d])] | None -> []) @ 211 + (match Bushel.Paper.url p with Some u -> [("pdf_url", `A [`String u])] | None -> []) @ 212 + (if Bushel.Paper.journal p <> "" then [("journal", `A [`String (Bushel.Paper.journal p)])] else [])) 213 + 214 + let project_to_document (p : Bushel.Project.t) = 215 + let date = (Bushel.Project.start p, 1, 1) in 216 + let (y, m, d) = date in 217 + `O ([ 218 + ("id", `String (Bushel.Project.slug p)); 219 + ("title", `String (Bushel.Project.title p)); 220 + ("description", `String (Bushel.Project.body p)); 221 + ("start_year", `Float (Float.of_int (Bushel.Project.start p))); 222 + ("date", `String (Printf.sprintf "%04d-%02d-%02d" y m d)); 223 + ("date_timestamp", `Float (Int64.to_float (date_to_timestamp date))); 224 + ("tags", `A (List.map (fun t -> `String t) (Bushel.Project.tags p))); 225 + ("body", `String (Bushel.Project.body p)); 226 + ("ideas", `String (Bushel.Project.ideas p)); 227 + ] @ 228 + (match Bushel.Project.finish p with Some f -> [("finish_year", `Float (Float.of_int f))] | None -> [])) 229 + 230 + let idea_to_document (i : Bushel.Idea.t) = 231 + let date = (Bushel.Idea.year i, Bushel.Idea.month i, 1) in 232 + let (y, m, d) = date in 233 + `O [ 234 + ("id", `String (Bushel.Idea.slug i)); 235 + ("title", `String (Bushel.Idea.title i)); 236 + ("description", `String (Bushel.Idea.body i)); 237 + ("year", `Float (Float.of_int (Bushel.Idea.year i))); 238 + ("date", `String (Printf.sprintf "%04d-%02d-%02d" y m d)); 239 + ("date_timestamp", `Float (Int64.to_float (date_to_timestamp date))); 240 + ("tags", `A (List.map (fun t -> `String t) (Bushel.Idea.tags i))); 241 + ("level", `String (Bushel.Idea.level_to_string (Bushel.Idea.level i))); 242 + ("status", `String (Bushel.Idea.status_to_string (Bushel.Idea.status i))); 243 + ("project", `String (Bushel.Idea.project i)); 244 + ("supervisors", `A (List.map (fun s -> `String s) (Bushel.Idea.supervisors i))); 245 + ("students", `A (List.map (fun s -> `String s) (Bushel.Idea.students i))); 246 + ("body", `String (Bushel.Idea.body i)); 247 + ("reading", `String (Bushel.Idea.reading i)); 248 + ] 249 + 250 + let video_to_document (v : Bushel.Video.t) = 251 + let date = Bushel.Video.date v in 252 + let (y, m, d) = date in 253 + `O ([ 254 + ("id", `String (Bushel.Video.uuid v)); 255 + ("title", `String (Bushel.Video.title v)); 256 + ("description", `String (Bushel.Video.description v)); 257 + ("published_date", `String (Ptime.to_rfc3339 (Bushel.Video.datetime v))); 258 + ("date", `String (Printf.sprintf "%04d-%02d-%02d" y m d)); 259 + ("date_timestamp", `Float (Int64.to_float (date_to_timestamp date))); 260 + ("tags", `A (List.map (fun t -> `String t) (Bushel.Video.tags v))); 261 + ("url", `String (Bushel.Video.url v)); 262 + ("uuid", `String (Bushel.Video.uuid v)); 263 + ("is_talk", `Bool (Bushel.Video.talk v)); 264 + ] @ 265 + (match Bushel.Video.paper v with Some p -> [("paper", `A [`String p])] | None -> []) @ 266 + (match Bushel.Video.project v with Some p -> [("project", `A [`String p])] | None -> [])) 267 + 268 + let contact_to_document (c : Bushel.Contact.t) = 269 + `O ([ 270 + ("id", `String (Bushel.Contact.handle c)); 271 + ("handle", `String (Bushel.Contact.handle c)); 272 + ("name", `String (Bushel.Contact.name c)); 273 + ("names", `A (List.map (fun n -> `String n) (Bushel.Contact.names c))); 274 + ] @ 275 + (match Bushel.Contact.email c with Some e -> [("email", `A [`String e])] | None -> []) @ 276 + (match Bushel.Contact.github c with Some g -> [("github", `A [`String g])] | None -> []) @ 277 + (match Bushel.Contact.twitter c with Some t -> [("twitter", `A [`String t])] | None -> []) @ 278 + (match Bushel.Contact.bluesky c with Some b -> [("bluesky", `A [`String b])] | None -> []) @ 279 + (match Bushel.Contact.mastodon c with Some m -> [("mastodon", `A [`String m])] | None -> []) @ 280 + (match Bushel.Contact.orcid c with Some o -> [("orcid", `A [`String o])] | None -> []) @ 281 + (match Bushel.Contact.url c with Some u -> [("url", `A [`String u])] | None -> []) @ 282 + (match Bushel.Contact.atom c with Some a -> [("atom", `A (List.map (fun x -> `String x) a))] | None -> []))
+4
lib_typesense/dune
··· 1 + (library 2 + (name bushel_typesense) 3 + (public_name bushel.typesense) 4 + (libraries bushel jsont ptime))