···1+ISC License
2+3+Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
4+5+Permission to use, copy, modify, and/or distribute this software for any
6+purpose with or without fee is hereby granted, provided that the above
7+copyright notice and this permission notice appear in all copies.
8+9+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15+PERFORMANCE OF THIS SOFTWARE.
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Bushel CLI - knowledge base management tool *)
7+8+open Cmdliner
9+10+(** Simple table formatting *)
11+module Table = struct
12+ type row = string list
13+ type t = { headers : string list; rows : row list }
14+15+ let make ~headers rows = { headers; rows }
16+17+ let column_widths t =
18+ let num_cols = List.length t.headers in
19+ let widths = Array.make num_cols 0 in
20+ (* Headers *)
21+ List.iteri (fun i h -> widths.(i) <- String.length h) t.headers;
22+ (* Rows *)
23+ List.iter (fun row ->
24+ List.iteri (fun i cell ->
25+ if i < num_cols then
26+ widths.(i) <- max widths.(i) (String.length cell)
27+ ) row
28+ ) t.rows;
29+ Array.to_list widths
30+31+ let pad s width =
32+ let len = String.length s in
33+ if len >= width then s
34+ else s ^ String.make (width - len) ' '
35+36+ let print t =
37+ let widths = column_widths t in
38+ let print_row row =
39+ List.iter2 (fun cell width ->
40+ Printf.printf "%s " (pad cell width)
41+ ) row widths;
42+ print_newline ()
43+ in
44+ (* Print header *)
45+ print_row t.headers;
46+ (* Print separator *)
47+ List.iter (fun w -> Printf.printf "%s " (String.make w '-')) widths;
48+ print_newline ();
49+ (* Print rows *)
50+ List.iter print_row t.rows
51+end
52+53+(** Truncate string to max length with ellipsis *)
54+let truncate max_len s =
55+ if String.length s <= max_len then s
56+ else String.sub s 0 (max_len - 3) ^ "..."
57+58+(** Format date tuple *)
59+let format_date (year, month, day) =
60+ Printf.sprintf "%04d-%02d-%02d" year month day
61+62+(** Entry type to string *)
63+let type_string = function
64+ | `Paper _ -> "paper"
65+ | `Project _ -> "project"
66+ | `Idea _ -> "idea"
67+ | `Video _ -> "video"
68+ | `Note _ -> "note"
69+70+(** {1 Common Options} *)
71+72+let data_dir =
73+ let doc = "Path to the bushel data repository." in
74+ let env = Cmd.Env.info "BUSHEL_DATA" in
75+ Arg.(value & opt (some string) None & info ["d"; "data-dir"] ~env ~docv:"DIR" ~doc)
76+77+let config_file =
78+ let doc = "Path to config file (default: ~/.config/bushel/config.toml)." in
79+ Arg.(value & opt (some string) None & info ["c"; "config"] ~docv:"FILE" ~doc)
80+81+(** Setup logging *)
82+let setup_log style_renderer level =
83+ Fmt_tty.setup_std_outputs ?style_renderer ();
84+ Logs.set_level level;
85+ Logs.set_reporter (Logs_fmt.reporter ())
86+87+let logging_t =
88+ Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ())
89+90+(** Load config *)
91+let load_config config_file =
92+ match config_file with
93+ | Some path -> Bushel_config.load_file path
94+ | None -> Bushel_config.load ()
95+96+(** Get data directory from config or CLI *)
97+let get_data_dir config data_dir_opt =
98+ match data_dir_opt with
99+ | Some d -> d
100+ | None -> config.Bushel_config.data_dir
101+102+(** Load entries using Eio *)
103+let with_entries data_dir f =
104+ Eio_main.run @@ fun env ->
105+ let fs = Eio.Stdenv.fs env in
106+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
107+ f env entries
108+109+(** {1 List Command} *)
110+111+let list_cmd =
112+ let type_filter =
113+ let doc = "Filter by entry type (paper, project, idea, video, note)." in
114+ Arg.(value & opt (some string) None & info ["t"; "type"] ~docv:"TYPE" ~doc)
115+ in
116+ let limit =
117+ let doc = "Maximum number of entries to show." in
118+ Arg.(value & opt (some int) None & info ["n"; "limit"] ~docv:"N" ~doc)
119+ in
120+ let sort_by =
121+ let doc = "Sort by field (date, title, type). Default: date." in
122+ Arg.(value & opt string "date" & info ["s"; "sort"] ~docv:"FIELD" ~doc)
123+ in
124+ let run () config_file data_dir type_filter limit sort_by =
125+ match load_config config_file with
126+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
127+ | Ok config ->
128+ let data_dir = get_data_dir config data_dir in
129+ with_entries data_dir @@ fun _env entries ->
130+ let all = Bushel.Entry.all_entries entries in
131+ (* Filter by type *)
132+ let filtered = match type_filter with
133+ | None -> all
134+ | Some t ->
135+ List.filter (fun e ->
136+ String.lowercase_ascii (type_string e) = String.lowercase_ascii t
137+ ) all
138+ in
139+ (* Sort *)
140+ let sorted = match sort_by with
141+ | "title" ->
142+ List.sort (fun a b ->
143+ String.compare (Bushel.Entry.title a) (Bushel.Entry.title b)
144+ ) filtered
145+ | "type" ->
146+ List.sort (fun a b ->
147+ let cmp = String.compare (type_string a) (type_string b) in
148+ if cmp <> 0 then cmp
149+ else Bushel.Entry.compare a b
150+ ) filtered
151+ | _ -> (* date, default *)
152+ List.sort (fun a b -> Bushel.Entry.compare b a) filtered (* newest first *)
153+ in
154+ (* Limit *)
155+ let limited = match limit with
156+ | None -> sorted
157+ | Some n -> List.filteri (fun i _ -> i < n) sorted
158+ in
159+ (* Build table *)
160+ let rows = List.map (fun e ->
161+ [ type_string e
162+ ; Bushel.Entry.slug e
163+ ; truncate 50 (Bushel.Entry.title e)
164+ ; format_date (Bushel.Entry.date e)
165+ ]
166+ ) limited in
167+ let table = Table.make
168+ ~headers:["TYPE"; "SLUG"; "TITLE"; "DATE"]
169+ rows
170+ in
171+ Table.print table;
172+ Printf.printf "\nTotal: %d entries\n" (List.length limited);
173+ 0
174+ in
175+ let doc = "List all entries in the knowledge base." in
176+ let info = Cmd.info "list" ~doc in
177+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ type_filter $ limit $ sort_by)
178+179+(** {1 Stats Command} *)
180+181+let stats_cmd =
182+ let run () config_file data_dir =
183+ match load_config config_file with
184+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
185+ | Ok config ->
186+ let data_dir = get_data_dir config data_dir in
187+ with_entries data_dir @@ fun _env entries ->
188+ let papers = List.length (Bushel.Entry.papers entries) in
189+ let notes = List.length (Bushel.Entry.notes entries) in
190+ let projects = List.length (Bushel.Entry.projects entries) in
191+ let ideas = List.length (Bushel.Entry.ideas entries) in
192+ let videos = List.length (Bushel.Entry.videos entries) in
193+ let contacts = List.length (Bushel.Entry.contacts entries) in
194+ Printf.printf "Bushel Statistics\n";
195+ Printf.printf "=================\n";
196+ Printf.printf "Papers: %4d\n" papers;
197+ Printf.printf "Notes: %4d\n" notes;
198+ Printf.printf "Projects: %4d\n" projects;
199+ Printf.printf "Ideas: %4d\n" ideas;
200+ Printf.printf "Videos: %4d\n" videos;
201+ Printf.printf "Contacts: %4d\n" contacts;
202+ Printf.printf "-----------------\n";
203+ Printf.printf "Total: %4d\n" (papers + notes + projects + ideas + videos);
204+ 0
205+ in
206+ let doc = "Show statistics about the knowledge base." in
207+ let info = Cmd.info "stats" ~doc in
208+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir)
209+210+(** {1 Show Command} *)
211+212+let show_cmd =
213+ let slug_arg =
214+ let doc = "The slug of the entry to show." in
215+ Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
216+ in
217+ let run () config_file data_dir slug =
218+ match load_config config_file with
219+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
220+ | Ok config ->
221+ let data_dir = get_data_dir config data_dir in
222+ with_entries data_dir @@ fun _env entries ->
223+ match Bushel.Entry.lookup entries slug with
224+ | None ->
225+ Printf.eprintf "Entry not found: %s\n" slug;
226+ 1
227+ | Some entry ->
228+ Printf.printf "Type: %s\n" (type_string entry);
229+ Printf.printf "Slug: %s\n" (Bushel.Entry.slug entry);
230+ Printf.printf "Title: %s\n" (Bushel.Entry.title entry);
231+ Printf.printf "Date: %s\n" (format_date (Bushel.Entry.date entry));
232+ Printf.printf "URL: %s\n" (Bushel.Entry.site_url entry);
233+ (match Bushel.Entry.synopsis entry with
234+ | Some s -> Printf.printf "Synopsis: %s\n" s
235+ | None -> ());
236+ Printf.printf "\n--- Body ---\n%s\n" (Bushel.Entry.body entry);
237+ 0
238+ in
239+ let doc = "Show details of a specific entry." in
240+ let info = Cmd.info "show" ~doc in
241+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ slug_arg)
242+243+(** {1 Sync Command} *)
244+245+let sync_cmd =
246+ let remote =
247+ let doc = "Also upload to Typesense (remote sync)." in
248+ Arg.(value & flag & info ["remote"] ~doc)
249+ in
250+ let only =
251+ let doc = "Only run specific step (images, srcsetter, thumbs, faces, videos, typesense)." in
252+ Arg.(value & opt (some string) None & info ["only"] ~docv:"STEP" ~doc)
253+ in
254+ let run () config_file data_dir remote only =
255+ match load_config config_file with
256+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
257+ | Ok config ->
258+ let data_dir = get_data_dir config data_dir in
259+ (* Determine which steps to run *)
260+ let steps = match only with
261+ | Some step_name ->
262+ (match Bushel_sync.step_of_string step_name with
263+ | Some step -> [step]
264+ | None ->
265+ Printf.eprintf "Unknown step: %s\n" step_name;
266+ Printf.eprintf "Valid steps: images, srcsetter, thumbs, faces, videos, typesense\n";
267+ exit 1)
268+ | None ->
269+ if remote then Bushel_sync.all_steps_with_remote
270+ else Bushel_sync.all_steps
271+ in
272+273+ Eio_main.run @@ fun env ->
274+ let fs = Eio.Stdenv.fs env in
275+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
276+277+ Printf.printf "Running sync pipeline...\n";
278+ List.iter (fun step ->
279+ Printf.printf " - %s\n" (Bushel_sync.string_of_step step)
280+ ) steps;
281+ Printf.printf "\n";
282+283+ let results = Bushel_sync.run ~env ~config ~steps ~entries in
284+285+ Printf.printf "\nResults:\n";
286+ List.iter (fun r ->
287+ let status = if r.Bushel_sync.success then "OK" else "FAIL" in
288+ Printf.printf " [%s] %s: %s\n"
289+ status
290+ (Bushel_sync.string_of_step r.step)
291+ r.message
292+ ) results;
293+294+ let failures = List.filter (fun r -> not r.Bushel_sync.success) results in
295+ if failures = [] then 0 else 1
296+ in
297+ let doc = "Sync images, thumbnails, and optionally upload to Typesense." in
298+ let man = [
299+ `S Manpage.s_description;
300+ `P "The sync command runs a pipeline to synchronize images and thumbnails:";
301+ `P "1. $(b,images) - Rsync images from remote server";
302+ `P "2. $(b,srcsetter) - Convert images to WebP srcset variants";
303+ `P "3. $(b,thumbs) - Generate paper thumbnails from PDFs";
304+ `P "4. $(b,faces) - Fetch contact face thumbnails from Immich";
305+ `P "5. $(b,videos) - Fetch video thumbnails from PeerTube";
306+ `P "6. $(b,typesense) - Upload to Typesense (with --remote)";
307+ ] in
308+ let info = Cmd.info "sync" ~doc ~man in
309+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ only)
310+311+(** {1 Paper Add Command} *)
312+313+let paper_add_cmd =
314+ let doi_arg =
315+ let doc = "The DOI to resolve." in
316+ Arg.(required & pos 0 (some string) None & info [] ~docv:"DOI" ~doc)
317+ in
318+ let slug =
319+ let doc = "Slug for the paper (e.g., 2024-venue-name)." in
320+ Arg.(required & opt (some string) None & info ["slug"] ~docv:"SLUG" ~doc)
321+ in
322+ let version =
323+ let doc = "Paper version (e.g., v1, v2). Auto-increments if not specified." in
324+ Arg.(value & opt (some string) None & info ["ver"] ~docv:"VER" ~doc)
325+ in
326+ let run () config_file data_dir doi slug version =
327+ match load_config config_file with
328+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
329+ | Ok config ->
330+ let data_dir = get_data_dir config data_dir in
331+332+ Eio_main.run @@ fun env ->
333+ let fs = Eio.Stdenv.fs env in
334+ let proc_mgr = Eio.Stdenv.process_mgr env in
335+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
336+337+ (* Determine version *)
338+ let papers_dir = Filename.concat data_dir ("data/papers/" ^ slug) in
339+ let version = match version with
340+ | Some v -> v
341+ | None ->
342+ (* Auto-increment: find highest existing version *)
343+ if Sys.file_exists papers_dir then begin
344+ let files = Sys.readdir papers_dir |> Array.to_list in
345+ let versions = List.filter_map (fun f ->
346+ if Filename.check_suffix f ".md" then
347+ Some (Filename.chop_extension f)
348+ else None
349+ ) files in
350+ let max_ver = List.fold_left (fun acc v ->
351+ try
352+ let n = Scanf.sscanf v "v%d" Fun.id in
353+ max acc n
354+ with _ -> acc
355+ ) 0 versions in
356+ Printf.sprintf "v%d" (max_ver + 1)
357+ end else "v1"
358+ in
359+360+ Printf.printf "Resolving DOI: %s\n" doi;
361+ Printf.printf "Slug: %s, Version: %s\n" slug version;
362+363+ match Bushel_sync.Zotero.resolve ~proc_mgr
364+ ~server_url:config.zotero_translation_server
365+ ~slug doi with
366+ | Error e ->
367+ Printf.eprintf "Error resolving DOI: %s\n" e;
368+ 1
369+ | Ok metadata ->
370+ Printf.printf "Title: %s\n" metadata.title;
371+ Printf.printf "Authors: %s\n" (String.concat ", " metadata.authors);
372+ Printf.printf "Year: %d\n" metadata.year;
373+374+ (* Check for existing versions and merge *)
375+ let metadata =
376+ let existing_papers = Bushel.Entry.papers entries in
377+ match Bushel.Paper.lookup existing_papers slug with
378+ | Some existing ->
379+ Printf.printf "Merging with existing paper data...\n";
380+ Bushel_sync.Zotero.merge_with_existing ~existing metadata
381+ | None -> metadata
382+ in
383+384+ (* Generate file content *)
385+ let content = Bushel_sync.Zotero.to_yaml_frontmatter ~slug ~ver:version metadata in
386+387+ (* Create directory if needed *)
388+ if not (Sys.file_exists papers_dir) then
389+ Unix.mkdir papers_dir 0o755;
390+391+ (* Write file *)
392+ let filepath = Filename.concat papers_dir (version ^ ".md") in
393+ let oc = open_out filepath in
394+ output_string oc content;
395+ close_out oc;
396+397+ Printf.printf "Created: %s\n" filepath;
398+ 0
399+ in
400+ let doc = "Add a paper from DOI, merging with existing versions." in
401+ let man = [
402+ `S Manpage.s_description;
403+ `P "Resolves a DOI using the Zotero Translation Server and creates a paper entry.";
404+ `P "If older versions of the paper exist, preserves abstract, tags, projects, \
405+ selected flag, and slides from the existing paper.";
406+ ] in
407+ let info = Cmd.info "paper" ~doc ~man in
408+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ doi_arg $ slug $ version)
409+410+(** {1 Video Fetch Command} *)
411+412+let video_fetch_cmd =
413+ let server =
414+ let doc = "PeerTube server name from config." in
415+ Arg.(required & opt (some string) None & info ["server"; "s"] ~docv:"NAME" ~doc)
416+ in
417+ let channel =
418+ let doc = "Channel name to fetch videos from." in
419+ Arg.(required & opt (some string) None & info ["channel"] ~docv:"CHANNEL" ~doc)
420+ in
421+ let run () config_file data_dir server channel =
422+ match load_config config_file with
423+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
424+ | Ok config ->
425+ let data_dir = get_data_dir config data_dir in
426+427+ (* Find server endpoint *)
428+ let endpoint = List.find_map (fun (s : Bushel_config.peertube_server) ->
429+ if s.name = server then Some s.endpoint else None
430+ ) config.peertube_servers in
431+432+ match endpoint with
433+ | None ->
434+ Printf.eprintf "Unknown server: %s\n" server;
435+ Printf.eprintf "Available servers:\n";
436+ List.iter (fun (s : Bushel_config.peertube_server) ->
437+ Printf.eprintf " - %s (%s)\n" s.name s.endpoint
438+ ) config.peertube_servers;
439+ 1
440+ | Some endpoint ->
441+ Eio_main.run @@ fun env ->
442+ let proc_mgr = Eio.Stdenv.process_mgr env in
443+444+ Printf.printf "Fetching videos from %s channel %s...\n" server channel;
445+446+ let videos = Bushel_sync.Peertube.fetch_all_channel_videos
447+ ~proc_mgr ~endpoint ~channel () in
448+449+ Printf.printf "Found %d videos\n" (List.length videos);
450+451+ (* Load or create videos index *)
452+ let index_path = Filename.concat data_dir "data/videos.yml" in
453+ let index = Bushel_sync.Peertube.VideoIndex.load_file index_path in
454+455+ (* Create video files and update index *)
456+ let videos_dir = Filename.concat data_dir "data/videos" in
457+ if not (Sys.file_exists videos_dir) then
458+ Unix.mkdir videos_dir 0o755;
459+460+ let new_count = ref 0 in
461+ List.iter (fun (video : Bushel_sync.Peertube.video) ->
462+ let video_path = Filename.concat videos_dir (video.uuid ^ ".md") in
463+464+ if Sys.file_exists video_path then
465+ Printf.printf " Skipping %s (exists)\n" video.uuid
466+ else begin
467+ Printf.printf " Creating %s: %s\n" video.uuid video.name;
468+469+ (* Generate markdown file *)
470+ let content = Printf.sprintf {|---
471+title: %s
472+published_date: %s
473+uuid: %s
474+url: %s
475+talk: false
476+tags: []
477+---
478+479+%s
480+|}
481+ video.name
482+ (Ptime.to_rfc3339 video.published_at)
483+ video.uuid
484+ video.url
485+ (Option.value ~default:"" video.description)
486+ in
487+488+ let oc = open_out video_path in
489+ output_string oc content;
490+ close_out oc;
491+492+ (* Update index *)
493+ Bushel_sync.Peertube.VideoIndex.add index ~uuid:video.uuid ~server;
494+ incr new_count
495+ end
496+ ) videos;
497+498+ (* Save updated index *)
499+ Bushel_sync.Peertube.VideoIndex.save_file index_path index;
500+501+ Printf.printf "\nCreated %d new video entries\n" !new_count;
502+ Printf.printf "Updated index: %s\n" index_path;
503+ 0
504+ in
505+ let doc = "Fetch videos from a PeerTube channel." in
506+ let info = Cmd.info "video" ~doc in
507+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ server $ channel)
508+509+(** {1 Config Command} *)
510+511+let config_cmd =
512+ let run () config_file =
513+ match load_config config_file with
514+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
515+ | Ok config ->
516+ Printf.printf "Config file: %s\n" (Bushel_config.config_file ());
517+ Printf.printf "\n";
518+ Fmt.pr "%a\n" Bushel_config.pp config;
519+ 0
520+ in
521+ let doc = "Show current configuration." in
522+ let info = Cmd.info "config" ~doc in
523+ Cmd.v info Term.(const run $ logging_t $ config_file)
524+525+(** {1 Init Command} *)
526+527+let init_cmd =
528+ let force =
529+ let doc = "Overwrite existing config file." in
530+ Arg.(value & flag & info ["force"; "f"] ~doc)
531+ in
532+ let run () force =
533+ match Bushel_config.write_default_config ~force () with
534+ | Error e ->
535+ Printf.eprintf "%s\n" e;
536+ 1
537+ | Ok path ->
538+ Printf.printf "Created config file: %s\n" path;
539+ Printf.printf "\nEdit this file to configure:\n";
540+ Printf.printf " - Remote server for image sync\n";
541+ Printf.printf " - Local data and image directories\n";
542+ Printf.printf " - Immich endpoint and API key\n";
543+ Printf.printf " - PeerTube servers\n";
544+ Printf.printf " - Typesense and OpenAI API keys\n";
545+ Printf.printf " - Zotero Translation Server URL\n";
546+ 0
547+ in
548+ let doc = "Initialize a default configuration file." in
549+ let man = [
550+ `S Manpage.s_description;
551+ `P "Creates a default config.toml file at ~/.config/bushel/config.toml";
552+ `P "The generated file includes comments explaining each option.";
553+ `P "Use --force to overwrite an existing config file.";
554+ ] in
555+ let info = Cmd.info "init" ~doc ~man in
556+ Cmd.v info Term.(const run $ logging_t $ force)
557+558+(** {1 Main Command Group} *)
559+560+let main_cmd =
561+ let doc = "Bushel knowledge base CLI" in
562+ let man = [
563+ `S Manpage.s_description;
564+ `P "Bushel is a CLI tool for managing and querying a knowledge base \
565+ containing papers, notes, projects, ideas, and videos.";
566+ `S Manpage.s_commands;
567+ `P "Use $(b,bushel COMMAND --help) for help on a specific command.";
568+ `S "CONFIGURATION";
569+ `P "Configuration is read from ~/.config/bushel/config.toml";
570+ `P "See $(b,bushel config) for current settings.";
571+ ] in
572+ let info = Cmd.info "bushel" ~version:"0.2.0" ~doc ~man in
573+ Cmd.group info [
574+ init_cmd;
575+ list_cmd;
576+ stats_cmd;
577+ show_cmd;
578+ sync_cmd;
579+ paper_add_cmd;
580+ video_fetch_cmd;
581+ config_cmd;
582+ ]
583+584+let () =
585+ match Cmd.eval_value main_cmd with
586+ | Ok (`Ok exit_code) -> exit exit_code
587+ | Ok (`Help | `Version) -> exit 0
588+ | Error _ -> exit 1
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Bushel - Personal knowledge base and research entry management
7+8+ Bushel is a library for managing structured research entries including
9+ notes, papers, projects, ideas, videos, and contacts. It provides typed
10+ access to markdown files with YAML frontmatter and supports link graphs,
11+ markdown processing with custom extensions, and search integration.
12+13+ {1 Entry Types}
14+15+ - {!Contact} - People/researchers with social links
16+ - {!Note} - Blog posts and research notes
17+ - {!Paper} - Academic papers with BibTeX metadata
18+ - {!Project} - Research projects
19+ - {!Idea} - Research ideas/proposals
20+ - {!Video} - Talk videos and recordings
21+22+ {1 Core Modules}
23+24+ - {!Entry} - Union type for all entry types with common operations
25+ - {!Tags} - Tag parsing and filtering
26+ - {!Md} - Markdown processing with Bushel link extensions
27+ - {!Link_graph} - Bidirectional link tracking between entries
28+29+ {1 Quick Start}
30+31+ {[
32+ (* Load entries using bushel-eio *)
33+ let entries = Bushel_loader.load fs "/path/to/data" in
34+35+ (* Look up entries by slug *)
36+ match Bushel.Entry.lookup entries "my-note" with
37+ | Some (`Note n) -> Printf.printf "Title: %s\n" (Bushel.Note.title n)
38+ | _ -> ()
39+40+ (* Get backlinks *)
41+ let backlinks = Bushel.Link_graph.get_backlinks_for_slug "my-note" in
42+ List.iter print_endline backlinks
43+ ]}
44+*)
45+46+(** {1 Entry Types} *)
47+48+module Contact = Bushel_contact
49+(** Contact/person entries. *)
50+51+module Note = Bushel_note
52+(** Blog post and research note entries. *)
53+54+module Paper = Bushel_paper
55+(** Academic paper entries with BibTeX-style metadata. *)
56+57+module Project = Bushel_project
58+(** Research project entries. *)
59+60+module Idea = Bushel_idea
61+(** Research idea/proposal entries. *)
62+63+module Video = Bushel_video
64+(** Video/talk recording entries. *)
65+66+(** {1 Core Modules} *)
67+68+module Entry = Bushel_entry
69+(** Union type for all entry types with common accessors. *)
70+71+module Tags = Bushel_tags
72+(** Tag parsing, filtering, and counting. *)
73+74+module Md = Bushel_md
75+(** Markdown processing with Bushel link extensions. *)
76+77+module Link = Bushel_link
78+(** External link tracking and merging. *)
79+80+module Link_graph = Bushel_link_graph
81+(** Bidirectional link graph for entry relationships. *)
82+83+module Description = Bushel_description
84+(** Generate descriptive text for entries. *)
85+86+(** {1 Utilities} *)
87+88+module Types = Bushel_types
89+(** Common types and Jsont codecs. *)
90+91+module Util = Bushel_util
92+(** Utility functions (word counting, text processing). *)
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Generate descriptive text for Bushel entries *)
7+8+(** Format a date as "Month Year" *)
9+let format_date (year, month, _day) =
10+ Printf.sprintf "%s %d" (Bushel_types.month_name month) year
11+12+(** Generate a descriptive sentence for a paper *)
13+let paper_description (p : Bushel_paper.t) ~date_str =
14+ let venue = match String.lowercase_ascii (Bushel_paper.bibtype p) with
15+ | "inproceedings" -> Bushel_paper.booktitle p
16+ | "article" -> Bushel_paper.journal p
17+ | "book" ->
18+ let pub = Bushel_paper.publisher p in
19+ if pub = "" then "Book" else "Book by " ^ pub
20+ | "techreport" ->
21+ let inst = Bushel_paper.institution p in
22+ if inst = "" then "Technical report" else "Technical report at " ^ inst
23+ | "misc" ->
24+ let pub = Bushel_paper.publisher p in
25+ if pub = "" then "Working paper" else "Working paper at " ^ pub
26+ | _ -> "Publication"
27+ in
28+ Printf.sprintf "Paper in %s (%s)" venue date_str
29+30+(** Generate a descriptive sentence for a note *)
31+let note_description (n : Bushel_note.t) ~date_str ~lookup_fn =
32+ match Bushel_note.slug_ent n with
33+ | Some slug_ent ->
34+ (match lookup_fn slug_ent with
35+ | Some related_title ->
36+ Printf.sprintf "Note about %s (%s)" related_title date_str
37+ | None -> Printf.sprintf "Research note (%s)" date_str)
38+ | None -> Printf.sprintf "Research note (%s)" date_str
39+40+(** Generate a descriptive sentence for an idea *)
41+let idea_description (i : Bushel_idea.t) ~date_str =
42+ let status_str = String.lowercase_ascii (Bushel_idea.status_to_string (Bushel_idea.status i)) in
43+ let level_str = Bushel_idea.level_to_string (Bushel_idea.level i) in
44+ Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str
45+46+(** Generate a descriptive sentence for a video *)
47+let video_description (v : Bushel_video.t) ~date_str ~lookup_fn =
48+ let video_type = if Bushel_video.talk v then "Talk video" else "Video" in
49+ let context = match Bushel_video.paper v with
50+ | Some paper_slug ->
51+ (match lookup_fn paper_slug with
52+ | Some title -> Printf.sprintf " about %s" title
53+ | None -> "")
54+ | None ->
55+ (match Bushel_video.project v with
56+ | Some project_slug ->
57+ (match lookup_fn project_slug with
58+ | Some title -> Printf.sprintf " about %s" title
59+ | None -> "")
60+ | None -> "")
61+ in
62+ Printf.sprintf "%s%s (%s)" video_type context date_str
63+64+(** Generate a descriptive sentence for a project *)
65+let project_description (pr : Bushel_project.t) =
66+ let end_str = match Bushel_project.finish pr with
67+ | Some year -> string_of_int year
68+ | None -> "present"
69+ in
70+ Printf.sprintf "Project (%d–%s)" (Bushel_project.start pr) end_str
71+72+(** Generate description for any entry type *)
73+let entry_description entries entry =
74+ let lookup_fn slug =
75+ match Bushel_entry.lookup entries slug with
76+ | Some e -> Some (Bushel_entry.title e)
77+ | None -> None
78+ in
79+ let date = Bushel_entry.date entry in
80+ let date_str = format_date date in
81+ match entry with
82+ | `Paper p -> paper_description p ~date_str
83+ | `Note n -> note_description n ~date_str ~lookup_fn
84+ | `Idea i -> idea_description i ~date_str
85+ | `Video v -> video_description v ~date_str ~lookup_fn
86+ | `Project p -> project_description p
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Union entry type for all Bushel content *)
7+8+(** A single entry in the knowledge base. *)
9+type entry =
10+ [ `Paper of Bushel_paper.t
11+ | `Project of Bushel_project.t
12+ | `Idea of Bushel_idea.t
13+ | `Video of Bushel_video.t
14+ | `Note of Bushel_note.t
15+ ]
16+17+(** Slug-to-entry lookup table. *)
18+type slugs = (string, entry) Hashtbl.t
19+20+(** The complete entry collection. *)
21+type t
22+23+(** {1 Constructors} *)
24+25+val v :
26+ papers:Bushel_paper.t list ->
27+ notes:Bushel_note.t list ->
28+ projects:Bushel_project.t list ->
29+ ideas:Bushel_idea.t list ->
30+ videos:Bushel_video.t list ->
31+ contacts:Bushel_contact.t list ->
32+ data_dir:string ->
33+ t
34+(** Create an entry collection from lists of each entry type. *)
35+36+(** {1 Accessors} *)
37+38+val contacts : t -> Bushel_contact.ts
39+val videos : t -> Bushel_video.ts
40+val ideas : t -> Bushel_idea.ts
41+val papers : t -> Bushel_paper.ts
42+val notes : t -> Bushel_note.ts
43+val projects : t -> Bushel_project.ts
44+val old_papers : t -> Bushel_paper.ts
45+val data_dir : t -> string
46+47+(** {1 Lookup Functions} *)
48+49+val lookup : t -> string -> entry option
50+(** [lookup entries slug] finds an entry by its slug. *)
51+52+val lookup_exn : t -> string -> entry
53+(** Like {!lookup} but raises [Not_found] if the slug doesn't exist. *)
54+55+(** {1 Entry Properties} *)
56+57+val to_type_string : entry -> string
58+(** [to_type_string entry] returns the type name as a string. *)
59+60+val slug : entry -> string
61+(** [slug entry] returns the entry's slug. *)
62+63+val title : entry -> string
64+(** [title entry] returns the entry's title. *)
65+66+val body : entry -> string
67+(** [body entry] returns the entry's body content. *)
68+69+val sidebar : entry -> string option
70+(** [sidebar entry] returns the entry's sidebar content if present. *)
71+72+val synopsis : entry -> string option
73+(** [synopsis entry] returns the entry's synopsis if present. *)
74+75+val site_url : entry -> string
76+(** [site_url entry] returns the site URL path for the entry. *)
77+78+val date : entry -> int * int * int
79+(** [date entry] returns the entry's date as (year, month, day). *)
80+81+val datetime : entry -> Ptime.t
82+(** [datetime entry] returns the entry's date as a timestamp. *)
83+84+val year : entry -> int
85+(** [year entry] returns the entry's year. *)
86+87+val is_index_entry : entry -> bool
88+(** [is_index_entry entry] returns true if this is an index page. *)
89+90+(** {1 Derived Lookups} *)
91+92+val lookup_site_url : t -> string -> string
93+(** [lookup_site_url entries slug] returns the site URL for a slug. *)
94+95+val lookup_title : t -> string -> string
96+(** [lookup_title entries slug] returns the title for a slug. *)
97+98+val notes_for_slug : t -> string -> Bushel_note.t list
99+(** [notes_for_slug entries slug] returns notes that reference the given slug. *)
100+101+val all_entries : t -> entry list
102+(** [all_entries entries] returns all entries as a list. *)
103+104+val all_papers : t -> entry list
105+(** [all_papers entries] returns all papers including old versions. *)
106+107+(** {1 Comparison} *)
108+109+val compare : entry -> entry -> int
110+(** Compare entries by date, then by title. *)
111+112+(** {1 Contact Lookups} *)
113+114+val lookup_by_name : t -> string -> Bushel_contact.t option
115+(** [lookup_by_name entries name] finds a contact by name. *)
116+117+(** {1 Tag Functions} *)
118+119+val tags_of_ent : t -> entry -> Bushel_tags.t list
120+(** [tags_of_ent entries entry] returns the entry's tags. *)
121+122+val mention_entries : t -> Bushel_tags.t list -> entry list
123+(** [mention_entries entries tags] returns entries mentioned in the tags. *)
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Bushel Markdown extensions and utilities
7+8+ This module provides mappers to convert Bushel markdown extensions to different
9+ output formats. Bushel extends standard markdown with:
10+11+ - [:slug] - Links to bushel entries by slug
12+ - [@@handle] - Links to contacts by handle
13+ - [##tag] - Tag references
14+15+ Two main mapper modes:
16+ - Sidenote mode for the main website (with previews)
17+ - Plain HTML mode for feeds and simple output
18+*)
19+20+(** {1 Link Detection} *)
21+22+let is_bushel_slug = String.starts_with ~prefix:":"
23+let is_tag_slug link =
24+ String.starts_with ~prefix:"##" link &&
25+ not (String.starts_with ~prefix:"###" link)
26+let is_type_filter_slug = String.starts_with ~prefix:"###"
27+let is_contact_slug = String.starts_with ~prefix:"@"
28+29+let strip_handle s =
30+ if String.length s = 0 then s
31+ else if s.[0] = '@' || s.[0] = ':' then
32+ String.sub s 1 (String.length s - 1)
33+ else if String.length s > 1 && s.[0] = '#' && s.[1] = '#' then
34+ String.sub s 2 (String.length s - 2)
35+ else s
36+37+(** {1 Custom Link Resolution} *)
38+39+let authorlink = Cmarkit.Meta.key ()
40+let sluglink = Cmarkit.Meta.key ()
41+42+let make_authorlink label =
43+ let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in
44+ Cmarkit.Label.with_meta meta label
45+46+let make_sluglink label =
47+ let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in
48+ Cmarkit.Label.with_meta meta label
49+50+(** Custom label resolver for Bushel links *)
51+let with_bushel_links = function
52+ | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
53+ | `Ref (_, _, (Some _ as def)) -> def
54+ | `Ref (_, ref, None) ->
55+ let txt = Cmarkit.Label.key ref in
56+ if String.length txt = 0 then None
57+ else match txt.[0] with
58+ | '@' -> Some (make_authorlink ref)
59+ | ':' -> Some (make_sluglink ref)
60+ | '#' -> if String.length txt > 1 && txt.[1] = '#' then Some (make_sluglink ref) else None
61+ | _ -> None
62+63+(** {1 Text Extraction} *)
64+65+let text_of_inline lb =
66+ Cmarkit.Inline.to_plain_text ~break_on_soft:false lb
67+ |> fun r -> String.concat "\n" (List.map (String.concat "") r)
68+69+(** {1 Link Target Detection} *)
70+71+let link_target_is_bushel ?slugs lb =
72+ let open Cmarkit in
73+ let ref = Inline.Link.reference lb in
74+ match ref with
75+ | `Inline (ld, _) ->
76+ let dest = Link_definition.dest ld in
77+ (match dest with
78+ | Some (url, _) when is_bushel_slug url ->
79+ (match slugs with Some s -> Hashtbl.replace s url () | _ -> ());
80+ Some (url, Inline.Link.text lb |> text_of_inline)
81+ | Some (url, _) when is_tag_slug url ->
82+ Some (url, Inline.Link.text lb |> text_of_inline)
83+ | Some (url, _) when is_contact_slug url ->
84+ Some (url, Inline.Link.text lb |> text_of_inline)
85+ | _ -> None)
86+ | _ -> None
87+88+let image_target_is_bushel lb =
89+ let open Cmarkit in
90+ let ref = Inline.Link.reference lb in
91+ match ref with
92+ | `Inline (ld, _) ->
93+ let dest = Link_definition.dest ld in
94+ (match dest with
95+ | Some (url, _) when is_bushel_slug url ->
96+ let alt = Link_definition.title ld in
97+ let dir =
98+ Inline.Link.text lb
99+ |> Inline.to_plain_text ~break_on_soft:false
100+ |> fun r -> String.concat "\n" (List.map (String.concat "") r)
101+ in
102+ Some (url, alt, dir)
103+ | _ -> None)
104+ | _ -> None
105+106+(** {1 Link-Only Mapper}
107+108+ Converts Bushel links to regular HTML links without sidenotes.
109+ Used for Atom feeds, RSS, search indexing. *)
110+111+let make_link_only_mapper entries =
112+ let open Cmarkit in
113+ fun _m ->
114+ function
115+ | Inline.Link (lb, meta) ->
116+ (match link_target_is_bushel lb with
117+ | Some (url, title) ->
118+ let s = strip_handle url in
119+ let dest = Bushel_entry.lookup_site_url entries s in
120+ let link_text =
121+ if is_bushel_slug title then
122+ match Bushel_entry.lookup entries (strip_handle title) with
123+ | Some ent -> Bushel_entry.title ent
124+ | None -> title
125+ else title
126+ in
127+ let txt = Inline.Text (link_text, meta) in
128+ let ld = Link_definition.make ~dest:(dest, meta) () in
129+ let ll = `Inline (ld, meta) in
130+ let ld = Inline.Link.make txt ll in
131+ Mapper.ret (Inline.Link (ld, meta))
132+ | None ->
133+ (match Inline.Link.referenced_label lb with
134+ | Some l ->
135+ let m = Label.meta l in
136+ (match Meta.find authorlink m with
137+ | Some () ->
138+ let slug = Label.key l in
139+ let s = strip_handle slug in
140+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with
141+ | Some c ->
142+ let name = Bushel_contact.name c in
143+ (match Bushel_contact.best_url c with
144+ | Some dest ->
145+ let txt = Inline.Text (name, meta) in
146+ let ld = Link_definition.make ~dest:(dest, meta) () in
147+ let ll = `Inline (ld, meta) in
148+ let ld = Inline.Link.make txt ll in
149+ Mapper.ret (Inline.Link (ld, meta))
150+ | None ->
151+ let txt = Inline.Text (name, meta) in
152+ Mapper.ret txt)
153+ | None ->
154+ let title = Inline.Link.text lb |> text_of_inline in
155+ let txt = Inline.Text (title, meta) in
156+ Mapper.ret txt)
157+ | None ->
158+ (match Meta.find sluglink m with
159+ | Some () ->
160+ let slug = Label.key l in
161+ if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug then
162+ let s = strip_handle slug in
163+ let dest = Bushel_entry.lookup_site_url entries s in
164+ let title = Inline.Link.text lb |> text_of_inline in
165+ let link_text =
166+ let trimmed = String.trim title in
167+ if is_bushel_slug trimmed then
168+ match Bushel_entry.lookup entries (strip_handle trimmed) with
169+ | Some ent -> Bushel_entry.title ent
170+ | None -> title
171+ else title
172+ in
173+ let txt = Inline.Text (link_text, meta) in
174+ let ld = Link_definition.make ~dest:(dest, meta) () in
175+ let ll = `Inline (ld, meta) in
176+ let ld = Inline.Link.make txt ll in
177+ Mapper.ret (Inline.Link (ld, meta))
178+ else Mapper.default
179+ | None -> Mapper.default))
180+ | None -> Mapper.default))
181+ | _ -> Mapper.default
182+183+(** {1 Slug Scanning} *)
184+185+let scan_for_slugs entries md =
186+ let open Cmarkit in
187+ let slugs = Hashtbl.create 7 in
188+ let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
189+ let inline_mapper _m = function
190+ | Inline.Link (lb, _meta) ->
191+ (match link_target_is_bushel ~slugs lb with
192+ | Some _ -> Mapper.default
193+ | None ->
194+ (match Inline.Link.referenced_label lb with
195+ | Some l ->
196+ let m = Label.meta l in
197+ (match Meta.find sluglink m with
198+ | Some () ->
199+ let slug = Label.key l in
200+ if is_bushel_slug slug then
201+ Hashtbl.replace slugs slug ();
202+ Mapper.default
203+ | None -> Mapper.default)
204+ | None -> Mapper.default))
205+ | _ -> Mapper.default
206+ in
207+ let mapper = Mapper.make ~inline:inline_mapper () in
208+ let _ = Mapper.map_doc mapper doc in
209+ ignore entries;
210+ Hashtbl.fold (fun k () a -> k :: a) slugs []
211+212+(** {1 Link Extraction} *)
213+214+(** Extract all links from markdown text, including from images *)
215+let extract_all_links text =
216+ let open Cmarkit in
217+ let doc = Doc.of_string ~resolver:with_bushel_links text in
218+ let links = ref [] in
219+220+ let find_links_in_inline _mapper = function
221+ | Inline.Link (lb, _) | Inline.Image (lb, _) ->
222+ (match Inline.Link.reference lb with
223+ | `Inline (ld, _) ->
224+ (match Link_definition.dest ld with
225+ | Some (url, _) ->
226+ links := url :: !links;
227+ Mapper.default
228+ | None -> Mapper.default)
229+ | `Ref _ ->
230+ (match Inline.Link.referenced_label lb with
231+ | Some l ->
232+ let key = Label.key l in
233+ if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' ||
234+ (String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then
235+ links := key :: !links;
236+ Mapper.default
237+ | None -> Mapper.default))
238+ | _ -> Mapper.default
239+ in
240+241+ let mapper = Mapper.make ~inline:find_links_in_inline () in
242+ let _ = Mapper.map_doc mapper doc in
243+244+ let module StringSet = Set.Make(String) in
245+ StringSet.elements (StringSet.of_list !links)
246+247+(** Extract external URLs from markdown content *)
248+let extract_external_links md =
249+ let open Cmarkit in
250+ let urls = ref [] in
251+252+ let is_external_url url =
253+ if is_bushel_slug url || is_tag_slug url then false
254+ else
255+ try
256+ let uri = Uri.of_string url in
257+ match Uri.scheme uri with
258+ | Some s when s = "http" || s = "https" -> true
259+ | Some _ -> true
260+ | None -> false
261+ with _ -> false
262+ in
263+264+ let inline_mapper _ = function
265+ | Inline.Link (lb, _) | Inline.Image (lb, _) ->
266+ let ref = Inline.Link.reference lb in
267+ (match ref with
268+ | `Inline (ld, _) ->
269+ (match Link_definition.dest ld with
270+ | Some (url, _) when is_external_url url ->
271+ urls := url :: !urls;
272+ Mapper.default
273+ | _ -> Mapper.default)
274+ | `Ref (_, _, l) ->
275+ let defs = Doc.defs (Doc.of_string ~strict:false md) in
276+ (match Label.Map.find_opt (Label.key l) defs with
277+ | Some (Link_definition.Def (ld, _)) ->
278+ (match Link_definition.dest ld with
279+ | Some (url, _) when is_external_url url ->
280+ urls := url :: !urls
281+ | _ -> ())
282+ | _ -> ());
283+ Mapper.default)
284+ | Inline.Autolink (autolink, _) ->
285+ let url = Inline.Autolink.link autolink |> fst in
286+ if not (Inline.Autolink.is_email autolink) && is_external_url url then
287+ urls := url :: !urls;
288+ Mapper.default
289+ | _ -> Mapper.default
290+ in
291+292+ let mapper = Mapper.make ~inline:inline_mapper () in
293+ let doc = Doc.of_string ~strict:false md in
294+ let _ = Mapper.map_doc mapper doc in
295+ List.sort_uniq String.compare !urls
296+297+(** {1 First Image Extraction} *)
298+299+let extract_first_image md =
300+ let open Cmarkit in
301+ let doc = Doc.of_string md in
302+ let found_image = ref None in
303+304+ let find_image_in_inline _mapper = function
305+ | Inline.Image (img, _) ->
306+ (match Inline.Link.reference img with
307+ | `Inline (ld, _) ->
308+ (match Link_definition.dest ld with
309+ | Some (url, _) when !found_image = None ->
310+ found_image := Some url;
311+ Mapper.default
312+ | _ -> Mapper.default)
313+ | _ -> Mapper.default)
314+ | _ -> Mapper.default
315+ in
316+317+ let mapper = Mapper.make ~inline:find_image_in_inline () in
318+ let _ = Mapper.map_doc mapper doc in
319+ !found_image
320+321+(** {1 Plaintext Conversion} *)
322+323+(** Convert markdown text to plain text, resolving bushel links to just their text *)
324+let markdown_to_plaintext _entries text =
325+ let open Cmarkit in
326+ let doc = Doc.of_string ~resolver:with_bushel_links text in
327+328+ let rec block_to_text = function
329+ | Block.Blank_line _ -> ""
330+ | Block.Thematic_break _ -> "\n---\n"
331+ | Block.Paragraph (p, _) ->
332+ let inline = Block.Paragraph.inline p in
333+ Inline.to_plain_text ~break_on_soft:false inline
334+ |> List.map (String.concat "") |> String.concat "\n"
335+ | Block.Heading (h, _) ->
336+ let inline = Block.Heading.inline h in
337+ Inline.to_plain_text ~break_on_soft:false inline
338+ |> List.map (String.concat "") |> String.concat "\n"
339+ | Block.Block_quote (bq, _) ->
340+ let blocks = Block.Block_quote.block bq in
341+ block_to_text blocks
342+ | Block.List (l, _) ->
343+ let items = Block.List'.items l in
344+ List.map (fun (item, _) ->
345+ let blocks = Block.List_item.block item in
346+ block_to_text blocks
347+ ) items |> String.concat "\n"
348+ | Block.Code_block (cb, _) ->
349+ let code = Block.Code_block.code cb in
350+ String.concat "\n" (List.map Block_line.to_string code)
351+ | Block.Html_block _ -> ""
352+ | Block.Link_reference_definition _ -> ""
353+ | Block.Ext_footnote_definition _ -> ""
354+ | Block.Blocks (blocks, _) ->
355+ List.map block_to_text blocks |> String.concat "\n"
356+ | _ -> ""
357+ in
358+ let blocks = Doc.block doc in
359+ block_to_text blocks
360+361+(** {1 Validation} *)
362+363+(** Validation mapper that collects broken references *)
364+let make_validation_mapper entries broken_slugs broken_contacts =
365+ let open Cmarkit in
366+ fun _m ->
367+ function
368+ | Inline.Link (lb, _meta) ->
369+ (match link_target_is_bushel lb with
370+ | Some (url, _title) ->
371+ let s = strip_handle url in
372+ if is_contact_slug url then
373+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with
374+ | None -> Hashtbl.replace broken_contacts url ()
375+ | Some _ -> ())
376+ else if is_bushel_slug url then
377+ (match Bushel_entry.lookup entries s with
378+ | None -> Hashtbl.replace broken_slugs url ()
379+ | Some _ -> ());
380+ Mapper.default
381+ | None ->
382+ (match Inline.Link.referenced_label lb with
383+ | Some l ->
384+ let m = Label.meta l in
385+ (match Meta.find authorlink m with
386+ | Some () ->
387+ let slug = Label.key l in
388+ let handle = strip_handle slug in
389+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) handle with
390+ | None -> Hashtbl.replace broken_contacts slug ()
391+ | Some _ -> ());
392+ Mapper.default
393+ | None ->
394+ (match Meta.find sluglink m with
395+ | None -> Mapper.default
396+ | Some () ->
397+ let slug = Label.key l in
398+ if is_bushel_slug slug then begin
399+ let s = strip_handle slug in
400+ match Bushel_entry.lookup entries s with
401+ | None -> Hashtbl.replace broken_slugs slug ()
402+ | Some _ -> ()
403+ end;
404+ Mapper.default))
405+ | None -> Mapper.default))
406+ | _ -> Mapper.default
407+408+(** Validate all bushel references in markdown and return broken ones *)
409+let validate_references entries md =
410+ let open Cmarkit in
411+ let broken_slugs = Hashtbl.create 7 in
412+ let broken_contacts = Hashtbl.create 7 in
413+ let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
414+ let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in
415+ let _ = Mapper.map_doc mapper doc in
416+ let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in
417+ let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in
418+ (slugs, contacts)
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Tag system for Bushel entries *)
7+8+type t =
9+ [ `Slug of string (** :foo points to the specific slug foo *)
10+ | `Contact of string (** \@foo points to contact foo *)
11+ | `Set of string (** #papers points to all Paper entries *)
12+ | `Text of string (** foo points to a free text "foo" *)
13+ | `Year of int (** a number between 1900--2100 is interpreted as a year *)
14+ ]
15+16+(** {1 Predicates} *)
17+18+let is_text = function `Text _ -> true | _ -> false
19+let is_slug = function `Slug _ -> true | _ -> false
20+let is_contact = function `Contact _ -> true | _ -> false
21+let is_set = function `Set _ -> true | _ -> false
22+let is_year = function `Year _ -> true | _ -> false
23+24+(** {1 Parsing} *)
25+26+let of_string s : t =
27+ if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s);
28+ match s.[0] with
29+ | ':' ->
30+ let slug = String.sub s 1 (String.length s - 1) in
31+ `Slug slug
32+ | '@' ->
33+ let handle = String.sub s 1 (String.length s - 1) in
34+ `Contact handle
35+ | '#' ->
36+ let cl = String.sub s 1 (String.length s - 1) in
37+ `Set cl
38+ | _ ->
39+ (try
40+ let x = int_of_string s in
41+ if x > 1900 && x < 2100 then `Year x else `Text s
42+ with _ -> `Text s)
43+44+let of_string_list l = List.map of_string l
45+46+(** {1 Serialization} *)
47+48+let to_string = function
49+ | `Slug t -> ":" ^ t
50+ | `Contact c -> "@" ^ c
51+ | `Set s -> "#" ^ s
52+ | `Text t -> t
53+ | `Year y -> string_of_int y
54+55+let to_raw_string = function
56+ | `Slug t -> t
57+ | `Contact c -> c
58+ | `Set s -> s
59+ | `Text t -> t
60+ | `Year y -> string_of_int y
61+62+(** {1 Pretty Printing} *)
63+64+let pp ppf t = Fmt.string ppf (to_string t)
65+66+(** {1 Tag Filtering} *)
67+68+let mentions tags =
69+ List.filter (function
70+ | `Contact _ | `Slug _ -> true
71+ | _ -> false
72+ ) tags
73+74+(** {1 Tag Counting} *)
75+76+let count_tags ?h fn vs =
77+ let h = match h with
78+ | Some h -> h
79+ | None -> Hashtbl.create 42
80+ in
81+ List.iter (fun ent ->
82+ List.iter (fun tag ->
83+ match Hashtbl.find_opt h tag with
84+ | Some num -> Hashtbl.replace h tag (num + 1)
85+ | None -> Hashtbl.add h tag 1
86+ ) (fn ent)
87+ ) vs;
88+ h
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Common types and Jsont codecs for Bushel *)
7+8+(** {1 Date Types} *)
9+10+type date = Ptime.date
11+(** A calendar date (year, month, day). *)
12+13+(** {1 Jsont Codecs} *)
14+15+let ptime_date_jsont : Ptime.date Jsont.t =
16+ let dec s =
17+ try
18+ match String.split_on_char '-' s with
19+ | [y; m; d] ->
20+ let year = int_of_string y in
21+ let month = int_of_string m in
22+ let day = int_of_string d in
23+ Ok (year, month, day)
24+ | _ ->
25+ Error (Printf.sprintf "Invalid date format: %s (expected YYYY-MM-DD)" s)
26+ with _ ->
27+ Error (Printf.sprintf "Invalid date: %s" s)
28+ in
29+ let enc (y, m, d) = Printf.sprintf "%04d-%02d-%02d" y m d in
30+ Jsont.of_of_string ~kind:"Ptime.date" dec ~enc
31+32+let ptime_jsont : Ptime.t Jsont.t =
33+ let dec s =
34+ (* Try RFC3339 first *)
35+ match Ptime.of_rfc3339 s with
36+ | Ok (t, _, _) -> Ok t
37+ | Error _ ->
38+ (* Try date-only format *)
39+ try
40+ match String.split_on_char '-' s with
41+ | [y; m; d] ->
42+ let year = int_of_string y in
43+ let month = int_of_string m in
44+ let day = int_of_string d in
45+ (match Ptime.of_date (year, month, day) with
46+ | Some t -> Ok t
47+ | None -> Error (Printf.sprintf "Invalid date: %s" s))
48+ | _ ->
49+ Error (Printf.sprintf "Invalid timestamp: %s" s)
50+ with _ ->
51+ Error (Printf.sprintf "Invalid timestamp: %s" s)
52+ in
53+ let enc t =
54+ let (y, m, d), ((hh, mm, ss), _) = Ptime.to_date_time t in
55+ if hh = 0 && mm = 0 && ss = 0 then
56+ Printf.sprintf "%04d-%02d-%02d" y m d
57+ else
58+ Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" y m d hh mm ss
59+ in
60+ Jsont.of_of_string ~kind:"Ptime.t" dec ~enc
61+62+let ptime_option_jsont : Ptime.t option Jsont.t =
63+ let null = Jsont.null None in
64+ let some = Jsont.map ~dec:(fun t -> Some t) ~enc:(function Some t -> t | None -> assert false) ptime_jsont in
65+ Jsont.any ~dec_null:null ~dec_string:some ~enc:(function None -> null | Some _ -> some) ()
66+67+let string_option_jsont : string option Jsont.t =
68+ Jsont.option Jsont.string
69+70+(** {1 Helper Functions} *)
71+72+let ptime_of_date_exn date =
73+ match Ptime.of_date date with
74+ | Some t -> t
75+ | None ->
76+ let (y, m, d) = date in
77+ failwith (Printf.sprintf "Invalid date: %04d-%02d-%02d" y m d)
78+79+let date_of_ptime t = Ptime.to_date t
80+81+let compare_dates (d1 : date) (d2 : date) =
82+ let t1 = ptime_of_date_exn d1 in
83+ let t2 = ptime_of_date_exn d2 in
84+ Ptime.compare t1 t2
85+86+let format_date (y, m, d) =
87+ Printf.sprintf "%04d-%02d-%02d" y m d
88+89+let month_name = function
90+ | 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April"
91+ | 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August"
92+ | 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December"
93+ | _ -> "Unknown"
94+95+let format_date_human (y, m, _d) =
96+ Printf.sprintf "%s %d" (month_name m) y
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Utility functions for Bushel *)
7+8+(** Count words in a string. *)
9+let count_words (text : string) : int =
10+ let len = String.length text in
11+ let rec count_words_helper (index : int) (in_word : bool) (count : int) : int =
12+ if index >= len then
13+ if in_word then count + 1 else count
14+ else
15+ let char = String.get text index in
16+ let is_whitespace =
17+ Char.equal char ' '
18+ || Char.equal char '\t'
19+ || Char.equal char '\n'
20+ || Char.equal char '\r'
21+ in
22+ if is_whitespace then
23+ if in_word then count_words_helper (index + 1) false (count + 1)
24+ else count_words_helper (index + 1) false count
25+ else count_words_helper (index + 1) true count
26+ in
27+ count_words_helper 0 false 0
28+29+(** Get the first paragraph/hunk from text (up to double newline). *)
30+let first_hunk s =
31+ let lines = String.split_on_char '\n' s in
32+ let rec aux acc = function
33+ | [] -> String.concat "\n" (List.rev acc)
34+ | "" :: "" :: _ -> String.concat "\n" (List.rev acc)
35+ | line :: rest -> aux (line :: acc) rest
36+ in
37+ aux [] lines
38+39+(** Get first and last hunks from text. *)
40+let first_and_last_hunks s =
41+ let lines = String.split_on_char '\n' s in
42+ let rec aux acc = function
43+ | [] -> String.concat "\n" (List.rev acc), ""
44+ | "" :: "" :: rest ->
45+ String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest)
46+ | line :: rest -> aux (line :: acc) rest
47+ in
48+ aux [] lines
49+50+(** Find all footnote definition lines in text. *)
51+let find_footnote_lines s =
52+ let lines = String.split_on_char '\n' s in
53+ let is_footnote_def line =
54+ String.length line > 3 &&
55+ line.[0] = '[' &&
56+ line.[1] = '^' &&
57+ String.contains line ':' &&
58+ let colon_pos = String.index line ':' in
59+ colon_pos > 2 && line.[colon_pos - 1] = ']'
60+ in
61+ let is_continuation line =
62+ String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
63+ in
64+ let rec collect_footnotes acc in_footnote = function
65+ | [] -> List.rev acc
66+ | line :: rest ->
67+ if is_footnote_def line then
68+ collect_footnotes (line :: acc) true rest
69+ else if in_footnote && is_continuation line then
70+ collect_footnotes (line :: acc) true rest
71+ else
72+ collect_footnotes acc false rest
73+ in
74+ collect_footnotes [] false lines
75+76+(** Augment first hunk with footnote definitions from last hunk. *)
77+let first_hunk_with_footnotes s =
78+ let first, last = first_and_last_hunks s in
79+ let footnote_lines = find_footnote_lines last in
80+ if footnote_lines = [] then first
81+ else first ^ "\n\n" ^ String.concat "\n" footnote_lines
82+83+(** Trim leading/trailing whitespace and normalize multiple blank lines. *)
84+let normalize_body s =
85+ let trimmed = String.trim s in
86+ (* Replace 3+ consecutive newlines with exactly 2 newlines *)
87+ let re = Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')]) in
88+ Re.replace_string re ~by:"\n\n" trimmed
89+90+(** Extract domain from URL. *)
91+let extract_domain url =
92+ try
93+ let uri = Uri.of_string url in
94+ match Uri.host uri with
95+ | Some host -> host
96+ | None -> "unknown"
97+ with _ -> "unknown"
98+99+(** Check if a string is a valid URL. *)
100+let is_url s =
101+ String.starts_with ~prefix:"http://" s || String.starts_with ~prefix:"https://" s
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Eio-based directory scanner and file loader for Bushel entries *)
7+8+let src = Logs.Src.create "bushel.loader" ~doc:"Bushel loader"
9+module Log = (val Logs.src_log src : Logs.LOG)
10+11+(** List markdown files in a directory *)
12+let list_md_files fs dir =
13+ let path = Eio.Path.(fs / dir) in
14+ try
15+ Eio.Path.read_dir path
16+ |> List.filter (fun f -> Filename.check_suffix f ".md")
17+ |> List.map (fun f -> Filename.concat dir f)
18+ with
19+ | Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) ->
20+ Log.warn (fun m -> m "Directory not found: %s" dir);
21+ []
22+23+(** Load and map files from a directory *)
24+let map_category fs base subdir parse_fn =
25+ let dir = Filename.concat base ("data/" ^ subdir) in
26+ Log.debug (fun m -> m "Loading %s" subdir);
27+ let files = list_md_files fs dir in
28+ List.filter_map (fun path ->
29+ match Frontmatter_eio.of_file fs path with
30+ | Ok fm ->
31+ (match parse_fn fm with
32+ | Ok entry -> Some entry
33+ | Error e ->
34+ Log.err (fun m -> m "Error parsing %s: %s" path e);
35+ None)
36+ | Error e ->
37+ Log.err (fun m -> m "Error reading %s: %s" path e);
38+ None
39+ ) files
40+41+(** Load contacts from data/contacts/ *)
42+let load_contacts fs base =
43+ map_category fs base "contacts" (fun fm ->
44+ let handle =
45+ match Frontmatter.fname fm with
46+ | Some fname -> Filename.basename fname |> Filename.chop_extension
47+ | None -> ""
48+ in
49+ Bushel.Contact.of_frontmatter ~handle fm
50+ )
51+52+(** Load projects from data/projects/ *)
53+let load_projects fs base =
54+ map_category fs base "projects" Bushel.Project.of_frontmatter
55+56+(** Load notes from data/notes/ and data/news/ *)
57+let load_notes fs base =
58+ let notes_dir = map_category fs base "notes" Bushel.Note.of_frontmatter in
59+ let news_dir = map_category fs base "news" Bushel.Note.of_frontmatter in
60+ notes_dir @ news_dir
61+62+(** Load ideas from data/ideas/ *)
63+let load_ideas fs base =
64+ map_category fs base "ideas" Bushel.Idea.of_frontmatter
65+66+(** Load videos from data/videos/ *)
67+let load_videos fs base =
68+ map_category fs base "videos" Bushel.Video.of_frontmatter
69+70+(** Load papers from data/papers/ (nested directory structure) *)
71+let load_papers fs base =
72+ let papers_dir = Filename.concat base "data/papers" in
73+ Log.debug (fun m -> m "Loading papers from %s" papers_dir);
74+ let path = Eio.Path.(fs / papers_dir) in
75+ let slug_dirs =
76+ try
77+ Eio.Path.read_dir path
78+ |> List.filter (fun slug ->
79+ try
80+ let stat = Eio.Path.stat ~follow:true Eio.Path.(fs / papers_dir / slug) in
81+ stat.kind = `Directory
82+ with _ -> false)
83+ with _ -> []
84+ in
85+ let papers = List.concat_map (fun slug ->
86+ let slug_path = Filename.concat papers_dir slug in
87+ let ver_files =
88+ try
89+ Eio.Path.(read_dir (fs / slug_path))
90+ |> List.filter (fun f -> Filename.check_suffix f ".md")
91+ with _ -> []
92+ in
93+ List.filter_map (fun ver_file ->
94+ let ver = Filename.chop_extension ver_file in
95+ let file_path = Filename.concat slug_path ver_file in
96+ match Frontmatter_eio.of_file fs file_path with
97+ | Ok fm ->
98+ (match Bushel.Paper.of_frontmatter ~slug ~ver fm with
99+ | Ok paper -> Some paper
100+ | Error e ->
101+ Log.err (fun m -> m "Error parsing paper %s/%s: %s" slug ver e);
102+ None)
103+ | Error e ->
104+ Log.err (fun m -> m "Error reading paper %s/%s: %s" slug ver e);
105+ None
106+ ) ver_files
107+ ) slug_dirs in
108+ Bushel.Paper.tv papers
109+110+(** Load all entries from a base directory *)
111+let rec load fs base =
112+ Log.info (fun m -> m "Loading bushel data from %s" base);
113+ let contacts = load_contacts fs base in
114+ Log.info (fun m -> m "Loaded %d contacts" (List.length contacts));
115+ let projects = load_projects fs base in
116+ Log.info (fun m -> m "Loaded %d projects" (List.length projects));
117+ let notes = load_notes fs base in
118+ Log.info (fun m -> m "Loaded %d notes" (List.length notes));
119+ let ideas = load_ideas fs base in
120+ Log.info (fun m -> m "Loaded %d ideas" (List.length ideas));
121+ let videos = load_videos fs base in
122+ Log.info (fun m -> m "Loaded %d videos" (List.length videos));
123+ let papers = load_papers fs base in
124+ Log.info (fun m -> m "Loaded %d papers" (List.length papers));
125+ let data_dir = Filename.concat base "data" in
126+ let entries = Bushel.Entry.v ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir in
127+ Log.info (fun m -> m "Building link graph");
128+ let graph = build_link_graph entries in
129+ Bushel.Link_graph.set_graph graph;
130+ Log.info (fun m -> m "Load complete: %a" Bushel.Link_graph.pp graph);
131+ entries
132+133+(** Build link graph from entries *)
134+and build_link_graph entries =
135+ let graph = Bushel.Link_graph.empty () in
136+137+ let add_internal_link source target target_type =
138+ let link = { Bushel.Link_graph.source; target; target_type } in
139+ graph.internal_links <- link :: graph.internal_links;
140+ Bushel.Link_graph.add_to_set_hashtbl graph.outbound source target;
141+ Bushel.Link_graph.add_to_set_hashtbl graph.backlinks target source
142+ in
143+144+ let add_external_link source url =
145+ let domain = Bushel.Util.extract_domain url in
146+ let link = { Bushel.Link_graph.source; domain; url } in
147+ graph.external_links <- link :: graph.external_links;
148+ Bushel.Link_graph.add_to_set_hashtbl graph.external_by_entry source url;
149+ Bushel.Link_graph.add_to_set_hashtbl graph.external_by_domain domain source
150+ in
151+152+ (* Process each entry *)
153+ List.iter (fun entry ->
154+ let source_slug = Bushel.Entry.slug entry in
155+ let md_content = Bushel.Entry.body entry in
156+ let all_links = Bushel.Md.extract_all_links md_content in
157+158+ List.iter (fun link ->
159+ if Bushel.Md.is_bushel_slug link then
160+ let target_slug = Bushel.Md.strip_handle link in
161+ (match Bushel.Entry.lookup entries target_slug with
162+ | Some target_entry ->
163+ let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in
164+ add_internal_link source_slug target_slug target_type
165+ | None -> ())
166+ else if Bushel.Md.is_contact_slug link then
167+ let handle = Bushel.Md.strip_handle link in
168+ (match Bushel.Contact.find_by_handle (Bushel.Entry.contacts entries) handle with
169+ | Some c ->
170+ add_internal_link source_slug (Bushel.Contact.handle c) `Contact
171+ | None -> ())
172+ else if Bushel.Md.is_tag_slug link || Bushel.Md.is_type_filter_slug link then
173+ () (* Skip tag links *)
174+ else if String.starts_with ~prefix:"http://" link ||
175+ String.starts_with ~prefix:"https://" link then
176+ add_external_link source_slug link
177+ ) all_links
178+ ) (Bushel.Entry.all_entries entries);
179+180+ (* Process slug_ent references from notes *)
181+ List.iter (fun note ->
182+ match Bushel.Note.slug_ent note with
183+ | Some target_slug ->
184+ let source_slug = Bushel.Note.slug note in
185+ (match Bushel.Entry.lookup entries target_slug with
186+ | Some target_entry ->
187+ let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in
188+ add_internal_link source_slug target_slug target_type
189+ | None -> ())
190+ | None -> ()
191+ ) (Bushel.Entry.notes entries);
192+193+ (* Process project references from papers *)
194+ List.iter (fun paper ->
195+ let source_slug = Bushel.Paper.slug paper in
196+ List.iter (fun project_slug ->
197+ match Bushel.Entry.lookup entries project_slug with
198+ | Some (`Project _) ->
199+ add_internal_link source_slug project_slug `Project
200+ | _ -> ()
201+ ) (Bushel.Paper.project_slugs paper)
202+ ) (Bushel.Entry.papers entries);
203+204+ (* Deduplicate links *)
205+ let module LinkSet = Set.Make(struct
206+ type t = Bushel.Link_graph.internal_link
207+ let compare (a : t) (b : t) =
208+ match String.compare a.source b.source with
209+ | 0 -> String.compare a.target b.target
210+ | n -> n
211+ end) in
212+213+ let module ExtLinkSet = Set.Make(struct
214+ type t = Bushel.Link_graph.external_link
215+ let compare (a : t) (b : t) =
216+ match String.compare a.source b.source with
217+ | 0 -> String.compare a.url b.url
218+ | n -> n
219+ end) in
220+221+ graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links);
222+ graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links);
223+224+ graph
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** Immich API client for contact face thumbnails *)
7+8+let src = Logs.Src.create "bushel.immich" ~doc:"Immich face thumbnails"
9+module Log = (val Logs.src_log src : Logs.LOG)
10+11+(** {1 Types} *)
12+13+type person = {
14+ id : string;
15+ name : string;
16+ thumbnail_path : string option;
17+}
18+19+type fetch_result =
20+ | Ok of string (** Saved to path *)
21+ | Skipped of string (** Already exists *)
22+ | NotFound of string (** No match found *)
23+ | Error of string (** Error message *)
24+25+(** {1 Jsont Codecs} *)
26+27+let person_jsont : person Jsont.t =
28+ let open Jsont in
29+ let open Object in
30+ map ~kind:"person" (fun id name thumbnail_path -> { id; name; thumbnail_path })
31+ |> mem "id" string ~enc:(fun p -> p.id)
32+ |> mem "name" string ~enc:(fun p -> p.name)
33+ |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun p -> p.thumbnail_path)
34+ |> finish
35+36+let people_jsont = Jsont.list person_jsont
37+38+let decode_people json_str =
39+ match Jsont_bytesrw.decode_string people_jsont json_str with
40+ | Ok people -> Result.Ok people
41+ | Error e -> Result.Error e
42+43+(** {1 Immich API} *)
44+45+let search_person ~proc_mgr ~endpoint ~api_key name =
46+ let encoded_name = Uri.pct_encode name in
47+ let url = Printf.sprintf "%s/api/search/person?name=%s" endpoint encoded_name in
48+ let header = "X-Api-Key: " ^ api_key in
49+50+ match Bushel_http.get_with_header ~proc_mgr ~header url with
51+ | Result.Error e -> Result.Error e
52+ | Result.Ok body -> decode_people body
53+54+let download_thumbnail ~proc_mgr ~endpoint ~api_key person_id output_path =
55+ let url = Printf.sprintf "%s/api/people/%s/thumbnail" endpoint person_id in
56+ let header = "X-Api-Key: " ^ api_key in
57+58+ match Bushel_http.get_with_header ~proc_mgr ~header url with
59+ | Result.Error e -> Result.Error e
60+ | Result.Ok body ->
61+ try
62+ (* Ensure output directory exists *)
63+ let dir = Filename.dirname output_path in
64+ if not (Sys.file_exists dir) then
65+ Unix.mkdir dir 0o755;
66+ let oc = open_out_bin output_path in
67+ output_string oc body;
68+ close_out oc;
69+ Result.Ok output_path
70+ with e ->
71+ Result.Error (Printf.sprintf "Failed to write file: %s" (Printexc.to_string e))
72+73+(** {1 Contact Face Fetching} *)
74+75+let fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact =
76+ let names = Bushel.Contact.names contact in
77+ let handle = Bushel.Contact.handle contact in
78+ let output_path = Filename.concat output_dir (handle ^ ".jpg") in
79+80+ (* Skip if already exists *)
81+ if Sys.file_exists output_path then begin
82+ Log.debug (fun m -> m "Skipping %s: thumbnail already exists" handle);
83+ Skipped output_path
84+ end else begin
85+ Log.info (fun m -> m "Fetching face for contact: %s" handle);
86+87+ (* Try each name until we find a match *)
88+ let rec try_names = function
89+ | [] ->
90+ Log.warn (fun m -> m "No person found for contact %s" handle);
91+ NotFound handle
92+ | name :: rest ->
93+ Log.debug (fun m -> m "Trying name: %s" name);
94+ match search_person ~proc_mgr ~endpoint ~api_key name with
95+ | Result.Error e ->
96+ Log.err (fun m -> m "Search error for %s: %s" name e);
97+ Error e
98+ | Result.Ok [] ->
99+ Log.debug (fun m -> m "No results for %s, trying next name" name);
100+ try_names rest
101+ | Result.Ok (person :: _) ->
102+ Log.info (fun m -> m "Found match for %s: %s" name person.name);
103+ match download_thumbnail ~proc_mgr ~endpoint ~api_key person.id output_path with
104+ | Result.Ok path -> Ok path
105+ | Result.Error e -> Error e
106+ in
107+ try_names names
108+ end
109+110+let fetch_all_faces ~proc_mgr ~endpoint ~api_key ~output_dir contacts =
111+ (* Ensure output directory exists *)
112+ if not (Sys.file_exists output_dir) then
113+ Unix.mkdir output_dir 0o755;
114+115+ let results = List.map (fun contact ->
116+ let handle = Bushel.Contact.handle contact in
117+ let result = fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact in
118+ (handle, result)
119+ ) contacts in
120+121+ (* Summary *)
122+ let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in
123+ let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in
124+ let not_found_count = List.length (List.filter (fun (_, r) -> match r with NotFound _ -> true | _ -> false) results) in
125+ let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in
126+127+ Log.info (fun m -> m "Face sync complete: %d ok, %d skipped, %d not found, %d errors"
128+ ok_count skipped_count not_found_count error_count);
129+130+ results
···1+(*---------------------------------------------------------------------------
2+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3+ SPDX-License-Identifier: ISC
4+ ---------------------------------------------------------------------------*)
5+6+(** PeerTube API client for video metadata and thumbnails *)
7+8+let src = Logs.Src.create "bushel.peertube" ~doc:"PeerTube video sync"
9+module Log = (val Logs.src_log src : Logs.LOG)
10+11+(** {1 Types} *)
12+13+type video = {
14+ id : int;
15+ uuid : string;
16+ name : string;
17+ description : string option;
18+ url : string;
19+ embed_path : string;
20+ published_at : Ptime.t;
21+ originally_published_at : Ptime.t option;
22+ thumbnail_path : string option;
23+ tags : string list;
24+}
25+26+type fetch_result =
27+ | Ok of string
28+ | Skipped of string
29+ | Error of string
30+31+(** {1 Date Parsing} *)
32+33+let parse_date str =
34+ match Ptime.of_rfc3339 str with
35+ | Ok (date, _, _) -> date
36+ | Error _ ->
37+ Log.warn (fun m -> m "Could not parse date: %s" str);
38+ Ptime.epoch
39+40+(** {1 Jsont Codecs} *)
41+42+let ptime_jsont =
43+ Jsont.string |> Jsont.map ~dec:parse_date ~enc:(fun t ->
44+ match Ptime.to_rfc3339 ~frac_s:0 t with
45+ | s -> s)
46+47+let make_video ~id ~uuid ~name ~description ~url ~embed_path
48+ ~published_at ~originally_published_at ~thumbnail_path ~tags =
49+ { id; uuid; name; description; url; embed_path;
50+ published_at; originally_published_at; thumbnail_path; tags }
51+52+let video_jsont : video Jsont.t =
53+ let open Jsont in
54+ let open Object in
55+ map ~kind:"video" (fun id uuid name description url embed_path
56+ published_at originally_published_at thumbnail_path tags ->
57+ make_video ~id ~uuid ~name ~description ~url ~embed_path
58+ ~published_at ~originally_published_at ~thumbnail_path ~tags)
59+ |> mem "id" int ~enc:(fun v -> v.id)
60+ |> mem "uuid" string ~enc:(fun v -> v.uuid)
61+ |> mem "name" string ~enc:(fun v -> v.name)
62+ |> mem "description" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.description)
63+ |> mem "url" string ~enc:(fun v -> v.url)
64+ |> mem "embedPath" string ~enc:(fun v -> v.embed_path)
65+ |> mem "publishedAt" ptime_jsont ~enc:(fun v -> v.published_at)
66+ |> mem "originallyPublishedAt" (some ptime_jsont) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.originally_published_at)
67+ |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.thumbnail_path)
68+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags)
69+ |> finish
70+71+type channel_response = {
72+ total : int;
73+ data : video list;
74+}
75+76+let channel_response_jsont : channel_response Jsont.t =
77+ let open Jsont in
78+ let open Object in
79+ map ~kind:"channel_response" (fun total data -> { total; data })
80+ |> mem "total" int ~enc:(fun r -> r.total)
81+ |> mem "data" (list video_jsont) ~enc:(fun r -> r.data)
82+ |> finish
83+84+(** {1 JSON decoding helpers} *)
85+86+let decode_video json_str =
87+ match Jsont_bytesrw.decode_string video_jsont json_str with
88+ | Ok v -> Result.Ok v
89+ | Error e -> Result.Error e
90+91+let decode_channel_response json_str =
92+ match Jsont_bytesrw.decode_string channel_response_jsont json_str with
93+ | Ok r -> Result.Ok r
94+ | Error e -> Result.Error e
95+96+(** {1 PeerTube API} *)
97+98+let fetch_video_details ~proc_mgr ~endpoint uuid =
99+ let url = Printf.sprintf "%s/api/v1/videos/%s" endpoint uuid in
100+ match Bushel_http.get ~proc_mgr url with
101+ | Result.Error e -> Result.Error e
102+ | Result.Ok body -> decode_video body
103+104+let fetch_channel_videos ~proc_mgr ~endpoint ~channel ?(count=20) ?(start=0) () =
105+ let url = Printf.sprintf "%s/api/v1/video-channels/%s/videos?count=%d&start=%d"
106+ endpoint channel count start in
107+ match Bushel_http.get ~proc_mgr url with
108+ | Result.Error _ -> (0, [])
109+ | Result.Ok body ->
110+ match decode_channel_response body with
111+ | Result.Ok r -> (r.total, r.data)
112+ | Result.Error _ -> (0, [])
113+114+let fetch_all_channel_videos ~proc_mgr ~endpoint ~channel ?(page_size=20) () =
115+ let rec fetch_pages start acc =
116+ let (total, videos) = fetch_channel_videos ~proc_mgr ~endpoint ~channel ~count:page_size ~start () in
117+ let all = acc @ videos in
118+ let fetched = start + List.length videos in
119+ if fetched < total && List.length videos > 0 then
120+ fetch_pages fetched all
121+ else
122+ all
123+ in
124+ fetch_pages 0 []
125+126+(** {1 Thumbnail Download} *)
127+128+let thumbnail_url endpoint video =
129+ match video.thumbnail_path with
130+ | Some path -> Some (endpoint ^ path)
131+ | None -> None
132+133+let download_thumbnail ~proc_mgr ~endpoint video output_path =
134+ match thumbnail_url endpoint video with
135+ | None ->
136+ Log.warn (fun m -> m "No thumbnail for video %s" video.uuid);
137+ Error "No thumbnail available"
138+ | Some url ->
139+ match Bushel_http.get ~proc_mgr url with
140+ | Result.Error e -> Error e
141+ | Result.Ok body ->
142+ try
143+ let dir = Filename.dirname output_path in
144+ if not (Sys.file_exists dir) then Unix.mkdir dir 0o755;
145+ let oc = open_out_bin output_path in
146+ output_string oc body;
147+ close_out oc;
148+ Ok output_path
149+ with e ->
150+ Error (Printf.sprintf "Failed to write: %s" (Printexc.to_string e))
151+152+(** {1 Videos Index (YAML)} *)
153+154+module VideoIndex = struct
155+ (** Mapping of UUID -> server name *)
156+ type t = (string, string) Hashtbl.t
157+158+ let empty () = Hashtbl.create 64
159+160+ let load_file path =
161+ let index = empty () in
162+ if Sys.file_exists path then begin
163+ try
164+ let ic = open_in path in
165+ let rec read_lines () =
166+ match input_line ic with
167+ | line ->
168+ (match Astring.String.cut ~sep:":" line with
169+ | Some (uuid, server) ->
170+ Hashtbl.add index (String.trim uuid) (String.trim server)
171+ | None -> ());
172+ read_lines ()
173+ | exception End_of_file -> close_in ic
174+ in
175+ read_lines ()
176+ with _ -> ()
177+ end;
178+ index
179+180+ let save_file path index =
181+ let oc = open_out path in
182+ output_string oc "# UUID -> PeerTube server name mapping\n";
183+ Hashtbl.iter (fun uuid server ->
184+ output_string oc (Printf.sprintf "%s: %s\n" uuid server)
185+ ) index;
186+ close_out oc
187+188+ let add index ~uuid ~server =
189+ Hashtbl.replace index uuid server
190+191+ let find index uuid =
192+ Hashtbl.find_opt index uuid
193+194+ let mem index uuid =
195+ Hashtbl.mem index uuid
196+197+ let to_list index =
198+ Hashtbl.fold (fun k v acc -> (k, v) :: acc) index []
199+end
200+201+(** {1 Fetch Thumbnails from Index} *)
202+203+let fetch_thumbnails_from_index ~proc_mgr ~servers ~output_dir index =
204+ (* Ensure output dir exists *)
205+ if not (Sys.file_exists output_dir) then
206+ Unix.mkdir output_dir 0o755;
207+208+ let server_map =
209+ List.fold_left (fun acc (s : Bushel_config.peertube_server) ->
210+ (s.name, s.endpoint) :: acc
211+ ) [] servers
212+ in
213+214+ let results = List.filter_map (fun (uuid, server_name) ->
215+ let output_path = Filename.concat output_dir (uuid ^ ".jpg") in
216+217+ (* Skip if exists *)
218+ if Sys.file_exists output_path then begin
219+ Log.debug (fun m -> m "Skipping %s: thumbnail exists" uuid);
220+ Some (uuid, Skipped output_path)
221+ end else begin
222+ match List.assoc_opt server_name server_map with
223+ | None ->
224+ Log.warn (fun m -> m "Unknown server %s for video %s" server_name uuid);
225+ Some (uuid, Error (Printf.sprintf "Unknown server: %s" server_name))
226+ | Some endpoint ->
227+ Log.info (fun m -> m "Fetching thumbnail for %s from %s" uuid server_name);
228+ match fetch_video_details ~proc_mgr ~endpoint uuid with
229+ | Result.Error e ->
230+ Some (uuid, Error e)
231+ | Result.Ok video ->
232+ match download_thumbnail ~proc_mgr ~endpoint video output_path with
233+ | Ok path -> Some (uuid, Ok path)
234+ | Skipped path -> Some (uuid, Skipped path)
235+ | Error e -> Some (uuid, Error e)
236+ end
237+ ) (VideoIndex.to_list index) in
238+239+ let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in
240+ let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in
241+ let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in
242+243+ Log.info (fun m -> m "Video thumbnails: %d ok, %d skipped, %d errors"
244+ ok_count skipped_count error_count);
245+246+ results