···11+ISC License
22+33+Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
44+55+Permission to use, copy, modify, and/or distribute this software for any
66+purpose with or without fee is hereby granted, provided that the above
77+copyright notice and this permission notice appear in all copies.
88+99+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
1010+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
1111+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
1212+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
1313+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
1414+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
1515+PERFORMANCE OF THIS SOFTWARE.
+65
README.md
···11+# Bushel
22+33+Personal knowledge base and research entry management for OCaml.
44+55+Bushel is a library for managing structured research entries including notes,
66+papers, projects, ideas, videos, and contacts. It provides typed access to
77+markdown files with YAML frontmatter and supports link graphs, markdown
88+processing with custom extensions, and search integration.
99+1010+## Features
1111+1212+- **Entry Types**: Papers, notes, projects, ideas, videos, and contacts
1313+- **Frontmatter Parsing**: YAML metadata extraction using `frontmatter`
1414+- **Markdown Extensions**: Custom `:slug`, `@handle`, and `##tag` link syntax
1515+- **Link Graph**: Bidirectional link tracking between entries
1616+- **Typesense Integration**: Full-text search indexing
1717+- **Eio-based I/O**: Async directory loading with Eio
1818+1919+## Subpackages
2020+2121+- `bushel`: Core library with entry types and utilities
2222+- `bushel.eio`: Eio-based directory loading
2323+- `bushel.config`: XDG-compliant TOML configuration
2424+- `bushel.sync`: Sync pipeline for images and thumbnails
2525+- `bushel.typesense`: Typesense search schema definitions
2626+2727+## Installation
2828+2929+```bash
3030+opam install bushel
3131+```
3232+3333+## Usage
3434+3535+```ocaml
3636+(* Load entries using Eio *)
3737+Eio_main.run @@ fun env ->
3838+let fs = Eio.Stdenv.fs env in
3939+let entries = Bushel_loader.load fs "/path/to/data" in
4040+4141+(* Look up entries by slug *)
4242+match Bushel.Entry.lookup entries "my-note" with
4343+| Some (`Note n) -> Printf.printf "Title: %s\n" (Bushel.Note.title n)
4444+| _ -> ()
4545+4646+(* Get backlinks *)
4747+let backlinks = Bushel.Link_graph.get_backlinks_for_slug "my-note" in
4848+List.iter print_endline backlinks
4949+```
5050+5151+## CLI
5252+5353+The `bushel` binary provides commands for:
5454+5555+- `bushel list` - List all entries
5656+- `bushel show <slug>` - Show entry details
5757+- `bushel stats` - Show knowledge base statistics
5858+- `bushel sync` - Sync images and thumbnails
5959+- `bushel paper <doi>` - Add paper from DOI
6060+- `bushel config` - Show configuration
6161+- `bushel init` - Initialize configuration
6262+6363+## License
6464+6565+ISC License. See [LICENSE.md](LICENSE.md).
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Bushel CLI - knowledge base management tool *)
77+88+open Cmdliner
99+1010+(** Simple table formatting *)
1111+module Table = struct
1212+ type row = string list
1313+ type t = { headers : string list; rows : row list }
1414+1515+ let make ~headers rows = { headers; rows }
1616+1717+ let column_widths t =
1818+ let num_cols = List.length t.headers in
1919+ let widths = Array.make num_cols 0 in
2020+ (* Headers *)
2121+ List.iteri (fun i h -> widths.(i) <- String.length h) t.headers;
2222+ (* Rows *)
2323+ List.iter (fun row ->
2424+ List.iteri (fun i cell ->
2525+ if i < num_cols then
2626+ widths.(i) <- max widths.(i) (String.length cell)
2727+ ) row
2828+ ) t.rows;
2929+ Array.to_list widths
3030+3131+ let pad s width =
3232+ let len = String.length s in
3333+ if len >= width then s
3434+ else s ^ String.make (width - len) ' '
3535+3636+ let print t =
3737+ let widths = column_widths t in
3838+ let print_row row =
3939+ List.iter2 (fun cell width ->
4040+ Printf.printf "%s " (pad cell width)
4141+ ) row widths;
4242+ print_newline ()
4343+ in
4444+ (* Print header *)
4545+ print_row t.headers;
4646+ (* Print separator *)
4747+ List.iter (fun w -> Printf.printf "%s " (String.make w '-')) widths;
4848+ print_newline ();
4949+ (* Print rows *)
5050+ List.iter print_row t.rows
5151+end
5252+5353+(** Truncate string to max length with ellipsis *)
5454+let truncate max_len s =
5555+ if String.length s <= max_len then s
5656+ else String.sub s 0 (max_len - 3) ^ "..."
5757+5858+(** Format date tuple *)
5959+let format_date (year, month, day) =
6060+ Printf.sprintf "%04d-%02d-%02d" year month day
6161+6262+(** Entry type to string *)
6363+let type_string = function
6464+ | `Paper _ -> "paper"
6565+ | `Project _ -> "project"
6666+ | `Idea _ -> "idea"
6767+ | `Video _ -> "video"
6868+ | `Note _ -> "note"
6969+7070+(** {1 Common Options} *)
7171+7272+let data_dir =
7373+ let doc = "Path to the bushel data repository." in
7474+ let env = Cmd.Env.info "BUSHEL_DATA" in
7575+ Arg.(value & opt (some string) None & info ["d"; "data-dir"] ~env ~docv:"DIR" ~doc)
7676+7777+let config_file =
7878+ let doc = "Path to config file (default: ~/.config/bushel/config.toml)." in
7979+ Arg.(value & opt (some string) None & info ["c"; "config"] ~docv:"FILE" ~doc)
8080+8181+(** Setup logging *)
8282+let setup_log style_renderer level =
8383+ Fmt_tty.setup_std_outputs ?style_renderer ();
8484+ Logs.set_level level;
8585+ Logs.set_reporter (Logs_fmt.reporter ())
8686+8787+let logging_t =
8888+ Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ())
8989+9090+(** Load config *)
9191+let load_config config_file =
9292+ match config_file with
9393+ | Some path -> Bushel_config.load_file path
9494+ | None -> Bushel_config.load ()
9595+9696+(** Get data directory from config or CLI *)
9797+let get_data_dir config data_dir_opt =
9898+ match data_dir_opt with
9999+ | Some d -> d
100100+ | None -> config.Bushel_config.data_dir
101101+102102+(** Load entries using Eio *)
103103+let with_entries data_dir f =
104104+ Eio_main.run @@ fun env ->
105105+ let fs = Eio.Stdenv.fs env in
106106+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
107107+ f env entries
108108+109109+(** {1 List Command} *)
110110+111111+let list_cmd =
112112+ let type_filter =
113113+ let doc = "Filter by entry type (paper, project, idea, video, note)." in
114114+ Arg.(value & opt (some string) None & info ["t"; "type"] ~docv:"TYPE" ~doc)
115115+ in
116116+ let limit =
117117+ let doc = "Maximum number of entries to show." in
118118+ Arg.(value & opt (some int) None & info ["n"; "limit"] ~docv:"N" ~doc)
119119+ in
120120+ let sort_by =
121121+ let doc = "Sort by field (date, title, type). Default: date." in
122122+ Arg.(value & opt string "date" & info ["s"; "sort"] ~docv:"FIELD" ~doc)
123123+ in
124124+ let run () config_file data_dir type_filter limit sort_by =
125125+ match load_config config_file with
126126+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
127127+ | Ok config ->
128128+ let data_dir = get_data_dir config data_dir in
129129+ with_entries data_dir @@ fun _env entries ->
130130+ let all = Bushel.Entry.all_entries entries in
131131+ (* Filter by type *)
132132+ let filtered = match type_filter with
133133+ | None -> all
134134+ | Some t ->
135135+ List.filter (fun e ->
136136+ String.lowercase_ascii (type_string e) = String.lowercase_ascii t
137137+ ) all
138138+ in
139139+ (* Sort *)
140140+ let sorted = match sort_by with
141141+ | "title" ->
142142+ List.sort (fun a b ->
143143+ String.compare (Bushel.Entry.title a) (Bushel.Entry.title b)
144144+ ) filtered
145145+ | "type" ->
146146+ List.sort (fun a b ->
147147+ let cmp = String.compare (type_string a) (type_string b) in
148148+ if cmp <> 0 then cmp
149149+ else Bushel.Entry.compare a b
150150+ ) filtered
151151+ | _ -> (* date, default *)
152152+ List.sort (fun a b -> Bushel.Entry.compare b a) filtered (* newest first *)
153153+ in
154154+ (* Limit *)
155155+ let limited = match limit with
156156+ | None -> sorted
157157+ | Some n -> List.filteri (fun i _ -> i < n) sorted
158158+ in
159159+ (* Build table *)
160160+ let rows = List.map (fun e ->
161161+ [ type_string e
162162+ ; Bushel.Entry.slug e
163163+ ; truncate 50 (Bushel.Entry.title e)
164164+ ; format_date (Bushel.Entry.date e)
165165+ ]
166166+ ) limited in
167167+ let table = Table.make
168168+ ~headers:["TYPE"; "SLUG"; "TITLE"; "DATE"]
169169+ rows
170170+ in
171171+ Table.print table;
172172+ Printf.printf "\nTotal: %d entries\n" (List.length limited);
173173+ 0
174174+ in
175175+ let doc = "List all entries in the knowledge base." in
176176+ let info = Cmd.info "list" ~doc in
177177+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ type_filter $ limit $ sort_by)
178178+179179+(** {1 Stats Command} *)
180180+181181+let stats_cmd =
182182+ let run () config_file data_dir =
183183+ match load_config config_file with
184184+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
185185+ | Ok config ->
186186+ let data_dir = get_data_dir config data_dir in
187187+ with_entries data_dir @@ fun _env entries ->
188188+ let papers = List.length (Bushel.Entry.papers entries) in
189189+ let notes = List.length (Bushel.Entry.notes entries) in
190190+ let projects = List.length (Bushel.Entry.projects entries) in
191191+ let ideas = List.length (Bushel.Entry.ideas entries) in
192192+ let videos = List.length (Bushel.Entry.videos entries) in
193193+ let contacts = List.length (Bushel.Entry.contacts entries) in
194194+ Printf.printf "Bushel Statistics\n";
195195+ Printf.printf "=================\n";
196196+ Printf.printf "Papers: %4d\n" papers;
197197+ Printf.printf "Notes: %4d\n" notes;
198198+ Printf.printf "Projects: %4d\n" projects;
199199+ Printf.printf "Ideas: %4d\n" ideas;
200200+ Printf.printf "Videos: %4d\n" videos;
201201+ Printf.printf "Contacts: %4d\n" contacts;
202202+ Printf.printf "-----------------\n";
203203+ Printf.printf "Total: %4d\n" (papers + notes + projects + ideas + videos);
204204+ 0
205205+ in
206206+ let doc = "Show statistics about the knowledge base." in
207207+ let info = Cmd.info "stats" ~doc in
208208+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir)
209209+210210+(** {1 Show Command} *)
211211+212212+let show_cmd =
213213+ let slug_arg =
214214+ let doc = "The slug of the entry to show." in
215215+ Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
216216+ in
217217+ let run () config_file data_dir slug =
218218+ match load_config config_file with
219219+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
220220+ | Ok config ->
221221+ let data_dir = get_data_dir config data_dir in
222222+ with_entries data_dir @@ fun _env entries ->
223223+ match Bushel.Entry.lookup entries slug with
224224+ | None ->
225225+ Printf.eprintf "Entry not found: %s\n" slug;
226226+ 1
227227+ | Some entry ->
228228+ Printf.printf "Type: %s\n" (type_string entry);
229229+ Printf.printf "Slug: %s\n" (Bushel.Entry.slug entry);
230230+ Printf.printf "Title: %s\n" (Bushel.Entry.title entry);
231231+ Printf.printf "Date: %s\n" (format_date (Bushel.Entry.date entry));
232232+ Printf.printf "URL: %s\n" (Bushel.Entry.site_url entry);
233233+ (match Bushel.Entry.synopsis entry with
234234+ | Some s -> Printf.printf "Synopsis: %s\n" s
235235+ | None -> ());
236236+ Printf.printf "\n--- Body ---\n%s\n" (Bushel.Entry.body entry);
237237+ 0
238238+ in
239239+ let doc = "Show details of a specific entry." in
240240+ let info = Cmd.info "show" ~doc in
241241+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ slug_arg)
242242+243243+(** {1 Sync Command} *)
244244+245245+let sync_cmd =
246246+ let remote =
247247+ let doc = "Also upload to Typesense (remote sync)." in
248248+ Arg.(value & flag & info ["remote"] ~doc)
249249+ in
250250+ let only =
251251+ let doc = "Only run specific step (images, srcsetter, thumbs, faces, videos, typesense)." in
252252+ Arg.(value & opt (some string) None & info ["only"] ~docv:"STEP" ~doc)
253253+ in
254254+ let run () config_file data_dir remote only =
255255+ match load_config config_file with
256256+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
257257+ | Ok config ->
258258+ let data_dir = get_data_dir config data_dir in
259259+ (* Determine which steps to run *)
260260+ let steps = match only with
261261+ | Some step_name ->
262262+ (match Bushel_sync.step_of_string step_name with
263263+ | Some step -> [step]
264264+ | None ->
265265+ Printf.eprintf "Unknown step: %s\n" step_name;
266266+ Printf.eprintf "Valid steps: images, srcsetter, thumbs, faces, videos, typesense\n";
267267+ exit 1)
268268+ | None ->
269269+ if remote then Bushel_sync.all_steps_with_remote
270270+ else Bushel_sync.all_steps
271271+ in
272272+273273+ Eio_main.run @@ fun env ->
274274+ let fs = Eio.Stdenv.fs env in
275275+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
276276+277277+ Printf.printf "Running sync pipeline...\n";
278278+ List.iter (fun step ->
279279+ Printf.printf " - %s\n" (Bushel_sync.string_of_step step)
280280+ ) steps;
281281+ Printf.printf "\n";
282282+283283+ let results = Bushel_sync.run ~env ~config ~steps ~entries in
284284+285285+ Printf.printf "\nResults:\n";
286286+ List.iter (fun r ->
287287+ let status = if r.Bushel_sync.success then "OK" else "FAIL" in
288288+ Printf.printf " [%s] %s: %s\n"
289289+ status
290290+ (Bushel_sync.string_of_step r.step)
291291+ r.message
292292+ ) results;
293293+294294+ let failures = List.filter (fun r -> not r.Bushel_sync.success) results in
295295+ if failures = [] then 0 else 1
296296+ in
297297+ let doc = "Sync images, thumbnails, and optionally upload to Typesense." in
298298+ let man = [
299299+ `S Manpage.s_description;
300300+ `P "The sync command runs a pipeline to synchronize images and thumbnails:";
301301+ `P "1. $(b,images) - Rsync images from remote server";
302302+ `P "2. $(b,srcsetter) - Convert images to WebP srcset variants";
303303+ `P "3. $(b,thumbs) - Generate paper thumbnails from PDFs";
304304+ `P "4. $(b,faces) - Fetch contact face thumbnails from Immich";
305305+ `P "5. $(b,videos) - Fetch video thumbnails from PeerTube";
306306+ `P "6. $(b,typesense) - Upload to Typesense (with --remote)";
307307+ ] in
308308+ let info = Cmd.info "sync" ~doc ~man in
309309+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ remote $ only)
310310+311311+(** {1 Paper Add Command} *)
312312+313313+let paper_add_cmd =
314314+ let doi_arg =
315315+ let doc = "The DOI to resolve." in
316316+ Arg.(required & pos 0 (some string) None & info [] ~docv:"DOI" ~doc)
317317+ in
318318+ let slug =
319319+ let doc = "Slug for the paper (e.g., 2024-venue-name)." in
320320+ Arg.(required & opt (some string) None & info ["slug"] ~docv:"SLUG" ~doc)
321321+ in
322322+ let version =
323323+ let doc = "Paper version (e.g., v1, v2). Auto-increments if not specified." in
324324+ Arg.(value & opt (some string) None & info ["ver"] ~docv:"VER" ~doc)
325325+ in
326326+ let run () config_file data_dir doi slug version =
327327+ match load_config config_file with
328328+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
329329+ | Ok config ->
330330+ let data_dir = get_data_dir config data_dir in
331331+332332+ Eio_main.run @@ fun env ->
333333+ let fs = Eio.Stdenv.fs env in
334334+ let proc_mgr = Eio.Stdenv.process_mgr env in
335335+ let entries = Bushel_eio.Bushel_loader.load fs data_dir in
336336+337337+ (* Determine version *)
338338+ let papers_dir = Filename.concat data_dir ("data/papers/" ^ slug) in
339339+ let version = match version with
340340+ | Some v -> v
341341+ | None ->
342342+ (* Auto-increment: find highest existing version *)
343343+ if Sys.file_exists papers_dir then begin
344344+ let files = Sys.readdir papers_dir |> Array.to_list in
345345+ let versions = List.filter_map (fun f ->
346346+ if Filename.check_suffix f ".md" then
347347+ Some (Filename.chop_extension f)
348348+ else None
349349+ ) files in
350350+ let max_ver = List.fold_left (fun acc v ->
351351+ try
352352+ let n = Scanf.sscanf v "v%d" Fun.id in
353353+ max acc n
354354+ with _ -> acc
355355+ ) 0 versions in
356356+ Printf.sprintf "v%d" (max_ver + 1)
357357+ end else "v1"
358358+ in
359359+360360+ Printf.printf "Resolving DOI: %s\n" doi;
361361+ Printf.printf "Slug: %s, Version: %s\n" slug version;
362362+363363+ match Bushel_sync.Zotero.resolve ~proc_mgr
364364+ ~server_url:config.zotero_translation_server
365365+ ~slug doi with
366366+ | Error e ->
367367+ Printf.eprintf "Error resolving DOI: %s\n" e;
368368+ 1
369369+ | Ok metadata ->
370370+ Printf.printf "Title: %s\n" metadata.title;
371371+ Printf.printf "Authors: %s\n" (String.concat ", " metadata.authors);
372372+ Printf.printf "Year: %d\n" metadata.year;
373373+374374+ (* Check for existing versions and merge *)
375375+ let metadata =
376376+ let existing_papers = Bushel.Entry.papers entries in
377377+ match Bushel.Paper.lookup existing_papers slug with
378378+ | Some existing ->
379379+ Printf.printf "Merging with existing paper data...\n";
380380+ Bushel_sync.Zotero.merge_with_existing ~existing metadata
381381+ | None -> metadata
382382+ in
383383+384384+ (* Generate file content *)
385385+ let content = Bushel_sync.Zotero.to_yaml_frontmatter ~slug ~ver:version metadata in
386386+387387+ (* Create directory if needed *)
388388+ if not (Sys.file_exists papers_dir) then
389389+ Unix.mkdir papers_dir 0o755;
390390+391391+ (* Write file *)
392392+ let filepath = Filename.concat papers_dir (version ^ ".md") in
393393+ let oc = open_out filepath in
394394+ output_string oc content;
395395+ close_out oc;
396396+397397+ Printf.printf "Created: %s\n" filepath;
398398+ 0
399399+ in
400400+ let doc = "Add a paper from DOI, merging with existing versions." in
401401+ let man = [
402402+ `S Manpage.s_description;
403403+ `P "Resolves a DOI using the Zotero Translation Server and creates a paper entry.";
404404+ `P "If older versions of the paper exist, preserves abstract, tags, projects, \
405405+ selected flag, and slides from the existing paper.";
406406+ ] in
407407+ let info = Cmd.info "paper" ~doc ~man in
408408+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ doi_arg $ slug $ version)
409409+410410+(** {1 Video Fetch Command} *)
411411+412412+let video_fetch_cmd =
413413+ let server =
414414+ let doc = "PeerTube server name from config." in
415415+ Arg.(required & opt (some string) None & info ["server"; "s"] ~docv:"NAME" ~doc)
416416+ in
417417+ let channel =
418418+ let doc = "Channel name to fetch videos from." in
419419+ Arg.(required & opt (some string) None & info ["channel"] ~docv:"CHANNEL" ~doc)
420420+ in
421421+ let run () config_file data_dir server channel =
422422+ match load_config config_file with
423423+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
424424+ | Ok config ->
425425+ let data_dir = get_data_dir config data_dir in
426426+427427+ (* Find server endpoint *)
428428+ let endpoint = List.find_map (fun (s : Bushel_config.peertube_server) ->
429429+ if s.name = server then Some s.endpoint else None
430430+ ) config.peertube_servers in
431431+432432+ match endpoint with
433433+ | None ->
434434+ Printf.eprintf "Unknown server: %s\n" server;
435435+ Printf.eprintf "Available servers:\n";
436436+ List.iter (fun (s : Bushel_config.peertube_server) ->
437437+ Printf.eprintf " - %s (%s)\n" s.name s.endpoint
438438+ ) config.peertube_servers;
439439+ 1
440440+ | Some endpoint ->
441441+ Eio_main.run @@ fun env ->
442442+ let proc_mgr = Eio.Stdenv.process_mgr env in
443443+444444+ Printf.printf "Fetching videos from %s channel %s...\n" server channel;
445445+446446+ let videos = Bushel_sync.Peertube.fetch_all_channel_videos
447447+ ~proc_mgr ~endpoint ~channel () in
448448+449449+ Printf.printf "Found %d videos\n" (List.length videos);
450450+451451+ (* Load or create videos index *)
452452+ let index_path = Filename.concat data_dir "data/videos.yml" in
453453+ let index = Bushel_sync.Peertube.VideoIndex.load_file index_path in
454454+455455+ (* Create video files and update index *)
456456+ let videos_dir = Filename.concat data_dir "data/videos" in
457457+ if not (Sys.file_exists videos_dir) then
458458+ Unix.mkdir videos_dir 0o755;
459459+460460+ let new_count = ref 0 in
461461+ List.iter (fun (video : Bushel_sync.Peertube.video) ->
462462+ let video_path = Filename.concat videos_dir (video.uuid ^ ".md") in
463463+464464+ if Sys.file_exists video_path then
465465+ Printf.printf " Skipping %s (exists)\n" video.uuid
466466+ else begin
467467+ Printf.printf " Creating %s: %s\n" video.uuid video.name;
468468+469469+ (* Generate markdown file *)
470470+ let content = Printf.sprintf {|---
471471+title: %s
472472+published_date: %s
473473+uuid: %s
474474+url: %s
475475+talk: false
476476+tags: []
477477+---
478478+479479+%s
480480+|}
481481+ video.name
482482+ (Ptime.to_rfc3339 video.published_at)
483483+ video.uuid
484484+ video.url
485485+ (Option.value ~default:"" video.description)
486486+ in
487487+488488+ let oc = open_out video_path in
489489+ output_string oc content;
490490+ close_out oc;
491491+492492+ (* Update index *)
493493+ Bushel_sync.Peertube.VideoIndex.add index ~uuid:video.uuid ~server;
494494+ incr new_count
495495+ end
496496+ ) videos;
497497+498498+ (* Save updated index *)
499499+ Bushel_sync.Peertube.VideoIndex.save_file index_path index;
500500+501501+ Printf.printf "\nCreated %d new video entries\n" !new_count;
502502+ Printf.printf "Updated index: %s\n" index_path;
503503+ 0
504504+ in
505505+ let doc = "Fetch videos from a PeerTube channel." in
506506+ let info = Cmd.info "video" ~doc in
507507+ Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ server $ channel)
508508+509509+(** {1 Config Command} *)
510510+511511+let config_cmd =
512512+ let run () config_file =
513513+ match load_config config_file with
514514+ | Error e -> Printf.eprintf "Config error: %s\n" e; 1
515515+ | Ok config ->
516516+ Printf.printf "Config file: %s\n" (Bushel_config.config_file ());
517517+ Printf.printf "\n";
518518+ Fmt.pr "%a\n" Bushel_config.pp config;
519519+ 0
520520+ in
521521+ let doc = "Show current configuration." in
522522+ let info = Cmd.info "config" ~doc in
523523+ Cmd.v info Term.(const run $ logging_t $ config_file)
524524+525525+(** {1 Init Command} *)
526526+527527+let init_cmd =
528528+ let force =
529529+ let doc = "Overwrite existing config file." in
530530+ Arg.(value & flag & info ["force"; "f"] ~doc)
531531+ in
532532+ let run () force =
533533+ match Bushel_config.write_default_config ~force () with
534534+ | Error e ->
535535+ Printf.eprintf "%s\n" e;
536536+ 1
537537+ | Ok path ->
538538+ Printf.printf "Created config file: %s\n" path;
539539+ Printf.printf "\nEdit this file to configure:\n";
540540+ Printf.printf " - Remote server for image sync\n";
541541+ Printf.printf " - Local data and image directories\n";
542542+ Printf.printf " - Immich endpoint and API key\n";
543543+ Printf.printf " - PeerTube servers\n";
544544+ Printf.printf " - Typesense and OpenAI API keys\n";
545545+ Printf.printf " - Zotero Translation Server URL\n";
546546+ 0
547547+ in
548548+ let doc = "Initialize a default configuration file." in
549549+ let man = [
550550+ `S Manpage.s_description;
551551+ `P "Creates a default config.toml file at ~/.config/bushel/config.toml";
552552+ `P "The generated file includes comments explaining each option.";
553553+ `P "Use --force to overwrite an existing config file.";
554554+ ] in
555555+ let info = Cmd.info "init" ~doc ~man in
556556+ Cmd.v info Term.(const run $ logging_t $ force)
557557+558558+(** {1 Main Command Group} *)
559559+560560+let main_cmd =
561561+ let doc = "Bushel knowledge base CLI" in
562562+ let man = [
563563+ `S Manpage.s_description;
564564+ `P "Bushel is a CLI tool for managing and querying a knowledge base \
565565+ containing papers, notes, projects, ideas, and videos.";
566566+ `S Manpage.s_commands;
567567+ `P "Use $(b,bushel COMMAND --help) for help on a specific command.";
568568+ `S "CONFIGURATION";
569569+ `P "Configuration is read from ~/.config/bushel/config.toml";
570570+ `P "See $(b,bushel config) for current settings.";
571571+ ] in
572572+ let info = Cmd.info "bushel" ~version:"0.2.0" ~doc ~man in
573573+ Cmd.group info [
574574+ init_cmd;
575575+ list_cmd;
576576+ stats_cmd;
577577+ show_cmd;
578578+ sync_cmd;
579579+ paper_add_cmd;
580580+ video_fetch_cmd;
581581+ config_cmd;
582582+ ]
583583+584584+let () =
585585+ match Cmd.eval_value main_cmd with
586586+ | Ok (`Ok exit_code) -> exit exit_code
587587+ | Ok (`Help | `Version) -> exit 0
588588+ | Error _ -> exit 1
+56
bushel.opam
···11+# This file is generated by dune, edit dune-project instead
22+opam-version: "2.0"
33+synopsis: "Personal knowledge base and research entry management"
44+description: """
55+Bushel is a library for managing structured research entries including
66+ notes, papers, projects, ideas, videos, and contacts. It provides typed
77+ access to markdown files with YAML frontmatter and supports link graphs,
88+ markdown processing with custom extensions, and search integration.
99+ Right now this is a fairly specific workflow used by Anil Madhavapeddy,
1010+ but it may generalise in the future.
1111+1212+ Subpackages:
1313+ - bushel.eio: Eio-based directory loading
1414+ - bushel.config: XDG-compliant TOML configuration
1515+ - bushel.sync: Sync pipeline for images and thumbnails
1616+ - bushel.typesense: Typesense search integration"""
1717+maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
1818+authors: ["Anil Madhavapeddy <anil@recoil.org>"]
1919+license: "ISC"
2020+depends: [
2121+ "dune" {>= "3.18"}
2222+ "ocaml" {>= "5.2"}
2323+ "frontmatter" {>= "0.1"}
2424+ "frontmatter-eio" {>= "0.1"}
2525+ "cmarkit" {>= "0.3"}
2626+ "jsont" {>= "0.1"}
2727+ "bytesrw"
2828+ "ptime" {>= "1.2"}
2929+ "re" {>= "1.11"}
3030+ "uri" {>= "4.4"}
3131+ "fmt" {>= "0.9"}
3232+ "eio" {>= "1.2"}
3333+ "tomlt" {>= "0.1"}
3434+ "typesense" {>= "0.1"}
3535+ "astring" {>= "0.8"}
3636+ "logs" {>= "0.7"}
3737+ "yamlrw"
3838+ "cmdliner"
3939+ "eio_main"
4040+ "odoc" {with-doc}
4141+]
4242+build: [
4343+ ["dune" "subst"] {dev}
4444+ [
4545+ "dune"
4646+ "build"
4747+ "-p"
4848+ name
4949+ "-j"
5050+ jobs
5151+ "@install"
5252+ "@runtest" {with-test}
5353+ "@doc" {with-doc}
5454+ ]
5555+]
5656+x-maintenance-intent: ["(latest)"]
+46
dune-project
···11+(lang dune 3.18)
22+(name bushel)
33+44+(generate_opam_files true)
55+(maintenance_intent "(latest)")
66+77+(license ISC)
88+(authors "Anil Madhavapeddy <anil@recoil.org>")
99+(maintainers "Anil Madhavapeddy <anil@recoil.org>")
1010+1111+(package
1212+ (name bushel)
1313+ (synopsis "Personal knowledge base and research entry management")
1414+ (description
1515+ "Bushel is a library for managing structured research entries including
1616+ notes, papers, projects, ideas, videos, and contacts. It provides typed
1717+ access to markdown files with YAML frontmatter and supports link graphs,
1818+ markdown processing with custom extensions, and search integration.
1919+ Right now this is a fairly specific workflow used by Anil Madhavapeddy,
2020+ but it may generalise in the future.
2121+2222+ Subpackages:
2323+ - bushel.eio: Eio-based directory loading
2424+ - bushel.config: XDG-compliant TOML configuration
2525+ - bushel.sync: Sync pipeline for images and thumbnails
2626+ - bushel.typesense: Typesense search integration")
2727+ (depends
2828+ (ocaml (>= 5.2))
2929+ (frontmatter (>= 0.1))
3030+ (frontmatter-eio (>= 0.1))
3131+ (cmarkit (>= 0.3))
3232+ (jsont (>= 0.1))
3333+ bytesrw
3434+ (ptime (>= 1.2))
3535+ (re (>= 1.11))
3636+ (uri (>= 4.4))
3737+ (fmt (>= 0.9))
3838+ (eio (>= 1.2))
3939+ (tomlt (>= 0.1))
4040+ (typesense (>= 0.1))
4141+ (astring (>= 0.8))
4242+ (logs (>= 0.7))
4343+ yamlrw
4444+ cmdliner
4545+ eio_main
4646+ (odoc :with-doc)))
+92
lib/bushel.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Bushel - Personal knowledge base and research entry management
77+88+ Bushel is a library for managing structured research entries including
99+ notes, papers, projects, ideas, videos, and contacts. It provides typed
1010+ access to markdown files with YAML frontmatter and supports link graphs,
1111+ markdown processing with custom extensions, and search integration.
1212+1313+ {1 Entry Types}
1414+1515+ - {!Contact} - People/researchers with social links
1616+ - {!Note} - Blog posts and research notes
1717+ - {!Paper} - Academic papers with BibTeX metadata
1818+ - {!Project} - Research projects
1919+ - {!Idea} - Research ideas/proposals
2020+ - {!Video} - Talk videos and recordings
2121+2222+ {1 Core Modules}
2323+2424+ - {!Entry} - Union type for all entry types with common operations
2525+ - {!Tags} - Tag parsing and filtering
2626+ - {!Md} - Markdown processing with Bushel link extensions
2727+ - {!Link_graph} - Bidirectional link tracking between entries
2828+2929+ {1 Quick Start}
3030+3131+ {[
3232+ (* Load entries using bushel-eio *)
3333+ let entries = Bushel_loader.load fs "/path/to/data" in
3434+3535+ (* Look up entries by slug *)
3636+ match Bushel.Entry.lookup entries "my-note" with
3737+ | Some (`Note n) -> Printf.printf "Title: %s\n" (Bushel.Note.title n)
3838+ | _ -> ()
3939+4040+ (* Get backlinks *)
4141+ let backlinks = Bushel.Link_graph.get_backlinks_for_slug "my-note" in
4242+ List.iter print_endline backlinks
4343+ ]}
4444+*)
4545+4646+(** {1 Entry Types} *)
4747+4848+module Contact = Bushel_contact
4949+(** Contact/person entries. *)
5050+5151+module Note = Bushel_note
5252+(** Blog post and research note entries. *)
5353+5454+module Paper = Bushel_paper
5555+(** Academic paper entries with BibTeX-style metadata. *)
5656+5757+module Project = Bushel_project
5858+(** Research project entries. *)
5959+6060+module Idea = Bushel_idea
6161+(** Research idea/proposal entries. *)
6262+6363+module Video = Bushel_video
6464+(** Video/talk recording entries. *)
6565+6666+(** {1 Core Modules} *)
6767+6868+module Entry = Bushel_entry
6969+(** Union type for all entry types with common accessors. *)
7070+7171+module Tags = Bushel_tags
7272+(** Tag parsing, filtering, and counting. *)
7373+7474+module Md = Bushel_md
7575+(** Markdown processing with Bushel link extensions. *)
7676+7777+module Link = Bushel_link
7878+(** External link tracking and merging. *)
7979+8080+module Link_graph = Bushel_link_graph
8181+(** Bidirectional link graph for entry relationships. *)
8282+8383+module Description = Bushel_description
8484+(** Generate descriptive text for entries. *)
8585+8686+(** {1 Utilities} *)
8787+8888+module Types = Bushel_types
8989+(** Common types and Jsont codecs. *)
9090+9191+module Util = Bushel_util
9292+(** Utility functions (word counting, text processing). *)
+154
lib/bushel_contact.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Contact/person entry type for Bushel *)
77+88+type t = {
99+ names : string list;
1010+ handle : string;
1111+ email : string option;
1212+ icon : string option;
1313+ github : string option;
1414+ twitter : string option;
1515+ bluesky : string option;
1616+ mastodon : string option;
1717+ orcid : string option;
1818+ url : string option;
1919+ atom : string list option;
2020+}
2121+2222+type ts = t list
2323+2424+(** {1 Constructors} *)
2525+2626+let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names =
2727+ { names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom }
2828+2929+let make names email icon github twitter bluesky mastodon orcid url atom =
3030+ v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names
3131+3232+(** {1 Accessors} *)
3333+3434+let names { names; _ } = names
3535+3636+let name c =
3737+ match c.names with
3838+ | n :: _ -> n
3939+ | [] -> failwith (Printf.sprintf "Contact with handle '%s' has empty names list" c.handle)
4040+4141+let handle { handle; _ } = handle
4242+let email { email; _ } = email
4343+let icon { icon; _ } = icon
4444+let github { github; _ } = github
4545+let twitter { twitter; _ } = twitter
4646+let bluesky { bluesky; _ } = bluesky
4747+let mastodon { mastodon; _ } = mastodon
4848+let orcid { orcid; _ } = orcid
4949+let url { url; _ } = url
5050+let atom { atom; _ } = atom
5151+5252+(** {1 Jsont Codec} *)
5353+5454+let jsont : t Jsont.t =
5555+ let open Jsont in
5656+ let open Jsont.Object in
5757+ let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in
5858+ map ~kind:"Contact" make
5959+ |> mem "names" (list string) ~dec_absent:[] ~enc:names
6060+ |> mem_opt "email" (some string) ~enc:email
6161+ |> mem_opt "icon" (some string) ~enc:icon
6262+ |> mem_opt "github" (some string) ~enc:github
6363+ |> mem_opt "twitter" (some string) ~enc:twitter
6464+ |> mem_opt "bluesky" (some string) ~enc:bluesky
6565+ |> mem_opt "mastodon" (some string) ~enc:mastodon
6666+ |> mem_opt "orcid" (some string) ~enc:orcid
6767+ |> mem_opt "url" (some string) ~enc:url
6868+ |> mem_opt "atom" (some (list string)) ~enc:atom
6969+ |> finish
7070+7171+(** {1 Parsing} *)
7272+7373+let of_frontmatter ~handle (fm : Frontmatter.t) : (t, string) result =
7474+ match Frontmatter.decode jsont fm with
7575+ | Ok c -> Ok { c with handle }
7676+ | Error e -> Error e
7777+7878+(** {1 Lookup Functions} *)
7979+8080+let compare a b = String.compare a.handle b.handle
8181+8282+let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts
8383+8484+let best_url c =
8585+ match c.url with
8686+ | Some _ as url -> url
8787+ | None ->
8888+ match c.github with
8989+ | Some g -> Some ("https://github.com/" ^ g)
9090+ | None -> Option.map (fun e -> "mailto:" ^ e) c.email
9191+9292+(** Given a name, turn it lowercase and return the concatenation of the
9393+ initials of all the words in the name and the full last name. *)
9494+let handle_of_name name =
9595+ let name = String.lowercase_ascii name in
9696+ let words = String.split_on_char ' ' name in
9797+ let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in
9898+ initials ^ List.hd (List.rev words)
9999+100100+(** Fuzzy lookup for an author by name. *)
101101+let lookup_by_name ts a =
102102+ let a = String.lowercase_ascii a in
103103+ let rec aux acc = function
104104+ | [] -> acc
105105+ | t :: ts ->
106106+ if List.exists (fun n -> String.lowercase_ascii n = a) t.names
107107+ then aux (t :: acc) ts
108108+ else aux acc ts
109109+ in
110110+ match aux [] ts with
111111+ | [ a ] -> a
112112+ | [] -> raise (Failure ("Contact not found: " ^ a))
113113+ | _ -> raise (Failure ("Ambiguous contact: " ^ a))
114114+115115+(** {1 Pretty Printing} *)
116116+117117+let pp ppf c =
118118+ let open Fmt in
119119+ pf ppf "@[<v>";
120120+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact";
121121+ pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c);
122122+ pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c);
123123+ let ns = names c in
124124+ if List.length ns > 1 then
125125+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
126126+ (match email c with
127127+ | Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e
128128+ | None -> ());
129129+ (match github c with
130130+ | Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g
131131+ | None -> ());
132132+ (match twitter c with
133133+ | Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t
134134+ | None -> ());
135135+ (match bluesky c with
136136+ | Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b
137137+ | None -> ());
138138+ (match mastodon c with
139139+ | Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m
140140+ | None -> ());
141141+ (match orcid c with
142142+ | Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o
143143+ | None -> ());
144144+ (match url c with
145145+ | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
146146+ | None -> ());
147147+ (match icon c with
148148+ | Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i
149149+ | None -> ());
150150+ (match atom c with
151151+ | Some atoms when atoms <> [] ->
152152+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms
153153+ | _ -> ());
154154+ pf ppf "@]"
+86
lib/bushel_description.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Generate descriptive text for Bushel entries *)
77+88+(** Format a date as "Month Year" *)
99+let format_date (year, month, _day) =
1010+ Printf.sprintf "%s %d" (Bushel_types.month_name month) year
1111+1212+(** Generate a descriptive sentence for a paper *)
1313+let paper_description (p : Bushel_paper.t) ~date_str =
1414+ let venue = match String.lowercase_ascii (Bushel_paper.bibtype p) with
1515+ | "inproceedings" -> Bushel_paper.booktitle p
1616+ | "article" -> Bushel_paper.journal p
1717+ | "book" ->
1818+ let pub = Bushel_paper.publisher p in
1919+ if pub = "" then "Book" else "Book by " ^ pub
2020+ | "techreport" ->
2121+ let inst = Bushel_paper.institution p in
2222+ if inst = "" then "Technical report" else "Technical report at " ^ inst
2323+ | "misc" ->
2424+ let pub = Bushel_paper.publisher p in
2525+ if pub = "" then "Working paper" else "Working paper at " ^ pub
2626+ | _ -> "Publication"
2727+ in
2828+ Printf.sprintf "Paper in %s (%s)" venue date_str
2929+3030+(** Generate a descriptive sentence for a note *)
3131+let note_description (n : Bushel_note.t) ~date_str ~lookup_fn =
3232+ match Bushel_note.slug_ent n with
3333+ | Some slug_ent ->
3434+ (match lookup_fn slug_ent with
3535+ | Some related_title ->
3636+ Printf.sprintf "Note about %s (%s)" related_title date_str
3737+ | None -> Printf.sprintf "Research note (%s)" date_str)
3838+ | None -> Printf.sprintf "Research note (%s)" date_str
3939+4040+(** Generate a descriptive sentence for an idea *)
4141+let idea_description (i : Bushel_idea.t) ~date_str =
4242+ let status_str = String.lowercase_ascii (Bushel_idea.status_to_string (Bushel_idea.status i)) in
4343+ let level_str = Bushel_idea.level_to_string (Bushel_idea.level i) in
4444+ Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str
4545+4646+(** Generate a descriptive sentence for a video *)
4747+let video_description (v : Bushel_video.t) ~date_str ~lookup_fn =
4848+ let video_type = if Bushel_video.talk v then "Talk video" else "Video" in
4949+ let context = match Bushel_video.paper v with
5050+ | Some paper_slug ->
5151+ (match lookup_fn paper_slug with
5252+ | Some title -> Printf.sprintf " about %s" title
5353+ | None -> "")
5454+ | None ->
5555+ (match Bushel_video.project v with
5656+ | Some project_slug ->
5757+ (match lookup_fn project_slug with
5858+ | Some title -> Printf.sprintf " about %s" title
5959+ | None -> "")
6060+ | None -> "")
6161+ in
6262+ Printf.sprintf "%s%s (%s)" video_type context date_str
6363+6464+(** Generate a descriptive sentence for a project *)
6565+let project_description (pr : Bushel_project.t) =
6666+ let end_str = match Bushel_project.finish pr with
6767+ | Some year -> string_of_int year
6868+ | None -> "present"
6969+ in
7070+ Printf.sprintf "Project (%d–%s)" (Bushel_project.start pr) end_str
7171+7272+(** Generate description for any entry type *)
7373+let entry_description entries entry =
7474+ let lookup_fn slug =
7575+ match Bushel_entry.lookup entries slug with
7676+ | Some e -> Some (Bushel_entry.title e)
7777+ | None -> None
7878+ in
7979+ let date = Bushel_entry.date entry in
8080+ let date_str = format_date date in
8181+ match entry with
8282+ | `Paper p -> paper_description p ~date_str
8383+ | `Note n -> note_description n ~date_str ~lookup_fn
8484+ | `Idea i -> idea_description i ~date_str
8585+ | `Video v -> video_description v ~date_str ~lookup_fn
8686+ | `Project p -> project_description p
+180
lib/bushel_entry.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Union entry type for all Bushel content *)
77+88+type entry =
99+ [ `Paper of Bushel_paper.t
1010+ | `Project of Bushel_project.t
1111+ | `Idea of Bushel_idea.t
1212+ | `Video of Bushel_video.t
1313+ | `Note of Bushel_note.t
1414+ ]
1515+1616+type slugs = (string, entry) Hashtbl.t
1717+1818+type t = {
1919+ slugs : slugs;
2020+ papers : Bushel_paper.ts;
2121+ old_papers : Bushel_paper.ts;
2222+ notes : Bushel_note.ts;
2323+ projects : Bushel_project.ts;
2424+ ideas : Bushel_idea.ts;
2525+ videos : Bushel_video.ts;
2626+ contacts : Bushel_contact.ts;
2727+ data_dir : string;
2828+}
2929+3030+(** {1 Constructors} *)
3131+3232+let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir =
3333+ let slugs : slugs = Hashtbl.create 42 in
3434+ let papers, old_papers = List.partition (fun p -> p.Bushel_paper.latest) papers in
3535+ List.iter (fun n -> Hashtbl.add slugs n.Bushel_note.slug (`Note n)) notes;
3636+ List.iter (fun p -> Hashtbl.add slugs p.Bushel_project.slug (`Project p)) projects;
3737+ List.iter (fun i -> Hashtbl.add slugs i.Bushel_idea.slug (`Idea i)) ideas;
3838+ List.iter (fun v -> Hashtbl.add slugs v.Bushel_video.slug (`Video v)) videos;
3939+ List.iter (fun p -> Hashtbl.add slugs p.Bushel_paper.slug (`Paper p)) papers;
4040+ { slugs; papers; old_papers; notes; projects; ideas; videos; contacts; data_dir }
4141+4242+(** {1 Accessors} *)
4343+4444+let contacts { contacts; _ } = contacts
4545+let videos { videos; _ } = videos
4646+let ideas { ideas; _ } = ideas
4747+let papers { papers; _ } = papers
4848+let notes { notes; _ } = notes
4949+let projects { projects; _ } = projects
5050+let old_papers { old_papers; _ } = old_papers
5151+let data_dir { data_dir; _ } = data_dir
5252+5353+(** {1 Lookup Functions} *)
5454+5555+let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug
5656+let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug
5757+5858+(** {1 Entry Properties} *)
5959+6060+let to_type_string = function
6161+ | `Paper _ -> "paper"
6262+ | `Note _ -> "note"
6363+ | `Project _ -> "project"
6464+ | `Idea _ -> "idea"
6565+ | `Video _ -> "video"
6666+6767+let slug = function
6868+ | `Paper p -> Bushel_paper.slug p
6969+ | `Note n -> Bushel_note.slug n
7070+ | `Project p -> Bushel_project.slug p
7171+ | `Idea i -> Bushel_idea.slug i
7272+ | `Video v -> Bushel_video.slug v
7373+7474+let title = function
7575+ | `Paper p -> Bushel_paper.title p
7676+ | `Note n -> Bushel_note.title n
7777+ | `Project p -> Bushel_project.title p
7878+ | `Idea i -> Bushel_idea.title i
7979+ | `Video v -> Bushel_video.title v
8080+8181+let body = function
8282+ | `Paper _ -> ""
8383+ | `Note n -> Bushel_note.body n
8484+ | `Project p -> Bushel_project.body p
8585+ | `Idea i -> Bushel_idea.body i
8686+ | `Video _ -> ""
8787+8888+let sidebar = function
8989+ | `Note { Bushel_note.sidebar = Some s; _ } -> Some s
9090+ | _ -> None
9191+9292+let synopsis = function
9393+ | `Note n -> Bushel_note.synopsis n
9494+ | _ -> None
9595+9696+let site_url = function
9797+ | `Paper p -> "/papers/" ^ Bushel_paper.slug p
9898+ | `Note n -> "/notes/" ^ Bushel_note.slug n
9999+ | `Project p -> "/projects/" ^ Bushel_project.slug p
100100+ | `Idea i -> "/ideas/" ^ Bushel_idea.slug i
101101+ | `Video v -> "/videos/" ^ Bushel_video.slug v
102102+103103+let date (x : entry) =
104104+ match x with
105105+ | `Paper p -> Bushel_paper.date p
106106+ | `Note n -> Bushel_note.date n
107107+ | `Project p -> (Bushel_project.start p, 1, 1)
108108+ | `Idea i -> (Bushel_idea.year i, Bushel_idea.month i, 1)
109109+ | `Video v -> Bushel_video.date v
110110+111111+let datetime v = Bushel_types.ptime_of_date_exn (date v)
112112+113113+let year x =
114114+ let (y, _, _) = date x in y
115115+116116+let is_index_entry = function
117117+ | `Note n -> n.Bushel_note.index_page
118118+ | _ -> false
119119+120120+(** {1 Derived Lookups} *)
121121+122122+let lookup_site_url t slug =
123123+ match lookup t slug with
124124+ | Some ent -> site_url ent
125125+ | None -> ""
126126+127127+let lookup_title t slug =
128128+ match lookup t slug with
129129+ | Some ent -> title ent
130130+ | None -> ""
131131+132132+let notes_for_slug { notes; _ } slug =
133133+ List.filter (fun n ->
134134+ match Bushel_note.slug_ent n with
135135+ | Some s -> s = slug
136136+ | None -> false
137137+ ) notes
138138+139139+let all_entries { slugs; _ } =
140140+ Hashtbl.fold (fun _ v acc -> v :: acc) slugs []
141141+142142+let all_papers { papers; old_papers; _ } =
143143+ List.map (fun x -> `Paper x) (papers @ old_papers)
144144+145145+(** {1 Comparison} *)
146146+147147+let compare a b =
148148+ let da = datetime a in
149149+ let db = datetime b in
150150+ if Ptime.equal da db then String.compare (title a) (title b)
151151+ else Ptime.compare da db
152152+153153+(** {1 Contact Lookups} *)
154154+155155+let lookup_by_name { contacts; _ } n =
156156+ match Bushel_contact.lookup_by_name contacts n with
157157+ | v -> Some v
158158+ | exception _ -> None
159159+160160+(** {1 Tag Functions} *)
161161+162162+let tags_of_ent _entries ent : Bushel_tags.t list =
163163+ match ent with
164164+ | `Paper p -> Bushel_tags.of_string_list @@ Bushel_paper.tags p
165165+ | `Video v -> Bushel_tags.of_string_list @@ Bushel_video.tags v
166166+ | `Project p -> Bushel_tags.of_string_list @@ Bushel_project.tags p
167167+ | `Note n -> Bushel_tags.of_string_list @@ Bushel_note.tags n
168168+ | `Idea i -> Bushel_tags.of_string_list @@ Bushel_idea.tags i
169169+170170+let mention_entries entries tags =
171171+ let lk t =
172172+ try Some (lookup_exn entries t)
173173+ with Not_found ->
174174+ Printf.eprintf "mention_entries not found: %s\n%!" t;
175175+ None
176176+ in
177177+ List.filter_map (function
178178+ | `Slug t -> lk t
179179+ | _ -> None
180180+ ) tags
+123
lib/bushel_entry.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Union entry type for all Bushel content *)
77+88+(** A single entry in the knowledge base. *)
99+type entry =
1010+ [ `Paper of Bushel_paper.t
1111+ | `Project of Bushel_project.t
1212+ | `Idea of Bushel_idea.t
1313+ | `Video of Bushel_video.t
1414+ | `Note of Bushel_note.t
1515+ ]
1616+1717+(** Slug-to-entry lookup table. *)
1818+type slugs = (string, entry) Hashtbl.t
1919+2020+(** The complete entry collection. *)
2121+type t
2222+2323+(** {1 Constructors} *)
2424+2525+val v :
2626+ papers:Bushel_paper.t list ->
2727+ notes:Bushel_note.t list ->
2828+ projects:Bushel_project.t list ->
2929+ ideas:Bushel_idea.t list ->
3030+ videos:Bushel_video.t list ->
3131+ contacts:Bushel_contact.t list ->
3232+ data_dir:string ->
3333+ t
3434+(** Create an entry collection from lists of each entry type. *)
3535+3636+(** {1 Accessors} *)
3737+3838+val contacts : t -> Bushel_contact.ts
3939+val videos : t -> Bushel_video.ts
4040+val ideas : t -> Bushel_idea.ts
4141+val papers : t -> Bushel_paper.ts
4242+val notes : t -> Bushel_note.ts
4343+val projects : t -> Bushel_project.ts
4444+val old_papers : t -> Bushel_paper.ts
4545+val data_dir : t -> string
4646+4747+(** {1 Lookup Functions} *)
4848+4949+val lookup : t -> string -> entry option
5050+(** [lookup entries slug] finds an entry by its slug. *)
5151+5252+val lookup_exn : t -> string -> entry
5353+(** Like {!lookup} but raises [Not_found] if the slug doesn't exist. *)
5454+5555+(** {1 Entry Properties} *)
5656+5757+val to_type_string : entry -> string
5858+(** [to_type_string entry] returns the type name as a string. *)
5959+6060+val slug : entry -> string
6161+(** [slug entry] returns the entry's slug. *)
6262+6363+val title : entry -> string
6464+(** [title entry] returns the entry's title. *)
6565+6666+val body : entry -> string
6767+(** [body entry] returns the entry's body content. *)
6868+6969+val sidebar : entry -> string option
7070+(** [sidebar entry] returns the entry's sidebar content if present. *)
7171+7272+val synopsis : entry -> string option
7373+(** [synopsis entry] returns the entry's synopsis if present. *)
7474+7575+val site_url : entry -> string
7676+(** [site_url entry] returns the site URL path for the entry. *)
7777+7878+val date : entry -> int * int * int
7979+(** [date entry] returns the entry's date as (year, month, day). *)
8080+8181+val datetime : entry -> Ptime.t
8282+(** [datetime entry] returns the entry's date as a timestamp. *)
8383+8484+val year : entry -> int
8585+(** [year entry] returns the entry's year. *)
8686+8787+val is_index_entry : entry -> bool
8888+(** [is_index_entry entry] returns true if this is an index page. *)
8989+9090+(** {1 Derived Lookups} *)
9191+9292+val lookup_site_url : t -> string -> string
9393+(** [lookup_site_url entries slug] returns the site URL for a slug. *)
9494+9595+val lookup_title : t -> string -> string
9696+(** [lookup_title entries slug] returns the title for a slug. *)
9797+9898+val notes_for_slug : t -> string -> Bushel_note.t list
9999+(** [notes_for_slug entries slug] returns notes that reference the given slug. *)
100100+101101+val all_entries : t -> entry list
102102+(** [all_entries entries] returns all entries as a list. *)
103103+104104+val all_papers : t -> entry list
105105+(** [all_papers entries] returns all papers including old versions. *)
106106+107107+(** {1 Comparison} *)
108108+109109+val compare : entry -> entry -> int
110110+(** Compare entries by date, then by title. *)
111111+112112+(** {1 Contact Lookups} *)
113113+114114+val lookup_by_name : t -> string -> Bushel_contact.t option
115115+(** [lookup_by_name entries name] finds a contact by name. *)
116116+117117+(** {1 Tag Functions} *)
118118+119119+val tags_of_ent : t -> entry -> Bushel_tags.t list
120120+(** [tags_of_ent entries entry] returns the entry's tags. *)
121121+122122+val mention_entries : t -> Bushel_tags.t list -> entry list
123123+(** [mention_entries entries tags] returns entries mentioned in the tags. *)
+214
lib/bushel_idea.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Idea entry type for Bushel *)
77+88+(** Academic level for research ideas *)
99+type level =
1010+ | Any
1111+ | PartII
1212+ | MPhil
1313+ | PhD
1414+ | Postdoc
1515+1616+let level_of_string = function
1717+ | "Any" | "any" -> Any
1818+ | "PartII" | "partii" -> PartII
1919+ | "MPhil" | "mphil" -> MPhil
2020+ | "PhD" | "phd" -> PhD
2121+ | "postdoc" | "Postdoc" -> Postdoc
2222+ | _ -> Any
2323+2424+let level_to_string = function
2525+ | Any -> "Any"
2626+ | PartII -> "PartII"
2727+ | MPhil -> "MPhil"
2828+ | PhD -> "PhD"
2929+ | Postdoc -> "postdoctoral"
3030+3131+let level_to_tag = function
3232+ | Any -> "idea-beginner"
3333+ | PartII -> "idea-medium"
3434+ | MPhil -> "idea-hard"
3535+ | PhD -> "idea-phd"
3636+ | Postdoc -> "idea-postdoc"
3737+3838+(** Status of research idea *)
3939+type status =
4040+ | Available
4141+ | Discussion
4242+ | Ongoing
4343+ | Completed
4444+ | Expired
4545+4646+let status_of_string = function
4747+ | "Available" | "available" -> Available
4848+ | "Discussion" | "discussion" -> Discussion
4949+ | "Ongoing" | "ongoing" -> Ongoing
5050+ | "Completed" | "completed" -> Completed
5151+ | "Expired" | "expired" -> Expired
5252+ | _ -> Available
5353+5454+let status_to_string = function
5555+ | Available -> "Available"
5656+ | Discussion -> "Discussion"
5757+ | Ongoing -> "Ongoing"
5858+ | Completed -> "Completed"
5959+ | Expired -> "Expired"
6060+6161+let status_to_tag = function
6262+ | Available -> "idea-available"
6363+ | Discussion -> "idea-discuss"
6464+ | Ongoing -> "idea-ongoing"
6565+ | Completed -> "idea-done"
6666+ | Expired -> "idea-expired"
6767+6868+type t = {
6969+ slug : string;
7070+ title : string;
7171+ level : level;
7272+ project : string;
7373+ status : status;
7474+ month : int;
7575+ year : int;
7676+ supervisors : string list;
7777+ students : string list;
7878+ reading : string;
7979+ body : string;
8080+ url : string option;
8181+ tags : string list;
8282+}
8383+8484+type ts = t list
8585+8686+(** {1 Accessors} *)
8787+8888+let slug { slug; _ } = slug
8989+let title { title; _ } = title
9090+let level { level; _ } = level
9191+let project { project; _ } = project
9292+let status { status; _ } = status
9393+let year { year; _ } = year
9494+let month { month; _ } = month
9595+let supervisors { supervisors; _ } = supervisors
9696+let students { students; _ } = students
9797+let reading { reading; _ } = reading
9898+let body { body; _ } = body
9999+let url { url; _ } = url
100100+let tags { tags; _ } = tags
101101+102102+(** {1 Comparison} *)
103103+104104+let compare a b =
105105+ match Stdlib.compare a.status b.status with
106106+ | 0 ->
107107+ (match a.status with
108108+ | Completed -> Int.compare b.year a.year
109109+ | _ ->
110110+ match Stdlib.compare a.level b.level with
111111+ | 0 ->
112112+ (match Int.compare b.year a.year with
113113+ | 0 -> Int.compare b.month a.month
114114+ | n -> n)
115115+ | n -> n)
116116+ | n -> n
117117+118118+(** {1 Lookup} *)
119119+120120+let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas
121121+122122+(** {1 Jsont Codec} *)
123123+124124+let level_jsont : level Jsont.t =
125125+ Jsont.of_of_string ~kind:"level"
126126+ (fun s -> Ok (level_of_string s))
127127+ ~enc:level_to_string
128128+129129+let status_jsont : status Jsont.t =
130130+ Jsont.of_of_string ~kind:"status"
131131+ (fun s -> Ok (status_of_string s))
132132+ ~enc:status_to_string
133133+134134+let jsont : t Jsont.t =
135135+ let open Jsont in
136136+ let open Jsont.Object in
137137+ let make title level project status supervisors students tags reading url =
138138+ { slug = ""; title; level; project; status;
139139+ month = 1; year = 2000; supervisors; students; reading;
140140+ body = ""; url; tags }
141141+ in
142142+ map ~kind:"Idea" make
143143+ |> mem "title" string ~enc:(fun i -> i.title)
144144+ |> mem "level" level_jsont ~enc:(fun i -> i.level)
145145+ |> mem "project" string ~enc:(fun i -> i.project)
146146+ |> mem "status" status_jsont ~enc:(fun i -> i.status)
147147+ |> mem "supervisors" (list string) ~dec_absent:[] ~enc:(fun i -> i.supervisors)
148148+ |> mem "students" (list string) ~dec_absent:[] ~enc:(fun i -> i.students)
149149+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun i -> i.tags)
150150+ |> mem "reading" string ~dec_absent:"" ~enc:(fun i -> i.reading)
151151+ |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None
152152+ ~enc_omit:Option.is_none ~enc:(fun i -> i.url)
153153+ |> finish
154154+155155+(** {1 Parsing} *)
156156+157157+let of_frontmatter (fm : Frontmatter.t) : (t, string) result =
158158+ (* Extract slug and date from filename *)
159159+ let slug, date_opt =
160160+ match Frontmatter.fname fm with
161161+ | Some fname ->
162162+ (match Frontmatter.slug_of_fname fname with
163163+ | Ok (s, d) -> (s, d)
164164+ | Error _ -> ("", None))
165165+ | None -> ("", None)
166166+ in
167167+ let year, month =
168168+ match date_opt with
169169+ | Some d -> let (y, m, _) = Ptime.to_date d in (y, m)
170170+ | None -> (2000, 1)
171171+ in
172172+ match Frontmatter.decode jsont fm with
173173+ | Error e -> Error e
174174+ | Ok i ->
175175+ Ok { i with
176176+ slug;
177177+ year;
178178+ month;
179179+ body = Frontmatter.body fm }
180180+181181+(** {1 Pretty Printing} *)
182182+183183+let pp ppf i =
184184+ let open Fmt in
185185+ pf ppf "@[<v>";
186186+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea";
187187+ pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug;
188188+ pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i);
189189+ pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i));
190190+ pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i));
191191+ pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i);
192192+ pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month;
193193+ let sups = supervisors i in
194194+ if sups <> [] then
195195+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups;
196196+ let studs = students i in
197197+ if studs <> [] then
198198+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs;
199199+ (match i.url with
200200+ | Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url
201201+ | None -> ());
202202+ let t = i.tags in
203203+ if t <> [] then
204204+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
205205+ let r = reading i in
206206+ if r <> "" then begin
207207+ pf ppf "@,";
208208+ pf ppf "%a:@," (styled `Bold string) "Reading";
209209+ pf ppf "%a@," string r;
210210+ end;
211211+ pf ppf "@,";
212212+ pf ppf "%a:@," (styled `Bold string) "Body";
213213+ pf ppf "%a@," string (body i);
214214+ pf ppf "@]"
+235
lib/bushel_link.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** External link tracking for Bushel *)
77+88+type karakeep_data = {
99+ remote_url : string;
1010+ id : string;
1111+ tags : string list;
1212+ metadata : (string * string) list;
1313+}
1414+1515+type bushel_data = {
1616+ slugs : string list;
1717+ tags : string list;
1818+}
1919+2020+type t = {
2121+ url : string;
2222+ date : Ptime.date;
2323+ description : string;
2424+ karakeep : karakeep_data option;
2525+ bushel : bushel_data option;
2626+}
2727+2828+type ts = t list
2929+3030+(** {1 Accessors} *)
3131+3232+let url { url; _ } = url
3333+let date { date; _ } = date
3434+let description { description; _ } = description
3535+let datetime v = Bushel_types.ptime_of_date_exn (date v)
3636+3737+(** {1 Comparison} *)
3838+3939+let compare a b = Ptime.compare (datetime b) (datetime a)
4040+4141+(** {1 YAML Parsing} *)
4242+4343+let t_of_yaml = function
4444+ | `O fields ->
4545+ let url =
4646+ match List.assoc_opt "url" fields with
4747+ | Some (`String v) -> v
4848+ | _ -> failwith "link: missing or invalid url"
4949+ in
5050+ let date =
5151+ match List.assoc_opt "date" fields with
5252+ | Some (`String v) ->
5353+ (try
5454+ match String.split_on_char '-' v with
5555+ | [y; m; d] -> (int_of_string y, int_of_string m, int_of_string d)
5656+ | _ ->
5757+ v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a
5858+ with _ ->
5959+ v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a)
6060+ | _ -> failwith "link: missing or invalid date"
6161+ in
6262+ let description =
6363+ match List.assoc_opt "description" fields with
6464+ | Some (`String v) -> v
6565+ | _ -> ""
6666+ in
6767+ let karakeep =
6868+ match List.assoc_opt "karakeep" fields with
6969+ | Some (`O k_fields) ->
7070+ let remote_url =
7171+ match List.assoc_opt "remote_url" k_fields with
7272+ | Some (`String v) -> v
7373+ | _ -> failwith "link: invalid karakeep.remote_url"
7474+ in
7575+ let id =
7676+ match List.assoc_opt "id" k_fields with
7777+ | Some (`String v) -> v
7878+ | _ -> failwith "link: invalid karakeep.id"
7979+ in
8080+ let tags =
8181+ match List.assoc_opt "tags" k_fields with
8282+ | Some (`A tag_list) ->
8383+ List.filter_map (function `String t -> Some t | _ -> None) tag_list
8484+ | _ -> []
8585+ in
8686+ let metadata =
8787+ match List.assoc_opt "metadata" k_fields with
8888+ | Some (`O meta_fields) ->
8989+ List.filter_map (fun (k, v) ->
9090+ match v with `String value -> Some (k, value) | _ -> None
9191+ ) meta_fields
9292+ | _ -> []
9393+ in
9494+ Some { remote_url; id; tags; metadata }
9595+ | _ -> None
9696+ in
9797+ let bushel =
9898+ match List.assoc_opt "bushel" fields with
9999+ | Some (`O b_fields) ->
100100+ let slugs =
101101+ match List.assoc_opt "slugs" b_fields with
102102+ | Some (`A slug_list) ->
103103+ List.filter_map (function `String s -> Some s | _ -> None) slug_list
104104+ | _ -> []
105105+ in
106106+ let tags =
107107+ match List.assoc_opt "tags" b_fields with
108108+ | Some (`A tag_list) ->
109109+ List.filter_map (function `String t -> Some t | _ -> None) tag_list
110110+ | _ -> []
111111+ in
112112+ Some { slugs; tags }
113113+ | _ -> None
114114+ in
115115+ { url; date; description; karakeep; bushel }
116116+ | _ -> failwith "link: invalid yaml"
117117+118118+(** {1 YAML Serialization} *)
119119+120120+let to_yaml t =
121121+ let (year, month, day) = t.date in
122122+ let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in
123123+124124+ let base_fields = [
125125+ ("url", `String t.url);
126126+ ("date", `String date_str);
127127+ ] @
128128+ (if t.description = "" then [] else [("description", `String t.description)])
129129+ in
130130+131131+ let karakeep_fields =
132132+ match t.karakeep with
133133+ | Some { remote_url; id; tags; metadata } ->
134134+ let karakeep_obj = [
135135+ ("remote_url", `String remote_url);
136136+ ("id", `String id);
137137+ ] in
138138+ let karakeep_obj =
139139+ if tags = [] then karakeep_obj
140140+ else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj
141141+ in
142142+ let karakeep_obj =
143143+ if metadata = [] then karakeep_obj
144144+ else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj
145145+ in
146146+ [("karakeep", `O karakeep_obj)]
147147+ | None -> []
148148+ in
149149+150150+ let bushel_fields =
151151+ match t.bushel with
152152+ | Some { slugs; tags } ->
153153+ let bushel_obj = [] in
154154+ let bushel_obj =
155155+ if slugs = [] then bushel_obj
156156+ else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj
157157+ in
158158+ let bushel_obj =
159159+ if tags = [] then bushel_obj
160160+ else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj
161161+ in
162162+ if bushel_obj = [] then [] else [("bushel", `O bushel_obj)]
163163+ | None -> []
164164+ in
165165+166166+ `O (base_fields @ karakeep_fields @ bushel_fields)
167167+168168+(** {1 File Operations} *)
169169+170170+let load_links_file path =
171171+ try
172172+ let yaml_str = In_channel.(with_open_bin path input_all) in
173173+ match Yamlrw.of_string yaml_str with
174174+ | `A links -> List.map t_of_yaml links
175175+ | _ -> []
176176+ with _ -> []
177177+178178+let save_links_file path links =
179179+ let yaml = `A (List.map to_yaml links) in
180180+ let yaml_str = Yamlrw.to_string yaml in
181181+ let oc = open_out path in
182182+ output_string oc yaml_str;
183183+ close_out oc
184184+185185+(** {1 Merging} *)
186186+187187+let merge_links ?(prefer_new_date=false) existing new_links =
188188+ let links_by_url = Hashtbl.create (List.length existing) in
189189+190190+ List.iter (fun link -> Hashtbl.replace links_by_url link.url link) existing;
191191+192192+ List.iter (fun new_link ->
193193+ match Hashtbl.find_opt links_by_url new_link.url with
194194+ | None ->
195195+ Hashtbl.add links_by_url new_link.url new_link
196196+ | Some old_link ->
197197+ let description =
198198+ if new_link.description <> "" then new_link.description
199199+ else old_link.description
200200+ in
201201+ let karakeep =
202202+ match new_link.karakeep, old_link.karakeep with
203203+ | Some new_k, Some old_k when new_k.remote_url = old_k.remote_url ->
204204+ let merged_metadata =
205205+ let meta_tbl = Hashtbl.create (List.length old_k.metadata) in
206206+ List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata;
207207+ List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata;
208208+ Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl []
209209+ in
210210+ let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in
211211+ Some { new_k with metadata = merged_metadata; tags = merged_tags }
212212+ | Some new_k, _ -> Some new_k
213213+ | None, old_k -> old_k
214214+ in
215215+ let bushel =
216216+ match new_link.bushel, old_link.bushel with
217217+ | Some new_b, Some old_b ->
218218+ let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in
219219+ let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in
220220+ Some { slugs = merged_slugs; tags = merged_tags }
221221+ | Some new_b, _ -> Some new_b
222222+ | None, old_b -> old_b
223223+ in
224224+ let date =
225225+ if prefer_new_date then new_link.date
226226+ else if compare new_link old_link > 0 then new_link.date
227227+ else old_link.date
228228+ in
229229+ let merged_link = { url = new_link.url; date; description; karakeep; bushel } in
230230+ Hashtbl.replace links_by_url new_link.url merged_link
231231+ ) new_links;
232232+233233+ Hashtbl.to_seq_values links_by_url
234234+ |> List.of_seq
235235+ |> List.sort compare
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Bushel Markdown extensions and utilities
77+88+ This module provides mappers to convert Bushel markdown extensions to different
99+ output formats. Bushel extends standard markdown with:
1010+1111+ - [:slug] - Links to bushel entries by slug
1212+ - [@@handle] - Links to contacts by handle
1313+ - [##tag] - Tag references
1414+1515+ Two main mapper modes:
1616+ - Sidenote mode for the main website (with previews)
1717+ - Plain HTML mode for feeds and simple output
1818+*)
1919+2020+(** {1 Link Detection} *)
2121+2222+let is_bushel_slug = String.starts_with ~prefix:":"
2323+let is_tag_slug link =
2424+ String.starts_with ~prefix:"##" link &&
2525+ not (String.starts_with ~prefix:"###" link)
2626+let is_type_filter_slug = String.starts_with ~prefix:"###"
2727+let is_contact_slug = String.starts_with ~prefix:"@"
2828+2929+let strip_handle s =
3030+ if String.length s = 0 then s
3131+ else if s.[0] = '@' || s.[0] = ':' then
3232+ String.sub s 1 (String.length s - 1)
3333+ else if String.length s > 1 && s.[0] = '#' && s.[1] = '#' then
3434+ String.sub s 2 (String.length s - 2)
3535+ else s
3636+3737+(** {1 Custom Link Resolution} *)
3838+3939+let authorlink = Cmarkit.Meta.key ()
4040+let sluglink = Cmarkit.Meta.key ()
4141+4242+let make_authorlink label =
4343+ let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in
4444+ Cmarkit.Label.with_meta meta label
4545+4646+let make_sluglink label =
4747+ let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in
4848+ Cmarkit.Label.with_meta meta label
4949+5050+(** Custom label resolver for Bushel links *)
5151+let with_bushel_links = function
5252+ | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
5353+ | `Ref (_, _, (Some _ as def)) -> def
5454+ | `Ref (_, ref, None) ->
5555+ let txt = Cmarkit.Label.key ref in
5656+ if String.length txt = 0 then None
5757+ else match txt.[0] with
5858+ | '@' -> Some (make_authorlink ref)
5959+ | ':' -> Some (make_sluglink ref)
6060+ | '#' -> if String.length txt > 1 && txt.[1] = '#' then Some (make_sluglink ref) else None
6161+ | _ -> None
6262+6363+(** {1 Text Extraction} *)
6464+6565+let text_of_inline lb =
6666+ Cmarkit.Inline.to_plain_text ~break_on_soft:false lb
6767+ |> fun r -> String.concat "\n" (List.map (String.concat "") r)
6868+6969+(** {1 Link Target Detection} *)
7070+7171+let link_target_is_bushel ?slugs lb =
7272+ let open Cmarkit in
7373+ let ref = Inline.Link.reference lb in
7474+ match ref with
7575+ | `Inline (ld, _) ->
7676+ let dest = Link_definition.dest ld in
7777+ (match dest with
7878+ | Some (url, _) when is_bushel_slug url ->
7979+ (match slugs with Some s -> Hashtbl.replace s url () | _ -> ());
8080+ Some (url, Inline.Link.text lb |> text_of_inline)
8181+ | Some (url, _) when is_tag_slug url ->
8282+ Some (url, Inline.Link.text lb |> text_of_inline)
8383+ | Some (url, _) when is_contact_slug url ->
8484+ Some (url, Inline.Link.text lb |> text_of_inline)
8585+ | _ -> None)
8686+ | _ -> None
8787+8888+let image_target_is_bushel lb =
8989+ let open Cmarkit in
9090+ let ref = Inline.Link.reference lb in
9191+ match ref with
9292+ | `Inline (ld, _) ->
9393+ let dest = Link_definition.dest ld in
9494+ (match dest with
9595+ | Some (url, _) when is_bushel_slug url ->
9696+ let alt = Link_definition.title ld in
9797+ let dir =
9898+ Inline.Link.text lb
9999+ |> Inline.to_plain_text ~break_on_soft:false
100100+ |> fun r -> String.concat "\n" (List.map (String.concat "") r)
101101+ in
102102+ Some (url, alt, dir)
103103+ | _ -> None)
104104+ | _ -> None
105105+106106+(** {1 Link-Only Mapper}
107107+108108+ Converts Bushel links to regular HTML links without sidenotes.
109109+ Used for Atom feeds, RSS, search indexing. *)
110110+111111+let make_link_only_mapper entries =
112112+ let open Cmarkit in
113113+ fun _m ->
114114+ function
115115+ | Inline.Link (lb, meta) ->
116116+ (match link_target_is_bushel lb with
117117+ | Some (url, title) ->
118118+ let s = strip_handle url in
119119+ let dest = Bushel_entry.lookup_site_url entries s in
120120+ let link_text =
121121+ if is_bushel_slug title then
122122+ match Bushel_entry.lookup entries (strip_handle title) with
123123+ | Some ent -> Bushel_entry.title ent
124124+ | None -> title
125125+ else title
126126+ in
127127+ let txt = Inline.Text (link_text, meta) in
128128+ let ld = Link_definition.make ~dest:(dest, meta) () in
129129+ let ll = `Inline (ld, meta) in
130130+ let ld = Inline.Link.make txt ll in
131131+ Mapper.ret (Inline.Link (ld, meta))
132132+ | None ->
133133+ (match Inline.Link.referenced_label lb with
134134+ | Some l ->
135135+ let m = Label.meta l in
136136+ (match Meta.find authorlink m with
137137+ | Some () ->
138138+ let slug = Label.key l in
139139+ let s = strip_handle slug in
140140+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with
141141+ | Some c ->
142142+ let name = Bushel_contact.name c in
143143+ (match Bushel_contact.best_url c with
144144+ | Some dest ->
145145+ let txt = Inline.Text (name, meta) in
146146+ let ld = Link_definition.make ~dest:(dest, meta) () in
147147+ let ll = `Inline (ld, meta) in
148148+ let ld = Inline.Link.make txt ll in
149149+ Mapper.ret (Inline.Link (ld, meta))
150150+ | None ->
151151+ let txt = Inline.Text (name, meta) in
152152+ Mapper.ret txt)
153153+ | None ->
154154+ let title = Inline.Link.text lb |> text_of_inline in
155155+ let txt = Inline.Text (title, meta) in
156156+ Mapper.ret txt)
157157+ | None ->
158158+ (match Meta.find sluglink m with
159159+ | Some () ->
160160+ let slug = Label.key l in
161161+ if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug then
162162+ let s = strip_handle slug in
163163+ let dest = Bushel_entry.lookup_site_url entries s in
164164+ let title = Inline.Link.text lb |> text_of_inline in
165165+ let link_text =
166166+ let trimmed = String.trim title in
167167+ if is_bushel_slug trimmed then
168168+ match Bushel_entry.lookup entries (strip_handle trimmed) with
169169+ | Some ent -> Bushel_entry.title ent
170170+ | None -> title
171171+ else title
172172+ in
173173+ let txt = Inline.Text (link_text, meta) in
174174+ let ld = Link_definition.make ~dest:(dest, meta) () in
175175+ let ll = `Inline (ld, meta) in
176176+ let ld = Inline.Link.make txt ll in
177177+ Mapper.ret (Inline.Link (ld, meta))
178178+ else Mapper.default
179179+ | None -> Mapper.default))
180180+ | None -> Mapper.default))
181181+ | _ -> Mapper.default
182182+183183+(** {1 Slug Scanning} *)
184184+185185+let scan_for_slugs entries md =
186186+ let open Cmarkit in
187187+ let slugs = Hashtbl.create 7 in
188188+ let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
189189+ let inline_mapper _m = function
190190+ | Inline.Link (lb, _meta) ->
191191+ (match link_target_is_bushel ~slugs lb with
192192+ | Some _ -> Mapper.default
193193+ | None ->
194194+ (match Inline.Link.referenced_label lb with
195195+ | Some l ->
196196+ let m = Label.meta l in
197197+ (match Meta.find sluglink m with
198198+ | Some () ->
199199+ let slug = Label.key l in
200200+ if is_bushel_slug slug then
201201+ Hashtbl.replace slugs slug ();
202202+ Mapper.default
203203+ | None -> Mapper.default)
204204+ | None -> Mapper.default))
205205+ | _ -> Mapper.default
206206+ in
207207+ let mapper = Mapper.make ~inline:inline_mapper () in
208208+ let _ = Mapper.map_doc mapper doc in
209209+ ignore entries;
210210+ Hashtbl.fold (fun k () a -> k :: a) slugs []
211211+212212+(** {1 Link Extraction} *)
213213+214214+(** Extract all links from markdown text, including from images *)
215215+let extract_all_links text =
216216+ let open Cmarkit in
217217+ let doc = Doc.of_string ~resolver:with_bushel_links text in
218218+ let links = ref [] in
219219+220220+ let find_links_in_inline _mapper = function
221221+ | Inline.Link (lb, _) | Inline.Image (lb, _) ->
222222+ (match Inline.Link.reference lb with
223223+ | `Inline (ld, _) ->
224224+ (match Link_definition.dest ld with
225225+ | Some (url, _) ->
226226+ links := url :: !links;
227227+ Mapper.default
228228+ | None -> Mapper.default)
229229+ | `Ref _ ->
230230+ (match Inline.Link.referenced_label lb with
231231+ | Some l ->
232232+ let key = Label.key l in
233233+ if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' ||
234234+ (String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then
235235+ links := key :: !links;
236236+ Mapper.default
237237+ | None -> Mapper.default))
238238+ | _ -> Mapper.default
239239+ in
240240+241241+ let mapper = Mapper.make ~inline:find_links_in_inline () in
242242+ let _ = Mapper.map_doc mapper doc in
243243+244244+ let module StringSet = Set.Make(String) in
245245+ StringSet.elements (StringSet.of_list !links)
246246+247247+(** Extract external URLs from markdown content *)
248248+let extract_external_links md =
249249+ let open Cmarkit in
250250+ let urls = ref [] in
251251+252252+ let is_external_url url =
253253+ if is_bushel_slug url || is_tag_slug url then false
254254+ else
255255+ try
256256+ let uri = Uri.of_string url in
257257+ match Uri.scheme uri with
258258+ | Some s when s = "http" || s = "https" -> true
259259+ | Some _ -> true
260260+ | None -> false
261261+ with _ -> false
262262+ in
263263+264264+ let inline_mapper _ = function
265265+ | Inline.Link (lb, _) | Inline.Image (lb, _) ->
266266+ let ref = Inline.Link.reference lb in
267267+ (match ref with
268268+ | `Inline (ld, _) ->
269269+ (match Link_definition.dest ld with
270270+ | Some (url, _) when is_external_url url ->
271271+ urls := url :: !urls;
272272+ Mapper.default
273273+ | _ -> Mapper.default)
274274+ | `Ref (_, _, l) ->
275275+ let defs = Doc.defs (Doc.of_string ~strict:false md) in
276276+ (match Label.Map.find_opt (Label.key l) defs with
277277+ | Some (Link_definition.Def (ld, _)) ->
278278+ (match Link_definition.dest ld with
279279+ | Some (url, _) when is_external_url url ->
280280+ urls := url :: !urls
281281+ | _ -> ())
282282+ | _ -> ());
283283+ Mapper.default)
284284+ | Inline.Autolink (autolink, _) ->
285285+ let url = Inline.Autolink.link autolink |> fst in
286286+ if not (Inline.Autolink.is_email autolink) && is_external_url url then
287287+ urls := url :: !urls;
288288+ Mapper.default
289289+ | _ -> Mapper.default
290290+ in
291291+292292+ let mapper = Mapper.make ~inline:inline_mapper () in
293293+ let doc = Doc.of_string ~strict:false md in
294294+ let _ = Mapper.map_doc mapper doc in
295295+ List.sort_uniq String.compare !urls
296296+297297+(** {1 First Image Extraction} *)
298298+299299+let extract_first_image md =
300300+ let open Cmarkit in
301301+ let doc = Doc.of_string md in
302302+ let found_image = ref None in
303303+304304+ let find_image_in_inline _mapper = function
305305+ | Inline.Image (img, _) ->
306306+ (match Inline.Link.reference img with
307307+ | `Inline (ld, _) ->
308308+ (match Link_definition.dest ld with
309309+ | Some (url, _) when !found_image = None ->
310310+ found_image := Some url;
311311+ Mapper.default
312312+ | _ -> Mapper.default)
313313+ | _ -> Mapper.default)
314314+ | _ -> Mapper.default
315315+ in
316316+317317+ let mapper = Mapper.make ~inline:find_image_in_inline () in
318318+ let _ = Mapper.map_doc mapper doc in
319319+ !found_image
320320+321321+(** {1 Plaintext Conversion} *)
322322+323323+(** Convert markdown text to plain text, resolving bushel links to just their text *)
324324+let markdown_to_plaintext _entries text =
325325+ let open Cmarkit in
326326+ let doc = Doc.of_string ~resolver:with_bushel_links text in
327327+328328+ let rec block_to_text = function
329329+ | Block.Blank_line _ -> ""
330330+ | Block.Thematic_break _ -> "\n---\n"
331331+ | Block.Paragraph (p, _) ->
332332+ let inline = Block.Paragraph.inline p in
333333+ Inline.to_plain_text ~break_on_soft:false inline
334334+ |> List.map (String.concat "") |> String.concat "\n"
335335+ | Block.Heading (h, _) ->
336336+ let inline = Block.Heading.inline h in
337337+ Inline.to_plain_text ~break_on_soft:false inline
338338+ |> List.map (String.concat "") |> String.concat "\n"
339339+ | Block.Block_quote (bq, _) ->
340340+ let blocks = Block.Block_quote.block bq in
341341+ block_to_text blocks
342342+ | Block.List (l, _) ->
343343+ let items = Block.List'.items l in
344344+ List.map (fun (item, _) ->
345345+ let blocks = Block.List_item.block item in
346346+ block_to_text blocks
347347+ ) items |> String.concat "\n"
348348+ | Block.Code_block (cb, _) ->
349349+ let code = Block.Code_block.code cb in
350350+ String.concat "\n" (List.map Block_line.to_string code)
351351+ | Block.Html_block _ -> ""
352352+ | Block.Link_reference_definition _ -> ""
353353+ | Block.Ext_footnote_definition _ -> ""
354354+ | Block.Blocks (blocks, _) ->
355355+ List.map block_to_text blocks |> String.concat "\n"
356356+ | _ -> ""
357357+ in
358358+ let blocks = Doc.block doc in
359359+ block_to_text blocks
360360+361361+(** {1 Validation} *)
362362+363363+(** Validation mapper that collects broken references *)
364364+let make_validation_mapper entries broken_slugs broken_contacts =
365365+ let open Cmarkit in
366366+ fun _m ->
367367+ function
368368+ | Inline.Link (lb, _meta) ->
369369+ (match link_target_is_bushel lb with
370370+ | Some (url, _title) ->
371371+ let s = strip_handle url in
372372+ if is_contact_slug url then
373373+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) s with
374374+ | None -> Hashtbl.replace broken_contacts url ()
375375+ | Some _ -> ())
376376+ else if is_bushel_slug url then
377377+ (match Bushel_entry.lookup entries s with
378378+ | None -> Hashtbl.replace broken_slugs url ()
379379+ | Some _ -> ());
380380+ Mapper.default
381381+ | None ->
382382+ (match Inline.Link.referenced_label lb with
383383+ | Some l ->
384384+ let m = Label.meta l in
385385+ (match Meta.find authorlink m with
386386+ | Some () ->
387387+ let slug = Label.key l in
388388+ let handle = strip_handle slug in
389389+ (match Bushel_contact.find_by_handle (Bushel_entry.contacts entries) handle with
390390+ | None -> Hashtbl.replace broken_contacts slug ()
391391+ | Some _ -> ());
392392+ Mapper.default
393393+ | None ->
394394+ (match Meta.find sluglink m with
395395+ | None -> Mapper.default
396396+ | Some () ->
397397+ let slug = Label.key l in
398398+ if is_bushel_slug slug then begin
399399+ let s = strip_handle slug in
400400+ match Bushel_entry.lookup entries s with
401401+ | None -> Hashtbl.replace broken_slugs slug ()
402402+ | Some _ -> ()
403403+ end;
404404+ Mapper.default))
405405+ | None -> Mapper.default))
406406+ | _ -> Mapper.default
407407+408408+(** Validate all bushel references in markdown and return broken ones *)
409409+let validate_references entries md =
410410+ let open Cmarkit in
411411+ let broken_slugs = Hashtbl.create 7 in
412412+ let broken_contacts = Hashtbl.create 7 in
413413+ let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
414414+ let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in
415415+ let _ = Mapper.map_doc mapper doc in
416416+ let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in
417417+ let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in
418418+ (slugs, contacts)
+195
lib/bushel_note.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Note entry type for Bushel *)
77+88+type t = {
99+ title : string;
1010+ date : Ptime.date;
1111+ slug : string;
1212+ body : string;
1313+ tags : string list;
1414+ draft : bool;
1515+ updated : Ptime.date option;
1616+ sidebar : string option;
1717+ index_page : bool;
1818+ perma : bool; (** Permanent article that will receive a DOI *)
1919+ doi : string option; (** DOI identifier for permanent articles *)
2020+ synopsis : string option;
2121+ titleimage : string option;
2222+ via : (string * string) option; (** (label, url) for link-style notes *)
2323+ slug_ent : string option; (** Reference to another entry *)
2424+ source : string option; (** Source for news-style notes *)
2525+ url : string option; (** External URL for news-style notes *)
2626+ author : string option; (** Author for news-style notes *)
2727+ category : string option; (** Category for news-style notes *)
2828+ standardsite : string option; (** Standards body site reference *)
2929+}
3030+3131+type ts = t list
3232+3333+(** {1 Accessors} *)
3434+3535+let title { title; _ } = title
3636+let slug { slug; _ } = slug
3737+let body { body; _ } = body
3838+let tags { tags; _ } = tags
3939+let draft { draft; _ } = draft
4040+let sidebar { sidebar; _ } = sidebar
4141+let synopsis { synopsis; _ } = synopsis
4242+let perma { perma; _ } = perma
4343+let doi { doi; _ } = doi
4444+let titleimage { titleimage; _ } = titleimage
4545+let slug_ent { slug_ent; _ } = slug_ent
4646+let source { source; _ } = source
4747+let url { url; _ } = url
4848+let author { author; _ } = author
4949+let category { category; _ } = category
5050+let standardsite { standardsite; _ } = standardsite
5151+5252+let origdate { date; _ } = Bushel_types.ptime_of_date_exn date
5353+5454+let date { date; updated; _ } =
5555+ match updated with
5656+ | None -> date
5757+ | Some v -> v
5858+5959+let datetime v = Bushel_types.ptime_of_date_exn (date v)
6060+6161+let link { body; via; slug; _ } =
6262+ match body, via with
6363+ | "", Some (l, u) -> `Ext (l, u)
6464+ | "", None -> failwith (slug ^ ": note external without via, via-url")
6565+ | _, _ -> `Local slug
6666+6767+let words { body; _ } = Bushel_util.count_words body
6868+6969+(** {1 Comparison} *)
7070+7171+let compare a b = Ptime.compare (datetime b) (datetime a)
7272+7373+(** {1 Lookup} *)
7474+7575+let lookup slug notes = List.find_opt (fun n -> n.slug = slug) notes
7676+7777+(** {1 Jsont Codec} *)
7878+7979+let via_jsont : (string * string) option Jsont.t =
8080+ (* via is encoded as two separate fields: via and via-url *)
8181+ Jsont.null None (* Handled specially in of_frontmatter *)
8282+8383+let jsont ~default_date ~default_slug : t Jsont.t =
8484+ let open Jsont in
8585+ let open Jsont.Object in
8686+ let make title date slug tags draft updated index_page perma doi synopsis titleimage
8787+ slug_ent source url author category standardsite =
8888+ { title; date; slug; body = ""; tags; draft; updated; sidebar = None;
8989+ index_page; perma; doi; synopsis; titleimage; via = None;
9090+ slug_ent; source; url; author; category; standardsite }
9191+ in
9292+ map ~kind:"Note" make
9393+ |> mem "title" string ~enc:(fun n -> n.title)
9494+ |> mem "date" Bushel_types.ptime_date_jsont ~dec_absent:default_date ~enc:(fun n -> n.date)
9595+ |> mem "slug" string ~dec_absent:default_slug ~enc:(fun n -> n.slug)
9696+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun n -> n.tags)
9797+ |> mem "draft" bool ~dec_absent:false ~enc:(fun n -> n.draft)
9898+ |> mem "updated" (option Bushel_types.ptime_date_jsont) ~dec_absent:None
9999+ ~enc_omit:Option.is_none ~enc:(fun n -> n.updated)
100100+ |> mem "index_page" bool ~dec_absent:false ~enc:(fun n -> n.index_page)
101101+ |> mem "perma" bool ~dec_absent:false ~enc:(fun n -> n.perma)
102102+ |> mem "doi" Bushel_types.string_option_jsont ~dec_absent:None
103103+ ~enc_omit:Option.is_none ~enc:(fun n -> n.doi)
104104+ |> mem "synopsis" Bushel_types.string_option_jsont ~dec_absent:None
105105+ ~enc_omit:Option.is_none ~enc:(fun n -> n.synopsis)
106106+ |> mem "titleimage" Bushel_types.string_option_jsont ~dec_absent:None
107107+ ~enc_omit:Option.is_none ~enc:(fun n -> n.titleimage)
108108+ |> mem "slug_ent" Bushel_types.string_option_jsont ~dec_absent:None
109109+ ~enc_omit:Option.is_none ~enc:(fun n -> n.slug_ent)
110110+ |> mem "source" Bushel_types.string_option_jsont ~dec_absent:None
111111+ ~enc_omit:Option.is_none ~enc:(fun n -> n.source)
112112+ |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None
113113+ ~enc_omit:Option.is_none ~enc:(fun n -> n.url)
114114+ |> mem "author" Bushel_types.string_option_jsont ~dec_absent:None
115115+ ~enc_omit:Option.is_none ~enc:(fun n -> n.author)
116116+ |> mem "category" Bushel_types.string_option_jsont ~dec_absent:None
117117+ ~enc_omit:Option.is_none ~enc:(fun n -> n.category)
118118+ |> mem "standardsite" Bushel_types.string_option_jsont ~dec_absent:None
119119+ ~enc_omit:Option.is_none ~enc:(fun n -> n.standardsite)
120120+ |> finish
121121+122122+(** {1 Parsing} *)
123123+124124+let of_frontmatter (fm : Frontmatter.t) : (t, string) result =
125125+ (* Extract slug and date from filename to use as defaults *)
126126+ let default_slug, default_date =
127127+ match Frontmatter.fname fm with
128128+ | Some fname ->
129129+ (match Frontmatter.slug_of_fname fname with
130130+ | Ok (s, d) -> (s, Option.fold ~none:(1, 1, 1) ~some:Ptime.to_date d)
131131+ | Error _ -> ("", (1, 1, 1)))
132132+ | None -> ("", (1, 1, 1))
133133+ in
134134+ (* Get via fields manually since they're two separate fields *)
135135+ let via =
136136+ match Frontmatter.find_string "via" fm, Frontmatter.find_string "via-url" fm with
137137+ | Some a, Some b -> Some (a, b)
138138+ | None, Some b -> Some ("", b)
139139+ | _ -> None
140140+ in
141141+ match Frontmatter.decode (jsont ~default_date ~default_slug) fm with
142142+ | Error e -> Error e
143143+ | Ok n -> Ok { n with body = Frontmatter.body fm; via }
144144+145145+(** {1 Pretty Printing} *)
146146+147147+let pp ppf n =
148148+ let open Fmt in
149149+ pf ppf "@[<v>";
150150+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note";
151151+ pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n);
152152+ pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n);
153153+ let (year, month, day) = date n in
154154+ pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
155155+ (match n.updated with
156156+ | Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d
157157+ | None -> ());
158158+ pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n);
159159+ pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page;
160160+ pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n);
161161+ (match doi n with
162162+ | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
163163+ | None -> ());
164164+ (match synopsis n with
165165+ | Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn
166166+ | None -> ());
167167+ (match titleimage n with
168168+ | Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img
169169+ | None -> ());
170170+ (match n.via with
171171+ | Some (label, url) ->
172172+ if label <> "" then
173173+ pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url
174174+ else
175175+ pf ppf "%a: %a@," (styled `Bold string) "Via" string url
176176+ | None -> ());
177177+ (match standardsite n with
178178+ | Some site -> pf ppf "%a: %a@," (styled `Bold string) "Standard Site" string site
179179+ | None -> ());
180180+ let t = tags n in
181181+ if t <> [] then
182182+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
183183+ (match sidebar n with
184184+ | Some sb ->
185185+ pf ppf "@,";
186186+ pf ppf "%a:@," (styled `Bold string) "Sidebar";
187187+ pf ppf "%a@," string sb
188188+ | None -> ());
189189+ let bd = body n in
190190+ if bd <> "" then begin
191191+ pf ppf "@,";
192192+ pf ppf "%a:@," (styled `Bold string) "Body";
193193+ pf ppf "%a@," string bd;
194194+ end;
195195+ pf ppf "@]"
+287
lib/bushel_paper.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Paper entry type for Bushel *)
77+88+(** Classification of paper type *)
99+type classification = Full | Short | Preprint
1010+1111+let string_of_classification = function
1212+ | Full -> "full"
1313+ | Short -> "short"
1414+ | Preprint -> "preprint"
1515+1616+let classification_of_string = function
1717+ | "full" -> Full
1818+ | "short" -> Short
1919+ | "preprint" -> Preprint
2020+ | _ -> Full
2121+2222+type t = {
2323+ slug : string;
2424+ ver : string;
2525+ title : string;
2626+ authors : string list;
2727+ year : int;
2828+ month : int;
2929+ bibtype : string;
3030+ publisher : string;
3131+ booktitle : string;
3232+ journal : string;
3333+ institution : string;
3434+ pages : string;
3535+ volume : string option;
3636+ number : string option;
3737+ doi : string option;
3838+ url : string option;
3939+ video : string option;
4040+ isbn : string;
4141+ editor : string;
4242+ bib : string;
4343+ tags : string list;
4444+ projects : string list;
4545+ slides : string list;
4646+ abstract : string;
4747+ latest : bool;
4848+ selected : bool;
4949+ classification : classification option;
5050+ note : string option;
5151+}
5252+5353+type ts = t list
5454+5555+(** {1 Accessors} *)
5656+5757+let slug { slug; _ } = slug
5858+let title { title; _ } = title
5959+let authors { authors; _ } = authors
6060+let year { year; _ } = year
6161+let bibtype { bibtype; _ } = bibtype
6262+let publisher { publisher; _ } = publisher
6363+let booktitle { booktitle; _ } = booktitle
6464+let journal { journal; _ } = journal
6565+let institution { institution; _ } = institution
6666+let pages { pages; _ } = pages
6767+let volume { volume; _ } = volume
6868+let number { number; _ } = number
6969+let doi { doi; _ } = doi
7070+let url { url; _ } = url
7171+let video { video; _ } = video
7272+let isbn { isbn; _ } = isbn
7373+let editor { editor; _ } = editor
7474+let bib { bib; _ } = bib
7575+let tags { tags; _ } = tags
7676+let project_slugs { projects; _ } = projects
7777+let slides { slides; _ } = slides
7878+let abstract { abstract; _ } = abstract
7979+let selected { selected; _ } = selected
8080+let note { note; _ } = note
8181+let classification { classification; bibtype; journal; booktitle; title; _ } =
8282+ match classification with
8383+ | Some c -> c
8484+ | None ->
8585+ (* Heuristic classification based on metadata *)
8686+ let bibtype_lower = String.lowercase_ascii bibtype in
8787+ let journal_lower = String.lowercase_ascii journal in
8888+ let booktitle_lower = String.lowercase_ascii booktitle in
8989+ let title_lower = String.lowercase_ascii title in
9090+ let contains_any text patterns =
9191+ List.exists (fun p ->
9292+ try
9393+ let re = Re.Perl.compile_pat ~opts:[`Caseless] p in
9494+ Re.execp re text
9595+ with _ -> false
9696+ ) patterns
9797+ in
9898+ if contains_any journal_lower ["arxiv"] ||
9999+ contains_any booktitle_lower ["arxiv"] ||
100100+ bibtype_lower = "misc" || bibtype_lower = "techreport"
101101+ then Preprint
102102+ else if contains_any journal_lower ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
103103+ contains_any booktitle_lower ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
104104+ contains_any title_lower ["poster"]
105105+ then Short
106106+ else Full
107107+108108+let date { year; month; _ } = (year, month, 1)
109109+let datetime p = Bushel_types.ptime_of_date_exn (date p)
110110+111111+(** {1 Comparison} *)
112112+113113+let compare p2 p1 =
114114+ let d1 = try datetime p1 with _ -> Bushel_types.ptime_of_date_exn (1977, 1, 1) in
115115+ let d2 = try datetime p2 with _ -> Bushel_types.ptime_of_date_exn (1977, 1, 1) in
116116+ Ptime.compare d1 d2
117117+118118+(** {1 Lookup} *)
119119+120120+let slugs ts =
121121+ List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts
122122+123123+let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts
124124+125125+let get_papers ~slug ts =
126126+ List.filter (fun p -> p.slug = slug && p.latest <> true) ts |> List.sort compare
127127+128128+(** Convert bibtype to tag *)
129129+let tag_of_bibtype bt =
130130+ match String.lowercase_ascii bt with
131131+ | "article" -> "journal"
132132+ | "inproceedings" -> "conference"
133133+ | "techreport" -> "report"
134134+ | "misc" -> "preprint"
135135+ | "book" -> "book"
136136+ | x -> x
137137+138138+(** Compute version tracking *)
139139+let tv (l : t list) =
140140+ let h = Hashtbl.create 7 in
141141+ List.iter (fun { slug; ver; _ } ->
142142+ match Hashtbl.find_opt h slug with
143143+ | None -> Hashtbl.add h slug [ ver ]
144144+ | Some l ->
145145+ let l = ver :: l in
146146+ let l = List.sort String.compare l in
147147+ Hashtbl.replace h slug l
148148+ ) l;
149149+ List.map (fun p ->
150150+ let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in
151151+ let latest = p.ver = latest in
152152+ { p with latest }
153153+ ) l
154154+155155+let best_url p = url p
156156+157157+(** {1 Jsont Codec} *)
158158+159159+let month_of_string s =
160160+ match String.lowercase_ascii s with
161161+ | "jan" -> 1 | "feb" -> 2 | "mar" -> 3 | "apr" -> 4
162162+ | "may" -> 5 | "jun" -> 6 | "jul" -> 7 | "aug" -> 8
163163+ | "sep" -> 9 | "oct" -> 10 | "nov" -> 11 | "dec" -> 12
164164+ | _ -> 1
165165+166166+let jsont : t Jsont.t =
167167+ let open Jsont in
168168+ let open Jsont.Object in
169169+ let make title authors year month bibtype publisher booktitle journal institution
170170+ pages volume number doi url video isbn editor bib tags projects slides
171171+ selected classification note =
172172+ { slug = ""; ver = ""; title; authors; year; month; bibtype; publisher; booktitle;
173173+ journal; institution; pages; volume; number; doi; url; video; isbn; editor; bib;
174174+ tags; projects; slides; abstract = ""; latest = false; selected;
175175+ classification; note }
176176+ in
177177+ map ~kind:"Paper" make
178178+ |> mem "title" string ~enc:(fun p -> p.title)
179179+ |> mem "author" (list string) ~dec_absent:[] ~enc:(fun p -> p.authors)
180180+ |> mem "year" (of_of_string ~kind:"year" (fun s -> Ok (int_of_string s)) ~enc:string_of_int)
181181+ ~enc:(fun p -> p.year)
182182+ |> mem "month" (of_of_string ~kind:"month" (fun s -> Ok (month_of_string s)) ~enc:(fun m ->
183183+ match m with 1 -> "jan" | 2 -> "feb" | 3 -> "mar" | 4 -> "apr"
184184+ | 5 -> "may" | 6 -> "jun" | 7 -> "jul" | 8 -> "aug"
185185+ | 9 -> "sep" | 10 -> "oct" | 11 -> "nov" | 12 -> "dec" | _ -> "jan"))
186186+ ~dec_absent:1 ~enc:(fun p -> p.month)
187187+ |> mem "bibtype" string ~enc:(fun p -> p.bibtype)
188188+ |> mem "publisher" string ~dec_absent:"" ~enc:(fun p -> p.publisher)
189189+ |> mem "booktitle" string ~dec_absent:"" ~enc:(fun p -> p.booktitle)
190190+ |> mem "journal" string ~dec_absent:"" ~enc:(fun p -> p.journal)
191191+ |> mem "institution" string ~dec_absent:"" ~enc:(fun p -> p.institution)
192192+ |> mem "pages" string ~dec_absent:"" ~enc:(fun p -> p.pages)
193193+ |> mem "volume" Bushel_types.string_option_jsont ~dec_absent:None
194194+ ~enc_omit:Option.is_none ~enc:(fun p -> p.volume)
195195+ |> mem "number" Bushel_types.string_option_jsont ~dec_absent:None
196196+ ~enc_omit:Option.is_none ~enc:(fun p -> p.number)
197197+ |> mem "doi" Bushel_types.string_option_jsont ~dec_absent:None
198198+ ~enc_omit:Option.is_none ~enc:(fun p -> p.doi)
199199+ |> mem "url" Bushel_types.string_option_jsont ~dec_absent:None
200200+ ~enc_omit:Option.is_none ~enc:(fun p -> p.url)
201201+ |> mem "video" Bushel_types.string_option_jsont ~dec_absent:None
202202+ ~enc_omit:Option.is_none ~enc:(fun p -> p.video)
203203+ |> mem "isbn" string ~dec_absent:"" ~enc:(fun p -> p.isbn)
204204+ |> mem "editor" string ~dec_absent:"" ~enc:(fun p -> p.editor)
205205+ |> mem "bib" string ~dec_absent:"" ~enc:(fun p -> p.bib)
206206+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun p -> p.tags)
207207+ |> mem "projects" (list string) ~dec_absent:[] ~enc:(fun p -> p.projects)
208208+ |> mem "slides" (list string) ~dec_absent:[] ~enc:(fun p -> p.slides)
209209+ |> mem "selected" bool ~dec_absent:false ~enc:(fun p -> p.selected)
210210+ |> mem "classification" (option (of_of_string ~kind:"classification"
211211+ (fun s -> Ok (classification_of_string s)) ~enc:string_of_classification))
212212+ ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun p -> p.classification)
213213+ |> mem "note" Bushel_types.string_option_jsont ~dec_absent:None
214214+ ~enc_omit:Option.is_none ~enc:(fun p -> p.note)
215215+ |> finish
216216+217217+(** {1 Parsing} *)
218218+219219+let of_frontmatter ~slug ~ver (fm : Frontmatter.t) : (t, string) result =
220220+ match Frontmatter.decode jsont fm with
221221+ | Error e -> Error e
222222+ | Ok p ->
223223+ (* Compute full tags including bibtype and projects *)
224224+ let keywords = Frontmatter.find_strings "keywords" fm in
225225+ let all_tags =
226226+ List.flatten [p.tags; keywords; [tag_of_bibtype p.bibtype]; p.projects]
227227+ in
228228+ Ok { p with
229229+ slug;
230230+ ver;
231231+ abstract = Frontmatter.body fm;
232232+ tags = all_tags }
233233+234234+(** {1 Pretty Printing} *)
235235+236236+let pp ppf p =
237237+ let open Fmt in
238238+ pf ppf "@[<v>";
239239+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper";
240240+ pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p);
241241+ pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver;
242242+ pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
243243+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
244244+ pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p);
245245+ pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p);
246246+ (match doi p with
247247+ | Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
248248+ | None -> ());
249249+ (match url p with
250250+ | Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
251251+ | None -> ());
252252+ (match video p with
253253+ | Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v
254254+ | None -> ());
255255+ let projs = project_slugs p in
256256+ if projs <> [] then
257257+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs;
258258+ let sl = slides p in
259259+ if sl <> [] then
260260+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl;
261261+ (match bibtype p with
262262+ | "article" ->
263263+ pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p);
264264+ (match volume p with
265265+ | Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol
266266+ | None -> ());
267267+ (match number p with
268268+ | Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss
269269+ | None -> ());
270270+ let pgs = pages p in
271271+ if pgs <> "" then
272272+ pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
273273+ | "inproceedings" ->
274274+ pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p);
275275+ let pgs = pages p in
276276+ if pgs <> "" then
277277+ pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
278278+ | "techreport" ->
279279+ pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p);
280280+ (match number p with
281281+ | Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num
282282+ | None -> ());
283283+ | _ -> ());
284284+ pf ppf "@,";
285285+ pf ppf "%a:@," (styled `Bold string) "Abstract";
286286+ pf ppf "%a@," (styled `Faint string) (abstract p);
287287+ pf ppf "@]"
+100
lib/bushel_project.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Project entry type for Bushel *)
77+88+type t = {
99+ slug : string;
1010+ title : string;
1111+ start : int; (** Start year *)
1212+ finish : int option; (** End year, None if ongoing *)
1313+ tags : string list;
1414+ ideas : string; (** Ideas page reference *)
1515+ body : string;
1616+}
1717+1818+type ts = t list
1919+2020+(** {1 Accessors} *)
2121+2222+let slug { slug; _ } = slug
2323+let title { title; _ } = title
2424+let start { start; _ } = start
2525+let finish { finish; _ } = finish
2626+let tags { tags; _ } = tags
2727+let ideas { ideas; _ } = ideas
2828+let body { body; _ } = body
2929+3030+(** {1 Comparison} *)
3131+3232+let compare a b =
3333+ match Int.compare a.start b.start with
3434+ | 0 -> Int.compare (Option.value ~default:9999 b.finish) (Option.value ~default:9999 a.finish)
3535+ | n -> n
3636+3737+(** {1 Lookup} *)
3838+3939+let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects
4040+4141+(** {1 Parsing} *)
4242+4343+let of_frontmatter (fm : Frontmatter.t) : (t, string) result =
4444+ (* Extract slug from filename *)
4545+ let slug =
4646+ match Frontmatter.fname fm with
4747+ | Some fname ->
4848+ (match Frontmatter.slug_of_fname fname with
4949+ | Ok (s, _) -> s
5050+ | Error _ -> "")
5151+ | None -> ""
5252+ in
5353+ (* Extract date to get start year *)
5454+ let start =
5555+ match Frontmatter.find "date" fm with
5656+ | Some (`String s) ->
5757+ (try
5858+ match String.split_on_char '-' s with
5959+ | y :: _ -> int_of_string y
6060+ | _ -> 2000
6161+ with _ -> 2000)
6262+ | _ -> 2000
6363+ in
6464+ (* Extract finish year *)
6565+ let finish =
6666+ match Frontmatter.find_string "finish" fm with
6767+ | Some s ->
6868+ (try
6969+ match String.split_on_char '-' s with
7070+ | y :: _ -> Some (int_of_string y)
7171+ | _ -> None
7272+ with _ -> None)
7373+ | None -> None
7474+ in
7575+ let title = Frontmatter.find_string "title" fm |> Option.value ~default:"" in
7676+ let tags = Frontmatter.find_strings "tags" fm in
7777+ let ideas = Frontmatter.find_string "ideas" fm |> Option.value ~default:"" in
7878+ let body = Frontmatter.body fm in
7979+ Ok { slug; title; start; finish; tags; ideas; body }
8080+8181+(** {1 Pretty Printing} *)
8282+8383+let pp ppf p =
8484+ let open Fmt in
8585+ pf ppf "@[<v>";
8686+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project";
8787+ pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug;
8888+ pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
8989+ pf ppf "%a: %d@," (styled `Bold string) "Start" p.start;
9090+ (match p.finish with
9191+ | Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year
9292+ | None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish");
9393+ let t = tags p in
9494+ if t <> [] then
9595+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
9696+ pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p);
9797+ pf ppf "@,";
9898+ pf ppf "%a:@," (styled `Bold string) "Body";
9999+ pf ppf "%a@," string (body p);
100100+ pf ppf "@]"
+88
lib/bushel_tags.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Tag system for Bushel entries *)
77+88+type t =
99+ [ `Slug of string (** :foo points to the specific slug foo *)
1010+ | `Contact of string (** \@foo points to contact foo *)
1111+ | `Set of string (** #papers points to all Paper entries *)
1212+ | `Text of string (** foo points to a free text "foo" *)
1313+ | `Year of int (** a number between 1900--2100 is interpreted as a year *)
1414+ ]
1515+1616+(** {1 Predicates} *)
1717+1818+let is_text = function `Text _ -> true | _ -> false
1919+let is_slug = function `Slug _ -> true | _ -> false
2020+let is_contact = function `Contact _ -> true | _ -> false
2121+let is_set = function `Set _ -> true | _ -> false
2222+let is_year = function `Year _ -> true | _ -> false
2323+2424+(** {1 Parsing} *)
2525+2626+let of_string s : t =
2727+ if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s);
2828+ match s.[0] with
2929+ | ':' ->
3030+ let slug = String.sub s 1 (String.length s - 1) in
3131+ `Slug slug
3232+ | '@' ->
3333+ let handle = String.sub s 1 (String.length s - 1) in
3434+ `Contact handle
3535+ | '#' ->
3636+ let cl = String.sub s 1 (String.length s - 1) in
3737+ `Set cl
3838+ | _ ->
3939+ (try
4040+ let x = int_of_string s in
4141+ if x > 1900 && x < 2100 then `Year x else `Text s
4242+ with _ -> `Text s)
4343+4444+let of_string_list l = List.map of_string l
4545+4646+(** {1 Serialization} *)
4747+4848+let to_string = function
4949+ | `Slug t -> ":" ^ t
5050+ | `Contact c -> "@" ^ c
5151+ | `Set s -> "#" ^ s
5252+ | `Text t -> t
5353+ | `Year y -> string_of_int y
5454+5555+let to_raw_string = function
5656+ | `Slug t -> t
5757+ | `Contact c -> c
5858+ | `Set s -> s
5959+ | `Text t -> t
6060+ | `Year y -> string_of_int y
6161+6262+(** {1 Pretty Printing} *)
6363+6464+let pp ppf t = Fmt.string ppf (to_string t)
6565+6666+(** {1 Tag Filtering} *)
6767+6868+let mentions tags =
6969+ List.filter (function
7070+ | `Contact _ | `Slug _ -> true
7171+ | _ -> false
7272+ ) tags
7373+7474+(** {1 Tag Counting} *)
7575+7676+let count_tags ?h fn vs =
7777+ let h = match h with
7878+ | Some h -> h
7979+ | None -> Hashtbl.create 42
8080+ in
8181+ List.iter (fun ent ->
8282+ List.iter (fun tag ->
8383+ match Hashtbl.find_opt h tag with
8484+ | Some num -> Hashtbl.replace h tag (num + 1)
8585+ | None -> Hashtbl.add h tag 1
8686+ ) (fn ent)
8787+ ) vs;
8888+ h
+96
lib/bushel_types.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Common types and Jsont codecs for Bushel *)
77+88+(** {1 Date Types} *)
99+1010+type date = Ptime.date
1111+(** A calendar date (year, month, day). *)
1212+1313+(** {1 Jsont Codecs} *)
1414+1515+let ptime_date_jsont : Ptime.date Jsont.t =
1616+ let dec s =
1717+ try
1818+ match String.split_on_char '-' s with
1919+ | [y; m; d] ->
2020+ let year = int_of_string y in
2121+ let month = int_of_string m in
2222+ let day = int_of_string d in
2323+ Ok (year, month, day)
2424+ | _ ->
2525+ Error (Printf.sprintf "Invalid date format: %s (expected YYYY-MM-DD)" s)
2626+ with _ ->
2727+ Error (Printf.sprintf "Invalid date: %s" s)
2828+ in
2929+ let enc (y, m, d) = Printf.sprintf "%04d-%02d-%02d" y m d in
3030+ Jsont.of_of_string ~kind:"Ptime.date" dec ~enc
3131+3232+let ptime_jsont : Ptime.t Jsont.t =
3333+ let dec s =
3434+ (* Try RFC3339 first *)
3535+ match Ptime.of_rfc3339 s with
3636+ | Ok (t, _, _) -> Ok t
3737+ | Error _ ->
3838+ (* Try date-only format *)
3939+ try
4040+ match String.split_on_char '-' s with
4141+ | [y; m; d] ->
4242+ let year = int_of_string y in
4343+ let month = int_of_string m in
4444+ let day = int_of_string d in
4545+ (match Ptime.of_date (year, month, day) with
4646+ | Some t -> Ok t
4747+ | None -> Error (Printf.sprintf "Invalid date: %s" s))
4848+ | _ ->
4949+ Error (Printf.sprintf "Invalid timestamp: %s" s)
5050+ with _ ->
5151+ Error (Printf.sprintf "Invalid timestamp: %s" s)
5252+ in
5353+ let enc t =
5454+ let (y, m, d), ((hh, mm, ss), _) = Ptime.to_date_time t in
5555+ if hh = 0 && mm = 0 && ss = 0 then
5656+ Printf.sprintf "%04d-%02d-%02d" y m d
5757+ else
5858+ Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" y m d hh mm ss
5959+ in
6060+ Jsont.of_of_string ~kind:"Ptime.t" dec ~enc
6161+6262+let ptime_option_jsont : Ptime.t option Jsont.t =
6363+ let null = Jsont.null None in
6464+ let some = Jsont.map ~dec:(fun t -> Some t) ~enc:(function Some t -> t | None -> assert false) ptime_jsont in
6565+ Jsont.any ~dec_null:null ~dec_string:some ~enc:(function None -> null | Some _ -> some) ()
6666+6767+let string_option_jsont : string option Jsont.t =
6868+ Jsont.option Jsont.string
6969+7070+(** {1 Helper Functions} *)
7171+7272+let ptime_of_date_exn date =
7373+ match Ptime.of_date date with
7474+ | Some t -> t
7575+ | None ->
7676+ let (y, m, d) = date in
7777+ failwith (Printf.sprintf "Invalid date: %04d-%02d-%02d" y m d)
7878+7979+let date_of_ptime t = Ptime.to_date t
8080+8181+let compare_dates (d1 : date) (d2 : date) =
8282+ let t1 = ptime_of_date_exn d1 in
8383+ let t2 = ptime_of_date_exn d2 in
8484+ Ptime.compare t1 t2
8585+8686+let format_date (y, m, d) =
8787+ Printf.sprintf "%04d-%02d-%02d" y m d
8888+8989+let month_name = function
9090+ | 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April"
9191+ | 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August"
9292+ | 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December"
9393+ | _ -> "Unknown"
9494+9595+let format_date_human (y, m, _d) =
9696+ Printf.sprintf "%s %d" (month_name m) y
+101
lib/bushel_util.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Utility functions for Bushel *)
77+88+(** Count words in a string. *)
99+let count_words (text : string) : int =
1010+ let len = String.length text in
1111+ let rec count_words_helper (index : int) (in_word : bool) (count : int) : int =
1212+ if index >= len then
1313+ if in_word then count + 1 else count
1414+ else
1515+ let char = String.get text index in
1616+ let is_whitespace =
1717+ Char.equal char ' '
1818+ || Char.equal char '\t'
1919+ || Char.equal char '\n'
2020+ || Char.equal char '\r'
2121+ in
2222+ if is_whitespace then
2323+ if in_word then count_words_helper (index + 1) false (count + 1)
2424+ else count_words_helper (index + 1) false count
2525+ else count_words_helper (index + 1) true count
2626+ in
2727+ count_words_helper 0 false 0
2828+2929+(** Get the first paragraph/hunk from text (up to double newline). *)
3030+let first_hunk s =
3131+ let lines = String.split_on_char '\n' s in
3232+ let rec aux acc = function
3333+ | [] -> String.concat "\n" (List.rev acc)
3434+ | "" :: "" :: _ -> String.concat "\n" (List.rev acc)
3535+ | line :: rest -> aux (line :: acc) rest
3636+ in
3737+ aux [] lines
3838+3939+(** Get first and last hunks from text. *)
4040+let first_and_last_hunks s =
4141+ let lines = String.split_on_char '\n' s in
4242+ let rec aux acc = function
4343+ | [] -> String.concat "\n" (List.rev acc), ""
4444+ | "" :: "" :: rest ->
4545+ String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest)
4646+ | line :: rest -> aux (line :: acc) rest
4747+ in
4848+ aux [] lines
4949+5050+(** Find all footnote definition lines in text. *)
5151+let find_footnote_lines s =
5252+ let lines = String.split_on_char '\n' s in
5353+ let is_footnote_def line =
5454+ String.length line > 3 &&
5555+ line.[0] = '[' &&
5656+ line.[1] = '^' &&
5757+ String.contains line ':' &&
5858+ let colon_pos = String.index line ':' in
5959+ colon_pos > 2 && line.[colon_pos - 1] = ']'
6060+ in
6161+ let is_continuation line =
6262+ String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
6363+ in
6464+ let rec collect_footnotes acc in_footnote = function
6565+ | [] -> List.rev acc
6666+ | line :: rest ->
6767+ if is_footnote_def line then
6868+ collect_footnotes (line :: acc) true rest
6969+ else if in_footnote && is_continuation line then
7070+ collect_footnotes (line :: acc) true rest
7171+ else
7272+ collect_footnotes acc false rest
7373+ in
7474+ collect_footnotes [] false lines
7575+7676+(** Augment first hunk with footnote definitions from last hunk. *)
7777+let first_hunk_with_footnotes s =
7878+ let first, last = first_and_last_hunks s in
7979+ let footnote_lines = find_footnote_lines last in
8080+ if footnote_lines = [] then first
8181+ else first ^ "\n\n" ^ String.concat "\n" footnote_lines
8282+8383+(** Trim leading/trailing whitespace and normalize multiple blank lines. *)
8484+let normalize_body s =
8585+ let trimmed = String.trim s in
8686+ (* Replace 3+ consecutive newlines with exactly 2 newlines *)
8787+ let re = Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')]) in
8888+ Re.replace_string re ~by:"\n\n" trimmed
8989+9090+(** Extract domain from URL. *)
9191+let extract_domain url =
9292+ try
9393+ let uri = Uri.of_string url in
9494+ match Uri.host uri with
9595+ | Some host -> host
9696+ | None -> "unknown"
9797+ with _ -> "unknown"
9898+9999+(** Check if a string is a valid URL. *)
100100+let is_url s =
101101+ String.starts_with ~prefix:"http://" s || String.starts_with ~prefix:"https://" s
+129
lib/bushel_video.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Video entry type for Bushel *)
77+88+type t = {
99+ slug : string;
1010+ title : string;
1111+ published_date : Ptime.t;
1212+ uuid : string;
1313+ description : string;
1414+ url : string;
1515+ talk : bool;
1616+ paper : string option;
1717+ project : string option;
1818+ tags : string list;
1919+}
2020+2121+type ts = t list
2222+2323+(** {1 Accessors} *)
2424+2525+let slug { slug; _ } = slug
2626+let title { title; _ } = title
2727+let uuid { uuid; _ } = uuid
2828+let url { url; _ } = url
2929+let description { description; _ } = description
3030+let body = description (* Alias for consistency *)
3131+let talk { talk; _ } = talk
3232+let paper { paper; _ } = paper
3333+let project { project; _ } = project
3434+let tags { tags; _ } = tags
3535+3636+let date { published_date; _ } = Ptime.to_date published_date
3737+let datetime { published_date; _ } = published_date
3838+3939+(** {1 Comparison} *)
4040+4141+let compare a b = Ptime.compare b.published_date a.published_date
4242+4343+(** {1 Lookup} *)
4444+4545+let lookup videos uuid = List.find_opt (fun v -> v.uuid = uuid) videos
4646+let lookup_by_slug videos slug = List.find_opt (fun v -> v.slug = slug) videos
4747+4848+(** {1 Jsont Codec} *)
4949+5050+let jsont : t Jsont.t =
5151+ let open Jsont in
5252+ let open Jsont.Object in
5353+ let make title published_date uuid url talk tags paper project =
5454+ { slug = uuid; title; published_date; uuid; description = ""; url;
5555+ talk; paper; project; tags }
5656+ in
5757+ map ~kind:"Video" make
5858+ |> mem "title" string ~enc:(fun v -> v.title)
5959+ |> mem "published_date" Bushel_types.ptime_jsont ~enc:(fun v -> v.published_date)
6060+ |> mem "uuid" string ~enc:(fun v -> v.uuid)
6161+ |> mem "url" string ~enc:(fun v -> v.url)
6262+ |> mem "talk" bool ~dec_absent:false ~enc:(fun v -> v.talk)
6363+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags)
6464+ |> mem "paper" Bushel_types.string_option_jsont ~dec_absent:None
6565+ ~enc_omit:Option.is_none ~enc:(fun v -> v.paper)
6666+ |> mem "project" Bushel_types.string_option_jsont ~dec_absent:None
6767+ ~enc_omit:Option.is_none ~enc:(fun v -> v.project)
6868+ |> finish
6969+7070+(** {1 Parsing} *)
7171+7272+let of_frontmatter (fm : Frontmatter.t) : (t, string) result =
7373+ match Frontmatter.decode jsont fm with
7474+ | Error e -> Error e
7575+ | Ok v ->
7676+ Ok { v with
7777+ slug = v.uuid;
7878+ description = Frontmatter.body fm }
7979+8080+(** {1 YAML Serialization} *)
8181+8282+let to_yaml t =
8383+ let open Yamlrw.Util in
8484+ let fields = [
8585+ ("title", string t.title);
8686+ ("description", string t.description);
8787+ ("url", string t.url);
8888+ ("uuid", string t.uuid);
8989+ ("slug", string t.slug);
9090+ ("published_date", string (Ptime.to_rfc3339 t.published_date));
9191+ ("talk", bool t.talk);
9292+ ("tags", strings t.tags);
9393+ ] in
9494+ let fields = match t.paper with
9595+ | None -> fields
9696+ | Some p -> ("paper", string p) :: fields
9797+ in
9898+ let fields = match t.project with
9999+ | None -> fields
100100+ | Some p -> ("project", string p) :: fields
101101+ in
102102+ obj fields
103103+104104+(** {1 Pretty Printing} *)
105105+106106+let pp ppf v =
107107+ let open Fmt in
108108+ pf ppf "@[<v>";
109109+ pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video";
110110+ pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v);
111111+ pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v);
112112+ pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v);
113113+ let (year, month, day) = date v in
114114+ pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
115115+ pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v);
116116+ pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v);
117117+ (match paper v with
118118+ | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p
119119+ | None -> ());
120120+ (match project v with
121121+ | Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p
122122+ | None -> ());
123123+ let t = tags v in
124124+ if t <> [] then
125125+ pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
126126+ pf ppf "@,";
127127+ pf ppf "%a:@," (styled `Bold string) "Description";
128128+ pf ppf "%a@," string v.description;
129129+ pf ppf "@]"
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Eio-based directory scanner and file loader for Bushel entries *)
77+88+let src = Logs.Src.create "bushel.loader" ~doc:"Bushel loader"
99+module Log = (val Logs.src_log src : Logs.LOG)
1010+1111+(** List markdown files in a directory *)
1212+let list_md_files fs dir =
1313+ let path = Eio.Path.(fs / dir) in
1414+ try
1515+ Eio.Path.read_dir path
1616+ |> List.filter (fun f -> Filename.check_suffix f ".md")
1717+ |> List.map (fun f -> Filename.concat dir f)
1818+ with
1919+ | Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) ->
2020+ Log.warn (fun m -> m "Directory not found: %s" dir);
2121+ []
2222+2323+(** Load and map files from a directory *)
2424+let map_category fs base subdir parse_fn =
2525+ let dir = Filename.concat base ("data/" ^ subdir) in
2626+ Log.debug (fun m -> m "Loading %s" subdir);
2727+ let files = list_md_files fs dir in
2828+ List.filter_map (fun path ->
2929+ match Frontmatter_eio.of_file fs path with
3030+ | Ok fm ->
3131+ (match parse_fn fm with
3232+ | Ok entry -> Some entry
3333+ | Error e ->
3434+ Log.err (fun m -> m "Error parsing %s: %s" path e);
3535+ None)
3636+ | Error e ->
3737+ Log.err (fun m -> m "Error reading %s: %s" path e);
3838+ None
3939+ ) files
4040+4141+(** Load contacts from data/contacts/ *)
4242+let load_contacts fs base =
4343+ map_category fs base "contacts" (fun fm ->
4444+ let handle =
4545+ match Frontmatter.fname fm with
4646+ | Some fname -> Filename.basename fname |> Filename.chop_extension
4747+ | None -> ""
4848+ in
4949+ Bushel.Contact.of_frontmatter ~handle fm
5050+ )
5151+5252+(** Load projects from data/projects/ *)
5353+let load_projects fs base =
5454+ map_category fs base "projects" Bushel.Project.of_frontmatter
5555+5656+(** Load notes from data/notes/ and data/news/ *)
5757+let load_notes fs base =
5858+ let notes_dir = map_category fs base "notes" Bushel.Note.of_frontmatter in
5959+ let news_dir = map_category fs base "news" Bushel.Note.of_frontmatter in
6060+ notes_dir @ news_dir
6161+6262+(** Load ideas from data/ideas/ *)
6363+let load_ideas fs base =
6464+ map_category fs base "ideas" Bushel.Idea.of_frontmatter
6565+6666+(** Load videos from data/videos/ *)
6767+let load_videos fs base =
6868+ map_category fs base "videos" Bushel.Video.of_frontmatter
6969+7070+(** Load papers from data/papers/ (nested directory structure) *)
7171+let load_papers fs base =
7272+ let papers_dir = Filename.concat base "data/papers" in
7373+ Log.debug (fun m -> m "Loading papers from %s" papers_dir);
7474+ let path = Eio.Path.(fs / papers_dir) in
7575+ let slug_dirs =
7676+ try
7777+ Eio.Path.read_dir path
7878+ |> List.filter (fun slug ->
7979+ try
8080+ let stat = Eio.Path.stat ~follow:true Eio.Path.(fs / papers_dir / slug) in
8181+ stat.kind = `Directory
8282+ with _ -> false)
8383+ with _ -> []
8484+ in
8585+ let papers = List.concat_map (fun slug ->
8686+ let slug_path = Filename.concat papers_dir slug in
8787+ let ver_files =
8888+ try
8989+ Eio.Path.(read_dir (fs / slug_path))
9090+ |> List.filter (fun f -> Filename.check_suffix f ".md")
9191+ with _ -> []
9292+ in
9393+ List.filter_map (fun ver_file ->
9494+ let ver = Filename.chop_extension ver_file in
9595+ let file_path = Filename.concat slug_path ver_file in
9696+ match Frontmatter_eio.of_file fs file_path with
9797+ | Ok fm ->
9898+ (match Bushel.Paper.of_frontmatter ~slug ~ver fm with
9999+ | Ok paper -> Some paper
100100+ | Error e ->
101101+ Log.err (fun m -> m "Error parsing paper %s/%s: %s" slug ver e);
102102+ None)
103103+ | Error e ->
104104+ Log.err (fun m -> m "Error reading paper %s/%s: %s" slug ver e);
105105+ None
106106+ ) ver_files
107107+ ) slug_dirs in
108108+ Bushel.Paper.tv papers
109109+110110+(** Load all entries from a base directory *)
111111+let rec load fs base =
112112+ Log.info (fun m -> m "Loading bushel data from %s" base);
113113+ let contacts = load_contacts fs base in
114114+ Log.info (fun m -> m "Loaded %d contacts" (List.length contacts));
115115+ let projects = load_projects fs base in
116116+ Log.info (fun m -> m "Loaded %d projects" (List.length projects));
117117+ let notes = load_notes fs base in
118118+ Log.info (fun m -> m "Loaded %d notes" (List.length notes));
119119+ let ideas = load_ideas fs base in
120120+ Log.info (fun m -> m "Loaded %d ideas" (List.length ideas));
121121+ let videos = load_videos fs base in
122122+ Log.info (fun m -> m "Loaded %d videos" (List.length videos));
123123+ let papers = load_papers fs base in
124124+ Log.info (fun m -> m "Loaded %d papers" (List.length papers));
125125+ let data_dir = Filename.concat base "data" in
126126+ let entries = Bushel.Entry.v ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir in
127127+ Log.info (fun m -> m "Building link graph");
128128+ let graph = build_link_graph entries in
129129+ Bushel.Link_graph.set_graph graph;
130130+ Log.info (fun m -> m "Load complete: %a" Bushel.Link_graph.pp graph);
131131+ entries
132132+133133+(** Build link graph from entries *)
134134+and build_link_graph entries =
135135+ let graph = Bushel.Link_graph.empty () in
136136+137137+ let add_internal_link source target target_type =
138138+ let link = { Bushel.Link_graph.source; target; target_type } in
139139+ graph.internal_links <- link :: graph.internal_links;
140140+ Bushel.Link_graph.add_to_set_hashtbl graph.outbound source target;
141141+ Bushel.Link_graph.add_to_set_hashtbl graph.backlinks target source
142142+ in
143143+144144+ let add_external_link source url =
145145+ let domain = Bushel.Util.extract_domain url in
146146+ let link = { Bushel.Link_graph.source; domain; url } in
147147+ graph.external_links <- link :: graph.external_links;
148148+ Bushel.Link_graph.add_to_set_hashtbl graph.external_by_entry source url;
149149+ Bushel.Link_graph.add_to_set_hashtbl graph.external_by_domain domain source
150150+ in
151151+152152+ (* Process each entry *)
153153+ List.iter (fun entry ->
154154+ let source_slug = Bushel.Entry.slug entry in
155155+ let md_content = Bushel.Entry.body entry in
156156+ let all_links = Bushel.Md.extract_all_links md_content in
157157+158158+ List.iter (fun link ->
159159+ if Bushel.Md.is_bushel_slug link then
160160+ let target_slug = Bushel.Md.strip_handle link in
161161+ (match Bushel.Entry.lookup entries target_slug with
162162+ | Some target_entry ->
163163+ let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in
164164+ add_internal_link source_slug target_slug target_type
165165+ | None -> ())
166166+ else if Bushel.Md.is_contact_slug link then
167167+ let handle = Bushel.Md.strip_handle link in
168168+ (match Bushel.Contact.find_by_handle (Bushel.Entry.contacts entries) handle with
169169+ | Some c ->
170170+ add_internal_link source_slug (Bushel.Contact.handle c) `Contact
171171+ | None -> ())
172172+ else if Bushel.Md.is_tag_slug link || Bushel.Md.is_type_filter_slug link then
173173+ () (* Skip tag links *)
174174+ else if String.starts_with ~prefix:"http://" link ||
175175+ String.starts_with ~prefix:"https://" link then
176176+ add_external_link source_slug link
177177+ ) all_links
178178+ ) (Bushel.Entry.all_entries entries);
179179+180180+ (* Process slug_ent references from notes *)
181181+ List.iter (fun note ->
182182+ match Bushel.Note.slug_ent note with
183183+ | Some target_slug ->
184184+ let source_slug = Bushel.Note.slug note in
185185+ (match Bushel.Entry.lookup entries target_slug with
186186+ | Some target_entry ->
187187+ let target_type = Bushel.Link_graph.entry_type_of_entry target_entry in
188188+ add_internal_link source_slug target_slug target_type
189189+ | None -> ())
190190+ | None -> ()
191191+ ) (Bushel.Entry.notes entries);
192192+193193+ (* Process project references from papers *)
194194+ List.iter (fun paper ->
195195+ let source_slug = Bushel.Paper.slug paper in
196196+ List.iter (fun project_slug ->
197197+ match Bushel.Entry.lookup entries project_slug with
198198+ | Some (`Project _) ->
199199+ add_internal_link source_slug project_slug `Project
200200+ | _ -> ()
201201+ ) (Bushel.Paper.project_slugs paper)
202202+ ) (Bushel.Entry.papers entries);
203203+204204+ (* Deduplicate links *)
205205+ let module LinkSet = Set.Make(struct
206206+ type t = Bushel.Link_graph.internal_link
207207+ let compare (a : t) (b : t) =
208208+ match String.compare a.source b.source with
209209+ | 0 -> String.compare a.target b.target
210210+ | n -> n
211211+ end) in
212212+213213+ let module ExtLinkSet = Set.Make(struct
214214+ type t = Bushel.Link_graph.external_link
215215+ let compare (a : t) (b : t) =
216216+ match String.compare a.source b.source with
217217+ | 0 -> String.compare a.url b.url
218218+ | n -> n
219219+ end) in
220220+221221+ graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links);
222222+ graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links);
223223+224224+ graph
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Simple HTTP client using curl via Eio.Process *)
77+88+let src = Logs.Src.create "bushel.http" ~doc:"HTTP client"
99+module Log = (val Logs.src_log src : Logs.LOG)
1010+1111+(** Run curl and capture stdout *)
1212+let get ~proc_mgr url =
1313+ Log.debug (fun m -> m "GET %s" url);
1414+ let stdout = Buffer.create 4096 in
1515+ try
1616+ Eio.Process.run proc_mgr
1717+ ~stdout:(Eio.Flow.buffer_sink stdout)
1818+ ["curl"; "-s"; "-L"; url];
1919+ Ok (Buffer.contents stdout)
2020+ with e ->
2121+ Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e))
2222+2323+let get_with_header ~proc_mgr ~header url =
2424+ Log.debug (fun m -> m "GET %s (with header)" url);
2525+ let stdout = Buffer.create 4096 in
2626+ try
2727+ Eio.Process.run proc_mgr
2828+ ~stdout:(Eio.Flow.buffer_sink stdout)
2929+ ["curl"; "-s"; "-L"; "-H"; header; url];
3030+ Ok (Buffer.contents stdout)
3131+ with e ->
3232+ Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e))
3333+3434+let post ~proc_mgr ~content_type ~body url =
3535+ Log.debug (fun m -> m "POST %s" url);
3636+ let stdout = Buffer.create 4096 in
3737+ try
3838+ Eio.Process.run proc_mgr
3939+ ~stdout:(Eio.Flow.buffer_sink stdout)
4040+ ["curl"; "-s"; "-L";
4141+ "-X"; "POST";
4242+ "-H"; "Content-Type: " ^ content_type;
4343+ "-d"; body;
4444+ url];
4545+ Ok (Buffer.contents stdout)
4646+ with e ->
4747+ Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e))
+130
lib_sync/bushel_immich.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Immich API client for contact face thumbnails *)
77+88+let src = Logs.Src.create "bushel.immich" ~doc:"Immich face thumbnails"
99+module Log = (val Logs.src_log src : Logs.LOG)
1010+1111+(** {1 Types} *)
1212+1313+type person = {
1414+ id : string;
1515+ name : string;
1616+ thumbnail_path : string option;
1717+}
1818+1919+type fetch_result =
2020+ | Ok of string (** Saved to path *)
2121+ | Skipped of string (** Already exists *)
2222+ | NotFound of string (** No match found *)
2323+ | Error of string (** Error message *)
2424+2525+(** {1 Jsont Codecs} *)
2626+2727+let person_jsont : person Jsont.t =
2828+ let open Jsont in
2929+ let open Object in
3030+ map ~kind:"person" (fun id name thumbnail_path -> { id; name; thumbnail_path })
3131+ |> mem "id" string ~enc:(fun p -> p.id)
3232+ |> mem "name" string ~enc:(fun p -> p.name)
3333+ |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun p -> p.thumbnail_path)
3434+ |> finish
3535+3636+let people_jsont = Jsont.list person_jsont
3737+3838+let decode_people json_str =
3939+ match Jsont_bytesrw.decode_string people_jsont json_str with
4040+ | Ok people -> Result.Ok people
4141+ | Error e -> Result.Error e
4242+4343+(** {1 Immich API} *)
4444+4545+let search_person ~proc_mgr ~endpoint ~api_key name =
4646+ let encoded_name = Uri.pct_encode name in
4747+ let url = Printf.sprintf "%s/api/search/person?name=%s" endpoint encoded_name in
4848+ let header = "X-Api-Key: " ^ api_key in
4949+5050+ match Bushel_http.get_with_header ~proc_mgr ~header url with
5151+ | Result.Error e -> Result.Error e
5252+ | Result.Ok body -> decode_people body
5353+5454+let download_thumbnail ~proc_mgr ~endpoint ~api_key person_id output_path =
5555+ let url = Printf.sprintf "%s/api/people/%s/thumbnail" endpoint person_id in
5656+ let header = "X-Api-Key: " ^ api_key in
5757+5858+ match Bushel_http.get_with_header ~proc_mgr ~header url with
5959+ | Result.Error e -> Result.Error e
6060+ | Result.Ok body ->
6161+ try
6262+ (* Ensure output directory exists *)
6363+ let dir = Filename.dirname output_path in
6464+ if not (Sys.file_exists dir) then
6565+ Unix.mkdir dir 0o755;
6666+ let oc = open_out_bin output_path in
6767+ output_string oc body;
6868+ close_out oc;
6969+ Result.Ok output_path
7070+ with e ->
7171+ Result.Error (Printf.sprintf "Failed to write file: %s" (Printexc.to_string e))
7272+7373+(** {1 Contact Face Fetching} *)
7474+7575+let fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact =
7676+ let names = Bushel.Contact.names contact in
7777+ let handle = Bushel.Contact.handle contact in
7878+ let output_path = Filename.concat output_dir (handle ^ ".jpg") in
7979+8080+ (* Skip if already exists *)
8181+ if Sys.file_exists output_path then begin
8282+ Log.debug (fun m -> m "Skipping %s: thumbnail already exists" handle);
8383+ Skipped output_path
8484+ end else begin
8585+ Log.info (fun m -> m "Fetching face for contact: %s" handle);
8686+8787+ (* Try each name until we find a match *)
8888+ let rec try_names = function
8989+ | [] ->
9090+ Log.warn (fun m -> m "No person found for contact %s" handle);
9191+ NotFound handle
9292+ | name :: rest ->
9393+ Log.debug (fun m -> m "Trying name: %s" name);
9494+ match search_person ~proc_mgr ~endpoint ~api_key name with
9595+ | Result.Error e ->
9696+ Log.err (fun m -> m "Search error for %s: %s" name e);
9797+ Error e
9898+ | Result.Ok [] ->
9999+ Log.debug (fun m -> m "No results for %s, trying next name" name);
100100+ try_names rest
101101+ | Result.Ok (person :: _) ->
102102+ Log.info (fun m -> m "Found match for %s: %s" name person.name);
103103+ match download_thumbnail ~proc_mgr ~endpoint ~api_key person.id output_path with
104104+ | Result.Ok path -> Ok path
105105+ | Result.Error e -> Error e
106106+ in
107107+ try_names names
108108+ end
109109+110110+let fetch_all_faces ~proc_mgr ~endpoint ~api_key ~output_dir contacts =
111111+ (* Ensure output directory exists *)
112112+ if not (Sys.file_exists output_dir) then
113113+ Unix.mkdir output_dir 0o755;
114114+115115+ let results = List.map (fun contact ->
116116+ let handle = Bushel.Contact.handle contact in
117117+ let result = fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact in
118118+ (handle, result)
119119+ ) contacts in
120120+121121+ (* Summary *)
122122+ let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in
123123+ let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in
124124+ let not_found_count = List.length (List.filter (fun (_, r) -> match r with NotFound _ -> true | _ -> false) results) in
125125+ let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in
126126+127127+ Log.info (fun m -> m "Face sync complete: %d ok, %d skipped, %d not found, %d errors"
128128+ ok_count skipped_count not_found_count error_count);
129129+130130+ results
+246
lib_sync/bushel_peertube.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** PeerTube API client for video metadata and thumbnails *)
77+88+let src = Logs.Src.create "bushel.peertube" ~doc:"PeerTube video sync"
99+module Log = (val Logs.src_log src : Logs.LOG)
1010+1111+(** {1 Types} *)
1212+1313+type video = {
1414+ id : int;
1515+ uuid : string;
1616+ name : string;
1717+ description : string option;
1818+ url : string;
1919+ embed_path : string;
2020+ published_at : Ptime.t;
2121+ originally_published_at : Ptime.t option;
2222+ thumbnail_path : string option;
2323+ tags : string list;
2424+}
2525+2626+type fetch_result =
2727+ | Ok of string
2828+ | Skipped of string
2929+ | Error of string
3030+3131+(** {1 Date Parsing} *)
3232+3333+let parse_date str =
3434+ match Ptime.of_rfc3339 str with
3535+ | Ok (date, _, _) -> date
3636+ | Error _ ->
3737+ Log.warn (fun m -> m "Could not parse date: %s" str);
3838+ Ptime.epoch
3939+4040+(** {1 Jsont Codecs} *)
4141+4242+let ptime_jsont =
4343+ Jsont.string |> Jsont.map ~dec:parse_date ~enc:(fun t ->
4444+ match Ptime.to_rfc3339 ~frac_s:0 t with
4545+ | s -> s)
4646+4747+let make_video ~id ~uuid ~name ~description ~url ~embed_path
4848+ ~published_at ~originally_published_at ~thumbnail_path ~tags =
4949+ { id; uuid; name; description; url; embed_path;
5050+ published_at; originally_published_at; thumbnail_path; tags }
5151+5252+let video_jsont : video Jsont.t =
5353+ let open Jsont in
5454+ let open Object in
5555+ map ~kind:"video" (fun id uuid name description url embed_path
5656+ published_at originally_published_at thumbnail_path tags ->
5757+ make_video ~id ~uuid ~name ~description ~url ~embed_path
5858+ ~published_at ~originally_published_at ~thumbnail_path ~tags)
5959+ |> mem "id" int ~enc:(fun v -> v.id)
6060+ |> mem "uuid" string ~enc:(fun v -> v.uuid)
6161+ |> mem "name" string ~enc:(fun v -> v.name)
6262+ |> mem "description" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.description)
6363+ |> mem "url" string ~enc:(fun v -> v.url)
6464+ |> mem "embedPath" string ~enc:(fun v -> v.embed_path)
6565+ |> mem "publishedAt" ptime_jsont ~enc:(fun v -> v.published_at)
6666+ |> mem "originallyPublishedAt" (some ptime_jsont) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.originally_published_at)
6767+ |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.thumbnail_path)
6868+ |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags)
6969+ |> finish
7070+7171+type channel_response = {
7272+ total : int;
7373+ data : video list;
7474+}
7575+7676+let channel_response_jsont : channel_response Jsont.t =
7777+ let open Jsont in
7878+ let open Object in
7979+ map ~kind:"channel_response" (fun total data -> { total; data })
8080+ |> mem "total" int ~enc:(fun r -> r.total)
8181+ |> mem "data" (list video_jsont) ~enc:(fun r -> r.data)
8282+ |> finish
8383+8484+(** {1 JSON decoding helpers} *)
8585+8686+let decode_video json_str =
8787+ match Jsont_bytesrw.decode_string video_jsont json_str with
8888+ | Ok v -> Result.Ok v
8989+ | Error e -> Result.Error e
9090+9191+let decode_channel_response json_str =
9292+ match Jsont_bytesrw.decode_string channel_response_jsont json_str with
9393+ | Ok r -> Result.Ok r
9494+ | Error e -> Result.Error e
9595+9696+(** {1 PeerTube API} *)
9797+9898+let fetch_video_details ~proc_mgr ~endpoint uuid =
9999+ let url = Printf.sprintf "%s/api/v1/videos/%s" endpoint uuid in
100100+ match Bushel_http.get ~proc_mgr url with
101101+ | Result.Error e -> Result.Error e
102102+ | Result.Ok body -> decode_video body
103103+104104+let fetch_channel_videos ~proc_mgr ~endpoint ~channel ?(count=20) ?(start=0) () =
105105+ let url = Printf.sprintf "%s/api/v1/video-channels/%s/videos?count=%d&start=%d"
106106+ endpoint channel count start in
107107+ match Bushel_http.get ~proc_mgr url with
108108+ | Result.Error _ -> (0, [])
109109+ | Result.Ok body ->
110110+ match decode_channel_response body with
111111+ | Result.Ok r -> (r.total, r.data)
112112+ | Result.Error _ -> (0, [])
113113+114114+let fetch_all_channel_videos ~proc_mgr ~endpoint ~channel ?(page_size=20) () =
115115+ let rec fetch_pages start acc =
116116+ let (total, videos) = fetch_channel_videos ~proc_mgr ~endpoint ~channel ~count:page_size ~start () in
117117+ let all = acc @ videos in
118118+ let fetched = start + List.length videos in
119119+ if fetched < total && List.length videos > 0 then
120120+ fetch_pages fetched all
121121+ else
122122+ all
123123+ in
124124+ fetch_pages 0 []
125125+126126+(** {1 Thumbnail Download} *)
127127+128128+let thumbnail_url endpoint video =
129129+ match video.thumbnail_path with
130130+ | Some path -> Some (endpoint ^ path)
131131+ | None -> None
132132+133133+let download_thumbnail ~proc_mgr ~endpoint video output_path =
134134+ match thumbnail_url endpoint video with
135135+ | None ->
136136+ Log.warn (fun m -> m "No thumbnail for video %s" video.uuid);
137137+ Error "No thumbnail available"
138138+ | Some url ->
139139+ match Bushel_http.get ~proc_mgr url with
140140+ | Result.Error e -> Error e
141141+ | Result.Ok body ->
142142+ try
143143+ let dir = Filename.dirname output_path in
144144+ if not (Sys.file_exists dir) then Unix.mkdir dir 0o755;
145145+ let oc = open_out_bin output_path in
146146+ output_string oc body;
147147+ close_out oc;
148148+ Ok output_path
149149+ with e ->
150150+ Error (Printf.sprintf "Failed to write: %s" (Printexc.to_string e))
151151+152152+(** {1 Videos Index (YAML)} *)
153153+154154+module VideoIndex = struct
155155+ (** Mapping of UUID -> server name *)
156156+ type t = (string, string) Hashtbl.t
157157+158158+ let empty () = Hashtbl.create 64
159159+160160+ let load_file path =
161161+ let index = empty () in
162162+ if Sys.file_exists path then begin
163163+ try
164164+ let ic = open_in path in
165165+ let rec read_lines () =
166166+ match input_line ic with
167167+ | line ->
168168+ (match Astring.String.cut ~sep:":" line with
169169+ | Some (uuid, server) ->
170170+ Hashtbl.add index (String.trim uuid) (String.trim server)
171171+ | None -> ());
172172+ read_lines ()
173173+ | exception End_of_file -> close_in ic
174174+ in
175175+ read_lines ()
176176+ with _ -> ()
177177+ end;
178178+ index
179179+180180+ let save_file path index =
181181+ let oc = open_out path in
182182+ output_string oc "# UUID -> PeerTube server name mapping\n";
183183+ Hashtbl.iter (fun uuid server ->
184184+ output_string oc (Printf.sprintf "%s: %s\n" uuid server)
185185+ ) index;
186186+ close_out oc
187187+188188+ let add index ~uuid ~server =
189189+ Hashtbl.replace index uuid server
190190+191191+ let find index uuid =
192192+ Hashtbl.find_opt index uuid
193193+194194+ let mem index uuid =
195195+ Hashtbl.mem index uuid
196196+197197+ let to_list index =
198198+ Hashtbl.fold (fun k v acc -> (k, v) :: acc) index []
199199+end
200200+201201+(** {1 Fetch Thumbnails from Index} *)
202202+203203+let fetch_thumbnails_from_index ~proc_mgr ~servers ~output_dir index =
204204+ (* Ensure output dir exists *)
205205+ if not (Sys.file_exists output_dir) then
206206+ Unix.mkdir output_dir 0o755;
207207+208208+ let server_map =
209209+ List.fold_left (fun acc (s : Bushel_config.peertube_server) ->
210210+ (s.name, s.endpoint) :: acc
211211+ ) [] servers
212212+ in
213213+214214+ let results = List.filter_map (fun (uuid, server_name) ->
215215+ let output_path = Filename.concat output_dir (uuid ^ ".jpg") in
216216+217217+ (* Skip if exists *)
218218+ if Sys.file_exists output_path then begin
219219+ Log.debug (fun m -> m "Skipping %s: thumbnail exists" uuid);
220220+ Some (uuid, Skipped output_path)
221221+ end else begin
222222+ match List.assoc_opt server_name server_map with
223223+ | None ->
224224+ Log.warn (fun m -> m "Unknown server %s for video %s" server_name uuid);
225225+ Some (uuid, Error (Printf.sprintf "Unknown server: %s" server_name))
226226+ | Some endpoint ->
227227+ Log.info (fun m -> m "Fetching thumbnail for %s from %s" uuid server_name);
228228+ match fetch_video_details ~proc_mgr ~endpoint uuid with
229229+ | Result.Error e ->
230230+ Some (uuid, Error e)
231231+ | Result.Ok video ->
232232+ match download_thumbnail ~proc_mgr ~endpoint video output_path with
233233+ | Ok path -> Some (uuid, Ok path)
234234+ | Skipped path -> Some (uuid, Skipped path)
235235+ | Error e -> Some (uuid, Error e)
236236+ end
237237+ ) (VideoIndex.to_list index) in
238238+239239+ let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in
240240+ let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in
241241+ let error_count = List.length (List.filter (fun (_, r) -> match r with Error _ -> true | _ -> false) results) in
242242+243243+ Log.info (fun m -> m "Video thumbnails: %d ok, %d skipped, %d errors"
244244+ ok_count skipped_count error_count);
245245+246246+ results
+299
lib_sync/bushel_sync.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Bushel sync orchestration
77+88+ {1 Re-exported Modules}
99+1010+ - {!Zotero} - DOI resolution via Zotero Translation Server
1111+ - {!Immich} - Contact face thumbnails from Immich
1212+ - {!Peertube} - Video thumbnails from PeerTube
1313+ - {!Http} - Simple HTTP client using curl
1414+*)
1515+1616+(** DOI resolution via Zotero Translation Server *)
1717+module Zotero = Bushel_zotero
1818+1919+(** Contact face thumbnails from Immich *)
2020+module Immich = Bushel_immich
2121+2222+(** Video metadata and thumbnails from PeerTube *)
2323+module Peertube = Bushel_peertube
2424+2525+(** Simple HTTP client using curl via Eio.Process *)
2626+module Http = Bushel_http
2727+2828+let src = Logs.Src.create "bushel.sync" ~doc:"Bushel sync pipeline"
2929+module Log = (val Logs.src_log src : Logs.LOG)
3030+3131+(** {1 Sync Steps} *)
3232+3333+type step =
3434+ | Images (** Rsync images from remote *)
3535+ | Srcsetter (** Run srcsetter on images *)
3636+ | Thumbs (** Generate paper thumbnails from PDFs *)
3737+ | Faces (** Fetch contact faces from Immich *)
3838+ | Videos (** Fetch video thumbnails from PeerTube *)
3939+ | Typesense (** Upload to Typesense *)
4040+4141+let string_of_step = function
4242+ | Images -> "images"
4343+ | Srcsetter -> "srcsetter"
4444+ | Thumbs -> "thumbs"
4545+ | Faces -> "faces"
4646+ | Videos -> "videos"
4747+ | Typesense -> "typesense"
4848+4949+let step_of_string = function
5050+ | "images" -> Some Images
5151+ | "srcsetter" -> Some Srcsetter
5252+ | "thumbs" -> Some Thumbs
5353+ | "faces" -> Some Faces
5454+ | "videos" -> Some Videos
5555+ | "typesense" -> Some Typesense
5656+ | _ -> None
5757+5858+let all_steps = [Images; Srcsetter; Thumbs; Faces; Videos]
5959+let all_steps_with_remote = all_steps @ [Typesense]
6060+6161+(** {1 Step Results} *)
6262+6363+type step_result = {
6464+ step : step;
6565+ success : bool;
6666+ message : string;
6767+ details : string list;
6868+}
6969+7070+let pp_result ppf r =
7171+ let status = if r.success then "OK" else "FAILED" in
7272+ Fmt.pf ppf "[%s] %s: %s" status (string_of_step r.step) r.message;
7373+ if r.details <> [] then begin
7474+ Fmt.pf ppf "@,";
7575+ List.iter (fun d -> Fmt.pf ppf " - %s@," d) r.details
7676+ end
7777+7878+(** {1 Rsync Images} *)
7979+8080+let sync_images ~proc_mgr config =
8181+ Log.info (fun m -> m "Syncing images from remote...");
8282+ let cmd = Bushel_config.rsync_command config in
8383+ Log.debug (fun m -> m "Running: %s" cmd);
8484+8585+ (* Ensure local directory exists *)
8686+ let local_dir = config.Bushel_config.local_source_dir in
8787+ if not (Sys.file_exists local_dir) then begin
8888+ Log.info (fun m -> m "Creating directory: %s" local_dir);
8989+ Unix.mkdir local_dir 0o755
9090+ end;
9191+9292+ try
9393+ let args = ["rsync"; "-avz";
9494+ Bushel_config.rsync_source config ^ "/";
9595+ local_dir ^ "/"] in
9696+ Eio.Process.run proc_mgr args;
9797+ { step = Images; success = true;
9898+ message = "Images synced from remote";
9999+ details = [] }
100100+ with e ->
101101+ { step = Images; success = false;
102102+ message = Printf.sprintf "Rsync failed: %s" (Printexc.to_string e);
103103+ details = [] }
104104+105105+(** {1 Srcsetter} *)
106106+107107+let run_srcsetter ~proc_mgr config =
108108+ Log.info (fun m -> m "Running srcsetter...");
109109+ let src_dir = config.Bushel_config.local_source_dir in
110110+ let dst_dir = config.Bushel_config.local_output_dir in
111111+112112+ (* Ensure output directory exists *)
113113+ if not (Sys.file_exists dst_dir) then begin
114114+ Log.info (fun m -> m "Creating directory: %s" dst_dir);
115115+ Unix.mkdir dst_dir 0o755
116116+ end;
117117+118118+ try
119119+ let args = ["srcsetter"; src_dir; dst_dir] in
120120+ Eio.Process.run proc_mgr args;
121121+ { step = Srcsetter; success = true;
122122+ message = "Srcsetter completed";
123123+ details = [] }
124124+ with e ->
125125+ { step = Srcsetter; success = false;
126126+ message = Printf.sprintf "Srcsetter failed: %s" (Printexc.to_string e);
127127+ details = [] }
128128+129129+(** {1 Paper Thumbnails} *)
130130+131131+let generate_paper_thumbnails ~proc_mgr config =
132132+ Log.info (fun m -> m "Generating paper thumbnails...");
133133+ let pdfs_dir = config.Bushel_config.paper_pdfs_dir in
134134+ let output_dir = Bushel_config.paper_thumbs_dir config in
135135+136136+ if not (Sys.file_exists pdfs_dir) then begin
137137+ Log.warn (fun m -> m "PDFs directory does not exist: %s" pdfs_dir);
138138+ { step = Thumbs; success = true;
139139+ message = "No PDFs directory";
140140+ details = [] }
141141+ end else begin
142142+ (* Ensure output directory exists *)
143143+ if not (Sys.file_exists output_dir) then
144144+ Unix.mkdir output_dir 0o755;
145145+146146+ let pdfs = Sys.readdir pdfs_dir |> Array.to_list
147147+ |> List.filter (fun f -> Filename.check_suffix f ".pdf") in
148148+149149+ let results = List.map (fun pdf_file ->
150150+ let slug = Filename.chop_extension pdf_file in
151151+ let pdf_path = Filename.concat pdfs_dir pdf_file in
152152+ let output_path = Filename.concat output_dir (slug ^ ".webp") in
153153+154154+ if Sys.file_exists output_path then begin
155155+ Log.debug (fun m -> m "Skipping %s: thumbnail exists" slug);
156156+ `Skipped slug
157157+ end else begin
158158+ Log.info (fun m -> m "Generating thumbnail for %s" slug);
159159+ try
160160+ (* ImageMagick command: render PDF at 600 DPI, crop top 50%, resize to 2048px *)
161161+ let args = [
162162+ "magick";
163163+ "-density"; "600";
164164+ "-quality"; "100";
165165+ pdf_path ^ "[0]"; (* First page only *)
166166+ "-gravity"; "North";
167167+ "-crop"; "100%x50%+0+0";
168168+ "-resize"; "2048x";
169169+ output_path
170170+ ] in
171171+ Eio.Process.run proc_mgr args;
172172+ `Ok slug
173173+ with e ->
174174+ Log.err (fun m -> m "Failed to generate thumbnail for %s: %s"
175175+ slug (Printexc.to_string e));
176176+ `Error slug
177177+ end
178178+ ) pdfs in
179179+180180+ let ok_count = List.fold_left (fun acc r -> match r with `Ok _ -> acc + 1 | _ -> acc) 0 results in
181181+ let skipped_count = List.fold_left (fun acc r -> match r with `Skipped _ -> acc + 1 | _ -> acc) 0 results in
182182+ let error_count = List.fold_left (fun acc r -> match r with `Error _ -> acc + 1 | _ -> acc) 0 results in
183183+184184+ { step = Thumbs; success = error_count = 0;
185185+ message = Printf.sprintf "%d generated, %d skipped, %d errors"
186186+ ok_count skipped_count error_count;
187187+ details = List.filter_map (fun r -> match r with `Error s -> Some s | _ -> None) results }
188188+ end
189189+190190+(** {1 Contact Faces} *)
191191+192192+let sync_faces ~proc_mgr config entries =
193193+ Log.info (fun m -> m "Syncing contact faces from Immich...");
194194+ let output_dir = Bushel_config.contact_faces_dir config in
195195+196196+ match Bushel_config.immich_api_key config with
197197+ | Error e ->
198198+ Log.warn (fun m -> m "Cannot read Immich API key: %s" e);
199199+ { step = Faces; success = false;
200200+ message = "Missing Immich API key";
201201+ details = [e] }
202202+ | Ok api_key ->
203203+ let contacts = Bushel.Entry.contacts entries in
204204+ let results = Bushel_immich.fetch_all_faces
205205+ ~proc_mgr
206206+ ~endpoint:config.immich_endpoint
207207+ ~api_key
208208+ ~output_dir
209209+ contacts in
210210+211211+ let ok_count = List.length (List.filter (fun (_, r) ->
212212+ match r with Bushel_immich.Ok _ -> true | _ -> false) results) in
213213+ let skipped_count = List.length (List.filter (fun (_, r) ->
214214+ match r with Bushel_immich.Skipped _ -> true | _ -> false) results) in
215215+ let error_count = List.length (List.filter (fun (_, r) ->
216216+ match r with Bushel_immich.Error _ -> true | _ -> false) results) in
217217+218218+ { step = Faces; success = true;
219219+ message = Printf.sprintf "%d fetched, %d skipped, %d errors"
220220+ ok_count skipped_count error_count;
221221+ details = List.filter_map (fun (h, r) ->
222222+ match r with Bushel_immich.Error e -> Some (h ^ ": " ^ e) | _ -> None
223223+ ) results }
224224+225225+(** {1 Video Thumbnails} *)
226226+227227+let sync_video_thumbnails ~proc_mgr config =
228228+ Log.info (fun m -> m "Syncing video thumbnails from PeerTube...");
229229+ let output_dir = Bushel_config.video_thumbs_dir config in
230230+ let videos_yml = Filename.concat config.data_dir "videos.yml" in
231231+232232+ let index = Bushel_peertube.VideoIndex.load_file videos_yml in
233233+ let count = List.length (Bushel_peertube.VideoIndex.to_list index) in
234234+235235+ if count = 0 then begin
236236+ Log.info (fun m -> m "No videos in index");
237237+ { step = Videos; success = true;
238238+ message = "No videos in index";
239239+ details = [] }
240240+ end else begin
241241+ let results = Bushel_peertube.fetch_thumbnails_from_index
242242+ ~proc_mgr
243243+ ~servers:config.peertube_servers
244244+ ~output_dir
245245+ index in
246246+247247+ let ok_count = List.length (List.filter (fun (_, r) ->
248248+ match r with Bushel_peertube.Ok _ -> true | _ -> false) results) in
249249+ let skipped_count = List.length (List.filter (fun (_, r) ->
250250+ match r with Bushel_peertube.Skipped _ -> true | _ -> false) results) in
251251+ let error_count = List.length (List.filter (fun (_, r) ->
252252+ match r with Bushel_peertube.Error _ -> true | _ -> false) results) in
253253+254254+ { step = Videos; success = true;
255255+ message = Printf.sprintf "%d fetched, %d skipped, %d errors"
256256+ ok_count skipped_count error_count;
257257+ details = List.filter_map (fun (uuid, r) ->
258258+ match r with Bushel_peertube.Error e -> Some (uuid ^ ": " ^ e) | _ -> None
259259+ ) results }
260260+ end
261261+262262+(** {1 Typesense Upload} *)
263263+264264+let upload_typesense config _entries =
265265+ Log.info (fun m -> m "Uploading to Typesense...");
266266+267267+ match Bushel_config.typesense_api_key config with
268268+ | Error e ->
269269+ { step = Typesense; success = false;
270270+ message = "Missing Typesense API key";
271271+ details = [e] }
272272+ | Ok _api_key ->
273273+ (* TODO: Implement actual Typesense upload using bushel-typesense *)
274274+ { step = Typesense; success = true;
275275+ message = "Typesense upload (not yet implemented)";
276276+ details = [] }
277277+278278+(** {1 Run Pipeline} *)
279279+280280+let run ~env ~config ~steps ~entries =
281281+ let proc_mgr = Eio.Stdenv.process_mgr env in
282282+283283+ let results = List.map (fun step ->
284284+ Log.info (fun m -> m "Running step: %s" (string_of_step step));
285285+ match step with
286286+ | Images -> sync_images ~proc_mgr config
287287+ | Srcsetter -> run_srcsetter ~proc_mgr config
288288+ | Thumbs -> generate_paper_thumbnails ~proc_mgr config
289289+ | Faces -> sync_faces ~proc_mgr config entries
290290+ | Videos -> sync_video_thumbnails ~proc_mgr config
291291+ | Typesense -> upload_typesense config entries
292292+ ) steps in
293293+294294+ (* Summary *)
295295+ let success_count = List.length (List.filter (fun r -> r.success) results) in
296296+ let total = List.length results in
297297+ Log.info (fun m -> m "Sync complete: %d/%d steps succeeded" success_count total);
298298+299299+ results
+330
lib_sync/bushel_zotero.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Zotero Translation Server client for DOI resolution *)
77+88+let src = Logs.Src.create "bushel.zotero" ~doc:"Zotero DOI resolution"
99+module Log = (val Logs.src_log src : Logs.LOG)
1010+1111+(** {1 Types} *)
1212+1313+type paper_metadata = {
1414+ title : string;
1515+ authors : string list;
1616+ year : int;
1717+ month : int;
1818+ bibtype : string;
1919+ publisher : string;
2020+ booktitle : string;
2121+ journal : string;
2222+ institution : string;
2323+ pages : string;
2424+ volume : string option;
2525+ number : string option;
2626+ doi : string option;
2727+ url : string option;
2828+ abstract : string option;
2929+ bib : string;
3030+}
3131+3232+(** {1 Month Parsing} *)
3333+3434+let month_of_string s =
3535+ match String.lowercase_ascii s with
3636+ | "jan" | "january" -> 1
3737+ | "feb" | "february" -> 2
3838+ | "mar" | "march" -> 3
3939+ | "apr" | "april" -> 4
4040+ | "may" -> 5
4141+ | "jun" | "june" -> 6
4242+ | "jul" | "july" -> 7
4343+ | "aug" | "august" -> 8
4444+ | "sep" | "september" -> 9
4545+ | "oct" | "october" -> 10
4646+ | "nov" | "november" -> 11
4747+ | "dec" | "december" -> 12
4848+ | _ -> 1
4949+5050+let string_of_month = function
5151+ | 1 -> "jan" | 2 -> "feb" | 3 -> "mar" | 4 -> "apr"
5252+ | 5 -> "may" | 6 -> "jun" | 7 -> "jul" | 8 -> "aug"
5353+ | 9 -> "sep" | 10 -> "oct" | 11 -> "nov" | 12 -> "dec"
5454+ | _ -> "jan"
5555+5656+(** {1 JSON Helpers for Zotero JSON}
5757+5858+ Zotero returns complex JSON with varying structure.
5959+ We use pattern matching on the generic json type. *)
6060+6161+type creator = {
6262+ first_name : string;
6363+ last_name : string;
6464+}
6565+6666+let creator_jsont : creator Jsont.t =
6767+ let open Jsont in
6868+ let open Object in
6969+ map ~kind:"creator" (fun first_name last_name -> { first_name; last_name })
7070+ |> mem "firstName" string ~dec_absent:"" ~enc:(fun c -> c.first_name)
7171+ |> mem "lastName" string ~dec_absent:"" ~enc:(fun c -> c.last_name)
7272+ |> finish
7373+7474+(** Extract string from generic JSON, returning None if missing or wrong type *)
7575+let rec find_in_json json path =
7676+ match path with
7777+ | [] -> Some json
7878+ | key :: path_rest ->
7979+ match json with
8080+ | Jsont.Object (mems, _) ->
8181+ let rec find_mem = function
8282+ | [] -> None
8383+ | ((name, _), value) :: mems_rest ->
8484+ if name = key then find_in_json value path_rest
8585+ else find_mem mems_rest
8686+ in
8787+ find_mem mems
8888+ | _ -> None
8989+9090+let get_string json path =
9191+ match find_in_json json path with
9292+ | Some (Jsont.String (s, _)) -> Some s
9393+ | _ -> None
9494+9595+let get_string_exn json path ~default =
9696+ get_string json path |> Option.value ~default
9797+9898+let get_int json path ~default =
9999+ match find_in_json json path with
100100+ | Some (Jsont.Number (f, _)) -> int_of_float f
101101+ | Some (Jsont.String (s, _)) -> (try int_of_string s with _ -> default)
102102+ | _ -> default
103103+104104+let get_creators json =
105105+ match find_in_json json ["creators"] with
106106+ | Some (Jsont.Array (items, _)) ->
107107+ List.filter_map (fun item ->
108108+ match Jsont.Json.decode creator_jsont item with
109109+ | Ok c -> Some c
110110+ | Error _ -> None
111111+ ) items
112112+ | _ -> []
113113+114114+(** {1 BibTeX Parsing} *)
115115+116116+(** Simple BibTeX field extraction *)
117117+let extract_bibtex_field bib field =
118118+ let pattern = Printf.sprintf "%s\\s*=\\s*[{\"](.*?)[}\"]" field in
119119+ try
120120+ let re = Re.Pcre.regexp ~flags:[`CASELESS] pattern in
121121+ let groups = Re.exec re bib in
122122+ Some (Re.Group.get groups 1)
123123+ with _ -> None
124124+125125+let extract_bibtex_type bib =
126126+ try
127127+ let re = Re.Pcre.regexp "@(\\w+)\\s*\\{" in
128128+ let groups = Re.exec re bib in
129129+ String.lowercase_ascii (Re.Group.get groups 1)
130130+ with _ -> "misc"
131131+132132+(** {1 Author Parsing} *)
133133+134134+(** Split "Last, First and Last2, First2" into list of names *)
135135+let parse_authors author_str =
136136+ let parts = String.split_on_char '&' author_str in
137137+ let parts = List.concat_map (fun s ->
138138+ Astring.String.cuts ~empty:false ~sep:" and " s
139139+ ) parts in
140140+ List.map (fun name ->
141141+ let name = String.trim name in
142142+ (* Handle "Last, First" format *)
143143+ match Astring.String.cut ~sep:"," name with
144144+ | Some (last, first) ->
145145+ Printf.sprintf "%s %s" (String.trim first) (String.trim last)
146146+ | None -> name
147147+ ) parts
148148+149149+(** {1 Zotero Translation Server API} *)
150150+151151+let web_endpoint base_url =
152152+ if String.ends_with ~suffix:"/" base_url then base_url ^ "web"
153153+ else base_url ^ "/web"
154154+155155+let export_endpoint base_url =
156156+ if String.ends_with ~suffix:"/" base_url then base_url ^ "export"
157157+ else base_url ^ "/export"
158158+159159+let resolve_doi ~proc_mgr ~server_url doi =
160160+ Log.info (fun m -> m "Resolving DOI: %s" doi);
161161+ let url = web_endpoint server_url in
162162+ let body = "https://doi.org/" ^ doi in
163163+ match Bushel_http.post ~proc_mgr ~content_type:"text/plain" ~body url with
164164+ | Error e -> Error e
165165+ | Ok json_str ->
166166+ match Jsont_bytesrw.decode_string Jsont.json json_str with
167167+ | Ok json -> Ok json
168168+ | Error e -> Error (Printf.sprintf "JSON parse error: %s" e)
169169+170170+let export_bibtex ~proc_mgr ~server_url json =
171171+ let url = export_endpoint server_url ^ "?format=bibtex" in
172172+ match Jsont_bytesrw.encode_string Jsont.json json with
173173+ | Error e -> Error e
174174+ | Ok body -> Bushel_http.post ~proc_mgr ~content_type:"application/json" ~body url
175175+176176+(** {1 DOI Resolution} *)
177177+178178+let resolve ~proc_mgr ~server_url ~slug doi =
179179+ match resolve_doi ~proc_mgr ~server_url doi with
180180+ | Error e -> Error e
181181+ | Ok json ->
182182+ (* Export to BibTeX *)
183183+ match export_bibtex ~proc_mgr ~server_url json with
184184+ | Error e -> Error (Printf.sprintf "BibTeX export failed: %s" e)
185185+ | Ok bib ->
186186+ Log.debug (fun m -> m "Got BibTeX: %s" bib);
187187+ (* Parse the JSON response for metadata *)
188188+ let item =
189189+ match json with
190190+ | Jsont.Array (first :: _, _) -> first
191191+ | _ -> json
192192+ in
193193+194194+ (* Extract fields from JSON and BibTeX *)
195195+ let title = get_string_exn item ["title"] ~default:"Untitled" in
196196+ let authors =
197197+ let creators = get_creators item in
198198+ List.filter_map (fun c ->
199199+ let first = c.first_name in
200200+ let last = c.last_name in
201201+ if first = "" && last = "" then None
202202+ else Some (String.trim (first ^ " " ^ last))
203203+ ) creators
204204+ in
205205+ let authors = if authors = [] then
206206+ (* Fallback to BibTeX author field *)
207207+ match extract_bibtex_field bib "author" with
208208+ | Some a -> parse_authors a
209209+ | None -> []
210210+ else authors in
211211+212212+ let year = get_int item ["date"] ~default:(
213213+ match extract_bibtex_field bib "year" with
214214+ | Some y -> (try int_of_string y with _ -> 2024)
215215+ | None -> 2024
216216+ ) in
217217+ let month = match extract_bibtex_field bib "month" with
218218+ | Some m -> month_of_string m
219219+ | None -> 1
220220+ in
221221+222222+ let bibtype = extract_bibtex_type bib in
223223+ let publisher = get_string_exn item ["publisher"] ~default:(
224224+ extract_bibtex_field bib "publisher" |> Option.value ~default:""
225225+ ) in
226226+ let booktitle = extract_bibtex_field bib "booktitle" |> Option.value ~default:"" in
227227+ let journal = get_string_exn item ["publicationTitle"] ~default:(
228228+ extract_bibtex_field bib "journal" |> Option.value ~default:""
229229+ ) in
230230+ let institution = extract_bibtex_field bib "institution" |> Option.value ~default:"" in
231231+ let pages = extract_bibtex_field bib "pages" |> Option.value ~default:"" in
232232+ let volume = extract_bibtex_field bib "volume" in
233233+ let number = extract_bibtex_field bib "number" in
234234+ let url = get_string item ["url"] in
235235+ let abstract = get_string item ["abstractNote"] in
236236+237237+ (* Generate clean BibTeX with slug as cite key *)
238238+ let cite_key = Astring.String.map (function '-' -> '_' | x -> x) slug in
239239+ let bib = Re.replace_string (Re.Pcre.regexp "@\\w+\\{[^,]+,")
240240+ ~by:(Printf.sprintf "@%s{%s," bibtype cite_key) bib in
241241+242242+ Ok {
243243+ title;
244244+ authors;
245245+ year;
246246+ month;
247247+ bibtype;
248248+ publisher;
249249+ booktitle;
250250+ journal;
251251+ institution;
252252+ pages;
253253+ volume;
254254+ number;
255255+ doi = Some doi;
256256+ url;
257257+ abstract;
258258+ bib = String.trim bib;
259259+ }
260260+261261+(** {1 Paper File Generation} *)
262262+263263+let to_yaml_frontmatter ~slug:_ ~ver:_ metadata =
264264+ let buf = Buffer.create 1024 in
265265+ let add key value =
266266+ if value <> "" then
267267+ Buffer.add_string buf (Printf.sprintf "%s: %s\n" key value)
268268+ in
269269+ let add_opt key = function
270270+ | Some v when v <> "" -> add key v
271271+ | _ -> ()
272272+ in
273273+ let add_quoted key value =
274274+ if value <> "" then
275275+ Buffer.add_string buf (Printf.sprintf "%s: \"%s\"\n" key value)
276276+ in
277277+278278+ Buffer.add_string buf "---\n";
279279+ add "title" metadata.title;
280280+281281+ (* Authors as list *)
282282+ Buffer.add_string buf "author:\n";
283283+ List.iter (fun a ->
284284+ Buffer.add_string buf (Printf.sprintf " - %s\n" a)
285285+ ) metadata.authors;
286286+287287+ add_quoted "year" (string_of_int metadata.year);
288288+ add "month" (string_of_month metadata.month);
289289+ add "bibtype" metadata.bibtype;
290290+291291+ if metadata.publisher <> "" then add "publisher" metadata.publisher;
292292+ if metadata.booktitle <> "" then add "booktitle" metadata.booktitle;
293293+ if metadata.journal <> "" then add "journal" metadata.journal;
294294+ if metadata.institution <> "" then add "institution" metadata.institution;
295295+ if metadata.pages <> "" then add "pages" metadata.pages;
296296+ add_opt "volume" metadata.volume;
297297+ add_opt "number" metadata.number;
298298+ add_opt "doi" metadata.doi;
299299+ add_opt "url" metadata.url;
300300+301301+ (* BibTeX entry *)
302302+ Buffer.add_string buf "bib: |\n";
303303+ String.split_on_char '\n' metadata.bib |> List.iter (fun line ->
304304+ Buffer.add_string buf (Printf.sprintf " %s\n" line)
305305+ );
306306+307307+ Buffer.add_string buf "---\n";
308308+309309+ (* Abstract as body *)
310310+ (match metadata.abstract with
311311+ | Some abstract when abstract <> "" ->
312312+ Buffer.add_string buf "\n";
313313+ Buffer.add_string buf abstract;
314314+ Buffer.add_string buf "\n"
315315+ | _ -> ());
316316+317317+ Buffer.contents buf
318318+319319+(** {1 Merging with Existing Papers} *)
320320+321321+let merge_with_existing ~existing metadata =
322322+ (* Preserve fields from existing paper if new ones are empty *)
323323+ {
324324+ metadata with
325325+ abstract = (match metadata.abstract with
326326+ | Some a when a <> "" -> Some a
327327+ | _ -> if Bushel.Paper.abstract existing <> "" then Some (Bushel.Paper.abstract existing) else None);
328328+ }
329329+ (* Note: tags, projects, selected, slides, video are preserved at a higher level
330330+ when writing the file - they're not part of paper_metadata *)