My aggregated monorepo of OCaml code, automaintained

Squashed 'ocaml-bushel/' changes from b87c25ae..060a1cae

060a1cae Update clients to use regenerated Peer_tube module
6d9a13b4 Replace curl with requests library and fix sync pipeline order
fbe57508 Fix JSON feed differences: slugs, PDFs, DOI cache
6b2df6d5 Port full thumbnail logic and show thumbnails in CLI
d898093c wip website

git-subtree-dir: ocaml-bushel
git-subtree-split: 060a1cae8e689b8c600e437c969f7e34e71209a3

+1093 -249
+143 -90
bin/main.ml
··· 158 158 in 159 159 (* Build table *) 160 160 let rows = List.map (fun e -> 161 + let thumb = match Bushel.Entry.thumbnail_slug entries e with 162 + | Some s -> s 163 + | None -> "-" 164 + in 161 165 [ type_string e 162 166 ; Bushel.Entry.slug e 163 167 ; truncate 50 (Bushel.Entry.title e) 164 168 ; format_date (Bushel.Entry.date e) 169 + ; thumb 165 170 ] 166 171 ) limited in 167 172 let table = Table.make 168 - ~headers:["TYPE"; "SLUG"; "TITLE"; "DATE"] 173 + ~headers:["TYPE"; "SLUG"; "TITLE"; "DATE"; "THUMBNAIL"] 169 174 rows 170 175 in 171 176 Table.print table; ··· 235 240 Printf.printf "Title: %s\n" (Bushel.Entry.title entry); 236 241 Printf.printf "Date: %s\n" (format_date (Bushel.Entry.date entry)); 237 242 Printf.printf "URL: %s\n" (Bushel.Entry.site_url entry); 243 + (match Bushel.Entry.thumbnail_slug entries entry with 244 + | Some s -> Printf.printf "Thumbnail: %s\n" s 245 + | None -> Printf.printf "Thumbnail: -\n"); 238 246 (match Bushel.Entry.synopsis entry with 239 247 | Some s -> Printf.printf "Synopsis: %s\n" s 240 248 | None -> ()); ··· 324 332 in 325 333 326 334 Eio_main.run @@ fun env -> 335 + Eio.Switch.run @@ fun sw -> 327 336 let fs = Eio.Stdenv.fs env in 328 337 let entries = Bushel_eio.Bushel_loader.load fs data_dir in 329 338 ··· 333 342 ) steps; 334 343 Printf.printf "\n"; 335 344 336 - let results = Bushel_sync.run ~dry_run ~env ~config ~steps ~entries in 345 + let results = Bushel_sync.run ~dry_run ~sw ~env ~config ~steps ~entries in 337 346 338 347 Printf.printf "\nResults:\n"; 339 348 List.iter (fun r -> ··· 356 365 `S Manpage.s_description; 357 366 `P "The sync command runs a pipeline to synchronize images and thumbnails:"; 358 367 `P "1. $(b,images) - Rsync images from remote server"; 359 - `P "2. $(b,srcsetter) - Convert images to WebP srcset variants"; 360 - `P "3. $(b,thumbs) - Generate paper thumbnails from PDFs"; 361 - `P "4. $(b,faces) - Fetch contact face thumbnails from Immich"; 362 - `P "5. $(b,videos) - Fetch video thumbnails from PeerTube"; 368 + `P "2. $(b,thumbs) - Generate paper thumbnails from PDFs"; 369 + `P "3. $(b,faces) - Fetch contact face thumbnails from Sortal"; 370 + `P "4. $(b,videos) - Fetch video thumbnails from PeerTube"; 371 + `P "5. $(b,srcsetter) - Convert all images to WebP srcset variants"; 363 372 `P "6. $(b,typesense) - Upload to Typesense (with --remote)"; 364 373 `P "Use $(b,--dry-run) to see what commands would be run without executing them."; 365 374 ] in ··· 388 397 let data_dir = get_data_dir config data_dir in 389 398 390 399 Eio_main.run @@ fun env -> 400 + Eio.Switch.run @@ fun sw -> 391 401 let fs = Eio.Stdenv.fs env in 392 - let proc_mgr = Eio.Stdenv.process_mgr env in 402 + let http = Bushel_sync.Http.create ~sw env in 393 403 let entries = Bushel_eio.Bushel_loader.load fs data_dir in 394 404 395 405 (* Determine version *) 396 - let papers_dir = Filename.concat data_dir ("data/papers/" ^ slug) in 406 + let papers_dir = Filename.concat data_dir ("papers/" ^ slug) in 397 407 let version = match version with 398 408 | Some v -> v 399 409 | None -> ··· 418 428 Printf.printf "Resolving DOI: %s\n" doi; 419 429 Printf.printf "Slug: %s, Version: %s\n" slug version; 420 430 421 - match Bushel_sync.Zotero.resolve ~proc_mgr 431 + match Bushel_sync.Zotero.resolve ~http 422 432 ~server_url:config.zotero_translation_server 423 433 ~slug doi with 424 434 | Error e -> ··· 467 477 468 478 (** {1 Video Fetch Command} *) 469 479 470 - let video_fetch_cmd = 471 - let server = 472 - let doc = "PeerTube server name from config." in 473 - Arg.(required & opt (some string) None & info ["server"; "s"] ~docv:"NAME" ~doc) 474 - in 475 - let channel = 476 - let doc = "Channel name to fetch videos from." in 477 - Arg.(required & opt (some string) None & info ["channel"] ~docv:"CHANNEL" ~doc) 478 - in 479 - let run () config_file data_dir server channel = 480 - match load_config config_file with 481 - | Error e -> Printf.eprintf "Config error: %s\n" e; 1 482 - | Ok config -> 483 - let data_dir = get_data_dir config data_dir in 484 - 485 - (* Find server endpoint *) 486 - let endpoint = List.find_map (fun (s : Bushel_config.peertube_server) -> 487 - if s.name = server then Some s.endpoint else None 488 - ) config.peertube_servers in 489 - 490 - match endpoint with 491 - | None -> 492 - Printf.eprintf "Unknown server: %s\n" server; 493 - Printf.eprintf "Available servers:\n"; 494 - List.iter (fun (s : Bushel_config.peertube_server) -> 495 - Printf.eprintf " - %s (%s)\n" s.name s.endpoint 496 - ) config.peertube_servers; 497 - 1 498 - | Some endpoint -> 499 - Eio_main.run @@ fun env -> 500 - let proc_mgr = Eio.Stdenv.process_mgr env in 501 - 502 - Printf.printf "Fetching videos from %s channel %s...\n" server channel; 503 - 504 - let videos = Bushel_sync.Peertube.fetch_all_channel_videos 505 - ~proc_mgr ~endpoint ~channel () in 506 - 507 - Printf.printf "Found %d videos\n" (List.length videos); 508 - 509 - (* Load or create videos index *) 510 - let index_path = Filename.concat data_dir "data/videos.yml" in 511 - let index = Bushel_sync.Peertube.VideoIndex.load_file index_path in 512 - 513 - (* Create video files and update index *) 514 - let videos_dir = Filename.concat data_dir "data/videos" in 515 - if not (Sys.file_exists videos_dir) then 516 - Unix.mkdir videos_dir 0o755; 517 - 518 - let new_count = ref 0 in 519 - List.iter (fun (video : Bushel_sync.Peertube.video) -> 520 - let video_path = Filename.concat videos_dir (video.uuid ^ ".md") in 521 - 522 - if Sys.file_exists video_path then 523 - Printf.printf " Skipping %s (exists)\n" video.uuid 524 - else begin 525 - Printf.printf " Creating %s: %s\n" video.uuid video.name; 526 - 527 - (* Generate markdown file *) 528 - let content = Printf.sprintf {|--- 480 + (** Helper to create a video markdown file *) 481 + let create_video_file ~videos_dir ~index ~server (video : Bushel_sync.Peertube.video) = 482 + let video_path = Filename.concat videos_dir (video.uuid ^ ".md") in 483 + if Sys.file_exists video_path then begin 484 + Printf.printf " Skipping %s (exists)\n" video.uuid; 485 + false 486 + end else begin 487 + Printf.printf " Creating %s: %s\n" video.uuid video.name; 488 + let content = Printf.sprintf {|--- 529 489 title: %s 530 490 published_date: %s 531 491 uuid: %s ··· 536 496 537 497 %s 538 498 |} 539 - video.name 540 - (Ptime.to_rfc3339 video.published_at) 541 - video.uuid 542 - video.url 543 - (Option.value ~default:"" video.description) 544 - in 499 + video.name 500 + (Ptime.to_rfc3339 video.published_at) 501 + video.uuid 502 + video.url 503 + (Option.value ~default:"" video.description) 504 + in 505 + let oc = open_out video_path in 506 + output_string oc content; 507 + close_out oc; 508 + Bushel_sync.Peertube.VideoIndex.add index ~uuid:video.uuid ~server; 509 + true 510 + end 545 511 546 - let oc = open_out video_path in 547 - output_string oc content; 548 - close_out oc; 512 + let video_fetch_cmd = 513 + let url_arg = 514 + let doc = "PeerTube video URL to fetch (e.g., https://example.com/w/UUID)." in 515 + Arg.(value & pos 0 (some string) None & info [] ~docv:"URL" ~doc) 516 + in 517 + let server = 518 + let doc = "PeerTube server name from config (for channel mode)." in 519 + Arg.(value & opt (some string) None & info ["server"; "s"] ~docv:"NAME" ~doc) 520 + in 521 + let channel = 522 + let doc = "Channel name to fetch videos from (for channel mode)." in 523 + Arg.(value & opt (some string) None & info ["channel"] ~docv:"CHANNEL" ~doc) 524 + in 525 + let run () config_file data_dir url_arg server channel = 526 + match load_config config_file with 527 + | Error e -> Printf.eprintf "Config error: %s\n" e; 1 528 + | Ok config -> 529 + let data_dir = get_data_dir config data_dir in 530 + let index_path = Filename.concat data_dir "videos.yml" in 531 + let index = Bushel_sync.Peertube.VideoIndex.load_file index_path in 532 + let videos_dir = Filename.concat data_dir "videos" in 533 + if not (Sys.file_exists videos_dir) then 534 + Unix.mkdir videos_dir 0o755; 549 535 550 - (* Update index *) 551 - Bushel_sync.Peertube.VideoIndex.add index ~uuid:video.uuid ~server; 552 - incr new_count 553 - end 554 - ) videos; 536 + match url_arg, server, channel with 537 + (* Single video mode: fetch by URL *) 538 + | Some url, _, _ -> 539 + (match Bushel_sync.Peertube.find_server_for_url config.peertube_servers url with 540 + | None -> 541 + Printf.eprintf "No configured server matches URL: %s\n" url; 542 + Printf.eprintf "Configured servers:\n"; 543 + List.iter (fun (s : Bushel_config.peertube_server) -> 544 + Printf.eprintf " - %s (%s)\n" s.name s.endpoint 545 + ) config.peertube_servers; 546 + 1 547 + | Some matched_server -> 548 + match Bushel_sync.Peertube.uuid_of_url url with 549 + | None -> 550 + Printf.eprintf "Could not extract video UUID from URL: %s\n" url; 551 + 1 552 + | Some uuid -> 553 + Printf.printf "Fetching video %s from %s...\n" uuid matched_server.name; 554 + Eio_main.run @@ fun env -> 555 + Eio.Switch.run @@ fun sw -> 556 + let http = Bushel_sync.Http.create ~sw env in 557 + match Bushel_sync.Peertube.fetch_video_details ~http 558 + ~endpoint:matched_server.endpoint uuid with 559 + | Error e -> 560 + Printf.eprintf "Error fetching video: %s\n" e; 561 + 1 562 + | Ok video -> 563 + let created = create_video_file ~videos_dir ~index 564 + ~server:matched_server.name video in 565 + Bushel_sync.Peertube.VideoIndex.save_file index_path index; 566 + if created then 567 + Printf.printf "\nCreated video entry: %s\n" video.name 568 + else 569 + Printf.printf "\nVideo already exists: %s\n" video.name; 570 + 0) 555 571 556 - (* Save updated index *) 557 - Bushel_sync.Peertube.VideoIndex.save_file index_path index; 572 + (* Channel mode: fetch all videos from channel *) 573 + | None, Some server, Some channel -> 574 + let endpoint = List.find_map (fun (s : Bushel_config.peertube_server) -> 575 + if s.name = server then Some s.endpoint else None 576 + ) config.peertube_servers in 577 + (match endpoint with 578 + | None -> 579 + Printf.eprintf "Unknown server: %s\n" server; 580 + Printf.eprintf "Available servers:\n"; 581 + List.iter (fun (s : Bushel_config.peertube_server) -> 582 + Printf.eprintf " - %s (%s)\n" s.name s.endpoint 583 + ) config.peertube_servers; 584 + 1 585 + | Some endpoint -> 586 + Eio_main.run @@ fun env -> 587 + Eio.Switch.run @@ fun sw -> 588 + let http = Bushel_sync.Http.create ~sw env in 589 + Printf.printf "Fetching videos from %s channel %s...\n" server channel; 590 + let videos = Bushel_sync.Peertube.fetch_all_channel_videos 591 + ~http ~endpoint ~channel () in 592 + Printf.printf "Found %d videos\n" (List.length videos); 593 + let new_count = List.fold_left (fun count video -> 594 + if create_video_file ~videos_dir ~index ~server video 595 + then count + 1 else count 596 + ) 0 videos in 597 + Bushel_sync.Peertube.VideoIndex.save_file index_path index; 598 + Printf.printf "\nCreated %d new video entries\n" new_count; 599 + Printf.printf "Updated index: %s\n" index_path; 600 + 0) 558 601 559 - Printf.printf "\nCreated %d new video entries\n" !new_count; 560 - Printf.printf "Updated index: %s\n" index_path; 561 - 0 602 + (* Missing arguments *) 603 + | None, None, _ | None, _, None -> 604 + Printf.eprintf "Usage: bushel video <URL>\n"; 605 + Printf.eprintf " or: bushel video --server NAME --channel CHANNEL\n"; 606 + 1 562 607 in 563 - let doc = "Fetch videos from a PeerTube channel." in 564 - let info = Cmd.info "video" ~doc in 565 - Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ server $ channel) 608 + let doc = "Fetch videos from PeerTube." in 609 + let man = [ 610 + `S Manpage.s_description; 611 + `P "Fetch video metadata from a PeerTube instance."; 612 + `P "Single video mode: $(b,bushel video <URL>)"; 613 + `P " Fetches a single video by URL. The server is auto-detected from config."; 614 + `P "Channel mode: $(b,bushel video --server NAME --channel CHANNEL)"; 615 + `P " Fetches all videos from a channel on the named server."; 616 + ] in 617 + let info = Cmd.info "video" ~doc ~man in 618 + Cmd.v info Term.(const run $ logging_t $ config_file $ data_dir $ url_arg $ server $ channel) 566 619 567 620 (** {1 Images Command} *) 568 621
+3
lib/bushel.ml
··· 84 84 module Types = Bushel_types 85 85 (** Common types and Jsont codecs. *) 86 86 87 + module Doi_entry = Bushel_doi_entry 88 + (** DOI entries resolved from external sources. *) 89 + 87 90 module Util = Bushel_util 88 91 (** Utility functions (word counting, text processing). *)
+98
lib/bushel_doi_entry.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** DOI entries resolved from external sources via Zotero Translation Server *) 7 + 8 + type status = 9 + | Resolved 10 + | Failed of string 11 + 12 + type t = { 13 + doi : string; 14 + title : string; 15 + authors : string list; 16 + year : int; 17 + bibtype : string; 18 + publisher : string; 19 + resolved_at : string; 20 + source_urls : string list; 21 + status : status; 22 + ignore : bool; 23 + } 24 + 25 + type ts = t list 26 + 27 + let get_string key fields = 28 + match List.assoc_opt key fields with 29 + | Some (`String s) -> s 30 + | _ -> "" 31 + 32 + let get_string_opt key fields = 33 + match List.assoc_opt key fields with 34 + | Some (`String s) -> Some s 35 + | _ -> None 36 + 37 + let get_int key fields = 38 + match List.assoc_opt key fields with 39 + | Some (`Float f) -> int_of_float f 40 + | _ -> 0 41 + 42 + let get_bool key fields = 43 + match List.assoc_opt key fields with 44 + | Some (`Bool b) -> b 45 + | _ -> false 46 + 47 + let get_strings key fields = 48 + match List.assoc_opt key fields with 49 + | Some (`A items) -> 50 + List.filter_map (function `String s -> Some s | _ -> None) items 51 + | _ -> [] 52 + 53 + let of_yaml_value = function 54 + | `O fields -> 55 + let doi = get_string "doi" fields in 56 + let resolved_at = get_string "resolved_at" fields in 57 + let source_urls = 58 + match get_strings "source_urls" fields with 59 + | [] -> 60 + (match get_string_opt "source_url" fields with 61 + | Some u -> [u] 62 + | None -> []) 63 + | urls -> urls 64 + in 65 + let ignore = get_bool "ignore" fields in 66 + let error = get_string_opt "error" fields in 67 + (match error with 68 + | Some err -> 69 + Some { doi; title = ""; authors = []; year = 0; bibtype = ""; 70 + publisher = ""; resolved_at; source_urls; 71 + status = Failed err; ignore } 72 + | None -> 73 + let title = get_string "title" fields in 74 + let authors = get_strings "authors" fields in 75 + let year = get_int "year" fields in 76 + let bibtype = get_string "bibtype" fields in 77 + let publisher = get_string "publisher" fields in 78 + Some { doi; title; authors; year; bibtype; publisher; 79 + resolved_at; source_urls; status = Resolved; ignore }) 80 + | _ -> None 81 + 82 + (** Load DOI entries from a YAML string *) 83 + let of_yaml_string str = 84 + try 85 + match Yamlrw.of_string str with 86 + | `A entries -> List.filter_map of_yaml_value entries 87 + | _ -> [] 88 + with Yamlrw.Yamlrw_error _ -> [] 89 + 90 + (** Find entry by DOI (excludes ignored entries) *) 91 + let find_by_doi entries doi = 92 + List.find_opt (fun entry -> not entry.ignore && entry.doi = doi) entries 93 + 94 + (** Find entry by source URL (excludes ignored entries) *) 95 + let find_by_url entries url = 96 + List.find_opt (fun entry -> 97 + not entry.ignore && List.mem url entry.source_urls 98 + ) entries
+203 -2
lib/bushel_entry.ml
··· 27 27 images : Srcsetter.t list; 28 28 image_index : (string, Srcsetter.t) Hashtbl.t; 29 29 data_dir : string; 30 + doi_entries : Bushel_doi_entry.ts; 30 31 } 31 32 32 33 (** {1 Constructors} *) 33 34 34 - let v ~papers ~notes ~projects ~ideas ~videos ~contacts ?(images=[]) ~data_dir () = 35 + let v ~papers ~notes ~projects ~ideas ~videos ~contacts ?(images=[]) ?(doi_entries=[]) ~data_dir () = 35 36 let slugs : slugs = Hashtbl.create 42 in 36 37 let papers, old_papers = List.partition (fun p -> p.Bushel_paper.latest) papers in 37 38 List.iter (fun n -> Hashtbl.add slugs n.Bushel_note.slug (`Note n)) notes; ··· 42 43 (* Build image index *) 43 44 let image_index = Hashtbl.create (List.length images) in 44 45 List.iter (fun img -> Hashtbl.add image_index (Srcsetter.slug img) img) images; 45 - { slugs; papers; old_papers; notes; projects; ideas; videos; contacts; images; image_index; data_dir } 46 + { slugs; papers; old_papers; notes; projects; ideas; videos; contacts; images; image_index; data_dir; doi_entries } 46 47 47 48 (** {1 Accessors} *) 48 49 ··· 55 56 let old_papers { old_papers; _ } = old_papers 56 57 let images { images; _ } = images 57 58 let data_dir { data_dir; _ } = data_dir 59 + let doi_entries { doi_entries; _ } = doi_entries 58 60 59 61 (** {1 Image Lookup} *) 60 62 ··· 194 196 | `Slug t -> lk t 195 197 | _ -> None 196 198 ) tags 199 + 200 + (** {1 Thumbnail Functions} *) 201 + 202 + (** Get the smallest webp variant from a srcsetter image - prefers size just above 480px *) 203 + let smallest_webp_variant img = 204 + let variants = Srcsetter.variants img in 205 + let webp_variants = 206 + Srcsetter.MS.bindings variants 207 + |> List.filter (fun (name, _) -> String.ends_with ~suffix:".webp" name) 208 + in 209 + match webp_variants with 210 + | [] -> 211 + (* No webp variants - use the name field which is always webp *) 212 + "/images/" ^ Srcsetter.name img 213 + | variants -> 214 + (* Prefer variants with width > 480px, choosing the smallest one above 480 *) 215 + let large_variants = List.filter (fun (_, (w, _)) -> w > 480) variants in 216 + let candidates = if large_variants = [] then variants else large_variants in 217 + (* Find the smallest variant from candidates *) 218 + let smallest = List.fold_left (fun acc (name, (w, h)) -> 219 + match acc with 220 + | None -> Some (name, w, h) 221 + | Some (_, min_w, _) when w < min_w -> Some (name, w, h) 222 + | _ -> acc 223 + ) None candidates in 224 + match smallest with 225 + | Some (name, _, _) -> "/images/" ^ name 226 + | None -> "/images/" ^ Srcsetter.name img 227 + 228 + (** Get thumbnail slug for a contact *) 229 + let contact_thumbnail_slug contact = 230 + (* Contact images use just the handle as slug *) 231 + Some (Sortal_schema.Contact.handle contact) 232 + 233 + (** Get thumbnail URL for a contact - resolved through srcsetter *) 234 + let contact_thumbnail entries contact = 235 + match contact_thumbnail_slug contact with 236 + | None -> None 237 + | Some thumb_slug -> 238 + match lookup_image entries thumb_slug with 239 + | Some img -> Some (smallest_webp_variant img) 240 + | None -> None 241 + 242 + (** Extract the first image URL from markdown text *) 243 + let extract_first_image md = 244 + let open Cmarkit in 245 + let doc = Doc.of_string md in 246 + let found_image = ref None in 247 + let find_image_in_inline _mapper = function 248 + | Inline.Image (img, _) -> 249 + (match Inline.Link.reference img with 250 + | `Inline (ld, _) -> 251 + (match Link_definition.dest ld with 252 + | Some (url, _) when !found_image = None -> 253 + found_image := Some url; 254 + Mapper.default 255 + | _ -> Mapper.default) 256 + | _ -> Mapper.default) 257 + | _ -> Mapper.default 258 + in 259 + let mapper = Mapper.make ~inline:find_image_in_inline () in 260 + let _ = Mapper.map_doc mapper doc in 261 + !found_image 262 + 263 + (** Extract the first video slug from markdown text by looking for bushel video links *) 264 + let extract_first_video entries md = 265 + let open Cmarkit in 266 + let doc = Doc.of_string md in 267 + let found_video = ref None in 268 + let find_video_in_inline _mapper = function 269 + | Inline.Link (link, _) -> 270 + (match Inline.Link.reference link with 271 + | `Inline (ld, _) -> 272 + (match Link_definition.dest ld with 273 + | Some (url, _) when !found_video = None && String.starts_with ~prefix:":" url -> 274 + let slug = String.sub url 1 (String.length url - 1) in 275 + (match lookup entries slug with 276 + | Some (`Video v) -> 277 + found_video := Some (Bushel_video.uuid v); 278 + Mapper.default 279 + | _ -> Mapper.default) 280 + | _ -> Mapper.default) 281 + | _ -> Mapper.default) 282 + | _ -> Mapper.default 283 + in 284 + let mapper = Mapper.make ~inline:find_video_in_inline () in 285 + let _ = Mapper.map_doc mapper doc in 286 + !found_video 287 + 288 + (** Get thumbnail slug for an entry with fallbacks *) 289 + let rec thumbnail_slug entries = function 290 + | `Paper p -> Some (Bushel_paper.slug p) 291 + | `Video v -> Some (Bushel_video.uuid v) 292 + | `Project p -> Some (Printf.sprintf "project-%s" (Bushel_project.slug p)) 293 + | `Idea i -> 294 + let is_active = match Bushel_idea.status i with 295 + | Bushel_idea.Available | Bushel_idea.Discussion | Bushel_idea.Ongoing -> true 296 + | Bushel_idea.Completed | Bushel_idea.Expired -> false 297 + in 298 + if is_active then 299 + (* Use first supervisor's face image *) 300 + let supervisors = Bushel_idea.supervisors i in 301 + match supervisors with 302 + | sup :: _ -> 303 + let handle = if String.length sup > 0 && sup.[0] = '@' 304 + then String.sub sup 1 (String.length sup - 1) 305 + else sup 306 + in 307 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = handle) (contacts entries) with 308 + | Some c -> 309 + Some (Sortal_schema.Contact.handle c) 310 + | None -> 311 + (* Fallback to project thumbnail *) 312 + let project_slug = Bushel_idea.project i in 313 + (match lookup entries project_slug with 314 + | Some p -> thumbnail_slug entries p 315 + | None -> None)) 316 + | [] -> 317 + (* No supervisors, use project thumbnail *) 318 + let project_slug = Bushel_idea.project i in 319 + (match lookup entries project_slug with 320 + | Some p -> thumbnail_slug entries p 321 + | None -> None) 322 + else 323 + (* Use project thumbnail for completed/expired ideas *) 324 + let project_slug = Bushel_idea.project i in 325 + (match lookup entries project_slug with 326 + | Some p -> thumbnail_slug entries p 327 + | None -> None) 328 + | `Note n -> 329 + (* Use titleimage if set, otherwise extract first image from body, 330 + then try video, otherwise use slug_ent's thumbnail *) 331 + (match Bushel_note.titleimage n with 332 + | Some slug -> Some slug 333 + | None -> 334 + match extract_first_image (Bushel_note.body n) with 335 + | Some url when String.starts_with ~prefix:":" url -> 336 + Some (String.sub url 1 (String.length url - 1)) 337 + | Some _ -> None 338 + | None -> 339 + match extract_first_video entries (Bushel_note.body n) with 340 + | Some video_uuid -> Some video_uuid 341 + | None -> 342 + (* Fallback to slug_ent's thumbnail if present *) 343 + match Bushel_note.slug_ent n with 344 + | Some slug_ent -> 345 + (match lookup entries slug_ent with 346 + | Some entry -> thumbnail_slug entries entry 347 + | None -> None) 348 + | None -> None) 349 + 350 + (** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *) 351 + let thumbnail entries entry = 352 + match thumbnail_slug entries entry with 353 + | None -> None 354 + | Some thumb_slug -> 355 + match lookup_image entries thumb_slug with 356 + | Some img -> Some (smallest_webp_variant img) 357 + | None -> 358 + (* For projects, fallback to supervisor faces if project image doesn't exist *) 359 + (match entry with 360 + | `Project p -> 361 + (* Find ideas for this project *) 362 + let project_ideas = List.filter (fun idea -> 363 + Bushel_idea.project idea = ":" ^ Bushel_project.slug p 364 + ) (ideas entries) in 365 + (* Collect all unique supervisors from these ideas *) 366 + let all_supervisors = 367 + List.fold_left (fun acc idea -> 368 + List.fold_left (fun acc2 sup -> 369 + if List.mem sup acc2 then acc2 else sup :: acc2 370 + ) acc (Bushel_idea.supervisors idea) 371 + ) [] project_ideas 372 + in 373 + (* Split into avsm and others, preferring others first *) 374 + let (others, avsm) = List.partition (fun sup -> 375 + let handle = if String.length sup > 0 && sup.[0] = '@' 376 + then String.sub sup 1 (String.length sup - 1) 377 + else sup 378 + in 379 + handle <> "avsm" 380 + ) all_supervisors in 381 + let ordered_supervisors = others @ avsm in 382 + let rec try_supervisors = function 383 + | [] -> None 384 + | sup :: rest -> 385 + let handle = if String.length sup > 0 && sup.[0] = '@' 386 + then String.sub sup 1 (String.length sup - 1) 387 + else sup 388 + in 389 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = handle) (contacts entries) with 390 + | Some c -> 391 + (match lookup_image entries (Sortal_schema.Contact.handle c) with 392 + | Some img -> Some (smallest_webp_variant img) 393 + | None -> try_supervisors rest) 394 + | None -> try_supervisors rest) 395 + in 396 + try_supervisors ordered_supervisors 397 + | _ -> None)
+19
lib/bushel_entry.mli
··· 30 30 videos:Bushel_video.t list -> 31 31 contacts:Sortal_schema.Contact.t list -> 32 32 ?images:Srcsetter.t list -> 33 + ?doi_entries:Bushel_doi_entry.ts -> 33 34 data_dir:string -> 34 35 unit -> 35 36 t ··· 46 47 val old_papers : t -> Bushel_paper.ts 47 48 val images : t -> Srcsetter.t list 48 49 val data_dir : t -> string 50 + val doi_entries : t -> Bushel_doi_entry.ts 49 51 50 52 (** {1 Lookup Functions} *) 51 53 ··· 127 129 128 130 val mention_entries : t -> Bushel_tags.t list -> entry list 129 131 (** [mention_entries entries tags] returns entries mentioned in the tags. *) 132 + 133 + (** {1 Thumbnail Functions} *) 134 + 135 + val smallest_webp_variant : Srcsetter.t -> string 136 + (** [smallest_webp_variant img] returns URL path to smallest webp variant above 480px. *) 137 + 138 + val contact_thumbnail_slug : Sortal_schema.Contact.t -> string option 139 + (** [contact_thumbnail_slug contact] returns the image slug for a contact. *) 140 + 141 + val contact_thumbnail : t -> Sortal_schema.Contact.t -> string option 142 + (** [contact_thumbnail entries contact] returns the thumbnail URL for a contact. *) 143 + 144 + val thumbnail_slug : t -> entry -> string option 145 + (** [thumbnail_slug entries entry] returns the image slug for an entry. *) 146 + 147 + val thumbnail : t -> entry -> string option 148 + (** [thumbnail entries entry] returns the thumbnail URL for an entry. *)
+312
lib/bushel_md.ml
··· 17 17 - Plain HTML mode for feeds and simple output 18 18 *) 19 19 20 + (** {1 Sidenote Types} 21 + 22 + Sidenote data types for interactive previews on hover. 23 + These are defined here as Cmarkit inline extensions that can be 24 + generated by the sidenote mapper and rendered by the webserver. *) 25 + 26 + type sidenote_data = 27 + | Contact_note of Sortal_schema.Contact.t * string 28 + | Paper_note of Bushel_paper.t * string 29 + | Idea_note of Bushel_idea.t * string 30 + | Note_note of Bushel_note.t * string 31 + | Project_note of Bushel_project.t * string 32 + | Video_note of Bushel_video.t * string 33 + | Footnote_note of string * Cmarkit.Block.t * string 34 + 35 + (** Extensible inline for sidenotes *) 36 + type Cmarkit.Inline.t += Side_note of sidenote_data 37 + 20 38 (** {1 Link Detection} *) 21 39 22 40 let is_bushel_slug = String.starts_with ~prefix:":" ··· 103 121 | _ -> None) 104 122 | _ -> None 105 123 124 + (** {1 Sidenote Mapper} 125 + 126 + Creates sidenotes for Bushel links. Used for interactive previews 127 + on the main website. *) 128 + 129 + let make_sidenote_mapper entries = 130 + let open Cmarkit in 131 + fun _m -> 132 + function 133 + | Inline.Link (lb, meta) -> 134 + (match link_target_is_bushel lb with 135 + | Some (url, title) -> 136 + let s = strip_handle url in 137 + if is_tag_slug url then 138 + (* Tag link - keep as regular link with ## prefix for renderer *) 139 + let txt = Inline.Text (title, meta) in 140 + let ld = Link_definition.make ~dest:(url, meta) () in 141 + let ll = `Inline (ld, meta) in 142 + let link = Inline.Link.make txt ll in 143 + Mapper.ret (Inline.Link (link, meta)) 144 + else if is_contact_slug url then 145 + (* Contact sidenote *) 146 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = s) (Bushel_entry.contacts entries) with 147 + | Some c -> 148 + let sidenote = Side_note (Contact_note (c, title)) in 149 + Mapper.ret sidenote 150 + | None -> 151 + (* Contact not found, fallback to text *) 152 + let txt = Inline.Text (title, meta) in 153 + Mapper.ret txt) 154 + else 155 + (* Check entry type and generate appropriate sidenote *) 156 + (match Bushel_entry.lookup entries s with 157 + | Some (`Paper p) -> 158 + let sidenote = Side_note (Paper_note (p, title)) in 159 + Mapper.ret sidenote 160 + | Some (`Idea i) -> 161 + let sidenote = Side_note (Idea_note (i, title)) in 162 + Mapper.ret sidenote 163 + | Some (`Note n) -> 164 + let sidenote = Side_note (Note_note (n, title)) in 165 + Mapper.ret sidenote 166 + | Some (`Project p) -> 167 + let sidenote = Side_note (Project_note (p, title)) in 168 + Mapper.ret sidenote 169 + | Some (`Video v) -> 170 + let sidenote = Side_note (Video_note (v, title)) in 171 + Mapper.ret sidenote 172 + | None -> 173 + (* Entry not found, use regular link *) 174 + let dest = Bushel_entry.lookup_site_url entries s in 175 + let txt = Inline.Text (title, meta) in 176 + let ld = Link_definition.make ~dest:(dest, meta) () in 177 + let ll = `Inline (ld, meta) in 178 + let link = Inline.Link.make txt ll in 179 + Mapper.ret (Inline.Link (link, meta))) 180 + | None -> 181 + (* Handle reference-style links *) 182 + (match Inline.Link.referenced_label lb with 183 + | Some l -> 184 + let m = Label.meta l in 185 + (match Meta.find authorlink m with 186 + | Some () -> 187 + let slug = Label.key l in 188 + let s = strip_handle slug in 189 + (match List.find_opt (fun c -> Sortal_schema.Contact.handle c = s) (Bushel_entry.contacts entries) with 190 + | Some c -> 191 + let name = Sortal_schema.Contact.name c in 192 + let sidenote = Side_note (Contact_note (c, name)) in 193 + Mapper.ret sidenote 194 + | None -> 195 + let title = Inline.Link.text lb |> text_of_inline in 196 + let txt = Inline.Text (title, meta) in 197 + Mapper.ret txt) 198 + | None -> 199 + (match Meta.find sluglink m with 200 + | Some () -> 201 + let slug = Label.key l in 202 + if is_bushel_slug slug then 203 + let s = strip_handle slug in 204 + let title = Inline.Link.text lb |> text_of_inline in 205 + (match Bushel_entry.lookup entries s with 206 + | Some (`Paper p) -> Mapper.ret (Side_note (Paper_note (p, title))) 207 + | Some (`Idea i) -> Mapper.ret (Side_note (Idea_note (i, title))) 208 + | Some (`Note n) -> Mapper.ret (Side_note (Note_note (n, title))) 209 + | Some (`Project p) -> Mapper.ret (Side_note (Project_note (p, title))) 210 + | Some (`Video v) -> Mapper.ret (Side_note (Video_note (v, title))) 211 + | None -> 212 + let dest = Bushel_entry.lookup_site_url entries s in 213 + let txt = Inline.Text (title, meta) in 214 + let ld = Link_definition.make ~dest:(dest, meta) () in 215 + let ll = `Inline (ld, meta) in 216 + let link = Inline.Link.make txt ll in 217 + Mapper.ret (Inline.Link (link, meta))) 218 + else if is_tag_slug slug then 219 + let sh = strip_handle slug in 220 + let txt = Inline.Text (sh, meta) in 221 + let ld = Link_definition.make ~dest:("#", meta) () in 222 + let ll = `Inline (ld, meta) in 223 + let link = Inline.Link.make txt ll in 224 + Mapper.ret (Inline.Link (link, meta)) 225 + else Mapper.default 226 + | None -> Mapper.default)) 227 + | None -> Mapper.default)) 228 + | Inline.Image (lb, meta) -> 229 + (* Handle images with bushel slugs *) 230 + (match image_target_is_bushel lb with 231 + | Some (url, alt, caption) -> 232 + let s = strip_handle url in 233 + (* Check if this is a video - if so, use /videos/ path *) 234 + (match Bushel_entry.lookup entries s with 235 + | Some (`Video _) -> 236 + let dest = Printf.sprintf "/videos/%s" s in 237 + let txt = Inline.Text (caption, meta) in 238 + let ld = Link_definition.make ?title:alt ~dest:(dest, meta) () in 239 + let ll = `Inline (ld, meta) in 240 + let img = Inline.Link.make txt ll in 241 + Mapper.ret (Inline.Image (img, meta)) 242 + | _ -> 243 + (* Convert bushel slug to /images/ path *) 244 + let dest = Printf.sprintf "/images/%s.webp" s in 245 + let txt = Inline.Text (caption, meta) in 246 + let ld = Link_definition.make ?title:alt ~dest:(dest, meta) () in 247 + let ll = `Inline (ld, meta) in 248 + let img = Inline.Link.make txt ll in 249 + Mapper.ret (Inline.Image (img, meta))) 250 + | None -> Mapper.default) 251 + | _ -> Mapper.default 252 + 253 + (** Alias for compatibility *) 254 + let make_bushel_inline_mapper = make_sidenote_mapper 255 + 106 256 (** {1 Link-Only Mapper} 107 257 108 258 Converts Bushel links to regular HTML links without sidenotes. ··· 179 329 | None -> Mapper.default)) 180 330 | None -> Mapper.default)) 181 331 | _ -> Mapper.default 332 + 333 + (** Alias for compatibility *) 334 + let make_bushel_link_only_mapper _defs = make_link_only_mapper 182 335 183 336 (** {1 Slug Scanning} *) 184 337 ··· 623 776 let mapper = Mapper.make ~inline:(make_to_markdown_mapper ~base_url ~image_base entries) () in 624 777 let mapped_doc = Mapper.map_doc mapper doc in 625 778 Cmarkit_commonmark.of_doc mapped_doc 779 + 780 + (** {1 References} 781 + 782 + Reference extraction for CiTO annotations. *) 783 + 784 + (** Reference source type for CiTO annotations *) 785 + type reference_source = 786 + | Paper (** CitesAsSourceDocument *) 787 + | Note (** CitesAsRelated *) 788 + | External (** Cites *) 789 + 790 + (** Extract references (papers/notes with DOIs) from a note. 791 + Returns a list of (doi, citation_text, reference_source) tuples. 792 + 793 + @param entries The entry collection 794 + @param default_author The default author contact for notes without explicit author 795 + @param note The note to extract references from *) 796 + let note_references entries (default_author:Sortal_schema.Contact.t) note = 797 + let refs = ref [] in 798 + 799 + (* Helper to format author name: extract last name from full name *) 800 + let format_author_last name = 801 + let parts = String.split_on_char ' ' name in 802 + List.nth parts (List.length parts - 1) 803 + in 804 + 805 + (* Helper to format a citation *) 806 + let format_citation ~authors ~year ~title ~publisher = 807 + let author_str = match authors with 808 + | [] -> "" 809 + | [author] -> format_author_last author ^ " " 810 + | author :: _ -> (format_author_last author) ^ " et al " 811 + in 812 + let pub_str = match publisher with 813 + | None | Some "" -> "" 814 + | Some p -> p ^ ". " 815 + in 816 + Printf.sprintf "%s(%d). %s. %s" author_str year title pub_str 817 + in 818 + 819 + (* Check slug_ent if it exists *) 820 + (match Bushel_note.slug_ent note with 821 + | Some slug -> 822 + (match Bushel_entry.lookup entries slug with 823 + | Some (`Paper p) -> 824 + (match Bushel_paper.doi p with 825 + | Some doi -> 826 + let authors = Bushel_paper.authors p in 827 + let year = Bushel_paper.year p in 828 + let title = Bushel_paper.title p in 829 + let publisher = Some (Bushel_paper.publisher p) in 830 + let citation = format_citation ~authors ~year ~title ~publisher in 831 + refs := (doi, citation, Paper) :: !refs 832 + | None -> ()) 833 + | Some (`Note n) -> 834 + (match Bushel_note.doi n with 835 + | Some doi -> 836 + let authors = match Bushel_note.author n with 837 + | Some a -> [a] 838 + | None -> [Sortal_schema.Contact.name default_author] 839 + in 840 + let (year, _, _) = Bushel_note.date n in 841 + let title = Bushel_note.title n in 842 + let publisher = None in 843 + let citation = format_citation ~authors ~year ~title ~publisher in 844 + refs := (doi, citation, Note) :: !refs 845 + | None -> ()) 846 + | _ -> ()) 847 + | None -> ()); 848 + 849 + (* Scan body for bushel references *) 850 + let slugs = scan_for_slugs entries (Bushel_note.body note) in 851 + List.iter (fun slug -> 852 + (* Strip leading : or @ from slug before lookup *) 853 + let normalized_slug = strip_handle slug in 854 + match Bushel_entry.lookup entries normalized_slug with 855 + | Some (`Paper p) -> 856 + (match Bushel_paper.doi p with 857 + | Some doi -> 858 + let authors = Bushel_paper.authors p in 859 + let year = Bushel_paper.year p in 860 + let title = Bushel_paper.title p in 861 + let publisher = Some (Bushel_paper.publisher p) in 862 + let citation = format_citation ~authors ~year ~title ~publisher in 863 + (* Check if doi already exists in refs *) 864 + if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 865 + refs := (doi, citation, Paper) :: !refs 866 + | None -> ()) 867 + | Some (`Note n) -> 868 + (match Bushel_note.doi n with 869 + | Some doi -> 870 + let authors = match Bushel_note.author n with 871 + | Some a -> [a] 872 + | None -> [Sortal_schema.Contact.name default_author] 873 + in 874 + let (year, _, _) = Bushel_note.date n in 875 + let title = Bushel_note.title n in 876 + let publisher = None in 877 + let citation = format_citation ~authors ~year ~title ~publisher in 878 + (* Check if doi already exists in refs *) 879 + if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 880 + refs := (doi, citation, Note) :: !refs 881 + | None -> ()) 882 + | _ -> () 883 + ) slugs; 884 + 885 + (* Scan body for external DOI URLs and resolve from cache *) 886 + let body = Bushel_note.body note in 887 + let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in 888 + let doi_matches = Re.all doi_url_pattern body in 889 + let doi_entries = Bushel_entry.doi_entries entries in 890 + List.iter (fun group -> 891 + try 892 + let encoded_doi = Re.Group.get group 1 in 893 + let doi = Uri.pct_decode encoded_doi in 894 + if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 895 + match Bushel_doi_entry.find_by_doi doi_entries doi with 896 + | Some doi_entry when doi_entry.status = Resolved -> 897 + let citation = format_citation 898 + ~authors:doi_entry.authors 899 + ~year:doi_entry.year 900 + ~title:doi_entry.title 901 + ~publisher:(Some doi_entry.publisher) 902 + in 903 + refs := (doi, citation, External) :: !refs 904 + | _ -> 905 + refs := (doi, doi, External) :: !refs 906 + with _ -> () 907 + ) doi_matches; 908 + 909 + (* Scan body for publisher URLs and resolve from DOI cache *) 910 + let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com|arxiv\\.org/abs)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in 911 + let publisher_matches = Re.all publisher_pattern body in 912 + List.iter (fun group -> 913 + try 914 + let url = Re.Group.get group 0 in 915 + match Bushel_doi_entry.find_by_url doi_entries url with 916 + | Some doi_entry when doi_entry.status = Resolved -> 917 + let doi = doi_entry.doi in 918 + if not (List.exists (fun (d, _, _) -> d = doi) !refs) then 919 + let citation = format_citation 920 + ~authors:doi_entry.authors 921 + ~year:doi_entry.year 922 + ~title:doi_entry.title 923 + ~publisher:(Some doi_entry.publisher) 924 + in 925 + refs := (doi, citation, External) :: !refs 926 + | _ -> () 927 + with _ -> () 928 + ) publisher_matches; 929 + 930 + (* Filter out the note's own DOI from references *) 931 + let own_doi = Bushel_note.doi note in 932 + let filtered_refs = List.filter (fun (doi, _, _) -> 933 + match own_doi with 934 + | Some own -> doi <> own 935 + | None -> true 936 + ) !refs in 937 + List.rev filtered_refs
+3 -3
lib_config/bushel_config.ml
··· 91 91 | None -> path 92 92 else path 93 93 94 - let paper_thumbs_dir t = Filename.concat t.local_output_dir t.paper_thumbs_subdir 95 - let contact_faces_dir t = Filename.concat t.local_output_dir t.contact_faces_subdir 96 - let video_thumbs_dir t = Filename.concat t.local_output_dir t.video_thumbs_subdir 94 + let paper_thumbs_dir t = Filename.concat t.local_source_dir t.paper_thumbs_subdir 95 + let contact_faces_dir t = Filename.concat t.local_source_dir t.contact_faces_subdir 96 + let video_thumbs_dir t = Filename.concat t.local_source_dir t.video_thumbs_subdir 97 97 98 98 (** {1 Tomlt Codecs} *) 99 99
+21 -9
lib_eio/bushel_loader.ml
··· 40 40 41 41 (** Load and map files from a directory *) 42 42 let map_category fs base subdir parse_fn = 43 - let dir = Filename.concat base ("data/" ^ subdir) in 43 + let dir = Filename.concat base subdir in 44 44 Log.debug (fun m -> m "Loading %s" subdir); 45 45 let files = list_md_files fs dir in 46 46 List.filter_map (fun path -> ··· 61 61 let store = Sortal.Store.create fs "sortal" in 62 62 Sortal.Store.list store 63 63 64 - (** Load projects from data/projects/ *) 64 + (** Load projects from projects/ *) 65 65 let load_projects fs base = 66 66 map_category fs base "projects" Bushel.Project.of_frontmatter 67 67 68 - (** Load notes from data/notes/ and data/news/ *) 68 + (** Load notes from notes/ and news/ *) 69 69 let load_notes fs base = 70 70 let notes_dir = map_category fs base "notes" Bushel.Note.of_frontmatter in 71 71 let news_dir = map_category fs base "news" Bushel.Note.of_frontmatter in 72 72 notes_dir @ news_dir 73 73 74 - (** Load ideas from data/ideas/ *) 74 + (** Load ideas from ideas/ *) 75 75 let load_ideas fs base = 76 76 map_category fs base "ideas" Bushel.Idea.of_frontmatter 77 77 78 - (** Load videos from data/videos/ *) 78 + (** Load videos from videos/ *) 79 79 let load_videos fs base = 80 80 map_category fs base "videos" Bushel.Video.of_frontmatter 81 81 82 - (** Load papers from data/papers/ (nested directory structure) *) 82 + (** Load papers from papers/ (nested directory structure) *) 83 83 let load_papers fs base = 84 - let papers_dir = Filename.concat base "data/papers" in 84 + let papers_dir = Filename.concat base "papers" in 85 85 Log.debug (fun m -> m "Loading papers from %s" papers_dir); 86 86 let path = Eio.Path.(fs / papers_dir) in 87 87 let slug_dirs = ··· 139 139 | None -> [] 140 140 in 141 141 Log.info (fun m -> m "Loaded %d images" (List.length images)); 142 - let data_dir = Filename.concat base "data" in 143 - let entries = Bushel.Entry.v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~data_dir () in 142 + let doi_entries = 143 + let doi_path = Filename.concat base "doi.yml" in 144 + try 145 + let content = Eio.Path.load Eio.Path.(fs / doi_path) in 146 + let entries = Bushel.Doi_entry.of_yaml_string content in 147 + Log.info (fun m -> m "Loaded %d DOI entries from %s" (List.length entries) doi_path); 148 + entries 149 + with 150 + | Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> 151 + Log.info (fun m -> m "No DOI cache found at %s" doi_path); 152 + [] 153 + in 154 + let data_dir = base in 155 + let entries = Bushel.Entry.v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~doi_entries ~data_dir () in 144 156 Log.info (fun m -> m "Building link graph"); 145 157 let graph = build_link_graph entries in 146 158 Bushel.Link_graph.set_graph graph;
+50 -30
lib_sync/bushel_http.ml
··· 3 3 SPDX-License-Identifier: ISC 4 4 ---------------------------------------------------------------------------*) 5 5 6 - (** Simple HTTP client using curl via Eio.Process *) 6 + (** HTTP client using the requests library *) 7 7 8 8 let src = Logs.Src.create "bushel.http" ~doc:"HTTP client" 9 9 module Log = (val Logs.src_log src : Logs.LOG) 10 10 11 - (** Run curl and capture stdout *) 12 - let get ~proc_mgr url = 11 + type t = Requests.t 12 + 13 + let create ~sw env = 14 + Requests.create ~sw ~follow_redirects:true env 15 + 16 + let get ~http url = 13 17 Log.debug (fun m -> m "GET %s" url); 14 - let stdout = Buffer.create 4096 in 15 18 try 16 - Eio.Process.run proc_mgr 17 - ~stdout:(Eio.Flow.buffer_sink stdout) 18 - ["curl"; "-s"; "-L"; url]; 19 - Ok (Buffer.contents stdout) 20 - with e -> 21 - Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e)) 19 + let response = Requests.get http url in 20 + if Requests.Response.ok response then begin 21 + let body = Requests.Response.body response |> Eio.Flow.read_all in 22 + Ok body 23 + end else begin 24 + let status = Requests.Response.status_code response in 25 + Error (Printf.sprintf "HTTP %d" status) 26 + end 27 + with exn -> 28 + Error (Printf.sprintf "Request failed: %s" (Printexc.to_string exn)) 22 29 23 - let get_with_header ~proc_mgr ~header url = 30 + let get_with_header ~http ~header url = 24 31 Log.debug (fun m -> m "GET %s (with header)" url); 25 - let stdout = Buffer.create 4096 in 26 32 try 27 - Eio.Process.run proc_mgr 28 - ~stdout:(Eio.Flow.buffer_sink stdout) 29 - ["curl"; "-s"; "-L"; "-H"; header; url]; 30 - Ok (Buffer.contents stdout) 31 - with e -> 32 - Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e)) 33 + (* Parse header "Name: Value" format *) 34 + let name, value = match String.index_opt header ':' with 35 + | Some i -> 36 + let name = String.sub header 0 i in 37 + let value = String.trim (String.sub header (i + 1) (String.length header - i - 1)) in 38 + (name, value) 39 + | None -> (header, "") 40 + in 41 + let headers = Requests.Headers.empty |> Requests.Headers.add_string name value in 42 + let response = Requests.get http ~headers url in 43 + if Requests.Response.ok response then begin 44 + let body = Requests.Response.body response |> Eio.Flow.read_all in 45 + Ok body 46 + end else begin 47 + let status = Requests.Response.status_code response in 48 + Error (Printf.sprintf "HTTP %d" status) 49 + end 50 + with exn -> 51 + Error (Printf.sprintf "Request failed: %s" (Printexc.to_string exn)) 33 52 34 - let post ~proc_mgr ~content_type ~body url = 53 + let post ~http ~content_type ~body url = 35 54 Log.debug (fun m -> m "POST %s" url); 36 - let stdout = Buffer.create 4096 in 37 55 try 38 - Eio.Process.run proc_mgr 39 - ~stdout:(Eio.Flow.buffer_sink stdout) 40 - ["curl"; "-s"; "-L"; 41 - "-X"; "POST"; 42 - "-H"; "Content-Type: " ^ content_type; 43 - "-d"; body; 44 - url]; 45 - Ok (Buffer.contents stdout) 46 - with e -> 47 - Error (Printf.sprintf "curl failed: %s" (Printexc.to_string e)) 56 + let mime = Requests.Mime.of_string content_type in 57 + let body = Requests.Body.of_string mime body in 58 + let response = Requests.post http ~body url in 59 + if Requests.Response.ok response then begin 60 + let body = Requests.Response.body response |> Eio.Flow.read_all in 61 + Ok body 62 + end else begin 63 + let status = Requests.Response.status_code response in 64 + Error (Printf.sprintf "HTTP %d" status) 65 + end 66 + with exn -> 67 + Error (Printf.sprintf "Request failed: %s" (Printexc.to_string exn))
+9 -9
lib_sync/bushel_immich.ml
··· 42 42 43 43 (** {1 Immich API} *) 44 44 45 - let search_person ~proc_mgr ~endpoint ~api_key name = 45 + let search_person ~http ~endpoint ~api_key name = 46 46 let encoded_name = Uri.pct_encode name in 47 47 let url = Printf.sprintf "%s/api/search/person?name=%s" endpoint encoded_name in 48 48 let header = "X-Api-Key: " ^ api_key in 49 49 50 - match Bushel_http.get_with_header ~proc_mgr ~header url with 50 + match Bushel_http.get_with_header ~http ~header url with 51 51 | Result.Error e -> Result.Error e 52 52 | Result.Ok body -> decode_people body 53 53 54 - let download_thumbnail ~proc_mgr ~endpoint ~api_key person_id output_path = 54 + let download_thumbnail ~http ~endpoint ~api_key person_id output_path = 55 55 let url = Printf.sprintf "%s/api/people/%s/thumbnail" endpoint person_id in 56 56 let header = "X-Api-Key: " ^ api_key in 57 57 58 - match Bushel_http.get_with_header ~proc_mgr ~header url with 58 + match Bushel_http.get_with_header ~http ~header url with 59 59 | Result.Error e -> Result.Error e 60 60 | Result.Ok body -> 61 61 try ··· 72 72 73 73 (** {1 Contact Face Fetching} *) 74 74 75 - let fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact = 75 + let fetch_face_for_contact ~http ~endpoint ~api_key ~output_dir contact = 76 76 let names = Sortal_schema.Contact.names contact in 77 77 let handle = Sortal_schema.Contact.handle contact in 78 78 let output_path = Filename.concat output_dir (handle ^ ".jpg") in ··· 91 91 NotFound handle 92 92 | name :: rest -> 93 93 Log.debug (fun m -> m "Trying name: %s" name); 94 - match search_person ~proc_mgr ~endpoint ~api_key name with 94 + match search_person ~http ~endpoint ~api_key name with 95 95 | Result.Error e -> 96 96 Log.err (fun m -> m "Search error for %s: %s" name e); 97 97 Error e ··· 100 100 try_names rest 101 101 | Result.Ok (person :: _) -> 102 102 Log.info (fun m -> m "Found match for %s: %s" name person.name); 103 - match download_thumbnail ~proc_mgr ~endpoint ~api_key person.id output_path with 103 + match download_thumbnail ~http ~endpoint ~api_key person.id output_path with 104 104 | Result.Ok path -> Ok path 105 105 | Result.Error e -> Error e 106 106 in 107 107 try_names names 108 108 end 109 109 110 - let fetch_all_faces ~proc_mgr ~endpoint ~api_key ~output_dir contacts = 110 + let fetch_all_faces ~http ~endpoint ~api_key ~output_dir contacts = 111 111 (* Ensure output directory exists *) 112 112 if not (Sys.file_exists output_dir) then 113 113 Unix.mkdir output_dir 0o755; 114 114 115 115 let results = List.map (fun contact -> 116 116 let handle = Sortal_schema.Contact.handle contact in 117 - let result = fetch_face_for_contact ~proc_mgr ~endpoint ~api_key ~output_dir contact in 117 + let result = fetch_face_for_contact ~http ~endpoint ~api_key ~output_dir contact in 118 118 (handle, result) 119 119 ) contacts in 120 120
+194 -82
lib_sync/bushel_peertube.ml
··· 10 10 11 11 (** {1 Types} *) 12 12 13 + (** Simplified video type used in bushel - projects relevant fields from PeerTube API *) 13 14 type video = { 14 15 id : int; 15 16 uuid : string; ··· 28 29 | Skipped of string 29 30 | Error of string 30 31 31 - (** {1 Date Parsing} *) 32 + (** {1 Conversion from Generated Types} *) 32 33 33 - let parse_date str = 34 - match Ptime.of_rfc3339 str with 35 - | Ok (date, _, _) -> date 36 - | Error _ -> 37 - Log.warn (fun m -> m "Could not parse date: %s" str); 38 - Ptime.epoch 34 + module PT = Peer_tube 39 35 40 - (** {1 Jsont Codecs} *) 36 + (** Extract int from Jsont.json *) 37 + let int_of_json (json : Jsont.json) : int = 38 + match json with 39 + | Jsont.Number (f, _) -> int_of_float f 40 + | _ -> 0 41 41 42 - let ptime_jsont = 43 - Jsont.string |> Jsont.map ~dec:parse_date ~enc:(fun t -> 44 - match Ptime.to_rfc3339 ~frac_s:0 t with 45 - | s -> s) 42 + (** Extract string from Jsont.json *) 43 + let string_of_json (json : Jsont.json) : string = 44 + match json with 45 + | Jsont.String (s, _) -> s 46 + | _ -> "" 46 47 47 - let make_video ~id ~uuid ~name ~description ~url ~embed_path 48 - ~published_at ~originally_published_at ~thumbnail_path ~tags = 49 - { id; uuid; name; description; url; embed_path; 50 - published_at; originally_published_at; thumbnail_path; tags } 51 - 52 - let video_jsont : video Jsont.t = 53 - let open Jsont in 54 - let open Object in 55 - map ~kind:"video" (fun id uuid name description url embed_path 56 - published_at originally_published_at thumbnail_path tags -> 57 - make_video ~id ~uuid ~name ~description ~url ~embed_path 58 - ~published_at ~originally_published_at ~thumbnail_path ~tags) 59 - |> mem "id" int ~enc:(fun v -> v.id) 60 - |> mem "uuid" string ~enc:(fun v -> v.uuid) 61 - |> mem "name" string ~enc:(fun v -> v.name) 62 - |> mem "description" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.description) 63 - |> mem "url" string ~enc:(fun v -> v.url) 64 - |> mem "embedPath" string ~enc:(fun v -> v.embed_path) 65 - |> mem "publishedAt" ptime_jsont ~enc:(fun v -> v.published_at) 66 - |> mem "originallyPublishedAt" (some ptime_jsont) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.originally_published_at) 67 - |> mem "thumbnailPath" (some string) ~dec_absent:None ~enc_omit:Option.is_none ~enc:(fun v -> v.thumbnail_path) 68 - |> mem "tags" (list string) ~dec_absent:[] ~enc:(fun v -> v.tags) 69 - |> finish 48 + (** Convert from generated Peertube.Video.T.t to our simplified video type *) 49 + let video_of_peertube (pt : PT.Video.T.t) : video = 50 + let id = match PT.Video.T.id pt with 51 + | Some id_json -> int_of_json id_json 52 + | None -> 0 53 + in 54 + let uuid = match PT.Video.T.uuid pt with 55 + | Some uuid_json -> string_of_json uuid_json 56 + | None -> "" 57 + in 58 + { 59 + id; 60 + uuid; 61 + name = Option.value ~default:"" (PT.Video.T.name pt); 62 + description = PT.Video.T.truncated_description pt; 63 + url = ""; (* URL is constructed from endpoint + uuid *) 64 + embed_path = Option.value ~default:"" (PT.Video.T.embed_path pt); 65 + published_at = Option.value ~default:Ptime.epoch (PT.Video.T.published_at pt); 66 + originally_published_at = PT.Video.T.originally_published_at pt; 67 + thumbnail_path = PT.Video.T.thumbnail_path pt; 68 + tags = []; (* Tags not in base Video type, would need VideoDetails *) 69 + } 70 70 71 71 type channel_response = { 72 72 total : int; 73 73 data : video list; 74 74 } 75 75 76 - let channel_response_jsont : channel_response Jsont.t = 77 - let open Jsont in 78 - let open Object in 79 - map ~kind:"channel_response" (fun total data -> { total; data }) 80 - |> mem "total" int ~enc:(fun r -> r.total) 81 - |> mem "data" (list video_jsont) ~enc:(fun r -> r.data) 82 - |> finish 83 - 84 - (** {1 JSON decoding helpers} *) 76 + (** {1 JSON decoding using generated library} *) 85 77 86 78 let decode_video json_str = 87 - match Jsont_bytesrw.decode_string video_jsont json_str with 88 - | Ok v -> Result.Ok v 79 + match Jsont_bytesrw.decode_string PT.Video.T.jsont json_str with 80 + | Ok pt -> Result.Ok (video_of_peertube pt) 89 81 | Error e -> Result.Error e 90 82 91 83 let decode_channel_response json_str = 92 - match Jsont_bytesrw.decode_string channel_response_jsont json_str with 93 - | Ok r -> Result.Ok r 84 + match Jsont_bytesrw.decode_string PT.VideoList.Response.jsont json_str with 85 + | Ok r -> 86 + let total = Option.value ~default:0 (PT.VideoList.Response.total r) in 87 + let data = match PT.VideoList.Response.data r with 88 + | Some videos -> List.map video_of_peertube videos 89 + | None -> [] 90 + in 91 + Result.Ok { total; data } 94 92 | Error e -> Result.Error e 95 93 94 + (** {1 URL Parsing} *) 95 + 96 + (** Extract UUID from a PeerTube video URL. 97 + Handles formats like: 98 + - https://example.com/w/UUID 99 + - https://example.com/videos/watch/UUID *) 100 + let uuid_of_url url = 101 + let uri = Uri.of_string url in 102 + let path = Uri.path uri in 103 + (* Split path and find UUID *) 104 + let segments = String.split_on_char '/' path |> List.filter (fun s -> s <> "") in 105 + match segments with 106 + | ["w"; uuid] -> Some uuid 107 + | ["videos"; "watch"; uuid] -> Some uuid 108 + | _ -> None 109 + 110 + (** Extract the origin (scheme + host) from a URL *) 111 + let origin_of_url url = 112 + let uri = Uri.of_string url in 113 + match Uri.scheme uri, Uri.host uri with 114 + | Some scheme, Some host -> 115 + let port = match Uri.port uri with 116 + | Some p -> Printf.sprintf ":%d" p 117 + | None -> "" 118 + in 119 + Some (Printf.sprintf "%s://%s%s" scheme host port) 120 + | _ -> None 121 + 122 + (** Find a configured server that matches the URL's origin *) 123 + let find_server_for_url servers url = 124 + match origin_of_url url with 125 + | None -> None 126 + | Some origin -> 127 + List.find_opt (fun (s : Bushel_config.peertube_server) -> 128 + (* Normalize endpoints for comparison *) 129 + let endpoint = String.lowercase_ascii s.endpoint in 130 + let origin = String.lowercase_ascii origin in 131 + (* Strip trailing slashes *) 132 + let strip_slash s = 133 + if String.length s > 0 && s.[String.length s - 1] = '/' 134 + then String.sub s 0 (String.length s - 1) 135 + else s 136 + in 137 + strip_slash endpoint = strip_slash origin 138 + ) servers 139 + 96 140 (** {1 PeerTube API} *) 97 141 98 - let fetch_video_details ~proc_mgr ~endpoint uuid = 142 + let fetch_video_details ~http ~endpoint uuid = 99 143 let url = Printf.sprintf "%s/api/v1/videos/%s" endpoint uuid in 100 - match Bushel_http.get ~proc_mgr url with 144 + match Bushel_http.get ~http url with 101 145 | Result.Error e -> Result.Error e 102 - | Result.Ok body -> decode_video body 146 + | Result.Ok body -> 147 + match decode_video body with 148 + | Result.Ok v -> Result.Ok v 149 + | Result.Error e -> 150 + Log.warn (fun m -> m "Failed to decode video %s: %s" uuid e); 151 + Log.debug (fun m -> m "Response body: %s" (String.sub body 0 (min 500 (String.length body)))); 152 + Result.Error e 103 153 104 - let fetch_channel_videos ~proc_mgr ~endpoint ~channel ?(count=20) ?(start=0) () = 154 + let fetch_channel_videos ~http ~endpoint ~channel ?(count=20) ?(start=0) () = 105 155 let url = Printf.sprintf "%s/api/v1/video-channels/%s/videos?count=%d&start=%d" 106 156 endpoint channel count start in 107 - match Bushel_http.get ~proc_mgr url with 157 + match Bushel_http.get ~http url with 108 158 | Result.Error _ -> (0, []) 109 159 | Result.Ok body -> 110 160 match decode_channel_response body with 111 161 | Result.Ok r -> (r.total, r.data) 112 162 | Result.Error _ -> (0, []) 113 163 114 - let fetch_all_channel_videos ~proc_mgr ~endpoint ~channel ?(page_size=20) () = 164 + let fetch_all_channel_videos ~http ~endpoint ~channel ?(page_size=20) () = 115 165 let rec fetch_pages start acc = 116 - let (total, videos) = fetch_channel_videos ~proc_mgr ~endpoint ~channel ~count:page_size ~start () in 166 + let (total, videos) = fetch_channel_videos ~http ~endpoint ~channel ~count:page_size ~start () in 117 167 let all = acc @ videos in 118 168 let fetched = start + List.length videos in 119 169 if fetched < total && List.length videos > 0 then ··· 130 180 | Some path -> Some (endpoint ^ path) 131 181 | None -> None 132 182 133 - let download_thumbnail ~proc_mgr ~endpoint video output_path = 183 + let download_thumbnail ~http ~endpoint video output_path = 134 184 match thumbnail_url endpoint video with 135 185 | None -> 136 186 Log.warn (fun m -> m "No thumbnail for video %s" video.uuid); 137 187 Error "No thumbnail available" 138 188 | Some url -> 139 - match Bushel_http.get ~proc_mgr url with 189 + match Bushel_http.get ~http url with 140 190 | Result.Error e -> Error e 141 191 | Result.Ok body -> 142 192 try ··· 198 248 Hashtbl.fold (fun k v acc -> (k, v) :: acc) index [] 199 249 end 200 250 201 - (** {1 Fetch Thumbnails from Index} *) 251 + (** {1 Fetch Thumbnails} *) 252 + 253 + (** Try to fetch a video from a specific server *) 254 + let try_fetch_from_server ~http ~endpoint ~output_path uuid = 255 + match fetch_video_details ~http ~endpoint uuid with 256 + | Result.Error _ -> None 257 + | Result.Ok video -> 258 + match download_thumbnail ~http ~endpoint video output_path with 259 + | Ok path -> Some (Ok path) 260 + | Skipped path -> Some (Skipped path) 261 + | Error _ -> None 262 + 263 + (** Try each server until one succeeds, return the server that worked *) 264 + let try_all_servers ~http ~servers ~output_path uuid = 265 + let rec try_next = function 266 + | [] -> None 267 + | (server : Bushel_config.peertube_server) :: rest -> 268 + Log.debug (fun m -> m "Trying server %s for video %s" server.name uuid); 269 + match try_fetch_from_server ~http ~endpoint:server.endpoint ~output_path uuid with 270 + | Some result -> Some (server, result) 271 + | None -> try_next rest 272 + in 273 + try_next servers 202 274 203 - let fetch_thumbnails_from_index ~proc_mgr ~servers ~output_dir index = 275 + (** Fetch thumbnails for videos, using URL field, index, or server discovery. 276 + Updates the index when servers are discovered. *) 277 + let fetch_thumbnails ~http ~servers ~output_dir ~videos ~index = 204 278 (* Ensure output dir exists *) 205 279 if not (Sys.file_exists output_dir) then 206 280 Unix.mkdir output_dir 0o755; 207 281 208 - let server_map = 209 - List.fold_left (fun acc (s : Bushel_config.peertube_server) -> 210 - (s.name, s.endpoint) :: acc 211 - ) [] servers 212 - in 213 - 214 - let results = List.filter_map (fun (uuid, server_name) -> 282 + let results = List.filter_map (fun (video : Bushel.Video.t) -> 283 + let uuid = Bushel.Video.uuid video in 284 + let url = Bushel.Video.url video in 215 285 let output_path = Filename.concat output_dir (uuid ^ ".jpg") in 216 286 217 - (* Skip if exists *) 287 + (* Skip if thumbnail exists *) 218 288 if Sys.file_exists output_path then begin 219 289 Log.debug (fun m -> m "Skipping %s: thumbnail exists" uuid); 220 290 Some (uuid, Skipped output_path) 221 291 end else begin 222 - match List.assoc_opt server_name server_map with 223 - | None -> 224 - Log.warn (fun m -> m "Unknown server %s for video %s" server_name uuid); 225 - Some (uuid, Error (Printf.sprintf "Unknown server: %s" server_name)) 226 - | Some endpoint -> 227 - Log.info (fun m -> m "Fetching thumbnail for %s from %s" uuid server_name); 228 - match fetch_video_details ~proc_mgr ~endpoint uuid with 229 - | Result.Error e -> 230 - Some (uuid, Error e) 231 - | Result.Ok video -> 232 - match download_thumbnail ~proc_mgr ~endpoint video output_path with 233 - | Ok path -> Some (uuid, Ok path) 234 - | Skipped path -> Some (uuid, Skipped path) 235 - | Error e -> Some (uuid, Error e) 292 + (* Strategy 1: Try to derive server from video URL *) 293 + let server_from_url = 294 + if url <> "" then find_server_for_url servers url 295 + else None 296 + in 297 + 298 + (* Strategy 2: Check the index *) 299 + let server_from_index = 300 + match VideoIndex.find index uuid with 301 + | Some server_name -> 302 + List.find_opt (fun (s : Bushel_config.peertube_server) -> 303 + s.name = server_name) servers 304 + | None -> None 305 + in 306 + 307 + (* Helper to try all servers and update index on success *) 308 + let search_all_servers () = 309 + Log.info (fun m -> m "Searching all servers for video %s" uuid); 310 + match try_all_servers ~http ~servers ~output_path uuid with 311 + | Some (server, result) -> 312 + Log.info (fun m -> m "Found video %s on server %s" uuid server.name); 313 + VideoIndex.add index ~uuid ~server:server.name; 314 + Some (uuid, result) 315 + | None -> 316 + Log.warn (fun m -> m "Video %s not found on any server" uuid); 317 + Some (uuid, Error "Not found on any configured server") 318 + in 319 + 320 + match server_from_url, server_from_index with 321 + | Some server, _ -> 322 + (* Have server from URL - try it first, fall back to searching all *) 323 + Log.info (fun m -> m "Fetching thumbnail for %s from %s (from URL)" uuid server.name); 324 + (match try_fetch_from_server ~http ~endpoint:server.endpoint ~output_path uuid with 325 + | Some result -> 326 + VideoIndex.add index ~uuid ~server:server.name; 327 + Some (uuid, result) 328 + | None -> 329 + Log.info (fun m -> m "URL-derived server failed, trying others..."); 330 + search_all_servers ()) 331 + 332 + | None, Some server -> 333 + (* Have server from index - try it first, fall back to searching all *) 334 + Log.info (fun m -> m "Fetching thumbnail for %s from %s (from index)" uuid server.name); 335 + (match try_fetch_from_server ~http ~endpoint:server.endpoint ~output_path uuid with 336 + | Some result -> Some (uuid, result) 337 + | None -> 338 + Log.info (fun m -> m "Indexed server failed, trying others..."); 339 + search_all_servers ()) 340 + 341 + | None, None -> 342 + (* No server known - search all *) 343 + search_all_servers () 236 344 end 237 - ) (VideoIndex.to_list index) in 345 + ) videos in 238 346 239 347 let ok_count = List.length (List.filter (fun (_, r) -> match r with Ok _ -> true | _ -> false) results) in 240 348 let skipped_count = List.length (List.filter (fun (_, r) -> match r with Skipped _ -> true | _ -> false) results) in ··· 244 352 ok_count skipped_count error_count); 245 353 246 354 results 355 + 356 + (** Legacy function for compatibility - calls fetch_thumbnails with empty video list *) 357 + let fetch_thumbnails_from_index ~http ~servers ~output_dir index = 358 + fetch_thumbnails ~http ~servers ~output_dir ~videos:[] ~index
+28 -16
lib_sync/bushel_sync.ml
··· 22 22 (** Video metadata and thumbnails from PeerTube *) 23 23 module Peertube = Bushel_peertube 24 24 25 - (** Simple HTTP client using curl via Eio.Process *) 25 + (** HTTP client using the requests library *) 26 26 module Http = Bushel_http 27 27 28 28 let src = Logs.Src.create "bushel.sync" ~doc:"Bushel sync pipeline" ··· 55 55 | "typesense" -> Some Typesense 56 56 | _ -> None 57 57 58 - let all_steps = [Images; Thumbs; Faces; Srcsetter; Videos] 58 + let all_steps = [Images; Thumbs; Faces; Videos; Srcsetter] 59 59 let all_steps_with_remote = all_steps @ [Typesense] 60 60 61 61 (** {1 Step Results} *) ··· 303 303 304 304 (** {1 Video Thumbnails} *) 305 305 306 - let sync_video_thumbnails ~dry_run ~proc_mgr config = 306 + let sync_video_thumbnails ~dry_run ~http config entries = 307 307 Log.info (fun m -> m "Syncing video thumbnails from PeerTube..."); 308 308 let output_dir = Bushel_config.video_thumbs_dir config in 309 309 let videos_yml = Filename.concat config.data_dir "videos.yml" in 310 310 311 311 let index = Bushel_peertube.VideoIndex.load_file videos_yml in 312 - let video_list = Bushel_peertube.VideoIndex.to_list index in 313 - let count = List.length video_list in 312 + let videos = Bushel.Entry.videos entries in 313 + let count = List.length videos in 314 314 315 315 if count = 0 then begin 316 - Log.info (fun m -> m "No videos in index"); 316 + Log.info (fun m -> m "No videos found"); 317 317 { step = Videos; success = true; 318 - message = "No videos in index"; 318 + message = "No videos found"; 319 319 details = [] } 320 320 end else if dry_run then begin 321 - let would_fetch = List.filter (fun (uuid, _server) -> 321 + let would_fetch = List.filter (fun video -> 322 + let uuid = Bushel.Video.uuid video in 322 323 let output_path = Filename.concat output_dir (uuid ^ ".jpg") in 323 324 not (Sys.file_exists output_path) 324 - ) video_list in 325 + ) videos in 325 326 let skipped = count - List.length would_fetch in 326 327 { step = Videos; success = true; 327 328 message = Printf.sprintf "Would fetch %d video thumbnails from PeerTube (%d already exist)" 328 329 (List.length would_fetch) skipped; 329 - details = List.map (fun (uuid, server) -> 330 - Printf.sprintf "curl <server:%s>/api/v1/videos/%s -> %s.jpg" server uuid uuid 330 + details = List.map (fun video -> 331 + let uuid = Bushel.Video.uuid video in 332 + let url = Bushel.Video.url video in 333 + if url <> "" then 334 + Printf.sprintf "%s (from URL: %s)" uuid url 335 + else 336 + Printf.sprintf "%s (will search servers)" uuid 331 337 ) (List.filteri (fun i _ -> i < 5) would_fetch) @ 332 338 (if List.length would_fetch > 5 then ["...and more"] else []) } 333 339 end else begin 334 - let results = Bushel_peertube.fetch_thumbnails_from_index 335 - ~proc_mgr 340 + let results = Bushel_peertube.fetch_thumbnails 341 + ~http 336 342 ~servers:config.peertube_servers 337 343 ~output_dir 338 - index in 344 + ~videos 345 + ~index in 346 + 347 + (* Save updated index (may have discovered new server mappings) *) 348 + Bushel_peertube.VideoIndex.save_file videos_yml index; 339 349 340 350 let ok_count = List.length (List.filter (fun (_, r) -> 341 351 match r with Bushel_peertube.Ok _ -> true | _ -> false) results) in ··· 375 385 376 386 (** {1 Run Pipeline} *) 377 387 378 - let run ~dry_run ~env ~config ~steps ~entries = 388 + let run ~dry_run ~sw ~env ~config ~steps ~entries = 379 389 let proc_mgr = Eio.Stdenv.process_mgr env in 380 390 let fs = Eio.Stdenv.fs env in 391 + (* Create HTTP session for network requests *) 392 + let http = Bushel_http.create ~sw env in 381 393 382 394 let results = List.map (fun step -> 383 395 Log.info (fun m -> m "%s step: %s" ··· 388 400 | Srcsetter -> run_srcsetter ~dry_run ~fs ~proc_mgr config 389 401 | Thumbs -> generate_paper_thumbnails ~dry_run ~fs ~proc_mgr config 390 402 | Faces -> sync_faces ~dry_run ~fs config entries 391 - | Videos -> sync_video_thumbnails ~dry_run ~proc_mgr config 403 + | Videos -> sync_video_thumbnails ~dry_run ~http config entries 392 404 | Typesense -> upload_typesense ~dry_run config entries 393 405 ) steps in 394 406
+7 -7
lib_sync/bushel_zotero.ml
··· 156 156 if String.ends_with ~suffix:"/" base_url then base_url ^ "export" 157 157 else base_url ^ "/export" 158 158 159 - let resolve_doi ~proc_mgr ~server_url doi = 159 + let resolve_doi ~http ~server_url doi = 160 160 Log.info (fun m -> m "Resolving DOI: %s" doi); 161 161 let url = web_endpoint server_url in 162 162 let body = "https://doi.org/" ^ doi in 163 - match Bushel_http.post ~proc_mgr ~content_type:"text/plain" ~body url with 163 + match Bushel_http.post ~http ~content_type:"text/plain" ~body url with 164 164 | Error e -> Error e 165 165 | Ok json_str -> 166 166 match Jsont_bytesrw.decode_string Jsont.json json_str with 167 167 | Ok json -> Ok json 168 168 | Error e -> Error (Printf.sprintf "JSON parse error: %s" e) 169 169 170 - let export_bibtex ~proc_mgr ~server_url json = 170 + let export_bibtex ~http ~server_url json = 171 171 let url = export_endpoint server_url ^ "?format=bibtex" in 172 172 match Jsont_bytesrw.encode_string Jsont.json json with 173 173 | Error e -> Error e 174 - | Ok body -> Bushel_http.post ~proc_mgr ~content_type:"application/json" ~body url 174 + | Ok body -> Bushel_http.post ~http ~content_type:"application/json" ~body url 175 175 176 176 (** {1 DOI Resolution} *) 177 177 178 - let resolve ~proc_mgr ~server_url ~slug doi = 179 - match resolve_doi ~proc_mgr ~server_url doi with 178 + let resolve ~http ~server_url ~slug doi = 179 + match resolve_doi ~http ~server_url doi with 180 180 | Error e -> Error e 181 181 | Ok json -> 182 182 (* Export to BibTeX *) 183 - match export_bibtex ~proc_mgr ~server_url json with 183 + match export_bibtex ~http ~server_url json with 184 184 | Error e -> Error (Printf.sprintf "BibTeX export failed: %s" e) 185 185 | Ok bib -> 186 186 Log.debug (fun m -> m "Got BibTeX: %s" bib);
+3 -1
lib_sync/dune
··· 16 16 fmt 17 17 sortal.schema 18 18 sortal 19 - srcsetter-cmd)) 19 + srcsetter-cmd 20 + requests 21 + peertube))