My aggregated monorepo of OCaml code, automaintained
at main 264 lines 10 kB view raw
1(* Generate an Atom feed from compiled odocl blog posts. 2 3 Uses odoc's HTML generator to render full post content into the feed, 4 so feed readers get the same HTML as the website. No external Atom 5 library — we emit the XML directly. *) 6 7(** {1 Date helpers} *) 8 9(** Parse an ISO 8601 date string like "2026-03-02" into (y, m, d). *) 10let parse_date s = 11 try Scanf.sscanf s "%d-%d-%d" (fun y m d -> Some (y, m, d)) 12 with _ -> None 13 14(** Format a date triple as an Atom datetime (midnight UTC). *) 15let atom_datetime (y, m, d) = 16 Printf.sprintf "%04d-%02d-%02dT00:00:00Z" y m d 17 18(** {1 XML helpers} *) 19 20let xml_escape s = 21 let buf = Buffer.create (String.length s) in 22 String.iter 23 (function 24 | '&' -> Buffer.add_string buf "&amp;" 25 | '<' -> Buffer.add_string buf "&lt;" 26 | '>' -> Buffer.add_string buf "&gt;" 27 | '"' -> Buffer.add_string buf "&quot;" 28 | c -> Buffer.add_char buf c) 29 s; 30 Buffer.contents buf 31 32(** {1 Odoc content extraction} *) 33 34(** Extract text from a custom tag's payload. 35 Custom tags like [@published 2026-03-02] are stored as 36 [`Tag (`Custom ("published", elements))]. *) 37let text_of_tag_payload elements = 38 let buf = Buffer.create 32 in 39 List.iter 40 (fun (el : 41 Odoc_model.Comment.nestable_block_element 42 Odoc_model.Location_.with_location) -> 43 match el.Odoc_model.Location_.value with 44 | `Paragraph inlines -> 45 List.iter 46 (fun (il : 47 Odoc_model.Comment.inline_element 48 Odoc_model.Location_.with_location) -> 49 match il.value with 50 | `Word w -> Buffer.add_string buf w 51 | `Space -> Buffer.add_char buf ' ' 52 | _ -> ()) 53 inlines 54 | _ -> ()) 55 elements; 56 String.trim (Buffer.contents buf) 57 58(** Find a custom tag by name in the page's content elements. *) 59let find_custom_tag name (docs : Odoc_model.Comment.docs) = 60 List.find_map 61 (fun (el : 62 Odoc_model.Comment.block_element 63 Odoc_model.Location_.with_location) -> 64 match el.value with 65 | `Tag (`Custom (n, payload)) when n = name -> 66 Some (text_of_tag_payload payload) 67 | _ -> None) 68 docs.elements 69 70(** {1 Entry type and extraction} *) 71 72type entry = { 73 url : string; 74 title : string; 75 summary : string; 76 content : string; 77 published : int * int * int; 78} 79 80let entry_of_odocl odoc_file = 81 let report_error during msg = 82 Format.eprintf "Error processing '%s' while %s: %s\n%!" 83 (Fpath.to_string odoc_file) during msg; 84 None 85 in 86 match Odoc_odoc.Odoc_file.load odoc_file with 87 | Error (`Msg m) -> report_error "loading file" m 88 | Ok unit -> ( 89 match unit.content with 90 | Odoc_odoc.Odoc_file.Page_content page -> ( 91 let published_str = find_custom_tag "published" page.content in 92 match published_str with 93 | None -> None 94 | Some s when s = "never" || s = "draft" -> None 95 | Some published_str -> ( 96 match parse_date published_str with 97 | None -> 98 Format.eprintf "Bad date '%s' in %s\n%!" published_str 99 (Fpath.to_string odoc_file); 100 None 101 | Some published -> ( 102 let document = 103 Odoc_document.Renderer.document_of_page ~syntax:OCaml page 104 in 105 match document with 106 | Odoc_document.Types.Document.Source_page _ -> None 107 | Odoc_document.Types.Document.Page p -> ( 108 let first_heading = 109 List.find_map 110 (function 111 | Odoc_document.Types.Item.Heading h -> Some h 112 | _ -> None) 113 p.preamble 114 in 115 match first_heading with 116 | None -> report_error "parsing" "no heading found" 117 | Some h -> 118 let title = 119 List.filter_map 120 (function 121 | Odoc_document.Types.Inline.{ desc = Text t; _ } 122 -> 123 Some t 124 | _ -> None) 125 h.title 126 |> String.concat "" 127 in 128 if title = "" then None 129 else 130 let config = 131 Odoc_html.Config.v ~semantic_uris:false 132 ~indent:false ~flat:false ~open_details:false 133 ~as_json:false ~remap:[] () 134 in 135 let resolve = Odoc_html.Link.Current p.url in 136 let url = 137 let fp = 138 Odoc_html.Generator.filepath p.url ~config 139 in 140 Format.asprintf "https://jon.recoil.org/%s" 141 (Fpath.to_string fp) 142 in 143 let all_items = p.preamble @ p.items in 144 let html = 145 Odoc_html.Generator.items ~config ~resolve 146 all_items 147 in 148 let content = 149 Format.asprintf "%a" 150 (Fmt.list (Tyxml.Html.pp_elt ())) 151 html 152 in 153 let summary = 154 let first_text = 155 List.find_map 156 (function 157 | Odoc_document.Types.Item.Text blocks -> 158 List.find_map 159 (function 160 | { Odoc_document.Types.Block.desc = 161 Odoc_document.Types.Block 162 .Paragraph inline; 163 _ 164 } -> 165 let text = 166 List.filter_map 167 (function 168 | Odoc_document.Types 169 .Inline 170 .{ desc = Text t; _ } 171 -> 172 Some t 173 | _ -> None) 174 inline 175 in 176 if text = [] then None 177 else 178 Some (String.concat "" text) 179 | _ -> None) 180 blocks 181 | _ -> None) 182 p.preamble 183 in 184 match first_text with 185 | Some t when String.length t > 200 -> 186 String.sub t 0 200 ^ "..." 187 | Some t -> t 188 | None -> title 189 in 190 Some { url; title; summary; content; published })))) 191 | _ -> None) 192 193(** {1 Discovery and sorting} *) 194 195let is_blog_post path = 196 let basename = Fpath.basename path in 197 Fpath.has_ext "odocl" path 198 && String.length basename > 5 199 && String.sub basename 0 5 = "page-" 200 && basename <> "page-index.odocl" 201 202let compare_entries a b = 203 (* Newest first *) 204 let (y1, m1, d1) = b.published in 205 let (y2, m2, d2) = a.published in 206 compare (y1, m1, d1) (y2, m2, d2) 207 208(** {1 Atom XML generation} *) 209 210let write_atom entries out_path = 211 let oc = open_out out_path in 212 let p = Printf.fprintf in 213 let now = 214 let t = Unix.gettimeofday () in 215 let tm = Unix.gmtime t in 216 Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" 217 (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday 218 tm.tm_hour tm.tm_min tm.tm_sec 219 in 220 p oc "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 221 p oc "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"; 222 p oc " <id>https://jon.recoil.org/atom.xml</id>\n"; 223 p oc " <title>Jon's blog</title>\n"; 224 p oc " <updated>%s</updated>\n" now; 225 p oc " <author>\n"; 226 p oc " <name>Jon Ludlam</name>\n"; 227 p oc " <uri>https://jon.recoil.org/</uri>\n"; 228 p oc " </author>\n"; 229 p oc " <link rel=\"self\" href=\"https://jon.recoil.org/atom.xml\"/>\n"; 230 p oc " <link rel=\"alternate\" href=\"https://jon.recoil.org/blog/\"/>\n"; 231 List.iter 232 (fun e -> 233 let date = atom_datetime e.published in 234 p oc " <entry>\n"; 235 p oc " <id>%s</id>\n" (xml_escape e.url); 236 p oc " <title>%s</title>\n" (xml_escape e.title); 237 p oc " <published>%s</published>\n" date; 238 p oc " <updated>%s</updated>\n" date; 239 p oc " <link rel=\"alternate\" href=\"%s\"/>\n" (xml_escape e.url); 240 p oc " <summary>%s</summary>\n" (xml_escape e.summary); 241 p oc " <content type=\"html\"><![CDATA[%s]]></content>\n" e.content; 242 p oc " </entry>\n") 243 entries; 244 p oc "</feed>\n"; 245 close_out oc 246 247(** {1 Main} *) 248 249let () = 250 let odocl_dir = Fpath.v "_build/default/site/_odoc/blog" in 251 let mlds = 252 Bos.OS.Dir.fold_contents 253 (fun path acc -> if is_blog_post path then path :: acc else acc) 254 [] odocl_dir 255 in 256 match mlds with 257 | Error (`Msg m) -> 258 Format.eprintf "Error finding blog posts: %s\n%!" m; 259 exit 1 260 | Ok mlds -> 261 let entries = List.filter_map entry_of_odocl mlds in 262 let entries = List.sort compare_entries entries in 263 write_atom entries "atom.xml"; 264 Format.printf "Generated atom.xml with %d entries\n%!" (List.length entries)