My personal data management layer
at main 101 lines 3.5 kB view raw
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** Utility functions for Bushel *)

(** [count_words text] is the number of maximal runs of non-whitespace
    characters in [text]. Only space, tab, newline and carriage return are
    treated as whitespace. The empty string has zero words. *)
let count_words (text : string) : int =
  let len = String.length text in
  let is_whitespace = function
    | ' ' | '\t' | '\n' | '\r' -> true
    | _ -> false
  in
  (* A word is counted when its first character is seen, so no fix-up is
     needed when the text ends in the middle of a word. *)
  let rec scan index in_word count =
    if index >= len then count
    else if is_whitespace text.[index] then scan (index + 1) false count
    else scan (index + 1) true (if in_word then count else count + 1)
  in
  scan 0 false 0

(* Split [lines] at the first pair of consecutive empty lines, returning the
   lines before the pair and the lines after it, both in their original
   order. Without such a pair every line lands in the first component. *)
let split_at_blank_pair lines =
  let rec aux acc = function
    | [] -> (List.rev acc, [])
    | "" :: "" :: rest -> (List.rev acc, rest)
    | line :: rest -> aux (line :: acc) rest
  in
  aux [] lines

(** [first_hunk s] is the text of [s] up to (and excluding) the first pair of
    consecutive empty lines, or all of [s] if there is no such pair.

    Note that the separator is two empty lines in a row, i.e. three
    consecutive newline characters (or two at the very start or end of [s]);
    a single blank line does not end the hunk. *)
let first_hunk s =
  let first, _ = split_at_blank_pair (String.split_on_char '\n' s) in
  String.concat "\n" first

(** [first_and_last_hunks s] is [(first, last)] where [first] is
    [first_hunk s] and [last] is everything that follows the first pair of
    consecutive empty lines, with its lines kept in their original order.
    [last] is the empty string when [s] contains no such pair. *)
let first_and_last_hunks s =
  let first, rest = split_at_blank_pair (String.split_on_char '\n' s) in
  (* [rest] is already in document order, so it must not be reversed. *)
  (String.concat "\n" first, String.concat "\n" rest)

(** [find_footnote_lines s] is the list of lines of [s], in order, that belong
    to footnote definitions.

    A definition line is longer than three characters, starts with [[^], and
    has [']'] immediately before its first [':']. Lines that start with a
    space or a tab and directly follow a collected line are kept as
    continuations; any other line ends the current footnote. *)
let find_footnote_lines s =
  let is_footnote_def line =
    String.length line > 3
    && line.[0] = '['
    && line.[1] = '^'
    &&
    match String.index_opt line ':' with
    | Some colon_pos -> colon_pos > 2 && line.[colon_pos - 1] = ']'
    | None -> false
  in
  let is_continuation line =
    String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
  in
  let rec collect acc in_footnote = function
    | [] -> List.rev acc
    | line :: rest ->
      if is_footnote_def line || (in_footnote && is_continuation line) then
        collect (line :: acc) true rest
      else collect acc false rest
  in
  collect [] false (String.split_on_char '\n' s)

(** [first_hunk_with_footnotes s] is the first hunk of [s] followed, when the
    remainder of [s] contains footnote definitions, by an empty line and
    those footnote lines. When there are none it is just the first hunk. *)
let first_hunk_with_footnotes s =
  let first, last = first_and_last_hunks s in
  match find_footnote_lines last with
  | [] -> first
  | footnote_lines -> first ^ "\n\n" ^ String.concat "\n" footnote_lines

(* Matches a run of three or more consecutive newlines. Compiled once at
   module initialisation rather than on every [normalize_body] call. *)
let excess_newlines_re =
  Re.compile (Re.seq [ Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n') ])

(** [normalize_body s] trims [s] with [String.trim] and then replaces every
    run of three or more consecutive newlines with exactly two. *)
let normalize_body s =
  Re.replace_string excess_newlines_re ~by:"\n\n" (String.trim s)

(** [extract_domain url] is the host component of [url] as parsed by [Uri],
    or ["unknown"] when the URL has no host.

    This is deliberately best-effort: any exception raised while parsing also
    yields ["unknown"] instead of propagating. *)
let extract_domain url =
  match Uri.host (Uri.of_string url) with
  | Some host -> host
  | None -> "unknown"
  | exception _ -> "unknown"

(** [is_url s] is [true] iff [s] starts with ["http://"] or ["https://"].
    The comparison is case-sensitive and nothing after the scheme is
    validated. *)
let is_url s =
  List.exists
    (fun prefix -> String.starts_with ~prefix s)
    [ "http://"; "https://" ]