My working unpac space for OCaml projects in development
at opam/upstream/stringext 337 lines 10 kB view raw
open String

(** [string_after s n] is the suffix of [s] starting at index [n].
    @raise Invalid_argument if [n] is not a valid position in [s]. *)
let string_after s n = String.sub s n (String.length s - n)

(** [quote s] returns a copy of [s] in which each of the characters
    [\[ \] * . \\ ? + ^ $] is preceded by a backslash. *)
let quote s =
  (* Worst case every character is escaped, hence twice the length. *)
  let buf = Buffer.create (2 * String.length s) in
  String.iter
    (fun c ->
       (match c with
        | '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' ->
          Buffer.add_char buf '\\'
        | _ -> ());
       Buffer.add_char buf c)
    s;
  Buffer.contents buf

(* Not tail recursive for "performance", please choose low values for
   [max]. The idea is that [max] is always small because it is hard
   coded. *)
(** [split_char_bounded str ~on ~max] splits [str] on the character [on],
    from the left. Once [max - 1] tokens have been produced the rest of
    the string is returned as the last token. Returns [[]] when [str] is
    empty and [[str]] when [max = 1]. *)
let split_char_bounded str ~on ~max =
  if str = "" then []
  else if max = 1 then [str]
  else
    let rest offset = [sub str offset (length str - offset)] in
    let rec loop offset tokens =
      if tokens = max - 1 then rest offset
      else
        match index_from str offset on with
        | exception Not_found -> rest offset
        | index when index = offset ->
          "" :: loop (offset + 1) (tokens + 1)
        | index ->
          let token = String.sub str offset (index - offset) in
          token :: loop (index + 1) (tokens + 1)
    in
    loop 0 0

(** [split_char_unbounded str ~on] splits [str] on every occurrence of
    the character [on]. Adjacent separators produce empty tokens.
    Returns [[]] when [str] is empty. *)
let split_char_unbounded str ~on =
  if str = "" then []
  else
    (* Scan from the right so tokens can be consed in order; the
       [match ... with exception] form keeps [loop] in tail position. *)
    let rec loop acc offset =
      match rindex_from str offset on with
      | exception Not_found -> sub str 0 (offset + 1) :: acc
      | index when index = offset -> loop ("" :: acc) (index - 1)
      | index ->
        let token = sub str (index + 1) (offset - index) in
        loop (token :: acc) (index - 1)
    in
    loop [] (length str - 1)

(** [of_char c] is the one-character string made of [c]. *)
let of_char = String.make 1

(** [full_split str ~on] splits [str] on the character [on] and keeps
    each separator as its own one-character element of the result.
    Empty tokens are not emitted. Returns [[]] when [str] is empty. *)
let full_split str ~on =
  if str = "" then []
  else
    let sep = of_char on in
    let rec loop acc offset =
      match rindex_from str offset on with
      | exception Not_found ->
        (* [offset] is [-1] when the string starts with a separator. *)
        if offset >= 0 then sub str 0 (offset + 1) :: acc else acc
      | index when index = offset -> loop (sep :: acc) (index - 1)
      | index ->
        let token = sub str (index + 1) (offset - index) in
        loop (sep :: token :: acc) (index - 1)
    in
    loop [] (length str - 1)

(* copying core's convention for String.split but with an optional max
   argument *)
(** [split ?max s ~on] splits [s] on the character [on]. Without [max]
    every separator is used (see [split_char_unbounded]); with [max] the
    split is bounded (see [split_char_bounded]). *)
let split ?max s ~on =
  match max with
  | None -> split_char_unbounded s ~on
  | Some max -> (* assert (max < 100); *)
    split_char_bounded s ~on ~max

(** [rindex_from_on s ~offset ~on] is the greatest index [i <= offset]
    such that [s.[i]] is one of the characters of [on].
    @raise Not_found if there is no such index.
    @raise Invalid_argument if [offset] is past the end of [s]. *)
let rindex_from_on s ~offset ~on =
  let rec loop i =
    if i < 0 then raise Not_found
    else if String.contains on s.[i] then i
    else loop (i - 1)
  in
  loop offset

(** [trim_left_sub s ~pos ~len ~chars] is the substring of [s] starting
    at [pos] of length [len], with leading characters that belong to
    [chars] removed. *)
let trim_left_sub s ~pos ~len ~chars =
  let final = pos + len in
  (* First index in [pos, final) whose character is not in [chars]. *)
  let rec skip i =
    if i < final && String.contains chars s.[i] then skip (i + 1) else i
  in
  let start_pos = skip pos in
  String.sub s start_pos (len - (start_pos - pos))

(** [split_trim_left str ~on ~trim] splits [str] on any character of
    [on] and removes leading characters belonging to [trim] from each
    token. Returns [[]] when [str] is empty. *)
let split_trim_left str ~on ~trim =
  if str = "" then []
  else
    let rec loop acc offset =
      match rindex_from_on str ~offset ~on with
      | exception Not_found ->
        trim_left_sub str ~pos:0 ~len:(offset + 1) ~chars:trim :: acc
      | index when index = offset -> loop ("" :: acc) (index - 1)
      | index ->
        let token =
          trim_left_sub str ~pos:(index + 1) ~len:(offset - index)
            ~chars:trim
        in
        loop (token :: acc) (index - 1)
    in
    loop [] (length str - 1)

(* No longer raised inside this file; kept so that the set of
   definitions exposed by the module does not change. *)
exception Found_int of int

(** [first_char_ne s c] is [true] iff [s] is non-empty and its first
    character differs from [c]. *)
let first_char_ne s c =
  String.length s > 0 && s.[0] <> c

(** [trim_left s] removes leading space characters (only [' ']) from
    [s]. [s] itself is returned when there is nothing to remove. *)
let trim_left s =
  if first_char_ne s ' ' then s
  else
    let len = String.length s in
    let rec first_non_space i =
      if i < len && s.[i] = ' ' then first_non_space (i + 1) else i
    in
    let non_space = first_non_space 0 in
    sub s non_space (len - non_space)

(** [substr_eq ?start s ~pattern] is [true] iff [pattern] occurs in [s]
    at index [start] (default [0]). An empty [pattern] matches at any
    [start]; a non-empty [pattern] that does not fit inside [s] at
    [start] yields [false]. Never raises. *)
let substr_eq ?(start=0) s ~pattern =
  let plen = String.length pattern in
  let rec matches i =
    i = plen || (s.[start + i] = pattern.[i] && matches (i + 1))
  in
  plen = 0
  (* Written as [start <= slen - plen] so that a huge [start] cannot
     overflow the comparison. *)
  || (start >= 0 && start <= String.length s - plen && matches 0)

(** [find_from ?start str ~pattern] is [Some i] where [i] is the
    smallest index [>= start] (default [0]) at which [pattern] occurs
    in [str], or [None] if there is none. *)
let find_from ?(start=0) str ~pattern =
  let last = String.length str - String.length pattern in
  let rec scan i =
    if i > last then None
    else if substr_eq ~start:i str ~pattern then Some i
    else scan (i + 1)
  in
  scan start

(** [find_min l ~f] is [Some (x, f x)] for the first element [x] of [l]
    that minimises [f] (polymorphic comparison), or [None] if [l] is
    empty. [f] is applied once to every element, in list order. *)
let find_min l ~f =
  let rec loop x fx = function
    | [] -> Some (x, fx)
    | x' :: xs ->
      let fx' = f x' in
      if fx' < fx then loop x' fx' xs
      else loop x fx xs
  in
  match l with
  | [] -> None
  | x :: xs -> loop x (f x) xs

(** [replace_all str ~pattern ~with_] replaces every non-overlapping
    occurrence of [pattern] in [str], scanning left to right, by
    [with_]. An empty [pattern] leaves [str] unchanged (an empty
    pattern matches everywhere without consuming input, which would
    otherwise never terminate). *)
let replace_all str ~pattern ~with_ =
  let slen = String.length str and plen = String.length pattern in
  if plen = 0 then str
  else
    let buf = Buffer.create slen in
    let rec loop i =
      match find_from ~start:i str ~pattern with
      | None ->
        Buffer.add_substring buf str i (slen - i);
        Buffer.contents buf
      | Some j ->
        Buffer.add_substring buf str i (j - i);
        Buffer.add_string buf with_;
        loop (j + plen)
    in
    loop 0

exception Found_replace of int * string * string

(** [replace_all_assoc str tbl] scans [str] left to right and, at each
    step, replaces the occurrence of a pattern of [tbl] that starts
    earliest by its associated string. When several patterns match at
    the current position the first one in [tbl] wins. Entries with an
    empty pattern are ignored, since they would match forever without
    consuming input. *)
let replace_all_assoc str tbl =
  let tbl = List.filter (fun (pattern, _) -> pattern <> "") tbl in
  let slen = String.length str in
  let buf = Buffer.create slen in
  let rec loop i =
    if i >= slen then Buffer.contents buf
    else
      let r =
        try
          (* [found] tells a real match apart from the [max_int]
             placeholder used for patterns that do not occur. *)
          let found = ref false in
          let e =
            find_min tbl ~f:(fun (pattern, with_) ->
                match find_from ~start:i str ~pattern with
                | None -> max_int
                | Some j when j = i ->
                  (* Nothing can start earlier: stop searching. *)
                  raise (Found_replace (j, pattern, with_))
                | Some j -> found := true; j)
          in
          (match e with
           | None -> None
           | Some ((pattern, with_), j) when !found ->
             Some (j, pattern, with_)
           | Some _ -> None)
        with Found_replace (j, pattern, with_) -> Some (j, pattern, with_)
      in
      match r with
      | None ->
        Buffer.add_substring buf str i (slen - i);
        Buffer.contents buf
      | Some (j, pattern, with_) ->
        Buffer.add_substring buf str i (j - i);
        Buffer.add_string buf with_;
        loop (j + String.length pattern)
  in
  loop 0

(** [iteri f l] applies [f i x] to each element [x] of the list [l],
    where [i] is the zero-based position of [x]. *)
let iteri f l =
  let rec loop i = function
    | [] -> ()
    | x :: xs -> f i x; loop (succ i) xs
  in
  loop 0 l

(** [of_list xs] is the string whose characters are the elements of
    [xs], in order. *)
let of_list xs =
  let s = Bytes.create (List.length xs) in
  iteri (fun i c -> Bytes.set s i c) xs;
  (* [s] is not used after this point, so sharing it is safe. *)
  Bytes.unsafe_to_string s

(** [to_list s] is the list of the characters of [s], in order. *)
let to_list s =
  let rec loop acc i =
    if i = -1 then acc
    else loop (s.[i] :: acc) (pred i)
  in
  loop [] (String.length s - 1)

(** [of_array a] is the string whose characters are the elements of
    [a], in order. *)
let of_array a =
  let len = Array.length a in
  let bytes = Bytes.create len in
  for i = 0 to len - 1 do
    Bytes.set bytes i a.(i)
  done;
  (* [bytes] is not used after this point, so sharing it is safe. *)
  Bytes.unsafe_to_string bytes

(** [to_array s] is the array of the characters of [s], in order. *)
let to_array s = Array.init (String.length s) (String.get s)

(* ripped off from one of dbuenzli's libs *)
(** [cut s ~on] splits [s] at the first (leftmost) occurrence of the
    separator [on]. Returns [Some (left, right)] with the parts before
    and after the separator, or [None] if [on] does not occur in [s]
    (in particular when [s] is empty).
    @raise Invalid_argument if [on] is empty. *)
let cut s ~on =
  let sep_max = length on - 1 in
  if sep_max < 0 then invalid_arg "Stringext.cut: empty separator" else
  let s_max = length s - 1 in
  if s_max < 0 then None else
  let k = ref 0 in
  let i = ref 0 in
  (* We run from the start of [s] to end with [i] trying to match the
     first character of [on] in [s]. If this matches, we verify that
     the whole [on] is matched using [k]. If it doesn't match we
     continue to look for [on] with [i]. If it matches we exit the
     loop and extract a substring from the start of [s] to the
     position before the [on] we found and another from the position
     after the [on] we found to end of string. If [i] is such that no
     separator can be found we exit the loop and return the no match
     case. *)
  try
    while (!i + sep_max <= s_max) do
      (* Check remaining [on] chars match, access to unsafe s (!i + !k) is
         guaranteed by loop invariant. *)
      if unsafe_get s !i <> unsafe_get on 0 then incr i else begin
        k := 1;
        while (!k <= sep_max && unsafe_get s (!i + !k) = unsafe_get on !k)
        do incr k done;
        if !k <= sep_max then (* no match *) incr i else raise Exit
      end
    done;
    None (* no match in the whole string. *)
  with
  | Exit -> (* i is at the beginning of the separator *)
    let left_end = !i - 1 in
    let right_start = !i + sep_max + 1 in
    Some (sub s 0 (left_end + 1),
          sub s right_start (s_max - right_start + 1))

(** [rcut s ~on] is like [cut] but splits at the last (rightmost)
    occurrence of the separator [on].
    @raise Invalid_argument if [on] is empty. *)
let rcut s ~on =
  let sep_max = length on - 1 in
  if sep_max < 0 then invalid_arg "Stringext.rcut: empty separator" else
  let s_max = length s - 1 in
  if s_max < 0 then None else
  let k = ref 0 in
  let i = ref s_max in
  (* We run from the end of [s] to the beginning with [i] trying to
     match the last character of [on] in [s]. If this matches, we
     verify that the whole [on] is matched using [k] (we do that
     backwards). If it doesn't match we continue to look for [on]
     with [i]. If it matches we exit the loop and extract a
     substring from the start of [s] to the position before the
     [on] we found and another from the position after the [on] we
     found to end of string. If [i] is such that no separator can
     be found we exit the loop and return the no match case. *)
  try
    while (!i >= sep_max) do
      if unsafe_get s !i <> unsafe_get on sep_max then decr i else begin
        (* Check remaining [on] chars match, access to unsafe_get
           s (sep_start + !k) is guaranteed by loop invariant. *)
        let sep_start = !i - sep_max in
        k := sep_max - 1;
        while (!k >= 0 && unsafe_get s (sep_start + !k) = unsafe_get on !k)
        do decr k done;
        if !k >= 0 then (* no match *) decr i else raise Exit
      end
    done;
    None (* no match in the whole string. *)
  with
  | Exit -> (* i is at the end of the separator *)
    let left_end = !i - sep_max - 1 in
    let right_start = !i + 1 in
    Some (sub s 0 (left_end + 1),
          sub s right_start (s_max - right_start + 1))

(** [chop_prefix s ~prefix] is [Some rest] when [s] is [prefix]
    followed by [rest], and [None] when [s] does not start with
    [prefix]. *)
let chop_prefix s ~prefix =
  let prefix_l = String.length prefix in
  let string_l = String.length s in
  if prefix_l <= string_l && substr_eq s ~pattern:prefix
  then Some (String.sub s prefix_l (string_l - prefix_l))
  else None

(** [drop s n] is [s] without its first [n] characters, or [""] when
    [n] is at least the length of [s].
    @raise Invalid_argument if [n] is negative. *)
let drop s n =
  let l = String.length s in
  if n >= l
  then ""
  else String.sub s n (l - n)

(** [take s n] is the first [n] characters of [s], or [s] itself when
    [n] is at least the length of [s].
    @raise Invalid_argument if [n] is negative. *)
let take s n =
  if n >= String.length s
  then s
  else String.sub s 0 n