My aggregated monorepo of OCaml code, automaintained

tessera-geotessera: scaffold with grid math

Add new tessera-geotessera library with dune project structure, public
API (.mli), and grid math implementation (snap_to_grid, tiles_for_bbox,
tile_name, URL construction). Includes full implementations of
dequantize, mosaic, and fetch_mosaic_sync. Grid math tests all pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+295
+13
tessera-geotessera/dune-project
(lang dune 3.17)
; Project description for the tessera-geotessera package.
; The (lang ...) stanza above must remain the first line of this file.
(name tessera-geotessera)
; Generate the package's .opam file from the (package ...) stanza below.
(generate_opam_files true)
(license ISC)
(package
 (name tessera-geotessera)
 (synopsis "GeoTessera embedding tile client")
 (description "Fetch, dequantize, and mosaic GeoTessera embedding tiles. Parameterised over I/O for portability.")
 (depends
  (ocaml (>= 5.2))
  (tessera-npy (>= 0.1))
  (tessera-linalg (>= 0.1))
  ; alcotest is only required when building/running the tests
  (alcotest (and :with-test (>= 0.8)))))
+4
tessera-geotessera/lib/dune
; Library exposing the Geotessera module, published as tessera-geotessera.
; It depends on the tessera-npy and tessera-linalg libraries.
(library
 (name geotessera)
 (public_name tessera-geotessera)
 (libraries tessera-npy tessera-linalg))
+146
tessera-geotessera/lib/geotessera.ml
(* GeoTessera tile client: grid math, tile naming / URL construction,
   dequantization of int8 embeddings and mosaicing of tiles. *)

type bbox = {
  min_lon : float;
  min_lat : float;
  max_lon : float;
  max_lat : float;
}

type tile_coord = {
  lon : float;
  lat : float;
}

(* Grid math *)

(* Tiles are 0.1 degree wide and centred on 0.05, 0.15, 0.25, ... *)
let tile_size = 0.1
let half_tile = 0.05

(* Index of the grid cell whose centre is nearest to [v], as an integral
   float (kept as a float so that NaN / infinity propagate visibly). *)
let grid_index v = Float.round ((v -. half_tile) /. tile_size)

(* Centre coordinate of the grid cell with index [k]. *)
let grid_center k = k *. tile_size +. half_tile

(* Snap a coordinate to the nearest grid centre. *)
let snap_to_grid v = grid_center (grid_index v)

(* Enumerate the tile centres between the snapped corners of [bbox],
   latitude-major, both axes ascending.

   Every coordinate is derived from an integer grid index instead of being
   accumulated with repeated [+. 0.1], so there is no rounding drift and each
   coordinate is bit-identical to what [snap_to_grid] returns for a point in
   that tile.  A bbox with NaN or infinite bounds, or with min > max, yields
   the empty list (the accumulating loop never terminated on infinite
   bounds). *)
let tiles_for_bbox bbox =
  let k_lon0 = grid_index bbox.min_lon in
  let k_lat0 = grid_index bbox.min_lat in
  (* Both indices are integral floats, so the spans are exact integers;
     a NaN or infinite index makes the span non-finite. *)
  let lon_span = grid_index bbox.max_lon -. k_lon0 in
  let lat_span = grid_index bbox.max_lat -. k_lat0 in
  if not (Float.is_finite lon_span && Float.is_finite lat_span) then []
  else
    let n_lon = Float.to_int lon_span + 1 in
    let n_lat = Float.to_int lat_span + 1 in
    if n_lon <= 0 || n_lat <= 0 then []
    else
      List.concat
        (List.init n_lat (fun i ->
             let lat = grid_center (k_lat0 +. Float.of_int i) in
             List.init n_lon (fun j ->
                 { lon = grid_center (k_lon0 +. Float.of_int j); lat })))

(* Two-decimal rendering of a coordinate; a value that rounds to negative
   zero is printed as "0.00" rather than "-0.00". *)
let format_coord v =
  let s = Printf.sprintf "%.2f" v in
  if s = "-0.00" then "0.00" else s

(* Tile directory / file stem: "grid_<lon>_<lat>". *)
let tile_name coord =
  Printf.sprintf "grid_%s_%s" (format_coord coord.lon) (format_coord coord.lat)

(* URL of the quantized embedding array of a tile. *)
let embedding_url ~base_url ~version ~year coord =
  let name = tile_name coord in
  Printf.sprintf "%s/%s/global_0.1_degree_representation/%d/%s/%s.npy"
    base_url version year name name

(* URL of the per-pixel scale array of a tile. *)
let scales_url ~base_url ~version ~year coord =
  let name = tile_name coord in
  Printf.sprintf "%s/%s/global_0.1_degree_representation/%d/%s/%s_scales.npy"
    base_url version year name name

(* Dequantization *)

(* Multiply every int8 embedding value by the scale of its pixel.

   [embeddings] has shape (H, W, C) and [scales] holds one value per pixel.
   The result has H*W rows (pixel (i, j) is row i*W + j) and C columns.

   Raises [Failure] when the element types are not int8 / float32, and
   [Invalid_argument] when the shapes or element counts are inconsistent.
   The latter is checked up front so that a wrongly sized scales array is
   reported clearly instead of being read silently or failing mid-loop. *)
let dequantize ~embeddings ~scales =
  let emb_shape = Npy.shape embeddings in
  if Array.length emb_shape <> 3 then
    invalid_arg "dequantize: embeddings must have shape (H, W, C)";
  let h = emb_shape.(0) in
  let w = emb_shape.(1) in
  let n_features = emb_shape.(2) in
  let emb_data = match Npy.data_int8 embeddings with
    | Some d -> d
    | None -> failwith "dequantize: embeddings must be int8"
  in
  let scales_data = match Npy.data_float32 scales with
    | Some d -> d
    | None -> failwith "dequantize: scales must be float32"
  in
  let n_pixels = h * w in
  if Bigarray.Array1.dim emb_data <> n_pixels * n_features then
    invalid_arg "dequantize: embeddings data does not match its shape";
  if Bigarray.Array1.dim scales_data <> n_pixels then
    invalid_arg "dequantize: scales must hold one value per pixel (H * W)";
  let result = Linalg.create_mat ~rows:n_pixels ~cols:n_features in
  for px = 0 to n_pixels - 1 do
    let scale_val = Bigarray.Array1.get scales_data px in
    (* Features of one pixel are contiguous in the flattened (H, W, C) data. *)
    let base = px * n_features in
    for f = 0 to n_features - 1 do
      let emb_val = Bigarray.Array1.get emb_data (base + f) in
      Linalg.mat_set result px f (Float.of_int emb_val *. scale_val)
    done
  done;
  result

(* Mosaic *)

(* Assemble [(coord, data, tile_h, tile_w)] tiles into one matrix and return
   [(matrix, total_h, total_w)]; pixel (r, c) of the mosaic is row
   r * total_w + c.  Row 0 is the northernmost row.

   The grid cell size is the largest tile height / width found in the list.
   Sizing the cell from the first tile only let any larger tile write into
   its neighbours or past the end of the output.  When all tiles have the
   same size the layout is unchanged.

   The bbox argument is accepted for interface compatibility and is not
   used: the extent is derived from the tile coordinates. *)
let mosaic tiles _bbox =
  match tiles with
  | [] -> (Linalg.create_mat ~rows:0 ~cols:0, 0, 0)
  | (_, first_data, _, _) :: _ ->
    let n_features = first_data.Linalg.cols in
    let lower (a : float) b = if b < a then b else a in
    let upper (a : float) b = if b > a then b else a in
    (* Single pass: cell size and geographic extent of the tile set. *)
    let tile_h, tile_w, min_lat, max_lat, min_lon, max_lon =
      List.fold_left
        (fun (ch, cw, lat_lo, lat_hi, lon_lo, lon_hi) (coord, _, th, tw) ->
           ( Int.max ch th,
             Int.max cw tw,
             lower lat_lo coord.lat,
             upper lat_hi coord.lat,
             lower lon_lo coord.lon,
             upper lon_hi coord.lon ))
        (0, 0, Float.infinity, Float.neg_infinity,
         Float.infinity, Float.neg_infinity)
        tiles
    in
    (* Number of whole tiles spanned by a coordinate difference. *)
    let tile_steps delta = Float.to_int (Float.round (delta /. tile_size)) in
    let n_tile_rows = tile_steps (max_lat -. min_lat) + 1 in
    let n_tile_cols = tile_steps (max_lon -. min_lon) + 1 in
    let total_h = n_tile_rows * tile_h in
    let total_w = n_tile_cols * tile_w in
    let output = Linalg.create_mat ~rows:(total_h * total_w) ~cols:n_features in
    List.iter (fun (coord, data, th, tw) ->
        (* lat increases north, but row 0 is northernmost *)
        let row0 = tile_steps (max_lat -. coord.lat) * tile_h in
        let col0 = tile_steps (coord.lon -. min_lon) * tile_w in
        for pi = 0 to th - 1 do
          for pj = 0 to tw - 1 do
            let out_row = (row0 + pi) * total_w + (col0 + pj) in
            let in_row = pi * tw + pj in
            for f = 0 to n_features - 1 do
              Linalg.mat_set output out_row f (Linalg.mat_get data in_row f)
            done
          done
        done
      ) tiles;
    (output, total_h, total_w)

(* High-level API *)

(* Fetch, parse, dequantize and mosaic every tile of [bbox].
   [fetch] maps a URL to the body of the response.  For each tile the
   embedding array is fetched first, then the scales array.
   Raises [Failure] when a downloaded array cannot be parsed. *)
let fetch_mosaic_sync ~fetch ?(base_url="https://dl2.geotessera.org") ?(version="v1") ~year bbox =
  let parse_npy what body =
    match Npy.of_string body with
    | Ok t -> t
    | Error msg -> failwith ("Failed to parse " ^ what ^ ": " ^ msg)
  in
  let load_tile coord =
    let emb_str = fetch (embedding_url ~base_url ~version ~year coord) in
    let scales_str = fetch (scales_url ~base_url ~version ~year coord) in
    let emb_npy = parse_npy "embeddings" emb_str in
    let scales_npy = parse_npy "scales" scales_str in
    let mat = dequantize ~embeddings:emb_npy ~scales:scales_npy in
    let shape = Npy.shape emb_npy in
    (coord, mat, shape.(0), shape.(1))
  in
  mosaic (List.map load_tile (tiles_for_bbox bbox)) bbox
+57
tessera-geotessera/lib/geotessera.mli
(** GeoTessera embedding tile client.

    Fetches, dequantizes, and mosaics GeoTessera embedding tiles.
    Parameterised over I/O for portability. *)

(** Geographic bounding box, in degrees. *)
type bbox = {
  min_lon : float;
  min_lat : float;
  max_lon : float;
  max_lat : float;
}

(** Centre of a 0.1-degree tile, in degrees. *)
type tile_coord = {
  lon : float;
  lat : float;
}

(** {1 Grid math} *)

val snap_to_grid : float -> float
(** Snap a coordinate to the nearest 0.1-degree grid center.
    Grid centers are at 0.05, 0.15, 0.25, ... *)

val tiles_for_bbox : bbox -> tile_coord list
(** Enumerate all tile coordinates overlapping a bounding box.
    The corners of the box are snapped with {!snap_to_grid}; tiles are listed
    by increasing latitude and, within one latitude, by increasing
    longitude. A box whose minimum exceeds its maximum yields the empty
    list. *)

val tile_name : tile_coord -> string
(** Format as grid name, e.g., ["grid_0.15_52.05"].
    Longitude comes first; both values are printed with two decimals, and a
    value that would print as ["-0.00"] is printed as ["0.00"]. *)

val embedding_url : base_url:string -> version:string -> year:int -> tile_coord -> string
(** URL of the embedding array of a tile:
    ["<base_url>/<version>/global_0.1_degree_representation/<year>/<name>/<name>.npy"],
    where [<name>] is {!tile_name} of the coordinate. *)

val scales_url : base_url:string -> version:string -> year:int -> tile_coord -> string
(** URL of the scales array of a tile; same layout as {!embedding_url} but the
    file is named ["<name>_scales.npy"]. *)

(** {1 Dequantization} *)

val dequantize : embeddings:Npy.t -> scales:Npy.t -> Linalg.mat
(** [int8 * float32_scale -> float32]. Input embeddings shape (H,W,128), scales shape (H,W).
    Output: mat with rows=H*W, cols=128.

    Each embedding value is multiplied by the scale of its pixel; pixel
    [(i, j)] is stored in row [i * W + j]. The number of columns is taken
    from the third dimension of [embeddings] (128 in the shapes quoted
    above; presumably the GeoTessera embedding size, to be confirmed).

    @raise Failure if [embeddings] is not int8 or [scales] is not float32. *)

(** {1 Mosaicing} *)

val mosaic : (tile_coord * Linalg.mat * int * int) list -> bbox -> Linalg.mat * int * int
(** Assemble tiles into a single matrix. Each tile is [(coord, data, tile_h, tile_w)].
    Returns [(mosaic_mat, total_h, total_w)].

    Pixel [(r, c)] of the mosaic is stored in row [r * total_w + c]; row 0 is
    the northernmost row and column 0 the westernmost column. Tiles are
    placed according to their coordinates and are expected to share the same
    pixel dimensions. The extent is derived from the tile coordinates: the
    [bbox] argument is currently not used. An empty list returns a 0x0 matrix
    with [total_h = total_w = 0]. *)

(** {1 High-level API} *)

val fetch_mosaic_sync :
  fetch:(string -> string) ->
  ?base_url:string ->
  ?version:string ->
  year:int ->
  bbox ->
  Linalg.mat * int * int
(** Fetch all tiles for a bbox, dequantize, and mosaic.
    [fetch] is a blocking function that retrieves URL contents as a string.
    Default base_url: ["https://dl2.geotessera.org"].
    Default version: ["v1"].

    For every tile returned by {!tiles_for_bbox}, [fetch] is called on
    {!embedding_url} and then on {!scales_url}.

    @raise Failure if a fetched array cannot be parsed or has the wrong
    element type. *)
+4
tessera-geotessera/test/dune
; Alcotest suite for the library.
; Any .npy files under fixtures/ are declared as dependencies of the test.
(test
 (name test_geotessera)
 (libraries tessera-geotessera tessera-npy tessera-linalg alcotest)
 (deps (glob_files fixtures/*.npy)))
+71
tessera-geotessera/test/test_geotessera.ml
(* Unit tests for the grid-math, tile-naming and URL helpers of [Geotessera]. *)

(* Float comparison with the tolerance shared by every numeric check. *)
let check_float msg expected actual =
  Alcotest.(check (float 1e-9)) msg expected actual

(* All cases in this suite run at `Quick speed. *)
let quick name body = Alcotest.test_case name `Quick body

(* One [snap_to_grid] case: [input] must snap to [expected]. *)
let snap_case name msg input expected =
  quick name (fun () ->
      check_float msg expected (Geotessera.snap_to_grid input))

(* Arguments shared by the two URL cases. *)
let url_base = "https://dl2.geotessera.org"
let url_tile : Geotessera.tile_coord = { lon = 0.15; lat = 52.05 }

let snap_tests = [
  snap_case "snap_to_grid 0.12" "0.12 -> 0.15" 0.12 0.15;
  snap_case "snap_to_grid 0.18" "0.18 -> 0.15" 0.18 0.15;
  snap_case "snap_to_grid 0.02" "0.02 -> 0.05" 0.02 0.05;
  snap_case "snap_to_grid -0.12" "-0.12 -> -0.15" (-0.12) (-0.15);
  snap_case "snap_to_grid 0.25" "0.25 -> 0.25" 0.25 0.25;
]

let bbox_tests = [
  quick "tiles_for_bbox small" (fun () ->
      (* 0.11 snaps to 0.15, 0.29 snaps to 0.25 => 2 lon
         52.01 snaps to 52.05, 52.19 snaps to 52.15 => 2 lat
         total = 4 tiles *)
      let tiles =
        Geotessera.tiles_for_bbox
          { min_lon = 0.11; min_lat = 52.01; max_lon = 0.29; max_lat = 52.19 }
      in
      Alcotest.(check int) "4 tiles" 4 (List.length tiles));

  quick "tiles_for_bbox single" (fun () ->
      let tiles =
        Geotessera.tiles_for_bbox
          { min_lon = 0.12; min_lat = 52.12; max_lon = 0.18; max_lat = 52.18 }
      in
      Alcotest.(check int) "1 tile" 1 (List.length tiles);
      let (only : Geotessera.tile_coord) = List.hd tiles in
      check_float "lon" 0.15 only.lon;
      check_float "lat" 52.15 only.lat);
]

let name_tests = [
  quick "tile_name positive" (fun () ->
      let name = Geotessera.tile_name { lon = 0.15; lat = 52.05 } in
      Alcotest.(check string) "name" "grid_0.15_52.05" name);

  quick "tile_name negative zero" (fun () ->
      (* A longitude of -0.0 must not be rendered as "-0.00". *)
      let name = Geotessera.tile_name { lon = -0.0; lat = 0.05 } in
      Alcotest.(check string) "no negative zero" "grid_0.00_0.05" name);
]

let url_tests = [
  quick "embedding_url" (fun () ->
      let url =
        Geotessera.embedding_url
          ~base_url:url_base ~version:"v1" ~year:2021 url_tile
      in
      Alcotest.(check string) "url"
        "https://dl2.geotessera.org/v1/global_0.1_degree_representation/2021/grid_0.15_52.05/grid_0.15_52.05.npy"
        url);

  quick "scales_url" (fun () ->
      let url =
        Geotessera.scales_url
          ~base_url:url_base ~version:"v1" ~year:2021 url_tile
      in
      Alcotest.(check string) "url"
        "https://dl2.geotessera.org/v1/global_0.1_degree_representation/2021/grid_0.15_52.05/grid_0.15_52.05_scales.npy"
        url);
]

let () =
  Alcotest.run "geotessera" [
    ("grid_math", snap_tests @ bbox_tests @ name_tests @ url_tests);
  ]