My aggregated monorepo of OCaml code, automaintained

tessera-linalg: polish docs and generate opam file

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+87
+61
tessera-linalg/lib/linalg.mli
(** PCA and kNN for float32 Bigarray data.

    Portable OCaml implementations of PCA dimensionality reduction and
    k-nearest-neighbors classification. Operates on flat float32 Bigarray
    data with explicit shapes.

    {2 Example: embedding classification}

    {[
      (* PCA: reduce 128-dim embeddings to 3 components *)
      let pca = Linalg.pca_fit embeddings ~n_components:3 in
      let projected = Linalg.pca_transform pca embeddings in

      (* kNN: classify using labeled training points.
         NOTE(review): train presumably needs the same number of columns
         as projected (i.e. already PCA-transformed) -- confirm. *)
      let model = Linalg.knn_fit ~embeddings:train ~labels in
      let result = Linalg.knn_predict model ~k:5 projected in
      (* result.predictions.(i) is the class, result.confidences.(i) in [0,1] *)
    ]} *)

(** {1 Data types} *)

type vec = (float, Bigarray.float32_elt, Bigarray.c_layout) Bigarray.Array1.t
(** A flat float32 vector backed by Bigarray. *)

type mat = {
  data : vec;
  rows : int;
  cols : int;
}
(** A 2D float32 matrix in row-major order. Element [(i, j)] is at flat
    index [i * cols + j] in {!data}. *)

(** {1 Matrix operations} *)

val create_mat : rows:int -> cols:int -> mat
(** [create_mat ~rows ~cols] creates a zero-initialized matrix with [rows]
    rows and [cols] columns. *)

val mat_get : mat -> int -> int -> float
(** [mat_get m row col] returns the element at [(row, col)]. *)

val mat_set : mat -> int -> int -> float -> unit
(** [mat_set m row col v] sets the element at [(row, col)] to [v]. *)

(** {1 PCA} *)

type pca_model
(** A fitted PCA model storing the mean vector and principal components. *)

val pca_fit : ?max_samples:int -> mat -> n_components:int -> pca_model
(** [pca_fit ?max_samples m ~n_components] fits PCA on a data matrix [m] of
    shape [(n_samples, n_features)].

    Uses power iteration on the covariance matrix, which is efficient for
    the typical case of 128 features (128x128 covariance).

    @param max_samples Limit rows used for covariance estimation
      (default: 100,000). Rows are sampled evenly if the matrix is larger.
    @param n_components Number of principal components to compute. *)

val pca_transform : pca_model -> mat -> mat
(** [pca_transform model m] projects data through a fitted PCA model.

    Input: [(n_samples, n_features)]. Output: [(n_samples, n_components)]. *)

(** {1 kNN} *)

type knn_model
(** A fitted kNN model storing training embeddings and labels. *)

val knn_fit : embeddings:mat -> labels:int array -> knn_model
(** [knn_fit ~embeddings ~labels] creates a kNN model from labeled training
    data.

    @param embeddings Training data of shape [(n_training, n_features)].
    @param labels Integer class labels, length [n_training]. *)

type knn_result = {
  predictions : int array;
  confidences : float array;
}
(** Classification output. [predictions.(i)] is the predicted class for
    sample [i]. [confidences.(i)] is in [\[0, 1\]] — the fraction of
    distance-weighted votes for the winning class. *)

val knn_predict : knn_model -> k:int -> mat -> knn_result
(** [knn_predict model ~k m] classifies the samples in [m] using k-nearest
    neighbors with distance weighting.

    Input: [(n_samples, n_features)]. [k] is capped to the number of
    training points. Weights are [1 / (distance + epsilon)] to handle
    exact matches gracefully. *)
+26
tessera-linalg/tessera-linalg.opam
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "PCA and kNN for float32 Bigarray data"
description:
  "Portable OCaml implementations of PCA dimensionality reduction and k-nearest-neighbors classification. Operates on flat float32 Bigarray data with explicit shapes."
license: "ISC"
# alcotest is only required with-test, odoc only with-doc.
depends: [
  "dune" {>= "3.17"}
  "ocaml" {>= "5.2"}
  "alcotest" {with-test & >= "0.8"}
  "odoc" {with-doc}
]
# "dune subst" runs only when the dev filter holds; the @runtest and @doc
# targets are added to the build only under with-test / with-doc.
build: [
  ["dune" "subst"] {dev}
  [
    "dune"
    "build"
    "-p"
    name
    "-j"
    jobs
    "@install"
    "@runtest" {with-test}
    "@doc" {with-doc}
  ]
]