···11+(** PCA and kNN for float32 Bigarray data.

    Portable OCaml implementations of PCA dimensionality reduction and
    k-nearest-neighbors classification. Operates on flat float32 Bigarray
    data with explicit shapes.

    {2 Example: embedding classification}

    {[
      (* PCA: reduce 128-dim embeddings to 3 components *)
      let pca = Linalg.pca_fit embeddings ~n_components:3 in
      let projected = Linalg.pca_transform pca embeddings in

      (* kNN: classify using labeled training points. [train] must have the
         same number of features as the points being classified (here the
         3-component [projected] data). *)
      let model = Linalg.knn_fit ~embeddings:train ~labels in
      let result = Linalg.knn_predict model ~k:5 projected in
      (* result.predictions.(i) is the class, result.confidences.(i) in [0,1] *)
    ]} *)
1919+2020+(** {1 Data types} *)
2121+122type vec = (float, Bigarray.float32_elt, Bigarray.c_layout) Bigarray.Array1.t
2323+(** A flat, one-dimensional float32 vector backed by [Bigarray.Array1] with
    C layout. Elements are exchanged as OCaml [float] values but stored in
    single precision. *)
224325type mat = {
426 data : vec;
527 rows : int;
628 cols : int;
729}
3030+(** A 2D float32 matrix in row-major order. Element [(i, j)] is at flat
    index [i * cols + j] in {!mat.data}.

    NOTE(review): the record is exposed concretely, so nothing in this
    interface enforces that [data] holds [rows * cols] elements — presumably
    callers building a [mat] by hand must maintain that themselves; confirm. *)
3232+3333+(** {1 Matrix operations} *)
834935val create_mat : rows:int -> cols:int -> mat
3636+(** [create_mat ~rows ~cols] creates a zero-initialized matrix.

    NOTE(review): behaviour for zero or negative [rows] / [cols] is not
    stated here — confirm against the implementation. *)
3737+1038val mat_get : mat -> int -> int -> float
3939+(** [mat_get m row col] returns the element of [m] at [(row, col)].

    NOTE(review): whether out-of-range [row] / [col] are bounds-checked (and
    which exception is raised) is not stated here — confirm against the
    implementation. *)
4040+1141val mat_set : mat -> int -> int -> float -> unit
4242+(** [mat_set m row col v] sets the element of [m] at [(row, col)] to [v],
    updating [m] in place. The backing storage is float32, so [v] is held in
    single precision.

    NOTE(review): whether out-of-range [row] / [col] are bounds-checked is
    not stated here — confirm against the implementation. *)
4343+4444+(** {1 PCA} *)
12451346type pca_model
4747+(** A fitted PCA model storing the mean vector and principal components.

    The type is abstract: values are produced by {!pca_fit} and consumed by
    {!pca_transform}. *)
14481549val pca_fit : ?max_samples:int -> mat -> n_components:int -> pca_model
5050+(** [pca_fit ?max_samples m ~n_components] fits PCA on a data matrix [m] of
    shape [(n_samples, n_features)].

    Uses power iteration on the covariance matrix, which is efficient for
    the typical case of 128 features (128x128 covariance).

    NOTE(review): behaviour when [n_components] exceeds [n_features], or
    when [m] has no rows, is not stated here — confirm against the
    implementation.

    @param max_samples Limit rows used for covariance estimation
    (default: 100,000). Rows are sampled evenly if the matrix is larger.
    @param n_components Number of principal components to compute.
    @return A fitted model to pass to {!pca_transform}. *)
5858+1659val pca_transform : pca_model -> mat -> mat
6060+(** [pca_transform model m] projects data through a fitted PCA model.

    Input: [(n_samples, n_features)]. Output: [(n_samples, n_components)].

    NOTE(review): presumably [n_features] must match the matrix the model
    was fitted on; what happens on a mismatch is not stated here — confirm
    against the implementation. *)
6363+6464+(** {1 kNN} *)
17651866type knn_model
6767+(** A fitted kNN model storing training embeddings and labels.

    The type is abstract: values are produced by {!knn_fit} and consumed by
    {!knn_predict}. *)
19682069val knn_fit : embeddings:mat -> labels:int array -> knn_model
7070+(** [knn_fit ~embeddings ~labels] creates a kNN model from labeled training
    data.

    NOTE(review): what happens when [Array.length labels] differs from
    [embeddings.rows] is not stated here, nor whether the model copies or
    shares [embeddings] — confirm against the implementation.

    @param embeddings Training data of shape [(n_training, n_features)].
    @param labels Integer class labels, length [n_training].
    @return A model to pass to {!knn_predict}. *)
21742275type knn_result = {
2376 predictions : int array;
2477 confidences : float array;
2578}
7979+(** Classification output, indexed by input sample. [predictions.(i)] is
    the predicted class for sample [i]. [confidences.(i)] is in
    [\[0, 1\]] — the fraction of distance-weighted votes for the winning
    class. *)
26822783val knn_predict : knn_model -> k:int -> mat -> knn_result
8484+(** [knn_predict model ~k m] classifies samples using k-nearest neighbors
    with distance weighting.

    Input: [(n_samples, n_features)]. [k] is capped to the number of
    training points. Weights are [1 / (distance + epsilon)] to handle
    exact matches gracefully.

    NOTE(review): the distance metric, the behaviour for [k <= 0], and the
    behaviour when [n_features] differs from the training embeddings are not
    stated here — confirm against the implementation.

    @return One prediction and one confidence per input row; see
    {!knn_result}. *)
+26
tessera-linalg/tessera-linalg.opam
···11+# This file is generated by dune, edit dune-project instead
22+opam-version: "2.0"
33+synopsis: "PCA and kNN for float32 Bigarray data"
44+description:
55+ "Portable OCaml implementations of PCA dimensionality reduction and k-nearest-neighbors classification. Operates on flat float32 Bigarray data with explicit shapes."
66+license: "ISC"
77+depends: [
88+ "dune" {>= "3.17"}
99+ "ocaml" {>= "5.2"}
1010+ "alcotest" {with-test & >= "0.8"}
1111+ "odoc" {with-doc}
1212+]
1313+build: [
1414+ ["dune" "subst"] {dev}
1515+ [
1616+ "dune"
1717+ "build"
1818+ "-p"
1919+ name
2020+ "-j"
2121+ jobs
2222+ "@install"
2323+ "@runtest" {with-test}
2424+ "@doc" {with-doc}
2525+ ]
2626+]