···0000000000000000000001type vec = (float, Bigarray.float32_elt, Bigarray.c_layout) Bigarray.Array1.t
(** A flat float32 vector backed by a one-dimensional, C-layout Bigarray. *)
023type mat = {
4 data : vec;
5 rows : int;
6 cols : int;
7}
(** A 2D float32 matrix in row-major order: element [(i, j)] is stored at
    flat index [i * cols + j] of [data]. *)
000089val create_mat : rows:int -> cols:int -> mat
(** [create_mat ~rows ~cols] creates a zero-initialized matrix. *)
0010val mat_get : mat -> int -> int -> float
(** [mat_get m row col] returns the element at [(row, col)]. *)
0011val mat_set : mat -> int -> int -> float -> unit
(** [mat_set m row col v] sets the element at [(row, col)] to [v]. *)
0001213type pca_model
(** A fitted PCA model storing the mean vector and principal components. *)
01415val pca_fit : ?max_samples:int -> mat -> n_components:int -> pca_model
(** [pca_fit ?max_samples m ~n_components] fits PCA on a data matrix [m] of
    shape [(n_samples, n_features)].

    @param max_samples Limit on the rows used for covariance estimation
    (default: 100,000); rows are sampled evenly if the matrix is larger.
    @param n_components Number of principal components to compute. *)
00000000016val pca_transform : pca_model -> mat -> mat
(** [pca_transform model m] projects [m] through a fitted PCA model.
    Input shape: [(n_samples, n_features)]. Output shape:
    [(n_samples, n_components)]. *)
000001718type knn_model
(** A fitted kNN model storing training embeddings and labels. *)
01920val knn_fit : embeddings:mat -> labels:int array -> knn_model
(** [knn_fit ~embeddings ~labels] creates a kNN model from labeled training
    data.

    @param embeddings Training data of shape [(n_training, n_features)].
    @param labels Integer class labels, of length [n_training]. *)
00002122type knn_result = {
23 predictions : int array;
24 confidences : float array;
25}
(** Classification output. [predictions.(i)] is the predicted class for
    sample [i]; [confidences.(i)] is the fraction of distance-weighted votes
    for the winning class, in the range 0 to 1. *)
0002627val knn_predict : knn_model -> k:int -> mat -> knn_result
(** [knn_predict model ~k m] classifies the rows of [m], of shape
    [(n_samples, n_features)], using distance-weighted k-nearest neighbors.
    [k] is capped to the number of training points. *)
00000
(** PCA and kNN for float32 Bigarray data.

    Portable OCaml implementations of PCA dimensionality reduction and
    k-nearest-neighbors classification. Operates on flat float32 Bigarray
    data with explicit shapes.

    {2 Example: embedding classification}

    {[
      (* PCA: reduce 128-dim embeddings to 3 components *)
      let pca = Linalg.pca_fit embeddings ~n_components:3 in
      let projected = Linalg.pca_transform pca embeddings in

      (* kNN: classify using labeled training points.
         train and labels are the labeled training set; train presumably
         needs the same number of columns as projected (for instance,
         training embeddings passed through the same pca). *)
      let model = Linalg.knn_fit ~embeddings:train ~labels in
      let result = Linalg.knn_predict model ~k:5 projected in
      (* result.predictions.(i) is the class, result.confidences.(i) in [0,1] *)
    ]} *)
19+20+(** {1 Data types} *)
21+22type vec = (float, Bigarray.float32_elt, Bigarray.c_layout) Bigarray.Array1.t
23+(** A flat float32 vector backed by Bigarray: a one-dimensional
    [Bigarray.Array1.t] with [float32_elt] elements and C layout. Elements
    are read and written as OCaml [float] values. *)
2425type mat = {
26 data : vec;
27 rows : int;
28 cols : int;
29}
30+(** A 2D float32 matrix in row-major order. Element [(i, j)] is at flat
    index [i * cols + j] in the [data] field.

    NOTE(review): the record is exposed concretely, so nothing in this
    interface enforces that [data] holds [rows * cols] elements -- confirm
    that callers only build values through [create_mat]. *)
32+33+(** {1 Matrix operations} *)
3435val create_mat : rows:int -> cols:int -> mat
36+(** [create_mat ~rows ~cols] creates a zero-initialized matrix.

    NOTE(review): presumably the result has the given [rows] and [cols];
    the behaviour for zero or negative dimensions is not stated here --
    confirm against the implementation. *)
37+38val mat_get : mat -> int -> int -> float
39+(** [mat_get m row col] returns the element at [(row, col)].

    NOTE(review): bounds handling is not visible from this interface --
    confirm whether an out-of-range [row] or [col] raises. *)
40+41val mat_set : mat -> int -> int -> float -> unit
42+(** [mat_set m row col v] sets the element at [(row, col)] to [v], mutating
    [m] in place.

    NOTE(review): bounds handling is not visible from this interface --
    confirm whether an out-of-range [row] or [col] raises. *)
43+44+(** {1 PCA} *)
4546type pca_model
47+(** A fitted PCA model storing the mean vector and principal components.
    The type is abstract: values are produced by [pca_fit] and consumed by
    [pca_transform]. *)
4849val pca_fit : ?max_samples:int -> mat -> n_components:int -> pca_model
50+(** [pca_fit ?max_samples m ~n_components] fits PCA on a data matrix [m]
    of shape [(n_samples, n_features)].

    Uses power iteration on the covariance matrix, which is efficient for
    the typical case of 128 features (128x128 covariance).

    NOTE(review): the behaviour when [n_components] exceeds [n_features]
    is not stated here -- confirm against the implementation.

    @param max_samples Limit rows used for covariance estimation
    (default: 100,000). Rows are sampled evenly if the matrix is larger.
    @param n_components Number of principal components to compute. *)
58+59val pca_transform : pca_model -> mat -> mat
60+(** [pca_transform model m] projects data through a fitted PCA model.

    Input: [(n_samples, n_features)]. Output: [(n_samples, n_components)].

    NOTE(review): presumably [n_features] must match the matrix the model
    was fitted on -- confirm how a mismatch is reported. *)
63+64+(** {1 kNN} *)
6566type knn_model
67+(** A fitted kNN model storing training embeddings and labels. The type is
    abstract: values are produced by [knn_fit] and consumed by
    [knn_predict]. *)
6869val knn_fit : embeddings:mat -> labels:int array -> knn_model
70+(** [knn_fit ~embeddings ~labels] creates a kNN model from labeled training
    data.

    NOTE(review): whether a length mismatch between [labels] and the rows
    of [embeddings] is checked is not stated here -- confirm.

    @param embeddings Training data of shape [(n_training, n_features)].
    @param labels Integer class labels, length [n_training]. *)
7475type knn_result = {
76 predictions : int array;
77 confidences : float array;
78}
79+(** Classification output, with one entry per input sample in each array.
    [predictions.(i)] is the predicted class for sample [i].
    [confidences.(i)] is in [\[0, 1\]] -- the fraction of distance-weighted
    votes for the winning class. *)
8283val knn_predict : knn_model -> k:int -> mat -> knn_result
84+(** [knn_predict model ~k m] classifies samples using k-nearest neighbors
    with distance weighting.

    Input: [(n_samples, n_features)]. [k] is capped to the number of
    training points. Weights are [1 / (distance + epsilon)] to handle
    exact matches gracefully.

    NOTE(review): the distance metric, the value of epsilon, tie-breaking
    between classes, and the behaviour for [k <= 0] are not stated here --
    confirm against the implementation. *)
+26
tessera-linalg/tessera-linalg.opam
···00000000000000000000000000
···1+# This file is generated by dune, edit dune-project instead
# NOTE(review): because this file is generated, comments added here are
# presumably lost on regeneration -- mirror any change in dune-project.
2+opam-version: "2.0"
3+synopsis: "PCA and kNN for float32 Bigarray data"
4+description:
5+ "Portable OCaml implementations of PCA dimensionality reduction and k-nearest-neighbors classification. Operates on flat float32 Bigarray data with explicit shapes."
6+license: "ISC"
# Dependencies: dune and ocaml are unconditional; alcotest is needed only
# under with-test and odoc only under with-doc.
7+depends: [
8+ "dune" {>= "3.17"}
9+ "ocaml" {>= "5.2"}
10+ "alcotest" {with-test & >= "0.8"}
11+ "odoc" {with-doc}
12+]
# Build: dune subst runs for dev builds only; the dune build step always
# builds @install, and adds @runtest under with-test and @doc under with-doc.
13+build: [
14+ ["dune" "subst"] {dev}
15+ [
16+ "dune"
17+ "build"
18+ "-p"
19+ name
20+ "-j"
21+ jobs
22+ "@install"
23+ "@runtest" {with-test}
24+ "@doc" {with-doc}
25+ ]
26+]