semantic bufo search
find-bufo.com
bufo
1//! provider abstractions for embedding and vector search backends
2//!
3//! these traits allow swapping implementations (e.g., voyage → openai embeddings)
4//! without changing the search logic.
5//!
6//! ## design notes
7//!
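//! the trait methods are plain `fn`s returning `impl Future + Send`
//! (return-position `impl Trait` in traits, stabilized in rust 1.75 alongside
//! `async fn` in traits). unlike a bare `async fn`, this spells out the `Send`
//! bound on the returned future, even though this crate's use case is
//! single-threaded actix-web.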
10//!
11//! the trait design follows patterns from:
12//! - async-openai's `Config` trait for backend abstraction
13//! - tower's `Service` trait for composability (though simpler here)
14
15use std::future::Future;
16use thiserror::Error;
17
18/// errors that can occur when generating embeddings
19#[derive(Debug, Error)]
20pub enum EmbeddingError {
21 #[error("failed to send request: {0}")]
22 Request(#[from] reqwest::Error),
23
24 #[error("api error ({status}): {body}")]
25 Api { status: u16, body: String },
26
27 #[error("no embedding returned from provider")]
28 EmptyResponse,
29
30 #[error("{0}")]
31 Other(#[from] anyhow::Error),
32}
33
34/// a provider that can generate embeddings for text
35///
36/// implementations should be cheap to clone (wrap expensive resources in Arc).
37///
38/// # example
39///
40/// ```ignore
41/// let client = VoyageEmbedder::new(api_key);
42/// let embedding = client.embed("hello world").await?;
43/// ```
44pub trait Embedder: Send + Sync {
45 /// generate an embedding vector for the given text
46 fn embed(&self, text: &str) -> impl Future<Output = Result<Vec<f32>, EmbeddingError>> + Send;
47
48 /// human-readable name for logging/debugging
49 fn name(&self) -> &'static str;
50}
51
52/// errors that can occur during vector search
53#[derive(Debug, Error)]
54pub enum VectorSearchError {
55 #[error("request failed: {0}")]
56 Request(#[from] reqwest::Error),
57
58 #[error("api error ({status}): {body}")]
59 Api { status: u16, body: String },
60
61 #[error("query too long: {message}")]
62 QueryTooLong { message: String },
63
64 #[error("parse error: {0}")]
65 Parse(String),
66
67 #[error("{0}")]
68 Other(#[from] anyhow::Error),
69}
70
/// one hit returned by a vector store query
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// identifier of the matched item
    pub id: String,
    /// raw distance/score as reported by the backend; how to interpret it
    /// depends on the search method that produced it
    pub score: f32,
    /// free-form string key/value attributes attached to the hit
    pub attributes: std::collections::HashMap<String, String>,
}
80
81/// a provider that can perform vector similarity search
82pub trait VectorStore: Send + Sync {
83 /// search by vector embedding (ANN/cosine similarity)
84 fn search_by_vector(
85 &self,
86 embedding: &[f32],
87 top_k: usize,
88 ) -> impl Future<Output = Result<Vec<SearchResult>, VectorSearchError>> + Send;
89
90 /// search by keyword (BM25 full-text search)
91 fn search_by_keyword(
92 &self,
93 query: &str,
94 top_k: usize,
95 ) -> impl Future<Output = Result<Vec<SearchResult>, VectorSearchError>> + Send;
96
97 /// human-readable name for logging/debugging
98 fn name(&self) -> &'static str;
99}