context_harness_core/
embedding.rs

1//! Embedding provider trait and vector utilities.
2//!
3//! Defines the [`EmbeddingProvider`] trait that all embedding backends
4//! implement, plus pure helper functions for vector serialization and
5//! similarity computation.
6//!
7//! Concrete provider implementations (OpenAI, Ollama, fastembed, tract)
8//! live in the `context-harness` app crate.
9
/// Common interface exposed by every embedding backend.
///
/// The trait only surfaces metadata about the backend: which model it
/// uses and how many components each embedding vector has. Implementors
/// must be `Send + Sync` so a provider can be shared across threads.
pub trait EmbeddingProvider: Send + Sync {
    /// Identifier of the underlying model, such as
    /// `"text-embedding-3-small"`.
    fn model_name(&self) -> &str;

    /// Number of components in each embedding vector produced by this
    /// provider, such as `1536`.
    fn dims(&self) -> usize;
}
21
/// Serialize a slice of `f32` values into a byte BLOB.
///
/// Every value contributes its 4 little-endian bytes, in order, so the
/// returned buffer is exactly `vec.len() × 4` bytes long. An empty slice
/// yields an empty BLOB.
///
/// # Example
///
/// ```rust
/// use context_harness_core::embedding::{vec_to_blob, blob_to_vec};
///
/// let v = vec![1.0f32, -2.5, 3.125];
/// let blob = vec_to_blob(&v);
/// assert_eq!(blob.len(), 12); // 3 × 4 bytes
/// assert_eq!(blob_to_vec(&blob), v);
/// ```
pub fn vec_to_blob(vec: &[f32]) -> Vec<u8> {
    // The final size is known up front, so reserve it once.
    let mut blob = Vec::with_capacity(vec.len() * 4);
    blob.extend(vec.iter().flat_map(|value| value.to_le_bytes()));
    blob
}
44
/// Deserialize a byte BLOB into a vector of `f32` values.
///
/// This is the inverse of [`vec_to_blob`]: the input is consumed in
/// groups of 4 bytes, each interpreted as one little-endian `f32`.
///
/// If `blob.len()` is not a multiple of 4, the trailing 1–3 bytes are
/// ignored; an input shorter than 4 bytes decodes to an empty vector.
pub fn blob_to_vec(blob: &[u8]) -> Vec<f32> {
    let mut floats = Vec::with_capacity(blob.len() / 4);
    for group in blob.chunks_exact(4) {
        // `chunks_exact(4)` only yields 4-byte slices, so this copy
        // cannot panic on a length mismatch.
        let mut word = [0u8; 4];
        word.copy_from_slice(group);
        floats.push(f32::from_le_bytes(word));
    }
    floats
}
54
/// Compute cosine similarity between two embedding vectors.
///
/// Returns a value in `[-1.0, 1.0]`:
/// - `1.0` = identical direction
/// - `0.0` = orthogonal (unrelated)
/// - `-1.0` = opposite direction
///
/// Returns `0.0` when the vectors are empty, have different lengths, or
/// when either vector has zero magnitude (its direction is undefined).
///
/// The dot product and squared norms are accumulated in `f64`. This keeps
/// the result independent of the vectors' scale: squaring very small or
/// very large `f32` components cannot underflow to zero or overflow to
/// infinity in `f64`. The final value is clamped so rounding error can
/// never push it outside the documented range.
///
/// Non-finite components (NaN, ±∞) are not handled specially and may
/// yield NaN.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let (dot, norm_a, norm_b) = a.iter().zip(b).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(dot, norm_a, norm_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, norm_a + x * x, norm_b + y * y)
        },
    );

    // Only a true zero vector lacks a direction. Comparing against an
    // epsilon here would wrongly zero out small but valid vectors.
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let cosine = dot / (norm_a.sqrt() * norm_b.sqrt());

    // The value is within [-1, 1] (or NaN), so narrowing to f32 is safe.
    cosine.clamp(-1.0, 1.0) as f32
}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing similarity scores.
    const TOLERANCE: f32 = 1e-6;

    /// Returns `true` when `actual` lies strictly within [`TOLERANCE`]
    /// of `expected`.
    fn approx_eq(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    /// Encoding and then decoding must reproduce the input exactly.
    #[test]
    fn test_vec_blob_roundtrip() {
        let original = vec![1.0f32, -2.5, 3.125, 0.0, -0.001];
        let decoded = blob_to_vec(&vec_to_blob(&original));
        assert_eq!(original, decoded);
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn test_cosine_identical() {
        let v = [1.0f32, 2.0, 3.0];
        assert!(approx_eq(cosine_similarity(&v, &v), 1.0));
    }

    /// Perpendicular vectors have similarity 0.
    #[test]
    fn test_cosine_orthogonal() {
        let x_axis = [1.0f32, 0.0, 0.0];
        let y_axis = [0.0f32, 1.0, 0.0];
        assert!(approx_eq(cosine_similarity(&x_axis, &y_axis), 0.0));
    }

    /// Vectors pointing in opposite directions have similarity -1.
    #[test]
    fn test_cosine_opposite() {
        let forward = [1.0f32, 0.0];
        let backward = [-1.0f32, 0.0];
        assert!(approx_eq(cosine_similarity(&forward, &backward), -1.0));
    }

    /// Empty inputs yield exactly 0.
    #[test]
    fn test_cosine_empty() {
        assert_eq!(cosine_similarity(&[], &[]), 0.0);
    }

    /// Inputs of different lengths yield exactly 0.
    #[test]
    fn test_cosine_different_lengths() {
        let longer = [1.0f32, 2.0];
        let shorter = [1.0f32];
        assert_eq!(cosine_similarity(&longer, &shorter), 0.0);
    }
}