context_harness_core/store/mod.rs
//! Storage abstraction for Context Harness.
//!
//! The [`Store`] trait defines all storage operations needed by the core
//! search and retrieval pipeline, enabling pluggable backends (SQLite,
//! in-memory, future WASM-compatible stores).
//!
//! Implementations must be `Send + Sync` to work with async runtimes.
8
9pub mod memory;
10
11use anyhow::Result;
12use async_trait::async_trait;
13use serde::Serialize;
14
15use crate::models::{Chunk, Document};
16
/// One chunk-level hit produced by keyword or vector search.
///
/// Each candidate bundles what the later pipeline stages need (score
/// normalization, hybrid merging, document aggregation) so they can run
/// without additional DB round-trips.
#[derive(Debug, Clone)]
pub struct ChunkCandidate {
    /// UUID of the matching chunk.
    pub chunk_id: String,
    /// UUID of the document the chunk belongs to.
    pub document_id: String,
    /// Unnormalized score as reported by the search backend
    /// (BM25 rank or cosine similarity).
    pub raw_score: f64,
    /// Excerpt of the chunk text, intended for display.
    pub snippet: String,
}
32
33/// Full document response including metadata, body, and chunks.
34///
35/// Matches the `context.get` response shape defined in `docs/SCHEMAS.md`.
36#[derive(Debug, Clone, Serialize)]
37pub struct DocumentResponse {
38 pub id: String,
39 pub source: String,
40 pub source_id: String,
41 pub source_url: Option<String>,
42 pub title: Option<String>,
43 pub author: Option<String>,
44 pub created_at: String,
45 pub updated_at: String,
46 pub content_type: String,
47 pub body: String,
48 pub metadata: serde_json::Value,
49 pub chunks: Vec<ChunkResponse>,
50}
51
52/// A single chunk within a [`DocumentResponse`].
53#[derive(Debug, Clone, Serialize)]
54pub struct ChunkResponse {
55 pub index: i64,
56 pub text: String,
57}
58
/// Lightweight document metadata used to enrich search results.
///
/// Holds only the fields needed to build a
/// [`SearchResultItem`](crate::search::SearchResultItem), which avoids the
/// cost of fetching the full document body.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document ID.
    pub id: String,
    /// Document title, when one is available.
    pub title: Option<String>,
    /// Source label of the document.
    /// NOTE(review): presumably the connector/origin name — confirm.
    pub source: String,
    /// Identifier associated with the document's source.
    /// NOTE(review): presumably the document's ID within that source — confirm.
    pub source_id: String,
    /// URL associated with the source, when one is available.
    pub source_url: Option<String>,
    /// Last-update timestamp as an integer.
    /// NOTE(review): presumably Unix epoch seconds — confirm. Note that
    /// `DocumentResponse::updated_at` carries its timestamp as a `String`.
    pub updated_at: i64,
}
72
73/// Abstract storage backend for Context Harness.
74///
75/// All operations are async (via `async-trait`) to support both native
76/// runtimes (tokio) and future WASM environments. In-memory
77/// implementations return immediately-ready futures.
78///
79/// # Operations
80///
81/// | Method | Purpose |
82/// |--------|---------|
83/// | [`upsert_document`](Store::upsert_document) | Insert or update a document |
84/// | [`replace_chunks`](Store::replace_chunks) | Replace all chunks for a document |
85/// | [`upsert_embedding`](Store::upsert_embedding) | Store an embedding vector for a chunk |
86/// | [`get_document`](Store::get_document) | Retrieve full document with chunks |
87/// | [`get_document_metadata`](Store::get_document_metadata) | Retrieve lightweight doc metadata |
88/// | [`keyword_search`](Store::keyword_search) | Full-text keyword search |
89/// | [`vector_search`](Store::vector_search) | Cosine similarity vector search |
90#[async_trait]
91pub trait Store: Send + Sync {
92 /// Insert or update a document.
93 ///
94 /// Returns the document ID (existing or newly generated).
95 async fn upsert_document(&self, doc: &Document) -> Result<String>;
96
97 /// Replace all chunks for a document, optionally storing vectors.
98 async fn replace_chunks(
99 &self,
100 doc_id: &str,
101 chunks: &[Chunk],
102 vectors: Option<&[Vec<f32>]>,
103 ) -> Result<()>;
104
105 /// Store or update an embedding vector for a chunk.
106 async fn upsert_embedding(
107 &self,
108 chunk_id: &str,
109 doc_id: &str,
110 vector: &[f32],
111 model: &str,
112 dims: usize,
113 content_hash: &str,
114 ) -> Result<()>;
115
116 /// Retrieve a full document with all its chunks, by ID.
117 async fn get_document(&self, id: &str) -> Result<Option<DocumentResponse>>;
118
119 /// Retrieve lightweight metadata for a document, by ID.
120 async fn get_document_metadata(&self, id: &str) -> Result<Option<DocumentMetadata>>;
121
122 /// Perform keyword (full-text) search, returning candidate chunks.
123 async fn keyword_search(
124 &self,
125 query: &str,
126 limit: i64,
127 source: Option<&str>,
128 since: Option<&str>,
129 ) -> Result<Vec<ChunkCandidate>>;
130
131 /// Perform vector similarity search, returning candidate chunks.
132 async fn vector_search(
133 &self,
134 query_vec: &[f32],
135 limit: i64,
136 source: Option<&str>,
137 since: Option<&str>,
138 ) -> Result<Vec<ChunkCandidate>>;
139}