Skip to main content

context_harness/
search.rs

1//! Search engine with keyword, semantic, and hybrid retrieval modes.
2//!
3//! This module provides the application-level entry points for search. The
4//! core algorithm (normalization, hybrid merge, aggregation) lives in
5//! `context-harness-core::search` and operates through the [`Store`] trait.
6//! This wrapper handles config parsing, database connection, embedding, and
7//! CLI output formatting.
8//!
9//! # Search Modes
10//!
11//! - **Keyword** — FTS5 full-text search using BM25 scoring.
12//! - **Semantic** — Cosine similarity over stored embedding vectors.
13//! - **Hybrid** — Weighted merge of keyword and semantic results.
14
15use anyhow::{bail, Result};
16
17#[allow(unused_imports)]
18pub use context_harness_core::search::{
19    normalize_scores, ScoreExplanation, SearchParams, SearchRequest, SearchResultItem,
20};
21#[allow(unused_imports)]
22pub use context_harness_core::store::ChunkCandidate;
23
24use crate::config::Config;
25use crate::db;
26use crate::embedding;
27use crate::sqlite_store::SqliteStore;
28use crate::vector_index;
29
30/// Core search function returning structured results.
31///
32/// This is the shared implementation used by both `ctx search` (CLI) and
33/// `POST /tools/search` (HTTP server). Delegates to
34/// `context_harness_core::search::search` via [`SqliteStore`].
35pub async fn search_documents(
36    config: &Config,
37    query: &str,
38    mode: &str,
39    source_filter: Option<&str>,
40    since: Option<&str>,
41    limit: Option<i64>,
42    explain: bool,
43) -> Result<Vec<SearchResultItem>> {
44    if query.trim().is_empty() {
45        return Ok(Vec::new());
46    }
47
48    match mode {
49        "keyword" | "semantic" | "hybrid" => {}
50        _ => bail!(
51            "Unknown search mode: {}. Use keyword, semantic, or hybrid.",
52            mode
53        ),
54    }
55
56    if (mode == "semantic" || mode == "hybrid") && !config.embedding.is_enabled() {
57        bail!(
58            "Mode '{}' requires embeddings. Set [embedding] provider in config.",
59            mode
60        );
61    }
62
63    let pool = db::connect(config).await?;
64
65    let query_vec = if mode != "keyword" {
66        let provider = embedding::create_provider(&config.embedding)?;
67        Some(embedding::embed_query(provider.as_ref(), &config.embedding, query).await?)
68    } else {
69        None
70    };
71
72    let params = SearchParams {
73        hybrid_alpha: config.retrieval.hybrid_alpha,
74        candidate_k_keyword: config.retrieval.candidate_k_keyword,
75        candidate_k_vector: config.retrieval.candidate_k_vector,
76        final_limit: limit.unwrap_or(config.retrieval.final_limit),
77    };
78
79    let req = SearchRequest {
80        query,
81        query_vec: query_vec.as_deref(),
82        mode,
83        source_filter,
84        since,
85        params,
86        explain,
87    };
88
89    let results = if mode == "keyword" {
90        let store = SqliteStore::new(pool.clone());
91        context_harness_core::search::search(&store, &req).await?
92    } else {
93        let store = vector_index::configured_vector_store(config, pool.clone()).await?;
94        context_harness_core::search::search(&store, &req).await?
95    };
96
97    pool.close().await;
98    Ok(results)
99}
100
101/// CLI entry point — calls [`search_documents`] and prints results to stdout.
102pub async fn run_search(
103    config: &Config,
104    query: &str,
105    mode: &str,
106    source_filter: Option<String>,
107    since: Option<String>,
108    limit: Option<i64>,
109    explain: bool,
110) -> Result<()> {
111    let results = search_documents(
112        config,
113        query,
114        mode,
115        source_filter.as_deref(),
116        since.as_deref(),
117        limit,
118        explain,
119    )
120    .await?;
121
122    if results.is_empty() {
123        println!("No results.");
124        return Ok(());
125    }
126
127    if explain {
128        if let Some(ex) = results.first().and_then(|r| r.explain.as_ref()) {
129            println!(
130                "Search: mode={}, alpha={:.2}, candidates: {} keyword + {} vector",
131                mode, ex.alpha, ex.keyword_candidates, ex.vector_candidates
132            );
133            println!();
134        }
135    }
136
137    for (i, result) in results.iter().enumerate() {
138        let title_display = result.title.as_deref().unwrap_or("(untitled)");
139        println!(
140            "{}. [{:.2}] {} / {}",
141            i + 1,
142            result.score,
143            result.source,
144            title_display
145        );
146        if let Some(ref ex) = result.explain {
147            println!(
148                "    scoring: keyword={:.3}  semantic={:.3}  → hybrid={:.3}",
149                ex.keyword_score, ex.semantic_score, result.score
150            );
151        }
152        println!("    updated: {}", result.updated_at);
153        println!("    source: {}", result.source);
154        if let Some(ref url) = result.source_url {
155            println!("    url: {}", url);
156        }
157        println!(
158            "    excerpt: \"{}\"",
159            result.snippet.replace('\n', " ").trim()
160        );
161        println!("    id: {}", result.id);
162        println!();
163    }
164
165    Ok(())
166}