context_harness/
search.rs

1//! Search engine with keyword, semantic, and hybrid retrieval modes.
2//!
3//! This module provides the application-level entry points for search. The
4//! core algorithm (normalization, hybrid merge, aggregation) lives in
5//! `context-harness-core::search` and operates through the [`Store`] trait.
6//! This wrapper handles config parsing, database connection, embedding, and
7//! CLI output formatting.
8//!
9//! # Search Modes
10//!
11//! - **Keyword** — FTS5 full-text search using BM25 scoring.
12//! - **Semantic** — Cosine similarity over stored embedding vectors.
13//! - **Hybrid** — Weighted merge of keyword and semantic results.
14
15use anyhow::{bail, Result};
16
17#[allow(unused_imports)]
18pub use context_harness_core::search::{
19    normalize_scores, ScoreExplanation, SearchParams, SearchRequest, SearchResultItem,
20};
21#[allow(unused_imports)]
22pub use context_harness_core::store::ChunkCandidate;
23
24use crate::config::Config;
25use crate::db;
26use crate::embedding;
27use crate::sqlite_store::SqliteStore;
28
29/// Core search function returning structured results.
30///
31/// This is the shared implementation used by both `ctx search` (CLI) and
32/// `POST /tools/search` (HTTP server). Delegates to
33/// `context_harness_core::search::search` via [`SqliteStore`].
34pub async fn search_documents(
35    config: &Config,
36    query: &str,
37    mode: &str,
38    source_filter: Option<&str>,
39    since: Option<&str>,
40    limit: Option<i64>,
41    explain: bool,
42) -> Result<Vec<SearchResultItem>> {
43    if query.trim().is_empty() {
44        return Ok(Vec::new());
45    }
46
47    match mode {
48        "keyword" | "semantic" | "hybrid" => {}
49        _ => bail!(
50            "Unknown search mode: {}. Use keyword, semantic, or hybrid.",
51            mode
52        ),
53    }
54
55    if (mode == "semantic" || mode == "hybrid") && !config.embedding.is_enabled() {
56        bail!(
57            "Mode '{}' requires embeddings. Set [embedding] provider in config.",
58            mode
59        );
60    }
61
62    let pool = db::connect(config).await?;
63    let store = SqliteStore::new(pool.clone());
64
65    let query_vec = if mode != "keyword" {
66        let provider = embedding::create_provider(&config.embedding)?;
67        Some(embedding::embed_query(provider.as_ref(), &config.embedding, query).await?)
68    } else {
69        None
70    };
71
72    let params = SearchParams {
73        hybrid_alpha: config.retrieval.hybrid_alpha,
74        candidate_k_keyword: config.retrieval.candidate_k_keyword,
75        candidate_k_vector: config.retrieval.candidate_k_vector,
76        final_limit: limit.unwrap_or(config.retrieval.final_limit),
77    };
78
79    let req = SearchRequest {
80        query,
81        query_vec: query_vec.as_deref(),
82        mode,
83        source_filter,
84        since,
85        params,
86        explain,
87    };
88
89    let results = context_harness_core::search::search(&store, &req).await?;
90
91    pool.close().await;
92    Ok(results)
93}
94
95/// CLI entry point — calls [`search_documents`] and prints results to stdout.
96pub async fn run_search(
97    config: &Config,
98    query: &str,
99    mode: &str,
100    source_filter: Option<String>,
101    since: Option<String>,
102    limit: Option<i64>,
103    explain: bool,
104) -> Result<()> {
105    let results = search_documents(
106        config,
107        query,
108        mode,
109        source_filter.as_deref(),
110        since.as_deref(),
111        limit,
112        explain,
113    )
114    .await?;
115
116    if results.is_empty() {
117        println!("No results.");
118        return Ok(());
119    }
120
121    if explain {
122        if let Some(ex) = results.first().and_then(|r| r.explain.as_ref()) {
123            println!(
124                "Search: mode={}, alpha={:.2}, candidates: {} keyword + {} vector",
125                mode, ex.alpha, ex.keyword_candidates, ex.vector_candidates
126            );
127            println!();
128        }
129    }
130
131    for (i, result) in results.iter().enumerate() {
132        let title_display = result.title.as_deref().unwrap_or("(untitled)");
133        println!(
134            "{}. [{:.2}] {} / {}",
135            i + 1,
136            result.score,
137            result.source,
138            title_display
139        );
140        if let Some(ref ex) = result.explain {
141            println!(
142                "    scoring: keyword={:.3}  semantic={:.3}  → hybrid={:.3}",
143                ex.keyword_score, ex.semantic_score, result.score
144            );
145        }
146        println!("    updated: {}", result.updated_at);
147        println!("    source: {}", result.source);
148        if let Some(ref url) = result.source_url {
149            println!("    url: {}", url);
150        }
151        println!(
152            "    excerpt: \"{}\"",
153            result.snippet.replace('\n', " ").trim()
154        );
155        println!("    id: {}", result.id);
156        println!();
157    }
158
159    Ok(())
160}