context_harness/
traits.rs

1//! Extension traits for custom connectors and tools.
2//!
3//! This module provides the trait-based extension system for Context Harness.
4//! Users can implement [`Connector`] and [`Tool`] in Rust to create compiled
5//! extensions that run alongside built-in and Lua-scripted ones.
6//!
7//! # Architecture
8//!
9//! ```text
10//! ┌──────────────────────────────────────────┐
11//! │           ConnectorRegistry              │
12//! │  ┌─────────┐ ┌─────────┐ ┌────────────┐ │
13//! │  │Built-in │ │  Lua    │ │  Custom    │ │
14//! │  │FS/Git/S3│ │ Script  │ │ (Rust)     │ │
15//! │  └─────────┘ └─────────┘ └────────────┘ │
16//! └──────────────┬───────────────────────────┘
17//!                ▼
18//!          run_sync() → ingest pipeline
19//! ```
20//!
21//! ```text
22//! ┌──────────────────────────────────────────┐
23//! │              ToolRegistry                │
24//! │  ┌─────────┐ ┌─────────┐ ┌────────────┐ │
25//! │  │Built-in │ │  Lua    │ │  Custom    │ │
26//! │  │search   │ │ Script  │ │ (Rust)     │ │
27//! │  │get/src  │ │ Tools   │ │  Tools     │ │
28//! │  └─────────┘ └─────────┘ └────────────┘ │
29//! └──────────────┬───────────────────────────┘
30//!                ▼
31//!          run_server() → MCP HTTP API
32//! ```
33//!
34//! # Usage
35//!
36//! ```rust
37//! use context_harness::traits::{ConnectorRegistry, ToolRegistry};
38//!
39//! let mut connectors = ConnectorRegistry::new();
40//! // connectors.register(Box::new(MyConnector::new()));
41//!
42//! let mut tools = ToolRegistry::new();
43//! // tools.register(Box::new(MyTool::new()));
44//! ```
45//!
46//! See `docs/RUST_TRAITS.md` for the full specification and examples.
47
48use anyhow::Result;
49use async_trait::async_trait;
50use serde_json::Value;
51use std::sync::Arc;
52
53use crate::config::Config;
54use crate::get::{get_document, DocumentResponse};
55use crate::models::SourceItem;
56use crate::search::{search_documents, SearchResultItem};
57use crate::sources::{get_sources, SourceStatus};
58
59// ═══════════════════════════════════════════════════════════════════════
60// Connector Trait
61// ═══════════════════════════════════════════════════════════════════════
62
63/// A data source connector that produces documents for ingestion.
64///
65/// Implement this trait to create a custom connector in Rust. The
66/// connector is responsible for scanning an external data source and
67/// returning a list of [`SourceItem`]s that flow through the standard
68/// ingestion pipeline (normalization → chunking → embedding).
69///
70/// # Lifecycle
71///
72/// 1. The connector is registered via [`ConnectorRegistry::register`].
73/// 2. [`scan`](Connector::scan) is called during `ctx sync custom:<name>`.
74/// 3. Returned items are normalized, chunked, and indexed.
75///
76/// # Example
77///
78/// ```rust
79/// use async_trait::async_trait;
80/// use anyhow::Result;
81/// use context_harness::models::SourceItem;
82/// use context_harness::traits::Connector;
83/// use chrono::Utc;
84///
85/// pub struct DatabaseConnector {
86///     connection_string: String,
87/// }
88///
89/// #[async_trait]
90/// impl Connector for DatabaseConnector {
91///     fn name(&self) -> &str { "database" }
92///     fn description(&self) -> &str { "Ingest rows from a database table" }
93///     fn connector_type(&self) -> &str { "custom" }
94///
95///     async fn scan(&self) -> Result<Vec<SourceItem>> {
96///         // ... query database and return SourceItems
97///         Ok(vec![])
98///     }
99/// }
100/// ```
101#[async_trait]
102pub trait Connector: Send + Sync {
103    /// Returns the connector instance name (e.g. `"docs"`, `"platform"`).
104    ///
105    /// Combined with [`connector_type`](Connector::connector_type) to form
106    /// the source label: `"{type}:{name}"`.
107    fn name(&self) -> &str;
108
109    /// Returns a one-line description of what this connector does.
110    ///
111    /// Used in `ctx sources` output and documentation.
112    fn description(&self) -> &str;
113
114    /// Returns the connector type identifier (e.g. `"filesystem"`, `"git"`, `"s3"`, `"custom"`).
115    ///
116    /// Built-in connectors return their type name; custom (user-defined)
117    /// connectors default to `"custom"`.
118    fn connector_type(&self) -> &str {
119        "custom"
120    }
121
122    /// Returns the source label used to tag documents from this connector.
123    ///
124    /// Defaults to `"{connector_type}:{name}"` (e.g. `"git:platform"`).
125    fn source_label(&self) -> String {
126        format!("{}:{}", self.connector_type(), self.name())
127    }
128
129    /// Scan the data source and return all items to ingest.
130    ///
131    /// Called on the tokio async runtime. May perform I/O operations
132    /// (HTTP requests, database queries, file reads).
133    ///
134    /// # Returns
135    ///
136    /// A vector of [`SourceItem`]s. Each item flows through the standard
137    /// ingestion pipeline. Items with empty `body` or `source_id` are
138    /// skipped with a warning.
139    async fn scan(&self) -> Result<Vec<SourceItem>>;
140}
141
142// ═══════════════════════════════════════════════════════════════════════
143// Tool Trait
144// ═══════════════════════════════════════════════════════════════════════
145
146/// A custom MCP tool that agents can discover and call.
147///
148/// Implement this trait to create a compiled Rust tool. Tools are
149/// registered at server startup and exposed via `GET /tools/list`
150/// for agent discovery and `POST /tools/{name}` for invocation.
151///
152/// # Lifecycle
153///
154/// 1. The tool is registered via [`ToolRegistry::register`].
155/// 2. [`name`](Tool::name), [`description`](Tool::description), and
156///    [`parameters_schema`](Tool::parameters_schema) are called at startup
157///    for the tool list.
158/// 3. [`execute`](Tool::execute) is called each time an agent invokes
159///    the tool.
160///
161/// # Example
162///
163/// ```rust
164/// use async_trait::async_trait;
165/// use anyhow::Result;
166/// use serde_json::{json, Value};
167/// use context_harness::traits::{Tool, ToolContext};
168///
169/// pub struct HealthCheckTool;
170///
171/// #[async_trait]
172/// impl Tool for HealthCheckTool {
173///     fn name(&self) -> &str { "health_check" }
174///     fn description(&self) -> &str { "Check connector health" }
175///
176///     fn parameters_schema(&self) -> Value {
177///         json!({
178///             "type": "object",
179///             "properties": {},
180///             "required": []
181///         })
182///     }
183///
184///     async fn execute(&self, _params: Value, ctx: &ToolContext) -> Result<Value> {
185///         let sources = ctx.sources()?;
186///         Ok(json!({ "sources": sources.len() }))
187///     }
188/// }
189/// ```
190#[async_trait]
191pub trait Tool: Send + Sync {
192    /// Returns the tool's name.
193    ///
194    /// Used as the route path (`POST /tools/{name}`) and in
195    /// `GET /tools/list` responses. Should be a lowercase
196    /// identifier with underscores (e.g., `"create_ticket"`).
197    fn name(&self) -> &str;
198
199    /// Returns a one-line description for agent discovery.
200    ///
201    /// Agents use this to decide whether to call the tool.
202    fn description(&self) -> &str;
203
204    /// Whether this tool is a built-in (true for search/get/sources).
205    ///
206    /// Built-in tools are marked with `"builtin": true` in the
207    /// `GET /tools/list` response. Defaults to `false`.
208    fn is_builtin(&self) -> bool {
209        false
210    }
211
212    /// Returns the OpenAI function-calling JSON Schema for parameters.
213    ///
214    /// Must be a valid JSON Schema object with `type: "object"`,
215    /// `properties`, and optionally `required`.
216    fn parameters_schema(&self) -> Value;
217
218    /// Execute the tool with validated parameters.
219    ///
220    /// Called each time an agent invokes the tool via `POST /tools/{name}`.
221    ///
222    /// # Arguments
223    ///
224    /// * `params` — JSON parameters (always a JSON object).
225    /// * `ctx` — Bridge to the Context Harness knowledge base.
226    ///
227    /// # Returns
228    ///
229    /// A JSON value that will be wrapped in `{ "result": ... }` in the
230    /// HTTP response.
231    async fn execute(&self, params: Value, ctx: &ToolContext) -> Result<Value>;
232}
233
234// ═══════════════════════════════════════════════════════════════════════
235// ToolContext
236// ═══════════════════════════════════════════════════════════════════════
237
/// Options for [`ToolContext::search`].
///
/// Every field is optional, so callers can set only what they need and fill
/// the rest with `..Default::default()`. The type is `Clone` and comparable
/// so that one set of options can be reused across several searches and
/// checked in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SearchOptions {
    /// Search mode: `"keyword"`, `"semantic"`, or `"hybrid"`.
    ///
    /// [`ToolContext::search`] falls back to `"keyword"` when this is `None`.
    pub mode: Option<String>,
    /// Maximum number of results; forwarded to the search as given,
    /// including `None`.
    pub limit: Option<i64>,
    /// Filter by source connector (e.g., `"git:platform"`).
    pub source: Option<String>,
}
248
249/// Context bridge for tool execution.
250///
251/// Provides tools with access to the Context Harness knowledge base
252/// during execution. Created by the server for each tool invocation.
253///
254/// All methods delegate to the same core functions used by the CLI
255/// and HTTP server, ensuring tools have identical capabilities.
256pub struct ToolContext {
257    config: Arc<Config>,
258}
259
260impl ToolContext {
261    /// Create a new tool context from the application config.
262    pub fn new(config: Arc<Config>) -> Self {
263        Self { config }
264    }
265
266    /// Search the knowledge base.
267    ///
268    /// Equivalent to `POST /tools/search` or `ctx search`.
269    ///
270    /// # Example
271    ///
272    /// ```rust,no_run
273    /// # use context_harness::traits::{ToolContext, SearchOptions};
274    /// # async fn example(ctx: &ToolContext) -> anyhow::Result<()> {
275    /// let results = ctx.search("deployment runbook", SearchOptions {
276    ///     mode: Some("hybrid".to_string()),
277    ///     limit: Some(5),
278    ///     ..Default::default()
279    /// }).await?;
280    /// # Ok(())
281    /// # }
282    /// ```
283    pub async fn search(&self, query: &str, opts: SearchOptions) -> Result<Vec<SearchResultItem>> {
284        search_documents(
285            &self.config,
286            query,
287            opts.mode.as_deref().unwrap_or("keyword"),
288            opts.source.as_deref(),
289            None,
290            opts.limit,
291            false,
292        )
293        .await
294    }
295
296    /// Retrieve a document by UUID.
297    ///
298    /// Equivalent to `POST /tools/get` or `ctx get`.
299    pub async fn get(&self, id: &str) -> Result<DocumentResponse> {
300        get_document(&self.config, id).await
301    }
302
303    /// List all configured connectors and their status.
304    ///
305    /// Equivalent to `GET /tools/sources` or `ctx sources`.
306    pub fn sources(&self) -> Result<Vec<SourceStatus>> {
307        Ok(get_sources(&self.config))
308    }
309}
310
311// ═══════════════════════════════════════════════════════════════════════
312// Built-in Tool Implementations
313// ═══════════════════════════════════════════════════════════════════════
314
315/// Built-in search tool. Delegates to [`ToolContext::search`].
316pub struct SearchTool;
317
318#[async_trait]
319impl Tool for SearchTool {
320    fn name(&self) -> &str {
321        "search"
322    }
323
324    fn description(&self) -> &str {
325        "Search the knowledge base"
326    }
327
328    fn is_builtin(&self) -> bool {
329        true
330    }
331
332    fn parameters_schema(&self) -> Value {
333        serde_json::json!({
334            "type": "object",
335            "properties": {
336                "query": { "type": "string", "description": "Search query" },
337                "mode": { "type": "string", "enum": ["keyword", "semantic", "hybrid"], "default": "keyword" },
338                "limit": { "type": "integer", "description": "Max results", "default": 12 },
339                "filters": {
340                    "type": "object",
341                    "properties": {
342                        "source": { "type": "string", "description": "Filter by connector source" },
343                        "since": { "type": "string", "description": "Only results updated after this date (YYYY-MM-DD)" }
344                    }
345                }
346            },
347            "required": ["query"]
348        })
349    }
350
351    async fn execute(&self, params: Value, ctx: &ToolContext) -> Result<Value> {
352        let query = params["query"].as_str().unwrap_or("");
353        if query.trim().is_empty() {
354            anyhow::bail!("query must not be empty");
355        }
356
357        let mode = params["mode"].as_str().unwrap_or("keyword");
358        let limit = params["limit"].as_i64().unwrap_or(12);
359
360        let source = params
361            .get("filters")
362            .and_then(|f| f.get("source"))
363            .and_then(|s| s.as_str());
364        let since = params
365            .get("filters")
366            .and_then(|f| f.get("since"))
367            .and_then(|s| s.as_str());
368
369        let results =
370            search_documents(&ctx.config, query, mode, source, since, Some(limit), false).await?;
371
372        Ok(serde_json::json!({ "results": results }))
373    }
374}
375
376/// Built-in document retrieval tool. Delegates to [`get_document`].
377pub struct GetTool;
378
379#[async_trait]
380impl Tool for GetTool {
381    fn name(&self) -> &str {
382        "get"
383    }
384
385    fn description(&self) -> &str {
386        "Retrieve a document by UUID"
387    }
388
389    fn is_builtin(&self) -> bool {
390        true
391    }
392
393    fn parameters_schema(&self) -> Value {
394        serde_json::json!({
395            "type": "object",
396            "properties": {
397                "id": { "type": "string", "description": "Document UUID" }
398            },
399            "required": ["id"]
400        })
401    }
402
403    async fn execute(&self, params: Value, ctx: &ToolContext) -> Result<Value> {
404        let id = params["id"].as_str().unwrap_or("");
405        if id.trim().is_empty() {
406            anyhow::bail!("id must not be empty");
407        }
408
409        let doc = get_document(&ctx.config, id).await?;
410        Ok(serde_json::to_value(&doc)?)
411    }
412}
413
414/// Built-in sources listing tool. Delegates to [`get_sources`].
415pub struct SourcesTool;
416
417#[async_trait]
418impl Tool for SourcesTool {
419    fn name(&self) -> &str {
420        "sources"
421    }
422
423    fn description(&self) -> &str {
424        "List connector configuration and health status"
425    }
426
427    fn is_builtin(&self) -> bool {
428        true
429    }
430
431    fn parameters_schema(&self) -> Value {
432        serde_json::json!({
433            "type": "object",
434            "properties": {}
435        })
436    }
437
438    async fn execute(&self, _params: Value, ctx: &ToolContext) -> Result<Value> {
439        let sources = get_sources(&ctx.config);
440        Ok(serde_json::json!({ "sources": sources }))
441    }
442}
443
444// ═══════════════════════════════════════════════════════════════════════
445// Registries
446// ═══════════════════════════════════════════════════════════════════════
447
448/// Registry for connectors (built-in and custom).
449///
450/// Use [`ConnectorRegistry::from_config`] to create a registry pre-loaded
451/// with all built-in connectors from the config file, then optionally
452/// call [`register`](ConnectorRegistry::register) to add custom ones.
453///
454/// # Example
455///
456/// ```rust
457/// use context_harness::traits::ConnectorRegistry;
458///
459/// let mut connectors = ConnectorRegistry::new();
460/// // connectors.register(Box::new(MyConnector::new()));
461/// ```
462pub struct ConnectorRegistry {
463    connectors: Vec<Box<dyn Connector>>,
464}
465
466impl ConnectorRegistry {
467    /// Create an empty connector registry.
468    pub fn new() -> Self {
469        Self {
470            connectors: Vec::new(),
471        }
472    }
473
474    /// Create a registry pre-loaded with all built-in connectors from the config.
475    ///
476    /// This resolves all filesystem, git, S3, and script connector instances
477    /// from the TOML config and wraps them as trait objects.
478    pub fn from_config(config: &Config) -> Self {
479        use crate::connector_fs::FilesystemConnector;
480        use crate::connector_git::GitConnector;
481        use crate::connector_s3::S3Connector;
482        use crate::connector_script::ScriptConnector;
483
484        let mut registry = Self::new();
485
486        for (name, cfg) in &config.connectors.filesystem {
487            registry.register(Box::new(FilesystemConnector::new(
488                name.clone(),
489                cfg.clone(),
490            )));
491        }
492        for (name, cfg) in &config.connectors.git {
493            registry.register(Box::new(GitConnector::new(
494                name.clone(),
495                cfg.clone(),
496                config.db.path.clone(),
497            )));
498        }
499        for (name, cfg) in &config.connectors.s3 {
500            registry.register(Box::new(S3Connector::new(name.clone(), cfg.clone())));
501        }
502        for (name, cfg) in &config.connectors.script {
503            registry.register(Box::new(ScriptConnector::new(name.clone(), cfg.clone())));
504        }
505
506        registry
507    }
508
509    /// Register a connector.
510    pub fn register(&mut self, connector: Box<dyn Connector>) {
511        self.connectors.push(connector);
512    }
513
514    /// Get all registered connectors.
515    pub fn connectors(&self) -> &[Box<dyn Connector>] {
516        &self.connectors
517    }
518
519    /// Get connectors filtered by type (e.g. `"git"`, `"filesystem"`).
520    pub fn connectors_by_type(&self, connector_type: &str) -> Vec<&dyn Connector> {
521        self.connectors
522            .iter()
523            .filter(|c| c.connector_type() == connector_type)
524            .map(|c| c.as_ref())
525            .collect()
526    }
527
528    /// Find a specific connector by type and name.
529    pub fn find(&self, connector_type: &str, name: &str) -> Option<&dyn Connector> {
530        self.connectors
531            .iter()
532            .find(|c| c.connector_type() == connector_type && c.name() == name)
533            .map(|c| c.as_ref())
534    }
535
536    /// Check if the registry is empty.
537    pub fn is_empty(&self) -> bool {
538        self.connectors.is_empty()
539    }
540
541    /// Return the count of registered connectors.
542    pub fn len(&self) -> usize {
543        self.connectors.len()
544    }
545}
546
547impl Default for ConnectorRegistry {
548    fn default() -> Self {
549        Self::new()
550    }
551}
552
553/// Registry for tools (built-in, Lua, and custom Rust).
554///
555/// Use [`ToolRegistry::with_builtins`] to create a registry pre-loaded
556/// with the core `search`, `get`, and `sources` tools, then optionally
557/// call [`register`](ToolRegistry::register) to add custom ones.
558///
559/// # Example
560///
561/// ```rust
562/// use context_harness::traits::ToolRegistry;
563///
564/// let mut tools = ToolRegistry::with_builtins();
565/// // tools.register(Box::new(MyTool::new()));
566/// ```
567pub struct ToolRegistry {
568    tools: Vec<Box<dyn Tool>>,
569}
570
571impl ToolRegistry {
572    /// Create an empty tool registry.
573    pub fn new() -> Self {
574        Self { tools: Vec::new() }
575    }
576
577    /// Create a tool registry pre-loaded with built-in tools (search, get, sources).
578    pub fn with_builtins() -> Self {
579        let mut registry = Self::new();
580        registry.register(Box::new(SearchTool));
581        registry.register(Box::new(GetTool));
582        registry.register(Box::new(SourcesTool));
583        registry
584    }
585
586    /// Register a tool.
587    pub fn register(&mut self, tool: Box<dyn Tool>) {
588        self.tools.push(tool);
589    }
590
591    /// Get all registered tools.
592    pub fn tools(&self) -> &[Box<dyn Tool>] {
593        &self.tools
594    }
595
596    /// Find a tool by name.
597    pub fn find(&self, name: &str) -> Option<&dyn Tool> {
598        self.tools
599            .iter()
600            .find(|t| t.name() == name)
601            .map(|t| t.as_ref())
602    }
603
604    /// Check if the registry is empty.
605    pub fn is_empty(&self) -> bool {
606        self.tools.is_empty()
607    }
608
609    /// Return the count of registered tools.
610    pub fn len(&self) -> usize {
611        self.tools.len()
612    }
613}
614
615impl Default for ToolRegistry {
616    fn default() -> Self {
617        Self::new()
618    }
619}