context_harness/
stats.rs

1//! Database statistics and health overview.
2//!
3//! Provides a quick summary of what's indexed: document counts, chunk counts,
4//! embedding coverage, and per-source breakdowns. Used by `ctx stats` to give
5//! confidence that syncs and embeddings are working as expected.
6
7use anyhow::Result;
8use sqlx::Row;
9
10use crate::config::Config;
11use crate::db;
12
/// Per-source breakdown of document and chunk counts.
///
/// One value is built per distinct `documents.source` and rendered as a row
/// of the "By source" table.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SourceStats {
    /// Source name, taken from the `source` column of `documents`.
    source: String,
    /// Number of distinct documents belonging to this source.
    doc_count: i64,
    /// Number of distinct chunks joined to those documents.
    chunk_count: i64,
    /// Number of distinct chunks that have a row in `chunk_vectors`.
    embedded_count: i64,
    /// `updated_at` of the source's checkpoint row, displayed as a Unix
    /// timestamp in seconds; `None` when the source has no checkpoint.
    last_sync_ts: Option<i64>,
}
21
22/// Run the stats command: query the database and print a summary.
23pub async fn run_stats(config: &Config) -> Result<()> {
24    let pool = db::connect(config).await?;
25
26    let total_docs: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM documents")
27        .fetch_one(&pool)
28        .await?;
29
30    let total_chunks: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chunks")
31        .fetch_one(&pool)
32        .await?;
33
34    let total_embedded: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chunk_vectors")
35        .fetch_one(&pool)
36        .await?;
37
38    let db_size = std::fs::metadata(&config.db.path)
39        .map(|m| m.len())
40        .unwrap_or(0);
41
42    println!("Context Harness — Database Stats");
43    println!("================================");
44    println!();
45    println!("  Database:    {}", config.db.path.display());
46    println!("  Size:        {}", format_bytes(db_size));
47    println!();
48    println!("  Documents:   {}", total_docs);
49    println!("  Chunks:      {}", total_chunks);
50    println!(
51        "  Embedded:    {} / {} ({}%)",
52        total_embedded,
53        total_chunks,
54        if total_chunks > 0 {
55            (total_embedded * 100) / total_chunks
56        } else {
57            0
58        }
59    );
60
61    // Per-source breakdown
62    let source_rows = sqlx::query(
63        r#"
64        SELECT
65            d.source,
66            COUNT(DISTINCT d.id) AS doc_count,
67            COUNT(DISTINCT c.id) AS chunk_count,
68            COUNT(DISTINCT cv.chunk_id) AS embedded_count
69        FROM documents d
70        LEFT JOIN chunks c ON c.document_id = d.id
71        LEFT JOIN chunk_vectors cv ON cv.chunk_id = c.id
72        GROUP BY d.source
73        ORDER BY doc_count DESC
74        "#,
75    )
76    .fetch_all(&pool)
77    .await?;
78
79    // Fetch checkpoint timestamps per source
80    let checkpoint_rows = sqlx::query("SELECT source, updated_at FROM checkpoints")
81        .fetch_all(&pool)
82        .await?;
83
84    let mut source_stats: Vec<SourceStats> = Vec::new();
85    for row in &source_rows {
86        let source: String = row.get("source");
87        let last_sync_ts = checkpoint_rows
88            .iter()
89            .find(|cp| {
90                let cp_source: String = cp.get("source");
91                cp_source == source
92            })
93            .map(|cp| cp.get::<i64, _>("updated_at"));
94
95        source_stats.push(SourceStats {
96            source,
97            doc_count: row.get("doc_count"),
98            chunk_count: row.get("chunk_count"),
99            embedded_count: row.get("embedded_count"),
100            last_sync_ts,
101        });
102    }
103
104    if !source_stats.is_empty() {
105        println!();
106        println!("  By source:");
107        println!(
108            "  {:<24} {:>6} {:>8} {:>10}   LAST SYNC",
109            "SOURCE", "DOCS", "CHUNKS", "EMBEDDED"
110        );
111        println!("  {}", "-".repeat(76));
112
113        for s in &source_stats {
114            let sync_display = match s.last_sync_ts {
115                Some(ts) => format_ts_relative(ts),
116                None => "never".to_string(),
117            };
118            println!(
119                "  {:<24} {:>6} {:>8} {:>10}   {}",
120                s.source, s.doc_count, s.chunk_count, s.embedded_count, sync_display
121            );
122        }
123    }
124
125    println!();
126
127    pool.close().await;
128    Ok(())
129}
130
/// Render a byte count in the largest fitting unit (B, KB, MB or GB).
///
/// Units are powers of 1024. Plain bytes are printed as an integer, KB and MB
/// with one decimal place, and GB with two.
fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    const GB: u64 = MB * 1024;

    match bytes {
        b if b < KB => format!("{} B", b),
        b if b < MB => format!("{:.1} KB", b as f64 / KB as f64),
        b if b < GB => format!("{:.1} MB", b as f64 / MB as f64),
        b => format!("{:.2} GB", b as f64 / GB as f64),
    }
}
143
144/// Format a Unix timestamp as a relative time string (e.g. "3 hours ago").
145fn format_ts_relative(ts: i64) -> String {
146    let now = chrono::Utc::now().timestamp();
147    let delta = now - ts;
148
149    if delta < 0 {
150        return format_ts_iso(ts);
151    }
152
153    if delta < 60 {
154        "just now".to_string()
155    } else if delta < 3600 {
156        let mins = delta / 60;
157        format!("{} min{} ago", mins, if mins == 1 { "" } else { "s" })
158    } else if delta < 86400 {
159        let hours = delta / 3600;
160        format!("{} hour{} ago", hours, if hours == 1 { "" } else { "s" })
161    } else if delta < 86400 * 30 {
162        let days = delta / 86400;
163        format!("{} day{} ago", days, if days == 1 { "" } else { "s" })
164    } else {
165        format_ts_iso(ts)
166    }
167}
168
169fn format_ts_iso(ts: i64) -> String {
170    chrono::DateTime::from_timestamp(ts, 0)
171        .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
172        .unwrap_or_else(|| ts.to_string())
173}