1use anyhow::Result;
8use sqlx::Row;
9
10use crate::config::Config;
11use crate::db;
12
/// Aggregated counts for a single `documents.source` value, as printed in the
/// "By source" table of [`run_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
struct SourceStats {
    /// Value of the `source` column the row was grouped by.
    source: String,
    /// Number of distinct documents for this source.
    doc_count: i64,
    /// Number of distinct chunks joined to those documents.
    chunk_count: i64,
    /// Number of distinct chunks that have a row in `chunk_vectors`.
    embedded_count: i64,
    /// `updated_at` of the matching `checkpoints` row, in Unix seconds
    /// (it is compared against `chrono::Utc::now().timestamp()`), or `None`
    /// when the source has no checkpoint.
    last_sync_ts: Option<i64>,
}
21
22pub async fn run_stats(config: &Config) -> Result<()> {
24 let pool = db::connect(config).await?;
25
26 let total_docs: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM documents")
27 .fetch_one(&pool)
28 .await?;
29
30 let total_chunks: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chunks")
31 .fetch_one(&pool)
32 .await?;
33
34 let total_embedded: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chunk_vectors")
35 .fetch_one(&pool)
36 .await?;
37
38 let db_size = std::fs::metadata(&config.db.path)
39 .map(|m| m.len())
40 .unwrap_or(0);
41
42 println!("Context Harness — Database Stats");
43 println!("================================");
44 println!();
45 println!(" Database: {}", config.db.path.display());
46 println!(" Size: {}", format_bytes(db_size));
47 println!();
48 println!(" Documents: {}", total_docs);
49 println!(" Chunks: {}", total_chunks);
50 println!(
51 " Embedded: {} / {} ({}%)",
52 total_embedded,
53 total_chunks,
54 if total_chunks > 0 {
55 (total_embedded * 100) / total_chunks
56 } else {
57 0
58 }
59 );
60
61 let source_rows = sqlx::query(
63 r#"
64 SELECT
65 d.source,
66 COUNT(DISTINCT d.id) AS doc_count,
67 COUNT(DISTINCT c.id) AS chunk_count,
68 COUNT(DISTINCT cv.chunk_id) AS embedded_count
69 FROM documents d
70 LEFT JOIN chunks c ON c.document_id = d.id
71 LEFT JOIN chunk_vectors cv ON cv.chunk_id = c.id
72 GROUP BY d.source
73 ORDER BY doc_count DESC
74 "#,
75 )
76 .fetch_all(&pool)
77 .await?;
78
79 let checkpoint_rows = sqlx::query("SELECT source, updated_at FROM checkpoints")
81 .fetch_all(&pool)
82 .await?;
83
84 let mut source_stats: Vec<SourceStats> = Vec::new();
85 for row in &source_rows {
86 let source: String = row.get("source");
87 let last_sync_ts = checkpoint_rows
88 .iter()
89 .find(|cp| {
90 let cp_source: String = cp.get("source");
91 cp_source == source
92 })
93 .map(|cp| cp.get::<i64, _>("updated_at"));
94
95 source_stats.push(SourceStats {
96 source,
97 doc_count: row.get("doc_count"),
98 chunk_count: row.get("chunk_count"),
99 embedded_count: row.get("embedded_count"),
100 last_sync_ts,
101 });
102 }
103
104 if !source_stats.is_empty() {
105 println!();
106 println!(" By source:");
107 println!(
108 " {:<24} {:>6} {:>8} {:>10} LAST SYNC",
109 "SOURCE", "DOCS", "CHUNKS", "EMBEDDED"
110 );
111 println!(" {}", "-".repeat(76));
112
113 for s in &source_stats {
114 let sync_display = match s.last_sync_ts {
115 Some(ts) => format_ts_relative(ts),
116 None => "never".to_string(),
117 };
118 println!(
119 " {:<24} {:>6} {:>8} {:>10} {}",
120 s.source, s.doc_count, s.chunk_count, s.embedded_count, sync_display
121 );
122 }
123 }
124
125 println!();
126
127 pool.close().await;
128 Ok(())
129}
130
/// Renders a byte count using binary (1024-based) units.
///
/// Values below 1 KB are printed as whole bytes, KB and MB with one decimal
/// place, and anything from 1 GB upwards with two decimal places.
fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    const GB: u64 = MB * 1024;

    // Express the byte count as a fractional multiple of `unit`.
    let in_units = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b < KB => format!("{} B", b),
        b if b < MB => format!("{:.1} KB", in_units(KB)),
        b if b < GB => format!("{:.1} MB", in_units(MB)),
        _ => format!("{:.2} GB", in_units(GB)),
    }
}
143
144fn format_ts_relative(ts: i64) -> String {
146 let now = chrono::Utc::now().timestamp();
147 let delta = now - ts;
148
149 if delta < 0 {
150 return format_ts_iso(ts);
151 }
152
153 if delta < 60 {
154 "just now".to_string()
155 } else if delta < 3600 {
156 let mins = delta / 60;
157 format!("{} min{} ago", mins, if mins == 1 { "" } else { "s" })
158 } else if delta < 86400 {
159 let hours = delta / 3600;
160 format!("{} hour{} ago", hours, if hours == 1 { "" } else { "s" })
161 } else if delta < 86400 * 30 {
162 let days = delta / 86400;
163 format!("{} day{} ago", days, if days == 1 { "" } else { "s" })
164 } else {
165 format_ts_iso(ts)
166 }
167}
168
169fn format_ts_iso(ts: i64) -> String {
170 chrono::DateTime::from_timestamp(ts, 0)
171 .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
172 .unwrap_or_else(|| ts.to_string())
173}