context_harness/
export.rs1use anyhow::Result;
8use serde::Serialize;
9use sqlx::Row;
10use std::path::Path;
11
12use crate::config::Config;
13use crate::db;
14
15#[derive(Serialize)]
16struct ExportData {
17 documents: Vec<ExportDocument>,
18 chunks: Vec<ExportChunk>,
19}
20
21#[derive(Serialize)]
22struct ExportDocument {
23 id: String,
24 source: String,
25 source_id: String,
26 source_url: Option<String>,
27 title: Option<String>,
28 updated_at: i64,
29 body: String,
30}
31
32#[derive(Serialize)]
33struct ExportChunk {
34 id: String,
35 document_id: String,
36 chunk_index: i64,
37 text: String,
38}
39
40pub async fn run_export(config: &Config, output: Option<&Path>) -> Result<()> {
45 let pool = db::connect(config).await?;
46
47 let doc_rows = sqlx::query(
48 "SELECT id, source, source_id, source_url, title, updated_at, body \
49 FROM documents ORDER BY source_id",
50 )
51 .fetch_all(&pool)
52 .await?;
53
54 let chunk_rows = sqlx::query(
55 "SELECT id, document_id, chunk_index, text \
56 FROM chunks ORDER BY document_id, chunk_index",
57 )
58 .fetch_all(&pool)
59 .await?;
60
61 let documents: Vec<ExportDocument> = doc_rows
62 .iter()
63 .map(|row| ExportDocument {
64 id: row.get("id"),
65 source: row.get("source"),
66 source_id: row.get("source_id"),
67 source_url: row.get("source_url"),
68 title: row.get("title"),
69 updated_at: row.get("updated_at"),
70 body: row.get("body"),
71 })
72 .collect();
73
74 let chunks: Vec<ExportChunk> = chunk_rows
75 .iter()
76 .map(|row| ExportChunk {
77 id: row.get("id"),
78 document_id: row.get("document_id"),
79 chunk_index: row.get("chunk_index"),
80 text: row.get("text"),
81 })
82 .collect();
83
84 let doc_count = documents.len();
85 let chunk_count = chunks.len();
86
87 let data = ExportData { documents, chunks };
88 let json = serde_json::to_string_pretty(&data)?;
89
90 match output {
91 Some(path) => {
92 if let Some(parent) = path.parent() {
93 std::fs::create_dir_all(parent)?;
94 }
95 std::fs::write(path, &json)?;
96 eprintln!(
97 "Exported {} documents, {} chunks to {}",
98 doc_count,
99 chunk_count,
100 path.display()
101 );
102 }
103 None => {
104 println!("{}", json);
105 }
106 }
107
108 pool.close().await;
109 Ok(())
110}