context_harness/
export.rs

1//! Export the search index as JSON for static site search.
2//!
3//! Produces a `data.json` file containing all documents and chunks,
4//! suitable for use with `ctx-search.js` on static sites. Replaces
5//! the Python one-liner previously used in `build-docs.sh`.
6
7use anyhow::Result;
8use serde::Serialize;
9use sqlx::Row;
10use std::path::Path;
11
12use crate::config::Config;
13use crate::db;
14
15#[derive(Serialize)]
16struct ExportData {
17    documents: Vec<ExportDocument>,
18    chunks: Vec<ExportChunk>,
19}
20
21#[derive(Serialize)]
22struct ExportDocument {
23    id: String,
24    source: String,
25    source_id: String,
26    source_url: Option<String>,
27    title: Option<String>,
28    updated_at: i64,
29    body: String,
30}
31
32#[derive(Serialize)]
33struct ExportChunk {
34    id: String,
35    document_id: String,
36    chunk_index: i64,
37    text: String,
38}
39
40/// Export documents and chunks as JSON.
41///
42/// If `output` is `Some`, writes to that file path. Otherwise writes
43/// to stdout for piping.
44pub async fn run_export(config: &Config, output: Option<&Path>) -> Result<()> {
45    let pool = db::connect(config).await?;
46
47    let doc_rows = sqlx::query(
48        "SELECT id, source, source_id, source_url, title, updated_at, body \
49         FROM documents ORDER BY source_id",
50    )
51    .fetch_all(&pool)
52    .await?;
53
54    let chunk_rows = sqlx::query(
55        "SELECT id, document_id, chunk_index, text \
56         FROM chunks ORDER BY document_id, chunk_index",
57    )
58    .fetch_all(&pool)
59    .await?;
60
61    let documents: Vec<ExportDocument> = doc_rows
62        .iter()
63        .map(|row| ExportDocument {
64            id: row.get("id"),
65            source: row.get("source"),
66            source_id: row.get("source_id"),
67            source_url: row.get("source_url"),
68            title: row.get("title"),
69            updated_at: row.get("updated_at"),
70            body: row.get("body"),
71        })
72        .collect();
73
74    let chunks: Vec<ExportChunk> = chunk_rows
75        .iter()
76        .map(|row| ExportChunk {
77            id: row.get("id"),
78            document_id: row.get("document_id"),
79            chunk_index: row.get("chunk_index"),
80            text: row.get("text"),
81        })
82        .collect();
83
84    let doc_count = documents.len();
85    let chunk_count = chunks.len();
86
87    let data = ExportData { documents, chunks };
88    let json = serde_json::to_string_pretty(&data)?;
89
90    match output {
91        Some(path) => {
92            if let Some(parent) = path.parent() {
93                std::fs::create_dir_all(parent)?;
94            }
95            std::fs::write(path, &json)?;
96            eprintln!(
97                "Exported {} documents, {} chunks to {}",
98                doc_count,
99                chunk_count,
100                path.display()
101            );
102        }
103        None => {
104            println!("{}", json);
105        }
106    }
107
108    pool.close().await;
109    Ok(())
110}