Add convert-example command to zeta-cli

Agus Zubiaga created

Change summary

Cargo.lock                     |   2 
crates/zeta_cli/Cargo.toml     |   2 
crates/zeta_cli/src/example.rs | 166 ++++++++++++++++++++++++++++++++++++
crates/zeta_cli/src/main.rs    |  17 +++
4 files changed, 187 insertions(+)

Detailed changes

Cargo.lock 🔗

@@ -21746,6 +21746,7 @@ dependencies = [
  "polars",
  "project",
  "prompt_store",
+ "pulldown-cmark 0.12.2",
  "release_channel",
  "reqwest_client",
  "serde",
@@ -21755,6 +21756,7 @@ dependencies = [
  "smol",
  "soa-rs",
  "terminal_view",
+ "toml 0.8.23",
  "util",
  "watch",
  "zeta",

crates/zeta_cli/Cargo.toml 🔗

@@ -39,8 +39,10 @@ paths.workspace = true
 polars = { version = "0.51", features = ["lazy", "dtype-struct", "parquet"] }
 project.workspace = true
 prompt_store.workspace = true
+pulldown-cmark.workspace = true
 release_channel.workspace = true
 reqwest_client.workspace = true
+toml.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 settings.workspace = true

crates/zeta_cli/src/example.rs 🔗

@@ -0,0 +1,166 @@
+use std::{
+    fmt::{self, Display},
+    fs::File,
+    io::{Read, Write},
+    os::unix::ffi::OsStrExt,
+    path::{Path, PathBuf},
+};
+
+use anyhow::Result;
+use clap::ValueEnum;
+use serde::{Deserialize, Serialize};
+
+pub struct NamedExample {
+    name: String, // Label for the example; `load` derives it from the source file's name.
+    example: Example, // The example data; the only part written for JSON and TOML output.
+}
+
+#[derive(Serialize, Deserialize)] // Schema read and written by the JSON and TOML formats.
+pub struct Example {
+    repository_url: String, // Shown in the Markdown header, below the title.
+    commit: String, // Shown in the Markdown header; presumably a revision of that repository.
+    edit_history: Vec<String>, // Concatenated into one `diff` block under "Edit history".
+    expected_hunks: Vec<String>, // Concatenated into one `diff` block under "Expected Hunks".
+    expected_patch: String, // Emitted verbatim as a `diff` block under "Expected Patch".
+    expected_excerpts: Vec<ExpectedExcerpt>, // One fenced block each under "Expected Excerpts".
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct ExpectedExcerpt {
+    path: PathBuf, // Its extension becomes the fence language tag; shown in full as `path=…`.
+    text: String, // Body of the fenced block, emitted verbatim.
+}
+
+#[derive(ValueEnum, Debug, Clone)] // `ValueEnum` lets clap accept it as a command-line value.
+pub enum ExampleFormat {
+    Json, // Written with `serde_json::to_writer`.
+    Toml, // Written with `toml::to_string_pretty`.
+    Md, // Markdown rendered by `NamedExample`'s `Display` impl.
+}
+
+impl NamedExample {
+    /// Loads an example from `path`, choosing the parser from the file extension.
+    ///
+    /// `.json` and `.toml` files are deserialized into an [`Example`]. The example is
+    /// named after the file stem, so `foo.json` is titled `foo` rather than `foo.json`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be opened or read, if its contents do not
+    /// deserialize, or if the extension is missing, unrecognized, or `.md`
+    /// (Markdown input is not implemented yet, see [`Self::parse_md`]).
+    pub fn load(path: impl AsRef<Path>) -> Result<Self> {
+        let path = path.as_ref();
+        let mut file = File::open(path)?;
+        let ext = path.extension();
+        let name = path.file_stem().unwrap_or_default().display().to_string();
+
+        // NOTE(review): `as_bytes` comes from the Unix-only `OsStrExt` import, so this
+        // match cannot build on Windows; `ext.and_then(|s| s.to_str())` would be portable.
+        match ext.map(|s| s.as_bytes()) {
+            Some(b"json") => {
+                // serde_json does not buffer its reader, so wrap the raw file handle.
+                let example = serde_json::from_reader(std::io::BufReader::new(file))?;
+                Ok(Self { name, example })
+            }
+            Some(b"toml") => {
+                let mut content = String::new();
+                file.read_to_string(&mut content)?;
+                let example = toml::from_str(&content)?;
+                Ok(Self { name, example })
+            }
+            Some(b"md") => {
+                let mut content = String::new();
+                file.read_to_string(&mut content)?;
+                Self::parse_md(&content)
+            }
+            Some(_) => {
+                anyhow::bail!("Unrecognized example extension: {}", ext.unwrap().display());
+            }
+            None => {
+                anyhow::bail!(
+                    "Failed to determine example type since the file does not have an extension."
+                );
+            }
+        }
+    }
+
+    /// Parses the Markdown form of an example (the format the `Display` impl emits).
+    ///
+    /// Not implemented yet: this always returns an error (rather than panicking) so
+    /// that [`Self::load`] can report `.md` input gracefully. The intended approach is
+    /// to walk the events of `pulldown_cmark::Parser::new(input)`.
+    pub fn parse_md(input: &str) -> Result<Self> {
+        let _ = input; // Not consumed until the parser exists.
+        anyhow::bail!("Parsing Markdown examples is not implemented yet");
+    }
+
+    /// Serializes the example to `out` in the requested `format`.
+    ///
+    /// JSON and TOML output contain only the inner [`Example`]; Markdown output is
+    /// rendered by the `Display` impl and also includes the example's name.
+    ///
+    /// Returns an error if serialization or the write to `out` fails.
+    pub fn write(&self, format: ExampleFormat, mut out: impl Write) -> Result<()> {
+        match format {
+            ExampleFormat::Json => Ok(serde_json::to_writer(out, &self.example)?),
+            ExampleFormat::Toml => {
+                Ok(out.write_all(toml::to_string_pretty(&self.example)?.as_bytes())?)
+            }
+            ExampleFormat::Md => Ok(write!(out, "{}", self)?),
+        }
+    }
+}
+
+impl Display for NamedExample {
+    /// Renders the example as Markdown: a title, the repository metadata, and one
+    /// fenced section per non-empty field (the "Edit history" heading is always
+    /// written). Fences use five backticks, presumably so that shorter fences inside
+    /// the embedded text cannot close a block early.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let example = &self.example;
+
+        writeln!(f, "# {}\n", self.name)?;
+        writeln!(f, "repository_url = {}", example.repository_url)?;
+        writeln!(f, "commit = {}\n", example.commit)?;
+        writeln!(f, "## Edit history\n")?;
+
+        if !example.edit_history.is_empty() {
+            writeln!(f, "`````diff")?;
+            for item in &example.edit_history {
+                f.write_str(item)?;
+            }
+            writeln!(f, "`````")?;
+        }
+
+        if !example.expected_hunks.is_empty() {
+            writeln!(f, "\n## Expected Hunks\n\n`````diff")?;
+            for hunk in &example.expected_hunks {
+                f.write_str(hunk)?;
+            }
+            writeln!(f, "`````")?;
+        }
+
+        if !example.expected_patch.is_empty() {
+            let patch = &example.expected_patch;
+            writeln!(f, "\n## Expected Patch\n\n`````diff\n{patch}`````")?;
+        }
+
+        if !example.expected_excerpts.is_empty() {
+            writeln!(f, "\n## Expected Excerpts\n")?;
+
+            for excerpt in &example.expected_excerpts {
+                // Info string: `<ext> path=<path>`, without the tag when there is no extension.
+                let lang = excerpt
+                    .path
+                    .extension()
+                    .map(|ext| format!("{} ", ext.to_string_lossy()))
+                    .unwrap_or_default();
+                let path = excerpt.path.display();
+                writeln!(f, "`````{lang}path={path}\n{}`````\n", excerpt.text)?;
+            }
+        }
+
+        Ok(())
+    }
+}

crates/zeta_cli/src/main.rs 🔗

@@ -1,8 +1,10 @@
+mod example;
 mod headless;
 mod source_location;
 mod syntax_retrieval_stats;
 mod util;
 
+use crate::example::{ExampleFormat, NamedExample};
 use crate::syntax_retrieval_stats::retrieval_stats;
 use ::serde::Serialize;
 use ::util::paths::PathStyle;
@@ -22,6 +24,7 @@ use language_model::LanguageModelRegistry;
 use project::{Project, Worktree};
 use reqwest_client::ReqwestClient;
 use serde_json::json;
+use std::io;
 use std::{collections::HashSet, path::PathBuf, process::exit, str::FromStr, sync::Arc};
 use zeta2::{ContextMode, LlmContextOptions, SearchToolQuery};
 
@@ -48,6 +51,11 @@ enum Command {
         #[command(subcommand)]
         command: Zeta2Command,
     },
+    ConvertExample {
+        path: PathBuf,
+        #[arg(long, value_enum, default_value_t = ExampleFormat::Md)]
+        output_format: ExampleFormat,
+    },
 }
 
 #[derive(Subcommand, Debug)]
@@ -641,6 +649,15 @@ fn main() {
                         }
                     },
                 },
+                Command::ConvertExample {
+                    path,
+                    output_format,
+                } => {
+                    let example = NamedExample::load(path).unwrap();
+                    example.write(output_format, io::stdout()).unwrap();
+                    let _ = cx.update(|cx| cx.quit());
+                    return;
+                }
             };
 
             match result {