eval.rs

  1use agent::Agent;
  2use anyhow::Result;
  3use gpui::Application;
  4use language_model::LanguageModelRegistry;
  5use reqwest_client::ReqwestClient;
  6use serde::Deserialize;
  7use std::{
  8    fs,
  9    path::{Path, PathBuf},
 10    sync::Arc,
 11};
 12mod agent;
 13
 14#[derive(Debug, Deserialize)]
 15pub struct ExampleBase {
 16    pub path: PathBuf,
 17    pub revision: String,
 18}
 19
 20#[derive(Debug)]
 21pub struct Example {
 22    pub base: ExampleBase,
 23
 24    /// Content of the prompt.md file
 25    pub prompt: String,
 26
 27    /// Content of the rubric.md file
 28    pub rubric: String,
 29}
 30
 31impl Example {
 32    /// Load an example from a directory containing base.toml, prompt.md, and rubric.md
 33    pub fn load_from_directory<P: AsRef<Path>>(dir_path: P) -> Result<Self> {
 34        let base_path = dir_path.as_ref().join("base.toml");
 35        let prompt_path = dir_path.as_ref().join("prompt.md");
 36        let rubric_path = dir_path.as_ref().join("rubric.md");
 37
 38        let mut base: ExampleBase = toml::from_str(&fs::read_to_string(&base_path)?)?;
 39        base.path = base.path.canonicalize()?;
 40
 41        Ok(Example {
 42            base,
 43            prompt: fs::read_to_string(prompt_path)?,
 44            rubric: fs::read_to_string(rubric_path)?,
 45        })
 46    }
 47
 48    /// Set up the example by checking out the specified Git revision
 49    pub fn setup(&self) -> Result<()> {
 50        use std::process::Command;
 51
 52        // Check if the directory exists
 53        let path = Path::new(&self.base.path);
 54        anyhow::ensure!(path.exists(), "Path does not exist: {:?}", self.base.path);
 55
 56        // Change to the project directory and checkout the specified revision
 57        let output = Command::new("git")
 58            .current_dir(&self.base.path)
 59            .arg("checkout")
 60            .arg(&self.base.revision)
 61            .output()?;
 62        anyhow::ensure!(
 63            output.status.success(),
 64            "Failed to checkout revision {}: {}",
 65            self.base.revision,
 66            String::from_utf8_lossy(&output.stderr),
 67        );
 68
 69        Ok(())
 70    }
 71}
 72
 73fn main() {
 74    env_logger::init();
 75    let http_client = Arc::new(ReqwestClient::new());
 76    let app = Application::headless().with_http_client(http_client.clone());
 77
 78    app.run(move |cx| {
 79        let app_state = crate::agent::init(cx);
 80        let _agent = Agent::new(app_state, cx);
 81
 82        let model = agent::find_model("claude-3-7-sonnet-thinking-latest", cx).unwrap();
 83
 84        LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
 85            registry.set_default_model(Some(model.clone()), cx);
 86        });
 87
 88        let model_provider_id = model.provider_id();
 89
 90        let authenticate = agent::authenticate_model_provider(model_provider_id.clone(), cx);
 91
 92        cx.spawn(async move |_cx| {
 93            authenticate.await.unwrap();
 94        })
 95        .detach();
 96    });
 97
 98    // let example =
 99    //     Example::load_from_directory("./crates/eval/examples/find_and_replace_diff_card")?;
100    // example.setup()?;
101}