zeta eval: `--repeat` flag (#42569)

Agus Zubiaga, Ben Kunkle, and Michael created

Adds a `--repeat` flag to the zeta eval that runs each example as many
times as specified. Also makes the output nicer in a few ways.

Release Notes:

- N/A

---------

Co-authored-by: Ben Kunkle <ben@zed.dev>
Co-authored-by: Michael <michael@zed.dev>

Change summary

crates/zeta2/src/xml_edits.rs   |   4 
crates/zeta2/src/zeta2.rs       |  31 +++--
crates/zeta_cli/src/evaluate.rs | 195 ++++++++++++++++++++++++++--------
crates/zeta_cli/src/example.rs  |  84 ++++++++++++++
crates/zeta_cli/src/main.rs     |  26 +++-
crates/zeta_cli/src/paths.rs    |  29 +++-
crates/zeta_cli/src/predict.rs  | 122 +++++++++------------
7 files changed, 344 insertions(+), 147 deletions(-)

Detailed changes

crates/zeta2/src/xml_edits.rs 🔗

@@ -79,7 +79,7 @@ fn resolve_new_text_old_text_in_buffer(
             }
         }
         offset.ok_or_else(|| {
-            #[cfg(debug_assertions)]
+            #[cfg(any(debug_assertions, feature = "eval-support"))]
             if let Some(closest_match) = closest_old_text_match(buffer, old_text) {
                 log::info!(
                     "Closest `old_text` match: {}",
@@ -102,7 +102,7 @@ fn resolve_new_text_old_text_in_buffer(
         }))
 }
 
-#[cfg(debug_assertions)]
+#[cfg(any(debug_assertions, feature = "eval-support"))]
 fn closest_old_text_match(buffer: &TextBufferSnapshot, old_text: &str) -> Option<String> {
     let buffer_text = buffer.text();
     let len = old_text.len();

crates/zeta2/src/zeta2.rs 🔗

@@ -208,7 +208,7 @@ pub struct ZetaSearchQueryDebugInfo {
 pub type RequestDebugInfo = predict_edits_v3::DebugInfo;
 
 struct ZetaProject {
-    syntax_index: Entity<SyntaxIndex>,
+    syntax_index: Option<Entity<SyntaxIndex>>,
     events: VecDeque<Event>,
     registered_buffers: HashMap<gpui::EntityId, RegisteredBuffer>,
     current_prediction: Option<CurrentEditPrediction>,
@@ -445,9 +445,13 @@ impl Zeta {
         self.projects
             .entry(project.entity_id())
             .or_insert_with(|| ZetaProject {
-                syntax_index: cx.new(|cx| {
-                    SyntaxIndex::new(project, self.options.file_indexing_parallelism, cx)
-                }),
+                syntax_index: if let ContextMode::Syntax(_) = &self.options.context {
+                    Some(cx.new(|cx| {
+                        SyntaxIndex::new(project, self.options.file_indexing_parallelism, cx)
+                    }))
+                } else {
+                    None
+                },
                 events: VecDeque::new(),
                 registered_buffers: HashMap::default(),
                 current_prediction: None,
@@ -685,10 +689,11 @@ impl Zeta {
     ) -> Task<Result<Option<EditPrediction>>> {
         let project_state = self.projects.get(&project.entity_id());
 
-        let index_state = project_state.map(|state| {
+        let index_state = project_state.and_then(|state| {
             state
                 .syntax_index
-                .read_with(cx, |index, _cx| index.state().clone())
+                .as_ref()
+                .map(|syntax_index| syntax_index.read_with(cx, |index, _cx| index.state().clone()))
         });
         let options = self.options.clone();
         let active_snapshot = active_buffer.read(cx).snapshot();
@@ -1555,10 +1560,11 @@ impl Zeta {
     ) -> Task<Result<predict_edits_v3::PredictEditsRequest>> {
         let project_state = self.projects.get(&project.entity_id());
 
-        let index_state = project_state.map(|state| {
+        let index_state = project_state.and_then(|state| {
             state
                 .syntax_index
-                .read_with(cx, |index, _cx| index.state().clone())
+                .as_ref()
+                .map(|index| index.read_with(cx, |index, _cx| index.state().clone()))
         });
         let options = self.options.clone();
         let snapshot = buffer.read(cx).snapshot();
@@ -1628,10 +1634,11 @@ impl Zeta {
         cx: &mut App,
     ) -> Task<Result<()>> {
         let zeta_project = self.get_or_init_zeta_project(project, cx);
-        zeta_project
-            .syntax_index
-            .read(cx)
-            .wait_for_initial_file_indexing(cx)
+        if let Some(syntax_index) = &zeta_project.syntax_index {
+            syntax_index.read(cx).wait_for_initial_file_indexing(cx)
+        } else {
+            Task::ready(Ok(()))
+        }
     }
 }
 

crates/zeta_cli/src/evaluate.rs 🔗

@@ -1,14 +1,16 @@
 use std::{
-    io::IsTerminal,
-    path::{Path, PathBuf},
+    io::{IsTerminal, Write},
+    path::PathBuf,
     sync::Arc,
 };
 
 use anyhow::Result;
 use clap::Args;
 use collections::HashSet;
-use gpui::AsyncApp;
-use zeta2::udiff::DiffLine;
+use gpui::{AsyncApp, Entity};
+use project::Project;
+use util::ResultExt as _;
+use zeta2::{Zeta, udiff::DiffLine};
 
 use crate::{
     PromptFormat,
@@ -27,6 +29,8 @@ pub struct EvaluateArguments {
     use_expected_context: bool,
     #[clap(long, value_enum, default_value_t = CacheMode::default())]
     cache: CacheMode,
+    #[clap(short, long, default_value_t = 1, alias = "repeat")]
+    repetitions: u16,
 }
 
 pub async fn run_evaluate(
@@ -34,75 +38,169 @@ pub async fn run_evaluate(
     app_state: &Arc<ZetaCliAppState>,
     cx: &mut AsyncApp,
 ) {
-    let example_len = args.example_paths.len();
+    if args.example_paths.is_empty() {
+        eprintln!("No examples provided");
+        return;
+    }
     let all_tasks = args.example_paths.into_iter().map(|path| {
         let app_state = app_state.clone();
+        let example = NamedExample::load(&path).unwrap();
+
         cx.spawn(async move |cx| {
-            run_evaluate_one(
-                &path,
-                args.prompt_format,
-                args.use_expected_context,
-                args.cache,
-                app_state.clone(),
-                cx,
-            )
-            .await
+            let (project, zetas, _edited_buffers) = example
+                .setup_project(&app_state, args.repetitions, cx)
+                .await
+                .unwrap();
+
+            let tasks = zetas.into_iter().enumerate().map(|(repetition_ix, zeta)| {
+                let repetition_ix = (args.repetitions > 1).then(|| repetition_ix as u16);
+
+                let example = example.clone();
+                let project = project.clone();
+
+                cx.spawn(async move |cx| {
+                    let name = example.name.clone();
+                    run_evaluate_one(
+                        example,
+                        repetition_ix,
+                        project,
+                        zeta,
+                        args.prompt_format,
+                        args.use_expected_context,
+                        args.cache,
+                        cx,
+                    )
+                    .await
+                    .map_err(|err| (err, name, repetition_ix))
+                })
+            });
+            futures::future::join_all(tasks).await
         })
     });
-    let all_results = futures::future::try_join_all(all_tasks).await;
-
-    if let Ok(all_results) = &all_results {
-        let aggregated_result = EvaluationResult {
-            context: Scores::aggregate(all_results.iter().map(|r| &r.context)),
-            edit_prediction: Scores::aggregate(all_results.iter().map(|r| &r.edit_prediction)),
-        };
-
-        if example_len > 1 {
-            println!("\n{}", "-".repeat(80));
-            println!("\n## TOTAL SCORES");
-            println!("{}", aggregated_result.to_markdown());
+    let all_results = futures::future::join_all(all_tasks).await;
+
+    write_aggregated_scores(&mut std::io::stdout(), &all_results).unwrap();
+    if let Some(mut output_file) =
+        std::fs::File::create(crate::paths::RUN_DIR.join("aggregated_results.md")).log_err()
+    {
+        write_aggregated_scores(&mut output_file, &all_results).log_err();
+    };
+    print_run_data_dir(args.repetitions == 1);
+}
+
+fn write_aggregated_scores(
+    w: &mut impl std::io::Write,
+    all_results: &Vec<Vec<Result<EvaluationResult, (anyhow::Error, String, Option<u16>)>>>,
+) -> Result<()> {
+    let mut successful = Vec::new();
+    let mut failed_count = 0;
+    writeln!(w, "## Errors\n")?;
+    for result in all_results.iter().flatten() {
+        match result {
+            Ok(eval_result) => successful.push(eval_result),
+            Err((err, name, repetition_ix)) => {
+                failed_count += 1;
+                let err = err
+                    .to_string()
+                    .replace("<edits", "```xml\n<edits")
+                    .replace("</edits>", "</edits>\n```");
+                writeln!(
+                    w,
+                    "### ERROR {name}{}\n\n{err}\n",
+                    repetition_ix
+                        .map(|ix| format!(" [RUN {ix:03}]"))
+                        .unwrap_or_default()
+                )?;
+            }
         }
     }
-
-    print_run_data_dir();
-
-    all_results.unwrap();
+    let aggregated_result = EvaluationResult {
+        context: Scores::aggregate(successful.iter().map(|r| &r.context)),
+        edit_prediction: Scores::aggregate(successful.iter().map(|r| &r.edit_prediction)),
+    };
+
+    writeln!(w, "\n{}", "-".repeat(80))?;
+    writeln!(w, "\n## TOTAL SCORES")?;
+    writeln!(w, "\n### Success Rate")?;
+    writeln!(
+        w,
+        "\nCongratulations! {}/{} ({:.2}%) of runs weren't outright failures 🎉",
+        successful.len(),
+        successful.len() + failed_count,
+        (successful.len() as f64 / (successful.len() + failed_count) as f64) * 100.0
+    )?;
+    writeln!(w, "{}", aggregated_result)?;
+
+    Ok(())
 }
 
 pub async fn run_evaluate_one(
-    example_path: &Path,
+    example: NamedExample,
+    repetition_ix: Option<u16>,
+    project: Entity<Project>,
+    zeta: Entity<Zeta>,
     prompt_format: PromptFormat,
     use_expected_context: bool,
     cache_mode: CacheMode,
-    app_state: Arc<ZetaCliAppState>,
     cx: &mut AsyncApp,
 ) -> Result<EvaluationResult> {
-    let example = NamedExample::load(&example_path).unwrap();
-    let predictions = zeta2_predict(
+    let predict_result = zeta2_predict(
         example.clone(),
+        project,
+        zeta,
+        repetition_ix,
         prompt_format,
         use_expected_context,
         cache_mode,
-        &app_state,
         cx,
     )
-    .await
-    .unwrap();
+    .await?;
+
+    let evaluation_result = evaluate(&example.example, &predict_result);
+
+    if repetition_ix.is_none() {
+        write_eval_result(
+            &example,
+            &predict_result,
+            &evaluation_result,
+            &mut std::io::stdout(),
+        )?;
+    }
 
-    let evaluation_result = evaluate(&example.example, &predictions);
+    if let Some(mut results_file) =
+        std::fs::File::create(predict_result.run_example_dir.join("results.md")).log_err()
+    {
+        write_eval_result(
+            &example,
+            &predict_result,
+            &evaluation_result,
+            &mut results_file,
+        )
+        .log_err();
+    }
+
+    anyhow::Ok(evaluation_result)
+}
 
-    println!(
+fn write_eval_result(
+    example: &NamedExample,
+    predictions: &PredictionDetails,
+    evaluation_result: &EvaluationResult,
+    out: &mut impl Write,
+) -> Result<()> {
+    writeln!(
+        out,
         "## Expected edit prediction:\n\n```diff\n{}\n```\n",
         compare_diffs(&example.example.expected_patch, &predictions.diff)
-    );
-    println!(
+    )?;
+    writeln!(
+        out,
         "## Actual edit prediction:\n\n```diff\n{}\n```\n",
         compare_diffs(&predictions.diff, &example.example.expected_patch)
-    );
-
-    println!("{}", evaluation_result.to_markdown());
+    )?;
+    writeln!(out, "{}", evaluation_result)?;
 
-    anyhow::Ok(evaluation_result)
+    anyhow::Ok(())
 }
 
 #[derive(Debug, Default)]
@@ -194,9 +292,10 @@ False Negatives : {}",
     }
 }
 
-impl EvaluationResult {
-    pub fn to_markdown(&self) -> String {
-        format!(
+impl std::fmt::Display for EvaluationResult {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
             r#"
 ### Context Scores
 {}

crates/zeta_cli/src/example.rs 🔗

@@ -6,9 +6,10 @@ use std::{
     io::Write,
     mem,
     path::{Path, PathBuf},
-    sync::Arc,
+    sync::{Arc, OnceLock},
 };
 
+use crate::headless::ZetaCliAppState;
 use anyhow::{Context as _, Result, anyhow};
 use clap::ValueEnum;
 use cloud_zeta2_prompt::CURSOR_MARKER;
@@ -18,13 +19,14 @@ use futures::{
     AsyncWriteExt as _,
     lock::{Mutex, OwnedMutexGuard},
 };
-use gpui::{AsyncApp, Entity, http_client::Url};
+use futures::{FutureExt as _, future::Shared};
+use gpui::{AppContext as _, AsyncApp, Entity, Task, http_client::Url};
 use language::{Anchor, Buffer};
 use project::{Project, ProjectPath};
 use pulldown_cmark::CowStr;
 use serde::{Deserialize, Serialize};
 use util::{paths::PathStyle, rel_path::RelPath};
-use zeta2::udiff::OpenedBuffers;
+use zeta2::{Zeta, udiff::OpenedBuffers};
 
 use crate::paths::{REPOS_DIR, WORKTREES_DIR};
 
@@ -311,6 +313,82 @@ impl NamedExample {
         }
     }
 
+    pub async fn setup_project<'a>(
+        &'a self,
+        app_state: &Arc<ZetaCliAppState>,
+        repetitions: u16,
+        cx: &mut AsyncApp,
+    ) -> Result<(Entity<Project>, Vec<Entity<Zeta>>, OpenedBuffers<'a>)> {
+        let worktree_path = self.setup_worktree().await?;
+
+        static AUTHENTICATED: OnceLock<Shared<Task<()>>> = OnceLock::new();
+
+        AUTHENTICATED
+            .get_or_init(|| {
+                let client = app_state.client.clone();
+                cx.spawn(async move |cx| {
+                    client
+                        .sign_in_with_optional_connect(true, cx)
+                        .await
+                        .unwrap();
+                })
+                .shared()
+            })
+            .clone()
+            .await;
+
+        let project = cx.update(|cx| {
+            Project::local(
+                app_state.client.clone(),
+                app_state.node_runtime.clone(),
+                app_state.user_store.clone(),
+                app_state.languages.clone(),
+                app_state.fs.clone(),
+                None,
+                cx,
+            )
+        })?;
+
+        let worktree = project
+            .update(cx, |project, cx| {
+                project.create_worktree(&worktree_path, true, cx)
+            })?
+            .await?;
+        worktree
+            .read_with(cx, |worktree, _cx| {
+                worktree.as_local().unwrap().scan_complete()
+            })?
+            .await;
+
+        let buffer_store = project.read_with(cx, |project, _| project.buffer_store().clone())?;
+
+        let zetas = (0..repetitions)
+            .map(|_| {
+                let zeta = cx.new(|cx| {
+                    zeta2::Zeta::new(app_state.client.clone(), app_state.user_store.clone(), cx)
+                })?;
+
+                cx.subscribe(&buffer_store, {
+                    let project = project.clone();
+                    let zeta = zeta.clone();
+                    move |_, event, cx| match event {
+                        project::buffer_store::BufferStoreEvent::BufferAdded(buffer) => {
+                            zeta.update(cx, |zeta, cx| zeta.register_buffer(&buffer, &project, cx));
+                        }
+                        _ => {}
+                    }
+                })?
+                .detach();
+
+                anyhow::Ok(zeta)
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let edited_buffers = self.apply_edit_history(&project, cx).await?;
+
+        anyhow::Ok((project, zetas, edited_buffers))
+    }
+
     pub async fn setup_worktree(&self) -> Result<PathBuf> {
         let (repo_owner, repo_name) = self.repo_name()?;
         let file_name = self.file_name();

crates/zeta_cli/src/main.rs 🔗

@@ -35,8 +35,10 @@ use crate::util::{open_buffer, open_buffer_with_language_server};
 #[derive(Parser, Debug)]
 #[command(name = "zeta")]
 struct ZetaCliArgs {
+    #[arg(long, default_value_t = false)]
+    printenv: bool,
     #[command(subcommand)]
-    command: Command,
+    command: Option<Command>,
 }
 
 #[derive(Subcommand, Debug)]
@@ -413,14 +415,22 @@ fn main() {
         let app_state = Arc::new(headless::init(cx));
         cx.spawn(async move |cx| {
             match args.command {
-                Command::Zeta1 {
+                None => {
+                    if args.printenv {
+                        ::util::shell_env::print_env();
+                        return;
+                    } else {
+                        panic!("Expected a command");
+                    }
+                }
+                Some(Command::Zeta1 {
                     command: Zeta1Command::Context { context_args },
-                } => {
+                }) => {
                     let context = zeta1_context(context_args, &app_state, cx).await.unwrap();
                     let result = serde_json::to_string_pretty(&context.body).unwrap();
                     println!("{}", result);
                 }
-                Command::Zeta2 { command } => match command {
+                Some(Command::Zeta2 { command }) => match command {
                     Zeta2Command::Predict(arguments) => {
                         run_zeta2_predict(arguments, &app_state, cx).await;
                     }
@@ -464,14 +474,16 @@ fn main() {
                         println!("{}", result.unwrap());
                     }
                 },
-                Command::ConvertExample {
+                Some(Command::ConvertExample {
                     path,
                     output_format,
-                } => {
+                }) => {
                     let example = NamedExample::load(path).unwrap();
                     example.write(output_format, io::stdout()).unwrap();
                 }
-                Command::Clean => std::fs::remove_dir_all(&*crate::paths::TARGET_ZETA_DIR).unwrap(),
+                Some(Command::Clean) => {
+                    std::fs::remove_dir_all(&*crate::paths::TARGET_ZETA_DIR).unwrap()
+                }
             };
 
             let _ = cx.update(|cx| cx.quit());

crates/zeta_cli/src/paths.rs 🔗

@@ -13,28 +13,41 @@ pub static RUN_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
 pub static LATEST_EXAMPLE_RUN_DIR: LazyLock<PathBuf> =
     LazyLock::new(|| TARGET_ZETA_DIR.join("latest"));
 
-pub fn print_run_data_dir() {
+pub fn print_run_data_dir(deep: bool) {
     println!("\n## Run Data\n");
+    let mut files = Vec::new();
 
     let current_dir = std::env::current_dir().unwrap();
     for file in std::fs::read_dir(&*RUN_DIR).unwrap() {
         let file = file.unwrap();
-        if file.file_type().unwrap().is_dir() {
+        if file.file_type().unwrap().is_dir() && deep {
             for file in std::fs::read_dir(file.path()).unwrap() {
                 let path = file.unwrap().path();
                 let path = path.strip_prefix(&current_dir).unwrap_or(&path);
-                println!(
+                files.push(format!(
                     "- {}/\x1b[34m{}\x1b[0m",
                     path.parent().unwrap().display(),
                     path.file_name().unwrap().display(),
-                );
+                ));
             }
         } else {
             let path = file.path();
-            println!(
-                "- {} ",
-                path.strip_prefix(&current_dir).unwrap_or(&path).display()
-            );
+            let path = path.strip_prefix(&current_dir).unwrap_or(&path);
+            files.push(format!(
+                "- {}/\x1b[34m{}\x1b[0m",
+                path.parent().unwrap().display(),
+                path.file_name().unwrap().display(),
+            ));
         }
     }
+    files.sort();
+
+    for file in files {
+        println!("{}", file);
+    }
+
+    println!(
+        "\n💡 Tip of the day: {} always points to the latest run\n",
+        LATEST_EXAMPLE_RUN_DIR.display()
+    );
 }

crates/zeta_cli/src/predict.rs 🔗

@@ -12,7 +12,6 @@ use gpui::{AppContext, AsyncApp, Entity};
 use language::{Anchor, Buffer, Point};
 use project::Project;
 use serde::Deserialize;
-use std::cell::Cell;
 use std::fs;
 use std::io::Write;
 use std::ops::Range;
@@ -20,7 +19,7 @@ use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::time::{Duration, Instant};
-use zeta2::{EvalCache, EvalCacheEntryKind, EvalCacheKey};
+use zeta2::{EvalCache, EvalCacheEntryKind, EvalCacheKey, Zeta};
 
 #[derive(Debug, Args)]
 pub struct PredictArguments {
@@ -35,10 +34,12 @@ pub struct PredictArguments {
     cache: CacheMode,
 }
 
-#[derive(Debug, ValueEnum, Default, Clone, Copy)]
+#[derive(Debug, ValueEnum, Default, Clone, Copy, PartialEq)]
 pub enum CacheMode {
-    /// Use cached LLM requests and responses, based on the hash of the prompt and the endpoint.
+    /// Use cached LLM requests and responses, except when multiple repetitions are requested
     #[default]
+    Auto,
+    /// Use cached LLM requests and responses, based on the hash of the prompt and the endpoint.
     #[value(alias = "request")]
     Requests,
     /// Ignore existing cache entries for both LLM and search.
@@ -50,12 +51,22 @@ pub enum CacheMode {
 
 impl CacheMode {
     fn use_cached_llm_responses(&self) -> bool {
+        self.assert_not_auto();
         matches!(self, CacheMode::Requests | CacheMode::Force)
     }
 
     fn use_cached_search_results(&self) -> bool {
+        self.assert_not_auto();
         matches!(self, CacheMode::Force)
     }
+
+    fn assert_not_auto(&self) {
+        assert_ne!(
+            *self,
+            CacheMode::Auto,
+            "Cache mode should not be auto at this point!"
+        );
+    }
 }
 
 #[derive(clap::ValueEnum, Debug, Clone)]
@@ -71,72 +82,49 @@ pub async fn run_zeta2_predict(
     cx: &mut AsyncApp,
 ) {
     let example = NamedExample::load(args.example_path).unwrap();
+    let (project, mut zetas, _edited_buffers) =
+        example.setup_project(app_state, 1, cx).await.unwrap();
     let result = zeta2_predict(
         example,
+        project,
+        zetas.remove(0),
+        None,
         args.prompt_format,
         args.use_expected_context,
         args.cache,
-        &app_state,
         cx,
     )
     .await
     .unwrap();
     result.write(args.format, std::io::stdout()).unwrap();
 
-    print_run_data_dir();
-}
-
-thread_local! {
-    static AUTHENTICATED: Cell<bool> = const { Cell::new(false) };
+    print_run_data_dir(true);
 }
 
 pub async fn zeta2_predict(
     example: NamedExample,
+    project: Entity<Project>,
+    zeta: Entity<Zeta>,
+    repetition_ix: Option<u16>,
     prompt_format: PromptFormat,
     use_expected_context: bool,
-    cache_mode: CacheMode,
-    app_state: &Arc<ZetaCliAppState>,
+    mut cache_mode: CacheMode,
     cx: &mut AsyncApp,
 ) -> Result<PredictionDetails> {
-    let worktree_path = example.setup_worktree().await?;
-
-    if !AUTHENTICATED.get() {
-        AUTHENTICATED.set(true);
-
-        app_state
-            .client
-            .sign_in_with_optional_connect(true, cx)
-            .await?;
+    if repetition_ix.is_some() {
+        if cache_mode != CacheMode::Auto && cache_mode != CacheMode::Skip {
+            panic!("Repetitions are not supported in Auto cache mode");
+        } else {
+            cache_mode = CacheMode::Skip;
+        }
+    } else if cache_mode == CacheMode::Auto {
+        cache_mode = CacheMode::Requests;
     }
 
-    let project = cx.update(|cx| {
-        Project::local(
-            app_state.client.clone(),
-            app_state.node_runtime.clone(),
-            app_state.user_store.clone(),
-            app_state.languages.clone(),
-            app_state.fs.clone(),
-            None,
-            cx,
-        )
-    })?;
-
-    let buffer_store = project.read_with(cx, |project, _| project.buffer_store().clone())?;
-
-    let worktree = project
-        .update(cx, |project, cx| {
-            project.create_worktree(&worktree_path, true, cx)
-        })?
-        .await?;
-    worktree
-        .read_with(cx, |worktree, _cx| {
-            worktree.as_local().unwrap().scan_complete()
-        })?
-        .await;
-
-    let zeta = cx.update(|cx| zeta2::Zeta::global(&app_state.client, &app_state.user_store, cx))?;
-
-    let example_run_dir = RUN_DIR.join(&example.file_name());
+    let mut example_run_dir = RUN_DIR.join(&example.file_name());
+    if let Some(repetition_ix) = repetition_ix {
+        example_run_dir = example_run_dir.join(format!("{:03}", repetition_ix));
+    }
     fs::create_dir_all(&example_run_dir)?;
     if LATEST_EXAMPLE_RUN_DIR.exists() {
         fs::remove_file(&*LATEST_EXAMPLE_RUN_DIR)?;
@@ -157,23 +145,9 @@ pub async fn zeta2_predict(
         }));
     })?;
 
-    cx.subscribe(&buffer_store, {
-        let project = project.clone();
-        move |_, event, cx| match event {
-            project::buffer_store::BufferStoreEvent::BufferAdded(buffer) => {
-                zeta2::Zeta::try_global(cx)
-                    .unwrap()
-                    .update(cx, |zeta, cx| zeta.register_buffer(&buffer, &project, cx));
-            }
-            _ => {}
-        }
-    })?
-    .detach();
-
-    let _edited_buffers = example.apply_edit_history(&project, cx).await?;
     let (cursor_buffer, cursor_anchor) = example.cursor_position(&project, cx).await?;
 
-    let result = Arc::new(Mutex::new(PredictionDetails::default()));
+    let result = Arc::new(Mutex::new(PredictionDetails::new(example_run_dir.clone())));
     let mut debug_rx = zeta.update(cx, |zeta, _| zeta.debug_info())?;
 
     let debug_task = cx.background_spawn({
@@ -397,7 +371,7 @@ impl EvalCache for RunCache {
                 self.link_to_run(&key);
                 Some(fs::read_to_string(path).unwrap())
             } else {
-                log::info!("Skipping cached entry: {}", path.display());
+                log::trace!("Skipping cached entry: {}", path.display());
                 None
             }
         } else if matches!(self.cache_mode, CacheMode::Force) {
@@ -417,14 +391,14 @@ impl EvalCache for RunCache {
         fs::write(&input_path, input).unwrap();
 
         let output_path = RunCache::output_cache_path(&key);
-        log::info!("Writing cache entry: {}", output_path.display());
+        log::trace!("Writing cache entry: {}", output_path.display());
         fs::write(&output_path, output).unwrap();
 
         self.link_to_run(&key);
     }
 }
 
-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct PredictionDetails {
     pub diff: String,
     pub excerpts: Vec<ActualExcerpt>,
@@ -433,9 +407,23 @@ pub struct PredictionDetails {
     pub running_search_time: Option<Duration>,
     pub prediction_time: Duration,
     pub total_time: Duration,
+    pub run_example_dir: PathBuf,
 }
 
 impl PredictionDetails {
+    pub fn new(run_example_dir: PathBuf) -> Self {
+        Self {
+            diff: Default::default(),
+            excerpts: Default::default(),
+            excerpts_text: Default::default(),
+            planning_search_time: Default::default(),
+            running_search_time: Default::default(),
+            prediction_time: Default::default(),
+            total_time: Default::default(),
+            run_example_dir,
+        }
+    }
+
     pub fn write(&self, format: PredictionsOutputFormat, mut out: impl Write) -> Result<()> {
         let formatted = match format {
             PredictionsOutputFormat::Md => self.to_markdown(),