Fix handling of excerpt regions in EP CLI (#49936)

Created by Max Brunsfeld and Ben Kunkle

Previously, we were not computing excerpt regions correctly for EP
examples captured from prod. This PR fixes that, and also simplifies the
data flow in the EP CLI. Examples either come from a concise spec (like
the markdown evals), or are collected from prod. Either way, we compute
from them a `ZetaPromptInput`, and the downstream steps like
prompt-formatting and scoring are derived from that.

Release Notes:

- N/A

---------

Co-authored-by: Ben Kunkle <ben@zed.dev>

Change summary

crates/edit_prediction/src/capture_example.rs       |  98 -----
crates/edit_prediction/src/example_spec.rs          |  78 ----
crates/edit_prediction_cli/src/example.rs           |  21 
crates/edit_prediction_cli/src/format_prompt.rs     |  85 +---
crates/edit_prediction_cli/src/load_project.rs      |  53 ++
crates/edit_prediction_cli/src/main.rs              |  15 
crates/edit_prediction_cli/src/parse_output.rs      |  14 
crates/edit_prediction_cli/src/predict.rs           |   4 
crates/edit_prediction_cli/src/pull_examples.rs     | 254 --------------
crates/edit_prediction_cli/src/qa.rs                |  29 
crates/edit_prediction_cli/src/retrieve_context.rs  |  49 --
crates/edit_prediction_cli/src/reversal_tracking.rs | 130 +++----
crates/edit_prediction_cli/src/score.rs             |   7 
crates/edit_prediction_cli/src/split_commit.rs      |   4 
crates/edit_prediction_cli/src/synthesize.rs        |   2 
crates/edit_prediction_ui/src/edit_prediction_ui.rs |  10 
crates/zeta_prompt/src/zeta_prompt.rs               |  12 
17 files changed, 191 insertions(+), 674 deletions(-)

Detailed changes

crates/edit_prediction/src/capture_example.rs 🔗

@@ -1,10 +1,6 @@
 use crate::{
-    StoredEvent,
-    cursor_excerpt::editable_and_context_ranges_for_cursor_position,
-    example_spec::{
-        CapturedEvent, CapturedPromptInput, CapturedRelatedExcerpt, CapturedRelatedFile,
-        ExampleSpec, MAX_CURSOR_FILE_SIZE,
-    },
+    StoredEvent, cursor_excerpt::editable_and_context_ranges_for_cursor_position,
+    example_spec::ExampleSpec,
 };
 use anyhow::Result;
 use buffer_diff::BufferDiffSnapshot;
@@ -13,14 +9,13 @@ use gpui::{App, Entity, Task};
 use language::{Buffer, ToPoint as _};
 use project::{Project, WorktreeId};
 use std::{collections::hash_map, fmt::Write as _, ops::Range, path::Path, sync::Arc};
-use text::{BufferSnapshot as TextBufferSnapshot, Point, ToOffset as _};
+use text::{BufferSnapshot as TextBufferSnapshot, Point};
 
 pub fn capture_example(
     project: Entity<Project>,
     buffer: Entity<Buffer>,
     cursor_anchor: language::Anchor,
     mut events: Vec<StoredEvent>,
-    related_files: Vec<zeta_prompt::RelatedFile>,
     populate_expected_patch: bool,
     cx: &mut App,
 ) -> Option<Task<Result<ExampleSpec>>> {
@@ -60,14 +55,6 @@ pub fn capture_example(
             .map(|s| s.to_string())
             .unwrap_or_default();
 
-        let full_cursor_offset = cursor_anchor.to_offset(&snapshot);
-        let cursor_point = cursor_anchor.to_point(&snapshot);
-        let cursor_file_content = if snapshot.len() <= MAX_CURSOR_FILE_SIZE {
-            Some(snapshot.text())
-        } else {
-            None
-        };
-
         let (cursor_excerpt, cursor_offset_in_excerpt, cursor_excerpt_range) = cx
             .background_executor()
             .spawn(async move { compute_cursor_excerpt(&snapshot, cursor_anchor) })
@@ -109,56 +96,6 @@ pub fn capture_example(
             rejected_patch = Some(empty_patch);
         }
 
-        let prompt_input = cursor_file_content.map(|content| {
-            let captured_events: Vec<CapturedEvent> = events
-                .iter()
-                .map(|stored_event| {
-                    let zeta_prompt::Event::BufferChange {
-                        path,
-                        old_path,
-                        diff,
-                        predicted,
-                        in_open_source_repo,
-                    } = stored_event.event.as_ref();
-                    CapturedEvent {
-                        path: strip_root_name(path, &root_name).into(),
-                        old_path: strip_root_name(old_path, &root_name).into(),
-                        diff: diff.clone(),
-                        predicted: *predicted,
-                        in_open_source_repo: *in_open_source_repo,
-                    }
-                })
-                .collect();
-
-            let captured_related_files: Vec<CapturedRelatedFile> = related_files
-                .iter()
-                .map(|rf| CapturedRelatedFile {
-                    path: strip_root_name(&rf.path, &root_name).into(),
-                    max_row: rf.max_row,
-                    excerpts: rf
-                        .excerpts
-                        .iter()
-                        .map(|e| CapturedRelatedExcerpt {
-                            row_range: e.row_range.clone(),
-                            text: e.text.to_string(),
-                        })
-                        .collect(),
-                })
-                .collect();
-
-            CapturedPromptInput {
-                cursor_file_content: content,
-                cursor_offset: full_cursor_offset,
-                cursor_row: cursor_point.row,
-                cursor_column: cursor_point.column,
-                excerpt_start_row: Some(0),
-                events: captured_events,
-                related_files: captured_related_files,
-                in_open_source_repo: false,
-                zed_version: None,
-            }
-        });
-
         let mut spec = ExampleSpec {
             name: generate_timestamp_name(),
             repository_url,
@@ -171,7 +108,6 @@ pub fn capture_example(
             edit_history,
             expected_patches,
             rejected_patch,
-            captured_prompt_input: prompt_input,
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,
@@ -466,7 +402,6 @@ mod tests {
                     buffer.clone(),
                     Anchor::MIN,
                     events,
-                    Vec::new(),
                     true,
                     cx,
                 )
@@ -584,38 +519,11 @@ mod tests {
                     "}
                     .to_string()
                 ),
-                captured_prompt_input: example.captured_prompt_input.clone(),
                 telemetry: None,
                 human_feedback: Vec::new(),
                 rating: None,
             }
         );
-
-        let prompt_input = example
-            .captured_prompt_input
-            .expect("should have captured prompt input");
-        assert!(
-            prompt_input.cursor_file_content.contains("fn main()"),
-            "cursor_file_content should contain file content"
-        );
-        assert_eq!(
-            prompt_input.cursor_offset, 0,
-            "cursor at Anchor::MIN should be offset 0"
-        );
-        assert_eq!(
-            prompt_input.cursor_row, 0,
-            "cursor at Anchor::MIN should be row 0"
-        );
-        assert_eq!(
-            prompt_input.cursor_column, 0,
-            "cursor at Anchor::MIN should be column 0"
-        );
-        assert!(prompt_input.events.len() > 0, "should have captured events");
-        assert_eq!(
-            prompt_input.related_files.len(),
-            0,
-            "should have no related files (none passed)"
-        );
     }
 
     fn init_test(cx: &mut TestAppContext) {

crates/edit_prediction/src/example_spec.rs 🔗

@@ -1,7 +1,7 @@
 use crate::udiff::DiffLine;
 use anyhow::{Context as _, Result};
 use serde::{Deserialize, Serialize};
-use std::{borrow::Cow, fmt::Write as _, mem, ops::Range, path::Path, sync::Arc};
+use std::{borrow::Cow, fmt::Write as _, mem, path::Path, sync::Arc};
 use telemetry_events::EditPredictionRating;
 
 pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
@@ -81,8 +81,6 @@ pub struct ExampleSpec {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub rejected_patch: Option<String>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub captured_prompt_input: Option<CapturedPromptInput>,
-    #[serde(default, skip_serializing_if = "Option::is_none")]
     pub telemetry: Option<TelemetrySource>,
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub human_feedback: Vec<HumanFeedback>,
@@ -105,76 +103,6 @@ pub struct TelemetrySource {
     pub was_shown: bool,
 }
 
-/// All data needed to run format_prompt without loading the project.
-#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
-pub struct CapturedPromptInput {
-    pub cursor_file_content: String,
-    pub cursor_offset: usize,
-    pub cursor_row: u32,
-    pub cursor_column: u32,
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub excerpt_start_row: Option<u32>,
-    pub events: Vec<CapturedEvent>,
-    pub related_files: Vec<CapturedRelatedFile>,
-    #[serde(default)]
-    pub in_open_source_repo: bool,
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub zed_version: Option<String>,
-}
-
-#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
-pub struct CapturedEvent {
-    pub path: Arc<Path>,
-    pub old_path: Arc<Path>,
-    pub diff: String,
-    pub predicted: bool,
-    #[serde(default)]
-    pub in_open_source_repo: bool,
-}
-
-impl CapturedEvent {
-    pub fn to_event(&self) -> zeta_prompt::Event {
-        zeta_prompt::Event::BufferChange {
-            path: self.path.clone(),
-            old_path: self.old_path.clone(),
-            diff: self.diff.clone(),
-            predicted: self.predicted,
-            in_open_source_repo: self.in_open_source_repo,
-        }
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
-pub struct CapturedRelatedFile {
-    pub path: Arc<Path>,
-    pub max_row: u32,
-    pub excerpts: Vec<CapturedRelatedExcerpt>,
-}
-
-impl CapturedRelatedFile {
-    pub fn to_related_file(&self) -> zeta_prompt::RelatedFile {
-        zeta_prompt::RelatedFile {
-            path: self.path.clone(),
-            max_row: self.max_row,
-            in_open_source_repo: false,
-            excerpts: self
-                .excerpts
-                .iter()
-                .map(|e| zeta_prompt::RelatedExcerpt {
-                    row_range: e.row_range.clone(),
-                    text: e.text.clone().into(),
-                })
-                .collect(),
-        }
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
-pub struct CapturedRelatedExcerpt {
-    pub row_range: Range<u32>,
-    pub text: String,
-}
-
 const REASONING_HEADING: &str = "Reasoning";
 const UNCOMMITTED_DIFF_HEADING: &str = "Uncommitted Diff";
 const EDIT_HISTORY_HEADING: &str = "Edit History";
@@ -320,7 +248,6 @@ impl ExampleSpec {
             edit_history: String::new(),
             expected_patches: Vec::new(),
             rejected_patch: None,
-            captured_prompt_input: None,
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,
@@ -654,7 +581,6 @@ mod tests {
             edit_history: String::new(),
             expected_patches: Vec::new(),
             rejected_patch: None,
-            captured_prompt_input: None,
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,
@@ -791,7 +717,6 @@ mod tests {
             edit_history: String::new(),
             expected_patches: Vec::new(),
             rejected_patch: None,
-            captured_prompt_input: None,
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,
@@ -864,7 +789,6 @@ mod tests {
             edit_history: String::new(),
             expected_patches: Vec::new(),
             rejected_patch: None,
-            captured_prompt_input: None,
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,

crates/edit_prediction_cli/src/example.rs 🔗

@@ -15,9 +15,8 @@ use std::{
     collections::VecDeque,
     io::Read,
     path::{Path, PathBuf},
-    sync::Arc,
 };
-use zeta_prompt::RelatedFile;
+use zeta_prompt::ZetaPromptInput;
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct Example {
@@ -27,7 +26,7 @@ pub struct Example {
     /// The full content of the file where an edit is being predicted, and the
     /// actual cursor offset.
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub prompt_inputs: Option<ExamplePromptInputs>,
+    pub prompt_inputs: Option<ZetaPromptInput>,
 
     /// The input and expected output from the edit prediction model.
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -46,6 +45,9 @@ pub struct Example {
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub qa: Vec<Option<QaResult>>,
 
+    /// The Zed version used to generate this example.
+    pub zed_version: Option<String>,
+
     /// The application state used to process this example.
     #[serde(skip)]
     pub state: Option<ExampleState>,
@@ -59,18 +61,6 @@ pub struct ExampleState {
     pub _open_buffers: OpenedBuffers,
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct ExamplePromptInputs {
-    pub content: String,
-    pub cursor_row: u32,
-    pub cursor_column: u32,
-    pub cursor_offset: usize,
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub excerpt_start_row: Option<u32>,
-    pub edit_history: Vec<Arc<zeta_prompt::Event>>,
-    pub related_files: Option<Vec<RelatedFile>>,
-}
-
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ExamplePrompt {
     pub input: String,
@@ -340,5 +330,6 @@ fn parse_markdown_example(input: &str) -> Result<Example> {
         score: Vec::new(),
         qa: Vec::new(),
         state: None,
+        zed_version: None,
     })
 }

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -6,9 +6,8 @@ use crate::{
     retrieve_context::run_context_retrieval,
 };
 use anyhow::{Context as _, Result, anyhow};
-use edit_prediction::{cursor_excerpt::compute_excerpt_ranges, udiff};
-use gpui::{AppContext, AsyncApp};
-use language::{Buffer, Point};
+use edit_prediction::udiff;
+use gpui::AsyncApp;
 use similar::DiffableStr;
 use std::sync::Arc;
 use std::{fmt::Write as _, ops::Range};
@@ -31,23 +30,10 @@ pub async fn run_format_prompt(
         .as_ref()
         .context("prompt_inputs must be set after context retrieval")?;
 
-    let language = app_state
-        .languages
-        .load_language_for_file_path(&example.spec.cursor_path)
-        .await
-        .ok();
-    let snapshot_fut = cx.update(|cx| {
-        Buffer::build_snapshot(
-            prompt_inputs.content.as_str().into(),
-            language,
-            Some(app_state.languages.clone()),
-            cx,
-        )
-    });
-    let cursor_point = Point::new(prompt_inputs.cursor_row, prompt_inputs.cursor_column);
-    let snapshot = cx.background_spawn(snapshot_fut).await;
-
-    let (_, _, excerpt_ranges) = compute_excerpt_ranges(cursor_point, &snapshot);
+    let excerpt_ranges = prompt_inputs
+        .excerpt_ranges
+        .as_ref()
+        .context("prompt_inputs must have excerpt_ranges")?;
 
     match args.provider {
         PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
@@ -55,7 +41,7 @@ pub async fn run_format_prompt(
 
             let zeta_format = ZetaFormat::default();
             let (editable_range, context_range) =
-                excerpt_range_for_format(zeta_format, &excerpt_ranges);
+                excerpt_range_for_format(zeta_format, excerpt_ranges);
 
             let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
             example.prompt = Some(ExamplePrompt {
@@ -70,27 +56,25 @@ pub async fn run_format_prompt(
             step_progress.set_substatus("formatting zeta2 prompt");
 
             let (editable_range, context_range) =
-                excerpt_range_for_format(zeta_format, &excerpt_ranges);
+                excerpt_range_for_format(zeta_format, excerpt_ranges);
 
             let context_start = context_range.start;
-            let cursor_offset_in_excerpt = prompt_inputs.cursor_offset - context_start;
+            let cursor_offset_in_excerpt = prompt_inputs.cursor_offset_in_excerpt - context_start;
             let editable_range_in_excerpt =
                 (editable_range.start - context_start)..(editable_range.end - context_start);
             let input = zeta_prompt::ZetaPromptInput {
-                cursor_path: example.spec.cursor_path.clone(),
-                cursor_excerpt: prompt_inputs.content[context_range].to_string().into(),
+                cursor_path: prompt_inputs.cursor_path.clone(),
+                cursor_excerpt: prompt_inputs.cursor_excerpt[context_range]
+                    .to_string()
+                    .into(),
                 editable_range_in_excerpt,
                 cursor_offset_in_excerpt,
                 excerpt_start_row: prompt_inputs.excerpt_start_row,
-                events: prompt_inputs.edit_history.clone(),
-                related_files: prompt_inputs.related_files.clone().unwrap_or_default(),
-                excerpt_ranges: Some(excerpt_ranges),
+                events: prompt_inputs.events.clone(),
+                related_files: prompt_inputs.related_files.clone(),
+                excerpt_ranges: prompt_inputs.excerpt_ranges.clone(),
                 preferred_model: None,
-                in_open_source_repo: example
-                    .spec
-                    .captured_prompt_input
-                    .as_ref()
-                    .map_or(false, |input| input.in_open_source_repo),
+                in_open_source_repo: prompt_inputs.in_open_source_repo,
                 can_collect_data: false,
             };
             let prompt = format_zeta_prompt(&input, zeta_format);
@@ -241,14 +225,12 @@ impl TeacherPrompt {
             new_editable_region.insert(0, '\n');
         }
 
-        let (editable_region_offset, _) = prompt_inputs
-            .content
+        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
+        let (editable_region_offset, _) = excerpt
             .match_indices(&old_editable_region)
-            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
+            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
             .context("editable region not found in prompt content")?;
-        let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
-            .matches('\n')
-            .count();
+        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
 
         // Use full context so cursor offset (relative to editable region start) aligns with diff content
         let editable_region_lines = old_editable_region.lines().count() as u32;
@@ -273,7 +255,7 @@ impl TeacherPrompt {
                 &example.spec.cursor_path,
                 editable_region_cursor_offset,
                 &new_editable_region,
-                &prompt_inputs.content,
+                excerpt,
                 editable_region_offset,
                 editable_region_start_line,
             )
@@ -298,10 +280,7 @@ impl TeacherPrompt {
     }
 
     pub fn format_context(example: &Example) -> String {
-        let related_files = example
-            .prompt_inputs
-            .as_ref()
-            .and_then(|pi| pi.related_files.as_ref());
+        let related_files = example.prompt_inputs.as_ref().map(|pi| &pi.related_files);
 
         let Some(related_files) = related_files else {
             return "(No context)".to_string();
@@ -342,16 +321,18 @@ impl TeacherPrompt {
         let mut result = String::new();
 
         let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
+        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
+        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
 
         let path_str = example.spec.cursor_path.to_string_lossy();
         result.push_str(&format!("`````{path_str}\n"));
-        result.push_str(&prompt_inputs.content[context_range.start..editable_range.start]);
+        result.push_str(&excerpt[context_range.start..editable_range.start]);
         result.push_str(Self::EDITABLE_REGION_START);
-        result.push_str(&prompt_inputs.content[editable_range.start..prompt_inputs.cursor_offset]);
+        result.push_str(&excerpt[editable_range.start..cursor_offset]);
         result.push_str(Self::USER_CURSOR_MARKER);
-        result.push_str(&prompt_inputs.content[prompt_inputs.cursor_offset..editable_range.end]);
+        result.push_str(&excerpt[cursor_offset..editable_range.end]);
         result.push_str(Self::EDITABLE_REGION_END);
-        result.push_str(&prompt_inputs.content[editable_range.end..context_range.end]);
+        result.push_str(&excerpt[editable_range.end..context_range.end]);
         result.push_str("\n`````");
 
         result
@@ -402,16 +383,16 @@ pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String>
 
     // Fallback: construct from prompt_inputs if available
     let prompt_inputs = example.prompt_inputs.as_ref()?;
-    let content = &prompt_inputs.content;
-    let cursor_offset = prompt_inputs.cursor_offset;
+    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
+    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
 
     // Simple fallback: just show content around cursor with markers
     let path_str = example.spec.cursor_path.to_string_lossy();
     let mut result = format!("`````{path_str}\n");
     result.push_str(TeacherPrompt::EDITABLE_REGION_START);
-    result.push_str(&content[..cursor_offset]);
+    result.push_str(&excerpt[..cursor_offset]);
     result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
-    result.push_str(&content[cursor_offset..]);
+    result.push_str(&excerpt[cursor_offset..]);
     result.push_str(TeacherPrompt::EDITABLE_REGION_END);
     result.push_str("\n`````");
 

crates/edit_prediction_cli/src/load_project.rs 🔗

@@ -1,5 +1,5 @@
 use crate::{
-    example::{Example, ExamplePromptInputs, ExampleState},
+    example::{Example, ExampleState},
     git,
     headless::EpAppState,
     progress::{ExampleProgress, InfoStyle, Step, StepProgress},
@@ -7,6 +7,7 @@ use crate::{
 use anyhow::{Context as _, Result};
 use edit_prediction::{
     EditPredictionStore,
+    cursor_excerpt::compute_excerpt_ranges,
     udiff::{OpenedBuffers, refresh_worktree_entries, strip_diff_path_prefix},
 };
 use futures::AsyncWriteExt as _;
@@ -14,6 +15,7 @@ use gpui::{AsyncApp, Entity};
 use language::{Anchor, Buffer, LanguageNotFound, ToOffset, ToPoint};
 use project::{Project, ProjectPath, buffer_store::BufferStoreEvent};
 use std::{fs, path::PathBuf, sync::Arc};
+use zeta_prompt::ZetaPromptInput;
 
 pub async fn run_load_project(
     example: &mut Example,
@@ -58,7 +60,7 @@ pub async fn run_load_project(
         .read_with(&cx, |buffer, _| buffer.parsing_idle())
         .await;
 
-    let edit_history = ep_store.update(&mut cx, |store, cx| {
+    let events: Vec<Arc<zeta_prompt::Event>> = ep_store.update(&mut cx, |store, cx| {
         store
             .edit_history_for_project(&project, cx)
             .into_iter()
@@ -66,25 +68,46 @@ pub async fn run_load_project(
             .collect()
     });
 
+    let existing_related_files = example
+        .prompt_inputs
+        .take()
+        .map(|inputs| inputs.related_files)
+        .unwrap_or_default();
+
     let (prompt_inputs, language_name) = buffer.read_with(&cx, |buffer, _cx| {
-        let cursor_point = cursor_position.to_point(&buffer);
+        let snapshot = buffer.snapshot();
+        let cursor_point = cursor_position.to_point(&snapshot);
+        let cursor_offset = cursor_position.to_offset(&snapshot);
         let language_name = buffer
             .language()
             .map(|l| l.name().to_string())
             .unwrap_or_else(|| "Unknown".to_string());
+
+        let (full_context_point_range, full_context_offset_range, excerpt_ranges) =
+            compute_excerpt_ranges(cursor_point, &snapshot);
+
+        let cursor_excerpt: Arc<str> = buffer
+            .text_for_range(full_context_offset_range.clone())
+            .collect::<String>()
+            .into();
+        let cursor_offset_in_excerpt = cursor_offset - full_context_offset_range.start;
+        let excerpt_start_row = Some(full_context_point_range.start.row);
+
+        let editable_range_in_excerpt = excerpt_ranges.editable_350.clone();
+
         (
-            ExamplePromptInputs {
-                content: buffer.text(),
-                cursor_row: cursor_point.row,
-                cursor_column: cursor_point.column,
-                cursor_offset: cursor_position.to_offset(&buffer),
-                excerpt_start_row: Some(0),
-                edit_history,
-                related_files: example
-                    .prompt_inputs
-                    .take()
-                    .map(|inputs| inputs.related_files)
-                    .unwrap_or_default(),
+            ZetaPromptInput {
+                cursor_path: example.spec.cursor_path.clone(),
+                cursor_excerpt,
+                editable_range_in_excerpt,
+                cursor_offset_in_excerpt,
+                excerpt_start_row,
+                events,
+                related_files: existing_related_files,
+                excerpt_ranges: Some(excerpt_ranges),
+                preferred_model: None,
+                in_open_source_repo: false,
+                can_collect_data: false,
             },
             language_name,
         )

crates/edit_prediction_cli/src/main.rs 🔗

@@ -684,21 +684,6 @@ async fn load_examples(
     } else {
         let max_rows_per_timestamp = remaining_limit_for_snowflake.unwrap_or(5000);
 
-        if !captured_after_timestamps.is_empty() {
-            captured_after_timestamps.sort();
-
-            let mut captured_examples = pull_examples::fetch_captured_examples_after(
-                http_client.clone(),
-                &captured_after_timestamps,
-                max_rows_per_timestamp,
-                remaining_offset,
-                background_executor.clone(),
-                Some(MIN_CAPTURE_VERSION),
-            )
-            .await?;
-            examples.append(&mut captured_examples);
-        }
-
         if !rejected_after_timestamps.is_empty() {
             rejected_after_timestamps.sort();
 

crates/edit_prediction_cli/src/parse_output.rs 🔗

@@ -133,20 +133,18 @@ fn parse_zeta2_output(
     }
 
     let old_text_trimmed = old_text.trim_end_matches('\n');
-    let (editable_region_offset, _) = prompt_inputs
-        .content
+    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
+    let (editable_region_offset, _) = excerpt
         .match_indices(old_text_trimmed)
-        .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
+        .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
         .with_context(|| {
             format!(
                 "could not find editable region in content.\nLooking for:\n{}\n\nIn content:\n{}",
-                old_text_trimmed, &prompt_inputs.content
+                old_text_trimmed, excerpt
             )
         })?;
 
-    let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
-        .matches('\n')
-        .count();
+    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
 
     // Use full context so cursor offset (relative to editable region start) aligns with diff content
     let editable_region_lines = old_text_normalized.lines().count() as u32;
@@ -170,7 +168,7 @@ fn parse_zeta2_output(
             &example.spec.cursor_path,
             editable_region_cursor_offset,
             &new_text,
-            &prompt_inputs.content,
+            excerpt,
             editable_region_offset,
             editable_region_start_line,
         )

crates/edit_prediction_cli/src/predict.rs 🔗

@@ -53,11 +53,10 @@ pub async fn run_prediction(
         );
     };
 
-    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
-
     if let PredictionProvider::Teacher(backend) | PredictionProvider::TeacherNonBatching(backend) =
         provider
     {
+        run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
         run_format_prompt(
             example,
             &FormatPromptArgs { provider },
@@ -81,6 +80,7 @@ pub async fn run_prediction(
     }
 
     run_load_project(example, app_state.clone(), example_progress, cx.clone()).await?;
+    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 
     let step_progress = example_progress.start(Step::Predict);
 

crates/edit_prediction_cli/src/pull_examples.rs 🔗

@@ -15,15 +15,11 @@ use zeta_prompt::ZetaPromptInput;
 use crate::example::Example;
 use crate::progress::{InfoStyle, Progress, Step};
 const EDIT_PREDICTION_DEPLOYMENT_EVENT: &str = "Edit Prediction Deployment";
-use edit_prediction::example_spec::{
-    CapturedEvent, CapturedPromptInput, CapturedRelatedExcerpt, CapturedRelatedFile, ExampleSpec,
-    TelemetrySource,
-};
+use edit_prediction::example_spec::{ExampleSpec, TelemetrySource};
 use std::fmt::Write as _;
 
 pub(crate) const SNOWFLAKE_SUCCESS_CODE: &str = "090001";
 pub(crate) const SNOWFLAKE_ASYNC_IN_PROGRESS_CODE: &str = "333334";
-const EDIT_PREDICTION_EXAMPLE_CAPTURED_EVENT: &str = "Edit Prediction Example Captured";
 const PREDICTIVE_EDIT_REQUESTED_EVENT: &str = "Predictive Edit Requested";
 const PREDICTIVE_EDIT_REJECTED_EVENT: &str = "Predictive Edit Rejected";
 const EDIT_PREDICTION_RATED_EVENT: &str = "Edit Prediction Rated";
@@ -71,135 +67,6 @@ pub fn parse_rated_after_input(input: &str) -> Option<(&str, Option<EditPredicti
     }
 }
 
-pub async fn fetch_captured_examples_after(
-    http_client: Arc<dyn HttpClient>,
-    after_timestamps: &[String],
-    max_rows_per_timestamp: usize,
-    offset: usize,
-    background_executor: BackgroundExecutor,
-    _min_capture_version: Option<MinCaptureVersion>,
-) -> Result<Vec<Example>> {
-    if after_timestamps.is_empty() {
-        return Ok(Vec::new());
-    }
-
-    let progress = Progress::global();
-
-    let token = std::env::var("EP_SNOWFLAKE_API_KEY")
-        .context("missing required environment variable EP_SNOWFLAKE_API_KEY")?;
-    let base_url = std::env::var("EP_SNOWFLAKE_BASE_URL").context(
-        "missing required environment variable EP_SNOWFLAKE_BASE_URL (e.g. https://<account>.snowflakecomputing.com)",
-    )?;
-    let role = std::env::var("EP_SNOWFLAKE_ROLE").ok();
-
-    let mut all_examples = Vec::new();
-
-    for after_date in after_timestamps.iter() {
-        let step_progress_name = format!(">{after_date}");
-        let step_progress = progress.start(Step::PullExamples, &step_progress_name);
-        step_progress.set_substatus("querying");
-
-        let statement = indoc! {r#"
-            SELECT
-                event_properties:example AS example
-            FROM events
-            WHERE event_type = ?
-                AND time > TRY_TO_TIMESTAMP_NTZ(?)
-                AND event_properties:can_collect_data = true
-            ORDER BY time ASC
-            LIMIT ?
-            OFFSET ?
-        "#};
-
-        let request = json!({
-            "statement": statement,
-            "timeout": DEFAULT_STATEMENT_TIMEOUT_SECONDS,
-            "database": "EVENTS",
-            "schema": "PUBLIC",
-            "warehouse": "DBT",
-            "role": role,
-            "bindings": {
-                "1": { "type": "TEXT", "value": EDIT_PREDICTION_EXAMPLE_CAPTURED_EVENT },
-                "2": { "type": "TEXT", "value": after_date },
-                "3": { "type": "FIXED", "value": max_rows_per_timestamp.to_string() },
-                "4": { "type": "FIXED", "value": offset.to_string() }
-            }
-        });
-
-        let response = run_sql_with_polling(
-            http_client.clone(),
-            &base_url,
-            &token,
-            &request,
-            &step_progress,
-            background_executor.clone(),
-        )
-        .await?;
-
-        let total_rows = response
-            .result_set_meta_data
-            .as_ref()
-            .and_then(|m| m.num_rows)
-            .unwrap_or(response.data.len() as i64);
-
-        let num_partitions = response
-            .result_set_meta_data
-            .as_ref()
-            .map(|m| m.partition_info.len())
-            .unwrap_or(1)
-            .max(1);
-
-        step_progress.set_info(format!("{} rows", total_rows), InfoStyle::Normal);
-        step_progress.set_substatus("parsing");
-
-        let example_index = response
-            .result_set_meta_data
-            .as_ref()
-            .and_then(|m| {
-                m.row_type.iter().enumerate().find_map(|(index, col)| {
-                    if col.name.eq_ignore_ascii_case("example") {
-                        Some(index)
-                    } else {
-                        None
-                    }
-                })
-            })
-            .unwrap_or(0);
-
-        all_examples.extend(examples_from_response(&response, example_index)?);
-
-        if num_partitions > 1 {
-            let statement_handle = response
-                .statement_handle
-                .as_ref()
-                .context("response has multiple partitions but no statementHandle")?;
-
-            for partition in 1..num_partitions {
-                step_progress.set_substatus(format!(
-                    "fetching partition {}/{}",
-                    partition + 1,
-                    num_partitions
-                ));
-
-                let partition_response = fetch_partition(
-                    http_client.clone(),
-                    &base_url,
-                    &token,
-                    statement_handle,
-                    partition,
-                )
-                .await?;
-
-                all_examples.extend(examples_from_response(&partition_response, example_index)?);
-            }
-        }
-
-        step_progress.set_substatus("done");
-    }
-
-    Ok(all_examples)
-}
-
 #[derive(Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub(crate) struct SnowflakeStatementResponse {
@@ -236,56 +103,6 @@ struct SnowflakeColumnMeta {
     name: String,
 }
 
-fn examples_from_response(
-    response: &SnowflakeStatementResponse,
-    example_index: usize,
-) -> Result<impl Iterator<Item = Example> + '_> {
-    if let Some(code) = &response.code {
-        if code != SNOWFLAKE_SUCCESS_CODE {
-            anyhow::bail!(
-                "snowflake sql api returned error code={code} message={}",
-                response.message.as_deref().unwrap_or("<no message>")
-            );
-        }
-    }
-
-    let iter = response.data.iter().enumerate().filter_map(move |(row_index, data_row)| {
-        let Some(example_value) = data_row.get(example_index) else {
-            return None;
-        };
-        if example_value.is_null() {
-            return None;
-        }
-
-        let parse_result = match example_value {
-            JsonValue::String(encoded_json) => serde_json::from_str::<ExampleSpec>(encoded_json),
-            _ => serde_json::from_value::<ExampleSpec>(example_value.clone()),
-        };
-
-        match parse_result {
-            Ok(spec) => Some(Example {
-                spec,
-                prompt_inputs: None,
-                prompt: None,
-                predictions: Vec::new(),
-                score: Vec::new(),
-                qa: Vec::new(),
-                state: None,
-            }),
-            Err(error) => {
-                let raw_json = serde_json::to_string_pretty(example_value)
-                    .unwrap_or_else(|_| "<failed to serialize json>".to_string());
-                log::error!(
-                    "failed to parse ExampleSpec for row {row_index}: {error:#}\nraw json:\n{raw_json}"
-                );
-                None
-            }
-        }
-    });
-
-    Ok(iter)
-}
-
 async fn run_sql_with_polling(
     http_client: Arc<dyn HttpClient>,
     base_url: &str,
@@ -1306,48 +1123,9 @@ fn build_example_from_snowflake(
     rejection: Option<RejectionInfo>,
     zed_version: Option<String>,
 ) -> Example {
-    let events: Vec<CapturedEvent> = input
-        .events
-        .iter()
-        .map(|event| match event.as_ref() {
-            zeta_prompt::Event::BufferChange {
-                path,
-                old_path,
-                diff,
-                predicted,
-                in_open_source_repo,
-            } => CapturedEvent {
-                path: path.clone(),
-                old_path: old_path.clone(),
-                diff: diff.clone(),
-                predicted: *predicted,
-                in_open_source_repo: *in_open_source_repo,
-            },
-        })
-        .collect();
-
-    let related_files: Vec<CapturedRelatedFile> = input
-        .related_files
-        .iter()
-        .map(|rf| CapturedRelatedFile {
-            path: rf.path.clone(),
-            max_row: rf.max_row,
-            excerpts: rf
-                .excerpts
-                .iter()
-                .map(|e| CapturedRelatedExcerpt {
-                    row_range: e.row_range.clone(),
-                    text: e.text.to_string(),
-                })
-                .collect(),
-        })
-        .collect();
-
     let cursor_excerpt = input.cursor_excerpt.as_ref();
     let cursor_offset = input.cursor_offset_in_excerpt;
 
-    let (cursor_row, cursor_column) = compute_row_column(cursor_excerpt, cursor_offset);
-
     let mut edit_history = String::new();
     for event in &input.events {
         zeta_prompt::write_event(&mut edit_history, event);
@@ -1371,17 +1149,6 @@ fn build_example_from_snowflake(
         edit_history,
         expected_patches: Vec::new(),
         rejected_patch: None,
-        captured_prompt_input: Some(CapturedPromptInput {
-            cursor_file_content: cursor_excerpt.to_string(),
-            cursor_offset,
-            cursor_row,
-            cursor_column,
-            excerpt_start_row: None,
-            events,
-            related_files,
-            in_open_source_repo: input.in_open_source_repo,
-            zed_version,
-        }),
         telemetry: Some(TelemetrySource {
             request_id,
             device_id,
@@ -1395,7 +1162,8 @@ fn build_example_from_snowflake(
 
     Example {
         spec,
-        prompt_inputs: None,
+        zed_version,
+        prompt_inputs: Some(input),
         prompt: None,
         predictions: Vec::new(),
         score: Vec::new(),
@@ -1404,22 +1172,6 @@ fn build_example_from_snowflake(
     }
 }
 
-fn compute_row_column(text: &str, offset: usize) -> (u32, u32) {
-    let mut row = 0u32;
-    let mut last_newline_offset = 0;
-    for (i, c) in text.char_indices() {
-        if i >= offset {
-            break;
-        }
-        if c == '\n' {
-            row += 1;
-            last_newline_offset = i + 1;
-        }
-    }
-    let column = (offset - last_newline_offset) as u32;
-    (row, column)
-}
-
 fn build_cursor_position(excerpt: &str, cursor_offset: usize) -> String {
     let before = &excerpt[..cursor_offset.min(excerpt.len())];
     let after = &excerpt[cursor_offset.min(excerpt.len())..];

crates/edit_prediction_cli/src/qa.rs 🔗

@@ -82,22 +82,19 @@ pub fn build_prompt(example: &Example) -> Result<String> {
         extract_cursor_excerpt_from_example(example).context("failed to extract cursor excerpt")?;
 
     let mut edit_history = String::new();
-    for event in &prompt_inputs.edit_history {
-        match event.as_ref() {
-            zeta_prompt::Event::BufferChange {
-                path,
-                old_path,
-                diff,
-                predicted: _,
-                in_open_source_repo: _,
-            } => {
-                edit_history.push_str(&format!("--- a{}\n", old_path.display()));
-                edit_history.push_str(&format!("+++ b{}\n", path.display()));
-                let diff_word_diff = unified_to_word_diff(diff);
-                edit_history.push_str(&diff_word_diff);
-                edit_history.push_str("\n\n");
-            }
-        }
+    for event in &prompt_inputs.events {
+        let zeta_prompt::Event::BufferChange {
+            path,
+            old_path,
+            diff,
+            predicted: _,
+            in_open_source_repo: _,
+        } = event.as_ref();
+        edit_history.push_str(&format!("--- a{}\n", old_path.display()));
+        edit_history.push_str(&format!("+++ b{}\n", path.display()));
+        let diff_word_diff = unified_to_word_diff(&diff);
+        edit_history.push_str(&diff_word_diff);
+        edit_history.push_str("\n\n");
     }
 
     let prompt_template = crate::prompt_assets::get_prompt("qa.md");

crates/edit_prediction_cli/src/retrieve_context.rs 🔗

@@ -1,5 +1,5 @@
 use crate::{
-    example::{Example, ExamplePromptInputs},
+    example::Example,
     headless::EpAppState,
     load_project::run_load_project,
     progress::{ExampleProgress, InfoStyle, Step, StepProgress},
@@ -20,41 +20,18 @@ pub async fn run_context_retrieval(
     example_progress: &ExampleProgress,
     mut cx: AsyncApp,
 ) -> anyhow::Result<()> {
-    if example
-        .prompt_inputs
-        .as_ref()
-        .is_some_and(|inputs| inputs.related_files.is_some())
-    {
-        return Ok(());
-    }
-
-    if let Some(captured) = &example.spec.captured_prompt_input {
-        let step_progress = example_progress.start(Step::Context);
-        step_progress.set_substatus("using captured prompt input");
-
-        let edit_history: Vec<Arc<zeta_prompt::Event>> = captured
-            .events
-            .iter()
-            .map(|e| Arc::new(e.to_event()))
-            .collect();
-
-        let related_files: Vec<zeta_prompt::RelatedFile> = captured
-            .related_files
-            .iter()
-            .map(|rf| rf.to_related_file())
-            .collect();
-
-        example.prompt_inputs = Some(ExamplePromptInputs {
-            content: captured.cursor_file_content.clone(),
-            cursor_row: captured.cursor_row,
-            cursor_column: captured.cursor_column,
-            cursor_offset: captured.cursor_offset,
-            excerpt_start_row: captured.excerpt_start_row,
-            edit_history,
-            related_files: Some(related_files),
-        });
+    if example.prompt_inputs.is_some() {
+        if example.spec.repository_url.is_empty() {
+            return Ok(());
+        }
 
-        return Ok(());
+        if example
+            .prompt_inputs
+            .as_ref()
+            .is_some_and(|inputs| !inputs.related_files.is_empty())
+        {
+            return Ok(());
+        }
     }
 
     run_load_project(example, app_state.clone(), example_progress, cx.clone()).await?;
@@ -95,7 +72,7 @@ pub async fn run_context_retrieval(
     step_progress.set_info(format!("{} excerpts", excerpt_count), InfoStyle::Normal);
 
     if let Some(prompt_inputs) = example.prompt_inputs.as_mut() {
-        prompt_inputs.related_files = Some(context_files);
+        prompt_inputs.related_files = context_files;
     }
     Ok(())
 }

crates/edit_prediction_cli/src/reversal_tracking.rs 🔗

@@ -5,7 +5,7 @@ use std::sync::Arc;
 use edit_prediction::udiff::apply_diff_to_string;
 use language::{char_diff, text_diff};
 
-use crate::example::ExamplePromptInputs;
+use zeta_prompt::ZetaPromptInput;
 
 fn apply_diff_to_string_lenient(diff_str: &str, text: &str) -> String {
     let hunks = parse_diff_hunks(diff_str);
@@ -609,13 +609,13 @@ fn is_predicted_event(event: &zeta_prompt::Event) -> bool {
 }
 
 pub fn compute_prediction_reversal_ratio(
-    prompt_inputs: &ExamplePromptInputs,
+    prompt_inputs: &ZetaPromptInput,
     predicted_content: &str,
     cursor_path: &Path,
 ) -> f32 {
-    let current_content = &prompt_inputs.content;
+    let current_content: &str = prompt_inputs.cursor_excerpt.as_ref();
 
-    let edit_history: &[Arc<zeta_prompt::Event>] = &prompt_inputs.edit_history;
+    let edit_history: &[Arc<zeta_prompt::Event>] = &prompt_inputs.events;
     let relevant_events = filter_edit_history_by_path(edit_history, cursor_path);
 
     let most_recent = match relevant_events.last() {
@@ -656,6 +656,26 @@ mod tests {
     use edit_prediction::udiff::apply_diff_to_string;
     use indoc::indoc;
 
+    fn make_test_prompt_inputs(
+        content: &str,
+        events: Vec<Arc<zeta_prompt::Event>>,
+        excerpt_start_row: Option<u32>,
+    ) -> ZetaPromptInput {
+        ZetaPromptInput {
+            cursor_path: Arc::from(Path::new("src/test.rs")),
+            cursor_excerpt: content.into(),
+            editable_range_in_excerpt: 0..content.len(),
+            cursor_offset_in_excerpt: 0,
+            excerpt_start_row,
+            events,
+            related_files: Vec::new(),
+            excerpt_ranges: None,
+            preferred_model: None,
+            in_open_source_repo: false,
+            can_collect_data: false,
+        }
+    }
+
     #[test]
     fn test_reversal_overlap() {
         struct Case {
@@ -1729,17 +1749,13 @@ mod tests {
 
     #[test]
     fn test_compute_prediction_reversal_ratio_full_file() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 line1
                 user_added
                 line2
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+            "},
+            vec![Arc::new(zeta_prompt::Event::BufferChange {
                 path: Arc::from(Path::new("src/test.rs")),
                 old_path: Arc::from(Path::new("src/test.rs")),
                 diff: indoc! {"
@@ -1752,9 +1768,8 @@ mod tests {
                 predicted: false,
                 in_open_source_repo: false,
             })],
-            excerpt_start_row: None,
-            related_files: None,
-        };
+            None,
+        );
 
         let predicted = indoc! {"
             line1
@@ -1772,17 +1787,13 @@ mod tests {
 
     #[test]
     fn test_compute_prediction_reversal_ratio_with_excerpt() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 line10
                 user_added
                 line11
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+            "},
+            vec![Arc::new(zeta_prompt::Event::BufferChange {
                 path: Arc::from(Path::new("src/test.rs")),
                 old_path: Arc::from(Path::new("src/test.rs")),
                 diff: indoc! {"
@@ -1795,9 +1806,8 @@ mod tests {
                 predicted: false,
                 in_open_source_repo: false,
             })],
-            excerpt_start_row: Some(10),
-            related_files: None,
-        };
+            Some(10),
+        );
 
         let predicted = indoc! {"
             line10
@@ -1815,18 +1825,13 @@ mod tests {
 
     #[test]
     fn test_compute_prediction_reversal_ratio_no_history() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 original content
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![],
-            excerpt_start_row: None,
-            related_files: None,
-        };
+            "},
+            vec![],
+            None,
+        );
 
         let predicted = indoc! {"
             completely different
@@ -1842,17 +1847,13 @@ mod tests {
 
     #[test]
     fn test_compute_prediction_reversal_ratio_path_filtering() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 line1
                 user_added
                 line2
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+            "},
+            vec![Arc::new(zeta_prompt::Event::BufferChange {
                 path: Arc::from(Path::new("src/other.rs")),
                 old_path: Arc::from(Path::new("src/other.rs")),
                 diff: indoc! {"
@@ -1865,9 +1866,8 @@ mod tests {
                 predicted: false,
                 in_open_source_repo: false,
             })],
-            excerpt_start_row: None,
-            related_files: None,
-        };
+            None,
+        );
 
         let predicted = indoc! {"
             line1
@@ -1884,17 +1884,13 @@ mod tests {
 
     #[test]
     fn test_compute_prediction_reversal_ratio_lenient_fallback() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 actual_line1
                 user_added
                 actual_line2
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+            "},
+            vec![Arc::new(zeta_prompt::Event::BufferChange {
                 path: Arc::from(Path::new("src/test.rs")),
                 old_path: Arc::from(Path::new("src/test.rs")),
                 diff: indoc! {"
@@ -1907,9 +1903,8 @@ mod tests {
                 predicted: false,
                 in_open_source_repo: false,
             })],
-            excerpt_start_row: None,
-            related_files: None,
-        };
+            None,
+        );
 
         let predicted = indoc! {"
             actual_line1
@@ -1955,18 +1950,14 @@ mod tests {
 
     #[test]
     fn test_only_most_recent_edit_tracked() {
-        let prompt_inputs = ExamplePromptInputs {
-            content: indoc! {"
+        let prompt_inputs = make_test_prompt_inputs(
+            indoc! {"
                 line1
                 first_add
                 second_add
                 line2
-            "}
-            .to_string(),
-            cursor_row: 0,
-            cursor_column: 0,
-            cursor_offset: 0,
-            edit_history: vec![
+            "},
+            vec![
                 Arc::new(zeta_prompt::Event::BufferChange {
                     path: Arc::from(Path::new("src/test.rs")),
                     old_path: Arc::from(Path::new("src/test.rs")),
@@ -1994,9 +1985,8 @@ mod tests {
                     in_open_source_repo: false,
                 }),
             ],
-            excerpt_start_row: None,
-            related_files: None,
-        };
+            None,
+        );
 
         let predicted = indoc! {"
             line1

crates/edit_prediction_cli/src/score.rs 🔗

@@ -30,11 +30,11 @@ pub async fn run_scoring(
     let progress = example_progress.start(Step::Score);
 
     progress.set_substatus("applying patches");
-    let original_text = &example
+    let prompt_inputs = example
         .prompt_inputs
         .as_ref()
-        .context("prompt_inputs is required for scoring - run prediction first or ensure JSON includes prompt_inputs")?
-        .content;
+        .context("prompt_inputs is required for scoring - run prediction first or ensure JSON includes prompt_inputs")?;
+    let original_text: &str = prompt_inputs.cursor_excerpt.as_ref();
     let expected_patches_with_cursors = example.spec.expected_patches_with_cursor_positions();
 
     let expected_texts: Vec<String> = expected_patches_with_cursors
@@ -80,7 +80,6 @@ pub async fn run_scoring(
         deleted_tokens: 0,
     };
 
-    let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
     let cursor_path = example.spec.cursor_path.as_ref();
 
     progress.set_substatus("computing metrics");

crates/edit_prediction_cli/src/split_commit.rs 🔗

@@ -371,7 +371,7 @@ pub fn generate_evaluation_example_from_ordered_commit(
         reasoning: None,
         uncommitted_diff: String::new(),
         rejected_patch: None,
-        captured_prompt_input: None,
+
         telemetry: None,
         human_feedback: Vec::new(),
         rating: None,
@@ -1370,7 +1370,7 @@ Date: Mon Jan 1 00:00:00 2024
             reasoning: None,
             uncommitted_diff: String::new(),
             rejected_patch: None,
-            captured_prompt_input: None,
+
             telemetry: None,
             human_feedback: Vec::new(),
             rating: None,

crates/edit_prediction_cli/src/synthesize.rs 🔗

@@ -792,7 +792,7 @@ async fn build_example(
         edit_history,
         expected_patches: vec![expected_patch_with_header],
         rejected_patch: None,
-        captured_prompt_input: None,
+
         telemetry: None,
         human_feedback: Vec::new(),
         rating: None,

crates/edit_prediction_ui/src/edit_prediction_ui.rs 🔗

@@ -154,15 +154,7 @@ fn capture_example_as_markdown(
         .text_anchor_for_position(editor.selections.newest_anchor().head(), cx)?;
     let ep_store = EditPredictionStore::try_global(cx)?;
     let events = ep_store.update(cx, |store, cx| store.edit_history_for_project(&project, cx));
-    let example = capture_example(
-        project.clone(),
-        buffer,
-        cursor_anchor,
-        events,
-        Vec::new(),
-        true,
-        cx,
-    )?;
+    let example = capture_example(project.clone(), buffer, cursor_anchor, events, true, cx)?;
 
     let examples_dir = AllLanguageSettings::get_global(cx)
         .edit_predictions

crates/zeta_prompt/src/zeta_prompt.rs 🔗

@@ -19,7 +19,7 @@ fn estimate_tokens(bytes: usize) -> usize {
 }
 
 /// The client's preferred edit prediction model. The server may override this.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub enum EditPredictionModelKind {
     Zeta1,
     Zeta2,
@@ -28,7 +28,7 @@ pub enum EditPredictionModelKind {
 /// Pre-computed byte offset ranges within `cursor_excerpt` for different
 /// editable and context token budgets. Allows the server to select the
 /// appropriate ranges for whichever model it uses.
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 pub struct ExcerptRanges {
     /// Editable region computed with a 150-token budget.
     pub editable_150: Range<usize>,
@@ -44,7 +44,7 @@ pub struct ExcerptRanges {
     pub editable_350_context_150: Range<usize>,
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 pub struct ZetaPromptInput {
     pub cursor_path: Arc<Path>,
     pub cursor_excerpt: Arc<str>,
@@ -149,7 +149,7 @@ impl ZetaFormat {
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 #[serde(tag = "event")]
 pub enum Event {
     BufferChange {
@@ -200,7 +200,7 @@ pub fn write_event(prompt: &mut String, event: &Event) {
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 pub struct RelatedFile {
     pub path: Arc<Path>,
     pub max_row: u32,
@@ -209,7 +209,7 @@ pub struct RelatedFile {
     pub in_open_source_repo: bool,
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 pub struct RelatedExcerpt {
     pub row_range: Range<u32>,
     pub text: Arc<str>,