ep: Store cursor position in global coordinates (#48469)

Oleksiy Syvokon created

This also fixes the isolated whitespace change metric, which is not
takes cursor position into account (it's okay to insert a newline if it
has a cursor on it)

Release Notes:

- N/A

Change summary

crates/edit_prediction/src/example_spec.rs      |  4 
crates/edit_prediction_cli/src/distill.rs       |  9 +
crates/edit_prediction_cli/src/example.rs       | 53 ++++++++++
crates/edit_prediction_cli/src/format_prompt.rs | 17 ++
crates/edit_prediction_cli/src/metrics.rs       | 92 +++++++++++++++++-
crates/edit_prediction_cli/src/parse_output.rs  | 27 ++++-
crates/edit_prediction_cli/src/predict.rs       | 10 +-
crates/edit_prediction_cli/src/repair.rs        |  5 
crates/edit_prediction_cli/src/score.rs         | 22 ++--
9 files changed, 200 insertions(+), 39 deletions(-)

Detailed changes

crates/edit_prediction/src/example_spec.rs 🔗

@@ -561,8 +561,8 @@ impl ExampleSpec {
     ) {
         self.expected_patches = patches
             .into_iter()
-            .map(|(patch, cursor_offset)| {
-                let Some(cursor_offset) = cursor_offset else {
+            .map(|(patch, cursor_editable_region_offset)| {
+                let Some(cursor_offset) = cursor_editable_region_offset else {
                     return patch;
                 };
 

crates/edit_prediction_cli/src/distill.rs 🔗

@@ -15,7 +15,14 @@ pub async fn run_distill(example: &mut Example) -> Result<()> {
 
     let expected_patches = predictions
         .into_iter()
-        .filter_map(|p| Some((p.actual_patch.clone()?, p.actual_cursor_offset)))
+        .filter_map(|p| {
+            Some((
+                p.actual_patch.clone()?,
+                p.actual_cursor
+                    .as_ref()
+                    .and_then(|c| c.editable_region_offset),
+            ))
+        })
         .collect();
 
     example

crates/edit_prediction_cli/src/example.rs 🔗

@@ -86,12 +86,63 @@ pub struct ExamplePrediction {
     #[serde(deserialize_with = "deserialize_null_as_empty_string")]
     pub actual_output: String,
     #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub actual_cursor_offset: Option<usize>,
+    pub actual_cursor: Option<ActualCursor>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub error: Option<String>,
     pub provider: PredictionProvider,
 }
 
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ActualCursor {
+    pub path: String,
+    pub row: u32,
+    pub column: u32,
+    pub offset: usize,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub editable_region_offset: Option<usize>,
+}
+
+impl ActualCursor {
+    /// Construct an `ActualCursor` from a cursor offset within the new editable region.
+    ///
+    /// - `path`: file path the cursor is in
+    /// - `editable_region_cursor_offset`: byte offset of the cursor within the new editable region text
+    /// - `new_editable_region`: the full new editable region text (after marker removal)
+    /// - `content`: the full file content (before the edit)
+    /// - `editable_region_byte_offset`: byte offset where the editable region starts in `content`
+    /// - `editable_region_start_line`: 0-based line number where the editable region starts in `content`
+    pub fn from_editable_region(
+        path: &std::path::Path,
+        editable_region_cursor_offset: usize,
+        new_editable_region: &str,
+        content: &str,
+        editable_region_byte_offset: usize,
+        editable_region_start_line: usize,
+    ) -> Self {
+        let global_offset = editable_region_byte_offset + editable_region_cursor_offset;
+        let new_region_prefix = &new_editable_region[..editable_region_cursor_offset];
+        let row = (editable_region_start_line + new_region_prefix.matches('\n').count()) as u32;
+        let column = match new_region_prefix.rfind('\n') {
+            Some(pos) => (editable_region_cursor_offset - pos - 1) as u32,
+            None => {
+                let content_prefix = &content[..editable_region_byte_offset];
+                let content_column = match content_prefix.rfind('\n') {
+                    Some(pos) => editable_region_byte_offset - pos - 1,
+                    None => editable_region_byte_offset,
+                };
+                (content_column + editable_region_cursor_offset) as u32
+            }
+        };
+        ActualCursor {
+            path: path.to_string_lossy().to_string(),
+            row,
+            column,
+            offset: global_offset,
+            editable_region_offset: Some(editable_region_cursor_offset),
+        }
+    }
+}
+
 fn deserialize_null_as_empty_string<'de, D>(deserializer: D) -> Result<String, D::Error>
 where
     D: serde::Deserializer<'de>,

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -1,6 +1,6 @@
 use crate::{
     FormatPromptArgs, PredictionProvider,
-    example::{Example, ExamplePrompt},
+    example::{ActualCursor, Example, ExamplePrompt},
     headless::EpAppState,
     progress::{ExampleProgress, Step},
     retrieve_context::run_context_retrieval,
@@ -196,7 +196,7 @@ impl TeacherPrompt {
         prompt
     }
 
-    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<usize>)> {
+    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
         // Extract updated (new) editable region from the model response.
         // The model may include editable region markers in its output, so we need to strip them.
         let new_editable_region = extract_last_codeblock(response);
@@ -257,7 +257,18 @@ impl TeacherPrompt {
             diff = diff,
         };
 
-        Ok((diff, cursor_offset))
+        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
+            ActualCursor::from_editable_region(
+                &example.spec.cursor_path,
+                editable_region_cursor_offset,
+                &new_editable_region,
+                &prompt_inputs.content,
+                editable_region_offset,
+                editable_region_start_line,
+            )
+        });
+
+        Ok((diff, actual_cursor))
     }
 
     fn format_edit_history(edit_history: &str) -> String {

crates/edit_prediction_cli/src/metrics.rs 🔗

@@ -1,6 +1,9 @@
 use collections::HashMap;
 
-use crate::reorder_patch::{Patch, PatchLine};
+use crate::{
+    example::ActualCursor,
+    reorder_patch::{Patch, PatchLine},
+};
 
 pub type Counts = HashMap<String, usize>;
 type CountsDelta = HashMap<String, isize>;
@@ -389,14 +392,30 @@ pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> Classifica
 /// A whitespace-only change is an added or deleted line whose content is empty or
 /// contains only whitespace. It is "isolated" when it is not adjacent to any
 /// substantive (non-whitespace) change within the same contiguous change group.
-pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool {
+pub fn has_isolated_whitespace_changes(patch_str: &str, cursor: Option<&ActualCursor>) -> bool {
     let patch = Patch::parse_unified_diff(patch_str);
 
+    let cursor_new_file_line = cursor.as_ref().map(|c| (c.row + 1) as usize);
+
     for hunk in &patch.hunks {
         let lines = &hunk.lines;
+        let mut new_text_line = hunk.new_start as usize;
+
         for (i, line) in lines.iter().enumerate() {
             let content = match line {
-                PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(),
+                PatchLine::Addition(s) => {
+                    let addition_line = new_text_line;
+                    new_text_line += 1;
+                    if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
+                        continue;
+                    }
+                    s.as_str()
+                }
+                PatchLine::Deletion(s) => s.as_str(),
+                PatchLine::Context(_) => {
+                    new_text_line += 1;
+                    continue;
+                }
                 _ => continue,
             };
 
@@ -615,8 +634,19 @@ mod test_optimization {
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::example::ActualCursor;
     use indoc::indoc;
 
+    fn cursor_on_line(one_based_line: u32) -> ActualCursor {
+        ActualCursor {
+            path: String::new(),
+            row: one_based_line - 1,
+            column: 0,
+            offset: 0,
+            editable_region_offset: None,
+        }
+    }
+
     #[test]
     fn test_delta_chr_f_perfect_match() {
         let original = "fn main() {    println!(\"Hello\");}";
@@ -841,7 +871,7 @@ index abc123..def456 100644
                  println!(\"hello\");
              }
         "};
-        assert!(has_isolated_whitespace_changes(patch));
+        assert!(has_isolated_whitespace_changes(patch, None));
     }
 
     #[test]
@@ -854,7 +884,7 @@ index abc123..def456 100644
                  println!(\"hello\");
              }
         "};
-        assert!(!has_isolated_whitespace_changes(patch));
+        assert!(!has_isolated_whitespace_changes(patch, None));
     }
 
     #[test]
@@ -866,7 +896,7 @@ index abc123..def456 100644
             +    println!(\"world\");
              }
         "};
-        assert!(!has_isolated_whitespace_changes(patch));
+        assert!(!has_isolated_whitespace_changes(patch, None));
     }
 
     #[test]
@@ -878,7 +908,7 @@ index abc123..def456 100644
                  println!(\"hello\");
              }
         "};
-        assert!(has_isolated_whitespace_changes(patch));
+        assert!(has_isolated_whitespace_changes(patch, None));
     }
 
     #[test]
@@ -895,12 +925,56 @@ index abc123..def456 100644
                  println!(\"hello\");
              }
         "};
-        assert!(has_isolated_whitespace_changes(patch));
+        assert!(has_isolated_whitespace_changes(patch, None));
     }
 
     #[test]
     fn test_isolated_whitespace_empty_patch() {
         let patch = "";
-        assert!(!has_isolated_whitespace_changes(patch));
+        assert!(!has_isolated_whitespace_changes(patch, None));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_skipped_on_cursor_line() {
+        // The addition of a blank line at new-file line 2 should be skipped
+        // because the cursor is on that line.
+        let patch = indoc! {"
+            @@ -1,3 +1,4 @@
+             fn main() {
+            +
+                 println!(\"hello\");
+             }
+        "};
+        // New-file line 2 is the added blank line
+        let cursor = cursor_on_line(2);
+        assert!(!has_isolated_whitespace_changes(patch, Some(&cursor)));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
+        // The blank line is at new-file line 2, but the cursor is on line 1.
+        let patch = indoc! {"
+            @@ -1,3 +1,4 @@
+             fn main() {
+            +
+                 println!(\"hello\");
+             }
+        "};
+        let cursor = cursor_on_line(1);
+        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
+        // Deletions don't have a new-file line, so cursor can't suppress them.
+        let patch = indoc! {"
+            @@ -1,4 +1,3 @@
+             fn main() {
+            -
+                 println!(\"hello\");
+             }
+        "};
+        let cursor = cursor_on_line(2);
+        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
     }
 }

crates/edit_prediction_cli/src/parse_output.rs 🔗

@@ -1,4 +1,8 @@
-use crate::{PredictionProvider, example::Example, format_prompt::TeacherPrompt};
+use crate::{
+    PredictionProvider,
+    example::{ActualCursor, Example},
+    format_prompt::TeacherPrompt,
+};
 use anyhow::{Context as _, Result};
 use zeta_prompt::{CURSOR_MARKER, ZetaVersion};
 
@@ -24,9 +28,9 @@ pub fn run_parse_output(example: &mut Example) -> Result<()> {
         })
         .collect::<Result<Vec<_>>>()?;
 
-    for (ix, actual_patch, actual_cursor_offset) in parsed_patches {
+    for (ix, actual_patch, actual_cursor) in parsed_patches {
         example.predictions[ix].actual_patch = Some(actual_patch);
-        example.predictions[ix].actual_cursor_offset = actual_cursor_offset;
+        example.predictions[ix].actual_cursor = actual_cursor;
         example.predictions[ix].provider = provider;
     }
 
@@ -37,7 +41,7 @@ pub fn parse_prediction_output(
     example: &Example,
     actual_output: &str,
     provider: PredictionProvider,
-) -> Result<(String, Option<usize>)> {
+) -> Result<(String, Option<ActualCursor>)> {
     match provider {
         PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
             TeacherPrompt::parse(example, actual_output)
@@ -84,7 +88,7 @@ fn parse_zeta2_output(
     example: &Example,
     actual_output: &str,
     version: ZetaVersion,
-) -> Result<(String, Option<usize>)> {
+) -> Result<(String, Option<ActualCursor>)> {
     let prompt = &example.prompt.as_ref().context("prompt required")?.input;
     let prompt_inputs = example
         .prompt_inputs
@@ -154,7 +158,18 @@ fn parse_zeta2_output(
         path = example.spec.cursor_path.to_string_lossy(),
     );
 
-    Ok((formatted_diff, cursor_offset))
+    let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
+        ActualCursor::from_editable_region(
+            &example.spec.cursor_path,
+            editable_region_cursor_offset,
+            &new_text,
+            &prompt_inputs.content,
+            editable_region_offset,
+            editable_region_start_line,
+        )
+    });
+
+    Ok((formatted_diff, actual_cursor))
 }
 
 #[cfg(test)]

crates/edit_prediction_cli/src/predict.rs 🔗

@@ -201,7 +201,7 @@ pub async fn run_prediction(
             .push(ExamplePrediction {
                 actual_patch: None,
                 actual_output: String::new(),
-                actual_cursor_offset: None,
+                actual_cursor: None,
                 error: None,
                 provider,
             });
@@ -323,12 +323,12 @@ async fn predict_anthropic(
             .collect::<Vec<String>>()
             .join("\n");
 
-        let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?;
+        let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?;
 
         let prediction = ExamplePrediction {
             actual_patch: Some(actual_patch),
             actual_output,
-            actual_cursor_offset,
+            actual_cursor,
             error: None,
             provider: if batched {
                 PredictionProvider::Teacher(backend)
@@ -396,12 +396,12 @@ async fn predict_openai(
             .collect::<Vec<String>>()
             .join("\n");
 
-        let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?;
+        let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?;
 
         let prediction = ExamplePrediction {
             actual_patch: Some(actual_patch),
             actual_output,
-            actual_cursor_offset,
+            actual_cursor,
             error: None,
             provider: if batched {
                 PredictionProvider::Teacher(backend)

crates/edit_prediction_cli/src/repair.rs 🔗

@@ -257,12 +257,13 @@ pub async fn run_repair(
         .err()
         .map(|e| format!("Failed to parse repair response: {}", e));
 
-    let (actual_patch, actual_cursor_offset) = parse_result.ok().unzip();
+    let (actual_patch, actual_cursor) = parse_result.ok().unzip();
+    let actual_cursor = actual_cursor.flatten();
 
     example.predictions.push(ExamplePrediction {
         actual_patch,
         actual_output: response,
-        actual_cursor_offset: actual_cursor_offset.flatten(),
+        actual_cursor,
         error: err,
         provider: PredictionProvider::Repair,
     });

crates/edit_prediction_cli/src/score.rs 🔗

@@ -1,6 +1,6 @@
 use crate::{
     PredictArgs, PredictionProvider,
-    example::{Example, ExampleScore},
+    example::{ActualCursor, Example, ExampleScore},
     format_prompt::TeacherPrompt,
     headless::EpAppState,
     metrics,
@@ -159,14 +159,16 @@ pub async fn run_scoring(
 
         // Compute cursor position metrics
         let (cursor_distance, cursor_exact_match) =
-            compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor_offset);
+            compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor.as_ref());
 
         // Compute approximation of editable region correctness
         let wrong_editable_region = Some(!metrics::is_editable_region_correct(&actual_patch));
 
-        // Check for isolated whitespace changes
-        let has_isolated_whitespace_changes =
-            metrics::has_isolated_whitespace_changes(&actual_patch);
+        // Check for isolated whitespace changes.
+        let has_isolated_whitespace_changes = metrics::has_isolated_whitespace_changes(
+            &actual_patch,
+            prediction.actual_cursor.as_ref(),
+        );
 
         scores.push(ExampleScore {
             delta_chr_f: best_delta_chr_f,
@@ -187,13 +189,13 @@ pub async fn run_scoring(
 }
 
 fn compute_cursor_metrics(
-    expected_cursor: Option<usize>,
-    actual_cursor: Option<usize>,
+    expected_cursor_editable_region_offset: Option<usize>,
+    actual_cursor: Option<&ActualCursor>,
 ) -> (Option<usize>, Option<bool>) {
-    match (expected_cursor, actual_cursor) {
+    match (expected_cursor_editable_region_offset, actual_cursor) {
         (Some(expected), Some(actual)) => {
-            let distance = expected.abs_diff(actual);
-            let exact_match = expected == actual;
+            let distance = expected.abs_diff(actual.editable_region_offset.unwrap_or_default());
+            let exact_match = distance == 0;
             (Some(distance), Some(exact_match))
         }
         (None, None) => {