From 389f66591c35584ef5fc1ad59efdd4697e406877 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Thu, 5 Feb 2026 14:31:12 +0200 Subject: [PATCH] ep: Store cursor position in global coordinates (#48469) This also fixes the isolated whitespace change metric, which is not takes cursor position into account (it's okay to insert a newline if it has a cursor on it) Release Notes: - N/A --- crates/edit_prediction/src/example_spec.rs | 4 +- crates/edit_prediction_cli/src/distill.rs | 9 +- crates/edit_prediction_cli/src/example.rs | 53 ++++++++++- .../edit_prediction_cli/src/format_prompt.rs | 17 +++- crates/edit_prediction_cli/src/metrics.rs | 92 +++++++++++++++++-- .../edit_prediction_cli/src/parse_output.rs | 27 ++++-- crates/edit_prediction_cli/src/predict.rs | 10 +- crates/edit_prediction_cli/src/repair.rs | 5 +- crates/edit_prediction_cli/src/score.rs | 22 +++-- 9 files changed, 200 insertions(+), 39 deletions(-) diff --git a/crates/edit_prediction/src/example_spec.rs b/crates/edit_prediction/src/example_spec.rs index f326ab749b31bb32895ea5f5d9e92e4b4f4853ec..5b9c98b83074cf5d4ead8af2bb974ff591c86e95 100644 --- a/crates/edit_prediction/src/example_spec.rs +++ b/crates/edit_prediction/src/example_spec.rs @@ -561,8 +561,8 @@ impl ExampleSpec { ) { self.expected_patches = patches .into_iter() - .map(|(patch, cursor_offset)| { - let Some(cursor_offset) = cursor_offset else { + .map(|(patch, cursor_editable_region_offset)| { + let Some(cursor_offset) = cursor_editable_region_offset else { return patch; }; diff --git a/crates/edit_prediction_cli/src/distill.rs b/crates/edit_prediction_cli/src/distill.rs index b1dbb0b9fcf1639d3a9da00502e97a0778393327..951d6cbcd41aa5e125826337f512264810086084 100644 --- a/crates/edit_prediction_cli/src/distill.rs +++ b/crates/edit_prediction_cli/src/distill.rs @@ -15,7 +15,14 @@ pub async fn run_distill(example: &mut Example) -> Result<()> { let expected_patches = predictions .into_iter() - .filter_map(|p| Some((p.actual_patch.clone()?, p.actual_cursor_offset))) + .filter_map(|p| { + Some(( + p.actual_patch.clone()?, + p.actual_cursor + .as_ref() + .and_then(|c| c.editable_region_offset), + )) + }) .collect(); example diff --git a/crates/edit_prediction_cli/src/example.rs b/crates/edit_prediction_cli/src/example.rs index 275646f5a28b62a0770cbfd54a02af94506aff52..d0627d0fadfe019dd10da0ab237a0f8829e32058 100644 --- a/crates/edit_prediction_cli/src/example.rs +++ b/crates/edit_prediction_cli/src/example.rs @@ -86,12 +86,63 @@ pub struct ExamplePrediction { #[serde(deserialize_with = "deserialize_null_as_empty_string")] pub actual_output: String, #[serde(default, skip_serializing_if = "Option::is_none")] - pub actual_cursor_offset: Option, + pub actual_cursor: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub error: Option, pub provider: PredictionProvider, } +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ActualCursor { + pub path: String, + pub row: u32, + pub column: u32, + pub offset: usize, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub editable_region_offset: Option, +} + +impl ActualCursor { + /// Construct an `ActualCursor` from a cursor offset within the new editable region. + /// + /// - `path`: file path the cursor is in + /// - `editable_region_cursor_offset`: byte offset of the cursor within the new editable region text + /// - `new_editable_region`: the full new editable region text (after marker removal) + /// - `content`: the full file content (before the edit) + /// - `editable_region_byte_offset`: byte offset where the editable region starts in `content` + /// - `editable_region_start_line`: 0-based line number where the editable region starts in `content` + pub fn from_editable_region( + path: &std::path::Path, + editable_region_cursor_offset: usize, + new_editable_region: &str, + content: &str, + editable_region_byte_offset: usize, + editable_region_start_line: usize, + ) -> Self { + let global_offset = editable_region_byte_offset + editable_region_cursor_offset; + let new_region_prefix = &new_editable_region[..editable_region_cursor_offset]; + let row = (editable_region_start_line + new_region_prefix.matches('\n').count()) as u32; + let column = match new_region_prefix.rfind('\n') { + Some(pos) => (editable_region_cursor_offset - pos - 1) as u32, + None => { + let content_prefix = &content[..editable_region_byte_offset]; + let content_column = match content_prefix.rfind('\n') { + Some(pos) => editable_region_byte_offset - pos - 1, + None => editable_region_byte_offset, + }; + (content_column + editable_region_cursor_offset) as u32 + } + }; + ActualCursor { + path: path.to_string_lossy().to_string(), + row, + column, + offset: global_offset, + editable_region_offset: Some(editable_region_cursor_offset), + } + } +} + fn deserialize_null_as_empty_string<'de, D>(deserializer: D) -> Result where D: serde::Deserializer<'de>, diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index daf275f6d33090b0eecb557a9eaf5ede37101dde..f76f4899092c7add08897b887a6398a5f7a2bded 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -1,6 +1,6 @@ use crate::{ FormatPromptArgs, PredictionProvider, - example::{Example, ExamplePrompt}, + example::{ActualCursor, Example, ExamplePrompt}, headless::EpAppState, progress::{ExampleProgress, Step}, retrieve_context::run_context_retrieval, @@ -196,7 +196,7 @@ impl TeacherPrompt { prompt } - pub fn parse(example: &Example, response: &str) -> Result<(String, Option)> { + pub fn parse(example: &Example, response: &str) -> Result<(String, Option)> { // Extract updated (new) editable region from the model response. // The model may include editable region markers in its output, so we need to strip them. let new_editable_region = extract_last_codeblock(response); @@ -257,7 +257,18 @@ impl TeacherPrompt { diff = diff, }; - Ok((diff, cursor_offset)) + let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| { + ActualCursor::from_editable_region( + &example.spec.cursor_path, + editable_region_cursor_offset, + &new_editable_region, + &prompt_inputs.content, + editable_region_offset, + editable_region_start_line, + ) + }); + + Ok((diff, actual_cursor)) } fn format_edit_history(edit_history: &str) -> String { diff --git a/crates/edit_prediction_cli/src/metrics.rs b/crates/edit_prediction_cli/src/metrics.rs index a61565921f168d51fa47ae092692a454868bb06b..2b649bbc63e3d0f8f50afd1b79e5c97816e46dda 100644 --- a/crates/edit_prediction_cli/src/metrics.rs +++ b/crates/edit_prediction_cli/src/metrics.rs @@ -1,6 +1,9 @@ use collections::HashMap; -use crate::reorder_patch::{Patch, PatchLine}; +use crate::{ + example::ActualCursor, + reorder_patch::{Patch, PatchLine}, +}; pub type Counts = HashMap; type CountsDelta = HashMap; @@ -389,14 +392,30 @@ pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> Classifica /// A whitespace-only change is an added or deleted line whose content is empty or /// contains only whitespace. It is "isolated" when it is not adjacent to any /// substantive (non-whitespace) change within the same contiguous change group. -pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool { +pub fn has_isolated_whitespace_changes(patch_str: &str, cursor: Option<&ActualCursor>) -> bool { let patch = Patch::parse_unified_diff(patch_str); + let cursor_new_file_line = cursor.as_ref().map(|c| (c.row + 1) as usize); + for hunk in &patch.hunks { let lines = &hunk.lines; + let mut new_text_line = hunk.new_start as usize; + for (i, line) in lines.iter().enumerate() { let content = match line { - PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(), + PatchLine::Addition(s) => { + let addition_line = new_text_line; + new_text_line += 1; + if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) { + continue; + } + s.as_str() + } + PatchLine::Deletion(s) => s.as_str(), + PatchLine::Context(_) => { + new_text_line += 1; + continue; + } _ => continue, }; @@ -615,8 +634,19 @@ mod test_optimization { #[cfg(test)] mod test { use super::*; + use crate::example::ActualCursor; use indoc::indoc; + fn cursor_on_line(one_based_line: u32) -> ActualCursor { + ActualCursor { + path: String::new(), + row: one_based_line - 1, + column: 0, + offset: 0, + editable_region_offset: None, + } + } + #[test] fn test_delta_chr_f_perfect_match() { let original = "fn main() { println!(\"Hello\");}"; @@ -841,7 +871,7 @@ index abc123..def456 100644 println!(\"hello\"); } "}; - assert!(has_isolated_whitespace_changes(patch)); + assert!(has_isolated_whitespace_changes(patch, None)); } #[test] @@ -854,7 +884,7 @@ index abc123..def456 100644 println!(\"hello\"); } "}; - assert!(!has_isolated_whitespace_changes(patch)); + assert!(!has_isolated_whitespace_changes(patch, None)); } #[test] @@ -866,7 +896,7 @@ index abc123..def456 100644 + println!(\"world\"); } "}; - assert!(!has_isolated_whitespace_changes(patch)); + assert!(!has_isolated_whitespace_changes(patch, None)); } #[test] @@ -878,7 +908,7 @@ index abc123..def456 100644 println!(\"hello\"); } "}; - assert!(has_isolated_whitespace_changes(patch)); + assert!(has_isolated_whitespace_changes(patch, None)); } #[test] @@ -895,12 +925,56 @@ index abc123..def456 100644 println!(\"hello\"); } "}; - assert!(has_isolated_whitespace_changes(patch)); + assert!(has_isolated_whitespace_changes(patch, None)); } #[test] fn test_isolated_whitespace_empty_patch() { let patch = ""; - assert!(!has_isolated_whitespace_changes(patch)); + assert!(!has_isolated_whitespace_changes(patch, None)); + } + + #[test] + fn test_isolated_whitespace_skipped_on_cursor_line() { + // The addition of a blank line at new-file line 2 should be skipped + // because the cursor is on that line. + let patch = indoc! {" + @@ -1,3 +1,4 @@ + fn main() { + + + println!(\"hello\"); + } + "}; + // New-file line 2 is the added blank line + let cursor = cursor_on_line(2); + assert!(!has_isolated_whitespace_changes(patch, Some(&cursor))); + } + + #[test] + fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() { + // The blank line is at new-file line 2, but the cursor is on line 1. + let patch = indoc! {" + @@ -1,3 +1,4 @@ + fn main() { + + + println!(\"hello\"); + } + "}; + let cursor = cursor_on_line(1); + assert!(has_isolated_whitespace_changes(patch, Some(&cursor))); + } + + #[test] + fn test_isolated_whitespace_deletion_not_skipped_by_cursor() { + // Deletions don't have a new-file line, so cursor can't suppress them. + let patch = indoc! {" + @@ -1,4 +1,3 @@ + fn main() { + - + println!(\"hello\"); + } + "}; + let cursor = cursor_on_line(2); + assert!(has_isolated_whitespace_changes(patch, Some(&cursor))); } } diff --git a/crates/edit_prediction_cli/src/parse_output.rs b/crates/edit_prediction_cli/src/parse_output.rs index 884f4c340960d1525b6d6b080e4440f1a2de03f2..041624c55ab5cedfea79e2a7776e5be0e467d627 100644 --- a/crates/edit_prediction_cli/src/parse_output.rs +++ b/crates/edit_prediction_cli/src/parse_output.rs @@ -1,4 +1,8 @@ -use crate::{PredictionProvider, example::Example, format_prompt::TeacherPrompt}; +use crate::{ + PredictionProvider, + example::{ActualCursor, Example}, + format_prompt::TeacherPrompt, +}; use anyhow::{Context as _, Result}; use zeta_prompt::{CURSOR_MARKER, ZetaVersion}; @@ -24,9 +28,9 @@ pub fn run_parse_output(example: &mut Example) -> Result<()> { }) .collect::>>()?; - for (ix, actual_patch, actual_cursor_offset) in parsed_patches { + for (ix, actual_patch, actual_cursor) in parsed_patches { example.predictions[ix].actual_patch = Some(actual_patch); - example.predictions[ix].actual_cursor_offset = actual_cursor_offset; + example.predictions[ix].actual_cursor = actual_cursor; example.predictions[ix].provider = provider; } @@ -37,7 +41,7 @@ pub fn parse_prediction_output( example: &Example, actual_output: &str, provider: PredictionProvider, -) -> Result<(String, Option)> { +) -> Result<(String, Option)> { match provider { PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => { TeacherPrompt::parse(example, actual_output) @@ -84,7 +88,7 @@ fn parse_zeta2_output( example: &Example, actual_output: &str, version: ZetaVersion, -) -> Result<(String, Option)> { +) -> Result<(String, Option)> { let prompt = &example.prompt.as_ref().context("prompt required")?.input; let prompt_inputs = example .prompt_inputs @@ -154,7 +158,18 @@ fn parse_zeta2_output( path = example.spec.cursor_path.to_string_lossy(), ); - Ok((formatted_diff, cursor_offset)) + let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| { + ActualCursor::from_editable_region( + &example.spec.cursor_path, + editable_region_cursor_offset, + &new_text, + &prompt_inputs.content, + editable_region_offset, + editable_region_start_line, + ) + }); + + Ok((formatted_diff, actual_cursor)) } #[cfg(test)] diff --git a/crates/edit_prediction_cli/src/predict.rs b/crates/edit_prediction_cli/src/predict.rs index 69258c964ea109d27a1ede2a950a49a1066c0cfe..63be8e8b70dfbc5204ab530d839df4e9cdc34e41 100644 --- a/crates/edit_prediction_cli/src/predict.rs +++ b/crates/edit_prediction_cli/src/predict.rs @@ -201,7 +201,7 @@ pub async fn run_prediction( .push(ExamplePrediction { actual_patch: None, actual_output: String::new(), - actual_cursor_offset: None, + actual_cursor: None, error: None, provider, }); @@ -323,12 +323,12 @@ async fn predict_anthropic( .collect::>() .join("\n"); - let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?; + let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?; let prediction = ExamplePrediction { actual_patch: Some(actual_patch), actual_output, - actual_cursor_offset, + actual_cursor, error: None, provider: if batched { PredictionProvider::Teacher(backend) @@ -396,12 +396,12 @@ async fn predict_openai( .collect::>() .join("\n"); - let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?; + let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?; let prediction = ExamplePrediction { actual_patch: Some(actual_patch), actual_output, - actual_cursor_offset, + actual_cursor, error: None, provider: if batched { PredictionProvider::Teacher(backend) diff --git a/crates/edit_prediction_cli/src/repair.rs b/crates/edit_prediction_cli/src/repair.rs index 9f1504d530f60b90d2ad8171bd79a08c8067eae6..1112d5ea567e8b3083ebfd37843bd9d534bb9a7b 100644 --- a/crates/edit_prediction_cli/src/repair.rs +++ b/crates/edit_prediction_cli/src/repair.rs @@ -257,12 +257,13 @@ pub async fn run_repair( .err() .map(|e| format!("Failed to parse repair response: {}", e)); - let (actual_patch, actual_cursor_offset) = parse_result.ok().unzip(); + let (actual_patch, actual_cursor) = parse_result.ok().unzip(); + let actual_cursor = actual_cursor.flatten(); example.predictions.push(ExamplePrediction { actual_patch, actual_output: response, - actual_cursor_offset: actual_cursor_offset.flatten(), + actual_cursor, error: err, provider: PredictionProvider::Repair, }); diff --git a/crates/edit_prediction_cli/src/score.rs b/crates/edit_prediction_cli/src/score.rs index f4af865bbd2714b5e27f5bc27332f5790766a119..099b6fa1b85521a5a1b250ddfc5ef414b0eacd49 100644 --- a/crates/edit_prediction_cli/src/score.rs +++ b/crates/edit_prediction_cli/src/score.rs @@ -1,6 +1,6 @@ use crate::{ PredictArgs, PredictionProvider, - example::{Example, ExampleScore}, + example::{ActualCursor, Example, ExampleScore}, format_prompt::TeacherPrompt, headless::EpAppState, metrics, @@ -159,14 +159,16 @@ pub async fn run_scoring( // Compute cursor position metrics let (cursor_distance, cursor_exact_match) = - compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor_offset); + compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor.as_ref()); // Compute approximation of editable region correctness let wrong_editable_region = Some(!metrics::is_editable_region_correct(&actual_patch)); - // Check for isolated whitespace changes - let has_isolated_whitespace_changes = - metrics::has_isolated_whitespace_changes(&actual_patch); + // Check for isolated whitespace changes. + let has_isolated_whitespace_changes = metrics::has_isolated_whitespace_changes( + &actual_patch, + prediction.actual_cursor.as_ref(), + ); scores.push(ExampleScore { delta_chr_f: best_delta_chr_f, @@ -187,13 +189,13 @@ pub async fn run_scoring( } fn compute_cursor_metrics( - expected_cursor: Option, - actual_cursor: Option, + expected_cursor_editable_region_offset: Option, + actual_cursor: Option<&ActualCursor>, ) -> (Option, Option) { - match (expected_cursor, actual_cursor) { + match (expected_cursor_editable_region_offset, actual_cursor) { (Some(expected), Some(actual)) => { - let distance = expected.abs_diff(actual); - let exact_match = expected == actual; + let distance = expected.abs_diff(actual.editable_region_offset.unwrap_or_default()); + let exact_match = distance == 0; (Some(distance), Some(exact_match)) } (None, None) => {