Detailed changes
@@ -561,8 +561,8 @@ impl ExampleSpec {
) {
self.expected_patches = patches
.into_iter()
- .map(|(patch, cursor_offset)| {
- let Some(cursor_offset) = cursor_offset else {
+ .map(|(patch, cursor_editable_region_offset)| {
+ let Some(cursor_offset) = cursor_editable_region_offset else {
return patch;
};
@@ -15,7 +15,14 @@ pub async fn run_distill(example: &mut Example) -> Result<()> {
let expected_patches = predictions
.into_iter()
- .filter_map(|p| Some((p.actual_patch.clone()?, p.actual_cursor_offset)))
+ .filter_map(|p| {
+ Some((
+ p.actual_patch.clone()?,
+ p.actual_cursor
+ .as_ref()
+ .and_then(|c| c.editable_region_offset),
+ ))
+ })
.collect();
example
@@ -86,12 +86,63 @@ pub struct ExamplePrediction {
#[serde(deserialize_with = "deserialize_null_as_empty_string")]
pub actual_output: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
- pub actual_cursor_offset: Option<usize>,
+ pub actual_cursor: Option<ActualCursor>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
pub provider: PredictionProvider,
}
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ActualCursor {
+ pub path: String,
+ pub row: u32,
+ pub column: u32,
+ pub offset: usize,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub editable_region_offset: Option<usize>,
+}
+
+impl ActualCursor {
+ /// Construct an `ActualCursor` from a cursor offset within the new editable region.
+ ///
+ /// - `path`: file path the cursor is in
+ /// - `editable_region_cursor_offset`: byte offset of the cursor within the new editable region text
+ /// - `new_editable_region`: the full new editable region text (after marker removal)
+ /// - `content`: the full file content (before the edit)
+ /// - `editable_region_byte_offset`: byte offset where the editable region starts in `content`
+ /// - `editable_region_start_line`: 0-based line number where the editable region starts in `content`
+ pub fn from_editable_region(
+ path: &std::path::Path,
+ editable_region_cursor_offset: usize,
+ new_editable_region: &str,
+ content: &str,
+ editable_region_byte_offset: usize,
+ editable_region_start_line: usize,
+ ) -> Self {
+ let global_offset = editable_region_byte_offset + editable_region_cursor_offset;
+ let new_region_prefix = &new_editable_region[..editable_region_cursor_offset];
+ let row = (editable_region_start_line + new_region_prefix.matches('\n').count()) as u32;
+ let column = match new_region_prefix.rfind('\n') {
+ Some(pos) => (editable_region_cursor_offset - pos - 1) as u32,
+ None => {
+ let content_prefix = &content[..editable_region_byte_offset];
+ let content_column = match content_prefix.rfind('\n') {
+ Some(pos) => editable_region_byte_offset - pos - 1,
+ None => editable_region_byte_offset,
+ };
+ (content_column + editable_region_cursor_offset) as u32
+ }
+ };
+ ActualCursor {
+ path: path.to_string_lossy().to_string(),
+ row,
+ column,
+ offset: global_offset,
+ editable_region_offset: Some(editable_region_cursor_offset),
+ }
+ }
+}
+
fn deserialize_null_as_empty_string<'de, D>(deserializer: D) -> Result<String, D::Error>
where
D: serde::Deserializer<'de>,
@@ -1,6 +1,6 @@
use crate::{
FormatPromptArgs, PredictionProvider,
- example::{Example, ExamplePrompt},
+ example::{ActualCursor, Example, ExamplePrompt},
headless::EpAppState,
progress::{ExampleProgress, Step},
retrieve_context::run_context_retrieval,
@@ -196,7 +196,7 @@ impl TeacherPrompt {
prompt
}
- pub fn parse(example: &Example, response: &str) -> Result<(String, Option<usize>)> {
+ pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
// Extract updated (new) editable region from the model response.
// The model may include editable region markers in its output, so we need to strip them.
let new_editable_region = extract_last_codeblock(response);
@@ -257,7 +257,18 @@ impl TeacherPrompt {
diff = diff,
};
- Ok((diff, cursor_offset))
+ let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
+ ActualCursor::from_editable_region(
+ &example.spec.cursor_path,
+ editable_region_cursor_offset,
+ &new_editable_region,
+ &prompt_inputs.content,
+ editable_region_offset,
+ editable_region_start_line,
+ )
+ });
+
+ Ok((diff, actual_cursor))
}
fn format_edit_history(edit_history: &str) -> String {
@@ -1,6 +1,9 @@
use collections::HashMap;
-use crate::reorder_patch::{Patch, PatchLine};
+use crate::{
+ example::ActualCursor,
+ reorder_patch::{Patch, PatchLine},
+};
pub type Counts = HashMap<String, usize>;
type CountsDelta = HashMap<String, isize>;
@@ -389,14 +392,30 @@ pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> Classifica
/// A whitespace-only change is an added or deleted line whose content is empty or
/// contains only whitespace. It is "isolated" when it is not adjacent to any
/// substantive (non-whitespace) change within the same contiguous change group.
-pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool {
+pub fn has_isolated_whitespace_changes(patch_str: &str, cursor: Option<&ActualCursor>) -> bool {
let patch = Patch::parse_unified_diff(patch_str);
+ let cursor_new_file_line = cursor.as_ref().map(|c| (c.row + 1) as usize);
+
for hunk in &patch.hunks {
let lines = &hunk.lines;
+ let mut new_text_line = hunk.new_start as usize;
+
for (i, line) in lines.iter().enumerate() {
let content = match line {
- PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(),
+ PatchLine::Addition(s) => {
+ let addition_line = new_text_line;
+ new_text_line += 1;
+ if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
+ continue;
+ }
+ s.as_str()
+ }
+ PatchLine::Deletion(s) => s.as_str(),
+ PatchLine::Context(_) => {
+ new_text_line += 1;
+ continue;
+ }
_ => continue,
};
@@ -615,8 +634,19 @@ mod test_optimization {
#[cfg(test)]
mod test {
use super::*;
+ use crate::example::ActualCursor;
use indoc::indoc;
+ fn cursor_on_line(one_based_line: u32) -> ActualCursor {
+ ActualCursor {
+ path: String::new(),
+ row: one_based_line - 1,
+ column: 0,
+ offset: 0,
+ editable_region_offset: None,
+ }
+ }
+
#[test]
fn test_delta_chr_f_perfect_match() {
let original = "fn main() { println!(\"Hello\");}";
@@ -841,7 +871,7 @@ index abc123..def456 100644
println!(\"hello\");
}
"};
- assert!(has_isolated_whitespace_changes(patch));
+ assert!(has_isolated_whitespace_changes(patch, None));
}
#[test]
@@ -854,7 +884,7 @@ index abc123..def456 100644
println!(\"hello\");
}
"};
- assert!(!has_isolated_whitespace_changes(patch));
+ assert!(!has_isolated_whitespace_changes(patch, None));
}
#[test]
@@ -866,7 +896,7 @@ index abc123..def456 100644
+ println!(\"world\");
}
"};
- assert!(!has_isolated_whitespace_changes(patch));
+ assert!(!has_isolated_whitespace_changes(patch, None));
}
#[test]
@@ -878,7 +908,7 @@ index abc123..def456 100644
println!(\"hello\");
}
"};
- assert!(has_isolated_whitespace_changes(patch));
+ assert!(has_isolated_whitespace_changes(patch, None));
}
#[test]
@@ -895,12 +925,56 @@ index abc123..def456 100644
println!(\"hello\");
}
"};
- assert!(has_isolated_whitespace_changes(patch));
+ assert!(has_isolated_whitespace_changes(patch, None));
}
#[test]
fn test_isolated_whitespace_empty_patch() {
let patch = "";
- assert!(!has_isolated_whitespace_changes(patch));
+ assert!(!has_isolated_whitespace_changes(patch, None));
+ }
+
+ #[test]
+ fn test_isolated_whitespace_skipped_on_cursor_line() {
+ // The addition of a blank line at new-file line 2 should be skipped
+ // because the cursor is on that line.
+ let patch = indoc! {"
+ @@ -1,3 +1,4 @@
+ fn main() {
+ +
+ println!(\"hello\");
+ }
+ "};
+ // New-file line 2 is the added blank line
+ let cursor = cursor_on_line(2);
+ assert!(!has_isolated_whitespace_changes(patch, Some(&cursor)));
+ }
+
+ #[test]
+ fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
+ // The blank line is at new-file line 2, but the cursor is on line 1.
+ let patch = indoc! {"
+ @@ -1,3 +1,4 @@
+ fn main() {
+ +
+ println!(\"hello\");
+ }
+ "};
+ let cursor = cursor_on_line(1);
+ assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
+ }
+
+ #[test]
+ fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
+ // Deletions don't have a new-file line, so cursor can't suppress them.
+ let patch = indoc! {"
+ @@ -1,4 +1,3 @@
+ fn main() {
+ -
+ println!(\"hello\");
+ }
+ "};
+ let cursor = cursor_on_line(2);
+ assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
}
}
@@ -1,4 +1,8 @@
-use crate::{PredictionProvider, example::Example, format_prompt::TeacherPrompt};
+use crate::{
+ PredictionProvider,
+ example::{ActualCursor, Example},
+ format_prompt::TeacherPrompt,
+};
use anyhow::{Context as _, Result};
use zeta_prompt::{CURSOR_MARKER, ZetaVersion};
@@ -24,9 +28,9 @@ pub fn run_parse_output(example: &mut Example) -> Result<()> {
})
.collect::<Result<Vec<_>>>()?;
- for (ix, actual_patch, actual_cursor_offset) in parsed_patches {
+ for (ix, actual_patch, actual_cursor) in parsed_patches {
example.predictions[ix].actual_patch = Some(actual_patch);
- example.predictions[ix].actual_cursor_offset = actual_cursor_offset;
+ example.predictions[ix].actual_cursor = actual_cursor;
example.predictions[ix].provider = provider;
}
@@ -37,7 +41,7 @@ pub fn parse_prediction_output(
example: &Example,
actual_output: &str,
provider: PredictionProvider,
-) -> Result<(String, Option<usize>)> {
+) -> Result<(String, Option<ActualCursor>)> {
match provider {
PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
TeacherPrompt::parse(example, actual_output)
@@ -84,7 +88,7 @@ fn parse_zeta2_output(
example: &Example,
actual_output: &str,
version: ZetaVersion,
-) -> Result<(String, Option<usize>)> {
+) -> Result<(String, Option<ActualCursor>)> {
let prompt = &example.prompt.as_ref().context("prompt required")?.input;
let prompt_inputs = example
.prompt_inputs
@@ -154,7 +158,18 @@ fn parse_zeta2_output(
path = example.spec.cursor_path.to_string_lossy(),
);
- Ok((formatted_diff, cursor_offset))
+ let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
+ ActualCursor::from_editable_region(
+ &example.spec.cursor_path,
+ editable_region_cursor_offset,
+ &new_text,
+ &prompt_inputs.content,
+ editable_region_offset,
+ editable_region_start_line,
+ )
+ });
+
+ Ok((formatted_diff, actual_cursor))
}
#[cfg(test)]
@@ -201,7 +201,7 @@ pub async fn run_prediction(
.push(ExamplePrediction {
actual_patch: None,
actual_output: String::new(),
- actual_cursor_offset: None,
+ actual_cursor: None,
error: None,
provider,
});
@@ -323,12 +323,12 @@ async fn predict_anthropic(
.collect::<Vec<String>>()
.join("\n");
- let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?;
+ let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?;
let prediction = ExamplePrediction {
actual_patch: Some(actual_patch),
actual_output,
- actual_cursor_offset,
+ actual_cursor,
error: None,
provider: if batched {
PredictionProvider::Teacher(backend)
@@ -396,12 +396,12 @@ async fn predict_openai(
.collect::<Vec<String>>()
.join("\n");
- let (actual_patch, actual_cursor_offset) = TeacherPrompt::parse(example, &actual_output)?;
+ let (actual_patch, actual_cursor) = TeacherPrompt::parse(example, &actual_output)?;
let prediction = ExamplePrediction {
actual_patch: Some(actual_patch),
actual_output,
- actual_cursor_offset,
+ actual_cursor,
error: None,
provider: if batched {
PredictionProvider::Teacher(backend)
@@ -257,12 +257,13 @@ pub async fn run_repair(
.err()
.map(|e| format!("Failed to parse repair response: {}", e));
- let (actual_patch, actual_cursor_offset) = parse_result.ok().unzip();
+ let (actual_patch, actual_cursor) = parse_result.ok().unzip();
+ let actual_cursor = actual_cursor.flatten();
example.predictions.push(ExamplePrediction {
actual_patch,
actual_output: response,
- actual_cursor_offset: actual_cursor_offset.flatten(),
+ actual_cursor,
error: err,
provider: PredictionProvider::Repair,
});
@@ -1,6 +1,6 @@
use crate::{
PredictArgs, PredictionProvider,
- example::{Example, ExampleScore},
+ example::{ActualCursor, Example, ExampleScore},
format_prompt::TeacherPrompt,
headless::EpAppState,
metrics,
@@ -159,14 +159,16 @@ pub async fn run_scoring(
// Compute cursor position metrics
let (cursor_distance, cursor_exact_match) =
- compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor_offset);
+ compute_cursor_metrics(best_expected_cursor, prediction.actual_cursor.as_ref());
// Compute approximation of editable region correctness
let wrong_editable_region = Some(!metrics::is_editable_region_correct(&actual_patch));
- // Check for isolated whitespace changes
- let has_isolated_whitespace_changes =
- metrics::has_isolated_whitespace_changes(&actual_patch);
+ // Check for isolated whitespace changes.
+ let has_isolated_whitespace_changes = metrics::has_isolated_whitespace_changes(
+ &actual_patch,
+ prediction.actual_cursor.as_ref(),
+ );
scores.push(ExampleScore {
delta_chr_f: best_delta_chr_f,
@@ -187,13 +189,13 @@ pub async fn run_scoring(
}
fn compute_cursor_metrics(
- expected_cursor: Option<usize>,
- actual_cursor: Option<usize>,
+ expected_cursor_editable_region_offset: Option<usize>,
+ actual_cursor: Option<&ActualCursor>,
) -> (Option<usize>, Option<bool>) {
- match (expected_cursor, actual_cursor) {
+ match (expected_cursor_editable_region_offset, actual_cursor) {
(Some(expected), Some(actual)) => {
- let distance = expected.abs_diff(actual);
- let exact_match = expected == actual;
+ let distance = expected.abs_diff(actual.editable_region_offset.unwrap_or_default());
+ let exact_match = distance == 0;
(Some(distance), Some(exact_match))
}
(None, None) => {