diff --git a/crates/edit_prediction_cli/src/example.rs b/crates/edit_prediction_cli/src/example.rs index 42f67e91951d733d01af9c24d2682af5f663319e..275646f5a28b62a0770cbfd54a02af94506aff52 100644 --- a/crates/edit_prediction_cli/src/example.rs +++ b/crates/edit_prediction_cli/src/example.rs @@ -117,6 +117,8 @@ pub struct ExampleScore { #[serde(default, skip_serializing_if = "Option::is_none")] pub cursor_exact_match: Option, pub wrong_editable_region: Option, + #[serde(default)] + pub has_isolated_whitespace_changes: bool, } impl Example { diff --git a/crates/edit_prediction_cli/src/metrics.rs b/crates/edit_prediction_cli/src/metrics.rs index e6f1d49e19742a2a9acb70397a460a98c13b16c2..a61565921f168d51fa47ae092692a454868bb06b 100644 --- a/crates/edit_prediction_cli/src/metrics.rs +++ b/crates/edit_prediction_cli/src/metrics.rs @@ -384,6 +384,63 @@ pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> Classifica ClassificationMetrics::from_counts(&expected_lines, &actual_lines) } +/// Returns whether the patch contains any isolated whitespace-only changes. +/// +/// A whitespace-only change is an added or deleted line whose content is empty or +/// contains only whitespace. It is "isolated" when it is not adjacent to any +/// substantive (non-whitespace) change within the same contiguous change group. +pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool { + let patch = Patch::parse_unified_diff(patch_str); + + for hunk in &patch.hunks { + let lines = &hunk.lines; + for (i, line) in lines.iter().enumerate() { + let content = match line { + PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(), + _ => continue, + }; + + if !content.trim().is_empty() { + continue; + } + + if is_whitespace_change_isolated(lines, i) { + return true; + } + } + } + + false +} + +fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool { + // Look backward for a non-whitespace change before hitting a context line + for line in lines[..index].iter().rev() { + match line { + PatchLine::Addition(s) | PatchLine::Deletion(s) => { + if !s.trim().is_empty() { + return false; + } + } + _ => break, + } + } + + // Look forward for a non-whitespace change before hitting a context line + for line in &lines[index + 1..] { + match line { + PatchLine::Addition(s) | PatchLine::Deletion(s) => { + if !s.trim().is_empty() { + return false; + } + } + _ => break, + } + } + + true +} + /// A simple proxy for whether the prediction respects editable region. pub fn is_editable_region_correct(actual_patch: &str) -> bool { // A typical sign of a wrong editable region: a bunch of lines deletion @@ -774,4 +831,76 @@ index abc123..def456 100644 "}; assert!(is_editable_region_correct(patch)); } + + #[test] + fn test_isolated_whitespace_purely_whitespace_patch() { + let patch = indoc! {" + @@ -1,3 +1,4 @@ + fn main() { + + + println!(\"hello\"); + } + "}; + assert!(has_isolated_whitespace_changes(patch)); + } + + #[test] + fn test_isolated_whitespace_adjacent_to_real_change() { + let patch = indoc! {" + @@ -1,3 +1,4 @@ + fn main() { + + + + let x = 1; + println!(\"hello\"); + } + "}; + assert!(!has_isolated_whitespace_changes(patch)); + } + + #[test] + fn test_isolated_whitespace_no_whitespace_changes() { + let patch = indoc! {" + @@ -1,3 +1,3 @@ + fn main() { + - println!(\"hello\"); + + println!(\"world\"); + } + "}; + assert!(!has_isolated_whitespace_changes(patch)); + } + + #[test] + fn test_isolated_whitespace_deletion() { + let patch = indoc! {" + @@ -1,4 +1,3 @@ + fn main() { + - + println!(\"hello\"); + } + "}; + assert!(has_isolated_whitespace_changes(patch)); + } + + #[test] + fn test_isolated_whitespace_mixed_groups() { + let patch = indoc! {" + @@ -1,7 +1,8 @@ + fn main() { + + + let x = 1; + - let y = 2; + + let y = 3; + + + + println!(\"hello\"); + } + "}; + assert!(has_isolated_whitespace_changes(patch)); + } + + #[test] + fn test_isolated_whitespace_empty_patch() { + let patch = ""; + assert!(!has_isolated_whitespace_changes(patch)); + } } diff --git a/crates/edit_prediction_cli/src/score.rs b/crates/edit_prediction_cli/src/score.rs index 78a13095f6c5e1dd4dfff74c727c099aeb6cc598..f4af865bbd2714b5e27f5bc27332f5790766a119 100644 --- a/crates/edit_prediction_cli/src/score.rs +++ b/crates/edit_prediction_cli/src/score.rs @@ -75,6 +75,7 @@ pub async fn run_scoring( cursor_distance: None, cursor_exact_match: None, wrong_editable_region: None, + has_isolated_whitespace_changes: false, }; let prompt_inputs = example.prompt_inputs.as_ref().unwrap(); @@ -163,6 +164,10 @@ pub async fn run_scoring( // Compute approximation of editable region correctness let wrong_editable_region = Some(!metrics::is_editable_region_correct(&actual_patch)); + // Check for isolated whitespace changes + let has_isolated_whitespace_changes = + metrics::has_isolated_whitespace_changes(&actual_patch); + scores.push(ExampleScore { delta_chr_f: best_delta_chr_f, braces_disbalance, @@ -173,6 +178,7 @@ pub async fn run_scoring( cursor_distance, cursor_exact_match, wrong_editable_region, + has_isolated_whitespace_changes, }); } @@ -229,6 +235,7 @@ pub fn print_report(examples: &[Example]) { let mut cursor_distance_count: usize = 0; let mut wrong_editable_region_count: usize = 0; let mut wrong_editable_region_total: usize = 0; + let mut isolated_whitespace_count: usize = 0; for example in examples { for (score_idx, score) in example.score.iter().enumerate() { @@ -307,6 +314,11 @@ pub fn print_report(examples: &[Example]) { } } + // Accumulate isolated whitespace metrics + if score.has_isolated_whitespace_changes { + isolated_whitespace_count += 1; + } + // Accumulate cursor metrics if let Some(exact_match) = score.cursor_exact_match { cursor_total += 1; @@ -362,6 +374,16 @@ pub fn print_report(examples: &[Example]) { } else { "-".to_string() }; + let isolated_ws_str = if total_scores > 0 { + format!( + "{}/{} ({:.1}%)", + isolated_whitespace_count, + total_scores, + isolated_whitespace_count as f32 / total_scores as f32 * 100.0 + ) + } else { + "-".to_string() + }; let avg_cursor_distance = if cursor_distance_count > 0 { Some(cursor_distance_sum as f32 / cursor_distance_count as f32) } else { @@ -392,6 +414,11 @@ pub fn print_report(examples: &[Example]) { avg_dist ); } + + // Print isolated whitespace metrics + if total_scores > 0 { + println!("Isolated whitespace changes: {}", isolated_ws_str); + } } println!("\n"); @@ -429,6 +456,7 @@ pub struct SummaryJson { pub cursor_total_evaluated: Option, #[serde(skip_serializing_if = "Option::is_none")] pub wrong_editable_region_rate: Option, + pub isolated_whitespace_rate: Option, } pub fn compute_summary(examples: &[Example]) -> SummaryJson { @@ -449,6 +477,7 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson { let mut cursor_distance_count: usize = 0; let mut wrong_editable_region_count: usize = 0; let mut wrong_editable_region_total: usize = 0; + let mut isolated_whitespace_count: usize = 0; for example in examples { for (score_idx, score) in example.score.iter().enumerate() { @@ -482,6 +511,11 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson { } } + // Accumulate isolated whitespace metrics + if score.has_isolated_whitespace_changes { + isolated_whitespace_count += 1; + } + // Accumulate cursor metrics if let Some(exact_match) = score.cursor_exact_match { cursor_total += 1; @@ -550,6 +584,12 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson { None }; + let isolated_whitespace_rate = if total_scores > 0 { + Some(isolated_whitespace_count as f32 / total_scores as f32) + } else { + None + }; + SummaryJson { total_examples: total_scores, avg_delta_chr_f, @@ -567,6 +607,7 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson { cursor_avg_distance, cursor_total_evaluated, wrong_editable_region_rate, + isolated_whitespace_rate, } }