ep: Meaure isolated whitespace change ratio (#48352)

Oleksiy Syvokon created

Release Notes:

- N/A

Change summary

crates/edit_prediction_cli/src/example.rs |   2 
crates/edit_prediction_cli/src/metrics.rs | 129 +++++++++++++++++++++++++
crates/edit_prediction_cli/src/score.rs   |  41 +++++++
3 files changed, 172 insertions(+)

Detailed changes

crates/edit_prediction_cli/src/example.rs 🔗

@@ -117,6 +117,8 @@ pub struct ExampleScore {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub cursor_exact_match: Option<bool>,
     pub wrong_editable_region: Option<bool>,
+    #[serde(default)]
+    pub has_isolated_whitespace_changes: bool,
 }
 
 impl Example {

crates/edit_prediction_cli/src/metrics.rs 🔗

@@ -384,6 +384,63 @@ pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> Classifica
     ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
 }
 
+/// Returns whether the patch contains any isolated whitespace-only changes.
+///
+/// A whitespace-only change is an added or deleted line whose content is empty or
+/// contains only whitespace. It is "isolated" when it is not adjacent to any
+/// substantive (non-whitespace) change within the same contiguous change group.
+pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool {
+    let patch = Patch::parse_unified_diff(patch_str);
+
+    for hunk in &patch.hunks {
+        let lines = &hunk.lines;
+        for (i, line) in lines.iter().enumerate() {
+            let content = match line {
+                PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(),
+                _ => continue,
+            };
+
+            if !content.trim().is_empty() {
+                continue;
+            }
+
+            if is_whitespace_change_isolated(lines, i) {
+                return true;
+            }
+        }
+    }
+
+    false
+}
+
+fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
+    // Look backward for a non-whitespace change before hitting a context line
+    for line in lines[..index].iter().rev() {
+        match line {
+            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
+                if !s.trim().is_empty() {
+                    return false;
+                }
+            }
+            _ => break,
+        }
+    }
+
+    // Look forward for a non-whitespace change before hitting a context line
+    for line in &lines[index + 1..] {
+        match line {
+            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
+                if !s.trim().is_empty() {
+                    return false;
+                }
+            }
+            _ => break,
+        }
+    }
+
+    true
+}
+
 /// A simple proxy for whether the prediction respects editable region.
 pub fn is_editable_region_correct(actual_patch: &str) -> bool {
     // A typical sign of a wrong editable region: a bunch of lines deletion
@@ -774,4 +831,76 @@ index abc123..def456 100644
             "};
         assert!(is_editable_region_correct(patch));
     }
+
+    #[test]
+    fn test_isolated_whitespace_purely_whitespace_patch() {
+        let patch = indoc! {"
+            @@ -1,3 +1,4 @@
+             fn main() {
+            +
+                 println!(\"hello\");
+             }
+        "};
+        assert!(has_isolated_whitespace_changes(patch));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_adjacent_to_real_change() {
+        let patch = indoc! {"
+            @@ -1,3 +1,4 @@
+             fn main() {
+            +
+            +    let x = 1;
+                 println!(\"hello\");
+             }
+        "};
+        assert!(!has_isolated_whitespace_changes(patch));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_no_whitespace_changes() {
+        let patch = indoc! {"
+            @@ -1,3 +1,3 @@
+             fn main() {
+            -    println!(\"hello\");
+            +    println!(\"world\");
+             }
+        "};
+        assert!(!has_isolated_whitespace_changes(patch));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_deletion() {
+        let patch = indoc! {"
+            @@ -1,4 +1,3 @@
+             fn main() {
+            -
+                 println!(\"hello\");
+             }
+        "};
+        assert!(has_isolated_whitespace_changes(patch));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_mixed_groups() {
+        let patch = indoc! {"
+            @@ -1,7 +1,8 @@
+             fn main() {
+            +
+                 let x = 1;
+            -    let y = 2;
+            +    let y = 3;
+
+            +
+                 println!(\"hello\");
+             }
+        "};
+        assert!(has_isolated_whitespace_changes(patch));
+    }
+
+    #[test]
+    fn test_isolated_whitespace_empty_patch() {
+        let patch = "";
+        assert!(!has_isolated_whitespace_changes(patch));
+    }
 }

crates/edit_prediction_cli/src/score.rs 🔗

@@ -75,6 +75,7 @@ pub async fn run_scoring(
         cursor_distance: None,
         cursor_exact_match: None,
         wrong_editable_region: None,
+        has_isolated_whitespace_changes: false,
     };
 
     let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
@@ -163,6 +164,10 @@ pub async fn run_scoring(
         // Compute approximation of editable region correctness
         let wrong_editable_region = Some(!metrics::is_editable_region_correct(&actual_patch));
 
+        // Check for isolated whitespace changes
+        let has_isolated_whitespace_changes =
+            metrics::has_isolated_whitespace_changes(&actual_patch);
+
         scores.push(ExampleScore {
             delta_chr_f: best_delta_chr_f,
             braces_disbalance,
@@ -173,6 +178,7 @@ pub async fn run_scoring(
             cursor_distance,
             cursor_exact_match,
             wrong_editable_region,
+            has_isolated_whitespace_changes,
         });
     }
 
@@ -229,6 +235,7 @@ pub fn print_report(examples: &[Example]) {
     let mut cursor_distance_count: usize = 0;
     let mut wrong_editable_region_count: usize = 0;
     let mut wrong_editable_region_total: usize = 0;
+    let mut isolated_whitespace_count: usize = 0;
 
     for example in examples {
         for (score_idx, score) in example.score.iter().enumerate() {
@@ -307,6 +314,11 @@ pub fn print_report(examples: &[Example]) {
                 }
             }
 
+            // Accumulate isolated whitespace metrics
+            if score.has_isolated_whitespace_changes {
+                isolated_whitespace_count += 1;
+            }
+
             // Accumulate cursor metrics
             if let Some(exact_match) = score.cursor_exact_match {
                 cursor_total += 1;
@@ -362,6 +374,16 @@ pub fn print_report(examples: &[Example]) {
         } else {
             "-".to_string()
         };
+        let isolated_ws_str = if total_scores > 0 {
+            format!(
+                "{}/{} ({:.1}%)",
+                isolated_whitespace_count,
+                total_scores,
+                isolated_whitespace_count as f32 / total_scores as f32 * 100.0
+            )
+        } else {
+            "-".to_string()
+        };
         let avg_cursor_distance = if cursor_distance_count > 0 {
             Some(cursor_distance_sum as f32 / cursor_distance_count as f32)
         } else {
@@ -392,6 +414,11 @@ pub fn print_report(examples: &[Example]) {
                 avg_dist
             );
         }
+
+        // Print isolated whitespace metrics
+        if total_scores > 0 {
+            println!("Isolated whitespace changes: {}", isolated_ws_str);
+        }
     }
 
     println!("\n");
@@ -429,6 +456,7 @@ pub struct SummaryJson {
     pub cursor_total_evaluated: Option<usize>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub wrong_editable_region_rate: Option<f32>,
+    pub isolated_whitespace_rate: Option<f32>,
 }
 
 pub fn compute_summary(examples: &[Example]) -> SummaryJson {
@@ -449,6 +477,7 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson {
     let mut cursor_distance_count: usize = 0;
     let mut wrong_editable_region_count: usize = 0;
     let mut wrong_editable_region_total: usize = 0;
+    let mut isolated_whitespace_count: usize = 0;
 
     for example in examples {
         for (score_idx, score) in example.score.iter().enumerate() {
@@ -482,6 +511,11 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson {
                 }
             }
 
+            // Accumulate isolated whitespace metrics
+            if score.has_isolated_whitespace_changes {
+                isolated_whitespace_count += 1;
+            }
+
             // Accumulate cursor metrics
             if let Some(exact_match) = score.cursor_exact_match {
                 cursor_total += 1;
@@ -550,6 +584,12 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson {
         None
     };
 
+    let isolated_whitespace_rate = if total_scores > 0 {
+        Some(isolated_whitespace_count as f32 / total_scores as f32)
+    } else {
+        None
+    };
+
     SummaryJson {
         total_examples: total_scores,
         avg_delta_chr_f,
@@ -567,6 +607,7 @@ pub fn compute_summary(examples: &[Example]) -> SummaryJson {
         cursor_avg_distance,
         cursor_total_evaluated,
         wrong_editable_region_rate,
+        isolated_whitespace_rate,
     }
 }