metrics.rs

   1use collections::HashMap;
   2
   3use crate::{
   4    example::ActualCursor,
   5    reorder_patch::{Patch, PatchLine},
   6    word_diff::{DiffOp, diff_tokens, tokenize},
   7};
   8
/// Multiset of strings (n-grams, changed lines, or tokens) keyed by content.
pub type Counts = HashMap<String, usize>;
/// Signed per-key count change between two `Counts` (after minus before).
type CountsDelta = HashMap<String, isize>;

/// Context characters needed on each side of a change to capture all affected n-grams
/// (an n-gram of the maximum order can extend at most `CHR_F_CHAR_ORDER - 1`
/// characters past a change boundary).
const CONTEXT_CHARS: usize = CHR_F_CHAR_ORDER - 1;
  14
/// Aggregated true/false positive/negative counts for one comparison.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct ClassificationMetrics {
    /// Items present in both the expected and actual sets.
    pub true_positives: usize,
    /// Items present only in the actual set.
    pub false_positives: usize,
    /// Items present only in the expected set.
    pub false_negatives: usize,
}
  21
/// Classification assigned to a single token by the expected-vs-actual diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenClass {
    /// Token appears on both the expected and actual sides.
    TruePositive,
    /// Token appears only on the actual side.
    FalsePositive,
    /// Token appears only on the expected side.
    FalseNegative,
}
  28
/// A token paired with its diff classification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedToken {
    /// The token text itself.
    pub token: String,
    /// How the token was classified by the expected-vs-actual diff.
    pub class: TokenClass,
}
  34
/// Per-token classification for both sides of an expected-vs-actual comparison.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct TokenClassificationDetail {
    /// Expected-side tokens (each TruePositive or FalseNegative).
    pub expected_tokens: Vec<ClassifiedToken>,
    /// Actual-side tokens (each TruePositive or FalsePositive).
    pub actual_tokens: Vec<ClassifiedToken>,
}
  40
  41impl ClassificationMetrics {
  42    pub fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
  43        let mut true_positives = 0;
  44        let mut false_positives = 0;
  45        let mut false_negatives = 0;
  46
  47        for (ngram, &expected_count) in expected {
  48            let actual_count = *actual.get(ngram).unwrap_or(&0);
  49            if actual_count > expected_count {
  50                false_positives += actual_count - expected_count;
  51            } else {
  52                false_negatives += expected_count - actual_count;
  53            }
  54            true_positives += expected_count.min(actual_count);
  55        }
  56
  57        for (ngram, &actual_count) in actual {
  58            if !expected.contains_key(ngram) {
  59                false_positives += actual_count;
  60            }
  61        }
  62
  63        ClassificationMetrics {
  64            true_positives,
  65            false_positives,
  66            false_negatives,
  67        }
  68    }
  69
  70    pub fn accumulate(&mut self, other: &ClassificationMetrics) {
  71        self.true_positives += other.true_positives;
  72        self.false_positives += other.false_positives;
  73        self.false_negatives += other.false_negatives;
  74    }
  75
  76    pub fn precision(&self) -> f64 {
  77        if self.true_positives + self.false_positives == 0 {
  78            0.0
  79        } else {
  80            self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
  81        }
  82    }
  83
  84    pub fn recall(&self) -> f64 {
  85        if self.true_positives + self.false_negatives == 0 {
  86            0.0
  87        } else {
  88            self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
  89        }
  90    }
  91
  92    pub fn f1(&self) -> f64 {
  93        let precision = self.precision();
  94        let recall = self.recall();
  95        if precision + recall == 0.0 {
  96            0.0
  97        } else {
  98            2.0 * precision * recall / (precision + recall)
  99        }
 100    }
 101}
 102
 103pub fn compare_classification_metrics(
 104    left: &ClassificationMetrics,
 105    right: &ClassificationMetrics,
 106) -> std::cmp::Ordering {
 107    left.f1()
 108        .total_cmp(&right.f1())
 109        .then_with(|| left.precision().total_cmp(&right.precision()))
 110        .then_with(|| left.recall().total_cmp(&right.recall()))
 111        .then_with(|| left.true_positives.cmp(&right.true_positives))
 112        .then_with(|| right.false_positives.cmp(&left.false_positives))
 113        .then_with(|| right.false_negatives.cmp(&left.false_negatives))
 114}
 115
/// Whitespace normalization applied before chrF n-gram counting.
enum ChrfWhitespace {
    /// Preserve whitespace as-is
    #[allow(unused)]
    Unchanged,

    /// Ignore all whitespace differences
    #[allow(unused)]
    Ignore,

    /// Collapse whitespace into single spaces
    Collapse,
}

/// Maximum character n-gram order used by the chrF metrics (i.e. chrF6).
const CHR_F_CHAR_ORDER: usize = 6;
/// chrF beta: recall is weighted beta^2 times as heavily as precision.
const CHR_F_BETA: f64 = 2.0;
/// Active whitespace policy for all chrF computations in this module.
const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Collapse;
 132
 133/// Computes a delta-chrF score that compares two sets of edits.
 134///
 135/// This metric works by:
 136/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
 137/// 2. Comparing these deltas to measure how well actual edits match expected edits
 138///
 139/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
 140/// the expected edits.
 141pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> f64 {
 142    // Edge case: if all texts are identical, the edits match perfectly
 143    if original == expected && expected == actual {
 144        return 100.0;
 145    }
 146
 147    // Pre-filter whitespace once for all texts
 148    let orig_chars: Vec<char> = filter_whitespace_chars(original);
 149    let exp_chars: Vec<char> = filter_whitespace_chars(expected);
 150    let act_chars: Vec<char> = filter_whitespace_chars(actual);
 151
 152    // Find the changed regions between original→expected and original→actual
 153    // We only need to compute n-grams on these regions (plus context for boundary n-grams)
 154    let (orig_for_exp, exp_region) = extract_changed_regions(&orig_chars, &exp_chars);
 155    let (orig_for_act, act_region) = extract_changed_regions(&orig_chars, &act_chars);
 156
 157    let mut total_precision = 0.0;
 158    let mut total_recall = 0.0;
 159
 160    for order in 1..=CHR_F_CHAR_ORDER {
 161        // Compute n-grams only on the affected regions
 162        let orig_ngrams_for_exp = count_ngrams_from_chars(&orig_for_exp, order);
 163        let exp_ngrams = count_ngrams_from_chars(&exp_region, order);
 164        let expected_delta = compute_ngram_delta(&exp_ngrams, &orig_ngrams_for_exp);
 165
 166        let orig_ngrams_for_act = count_ngrams_from_chars(&orig_for_act, order);
 167        let act_ngrams = count_ngrams_from_chars(&act_region, order);
 168        let actual_delta = compute_ngram_delta(&act_ngrams, &orig_ngrams_for_act);
 169
 170        if expected_delta.is_empty() && actual_delta.is_empty() {
 171            total_precision += 1.0;
 172            total_recall += 1.0;
 173            continue;
 174        }
 175
 176        let expected_counts = ngram_delta_to_counts(&expected_delta);
 177        let actual_counts = ngram_delta_to_counts(&actual_delta);
 178
 179        let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
 180        total_precision += score.precision();
 181        total_recall += score.recall();
 182    }
 183
 184    let prec = total_precision / CHR_F_CHAR_ORDER as f64;
 185    let recall = total_recall / CHR_F_CHAR_ORDER as f64;
 186    let f_score = if prec + recall == 0.0 {
 187        0.0
 188    } else {
 189        (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
 190    };
 191
 192    f_score * 100.0
 193}
 194
/// Reference implementation of delta_chr_f (original, non-optimized version).
/// Used for testing that the optimized version produces identical results.
///
/// Unlike `delta_chr_f`, this counts n-grams over the *entire* texts instead of
/// only the changed regions.
#[cfg(test)]
fn delta_chr_f_reference(original: &str, expected: &str, actual: &str) -> f64 {
    // Identical texts: the (empty) edits trivially match.
    if original == expected && expected == actual {
        return 100.0;
    }

    // Per-order n-gram counts; index `order` holds n-grams of size `order + 1`.
    let original_ngrams = chr_f_ngram_counts(original);
    let expected_ngrams = chr_f_ngram_counts(expected);
    let actual_ngrams = chr_f_ngram_counts(actual);

    let mut total_precision = 0.0;
    let mut total_recall = 0.0;

    for order in 0..CHR_F_CHAR_ORDER {
        // Count changes implied by each edit relative to the original text.
        let expected_delta = compute_ngram_delta(&expected_ngrams[order], &original_ngrams[order]);
        let actual_delta = compute_ngram_delta(&actual_ngrams[order], &original_ngrams[order]);

        // No n-grams at this order on either side: score as perfect agreement.
        if expected_delta.is_empty() && actual_delta.is_empty() {
            total_precision += 1.0;
            total_recall += 1.0;
            continue;
        }

        let expected_counts = ngram_delta_to_counts(&expected_delta);
        let actual_counts = ngram_delta_to_counts(&actual_delta);

        let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
        total_precision += score.precision();
        total_recall += score.recall();
    }

    // Macro-average over orders, then combine with the chrF F-beta formula.
    let prec = total_precision / CHR_F_CHAR_ORDER as f64;
    let recall = total_recall / CHR_F_CHAR_ORDER as f64;
    let f_score = if prec + recall == 0.0 {
        0.0
    } else {
        (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
    };

    f_score * 100.0
}
 238
 239/// Filter whitespace from a string and return as Vec<char>
 240fn filter_whitespace_chars(text: &str) -> Vec<char> {
 241    match CHR_F_WHITESPACE {
 242        ChrfWhitespace::Unchanged => text.chars().collect(),
 243        ChrfWhitespace::Ignore => text.chars().filter(|c| !c.is_whitespace()).collect(),
 244        ChrfWhitespace::Collapse => collapse_whitespace(text.chars()),
 245    }
 246}
 247
 248/// Collapse whitespace into single spaces.
 249/// Newlines and spaces are collapsed separately.
 250fn collapse_whitespace(chars: impl Iterator<Item = char>) -> Vec<char> {
 251    let mut result = Vec::new();
 252    let mut last_whitespace = None;
 253    for c in chars {
 254        if c.is_whitespace() && c != '\n' {
 255            if last_whitespace != Some(' ') {
 256                result.push(' ');
 257                last_whitespace = Some(' ');
 258            }
 259        } else if c == '\n' {
 260            if last_whitespace != Some('\n') {
 261                result.push(c);
 262                last_whitespace = Some('\n');
 263            }
 264        } else {
 265            result.push(c);
 266            last_whitespace = None;
 267        }
 268    }
 269    result
 270}
 271
 272/// Extract only the changed regions between two texts, with context for n-gram boundaries.
 273///
 274/// Returns (original_affected_region, modified_affected_region) as Vec<char>.
 275///
 276/// The key insight: when computing n-gram delta between two nearly-identical texts,
 277/// n-grams from unchanged regions cancel out. We only need to process:
 278/// 1. The changed content itself
 279/// 2. CONTEXT_CHARS (n-1) characters before and after, to capture boundary-crossing n-grams
 280fn extract_changed_regions(original: &[char], modified: &[char]) -> (Vec<char>, Vec<char>) {
 281    // Find longest common prefix
 282    let prefix_len = original
 283        .iter()
 284        .zip(modified.iter())
 285        .take_while(|(a, b)| a == b)
 286        .count();
 287
 288    // Find longest common suffix (that doesn't overlap with prefix)
 289    let orig_remaining = original.len().saturating_sub(prefix_len);
 290    let mod_remaining = modified.len().saturating_sub(prefix_len);
 291    let max_suffix = orig_remaining.min(mod_remaining);
 292
 293    let suffix_len = original
 294        .iter()
 295        .rev()
 296        .zip(modified.iter().rev())
 297        .take(max_suffix)
 298        .take_while(|(a, b)| a == b)
 299        .count();
 300
 301    // Calculate the changed region boundaries
 302    let orig_change_start = prefix_len;
 303    let orig_change_end = original.len().saturating_sub(suffix_len);
 304    let mod_change_start = prefix_len;
 305    let mod_change_end = modified.len().saturating_sub(suffix_len);
 306
 307    // If there's no actual change, return empty regions
 308    if orig_change_start >= orig_change_end && mod_change_start >= mod_change_end {
 309        return (Vec::new(), Vec::new());
 310    }
 311
 312    // Expand to include context for n-gram boundaries
 313    let orig_context_start = orig_change_start.saturating_sub(CONTEXT_CHARS);
 314    let orig_context_end = (orig_change_end + CONTEXT_CHARS).min(original.len());
 315    let mod_context_start = mod_change_start.saturating_sub(CONTEXT_CHARS);
 316    let mod_context_end = (mod_change_end + CONTEXT_CHARS).min(modified.len());
 317
 318    let orig_region: Vec<char> = original[orig_context_start..orig_context_end].to_vec();
 319    let mod_region: Vec<char> = modified[mod_context_start..mod_context_end].to_vec();
 320
 321    (orig_region, mod_region)
 322}
 323
 324/// Count n-grams directly from a char slice (avoids String allocation for the full text)
 325fn count_ngrams_from_chars(chars: &[char], n: usize) -> Counts {
 326    let mut counts = Counts::default();
 327
 328    if chars.len() < n {
 329        return counts;
 330    }
 331
 332    for window in chars.windows(n) {
 333        let ngram: String = window.iter().collect();
 334        *counts.entry(ngram).or_insert(0) += 1;
 335    }
 336
 337    counts
 338}
 339
 340#[allow(dead_code)]
 341fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
 342    let text = match CHR_F_WHITESPACE {
 343        ChrfWhitespace::Unchanged => text.to_string(),
 344        ChrfWhitespace::Ignore => text
 345            .chars()
 346            .filter(|c| !c.is_whitespace())
 347            .collect::<String>(),
 348        ChrfWhitespace::Collapse => collapse_whitespace(text.chars())
 349            .into_iter()
 350            .collect::<String>(),
 351    };
 352
 353    (1..=CHR_F_CHAR_ORDER)
 354        .map(|order| count_ngrams(&text, order))
 355        .collect()
 356}
 357
 358fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
 359    let mut delta = CountsDelta::default();
 360
 361    for (ngram, &before_count) in before {
 362        let after_count = *after.get(ngram).unwrap_or(&0);
 363        delta.insert(ngram.clone(), after_count as isize - before_count as isize);
 364    }
 365
 366    for (ngram, &after_count) in after {
 367        if !before.contains_key(ngram) {
 368            delta.insert(ngram.clone(), after_count as isize);
 369        }
 370    }
 371
 372    delta
 373}
 374
 375/// Convert negative counts to special deletion tokens.
 376/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
 377/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
 378/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
 379fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
 380    let mut counts = Counts::default();
 381
 382    for (ngram, &delta) in delta {
 383        if delta > 0 {
 384            counts.insert(ngram.clone(), delta as usize);
 385        } else if delta < 0 {
 386            counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
 387        }
 388    }
 389
 390    counts
 391}
 392
 393#[allow(dead_code)]
 394fn count_ngrams(text: &str, n: usize) -> Counts {
 395    let chars: Vec<char> = text.chars().collect();
 396    let mut counts = Counts::default();
 397
 398    for window in chars.windows(n) {
 399        let ngram: String = window.iter().collect();
 400        *counts.entry(ngram).or_insert(0) += 1;
 401    }
 402
 403    counts
 404}
 405
 406pub fn braces_disbalance(text: &str) -> usize {
 407    let mut disbalance = 0isize;
 408
 409    let a = text.chars().filter(|&c| c == '{').count() as isize;
 410    let b = text.chars().filter(|&c| c == '}').count() as isize;
 411    disbalance += (a - b).abs();
 412
 413    let a = text.chars().filter(|&c| c == '(').count() as isize;
 414    let b = text.chars().filter(|&c| c == ')').count() as isize;
 415    disbalance += (a - b).abs();
 416
 417    let a = text.chars().filter(|&c| c == '[').count() as isize;
 418    let b = text.chars().filter(|&c| c == ']').count() as isize;
 419    disbalance += (a - b).abs();
 420
 421    disbalance as usize
 422}
 423
 424/// Extracts changed lines from a unified diff string.
 425/// Returns a bag (multiset) of lines that were added (+) or removed (-).
 426/// The +/- prefix is included in the line to distinguish additions from deletions.
 427pub fn extract_changed_lines_from_diff(diff: &str) -> Counts {
 428    let mut counts = Counts::default();
 429
 430    for line in diff.lines() {
 431        // Skip file headers (--- and +++)
 432        if line.starts_with("---") || line.starts_with("+++") {
 433            continue;
 434        }
 435        // Skip hunk headers (@@)
 436        if line.starts_with("@@") {
 437            continue;
 438        }
 439        // Skip diff header lines (diff --git, index, etc.)
 440        if line.starts_with("diff ") || line.starts_with("index ") {
 441            continue;
 442        }
 443        // Include added and removed lines (with their prefix)
 444        if line.starts_with('+') || line.starts_with('-') {
 445            *counts.entry(line.to_string()).or_insert(0) += 1;
 446        }
 447    }
 448
 449    counts
 450}
 451
 452/// Computes exact lines match metrics between expected and actual patches.
 453/// Treats changed lines as a bag (multiset) - order is discarded but count matters.
 454/// Returns ClassificationMetrics with TP/FP/FN counts.
 455pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
 456    let expected_lines = extract_changed_lines_from_diff(expected_patch);
 457    let actual_lines = extract_changed_lines_from_diff(actual_patch);
 458    ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
 459}
 460
/// Returns whether the patch contains any isolated whitespace-only changes.
///
/// A whitespace-only change is an added or deleted line whose content is empty or
/// contains only whitespace. It is "isolated" when it is not adjacent to any
/// substantive (non-whitespace) change within the same contiguous change group.
///
/// Blank lines added exactly at the cursor's line in the new file are exempted
/// from the check.
pub fn has_isolated_whitespace_changes(patch_str: &str, cursor: Option<&ActualCursor>) -> bool {
    let patch = Patch::parse_unified_diff(patch_str);

    // Cursor row converted to a line number in the new file.
    // NOTE(review): assumes `cursor.row` is 0-based and hunk line numbers are
    // 1-based — confirm against ActualCursor/Patch.
    let cursor_new_file_line = cursor.as_ref().map(|c| (c.row + 1) as usize);

    for hunk in &patch.hunks {
        let lines = &hunk.lines;
        // Line number (in the new file) of the next addition/context line.
        // Deletions intentionally do not advance it.
        let mut new_text_line = hunk.new_start as usize;

        for (i, line) in lines.iter().enumerate() {
            let content = match line {
                PatchLine::Addition(s) => {
                    let addition_line = new_text_line;
                    new_text_line += 1;
                    // Exempt blank additions placed right at the cursor line.
                    if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
                        continue;
                    }
                    s.as_str()
                }
                PatchLine::Deletion(s) => s.as_str(),
                PatchLine::Context(_) => {
                    new_text_line += 1;
                    continue;
                }
                _ => continue,
            };

            // Only whitespace-only additions/deletions are of interest.
            if !content.trim().is_empty() {
                continue;
            }

            if is_whitespace_change_isolated(lines, i) {
                return true;
            }
        }
    }

    false
}
 505
 506fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
 507    // Look backward for a non-whitespace change before hitting a context line
 508    for line in lines[..index].iter().rev() {
 509        match line {
 510            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
 511                if !s.trim().is_empty() {
 512                    return false;
 513                }
 514            }
 515            _ => break,
 516        }
 517    }
 518
 519    // Look forward for a non-whitespace change before hitting a context line
 520    for line in &lines[index + 1..] {
 521        match line {
 522            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
 523                if !s.trim().is_empty() {
 524                    return false;
 525                }
 526            }
 527            _ => break,
 528        }
 529    }
 530
 531    true
 532}
 533
 534/// A simple proxy for whether the prediction respects editable region.
 535pub fn is_editable_region_correct(actual_patch: &str) -> bool {
 536    // A typical sign of a wrong editable region: a bunch of lines deletion
 537    // at the beginning or end of the patch.
 538    let patch = Patch::parse_unified_diff(actual_patch);
 539    if patch.hunks.is_empty() {
 540        return true;
 541    }
 542
 543    let hunk = &patch.hunks[0];
 544    let mut deletions_at_start = 0;
 545
 546    for line in hunk.lines.iter() {
 547        match line {
 548            PatchLine::Deletion(_) => deletions_at_start += 1,
 549            _ => break,
 550        }
 551    }
 552
 553    if deletions_at_start >= 3 {
 554        return false;
 555    }
 556
 557    true
 558}
 559
/// Totals of inserted and deleted tokens across a patch.
#[derive(Debug, Default, Clone)]
pub struct TokenChangeCounts {
    /// Number of tokens added by the patch.
    pub inserted_tokens: usize,
    /// Number of tokens removed by the patch.
    pub deleted_tokens: usize,
}
 565
 566fn classify_token_diff_ops(
 567    expected_tokens: &[&str],
 568    actual_tokens: &[&str],
 569) -> ClassificationMetrics {
 570    classify_token_diff_ops_detailed(expected_tokens, actual_tokens).0
 571}
 572
 573fn classify_token_diff_ops_detailed(
 574    expected_tokens: &[&str],
 575    actual_tokens: &[&str],
 576) -> (ClassificationMetrics, TokenClassificationDetail) {
 577    let mut metrics = ClassificationMetrics::default();
 578    let mut detail = TokenClassificationDetail::default();
 579
 580    for operation in diff_tokens(expected_tokens, actual_tokens) {
 581        match operation {
 582            DiffOp::Equal {
 583                old_start,
 584                old_end,
 585                new_start,
 586                new_end,
 587            } => {
 588                metrics.true_positives += old_end - old_start;
 589                for token in &expected_tokens[old_start..old_end] {
 590                    detail.expected_tokens.push(ClassifiedToken {
 591                        token: (*token).to_string(),
 592                        class: TokenClass::TruePositive,
 593                    });
 594                }
 595                for token in &actual_tokens[new_start..new_end] {
 596                    detail.actual_tokens.push(ClassifiedToken {
 597                        token: (*token).to_string(),
 598                        class: TokenClass::TruePositive,
 599                    });
 600                }
 601            }
 602            DiffOp::Delete(start, end) => {
 603                metrics.false_negatives += end - start;
 604                for token in &expected_tokens[start..end] {
 605                    detail.expected_tokens.push(ClassifiedToken {
 606                        token: (*token).to_string(),
 607                        class: TokenClass::FalseNegative,
 608                    });
 609                }
 610            }
 611            DiffOp::Insert(start, end) => {
 612                metrics.false_positives += end - start;
 613                for token in &actual_tokens[start..end] {
 614                    detail.actual_tokens.push(ClassifiedToken {
 615                        token: (*token).to_string(),
 616                        class: TokenClass::FalsePositive,
 617                    });
 618                }
 619            }
 620            DiffOp::Replace {
 621                old_start,
 622                old_end,
 623                new_start,
 624                new_end,
 625            } => {
 626                metrics.false_negatives += old_end - old_start;
 627                metrics.false_positives += new_end - new_start;
 628
 629                for token in &expected_tokens[old_start..old_end] {
 630                    detail.expected_tokens.push(ClassifiedToken {
 631                        token: (*token).to_string(),
 632                        class: TokenClass::FalseNegative,
 633                    });
 634                }
 635                for token in &actual_tokens[new_start..new_end] {
 636                    detail.actual_tokens.push(ClassifiedToken {
 637                        token: (*token).to_string(),
 638                        class: TokenClass::FalsePositive,
 639                    });
 640                }
 641            }
 642        }
 643    }
 644
 645    (metrics, detail)
 646}
 647
 648fn classify_token_texts(expected_text: &str, actual_text: &str) -> ClassificationMetrics {
 649    let expected_tokens = tokenize(expected_text);
 650    let actual_tokens = tokenize(actual_text);
 651    classify_token_diff_ops(&expected_tokens, &actual_tokens)
 652}
 653
 654fn classify_token_texts_detailed(
 655    expected_text: &str,
 656    actual_text: &str,
 657) -> (ClassificationMetrics, TokenClassificationDetail) {
 658    let expected_tokens = tokenize(expected_text);
 659    let actual_tokens = tokenize(actual_text);
 660    classify_token_diff_ops_detailed(&expected_tokens, &actual_tokens)
 661}
 662
 663fn strip_patch_line_prefix(line: &str) -> &str {
 664    line.strip_prefix('-')
 665        .or_else(|| line.strip_prefix('+'))
 666        .unwrap_or(line)
 667}
 668
 669fn extract_patch_change_blocks(patch: &str) -> Vec<(String, String)> {
 670    let mut blocks = Vec::new();
 671    let mut old_lines: Vec<&str> = Vec::new();
 672    let mut new_lines: Vec<&str> = Vec::new();
 673
 674    let flush = |old_lines: &mut Vec<&str>,
 675                 new_lines: &mut Vec<&str>,
 676                 blocks: &mut Vec<(String, String)>| {
 677        if old_lines.is_empty() && new_lines.is_empty() {
 678            return;
 679        }
 680
 681        let old_text = old_lines
 682            .iter()
 683            .map(|line| strip_patch_line_prefix(line))
 684            .collect::<Vec<_>>()
 685            .join("\n");
 686
 687        let new_text = new_lines
 688            .iter()
 689            .map(|line| strip_patch_line_prefix(line))
 690            .collect::<Vec<_>>()
 691            .join("\n");
 692
 693        blocks.push((old_text, new_text));
 694        old_lines.clear();
 695        new_lines.clear();
 696    };
 697
 698    for line in patch.lines() {
 699        if line.starts_with("---")
 700            || line.starts_with("+++")
 701            || line.starts_with("@@")
 702            || line.starts_with("diff ")
 703            || line.starts_with("index ")
 704        {
 705            flush(&mut old_lines, &mut new_lines, &mut blocks);
 706        } else if line.starts_with('-') {
 707            old_lines.push(line);
 708        } else if line.starts_with('+') {
 709            new_lines.push(line);
 710        } else {
 711            flush(&mut old_lines, &mut new_lines, &mut blocks);
 712        }
 713    }
 714
 715    flush(&mut old_lines, &mut new_lines, &mut blocks);
 716    blocks
 717}
 718
 719fn collect_patch_side_text<F>(patch: &str, mut select_side: F) -> String
 720where
 721    F: FnMut(&(String, String)) -> &str,
 722{
 723    let mut text = String::new();
 724
 725    for block in extract_patch_change_blocks(patch) {
 726        let block_text = select_side(&block);
 727        if block_text.is_empty() {
 728            continue;
 729        }
 730
 731        if !text.is_empty() {
 732            text.push('\n');
 733        }
 734        text.push_str(block_text);
 735    }
 736
 737    text
 738}
 739
/// Full breakdown of a `token_match` computation, for debugging/inspection.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenMatchDebugReport {
    /// Deleted-side text gathered from the expected patch.
    pub expected_deleted_text: String,
    /// Deleted-side text gathered from the actual patch.
    pub actual_deleted_text: String,
    /// Inserted-side text gathered from the expected patch.
    pub expected_inserted_text: String,
    /// Inserted-side text gathered from the actual patch.
    pub actual_inserted_text: String,
    /// Per-token classification for the deleted sides.
    pub deleted: TokenClassificationDetail,
    /// Per-token classification for the inserted sides.
    pub inserted: TokenClassificationDetail,
    /// Combined deletion + insertion metrics.
    pub metrics: ClassificationMetrics,
}
 750
 751/// Computes token-match precision/recall counts between expected and actual patches.
 752///
 753/// Deletions and insertions are aligned independently, then their counts are summed.
 754/// Tokenization uses `word_diff::tokenize`, so identifiers, whitespace runs, and punctuation
 755/// are compared using the same token boundaries as the word-diff view.
 756pub fn token_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
 757    token_match_debug_report(expected_patch, actual_patch).metrics
 758}
 759
 760pub fn token_match_debug_report(expected_patch: &str, actual_patch: &str) -> TokenMatchDebugReport {
 761    let expected_deleted =
 762        collect_patch_side_text(expected_patch, |(old_text, _)| old_text.as_str());
 763    let actual_deleted = collect_patch_side_text(actual_patch, |(old_text, _)| old_text.as_str());
 764    let expected_inserted =
 765        collect_patch_side_text(expected_patch, |(_, new_text)| new_text.as_str());
 766    let actual_inserted = collect_patch_side_text(actual_patch, |(_, new_text)| new_text.as_str());
 767
 768    let (mut metrics, deleted_detail) =
 769        classify_token_texts_detailed(&expected_deleted, &actual_deleted);
 770    let (inserted_metrics, inserted_detail) =
 771        classify_token_texts_detailed(&expected_inserted, &actual_inserted);
 772    metrics.accumulate(&inserted_metrics);
 773
 774    TokenMatchDebugReport {
 775        expected_deleted_text: expected_deleted,
 776        actual_deleted_text: actual_deleted,
 777        expected_inserted_text: expected_inserted,
 778        actual_inserted_text: actual_inserted,
 779        deleted: deleted_detail,
 780        inserted: inserted_detail,
 781        metrics,
 782    }
 783}
 784
 785/// Counts the number of inserted and deleted tokens in a unified diff patch.
 786///
 787/// Tokens are words and whitespace sequences (as defined by `word_diff::tokenize`).
 788/// Within each hunk, the old (`-`) and new (`+`) lines are compared at the token level
 789/// using an LCS-based diff, so modified lines only count the actually changed tokens
 790/// rather than the entire line.
 791
 792pub fn count_patch_token_changes(patch: &str) -> TokenChangeCounts {
 793    let mut counts = TokenChangeCounts::default();
 794
 795    for (old_text, new_text) in extract_patch_change_blocks(patch) {
 796        let metrics = classify_token_texts(&old_text, &new_text);
 797        counts.deleted_tokens += metrics.false_negatives;
 798        counts.inserted_tokens += metrics.false_positives;
 799    }
 800
 801    counts
 802}
 803
 804#[cfg(test)]
 805mod test_optimization {
 806    use super::*;
 807
 808    #[test]
 809    fn test_extract_changed_regions_simple() {
 810        let original: Vec<char> = "hello world".chars().collect();
 811        let modified: Vec<char> = "hello there".chars().collect();
 812
 813        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);
 814
 815        // "world" vs "there" - with 5 chars context, we get "ello world" vs "ello there"
 816        // (or less if not enough chars available)
 817        assert!(orig_region.len() < original.len());
 818        assert!(mod_region.len() < modified.len());
 819    }
 820
 821    #[test]
 822    fn test_extract_changed_regions_insertion() {
 823        let original: Vec<char> = "abcdef".chars().collect();
 824        let modified: Vec<char> = "abcXYZdef".chars().collect();
 825
 826        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);
 827
 828        // The insertion is between c and d, so we need context around that point
 829        assert!(orig_region.len() <= original.len());
 830        assert!(mod_region.iter().collect::<String>().contains("XYZ"));
 831    }
 832
 833    #[test]
 834    fn test_extract_changed_regions_identical() {
 835        let text: Vec<char> = "identical text".chars().collect();
 836
 837        let (orig_region, mod_region) = extract_changed_regions(&text, &text);
 838
 839        // When texts are identical, regions should be empty
 840        assert!(orig_region.is_empty());
 841        assert!(mod_region.is_empty());
 842    }
 843
 844    #[test]
 845    fn test_optimized_matches_original_score() {
 846        // Test that our optimized version produces the same results
 847        let test_cases = vec![
 848            ("hello world", "hello there", "hello world"),
 849            (
 850                "fn main() {}",
 851                "fn main() { println!(); }",
 852                "fn main() { print!(); }",
 853            ),
 854            ("abcdefghij", "abcXXXghij", "abcYYghij"),
 855            ("unchanged", "unchanged", "unchanged"),
 856            (
 857                "prefix middle suffix",
 858                "prefix CHANGED suffix",
 859                "prefix middle suffix",
 860            ),
 861        ];
 862
 863        for (original, expected, actual) in test_cases {
 864            let score = delta_chr_f(original, expected, actual);
 865            // Just verify it produces a reasonable score (0-100)
 866            assert!(
 867                score >= 0.0 && score <= 100.0,
 868                "Score {} out of range for ({}, {}, {})",
 869                score,
 870                original,
 871                expected,
 872                actual
 873            );
 874        }
 875    }
 876
 877    #[test]
 878    fn test_optimized_equals_reference() {
 879        // Comprehensive test that optimized version matches reference implementation exactly
 880        let test_cases = vec![
 881            // Basic cases
 882            ("hello world", "hello there", "hello world"),
 883            ("hello world", "hello there", "hello there"),
 884            ("unchanged", "unchanged", "unchanged"),
 885            // Code-like cases
 886            (
 887                "fn main() { println!(\"Hello\"); }",
 888                "fn main() { println!(\"Hello, World!\"); }",
 889                "fn main() { println!(\"Hello, World!\"); }",
 890            ),
 891            (
 892                "fn main() { println!(\"Hello\"); }",
 893                "fn main() { println!(\"Hello, World!\"); }",
 894                "fn main() { println!(\"Goodbye\"); }",
 895            ),
 896            // Insertion
 897            ("abcdef", "abcXYZdef", "abcdef"),
 898            ("abcdef", "abcXYZdef", "abcXYZdef"),
 899            ("abcdef", "abcXYZdef", "abcABCdef"),
 900            // Deletion
 901            ("abcXYZdef", "abcdef", "abcXYZdef"),
 902            ("abcXYZdef", "abcdef", "abcdef"),
 903            // Multiple changes (simulated by different expected/actual)
 904            ("one two three four", "one THREE four", "one two FOUR"),
 905            // Edge cases
 906            ("a", "b", "c"),
 907            ("", "abc", ""),
 908            ("abc", "", "abc"),
 909            // Longer text with small change
 910            (
 911                "This is a longer piece of text that contains many words and characters to process",
 912                "This is a longer piece of TEXT that contains many words and characters to process",
 913                "This is a longer piece of text that contains many words and characters to process",
 914            ),
 915            // Change at the beginning
 916            (
 917                "ORIGINAL start of text",
 918                "NEW start of text",
 919                "DIFFERENT start of text",
 920            ),
 921            // Change at the end
 922            (
 923                "text ending ORIGINAL",
 924                "text ending NEW",
 925                "text ending DIFFERENT",
 926            ),
 927            // Whitespace (should be ignored)
 928            ("hello   world", "hello   there", "hello   world"),
 929            ("a b c d", "a X c d", "a Y c d"),
 930        ];
 931
 932        for (original, expected, actual) in test_cases {
 933            let optimized_score = delta_chr_f(original, expected, actual);
 934            let reference_score = delta_chr_f_reference(original, expected, actual);
 935
 936            assert!(
 937                (optimized_score - reference_score).abs() < 1e-10,
 938                "Mismatch for ({:?}, {:?}, {:?}):\n  optimized: {}\n  reference: {}",
 939                original,
 940                expected,
 941                actual,
 942                optimized_score,
 943                reference_score
 944            );
 945        }
 946    }
 947}
 948
// Unit tests for the patch-metric helpers defined above (delta_chr_f,
// exact_lines_match, token_match, whitespace heuristics, token counting).
#[cfg(test)]
mod test {
    use super::*;
    use crate::example::ActualCursor;
    use indoc::indoc;

    /// Builds an `ActualCursor` positioned at the given 1-based line
    /// (stored 0-based in `row`); all other fields are zeroed/empty.
    fn cursor_on_line(one_based_line: u32) -> ActualCursor {
        ActualCursor {
            path: String::new(),
            row: one_based_line - 1,
            column: 0,
            offset: 0,
            editable_region_offset: None,
        }
    }

    #[test]
    fn test_delta_chr_f_perfect_match() {
        let original = "fn main() {    println!(\"Hello\");}";
        let expected = "fn main() {    println!(\"Hello, World!\");}";

        let score = delta_chr_f(original, expected, expected);
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_delta_chr_f_wrong_edit() {
        // When the edit is wrong
        let original = "one two three";
        let expected = "one three"; // deleted "two "
        let actual = "one two four"; // deleted "three", added "four"

        // Then the score should be low
        let score = delta_chr_f(original, expected, actual);
        assert!(score > 20.0 && score < 40.0);
    }

    #[test]
    fn test_delta_chr_f_partial_match() {
        let original = "let x = 42;";
        let expected = "let x = 100;";
        let actual = "let x = 99;";

        // We got the edit location right, but the replacement text is wrong.
        // Deleted ngrams will match, bringing the score somewhere in the middle.
        let score = delta_chr_f(original, expected, actual);
        assert!(score > 40.0 && score < 60.0);
    }

    #[test]
    fn test_delta_chr_f_missed_edit() {
        // When the prediction makes no changes
        let original = "prefix old suffix";
        let expected = "prefix new suffix";
        let actual = "prefix old suffix"; // no change

        // Then the score should be low (all expected changes are false negatives)
        let score = delta_chr_f(original, expected, actual);
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_extra_edit() {
        // When adding unexpected content
        let original = "helloworld";
        let expected = "helloworld"; // no change expected
        let actual = "helloextraworld"; // added "extra"

        // Then the score should be low (all actual changes are false positives)
        let score = delta_chr_f(original, expected, actual);
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_no_changes() {
        let text = "unchanged text";
        let score = delta_chr_f(text, text, text);
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_braces_disbalance() {
        // Balanced braces -> 0; the asserts below pin the counting behavior
        // for unclosed and mismatched delimiters.
        let text = "let x = { 1 + 2 };";
        assert_eq!(braces_disbalance(text), 0);

        let text = "let x = { 1 + 2";
        assert_eq!(braces_disbalance(text), 1);

        let text = "let x = { 1 + 2 )";
        assert_eq!(braces_disbalance(text), 2);
    }

    #[test]
    fn test_extract_changed_lines_from_diff() {
        let diff = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn main() {
-    println!("hello");
+    println!("world");
 }"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-    println!(\"hello\");"), Some(&1));
        assert_eq!(counts.get("+    println!(\"world\");"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_extract_changed_lines_skips_headers() {
        // diff/index/---/+++ header lines must not be counted as changes.
        let diff = r#"diff --git a/file.rs b/file.rs
index abc123..def456 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,2 @@
-old line
+new line"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-old line"), Some(&1));
        assert_eq!(counts.get("+new line"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_exact_lines_match_perfect() {
        let expected = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 4);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_partial() {
        let expected = r#"-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"-old line 1
+new line 1
+extra line"#;

        let metrics = exact_lines_match(expected, actual);
        // TP: "-old line 1" and "+new line 1" (2)
        // FP: "+extra line" (1)
        // FN: "-old line 2" and "+new line 2" (2)
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 1);
        assert_eq!(metrics.false_negatives, 2);
    }

    #[test]
    fn test_exact_lines_match_no_overlap() {
        let expected = r#"-line a
+line b"#;

        let actual = r#"-line x
+line y"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 2);
        assert_eq!(metrics.false_negatives, 2);
        assert!((metrics.precision()).abs() < 1e-6);
        assert!((metrics.recall()).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_duplicate_lines() {
        // Duplicate lines are counted with multiplicity, not deduplicated.
        let expected = r#"+line a
+line a
+line a"#;

        let actual = r#"+line a
+line a"#;

        let metrics = exact_lines_match(expected, actual);
        // Expected has 3 "+line a", actual has 2
        // TP: 2, FN: 1, FP: 0
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 1);
    }

    #[test]
    fn test_exact_lines_match_empty_patches() {
        let metrics = exact_lines_match("", "");
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
    }

    #[test]
    fn test_token_match_perfect() {
        let expected = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let metrics = token_match(expected, actual);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!(metrics.true_positives > 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_token_match_partial_subset_keeps_high_precision() {
        // Actual emits a strict subset of expected tokens: precision stays 1,
        // recall drops below 1.
        let expected = indoc! {"
            @@ -1,2 +1,6 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +    language: OnceCell<Language>,
            +    external_files: Option<Vec<PathBuf>>,
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,3 @@
            -str
            +struct LanguageEntry {
            +}
        "};

        let metrics = token_match(expected, actual);
        assert!(metrics.true_positives > 0);
        assert_eq!(metrics.false_positives, 0);
        assert!(metrics.false_negatives > 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!(metrics.recall() < 1.0);
    }

    #[test]
    fn test_token_match_counts_wrong_tokens_as_fp_and_fn() {
        let expected = indoc! {"
            @@ -1,1 +1,1 @@
            -old_name
            +new_name
        "};

        let actual = indoc! {"
            @@ -1,1 +1,1 @@
            -different_old
            +different_new
        "};

        let metrics = token_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert!(metrics.false_positives > 0);
        assert!(metrics.false_negatives > 0);
    }

    #[test]
    fn test_token_match_debug_report_metrics_match_token_match() {
        // The per-token classification in the debug report must re-aggregate
        // to exactly the same totals that token_match reports.
        let expected = indoc! {"
            @@ -1,2 +1,3 @@
            -str
            +struct LanguageEntry {
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let metrics = token_match(expected, actual);
        let report = token_match_debug_report(expected, actual);

        assert_eq!(report.metrics, metrics);

        let expected_tp = report
            .deleted
            .expected_tokens
            .iter()
            .chain(report.inserted.expected_tokens.iter())
            .filter(|token| token.class == TokenClass::TruePositive)
            .count();
        let expected_fn = report
            .deleted
            .expected_tokens
            .iter()
            .chain(report.inserted.expected_tokens.iter())
            .filter(|token| token.class == TokenClass::FalseNegative)
            .count();
        let actual_tp = report
            .deleted
            .actual_tokens
            .iter()
            .chain(report.inserted.actual_tokens.iter())
            .filter(|token| token.class == TokenClass::TruePositive)
            .count();
        let actual_fp = report
            .deleted
            .actual_tokens
            .iter()
            .chain(report.inserted.actual_tokens.iter())
            .filter(|token| token.class == TokenClass::FalsePositive)
            .count();

        assert_eq!(expected_tp, report.metrics.true_positives);
        assert_eq!(actual_tp, report.metrics.true_positives);
        assert_eq!(expected_fn, report.metrics.false_negatives);
        assert_eq!(actual_fp, report.metrics.false_positives);
    }

    #[test]
    fn test_token_match_debug_report_marks_inserted_extra_tokens_as_fp() {
        let expected = indoc! {"
            @@ -1,1 +1,1 @@
            -a
            +value
        "};

        let actual = indoc! {"
            @@ -1,1 +1,1 @@
            -a
            +value_extra
        "};

        let report = token_match_debug_report(expected, actual);

        assert_eq!(report.metrics.false_positives, 1);
        assert_eq!(report.metrics.false_negatives, 1);

        assert!(
            report
                .inserted
                .actual_tokens
                .iter()
                .any(|token| token.token == "value_extra"
                    && token.class == TokenClass::FalsePositive)
        );
        assert!(
            report
                .inserted
                .expected_tokens
                .iter()
                .any(|token| token.token == "value" && token.class == TokenClass::FalseNegative)
        );
    }

    #[test]
    fn test_is_editable_region_correct() {
        // A patch that deletes context lines from before the editable region
        // is rejected; an empty hunk is accepted.
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -context
            -removed
            -from the beginning of the file
            import sys
            +sys.exit(0)

            "};
        assert!(!is_editable_region_correct(patch));

        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            "};
        assert!(is_editable_region_correct(patch));
    }

    #[test]
    fn test_isolated_whitespace_purely_whitespace_patch() {
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_adjacent_to_real_change() {
        // A blank-line addition next to a substantive addition is not
        // considered "isolated".
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
            +    let x = 1;
                 println!(\"hello\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_no_whitespace_changes() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_deletion() {
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_mixed_groups() {
        // One isolated whitespace group is enough, even when other groups in
        // the same patch contain real changes.
        let patch = indoc! {"
            @@ -1,7 +1,8 @@
             fn main() {
            +
                 let x = 1;
            -    let y = 2;
            +    let y = 3;

            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_empty_patch() {
        let patch = "";
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_skipped_on_cursor_line() {
        // The addition of a blank line at new-file line 2 should be skipped
        // because the cursor is on that line.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        // New-file line 2 is the added blank line
        let cursor = cursor_on_line(2);
        assert!(!has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
        // The blank line is at new-file line 2, but the cursor is on line 1.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(1);
        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
        // Deletions don't have a new-file line, so cursor can't suppress them.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(2);
        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_count_patch_token_changes_real_world_rename() {
        // Real-world patch that was reported as returning 0 tokens
        let patch = "--- a/sip_call\\README.md\n+++ b/sip_call\\README.md\n@@ -1,1 +1,1 @@\n-# \n+# SIP Call\n";
        let counts = count_patch_token_changes(patch);
        // "# " vs "# SIP Call" — the "SIP" and "Call" tokens (and a whitespace token) are inserted
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert_eq!(counts.deleted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_real_world_expansion() {
        // Real-world patch: single token expanded to multiple lines
        let patch = "--- a/task1/src/app/app.html\n+++ b/task1/src/app/app.html\n@@ -1,7 +1,9 @@\n <style>\n-  m\n+  main {\n+    \n+  }\n </style>\n \n <main>\n   \n </main>\n";
        let counts = count_patch_token_changes(patch);
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert!(
            counts.deleted_tokens > 0,
            "expected deleted tokens > 0, got {}",
            counts.deleted_tokens
        );
    }

    #[test]
    fn test_count_patch_token_changes_simple_replacement() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 1, "deleted: \"hello\"");
        assert_eq!(counts.inserted_tokens, 1, "inserted: \"world\"");
    }

    #[test]
    fn test_count_patch_token_changes_insertion_only() {
        let patch = indoc! {"
            @@ -1,2 +1,3 @@
             fn main() {
            +    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert!(counts.inserted_tokens > 0);
    }

    #[test]
    fn test_count_patch_token_changes_deletion_only() {
        let patch = indoc! {"
            @@ -1,3 +1,2 @@
             fn main() {
            -    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert!(counts.deleted_tokens > 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_empty_patch() {
        let patch = "";
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_multiple_hunks() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    let x = 1;
            +    let x = 2;
             }
            @@ -10,3 +10,3 @@
             fn other() {
            -    let y = 3;
            +    let y = 4;
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 2, "deleted: \"1\" and \"3\"");
        assert_eq!(counts.inserted_tokens, 2, "inserted: \"2\" and \"4\"");
    }

    #[test]
    fn test_count_patch_token_changes_multiword_change() {
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -hello world foo
            +hello bar baz
        "};
        let counts = count_patch_token_changes(patch);
        // "world" and "foo" deleted, "bar" and "baz" inserted
        // (whitespace tokens between them may also count)
        assert!(counts.deleted_tokens >= 2);
        assert!(counts.inserted_tokens >= 2);
    }

    #[test]
    fn test_whitespace_collapse() {
        // Per the expected output: "   \n\n\n   " collapses to " \n ".
        let text = "abc   \n\n\n   123";
        let collapsed = collapse_whitespace(text.chars());
        assert_eq!(
            collapsed,
            vec!['a', 'b', 'c', ' ', '\n', ' ', '1', '2', '3']
        );
    }
}