//! patch_metrics.rs — metrics for comparing predicted patches against expected edits.

   1use std::collections::HashMap;
   2
   3use crate::tokenize::tokenize;
   4use serde::Serialize;
   5use similar::{DiffTag, TextDiff};
   6
/// Multiset of n-grams (or diff lines) mapped to their occurrence counts.
pub type Counts = HashMap<String, usize>;
/// Signed per-key count difference between two `Counts` (after minus before).
type CountsDelta = HashMap<String, isize>;
   9
/// Context characters needed on each side of a change to capture all affected n-grams
/// (an n-gram of the maximum order can straddle a change boundary by at most n-1 chars).
const CONTEXT_CHARS: usize = CHR_F_CHAR_ORDER - 1;
  12
/// Raw TP/FP/FN tallies from comparing an "actual" multiset against an "expected" one.
#[derive(Default, Debug, Clone, Serialize)]
pub struct ClassificationMetrics {
    /// Items present in both multisets, counted up to the smaller multiplicity.
    pub true_positives: usize,
    /// Items in actual beyond their expected multiplicity.
    pub false_positives: usize,
    /// Items expected but missing (or under-counted) in actual.
    pub false_negatives: usize,
}
  19
  20impl ClassificationMetrics {
  21    pub fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
  22        let mut true_positives = 0;
  23        let mut false_positives = 0;
  24        let mut false_negatives = 0;
  25
  26        for (ngram, &expected_count) in expected {
  27            let actual_count = *actual.get(ngram).unwrap_or(&0);
  28            if actual_count > expected_count {
  29                false_positives += actual_count - expected_count;
  30            } else {
  31                false_negatives += expected_count - actual_count;
  32            }
  33            true_positives += expected_count.min(actual_count);
  34        }
  35
  36        for (ngram, &actual_count) in actual {
  37            if !expected.contains_key(ngram) {
  38                false_positives += actual_count;
  39            }
  40        }
  41
  42        ClassificationMetrics {
  43            true_positives,
  44            false_positives,
  45            false_negatives,
  46        }
  47    }
  48
  49    pub fn accumulate(&mut self, other: &ClassificationMetrics) {
  50        self.true_positives += other.true_positives;
  51        self.false_positives += other.false_positives;
  52        self.false_negatives += other.false_negatives;
  53    }
  54
  55    pub fn precision(&self) -> f64 {
  56        if self.true_positives + self.false_positives == 0 {
  57            0.0
  58        } else {
  59            self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
  60        }
  61    }
  62
  63    pub fn recall(&self) -> f64 {
  64        if self.true_positives + self.false_negatives == 0 {
  65            0.0
  66        } else {
  67            self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
  68        }
  69    }
  70
  71    pub fn f1(&self) -> f64 {
  72        let precision = self.precision();
  73        let recall = self.recall();
  74        if precision + recall == 0.0 {
  75            0.0
  76        } else {
  77            2.0 * precision * recall / (precision + recall)
  78        }
  79    }
  80}
  81
/// Whitespace normalization strategies applied before chrF n-gram extraction.
enum ChrfWhitespace {
    /// Preserve whitespace as-is
    #[allow(unused)]
    Unchanged,

    /// Ignore all whitespace differences
    #[allow(unused)]
    Ignore,

    /// Collapse whitespace into single spaces
    Collapse,
}
  94
/// Maximum n-gram order used by the chrF computation (orders 1..=6).
const CHR_F_CHAR_ORDER: usize = 6;
/// Beta for the F-score; beta < 1 weights precision more heavily than recall.
const CHR_F_BETA: f64 = 0.5;
/// Whitespace normalization applied to all inputs before n-gram extraction.
const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Collapse;
  98
/// Returns the beta constant used by `delta_chr_f` (exposed for reporting).
pub fn delta_chr_f_beta() -> f64 {
    CHR_F_BETA
}
 102
/// Result of a delta-chrF comparison; see `delta_chr_f`.
#[derive(Default, Debug, Clone, Serialize)]
pub struct DeltaChrFMetrics {
    /// F-beta score scaled to 0.0..=100.0.
    pub score: f64,
    /// The beta used (copied from `CHR_F_BETA`).
    pub beta: f64,
    /// TP/FP/FN accumulated across all n-gram orders.
    pub counts: ClassificationMetrics,
    /// Precision averaged over the n-gram orders.
    pub precision: f64,
    /// Recall averaged over the n-gram orders.
    pub recall: f64,
}
 111
 112/// Computes delta-chrF metrics that compare two sets of edits.
 113///
 114/// This metric works by:
 115/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
 116/// 2. Comparing these deltas to measure how well actual edits match expected edits
 117///
 118/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
 119/// the expected edits.
 120pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> DeltaChrFMetrics {
 121    if original == expected && expected == actual {
 122        return DeltaChrFMetrics {
 123            score: 100.0,
 124            beta: CHR_F_BETA,
 125            precision: 1.0,
 126            recall: 1.0,
 127            ..DeltaChrFMetrics::default()
 128        };
 129    }
 130
 131    let orig_chars: Vec<char> = filter_whitespace_chars(original);
 132    let exp_chars: Vec<char> = filter_whitespace_chars(expected);
 133    let act_chars: Vec<char> = filter_whitespace_chars(actual);
 134
 135    // Find the changed regions between original→expected and original→actual
 136    // We only need to compute n-grams on these regions (plus context for boundary n-grams)
 137    let (orig_for_exp, exp_region) = extract_changed_regions(&orig_chars, &exp_chars);
 138    let (orig_for_act, act_region) = extract_changed_regions(&orig_chars, &act_chars);
 139
 140    let mut total_precision = 0.0;
 141    let mut total_recall = 0.0;
 142    let mut total_counts = ClassificationMetrics::default();
 143
 144    for order in 1..=CHR_F_CHAR_ORDER {
 145        let orig_ngrams_for_exp = count_ngrams_from_chars(&orig_for_exp, order);
 146        let exp_ngrams = count_ngrams_from_chars(&exp_region, order);
 147        let expected_delta = compute_ngram_delta(&exp_ngrams, &orig_ngrams_for_exp);
 148
 149        let orig_ngrams_for_act = count_ngrams_from_chars(&orig_for_act, order);
 150        let act_ngrams = count_ngrams_from_chars(&act_region, order);
 151        let actual_delta = compute_ngram_delta(&act_ngrams, &orig_ngrams_for_act);
 152
 153        if expected_delta.is_empty() && actual_delta.is_empty() {
 154            total_precision += 1.0;
 155            total_recall += 1.0;
 156            continue;
 157        }
 158
 159        let expected_counts = ngram_delta_to_counts(&expected_delta);
 160        let actual_counts = ngram_delta_to_counts(&actual_delta);
 161
 162        let counts = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
 163        total_precision += counts.precision();
 164        total_recall += counts.recall();
 165        total_counts.accumulate(&counts);
 166    }
 167
 168    let average_precision = total_precision / CHR_F_CHAR_ORDER as f64;
 169    let average_recall = total_recall / CHR_F_CHAR_ORDER as f64;
 170    let score = if average_precision + average_recall == 0.0 {
 171        0.0
 172    } else {
 173        (1.0 + CHR_F_BETA * CHR_F_BETA) * average_precision * average_recall
 174            / (CHR_F_BETA * CHR_F_BETA * average_precision + average_recall)
 175            * 100.0
 176    };
 177
 178    DeltaChrFMetrics {
 179        score,
 180        beta: CHR_F_BETA,
 181        counts: total_counts,
 182        precision: average_precision,
 183        recall: average_recall,
 184    }
 185}
 186
/// Reference implementation of delta-chrF metrics (original, non-optimized version).
/// Used for testing that the optimized version produces identical results.
///
/// Unlike `delta_chr_f`, this computes n-gram counts over the *entire* texts
/// rather than only the changed regions; the two must agree because n-grams
/// from unchanged regions cancel out of the deltas.
#[cfg(test)]
fn delta_chr_f_reference(original: &str, expected: &str, actual: &str) -> DeltaChrFMetrics {
    // Identical inputs are a perfect match by definition.
    if original == expected && expected == actual {
        return DeltaChrFMetrics {
            score: 100.0,
            beta: CHR_F_BETA,
            precision: 1.0,
            recall: 1.0,
            ..DeltaChrFMetrics::default()
        };
    }

    // Per-order n-gram counts; index 0 holds order 1, ..., index 5 holds order 6.
    let original_ngrams = chr_f_ngram_counts(original);
    let expected_ngrams = chr_f_ngram_counts(expected);
    let actual_ngrams = chr_f_ngram_counts(actual);

    let mut total_precision = 0.0;
    let mut total_recall = 0.0;
    let mut total_counts = ClassificationMetrics::default();

    for order in 0..CHR_F_CHAR_ORDER {
        let expected_delta = compute_ngram_delta(&expected_ngrams[order], &original_ngrams[order]);
        let actual_delta = compute_ngram_delta(&actual_ngrams[order], &original_ngrams[order]);

        // No changes at this order on either side counts as a perfect order.
        if expected_delta.is_empty() && actual_delta.is_empty() {
            total_precision += 1.0;
            total_recall += 1.0;
            continue;
        }

        let expected_counts = ngram_delta_to_counts(&expected_delta);
        let actual_counts = ngram_delta_to_counts(&actual_delta);

        let counts = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
        total_precision += counts.precision();
        total_recall += counts.recall();
        total_counts.accumulate(&counts);
    }

    // Macro-average over the orders, then combine with the F-beta formula.
    let average_precision = total_precision / CHR_F_CHAR_ORDER as f64;
    let average_recall = total_recall / CHR_F_CHAR_ORDER as f64;
    let score = if average_precision + average_recall == 0.0 {
        0.0
    } else {
        (1.0 + CHR_F_BETA * CHR_F_BETA) * average_precision * average_recall
            / (CHR_F_BETA * CHR_F_BETA * average_precision + average_recall)
            * 100.0
    };

    DeltaChrFMetrics {
        score,
        beta: CHR_F_BETA,
        counts: total_counts,
        precision: average_precision,
        recall: average_recall,
    }
}
 246
 247/// Filter whitespace from a string and return as Vec<char>
 248fn filter_whitespace_chars(text: &str) -> Vec<char> {
 249    match CHR_F_WHITESPACE {
 250        ChrfWhitespace::Unchanged => text.chars().collect(),
 251        ChrfWhitespace::Ignore => text.chars().filter(|c| !c.is_whitespace()).collect(),
 252        ChrfWhitespace::Collapse => collapse_whitespace(text.chars()),
 253    }
 254}
 255
 256/// Collapse whitespace into single spaces.
 257/// Newlines and spaces are collapsed separately.
 258fn collapse_whitespace(chars: impl Iterator<Item = char>) -> Vec<char> {
 259    let mut result = Vec::new();
 260    let mut last_whitespace = None;
 261    for c in chars {
 262        if c.is_whitespace() && c != '\n' {
 263            if last_whitespace != Some(' ') {
 264                result.push(' ');
 265                last_whitespace = Some(' ');
 266            }
 267        } else if c == '\n' {
 268            if last_whitespace != Some('\n') {
 269                result.push(c);
 270                last_whitespace = Some('\n');
 271            }
 272        } else {
 273            result.push(c);
 274            last_whitespace = None;
 275        }
 276    }
 277    result
 278}
 279
 280/// Extract only the changed regions between two texts, with context for n-gram boundaries.
 281///
 282/// Returns (original_affected_region, modified_affected_region) as Vec<char>.
 283///
 284/// The key insight: when computing n-gram delta between two nearly-identical texts,
 285/// n-grams from unchanged regions cancel out. We only need to process:
 286/// 1. The changed content itself
 287/// 2. CONTEXT_CHARS (n-1) characters before and after, to capture boundary-crossing n-grams
 288fn extract_changed_regions(original: &[char], modified: &[char]) -> (Vec<char>, Vec<char>) {
 289    // Find longest common prefix
 290    let prefix_len = original
 291        .iter()
 292        .zip(modified.iter())
 293        .take_while(|(a, b)| a == b)
 294        .count();
 295
 296    // Find longest common suffix (that doesn't overlap with prefix)
 297    let orig_remaining = original.len().saturating_sub(prefix_len);
 298    let mod_remaining = modified.len().saturating_sub(prefix_len);
 299    let max_suffix = orig_remaining.min(mod_remaining);
 300
 301    let suffix_len = original
 302        .iter()
 303        .rev()
 304        .zip(modified.iter().rev())
 305        .take(max_suffix)
 306        .take_while(|(a, b)| a == b)
 307        .count();
 308
 309    // Calculate the changed region boundaries
 310    let orig_change_start = prefix_len;
 311    let orig_change_end = original.len().saturating_sub(suffix_len);
 312    let mod_change_start = prefix_len;
 313    let mod_change_end = modified.len().saturating_sub(suffix_len);
 314
 315    // If there's no actual change, return empty regions
 316    if orig_change_start >= orig_change_end && mod_change_start >= mod_change_end {
 317        return (Vec::new(), Vec::new());
 318    }
 319
 320    // Expand to include context for n-gram boundaries
 321    let orig_context_start = orig_change_start.saturating_sub(CONTEXT_CHARS);
 322    let orig_context_end = (orig_change_end + CONTEXT_CHARS).min(original.len());
 323    let mod_context_start = mod_change_start.saturating_sub(CONTEXT_CHARS);
 324    let mod_context_end = (mod_change_end + CONTEXT_CHARS).min(modified.len());
 325
 326    let orig_region: Vec<char> = original[orig_context_start..orig_context_end].to_vec();
 327    let mod_region: Vec<char> = modified[mod_context_start..mod_context_end].to_vec();
 328
 329    (orig_region, mod_region)
 330}
 331
 332/// Count n-grams directly from a char slice (avoids String allocation for the full text)
 333fn count_ngrams_from_chars(chars: &[char], n: usize) -> Counts {
 334    let mut counts = Counts::default();
 335
 336    if chars.len() < n {
 337        return counts;
 338    }
 339
 340    for window in chars.windows(n) {
 341        let ngram: String = window.iter().collect();
 342        *counts.entry(ngram).or_insert(0) += 1;
 343    }
 344
 345    counts
 346}
 347
 348#[allow(dead_code)]
 349fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
 350    let text = match CHR_F_WHITESPACE {
 351        ChrfWhitespace::Unchanged => text.to_string(),
 352        ChrfWhitespace::Ignore => text
 353            .chars()
 354            .filter(|c| !c.is_whitespace())
 355            .collect::<String>(),
 356        ChrfWhitespace::Collapse => collapse_whitespace(text.chars())
 357            .into_iter()
 358            .collect::<String>(),
 359    };
 360
 361    (1..=CHR_F_CHAR_ORDER)
 362        .map(|order| count_ngrams(&text, order))
 363        .collect()
 364}
 365
 366fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
 367    let mut delta = CountsDelta::default();
 368
 369    for (ngram, &before_count) in before {
 370        let after_count = *after.get(ngram).unwrap_or(&0);
 371        delta.insert(ngram.clone(), after_count as isize - before_count as isize);
 372    }
 373
 374    for (ngram, &after_count) in after {
 375        if !before.contains_key(ngram) {
 376            delta.insert(ngram.clone(), after_count as isize);
 377        }
 378    }
 379
 380    delta
 381}
 382
 383/// Convert negative counts to special deletion tokens.
 384/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
 385/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
 386/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
 387fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
 388    let mut counts = Counts::default();
 389
 390    for (ngram, &delta) in delta {
 391        if delta > 0 {
 392            counts.insert(ngram.clone(), delta as usize);
 393        } else if delta < 0 {
 394            counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
 395        }
 396    }
 397
 398    counts
 399}
 400
 401#[allow(dead_code)]
 402fn count_ngrams(text: &str, n: usize) -> Counts {
 403    let chars: Vec<char> = text.chars().collect();
 404    let mut counts = Counts::default();
 405
 406    for window in chars.windows(n) {
 407        let ngram: String = window.iter().collect();
 408        *counts.entry(ngram).or_insert(0) += 1;
 409    }
 410
 411    counts
 412}
 413
 414pub fn braces_disbalance(text: &str) -> usize {
 415    let mut disbalance = 0isize;
 416
 417    let a = text.chars().filter(|&c| c == '{').count() as isize;
 418    let b = text.chars().filter(|&c| c == '}').count() as isize;
 419    disbalance += (a - b).abs();
 420
 421    let a = text.chars().filter(|&c| c == '(').count() as isize;
 422    let b = text.chars().filter(|&c| c == ')').count() as isize;
 423    disbalance += (a - b).abs();
 424
 425    let a = text.chars().filter(|&c| c == '[').count() as isize;
 426    let b = text.chars().filter(|&c| c == ']').count() as isize;
 427    disbalance += (a - b).abs();
 428
 429    disbalance as usize
 430}
 431
 432/// Extracts changed lines from a unified diff string.
 433/// Returns a bag (multiset) of lines that were added (+) or removed (-).
 434/// The +/- prefix is included in the line to distinguish additions from deletions.
 435pub fn extract_changed_lines_from_diff(diff: &str) -> Counts {
 436    let mut counts = Counts::default();
 437
 438    for line in diff.lines() {
 439        // Skip file headers (--- and +++)
 440        if line.starts_with("---") || line.starts_with("+++") {
 441            continue;
 442        }
 443        // Skip hunk headers (@@)
 444        if line.starts_with("@@") {
 445            continue;
 446        }
 447        // Skip diff header lines (diff --git, index, etc.)
 448        if line.starts_with("diff ") || line.starts_with("index ") {
 449            continue;
 450        }
 451        // Include added and removed lines (with their prefix)
 452        if line.starts_with('+') || line.starts_with('-') {
 453            *counts.entry(line.to_string()).or_insert(0) += 1;
 454        }
 455    }
 456
 457    counts
 458}
 459
 460/// Computes exact lines match metrics between expected and actual patches.
 461/// Treats changed lines as a bag (multiset) - order is discarded but count matters.
 462/// Returns ClassificationMetrics with TP/FP/FN counts.
 463pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
 464    let expected_lines = extract_changed_lines_from_diff(expected_patch);
 465    let actual_lines = extract_changed_lines_from_diff(actual_patch);
 466    ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
 467}
 468
/// Returns whether the patch contains any isolated whitespace-only changes.
///
/// A whitespace-only change is an added or deleted line whose content is empty or
/// contains only whitespace. It is "isolated" when it is not adjacent to any
/// substantive (non-whitespace) change within the same contiguous change group.
///
/// `cursor_row` is a 0-based row in the new file; a whitespace-only addition
/// landing exactly on that line is exempted.
pub fn has_isolated_whitespace_changes(patch_str: &str, cursor_row: Option<u32>) -> bool {
    let patch = Patch::parse_unified_diff(patch_str);

    // Convert the 0-based cursor row to a 1-based new-file line number so it
    // is comparable with hunk header coordinates.
    let cursor_new_file_line = cursor_row.map(|row| (row + 1) as usize);

    for hunk in &patch.hunks {
        let lines = &hunk.lines;
        // New-file line number of the next addition/context line.
        let mut new_text_line = hunk.new_start as usize;

        for (i, line) in lines.iter().enumerate() {
            let content = match line {
                PatchLine::Addition(s) => {
                    let addition_line = new_text_line;
                    new_text_line += 1;
                    // A whitespace-only insertion at the cursor is exempt.
                    if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
                        continue;
                    }
                    s.as_str()
                }
                // Deletions do not advance the new-file line counter.
                PatchLine::Deletion(s) => s.as_str(),
                PatchLine::Context(_) => {
                    new_text_line += 1;
                    continue;
                }
                // Garbage lines are ignored entirely.
                _ => continue,
            };

            // Substantive change: keep scanning.
            if !content.trim().is_empty() {
                continue;
            }

            // Whitespace-only change: flag it when no substantive change is
            // adjacent within the same contiguous +/- group.
            if is_whitespace_change_isolated(lines, i) {
                return true;
            }
        }
    }

    false
}
 513
 514fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
 515    // Look backward for a non-whitespace change before hitting a context line
 516    for line in lines[..index].iter().rev() {
 517        match line {
 518            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
 519                if !s.trim().is_empty() {
 520                    return false;
 521                }
 522            }
 523            _ => break,
 524        }
 525    }
 526
 527    // Look forward for a non-whitespace change before hitting a context line
 528    for line in &lines[index + 1..] {
 529        match line {
 530            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
 531                if !s.trim().is_empty() {
 532                    return false;
 533                }
 534            }
 535            _ => break,
 536        }
 537    }
 538
 539    true
 540}
 541
 542/// A simple proxy for whether the prediction respects editable region.
 543pub fn is_editable_region_correct(actual_patch: &str) -> bool {
 544    // A typical sign of a wrong editable region: a bunch of lines deletion
 545    // at the beginning or end of the patch.
 546    let patch = Patch::parse_unified_diff(actual_patch);
 547    if patch.hunks.is_empty() {
 548        return true;
 549    }
 550
 551    let hunk = &patch.hunks[0];
 552    let mut deletions_at_start = 0;
 553
 554    for line in hunk.lines.iter() {
 555        match line {
 556            PatchLine::Deletion(_) => deletions_at_start += 1,
 557            _ => break,
 558        }
 559    }
 560
 561    if deletions_at_start >= 3 {
 562        return false;
 563    }
 564
 565    true
 566}
 567
/// Token-level insertion/deletion totals for a patch; see `count_patch_token_changes`.
#[derive(Debug, Default, Clone, Serialize)]
pub struct TokenChangeCounts {
    /// Number of tokens added by the patch.
    pub inserted_tokens: usize,
    /// Number of tokens removed by the patch.
    pub deleted_tokens: usize,
}
 573
/// Counts the number of inserted and deleted tokens in a unified diff patch.
///
/// Tokens are words and whitespace sequences (as defined by `word_diff::tokenize`).
/// Within each hunk, the old (`-`) and new (`+`) lines are compared at the token level
/// using an LCS-based diff, so modified lines only count the actually changed tokens
/// rather than the entire line.
pub fn count_patch_token_changes(patch: &str) -> TokenChangeCounts {
    let mut counts = TokenChangeCounts::default();
    // Buffers for the current contiguous group of -/+ lines; token-diffed on flush.
    let mut old_lines: Vec<&str> = Vec::new();
    let mut new_lines: Vec<&str> = Vec::new();

    // Token-diffs the buffered -/+ group and folds the result into `counts`.
    let flush =
        |old_lines: &mut Vec<&str>, new_lines: &mut Vec<&str>, counts: &mut TokenChangeCounts| {
            if old_lines.is_empty() && new_lines.is_empty() {
                return;
            }

            // Strip the one-byte '-'/'+' prefix (ASCII, so byte slicing is
            // safe) and rebuild each side as newline-joined text.
            let old_text: String = old_lines
                .iter()
                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
                .collect::<Vec<_>>()
                .join("\n");

            let new_text: String = new_lines
                .iter()
                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
                .collect::<Vec<_>>()
                .join("\n");

            let old_tokens = tokenize(&old_text);
            let new_tokens = tokenize(&new_text);
            let ops = diff_tokens(&old_tokens, &new_tokens);

            for op in ops {
                match op {
                    DiffOp::Equal(..) => {}
                    DiffOp::Delete(start, end) => {
                        counts.deleted_tokens += end - start;
                    }
                    DiffOp::Insert(start, end) => {
                        counts.inserted_tokens += end - start;
                    }
                    DiffOp::Replace {
                        old_start,
                        old_end,
                        new_start,
                        new_end,
                    } => {
                        // A replacement counts on both sides.
                        counts.deleted_tokens += old_end - old_start;
                        counts.inserted_tokens += new_end - new_start;
                    }
                }
            }

            old_lines.clear();
            new_lines.clear();
        };

    for line in patch.lines() {
        // Headers terminate the current change group.
        // NOTE(review): a deleted line whose content starts with "--" renders
        // as "---…" and is dropped here as a header — confirm whether matching
        // "--- " (with a separator) is intended instead.
        if line.starts_with("---")
            || line.starts_with("+++")
            || line.starts_with("@@")
            || line.starts_with("diff ")
            || line.starts_with("index ")
        {
            flush(&mut old_lines, &mut new_lines, &mut counts);
        } else if line.starts_with('-') {
            old_lines.push(line);
        } else if line.starts_with('+') {
            new_lines.push(line);
        } else {
            // Context (or other) line: ends the contiguous -/+ group.
            flush(&mut old_lines, &mut new_lines, &mut counts);
        }
    }

    // Flush a trailing group that has no following context line.
    flush(&mut old_lines, &mut new_lines, &mut counts);
    counts
}
 652
/// Token-range diff operation mirroring `similar::DiffTag`, with half-open
/// ranges into the old and/or new token slices.
#[allow(dead_code)]
#[derive(Debug)]
enum DiffOp {
    /// Unchanged run: (start, end) in the old tokens.
    Equal(usize, usize),
    /// Deleted run: (start, end) in the old tokens.
    Delete(usize, usize),
    /// Inserted run: (start, end) in the new tokens.
    Insert(usize, usize),
    /// Replaced run: old range swapped for new range.
    Replace {
        old_start: usize,
        old_end: usize,
        new_start: usize,
        new_end: usize,
    },
}
 666
 667fn diff_tokens<'a>(old: &[&'a str], new: &[&'a str]) -> Vec<DiffOp> {
 668    let diff = TextDiff::from_slices(old, new);
 669    diff.ops()
 670        .iter()
 671        .map(|op| {
 672            let tag = op.tag();
 673            let old_range = op.old_range();
 674            let new_range = op.new_range();
 675            match tag {
 676                DiffTag::Equal => DiffOp::Equal(old_range.start, old_range.end),
 677                DiffTag::Delete => DiffOp::Delete(old_range.start, old_range.end),
 678                DiffTag::Insert => DiffOp::Insert(new_range.start, new_range.end),
 679                DiffTag::Replace => DiffOp::Replace {
 680                    old_start: old_range.start,
 681                    old_end: old_range.end,
 682                    new_start: new_range.start,
 683                    new_end: new_range.end,
 684                },
 685            }
 686        })
 687        .collect()
 688}
 689
 690/// Reconstruct old and new text from a unified diff.
 691///
 692/// Context and deletion lines form the old text; context and addition
 693/// lines form the new text. Returns `(old_text, new_text)`.
 694pub fn reconstruct_texts_from_diff(patch_str: &str) -> (String, String) {
 695    let patch = Patch::parse_unified_diff(patch_str);
 696    let mut old_lines: Vec<&str> = Vec::new();
 697    let mut new_lines: Vec<&str> = Vec::new();
 698
 699    for hunk in &patch.hunks {
 700        for line in &hunk.lines {
 701            match line {
 702                PatchLine::Context(content) => {
 703                    old_lines.push(content);
 704                    new_lines.push(content);
 705                }
 706                PatchLine::Deletion(content) => {
 707                    old_lines.push(content);
 708                }
 709                PatchLine::Addition(content) => {
 710                    new_lines.push(content);
 711                }
 712                PatchLine::Garbage(_) => {}
 713            }
 714        }
 715    }
 716
 717    (old_lines.join("\n"), new_lines.join("\n"))
 718}
/// A parsed unified diff: just the hunks (file names are not retained).
#[derive(Debug, Default, Clone)]
struct Patch {
    hunks: Vec<Hunk>,
}
 723
 724impl Patch {
 725    fn parse_unified_diff(unified_diff: &str) -> Patch {
 726        let mut current_file = String::new();
 727        let mut is_filename_inherited = false;
 728        let mut hunk = Hunk::default();
 729        let mut patch = Patch::default();
 730        let mut in_header = true;
 731
 732        for line in unified_diff.lines() {
 733            if line.starts_with("--- ") || line.starts_with("+++") || line.starts_with("@@") {
 734                in_header = false;
 735            }
 736
 737            if in_header {
 738                continue;
 739            }
 740
 741            if line.starts_with("@@") {
 742                if !hunk.lines.is_empty() {
 743                    patch.hunks.push(hunk);
 744                }
 745                hunk = Hunk::from_header(line, &current_file, is_filename_inherited);
 746                is_filename_inherited = true;
 747            } else if let Some(path) = line.strip_prefix("--- ") {
 748                is_filename_inherited = false;
 749                let path = path.trim().strip_prefix("a/").unwrap_or(path);
 750                if path != "/dev/null" {
 751                    current_file = path.into();
 752                }
 753            } else if let Some(path) = line.strip_prefix("+++ ") {
 754                is_filename_inherited = false;
 755                let path = path.trim().strip_prefix("b/").unwrap_or(path);
 756                if path != "/dev/null" {
 757                    current_file = path.into();
 758                }
 759            } else if let Some(line) = line.strip_prefix('+') {
 760                hunk.lines.push(PatchLine::Addition(line.to_string()));
 761            } else if let Some(line) = line.strip_prefix('-') {
 762                hunk.lines.push(PatchLine::Deletion(line.to_string()));
 763            } else if let Some(line) = line.strip_prefix(' ') {
 764                hunk.lines.push(PatchLine::Context(line.to_string()));
 765            } else {
 766                hunk.lines.push(PatchLine::Garbage(line.to_string()));
 767            }
 768        }
 769
 770        if !hunk.lines.is_empty() {
 771            patch.hunks.push(hunk);
 772        }
 773
 774        patch
 775    }
 776}
 777
/// One diff hunk: the new-file start line from its `@@` header plus its lines.
#[derive(Debug, Default, Clone)]
struct Hunk {
    /// Start line in the new file, from the `+start,count` header range.
    new_start: isize,
    lines: Vec<PatchLine>,
}
 783
 784impl Hunk {
 785    fn from_header(header: &str, _filename: &str, _is_filename_inherited: bool) -> Self {
 786        let (_, _, new_start, _, _) = Self::parse_hunk_header(header);
 787        Self {
 788            new_start,
 789            lines: Vec::new(),
 790        }
 791    }
 792
 793    fn parse_hunk_header(line: &str) -> (isize, isize, isize, isize, String) {
 794        let header_part = line.trim_start_matches("@@").trim();
 795        let parts: Vec<&str> = header_part.split_whitespace().collect();
 796
 797        if parts.len() < 2 {
 798            return (0, 0, 0, 0, String::new());
 799        }
 800
 801        let old_part = parts[0].trim_start_matches('-');
 802        let new_part = parts[1].trim_start_matches('+');
 803
 804        let (old_start, old_count) = Hunk::parse_hunk_header_range(old_part);
 805        let (new_start, new_count) = Hunk::parse_hunk_header_range(new_part);
 806
 807        let comment = if parts.len() > 2 {
 808            parts[2..]
 809                .join(" ")
 810                .trim_start_matches("@@")
 811                .trim()
 812                .to_string()
 813        } else {
 814            String::new()
 815        };
 816
 817        (
 818            old_start as isize,
 819            old_count as isize,
 820            new_start as isize,
 821            new_count as isize,
 822            comment,
 823        )
 824    }
 825
 826    fn parse_hunk_header_range(part: &str) -> (usize, usize) {
 827        if let Some((start, count)) = part.split_once(',') {
 828            (start.parse().unwrap_or(0), count.parse().unwrap_or(0))
 829        } else {
 830            (part.parse().unwrap_or(0), 1)
 831        }
 832    }
 833}
 834
/// One line of a hunk body, classified by its leading character.
#[derive(Clone, Debug, Eq, PartialEq)]
enum PatchLine {
    /// Unchanged line (leading ' '), stored without the prefix.
    Context(String),
    /// Added line (leading '+'), stored without the prefix.
    Addition(String),
    /// Removed line (leading '-'), stored without the prefix.
    Deletion(String),
    /// Anything unrecognized (e.g. "\ No newline at end of file").
    Garbage(String),
}
 842
 843#[cfg(test)]
 844mod test_optimization {
 845    use super::*;
 846
 847    #[test]
 848    fn test_extract_changed_regions_simple() {
 849        let original: Vec<char> = "hello world".chars().collect();
 850        let modified: Vec<char> = "hello there".chars().collect();
 851
 852        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);
 853
 854        // "world" vs "there" - with 5 chars context, we get "ello world" vs "ello there"
 855        // (or less if not enough chars available)
 856        assert!(orig_region.len() < original.len());
 857        assert!(mod_region.len() < modified.len());
 858    }
 859
 860    #[test]
 861    fn test_extract_changed_regions_insertion() {
 862        let original: Vec<char> = "abcdef".chars().collect();
 863        let modified: Vec<char> = "abcXYZdef".chars().collect();
 864
 865        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);
 866
 867        // The insertion is between c and d, so we need context around that point
 868        assert!(orig_region.len() <= original.len());
 869        assert!(mod_region.iter().collect::<String>().contains("XYZ"));
 870    }
 871
 872    #[test]
 873    fn test_extract_changed_regions_identical() {
 874        let text: Vec<char> = "identical text".chars().collect();
 875
 876        let (orig_region, mod_region) = extract_changed_regions(&text, &text);
 877
 878        // When texts are identical, regions should be empty
 879        assert!(orig_region.is_empty());
 880        assert!(mod_region.is_empty());
 881    }
 882
 883    #[test]
 884    fn test_optimized_matches_original_score() {
 885        // Test that our optimized version produces the same results
 886        let test_cases = vec![
 887            ("hello world", "hello there", "hello world"),
 888            (
 889                "fn main() {}",
 890                "fn main() { println!(); }",
 891                "fn main() { print!(); }",
 892            ),
 893            ("abcdefghij", "abcXXXghij", "abcYYghij"),
 894            ("unchanged", "unchanged", "unchanged"),
 895            (
 896                "prefix middle suffix",
 897                "prefix CHANGED suffix",
 898                "prefix middle suffix",
 899            ),
 900        ];
 901
 902        for (original, expected, actual) in test_cases {
 903            let score = delta_chr_f(original, expected, actual).score;
 904            // Just verify it produces a reasonable score (0-100)
 905            assert!(
 906                score >= 0.0 && score <= 100.0,
 907                "Score {} out of range for ({}, {}, {})",
 908                score,
 909                original,
 910                expected,
 911                actual
 912            );
 913        }
 914    }
 915
 916    #[test]
 917    fn test_optimized_equals_reference() {
 918        // Comprehensive test that optimized version matches reference implementation exactly
 919        let test_cases = vec![
 920            // Basic cases
 921            ("hello world", "hello there", "hello world"),
 922            ("hello world", "hello there", "hello there"),
 923            ("unchanged", "unchanged", "unchanged"),
 924            // Code-like cases
 925            (
 926                "fn main() { println!(\"Hello\"); }",
 927                "fn main() { println!(\"Hello, World!\"); }",
 928                "fn main() { println!(\"Hello, World!\"); }",
 929            ),
 930            (
 931                "fn main() { println!(\"Hello\"); }",
 932                "fn main() { println!(\"Hello, World!\"); }",
 933                "fn main() { println!(\"Goodbye\"); }",
 934            ),
 935            // Insertion
 936            ("abcdef", "abcXYZdef", "abcdef"),
 937            ("abcdef", "abcXYZdef", "abcXYZdef"),
 938            ("abcdef", "abcXYZdef", "abcABCdef"),
 939            // Deletion
 940            ("abcXYZdef", "abcdef", "abcXYZdef"),
 941            ("abcXYZdef", "abcdef", "abcdef"),
 942            // Multiple changes (simulated by different expected/actual)
 943            ("one two three four", "one THREE four", "one two FOUR"),
 944            // Edge cases
 945            ("a", "b", "c"),
 946            ("", "abc", ""),
 947            ("abc", "", "abc"),
 948            // Longer text with small change
 949            (
 950                "This is a longer piece of text that contains many words and characters to process",
 951                "This is a longer piece of TEXT that contains many words and characters to process",
 952                "This is a longer piece of text that contains many words and characters to process",
 953            ),
 954            // Change at the beginning
 955            (
 956                "ORIGINAL start of text",
 957                "NEW start of text",
 958                "DIFFERENT start of text",
 959            ),
 960            // Change at the end
 961            (
 962                "text ending ORIGINAL",
 963                "text ending NEW",
 964                "text ending DIFFERENT",
 965            ),
 966            // Whitespace (should be ignored)
 967            ("hello   world", "hello   there", "hello   world"),
 968            ("a b c d", "a X c d", "a Y c d"),
 969        ];
 970
 971        for (original, expected, actual) in test_cases {
 972            let optimized_metrics = delta_chr_f(original, expected, actual);
 973            let reference_metrics = delta_chr_f_reference(original, expected, actual);
 974
 975            assert!(
 976                (optimized_metrics.score - reference_metrics.score).abs() < 1e-10,
 977                "Score mismatch for ({:?}, {:?}, {:?}):\n  optimized: {}\n  reference: {}",
 978                original,
 979                expected,
 980                actual,
 981                optimized_metrics.score,
 982                reference_metrics.score
 983            );
 984            assert_eq!(
 985                optimized_metrics.counts.true_positives,
 986                reference_metrics.counts.true_positives
 987            );
 988            assert_eq!(
 989                optimized_metrics.counts.false_positives,
 990                reference_metrics.counts.false_positives
 991            );
 992            assert_eq!(
 993                optimized_metrics.counts.false_negatives,
 994                reference_metrics.counts.false_negatives
 995            );
 996            assert!((optimized_metrics.precision - reference_metrics.precision).abs() < 1e-10);
 997            assert!((optimized_metrics.recall - reference_metrics.recall).abs() < 1e-10);
 998        }
 999    }
1000
1001    #[test]
1002    fn test_delta_chr_f_metrics_include_counts_and_rates() {
1003        let original = "one two three";
1004        let expected = "one three";
1005        let actual = "one two four";
1006
1007        let metrics = delta_chr_f(original, expected, actual);
1008
1009        assert!(metrics.score > 20.0 && metrics.score < 40.0);
1010        assert!(metrics.counts.true_positives > 0);
1011        assert!(metrics.counts.false_positives > 0);
1012        assert!(metrics.counts.false_negatives > 0);
1013        assert!(metrics.precision > 0.0 && metrics.precision < 1.0);
1014        assert!(metrics.recall > 0.0 && metrics.recall < 1.0);
1015        assert_eq!(metrics.beta, CHR_F_BETA);
1016    }
1017}
1018
// Tests for the patch-level helpers defined earlier in this file:
// delta-chrF scoring, brace balancing, diff-line extraction, exact-line
// matching, isolated-whitespace detection, and token-change counting.
#[cfg(test)]
mod test {
    use super::*;
    use indoc::indoc;

    /// Converts a human-readable 1-based line number into the 0-based
    /// cursor line expected by `has_isolated_whitespace_changes`.
    fn cursor_on_line(one_based_line: u32) -> u32 {
        one_based_line - 1
    }

    #[test]
    fn test_delta_chr_f_perfect_match() {
        let original = "fn main() {    println!(\"Hello\");}";
        let expected = "fn main() {    println!(\"Hello, World!\");}";

        // Producing exactly the expected text scores (approximately) 100.
        let score = delta_chr_f(original, expected, expected).score;
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_delta_chr_f_wrong_edit() {
        // When the edit is wrong
        let original = "one two three";
        let expected = "one three"; // deleted "two "
        let actual = "one two four"; // deleted "three", added "four"

        // Then the score should be low
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score > 20.0 && score < 40.0);
    }

    #[test]
    fn test_delta_chr_f_partial_match() {
        let original = "let x = 42;";
        let expected = "let x = 100;";
        let actual = "let x = 99;";

        // We got the edit location right, but the replacement text is wrong.
        // Deleted ngrams will match, bringing the score somewhere in the middle.
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score > 40.0 && score < 60.0);
    }

    #[test]
    fn test_delta_chr_f_missed_edit() {
        // When predictions makes no changes
        let original = "prefix old suffix";
        let expected = "prefix new suffix";
        let actual = "prefix old suffix"; // no change

        // Then the score should be low (all expected changes are false negatives)
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_extra_edit() {
        // When adding unexpected content
        let original = "helloworld";
        let expected = "helloworld"; // no change expected
        let actual = "helloextraworld"; // added "extra"

        // Then the score should be low (all actual changes are false positives)
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_no_changes() {
        // Identical original/expected/actual is a perfect score.
        let text = "unchanged text";
        let score = delta_chr_f(text, text, text).score;
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_braces_disbalance() {
        // Balanced braces -> 0; each unmatched bracket adds to the count.
        let text = "let x = { 1 + 2 };";
        assert_eq!(braces_disbalance(text), 0);

        let text = "let x = { 1 + 2";
        assert_eq!(braces_disbalance(text), 1);

        // Mismatched pair ('{' closed by ')') counts both sides.
        let text = "let x = { 1 + 2 )";
        assert_eq!(braces_disbalance(text), 2);
    }

    #[test]
    fn test_extract_changed_lines_from_diff() {
        // Only +/- lines are counted; context and file headers are not.
        let diff = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn main() {
-    println!("hello");
+    println!("world");
 }"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-    println!(\"hello\");"), Some(&1));
        assert_eq!(counts.get("+    println!(\"world\");"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_extract_changed_lines_skips_headers() {
        // "diff --git", "index", "---", "+++" lines must not be counted
        // even though some start with '-'/'+'.
        let diff = r#"diff --git a/file.rs b/file.rs
index abc123..def456 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,2 @@
-old line
+new line"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-old line"), Some(&1));
        assert_eq!(counts.get("+new line"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_exact_lines_match_perfect() {
        // Identical patches: every changed line is a true positive.
        let expected = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 4);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_partial() {
        let expected = r#"-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"-old line 1
+new line 1
+extra line"#;

        let metrics = exact_lines_match(expected, actual);
        // TP: "-old line 1" and "+new line 1" (2)
        // FP: "+extra line" (1)
        // FN: "-old line 2" and "+new line 2" (2)
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 1);
        assert_eq!(metrics.false_negatives, 2);
    }

    #[test]
    fn test_exact_lines_match_no_overlap() {
        // Disjoint patches: zero precision and recall.
        let expected = r#"-line a
+line b"#;

        let actual = r#"-line x
+line y"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 2);
        assert_eq!(metrics.false_negatives, 2);
        assert!((metrics.precision()).abs() < 1e-6);
        assert!((metrics.recall()).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_duplicate_lines() {
        // Repeated identical lines are matched by multiplicity, not as a set.
        let expected = r#"+line a
+line a
+line a"#;

        let actual = r#"+line a
+line a"#;

        let metrics = exact_lines_match(expected, actual);
        // Expected has 3 "+line a", actual has 2
        // TP: 2, FN: 1, FP: 0
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 1);
    }

    #[test]
    fn test_exact_lines_match_empty_patches() {
        // Two empty patches produce all-zero metrics rather than an error.
        let metrics = exact_lines_match("", "");
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
    }

    #[test]
    fn test_is_editable_region_correct() {
        // A patch that deletes lines before the editable region is rejected;
        // a header-only patch is accepted.
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -context
            -removed
            -from the beginning of the file
            import sys
            +sys.exit(0)

            "};
        assert!(!is_editable_region_correct(patch));

        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            "};
        assert!(is_editable_region_correct(patch));
    }

    #[test]
    fn test_isolated_whitespace_purely_whitespace_patch() {
        // A patch whose only change is a blank line is flagged.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_adjacent_to_real_change() {
        // A blank-line addition next to a substantive addition is not isolated.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
            +    let x = 1;
                 println!(\"hello\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_no_whitespace_changes() {
        // A purely substantive change is never flagged.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_deletion() {
        // Deleting a lone blank line counts as an isolated whitespace change.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_mixed_groups() {
        // One isolated whitespace group is enough to flag the patch, even
        // when another group contains real changes.
        let patch = indoc! {"
            @@ -1,7 +1,8 @@
             fn main() {
            +
                 let x = 1;
            -    let y = 2;
            +    let y = 3;

            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_empty_patch() {
        // No content, nothing to flag.
        let patch = "";
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_skipped_on_cursor_line() {
        // The addition of a blank line at new-file line 2 should be skipped
        // because the cursor is on that line.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        // New-file line 2 is the added blank line
        let cursor = cursor_on_line(2);
        assert!(!has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
        // The blank line is at new-file line 2, but the cursor is on line 1.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(1);
        assert!(has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
        // Deletions don't have a new-file line, so cursor can't suppress them.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(2);
        assert!(has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_count_patch_token_changes_real_world_rename() {
        // Real-world patch that was reported as returning 0 tokens
        let patch = "--- a/sip_call\\README.md\n+++ b/sip_call\\README.md\n@@ -1,1 +1,1 @@\n-# \n+# SIP Call\n";
        let counts = count_patch_token_changes(patch);
        // "# " vs "# SIP Call" — the "SIP" and "Call" tokens (and a whitespace token) are inserted
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert_eq!(counts.deleted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_real_world_expansion() {
        // Real-world patch: single token expanded to multiple lines
        let patch = "--- a/task1/src/app/app.html\n+++ b/task1/src/app/app.html\n@@ -1,7 +1,9 @@\n <style>\n-  m\n+  main {\n+    \n+  }\n </style>\n \n <main>\n   \n </main>\n";
        let counts = count_patch_token_changes(patch);
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert!(
            counts.deleted_tokens > 0,
            "expected deleted tokens > 0, got {}",
            counts.deleted_tokens
        );
    }

    #[test]
    fn test_count_patch_token_changes_simple_replacement() {
        // Shared tokens ("println", quotes, punctuation) don't count;
        // only the differing token on each side does.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 1, "deleted: \"hello\"");
        assert_eq!(counts.inserted_tokens, 1, "inserted: \"world\"");
    }

    #[test]
    fn test_count_patch_token_changes_insertion_only() {
        let patch = indoc! {"
            @@ -1,2 +1,3 @@
             fn main() {
            +    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert!(counts.inserted_tokens > 0);
    }

    #[test]
    fn test_count_patch_token_changes_deletion_only() {
        let patch = indoc! {"
            @@ -1,3 +1,2 @@
             fn main() {
            -    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert!(counts.deleted_tokens > 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_empty_patch() {
        let patch = "";
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_multiple_hunks() {
        // Token changes accumulate across hunks.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    let x = 1;
            +    let x = 2;
             }
            @@ -10,3 +10,3 @@
             fn other() {
            -    let y = 3;
            +    let y = 4;
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 2, "deleted: \"1\" and \"3\"");
        assert_eq!(counts.inserted_tokens, 2, "inserted: \"2\" and \"4\"");
    }

    #[test]
    fn test_count_patch_token_changes_multiword_change() {
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -hello world foo
            +hello bar baz
        "};
        let counts = count_patch_token_changes(patch);
        // "world" and "foo" deleted, "bar" and "baz" inserted
        // (whitespace tokens between them may also count)
        assert!(counts.deleted_tokens >= 2);
        assert!(counts.inserted_tokens >= 2);
    }

    #[test]
    fn test_whitespace_collapse() {
        // Runs of spaces collapse to a single space; the newline run
        // collapses to a single '\n' bounded by single spaces.
        let text = "abc   \n\n\n   123";
        let collapsed = collapse_whitespace(text.chars());
        assert_eq!(
            collapsed,
            vec!['a', 'b', 'c', ' ', '\n', ' ', '1', '2', '3']
        );
    }
}