1use collections::HashMap;
2
3use crate::{
4 example::ActualCursor,
5 reorder_patch::{Patch, PatchLine},
6 word_diff::{DiffOp, diff_tokens, tokenize},
7};
8
/// Multiset of items (n-grams, diff lines, …) keyed by item text.
pub type Counts = HashMap<String, usize>;
/// Signed per-item count difference between two `Counts` (after minus before).
type CountsDelta = HashMap<String, isize>;

/// Context characters needed on each side of a change to capture all affected n-grams
const CONTEXT_CHARS: usize = CHR_F_CHAR_ORDER - 1;
14
/// Aggregated classification counts used to derive precision, recall, and F1.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct ClassificationMetrics {
    /// Items present in both the expected and actual sets.
    pub true_positives: usize,
    /// Items present in actual but not in expected.
    pub false_positives: usize,
    /// Items present in expected but not in actual.
    pub false_negatives: usize,
}
21
/// Classification outcome for a single token when aligning expected vs. actual text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenClass {
    /// Token appears in both expected and actual.
    TruePositive,
    /// Token appears only in actual.
    FalsePositive,
    /// Token appears only in expected.
    FalseNegative,
}
28
/// A token paired with its classification outcome.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedToken {
    /// The token text.
    pub token: String,
    /// How the token was classified.
    pub class: TokenClass,
}
34
/// Per-token classification results for both sides of a comparison.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct TokenClassificationDetail {
    /// Expected-side tokens (classified true-positive or false-negative).
    pub expected_tokens: Vec<ClassifiedToken>,
    /// Actual-side tokens (classified true-positive or false-positive).
    pub actual_tokens: Vec<ClassifiedToken>,
}
40
41impl ClassificationMetrics {
42 pub fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
43 let mut true_positives = 0;
44 let mut false_positives = 0;
45 let mut false_negatives = 0;
46
47 for (ngram, &expected_count) in expected {
48 let actual_count = *actual.get(ngram).unwrap_or(&0);
49 if actual_count > expected_count {
50 false_positives += actual_count - expected_count;
51 } else {
52 false_negatives += expected_count - actual_count;
53 }
54 true_positives += expected_count.min(actual_count);
55 }
56
57 for (ngram, &actual_count) in actual {
58 if !expected.contains_key(ngram) {
59 false_positives += actual_count;
60 }
61 }
62
63 ClassificationMetrics {
64 true_positives,
65 false_positives,
66 false_negatives,
67 }
68 }
69
70 pub fn accumulate(&mut self, other: &ClassificationMetrics) {
71 self.true_positives += other.true_positives;
72 self.false_positives += other.false_positives;
73 self.false_negatives += other.false_negatives;
74 }
75
76 pub fn precision(&self) -> f64 {
77 if self.true_positives + self.false_positives == 0 {
78 0.0
79 } else {
80 self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
81 }
82 }
83
84 pub fn recall(&self) -> f64 {
85 if self.true_positives + self.false_negatives == 0 {
86 0.0
87 } else {
88 self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
89 }
90 }
91
92 pub fn f1(&self) -> f64 {
93 let precision = self.precision();
94 let recall = self.recall();
95 if precision + recall == 0.0 {
96 0.0
97 } else {
98 2.0 * precision * recall / (precision + recall)
99 }
100 }
101}
102
103pub fn compare_classification_metrics(
104 left: &ClassificationMetrics,
105 right: &ClassificationMetrics,
106) -> std::cmp::Ordering {
107 left.f1()
108 .total_cmp(&right.f1())
109 .then_with(|| left.precision().total_cmp(&right.precision()))
110 .then_with(|| left.recall().total_cmp(&right.recall()))
111 .then_with(|| left.true_positives.cmp(&right.true_positives))
112 .then_with(|| right.false_positives.cmp(&left.false_positives))
113 .then_with(|| right.false_negatives.cmp(&left.false_negatives))
114}
115
/// Whitespace-handling policy applied to texts before chrF n-gram counting.
enum ChrfWhitespace {
    /// Preserve whitespace as-is
    #[allow(unused)]
    Unchanged,

    /// Ignore all whitespace differences
    #[allow(unused)]
    Ignore,

    /// Collapse whitespace into single spaces
    Collapse,
}
128
/// Maximum character n-gram order; orders 1..=6 are averaged.
const CHR_F_CHAR_ORDER: usize = 6;
/// Beta of the F-beta score used to combine precision and recall.
const CHR_F_BETA: f64 = 2.0;
/// Whitespace policy applied to all texts before n-gram counting.
const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Collapse;
132
133/// Computes a delta-chrF score that compares two sets of edits.
134///
135/// This metric works by:
136/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
137/// 2. Comparing these deltas to measure how well actual edits match expected edits
138///
139/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
140/// the expected edits.
141pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> f64 {
142 // Edge case: if all texts are identical, the edits match perfectly
143 if original == expected && expected == actual {
144 return 100.0;
145 }
146
147 // Pre-filter whitespace once for all texts
148 let orig_chars: Vec<char> = filter_whitespace_chars(original);
149 let exp_chars: Vec<char> = filter_whitespace_chars(expected);
150 let act_chars: Vec<char> = filter_whitespace_chars(actual);
151
152 // Find the changed regions between original→expected and original→actual
153 // We only need to compute n-grams on these regions (plus context for boundary n-grams)
154 let (orig_for_exp, exp_region) = extract_changed_regions(&orig_chars, &exp_chars);
155 let (orig_for_act, act_region) = extract_changed_regions(&orig_chars, &act_chars);
156
157 let mut total_precision = 0.0;
158 let mut total_recall = 0.0;
159
160 for order in 1..=CHR_F_CHAR_ORDER {
161 // Compute n-grams only on the affected regions
162 let orig_ngrams_for_exp = count_ngrams_from_chars(&orig_for_exp, order);
163 let exp_ngrams = count_ngrams_from_chars(&exp_region, order);
164 let expected_delta = compute_ngram_delta(&exp_ngrams, &orig_ngrams_for_exp);
165
166 let orig_ngrams_for_act = count_ngrams_from_chars(&orig_for_act, order);
167 let act_ngrams = count_ngrams_from_chars(&act_region, order);
168 let actual_delta = compute_ngram_delta(&act_ngrams, &orig_ngrams_for_act);
169
170 if expected_delta.is_empty() && actual_delta.is_empty() {
171 total_precision += 1.0;
172 total_recall += 1.0;
173 continue;
174 }
175
176 let expected_counts = ngram_delta_to_counts(&expected_delta);
177 let actual_counts = ngram_delta_to_counts(&actual_delta);
178
179 let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
180 total_precision += score.precision();
181 total_recall += score.recall();
182 }
183
184 let prec = total_precision / CHR_F_CHAR_ORDER as f64;
185 let recall = total_recall / CHR_F_CHAR_ORDER as f64;
186 let f_score = if prec + recall == 0.0 {
187 0.0
188 } else {
189 (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
190 };
191
192 f_score * 100.0
193}
194
/// Reference implementation of delta_chr_f (original, non-optimized version).
/// Used for testing that the optimized version produces identical results.
///
/// Unlike the optimized path, this counts n-grams over the *full* texts rather
/// than only the changed regions, so it serves as ground truth in tests.
#[cfg(test)]
fn delta_chr_f_reference(original: &str, expected: &str, actual: &str) -> f64 {
    // Identical texts mean the edits trivially match.
    if original == expected && expected == actual {
        return 100.0;
    }

    // Full-text n-gram counts, one Counts map per order (index = order - 1).
    let original_ngrams = chr_f_ngram_counts(original);
    let expected_ngrams = chr_f_ngram_counts(expected);
    let actual_ngrams = chr_f_ngram_counts(actual);

    let mut total_precision = 0.0;
    let mut total_recall = 0.0;

    for order in 0..CHR_F_CHAR_ORDER {
        let expected_delta = compute_ngram_delta(&expected_ngrams[order], &original_ngrams[order]);
        let actual_delta = compute_ngram_delta(&actual_ngrams[order], &original_ngrams[order]);

        // No changes on either side at this order counts as a perfect match.
        if expected_delta.is_empty() && actual_delta.is_empty() {
            total_precision += 1.0;
            total_recall += 1.0;
            continue;
        }

        let expected_counts = ngram_delta_to_counts(&expected_delta);
        let actual_counts = ngram_delta_to_counts(&actual_delta);

        let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
        total_precision += score.precision();
        total_recall += score.recall();
    }

    // Average per-order precision/recall, then apply the F-beta formula.
    let prec = total_precision / CHR_F_CHAR_ORDER as f64;
    let recall = total_recall / CHR_F_CHAR_ORDER as f64;
    let f_score = if prec + recall == 0.0 {
        0.0
    } else {
        (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
    };

    f_score * 100.0
}
238
239/// Filter whitespace from a string and return as Vec<char>
240fn filter_whitespace_chars(text: &str) -> Vec<char> {
241 match CHR_F_WHITESPACE {
242 ChrfWhitespace::Unchanged => text.chars().collect(),
243 ChrfWhitespace::Ignore => text.chars().filter(|c| !c.is_whitespace()).collect(),
244 ChrfWhitespace::Collapse => collapse_whitespace(text.chars()),
245 }
246}
247
/// Collapse whitespace into single spaces.
/// Newlines and spaces are collapsed separately: a run of blanks becomes one
/// ' ' and a run of newlines becomes one '\n', while mixed runs keep one of
/// each in encounter order.
fn collapse_whitespace(chars: impl Iterator<Item = char>) -> Vec<char> {
    let mut out: Vec<char> = Vec::new();
    for c in chars {
        // Normalize: newlines stay '\n', any other whitespace becomes ' '.
        let normalized = match c {
            '\n' => '\n',
            ws if ws.is_whitespace() => ' ',
            other => other,
        };
        // Drop a whitespace marker that merely repeats the previous output
        // character; non-whitespace output never equals ' ' or '\n', so this
        // collapses exactly the whitespace runs.
        let is_repeated_whitespace =
            (normalized == ' ' || normalized == '\n') && out.last() == Some(&normalized);
        if !is_repeated_whitespace {
            out.push(normalized);
        }
    }
    out
}
271
/// Extract only the changed regions between two texts, with context for n-gram boundaries.
///
/// Returns (original_affected_region, modified_affected_region) as Vec<char>.
///
/// The key insight: when computing n-gram delta between two nearly-identical texts,
/// n-grams from unchanged regions cancel out. We only need to process:
/// 1. The changed content itself
/// 2. CONTEXT_CHARS (n-1) characters before and after, to capture boundary-crossing n-grams
fn extract_changed_regions(original: &[char], modified: &[char]) -> (Vec<char>, Vec<char>) {
    // Find longest common prefix
    let prefix_len = original
        .iter()
        .zip(modified.iter())
        .take_while(|(a, b)| a == b)
        .count();

    // Find longest common suffix (that doesn't overlap with prefix)
    let orig_remaining = original.len().saturating_sub(prefix_len);
    let mod_remaining = modified.len().saturating_sub(prefix_len);
    let max_suffix = orig_remaining.min(mod_remaining);

    // The `.take(max_suffix)` cap prevents the suffix scan from re-counting
    // characters already claimed by the prefix (possible when one text is a
    // substring of the other).
    let suffix_len = original
        .iter()
        .rev()
        .zip(modified.iter().rev())
        .take(max_suffix)
        .take_while(|(a, b)| a == b)
        .count();

    // Calculate the changed region boundaries
    let orig_change_start = prefix_len;
    let orig_change_end = original.len().saturating_sub(suffix_len);
    let mod_change_start = prefix_len;
    let mod_change_end = modified.len().saturating_sub(suffix_len);

    // If there's no actual change, return empty regions
    if orig_change_start >= orig_change_end && mod_change_start >= mod_change_end {
        return (Vec::new(), Vec::new());
    }

    // Expand to include context for n-gram boundaries
    let orig_context_start = orig_change_start.saturating_sub(CONTEXT_CHARS);
    let orig_context_end = (orig_change_end + CONTEXT_CHARS).min(original.len());
    let mod_context_start = mod_change_start.saturating_sub(CONTEXT_CHARS);
    let mod_context_end = (mod_change_end + CONTEXT_CHARS).min(modified.len());

    let orig_region: Vec<char> = original[orig_context_start..orig_context_end].to_vec();
    let mod_region: Vec<char> = modified[mod_context_start..mod_context_end].to_vec();

    (orig_region, mod_region)
}
323
324/// Count n-grams directly from a char slice (avoids String allocation for the full text)
325fn count_ngrams_from_chars(chars: &[char], n: usize) -> Counts {
326 let mut counts = Counts::default();
327
328 if chars.len() < n {
329 return counts;
330 }
331
332 for window in chars.windows(n) {
333 let ngram: String = window.iter().collect();
334 *counts.entry(ngram).or_insert(0) += 1;
335 }
336
337 counts
338}
339
340#[allow(dead_code)]
341fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
342 let text = match CHR_F_WHITESPACE {
343 ChrfWhitespace::Unchanged => text.to_string(),
344 ChrfWhitespace::Ignore => text
345 .chars()
346 .filter(|c| !c.is_whitespace())
347 .collect::<String>(),
348 ChrfWhitespace::Collapse => collapse_whitespace(text.chars())
349 .into_iter()
350 .collect::<String>(),
351 };
352
353 (1..=CHR_F_CHAR_ORDER)
354 .map(|order| count_ngrams(&text, order))
355 .collect()
356}
357
358fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
359 let mut delta = CountsDelta::default();
360
361 for (ngram, &before_count) in before {
362 let after_count = *after.get(ngram).unwrap_or(&0);
363 delta.insert(ngram.clone(), after_count as isize - before_count as isize);
364 }
365
366 for (ngram, &after_count) in after {
367 if !before.contains_key(ngram) {
368 delta.insert(ngram.clone(), after_count as isize);
369 }
370 }
371
372 delta
373}
374
375/// Convert negative counts to special deletion tokens.
376/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
377/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
378/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
379fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
380 let mut counts = Counts::default();
381
382 for (ngram, &delta) in delta {
383 if delta > 0 {
384 counts.insert(ngram.clone(), delta as usize);
385 } else if delta < 0 {
386 counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
387 }
388 }
389
390 counts
391}
392
393#[allow(dead_code)]
394fn count_ngrams(text: &str, n: usize) -> Counts {
395 let chars: Vec<char> = text.chars().collect();
396 let mut counts = Counts::default();
397
398 for window in chars.windows(n) {
399 let ngram: String = window.iter().collect();
400 *counts.entry(ngram).or_insert(0) += 1;
401 }
402
403 counts
404}
405
/// Measures how unbalanced the bracket *counts* in `text` are.
///
/// For each bracket kind (`{}`, `()`, `[]`) the absolute difference between
/// the number of openers and closers is taken; the result is their sum. Note
/// this checks counts only, not nesting order (`"}{"` is balanced here).
///
/// Returns 0 for an empty string or fully count-balanced text.
pub fn braces_disbalance(text: &str) -> usize {
    // Net (open - close) tally per bracket kind, accumulated in one pass
    // instead of six separate scans of the text.
    let mut curly = 0isize;
    let mut round = 0isize;
    let mut square = 0isize;

    for c in text.chars() {
        match c {
            '{' => curly += 1,
            '}' => curly -= 1,
            '(' => round += 1,
            ')' => round -= 1,
            '[' => square += 1,
            ']' => square -= 1,
            _ => {}
        }
    }

    (curly.abs() + round.abs() + square.abs()) as usize
}
423
424/// Extracts changed lines from a unified diff string.
425/// Returns a bag (multiset) of lines that were added (+) or removed (-).
426/// The +/- prefix is included in the line to distinguish additions from deletions.
427pub fn extract_changed_lines_from_diff(diff: &str) -> Counts {
428 let mut counts = Counts::default();
429
430 for line in diff.lines() {
431 // Skip file headers (--- and +++)
432 if line.starts_with("---") || line.starts_with("+++") {
433 continue;
434 }
435 // Skip hunk headers (@@)
436 if line.starts_with("@@") {
437 continue;
438 }
439 // Skip diff header lines (diff --git, index, etc.)
440 if line.starts_with("diff ") || line.starts_with("index ") {
441 continue;
442 }
443 // Include added and removed lines (with their prefix)
444 if line.starts_with('+') || line.starts_with('-') {
445 *counts.entry(line.to_string()).or_insert(0) += 1;
446 }
447 }
448
449 counts
450}
451
452/// Computes exact lines match metrics between expected and actual patches.
453/// Treats changed lines as a bag (multiset) - order is discarded but count matters.
454/// Returns ClassificationMetrics with TP/FP/FN counts.
455pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
456 let expected_lines = extract_changed_lines_from_diff(expected_patch);
457 let actual_lines = extract_changed_lines_from_diff(actual_patch);
458 ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
459}
460
461/// Returns whether the patch contains any isolated whitespace-only changes.
462///
463/// A whitespace-only change is an added or deleted line whose content is empty or
464/// contains only whitespace. It is "isolated" when it is not adjacent to any
465/// substantive (non-whitespace) change within the same contiguous change group.
466pub fn has_isolated_whitespace_changes(patch_str: &str, cursor: Option<&ActualCursor>) -> bool {
467 let patch = Patch::parse_unified_diff(patch_str);
468
469 let cursor_new_file_line = cursor.as_ref().map(|c| (c.row + 1) as usize);
470
471 for hunk in &patch.hunks {
472 let lines = &hunk.lines;
473 let mut new_text_line = hunk.new_start as usize;
474
475 for (i, line) in lines.iter().enumerate() {
476 let content = match line {
477 PatchLine::Addition(s) => {
478 let addition_line = new_text_line;
479 new_text_line += 1;
480 if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
481 continue;
482 }
483 s.as_str()
484 }
485 PatchLine::Deletion(s) => s.as_str(),
486 PatchLine::Context(_) => {
487 new_text_line += 1;
488 continue;
489 }
490 _ => continue,
491 };
492
493 if !content.trim().is_empty() {
494 continue;
495 }
496
497 if is_whitespace_change_isolated(lines, i) {
498 return true;
499 }
500 }
501 }
502
503 false
504}
505
506fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
507 // Look backward for a non-whitespace change before hitting a context line
508 for line in lines[..index].iter().rev() {
509 match line {
510 PatchLine::Addition(s) | PatchLine::Deletion(s) => {
511 if !s.trim().is_empty() {
512 return false;
513 }
514 }
515 _ => break,
516 }
517 }
518
519 // Look forward for a non-whitespace change before hitting a context line
520 for line in &lines[index + 1..] {
521 match line {
522 PatchLine::Addition(s) | PatchLine::Deletion(s) => {
523 if !s.trim().is_empty() {
524 return false;
525 }
526 }
527 _ => break,
528 }
529 }
530
531 true
532}
533
534/// A simple proxy for whether the prediction respects editable region.
535pub fn is_editable_region_correct(actual_patch: &str) -> bool {
536 // A typical sign of a wrong editable region: a bunch of lines deletion
537 // at the beginning or end of the patch.
538 let patch = Patch::parse_unified_diff(actual_patch);
539 if patch.hunks.is_empty() {
540 return true;
541 }
542
543 let hunk = &patch.hunks[0];
544 let mut deletions_at_start = 0;
545
546 for line in hunk.lines.iter() {
547 match line {
548 PatchLine::Deletion(_) => deletions_at_start += 1,
549 _ => break,
550 }
551 }
552
553 if deletions_at_start >= 3 {
554 return false;
555 }
556
557 true
558}
559
/// Totals of inserted and deleted tokens across a patch.
#[derive(Debug, Default, Clone)]
pub struct TokenChangeCounts {
    /// Number of tokens added by the patch.
    pub inserted_tokens: usize,
    /// Number of tokens removed by the patch.
    pub deleted_tokens: usize,
}
565
/// Classifies the token-level diff between expected and actual token slices,
/// returning only the aggregate metrics (discarding the per-token detail
/// produced by `classify_token_diff_ops_detailed`).
fn classify_token_diff_ops(
    expected_tokens: &[&str],
    actual_tokens: &[&str],
) -> ClassificationMetrics {
    classify_token_diff_ops_detailed(expected_tokens, actual_tokens).0
}
572
573fn classify_token_diff_ops_detailed(
574 expected_tokens: &[&str],
575 actual_tokens: &[&str],
576) -> (ClassificationMetrics, TokenClassificationDetail) {
577 let mut metrics = ClassificationMetrics::default();
578 let mut detail = TokenClassificationDetail::default();
579
580 for operation in diff_tokens(expected_tokens, actual_tokens) {
581 match operation {
582 DiffOp::Equal {
583 old_start,
584 old_end,
585 new_start,
586 new_end,
587 } => {
588 metrics.true_positives += old_end - old_start;
589 for token in &expected_tokens[old_start..old_end] {
590 detail.expected_tokens.push(ClassifiedToken {
591 token: (*token).to_string(),
592 class: TokenClass::TruePositive,
593 });
594 }
595 for token in &actual_tokens[new_start..new_end] {
596 detail.actual_tokens.push(ClassifiedToken {
597 token: (*token).to_string(),
598 class: TokenClass::TruePositive,
599 });
600 }
601 }
602 DiffOp::Delete(start, end) => {
603 metrics.false_negatives += end - start;
604 for token in &expected_tokens[start..end] {
605 detail.expected_tokens.push(ClassifiedToken {
606 token: (*token).to_string(),
607 class: TokenClass::FalseNegative,
608 });
609 }
610 }
611 DiffOp::Insert(start, end) => {
612 metrics.false_positives += end - start;
613 for token in &actual_tokens[start..end] {
614 detail.actual_tokens.push(ClassifiedToken {
615 token: (*token).to_string(),
616 class: TokenClass::FalsePositive,
617 });
618 }
619 }
620 DiffOp::Replace {
621 old_start,
622 old_end,
623 new_start,
624 new_end,
625 } => {
626 metrics.false_negatives += old_end - old_start;
627 metrics.false_positives += new_end - new_start;
628
629 for token in &expected_tokens[old_start..old_end] {
630 detail.expected_tokens.push(ClassifiedToken {
631 token: (*token).to_string(),
632 class: TokenClass::FalseNegative,
633 });
634 }
635 for token in &actual_tokens[new_start..new_end] {
636 detail.actual_tokens.push(ClassifiedToken {
637 token: (*token).to_string(),
638 class: TokenClass::FalsePositive,
639 });
640 }
641 }
642 }
643 }
644
645 (metrics, detail)
646}
647
/// Tokenizes both texts with `word_diff::tokenize` and returns the aggregate
/// token-level classification metrics between them.
fn classify_token_texts(expected_text: &str, actual_text: &str) -> ClassificationMetrics {
    let expected_tokens = tokenize(expected_text);
    let actual_tokens = tokenize(actual_text);
    classify_token_diff_ops(&expected_tokens, &actual_tokens)
}
653
/// Tokenizes both texts with `word_diff::tokenize` and returns both the
/// aggregate metrics and the per-token classification detail.
fn classify_token_texts_detailed(
    expected_text: &str,
    actual_text: &str,
) -> (ClassificationMetrics, TokenClassificationDetail) {
    let expected_tokens = tokenize(expected_text);
    let actual_tokens = tokenize(actual_text);
    classify_token_diff_ops_detailed(&expected_tokens, &actual_tokens)
}
662
/// Strips a single leading `-` or `+` diff marker from a patch line, if present.
fn strip_patch_line_prefix(line: &str) -> &str {
    // Both markers are single-byte ASCII, so slicing off one byte is safe.
    match line.as_bytes().first() {
        Some(b'-' | b'+') => &line[1..],
        _ => line,
    }
}

/// Splits a unified diff into contiguous change blocks.
///
/// Each block is an `(old_text, new_text)` pair: the joined `-` lines and the
/// joined `+` lines of one uninterrupted run of changed lines. Context lines
/// and diff metadata terminate the current block.
fn extract_patch_change_blocks(patch: &str) -> Vec<(String, String)> {
    // Joins one side of a block, with the +/- markers removed.
    fn join_side(lines: &[&str]) -> String {
        lines
            .iter()
            .map(|line| strip_patch_line_prefix(line))
            .collect::<Vec<_>>()
            .join("\n")
    }

    // Emits the pending block (if any) and resets the accumulators.
    fn flush(
        old_lines: &mut Vec<&str>,
        new_lines: &mut Vec<&str>,
        blocks: &mut Vec<(String, String)>,
    ) {
        if old_lines.is_empty() && new_lines.is_empty() {
            return;
        }
        blocks.push((join_side(old_lines), join_side(new_lines)));
        old_lines.clear();
        new_lines.clear();
    }

    let mut blocks = Vec::new();
    let mut old_lines: Vec<&str> = Vec::new();
    let mut new_lines: Vec<&str> = Vec::new();

    for line in patch.lines() {
        // Metadata must be recognized before the +/- tests below, because
        // `---`/`+++` headers also start with those characters.
        let is_metadata = line.starts_with("---")
            || line.starts_with("+++")
            || line.starts_with("@@")
            || line.starts_with("diff ")
            || line.starts_with("index ");

        if is_metadata {
            flush(&mut old_lines, &mut new_lines, &mut blocks);
        } else if line.starts_with('-') {
            old_lines.push(line);
        } else if line.starts_with('+') {
            new_lines.push(line);
        } else {
            flush(&mut old_lines, &mut new_lines, &mut blocks);
        }
    }

    flush(&mut old_lines, &mut new_lines, &mut blocks);
    blocks
}
718
719fn collect_patch_side_text<F>(patch: &str, mut select_side: F) -> String
720where
721 F: FnMut(&(String, String)) -> &str,
722{
723 let mut text = String::new();
724
725 for block in extract_patch_change_blocks(patch) {
726 let block_text = select_side(&block);
727 if block_text.is_empty() {
728 continue;
729 }
730
731 if !text.is_empty() {
732 text.push('\n');
733 }
734 text.push_str(block_text);
735 }
736
737 text
738}
739
/// Full breakdown of a token-match comparison, for debugging and reporting.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenMatchDebugReport {
    /// Concatenated text of all lines the expected patch deletes.
    pub expected_deleted_text: String,
    /// Concatenated text of all lines the actual patch deletes.
    pub actual_deleted_text: String,
    /// Concatenated text of all lines the expected patch inserts.
    pub expected_inserted_text: String,
    /// Concatenated text of all lines the actual patch inserts.
    pub actual_inserted_text: String,
    /// Per-token classification of the deleted-text comparison.
    pub deleted: TokenClassificationDetail,
    /// Per-token classification of the inserted-text comparison.
    pub inserted: TokenClassificationDetail,
    /// Combined metrics over the deletion and insertion comparisons.
    pub metrics: ClassificationMetrics,
}
750
/// Computes token-match precision/recall counts between expected and actual patches.
///
/// Deletions and insertions are aligned independently, then their counts are summed.
/// Tokenization uses `word_diff::tokenize`, so identifiers, whitespace runs, and punctuation
/// are compared using the same token boundaries as the word-diff view.
pub fn token_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
    // Same computation as the debug report; only the metrics are kept.
    token_match_debug_report(expected_patch, actual_patch).metrics
}
759
760pub fn token_match_debug_report(expected_patch: &str, actual_patch: &str) -> TokenMatchDebugReport {
761 let expected_deleted =
762 collect_patch_side_text(expected_patch, |(old_text, _)| old_text.as_str());
763 let actual_deleted = collect_patch_side_text(actual_patch, |(old_text, _)| old_text.as_str());
764 let expected_inserted =
765 collect_patch_side_text(expected_patch, |(_, new_text)| new_text.as_str());
766 let actual_inserted = collect_patch_side_text(actual_patch, |(_, new_text)| new_text.as_str());
767
768 let (mut metrics, deleted_detail) =
769 classify_token_texts_detailed(&expected_deleted, &actual_deleted);
770 let (inserted_metrics, inserted_detail) =
771 classify_token_texts_detailed(&expected_inserted, &actual_inserted);
772 metrics.accumulate(&inserted_metrics);
773
774 TokenMatchDebugReport {
775 expected_deleted_text: expected_deleted,
776 actual_deleted_text: actual_deleted,
777 expected_inserted_text: expected_inserted,
778 actual_inserted_text: actual_inserted,
779 deleted: deleted_detail,
780 inserted: inserted_detail,
781 metrics,
782 }
783}
784
785/// Counts the number of inserted and deleted tokens in a unified diff patch.
786///
787/// Tokens are words and whitespace sequences (as defined by `word_diff::tokenize`).
788/// Within each hunk, the old (`-`) and new (`+`) lines are compared at the token level
789/// using an LCS-based diff, so modified lines only count the actually changed tokens
790/// rather than the entire line.
791
792pub fn count_patch_token_changes(patch: &str) -> TokenChangeCounts {
793 let mut counts = TokenChangeCounts::default();
794
795 for (old_text, new_text) in extract_patch_change_blocks(patch) {
796 let metrics = classify_token_texts(&old_text, &new_text);
797 counts.deleted_tokens += metrics.false_negatives;
798 counts.inserted_tokens += metrics.false_positives;
799 }
800
801 counts
802}
803
#[cfg(test)]
mod test_optimization {
    //! Tests for the changed-region optimization of `delta_chr_f`, including
    //! an exhaustive comparison against the unoptimized reference scorer.
    use super::*;

    #[test]
    fn test_extract_changed_regions_simple() {
        let original: Vec<char> = "hello world".chars().collect();
        let modified: Vec<char> = "hello there".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);

        // "world" vs "there" - with 5 chars context, we get "ello world" vs "ello there"
        // (or less if not enough chars available)
        assert!(orig_region.len() < original.len());
        assert!(mod_region.len() < modified.len());
    }

    #[test]
    fn test_extract_changed_regions_insertion() {
        let original: Vec<char> = "abcdef".chars().collect();
        let modified: Vec<char> = "abcXYZdef".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);

        // The insertion is between c and d, so we need context around that point
        assert!(orig_region.len() <= original.len());
        assert!(mod_region.iter().collect::<String>().contains("XYZ"));
    }

    #[test]
    fn test_extract_changed_regions_identical() {
        let text: Vec<char> = "identical text".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&text, &text);

        // When texts are identical, regions should be empty
        assert!(orig_region.is_empty());
        assert!(mod_region.is_empty());
    }

    #[test]
    fn test_optimized_matches_original_score() {
        // Test that our optimized version produces the same results
        let test_cases = vec![
            ("hello world", "hello there", "hello world"),
            (
                "fn main() {}",
                "fn main() { println!(); }",
                "fn main() { print!(); }",
            ),
            ("abcdefghij", "abcXXXghij", "abcYYghij"),
            ("unchanged", "unchanged", "unchanged"),
            (
                "prefix middle suffix",
                "prefix CHANGED suffix",
                "prefix middle suffix",
            ),
        ];

        for (original, expected, actual) in test_cases {
            let score = delta_chr_f(original, expected, actual);
            // Just verify it produces a reasonable score (0-100)
            assert!(
                score >= 0.0 && score <= 100.0,
                "Score {} out of range for ({}, {}, {})",
                score,
                original,
                expected,
                actual
            );
        }
    }

    #[test]
    fn test_optimized_equals_reference() {
        // Comprehensive test that optimized version matches reference implementation exactly
        let test_cases = vec![
            // Basic cases
            ("hello world", "hello there", "hello world"),
            ("hello world", "hello there", "hello there"),
            ("unchanged", "unchanged", "unchanged"),
            // Code-like cases
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
            ),
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Goodbye\"); }",
            ),
            // Insertion
            ("abcdef", "abcXYZdef", "abcdef"),
            ("abcdef", "abcXYZdef", "abcXYZdef"),
            ("abcdef", "abcXYZdef", "abcABCdef"),
            // Deletion
            ("abcXYZdef", "abcdef", "abcXYZdef"),
            ("abcXYZdef", "abcdef", "abcdef"),
            // Multiple changes (simulated by different expected/actual)
            ("one two three four", "one THREE four", "one two FOUR"),
            // Edge cases
            ("a", "b", "c"),
            ("", "abc", ""),
            ("abc", "", "abc"),
            // Longer text with small change
            (
                "This is a longer piece of text that contains many words and characters to process",
                "This is a longer piece of TEXT that contains many words and characters to process",
                "This is a longer piece of text that contains many words and characters to process",
            ),
            // Change at the beginning
            (
                "ORIGINAL start of text",
                "NEW start of text",
                "DIFFERENT start of text",
            ),
            // Change at the end
            (
                "text ending ORIGINAL",
                "text ending NEW",
                "text ending DIFFERENT",
            ),
            // Whitespace (should be ignored)
            ("hello world", "hello there", "hello world"),
            ("a b c d", "a X c d", "a Y c d"),
        ];

        for (original, expected, actual) in test_cases {
            let optimized_score = delta_chr_f(original, expected, actual);
            let reference_score = delta_chr_f_reference(original, expected, actual);

            // The two implementations must agree to within float noise.
            assert!(
                (optimized_score - reference_score).abs() < 1e-10,
                "Mismatch for ({:?}, {:?}, {:?}):\n optimized: {}\n reference: {}",
                original,
                expected,
                actual,
                optimized_score,
                reference_score
            );
        }
    }
}
948
// Unit tests for the patch-scoring helpers defined earlier in this file:
// delta_chr_f, braces_disbalance, extract_changed_lines_from_diff,
// exact_lines_match, token_match (+ debug report), editable-region and
// isolated-whitespace checks, patch token counting, and whitespace collapsing.
#[cfg(test)]
mod test {
    use super::*;
    use crate::example::ActualCursor;
    use indoc::indoc;

    /// Builds an `ActualCursor` at column 0 of the given one-based line,
    /// converting to the zero-based `row` that `ActualCursor` stores.
    /// NOTE(review): `one_based_line - 1` underflows for 0; all callers
    /// in this module pass lines >= 1.
    fn cursor_on_line(one_based_line: u32) -> ActualCursor {
        ActualCursor {
            path: String::new(),
            row: one_based_line - 1,
            column: 0,
            offset: 0,
            editable_region_offset: None,
        }
    }

    #[test]
    fn test_delta_chr_f_perfect_match() {
        let original = "fn main() { println!(\"Hello\");}";
        let expected = "fn main() { println!(\"Hello, World!\");}";

        // Actual output equals the expected output, so the score is ~100.
        let score = delta_chr_f(original, expected, expected);
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_delta_chr_f_wrong_edit() {
        // When the edit is wrong
        let original = "one two three";
        let expected = "one three"; // deleted "two "
        let actual = "one two four"; // deleted "three", added "four"

        // Then the score should be low
        let score = delta_chr_f(original, expected, actual);
        assert!(score > 20.0 && score < 40.0);
    }

    #[test]
    fn test_delta_chr_f_partial_match() {
        let original = "let x = 42;";
        let expected = "let x = 100;";
        let actual = "let x = 99;";

        // We got the edit location right, but the replacement text is wrong.
        // Deleted ngrams will match, bringing the score somewhere in the middle.
        let score = delta_chr_f(original, expected, actual);
        assert!(score > 40.0 && score < 60.0);
    }

    #[test]
    fn test_delta_chr_f_missed_edit() {
        // When the prediction makes no changes
        let original = "prefix old suffix";
        let expected = "prefix new suffix";
        let actual = "prefix old suffix"; // no change

        // Then the score should be low (all expected changes are false negatives)
        let score = delta_chr_f(original, expected, actual);
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_extra_edit() {
        // When adding unexpected content
        let original = "helloworld";
        let expected = "helloworld"; // no change expected
        let actual = "helloextraworld"; // added "extra"

        // Then the score should be low (all actual changes are false positives)
        let score = delta_chr_f(original, expected, actual);
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_no_changes() {
        // Identical original/expected/actual should score ~100.
        let text = "unchanged text";
        let score = delta_chr_f(text, text, text);
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_braces_disbalance() {
        // Fully balanced input produces a disbalance of zero.
        let text = "let x = { 1 + 2 };";
        assert_eq!(braces_disbalance(text), 0);

        // One unclosed `{` counts as 1.
        let text = "let x = { 1 + 2";
        assert_eq!(braces_disbalance(text), 1);

        // Mismatched pair: the unclosed `{` and the unmatched `)` each count,
        // giving 2.
        let text = "let x = { 1 + 2 )";
        assert_eq!(braces_disbalance(text), 2);
    }

    #[test]
    fn test_extract_changed_lines_from_diff() {
        // Only `-`/`+` change lines are collected; context lines and the
        // `---`/`+++`/`@@` headers are excluded.
        let diff = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn main() {
-    println!("hello");
+    println!("world");
 }"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-    println!(\"hello\");"), Some(&1));
        assert_eq!(counts.get("+    println!(\"world\");"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_extract_changed_lines_skips_headers() {
        // `diff --git` and `index` preamble lines must not be mistaken for
        // changed lines even though they are not prefixed with a space.
        let diff = r#"diff --git a/file.rs b/file.rs
index abc123..def456 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,2 @@
-old line
+new line"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-old line"), Some(&1));
        assert_eq!(counts.get("+new line"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_exact_lines_match_perfect() {
        // Identical patches: every changed line is a true positive.
        let expected = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 4);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_partial() {
        let expected = r#"-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"-old line 1
+new line 1
+extra line"#;

        let metrics = exact_lines_match(expected, actual);
        // TP: "-old line 1" and "+new line 1" (2)
        // FP: "+extra line" (1)
        // FN: "-old line 2" and "+new line 2" (2)
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 1);
        assert_eq!(metrics.false_negatives, 2);
    }

    #[test]
    fn test_exact_lines_match_no_overlap() {
        // Completely disjoint patches: zero precision and recall.
        let expected = r#"-line a
+line b"#;

        let actual = r#"-line x
+line y"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 2);
        assert_eq!(metrics.false_negatives, 2);
        assert!((metrics.precision()).abs() < 1e-6);
        assert!((metrics.recall()).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_duplicate_lines() {
        // Repeated identical lines are matched by multiplicity, not as a set.
        let expected = r#"+line a
+line a
+line a"#;

        let actual = r#"+line a
+line a"#;

        let metrics = exact_lines_match(expected, actual);
        // Expected has 3 "+line a", actual has 2
        // TP: 2, FN: 1, FP: 0
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 1);
    }

    #[test]
    fn test_exact_lines_match_empty_patches() {
        // Two empty patches agree vacuously: all counts are zero.
        let metrics = exact_lines_match("", "");
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
    }

    #[test]
    fn test_token_match_perfect() {
        // Identical patches: no FP/FN, perfect precision/recall/F1.
        let expected = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let metrics = token_match(expected, actual);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!(metrics.true_positives > 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_token_match_partial_subset_keeps_high_precision() {
        // The actual patch inserts a strict subset of the expected tokens:
        // everything it says is right (precision 1.0) but it misses tokens
        // (recall < 1.0).
        let expected = indoc! {"
            @@ -1,2 +1,6 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +    language: OnceCell<Language>,
            +    external_files: Option<Vec<PathBuf>>,
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,3 @@
            -str
            +struct LanguageEntry {
            +}
        "};

        let metrics = token_match(expected, actual);
        assert!(metrics.true_positives > 0);
        assert_eq!(metrics.false_positives, 0);
        assert!(metrics.false_negatives > 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!(metrics.recall() < 1.0);
    }

    #[test]
    fn test_token_match_counts_wrong_tokens_as_fp_and_fn() {
        // Entirely different identifiers: expected tokens become FNs and
        // actual tokens become FPs.
        let expected = indoc! {"
            @@ -1,1 +1,1 @@
            -old_name
            +new_name
        "};

        let actual = indoc! {"
            @@ -1,1 +1,1 @@
            -different_old
            +different_new
        "};

        let metrics = token_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert!(metrics.false_positives > 0);
        assert!(metrics.false_negatives > 0);
    }

    #[test]
    fn test_token_match_debug_report_metrics_match_token_match() {
        // The debug report must agree with token_match: same aggregate
        // metrics, and the per-token classifications must sum to them.
        let expected = indoc! {"
            @@ -1,2 +1,3 @@
            -str
            +struct LanguageEntry {
            +}
        "};

        let actual = indoc! {"
            @@ -1,2 +1,4 @@
            -str
            +struct LanguageEntry {
            +    path: PathBuf,
            +}
        "};

        let metrics = token_match(expected, actual);
        let report = token_match_debug_report(expected, actual);

        assert_eq!(report.metrics, metrics);

        // Tally classified tokens across both the deleted and inserted sides.
        let expected_tp = report
            .deleted
            .expected_tokens
            .iter()
            .chain(report.inserted.expected_tokens.iter())
            .filter(|token| token.class == TokenClass::TruePositive)
            .count();
        let expected_fn = report
            .deleted
            .expected_tokens
            .iter()
            .chain(report.inserted.expected_tokens.iter())
            .filter(|token| token.class == TokenClass::FalseNegative)
            .count();
        let actual_tp = report
            .deleted
            .actual_tokens
            .iter()
            .chain(report.inserted.actual_tokens.iter())
            .filter(|token| token.class == TokenClass::TruePositive)
            .count();
        let actual_fp = report
            .deleted
            .actual_tokens
            .iter()
            .chain(report.inserted.actual_tokens.iter())
            .filter(|token| token.class == TokenClass::FalsePositive)
            .count();

        // TPs must be consistent from both the expected and actual views.
        assert_eq!(expected_tp, report.metrics.true_positives);
        assert_eq!(actual_tp, report.metrics.true_positives);
        assert_eq!(expected_fn, report.metrics.false_negatives);
        assert_eq!(actual_fp, report.metrics.false_positives);
    }

    #[test]
    fn test_token_match_debug_report_marks_inserted_extra_tokens_as_fp() {
        // "value" vs "value_extra" are distinct tokens: the actual one is a
        // false positive and the expected one a false negative.
        let expected = indoc! {"
            @@ -1,1 +1,1 @@
            -a
            +value
        "};

        let actual = indoc! {"
            @@ -1,1 +1,1 @@
            -a
            +value_extra
        "};

        let report = token_match_debug_report(expected, actual);

        assert_eq!(report.metrics.false_positives, 1);
        assert_eq!(report.metrics.false_negatives, 1);

        assert!(
            report
                .inserted
                .actual_tokens
                .iter()
                .any(|token| token.token == "value_extra"
                    && token.class == TokenClass::FalsePositive)
        );
        assert!(
            report
                .inserted
                .expected_tokens
                .iter()
                .any(|token| token.token == "value" && token.class == TokenClass::FalseNegative)
        );
    }

    #[test]
    fn test_is_editable_region_correct() {
        // A patch that deletes lines before the file's real content is
        // flagged as having an incorrect editable region.
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -context
            -removed
            -from the beginning of the file
             import sys
            +sys.exit(0)

        "};
        assert!(!is_editable_region_correct(patch));

        // An empty hunk has nothing wrong with its editable region.
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
        "};
        assert!(is_editable_region_correct(patch));
    }

    #[test]
    fn test_isolated_whitespace_purely_whitespace_patch() {
        // The only change is an added blank line, not adjacent to any real
        // change, so it is an isolated whitespace change.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_adjacent_to_real_change() {
        // The blank line is part of the same change group as a real code
        // insertion, so it is not considered isolated.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
            +    let x = 1;
                 println!(\"hello\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_no_whitespace_changes() {
        // A plain code replacement contains no whitespace-only changes.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_deletion() {
        // Removing a lone blank line also counts as an isolated whitespace
        // change, not just additions.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_mixed_groups() {
        // One isolated whitespace group is enough to flag the patch, even
        // when other groups contain real changes.
        let patch = indoc! {"
            @@ -1,7 +1,8 @@
             fn main() {
            +
                 let x = 1;
            -    let y = 2;
            +    let y = 3;

            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_empty_patch() {
        // No changes at all: trivially no isolated whitespace.
        let patch = "";
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_skipped_on_cursor_line() {
        // The addition of a blank line at new-file line 2 should be skipped
        // because the cursor is on that line.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        // New-file line 2 is the added blank line
        let cursor = cursor_on_line(2);
        assert!(!has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
        // The blank line is at new-file line 2, but the cursor is on line 1.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(1);
        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
        // Deletions don't have a new-file line, so cursor can't suppress them.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(2);
        assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
    }

    #[test]
    fn test_count_patch_token_changes_real_world_rename() {
        // Real-world patch that was reported as returning 0 tokens
        // (regression test; note the backslash in the Windows-style path).
        let patch = "--- a/sip_call\\README.md\n+++ b/sip_call\\README.md\n@@ -1,1 +1,1 @@\n-# \n+# SIP Call\n";
        let counts = count_patch_token_changes(patch);
        // "# " vs "# SIP Call" — the "SIP" and "Call" tokens (and a whitespace token) are inserted
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert_eq!(counts.deleted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_real_world_expansion() {
        // Real-world patch: single token expanded to multiple lines
        let patch = "--- a/task1/src/app/app.html\n+++ b/task1/src/app/app.html\n@@ -1,7 +1,9 @@\n <style>\n-  m\n+  main {\n+    \n+  }\n </style>\n \n <main>\n \n </main>\n";
        let counts = count_patch_token_changes(patch);
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert!(
            counts.deleted_tokens > 0,
            "expected deleted tokens > 0, got {}",
            counts.deleted_tokens
        );
    }

    #[test]
    fn test_count_patch_token_changes_simple_replacement() {
        // A one-word replacement: exactly one deleted and one inserted token.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 1, "deleted: \"hello\"");
        assert_eq!(counts.inserted_tokens, 1, "inserted: \"world\"");
    }

    #[test]
    fn test_count_patch_token_changes_insertion_only() {
        // Pure insertion: nothing deleted.
        let patch = indoc! {"
            @@ -1,2 +1,3 @@
             fn main() {
            +    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert!(counts.inserted_tokens > 0);
    }

    #[test]
    fn test_count_patch_token_changes_deletion_only() {
        // Pure deletion: nothing inserted.
        let patch = indoc! {"
            @@ -1,3 +1,2 @@
             fn main() {
            -    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert!(counts.deleted_tokens > 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_empty_patch() {
        // An empty patch yields zero counts on both sides.
        let patch = "";
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_multiple_hunks() {
        // Token counts accumulate across hunks.
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    let x = 1;
            +    let x = 2;
             }
            @@ -10,3 +10,3 @@
             fn other() {
            -    let y = 3;
            +    let y = 4;
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 2, "deleted: \"1\" and \"3\"");
        assert_eq!(counts.inserted_tokens, 2, "inserted: \"2\" and \"4\"");
    }

    #[test]
    fn test_count_patch_token_changes_multiword_change() {
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -hello world foo
            +hello bar baz
        "};
        let counts = count_patch_token_changes(patch);
        // "world" and "foo" deleted, "bar" and "baz" inserted
        // (whitespace tokens between them may also count)
        assert!(counts.deleted_tokens >= 2);
        assert!(counts.inserted_tokens >= 2);
    }

    #[test]
    fn test_whitespace_collapse() {
        // The run of newlines " \n\n\n " collapses to a single '\n'; the
        // single spaces around it are preserved.
        let text = "abc \n\n\n 123";
        let collapsed = collapse_whitespace(text.chars());
        assert_eq!(
            collapsed,
            vec!['a', 'b', 'c', ' ', '\n', ' ', '1', '2', '3']
        );
    }
}