use std::collections::HashMap;

use crate::tokenize::tokenize;
use serde::Serialize;
use similar::{DiffTag, TextDiff};

pub type Counts = HashMap<String, usize>;
type CountsDelta = HashMap<String, isize>;

/// Context characters needed on each side of a change to capture all affected n-grams
const CONTEXT_CHARS: usize = CHR_F_CHAR_ORDER - 1;

#[derive(Default, Debug, Clone, Serialize)]
pub struct ClassificationMetrics {
    pub true_positives: usize,
    pub false_positives: usize,
    pub false_negatives: usize,
}

impl ClassificationMetrics {
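    /// Builds TP/FP/FN counts by comparing two multisets of n-grams.
    ///
    /// A minimal sketch with hypothetical values (not taken from the test suite):
    ///
    /// ```ignore
    /// let expected = Counts::from([("ab".to_string(), 2)]);
    /// let actual = Counts::from([("ab".to_string(), 1), ("cd".to_string(), 1)]);
    /// let m = ClassificationMetrics::from_counts(&expected, &actual);
    /// // One shared "ab" is a true positive, the missing "ab" a false negative,
    /// // and the extra "cd" a false positive.
    /// assert_eq!((m.true_positives, m.false_positives, m.false_negatives), (1, 1, 1));
    /// ```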
    pub fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
        let mut true_positives = 0;
        let mut false_positives = 0;
        let mut false_negatives = 0;

        for (ngram, &expected_count) in expected {
            let actual_count = *actual.get(ngram).unwrap_or(&0);
            if actual_count > expected_count {
                false_positives += actual_count - expected_count;
            } else {
                false_negatives += expected_count - actual_count;
            }
            true_positives += expected_count.min(actual_count);
        }

        for (ngram, &actual_count) in actual {
            if !expected.contains_key(ngram) {
                false_positives += actual_count;
            }
        }

        ClassificationMetrics {
            true_positives,
            false_positives,
            false_negatives,
        }
    }

    pub fn accumulate(&mut self, other: &ClassificationMetrics) {
        self.true_positives += other.true_positives;
        self.false_positives += other.false_positives;
        self.false_negatives += other.false_negatives;
    }

    pub fn precision(&self) -> f64 {
        if self.true_positives + self.false_positives == 0 {
            0.0
        } else {
            self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
        }
    }

    pub fn recall(&self) -> f64 {
        if self.true_positives + self.false_negatives == 0 {
            0.0
        } else {
            self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
        }
    }

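    /// Harmonic mean of precision and recall (the balanced F-measure).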
    pub fn f1(&self) -> f64 {
        let precision = self.precision();
        let recall = self.recall();
        if precision + recall == 0.0 {
            0.0
        } else {
            2.0 * precision * recall / (precision + recall)
        }
    }
}

enum ChrfWhitespace {
    /// Preserve whitespace as-is
    #[allow(unused)]
    Unchanged,

    /// Ignore all whitespace differences
    #[allow(unused)]
    Ignore,

    /// Collapse whitespace into single spaces
    Collapse,
}

const CHR_F_CHAR_ORDER: usize = 6;
const CHR_F_BETA: f64 = 0.5;
const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Collapse;

pub fn delta_chr_f_beta() -> f64 {
    CHR_F_BETA
}

#[derive(Default, Debug, Clone, Serialize)]
pub struct DeltaChrFMetrics {
    pub score: f64,
    pub beta: f64,
    pub counts: ClassificationMetrics,
    pub precision: f64,
    pub recall: f64,
}

/// Computes delta-chrF metrics that compare two sets of edits.
///
/// This metric works by:
/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
/// 2. Comparing these deltas to measure how well actual edits match expected edits
///
/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
/// the expected edits.
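///
/// A hedged usage sketch (when actual equals expected, every per-order delta matches
/// exactly, so the score is 100):
///
/// ```ignore
/// let metrics = delta_chr_f("let x = 1;", "let x = 2;", "let x = 2;");
/// assert!((metrics.score - 100.0).abs() < 1e-2);
/// ```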
pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> DeltaChrFMetrics {
    if original == expected && expected == actual {
        return DeltaChrFMetrics {
            score: 100.0,
            beta: CHR_F_BETA,
            precision: 1.0,
            recall: 1.0,
            ..DeltaChrFMetrics::default()
        };
    }

    let orig_chars: Vec<char> = filter_whitespace_chars(original);
    let exp_chars: Vec<char> = filter_whitespace_chars(expected);
    let act_chars: Vec<char> = filter_whitespace_chars(actual);

    // Find the changed regions between original→expected and original→actual.
    // We only need to compute n-grams on these regions (plus context for boundary n-grams).
    let (orig_for_exp, exp_region) = extract_changed_regions(&orig_chars, &exp_chars);
    let (orig_for_act, act_region) = extract_changed_regions(&orig_chars, &act_chars);

    let mut total_precision = 0.0;
    let mut total_recall = 0.0;
    let mut total_counts = ClassificationMetrics::default();

    for order in 1..=CHR_F_CHAR_ORDER {
        let orig_ngrams_for_exp = count_ngrams_from_chars(&orig_for_exp, order);
        let exp_ngrams = count_ngrams_from_chars(&exp_region, order);
        let expected_delta = compute_ngram_delta(&exp_ngrams, &orig_ngrams_for_exp);

        let orig_ngrams_for_act = count_ngrams_from_chars(&orig_for_act, order);
        let act_ngrams = count_ngrams_from_chars(&act_region, order);
        let actual_delta = compute_ngram_delta(&act_ngrams, &orig_ngrams_for_act);

        if expected_delta.is_empty() && actual_delta.is_empty() {
            total_precision += 1.0;
            total_recall += 1.0;
            continue;
        }

        let expected_counts = ngram_delta_to_counts(&expected_delta);
        let actual_counts = ngram_delta_to_counts(&actual_delta);

        let counts = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
        total_precision += counts.precision();
        total_recall += counts.recall();
        total_counts.accumulate(&counts);
    }

    let average_precision = total_precision / CHR_F_CHAR_ORDER as f64;
    let average_recall = total_recall / CHR_F_CHAR_ORDER as f64;
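    // F-beta with β = CHR_F_BETA: (1 + β²)·P·R / (β²·P + R), scaled to the 0..=100 range.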
    let score = if average_precision + average_recall == 0.0 {
        0.0
    } else {
        (1.0 + CHR_F_BETA * CHR_F_BETA) * average_precision * average_recall
            / (CHR_F_BETA * CHR_F_BETA * average_precision + average_recall)
            * 100.0
    };

    DeltaChrFMetrics {
        score,
        beta: CHR_F_BETA,
        counts: total_counts,
        precision: average_precision,
        recall: average_recall,
    }
}

/// Reference implementation of delta-chrF metrics (original, non-optimized version).
/// Used for testing that the optimized version produces identical results.
#[cfg(test)]
fn delta_chr_f_reference(original: &str, expected: &str, actual: &str) -> DeltaChrFMetrics {
    if original == expected && expected == actual {
        return DeltaChrFMetrics {
            score: 100.0,
            beta: CHR_F_BETA,
            precision: 1.0,
            recall: 1.0,
            ..DeltaChrFMetrics::default()
        };
    }

    let original_ngrams = chr_f_ngram_counts(original);
    let expected_ngrams = chr_f_ngram_counts(expected);
    let actual_ngrams = chr_f_ngram_counts(actual);

    let mut total_precision = 0.0;
    let mut total_recall = 0.0;
    let mut total_counts = ClassificationMetrics::default();

    for order in 0..CHR_F_CHAR_ORDER {
        let expected_delta = compute_ngram_delta(&expected_ngrams[order], &original_ngrams[order]);
        let actual_delta = compute_ngram_delta(&actual_ngrams[order], &original_ngrams[order]);

        if expected_delta.is_empty() && actual_delta.is_empty() {
            total_precision += 1.0;
            total_recall += 1.0;
            continue;
        }

        let expected_counts = ngram_delta_to_counts(&expected_delta);
        let actual_counts = ngram_delta_to_counts(&actual_delta);

        let counts = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
        total_precision += counts.precision();
        total_recall += counts.recall();
        total_counts.accumulate(&counts);
    }

    let average_precision = total_precision / CHR_F_CHAR_ORDER as f64;
    let average_recall = total_recall / CHR_F_CHAR_ORDER as f64;
    let score = if average_precision + average_recall == 0.0 {
        0.0
    } else {
        (1.0 + CHR_F_BETA * CHR_F_BETA) * average_precision * average_recall
            / (CHR_F_BETA * CHR_F_BETA * average_precision + average_recall)
            * 100.0
    };

    DeltaChrFMetrics {
        score,
        beta: CHR_F_BETA,
        counts: total_counts,
        precision: average_precision,
        recall: average_recall,
    }
}

/// Normalize whitespace in a string according to `CHR_F_WHITESPACE` and return it as `Vec<char>`.
fn filter_whitespace_chars(text: &str) -> Vec<char> {
    match CHR_F_WHITESPACE {
        ChrfWhitespace::Unchanged => text.chars().collect(),
        ChrfWhitespace::Ignore => text.chars().filter(|c| !c.is_whitespace()).collect(),
        ChrfWhitespace::Collapse => collapse_whitespace(text.chars()),
    }
}

/// Collapse whitespace runs: consecutive newlines become a single newline, and
/// consecutive non-newline whitespace becomes a single space (collapsed separately).
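///
/// Mirrors the `test_whitespace_collapse` case below:
///
/// ```ignore
/// assert_eq!(
///     collapse_whitespace("abc \n\n\n 123".chars()),
///     vec!['a', 'b', 'c', ' ', '\n', ' ', '1', '2', '3']
/// );
/// ```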
fn collapse_whitespace(chars: impl Iterator<Item = char>) -> Vec<char> {
    let mut result = Vec::new();
    let mut last_whitespace = None;
    for c in chars {
        if c.is_whitespace() && c != '\n' {
            if last_whitespace != Some(' ') {
                result.push(' ');
                last_whitespace = Some(' ');
            }
        } else if c == '\n' {
            if last_whitespace != Some('\n') {
                result.push(c);
                last_whitespace = Some('\n');
            }
        } else {
            result.push(c);
            last_whitespace = None;
        }
    }
    result
}

/// Extract only the changed regions between two texts, with context for n-gram boundaries.
///
/// Returns (original_affected_region, modified_affected_region) as Vec<char>.
///
/// The key insight: when computing n-gram delta between two nearly-identical texts,
/// n-grams from unchanged regions cancel out. We only need to process:
/// 1. The changed content itself
/// 2. CONTEXT_CHARS (n-1) characters before and after, to capture boundary-crossing n-grams
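///
/// A hedged illustration (assumes CHR_F_CHAR_ORDER = 6, hence CONTEXT_CHARS = 5):
///
/// ```ignore
/// let original: Vec<char> = "hello world".chars().collect();
/// let modified: Vec<char> = "hello there".chars().collect();
/// let (orig, modi) = extract_changed_regions(&original, &modified);
/// // "world"/"there" differ; five context chars reach back into "ello ".
/// assert_eq!(orig.iter().collect::<String>(), "ello world");
/// assert_eq!(modi.iter().collect::<String>(), "ello there");
/// ```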
fn extract_changed_regions(original: &[char], modified: &[char]) -> (Vec<char>, Vec<char>) {
    // Find longest common prefix
    let prefix_len = original
        .iter()
        .zip(modified.iter())
        .take_while(|(a, b)| a == b)
        .count();

    // Find longest common suffix (that doesn't overlap with prefix)
    let orig_remaining = original.len().saturating_sub(prefix_len);
    let mod_remaining = modified.len().saturating_sub(prefix_len);
    let max_suffix = orig_remaining.min(mod_remaining);

    let suffix_len = original
        .iter()
        .rev()
        .zip(modified.iter().rev())
        .take(max_suffix)
        .take_while(|(a, b)| a == b)
        .count();

    // Calculate the changed region boundaries
    let orig_change_start = prefix_len;
    let orig_change_end = original.len().saturating_sub(suffix_len);
    let mod_change_start = prefix_len;
    let mod_change_end = modified.len().saturating_sub(suffix_len);

    // If there's no actual change, return empty regions
    if orig_change_start >= orig_change_end && mod_change_start >= mod_change_end {
        return (Vec::new(), Vec::new());
    }

    // Expand to include context for n-gram boundaries
    let orig_context_start = orig_change_start.saturating_sub(CONTEXT_CHARS);
    let orig_context_end = (orig_change_end + CONTEXT_CHARS).min(original.len());
    let mod_context_start = mod_change_start.saturating_sub(CONTEXT_CHARS);
    let mod_context_end = (mod_change_end + CONTEXT_CHARS).min(modified.len());

    let orig_region: Vec<char> = original[orig_context_start..orig_context_end].to_vec();
    let mod_region: Vec<char> = modified[mod_context_start..mod_context_end].to_vec();

    (orig_region, mod_region)
}

/// Count n-grams directly from a char slice (avoids String allocation for the full text)
fn count_ngrams_from_chars(chars: &[char], n: usize) -> Counts {
    let mut counts = Counts::default();

    if chars.len() < n {
        return counts;
    }

    for window in chars.windows(n) {
        let ngram: String = window.iter().collect();
        *counts.entry(ngram).or_insert(0) += 1;
    }

    counts
}

#[allow(dead_code)]
fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
    let text = match CHR_F_WHITESPACE {
        ChrfWhitespace::Unchanged => text.to_string(),
        ChrfWhitespace::Ignore => text
            .chars()
            .filter(|c| !c.is_whitespace())
            .collect::<String>(),
        ChrfWhitespace::Collapse => collapse_whitespace(text.chars())
            .into_iter()
            .collect::<String>(),
    };

    (1..=CHR_F_CHAR_ORDER)
        .map(|order| count_ngrams(&text, order))
        .collect()
}

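/// Per-n-gram count difference `after - before`. N-grams present in `before`
/// keep an entry even when the difference is zero; e.g. going from {"ab": 2}
/// to {"ab": 1} yields {"ab": -1}.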
fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
    let mut delta = CountsDelta::default();

    for (ngram, &before_count) in before {
        let after_count = *after.get(ngram).unwrap_or(&0);
        delta.insert(ngram.clone(), after_count as isize - before_count as isize);
    }

    for (ngram, &after_count) in after {
        if !before.contains_key(ngram) {
            delta.insert(ngram.clone(), after_count as isize);
        }
    }

    delta
}

/// Convert negative counts to special deletion tokens.
/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
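///
/// A small sketch with hypothetical deltas:
///
/// ```ignore
/// let delta = CountsDelta::from([("foo".to_string(), -1), ("bar".to_string(), 2)]);
/// let counts = ngram_delta_to_counts(&delta);
/// assert_eq!(counts.get("¬foo"), Some(&1));
/// assert_eq!(counts.get("bar"), Some(&2));
/// ```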
fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
    let mut counts = Counts::default();

    for (ngram, &delta) in delta {
        if delta > 0 {
            counts.insert(ngram.clone(), delta as usize);
        } else if delta < 0 {
            counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
        }
    }

    counts
}

#[allow(dead_code)]
fn count_ngrams(text: &str, n: usize) -> Counts {
    let chars: Vec<char> = text.chars().collect();
    let mut counts = Counts::default();

    for window in chars.windows(n) {
        let ngram: String = window.iter().collect();
        *counts.entry(ngram).or_insert(0) += 1;
    }

    counts
}

/// Sums the absolute imbalance between opening and closing `{}`, `()`, and `[]` counts.
pub fn braces_disbalance(text: &str) -> usize {
    [('{', '}'), ('(', ')'), ('[', ']')]
        .iter()
        .map(|&(open, close)| {
            let opens = text.chars().filter(|&c| c == open).count() as isize;
            let closes = text.chars().filter(|&c| c == close).count() as isize;
            (opens - closes).unsigned_abs()
        })
        .sum()
}

/// Extracts changed lines from a unified diff string.
/// Returns a bag (multiset) of lines that were added (+) or removed (-).
/// The +/- prefix is included in the line to distinguish additions from deletions.
pub fn extract_changed_lines_from_diff(diff: &str) -> Counts {
    let mut counts = Counts::default();

    for line in diff.lines() {
        // Skip file headers (--- and +++)
        if line.starts_with("---") || line.starts_with("+++") {
            continue;
        }
        // Skip hunk headers (@@)
        if line.starts_with("@@") {
            continue;
        }
        // Skip diff header lines (diff --git, index, etc.)
        if line.starts_with("diff ") || line.starts_with("index ") {
            continue;
        }
        // Include added and removed lines (with their prefix)
        if line.starts_with('+') || line.starts_with('-') {
            *counts.entry(line.to_string()).or_insert(0) += 1;
        }
    }

    counts
}

/// Computes exact lines match metrics between expected and actual patches.
/// Treats changed lines as a bag (multiset): order is discarded but count matters.
/// Returns ClassificationMetrics with TP/FP/FN counts.
pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
    let expected_lines = extract_changed_lines_from_diff(expected_patch);
    let actual_lines = extract_changed_lines_from_diff(actual_patch);
    ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
}

/// Returns whether the patch contains any isolated whitespace-only changes.
///
/// A whitespace-only change is an added or deleted line whose content is empty or
/// contains only whitespace. It is "isolated" when it is not adjacent to any
/// substantive (non-whitespace) change within the same contiguous change group.
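///
/// A hedged sketch mirroring the blank-line test below:
///
/// ```ignore
/// let patch = "@@ -1,3 +1,4 @@\n fn main() {\n+\n     println!(\"hello\");\n }\n";
/// assert!(has_isolated_whitespace_changes(patch, None)); // lone blank-line insertion
/// ```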
pub fn has_isolated_whitespace_changes(patch_str: &str, cursor_row: Option<u32>) -> bool {
    let patch = Patch::parse_unified_diff(patch_str);

    let cursor_new_file_line = cursor_row.map(|row| (row + 1) as usize);

    for hunk in &patch.hunks {
        let lines = &hunk.lines;
        let mut new_text_line = hunk.new_start as usize;

        for (i, line) in lines.iter().enumerate() {
            let content = match line {
                PatchLine::Addition(s) => {
                    let addition_line = new_text_line;
                    new_text_line += 1;
                    if s.trim().is_empty() && cursor_new_file_line == Some(addition_line) {
                        continue;
                    }
                    s.as_str()
                }
                PatchLine::Deletion(s) => s.as_str(),
                PatchLine::Context(_) => {
                    new_text_line += 1;
                    continue;
                }
                _ => continue,
            };

            if !content.trim().is_empty() {
                continue;
            }

            if is_whitespace_change_isolated(lines, i) {
                return true;
            }
        }
    }

    false
}

fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
    // Look backward for a non-whitespace change before hitting a context line
    for line in lines[..index].iter().rev() {
        match line {
            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
                if !s.trim().is_empty() {
                    return false;
                }
            }
            _ => break,
        }
    }

    // Look forward for a non-whitespace change before hitting a context line
    for line in &lines[index + 1..] {
        match line {
            PatchLine::Addition(s) | PatchLine::Deletion(s) => {
                if !s.trim().is_empty() {
                    return false;
                }
            }
            _ => break,
        }
    }

    true
}

/// A simple proxy for whether the prediction respects the editable region.
pub fn is_editable_region_correct(actual_patch: &str) -> bool {
    // A typical sign of a wrong editable region is a run of deleted lines at the
    // very start of the patch; the heuristic below only inspects the first hunk.
    let patch = Patch::parse_unified_diff(actual_patch);
    if patch.hunks.is_empty() {
        return true;
    }

    let hunk = &patch.hunks[0];
    let mut deletions_at_start = 0;

    for line in hunk.lines.iter() {
        match line {
            PatchLine::Deletion(_) => deletions_at_start += 1,
            _ => break,
        }
    }

    if deletions_at_start >= 3 {
        return false;
    }

    true
}

#[derive(Debug, Default, Clone, Serialize)]
pub struct TokenChangeCounts {
    pub inserted_tokens: usize,
    pub deleted_tokens: usize,
}

/// Counts the number of inserted and deleted tokens in a unified diff patch.
///
/// Tokens are words and whitespace sequences (as defined by `crate::tokenize::tokenize`).
/// Within each hunk, the old (`-`) and new (`+`) lines are compared at the token level
/// using an LCS-based diff, so modified lines only count the actually changed tokens
/// rather than the entire line.
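///
/// Mirrors the simple-replacement test below:
///
/// ```ignore
/// let patch = "@@ -1,3 +1,3 @@\n fn main() {\n-    println!(\"hello\");\n+    println!(\"world\");\n }\n";
/// let counts = count_patch_token_changes(patch);
/// assert_eq!(counts.deleted_tokens, 1); // "hello"
/// assert_eq!(counts.inserted_tokens, 1); // "world"
/// ```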
pub fn count_patch_token_changes(patch: &str) -> TokenChangeCounts {
    let mut counts = TokenChangeCounts::default();
    let mut old_lines: Vec<&str> = Vec::new();
    let mut new_lines: Vec<&str> = Vec::new();

    let flush =
        |old_lines: &mut Vec<&str>, new_lines: &mut Vec<&str>, counts: &mut TokenChangeCounts| {
            if old_lines.is_empty() && new_lines.is_empty() {
                return;
            }

            let old_text: String = old_lines
                .iter()
                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
                .collect::<Vec<_>>()
                .join("\n");

            let new_text: String = new_lines
                .iter()
                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
                .collect::<Vec<_>>()
                .join("\n");

            let old_tokens = tokenize(&old_text);
            let new_tokens = tokenize(&new_text);
            let ops = diff_tokens(&old_tokens, &new_tokens);

            for op in ops {
                match op {
                    DiffOp::Equal(..) => {}
                    DiffOp::Delete(start, end) => {
                        counts.deleted_tokens += end - start;
                    }
                    DiffOp::Insert(start, end) => {
                        counts.inserted_tokens += end - start;
                    }
                    DiffOp::Replace {
                        old_start,
                        old_end,
                        new_start,
                        new_end,
                    } => {
                        counts.deleted_tokens += old_end - old_start;
                        counts.inserted_tokens += new_end - new_start;
                    }
                }
            }

            old_lines.clear();
            new_lines.clear();
        };

    for line in patch.lines() {
        if line.starts_with("---")
            || line.starts_with("+++")
            || line.starts_with("@@")
            || line.starts_with("diff ")
            || line.starts_with("index ")
        {
            flush(&mut old_lines, &mut new_lines, &mut counts);
        } else if line.starts_with('-') {
            old_lines.push(line);
        } else if line.starts_with('+') {
            new_lines.push(line);
        } else {
            flush(&mut old_lines, &mut new_lines, &mut counts);
        }
    }

    flush(&mut old_lines, &mut new_lines, &mut counts);
    counts
}

#[allow(dead_code)]
#[derive(Debug)]
enum DiffOp {
    Equal(usize, usize),
    Delete(usize, usize),
    Insert(usize, usize),
    Replace {
        old_start: usize,
        old_end: usize,
        new_start: usize,
        new_end: usize,
    },
}

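/// Thin adapter from `similar::TextDiff` ops over token slices to our `DiffOp` representation.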
fn diff_tokens<'a>(old: &[&'a str], new: &[&'a str]) -> Vec<DiffOp> {
    let diff = TextDiff::from_slices(old, new);
    diff.ops()
        .iter()
        .map(|op| {
            let tag = op.tag();
            let old_range = op.old_range();
            let new_range = op.new_range();
            match tag {
                DiffTag::Equal => DiffOp::Equal(old_range.start, old_range.end),
                DiffTag::Delete => DiffOp::Delete(old_range.start, old_range.end),
                DiffTag::Insert => DiffOp::Insert(new_range.start, new_range.end),
                DiffTag::Replace => DiffOp::Replace {
                    old_start: old_range.start,
                    old_end: old_range.end,
                    new_start: new_range.start,
                    new_end: new_range.end,
                },
            }
        })
        .collect()
}

/// Reconstruct old and new text from a unified diff.
///
/// Context and deletion lines form the old text; context and addition
/// lines form the new text. Returns `(old_text, new_text)`.
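///
/// A hedged sketch of the round-trip:
///
/// ```ignore
/// let (old, new) = reconstruct_texts_from_diff("@@ -1,2 +1,2 @@\n ctx\n-old\n+new\n");
/// assert_eq!(old, "ctx\nold");
/// assert_eq!(new, "ctx\nnew");
/// ```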
pub fn reconstruct_texts_from_diff(patch_str: &str) -> (String, String) {
    let patch = Patch::parse_unified_diff(patch_str);
    let mut old_lines: Vec<&str> = Vec::new();
    let mut new_lines: Vec<&str> = Vec::new();

    for hunk in &patch.hunks {
        for line in &hunk.lines {
            match line {
                PatchLine::Context(content) => {
                    old_lines.push(content);
                    new_lines.push(content);
                }
                PatchLine::Deletion(content) => {
                    old_lines.push(content);
                }
                PatchLine::Addition(content) => {
                    new_lines.push(content);
                }
                PatchLine::Garbage(_) => {}
            }
        }
    }

    (old_lines.join("\n"), new_lines.join("\n"))
}

#[derive(Debug, Default, Clone)]
struct Patch {
    hunks: Vec<Hunk>,
}

impl Patch {
    fn parse_unified_diff(unified_diff: &str) -> Patch {
        let mut current_file = String::new();
        let mut is_filename_inherited = false;
        let mut hunk = Hunk::default();
        let mut patch = Patch::default();
        let mut in_header = true;

        for line in unified_diff.lines() {
            if line.starts_with("--- ") || line.starts_with("+++") || line.starts_with("@@") {
                in_header = false;
            }

            if in_header {
                continue;
            }

            if line.starts_with("@@") {
                if !hunk.lines.is_empty() {
                    patch.hunks.push(hunk);
                }
                hunk = Hunk::from_header(line, &current_file, is_filename_inherited);
                is_filename_inherited = true;
            } else if let Some(path) = line.strip_prefix("--- ") {
                is_filename_inherited = false;
                let path = path.trim().strip_prefix("a/").unwrap_or(path);
                if path != "/dev/null" {
                    current_file = path.into();
                }
            } else if let Some(path) = line.strip_prefix("+++ ") {
                is_filename_inherited = false;
                let path = path.trim().strip_prefix("b/").unwrap_or(path);
                if path != "/dev/null" {
                    current_file = path.into();
                }
            } else if let Some(line) = line.strip_prefix('+') {
                hunk.lines.push(PatchLine::Addition(line.to_string()));
            } else if let Some(line) = line.strip_prefix('-') {
                hunk.lines.push(PatchLine::Deletion(line.to_string()));
            } else if let Some(line) = line.strip_prefix(' ') {
                hunk.lines.push(PatchLine::Context(line.to_string()));
            } else {
                hunk.lines.push(PatchLine::Garbage(line.to_string()));
            }
        }

        if !hunk.lines.is_empty() {
            patch.hunks.push(hunk);
        }

        patch
    }
}

#[derive(Debug, Default, Clone)]
struct Hunk {
    new_start: isize,
    lines: Vec<PatchLine>,
}

impl Hunk {
    fn from_header(header: &str, _filename: &str, _is_filename_inherited: bool) -> Self {
        let (_, _, new_start, _, _) = Self::parse_hunk_header(header);
        Self {
            new_start,
            lines: Vec::new(),
        }
    }

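    /// Parses a hunk header of the form `@@ -old_start,old_count +new_start,new_count @@ comment`
    /// into `(old_start, old_count, new_start, new_count, comment)`.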
    fn parse_hunk_header(line: &str) -> (isize, isize, isize, isize, String) {
        let header_part = line.trim_start_matches("@@").trim();
        let parts: Vec<&str> = header_part.split_whitespace().collect();

        if parts.len() < 2 {
            return (0, 0, 0, 0, String::new());
        }

        let old_part = parts[0].trim_start_matches('-');
        let new_part = parts[1].trim_start_matches('+');

        let (old_start, old_count) = Hunk::parse_hunk_header_range(old_part);
        let (new_start, new_count) = Hunk::parse_hunk_header_range(new_part);

        let comment = if parts.len() > 2 {
            parts[2..]
                .join(" ")
                .trim_start_matches("@@")
                .trim()
                .to_string()
        } else {
            String::new()
        };

        (
            old_start as isize,
            old_count as isize,
            new_start as isize,
            new_count as isize,
            comment,
        )
    }

    fn parse_hunk_header_range(part: &str) -> (usize, usize) {
        if let Some((start, count)) = part.split_once(',') {
            (start.parse().unwrap_or(0), count.parse().unwrap_or(0))
        } else {
            (part.parse().unwrap_or(0), 1)
        }
    }
}

#[derive(Clone, Debug, Eq, PartialEq)]
enum PatchLine {
    Context(String),
    Addition(String),
    Deletion(String),
    Garbage(String),
}

#[cfg(test)]
mod test_optimization {
    use super::*;

    #[test]
    fn test_extract_changed_regions_simple() {
        let original: Vec<char> = "hello world".chars().collect();
        let modified: Vec<char> = "hello there".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);

        // "world" vs "there" - with 5 chars context, we get "ello world" vs "ello there"
        // (or less if not enough chars available)
        assert!(orig_region.len() < original.len());
        assert!(mod_region.len() < modified.len());
    }

    #[test]
    fn test_extract_changed_regions_insertion() {
        let original: Vec<char> = "abcdef".chars().collect();
        let modified: Vec<char> = "abcXYZdef".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&original, &modified);

        // The insertion is between c and d, so we need context around that point
        assert!(orig_region.len() <= original.len());
        assert!(mod_region.iter().collect::<String>().contains("XYZ"));
    }

    #[test]
    fn test_extract_changed_regions_identical() {
        let text: Vec<char> = "identical text".chars().collect();

        let (orig_region, mod_region) = extract_changed_regions(&text, &text);

        // When texts are identical, regions should be empty
        assert!(orig_region.is_empty());
        assert!(mod_region.is_empty());
    }

    #[test]
    fn test_optimized_matches_original_score() {
        // Smoke test: the optimized version produces scores in the valid 0..=100 range
        // for varied inputs (exact equality with the reference is checked separately).
        let test_cases = vec![
            ("hello world", "hello there", "hello world"),
            (
                "fn main() {}",
                "fn main() { println!(); }",
                "fn main() { print!(); }",
            ),
            ("abcdefghij", "abcXXXghij", "abcYYghij"),
            ("unchanged", "unchanged", "unchanged"),
            (
                "prefix middle suffix",
                "prefix CHANGED suffix",
                "prefix middle suffix",
            ),
        ];

        for (original, expected, actual) in test_cases {
            let score = delta_chr_f(original, expected, actual).score;
            // Just verify it produces a reasonable score (0-100)
            assert!(
                score >= 0.0 && score <= 100.0,
                "Score {} out of range for ({}, {}, {})",
                score,
                original,
                expected,
                actual
            );
        }
    }

    #[test]
    fn test_optimized_equals_reference() {
        // Comprehensive test that optimized version matches reference implementation exactly
        let test_cases = vec![
            // Basic cases
            ("hello world", "hello there", "hello world"),
            ("hello world", "hello there", "hello there"),
            ("unchanged", "unchanged", "unchanged"),
            // Code-like cases
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
            ),
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Goodbye\"); }",
            ),
            // Insertion
            ("abcdef", "abcXYZdef", "abcdef"),
            ("abcdef", "abcXYZdef", "abcXYZdef"),
            ("abcdef", "abcXYZdef", "abcABCdef"),
            // Deletion
            ("abcXYZdef", "abcdef", "abcXYZdef"),
            ("abcXYZdef", "abcdef", "abcdef"),
            // Multiple changes (simulated by different expected/actual)
            ("one two three four", "one THREE four", "one two FOUR"),
            // Edge cases
            ("a", "b", "c"),
            ("", "abc", ""),
            ("abc", "", "abc"),
            // Longer text with small change
            (
                "This is a longer piece of text that contains many words and characters to process",
                "This is a longer piece of TEXT that contains many words and characters to process",
                "This is a longer piece of text that contains many words and characters to process",
            ),
            // Change at the beginning
            (
                "ORIGINAL start of text",
                "NEW start of text",
                "DIFFERENT start of text",
            ),
            // Change at the end
            (
                "text ending ORIGINAL",
                "text ending NEW",
                "text ending DIFFERENT",
            ),
            // Whitespace handling (collapsed per CHR_F_WHITESPACE)
            ("hello world", "hello there", "hello world"),
            ("a b c d", "a X c d", "a Y c d"),
        ];

        for (original, expected, actual) in test_cases {
            let optimized_metrics = delta_chr_f(original, expected, actual);
            let reference_metrics = delta_chr_f_reference(original, expected, actual);

            assert!(
                (optimized_metrics.score - reference_metrics.score).abs() < 1e-10,
                "Score mismatch for ({:?}, {:?}, {:?}):\n optimized: {}\n reference: {}",
                original,
                expected,
                actual,
                optimized_metrics.score,
                reference_metrics.score
            );
            assert_eq!(
                optimized_metrics.counts.true_positives,
                reference_metrics.counts.true_positives
            );
            assert_eq!(
                optimized_metrics.counts.false_positives,
                reference_metrics.counts.false_positives
            );
            assert_eq!(
                optimized_metrics.counts.false_negatives,
                reference_metrics.counts.false_negatives
            );
            assert!((optimized_metrics.precision - reference_metrics.precision).abs() < 1e-10);
            assert!((optimized_metrics.recall - reference_metrics.recall).abs() < 1e-10);
        }
    }

    #[test]
    fn test_delta_chr_f_metrics_include_counts_and_rates() {
        let original = "one two three";
        let expected = "one three";
        let actual = "one two four";

        let metrics = delta_chr_f(original, expected, actual);

        assert!(metrics.score > 20.0 && metrics.score < 40.0);
        assert!(metrics.counts.true_positives > 0);
        assert!(metrics.counts.false_positives > 0);
        assert!(metrics.counts.false_negatives > 0);
        assert!(metrics.precision > 0.0 && metrics.precision < 1.0);
        assert!(metrics.recall > 0.0 && metrics.recall < 1.0);
        assert_eq!(metrics.beta, CHR_F_BETA);
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use indoc::indoc;

    fn cursor_on_line(one_based_line: u32) -> u32 {
        one_based_line - 1
    }

    #[test]
    fn test_delta_chr_f_perfect_match() {
        let original = "fn main() { println!(\"Hello\");}";
        let expected = "fn main() { println!(\"Hello, World!\");}";

        let score = delta_chr_f(original, expected, expected).score;
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_delta_chr_f_wrong_edit() {
        // When the edit is wrong
        let original = "one two three";
        let expected = "one three"; // deleted "two "
        let actual = "one two four"; // deleted "three", added "four"

        // Then the score should be low
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score > 20.0 && score < 40.0);
    }

    #[test]
    fn test_delta_chr_f_partial_match() {
        let original = "let x = 42;";
        let expected = "let x = 100;";
        let actual = "let x = 99;";

        // We got the edit location right, but the replacement text is wrong.
        // Deleted ngrams will match, bringing the score somewhere in the middle.
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score > 40.0 && score < 60.0);
    }

    #[test]
    fn test_delta_chr_f_missed_edit() {
        // When the prediction makes no changes
        let original = "prefix old suffix";
        let expected = "prefix new suffix";
        let actual = "prefix old suffix"; // no change

        // Then the score should be low (all expected changes are false negatives)
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_extra_edit() {
        // When adding unexpected content
        let original = "helloworld";
        let expected = "helloworld"; // no change expected
        let actual = "helloextraworld"; // added "extra"

        // Then the score should be low (all actual changes are false positives)
        let score = delta_chr_f(original, expected, actual).score;
        assert!(score < 20.0);
    }

    #[test]
    fn test_delta_chr_f_no_changes() {
        let text = "unchanged text";
        let score = delta_chr_f(text, text, text).score;
        assert!((score - 100.0).abs() < 1e-2);
    }

    #[test]
    fn test_braces_disbalance() {
        let text = "let x = { 1 + 2 };";
        assert_eq!(braces_disbalance(text), 0);

        let text = "let x = { 1 + 2";
        assert_eq!(braces_disbalance(text), 1);

        let text = "let x = { 1 + 2 )";
        assert_eq!(braces_disbalance(text), 2);
    }

    #[test]
    fn test_extract_changed_lines_from_diff() {
        let diff = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn main() {
-    println!("hello");
+    println!("world");
 }"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-    println!(\"hello\");"), Some(&1));
        assert_eq!(counts.get("+    println!(\"world\");"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_extract_changed_lines_skips_headers() {
        let diff = r#"diff --git a/file.rs b/file.rs
index abc123..def456 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,2 @@
-old line
+new line"#;

        let counts = extract_changed_lines_from_diff(diff);
        assert_eq!(counts.get("-old line"), Some(&1));
        assert_eq!(counts.get("+new line"), Some(&1));
        assert_eq!(counts.len(), 2);
    }

    #[test]
    fn test_exact_lines_match_perfect() {
        let expected = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 4);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
        assert!((metrics.precision() - 1.0).abs() < 1e-6);
        assert!((metrics.recall() - 1.0).abs() < 1e-6);
        assert!((metrics.f1() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_partial() {
        let expected = r#"-old line 1
-old line 2
+new line 1
+new line 2"#;

        let actual = r#"-old line 1
+new line 1
+extra line"#;

        let metrics = exact_lines_match(expected, actual);
        // TP: "-old line 1" and "+new line 1" (2)
        // FP: "+extra line" (1)
        // FN: "-old line 2" and "+new line 2" (2)
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 1);
        assert_eq!(metrics.false_negatives, 2);
    }

    #[test]
    fn test_exact_lines_match_no_overlap() {
        let expected = r#"-line a
+line b"#;

        let actual = r#"-line x
+line y"#;

        let metrics = exact_lines_match(expected, actual);
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 2);
        assert_eq!(metrics.false_negatives, 2);
        assert!((metrics.precision()).abs() < 1e-6);
        assert!((metrics.recall()).abs() < 1e-6);
    }

    #[test]
    fn test_exact_lines_match_duplicate_lines() {
        let expected = r#"+line a
+line a
+line a"#;

        let actual = r#"+line a
+line a"#;

        let metrics = exact_lines_match(expected, actual);
        // Expected has 3 "+line a", actual has 2
        // TP: 2, FN: 1, FP: 0
        assert_eq!(metrics.true_positives, 2);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 1);
    }

    #[test]
    fn test_exact_lines_match_empty_patches() {
        let metrics = exact_lines_match("", "");
        assert_eq!(metrics.true_positives, 0);
        assert_eq!(metrics.false_positives, 0);
        assert_eq!(metrics.false_negatives, 0);
    }

    #[test]
    fn test_is_editable_region_correct() {
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -context
            -removed
            -from the beginning of the file
             import sys
            +sys.exit(0)

        "};
        assert!(!is_editable_region_correct(patch));

        let patch = indoc! {"
            @@ -1,1 +1,1 @@
        "};
        assert!(is_editable_region_correct(patch));
    }

    #[test]
    fn test_isolated_whitespace_purely_whitespace_patch() {
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_adjacent_to_real_change() {
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
            +    let x = 1;
                 println!(\"hello\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_no_whitespace_changes() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_deletion() {
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_mixed_groups() {
        let patch = indoc! {"
            @@ -1,7 +1,8 @@
             fn main() {
            +
                 let x = 1;
            -    let y = 2;
            +    let y = 3;

            +
                 println!(\"hello\");
             }
        "};
        assert!(has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_empty_patch() {
        let patch = "";
        assert!(!has_isolated_whitespace_changes(patch, None));
    }

    #[test]
    fn test_isolated_whitespace_skipped_on_cursor_line() {
        // The addition of a blank line at new-file line 2 should be skipped
        // because the cursor is on that line.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        // New-file line 2 is the added blank line
        let cursor = cursor_on_line(2);
        assert!(!has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_isolated_whitespace_not_skipped_when_cursor_on_different_line() {
        // The blank line is at new-file line 2, but the cursor is on line 1.
        let patch = indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
            +
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(1);
        assert!(has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_isolated_whitespace_deletion_not_skipped_by_cursor() {
        // Deletions don't have a new-file line, so cursor can't suppress them.
        let patch = indoc! {"
            @@ -1,4 +1,3 @@
             fn main() {
            -
                 println!(\"hello\");
             }
        "};
        let cursor = cursor_on_line(2);
        assert!(has_isolated_whitespace_changes(patch, Some(cursor)));
    }

    #[test]
    fn test_count_patch_token_changes_real_world_rename() {
        // Real-world patch that was reported as returning 0 tokens
        let patch = "--- a/sip_call\\README.md\n+++ b/sip_call\\README.md\n@@ -1,1 +1,1 @@\n-# \n+# SIP Call\n";
        let counts = count_patch_token_changes(patch);
        // "# " vs "# SIP Call": the "SIP" and "Call" tokens (and a whitespace token) are inserted
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert_eq!(counts.deleted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_real_world_expansion() {
        // Real-world patch: single token expanded to multiple lines
        let patch = "--- a/task1/src/app/app.html\n+++ b/task1/src/app/app.html\n@@ -1,7 +1,9 @@\n <style>\n- m\n+ main {\n+ \n+ }\n </style>\n \n <main>\n \n </main>\n";
        let counts = count_patch_token_changes(patch);
        assert!(
            counts.inserted_tokens > 0,
            "expected inserted tokens > 0, got {}",
            counts.inserted_tokens
        );
        assert!(
            counts.deleted_tokens > 0,
            "expected deleted tokens > 0, got {}",
            counts.deleted_tokens
        );
    }

    #[test]
    fn test_count_patch_token_changes_simple_replacement() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!(\"hello\");
            +    println!(\"world\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 1, "deleted: \"hello\"");
        assert_eq!(counts.inserted_tokens, 1, "inserted: \"world\"");
    }

    #[test]
    fn test_count_patch_token_changes_insertion_only() {
        let patch = indoc! {"
            @@ -1,2 +1,3 @@
             fn main() {
            +    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert!(counts.inserted_tokens > 0);
    }

    #[test]
    fn test_count_patch_token_changes_deletion_only() {
        let patch = indoc! {"
            @@ -1,3 +1,2 @@
             fn main() {
            -    println!(\"hello\");
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert!(counts.deleted_tokens > 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_empty_patch() {
        let patch = "";
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 0);
        assert_eq!(counts.inserted_tokens, 0);
    }

    #[test]
    fn test_count_patch_token_changes_multiple_hunks() {
        let patch = indoc! {"
            @@ -1,3 +1,3 @@
             fn main() {
            -    let x = 1;
            +    let x = 2;
             }
            @@ -10,3 +10,3 @@
             fn other() {
            -    let y = 3;
            +    let y = 4;
             }
        "};
        let counts = count_patch_token_changes(patch);
        assert_eq!(counts.deleted_tokens, 2, "deleted: \"1\" and \"3\"");
        assert_eq!(counts.inserted_tokens, 2, "inserted: \"2\" and \"4\"");
    }

    #[test]
    fn test_count_patch_token_changes_multiword_change() {
        let patch = indoc! {"
            @@ -1,1 +1,1 @@
            -hello world foo
            +hello bar baz
        "};
        let counts = count_patch_token_changes(patch);
        // "world" and "foo" deleted, "bar" and "baz" inserted
        // (whitespace tokens between them may also count)
        assert!(counts.deleted_tokens >= 2);
        assert!(counts.inserted_tokens >= 2);
    }

    #[test]
    fn test_whitespace_collapse() {
        let text = "abc \n\n\n 123";
        let collapsed = collapse_whitespace(text.chars());
        assert_eq!(
            collapsed,
            vec!['a', 'b', 'c', ' ', '\n', ' ', '1', '2', '3']
        );
    }
}