1use collections::HashMap;
2
3use crate::reorder_patch::{Patch, PatchLine};
4
/// Bag (multiset) of strings: maps each item to the number of times it occurs.
pub type Counts = HashMap<String, usize>;
/// Signed per-item change in occurrence count between two `Counts` bags.
type CountsDelta = HashMap<String, isize>;

/// Context characters needed on each side of a change to capture all affected n-grams.
/// Derived from the largest n-gram order: one character less than `CHR_F_CHAR_ORDER`.
const CONTEXT_CHARS: usize = CHR_F_CHAR_ORDER - 1;
10
11#[derive(Default, Debug, Clone)]
12pub struct ClassificationMetrics {
13 pub true_positives: usize,
14 pub false_positives: usize,
15 pub false_negatives: usize,
16}
17
18impl ClassificationMetrics {
19 pub fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
20 let mut true_positives = 0;
21 let mut false_positives = 0;
22 let mut false_negatives = 0;
23
24 for (ngram, &expected_count) in expected {
25 let actual_count = *actual.get(ngram).unwrap_or(&0);
26 if actual_count > expected_count {
27 false_positives += actual_count - expected_count;
28 } else {
29 false_negatives += expected_count - actual_count;
30 }
31 true_positives += expected_count.min(actual_count);
32 }
33
34 for (ngram, &actual_count) in actual {
35 if !expected.contains_key(ngram) {
36 false_positives += actual_count;
37 }
38 }
39
40 ClassificationMetrics {
41 true_positives,
42 false_positives,
43 false_negatives,
44 }
45 }
46
47 pub fn precision(&self) -> f64 {
48 if self.true_positives + self.false_positives == 0 {
49 0.0
50 } else {
51 self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
52 }
53 }
54
55 pub fn recall(&self) -> f64 {
56 if self.true_positives + self.false_negatives == 0 {
57 0.0
58 } else {
59 self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
60 }
61 }
62
63 pub fn f1(&self) -> f64 {
64 let precision = self.precision();
65 let recall = self.recall();
66 if precision + recall == 0.0 {
67 0.0
68 } else {
69 2.0 * precision * recall / (precision + recall)
70 }
71 }
72}
73
/// How whitespace in the input texts is treated before n-grams are counted.
enum ChrfWhitespace {
 /// Keep every character, whitespace included.
 #[allow(unused)]
 Unchanged,
 /// Drop all whitespace characters.
 Ignore,
}

/// Largest character n-gram order scored; orders 1 through this value are averaged.
const CHR_F_CHAR_ORDER: usize = 6;
/// Beta parameter of the final F-beta score combining precision and recall.
const CHR_F_BETA: f64 = 2.0;
/// Whitespace policy applied to every text that is scored.
const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Ignore;
83
84/// Computes a delta-chrF score that compares two sets of edits.
85///
86/// This metric works by:
87/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
88/// 2. Comparing these deltas to measure how well actual edits match expected edits
89///
90/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
91/// the expected edits.
92pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> f64 {
93 // Edge case: if all texts are identical, the edits match perfectly
94 if original == expected && expected == actual {
95 return 100.0;
96 }
97
98 // Pre-filter whitespace once for all texts
99 let orig_chars: Vec<char> = filter_whitespace_chars(original);
100 let exp_chars: Vec<char> = filter_whitespace_chars(expected);
101 let act_chars: Vec<char> = filter_whitespace_chars(actual);
102
103 // Find the changed regions between original→expected and original→actual
104 // We only need to compute n-grams on these regions (plus context for boundary n-grams)
105 let (orig_for_exp, exp_region) = extract_changed_regions(&orig_chars, &exp_chars);
106 let (orig_for_act, act_region) = extract_changed_regions(&orig_chars, &act_chars);
107
108 let mut total_precision = 0.0;
109 let mut total_recall = 0.0;
110
111 for order in 1..=CHR_F_CHAR_ORDER {
112 // Compute n-grams only on the affected regions
113 let orig_ngrams_for_exp = count_ngrams_from_chars(&orig_for_exp, order);
114 let exp_ngrams = count_ngrams_from_chars(&exp_region, order);
115 let expected_delta = compute_ngram_delta(&exp_ngrams, &orig_ngrams_for_exp);
116
117 let orig_ngrams_for_act = count_ngrams_from_chars(&orig_for_act, order);
118 let act_ngrams = count_ngrams_from_chars(&act_region, order);
119 let actual_delta = compute_ngram_delta(&act_ngrams, &orig_ngrams_for_act);
120
121 if expected_delta.is_empty() && actual_delta.is_empty() {
122 total_precision += 1.0;
123 total_recall += 1.0;
124 continue;
125 }
126
127 let expected_counts = ngram_delta_to_counts(&expected_delta);
128 let actual_counts = ngram_delta_to_counts(&actual_delta);
129
130 let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
131 total_precision += score.precision();
132 total_recall += score.recall();
133 }
134
135 let prec = total_precision / CHR_F_CHAR_ORDER as f64;
136 let recall = total_recall / CHR_F_CHAR_ORDER as f64;
137 let f_score = if prec + recall == 0.0 {
138 0.0
139 } else {
140 (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
141 };
142
143 f_score * 100.0
144}
145
/// Reference implementation of delta_chr_f (original, non-optimized version).
///
/// Counts n-grams over the complete texts instead of only the changed regions.
/// Used for testing that the optimized version produces identical results.
#[cfg(test)]
fn delta_chr_f_reference(original: &str, expected: &str, actual: &str) -> f64 {
    if original == expected && expected == actual {
        return 100.0;
    }

    // One `Counts` per n-gram order, lowest order first.
    let original_by_order = chr_f_ngram_counts(original);
    let expected_by_order = chr_f_ngram_counts(expected);
    let actual_by_order = chr_f_ngram_counts(actual);

    let mut precision_sum = 0.0;
    let mut recall_sum = 0.0;

    let per_order = original_by_order
        .iter()
        .zip(expected_by_order.iter())
        .zip(actual_by_order.iter());

    for ((original_counts, expected_counts), actual_counts) in per_order {
        let expected_delta = compute_ngram_delta(expected_counts, original_counts);
        let actual_delta = compute_ngram_delta(actual_counts, original_counts);

        let (precision, recall) = if expected_delta.is_empty() && actual_delta.is_empty() {
            (1.0, 1.0)
        } else {
            let metrics = ClassificationMetrics::from_counts(
                &ngram_delta_to_counts(&expected_delta),
                &ngram_delta_to_counts(&actual_delta),
            );
            (metrics.precision(), metrics.recall())
        };

        precision_sum += precision;
        recall_sum += recall;
    }

    let order_count = CHR_F_CHAR_ORDER as f64;
    let precision = precision_sum / order_count;
    let recall = recall_sum / order_count;

    if precision + recall == 0.0 {
        return 0.0;
    }

    let beta_squared = CHR_F_BETA * CHR_F_BETA;
    (1.0 + beta_squared) * precision * recall / (beta_squared * precision + recall) * 100.0
}
189
190/// Filter whitespace from a string and return as Vec<char>
191fn filter_whitespace_chars(text: &str) -> Vec<char> {
192 match CHR_F_WHITESPACE {
193 ChrfWhitespace::Unchanged => text.chars().collect(),
194 ChrfWhitespace::Ignore => text.chars().filter(|c| !c.is_whitespace()).collect(),
195 }
196}
197
198/// Extract only the changed regions between two texts, with context for n-gram boundaries.
199///
200/// Returns (original_affected_region, modified_affected_region) as Vec<char>.
201///
202/// The key insight: when computing n-gram delta between two nearly-identical texts,
203/// n-grams from unchanged regions cancel out. We only need to process:
204/// 1. The changed content itself
205/// 2. CONTEXT_CHARS (n-1) characters before and after, to capture boundary-crossing n-grams
206fn extract_changed_regions(original: &[char], modified: &[char]) -> (Vec<char>, Vec<char>) {
207 // Find longest common prefix
208 let prefix_len = original
209 .iter()
210 .zip(modified.iter())
211 .take_while(|(a, b)| a == b)
212 .count();
213
214 // Find longest common suffix (that doesn't overlap with prefix)
215 let orig_remaining = original.len().saturating_sub(prefix_len);
216 let mod_remaining = modified.len().saturating_sub(prefix_len);
217 let max_suffix = orig_remaining.min(mod_remaining);
218
219 let suffix_len = original
220 .iter()
221 .rev()
222 .zip(modified.iter().rev())
223 .take(max_suffix)
224 .take_while(|(a, b)| a == b)
225 .count();
226
227 // Calculate the changed region boundaries
228 let orig_change_start = prefix_len;
229 let orig_change_end = original.len().saturating_sub(suffix_len);
230 let mod_change_start = prefix_len;
231 let mod_change_end = modified.len().saturating_sub(suffix_len);
232
233 // If there's no actual change, return empty regions
234 if orig_change_start >= orig_change_end && mod_change_start >= mod_change_end {
235 return (Vec::new(), Vec::new());
236 }
237
238 // Expand to include context for n-gram boundaries
239 let orig_context_start = orig_change_start.saturating_sub(CONTEXT_CHARS);
240 let orig_context_end = (orig_change_end + CONTEXT_CHARS).min(original.len());
241 let mod_context_start = mod_change_start.saturating_sub(CONTEXT_CHARS);
242 let mod_context_end = (mod_change_end + CONTEXT_CHARS).min(modified.len());
243
244 let orig_region: Vec<char> = original[orig_context_start..orig_context_end].to_vec();
245 let mod_region: Vec<char> = modified[mod_context_start..mod_context_end].to_vec();
246
247 (orig_region, mod_region)
248}
249
250/// Count n-grams directly from a char slice (avoids String allocation for the full text)
251fn count_ngrams_from_chars(chars: &[char], n: usize) -> Counts {
252 let mut counts = Counts::default();
253
254 if chars.len() < n {
255 return counts;
256 }
257
258 for window in chars.windows(n) {
259 let ngram: String = window.iter().collect();
260 *counts.entry(ngram).or_insert(0) += 1;
261 }
262
263 counts
264}
265
266#[allow(dead_code)]
267fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
268 // Ignore whitespace. The original chrF implementation skips all
269 // whitespace. We should consider compressing multiple consecutive
270 // spaces into one -- this may reflect our task more closely.
271 let text = match CHR_F_WHITESPACE {
272 ChrfWhitespace::Unchanged => text.to_string(),
273 ChrfWhitespace::Ignore => text
274 .chars()
275 .filter(|c| !c.is_whitespace())
276 .collect::<String>(),
277 };
278
279 (1..=CHR_F_CHAR_ORDER)
280 .map(|order| count_ngrams(&text, order))
281 .collect()
282}
283
284fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
285 let mut delta = CountsDelta::default();
286
287 for (ngram, &before_count) in before {
288 let after_count = *after.get(ngram).unwrap_or(&0);
289 delta.insert(ngram.clone(), after_count as isize - before_count as isize);
290 }
291
292 for (ngram, &after_count) in after {
293 if !before.contains_key(ngram) {
294 delta.insert(ngram.clone(), after_count as isize);
295 }
296 }
297
298 delta
299}
300
301/// Convert negative counts to special deletion tokens.
302/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
303/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
304/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
305fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
306 let mut counts = Counts::default();
307
308 for (ngram, &delta) in delta {
309 if delta > 0 {
310 counts.insert(ngram.clone(), delta as usize);
311 } else if delta < 0 {
312 counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
313 }
314 }
315
316 counts
317}
318
319#[allow(dead_code)]
320fn count_ngrams(text: &str, n: usize) -> Counts {
321 let chars: Vec<char> = text.chars().collect();
322 let mut counts = Counts::default();
323
324 for window in chars.windows(n) {
325 let ngram: String = window.iter().collect();
326 *counts.entry(ngram).or_insert(0) += 1;
327 }
328
329 counts
330}
331
/// Measures how unbalanced the brackets in `text` are.
///
/// For each of the three bracket kinds (`{}`, `()`, `[]`) the absolute
/// difference between the number of opening and closing characters is taken,
/// and the three differences are added up. Only totals are compared; nesting
/// and ordering are not checked.
pub fn braces_disbalance(text: &str) -> usize {
    const PAIRS: [(char, char); 3] = [('{', '}'), ('(', ')'), ('[', ']')];

    // Running "opens minus closes" per bracket kind.
    let mut surplus = [0isize; 3];

    for ch in text.chars() {
        for (balance, &(open, close)) in surplus.iter_mut().zip(PAIRS.iter()) {
            if ch == open {
                *balance += 1;
            } else if ch == close {
                *balance -= 1;
            }
        }
    }

    surplus.iter().map(|balance| balance.unsigned_abs()).sum()
}
349
350/// Extracts changed lines from a unified diff string.
351/// Returns a bag (multiset) of lines that were added (+) or removed (-).
352/// The +/- prefix is included in the line to distinguish additions from deletions.
353pub fn extract_changed_lines_from_diff(diff: &str) -> Counts {
354 let mut counts = Counts::default();
355
356 for line in diff.lines() {
357 // Skip file headers (--- and +++)
358 if line.starts_with("---") || line.starts_with("+++") {
359 continue;
360 }
361 // Skip hunk headers (@@)
362 if line.starts_with("@@") {
363 continue;
364 }
365 // Skip diff header lines (diff --git, index, etc.)
366 if line.starts_with("diff ") || line.starts_with("index ") {
367 continue;
368 }
369 // Include added and removed lines (with their prefix)
370 if line.starts_with('+') || line.starts_with('-') {
371 *counts.entry(line.to_string()).or_insert(0) += 1;
372 }
373 }
374
375 counts
376}
377
378/// Computes exact lines match metrics between expected and actual patches.
379/// Treats changed lines as a bag (multiset) - order is discarded but count matters.
380/// Returns ClassificationMetrics with TP/FP/FN counts.
381pub fn exact_lines_match(expected_patch: &str, actual_patch: &str) -> ClassificationMetrics {
382 let expected_lines = extract_changed_lines_from_diff(expected_patch);
383 let actual_lines = extract_changed_lines_from_diff(actual_patch);
384 ClassificationMetrics::from_counts(&expected_lines, &actual_lines)
385}
386
387/// Returns whether the patch contains any isolated whitespace-only changes.
388///
389/// A whitespace-only change is an added or deleted line whose content is empty or
390/// contains only whitespace. It is "isolated" when it is not adjacent to any
391/// substantive (non-whitespace) change within the same contiguous change group.
392pub fn has_isolated_whitespace_changes(patch_str: &str) -> bool {
393 let patch = Patch::parse_unified_diff(patch_str);
394
395 for hunk in &patch.hunks {
396 let lines = &hunk.lines;
397 for (i, line) in lines.iter().enumerate() {
398 let content = match line {
399 PatchLine::Addition(s) | PatchLine::Deletion(s) => s.as_str(),
400 _ => continue,
401 };
402
403 if !content.trim().is_empty() {
404 continue;
405 }
406
407 if is_whitespace_change_isolated(lines, i) {
408 return true;
409 }
410 }
411 }
412
413 false
414}
415
416fn is_whitespace_change_isolated(lines: &[PatchLine], index: usize) -> bool {
417 // Look backward for a non-whitespace change before hitting a context line
418 for line in lines[..index].iter().rev() {
419 match line {
420 PatchLine::Addition(s) | PatchLine::Deletion(s) => {
421 if !s.trim().is_empty() {
422 return false;
423 }
424 }
425 _ => break,
426 }
427 }
428
429 // Look forward for a non-whitespace change before hitting a context line
430 for line in &lines[index + 1..] {
431 match line {
432 PatchLine::Addition(s) | PatchLine::Deletion(s) => {
433 if !s.trim().is_empty() {
434 return false;
435 }
436 }
437 _ => break,
438 }
439 }
440
441 true
442}
443
444/// A simple proxy for whether the prediction respects editable region.
445pub fn is_editable_region_correct(actual_patch: &str) -> bool {
446 // A typical sign of a wrong editable region: a bunch of lines deletion
447 // at the beginning or end of the patch.
448 let patch = Patch::parse_unified_diff(actual_patch);
449 if patch.hunks.is_empty() {
450 return true;
451 }
452
453 let hunk = &patch.hunks[0];
454 let mut deletions_at_start = 0;
455
456 for line in hunk.lines.iter() {
457 match line {
458 PatchLine::Deletion(_) => deletions_at_start += 1,
459 _ => break,
460 }
461 }
462
463 if deletions_at_start >= 3 {
464 return false;
465 }
466
467 true
468}
469
/// Tests for the changed-region optimization used by `delta_chr_f`.
#[cfg(test)]
mod test_optimization {
    use super::*;

    /// Splits a string into the char vector form taken by `extract_changed_regions`.
    fn chars_of(text: &str) -> Vec<char> {
        text.chars().collect()
    }

    #[test]
    fn test_extract_changed_regions_simple() {
        let original = chars_of("hello world");
        let modified = chars_of("hello there");

        let (original_region, modified_region) = extract_changed_regions(&original, &modified);

        // Only the tail differs, so both regions must be shorter than the full
        // texts: the differing tail plus a limited amount of leading context.
        assert!(original_region.len() < original.len());
        assert!(modified_region.len() < modified.len());
    }

    #[test]
    fn test_extract_changed_regions_insertion() {
        let original = chars_of("abcdef");
        let modified = chars_of("abcXYZdef");

        let (original_region, modified_region) = extract_changed_regions(&original, &modified);

        // The insertion sits between c and d; the modified region has to contain it.
        assert!(original_region.len() <= original.len());
        let modified_text: String = modified_region.iter().collect();
        assert!(modified_text.contains("XYZ"));
    }

    #[test]
    fn test_extract_changed_regions_identical() {
        let text = chars_of("identical text");

        let (original_region, modified_region) = extract_changed_regions(&text, &text);

        // Identical texts have nothing to compare.
        assert!(original_region.is_empty());
        assert!(modified_region.is_empty());
    }

    #[test]
    fn test_optimized_matches_original_score() {
        // (original, expected, actual) triples; each must yield a score in range.
        let cases: [(&str, &str, &str); 5] = [
            ("hello world", "hello there", "hello world"),
            (
                "fn main() {}",
                "fn main() { println!(); }",
                "fn main() { print!(); }",
            ),
            ("abcdefghij", "abcXXXghij", "abcYYghij"),
            ("unchanged", "unchanged", "unchanged"),
            (
                "prefix middle suffix",
                "prefix CHANGED suffix",
                "prefix middle suffix",
            ),
        ];

        for &(original, expected, actual) in cases.iter() {
            let score = delta_chr_f(original, expected, actual);
            assert!(
                (0.0..=100.0).contains(&score),
                "Score {} out of range for ({}, {}, {})",
                score,
                original,
                expected,
                actual
            );
        }
    }

    #[test]
    fn test_optimized_equals_reference() {
        // The optimized scorer must agree with the reference implementation
        // on every (original, expected, actual) triple below.
        let cases: [(&str, &str, &str); 19] = [
            // Basic cases
            ("hello world", "hello there", "hello world"),
            ("hello world", "hello there", "hello there"),
            ("unchanged", "unchanged", "unchanged"),
            // Code-like cases
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
            ),
            (
                "fn main() { println!(\"Hello\"); }",
                "fn main() { println!(\"Hello, World!\"); }",
                "fn main() { println!(\"Goodbye\"); }",
            ),
            // Insertion
            ("abcdef", "abcXYZdef", "abcdef"),
            ("abcdef", "abcXYZdef", "abcXYZdef"),
            ("abcdef", "abcXYZdef", "abcABCdef"),
            // Deletion
            ("abcXYZdef", "abcdef", "abcXYZdef"),
            ("abcXYZdef", "abcdef", "abcdef"),
            // Multiple changes (simulated by different expected/actual)
            ("one two three four", "one THREE four", "one two FOUR"),
            // Edge cases
            ("a", "b", "c"),
            ("", "abc", ""),
            ("abc", "", "abc"),
            // Longer text with small change
            (
                "This is a longer piece of text that contains many words and characters to process",
                "This is a longer piece of TEXT that contains many words and characters to process",
                "This is a longer piece of text that contains many words and characters to process",
            ),
            // Change at the beginning
            (
                "ORIGINAL start of text",
                "NEW start of text",
                "DIFFERENT start of text",
            ),
            // Change at the end
            (
                "text ending ORIGINAL",
                "text ending NEW",
                "text ending DIFFERENT",
            ),
            // Whitespace (should be ignored)
            ("hello world", "hello there", "hello world"),
            ("a b c d", "a X c d", "a Y c d"),
        ];

        for &(original, expected, actual) in cases.iter() {
            let optimized_score = delta_chr_f(original, expected, actual);
            let reference_score = delta_chr_f_reference(original, expected, actual);
            let difference = (optimized_score - reference_score).abs();

            assert!(
                difference < 1e-10,
                "Mismatch for ({:?}, {:?}, {:?}):\n optimized: {}\n reference: {}",
                original,
                expected,
                actual,
                optimized_score,
                reference_score
            );
        }
    }
}
614
// Behavioral tests for the public metrics in this file: `delta_chr_f`,
// `braces_disbalance`, the exact-line-match helpers and the patch heuristics.
#[cfg(test)]
mod test {
 use super::*;
 use indoc::indoc;

 // An actual text equal to the expected text must score (approximately) 100.
 #[test]
 fn test_delta_chr_f_perfect_match() {
 let original = "fn main() { println!(\"Hello\");}";
 let expected = "fn main() { println!(\"Hello, World!\");}";

 let score = delta_chr_f(original, expected, expected);
 assert!((score - 100.0).abs() < 1e-2);
 }

 // An edit applied to the wrong part of the text must land in a low score band.
 #[test]
 fn test_delta_chr_f_wrong_edit() {
 // When the edit is wrong
 let original = "one two three";
 let expected = "one three"; // deleted "two "
 let actual = "one two four"; // deleted "three", added "four"

 // Then the score should be low
 let score = delta_chr_f(original, expected, actual);
 assert!(score > 20.0 && score < 40.0);
 }

 // Right location but wrong replacement text must land in a middle score band.
 #[test]
 fn test_delta_chr_f_partial_match() {
 let original = "let x = 42;";
 let expected = "let x = 100;";
 let actual = "let x = 99;";

 // We got the edit location right, but the replacement text is wrong.
 // Deleted ngrams will match, bringing the score somewhere in the middle.
 let score = delta_chr_f(original, expected, actual);
 assert!(score > 40.0 && score < 60.0);
 }

 // Leaving the text untouched when an edit was expected must score low.
 #[test]
 fn test_delta_chr_f_missed_edit() {
 // When predictions makes no changes
 let original = "prefix old suffix";
 let expected = "prefix new suffix";
 let actual = "prefix old suffix"; // no change

 // Then the score should be low (all expected changes are false negatives)
 let score = delta_chr_f(original, expected, actual);
 assert!(score < 20.0);
 }

 // Changing the text when no edit was expected must score low.
 #[test]
 fn test_delta_chr_f_extra_edit() {
 // When adding unexpected content
 let original = "helloworld";
 let expected = "helloworld"; // no change expected
 let actual = "helloextraworld"; // added "extra"

 // Then the score should be low (all actual changes are false positives)
 let score = delta_chr_f(original, expected, actual);
 assert!(score < 20.0);
 }

 // Three identical texts take the early-return path and score 100.
 #[test]
 fn test_delta_chr_f_no_changes() {
 let text = "unchanged text";
 let score = delta_chr_f(text, text, text);
 assert!((score - 100.0).abs() < 1e-2);
 }

 // Each bracket kind contributes its own open/close count difference.
 #[test]
 fn test_braces_disbalance() {
 let text = "let x = { 1 + 2 };";
 assert_eq!(braces_disbalance(text), 0);

 let text = "let x = { 1 + 2";
 assert_eq!(braces_disbalance(text), 1);

 let text = "let x = { 1 + 2 )";
 assert_eq!(braces_disbalance(text), 2);
 }

 // Only the added and removed lines of a diff are collected, prefix included.
 #[test]
 fn test_extract_changed_lines_from_diff() {
 let diff = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
 fn main() {
- println!("hello");
+ println!("world");
 }"#;

 let counts = extract_changed_lines_from_diff(diff);
 assert_eq!(counts.get("- println!(\"hello\");"), Some(&1));
 assert_eq!(counts.get("+ println!(\"world\");"), Some(&1));
 assert_eq!(counts.len(), 2);
 }

 // `diff`, `index`, file and hunk header lines must not be counted as changes.
 #[test]
 fn test_extract_changed_lines_skips_headers() {
 let diff = r#"diff --git a/file.rs b/file.rs
index abc123..def456 100644
--- a/file.rs
+++ b/file.rs
@@ -1,2 +1,2 @@
-old line
+new line"#;

 let counts = extract_changed_lines_from_diff(diff);
 assert_eq!(counts.get("-old line"), Some(&1));
 assert_eq!(counts.get("+new line"), Some(&1));
 assert_eq!(counts.len(), 2);
 }

 // Identical patches: every changed line is a true positive.
 #[test]
 fn test_exact_lines_match_perfect() {
 let expected = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

 let actual = r#"--- a/file.rs
+++ b/file.rs
@@ -1,3 +1,3 @@
-old line 1
-old line 2
+new line 1
+new line 2"#;

 let metrics = exact_lines_match(expected, actual);
 assert_eq!(metrics.true_positives, 4);
 assert_eq!(metrics.false_positives, 0);
 assert_eq!(metrics.false_negatives, 0);
 assert!((metrics.precision() - 1.0).abs() < 1e-6);
 assert!((metrics.recall() - 1.0).abs() < 1e-6);
 assert!((metrics.f1() - 1.0).abs() < 1e-6);
 }

 // Partially overlapping patches produce a mix of TP, FP and FN.
 #[test]
 fn test_exact_lines_match_partial() {
 let expected = r#"-old line 1
-old line 2
+new line 1
+new line 2"#;

 let actual = r#"-old line 1
+new line 1
+extra line"#;

 let metrics = exact_lines_match(expected, actual);
 // TP: "-old line 1" and "+new line 1" (2)
 // FP: "+extra line" (1)
 // FN: "-old line 2" and "+new line 2" (2)
 assert_eq!(metrics.true_positives, 2);
 assert_eq!(metrics.false_positives, 1);
 assert_eq!(metrics.false_negatives, 2);
 }

 // Disjoint patches: nothing matches, so precision and recall are both zero.
 #[test]
 fn test_exact_lines_match_no_overlap() {
 let expected = r#"-line a
+line b"#;

 let actual = r#"-line x
+line y"#;

 let metrics = exact_lines_match(expected, actual);
 assert_eq!(metrics.true_positives, 0);
 assert_eq!(metrics.false_positives, 2);
 assert_eq!(metrics.false_negatives, 2);
 assert!((metrics.precision()).abs() < 1e-6);
 assert!((metrics.recall()).abs() < 1e-6);
 }

 // Repeated lines are compared by multiplicity, not just by presence.
 #[test]
 fn test_exact_lines_match_duplicate_lines() {
 let expected = r#"+line a
+line a
+line a"#;

 let actual = r#"+line a
+line a"#;

 let metrics = exact_lines_match(expected, actual);
 // Expected has 3 "+line a", actual has 2
 // TP: 2, FN: 1, FP: 0
 assert_eq!(metrics.true_positives, 2);
 assert_eq!(metrics.false_positives, 0);
 assert_eq!(metrics.false_negatives, 1);
 }

 // Two empty patches yield all-zero metrics.
 #[test]
 fn test_exact_lines_match_empty_patches() {
 let metrics = exact_lines_match("", "");
 assert_eq!(metrics.true_positives, 0);
 assert_eq!(metrics.false_positives, 0);
 assert_eq!(metrics.false_negatives, 0);
 }

 // Three deletions at the start of the first hunk flag the region as wrong;
 // a hunk without any lines is accepted.
 #[test]
 fn test_is_editable_region_correct() {
 let patch = indoc! {"
 @@ -1,1 +1,1 @@
 -context
 -removed
 -from the beginning of the file
 import sys
 +sys.exit(0)

 "};
 assert!(!is_editable_region_correct(patch));

 let patch = indoc! {"
 @@ -1,1 +1,1 @@
 "};
 assert!(is_editable_region_correct(patch));
 }

 // A lone added blank line is an isolated whitespace change.
 #[test]
 fn test_isolated_whitespace_purely_whitespace_patch() {
 let patch = indoc! {"
 @@ -1,3 +1,4 @@
 fn main() {
 +
 println!(\"hello\");
 }
 "};
 assert!(has_isolated_whitespace_changes(patch));
 }

 // A blank line added next to a substantive addition is not isolated.
 #[test]
 fn test_isolated_whitespace_adjacent_to_real_change() {
 let patch = indoc! {"
 @@ -1,3 +1,4 @@
 fn main() {
 +
 + let x = 1;
 println!(\"hello\");
 }
 "};
 assert!(!has_isolated_whitespace_changes(patch));
 }

 // A patch with only substantive changes has no whitespace changes to flag.
 #[test]
 fn test_isolated_whitespace_no_whitespace_changes() {
 let patch = indoc! {"
 @@ -1,3 +1,3 @@
 fn main() {
 - println!(\"hello\");
 + println!(\"world\");
 }
 "};
 assert!(!has_isolated_whitespace_changes(patch));
 }

 // A lone deleted blank line is an isolated whitespace change as well.
 #[test]
 fn test_isolated_whitespace_deletion() {
 let patch = indoc! {"
 @@ -1,4 +1,3 @@
 fn main() {
 -
 println!(\"hello\");
 }
 "};
 assert!(has_isolated_whitespace_changes(patch));
 }

 // One isolated blank-line change is enough, even when the same hunk also
 // contains a substantive change group.
 #[test]
 fn test_isolated_whitespace_mixed_groups() {
 let patch = indoc! {"
 @@ -1,7 +1,8 @@
 fn main() {
 +
 let x = 1;
 - let y = 2;
 + let y = 3;

 +
 println!(\"hello\");
 }
 "};
 assert!(has_isolated_whitespace_changes(patch));
 }

 // An empty patch has no changes at all.
 #[test]
 fn test_isolated_whitespace_empty_patch() {
 let patch = "";
 assert!(!has_isolated_whitespace_changes(patch));
 }
}