1use collections::HashMap;
2
3type Counts = HashMap<String, usize>;
4type CountsDelta = HashMap<String, isize>;
5
6#[derive(Default, Debug, Clone)]
7struct ClassificationMetrics {
8 true_positives: usize,
9 false_positives: usize,
10 false_negatives: usize,
11}
12
13impl ClassificationMetrics {
14 fn from_counts(expected: &Counts, actual: &Counts) -> ClassificationMetrics {
15 let mut true_positives = 0;
16 let mut false_positives = 0;
17 let mut false_negatives = 0;
18
19 for (ngram, &expected_count) in expected {
20 let actual_count = *actual.get(ngram).unwrap_or(&0);
21 if actual_count > expected_count {
22 false_positives += actual_count - expected_count;
23 } else {
24 false_negatives += expected_count - actual_count;
25 }
26 true_positives += expected_count.min(actual_count);
27 }
28
29 for (ngram, &actual_count) in actual {
30 if !expected.contains_key(ngram) {
31 false_positives += actual_count;
32 }
33 }
34
35 ClassificationMetrics {
36 true_positives,
37 false_positives,
38 false_negatives,
39 }
40 }
41
42 fn precision(&self) -> f64 {
43 if self.true_positives + self.false_positives == 0 {
44 0.0
45 } else {
46 self.true_positives as f64 / (self.true_positives + self.false_positives) as f64
47 }
48 }
49
50 fn recall(&self) -> f64 {
51 if self.true_positives + self.false_negatives == 0 {
52 0.0
53 } else {
54 self.true_positives as f64 / (self.true_positives + self.false_negatives) as f64
55 }
56 }
57}
58
59enum ChrfWhitespace {
60 #[allow(unused)]
61 Unchanged,
62 Ignore,
63}
64
65const CHR_F_CHAR_ORDER: usize = 6;
66const CHR_F_BETA: f64 = 2.0;
67const CHR_F_WHITESPACE: ChrfWhitespace = ChrfWhitespace::Ignore;
68
69/// Computes a delta-chrF score that compares two sets of edits.
70///
71/// This metric works by:
72/// 1. Computing n-gram count differences (deltas) between original→expected and original→actual
73/// 2. Comparing these deltas to measure how well actual edits match expected edits
74///
75/// Returns a score from 0.0 to 100.0, where 100.0 means the actual edits perfectly match
76/// the expected edits.
77pub fn delta_chr_f(original: &str, expected: &str, actual: &str) -> f64 {
78 // Edge case: if all texts are identical, the edits match perfectly
79 if original == expected && expected == actual {
80 return 100.0;
81 }
82
83 let original_ngrams = chr_f_ngram_counts(original);
84 let expected_ngrams = chr_f_ngram_counts(expected);
85 let actual_ngrams = chr_f_ngram_counts(actual);
86
87 let mut total_precision = 0.0;
88 let mut total_recall = 0.0;
89
90 for order in 0..CHR_F_CHAR_ORDER {
91 let expected_delta = compute_ngram_delta(&expected_ngrams[order], &original_ngrams[order]);
92 let actual_delta = compute_ngram_delta(&actual_ngrams[order], &original_ngrams[order]);
93
94 if expected_delta.is_empty() && actual_delta.is_empty() {
95 total_precision += 1.0;
96 total_recall += 1.0;
97 continue;
98 }
99
100 let expected_counts = ngram_delta_to_counts(&expected_delta);
101 let actual_counts = ngram_delta_to_counts(&actual_delta);
102
103 let score = ClassificationMetrics::from_counts(&expected_counts, &actual_counts);
104 total_precision += score.precision();
105 total_recall += score.recall();
106 }
107
108 let prec = total_precision / CHR_F_CHAR_ORDER as f64;
109 let recall = total_recall / CHR_F_CHAR_ORDER as f64;
110 let f_score = if prec + recall == 0.0 {
111 0.0
112 } else {
113 (1.0 + CHR_F_BETA * CHR_F_BETA) * prec * recall / (CHR_F_BETA * CHR_F_BETA * prec + recall)
114 };
115
116 f_score * 100.0
117}
118
119fn chr_f_ngram_counts(text: &str) -> Vec<Counts> {
120 // Ignore whitespace. The original chrF implementation skips all
121 // whitespace. We should consider compressing multiple consecutive
122 // spaces into one -- this may reflect our task more closely.
123 let text = match CHR_F_WHITESPACE {
124 ChrfWhitespace::Unchanged => text.to_string(),
125 ChrfWhitespace::Ignore => text
126 .chars()
127 .filter(|c| !c.is_whitespace())
128 .collect::<String>(),
129 };
130
131 (1..=CHR_F_CHAR_ORDER)
132 .map(|order| count_ngrams(&text, order))
133 .collect()
134}
135
136fn compute_ngram_delta(after: &Counts, before: &Counts) -> CountsDelta {
137 let mut delta = CountsDelta::default();
138
139 for (ngram, &before_count) in before {
140 let after_count = *after.get(ngram).unwrap_or(&0);
141 delta.insert(ngram.clone(), after_count as isize - before_count as isize);
142 }
143
144 for (ngram, &after_count) in after {
145 if !before.contains_key(ngram) {
146 delta.insert(ngram.clone(), after_count as isize);
147 }
148 }
149
150 delta
151}
152
153/// Convert negative counts to special deletion tokens.
154/// For example, if expected delta is {"foo": -1} and actual delta is {"bar": -1},
155/// we convert it to {"¬foo": +1} and {"¬bar": +1}. This way _not_ deleting "foo"
156/// will result in a false negative, and mistakenly deleting "bar" will result in a false positive.
157fn ngram_delta_to_counts(delta: &CountsDelta) -> Counts {
158 let mut counts = Counts::default();
159
160 for (ngram, &delta) in delta {
161 if delta > 0 {
162 counts.insert(ngram.clone(), delta as usize);
163 } else {
164 counts.insert(format!("¬{ngram}"), delta.unsigned_abs());
165 }
166 }
167
168 counts
169}
170
171fn count_ngrams(text: &str, n: usize) -> Counts {
172 let chars: Vec<char> = text.chars().collect();
173 let mut counts = Counts::default();
174
175 for window in chars.windows(n) {
176 let ngram: String = window.iter().collect();
177 *counts.entry(ngram).or_insert(0) += 1;
178 }
179
180 counts
181}
182
183#[cfg(test)]
184mod test {
185 use super::*;
186
187 #[test]
188 fn test_delta_chr_f_perfect_match() {
189 let original = "fn main() { println!(\"Hello\");}";
190 let expected = "fn main() { println!(\"Hello, World!\");}";
191
192 let score = delta_chr_f(original, expected, expected);
193 assert!((score - 100.0).abs() < 1e-2);
194 }
195
196 #[test]
197 fn test_delta_chr_f_wrong_edit() {
198 // When the edit is wrong
199 let original = "one two three";
200 let expected = "one three"; // deleted "two "
201 let actual = "one two four"; // deleted "three", added "four"
202
203 // Then the score should be low
204 let score = delta_chr_f(original, expected, actual);
205 assert!(score > 20.0 && score < 40.0);
206 }
207
208 #[test]
209 fn test_delta_chr_f_partial_match() {
210 let original = "let x = 42;";
211 let expected = "let x = 100;";
212 let actual = "let x = 99;";
213
214 // We got the edit location right, but the replacement text is wrong.
215 // Deleted ngrams will match, bringing the score somewhere in the middle.
216 let score = delta_chr_f(original, expected, actual);
217 assert!(score > 40.0 && score < 60.0);
218 }
219
220 #[test]
221 fn test_delta_chr_f_missed_edit() {
222 // When predictions makes no changes
223 let original = "prefix old suffix";
224 let expected = "prefix new suffix";
225 let actual = "prefix old suffix"; // no change
226
227 // Then the score should be low (all expected changes are false negatives)
228 let score = delta_chr_f(original, expected, actual);
229 assert!(score < 20.0);
230 }
231
232 #[test]
233 fn test_delta_chr_f_extra_edit() {
234 // When adding unexpected content
235 let original = "helloworld";
236 let expected = "helloworld"; // no change expected
237 let actual = "helloextraworld"; // added "extra"
238
239 // Then the score should be low (all actual changes are false positives)
240 let score = delta_chr_f(original, expected, actual);
241 assert!(score < 20.0);
242 }
243
244 #[test]
245 fn test_delta_chr_f_no_changes() {
246 let text = "unchanged text";
247 let score = delta_chr_f(text, text, text);
248 assert!((score - 100.0).abs() < 1e-2);
249 }
250}