1use std::ops::Range;
2use std::path::Path;
3use std::sync::Arc;
4
5use edit_prediction::udiff::apply_diff_to_string;
6use language::{char_diff, text_diff};
7
8use zeta_prompt::ZetaPromptInput;
9
10fn apply_diff_to_string_lenient(diff_str: &str, text: &str) -> String {
11 let hunks = parse_diff_hunks(diff_str);
12 let mut result = text.to_string();
13
14 for hunk in hunks {
15 let hunk_diff = format!("--- a/file\n+++ b/file\n{}", format_hunk(&hunk));
16 if let Ok(updated) = apply_diff_to_string(&hunk_diff, &result) {
17 result = updated;
18 }
19 }
20
21 result
22}
23
24#[derive(Debug, Clone, PartialEq, Eq)]
25struct ParsedHunk {
26 old_start: u32,
27 old_count: u32,
28 new_start: u32,
29 new_count: u32,
30 lines: Vec<HunkLine>,
31}
32
/// A single body line of a diff hunk. The payload is the line text without its
/// one-character marker.
#[derive(Debug, Clone, PartialEq, Eq)]
enum HunkLine {
    /// Unchanged line (marker `' '`).
    Context(String),
    /// Line present only on the new side (marker `'+'`).
    Addition(String),
    /// Line present only on the old side (marker `'-'`).
    Deletion(String),
}
39
/// Parses a unified-diff hunk header of the form `@@ -old[,count] +new[,count] @@`.
///
/// Returns `(old_start, old_count, new_start, new_count)`. A range written
/// without a `,count` suffix gets a count of 1, and anything following the
/// closing ` @@` is ignored. Returns `None` when the line does not have this
/// shape or when one of the numbers does not parse as a `u32`.
fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32, u32)> {
    /// Parses `start` or `start,count` into a `(start, count)` pair.
    fn parse_range(range: &str) -> Option<(u32, u32)> {
        match range.split_once(',') {
            Some((start, count)) => Some((start.parse().ok()?, count.parse().ok()?)),
            None => Some((range.parse().ok()?, 1)),
        }
    }

    let after_marker = line.strip_prefix("@@ -")?;
    let (old_range, after_old) = after_marker.split_once(' ')?;
    let (new_range, _trailing) = after_old.strip_prefix('+')?.split_once(" @@")?;

    let (old_start, old_count) = parse_range(old_range)?;
    let (new_start, new_count) = parse_range(new_range)?;
    Some((old_start, old_count, new_start, new_count))
}
60
61fn parse_diff_hunks(diff: &str) -> Vec<ParsedHunk> {
62 let mut hunks = Vec::new();
63 let mut current_hunk: Option<ParsedHunk> = None;
64
65 for line in diff.lines() {
66 if let Some((old_start, old_count, new_start, new_count)) = parse_hunk_header(line) {
67 if let Some(hunk) = current_hunk.take() {
68 hunks.push(hunk);
69 }
70 current_hunk = Some(ParsedHunk {
71 old_start,
72 old_count,
73 new_start,
74 new_count,
75 lines: Vec::new(),
76 });
77 } else if let Some(ref mut hunk) = current_hunk {
78 if let Some(stripped) = line.strip_prefix('+') {
79 hunk.lines.push(HunkLine::Addition(stripped.to_string()));
80 } else if let Some(stripped) = line.strip_prefix('-') {
81 hunk.lines.push(HunkLine::Deletion(stripped.to_string()));
82 } else if let Some(stripped) = line.strip_prefix(' ') {
83 hunk.lines.push(HunkLine::Context(stripped.to_string()));
84 } else if line.is_empty() {
85 hunk.lines.push(HunkLine::Context(String::new()));
86 }
87 }
88 }
89
90 if let Some(hunk) = current_hunk {
91 hunks.push(hunk);
92 }
93
94 hunks
95}
96
97fn format_hunk(hunk: &ParsedHunk) -> String {
98 let mut result = format!(
99 "@@ -{},{} +{},{} @@\n",
100 hunk.old_start, hunk.old_count, hunk.new_start, hunk.new_count
101 );
102 for line in &hunk.lines {
103 match line {
104 HunkLine::Context(text) => {
105 result.push(' ');
106 result.push_str(text);
107 result.push('\n');
108 }
109 HunkLine::Addition(text) => {
110 result.push('+');
111 result.push_str(text);
112 result.push('\n');
113 }
114 HunkLine::Deletion(text) => {
115 result.push('-');
116 result.push_str(text);
117 result.push('\n');
118 }
119 }
120 }
121 result
122}
123
124fn filter_diff_hunks_by_excerpt(
125 diff: &str,
126 excerpt_start_row: u32,
127 excerpt_row_count: u32,
128) -> (String, i32) {
129 let hunks = parse_diff_hunks(diff);
130 let excerpt_start_0based = excerpt_start_row;
131 let excerpt_end_0based = excerpt_start_row + excerpt_row_count;
132
133 let mut filtered_hunks = Vec::new();
134 let mut cumulative_line_offset: i32 = 0;
135
136 for hunk in hunks {
137 let hunk_start_0based = hunk.new_start.saturating_sub(1);
138 let hunk_end_0based = hunk_start_0based + hunk.new_count;
139
140 let additions: i32 = hunk
141 .lines
142 .iter()
143 .filter(|l| matches!(l, HunkLine::Addition(_)))
144 .count() as i32;
145 let deletions: i32 = hunk
146 .lines
147 .iter()
148 .filter(|l| matches!(l, HunkLine::Deletion(_)))
149 .count() as i32;
150 let hunk_line_delta = additions - deletions;
151
152 if hunk_end_0based <= excerpt_start_0based {
153 cumulative_line_offset += hunk_line_delta;
154 continue;
155 }
156
157 if hunk_start_0based >= excerpt_end_0based {
158 continue;
159 }
160
161 let mut filtered_lines = Vec::new();
162 let mut current_row_0based = hunk_start_0based;
163 let mut filtered_old_count = 0u32;
164 let mut filtered_new_count = 0u32;
165 let mut first_included_row: Option<u32> = None;
166
167 for line in &hunk.lines {
168 match line {
169 HunkLine::Context(text) => {
170 if current_row_0based >= excerpt_start_0based
171 && current_row_0based < excerpt_end_0based
172 {
173 if first_included_row.is_none() {
174 first_included_row = Some(current_row_0based);
175 }
176 filtered_lines.push(HunkLine::Context(text.clone()));
177 filtered_old_count += 1;
178 filtered_new_count += 1;
179 }
180 current_row_0based += 1;
181 }
182 HunkLine::Addition(text) => {
183 if current_row_0based >= excerpt_start_0based
184 && current_row_0based < excerpt_end_0based
185 {
186 if first_included_row.is_none() {
187 first_included_row = Some(current_row_0based);
188 }
189 filtered_lines.push(HunkLine::Addition(text.clone()));
190 filtered_new_count += 1;
191 }
192 current_row_0based += 1;
193 }
194 HunkLine::Deletion(text) => {
195 if current_row_0based >= excerpt_start_0based
196 && current_row_0based < excerpt_end_0based
197 {
198 if first_included_row.is_none() {
199 first_included_row = Some(current_row_0based);
200 }
201 filtered_lines.push(HunkLine::Deletion(text.clone()));
202 filtered_old_count += 1;
203 }
204 }
205 }
206 }
207
208 if !filtered_lines.is_empty() {
209 let first_row = first_included_row.unwrap_or(excerpt_start_0based);
210 let new_start_1based = (first_row - excerpt_start_0based) + 1;
211
212 filtered_hunks.push(ParsedHunk {
213 old_start: new_start_1based,
214 old_count: filtered_old_count,
215 new_start: new_start_1based,
216 new_count: filtered_new_count,
217 lines: filtered_lines,
218 });
219 }
220
221 cumulative_line_offset += hunk_line_delta;
222 }
223
224 let mut result = String::new();
225 for hunk in &filtered_hunks {
226 result.push_str(&format_hunk(hunk));
227 }
228
229 (result, cumulative_line_offset)
230}
231
232fn compute_excerpt_aware_reversal_overlap(
233 edit_history_diffs: &[&str],
234 excerpt_content: &str,
235 excerpt_start_row: u32,
236 predicted_content: &str,
237) -> ReversalOverlap {
238 let mut current_content = excerpt_content.to_string();
239 let mut current_excerpt_start_row = excerpt_start_row;
240
241 for diff in edit_history_diffs.iter().rev() {
242 if diff.is_empty() {
243 continue;
244 }
245
246 let current_row_count = current_content.lines().count() as u32;
247 let (filtered_diff, _line_offset) =
248 filter_diff_hunks_by_excerpt(diff, current_excerpt_start_row, current_row_count.max(1));
249
250 if filtered_diff.is_empty() {
251 let hunks = parse_diff_hunks(diff);
252 for hunk in hunks {
253 let hunk_end = hunk.new_start.saturating_sub(1) + hunk.new_count;
254 if hunk_end <= current_excerpt_start_row {
255 let additions: u32 = hunk
256 .lines
257 .iter()
258 .filter(|l| matches!(l, HunkLine::Addition(_)))
259 .count() as u32;
260 let deletions: u32 = hunk
261 .lines
262 .iter()
263 .filter(|l| matches!(l, HunkLine::Deletion(_)))
264 .count() as u32;
265 if additions >= deletions {
266 current_excerpt_start_row =
267 current_excerpt_start_row.saturating_sub(additions - deletions);
268 } else {
269 current_excerpt_start_row += deletions - additions;
270 }
271 }
272 }
273 continue;
274 }
275
276 let reversed = reverse_diff(&format!("--- a/file\n+++ b/file\n{}", filtered_diff));
277 match apply_diff_to_string(&reversed, ¤t_content) {
278 Ok(updated) => {
279 current_content = updated;
280 }
281 Err(_) => {
282 continue;
283 }
284 }
285
286 let hunks = parse_diff_hunks(diff);
287 for hunk in hunks {
288 let hunk_end = hunk.new_start.saturating_sub(1) + hunk.new_count;
289 if hunk_end <= current_excerpt_start_row {
290 let additions: u32 = hunk
291 .lines
292 .iter()
293 .filter(|l| matches!(l, HunkLine::Addition(_)))
294 .count() as u32;
295 let deletions: u32 = hunk
296 .lines
297 .iter()
298 .filter(|l| matches!(l, HunkLine::Deletion(_)))
299 .count() as u32;
300 if additions >= deletions {
301 current_excerpt_start_row =
302 current_excerpt_start_row.saturating_sub(additions - deletions);
303 } else {
304 current_excerpt_start_row += deletions - additions;
305 }
306 }
307 }
308 }
309
310 compute_reversal_overlap(¤t_content, excerpt_content, predicted_content)
311}
312
/// Reverses a unified diff so that additions become deletions and vice versa.
///
/// - `--- ` and `+++ ` file header lines swap their markers (their order in
///   the output is unchanged).
/// - Hunk headers (`@@ -a,b +c,d @@`) are passed through unchanged; the old
///   and new ranges are not swapped.
/// - Inside a hunk body, `+` lines become `-` lines and `-` lines become `+`
///   lines. The body is delimited by the line counts in the hunk header, so a
///   body line whose content itself starts with `--` or `++` (a deleted
///   `-- comment` appears as `--- comment`) is reversed as a body line rather
///   than mistaken for a file header.
/// - Lines outside a counted hunk body (e.g. after a header without line
///   numbers) are classified by prefix alone: `--- `/`+++ ` are treated as file
///   headers, other `+`/`-` lines are flipped unless they start with
///   `+++`/`---`.
///
/// A trailing newline is preserved if and only if `diff` ends with one.
fn reverse_diff(diff: &str) -> String {
    /// Extracts `(old_count, new_count)` from a hunk header; a range without a
    /// `,count` suffix counts as one line.
    fn hunk_line_counts(header: &str) -> Option<(u32, u32)> {
        let ranges = header.strip_prefix("@@ -")?;
        let (old_range, rest) = ranges.split_once(" +")?;
        let (new_range, _) = rest.split_once(" @@")?;
        let count = |range: &str| match range.split_once(',') {
            Some((_, count)) => count.parse::<u32>().ok(),
            None => Some(1),
        };
        Some((count(old_range)?, count(new_range)?))
    }

    /// Prefix-only classification, used where no hunk body is being tracked.
    fn reverse_by_prefix(line: &str) -> String {
        if line.starts_with("--- ") {
            line.replacen("--- ", "+++ ", 1)
        } else if line.starts_with("+++ ") {
            line.replacen("+++ ", "--- ", 1)
        } else if line.starts_with('+') && !line.starts_with("+++") {
            format!("-{}", &line[1..])
        } else if line.starts_with('-') && !line.starts_with("---") {
            format!("+{}", &line[1..])
        } else {
            line.to_string()
        }
    }

    // Lines still expected on each side of the hunk currently being read.
    let mut old_remaining = 0u32;
    let mut new_remaining = 0u32;
    let mut reversed_lines: Vec<String> = Vec::new();

    for line in diff.lines() {
        let reversed_line = if let Some((old_count, new_count)) = hunk_line_counts(line) {
            old_remaining = old_count;
            new_remaining = new_count;
            line.to_string()
        } else if old_remaining == 0 && new_remaining == 0 {
            reverse_by_prefix(line)
        } else if let Some(added) = line.strip_prefix('+') {
            new_remaining = new_remaining.saturating_sub(1);
            format!("-{}", added)
        } else if let Some(removed) = line.strip_prefix('-') {
            old_remaining = old_remaining.saturating_sub(1);
            format!("+{}", removed)
        } else if line.starts_with('\\') {
            // Marker lines such as "\ No newline at end of file" consume no rows.
            line.to_string()
        } else if line.is_empty() || line.starts_with(' ') {
            old_remaining = old_remaining.saturating_sub(1);
            new_remaining = new_remaining.saturating_sub(1);
            line.to_string()
        } else {
            // Not a valid body line: the header overstated the hunk length.
            old_remaining = 0;
            new_remaining = 0;
            line.to_string()
        };
        reversed_lines.push(reversed_line);
    }

    let mut result = reversed_lines.join("\n");
    if diff.ends_with('\n') {
        result.push('\n');
    }
    result
}
336
/// A single replacement taking one text to another.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GranularEdit {
    /// Byte range of the replaced span within the text the edit applies to.
    range: Range<usize>,
    /// The text that occupied `range` before the edit (empty for an insertion).
    old_text: String,
    /// The text that replaces it (empty for a deletion).
    new_text: String,
}
343
344fn compute_granular_edits(old_text: &str, new_text: &str) -> Vec<GranularEdit> {
345 text_diff(old_text, new_text)
346 .into_iter()
347 .map(|(range, new_text)| GranularEdit {
348 old_text: old_text[range.clone()].to_string(),
349 range,
350 new_text: new_text.to_string(),
351 })
352 .collect()
353}
354
/// Location of text that a history edit inserted.
#[derive(Debug, Clone)]
struct HistoryAdditionRange {
    /// Byte range the inserted text occupies in the text after the edits.
    range_in_current: Range<usize>,
}
359
/// Text that a history edit removed, together with where it used to be.
#[derive(Debug, Clone)]
struct HistoryDeletionRange {
    /// The removed text.
    deleted_text: String,
    /// Byte offset, in the text after the edits, at which the text was removed.
    position_in_current: usize,
}
365
366fn compute_history_addition_ranges(history_edits: &[GranularEdit]) -> Vec<HistoryAdditionRange> {
367 let mut result = Vec::new();
368 let mut offset_delta: isize = 0;
369
370 for edit in history_edits {
371 if !edit.new_text.is_empty() {
372 let new_start = (edit.range.start as isize + offset_delta) as usize;
373 let new_end = new_start + edit.new_text.len();
374 result.push(HistoryAdditionRange {
375 range_in_current: new_start..new_end,
376 });
377 }
378
379 offset_delta += edit.new_text.len() as isize - edit.old_text.len() as isize;
380 }
381
382 result
383}
384
385fn compute_history_deletion_ranges(history_edits: &[GranularEdit]) -> Vec<HistoryDeletionRange> {
386 let mut result = Vec::new();
387 let mut offset_delta: isize = 0;
388
389 for edit in history_edits {
390 if !edit.old_text.is_empty() {
391 let position_in_current = (edit.range.start as isize + offset_delta) as usize;
392 result.push(HistoryDeletionRange {
393 deleted_text: edit.old_text.clone(),
394 position_in_current,
395 });
396 }
397
398 offset_delta += edit.new_text.len() as isize - edit.old_text.len() as isize;
399 }
400
401 result
402}
403
/// Character counts describing how much of a prediction reverses earlier edits.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct ReversalOverlap {
    /// Characters of the prediction that reverse earlier edits.
    chars_reversing_user_edits: usize,
    /// Total characters touched by the prediction.
    total_chars_in_prediction: usize,
}

impl ReversalOverlap {
    /// Fraction of the prediction's characters that reverse earlier edits.
    ///
    /// Returns `0.0` when the prediction touches no characters.
    fn ratio(&self) -> f32 {
        match self.total_chars_in_prediction {
            0 => 0.0,
            total => self.chars_reversing_user_edits as f32 / total as f32,
        }
    }
}
419
420/// Normalize edits where `old_text` appears as a subsequence within `new_text` (extension),
421/// or where `new_text` appears as a subsequence within `old_text` (reduction).
422///
423/// For extensions: when the user's text is preserved (in order) within the prediction,
424/// we only count the newly inserted characters, not the preserved ones.
425/// E.g., "epr" → "eprintln!()" becomes 8 inserted chars ("intln!()")
426/// E.g., "test_my_function" → "a_test_for_my_special_function_plz" becomes 18 inserted chars
427///
428/// For reductions: when the prediction's text is preserved (in order) within the original,
429/// we only count the deleted characters, not the preserved ones.
430/// E.g., "ifrom" → "from" becomes 1 deleted char ("i")
431fn normalize_extension_edits(edits: Vec<GranularEdit>) -> Vec<GranularEdit> {
432 edits
433 .into_iter()
434 .flat_map(|edit| {
435 if edit.old_text.is_empty() || edit.new_text.is_empty() {
436 return vec![edit];
437 }
438
439 // Use character-wise diff to find exact byte ranges of changes
440 let char_edits = char_diff(&edit.old_text, &edit.new_text);
441
442 let all_deletions = !char_edits.is_empty()
443 && char_edits
444 .iter()
445 .all(|(range, replacement)| !range.is_empty() && replacement.is_empty());
446 let all_insertions = !char_edits.is_empty()
447 && char_edits
448 .iter()
449 .all(|(range, replacement)| range.is_empty() && !replacement.is_empty());
450 if all_deletions || all_insertions {
451 return char_edits
452 .into_iter()
453 .map(|(range, replacement)| GranularEdit {
454 range: edit.range.start + range.start..edit.range.start + range.end,
455 old_text: edit.old_text[range].to_string(),
456 new_text: replacement.to_string(),
457 })
458 .collect();
459 }
460
461 // Otherwise, keep the original edit (mixed changes)
462 vec![edit]
463 })
464 .collect()
465}
466
467fn compute_reversal_overlap(
468 original_content: &str,
469 current_content: &str,
470 predicted_content: &str,
471) -> ReversalOverlap {
472 let history_edits =
473 normalize_extension_edits(compute_granular_edits(original_content, current_content));
474 let prediction_edits =
475 normalize_extension_edits(compute_granular_edits(current_content, predicted_content));
476
477 let history_addition_ranges = compute_history_addition_ranges(&history_edits);
478 let history_deletion_ranges = compute_history_deletion_ranges(&history_edits);
479
480 let reversed_additions =
481 compute_reversed_additions(&history_addition_ranges, &prediction_edits);
482 let restored_deletions =
483 compute_restored_deletions(&history_deletion_ranges, &prediction_edits);
484
485 let total_chars_in_prediction: usize = prediction_edits
486 .iter()
487 .map(|e| e.new_text.chars().count() + e.old_text.chars().count())
488 .sum();
489
490 ReversalOverlap {
491 chars_reversing_user_edits: reversed_additions + restored_deletions,
492 total_chars_in_prediction,
493 }
494}
495
496fn compute_reversed_additions(
497 history_addition_ranges: &[HistoryAdditionRange],
498 prediction_edits: &[GranularEdit],
499) -> usize {
500 let mut reversed_chars = 0;
501
502 for pred_edit in prediction_edits {
503 for history_addition in history_addition_ranges {
504 let overlap_start = pred_edit
505 .range
506 .start
507 .max(history_addition.range_in_current.start);
508 let overlap_end = pred_edit
509 .range
510 .end
511 .min(history_addition.range_in_current.end);
512
513 if overlap_start < overlap_end {
514 let relative_start = overlap_start - pred_edit.range.start;
515 let relative_end = overlap_end - pred_edit.range.start;
516 let overlap_text = &pred_edit.old_text[relative_start..relative_end];
517 reversed_chars += overlap_text.chars().count();
518 }
519 }
520 }
521
522 reversed_chars
523}
524
525fn compute_restored_deletions(
526 history_deletion_ranges: &[HistoryDeletionRange],
527 prediction_edits: &[GranularEdit],
528) -> usize {
529 let mut restored = 0;
530
531 for pred_edit in prediction_edits {
532 if pred_edit.new_text.is_empty() {
533 continue;
534 }
535
536 for deletion in history_deletion_ranges {
537 if pred_edit.range.contains(&deletion.position_in_current)
538 || deletion.position_in_current == pred_edit.range.start
539 {
540 restored += compute_lcs_length(&deletion.deleted_text, &pred_edit.new_text);
541 }
542 }
543 }
544
545 restored
546}
547
/// Returns the length, in `char`s, of the longest common subsequence of `a`
/// and `b`.
///
/// Uses the standard dynamic program with a single rolling row, so memory is
/// proportional to the number of characters in `b`.
fn compute_lcs_length(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    if a_chars.is_empty() || b_chars.is_empty() {
        return 0;
    }

    // row[j] = LCS length of the processed prefix of `a` and the first j chars of `b`.
    let mut row = vec![0usize; b_chars.len() + 1];

    for &a_char in &a_chars {
        // Value of the cell diagonally up-left of the one being written.
        let mut diagonal = 0;
        for (index, &b_char) in b_chars.iter().enumerate() {
            let above = row[index + 1];
            row[index + 1] = if a_char == b_char {
                diagonal + 1
            } else {
                above.max(row[index])
            };
            diagonal = above;
        }
    }

    row[b_chars.len()]
}
575
576fn filter_edit_history_by_path<'a>(
577 edit_history: &'a [Arc<zeta_prompt::Event>],
578 cursor_path: &std::path::Path,
579) -> Vec<&'a zeta_prompt::Event> {
580 edit_history
581 .iter()
582 .filter(|event| match event.as_ref() {
583 zeta_prompt::Event::BufferChange { path, .. } => {
584 let event_path = path.as_ref();
585 if event_path == cursor_path {
586 return true;
587 }
588 let stripped = event_path
589 .components()
590 .skip(1)
591 .collect::<std::path::PathBuf>();
592 stripped == cursor_path
593 }
594 })
595 .map(|arc| arc.as_ref())
596 .collect()
597}
598
599fn extract_diff_from_event(event: &zeta_prompt::Event) -> &str {
600 match event {
601 zeta_prompt::Event::BufferChange { diff, .. } => diff.as_str(),
602 }
603}
604
605fn is_predicted_event(event: &zeta_prompt::Event) -> bool {
606 match event {
607 zeta_prompt::Event::BufferChange { predicted, .. } => *predicted,
608 }
609}
610
611pub fn compute_prediction_reversal_ratio(
612 prompt_inputs: &ZetaPromptInput,
613 predicted_content: &str,
614 cursor_path: &Path,
615) -> f32 {
616 let current_content: &str = prompt_inputs.cursor_excerpt.as_ref();
617
618 let edit_history: &[Arc<zeta_prompt::Event>] = &prompt_inputs.events;
619 let relevant_events = filter_edit_history_by_path(edit_history, cursor_path);
620
621 let most_recent = match relevant_events.last() {
622 Some(event) if !is_predicted_event(event) => *event,
623 _ => return 0.0,
624 };
625
626 let diff = extract_diff_from_event(most_recent);
627 if diff.is_empty() {
628 return 0.0;
629 }
630
631 if let Some(excerpt_start_row) = prompt_inputs.excerpt_start_row {
632 let diffs = vec![diff];
633 let overlap = compute_excerpt_aware_reversal_overlap(
634 &diffs,
635 current_content,
636 excerpt_start_row,
637 predicted_content,
638 );
639 return overlap.ratio();
640 }
641
642 let reversed = reverse_diff(diff);
643 let with_headers = format!("--- a/file\n+++ b/file\n{}", reversed);
644 let original_content = match apply_diff_to_string(&with_headers, current_content) {
645 Ok(updated_content) => updated_content,
646 Err(_) => apply_diff_to_string_lenient(&reversed, current_content),
647 };
648
649 let overlap = compute_reversal_overlap(&original_content, current_content, predicted_content);
650 overlap.ratio()
651}
652
653#[cfg(test)]
654mod tests {
655 use super::*;
656 use edit_prediction::udiff::apply_diff_to_string;
657 use indoc::indoc;
658
659 fn make_test_prompt_inputs(
660 content: &str,
661 events: Vec<Arc<zeta_prompt::Event>>,
662 excerpt_start_row: Option<u32>,
663 ) -> ZetaPromptInput {
664 ZetaPromptInput {
665 cursor_path: Arc::from(Path::new("src/test.rs")),
666 cursor_excerpt: content.into(),
667 editable_range_in_excerpt: 0..content.len(),
668 cursor_offset_in_excerpt: 0,
669 excerpt_start_row,
670 events,
671 related_files: Vec::new(),
672 excerpt_ranges: None,
673 preferred_model: None,
674 in_open_source_repo: false,
675 can_collect_data: false,
676 }
677 }
678
679 #[test]
680 fn test_reversal_overlap() {
681 struct Case {
682 name: &'static str,
683 original: &'static str,
684 current: &'static str,
685 predicted: &'static str,
686 expected_reversal_chars: usize,
687 expected_total_chars: usize,
688 }
689
690 let cases = [
691 Case {
692 name: "user_adds_line_prediction_removes_it",
693 original: indoc! {"
694 a
695 b
696 c"},
697 current: indoc! {"
698 a
699 new line
700 b
701 c"},
702 predicted: indoc! {"
703 a
704 b
705 c"},
706 expected_reversal_chars: 9,
707 expected_total_chars: 9,
708 },
709 Case {
710 name: "user_deletes_line_prediction_restores_it",
711 original: indoc! {"
712 a
713 deleted
714 b"},
715 current: indoc! {"
716 a
717 b"},
718 predicted: indoc! {"
719 a
720 deleted
721 b"},
722 expected_reversal_chars: 8,
723 expected_total_chars: 8,
724 },
725 Case {
726 name: "user_deletes_text_prediction_restores_partial",
727 original: "hello beautiful world",
728 current: "hello world",
729 predicted: "hello beautiful world",
730 expected_reversal_chars: 10,
731 expected_total_chars: 10,
732 },
733 Case {
734 name: "user_deletes_foo_prediction_adds_bar",
735 original: "foo",
736 current: "",
737 predicted: "bar",
738 expected_reversal_chars: 0,
739 expected_total_chars: 3,
740 },
741 Case {
742 name: "independent_edits_different_locations",
743 original: indoc! {"
744 line1
745 line2
746 line3"},
747 current: indoc! {"
748 LINE1
749 line2
750 line3"},
751 predicted: indoc! {"
752 LINE1
753 line2
754 LINE3"},
755 expected_reversal_chars: 0,
756 expected_total_chars: 10,
757 },
758 Case {
759 name: "no_history_edits",
760 original: "same",
761 current: "same",
762 predicted: "different",
763 expected_reversal_chars: 0,
764 expected_total_chars: 13,
765 },
766 Case {
767 name: "user_replaces_text_prediction_reverses",
768 original: indoc! {"
769 keep
770 delete_me
771 keep2"},
772 current: indoc! {"
773 keep
774 added
775 keep2"},
776 predicted: indoc! {"
777 keep
778 delete_me
779 keep2"},
780 expected_reversal_chars: 14,
781 expected_total_chars: 14,
782 },
783 Case {
784 name: "user_modifies_word_prediction_modifies_differently",
785 original: "the quick brown fox",
786 current: "the slow brown fox",
787 predicted: "the fast brown fox",
788 expected_reversal_chars: 4,
789 expected_total_chars: 8,
790 },
791 Case {
792 name: "user finishes function name (suffix)",
793 original: "",
794 current: "epr",
795 predicted: "eprintln!()",
796 expected_reversal_chars: 0,
797 expected_total_chars: 8,
798 },
799 Case {
800 name: "user starts function name (prefix)",
801 original: "",
802 current: "my_function()",
803 predicted: "test_my_function()",
804 expected_reversal_chars: 0,
805 expected_total_chars: 5,
806 },
807 Case {
808 name: "user types partial, prediction extends in multiple places",
809 original: "",
810 current: "test_my_function",
811 predicted: "a_test_for_my_special_function_plz",
812 expected_reversal_chars: 0,
813 expected_total_chars: 18,
814 },
815 // Edge cases for subsequence matching
816 Case {
817 name: "subsequence with interleaved underscores",
818 original: "",
819 current: "a_b_c",
820 predicted: "_a__b__c__",
821 expected_reversal_chars: 0,
822 expected_total_chars: 5,
823 },
824 Case {
825 name: "not a subsequence - different characters",
826 original: "",
827 current: "abc",
828 predicted: "xyz",
829 expected_reversal_chars: 3,
830 expected_total_chars: 6,
831 },
832 Case {
833 name: "not a subsequence - wrong order",
834 original: "",
835 current: "abc",
836 predicted: "cba",
837 expected_reversal_chars: 3,
838 expected_total_chars: 6,
839 },
840 Case {
841 name: "partial subsequence - only some chars match",
842 original: "",
843 current: "abcd",
844 predicted: "axbx",
845 expected_reversal_chars: 4,
846 expected_total_chars: 8,
847 },
848 // Common completion patterns
849 Case {
850 name: "completing a method call",
851 original: "",
852 current: "vec.pu",
853 predicted: "vec.push(item)",
854 expected_reversal_chars: 0,
855 expected_total_chars: 8,
856 },
857 Case {
858 name: "completing an import statement",
859 original: "",
860 current: "use std::col",
861 predicted: "use std::collections::HashMap",
862 expected_reversal_chars: 0,
863 expected_total_chars: 17,
864 },
865 Case {
866 name: "completing a struct field",
867 original: "",
868 current: "name: St",
869 predicted: "name: String",
870 expected_reversal_chars: 0,
871 expected_total_chars: 4,
872 },
873 Case {
874 name: "prediction replaces with completely different text",
875 original: "",
876 current: "hello",
877 predicted: "world",
878 expected_reversal_chars: 5,
879 expected_total_chars: 10,
880 },
881 Case {
882 name: "empty prediction removes user text",
883 original: "",
884 current: "mistake",
885 predicted: "",
886 expected_reversal_chars: 7,
887 expected_total_chars: 7,
888 },
889 Case {
890 name: "fixing typo is not reversal",
891 original: "",
892 current: "<dv",
893 predicted: "<div>",
894 expected_reversal_chars: 0,
895 expected_total_chars: 2,
896 },
897 Case {
898 name: "infix insertion not reversal",
899 original: indoc! {"
900 from my_project import Foo
901 "},
902 current: indoc! {"
903 ifrom my_project import Foo
904 "},
905 predicted: indoc! {"
906 import
907 from my_project import Foo
908 "},
909 expected_reversal_chars: 0,
910 expected_total_chars: 6,
911 },
912 Case {
913 name: "non-word based reversal",
914 original: "from",
915 current: "ifrom",
916 predicted: "from",
917 expected_reversal_chars: 1,
918 expected_total_chars: 1,
919 },
920 Case {
921 name: "multiple insertions no reversal",
922 original: "print(\"Hello, World!\")",
923 current: "sys.(\"Hello, World!\")",
924 predicted: "sys.stdout.write(\"Hello, World!\\n\")",
925 expected_reversal_chars: 0,
926 expected_total_chars: 14,
927 },
928 ];
929
930 for case in &cases {
931 let overlap = compute_reversal_overlap(case.original, case.current, case.predicted);
932 assert_eq!(
933 overlap.chars_reversing_user_edits, case.expected_reversal_chars,
934 "Test '{}': expected {} reversal chars, got {}",
935 case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
936 );
937 assert_eq!(
938 overlap.total_chars_in_prediction, case.expected_total_chars,
939 "Test '{}': expected {} total chars, got {}",
940 case.name, case.expected_total_chars, overlap.total_chars_in_prediction
941 );
942 }
943 }
944
945 #[test]
946 fn test_reverse_diff() {
947 let forward_diff = indoc! {"
948 --- a/file.rs
949 +++ b/file.rs
950 @@ -1,3 +1,4 @@
951 fn main() {
952 + let x = 42;
953 println!(\"hello\");
954 }"};
955
956 let reversed = reverse_diff(forward_diff);
957
958 assert!(
959 reversed.contains("+++ a/file.rs"),
960 "Should have +++ for old path"
961 );
962 assert!(
963 reversed.contains("--- b/file.rs"),
964 "Should have --- for new path"
965 );
966 assert!(
967 reversed.contains("- let x = 42;"),
968 "Added line should become deletion"
969 );
970 assert!(
971 reversed.contains(" fn main()"),
972 "Context lines should be unchanged"
973 );
974 }
975
976 #[test]
977 fn test_reverse_diff_roundtrip() {
978 // Applying a diff and then its reverse should get back to original
979 let original = indoc! {"
980 first line
981 hello world
982 last line
983 "};
984 let modified = indoc! {"
985 first line
986 hello beautiful world
987 last line
988 "};
989
990 // unified_diff doesn't include file headers, but apply_diff_to_string needs them
991 let diff_body = language::unified_diff(original, modified);
992 let forward_diff = format!("--- a/file\n+++ b/file\n{}", diff_body);
993 let reversed_diff = reverse_diff(&forward_diff);
994
995 // Apply forward diff to original
996 let after_forward = apply_diff_to_string(&forward_diff, original).unwrap();
997 assert_eq!(after_forward, modified);
998
999 // Apply reversed diff to modified
1000 let after_reverse = apply_diff_to_string(&reversed_diff, &after_forward).unwrap();
1001 assert_eq!(after_reverse, original);
1002 }
1003
1004 #[test]
1005 fn test_filter_edit_history_by_path() {
1006 // Test that filter_edit_history_by_path correctly matches paths when
1007 // the edit history has paths with a repo prefix (e.g., "repo/src/file.rs")
1008 // but the cursor_path doesn't have the repo prefix (e.g., "src/file.rs")
1009 let events = vec![
1010 Arc::new(zeta_prompt::Event::BufferChange {
1011 path: Arc::from(Path::new("myrepo/src/file.rs")),
1012 old_path: Arc::from(Path::new("myrepo/src/file.rs")),
1013 diff: indoc! {"
1014 @@ -1 +1 @@
1015 -old
1016 +new"}
1017 .into(),
1018 predicted: false,
1019 in_open_source_repo: true,
1020 }),
1021 Arc::new(zeta_prompt::Event::BufferChange {
1022 path: Arc::from(Path::new("myrepo/other.rs")),
1023 old_path: Arc::from(Path::new("myrepo/other.rs")),
1024 diff: indoc! {"
1025 @@ -1 +1 @@
1026 -a
1027 +b"}
1028 .into(),
1029 predicted: false,
1030 in_open_source_repo: true,
1031 }),
1032 Arc::new(zeta_prompt::Event::BufferChange {
1033 path: Arc::from(Path::new("src/file.rs")),
1034 old_path: Arc::from(Path::new("src/file.rs")),
1035 diff: indoc! {"
1036 @@ -1 +1 @@
1037 -x
1038 +y"}
1039 .into(),
1040 predicted: false,
1041 in_open_source_repo: true,
1042 }),
1043 ];
1044
1045 // "myrepo/src/file.rs" stripped -> "src/file.rs" matches cursor_path
1046 // "src/file.rs" exact match
1047 let cursor_path = Path::new("src/file.rs");
1048 let filtered = filter_edit_history_by_path(&events, cursor_path);
1049 assert_eq!(
1050 filtered.len(),
1051 2,
1052 "Should match myrepo/src/file.rs (stripped) and src/file.rs (exact)"
1053 );
1054
1055 // "myrepo/src/file.rs" stripped -> "src/file.rs" != "file.rs"
1056 // "src/file.rs" stripped -> "file.rs" == "file.rs"
1057 let cursor_path = Path::new("file.rs");
1058 let filtered = filter_edit_history_by_path(&events, cursor_path);
1059 assert_eq!(
1060 filtered.len(),
1061 1,
1062 "Should only match src/file.rs (stripped to file.rs)"
1063 );
1064
1065 // "myrepo/other.rs" stripped -> "other.rs" == "other.rs"
1066 let cursor_path = Path::new("other.rs");
1067 let filtered = filter_edit_history_by_path(&events, cursor_path);
1068 assert_eq!(filtered.len(), 1, "Should match only myrepo/other.rs");
1069 }
1070
1071 #[test]
1072 fn test_reverse_diff_preserves_trailing_newline() {
1073 let diff_with_trailing_newline = indoc! {"
1074 --- a/file
1075 +++ b/file
1076 @@ -1 +1 @@
1077 -old
1078 +new
1079 "};
1080 let reversed = reverse_diff(diff_with_trailing_newline);
1081 assert!(
1082 reversed.ends_with('\n'),
1083 "Reversed diff should preserve trailing newline"
1084 );
1085
1086 let diff_without_trailing_newline = indoc! {"
1087 --- a/file
1088 +++ b/file
1089 @@ -1 +1 @@
1090 -old
1091 +new"};
1092 let reversed = reverse_diff(diff_without_trailing_newline);
1093 assert!(
1094 !reversed.ends_with('\n'),
1095 "Reversed diff should not add trailing newline if original didn't have one"
1096 );
1097 }
1098
#[test]
fn test_filter_hunks_by_excerpt_region() {
    // Table-driven check of `filter_diff_hunks_by_excerpt`. Each row supplies a
    // diff plus an excerpt start row and row count, and pins both halves of the
    // returned pair: the filtered diff text and the line offset.
    struct Scenario {
        label: &'static str,
        input_diff: &'static str,
        start_row: u32,
        row_count: u32,
        want_diff: &'static str,
        want_offset: i32,
    }

    let scenarios = [
        // Hunk located before the excerpt: no diff text is expected back, but the
        // single inserted line still shows up in the expected offset.
        Scenario {
            label: "hunk_entirely_before_excerpt",
            input_diff: indoc! {"
                @@ -1,3 +1,4 @@
                 line1
                +inserted
                 line2
                 line3
            "},
            start_row: 10,
            row_count: 5,
            want_diff: "",
            want_offset: 1,
        },
        // Hunk located inside the excerpt: the expected header is renumbered
        // (12 -> 2) while the hunk lines are unchanged.
        Scenario {
            label: "hunk_entirely_inside_excerpt",
            input_diff: indoc! {"
                @@ -12,3 +12,4 @@
                 line12
                +inserted
                 line13
                 line14
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -2,3 +2,4 @@
                 line12
                +inserted
                 line13
                 line14
            "},
            want_offset: 1,
        },
        // Hunk located after the excerpt: nothing kept and a zero offset expected.
        Scenario {
            label: "hunk_entirely_after_excerpt",
            input_diff: indoc! {"
                @@ -50,3 +50,4 @@
                 line50
                +inserted
                 line51
                 line52
            "},
            start_row: 10,
            row_count: 5,
            want_diff: "",
            want_offset: 0,
        },
        // Hunk begins before the excerpt start: only the trailing context lines
        // are expected in the filtered output.
        Scenario {
            label: "hunk_straddles_excerpt_start",
            input_diff: indoc! {"
                @@ -8,5 +8,6 @@
                 line8
                 line9
                +inserted
                 line10
                 line11
                 line12
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -1,3 +1,3 @@
                 line10
                 line11
                 line12
            "},
            want_offset: 1,
        },
        // Hunk runs past the excerpt end: only its leading lines are expected.
        Scenario {
            label: "hunk_straddles_excerpt_end",
            input_diff: indoc! {"
                @@ -18,5 +18,6 @@
                 line18
                 line19
                +inserted
                 line20
                 line21
                 line22
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -8,2 +8,3 @@
                 line18
                 line19
                +inserted
            "},
            want_offset: 1,
        },
        // Three hunks (before / inside / after): only the middle one is expected
        // in the output.
        Scenario {
            label: "multiple_hunks_mixed",
            input_diff: indoc! {"
                @@ -1,2 +1,3 @@
                 line1
                +before_excerpt
                 line2
                @@ -12,2 +13,3 @@
                 line12
                +inside_excerpt
                 line13
                @@ -50,2 +52,3 @@
                 line50
                +after_excerpt
                 line51
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -3,2 +3,3 @@
                 line12
                +inside_excerpt
                 line13
            "},
            want_offset: 2,
        },
        // A deletion ahead of the excerpt is expected to yield a negative offset.
        Scenario {
            label: "deletion_before_excerpt",
            input_diff: indoc! {"
                @@ -1,4 +1,3 @@
                 line1
                -deleted
                 line2
                 line3
            "},
            start_row: 10,
            row_count: 5,
            want_diff: "",
            want_offset: -1,
        },
        Scenario {
            label: "deletion_inside_excerpt",
            input_diff: indoc! {"
                @@ -12,4 +12,3 @@
                 line12
                -deleted
                 line13
                 line14
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -2,4 +2,3 @@
                 line12
                -deleted
                 line13
                 line14
            "},
            want_offset: -1,
        },
        // Empty input: empty output and zero offset expected.
        Scenario {
            label: "empty_diff",
            input_diff: "",
            start_row: 10,
            row_count: 5,
            want_diff: "",
            want_offset: 0,
        },
        // One hunk covering more than the whole excerpt: both ends are expected
        // to be trimmed away.
        Scenario {
            label: "hunk_spans_entire_excerpt",
            input_diff: indoc! {"
                @@ -8,10 +8,12 @@
                 line8
                 line9
                 line10
                 line11
                +inserted1
                 line12
                 line13
                +inserted2
                 line14
                 line15
                 line16
                 line17
            "},
            start_row: 10,
            row_count: 5,
            want_diff: indoc! {"
                @@ -1,3 +1,5 @@
                 line11
                +inserted1
                 line12
                 line13
                +inserted2
            "},
            want_offset: 2,
        },
        // A one-for-one replacement keeps the line count, so a zero offset is
        // expected.
        Scenario {
            label: "replacement_inside_excerpt",
            input_diff: indoc! {"
                @@ -12,3 +12,3 @@
                 line12
                -old_text
                +new_text
                 line14
            "},
            start_row: 10,
            row_count: 10,
            want_diff: indoc! {"
                @@ -2,3 +2,3 @@
                 line12
                -old_text
                +new_text
                 line14
            "},
            want_offset: 0,
        },
    ];

    for Scenario {
        label,
        input_diff,
        start_row,
        row_count,
        want_diff,
        want_offset,
    } in scenarios
    {
        let (got_diff, got_offset) =
            filter_diff_hunks_by_excerpt(input_diff, start_row, row_count);

        assert_eq!(
            got_diff, want_diff,
            "Test '{}': filtered diff mismatch.\nExpected:\n{}\nGot:\n{}",
            label, want_diff, got_diff
        );
        assert_eq!(
            got_offset, want_offset,
            "Test '{}': line offset mismatch. Expected {}, got {}",
            label, want_offset, got_offset
        );
    }
}
1338
#[test]
fn test_excerpt_aware_reversal_tracking() {
    // Table-driven check of `compute_excerpt_aware_reversal_overlap`. Each row
    // supplies a list of edit-history diffs, the excerpt text with its start row,
    // and a predicted text, and pins the two counters read from the result:
    // `chars_reversing_user_edits` and `total_chars_in_prediction`.
    struct Scenario {
        label: &'static str,
        history: Vec<&'static str>,
        excerpt: &'static str,
        start_row: u32,
        prediction: &'static str,
        want_reversal: usize,
        want_total: usize,
    }

    let scenarios = [
        // The only history hunk sits at lines 1-2, away from the excerpt text.
        Scenario {
            label: "edit_outside_excerpt_no_reversal",
            history: vec![indoc! {"
                @@ -1,2 +1,3 @@
                 line1
                +added_outside
                 line2
            "}],
            excerpt: indoc! {"
                line10
                line11
                line12
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                modified
                line12
            "},
            want_reversal: 0,
            want_total: 14,
        },
        // The prediction drops the `user_added` line that the history inserted.
        Scenario {
            label: "edit_inside_excerpt_with_reversal",
            history: vec![indoc! {"
                @@ -10,3 +10,4 @@
                 line10
                +user_added
                 line11
                 line12
            "}],
            excerpt: indoc! {"
                line10
                user_added
                line11
                line12
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                line11
                line12
            "},
            want_reversal: 11,
            want_total: 11,
        },
        // One hunk with an insertion before `line10` and another after it; only
        // `inside_excerpt` appears in the excerpt text.
        Scenario {
            label: "straddling_edit_partial_reversal",
            history: vec![indoc! {"
                @@ -8,6 +8,8 @@
                 line8
                 line9
                +before_excerpt
                 line10
                +inside_excerpt
                 line11
                 line12
                 line13
            "}],
            excerpt: indoc! {"
                line10
                inside_excerpt
                line11
                line12
                line13
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                line11
                line12
                line13
            "},
            want_reversal: 15,
            want_total: 15,
        },
        // Two separate history entries: one at the top of the file, one whose
        // inserted line is present in the excerpt text.
        Scenario {
            label: "multiple_edits_mixed_locations",
            history: vec![
                indoc! {"
                    @@ -1,2 +1,3 @@
                     line1
                    +outside1
                     line2
                "},
                indoc! {"
                    @@ -11,2 +12,3 @@
                     line11
                    +inside1
                     line12
                "},
            ],
            excerpt: indoc! {"
                line10
                line11
                inside1
                line12
                line13
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                line11
                line12
                line13
            "},
            want_reversal: 8,
            want_total: 8,
        },
        // Empty history: zero reversal expected.
        Scenario {
            label: "no_edit_history",
            history: vec![],
            excerpt: indoc! {"
                line10
                line11
                line12
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                modified
                line12
            "},
            want_reversal: 0,
            want_total: 14,
        },
        // The only history hunk sits at line 50, past the excerpt text.
        Scenario {
            label: "edit_after_excerpt_no_effect",
            history: vec![indoc! {"
                @@ -50,2 +50,3 @@
                 line50
                +added_after
                 line51
            "}],
            excerpt: indoc! {"
                line10
                line11
                line12
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                changed
                line12
            "},
            want_reversal: 0,
            want_total: 13,
        },
        // The first entry adds two lines near the top; the second entry's header
        // already reflects that shift (-12 / +14).
        Scenario {
            label: "line_offset_tracking_across_hunks",
            history: vec![
                indoc! {"
                    @@ -1,2 +1,4 @@
                     line1
                    +added1
                    +added2
                     line2
                "},
                indoc! {"
                    @@ -12,2 +14,3 @@
                     line12
                    +inside_after_offset
                     line13
                "},
            ],
            excerpt: indoc! {"
                line10
                line11
                line12
                inside_after_offset
                line13
            "},
            start_row: 10,
            prediction: indoc! {"
                line10
                line11
                line12
                line13
            "},
            want_reversal: 20,
            want_total: 20,
        },
    ];

    for Scenario {
        label,
        history,
        excerpt,
        start_row,
        prediction,
        want_reversal,
        want_total,
    } in scenarios
    {
        let overlap =
            compute_excerpt_aware_reversal_overlap(&history, excerpt, start_row, prediction);

        assert_eq!(
            overlap.chars_reversing_user_edits, want_reversal,
            "Test '{}': expected {} reversal chars, got {}",
            label, want_reversal, overlap.chars_reversing_user_edits
        );
        assert_eq!(
            overlap.total_chars_in_prediction, want_total,
            "Test '{}': expected {} total chars, got {}",
            label, want_total, overlap.total_chars_in_prediction
        );
    }
}
1555
#[test]
fn test_lenient_diff_application() {
    // Table-driven check of `apply_diff_to_string_lenient`, which applies hunks
    // one at a time and keeps the text unchanged for any hunk that fails to
    // apply. Each row pins the text expected after the whole diff is processed.
    struct Scenario {
        label: &'static str,
        patch: &'static str,
        input: &'static str,
        want: &'static str,
    }

    let scenarios = [
        // Context lines do not occur in the input: the text is expected back
        // untouched.
        Scenario {
            label: "hunk_context_not_found_skipped",
            patch: indoc! {"
                @@ -1,3 +1,4 @@
                 context_not_in_content
                +added_line
                 more_context
                 final_context
            "},
            input: indoc! {"
                completely
                different
                content
            "},
            want: indoc! {"
                completely
                different
                content
            "},
        },
        // Context lines match: the insertion is expected to land.
        Scenario {
            label: "hunk_context_found_applied",
            patch: indoc! {"
                @@ -1,3 +1,4 @@
                 line1
                +inserted
                 line2
                 line3
            "},
            input: indoc! {"
                line1
                line2
                line3
            "},
            want: indoc! {"
                line1
                inserted
                line2
                line3
            "},
        },
        // First hunk has no matching context, second one does: only the second
        // insertion is expected in the result.
        Scenario {
            label: "multiple_hunks_partial_match",
            patch: indoc! {"
                @@ -1,2 +1,3 @@
                 not_found
                +skipped
                 also_not_found
                @@ -5,2 +6,3 @@
                 line5
                +applied
                 line6
            "},
            input: indoc! {"
                line1
                line2
                line3
                line4
                line5
                line6
            "},
            want: indoc! {"
                line1
                line2
                line3
                line4
                line5
                applied
                line6
            "},
        },
        // No hunks at all: the input is expected back unchanged.
        Scenario {
            label: "empty_diff",
            patch: "",
            input: indoc! {"
                unchanged
                content
            "},
            want: indoc! {"
                unchanged
                content
            "},
        },
    ];

    for Scenario {
        label,
        patch,
        input,
        want,
    } in scenarios
    {
        let got = apply_diff_to_string_lenient(patch, input);
        assert_eq!(
            got, want,
            "Test '{}': expected:\n{}\ngot:\n{}",
            label, want, got
        );
    }
}
1660
#[test]
fn test_unicode_reversal_overlap() {
    // Table-driven check that `compute_reversal_overlap` reports its two counters
    // in characters rather than bytes: every row uses multi-byte text, and the
    // expected values are character counts.
    struct Scenario {
        label: &'static str,
        original: &'static str,
        current: &'static str,
        predicted: &'static str,
        want_reversal: usize,
        want_total: usize,
    }

    let scenarios = [
        Scenario {
            label: "unicode_extension_cjk",
            original: "",
            // One character typed so far ...
            current: "日",
            // ... and the prediction extends it to three, i.e. two new characters.
            predicted: "日本語",
            want_reversal: 0,
            // "本語" is the added text: 2 chars.
            want_total: 2,
        },
        Scenario {
            label: "unicode_extension_emoji",
            original: "",
            // One emoji typed so far ...
            current: "🎉",
            // ... extended to three emoji, i.e. two new characters.
            predicted: "🎉🎊🎈",
            want_reversal: 0,
            // "🎊🎈" is the added text: 2 chars.
            want_total: 2,
        },
        Scenario {
            label: "unicode_deletion_restored",
            // 11 chars originally, cut down to 5 by the user.
            original: "héllo wörld",
            current: "héllo",
            // The prediction puts the 6-char " wörld" back.
            predicted: "héllo wörld",
            // LCS(" wörld", " wörld") = 6 chars.
            want_reversal: 6,
            want_total: 6,
        },
        Scenario {
            label: "unicode_addition_reversed",
            // 4 chars originally; the user appended the 6-char " latté" (10 total).
            original: "café",
            current: "café latté",
            // The prediction strips " latté" again.
            predicted: "café",
            // All 6 removed chars count as reversal.
            want_reversal: 6,
            want_total: 6,
        },
        Scenario {
            label: "mixed_ascii_unicode",
            original: "",
            // 6 chars typed so far, 9 chars in the prediction.
            current: "test日本",
            predicted: "test日本語です",
            want_reversal: 0,
            // 3 new chars after subsequence normalization.
            want_total: 3,
        },
        Scenario {
            label: "unicode_replacement_not_subsequence",
            original: "",
            // Two chars swapped for two entirely different chars.
            current: "日本",
            predicted: "中国",
            // Removing "日本" reverses 2 chars.
            want_reversal: 2,
            // 2 removed + 2 added.
            want_total: 4,
        },
    ];

    for Scenario {
        label,
        original,
        current,
        predicted,
        want_reversal,
        want_total,
    } in scenarios
    {
        let overlap = compute_reversal_overlap(original, current, predicted);

        assert_eq!(
            overlap.chars_reversing_user_edits, want_reversal,
            "Test '{}': expected {} reversal chars, got {}",
            label, want_reversal, overlap.chars_reversing_user_edits
        );
        assert_eq!(
            overlap.total_chars_in_prediction, want_total,
            "Test '{}': expected {} total chars, got {}",
            label, want_total, overlap.total_chars_in_prediction
        );
    }
}
1737
#[test]
fn test_compute_lcs_length() {
    // Rows are (left, right, expected result of `compute_lcs_length`).
    let table = [
        // Either side empty.
        ("", "", 0),
        ("abc", "", 0),
        ("", "abc", 0),
        // Identical and fully disjoint inputs.
        ("abc", "abc", 3),
        ("abc", "def", 0),
        // Non-contiguous common subsequences ("ace", "GTAB").
        ("abcdef", "ace", 3),
        ("AGGTAB", "GXTXAYB", 4),
        // Multi-byte text: the expected value counts characters.
        ("日本語", "日語", 2),
    ];

    for (left, right, expected) in table {
        assert_eq!(
            compute_lcs_length(left, right),
            expected,
            "compute_lcs_length({:?}, {:?})",
            left,
            right
        );
    }
}
1749
#[test]
fn test_compute_prediction_reversal_ratio_full_file() {
    // A single recorded edit inserts `user_added`; the prediction removes that
    // line again, so the ratio is expected to be above 0.9.
    let file = Path::new("src/test.rs");

    let current_text = indoc! {"
        line1
        user_added
        line2
    "};
    let user_edit = indoc! {"
        @@ -1,2 +1,3 @@
         line1
        +user_added
         line2
    "};
    let history = vec![Arc::new(zeta_prompt::Event::BufferChange {
        path: Arc::from(file),
        old_path: Arc::from(file),
        diff: user_edit.into(),
        predicted: false,
        in_open_source_repo: false,
    })];

    // Third argument is `None` here; the `_with_excerpt` sibling test passes
    // `Some(10)` instead.
    let prompt_inputs = make_test_prompt_inputs(current_text, history, None);

    let predicted = indoc! {"
        line1
        line2
    "};
    let ratio = compute_prediction_reversal_ratio(&prompt_inputs, predicted, file);

    assert!(
        ratio > 0.9,
        "Expected high reversal ratio when prediction removes user addition, got {}",
        ratio
    );
}
1787
#[test]
fn test_compute_prediction_reversal_ratio_with_excerpt() {
    // Same shape as the full-file test, but the recorded hunk is addressed at
    // line 10 and `Some(10)` is handed to `make_test_prompt_inputs`. The
    // prediction removes the inserted line, so a ratio above 0.9 is expected.
    let file = Path::new("src/test.rs");

    let excerpt_text = indoc! {"
        line10
        user_added
        line11
    "};
    let user_edit = indoc! {"
        @@ -10,2 +10,3 @@
         line10
        +user_added
         line11
    "};
    let history = vec![Arc::new(zeta_prompt::Event::BufferChange {
        path: Arc::from(file),
        old_path: Arc::from(file),
        diff: user_edit.into(),
        predicted: false,
        in_open_source_repo: false,
    })];

    let prompt_inputs = make_test_prompt_inputs(excerpt_text, history, Some(10));

    let predicted = indoc! {"
        line10
        line11
    "};
    let ratio = compute_prediction_reversal_ratio(&prompt_inputs, predicted, file);

    assert!(
        ratio > 0.9,
        "Expected high reversal ratio for excerpt-aware computation, got {}",
        ratio
    );
}
1825
#[test]
fn test_compute_prediction_reversal_ratio_no_history() {
    // With an empty event list the ratio is expected to be exactly zero, even
    // though the prediction rewrites the whole text.
    let current_text = indoc! {"
        original content
    "};
    let prompt_inputs = make_test_prompt_inputs(current_text, vec![], None);

    let predicted = indoc! {"
        completely different
    "};
    let file = Path::new("src/test.rs");
    let ratio = compute_prediction_reversal_ratio(&prompt_inputs, predicted, file);

    assert_eq!(
        ratio, 0.0,
        "Expected zero reversal ratio with no edit history"
    );
}
1847
#[test]
fn test_compute_prediction_reversal_ratio_path_filtering() {
    // The only recorded edit belongs to `src/other.rs`, while the ratio is
    // requested for `src/test.rs`; the expected ratio is exactly zero.
    let edited_file = Path::new("src/other.rs");
    let queried_file = Path::new("src/test.rs");

    let current_text = indoc! {"
        line1
        user_added
        line2
    "};
    let user_edit = indoc! {"
        @@ -1,2 +1,3 @@
         line1
        +user_added
         line2
    "};
    let history = vec![Arc::new(zeta_prompt::Event::BufferChange {
        path: Arc::from(edited_file),
        old_path: Arc::from(edited_file),
        diff: user_edit.into(),
        predicted: false,
        in_open_source_repo: false,
    })];

    let prompt_inputs = make_test_prompt_inputs(current_text, history, None);

    let predicted = indoc! {"
        line1
        line2
    "};
    let ratio = compute_prediction_reversal_ratio(&prompt_inputs, predicted, queried_file);

    assert_eq!(
        ratio, 0.0,
        "Expected zero reversal when edit history is for different file"
    );
}
1884
#[test]
fn test_compute_prediction_reversal_ratio_lenient_fallback() {
    // The recorded diff's context lines (`wrong_context`, `more_wrong`) do not
    // occur in the current text. This test only requires that the computation
    // still yields a ratio inside [0, 1]; it does not pin a specific value.
    let prompt_inputs = make_test_prompt_inputs(
        indoc! {"
            actual_line1
            user_added
            actual_line2
        "},
        vec![Arc::new(zeta_prompt::Event::BufferChange {
            path: Arc::from(Path::new("src/test.rs")),
            old_path: Arc::from(Path::new("src/test.rs")),
            diff: indoc! {"
                @@ -1,2 +1,3 @@
                 wrong_context
                +user_added
                 more_wrong
            "}
            .into(),
            predicted: false,
            in_open_source_repo: false,
        })],
        None,
    );

    let predicted = indoc! {"
        actual_line1
        actual_line2
    "};
    let ratio =
        compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));

    // A range `contains` check replaces the hand-written pair of comparisons;
    // like the original, it is false for NaN.
    assert!(
        (0.0..=1.0).contains(&ratio),
        "Ratio should be valid even with lenient fallback, got {}",
        ratio
    );
}
1922
#[test]
fn test_excerpt_aware_reversal_error_recovery() {
    // The single history diff references context lines that do not occur in the
    // excerpt text. The test only requires that the resulting overlap still
    // reports a ratio inside [0, 1].
    let diffs = vec![indoc! {"
        @@ -1,2 +1,3 @@
         nonexistent_context
        +added
         more_nonexistent
    "}];
    let excerpt_content = indoc! {"
        completely
        different
        content
    "};
    let predicted_content = indoc! {"
        completely
        modified
        content
    "};

    let overlap =
        compute_excerpt_aware_reversal_overlap(&diffs, excerpt_content, 0, predicted_content);

    // Evaluate the ratio once and use a range check instead of two comparisons
    // that each recompute it; like the original, the check is false for NaN.
    let ratio = overlap.ratio();
    assert!(
        (0.0..=1.0).contains(&ratio),
        "Should handle failed diff application gracefully"
    );
}
1950
#[test]
fn test_only_most_recent_edit_tracked() {
    // Two edits are recorded for the same file: the first inserts `first_add`,
    // the second inserts `second_add` right after it. The prediction removes
    // only `second_add`, and the ratio is expected to be above 0.9.
    let file = Path::new("src/test.rs");

    let current_text = indoc! {"
        line1
        first_add
        second_add
        line2
    "};

    let older_edit = Arc::new(zeta_prompt::Event::BufferChange {
        path: Arc::from(file),
        old_path: Arc::from(file),
        diff: indoc! {"
            @@ -1,2 +1,3 @@
             line1
            +first_add
             line2
        "}
        .into(),
        predicted: false,
        in_open_source_repo: false,
    });
    let newer_edit = Arc::new(zeta_prompt::Event::BufferChange {
        path: Arc::from(file),
        old_path: Arc::from(file),
        diff: indoc! {"
            @@ -2,2 +2,3 @@
             first_add
            +second_add
             line2
        "}
        .into(),
        predicted: false,
        in_open_source_repo: false,
    });

    // Events are passed in the order they were recorded: older first.
    let prompt_inputs =
        make_test_prompt_inputs(current_text, vec![older_edit, newer_edit], None);

    let predicted = indoc! {"
        line1
        first_add
        line2
    "};
    let ratio = compute_prediction_reversal_ratio(&prompt_inputs, predicted, file);

    assert!(
        ratio > 0.9,
        "Expected high reversal ratio when prediction exactly reverses the most recent edit, got {}",
        ratio
    );
}
2005}