1use std::ops::Range;
2use std::path::Path;
3use std::sync::Arc;
4
5use edit_prediction::udiff::apply_diff_to_string;
6use language::text_diff;
7
8use crate::example::ExamplePromptInputs;
9
10fn apply_diff_to_string_lenient(diff_str: &str, text: &str) -> String {
11 let hunks = parse_diff_hunks(diff_str);
12 let mut result = text.to_string();
13
14 for hunk in hunks {
15 let hunk_diff = format!("--- a/file\n+++ b/file\n{}", format_hunk(&hunk));
16 if let Ok(updated) = apply_diff_to_string(&hunk_diff, &result) {
17 result = updated;
18 }
19 }
20
21 result
22}
23
/// A single hunk of a unified diff: the numbers from its `@@ -a,b +c,d @@`
/// header together with the body lines that follow it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedHunk {
    /// Start line on the `-` (old) side of the header, as written in the diff.
    old_start: u32,
    /// Line count on the `-` (old) side of the header.
    old_count: u32,
    /// Start line on the `+` (new) side of the header, as written in the diff.
    new_start: u32,
    /// Line count on the `+` (new) side of the header.
    new_count: u32,
    /// Body lines of the hunk, in the order they appear in the diff.
    lines: Vec<HunkLine>,
}
32
/// One body line of a diff hunk. The payload is the line's text with the
/// leading marker character (` `, `+` or `-`) removed.
#[derive(Debug, Clone, PartialEq, Eq)]
enum HunkLine {
    /// Unchanged line (marker ` `; a completely empty diff line is also parsed as this).
    Context(String),
    /// Line present only on the new side (marker `+`).
    Addition(String),
    /// Line present only on the old side (marker `-`).
    Deletion(String),
}
39
/// Parses a unified-diff hunk header of the form `@@ -a[,b] +c[,d] @@ ...`.
///
/// Returns `(old_start, old_count, new_start, new_count)`. A side written
/// without a `,count` part gets a count of `1`. Returns `None` when the line
/// does not have the expected shape or any number fails to parse as `u32`.
fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32, u32)> {
    // Turns one `start[,count]` spec into numbers; the count defaults to 1.
    fn parse_range(spec: &str) -> Option<(u32, u32)> {
        match spec.split_once(',') {
            Some((start, count)) => Some((start.parse().ok()?, count.parse().ok()?)),
            None => Some((spec.parse().ok()?, 1)),
        }
    }

    let body = line.strip_prefix("@@ -")?;
    let (old_spec, remainder) = body.split_once(' ')?;
    // Anything after the closing ` @@` (e.g. a function name) is ignored.
    let (new_spec, _trailing) = remainder.strip_prefix('+')?.split_once(" @@")?;

    let (old_start, old_count) = parse_range(old_spec)?;
    let (new_start, new_count) = parse_range(new_spec)?;
    Some((old_start, old_count, new_start, new_count))
}
60
61fn parse_diff_hunks(diff: &str) -> Vec<ParsedHunk> {
62 let mut hunks = Vec::new();
63 let mut current_hunk: Option<ParsedHunk> = None;
64
65 for line in diff.lines() {
66 if let Some((old_start, old_count, new_start, new_count)) = parse_hunk_header(line) {
67 if let Some(hunk) = current_hunk.take() {
68 hunks.push(hunk);
69 }
70 current_hunk = Some(ParsedHunk {
71 old_start,
72 old_count,
73 new_start,
74 new_count,
75 lines: Vec::new(),
76 });
77 } else if let Some(ref mut hunk) = current_hunk {
78 if let Some(stripped) = line.strip_prefix('+') {
79 hunk.lines.push(HunkLine::Addition(stripped.to_string()));
80 } else if let Some(stripped) = line.strip_prefix('-') {
81 hunk.lines.push(HunkLine::Deletion(stripped.to_string()));
82 } else if let Some(stripped) = line.strip_prefix(' ') {
83 hunk.lines.push(HunkLine::Context(stripped.to_string()));
84 } else if line.is_empty() {
85 hunk.lines.push(HunkLine::Context(String::new()));
86 }
87 }
88 }
89
90 if let Some(hunk) = current_hunk {
91 hunks.push(hunk);
92 }
93
94 hunks
95}
96
97fn format_hunk(hunk: &ParsedHunk) -> String {
98 let mut result = format!(
99 "@@ -{},{} +{},{} @@\n",
100 hunk.old_start, hunk.old_count, hunk.new_start, hunk.new_count
101 );
102 for line in &hunk.lines {
103 match line {
104 HunkLine::Context(text) => {
105 result.push(' ');
106 result.push_str(text);
107 result.push('\n');
108 }
109 HunkLine::Addition(text) => {
110 result.push('+');
111 result.push_str(text);
112 result.push('\n');
113 }
114 HunkLine::Deletion(text) => {
115 result.push('-');
116 result.push_str(text);
117 result.push('\n');
118 }
119 }
120 }
121 result
122}
123
124fn filter_diff_hunks_by_excerpt(
125 diff: &str,
126 excerpt_start_row: u32,
127 excerpt_row_count: u32,
128) -> (String, i32) {
129 let hunks = parse_diff_hunks(diff);
130 let excerpt_start_0based = excerpt_start_row;
131 let excerpt_end_0based = excerpt_start_row + excerpt_row_count;
132
133 let mut filtered_hunks = Vec::new();
134 let mut cumulative_line_offset: i32 = 0;
135
136 for hunk in hunks {
137 let hunk_start_0based = hunk.new_start.saturating_sub(1);
138 let hunk_end_0based = hunk_start_0based + hunk.new_count;
139
140 let additions: i32 = hunk
141 .lines
142 .iter()
143 .filter(|l| matches!(l, HunkLine::Addition(_)))
144 .count() as i32;
145 let deletions: i32 = hunk
146 .lines
147 .iter()
148 .filter(|l| matches!(l, HunkLine::Deletion(_)))
149 .count() as i32;
150 let hunk_line_delta = additions - deletions;
151
152 if hunk_end_0based <= excerpt_start_0based {
153 cumulative_line_offset += hunk_line_delta;
154 continue;
155 }
156
157 if hunk_start_0based >= excerpt_end_0based {
158 continue;
159 }
160
161 let mut filtered_lines = Vec::new();
162 let mut current_row_0based = hunk_start_0based;
163 let mut filtered_old_count = 0u32;
164 let mut filtered_new_count = 0u32;
165 let mut first_included_row: Option<u32> = None;
166
167 for line in &hunk.lines {
168 match line {
169 HunkLine::Context(text) => {
170 if current_row_0based >= excerpt_start_0based
171 && current_row_0based < excerpt_end_0based
172 {
173 if first_included_row.is_none() {
174 first_included_row = Some(current_row_0based);
175 }
176 filtered_lines.push(HunkLine::Context(text.clone()));
177 filtered_old_count += 1;
178 filtered_new_count += 1;
179 }
180 current_row_0based += 1;
181 }
182 HunkLine::Addition(text) => {
183 if current_row_0based >= excerpt_start_0based
184 && current_row_0based < excerpt_end_0based
185 {
186 if first_included_row.is_none() {
187 first_included_row = Some(current_row_0based);
188 }
189 filtered_lines.push(HunkLine::Addition(text.clone()));
190 filtered_new_count += 1;
191 }
192 current_row_0based += 1;
193 }
194 HunkLine::Deletion(text) => {
195 if current_row_0based >= excerpt_start_0based
196 && current_row_0based < excerpt_end_0based
197 {
198 if first_included_row.is_none() {
199 first_included_row = Some(current_row_0based);
200 }
201 filtered_lines.push(HunkLine::Deletion(text.clone()));
202 filtered_old_count += 1;
203 }
204 }
205 }
206 }
207
208 if !filtered_lines.is_empty() {
209 let first_row = first_included_row.unwrap_or(excerpt_start_0based);
210 let new_start_1based = (first_row - excerpt_start_0based) + 1;
211
212 filtered_hunks.push(ParsedHunk {
213 old_start: new_start_1based,
214 old_count: filtered_old_count,
215 new_start: new_start_1based,
216 new_count: filtered_new_count,
217 lines: filtered_lines,
218 });
219 }
220
221 cumulative_line_offset += hunk_line_delta;
222 }
223
224 let mut result = String::new();
225 for hunk in &filtered_hunks {
226 result.push_str(&format_hunk(hunk));
227 }
228
229 (result, cumulative_line_offset)
230}
231
232fn compute_excerpt_aware_reversal_overlap(
233 edit_history_diffs: &[&str],
234 excerpt_content: &str,
235 excerpt_start_row: u32,
236 predicted_content: &str,
237) -> ReversalOverlap {
238 let mut current_content = excerpt_content.to_string();
239 let mut current_excerpt_start_row = excerpt_start_row;
240
241 for diff in edit_history_diffs.iter().rev() {
242 if diff.is_empty() {
243 continue;
244 }
245
246 let current_row_count = current_content.lines().count() as u32;
247 let (filtered_diff, _line_offset) =
248 filter_diff_hunks_by_excerpt(diff, current_excerpt_start_row, current_row_count.max(1));
249
250 if filtered_diff.is_empty() {
251 let hunks = parse_diff_hunks(diff);
252 for hunk in hunks {
253 let hunk_end = hunk.new_start.saturating_sub(1) + hunk.new_count;
254 if hunk_end <= current_excerpt_start_row {
255 let additions: u32 = hunk
256 .lines
257 .iter()
258 .filter(|l| matches!(l, HunkLine::Addition(_)))
259 .count() as u32;
260 let deletions: u32 = hunk
261 .lines
262 .iter()
263 .filter(|l| matches!(l, HunkLine::Deletion(_)))
264 .count() as u32;
265 if additions >= deletions {
266 current_excerpt_start_row =
267 current_excerpt_start_row.saturating_sub(additions - deletions);
268 } else {
269 current_excerpt_start_row += deletions - additions;
270 }
271 }
272 }
273 continue;
274 }
275
276 let reversed = reverse_diff(&format!("--- a/file\n+++ b/file\n{}", filtered_diff));
277 match apply_diff_to_string(&reversed, ¤t_content) {
278 Ok(updated) => {
279 current_content = updated;
280 }
281 Err(_) => {
282 continue;
283 }
284 }
285
286 let hunks = parse_diff_hunks(diff);
287 for hunk in hunks {
288 let hunk_end = hunk.new_start.saturating_sub(1) + hunk.new_count;
289 if hunk_end <= current_excerpt_start_row {
290 let additions: u32 = hunk
291 .lines
292 .iter()
293 .filter(|l| matches!(l, HunkLine::Addition(_)))
294 .count() as u32;
295 let deletions: u32 = hunk
296 .lines
297 .iter()
298 .filter(|l| matches!(l, HunkLine::Deletion(_)))
299 .count() as u32;
300 if additions >= deletions {
301 current_excerpt_start_row =
302 current_excerpt_start_row.saturating_sub(additions - deletions);
303 } else {
304 current_excerpt_start_row += deletions - additions;
305 }
306 }
307 }
308 }
309
310 compute_reversal_overlap(¤t_content, excerpt_content, predicted_content)
311}
312
/// Produces the inverse of a unified diff: additions become deletions and
/// vice versa, and `--- ` / `+++ ` file-header prefixes are swapped.
///
/// Hunk headers (`@@ ... @@`) are copied through unchanged. Their line counts
/// are used to recognise hunk *body* lines: while a hunk still has lines
/// outstanding, a line is flipped purely by its first character. This keeps
/// body lines whose content itself starts with `++` or `--` (for example an
/// added `++i;`, which appears as `+++i;`, or a deleted `-- comment`, which
/// appears as `--- comment`) from being mistaken for file headers.
///
/// Outside of a counted hunk (before the first header, after a hunk's counts
/// are exhausted, or when a line contradicts the counts) lines are handled
/// by prefix: `--- ` ↔ `+++ `, then a single leading `+` ↔ `-`.
///
/// A trailing newline is present in the output exactly when the input has one.
fn reverse_diff(diff: &str) -> String {
    /// Reads `(old_count, new_count)` from a `@@ -start[,count] +start[,count] @@` header.
    fn hunk_line_counts(header: &str) -> Option<(usize, usize)> {
        fn count(spec: &str) -> Option<usize> {
            match spec.split_once(',') {
                Some((start, count)) => {
                    start.parse::<usize>().ok()?;
                    count.parse().ok()
                }
                // A missing count means exactly one line.
                None => spec.parse::<usize>().ok().map(|_| 1),
            }
        }

        let rest = header.strip_prefix("@@ -")?;
        let (old_spec, rest) = rest.split_once(' ')?;
        let (new_spec, _) = rest.strip_prefix('+')?.split_once(" @@")?;
        Some((count(old_spec)?, count(new_spec)?))
    }

    // Lines still expected on each side of the hunk currently being read.
    let mut old_remaining = 0usize;
    let mut new_remaining = 0usize;
    let mut reversed_lines: Vec<String> = Vec::new();

    for line in diff.lines() {
        if let Some((old_count, new_count)) = hunk_line_counts(line) {
            old_remaining = old_count;
            new_remaining = new_count;
            reversed_lines.push(line.to_string());
            continue;
        }

        let first = line.as_bytes().first().copied();

        // Inside a hunk the first character alone decides the line's kind.
        // The marker is one ASCII byte, so slicing at 1 is always valid.
        if first == Some(b'+') && new_remaining > 0 {
            new_remaining -= 1;
            reversed_lines.push(format!("-{}", &line[1..]));
            continue;
        }
        if first == Some(b'-') && old_remaining > 0 {
            old_remaining -= 1;
            reversed_lines.push(format!("+{}", &line[1..]));
            continue;
        }

        let is_context = matches!(first, Some(b' ') | None);
        if is_context && old_remaining > 0 && new_remaining > 0 {
            old_remaining -= 1;
            new_remaining -= 1;
        } else if first != Some(b'\\') {
            // Anything else (other than a `\ No newline...` marker) means the
            // counted hunk is over or its counts were wrong; fall back to
            // prefix-based handling from here on.
            old_remaining = 0;
            new_remaining = 0;
        }

        let flipped = if let Some(path) = line.strip_prefix("--- ") {
            format!("+++ {}", path)
        } else if let Some(path) = line.strip_prefix("+++ ") {
            format!("--- {}", path)
        } else if line.starts_with('+') && !line.starts_with("+++") {
            format!("-{}", &line[1..])
        } else if line.starts_with('-') && !line.starts_with("---") {
            format!("+{}", &line[1..])
        } else {
            line.to_string()
        };
        reversed_lines.push(flipped);
    }

    let mut result = reversed_lines.join("\n");
    if diff.ends_with('\n') {
        result.push('\n');
    }
    result
}
336
/// One replacement step that turns an "old" text into a "new" text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GranularEdit {
    /// Byte range in the old text that this edit replaces.
    range: Range<usize>,
    /// The text that occupied `range` before the edit.
    old_text: String,
    /// The text that replaces `range`.
    new_text: String,
}
343
344fn compute_granular_edits(old_text: &str, new_text: &str) -> Vec<GranularEdit> {
345 text_diff(old_text, new_text)
346 .into_iter()
347 .map(|(range, new_text)| GranularEdit {
348 old_text: old_text[range.clone()].to_string(),
349 range,
350 new_text: new_text.to_string(),
351 })
352 .collect()
353}
354
/// Location of text that a history edit inserted.
#[derive(Debug, Clone)]
struct HistoryAdditionRange {
    /// Byte range of the inserted text, expressed in the coordinates of the
    /// text that results from applying the history edits.
    range_in_current: Range<usize>,
}
359
/// Text that a history edit removed, and where it used to be.
#[derive(Debug, Clone)]
struct HistoryDeletionRange {
    /// The removed text.
    deleted_text: String,
    /// Byte offset of the removal, expressed in the coordinates of the text
    /// that results from applying the history edits.
    position_in_current: usize,
}
365
366fn compute_history_addition_ranges(history_edits: &[GranularEdit]) -> Vec<HistoryAdditionRange> {
367 let mut result = Vec::new();
368 let mut offset_delta: isize = 0;
369
370 for edit in history_edits {
371 if !edit.new_text.is_empty() {
372 let new_start = (edit.range.start as isize + offset_delta) as usize;
373 let new_end = new_start + edit.new_text.len();
374 result.push(HistoryAdditionRange {
375 range_in_current: new_start..new_end,
376 });
377 }
378
379 offset_delta += edit.new_text.len() as isize - edit.old_text.len() as isize;
380 }
381
382 result
383}
384
385fn compute_history_deletion_ranges(history_edits: &[GranularEdit]) -> Vec<HistoryDeletionRange> {
386 let mut result = Vec::new();
387 let mut offset_delta: isize = 0;
388
389 for edit in history_edits {
390 if !edit.old_text.is_empty() {
391 let position_in_current = (edit.range.start as isize + offset_delta) as usize;
392 result.push(HistoryDeletionRange {
393 deleted_text: edit.old_text.clone(),
394 position_in_current,
395 });
396 }
397
398 offset_delta += edit.new_text.len() as isize - edit.old_text.len() as isize;
399 }
400
401 result
402}
403
/// How much of a prediction undoes what the user just did, in characters.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct ReversalOverlap {
    /// Characters of the prediction that revert user edits.
    chars_reversing_user_edits: usize,
    /// All characters the prediction inserts or deletes.
    total_chars_in_prediction: usize,
}

impl ReversalOverlap {
    /// Fraction of the prediction's characters that revert user edits.
    ///
    /// Yields `0.0` for a prediction that changes no characters, so the
    /// division is never performed with a zero denominator.
    fn ratio(&self) -> f32 {
        match self.total_chars_in_prediction {
            0 => 0.0,
            total => self.chars_reversing_user_edits as f32 / total as f32,
        }
    }
}
419
/// Returns `true` when every character of `needle` occurs in `haystack` in the
/// same order, with arbitrary other characters allowed in between.
///
/// An empty `needle` is a subsequence of anything.
fn is_subsequence(needle: &str, haystack: &str) -> bool {
    let mut remaining_haystack = haystack.chars();
    // `any` consumes the haystack up to and including the first match, so each
    // needle character is searched for strictly after the previous match.
    needle
        .chars()
        .all(|wanted| remaining_haystack.any(|candidate| candidate == wanted))
}
430
431/// Normalize edits where `old_text` appears as a subsequence within `new_text` (extension),
432/// or where `new_text` appears as a subsequence within `old_text` (reduction).
433///
434/// For extensions: when the user's text is preserved (in order) within the prediction,
435/// we only count the newly inserted characters, not the preserved ones.
436/// E.g., "epr" → "eprintln!()" becomes 8 inserted chars ("intln!()")
437/// E.g., "test_my_function" → "a_test_for_my_special_function_plz" becomes 18 inserted chars
438///
439/// For reductions: when the prediction's text is preserved (in order) within the original,
440/// we only count the deleted characters, not the preserved ones.
441/// E.g., "ifrom" → "from" becomes 1 deleted char ("i")
442fn normalize_extension_edits(edits: Vec<GranularEdit>) -> Vec<GranularEdit> {
443 edits
444 .into_iter()
445 .map(|edit| {
446 if edit.old_text.is_empty() || edit.new_text.is_empty() {
447 return edit;
448 }
449
450 if is_subsequence(&edit.old_text, &edit.new_text) {
451 let inserted_char_count =
452 edit.new_text.chars().count() - edit.old_text.chars().count();
453 GranularEdit {
454 range: edit.range.start..edit.range.start,
455 old_text: String::new(),
456 new_text: edit.new_text.chars().take(inserted_char_count).collect(),
457 }
458 } else if is_subsequence(&edit.new_text, &edit.old_text) {
459 let deleted_char_count =
460 edit.old_text.chars().count() - edit.new_text.chars().count();
461 let deleted_text: String = edit.old_text.chars().take(deleted_char_count).collect();
462 GranularEdit {
463 range: edit.range.start..edit.range.start + deleted_text.len(),
464 old_text: deleted_text,
465 new_text: String::new(),
466 }
467 } else {
468 edit
469 }
470 })
471 .collect()
472}
473
474fn compute_reversal_overlap(
475 original_content: &str,
476 current_content: &str,
477 predicted_content: &str,
478) -> ReversalOverlap {
479 let history_edits =
480 normalize_extension_edits(compute_granular_edits(original_content, current_content));
481 let prediction_edits =
482 normalize_extension_edits(compute_granular_edits(current_content, predicted_content));
483
484 let history_addition_ranges = compute_history_addition_ranges(&history_edits);
485 let history_deletion_ranges = compute_history_deletion_ranges(&history_edits);
486
487 let reversed_additions =
488 compute_reversed_additions(&history_addition_ranges, &prediction_edits);
489 let restored_deletions =
490 compute_restored_deletions(&history_deletion_ranges, &prediction_edits);
491
492 let total_chars_in_prediction: usize = prediction_edits
493 .iter()
494 .map(|e| e.new_text.chars().count() + e.old_text.chars().count())
495 .sum();
496
497 ReversalOverlap {
498 chars_reversing_user_edits: reversed_additions + restored_deletions,
499 total_chars_in_prediction,
500 }
501}
502
503fn compute_reversed_additions(
504 history_addition_ranges: &[HistoryAdditionRange],
505 prediction_edits: &[GranularEdit],
506) -> usize {
507 let mut reversed_chars = 0;
508
509 for pred_edit in prediction_edits {
510 for history_addition in history_addition_ranges {
511 let overlap_start = pred_edit
512 .range
513 .start
514 .max(history_addition.range_in_current.start);
515 let overlap_end = pred_edit
516 .range
517 .end
518 .min(history_addition.range_in_current.end);
519
520 if overlap_start < overlap_end {
521 let relative_start = overlap_start - pred_edit.range.start;
522 let relative_end = overlap_end - pred_edit.range.start;
523 let overlap_text = &pred_edit.old_text[relative_start..relative_end];
524 reversed_chars += overlap_text.chars().count();
525 }
526 }
527 }
528
529 reversed_chars
530}
531
532fn compute_restored_deletions(
533 history_deletion_ranges: &[HistoryDeletionRange],
534 prediction_edits: &[GranularEdit],
535) -> usize {
536 let mut restored = 0;
537
538 for pred_edit in prediction_edits {
539 if pred_edit.new_text.is_empty() {
540 continue;
541 }
542
543 for deletion in history_deletion_ranges {
544 if pred_edit.range.contains(&deletion.position_in_current)
545 || deletion.position_in_current == pred_edit.range.start
546 {
547 restored += compute_lcs_length(&deletion.deleted_text, &pred_edit.new_text);
548 }
549 }
550 }
551
552 restored
553}
554
/// Length, in characters, of the longest common subsequence of `a` and `b`.
///
/// Uses the row-by-row dynamic-programming table, keeping only the previous
/// and the current row.
fn compute_lcs_length(a: &str, b: &str) -> usize {
    let left: Vec<char> = a.chars().collect();
    let right: Vec<char> = b.chars().collect();

    if left.is_empty() || right.is_empty() {
        return 0;
    }

    let width = right.len() + 1;
    let mut previous_row = vec![0usize; width];
    let mut current_row = vec![0usize; width];

    for &left_char in &left {
        // Column 0 is never written and stays 0; every other cell of
        // `current_row` is overwritten before it is read.
        for (col, &right_char) in right.iter().enumerate() {
            current_row[col + 1] = if left_char == right_char {
                previous_row[col] + 1
            } else {
                previous_row[col + 1].max(current_row[col])
            };
        }
        std::mem::swap(&mut previous_row, &mut current_row);
    }

    previous_row[right.len()]
}
582
583fn filter_edit_history_by_path<'a>(
584 edit_history: &'a [Arc<zeta_prompt::Event>],
585 cursor_path: &std::path::Path,
586) -> Vec<&'a zeta_prompt::Event> {
587 edit_history
588 .iter()
589 .filter(|event| match event.as_ref() {
590 zeta_prompt::Event::BufferChange { path, .. } => {
591 let event_path = path.as_ref();
592 if event_path == cursor_path {
593 return true;
594 }
595 let stripped = event_path
596 .components()
597 .skip(1)
598 .collect::<std::path::PathBuf>();
599 stripped == cursor_path
600 }
601 })
602 .map(|arc| arc.as_ref())
603 .collect()
604}
605
606fn extract_diff_from_event(event: &zeta_prompt::Event) -> &str {
607 match event {
608 zeta_prompt::Event::BufferChange { diff, .. } => diff.as_str(),
609 }
610}
611
612pub fn compute_prediction_reversal_ratio(
613 prompt_inputs: &ExamplePromptInputs,
614 predicted_content: &str,
615 cursor_path: &Path,
616) -> f32 {
617 let current_content = &prompt_inputs.content;
618
619 let edit_history: &[Arc<zeta_prompt::Event>] = &prompt_inputs.edit_history;
620 let relevant_events = filter_edit_history_by_path(edit_history, cursor_path);
621
622 if let Some(excerpt_start_row) = prompt_inputs.excerpt_start_row {
623 let diffs: Vec<&str> = relevant_events
624 .iter()
625 .map(|e| extract_diff_from_event(e))
626 .collect();
627 let overlap = compute_excerpt_aware_reversal_overlap(
628 &diffs,
629 current_content,
630 excerpt_start_row,
631 predicted_content,
632 );
633 return overlap.ratio();
634 }
635
636 let mut original_content = current_content.to_string();
637 for event in relevant_events.into_iter().rev() {
638 let diff = extract_diff_from_event(event);
639 if diff.is_empty() {
640 continue;
641 }
642 let reversed = reverse_diff(diff);
643 let with_headers = format!("--- a/file\n+++ b/file\n{}", reversed);
644 match apply_diff_to_string(&with_headers, &original_content) {
645 Ok(updated_content) => original_content = updated_content,
646 Err(_) => {
647 original_content = apply_diff_to_string_lenient(&reversed, &original_content);
648 }
649 }
650 }
651
652 let overlap = compute_reversal_overlap(&original_content, current_content, predicted_content);
653 overlap.ratio()
654}
655
656#[cfg(test)]
657mod tests {
658 use super::*;
659 use edit_prediction::udiff::apply_diff_to_string;
660 use indoc::indoc;
661
662 #[test]
663 fn test_reversal_overlap() {
664 struct Case {
665 name: &'static str,
666 original: &'static str,
667 current: &'static str,
668 predicted: &'static str,
669 expected_reversal_chars: usize,
670 expected_total_chars: usize,
671 }
672
673 let cases = [
674 Case {
675 name: "user_adds_line_prediction_removes_it",
676 original: indoc! {"
677 a
678 b
679 c"},
680 current: indoc! {"
681 a
682 new line
683 b
684 c"},
685 predicted: indoc! {"
686 a
687 b
688 c"},
689 expected_reversal_chars: 9,
690 expected_total_chars: 9,
691 },
692 Case {
693 name: "user_deletes_line_prediction_restores_it",
694 original: indoc! {"
695 a
696 deleted
697 b"},
698 current: indoc! {"
699 a
700 b"},
701 predicted: indoc! {"
702 a
703 deleted
704 b"},
705 expected_reversal_chars: 8,
706 expected_total_chars: 8,
707 },
708 Case {
709 name: "user_deletes_text_prediction_restores_partial",
710 original: "hello beautiful world",
711 current: "hello world",
712 predicted: "hello beautiful world",
713 expected_reversal_chars: 10,
714 expected_total_chars: 10,
715 },
716 Case {
717 name: "user_deletes_foo_prediction_adds_bar",
718 original: "foo",
719 current: "",
720 predicted: "bar",
721 expected_reversal_chars: 0,
722 expected_total_chars: 3,
723 },
724 Case {
725 name: "independent_edits_different_locations",
726 original: indoc! {"
727 line1
728 line2
729 line3"},
730 current: indoc! {"
731 LINE1
732 line2
733 line3"},
734 predicted: indoc! {"
735 LINE1
736 line2
737 LINE3"},
738 expected_reversal_chars: 0,
739 expected_total_chars: 10,
740 },
741 Case {
742 name: "no_history_edits",
743 original: "same",
744 current: "same",
745 predicted: "different",
746 expected_reversal_chars: 0,
747 expected_total_chars: 13,
748 },
749 Case {
750 name: "user_replaces_text_prediction_reverses",
751 original: indoc! {"
752 keep
753 delete_me
754 keep2"},
755 current: indoc! {"
756 keep
757 added
758 keep2"},
759 predicted: indoc! {"
760 keep
761 delete_me
762 keep2"},
763 expected_reversal_chars: 14,
764 expected_total_chars: 14,
765 },
766 Case {
767 name: "user_modifies_word_prediction_modifies_differently",
768 original: "the quick brown fox",
769 current: "the slow brown fox",
770 predicted: "the fast brown fox",
771 expected_reversal_chars: 4,
772 expected_total_chars: 8,
773 },
774 Case {
775 name: "user finishes function name (suffix)",
776 original: "",
777 current: "epr",
778 predicted: "eprintln!()",
779 expected_reversal_chars: 0,
780 expected_total_chars: 8,
781 },
782 Case {
783 name: "user starts function name (prefix)",
784 original: "",
785 current: "my_function()",
786 predicted: "test_my_function()",
787 expected_reversal_chars: 0,
788 expected_total_chars: 5,
789 },
790 Case {
791 name: "user types partial, prediction extends in multiple places",
792 original: "",
793 current: "test_my_function",
794 predicted: "a_test_for_my_special_function_plz",
795 expected_reversal_chars: 0,
796 expected_total_chars: 18,
797 },
798 // Edge cases for subsequence matching
799 Case {
800 name: "subsequence with interleaved underscores",
801 original: "",
802 current: "a_b_c",
803 predicted: "_a__b__c__",
804 expected_reversal_chars: 0,
805 expected_total_chars: 5,
806 },
807 Case {
808 name: "not a subsequence - different characters",
809 original: "",
810 current: "abc",
811 predicted: "xyz",
812 expected_reversal_chars: 3,
813 expected_total_chars: 6,
814 },
815 Case {
816 name: "not a subsequence - wrong order",
817 original: "",
818 current: "abc",
819 predicted: "cba",
820 expected_reversal_chars: 3,
821 expected_total_chars: 6,
822 },
823 Case {
824 name: "partial subsequence - only some chars match",
825 original: "",
826 current: "abcd",
827 predicted: "axbx",
828 expected_reversal_chars: 4,
829 expected_total_chars: 8,
830 },
831 // Common completion patterns
832 Case {
833 name: "completing a method call",
834 original: "",
835 current: "vec.pu",
836 predicted: "vec.push(item)",
837 expected_reversal_chars: 0,
838 expected_total_chars: 8,
839 },
840 Case {
841 name: "completing an import statement",
842 original: "",
843 current: "use std::col",
844 predicted: "use std::collections::HashMap",
845 expected_reversal_chars: 0,
846 expected_total_chars: 17,
847 },
848 Case {
849 name: "completing a struct field",
850 original: "",
851 current: "name: St",
852 predicted: "name: String",
853 expected_reversal_chars: 0,
854 expected_total_chars: 4,
855 },
856 Case {
857 name: "prediction replaces with completely different text",
858 original: "",
859 current: "hello",
860 predicted: "world",
861 expected_reversal_chars: 5,
862 expected_total_chars: 10,
863 },
864 Case {
865 name: "empty prediction removes user text",
866 original: "",
867 current: "mistake",
868 predicted: "",
869 expected_reversal_chars: 7,
870 expected_total_chars: 7,
871 },
872 Case {
873 name: "fixing typo is not reversal",
874 original: "",
875 current: "<dv",
876 predicted: "<div>",
877 expected_reversal_chars: 0,
878 expected_total_chars: 2,
879 },
880 Case {
881 name: "infix insertion not reversal",
882 original: indoc! {"
883 from my_project import Foo
884 "},
885 current: indoc! {"
886 ifrom my_project import Foo
887 "},
888 predicted: indoc! {"
889 import
890 from my_project import Foo
891 "},
892 expected_reversal_chars: 0,
893 expected_total_chars: 6,
894 },
895 Case {
896 name: "non-word based reversal",
897 original: "from",
898 current: "ifrom",
899 predicted: "from",
900 expected_reversal_chars: 1,
901 expected_total_chars: 1,
902 },
903 Case {
904 name: "multiple insertions no reversal",
905 original: "print(\"Hello, World!\")",
906 current: "sys.(\"Hello, World!\")",
907 predicted: "sys.stdout.write(\"Hello, World!\\n\")",
908 expected_reversal_chars: 0,
909 expected_total_chars: 14,
910 },
911 ];
912
913 for case in &cases {
914 let overlap = compute_reversal_overlap(case.original, case.current, case.predicted);
915 assert_eq!(
916 overlap.chars_reversing_user_edits, case.expected_reversal_chars,
917 "Test '{}': expected {} reversal chars, got {}",
918 case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
919 );
920 assert_eq!(
921 overlap.total_chars_in_prediction, case.expected_total_chars,
922 "Test '{}': expected {} total chars, got {}",
923 case.name, case.expected_total_chars, overlap.total_chars_in_prediction
924 );
925 }
926 }
927
928 #[test]
929 fn test_reverse_diff() {
930 let forward_diff = indoc! {"
931 --- a/file.rs
932 +++ b/file.rs
933 @@ -1,3 +1,4 @@
934 fn main() {
935 + let x = 42;
936 println!(\"hello\");
937 }"};
938
939 let reversed = reverse_diff(forward_diff);
940
941 assert!(
942 reversed.contains("+++ a/file.rs"),
943 "Should have +++ for old path"
944 );
945 assert!(
946 reversed.contains("--- b/file.rs"),
947 "Should have --- for new path"
948 );
949 assert!(
950 reversed.contains("- let x = 42;"),
951 "Added line should become deletion"
952 );
953 assert!(
954 reversed.contains(" fn main()"),
955 "Context lines should be unchanged"
956 );
957 }
958
959 #[test]
960 fn test_reverse_diff_roundtrip() {
961 // Applying a diff and then its reverse should get back to original
962 let original = indoc! {"
963 first line
964 hello world
965 last line
966 "};
967 let modified = indoc! {"
968 first line
969 hello beautiful world
970 last line
971 "};
972
973 // unified_diff doesn't include file headers, but apply_diff_to_string needs them
974 let diff_body = language::unified_diff(original, modified);
975 let forward_diff = format!("--- a/file\n+++ b/file\n{}", diff_body);
976 let reversed_diff = reverse_diff(&forward_diff);
977
978 // Apply forward diff to original
979 let after_forward = apply_diff_to_string(&forward_diff, original).unwrap();
980 assert_eq!(after_forward, modified);
981
982 // Apply reversed diff to modified
983 let after_reverse = apply_diff_to_string(&reversed_diff, &after_forward).unwrap();
984 assert_eq!(after_reverse, original);
985 }
986
987 #[test]
988 fn test_filter_edit_history_by_path() {
989 // Test that filter_edit_history_by_path correctly matches paths when
990 // the edit history has paths with a repo prefix (e.g., "repo/src/file.rs")
991 // but the cursor_path doesn't have the repo prefix (e.g., "src/file.rs")
992 let events = vec![
993 Arc::new(zeta_prompt::Event::BufferChange {
994 path: Arc::from(Path::new("myrepo/src/file.rs")),
995 old_path: Arc::from(Path::new("myrepo/src/file.rs")),
996 diff: indoc! {"
997 @@ -1 +1 @@
998 -old
999 +new"}
1000 .into(),
1001 predicted: false,
1002 in_open_source_repo: true,
1003 }),
1004 Arc::new(zeta_prompt::Event::BufferChange {
1005 path: Arc::from(Path::new("myrepo/other.rs")),
1006 old_path: Arc::from(Path::new("myrepo/other.rs")),
1007 diff: indoc! {"
1008 @@ -1 +1 @@
1009 -a
1010 +b"}
1011 .into(),
1012 predicted: false,
1013 in_open_source_repo: true,
1014 }),
1015 Arc::new(zeta_prompt::Event::BufferChange {
1016 path: Arc::from(Path::new("src/file.rs")),
1017 old_path: Arc::from(Path::new("src/file.rs")),
1018 diff: indoc! {"
1019 @@ -1 +1 @@
1020 -x
1021 +y"}
1022 .into(),
1023 predicted: false,
1024 in_open_source_repo: true,
1025 }),
1026 ];
1027
1028 // "myrepo/src/file.rs" stripped -> "src/file.rs" matches cursor_path
1029 // "src/file.rs" exact match
1030 let cursor_path = Path::new("src/file.rs");
1031 let filtered = filter_edit_history_by_path(&events, cursor_path);
1032 assert_eq!(
1033 filtered.len(),
1034 2,
1035 "Should match myrepo/src/file.rs (stripped) and src/file.rs (exact)"
1036 );
1037
1038 // "myrepo/src/file.rs" stripped -> "src/file.rs" != "file.rs"
1039 // "src/file.rs" stripped -> "file.rs" == "file.rs"
1040 let cursor_path = Path::new("file.rs");
1041 let filtered = filter_edit_history_by_path(&events, cursor_path);
1042 assert_eq!(
1043 filtered.len(),
1044 1,
1045 "Should only match src/file.rs (stripped to file.rs)"
1046 );
1047
1048 // "myrepo/other.rs" stripped -> "other.rs" == "other.rs"
1049 let cursor_path = Path::new("other.rs");
1050 let filtered = filter_edit_history_by_path(&events, cursor_path);
1051 assert_eq!(filtered.len(), 1, "Should match only myrepo/other.rs");
1052 }
1053
1054 #[test]
1055 fn test_reverse_diff_preserves_trailing_newline() {
1056 let diff_with_trailing_newline = indoc! {"
1057 --- a/file
1058 +++ b/file
1059 @@ -1 +1 @@
1060 -old
1061 +new
1062 "};
1063 let reversed = reverse_diff(diff_with_trailing_newline);
1064 assert!(
1065 reversed.ends_with('\n'),
1066 "Reversed diff should preserve trailing newline"
1067 );
1068
1069 let diff_without_trailing_newline = indoc! {"
1070 --- a/file
1071 +++ b/file
1072 @@ -1 +1 @@
1073 -old
1074 +new"};
1075 let reversed = reverse_diff(diff_without_trailing_newline);
1076 assert!(
1077 !reversed.ends_with('\n'),
1078 "Reversed diff should not add trailing newline if original didn't have one"
1079 );
1080 }
1081
1082 #[test]
1083 fn test_filter_hunks_by_excerpt_region() {
1084 struct Case {
1085 name: &'static str,
1086 diff: &'static str,
1087 excerpt_start_row: u32,
1088 excerpt_row_count: u32,
1089 expected_filtered_diff: &'static str,
1090 expected_line_offset: i32,
1091 }
1092
1093 let cases = [
1094 Case {
1095 name: "hunk_entirely_before_excerpt",
1096 diff: indoc! {"
1097 @@ -1,3 +1,4 @@
1098 line1
1099 +inserted
1100 line2
1101 line3
1102 "},
1103 excerpt_start_row: 10,
1104 excerpt_row_count: 5,
1105 expected_filtered_diff: "",
1106 expected_line_offset: 1,
1107 },
1108 Case {
1109 name: "hunk_entirely_inside_excerpt",
1110 diff: indoc! {"
1111 @@ -12,3 +12,4 @@
1112 line12
1113 +inserted
1114 line13
1115 line14
1116 "},
1117 excerpt_start_row: 10,
1118 excerpt_row_count: 10,
1119 expected_filtered_diff: indoc! {"
1120 @@ -2,3 +2,4 @@
1121 line12
1122 +inserted
1123 line13
1124 line14
1125 "},
1126 expected_line_offset: 1,
1127 },
1128 Case {
1129 name: "hunk_entirely_after_excerpt",
1130 diff: indoc! {"
1131 @@ -50,3 +50,4 @@
1132 line50
1133 +inserted
1134 line51
1135 line52
1136 "},
1137 excerpt_start_row: 10,
1138 excerpt_row_count: 5,
1139 expected_filtered_diff: "",
1140 expected_line_offset: 0,
1141 },
1142 Case {
1143 name: "hunk_straddles_excerpt_start",
1144 diff: indoc! {"
1145 @@ -8,5 +8,6 @@
1146 line8
1147 line9
1148 +inserted
1149 line10
1150 line11
1151 line12
1152 "},
1153 excerpt_start_row: 10,
1154 excerpt_row_count: 10,
1155 expected_filtered_diff: indoc! {"
1156 @@ -1,3 +1,3 @@
1157 line10
1158 line11
1159 line12
1160 "},
1161 expected_line_offset: 1,
1162 },
1163 Case {
1164 name: "hunk_straddles_excerpt_end",
1165 diff: indoc! {"
1166 @@ -18,5 +18,6 @@
1167 line18
1168 line19
1169 +inserted
1170 line20
1171 line21
1172 line22
1173 "},
1174 excerpt_start_row: 10,
1175 excerpt_row_count: 10,
1176 expected_filtered_diff: indoc! {"
1177 @@ -8,2 +8,3 @@
1178 line18
1179 line19
1180 +inserted
1181 "},
1182 expected_line_offset: 1,
1183 },
1184 Case {
1185 name: "multiple_hunks_mixed",
1186 diff: indoc! {"
1187 @@ -1,2 +1,3 @@
1188 line1
1189 +before_excerpt
1190 line2
1191 @@ -12,2 +13,3 @@
1192 line12
1193 +inside_excerpt
1194 line13
1195 @@ -50,2 +52,3 @@
1196 line50
1197 +after_excerpt
1198 line51
1199 "},
1200 excerpt_start_row: 10,
1201 excerpt_row_count: 10,
1202 expected_filtered_diff: indoc! {"
1203 @@ -3,2 +3,3 @@
1204 line12
1205 +inside_excerpt
1206 line13
1207 "},
1208 expected_line_offset: 2,
1209 },
1210 Case {
1211 name: "deletion_before_excerpt",
1212 diff: indoc! {"
1213 @@ -1,4 +1,3 @@
1214 line1
1215 -deleted
1216 line2
1217 line3
1218 "},
1219 excerpt_start_row: 10,
1220 excerpt_row_count: 5,
1221 expected_filtered_diff: "",
1222 expected_line_offset: -1,
1223 },
1224 Case {
1225 name: "deletion_inside_excerpt",
1226 diff: indoc! {"
1227 @@ -12,4 +12,3 @@
1228 line12
1229 -deleted
1230 line13
1231 line14
1232 "},
1233 excerpt_start_row: 10,
1234 excerpt_row_count: 10,
1235 expected_filtered_diff: indoc! {"
1236 @@ -2,4 +2,3 @@
1237 line12
1238 -deleted
1239 line13
1240 line14
1241 "},
1242 expected_line_offset: -1,
1243 },
1244 Case {
1245 name: "empty_diff",
1246 diff: "",
1247 excerpt_start_row: 10,
1248 excerpt_row_count: 5,
1249 expected_filtered_diff: "",
1250 expected_line_offset: 0,
1251 },
1252 Case {
1253 name: "hunk_spans_entire_excerpt",
1254 diff: indoc! {"
1255 @@ -8,10 +8,12 @@
1256 line8
1257 line9
1258 line10
1259 line11
1260 +inserted1
1261 line12
1262 line13
1263 +inserted2
1264 line14
1265 line15
1266 line16
1267 line17
1268 "},
1269 excerpt_start_row: 10,
1270 excerpt_row_count: 5,
1271 expected_filtered_diff: indoc! {"
1272 @@ -1,3 +1,5 @@
1273 line11
1274 +inserted1
1275 line12
1276 line13
1277 +inserted2
1278 "},
1279 expected_line_offset: 2,
1280 },
1281 Case {
1282 name: "replacement_inside_excerpt",
1283 diff: indoc! {"
1284 @@ -12,3 +12,3 @@
1285 line12
1286 -old_text
1287 +new_text
1288 line14
1289 "},
1290 excerpt_start_row: 10,
1291 excerpt_row_count: 10,
1292 expected_filtered_diff: indoc! {"
1293 @@ -2,3 +2,3 @@
1294 line12
1295 -old_text
1296 +new_text
1297 line14
1298 "},
1299 expected_line_offset: 0,
1300 },
1301 ];
1302
1303 for case in &cases {
1304 let (filtered, line_offset) = filter_diff_hunks_by_excerpt(
1305 case.diff,
1306 case.excerpt_start_row,
1307 case.excerpt_row_count,
1308 );
1309 assert_eq!(
1310 filtered, case.expected_filtered_diff,
1311 "Test '{}': filtered diff mismatch.\nExpected:\n{}\nGot:\n{}",
1312 case.name, case.expected_filtered_diff, filtered
1313 );
1314 assert_eq!(
1315 line_offset, case.expected_line_offset,
1316 "Test '{}': line offset mismatch. Expected {}, got {}",
1317 case.name, case.expected_line_offset, line_offset
1318 );
1319 }
1320 }
1321
    /// Table-driven test for `compute_excerpt_aware_reversal_overlap`.
    ///
    /// Every case passes its edit-history diffs, excerpt text, excerpt start row
    /// and predicted text to the function, then compares the returned
    /// `chars_reversing_user_edits` and `total_chars_in_prediction` with the
    /// expected counts.
    #[test]
    fn test_excerpt_aware_reversal_tracking() {
        struct Case {
            // Label printed in assertion failure messages.
            name: &'static str,
            // Hunk-only diffs (no `---`/`+++` headers), passed in this order.
            edit_history_diffs: Vec<&'static str>,
            // Text passed as the excerpt content argument.
            excerpt_content: &'static str,
            // Passed as the excerpt start row argument.
            excerpt_start_row: u32,
            // Text passed as the predicted content argument.
            predicted_content: &'static str,
            expected_reversal_chars: usize,
            expected_total_chars: usize,
        }

        let cases = [
            // The only hunk targets lines 1-2 while the excerpt starts at row 10.
            // The total of 14 is consistent with "line11" (6) removed plus
            // "modified" (8) added.
            Case {
                name: "edit_outside_excerpt_no_reversal",
                edit_history_diffs: vec![indoc! {"
                    @@ -1,2 +1,3 @@
                     line1
                    +added_outside
                     line2
                "}],
                excerpt_content: indoc! {"
                    line10
                    line11
                    line12
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    modified
                    line12
                "},
                expected_reversal_chars: 0,
                expected_total_chars: 14,
            },
            // The prediction drops the `user_added` line that the history diff
            // inserted; "user_added\n" is 11 chars.
            Case {
                name: "edit_inside_excerpt_with_reversal",
                edit_history_diffs: vec![indoc! {"
                    @@ -10,3 +10,4 @@
                     line10
                    +user_added
                     line11
                     line12
                "}],
                excerpt_content: indoc! {"
                    line10
                    user_added
                    line11
                    line12
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    line11
                    line12
                "},
                expected_reversal_chars: 11,
                expected_total_chars: 11,
            },
            // One hunk adds a line before `line10` and another after it; only
            // `inside_excerpt` appears in the excerpt text. "inside_excerpt\n" is
            // 15 chars.
            Case {
                name: "straddling_edit_partial_reversal",
                edit_history_diffs: vec![indoc! {"
                    @@ -8,6 +8,8 @@
                     line8
                     line9
                    +before_excerpt
                     line10
                    +inside_excerpt
                     line11
                     line12
                     line13
                "}],
                excerpt_content: indoc! {"
                    line10
                    inside_excerpt
                    line11
                    line12
                    line13
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    line11
                    line12
                    line13
                "},
                expected_reversal_chars: 15,
                expected_total_chars: 15,
            },
            // Two separate diffs: the first touches lines 1-2, the second adds
            // `inside1`, which the prediction removes. "inside1\n" is 8 chars.
            Case {
                name: "multiple_edits_mixed_locations",
                edit_history_diffs: vec![
                    indoc! {"
                        @@ -1,2 +1,3 @@
                         line1
                        +outside1
                         line2
                    "},
                    indoc! {"
                        @@ -11,2 +12,3 @@
                         line11
                        +inside1
                         line12
                    "},
                ],
                excerpt_content: indoc! {"
                    line10
                    line11
                    inside1
                    line12
                    line13
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    line11
                    line12
                    line13
                "},
                expected_reversal_chars: 8,
                expected_total_chars: 8,
            },
            // Empty history: same excerpt/prediction pair as the first case, and
            // the same expected counts.
            Case {
                name: "no_edit_history",
                edit_history_diffs: vec![],
                excerpt_content: indoc! {"
                    line10
                    line11
                    line12
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    modified
                    line12
                "},
                expected_reversal_chars: 0,
                expected_total_chars: 14,
            },
            // The only hunk targets lines 50-51. The total of 13 is consistent
            // with "line11" (6) removed plus "changed" (7) added.
            Case {
                name: "edit_after_excerpt_no_effect",
                edit_history_diffs: vec![indoc! {"
                    @@ -50,2 +50,3 @@
                     line50
                    +added_after
                     line51
                "}],
                excerpt_content: indoc! {"
                    line10
                    line11
                    line12
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    line11
                    line12
                "},
                expected_reversal_chars: 0,
                expected_total_chars: 13,
            },
            // The first diff adds two lines at the top of the file, so the second
            // hunk's new-side start (14) is two past its old-side start (12).
            // "inside_after_offset\n" is 20 chars.
            Case {
                name: "line_offset_tracking_across_hunks",
                edit_history_diffs: vec![
                    indoc! {"
                        @@ -1,2 +1,4 @@
                         line1
                        +added1
                        +added2
                         line2
                    "},
                    indoc! {"
                        @@ -12,2 +14,3 @@
                         line12
                        +inside_after_offset
                         line13
                    "},
                ],
                excerpt_content: indoc! {"
                    line10
                    line11
                    line12
                    inside_after_offset
                    line13
                "},
                excerpt_start_row: 10,
                predicted_content: indoc! {"
                    line10
                    line11
                    line12
                    line13
                "},
                expected_reversal_chars: 20,
                expected_total_chars: 20,
            },
        ];

        for case in &cases {
            let overlap = compute_excerpt_aware_reversal_overlap(
                &case.edit_history_diffs,
                case.excerpt_content,
                case.excerpt_start_row,
                case.predicted_content,
            );
            // Both counters are checked separately so a failure names the one
            // that diverged.
            assert_eq!(
                overlap.chars_reversing_user_edits, case.expected_reversal_chars,
                "Test '{}': expected {} reversal chars, got {}",
                case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
            );
            assert_eq!(
                overlap.total_chars_in_prediction, case.expected_total_chars,
                "Test '{}': expected {} total chars, got {}",
                case.name, case.expected_total_chars, overlap.total_chars_in_prediction
            );
        }
    }
1538
    /// Table-driven test for `apply_diff_to_string_lenient`.
    ///
    /// Each case applies `diff` to `content` and compares the returned string
    /// with `expected_result`. The function applies hunks one at a time and
    /// keeps the text as-is for any hunk whose application fails.
    #[test]
    fn test_lenient_diff_application() {
        struct Case {
            // Label printed in assertion failure messages.
            name: &'static str,
            // Hunk-only diff text (no `---`/`+++` headers).
            diff: &'static str,
            // Text the diff is applied to.
            content: &'static str,
            expected_result: &'static str,
        }

        let cases = [
            // None of the hunk's context lines occur in `content`, so the
            // expected result equals `content`.
            Case {
                name: "hunk_context_not_found_skipped",
                diff: indoc! {"
                    @@ -1,3 +1,4 @@
                     context_not_in_content
                    +added_line
                     more_context
                     final_context
                "},
                content: indoc! {"
                    completely
                    different
                    content
                "},
                expected_result: indoc! {"
                    completely
                    different
                    content
                "},
            },
            // Context matches `content`; `inserted` lands between `line1` and
            // `line2`.
            Case {
                name: "hunk_context_found_applied",
                diff: indoc! {"
                    @@ -1,3 +1,4 @@
                     line1
                    +inserted
                     line2
                     line3
                "},
                content: indoc! {"
                    line1
                    line2
                    line3
                "},
                expected_result: indoc! {"
                    line1
                    inserted
                    line2
                    line3
                "},
            },
            // First hunk's context is absent from `content`, second hunk's is
            // present: only `applied` shows up in the expected result.
            Case {
                name: "multiple_hunks_partial_match",
                diff: indoc! {"
                    @@ -1,2 +1,3 @@
                     not_found
                    +skipped
                     also_not_found
                    @@ -5,2 +6,3 @@
                     line5
                    +applied
                     line6
                "},
                content: indoc! {"
                    line1
                    line2
                    line3
                    line4
                    line5
                    line6
                "},
                expected_result: indoc! {"
                    line1
                    line2
                    line3
                    line4
                    line5
                    applied
                    line6
                "},
            },
            // An empty diff contains no hunks; the content comes back unchanged.
            Case {
                name: "empty_diff",
                diff: "",
                content: indoc! {"
                    unchanged
                    content
                "},
                expected_result: indoc! {"
                    unchanged
                    content
                "},
            },
        ];

        for case in &cases {
            let result = apply_diff_to_string_lenient(case.diff, case.content);
            assert_eq!(
                result, case.expected_result,
                "Test '{}': expected:\n{}\ngot:\n{}",
                case.name, case.expected_result, result
            );
        }
    }
1643
1644 #[test]
1645 fn test_unicode_reversal_overlap() {
1646 struct Case {
1647 name: &'static str,
1648 original: &'static str,
1649 current: &'static str,
1650 predicted: &'static str,
1651 expected_reversal_chars: usize,
1652 expected_total_chars: usize,
1653 }
1654
1655 let cases = [
1656 Case {
1657 name: "unicode_extension_cjk",
1658 original: "",
1659 current: "日", // 1 char
1660 predicted: "日本語", // 3 chars, adds 2 chars
1661 expected_reversal_chars: 0,
1662 expected_total_chars: 2, // "本語" = 2 chars added
1663 },
1664 Case {
1665 name: "unicode_extension_emoji",
1666 original: "",
1667 current: "🎉", // 1 char
1668 predicted: "🎉🎊🎈", // 3 chars, adds 2 chars
1669 expected_reversal_chars: 0,
1670 expected_total_chars: 2, // "🎊🎈" = 2 chars added
1671 },
1672 Case {
1673 name: "unicode_deletion_restored",
1674 original: "héllo wörld", // 11 chars
1675 current: "héllo", // 5 chars
1676 predicted: "héllo wörld", // restores " wörld" = 6 chars
1677 expected_reversal_chars: 6, // LCS(" wörld", " wörld") = 6 chars
1678 expected_total_chars: 6,
1679 },
1680 Case {
1681 name: "unicode_addition_reversed",
1682 original: "café", // 4 chars
1683 current: "café latté", // 10 chars, added " latté" = 6 chars
1684 predicted: "café", // removes " latté"
1685 expected_reversal_chars: 6, // 6 chars removed
1686 expected_total_chars: 6,
1687 },
1688 Case {
1689 name: "mixed_ascii_unicode",
1690 original: "",
1691 current: "test日本", // 6 chars
1692 predicted: "test日本語です", // 9 chars
1693 expected_reversal_chars: 0,
1694 expected_total_chars: 3, // 3 new chars after subsequence normalization
1695 },
1696 Case {
1697 name: "unicode_replacement_not_subsequence",
1698 original: "",
1699 current: "日本", // 2 chars
1700 predicted: "中国", // 2 chars, different
1701 expected_reversal_chars: 2, // removes "日本" = 2 chars
1702 expected_total_chars: 4, // 2 removed + 2 added
1703 },
1704 ];
1705
1706 for case in &cases {
1707 let overlap = compute_reversal_overlap(case.original, case.current, case.predicted);
1708 assert_eq!(
1709 overlap.chars_reversing_user_edits, case.expected_reversal_chars,
1710 "Test '{}': expected {} reversal chars, got {}",
1711 case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
1712 );
1713 assert_eq!(
1714 overlap.total_chars_in_prediction, case.expected_total_chars,
1715 "Test '{}': expected {} total chars, got {}",
1716 case.name, case.expected_total_chars, overlap.total_chars_in_prediction
1717 );
1718 }
1719 }
1720
1721 #[test]
1722 fn test_is_subsequence() {
1723 assert!(is_subsequence("", "anything"));
1724 assert!(is_subsequence("", ""));
1725 assert!(is_subsequence("abc", "abc"));
1726 assert!(is_subsequence("abc", "aXbXc"));
1727 assert!(is_subsequence("ac", "abc"));
1728 assert!(!is_subsequence("abc", "ab"));
1729 assert!(!is_subsequence("abc", "cba"));
1730 assert!(!is_subsequence("abc", ""));
1731 assert!(is_subsequence("日本", "日X本Y語"));
1732 assert!(!is_subsequence("日本語", "日本"));
1733 }
1734
1735 #[test]
1736 fn test_compute_lcs_length() {
1737 assert_eq!(compute_lcs_length("", ""), 0);
1738 assert_eq!(compute_lcs_length("abc", ""), 0);
1739 assert_eq!(compute_lcs_length("", "abc"), 0);
1740 assert_eq!(compute_lcs_length("abc", "abc"), 3);
1741 assert_eq!(compute_lcs_length("abc", "def"), 0);
1742 assert_eq!(compute_lcs_length("abcdef", "ace"), 3);
1743 assert_eq!(compute_lcs_length("AGGTAB", "GXTXAYB"), 4);
1744 assert_eq!(compute_lcs_length("日本語", "日語"), 2);
1745 }
1746
1747 #[test]
1748 fn test_compute_prediction_reversal_ratio_full_file() {
1749 let prompt_inputs = ExamplePromptInputs {
1750 content: indoc! {"
1751 line1
1752 user_added
1753 line2
1754 "}
1755 .to_string(),
1756 cursor_row: 0,
1757 cursor_column: 0,
1758 cursor_offset: 0,
1759 edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
1760 path: Arc::from(Path::new("src/test.rs")),
1761 old_path: Arc::from(Path::new("src/test.rs")),
1762 diff: indoc! {"
1763 @@ -1,2 +1,3 @@
1764 line1
1765 +user_added
1766 line2
1767 "}
1768 .into(),
1769 predicted: false,
1770 in_open_source_repo: false,
1771 })],
1772 excerpt_start_row: None,
1773 related_files: None,
1774 };
1775
1776 let predicted = indoc! {"
1777 line1
1778 line2
1779 "};
1780 let ratio =
1781 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
1782
1783 assert!(
1784 ratio > 0.9,
1785 "Expected high reversal ratio when prediction removes user addition, got {}",
1786 ratio
1787 );
1788 }
1789
1790 #[test]
1791 fn test_compute_prediction_reversal_ratio_with_excerpt() {
1792 let prompt_inputs = ExamplePromptInputs {
1793 content: indoc! {"
1794 line10
1795 user_added
1796 line11
1797 "}
1798 .to_string(),
1799 cursor_row: 0,
1800 cursor_column: 0,
1801 cursor_offset: 0,
1802 edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
1803 path: Arc::from(Path::new("src/test.rs")),
1804 old_path: Arc::from(Path::new("src/test.rs")),
1805 diff: indoc! {"
1806 @@ -10,2 +10,3 @@
1807 line10
1808 +user_added
1809 line11
1810 "}
1811 .into(),
1812 predicted: false,
1813 in_open_source_repo: false,
1814 })],
1815 excerpt_start_row: Some(10),
1816 related_files: None,
1817 };
1818
1819 let predicted = indoc! {"
1820 line10
1821 line11
1822 "};
1823 let ratio =
1824 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
1825
1826 assert!(
1827 ratio > 0.9,
1828 "Expected high reversal ratio for excerpt-aware computation, got {}",
1829 ratio
1830 );
1831 }
1832
1833 #[test]
1834 fn test_compute_prediction_reversal_ratio_no_history() {
1835 let prompt_inputs = ExamplePromptInputs {
1836 content: indoc! {"
1837 original content
1838 "}
1839 .to_string(),
1840 cursor_row: 0,
1841 cursor_column: 0,
1842 cursor_offset: 0,
1843 edit_history: vec![],
1844 excerpt_start_row: None,
1845 related_files: None,
1846 };
1847
1848 let predicted = indoc! {"
1849 completely different
1850 "};
1851 let ratio =
1852 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
1853
1854 assert_eq!(
1855 ratio, 0.0,
1856 "Expected zero reversal ratio with no edit history"
1857 );
1858 }
1859
1860 #[test]
1861 fn test_compute_prediction_reversal_ratio_path_filtering() {
1862 let prompt_inputs = ExamplePromptInputs {
1863 content: indoc! {"
1864 line1
1865 user_added
1866 line2
1867 "}
1868 .to_string(),
1869 cursor_row: 0,
1870 cursor_column: 0,
1871 cursor_offset: 0,
1872 edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
1873 path: Arc::from(Path::new("src/other.rs")),
1874 old_path: Arc::from(Path::new("src/other.rs")),
1875 diff: indoc! {"
1876 @@ -1,2 +1,3 @@
1877 line1
1878 +user_added
1879 line2
1880 "}
1881 .into(),
1882 predicted: false,
1883 in_open_source_repo: false,
1884 })],
1885 excerpt_start_row: None,
1886 related_files: None,
1887 };
1888
1889 let predicted = indoc! {"
1890 line1
1891 line2
1892 "};
1893 let ratio =
1894 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
1895
1896 assert_eq!(
1897 ratio, 0.0,
1898 "Expected zero reversal when edit history is for different file"
1899 );
1900 }
1901
1902 #[test]
1903 fn test_compute_prediction_reversal_ratio_lenient_fallback() {
1904 let prompt_inputs = ExamplePromptInputs {
1905 content: indoc! {"
1906 actual_line1
1907 user_added
1908 actual_line2
1909 "}
1910 .to_string(),
1911 cursor_row: 0,
1912 cursor_column: 0,
1913 cursor_offset: 0,
1914 edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
1915 path: Arc::from(Path::new("src/test.rs")),
1916 old_path: Arc::from(Path::new("src/test.rs")),
1917 diff: indoc! {"
1918 @@ -1,2 +1,3 @@
1919 wrong_context
1920 +user_added
1921 more_wrong
1922 "}
1923 .into(),
1924 predicted: false,
1925 in_open_source_repo: false,
1926 })],
1927 excerpt_start_row: None,
1928 related_files: None,
1929 };
1930
1931 let predicted = indoc! {"
1932 actual_line1
1933 actual_line2
1934 "};
1935 let ratio =
1936 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
1937
1938 assert!(
1939 ratio >= 0.0 && ratio <= 1.0,
1940 "Ratio should be valid even with lenient fallback, got {}",
1941 ratio
1942 );
1943 }
1944
1945 #[test]
1946 fn test_excerpt_aware_reversal_error_recovery() {
1947 let diffs = vec![indoc! {"
1948 @@ -1,2 +1,3 @@
1949 nonexistent_context
1950 +added
1951 more_nonexistent
1952 "}];
1953 let excerpt_content = indoc! {"
1954 completely
1955 different
1956 content
1957 "};
1958 let predicted_content = indoc! {"
1959 completely
1960 modified
1961 content
1962 "};
1963
1964 let overlap =
1965 compute_excerpt_aware_reversal_overlap(&diffs, excerpt_content, 0, predicted_content);
1966
1967 assert!(
1968 overlap.ratio() >= 0.0 && overlap.ratio() <= 1.0,
1969 "Should handle failed diff application gracefully"
1970 );
1971 }
1972
1973 #[test]
1974 fn test_multiple_sequential_diffs() {
1975 let prompt_inputs = ExamplePromptInputs {
1976 content: indoc! {"
1977 line1
1978 first_add
1979 second_add
1980 line2
1981 "}
1982 .to_string(),
1983 cursor_row: 0,
1984 cursor_column: 0,
1985 cursor_offset: 0,
1986 edit_history: vec![
1987 Arc::new(zeta_prompt::Event::BufferChange {
1988 path: Arc::from(Path::new("src/test.rs")),
1989 old_path: Arc::from(Path::new("src/test.rs")),
1990 diff: indoc! {"
1991 @@ -1,2 +1,3 @@
1992 line1
1993 +first_add
1994 line2
1995 "}
1996 .into(),
1997 predicted: false,
1998 in_open_source_repo: false,
1999 }),
2000 Arc::new(zeta_prompt::Event::BufferChange {
2001 path: Arc::from(Path::new("src/test.rs")),
2002 old_path: Arc::from(Path::new("src/test.rs")),
2003 diff: indoc! {"
2004 @@ -2,2 +2,3 @@
2005 first_add
2006 +second_add
2007 line2
2008 "}
2009 .into(),
2010 predicted: false,
2011 in_open_source_repo: false,
2012 }),
2013 ],
2014 excerpt_start_row: None,
2015 related_files: None,
2016 };
2017
2018 let predicted = indoc! {"
2019 line1
2020 line2
2021 "};
2022 let ratio =
2023 compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
2024
2025 assert!(
2026 ratio > 0.9,
2027 "Expected high reversal ratio when reversing multiple sequential edits, got {}",
2028 ratio
2029 );
2030 }
2031}