@@ -7,7 +7,310 @@ use language::text_diff;
use crate::example::ExamplePromptInputs;
-pub fn reverse_diff(diff: &str) -> String {
+/// Applies `diff_str` to `text` one hunk at a time, on a best-effort basis.
+///
+/// Every hunk parsed from `diff_str` is re-serialized with a minimal file header and
+/// handed to `apply_diff_to_string` on its own. A hunk that applies replaces the
+/// running text; a hunk that is rejected leaves the running text untouched and the
+/// remaining hunks are still attempted.
+fn apply_diff_to_string_lenient(diff_str: &str, text: &str) -> String {
+    parse_diff_hunks(diff_str)
+        .iter()
+        .fold(text.to_string(), |patched, hunk| {
+            let single_hunk_diff = format!("--- a/file\n+++ b/file\n{}", format_hunk(hunk));
+            match apply_diff_to_string(&single_hunk_diff, &patched) {
+                Ok(updated) => updated,
+                // Lenient by design: a hunk that does not apply is simply dropped.
+                Err(_) => patched,
+            }
+        })
+}
+
+/// One hunk of a unified diff: the four numbers of its
+/// `@@ -old_start,old_count +new_start,new_count @@` header plus its body lines.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct ParsedHunk {
+ /// Start line on the old (`-`) side, as written in the header.
+ old_start: u32,
+ /// Line count on the old side; `parse_hunk_header` uses 1 when the header omits it.
+ old_count: u32,
+ /// Start line on the new (`+`) side, as written in the header.
+ new_start: u32,
+ /// Line count on the new side; `parse_hunk_header` uses 1 when the header omits it.
+ new_count: u32,
+ /// Body lines of the hunk, in the order they appeared in the diff.
+ lines: Vec<HunkLine>,
+}
+
+/// A single body line of a diff hunk, stored without its one-character prefix.
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum HunkLine {
+ /// Line prefixed with ` `; `parse_diff_hunks` also records a completely empty
+ /// line as an empty context line.
+ Context(String),
+ /// Line prefixed with `+`.
+ Addition(String),
+ /// Line prefixed with `-`.
+ Deletion(String),
+}
+
+/// Parses a unified-diff hunk header of the form `@@ -A[,B] +C[,D] @@...`.
+///
+/// Returns `(old_start, old_count, new_start, new_count)`. A side written without a
+/// `,count` suffix gets a count of 1. Anything after the closing ` @@` is ignored.
+/// Returns `None` when the line does not have this shape or when any of the numbers
+/// fails to parse as `u32`.
+fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32, u32)> {
+    /// Parses one `start[,count]` range; the count defaults to 1 when absent.
+    fn parse_range(range: &str) -> Option<(u32, u32)> {
+        match range.split_once(',') {
+            Some((start, count)) => Some((start.parse().ok()?, count.parse().ok()?)),
+            None => Some((range.parse().ok()?, 1)),
+        }
+    }
+
+    let after_marker = line.strip_prefix("@@ -")?;
+    let (old_range, remainder) = after_marker.split_once(' ')?;
+    let (new_range, _trailer) = remainder.strip_prefix('+')?.split_once(" @@")?;
+
+    let (old_start, old_count) = parse_range(old_range)?;
+    let (new_start, new_count) = parse_range(new_range)?;
+    Some((old_start, old_count, new_start, new_count))
+}
+
+/// Splits the text of a unified diff into its hunks.
+///
+/// A new hunk starts at every line accepted by `parse_hunk_header`. Until the first
+/// such line is seen, all lines are ignored. Inside a hunk, lines are classified by
+/// their first character (`+`, `-`, or a space) and stored without it; a completely
+/// empty line is stored as an empty context line; any other line is dropped.
+fn parse_diff_hunks(diff: &str) -> Vec<ParsedHunk> {
+    let mut finished = Vec::new();
+    let mut open: Option<ParsedHunk> = None;
+
+    for raw in diff.lines() {
+        if let Some((old_start, old_count, new_start, new_count)) = parse_hunk_header(raw) {
+            // A header closes whatever hunk was being collected and opens the next one.
+            finished.extend(open.take());
+            open = Some(ParsedHunk {
+                old_start,
+                old_count,
+                new_start,
+                new_count,
+                lines: Vec::new(),
+            });
+            continue;
+        }
+
+        if let Some(hunk) = open.as_mut() {
+            // The marker is a single ASCII byte, so slicing from index 1 always lands
+            // on a character boundary.
+            let parsed = match raw.as_bytes().first() {
+                Some(b'+') => Some(HunkLine::Addition(raw[1..].to_string())),
+                Some(b'-') => Some(HunkLine::Deletion(raw[1..].to_string())),
+                Some(b' ') => Some(HunkLine::Context(raw[1..].to_string())),
+                None => Some(HunkLine::Context(String::new())),
+                Some(_) => None,
+            };
+            hunk.lines.extend(parsed);
+        }
+    }
+
+    finished.extend(open);
+    finished
+}
+
+/// Serializes a hunk back into unified-diff text.
+///
+/// The output is the `@@ -old_start,old_count +new_start,new_count @@` header followed
+/// by every body line with its ` `, `+` or `-` prefix restored. Each line, including
+/// the last, is terminated with `\n`. Both counts are always written explicitly.
+fn format_hunk(hunk: &ParsedHunk) -> String {
+    let header = format!(
+        "@@ -{},{} +{},{} @@\n",
+        hunk.old_start, hunk.old_count, hunk.new_start, hunk.new_count
+    );
+
+    hunk.lines.iter().fold(header, |mut out, line| {
+        let (marker, text) = match line {
+            HunkLine::Context(text) => (' ', text),
+            HunkLine::Addition(text) => ('+', text),
+            HunkLine::Deletion(text) => ('-', text),
+        };
+        out.push(marker);
+        out.push_str(text);
+        out.push('\n');
+        out
+    })
+}
+
+/// Restricts a diff to the lines that fall inside an excerpt of the new-side file.
+///
+/// The excerpt covers the 0-based rows
+/// `excerpt_start_row..excerpt_start_row + excerpt_row_count`. Hunks are positioned by
+/// their new-side range (`new_start` / `new_count`):
+///
+/// * a hunk ending at or before the excerpt start is dropped, but its net line delta
+///   (additions minus deletions) is added to the returned offset;
+/// * a hunk starting at or after the excerpt end is dropped and does not affect the
+///   offset;
+/// * an overlapping hunk keeps only the lines whose row lies inside the excerpt, and
+///   its full net line delta is added to the offset.
+///
+/// Kept hunks are renumbered so that row `excerpt_start_row` becomes line 1; the old
+/// and new start lines are both set to that excerpt-relative position.
+///
+/// Returns the filtered hunks as diff text (no `---`/`+++` file header) together with
+/// the accumulated line offset.
+fn filter_diff_hunks_by_excerpt(
+    diff: &str,
+    excerpt_start_row: u32,
+    excerpt_row_count: u32,
+) -> (String, i32) {
+    // Header numbers come straight from diff text, so all row arithmetic saturates
+    // instead of overflowing on absurd values.
+    let excerpt_rows = excerpt_start_row..excerpt_start_row.saturating_add(excerpt_row_count);
+
+    let mut kept_hunks = Vec::new();
+    let mut cumulative_line_offset: i32 = 0;
+
+    for hunk in parse_diff_hunks(diff) {
+        let hunk_start = hunk.new_start.saturating_sub(1);
+        let hunk_end = hunk_start.saturating_add(hunk.new_count);
+
+        let additions = hunk
+            .lines
+            .iter()
+            .filter(|line| matches!(line, HunkLine::Addition(_)))
+            .count() as i32;
+        let deletions = hunk
+            .lines
+            .iter()
+            .filter(|line| matches!(line, HunkLine::Deletion(_)))
+            .count() as i32;
+        let hunk_line_delta = additions - deletions;
+
+        if hunk_end <= excerpt_rows.start {
+            cumulative_line_offset += hunk_line_delta;
+            continue;
+        }
+        if hunk_start >= excerpt_rows.end {
+            continue;
+        }
+
+        let mut kept_lines = Vec::new();
+        let mut row = hunk_start;
+        let mut kept_old_count = 0u32;
+        let mut kept_new_count = 0u32;
+        let mut first_kept_row: Option<u32> = None;
+
+        for line in &hunk.lines {
+            // Which side(s) of the diff this line belongs to.
+            let (on_old_side, on_new_side) = match line {
+                HunkLine::Context(_) => (true, true),
+                HunkLine::Addition(_) => (false, true),
+                HunkLine::Deletion(_) => (true, false),
+            };
+
+            if excerpt_rows.contains(&row) {
+                first_kept_row.get_or_insert(row);
+                kept_lines.push(line.clone());
+                kept_old_count += u32::from(on_old_side);
+                kept_new_count += u32::from(on_new_side);
+            }
+
+            // Only lines that exist on the new side occupy a row there; a deletion is
+            // attributed to the row of the line that follows it.
+            if on_new_side {
+                row = row.saturating_add(1);
+            }
+        }
+
+        if !kept_lines.is_empty() {
+            let first_row = first_kept_row.unwrap_or(excerpt_rows.start);
+            let start_1based = (first_row - excerpt_rows.start) + 1;
+
+            kept_hunks.push(ParsedHunk {
+                old_start: start_1based,
+                old_count: kept_old_count,
+                new_start: start_1based,
+                new_count: kept_new_count,
+                lines: kept_lines,
+            });
+        }
+
+        cumulative_line_offset += hunk_line_delta;
+    }
+
+    let filtered_diff: String = kept_hunks.iter().map(format_hunk).collect();
+
+    (filtered_diff, cumulative_line_offset)
+}
+
+/// Moves the excerpt start row back to where it was before `diff` was applied.
+///
+/// Every hunk whose new-side range ends at or before the (running) start row shifts
+/// that row by the hunk's net line delta: up when the hunk added more lines than it
+/// removed, down otherwise. Hunks are processed in order and each comparison uses
+/// the row as already adjusted by the preceding hunks.
+fn excerpt_start_row_before_diff(diff: &str, excerpt_start_row: u32) -> u32 {
+    parse_diff_hunks(diff)
+        .iter()
+        .fold(excerpt_start_row, |row, hunk| {
+            let hunk_end = hunk
+                .new_start
+                .saturating_sub(1)
+                .saturating_add(hunk.new_count);
+            if hunk_end > row {
+                return row;
+            }
+
+            let additions = hunk
+                .lines
+                .iter()
+                .filter(|line| matches!(line, HunkLine::Addition(_)))
+                .count() as u32;
+            let deletions = hunk
+                .lines
+                .iter()
+                .filter(|line| matches!(line, HunkLine::Deletion(_)))
+                .count() as u32;
+
+            if additions >= deletions {
+                row.saturating_sub(additions - deletions)
+            } else {
+                row.saturating_add(deletions - additions)
+            }
+        })
+}
+
+/// Computes the reversal overlap for a prediction when only an excerpt of the file
+/// is available.
+///
+/// The edit history is walked from newest to oldest. For each non-empty diff, the
+/// hunks are filtered down to the current excerpt, reversed, and applied to the
+/// running content, reconstructing what the excerpt looked like before the user's
+/// edits. After each diff the excerpt start row is moved back past any hunks that
+/// lie entirely before it. The reconstructed content, the given `excerpt_content`
+/// and `predicted_content` are then passed to `compute_reversal_overlap`.
+///
+/// * `edit_history_diffs` - diffs in chronological order (oldest first).
+/// * `excerpt_content` - current text of the excerpt.
+/// * `excerpt_start_row` - 0-based row of the excerpt's first line in the file.
+/// * `predicted_content` - excerpt text proposed by the prediction.
+fn compute_excerpt_aware_reversal_overlap(
+    edit_history_diffs: &[&str],
+    excerpt_content: &str,
+    excerpt_start_row: u32,
+    predicted_content: &str,
+) -> ReversalOverlap {
+    let mut current_content = excerpt_content.to_string();
+    let mut current_excerpt_start_row = excerpt_start_row;
+
+    for diff in edit_history_diffs.iter().rev() {
+        if diff.is_empty() {
+            continue;
+        }
+
+        // Treat an empty excerpt as one row so the filter window is never empty.
+        let current_row_count =
+            u32::try_from(current_content.lines().count()).unwrap_or(u32::MAX);
+        let (filtered_diff, _) =
+            filter_diff_hunks_by_excerpt(diff, current_excerpt_start_row, current_row_count.max(1));
+
+        if !filtered_diff.is_empty() {
+            let reversed = reverse_diff(&format!("--- a/file\n+++ b/file\n{}", filtered_diff));
+            match apply_diff_to_string(&reversed, &current_content) {
+                Ok(updated) => current_content = updated,
+                // NOTE(review): a diff that fails to apply is skipped entirely, which
+                // also skips the start-row adjustment below. This matches the
+                // previous behaviour; confirm it is intended.
+                Err(_) => continue,
+            }
+        }
+
+        current_excerpt_start_row =
+            excerpt_start_row_before_diff(diff, current_excerpt_start_row);
+    }
+
+    compute_reversal_overlap(&current_content, excerpt_content, predicted_content)
+}
+
+fn reverse_diff(diff: &str) -> String {
let mut result: String = diff
.lines()
.map(|line| {
@@ -145,17 +448,20 @@ fn normalize_extension_edits(edits: Vec<GranularEdit>) -> Vec<GranularEdit> {
}
if is_subsequence(&edit.old_text, &edit.new_text) {
- let inserted_len = edit.new_text.len() - edit.old_text.len();
+ let inserted_char_count =
+ edit.new_text.chars().count() - edit.old_text.chars().count();
GranularEdit {
range: edit.range.start..edit.range.start,
old_text: String::new(),
- new_text: edit.new_text.chars().take(inserted_len).collect(),
+ new_text: edit.new_text.chars().take(inserted_char_count).collect(),
}
} else if is_subsequence(&edit.new_text, &edit.old_text) {
- let deleted_len = edit.old_text.len() - edit.new_text.len();
+ let deleted_char_count =
+ edit.old_text.chars().count() - edit.new_text.chars().count();
+ let deleted_text: String = edit.old_text.chars().take(deleted_char_count).collect();
GranularEdit {
- range: edit.range.start..edit.range.start + deleted_len,
- old_text: edit.old_text.chars().take(deleted_len).collect(),
+ range: edit.range.start..edit.range.start + deleted_text.len(),
+ old_text: deleted_text,
new_text: String::new(),
}
} else {
@@ -185,7 +491,7 @@ fn compute_reversal_overlap(
let total_chars_in_prediction: usize = prediction_edits
.iter()
- .map(|e| e.new_text.len() + e.old_text.len())
+ .map(|e| e.new_text.chars().count() + e.old_text.chars().count())
.sum();
ReversalOverlap {
@@ -212,7 +518,10 @@ fn compute_reversed_additions(
.min(history_addition.range_in_current.end);
if overlap_start < overlap_end {
- reversed_chars += overlap_end - overlap_start;
+ let relative_start = overlap_start - pred_edit.range.start;
+ let relative_end = overlap_end - pred_edit.range.start;
+ let overlap_text = &pred_edit.old_text[relative_start..relative_end];
+ reversed_chars += overlap_text.chars().count();
}
}
}
@@ -271,7 +580,7 @@ fn compute_lcs_length(a: &str, b: &str) -> usize {
prev[n]
}
-pub fn filter_edit_history_by_path<'a>(
+fn filter_edit_history_by_path<'a>(
edit_history: &'a [Arc<zeta_prompt::Event>],
cursor_path: &std::path::Path,
) -> Vec<&'a zeta_prompt::Event> {
@@ -294,7 +603,7 @@ pub fn filter_edit_history_by_path<'a>(
.collect()
}
-pub fn extract_diff_from_event(event: &zeta_prompt::Event) -> &str {
+fn extract_diff_from_event(event: &zeta_prompt::Event) -> &str {
match event {
zeta_prompt::Event::BufferChange { diff, .. } => diff.as_str(),
}
@@ -310,6 +619,20 @@ pub fn compute_prediction_reversal_ratio(
let edit_history: &[Arc<zeta_prompt::Event>] = &prompt_inputs.edit_history;
let relevant_events = filter_edit_history_by_path(edit_history, cursor_path);
+ if let Some(excerpt_start_row) = prompt_inputs.excerpt_start_row {
+ let diffs: Vec<&str> = relevant_events
+ .iter()
+ .map(|e| extract_diff_from_event(e))
+ .collect();
+ let overlap = compute_excerpt_aware_reversal_overlap(
+ &diffs,
+ current_content,
+ excerpt_start_row,
+ predicted_content,
+ );
+ return overlap.ratio();
+ }
+
let mut original_content = current_content.to_string();
for event in relevant_events.into_iter().rev() {
let diff = extract_diff_from_event(event);
@@ -320,12 +643,8 @@ pub fn compute_prediction_reversal_ratio(
let with_headers = format!("--- a/file\n+++ b/file\n{}", reversed);
match apply_diff_to_string(&with_headers, &original_content) {
Ok(updated_content) => original_content = updated_content,
- Err(err) => {
- log::warn!(
- "Failed to reconstruct original content for reversal tracking: Failed to apply reversed diff: {:#}",
- err
- );
- return 0.0;
+ Err(_) => {
+ original_content = apply_diff_to_string_lenient(&reversed, &original_content);
}
}
}
@@ -338,6 +657,7 @@ pub fn compute_prediction_reversal_ratio(
mod tests {
use super::*;
use edit_prediction::udiff::apply_diff_to_string;
+ use indoc::indoc;
#[test]
fn test_reversal_overlap() {
@@ -353,17 +673,35 @@ mod tests {
let cases = [
Case {
name: "user_adds_line_prediction_removes_it",
- original: "a\nb\nc",
- current: "a\nnew line\nb\nc",
- predicted: "a\nb\nc",
+ original: indoc! {"
+ a
+ b
+ c"},
+ current: indoc! {"
+ a
+ new line
+ b
+ c"},
+ predicted: indoc! {"
+ a
+ b
+ c"},
expected_reversal_chars: 9,
expected_total_chars: 9,
},
Case {
name: "user_deletes_line_prediction_restores_it",
- original: "a\ndeleted\nb",
- current: "a\nb",
- predicted: "a\ndeleted\nb",
+ original: indoc! {"
+ a
+ deleted
+ b"},
+ current: indoc! {"
+ a
+ b"},
+ predicted: indoc! {"
+ a
+ deleted
+ b"},
expected_reversal_chars: 8,
expected_total_chars: 8,
},
@@ -385,9 +723,18 @@ mod tests {
},
Case {
name: "independent_edits_different_locations",
- original: "line1\nline2\nline3",
- current: "LINE1\nline2\nline3",
- predicted: "LINE1\nline2\nLINE3",
+ original: indoc! {"
+ line1
+ line2
+ line3"},
+ current: indoc! {"
+ LINE1
+ line2
+ line3"},
+ predicted: indoc! {"
+ LINE1
+ line2
+ LINE3"},
expected_reversal_chars: 0,
expected_total_chars: 10,
},
@@ -401,9 +748,18 @@ mod tests {
},
Case {
name: "user_replaces_text_prediction_reverses",
- original: "keep\ndelete_me\nkeep2",
- current: "keep\nadded\nkeep2",
- predicted: "keep\ndelete_me\nkeep2",
+ original: indoc! {"
+ keep
+ delete_me
+ keep2"},
+ current: indoc! {"
+ keep
+ added
+ keep2"},
+ predicted: indoc! {"
+ keep
+ delete_me
+ keep2"},
expected_reversal_chars: 14,
expected_total_chars: 14,
},
@@ -523,9 +879,13 @@ mod tests {
},
Case {
name: "infix insertion not reversal",
- original: "from my_project import Foo\n",
- current: "ifrom my_project import Foo\n",
- predicted: indoc::indoc! {"
+ original: indoc! {"
+ from my_project import Foo
+ "},
+ current: indoc! {"
+ ifrom my_project import Foo
+ "},
+ predicted: indoc! {"
import
from my_project import Foo
"},
@@ -544,9 +904,9 @@ mod tests {
name: "multiple insertions no reversal",
original: "print(\"Hello, World!\")",
current: "sys.(\"Hello, World!\")",
- predicted: "sys.stdout.write(\"Hello, World!\n\")",
+ predicted: "sys.stdout.write(\"Hello, World!\\n\")",
expected_reversal_chars: 0,
- expected_total_chars: 13,
+ expected_total_chars: 14,
},
];
@@ -567,14 +927,14 @@ mod tests {
#[test]
fn test_reverse_diff() {
- let forward_diff = "\
---- a/file.rs
-+++ b/file.rs
-@@ -1,3 +1,4 @@
- fn main() {
-+ let x = 42;
- println!(\"hello\");
-}";
+ let forward_diff = indoc! {"
+ --- a/file.rs
+ +++ b/file.rs
+ @@ -1,3 +1,4 @@
+ fn main() {
+ + let x = 42;
+ println!(\"hello\");
+ }"};
let reversed = reverse_diff(forward_diff);
@@ -599,8 +959,16 @@ mod tests {
#[test]
fn test_reverse_diff_roundtrip() {
// Applying a diff and then its reverse should get back to original
- let original = "first line\nhello world\nlast line\n";
- let modified = "first line\nhello beautiful world\nlast line\n";
+ let original = indoc! {"
+ first line
+ hello world
+ last line
+ "};
+ let modified = indoc! {"
+ first line
+ hello beautiful world
+ last line
+ "};
// unified_diff doesn't include file headers, but apply_diff_to_string needs them
let diff_body = language::unified_diff(original, modified);
@@ -625,21 +993,33 @@ mod tests {
Arc::new(zeta_prompt::Event::BufferChange {
path: Arc::from(Path::new("myrepo/src/file.rs")),
old_path: Arc::from(Path::new("myrepo/src/file.rs")),
- diff: "@@ -1 +1 @@\n-old\n+new".into(),
+ diff: indoc! {"
+ @@ -1 +1 @@
+ -old
+ +new"}
+ .into(),
predicted: false,
in_open_source_repo: true,
}),
Arc::new(zeta_prompt::Event::BufferChange {
path: Arc::from(Path::new("myrepo/other.rs")),
old_path: Arc::from(Path::new("myrepo/other.rs")),
- diff: "@@ -1 +1 @@\n-a\n+b".into(),
+ diff: indoc! {"
+ @@ -1 +1 @@
+ -a
+ +b"}
+ .into(),
predicted: false,
in_open_source_repo: true,
}),
Arc::new(zeta_prompt::Event::BufferChange {
path: Arc::from(Path::new("src/file.rs")),
old_path: Arc::from(Path::new("src/file.rs")),
- diff: "@@ -1 +1 @@\n-x\n+y".into(),
+ diff: indoc! {"
+ @@ -1 +1 @@
+ -x
+ +y"}
+ .into(),
predicted: false,
in_open_source_repo: true,
}),
@@ -673,18 +1053,979 @@ mod tests {
#[test]
fn test_reverse_diff_preserves_trailing_newline() {
+ // `reverse_diff` output is expected to end with '\n' exactly when its input does:
+ // first with a diff that ends in a newline, then with one that does not.
- let diff_with_trailing_newline = "--- a/file\n+++ b/file\n@@ -1 +1 @@\n-old\n+new\n";
+ let diff_with_trailing_newline = indoc! {"
+ --- a/file
+ +++ b/file
+ @@ -1 +1 @@
+ -old
+ +new
+ "};
let reversed = reverse_diff(diff_with_trailing_newline);
assert!(
reversed.ends_with('\n'),
"Reversed diff should preserve trailing newline"
);
- let diff_without_trailing_newline = "--- a/file\n+++ b/file\n@@ -1 +1 @@\n-old\n+new";
+ let diff_without_trailing_newline = indoc! {"
+ --- a/file
+ +++ b/file
+ @@ -1 +1 @@
+ -old
+ +new"};
let reversed = reverse_diff(diff_without_trailing_newline);
assert!(
!reversed.ends_with('\n'),
"Reversed diff should not add trailing newline if original didn't have one"
);
}
+
+ /// Table-driven test for `filter_diff_hunks_by_excerpt`: each case supplies a diff
+ /// and an excerpt window, and checks both the filtered diff text and the returned
+ /// line offset.
+ #[test]
+ fn test_filter_hunks_by_excerpt_region() {
+ struct Case {
+ name: &'static str,
+ diff: &'static str,
+ excerpt_start_row: u32,
+ excerpt_row_count: u32,
+ expected_filtered_diff: &'static str,
+ expected_line_offset: i32,
+ }
+
+ let cases = [
+ Case {
+ name: "hunk_entirely_before_excerpt",
+ diff: indoc! {"
+ @@ -1,3 +1,4 @@
+ line1
+ +inserted
+ line2
+ line3
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 5,
+ expected_filtered_diff: "",
+ expected_line_offset: 1,
+ },
+ Case {
+ name: "hunk_entirely_inside_excerpt",
+ diff: indoc! {"
+ @@ -12,3 +12,4 @@
+ line12
+ +inserted
+ line13
+ line14
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -2,3 +2,4 @@
+ line12
+ +inserted
+ line13
+ line14
+ "},
+ expected_line_offset: 1,
+ },
+ Case {
+ name: "hunk_entirely_after_excerpt",
+ diff: indoc! {"
+ @@ -50,3 +50,4 @@
+ line50
+ +inserted
+ line51
+ line52
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 5,
+ expected_filtered_diff: "",
+ expected_line_offset: 0,
+ },
+ Case {
+ name: "hunk_straddles_excerpt_start",
+ diff: indoc! {"
+ @@ -8,5 +8,6 @@
+ line8
+ line9
+ +inserted
+ line10
+ line11
+ line12
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -1,3 +1,3 @@
+ line10
+ line11
+ line12
+ "},
+ expected_line_offset: 1,
+ },
+ Case {
+ name: "hunk_straddles_excerpt_end",
+ diff: indoc! {"
+ @@ -18,5 +18,6 @@
+ line18
+ line19
+ +inserted
+ line20
+ line21
+ line22
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -8,2 +8,3 @@
+ line18
+ line19
+ +inserted
+ "},
+ expected_line_offset: 1,
+ },
+ Case {
+ name: "multiple_hunks_mixed",
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +before_excerpt
+ line2
+ @@ -12,2 +13,3 @@
+ line12
+ +inside_excerpt
+ line13
+ @@ -50,2 +52,3 @@
+ line50
+ +after_excerpt
+ line51
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -3,2 +3,3 @@
+ line12
+ +inside_excerpt
+ line13
+ "},
+ expected_line_offset: 2,
+ },
+ Case {
+ name: "deletion_before_excerpt",
+ diff: indoc! {"
+ @@ -1,4 +1,3 @@
+ line1
+ -deleted
+ line2
+ line3
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 5,
+ expected_filtered_diff: "",
+ expected_line_offset: -1,
+ },
+ Case {
+ name: "deletion_inside_excerpt",
+ diff: indoc! {"
+ @@ -12,4 +12,3 @@
+ line12
+ -deleted
+ line13
+ line14
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -2,4 +2,3 @@
+ line12
+ -deleted
+ line13
+ line14
+ "},
+ expected_line_offset: -1,
+ },
+ Case {
+ name: "empty_diff",
+ diff: "",
+ excerpt_start_row: 10,
+ excerpt_row_count: 5,
+ expected_filtered_diff: "",
+ expected_line_offset: 0,
+ },
+ Case {
+ name: "hunk_spans_entire_excerpt",
+ diff: indoc! {"
+ @@ -8,10 +8,12 @@
+ line8
+ line9
+ line10
+ line11
+ +inserted1
+ line12
+ line13
+ +inserted2
+ line14
+ line15
+ line16
+ line17
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 5,
+ expected_filtered_diff: indoc! {"
+ @@ -1,3 +1,5 @@
+ line11
+ +inserted1
+ line12
+ line13
+ +inserted2
+ "},
+ expected_line_offset: 2,
+ },
+ Case {
+ name: "replacement_inside_excerpt",
+ diff: indoc! {"
+ @@ -12,3 +12,3 @@
+ line12
+ -old_text
+ +new_text
+ line14
+ "},
+ excerpt_start_row: 10,
+ excerpt_row_count: 10,
+ expected_filtered_diff: indoc! {"
+ @@ -2,3 +2,3 @@
+ line12
+ -old_text
+ +new_text
+ line14
+ "},
+ expected_line_offset: 0,
+ },
+ ];
+
+ // Run every case; the case name is included in the failure message.
+ for case in &cases {
+ let (filtered, line_offset) = filter_diff_hunks_by_excerpt(
+ case.diff,
+ case.excerpt_start_row,
+ case.excerpt_row_count,
+ );
+ assert_eq!(
+ filtered, case.expected_filtered_diff,
+ "Test '{}': filtered diff mismatch.\nExpected:\n{}\nGot:\n{}",
+ case.name, case.expected_filtered_diff, filtered
+ );
+ assert_eq!(
+ line_offset, case.expected_line_offset,
+ "Test '{}': line offset mismatch. Expected {}, got {}",
+ case.name, case.expected_line_offset, line_offset
+ );
+ }
+ }
+
+ /// Table-driven test for `compute_excerpt_aware_reversal_overlap`: each case gives
+ /// an edit history, the current excerpt and a prediction, and checks the resulting
+ /// `chars_reversing_user_edits` and `total_chars_in_prediction`.
+ #[test]
+ fn test_excerpt_aware_reversal_tracking() {
+ struct Case {
+ name: &'static str,
+ edit_history_diffs: Vec<&'static str>,
+ excerpt_content: &'static str,
+ excerpt_start_row: u32,
+ predicted_content: &'static str,
+ expected_reversal_chars: usize,
+ expected_total_chars: usize,
+ }
+
+ let cases = [
+ Case {
+ name: "edit_outside_excerpt_no_reversal",
+ edit_history_diffs: vec![indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +added_outside
+ line2
+ "}],
+ excerpt_content: indoc! {"
+ line10
+ line11
+ line12
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ modified
+ line12
+ "},
+ expected_reversal_chars: 0,
+ expected_total_chars: 14,
+ },
+ Case {
+ name: "edit_inside_excerpt_with_reversal",
+ edit_history_diffs: vec![indoc! {"
+ @@ -10,3 +10,4 @@
+ line10
+ +user_added
+ line11
+ line12
+ "}],
+ excerpt_content: indoc! {"
+ line10
+ user_added
+ line11
+ line12
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ line11
+ line12
+ "},
+ expected_reversal_chars: 11,
+ expected_total_chars: 11,
+ },
+ Case {
+ name: "straddling_edit_partial_reversal",
+ edit_history_diffs: vec![indoc! {"
+ @@ -8,6 +8,8 @@
+ line8
+ line9
+ +before_excerpt
+ line10
+ +inside_excerpt
+ line11
+ line12
+ line13
+ "}],
+ excerpt_content: indoc! {"
+ line10
+ inside_excerpt
+ line11
+ line12
+ line13
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ line11
+ line12
+ line13
+ "},
+ expected_reversal_chars: 15,
+ expected_total_chars: 15,
+ },
+ Case {
+ name: "multiple_edits_mixed_locations",
+ edit_history_diffs: vec![
+ indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +outside1
+ line2
+ "},
+ indoc! {"
+ @@ -11,2 +12,3 @@
+ line11
+ +inside1
+ line12
+ "},
+ ],
+ excerpt_content: indoc! {"
+ line10
+ line11
+ inside1
+ line12
+ line13
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ line11
+ line12
+ line13
+ "},
+ expected_reversal_chars: 8,
+ expected_total_chars: 8,
+ },
+ Case {
+ name: "no_edit_history",
+ edit_history_diffs: vec![],
+ excerpt_content: indoc! {"
+ line10
+ line11
+ line12
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ modified
+ line12
+ "},
+ expected_reversal_chars: 0,
+ expected_total_chars: 14,
+ },
+ Case {
+ name: "edit_after_excerpt_no_effect",
+ edit_history_diffs: vec![indoc! {"
+ @@ -50,2 +50,3 @@
+ line50
+ +added_after
+ line51
+ "}],
+ excerpt_content: indoc! {"
+ line10
+ line11
+ line12
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ changed
+ line12
+ "},
+ expected_reversal_chars: 0,
+ expected_total_chars: 13,
+ },
+ Case {
+ name: "line_offset_tracking_across_hunks",
+ edit_history_diffs: vec![
+ indoc! {"
+ @@ -1,2 +1,4 @@
+ line1
+ +added1
+ +added2
+ line2
+ "},
+ indoc! {"
+ @@ -12,2 +14,3 @@
+ line12
+ +inside_after_offset
+ line13
+ "},
+ ],
+ excerpt_content: indoc! {"
+ line10
+ line11
+ line12
+ inside_after_offset
+ line13
+ "},
+ excerpt_start_row: 10,
+ predicted_content: indoc! {"
+ line10
+ line11
+ line12
+ line13
+ "},
+ expected_reversal_chars: 20,
+ expected_total_chars: 20,
+ },
+ ];
+
+ // Run every case; the case name is included in the failure message.
+ for case in &cases {
+ let overlap = compute_excerpt_aware_reversal_overlap(
+ &case.edit_history_diffs,
+ case.excerpt_content,
+ case.excerpt_start_row,
+ case.predicted_content,
+ );
+ assert_eq!(
+ overlap.chars_reversing_user_edits, case.expected_reversal_chars,
+ "Test '{}': expected {} reversal chars, got {}",
+ case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
+ );
+ assert_eq!(
+ overlap.total_chars_in_prediction, case.expected_total_chars,
+ "Test '{}': expected {} total chars, got {}",
+ case.name, case.expected_total_chars, overlap.total_chars_in_prediction
+ );
+ }
+ }
+
+ /// Table-driven test for `apply_diff_to_string_lenient`: hunks whose context is not
+ /// present in the content are expected to be skipped, while the remaining hunks are
+ /// still applied.
+ #[test]
+ fn test_lenient_diff_application() {
+ struct Case {
+ name: &'static str,
+ diff: &'static str,
+ content: &'static str,
+ expected_result: &'static str,
+ }
+
+ let cases = [
+ Case {
+ name: "hunk_context_not_found_skipped",
+ diff: indoc! {"
+ @@ -1,3 +1,4 @@
+ context_not_in_content
+ +added_line
+ more_context
+ final_context
+ "},
+ content: indoc! {"
+ completely
+ different
+ content
+ "},
+ expected_result: indoc! {"
+ completely
+ different
+ content
+ "},
+ },
+ Case {
+ name: "hunk_context_found_applied",
+ diff: indoc! {"
+ @@ -1,3 +1,4 @@
+ line1
+ +inserted
+ line2
+ line3
+ "},
+ content: indoc! {"
+ line1
+ line2
+ line3
+ "},
+ expected_result: indoc! {"
+ line1
+ inserted
+ line2
+ line3
+ "},
+ },
+ Case {
+ name: "multiple_hunks_partial_match",
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ not_found
+ +skipped
+ also_not_found
+ @@ -5,2 +6,3 @@
+ line5
+ +applied
+ line6
+ "},
+ content: indoc! {"
+ line1
+ line2
+ line3
+ line4
+ line5
+ line6
+ "},
+ expected_result: indoc! {"
+ line1
+ line2
+ line3
+ line4
+ line5
+ applied
+ line6
+ "},
+ },
+ Case {
+ name: "empty_diff",
+ diff: "",
+ content: indoc! {"
+ unchanged
+ content
+ "},
+ expected_result: indoc! {"
+ unchanged
+ content
+ "},
+ },
+ ];
+
+ // Run every case; the case name is included in the failure message.
+ for case in &cases {
+ let result = apply_diff_to_string_lenient(case.diff, case.content);
+ assert_eq!(
+ result, case.expected_result,
+ "Test '{}': expected:\n{}\ngot:\n{}",
+ case.name, case.expected_result, result
+ );
+ }
+ }
+
+ /// Table-driven test for `compute_reversal_overlap` on non-ASCII text. The expected
+ /// values are character counts, as spelled out in the per-case comments, so
+ /// multi-byte characters must each count once.
+ #[test]
+ fn test_unicode_reversal_overlap() {
+ struct Case {
+ name: &'static str,
+ original: &'static str,
+ current: &'static str,
+ predicted: &'static str,
+ expected_reversal_chars: usize,
+ expected_total_chars: usize,
+ }
+
+ let cases = [
+ Case {
+ name: "unicode_extension_cjk",
+ original: "",
+ current: "日", // 1 char
+ predicted: "日本語", // 3 chars, adds 2 chars
+ expected_reversal_chars: 0,
+ expected_total_chars: 2, // "本語" = 2 chars added
+ },
+ Case {
+ name: "unicode_extension_emoji",
+ original: "",
+ current: "🎉", // 1 char
+ predicted: "🎉🎊🎈", // 3 chars, adds 2 chars
+ expected_reversal_chars: 0,
+ expected_total_chars: 2, // "🎊🎈" = 2 chars added
+ },
+ Case {
+ name: "unicode_deletion_restored",
+ original: "héllo wörld", // 11 chars
+ current: "héllo", // 5 chars
+ predicted: "héllo wörld", // restores " wörld" = 6 chars
+ expected_reversal_chars: 6, // LCS(" wörld", " wörld") = 6 chars
+ expected_total_chars: 6,
+ },
+ Case {
+ name: "unicode_addition_reversed",
+ original: "café", // 4 chars
+ current: "café latté", // 10 chars, added " latté" = 6 chars
+ predicted: "café", // removes " latté"
+ expected_reversal_chars: 6, // 6 chars removed
+ expected_total_chars: 6,
+ },
+ Case {
+ name: "mixed_ascii_unicode",
+ original: "",
+ current: "test日本", // 6 chars
+ predicted: "test日本語です", // 9 chars
+ expected_reversal_chars: 0,
+ expected_total_chars: 3, // 3 new chars after subsequence normalization
+ },
+ Case {
+ name: "unicode_replacement_not_subsequence",
+ original: "",
+ current: "日本", // 2 chars
+ predicted: "中国", // 2 chars, different
+ expected_reversal_chars: 2, // removes "日本" = 2 chars
+ expected_total_chars: 4, // 2 removed + 2 added
+ },
+ ];
+
+ // Run every case; the case name is included in the failure message.
+ for case in &cases {
+ let overlap = compute_reversal_overlap(case.original, case.current, case.predicted);
+ assert_eq!(
+ overlap.chars_reversing_user_edits, case.expected_reversal_chars,
+ "Test '{}': expected {} reversal chars, got {}",
+ case.name, case.expected_reversal_chars, overlap.chars_reversing_user_edits
+ );
+ assert_eq!(
+ overlap.total_chars_in_prediction, case.expected_total_chars,
+ "Test '{}': expected {} total chars, got {}",
+ case.name, case.expected_total_chars, overlap.total_chars_in_prediction
+ );
+ }
+ }
+
+ /// Checks `is_subsequence(needle, haystack)` against a table of ASCII and
+ /// non-ASCII inputs, covering empty strings, exact matches, interleaved
+ /// matches, wrong order and a needle longer than the haystack.
+ #[test]
+ fn test_is_subsequence() {
+     // (needle, haystack, expected result)
+     let cases = [
+         ("", "anything", true),
+         ("", "", true),
+         ("abc", "abc", true),
+         ("abc", "aXbXc", true),
+         ("ac", "abc", true),
+         ("abc", "ab", false),
+         ("abc", "cba", false),
+         ("abc", "", false),
+         ("日本", "日X本Y語", true),
+         ("日本語", "日本", false),
+     ];
+
+     for &(needle, haystack, expected) in &cases {
+         assert_eq!(
+             is_subsequence(needle, haystack),
+             expected,
+             "is_subsequence({:?}, {:?})",
+             needle,
+             haystack
+         );
+     }
+ }
+
+ /// Checks `compute_lcs_length` against a table of string pairs, including
+ /// empty inputs, identical and disjoint strings, and a non-ASCII pair.
+ #[test]
+ fn test_compute_lcs_length() {
+     // (a, b, expected LCS length)
+     let cases: [(&str, &str, usize); 8] = [
+         ("", "", 0),
+         ("abc", "", 0),
+         ("", "abc", 0),
+         ("abc", "abc", 3),
+         ("abc", "def", 0),
+         ("abcdef", "ace", 3),
+         ("AGGTAB", "GXTXAYB", 4),
+         ("日本語", "日語", 2),
+     ];
+
+     for &(a, b, expected) in &cases {
+         assert_eq!(
+             compute_lcs_length(a, b),
+             expected,
+             "compute_lcs_length({:?}, {:?})",
+             a,
+             b
+         );
+     }
+ }
+
+ /// With `excerpt_start_row: None`, a prediction that removes the line added by the
+ /// only history event is expected to yield a reversal ratio above 0.9.
+ #[test]
+ fn test_compute_prediction_reversal_ratio_full_file() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ line1
+ user_added
+ line2
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/test.rs")),
+ old_path: Arc::from(Path::new("src/test.rs")),
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +user_added
+ line2
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ })],
+ excerpt_start_row: None,
+ related_files: None,
+ };
+
+ // The prediction drops the `user_added` line again.
+ let predicted = indoc! {"
+ line1
+ line2
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert!(
+ ratio > 0.9,
+ "Expected high reversal ratio when prediction removes user addition, got {}",
+ ratio
+ );
+ }
+
+ /// Same scenario as the full-file case but with `excerpt_start_row: Some(10)`, so
+ /// `compute_prediction_reversal_ratio` takes its excerpt-aware branch. The ratio is
+ /// again expected to exceed 0.9.
+ #[test]
+ fn test_compute_prediction_reversal_ratio_with_excerpt() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ line10
+ user_added
+ line11
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/test.rs")),
+ old_path: Arc::from(Path::new("src/test.rs")),
+ diff: indoc! {"
+ @@ -10,2 +10,3 @@
+ line10
+ +user_added
+ line11
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ })],
+ excerpt_start_row: Some(10),
+ related_files: None,
+ };
+
+ // The prediction drops the `user_added` line again.
+ let predicted = indoc! {"
+ line10
+ line11
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert!(
+ ratio > 0.9,
+ "Expected high reversal ratio for excerpt-aware computation, got {}",
+ ratio
+ );
+ }
+
+ /// With an empty edit history there are no user edits to reverse, so the ratio is
+ /// expected to be exactly 0.0 even though the prediction rewrites the content.
+ #[test]
+ fn test_compute_prediction_reversal_ratio_no_history() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ original content
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![],
+ excerpt_start_row: None,
+ related_files: None,
+ };
+
+ let predicted = indoc! {"
+ completely different
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert_eq!(
+ ratio, 0.0,
+ "Expected zero reversal ratio with no edit history"
+ );
+ }
+
+ /// The only history event is recorded for `src/other.rs` while the ratio is
+ /// requested for `src/test.rs`; the event is expected to be ignored, giving 0.0.
+ #[test]
+ fn test_compute_prediction_reversal_ratio_path_filtering() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ line1
+ user_added
+ line2
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/other.rs")),
+ old_path: Arc::from(Path::new("src/other.rs")),
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +user_added
+ line2
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ })],
+ excerpt_start_row: None,
+ related_files: None,
+ };
+
+ let predicted = indoc! {"
+ line1
+ line2
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert_eq!(
+ ratio, 0.0,
+ "Expected zero reversal when edit history is for different file"
+ );
+ }
+
+ /// The history diff's context lines do not occur in the content, presumably so that
+ /// strict application fails and the lenient fallback is used (TODO confirm against
+ /// `apply_diff_to_string`). Only asserts that the ratio stays within `0.0..=1.0`.
+ #[test]
+ fn test_compute_prediction_reversal_ratio_lenient_fallback() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ actual_line1
+ user_added
+ actual_line2
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/test.rs")),
+ old_path: Arc::from(Path::new("src/test.rs")),
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ wrong_context
+ +user_added
+ more_wrong
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ })],
+ excerpt_start_row: None,
+ related_files: None,
+ };
+
+ let predicted = indoc! {"
+ actual_line1
+ actual_line2
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert!(
+ ratio >= 0.0 && ratio <= 1.0,
+ "Ratio should be valid even with lenient fallback, got {}",
+ ratio
+ );
+ }
+
+ /// Calls `compute_excerpt_aware_reversal_overlap` with a diff whose context lines do
+ /// not occur in the excerpt and checks that the resulting ratio is still within
+ /// `0.0..=1.0`.
+ #[test]
+ fn test_excerpt_aware_reversal_error_recovery() {
+ let diffs = vec![indoc! {"
+ @@ -1,2 +1,3 @@
+ nonexistent_context
+ +added
+ more_nonexistent
+ "}];
+ let excerpt_content = indoc! {"
+ completely
+ different
+ content
+ "};
+ let predicted_content = indoc! {"
+ completely
+ modified
+ content
+ "};
+
+ let overlap =
+ compute_excerpt_aware_reversal_overlap(&diffs, excerpt_content, 0, predicted_content);
+
+ assert!(
+ overlap.ratio() >= 0.0 && overlap.ratio() <= 1.0,
+ "Should handle failed diff application gracefully"
+ );
+ }
+
+ /// Two history events for the same file each add one line, the second building on
+ /// the first. A prediction that removes both added lines is expected to give a
+ /// reversal ratio above 0.9.
+ #[test]
+ fn test_multiple_sequential_diffs() {
+ let prompt_inputs = ExamplePromptInputs {
+ content: indoc! {"
+ line1
+ first_add
+ second_add
+ line2
+ "}
+ .to_string(),
+ cursor_row: 0,
+ cursor_column: 0,
+ cursor_offset: 0,
+ edit_history: vec![
+ Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/test.rs")),
+ old_path: Arc::from(Path::new("src/test.rs")),
+ diff: indoc! {"
+ @@ -1,2 +1,3 @@
+ line1
+ +first_add
+ line2
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ }),
+ Arc::new(zeta_prompt::Event::BufferChange {
+ path: Arc::from(Path::new("src/test.rs")),
+ old_path: Arc::from(Path::new("src/test.rs")),
+ diff: indoc! {"
+ @@ -2,2 +2,3 @@
+ first_add
+ +second_add
+ line2
+ "}
+ .into(),
+ predicted: false,
+ in_open_source_repo: false,
+ }),
+ ],
+ excerpt_start_row: None,
+ related_files: None,
+ };
+
+ // The prediction removes both `first_add` and `second_add`.
+ let predicted = indoc! {"
+ line1
+ line2
+ "};
+ let ratio =
+ compute_prediction_reversal_ratio(&prompt_inputs, predicted, Path::new("src/test.rs"));
+
+ assert!(
+ ratio > 0.9,
+ "Expected high reversal ratio when reversing multiple sequential edits, got {}",
+ ratio
+ );
+ }
}