diff --git a/crates/edit_prediction/src/udiff.rs b/crates/edit_prediction/src/udiff.rs index e1e475c85526befb5549571cf7b7a2e1ef10c3d8..14be1991d34e985067f5ad8729fd7ac8485211db 100644 --- a/crates/edit_prediction/src/udiff.rs +++ b/crates/edit_prediction/src/udiff.rs @@ -266,6 +266,66 @@ pub fn strip_diff_metadata(diff: &str) -> String { result } +/// Find all byte offsets where `hunk.context` occurs as a substring of `text`. +/// +/// If no exact matches are found and the context ends with `'\n'` but `text` +/// does not, retries without the trailing newline, accepting only a match at +/// the very end of `text`. When this fallback fires, the hunk's context is +/// trimmed and its edit ranges are clamped so that downstream code doesn't +/// index past the end of the matched region. This handles diffs that are +/// missing a `\ No newline at end of file` marker: the parser always appends +/// `'\n'` via `writeln!`, so the context can have a trailing newline that +/// doesn't exist in the source text. +fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec { + let candidates: Vec = text + .match_indices(&hunk.context) + .map(|(offset, _)| offset) + .collect(); + + if !candidates.is_empty() { + return candidates; + } + + if hunk.context.ends_with('\n') && !hunk.context.is_empty() { + let old_len = hunk.context.len(); + hunk.context.pop(); + let new_len = hunk.context.len(); + + if !hunk.context.is_empty() { + let candidates: Vec = text + .match_indices(&hunk.context) + .filter(|(offset, _)| offset + new_len == text.len()) + .map(|(offset, _)| offset) + .collect(); + + if !candidates.is_empty() { + for edit in &mut hunk.edits { + let touched_phantom = edit.range.end > new_len; + edit.range.start = edit.range.start.min(new_len); + edit.range.end = edit.range.end.min(new_len); + if touched_phantom { + // The replacement text was also written with a + // trailing '\n' that corresponds to the phantom + // newline we just removed from the context. + if edit.text.ends_with('\n') { + edit.text.pop(); + } + } + } + return candidates; + } + + // Restore if fallback didn't help either. + hunk.context.push('\n'); + debug_assert_eq!(hunk.context.len(), old_len); + } else { + hunk.context.push('\n'); + } + } + + Vec::new() +} + /// Given multiple candidate offsets where context matches, use line numbers to disambiguate. /// Returns the offset that matches the expected line, or None if no match or no line number available. fn disambiguate_by_line_number( @@ -305,15 +365,11 @@ pub fn apply_diff_to_string_with_hunk_offset( while let Some(event) = diff.next().context("Failed to parse diff")? { match event { DiffEvent::Hunk { - hunk, + mut hunk, path: _, status: _, } => { - // Find all matches of the context in the text - let candidates: Vec = text - .match_indices(&hunk.context) - .map(|(offset, _)| offset) - .collect(); + let candidates = find_context_candidates(&text, &mut hunk); let hunk_offset = disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| { @@ -348,7 +404,7 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result while let Some(event) = diff.next()? { match event { DiffEvent::Hunk { - hunk, + mut hunk, path: _, status: _, } => { @@ -356,11 +412,7 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result return Ok(Vec::new()); } - // Find all matches of the context in the content - let candidates: Vec = content - .match_indices(&hunk.context) - .map(|(offset, _)| offset) - .collect(); + let candidates = find_context_candidates(content, &mut hunk); let Some(context_offset) = disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| { @@ -611,7 +663,7 @@ impl<'a> DiffParser<'a> { } fn resolve_hunk_edits_in_buffer( - hunk: Hunk, + mut hunk: Hunk, buffer: &TextBufferSnapshot, ranges: &[Range], status: FileStatus, @@ -623,7 +675,7 @@ fn resolve_hunk_edits_in_buffer( for range in ranges { let range = range.to_offset(buffer); let text = buffer.text_for_range(range.clone()).collect::(); - for (ix, _) in text.match_indices(&hunk.context) { + for ix in find_context_candidates(&text, &mut hunk) { candidates.push(range.start + ix); } } @@ -1513,4 +1565,185 @@ mod tests { "#} ); } + + #[test] + fn test_apply_diff_to_string_no_trailing_newline() { + // Text without trailing newline; diff generated without + // `\ No newline at end of file` marker. + let text = "line1\nline2\nline3"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,3 @@ + line1 + -line2 + +replaced + line3 + "}; + + let result = apply_diff_to_string(diff, text).unwrap(); + assert_eq!(result, "line1\nreplaced\nline3"); + } + + #[test] + fn test_apply_diff_to_string_trailing_newline_present() { + // When text has a trailing newline, exact matching still works and + // the fallback is never needed. + let text = "line1\nline2\nline3\n"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,3 @@ + line1 + -line2 + +replaced + line3 + "}; + + let result = apply_diff_to_string(diff, text).unwrap(); + assert_eq!(result, "line1\nreplaced\nline3\n"); + } + + #[test] + fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() { + // Deletion of the last line when text has no trailing newline. + // The edit range must be clamped so it doesn't index past the + // end of the text. + let text = "line1\nline2\nline3"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,2 @@ + line1 + line2 + -line3 + "}; + + let result = apply_diff_to_string(diff, text).unwrap(); + assert_eq!(result, "line1\nline2\n"); + } + + #[test] + fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() { + // Replace the last line when text has no trailing newline. + let text = "aaa\nbbb\nccc"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,3 @@ + aaa + bbb + -ccc + +ddd + "}; + + let result = apply_diff_to_string(diff, text).unwrap(); + assert_eq!(result, "aaa\nbbb\nddd"); + } + + #[test] + fn test_apply_diff_to_string_multibyte_no_trailing_newline() { + // Multi-byte UTF-8 characters near the end; ensures char boundary + // safety when the fallback clamps edit ranges. + let text = "hello\n세계"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,2 +1,2 @@ + hello + -세계 + +world + "}; + + let result = apply_diff_to_string(diff, text).unwrap(); + assert_eq!(result, "hello\nworld"); + } + + #[test] + fn test_find_context_candidates_no_false_positive_mid_text() { + // The stripped fallback must only match at the end of text, not in + // the middle where a real newline exists. + let text = "aaa\nbbb\nccc\n"; + let mut hunk = Hunk { + context: "bbb\n".into(), + edits: vec![], + start_line: None, + }; + + let candidates = find_context_candidates(text, &mut hunk); + // Exact match at offset 4 — the fallback is not used. + assert_eq!(candidates, vec![4]); + } + + #[test] + fn test_find_context_candidates_fallback_at_end() { + let text = "aaa\nbbb"; + let mut hunk = Hunk { + context: "bbb\n".into(), + edits: vec![], + start_line: None, + }; + + let candidates = find_context_candidates(text, &mut hunk); + assert_eq!(candidates, vec![4]); + // Context should be stripped. + assert_eq!(hunk.context, "bbb"); + } + + #[test] + fn test_find_context_candidates_no_fallback_mid_text() { + // "bbb" appears mid-text followed by a newline, so the exact + // match succeeds. Verify the stripped fallback doesn't produce a + // second, spurious candidate. + let text = "aaa\nbbb\nccc"; + let mut hunk = Hunk { + context: "bbb\nccc\n".into(), + edits: vec![], + start_line: None, + }; + + let candidates = find_context_candidates(text, &mut hunk); + // No exact match (text ends without newline after "ccc"), but the + // stripped context "bbb\nccc" matches at offset 4, which is the end. + assert_eq!(candidates, vec![4]); + assert_eq!(hunk.context, "bbb\nccc"); + } + + #[test] + fn test_find_context_candidates_clamps_edit_ranges() { + let text = "aaa\nbbb"; + let mut hunk = Hunk { + context: "aaa\nbbb\n".into(), + edits: vec![Edit { + range: 4..8, // "bbb\n" — end points at the trailing \n + text: "ccc\n".into(), + }], + start_line: None, + }; + + let candidates = find_context_candidates(text, &mut hunk); + assert_eq!(candidates, vec![0]); + // Edit range end should be clamped to 7 (new context length). + assert_eq!(hunk.edits[0].range, 4..7); + } + + #[test] + fn test_edits_for_diff_no_trailing_newline() { + let content = "foo\nbar\nbaz"; + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,3 @@ + foo + -bar + +qux + baz + "}; + + let result = edits_for_diff(content, diff).unwrap(); + assert_eq!(result.len(), 1); + let (range, text) = &result[0]; + assert_eq!(&content[range.clone()], "bar"); + assert_eq!(text, "qux"); + } }