ep: Heuristic to apply patches to files without trailing newline (#50291)

Oleksiy Syvokon created

This is a workaround for the fact that ep patches don't have the '\No
newline at end of file` marker.


Release Notes:

- N/A

Change summary

crates/edit_prediction/src/udiff.rs | 261 +++++++++++++++++++++++++++++-
1 file changed, 247 insertions(+), 14 deletions(-)

Detailed changes

crates/edit_prediction/src/udiff.rs πŸ”—

@@ -266,6 +266,66 @@ pub fn strip_diff_metadata(diff: &str) -> String {
     result
 }
 
+/// Find all byte offsets where `hunk.context` occurs as a substring of `text`.
+///
+/// If no exact matches are found and the context ends with `'\n'` but `text`
+/// does not, retries without the trailing newline, accepting only a match at
+/// the very end of `text`. When this fallback fires, the hunk's context is
+/// trimmed and its edit ranges are clamped so that downstream code doesn't
+/// index past the end of the matched region. This handles diffs that are
+/// missing a `\ No newline at end of file` marker: the parser always appends
+/// `'\n'` via `writeln!`, so the context can have a trailing newline that
+/// doesn't exist in the source text.
+fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec<usize> {
+    let candidates: Vec<usize> = text
+        .match_indices(&hunk.context)
+        .map(|(offset, _)| offset)
+        .collect();
+
+    if !candidates.is_empty() {
+        return candidates;
+    }
+
+    if hunk.context.ends_with('\n') && !hunk.context.is_empty() {
+        let old_len = hunk.context.len();
+        hunk.context.pop();
+        let new_len = hunk.context.len();
+
+        if !hunk.context.is_empty() {
+            let candidates: Vec<usize> = text
+                .match_indices(&hunk.context)
+                .filter(|(offset, _)| offset + new_len == text.len())
+                .map(|(offset, _)| offset)
+                .collect();
+
+            if !candidates.is_empty() {
+                for edit in &mut hunk.edits {
+                    let touched_phantom = edit.range.end > new_len;
+                    edit.range.start = edit.range.start.min(new_len);
+                    edit.range.end = edit.range.end.min(new_len);
+                    if touched_phantom {
+                        // The replacement text was also written with a
+                        // trailing '\n' that corresponds to the phantom
+                        // newline we just removed from the context.
+                        if edit.text.ends_with('\n') {
+                            edit.text.pop();
+                        }
+                    }
+                }
+                return candidates;
+            }
+
+            // Restore if fallback didn't help either.
+            hunk.context.push('\n');
+            debug_assert_eq!(hunk.context.len(), old_len);
+        } else {
+            hunk.context.push('\n');
+        }
+    }
+
+    Vec::new()
+}
+
 /// Given multiple candidate offsets where context matches, use line numbers to disambiguate.
 /// Returns the offset that matches the expected line, or None if no match or no line number available.
 fn disambiguate_by_line_number(
@@ -305,15 +365,11 @@ pub fn apply_diff_to_string_with_hunk_offset(
     while let Some(event) = diff.next().context("Failed to parse diff")? {
         match event {
             DiffEvent::Hunk {
-                hunk,
+                mut hunk,
                 path: _,
                 status: _,
             } => {
-                // Find all matches of the context in the text
-                let candidates: Vec<usize> = text
-                    .match_indices(&hunk.context)
-                    .map(|(offset, _)| offset)
-                    .collect();
+                let candidates = find_context_candidates(&text, &mut hunk);
 
                 let hunk_offset =
                     disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| {
@@ -348,7 +404,7 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result<Vec<(Range<usize>
     while let Some(event) = diff.next()? {
         match event {
             DiffEvent::Hunk {
-                hunk,
+                mut hunk,
                 path: _,
                 status: _,
             } => {
@@ -356,11 +412,7 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result<Vec<(Range<usize>
                     return Ok(Vec::new());
                 }
 
-                // Find all matches of the context in the content
-                let candidates: Vec<usize> = content
-                    .match_indices(&hunk.context)
-                    .map(|(offset, _)| offset)
-                    .collect();
+                let candidates = find_context_candidates(content, &mut hunk);
 
                 let Some(context_offset) =
                     disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| {
@@ -611,7 +663,7 @@ impl<'a> DiffParser<'a> {
 }
 
 fn resolve_hunk_edits_in_buffer(
-    hunk: Hunk,
+    mut hunk: Hunk,
     buffer: &TextBufferSnapshot,
     ranges: &[Range<Anchor>],
     status: FileStatus,
@@ -623,7 +675,7 @@ fn resolve_hunk_edits_in_buffer(
         for range in ranges {
             let range = range.to_offset(buffer);
             let text = buffer.text_for_range(range.clone()).collect::<String>();
-            for (ix, _) in text.match_indices(&hunk.context) {
+            for ix in find_context_candidates(&text, &mut hunk) {
                 candidates.push(range.start + ix);
             }
         }
@@ -1513,4 +1565,185 @@ mod tests {
             "#}
         );
     }
+
+    #[test]
+    fn test_apply_diff_to_string_no_trailing_newline() {
+        // Text without trailing newline; diff generated without
+        // `\ No newline at end of file` marker.
+        let text = "line1\nline2\nline3";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,3 @@
+             line1
+            -line2
+            +replaced
+             line3
+        "};
+
+        let result = apply_diff_to_string(diff, text).unwrap();
+        assert_eq!(result, "line1\nreplaced\nline3");
+    }
+
+    #[test]
+    fn test_apply_diff_to_string_trailing_newline_present() {
+        // When text has a trailing newline, exact matching still works and
+        // the fallback is never needed.
+        let text = "line1\nline2\nline3\n";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,3 @@
+             line1
+            -line2
+            +replaced
+             line3
+        "};
+
+        let result = apply_diff_to_string(diff, text).unwrap();
+        assert_eq!(result, "line1\nreplaced\nline3\n");
+    }
+
+    #[test]
+    fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() {
+        // Deletion of the last line when text has no trailing newline.
+        // The edit range must be clamped so it doesn't index past the
+        // end of the text.
+        let text = "line1\nline2\nline3";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,2 @@
+             line1
+             line2
+            -line3
+        "};
+
+        let result = apply_diff_to_string(diff, text).unwrap();
+        assert_eq!(result, "line1\nline2\n");
+    }
+
+    #[test]
+    fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() {
+        // Replace the last line when text has no trailing newline.
+        let text = "aaa\nbbb\nccc";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,3 @@
+             aaa
+             bbb
+            -ccc
+            +ddd
+        "};
+
+        let result = apply_diff_to_string(diff, text).unwrap();
+        assert_eq!(result, "aaa\nbbb\nddd");
+    }
+
+    #[test]
+    fn test_apply_diff_to_string_multibyte_no_trailing_newline() {
+        // Multi-byte UTF-8 characters near the end; ensures char boundary
+        // safety when the fallback clamps edit ranges.
+        let text = "hello\n세계";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,2 +1,2 @@
+             hello
+            -세계
+            +world
+        "};
+
+        let result = apply_diff_to_string(diff, text).unwrap();
+        assert_eq!(result, "hello\nworld");
+    }
+
+    #[test]
+    fn test_find_context_candidates_no_false_positive_mid_text() {
+        // The stripped fallback must only match at the end of text, not in
+        // the middle where a real newline exists.
+        let text = "aaa\nbbb\nccc\n";
+        let mut hunk = Hunk {
+            context: "bbb\n".into(),
+            edits: vec![],
+            start_line: None,
+        };
+
+        let candidates = find_context_candidates(text, &mut hunk);
+        // Exact match at offset 4 β€” the fallback is not used.
+        assert_eq!(candidates, vec![4]);
+    }
+
+    #[test]
+    fn test_find_context_candidates_fallback_at_end() {
+        let text = "aaa\nbbb";
+        let mut hunk = Hunk {
+            context: "bbb\n".into(),
+            edits: vec![],
+            start_line: None,
+        };
+
+        let candidates = find_context_candidates(text, &mut hunk);
+        assert_eq!(candidates, vec![4]);
+        // Context should be stripped.
+        assert_eq!(hunk.context, "bbb");
+    }
+
+    #[test]
+    fn test_find_context_candidates_no_fallback_mid_text() {
+        // "bbb" appears mid-text followed by a newline, so the exact
+        // match succeeds. Verify the stripped fallback doesn't produce a
+        // second, spurious candidate.
+        let text = "aaa\nbbb\nccc";
+        let mut hunk = Hunk {
+            context: "bbb\nccc\n".into(),
+            edits: vec![],
+            start_line: None,
+        };
+
+        let candidates = find_context_candidates(text, &mut hunk);
+        // No exact match (text ends without newline after "ccc"), but the
+        // stripped context "bbb\nccc" matches at offset 4, which is the end.
+        assert_eq!(candidates, vec![4]);
+        assert_eq!(hunk.context, "bbb\nccc");
+    }
+
+    #[test]
+    fn test_find_context_candidates_clamps_edit_ranges() {
+        let text = "aaa\nbbb";
+        let mut hunk = Hunk {
+            context: "aaa\nbbb\n".into(),
+            edits: vec![Edit {
+                range: 4..8, // "bbb\n" β€” end points at the trailing \n
+                text: "ccc\n".into(),
+            }],
+            start_line: None,
+        };
+
+        let candidates = find_context_candidates(text, &mut hunk);
+        assert_eq!(candidates, vec![0]);
+        // Edit range end should be clamped to 7 (new context length).
+        assert_eq!(hunk.edits[0].range, 4..7);
+    }
+
+    #[test]
+    fn test_edits_for_diff_no_trailing_newline() {
+        let content = "foo\nbar\nbaz";
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,3 @@
+             foo
+            -bar
+            +qux
+             baz
+        "};
+
+        let result = edits_for_diff(content, diff).unwrap();
+        assert_eq!(result.len(), 1);
+        let (range, text) = &result[0];
+        assert_eq!(&content[range.clone()], "bar");
+        assert_eq!(text, "qux");
+    }
 }