From f119b978f5428f4dbaa73cad4c1c40735217e3a1 Mon Sep 17 00:00:00 2001
From: Agus Zubiaga
Date: Wed, 7 Jan 2026 12:14:17 -0300
Subject: [PATCH] ep cli: Make the udiff parsing more resilient (#46264)

We will now properly handle `\ No newline at end of file` markers that
appear after context lines, as well as repeated occurrences of the marker.

Also, when line numbers are present in hunk headers, we will use them to
disambiguate the hunk location when the context alone is not unique enough.

Release Notes:

- N/A
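
For example (an illustrative sketch of the case exercised by the new
test_line_number_disambiguation_second_match test below), given the file

    repeated line
    first unique
    repeated line
    second unique

and the hunk

    @@ -3,2 +3,2 @@
     repeated line
    -second unique
    +REPLACED

the context "repeated line" matches at two offsets. The old start line
recorded in the header (line 3, i.e. row 2 zero-indexed) selects the second
occurrence, whose context begins on that row, instead of the hunk being
rejected as ambiguous.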
---
 crates/edit_prediction/src/udiff.rs | 296 ++++++++++++++++++++++++----
 1 file changed, 258 insertions(+), 38 deletions(-)

diff --git a/crates/edit_prediction/src/udiff.rs b/crates/edit_prediction/src/udiff.rs
index c4a9505de5d0fab1a361b8479beb637a0d22f5b9..ca810f8b731a49a20a3436eab95e87bcbacb0323 100644
--- a/crates/edit_prediction/src/udiff.rs
+++ b/crates/edit_prediction/src/udiff.rs
@@ -230,6 +230,26 @@ pub fn strip_diff_metadata(diff: &str) -> String {
     result
 }
 
+/// Given multiple candidate offsets where context matches, use line numbers to disambiguate.
+/// Returns the offset that matches the expected line, or None if no match or no line number available.
+fn disambiguate_by_line_number(
+    candidates: &[usize],
+    expected_line: Option<u32>,
+    offset_to_line: impl Fn(usize) -> u32,
+) -> Option<usize> {
+    match candidates.len() {
+        0 => None,
+        1 => Some(candidates[0]),
+        _ => {
+            let expected = expected_line?;
+            candidates
+                .iter()
+                .copied()
+                .find(|&offset| offset_to_line(offset) == expected)
+        }
+    }
+}
+
 pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
     let mut diff = DiffParser::new(diff_str);
 
@@ -242,9 +262,18 @@ pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
                 path: _,
                 is_new_file: _,
             } => {
-                let hunk_offset = text
-                    .find(&hunk.context)
+                // Find all matches of the context in the text
+                let candidates: Vec<usize> = text
+                    .match_indices(&hunk.context)
+                    .map(|(offset, _)| offset)
+                    .collect();
+
+                let hunk_offset =
+                    disambiguate_by_line_number(&candidates, hunk.start_line, |offset| {
+                        text[..offset].matches('\n').count() as u32
+                    })
                     .ok_or_else(|| anyhow!("couldn't resolve hunk {:?}", hunk.context))?;
+
                 for edit in hunk.edits.iter().rev() {
                     let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end);
                     text.replace_range(range, &edit.text);
@@ -276,17 +305,20 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result<Vec<(Range<usize>, String)>> {
                 return Ok(Vec::new());
             }
 
-            // Find the context in the content
-            let first_match = content.find(&hunk.context);
-            let Some(context_offset) = first_match else {
+            // Find all matches of the context in the content
+            let candidates: Vec<usize> = content
+                .match_indices(&hunk.context)
+                .map(|(offset, _)| offset)
+                .collect();
+
+            let Some(context_offset) =
+                disambiguate_by_line_number(&candidates, hunk.start_line, |offset| {
+                    content[..offset].matches('\n').count() as u32
+                })
+            else {
                 return Ok(Vec::new());
             };
 
-            // Check for ambiguity - if context appears more than once, reject
-            if content[context_offset + 1..].contains(&hunk.context) {
-                return Ok(Vec::new());
-            }
-
             // Use sub-line diffing to find precise edit positions
             for edit in &hunk.edits {
                 let old_text = &content
@@ -316,6 +348,18 @@ struct DiffParser<'a> {
     current_line: Option<(&'a str, DiffLine<'a>)>,
     hunk: Hunk,
     diff: std::str::Lines<'a>,
+    pending_start_line: Option<u32>,
+    processed_no_newline: bool,
+    last_diff_op: LastDiffOp,
+}
+
+#[derive(Clone, Copy, Default)]
+enum LastDiffOp {
+    #[default]
+    None,
+    Context,
+    Deletion,
+    Addition,
 }
 
 #[derive(Debug, PartialEq)]
@@ -334,6 +378,7 @@ enum DiffEvent<'a> {
 struct Hunk {
     context: String,
     edits: Vec<Edit>,
+    start_line: Option<u32>,
 }
 
 impl Hunk {
@@ -357,6 +402,9 @@ impl<'a> DiffParser<'a> {
             hunk: Hunk::default(),
             current_line,
             diff,
+            pending_start_line: None,
+            processed_no_newline: false,
+            last_diff_op: LastDiffOp::None,
         }
     }
 
@@ -378,9 +426,13 @@ impl<'a> DiffParser<'a> {
                 } else {
                     file.old_path.clone()
                 };
+                let mut hunk = mem::take(&mut self.hunk);
+                hunk.start_line = self.pending_start_line.take();
+                self.processed_no_newline = false;
+                self.last_diff_op = LastDiffOp::None;
                 return Ok(Some(DiffEvent::Hunk {
                     path,
-                    hunk: mem::take(&mut self.hunk),
+                    hunk,
                     is_new_file,
                 }));
             }
@@ -415,10 +467,15 @@ impl<'a> DiffParser<'a> {
                     current_file.new_path = path
                 }
             }
-            DiffLine::HunkHeader(_) => {}
+            DiffLine::HunkHeader(location) => {
+                if let Some(loc) = location {
+                    self.pending_start_line = Some(loc.start_line_old);
+                }
+            }
             DiffLine::Context(ctx) => {
                 if self.current_file.is_some() {
                     writeln!(&mut self.hunk.context, "{ctx}")?;
+                    self.last_diff_op = LastDiffOp::Context;
                 }
             }
             DiffLine::Deletion(del) => {
@@ -436,6 +493,7 @@ impl<'a> DiffParser<'a> {
                         });
                     }
                     writeln!(&mut self.hunk.context, "{del}")?;
+                    self.last_diff_op = LastDiffOp::Deletion;
                 }
             }
             DiffLine::Addition(add) => {
@@ -451,23 +509,31 @@ impl<'a> DiffParser<'a> {
                             text: format!("{add}\n"),
                         });
                     }
+                    self.last_diff_op = LastDiffOp::Addition;
                 }
             }
             DiffLine::NoNewlineAtEOF => {
-                if let Some(last_edit) = self.hunk.edits.last_mut() {
-                    if last_edit.text.ends_with('\n') {
-                        // Previous line was an addition (has trailing newline in text)
-                        last_edit.text.pop();
-                    } else if !last_edit.range.is_empty()
-                        && last_edit.range.end == self.hunk.context.len()
-                    {
-                        // Previous line was a deletion (non-empty range at end of context)
-                        self.hunk.context.pop();
-                        last_edit.range.end -= 1;
+                if !self.processed_no_newline {
+                    self.processed_no_newline = true;
+                    match self.last_diff_op {
+                        LastDiffOp::Addition => {
+                            // Remove trailing newline from the last addition
+                            if let Some(last_edit) = self.hunk.edits.last_mut() {
+                                last_edit.text.pop();
+                            }
+                        }
+                        LastDiffOp::Deletion => {
+                            // Remove trailing newline from context (which includes the deletion)
+                            self.hunk.context.pop();
+                            if let Some(last_edit) = self.hunk.edits.last_mut() {
+                                last_edit.range.end -= 1;
+                            }
+                        }
+                        LastDiffOp::Context | LastDiffOp::None => {
+                            // Remove trailing newline from context
+                            self.hunk.context.pop();
+                        }
                     }
-                } else {
-                    // Previous line was context (no edits)
-                    self.hunk.context.pop();
                 }
             }
             DiffLine::Garbage(_) => {}
@@ -491,27 +557,32 @@ fn resolve_hunk_edits_in_buffer(
     is_new_file: bool,
 ) -> Result<Vec<(Range<Anchor>, Arc<str>)>, anyhow::Error> {
     let context_offset = if is_new_file || hunk.context.is_empty() {
-        Ok(0)
+        0
     } else {
-        let mut offset = None;
+        let mut candidates: Vec<usize> = Vec::new();
         for range in ranges {
             let range = range.to_offset(buffer);
             let text = buffer.text_for_range(range.clone()).collect::<String>();
             for (ix, _) in text.match_indices(&hunk.context) {
-                if offset.is_some() {
-                    anyhow::bail!("Context is not unique enough:\n{}", hunk.context);
-                }
-                offset = Some(range.start + ix);
+                candidates.push(range.start + ix);
             }
         }
-        offset.ok_or_else(|| {
-            anyhow!(
-                "Failed to match context:\n\n```\n{}```\n\nBuffer contents:\n\n```\n{}```",
-                hunk.context,
-                buffer.text()
-            )
+
+        disambiguate_by_line_number(&candidates, hunk.start_line, |offset| {
+            buffer.offset_to_point(offset).row
        })
-    }?;
+        .ok_or_else(|| {
+            if candidates.is_empty() {
+                anyhow!(
+                    "Failed to match context:\n\n```\n{}```\n\nBuffer contents:\n\n```\n{}```",
+                    hunk.context,
+                    buffer.text()
+                )
+            } else {
+                anyhow!("Context is not unique enough:\n{}", hunk.context)
+            }
+        })?
+    };
     let iter = hunk.edits.into_iter().flat_map(move |edit| {
         let old_text = buffer
             .text_for_range(context_offset + edit.range.start..context_offset + edit.range.end)
@@ -878,6 +949,7 @@ mod tests {
                         range: 4..4,
                         text: "AND\n".into()
                     }],
+                    start_line: None,
                 },
                 is_new_file: false,
             },
@@ -928,6 +1000,7 @@ mod tests {
                         range: 80..203,
                         text: "".into()
                     }],
+                    start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
                 },
                 is_new_file: false,
             },
@@ -965,6 +1038,91 @@ mod tests {
                         range: 8..16,
                         text: "added line".into()
                     }],
+                    start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
+                },
+                is_new_file: false,
+            },
+            DiffEvent::FileEnd { renamed_to: None }
+            ],
+        );
+    }
+
+    #[test]
+    fn test_double_no_newline_at_eof() {
+        // Two consecutive "no newline" markers - the second should be ignored
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,3 +1,3 @@
+             line1
+            -old
+            +new
+             line3
+            \\ No newline at end of file
+            \\ No newline at end of file
+        "};
+
+        let mut events = Vec::new();
+        let mut parser = DiffParser::new(diff);
+        while let Some(event) = parser.next().unwrap() {
+            events.push(event);
+        }
+
+        assert_eq!(
+            events,
+            &[
+                DiffEvent::Hunk {
+                    path: "file.txt".into(),
+                    hunk: Hunk {
+                        context: "line1\nold\nline3".into(), // Only one newline removed
+                        edits: vec![Edit {
+                            range: 6..10, // "old\n" is 4 bytes
+                            text: "new\n".into()
+                        }],
+                        start_line: Some(0),
+                    },
+                    is_new_file: false,
+                },
+                DiffEvent::FileEnd { renamed_to: None }
+            ],
+        );
+    }
+
+    #[test]
+    fn test_no_newline_after_context_not_addition() {
+        // "No newline" after context lines should remove newline from context,
+        // not from an earlier addition
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,4 +1,4 @@
+             line1
+            -old
+            +new
+             line3
+             line4
+            \\ No newline at end of file
+        "};
+
+        let mut events = Vec::new();
+        let mut parser = DiffParser::new(diff);
+        while let Some(event) = parser.next().unwrap() {
+            events.push(event);
+        }
+
+        assert_eq!(
+            events,
+            &[
+                DiffEvent::Hunk {
+                    path: "file.txt".into(),
+                    hunk: Hunk {
+                        // newline removed from line4 (context), not from "new" (addition)
+                        context: "line1\nold\nline3\nline4".into(),
+                        edits: vec![Edit {
+                            range: 6..10, // "old\n" is 4 bytes
+                            text: "new\n".into() // Still has newline
+                        }],
+                        start_line: Some(0),
                     },
                     is_new_file: false,
                 },
@@ -973,6 +1131,68 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_line_number_disambiguation() {
+        // Test that line numbers from hunk headers are used to disambiguate
+        // when context before the operation appears multiple times
+        let content = indoc! {"
{" + repeated line + first unique + repeated line + second unique + "}; + + // Context "repeated line" appears twice - line number selects second occurrence + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -3,2 +3,2 @@ + repeated line + -second unique + +REPLACED + "}; + + let result = edits_for_diff(content, diff).unwrap(); + assert_eq!(result.len(), 1); + + // The edit should replace "second unique" (after second "repeated line\n") + // Offset: "repeated line\n" (14) + "first unique\n" (13) + "repeated line\n" (14) = 41 + let (range, text) = &result[0]; + assert_eq!(range.start, 41); + assert_eq!(range.end, 54); // "second unique" is 13 bytes + assert_eq!(text, "REPLACED"); + } + #[gpui::test] async fn test_apply_diff_successful(cx: &mut TestAppContext) { let fs = init_test(cx);