diff --git a/crates/edit_prediction/src/udiff.rs b/crates/edit_prediction/src/udiff.rs index c4a9505de5d0fab1a361b8479beb637a0d22f5b9..ca810f8b731a49a20a3436eab95e87bcbacb0323 100644 --- a/crates/edit_prediction/src/udiff.rs +++ b/crates/edit_prediction/src/udiff.rs @@ -230,6 +230,26 @@ pub fn strip_diff_metadata(diff: &str) -> String { result } +/// Given multiple candidate offsets where context matches, use line numbers to disambiguate. +/// Returns the offset that matches the expected line, or None if no match or no line number available. +fn disambiguate_by_line_number( + candidates: &[usize], + expected_line: Option, + offset_to_line: impl Fn(usize) -> u32, +) -> Option { + match candidates.len() { + 0 => None, + 1 => Some(candidates[0]), + _ => { + let expected = expected_line?; + candidates + .iter() + .copied() + .find(|&offset| offset_to_line(offset) == expected) + } + } +} + pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result { let mut diff = DiffParser::new(diff_str); @@ -242,9 +262,18 @@ pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result { path: _, is_new_file: _, } => { - let hunk_offset = text - .find(&hunk.context) + // Find all matches of the context in the text + let candidates: Vec = text + .match_indices(&hunk.context) + .map(|(offset, _)| offset) + .collect(); + + let hunk_offset = + disambiguate_by_line_number(&candidates, hunk.start_line, |offset| { + text[..offset].matches('\n').count() as u32 + }) .ok_or_else(|| anyhow!("couldn't resolve hunk {:?}", hunk.context))?; + for edit in hunk.edits.iter().rev() { let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end); text.replace_range(range, &edit.text); @@ -276,17 +305,20 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result return Ok(Vec::new()); } - // Find the context in the content - let first_match = content.find(&hunk.context); - let Some(context_offset) = first_match else { + // Find all matches of the context in the content + let candidates: Vec = content + .match_indices(&hunk.context) + .map(|(offset, _)| offset) + .collect(); + + let Some(context_offset) = + disambiguate_by_line_number(&candidates, hunk.start_line, |offset| { + content[..offset].matches('\n').count() as u32 + }) + else { return Ok(Vec::new()); }; - // Check for ambiguity - if context appears more than once, reject - if content[context_offset + 1..].contains(&hunk.context) { - return Ok(Vec::new()); - } - // Use sub-line diffing to find precise edit positions for edit in &hunk.edits { let old_text = &content @@ -316,6 +348,18 @@ struct DiffParser<'a> { current_line: Option<(&'a str, DiffLine<'a>)>, hunk: Hunk, diff: std::str::Lines<'a>, + pending_start_line: Option, + processed_no_newline: bool, + last_diff_op: LastDiffOp, +} + +#[derive(Clone, Copy, Default)] +enum LastDiffOp { + #[default] + None, + Context, + Deletion, + Addition, } #[derive(Debug, PartialEq)] @@ -334,6 +378,7 @@ enum DiffEvent<'a> { struct Hunk { context: String, edits: Vec, + start_line: Option, } impl Hunk { @@ -357,6 +402,9 @@ impl<'a> DiffParser<'a> { hunk: Hunk::default(), current_line, diff, + pending_start_line: None, + processed_no_newline: false, + last_diff_op: LastDiffOp::None, } } @@ -378,9 +426,13 @@ impl<'a> DiffParser<'a> { } else { file.old_path.clone() }; + let mut hunk = mem::take(&mut self.hunk); + hunk.start_line = self.pending_start_line.take(); + self.processed_no_newline = false; + self.last_diff_op = LastDiffOp::None; return Ok(Some(DiffEvent::Hunk { path, - hunk: mem::take(&mut self.hunk), + hunk, is_new_file, })); } @@ -415,10 +467,15 @@ impl<'a> DiffParser<'a> { current_file.new_path = path } } - DiffLine::HunkHeader(_) => {} + DiffLine::HunkHeader(location) => { + if let Some(loc) = location { + self.pending_start_line = Some(loc.start_line_old); + } + } DiffLine::Context(ctx) => { if self.current_file.is_some() { writeln!(&mut self.hunk.context, "{ctx}")?; + self.last_diff_op = LastDiffOp::Context; } } DiffLine::Deletion(del) => { @@ -436,6 +493,7 @@ impl<'a> DiffParser<'a> { }); } writeln!(&mut self.hunk.context, "{del}")?; + self.last_diff_op = LastDiffOp::Deletion; } } DiffLine::Addition(add) => { @@ -451,23 +509,31 @@ impl<'a> DiffParser<'a> { text: format!("{add}\n"), }); } + self.last_diff_op = LastDiffOp::Addition; } } DiffLine::NoNewlineAtEOF => { - if let Some(last_edit) = self.hunk.edits.last_mut() { - if last_edit.text.ends_with('\n') { - // Previous line was an addition (has trailing newline in text) - last_edit.text.pop(); - } else if !last_edit.range.is_empty() - && last_edit.range.end == self.hunk.context.len() - { - // Previous line was a deletion (non-empty range at end of context) - self.hunk.context.pop(); - last_edit.range.end -= 1; + if !self.processed_no_newline { + self.processed_no_newline = true; + match self.last_diff_op { + LastDiffOp::Addition => { + // Remove trailing newline from the last addition + if let Some(last_edit) = self.hunk.edits.last_mut() { + last_edit.text.pop(); + } + } + LastDiffOp::Deletion => { + // Remove trailing newline from context (which includes the deletion) + self.hunk.context.pop(); + if let Some(last_edit) = self.hunk.edits.last_mut() { + last_edit.range.end -= 1; + } + } + LastDiffOp::Context | LastDiffOp::None => { + // Remove trailing newline from context + self.hunk.context.pop(); + } } - } else { - // Previous line was context (no edits) - self.hunk.context.pop(); } } DiffLine::Garbage(_) => {} @@ -491,27 +557,32 @@ fn resolve_hunk_edits_in_buffer( is_new_file: bool, ) -> Result, Arc)>, anyhow::Error> { let context_offset = if is_new_file || hunk.context.is_empty() { - Ok(0) + 0 } else { - let mut offset = None; + let mut candidates: Vec = Vec::new(); for range in ranges { let range = range.to_offset(buffer); let text = buffer.text_for_range(range.clone()).collect::(); for (ix, _) in text.match_indices(&hunk.context) { - if offset.is_some() { - anyhow::bail!("Context is not unique enough:\n{}", hunk.context); - } - offset = Some(range.start + ix); + candidates.push(range.start + ix); } } - offset.ok_or_else(|| { - anyhow!( - "Failed to match context:\n\n```\n{}```\n\nBuffer contents:\n\n```\n{}```", - hunk.context, - buffer.text() - ) + + disambiguate_by_line_number(&candidates, hunk.start_line, |offset| { + buffer.offset_to_point(offset).row }) - }?; + .ok_or_else(|| { + if candidates.is_empty() { + anyhow!( + "Failed to match context:\n\n```\n{}```\n\nBuffer contents:\n\n```\n{}```", + hunk.context, + buffer.text() + ) + } else { + anyhow!("Context is not unique enough:\n{}", hunk.context) + } + })? + }; let iter = hunk.edits.into_iter().flat_map(move |edit| { let old_text = buffer .text_for_range(context_offset + edit.range.start..context_offset + edit.range.end) @@ -878,6 +949,7 @@ mod tests { range: 4..4, text: "AND\n".into() }], + start_line: None, }, is_new_file: false, }, @@ -928,6 +1000,7 @@ mod tests { range: 80..203, text: "".into() }], + start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed) }, is_new_file: false, }, @@ -965,6 +1038,91 @@ mod tests { range: 8..16, text: "added line".into() }], + start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed) + }, + is_new_file: false, + }, + DiffEvent::FileEnd { renamed_to: None } + ], + ); + } + + #[test] + fn test_double_no_newline_at_eof() { + // Two consecutive "no newline" markers - the second should be ignored + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,3 +1,3 @@ + line1 + -old + +new + line3 + \\ No newline at end of file + \\ No newline at end of file + "}; + + let mut events = Vec::new(); + let mut parser = DiffParser::new(diff); + while let Some(event) = parser.next().unwrap() { + events.push(event); + } + + assert_eq!( + events, + &[ + DiffEvent::Hunk { + path: "file.txt".into(), + hunk: Hunk { + context: "line1\nold\nline3".into(), // Only one newline removed + edits: vec![Edit { + range: 6..10, // "old\n" is 4 bytes + text: "new\n".into() + }], + start_line: Some(0), + }, + is_new_file: false, + }, + DiffEvent::FileEnd { renamed_to: None } + ], + ); + } + + #[test] + fn test_no_newline_after_context_not_addition() { + // "No newline" after context lines should remove newline from context, + // not from an earlier addition + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,4 +1,4 @@ + line1 + -old + +new + line3 + line4 + \\ No newline at end of file + "}; + + let mut events = Vec::new(); + let mut parser = DiffParser::new(diff); + while let Some(event) = parser.next().unwrap() { + events.push(event); + } + + assert_eq!( + events, + &[ + DiffEvent::Hunk { + path: "file.txt".into(), + hunk: Hunk { + // newline removed from line4 (context), not from "new" (addition) + context: "line1\nold\nline3\nline4".into(), + edits: vec![Edit { + range: 6..10, // "old\n" is 4 bytes + text: "new\n".into() // Still has newline + }], + start_line: Some(0), }, is_new_file: false, }, @@ -973,6 +1131,68 @@ mod tests { ); } + #[test] + fn test_line_number_disambiguation() { + // Test that line numbers from hunk headers are used to disambiguate + // when context before the operation appears multiple times + let content = indoc! {" + repeated line + first unique + repeated line + second unique + "}; + + // Context "repeated line" appears twice - line number selects first occurrence + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -1,2 +1,2 @@ + repeated line + -first unique + +REPLACED + "}; + + let result = edits_for_diff(content, diff).unwrap(); + assert_eq!(result.len(), 1); + + // The edit should replace "first unique" (after first "repeated line\n" at offset 14) + let (range, text) = &result[0]; + assert_eq!(range.start, 14); + assert_eq!(range.end, 26); // "first unique" is 12 bytes + assert_eq!(text, "REPLACED"); + } + + #[test] + fn test_line_number_disambiguation_second_match() { + // Test disambiguation when the edit should apply to a later occurrence + let content = indoc! {" + repeated line + first unique + repeated line + second unique + "}; + + // Context "repeated line" appears twice - line number selects second occurrence + let diff = indoc! {" + --- a/file.txt + +++ b/file.txt + @@ -3,2 +3,2 @@ + repeated line + -second unique + +REPLACED + "}; + + let result = edits_for_diff(content, diff).unwrap(); + assert_eq!(result.len(), 1); + + // The edit should replace "second unique" (after second "repeated line\n") + // Offset: "repeated line\n" (14) + "first unique\n" (13) + "repeated line\n" (14) = 41 + let (range, text) = &result[0]; + assert_eq!(range.start, 41); + assert_eq!(range.end, 54); // "second unique" is 13 bytes + assert_eq!(text, "REPLACED"); + } + #[gpui::test] async fn test_apply_diff_successful(cx: &mut TestAppContext) { let fs = init_test(cx);