diff --git a/crates/edit_prediction/src/udiff.rs b/crates/edit_prediction/src/udiff.rs
index 89060bd5d6dd540716ac62f8b9d03329c99f1952..8bc6a36584e8679fe208ff995f9dbc3743cf3def 100644
--- a/crates/edit_prediction/src/udiff.rs
+++ b/crates/edit_prediction/src/udiff.rs
@@ -62,36 +62,26 @@ pub async fn apply_diff(
                 hunk,
                 is_new_file,
             } => {
+                let worktree_id = worktree.read_with(cx, |wt, _| wt.id())?;
+                let rel_path = RelPath::new(Path::new(path.as_ref()), PathStyle::Posix)?;
+                let project_path = project::ProjectPath {
+                    worktree_id,
+                    path: rel_path.into_arc(),
+                };
+
                 let buffer = match current_file {
                     None => {
-                        let buffer = if is_new_file {
+                        if is_new_file {
                             // New file - create it first, then open the buffer
-                            let worktree_id = worktree.read_with(cx, |wt, _| wt.id())?;
-                            let rel_path =
-                                RelPath::new(Path::new(path.as_ref()), PathStyle::Posix)?;
-                            let project_path = project::ProjectPath {
-                                worktree_id,
-                                path: rel_path.into_arc(),
-                            };
                             project
                                 .update(cx, |project, cx| {
                                     project.create_entry(project_path.clone(), false, cx)
                                 })?
                                 .await?;
-                            project
-                                .update(cx, |project, cx| project.open_buffer(project_path, cx))?
-                                .await?
-                        } else {
-                            // Existing file - find and open it
-                            let project_path = project
-                                .update(cx, |project, cx| {
-                                    project.find_project_path(path.as_ref(), cx)
-                                })?
-                                .context("no such path")?;
-                            project
-                                .update(cx, |project, cx| project.open_buffer(project_path, cx))?
-                                .await?
-                        };
+                        }
+                        let buffer = project
+                            .update(cx, |project, cx| project.open_buffer(project_path, cx))?
+                            .await?;
                         included_files.insert(path.to_string(), buffer.clone());
                         current_file = Some(buffer);
                         current_file.as_ref().unwrap()
@@ -191,7 +181,8 @@ pub fn strip_diff_metadata(diff: &str) -> String {
             | DiffLine::HunkHeader(_)
             | DiffLine::Context(_)
             | DiffLine::Deletion(_)
-            | DiffLine::Addition(_) => {
+            | DiffLine::Addition(_)
+            | DiffLine::NoNewlineAtEOF => {
                 result.push_str(line);
                 result.push('\n');
             }
         }
@@ -426,6 +417,23 @@ impl<'a> DiffParser<'a> {
                     }
                 }
             }
+            DiffLine::NoNewlineAtEOF => {
+                if let Some(last_edit) = self.hunk.edits.last_mut() {
+                    if last_edit.text.ends_with('\n') {
+                        // Previous line was an addition (has trailing newline in text)
+                        last_edit.text.pop();
+                    } else if !last_edit.range.is_empty()
+                        && last_edit.range.end == self.hunk.context.len()
+                    {
+                        // Previous line was a deletion (non-empty range at end of context)
+                        self.hunk.context.pop();
+                        last_edit.range.end -= 1;
+                    }
+                } else {
+                    // Previous line was context (no edits)
+                    self.hunk.context.pop();
+                }
+            }
             DiffLine::Garbage(_) => {}
         }

@@ -460,7 +468,13 @@ fn resolve_hunk_edits_in_buffer(
                     }
                 }
                 offset = Some(range.start + ix);
             }
         }
-        offset.ok_or_else(|| anyhow!("Failed to match context:\n{}", hunk.context))
+        offset.ok_or_else(|| {
+            anyhow!(
+                "Failed to match context:\n\n```\n{}```\n\nBuffer contents:\n\n```\n{}```",
+                hunk.context,
+                buffer.text()
+            )
+        })
     }?;
     let iter = hunk.edits.into_iter().flat_map(move |edit| {
         let old_text = buffer
@@ -488,6 +502,7 @@ pub enum DiffLine<'a> {
     Context(&'a str),
     Deletion(&'a str),
     Addition(&'a str),
+    NoNewlineAtEOF,
     Garbage(&'a str),
 }

@@ -505,6 +520,9 @@ impl<'a> DiffLine<'a> {
     }

     fn try_parse(line: &'a str) -> Option<Self> {
+        if line.starts_with("\\ No newline") {
+            return Some(Self::NoNewlineAtEOF);
+        }
         if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
             let path = parse_header_path("a/", header);
             Some(Self::OldPath { path })
@@ -561,6 +579,7 @@ impl<'a> Display for DiffLine<'a> {
             DiffLine::Context(content) => write!(f, " {content}"),
             DiffLine::Deletion(content) => write!(f, "-{content}"),
             DiffLine::Addition(content) => write!(f, "+{content}"),
+            DiffLine::NoNewlineAtEOF => write!(f, "\\ No newline at end of file"),
             DiffLine::Garbage(line) => write!(f, "{line}"),
         }
     }
@@ -831,6 +850,93 @@ mod tests {
         )
     }

+    #[test]
+    fn test_no_newline_at_eof() {
+        let diff = indoc! {"
+            --- a/file.py
+            +++ b/file.py
+            @@ -55,7 +55,3 @@ class CustomDataset(Dataset):
+                             torch.set_rng_state(state)
+                     mask = self.transform(mask)
+
+            -        if self.mode == 'Training':
+            -            return (img, mask, name)
+            -        else:
+            -            return (img, mask, name)
+            \\ No newline at end of file
+        "};
+
+        let mut events = Vec::new();
+        let mut parser = DiffParser::new(diff);
+        while let Some(event) = parser.next().unwrap() {
+            events.push(event);
+        }
+
+        assert_eq!(
+            events,
+            &[
+                DiffEvent::Hunk {
+                    path: "file.py".into(),
+                    hunk: Hunk {
+                        context: concat!(
+                            "                torch.set_rng_state(state)\n",
+                            "        mask = self.transform(mask)\n",
+                            "\n",
+                            "        if self.mode == 'Training':\n",
+                            "            return (img, mask, name)\n",
+                            "        else:\n",
+                            "            return (img, mask, name)",
+                        )
+                        .into(),
+                        edits: vec![Edit {
+                            range: 80..203,
+                            text: "".into()
+                        }],
+                    },
+                    is_new_file: false,
+                },
+                DiffEvent::FileEnd { renamed_to: None }
+            ],
+        );
+    }
+
+    #[test]
+    fn test_no_newline_at_eof_addition() {
+        let diff = indoc! {"
+            --- a/file.txt
+            +++ b/file.txt
+            @@ -1,2 +1,3 @@
+             context
+            -deleted
+            +added line
+            \\ No newline at end of file
+        "};
+
+        let mut events = Vec::new();
+        let mut parser = DiffParser::new(diff);
+        while let Some(event) = parser.next().unwrap() {
+            events.push(event);
+        }
+
+        assert_eq!(
+            events,
+            &[
+                DiffEvent::Hunk {
+                    path: "file.txt".into(),
+                    hunk: Hunk {
+                        context: "context\ndeleted\n".into(),
+                        edits: vec![Edit {
+                            range: 8..16,
+                            text: "added line".into()
+                        }],
+                    },
+                    is_new_file: false,
+                },
+                DiffEvent::FileEnd { renamed_to: None }
+            ],
+        );
+    }
+
     #[gpui::test]
     async fn test_apply_diff_successful(cx: &mut TestAppContext) {
         let fs = init_test(cx);
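The new `DiffLine::NoNewlineAtEOF` variant covers the `\ No newline at end of file` marker that `git diff` emits when a file's last line has no trailing newline. The marker always refers to the line directly above it, which is why the parser strips a trailing `\n` from whatever the previous addition, deletion, or context line contributed. The following is a minimal standalone sketch of that semantics; it is not part of the patch and uses only illustrative names, not the real udiff.rs API.

// Sketch only: build the post-image of a hunk body while honoring the marker.
// The real parser instead adjusts `hunk.context` and the last `Edit`.
fn apply_hunk_body(lines: &[&str]) -> String {
    let mut out = String::new();
    let mut last_emitted = false;
    for line in lines {
        if let Some(rest) = line.strip_prefix('+').or_else(|| line.strip_prefix(' ')) {
            // Additions and context lines are part of the post-image.
            out.push_str(rest);
            out.push('\n');
            last_emitted = true;
        } else if line.starts_with('\\') {
            // "\ No newline at end of file": the line directly above has no
            // trailing newline; only relevant here if that line was emitted.
            if last_emitted && out.ends_with('\n') {
                out.pop();
            }
        } else {
            // Deletions contribute nothing to the post-image.
            last_emitted = false;
        }
    }
    out
}

fn main() {
    let body = [" context", "-deleted", "+added line", "\\ No newline at end of file"];
    assert_eq!(apply_hunk_body(&body), "context\nadded line");
}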
diff --git a/crates/edit_prediction_cli/src/load_project.rs b/crates/edit_prediction_cli/src/load_project.rs
index 3a501c0bb24a6cf170b92118a6e82e1af1ed4f15..f8a0a1ed8e171c74227f7d143a552bfb1e99174f 100644
--- a/crates/edit_prediction_cli/src/load_project.rs
+++ b/crates/edit_prediction_cli/src/load_project.rs
@@ -11,9 +11,10 @@ use edit_prediction::udiff::OpenedBuffers;
 use futures::AsyncWriteExt as _;
 use gpui::{AsyncApp, Entity};
 use language::{Anchor, Buffer, LanguageNotFound, ToOffset, ToPoint};
-use project::Project;
 use project::buffer_store::BufferStoreEvent;
+use project::{Project, ProjectPath};
 use std::{fs, path::PathBuf, sync::Arc};
+use util::{paths::PathStyle, rel_path::RelPath};

 pub async fn run_load_project(
     example: &mut Example,
@@ -76,16 +77,16 @@ async fn cursor_position(
         return Err(error);
     }

-    let cursor_path = project
-        .read_with(cx, |project, cx| {
-            project.find_project_path(&example.spec.cursor_path, cx)
-        })?
-        .with_context(|| {
-            format!(
-                "failed to find cursor path {}",
-                example.spec.cursor_path.display()
-            )
-        })?;
+    let worktree = project
+        .read_with(cx, |project, cx| project.visible_worktrees(cx).next())?
+        .context("project has no worktree")?;
+
+    let worktree_id = worktree.read_with(cx, |wt, _| wt.id())?;
+    let cursor_path = ProjectPath {
+        worktree_id,
+        path: RelPath::new(example.spec.cursor_path.as_ref(), PathStyle::Posix)?.into_arc(),
+    };
+
     let cursor_buffer = project
         .update(cx, |project, cx| project.open_buffer(cursor_path, cx))?
         .await?;
diff --git a/crates/edit_prediction_cli/src/main.rs b/crates/edit_prediction_cli/src/main.rs
index aba569ad77207986e253149f5eae026ef8c0e6d1..b79b6d21439d4bfdc731edf3b3f8dde8d7d1f33b 100644
--- a/crates/edit_prediction_cli/src/main.rs
+++ b/crates/edit_prediction_cli/src/main.rs
@@ -429,7 +429,7 @@ fn main() {
                         .join(format!("{}_err.txt", example.spec.name));
                     app_state
                         .fs
-                        .write(&err_path, e.to_string().as_bytes())
+                        .write(&err_path, format!("{e:?}").as_bytes())
                         .await
                         .unwrap();

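The one-line change above writes the error with `Debug` formatting instead of `Display`. With `anyhow`, `Display` prints only the outermost message, while `Debug` prints the whole `Caused by:` chain added via `.context(...)` (as in load_project.rs above), so the `*_err.txt` report keeps the underlying cause. A small illustration, assuming only the `anyhow` crate; the file name below is made up:

use anyhow::{Context, Result};

fn load_example() -> Result<String> {
    // The missing file produces an io::Error, wrapped with extra context.
    std::fs::read_to_string("missing_example.json").context("failed to load example spec")
}

fn main() {
    let err = load_example().unwrap_err();
    println!("{err}");   // Display: "failed to load example spec"
    println!("{err:?}"); // Debug: the message plus a "Caused by:" chain
}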
diff --git a/crates/edit_prediction_cli/src/split_commit.rs b/crates/edit_prediction_cli/src/split_commit.rs
index ce26f3227f7e78e333cb220668e6ca70f34a4356..a13f45b20fbc8c8c7489459bf8808934aa1aa0f6 100644
--- a/crates/edit_prediction_cli/src/split_commit.rs
+++ b/crates/edit_prediction_cli/src/split_commit.rs
@@ -799,20 +799,15 @@ pub fn imitate_human_edits(
                 hunk.new_count += 1;
             }
         } else {
-            // For pure insertions, we need to add or modify a hunk
-            // Check if the source hunk exists AND has enough lines for the target's line index
-            let can_insert_in_existing_hunk = new_src_patch
-                .hunks
-                .get(tgt_edit_loc.hunk_index)
-                .map_or(false, |hunk| {
-                    tgt_edit_loc.line_index_within_hunk <= hunk.lines.len()
-                });
-
-            if can_insert_in_existing_hunk {
-                if let Some(hunk) = new_src_patch.hunks.get_mut(tgt_edit_loc.hunk_index) {
-                    // Insert the partial line at the same position as target
+            // For pure insertions, insert after the last edit in source patch
+            // This imitates human typing - the intermediate content is what the user is currently typing
+            let last_src_edit = locate_edited_line(&new_src_patch, -1);
+
+            if let Some(src_loc) = last_src_edit {
+                // Insert after the last edit in source
+                if let Some(hunk) = new_src_patch.hunks.get_mut(src_loc.hunk_index) {
                     hunk.lines.insert(
-                        tgt_edit_loc.line_index_within_hunk,
+                        src_loc.line_index_within_hunk + 1,
                         PatchLine::Addition(new_src.clone()),
                     );
                     hunk.new_count += 1;
@@ -1672,6 +1667,85 @@ index 123..456 789
         );
     }

+    #[test]
+    fn test_imitate_human_edits_inserts_after_last_source_edit() {
+        // Regression test: intermediate content should appear after the last edit
+        // in the source patch, not at the position of the first target edit.
+        // This ensures the diff output correctly imitates human typing order.
+        //
+        // The bug was: when source has edits and target has a pure insertion,
+        // the intermediate content was inserted at tgt_edit_loc.line_index_within_hunk
+        // (position of first target edit) instead of after the last source edit.
+        //
+        // Source patch has edits at lines 1-4, target has a new edit at line 10
+        // (different location to avoid the "same line" early return)
+        let source = r#"--- a/test.py
++++ b/test.py
+@@ -1,4 +1,5 @@
++import foo
+ import bar
+-import old
+ import baz
++import qux
+"#;
+        // Target has a pure insertion at a different line (line 10, not overlapping with source)
+        let target = r#"--- a/test.py
++++ b/test.py
+@@ -10,3 +10,4 @@
+ def main():
++    print("hello world")
+ pass
+"#;
+
+        // Use a seed that produces a partial result
+        let (new_src, _new_tgt, cursor) = imitate_human_edits(source, target, 42);
+
+        // The function should produce a modified patch
+        assert!(cursor.is_some(), "Should produce intermediate state");
+
+        let src_patch = Patch::parse_unified_diff(&new_src);
+        let all_additions: Vec<_> = src_patch
+            .hunks
+            .iter()
+            .flat_map(|h| h.lines.iter())
+            .filter_map(|l| match l {
+                PatchLine::Addition(s) => Some(s.as_str()),
+                _ => None,
+            })
+            .collect();
+
+        // The intermediate content (partial 'print("hello world")') should be
+        // the LAST addition, appearing after "+import qux" (the last source edit)
+        let last_addition = all_additions.last().expect("Should have additions");
+        assert!(
+            last_addition.trim_start().starts_with("pr"),
+            "Intermediate content should be the last addition (partial 'print'), but last was: {:?}",
+            last_addition
+        );
+
+        // Verify the original source edits are still in order before the intermediate
+        let foo_pos = all_additions.iter().position(|s| *s == "import foo");
+        let qux_pos = all_additions.iter().position(|s| *s == "import qux");
+        let intermediate_pos = all_additions
+            .iter()
+            .position(|s| s.trim_start().starts_with("pr"));
+
+        assert!(foo_pos.is_some(), "Should have 'import foo'");
+        assert!(qux_pos.is_some(), "Should have 'import qux'");
+        assert!(
+            intermediate_pos.is_some(),
+            "Should have intermediate content"
+        );
+
+        assert!(
+            foo_pos < qux_pos && qux_pos < intermediate_pos,
+            "Order should be: foo < qux < intermediate. Got foo={:?}, qux={:?}, intermediate={:?}",
+            foo_pos,
+            qux_pos,
+            intermediate_pos
+        );
+    }
+
     #[test]
     fn test_cursor_excerpt_with_multibyte_utf8() {
         // Test that cursor excerpt handles multi-byte UTF-8 characters correctly
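For context on the `imitate_human_edits` change and the regression test above: when the target patch introduces a brand-new line, the partially-typed line is now placed right after the last added or removed line of the source patch rather than at the target edit's own index. Below is a simplified standalone sketch of that idea, with toy types standing in for the real `Patch`/`PatchLine` and `locate_edited_line`; it is illustrative only, not the patched code.

#[derive(Debug, PartialEq)]
enum Line {
    Context(String),
    Addition(String),
    Deletion(String),
}

/// Index of the last added or deleted line, if any.
fn last_edited_index(lines: &[Line]) -> Option<usize> {
    lines.iter().rposition(|l| !matches!(l, Line::Context(_)))
}

/// Place the partially-typed line right after the last edit, imitating where
/// a human would currently be typing.
fn insert_in_progress(lines: &mut Vec<Line>, partial: &str) {
    let at = last_edited_index(lines).map_or(lines.len(), |i| i + 1);
    lines.insert(at, Line::Addition(partial.to_string()));
}

fn main() {
    // Mirrors the source patch in the regression test: edits end at "import qux".
    let mut lines = vec![
        Line::Addition("import foo".into()),
        Line::Context("import bar".into()),
        Line::Deletion("import old".into()),
        Line::Context("import baz".into()),
        Line::Addition("import qux".into()),
    ];
    insert_in_progress(&mut lines, "pr");
    assert_eq!(lines[5], Line::Addition("pr".into()));
}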