ep: Add file deletion support in unified diff parsing (#46279)

Oleksiy Syvokon and Agus Zubiaga created

- Replace `is_new_file: bool` with `FileStatus` enum
(Created/Modified/Deleted) in udiff.rs to properly track file status
through diff operations
- Handle deleted files in `apply_diff` by calling
`project.delete_file()`
- Fix diff serialization in reorder_patch.rs to output `+++ /dev/null`
for file deletions and parse both `--- /dev/null` and `+++ /dev/null`
correctly
- Add bounds check for edit ranges exceeding buffer length

Also includes edit_prediction_cli improvements:
- Track `context_range` and `editable_range` in ExampleBuffer for more
precise prompt formatting
- Export MAX_CONTEXT_TOKENS and MAX_REWRITE_TOKENS from zeta2
- Wait for buffer parsing before computing ranges
- Respect NO_COLOR env var and enable info-level logging


Release Notes:

- N/A

Co-authored-by: Agus Zubiaga <agus@zed.dev>

Change summary

crates/edit_prediction/src/udiff.rs             | 74 ++++++++++++------
crates/edit_prediction/src/zeta2.rs             |  4 
crates/edit_prediction_cli/src/example.rs       |  3 
crates/edit_prediction_cli/src/format_prompt.rs | 39 ++++++---
crates/edit_prediction_cli/src/load_project.rs  | 20 ++++
crates/edit_prediction_cli/src/progress.rs      |  5 
crates/edit_prediction_cli/src/reorder_patch.rs | 48 +++++++++++
7 files changed, 145 insertions(+), 48 deletions(-)

Detailed changes

crates/edit_prediction/src/udiff.rs 🔗

@@ -57,17 +57,29 @@ pub async fn apply_diff(
 
     while let Some(event) = diff.next()? {
         match event {
-            DiffEvent::Hunk {
-                path,
-                hunk,
-                is_new_file,
-            } => {
+            DiffEvent::Hunk { path, hunk, status } => {
+                if status == FileStatus::Deleted {
+                    let delete_task = project.update(cx, |project, cx| {
+                        if let Some(path) = project.find_project_path(path.as_ref(), cx) {
+                            project.delete_file(path, false, cx)
+                        } else {
+                            None
+                        }
+                    })?;
+
+                    if let Some(delete_task) = delete_task {
+                        delete_task.await?;
+                    };
+
+                    continue;
+                }
+
                 let buffer = match current_file {
                     None => {
                         let buffer = match included_files.entry(path.to_string()) {
                             Entry::Occupied(entry) => entry.get().clone(),
                             Entry::Vacant(entry) => {
-                                let buffer = if is_new_file {
+                                let buffer = if status == FileStatus::Created {
                                     project
                                         .update(cx, |project, cx| project.create_buffer(true, cx))?
                                         .await?
@@ -95,7 +107,7 @@ pub async fn apply_diff(
 
                 buffer.read_with(cx, |buffer, _| {
                     edits.extend(
-                        resolve_hunk_edits_in_buffer(hunk, buffer, ranges.as_slice(), is_new_file)
+                        resolve_hunk_edits_in_buffer(hunk, buffer, ranges.as_slice(), status)
                             .with_context(|| format!("Diff:\n{diff_str}"))?,
                     );
                     anyhow::Ok(())
@@ -260,7 +272,7 @@ pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
             DiffEvent::Hunk {
                 hunk,
                 path: _,
-                is_new_file: _,
+                status: _,
             } => {
                 // Find all matches of the context in the text
                 let candidates: Vec<usize> = text
@@ -299,7 +311,7 @@ pub fn edits_for_diff(content: &str, diff_str: &str) -> Result<Vec<(Range<usize>
             DiffEvent::Hunk {
                 hunk,
                 path: _,
-                is_new_file: _,
+                status: _,
             } => {
                 if hunk.context.is_empty() {
                     return Ok(Vec::new());
@@ -367,13 +379,20 @@ enum DiffEvent<'a> {
     Hunk {
         path: Cow<'a, str>,
         hunk: Hunk,
-        is_new_file: bool,
+        status: FileStatus,
     },
     FileEnd {
         renamed_to: Option<Cow<'a, str>>,
     },
 }
 
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum FileStatus {
+    Created,
+    Modified,
+    Deleted,
+}
+
 #[derive(Debug, Default, PartialEq)]
 struct Hunk {
     context: String,
@@ -420,8 +439,14 @@ impl<'a> DiffParser<'a> {
                 if let Some(file) = &self.current_file
                     && !self.hunk.is_empty()
                 {
-                    let is_new_file = file.old_path == "/dev/null";
-                    let path = if is_new_file {
+                    let status = if file.old_path == "/dev/null" {
+                        FileStatus::Created
+                    } else if file.new_path == "/dev/null" {
+                        FileStatus::Deleted
+                    } else {
+                        FileStatus::Modified
+                    };
+                    let path = if status == FileStatus::Created {
                         file.new_path.clone()
                     } else {
                         file.old_path.clone()
@@ -430,11 +455,7 @@ impl<'a> DiffParser<'a> {
                     hunk.start_line = self.pending_start_line.take();
                     self.processed_no_newline = false;
                     self.last_diff_op = LastDiffOp::None;
-                    return Ok(Some(DiffEvent::Hunk {
-                        path,
-                        hunk,
-                        is_new_file,
-                    }));
+                    return Ok(Some(DiffEvent::Hunk { path, hunk, status }));
                 }
             }
 
@@ -554,9 +575,9 @@ fn resolve_hunk_edits_in_buffer(
     hunk: Hunk,
     buffer: &TextBufferSnapshot,
     ranges: &[Range<Anchor>],
-    is_new_file: bool,
+    status: FileStatus,
 ) -> Result<impl Iterator<Item = (Range<Anchor>, Arc<str>)>, anyhow::Error> {
-    let context_offset = if is_new_file || hunk.context.is_empty() {
+    let context_offset = if status == FileStatus::Created || hunk.context.is_empty() {
         0
     } else {
         let mut candidates: Vec<usize> = Vec::new();
@@ -583,6 +604,11 @@ fn resolve_hunk_edits_in_buffer(
             }
         })?
     };
+
+    if let Some(edit) = hunk.edits.iter().find(|edit| edit.range.end > buffer.len()) {
+        return Err(anyhow!("Edit range {:?} exceeds buffer length", edit.range));
+    }
+
     let iter = hunk.edits.into_iter().flat_map(move |edit| {
         let old_text = buffer
             .text_for_range(context_offset + edit.range.start..context_offset + edit.range.end)
@@ -951,7 +977,7 @@ mod tests {
                         }],
                         start_line: None,
                     },
-                    is_new_file: false,
+                    status: FileStatus::Modified,
                 },
                 DiffEvent::FileEnd { renamed_to: None }
             ],
@@ -1002,7 +1028,7 @@ mod tests {
                         }],
                         start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
                     },
-                    is_new_file: false,
+                    status: FileStatus::Modified,
                 },
                 DiffEvent::FileEnd { renamed_to: None }
             ],
@@ -1040,7 +1066,7 @@ mod tests {
                         }],
                         start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
                     },
-                    is_new_file: false,
+                    status: FileStatus::Modified,
                 },
                 DiffEvent::FileEnd { renamed_to: None }
             ],
@@ -1081,7 +1107,7 @@ mod tests {
                         }],
                         start_line: Some(0),
                     },
-                    is_new_file: false,
+                    status: FileStatus::Modified,
                 },
                 DiffEvent::FileEnd { renamed_to: None }
             ],
@@ -1124,7 +1150,7 @@ mod tests {
                         }],
                         start_line: Some(0),
                     },
-                    is_new_file: false,
+                    status: FileStatus::Modified,
                 },
                 DiffEvent::FileEnd { renamed_to: None }
             ],

crates/edit_prediction/src/zeta2.rs 🔗

@@ -18,8 +18,8 @@ use std::{path::Path, sync::Arc, time::Instant};
 use zeta_prompt::CURSOR_MARKER;
 use zeta_prompt::format_zeta_prompt;
 
-const MAX_CONTEXT_TOKENS: usize = 150;
-const MAX_REWRITE_TOKENS: usize = 350;
+pub const MAX_CONTEXT_TOKENS: usize = 150;
+pub const MAX_REWRITE_TOKENS: usize = 350;
 
 pub fn request_prediction_with_zeta2(
     store: &mut EditPredictionStore,

crates/edit_prediction_cli/src/example.rs 🔗

@@ -9,6 +9,7 @@ use http_client::Url;
 use language::{Anchor, Buffer};
 use project::Project;
 use serde::{Deserialize, Serialize};
+use std::ops::Range;
 use std::sync::Arc;
 use std::{
     borrow::Cow,
@@ -69,6 +70,8 @@ pub struct ExampleBuffer {
     pub cursor_row: u32,
     pub cursor_column: u32,
     pub cursor_offset: usize,
+    pub context_range: Range<usize>,
+    pub editable_range: Range<usize>,
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize)]

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -128,20 +128,19 @@ impl TeacherPrompt {
         // 2. Context retriever just didn't include cursor line.
         //
         // In that case, fallback to using `cursor_position` as excerpt.
-        let cursor_file = &example
+        let example_buffer = example
             .buffer
             .as_ref()
-            .context("`buffer` should be filled in in the context collection step")?
-            .content;
+            .context("`buffer` should be filled in in the context collection step")?;
+        let cursor_file = &example_buffer.content;
 
         // Extract updated (new) editable region from the model response.
         // The model may include editable region markers in its output, so we need to strip them.
         let new_editable_region = extract_last_codeblock(response);
         let mut new_editable_region = Self::extract_editable_region(&new_editable_region);
 
-        // Reconstruct old editable region we sent to the model
-        let old_editable_region = Self::format_editable_region(example);
-        let old_editable_region = Self::extract_editable_region(&old_editable_region);
+        let old_editable_region =
+            example_buffer.content[example_buffer.editable_range.clone()].to_string();
 
         // Normalize leading newlines: if old starts with newline but new doesn't,
         // prepend newline to new to preserve whitespace structure.
@@ -203,19 +202,29 @@ impl TeacherPrompt {
     fn format_editable_region(example: &Example) -> String {
         let mut result = String::new();
 
+        let example_buffer = example.buffer.as_ref().unwrap();
+
         let path_str = example.spec.cursor_path.to_string_lossy();
         result.push_str(&format!("`````path=\"{path_str}\"\n"));
+        result.push_str(
+            &example_buffer.content
+                [example_buffer.context_range.start..example_buffer.editable_range.start],
+        );
         result.push_str(Self::EDITABLE_REGION_START);
-
-        // TODO: control number of lines around cursor
-        let (mut excerpt, offset) = example.spec.cursor_excerpt().unwrap();
-        excerpt.insert_str(offset, Self::USER_CURSOR_MARKER);
-        result.push_str(&excerpt);
-        if !result.ends_with('\n') {
-            result.push('\n');
-        }
-
+        result.push_str(
+            &example_buffer.content
+                [example_buffer.editable_range.start..example_buffer.cursor_offset],
+        );
+        result.push_str(Self::USER_CURSOR_MARKER);
+        result.push_str(
+            &example_buffer.content
+                [example_buffer.cursor_offset..example_buffer.editable_range.end],
+        );
         result.push_str(Self::EDITABLE_REGION_END);
+        result.push_str(
+            &example_buffer.content
+                [example_buffer.editable_range.end..example_buffer.context_range.end],
+        );
         result.push_str("\n`````");
 
         result

crates/edit_prediction_cli/src/load_project.rs 🔗

@@ -5,11 +5,13 @@ use crate::{
     progress::{InfoStyle, Progress, Step, StepProgress},
 };
 use anyhow::{Context as _, Result};
-use edit_prediction::EditPredictionStore;
 use edit_prediction::udiff::{OpenedBuffers, refresh_worktree_entries};
+use edit_prediction::{
+    EditPredictionStore, cursor_excerpt::editable_and_context_ranges_for_cursor_position, zeta2,
+};
 use futures::AsyncWriteExt as _;
 use gpui::{AsyncApp, Entity};
-use language::{Anchor, Buffer, LanguageNotFound, ToOffset, ToPoint};
+use language::{Anchor, Buffer, LanguageNotFound, OffsetRangeExt as _, ToOffset, ToPoint};
 use project::Project;
 use project::buffer_store::BufferStoreEvent;
 use std::{fs, path::PathBuf, sync::Arc};
@@ -33,8 +35,20 @@ pub async fn run_load_project(
     progress.set_substatus("resolving cursor");
     let (buffer, cursor_position) =
         cursor_position(example, &project, &open_buffers, &mut cx).await?;
+    buffer
+        .read_with(&cx, |buffer, _| buffer.parsing_idle())?
+        .await;
     let (example_buffer, language_name) = buffer.read_with(&cx, |buffer, _cx| {
         let cursor_point = cursor_position.to_point(&buffer);
+        let snapshot = buffer.snapshot();
+        let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
+            cursor_point,
+            &snapshot,
+            zeta2::MAX_REWRITE_TOKENS,
+            zeta2::MAX_CONTEXT_TOKENS,
+        );
+        let editable_range = editable_range.to_offset(&snapshot);
+        let context_range = context_range.to_offset(&snapshot);
         let language_name = buffer
             .language()
             .map(|l| l.name().to_string())
@@ -45,6 +59,8 @@ pub async fn run_load_project(
                 cursor_row: cursor_point.row,
                 cursor_column: cursor_point.column,
                 cursor_offset: cursor_position.to_offset(&buffer),
+                context_range,
+                editable_range,
             },
             language_name,
         )

crates/edit_prediction_cli/src/progress.rs 🔗

@@ -99,7 +99,8 @@ impl Progress {
                     inner: Mutex::new(ProgressInner {
                         completed: Vec::new(),
                         in_progress: HashMap::new(),
-                        is_tty: std::io::stderr().is_terminal(),
+                        is_tty: std::env::var("NO_COLOR").is_err()
+                            && std::io::stderr().is_terminal(),
                         terminal_width: get_terminal_width(),
                         max_example_name_len: 0,
                         status_lines_displayed: 0,
@@ -110,7 +111,7 @@ impl Progress {
                     }),
                 });
                 let _ = log::set_logger(&LOGGER);
-                log::set_max_level(log::LevelFilter::Error);
+                log::set_max_level(log::LevelFilter::Info);
                 progress
             })
             .clone()

crates/edit_prediction_cli/src/reorder_patch.rs 🔗

@@ -155,7 +155,11 @@ impl ToString for Patch {
             } else {
                 result.push_str(&format!("--- a/{}\n", current_file));
             }
-            result.push_str(&format!("+++ b/{}\n", current_file));
+            if hunk.is_file_deletion() {
+                result.push_str("+++ /dev/null\n");
+            } else {
+                result.push_str(&format!("+++ b/{}\n", current_file));
+            }
             result.push_str(&hunk.to_string());
         }
 
@@ -190,10 +194,16 @@ impl Patch {
                 is_filename_inherited = true;
             } else if let Some(path) = line.strip_prefix("--- ") {
                 is_filename_inherited = false;
-                current_file = path.trim().strip_prefix("a/").unwrap_or(path).into();
+                let path = path.trim().strip_prefix("a/").unwrap_or(path);
+                if path != "/dev/null" {
+                    current_file = path.into();
+                }
             } else if let Some(path) = line.strip_prefix("+++ ") {
                 is_filename_inherited = false;
-                current_file = path.trim().strip_prefix("b/").unwrap_or(path).into();
+                let path = path.trim().strip_prefix("b/").unwrap_or(path);
+                if path != "/dev/null" {
+                    current_file = path.into();
+                }
             } else if let Some(line) = line.strip_prefix("+") {
                 hunk.lines.push(PatchLine::Addition(line.to_string()));
             } else if let Some(line) = line.strip_prefix("-") {
@@ -339,6 +349,11 @@ impl Hunk {
         self.old_start == 0 && self.old_count == 0
     }
 
+    /// Returns true if this hunk represents a file deletion (new side is empty).
+    pub fn is_file_deletion(&self) -> bool {
+        self.new_start == 0 && self.new_count == 0
+    }
+
     /// Render the hunk header
     pub fn header_string(&self) -> String {
         format!(
@@ -1495,4 +1510,31 @@ mod tests {
         "}
         );
     }
+
+    #[test]
+    fn test_file_deletion_diff_header() {
+        // When new_start and new_count are both 0, the file is being deleted,
+        // so the +++ line should be /dev/null instead of b/filename
+        let patch = Patch::parse_unified_diff(indoc! {"
+            --- a/old_file.rs
+            +++ /dev/null
+            @@ -1,3 +0,0 @@
+            -fn main() {
+            -    println!(\"goodbye\");
+            -}
+        "});
+
+        let actual = patch.to_string();
+        assert_eq!(
+            actual,
+            indoc! {"
+            --- a/old_file.rs
+            +++ /dev/null
+            @@ -1,3 +0,0 @@
+            -fn main() {
+            -    println!(\"goodbye\");
+            -}
+        "}
+        );
+    }
 }