agent: Fix creating files with Gemini (#31439)

Oleksiy Syvokon created 9 months ago

This change instructs models to wrap new file content in Markdown fences
and introduces a parser for this format. The reasons are:

1. This is the format we put a lot of effort into explaining in the
system prompt.
2. Gemini really prefers to do it.
3. It adds an option for a model to think before writing the content

The `eval_zode` pass rate for GEmini models goes from 0% to 100%. Other
models were already at 100%, this hasn't changed.


Release Notes:

- N/A

Change summary

Cargo.lock                                                  |   1 
crates/assistant_tools/Cargo.toml                           |   1 
crates/assistant_tools/src/edit_agent.rs                    | 109 ++-
crates/assistant_tools/src/edit_agent/create_file_parser.rs | 218 +++++++
crates/assistant_tools/src/edit_agent/evals.rs              |  63 +
crates/assistant_tools/src/templates/create_file_prompt.hbs |   6 
crates/assistant_tools/src/templates/edit_file_prompt.hbs   |   3 
7 files changed, 356 insertions(+), 45 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -683,6 +683,7 @@ dependencies = [
  "language_models",
  "log",
  "markdown",
+ "once_cell",
  "open",
  "paths",
  "portable-pty",

crates/assistant_tools/Cargo.toml 🔗

@@ -62,6 +62,7 @@ which.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
 zed_llm_client.workspace = true
+once_cell = "1.21.3"
 
 [dev-dependencies]
 client = { workspace = true, features = ["test-support"] }

crates/assistant_tools/src/edit_agent.rs 🔗

@@ -1,3 +1,4 @@
+mod create_file_parser;
 mod edit_parser;
 #[cfg(test)]
 mod evals;
@@ -6,6 +7,7 @@ use crate::{Template, Templates};
 use aho_corasick::AhoCorasick;
 use anyhow::Result;
 use assistant_tool::ActionLog;
+use create_file_parser::{CreateFileParser, CreateFileParserEvent};
 use edit_parser::{EditParser, EditParserEvent, EditParserMetrics};
 use futures::{
     Stream, StreamExt,
@@ -123,16 +125,16 @@ impl EditAgent {
         mpsc::UnboundedReceiver<EditAgentOutputEvent>,
     ) {
         let (output_events_tx, output_events_rx) = mpsc::unbounded();
+        let (parse_task, parse_rx) = Self::parse_create_file_chunks(edit_chunks, cx);
         let this = self.clone();
         let task = cx.spawn(async move |cx| {
             this.action_log
                 .update(cx, |log, cx| log.buffer_created(buffer.clone(), cx))?;
-            let output = this
-                .overwrite_with_chunks_internal(buffer, edit_chunks, output_events_tx, cx)
-                .await;
+            this.overwrite_with_chunks_internal(buffer, parse_rx, output_events_tx, cx)
+                .await?;
             this.project
                 .update(cx, |project, cx| project.set_agent_location(None, cx))?;
-            output
+            parse_task.await
         });
         (task, output_events_rx)
     }
@@ -140,10 +142,10 @@ impl EditAgent {
     async fn overwrite_with_chunks_internal(
         &self,
         buffer: Entity<Buffer>,
-        edit_chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
+        mut parse_rx: UnboundedReceiver<Result<CreateFileParserEvent>>,
         output_events_tx: mpsc::UnboundedSender<EditAgentOutputEvent>,
         cx: &mut AsyncApp,
-    ) -> Result<EditAgentOutput> {
+    ) -> Result<()> {
         cx.update(|cx| {
             buffer.update(cx, |buffer, cx| buffer.set_text("", cx));
             self.action_log.update(cx, |log, cx| {
@@ -163,34 +165,31 @@ impl EditAgent {
                 .ok();
         })?;
 
-        let mut raw_edits = String::new();
-        pin_mut!(edit_chunks);
-        while let Some(chunk) = edit_chunks.next().await {
-            let chunk = chunk?;
-            raw_edits.push_str(&chunk);
-            cx.update(|cx| {
-                buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
-                self.action_log
-                    .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
-                self.project.update(cx, |project, cx| {
-                    project.set_agent_location(
-                        Some(AgentLocation {
-                            buffer: buffer.downgrade(),
-                            position: language::Anchor::MAX,
-                        }),
-                        cx,
-                    )
-                });
-            })?;
-            output_events_tx
-                .unbounded_send(EditAgentOutputEvent::Edited)
-                .ok();
+        while let Some(event) = parse_rx.next().await {
+            match event? {
+                CreateFileParserEvent::NewTextChunk { chunk } => {
+                    cx.update(|cx| {
+                        buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
+                        self.action_log
+                            .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
+                        self.project.update(cx, |project, cx| {
+                            project.set_agent_location(
+                                Some(AgentLocation {
+                                    buffer: buffer.downgrade(),
+                                    position: language::Anchor::MAX,
+                                }),
+                                cx,
+                            )
+                        });
+                    })?;
+                    output_events_tx
+                        .unbounded_send(EditAgentOutputEvent::Edited)
+                        .ok();
+                }
+            }
         }
 
-        Ok(EditAgentOutput {
-            raw_edits,
-            parser_metrics: EditParserMetrics::default(),
-        })
+        Ok(())
     }
 
     pub fn edit(
@@ -435,6 +434,44 @@ impl EditAgent {
         (output, rx)
     }
 
+    fn parse_create_file_chunks(
+        chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
+        cx: &mut AsyncApp,
+    ) -> (
+        Task<Result<EditAgentOutput>>,
+        UnboundedReceiver<Result<CreateFileParserEvent>>,
+    ) {
+        let (tx, rx) = mpsc::unbounded();
+        let output = cx.background_spawn(async move {
+            pin_mut!(chunks);
+
+            let mut parser = CreateFileParser::new();
+            let mut raw_edits = String::new();
+            while let Some(chunk) = chunks.next().await {
+                match chunk {
+                    Ok(chunk) => {
+                        raw_edits.push_str(&chunk);
+                        for event in parser.push(Some(&chunk)) {
+                            tx.unbounded_send(Ok(event))?;
+                        }
+                    }
+                    Err(error) => {
+                        tx.unbounded_send(Err(error.into()))?;
+                    }
+                }
+            }
+            // Send final events with None to indicate completion
+            for event in parser.push(None) {
+                tx.unbounded_send(Ok(event))?;
+            }
+            Ok(EditAgentOutput {
+                raw_edits,
+                parser_metrics: EditParserMetrics::default(),
+            })
+        });
+        (output, rx)
+    }
+
     fn reindent_new_text_chunks(
         delta: IndentDelta,
         mut stream: impl Unpin + Stream<Item = Result<EditParserEvent>>,
@@ -1138,7 +1175,7 @@ mod tests {
             })
         );
 
-        chunks_tx.unbounded_send("jkl\n").unwrap();
+        chunks_tx.unbounded_send("```\njkl\n").unwrap();
         cx.run_until_parked();
         assert_eq!(
             drain_events(&mut events),
@@ -1146,7 +1183,7 @@ mod tests {
         );
         assert_eq!(
             buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
-            "jkl\n"
+            "jkl"
         );
         assert_eq!(
             project.read_with(cx, |project, _| project.agent_location()),
@@ -1164,7 +1201,7 @@ mod tests {
         );
         assert_eq!(
             buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
-            "jkl\nmno\n"
+            "jkl\nmno"
         );
         assert_eq!(
             project.read_with(cx, |project, _| project.agent_location()),
@@ -1174,7 +1211,7 @@ mod tests {
             })
         );
 
-        chunks_tx.unbounded_send("pqr").unwrap();
+        chunks_tx.unbounded_send("pqr\n```").unwrap();
         cx.run_until_parked();
         assert_eq!(
             drain_events(&mut events),

crates/assistant_tools/src/edit_agent/create_file_parser.rs 🔗

@@ -0,0 +1,218 @@
+use once_cell::sync::Lazy;
+use regex::Regex;
+use smallvec::SmallVec;
+use util::debug_panic;
+
+const START_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n?```\S*\n").unwrap());
+const END_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n```\s*$").unwrap());
+
+#[derive(Debug)]
+pub enum CreateFileParserEvent {
+    NewTextChunk { chunk: String },
+}
+
+#[derive(Debug)]
+pub struct CreateFileParser {
+    state: ParserState,
+    buffer: String,
+}
+
+#[derive(Debug, PartialEq)]
+enum ParserState {
+    Pending,
+    WithinText,
+    Finishing,
+    Finished,
+}
+
+impl CreateFileParser {
+    pub fn new() -> Self {
+        CreateFileParser {
+            state: ParserState::Pending,
+            buffer: String::new(),
+        }
+    }
+
+    pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
+        if chunk.is_none() {
+            self.state = ParserState::Finishing;
+        }
+
+        let chunk = chunk.unwrap_or_default();
+
+        self.buffer.push_str(chunk);
+
+        let mut edit_events = SmallVec::new();
+        loop {
+            match &mut self.state {
+                ParserState::Pending => {
+                    if let Some(m) = START_MARKER.find(&self.buffer) {
+                        self.buffer.drain(..m.end());
+                        self.state = ParserState::WithinText;
+                    } else {
+                        break;
+                    }
+                }
+                ParserState::WithinText => {
+                    let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
+                    let text_len = text.len();
+
+                    if text_len > 0 {
+                        edit_events.push(CreateFileParserEvent::NewTextChunk {
+                            chunk: self.buffer.drain(..text_len).collect(),
+                        });
+                    }
+                    break;
+                }
+                ParserState::Finishing => {
+                    if let Some(m) = END_MARKER.find(&self.buffer) {
+                        self.buffer.drain(m.start()..);
+                    }
+                    if !self.buffer.is_empty() {
+                        if !self.buffer.ends_with('\n') {
+                            self.buffer.push('\n');
+                        }
+                        edit_events.push(CreateFileParserEvent::NewTextChunk {
+                            chunk: self.buffer.drain(..).collect(),
+                        });
+                    }
+                    self.state = ParserState::Finished;
+                    break;
+                }
+                ParserState::Finished => debug_panic!("Can't call parser after finishing"),
+            }
+        }
+        edit_events
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use indoc::indoc;
+    use rand::prelude::*;
+    use std::cmp;
+
+    #[gpui::test(iterations = 100)]
+    fn test_happy_path(mut rng: StdRng) {
+        let mut parser = CreateFileParser::new();
+        assert_eq!(
+            parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
+            "Hello world".to_string()
+        );
+    }
+
+    #[gpui::test(iterations = 100)]
+    fn test_cut_prefix(mut rng: StdRng) {
+        let mut parser = CreateFileParser::new();
+        assert_eq!(
+            parse_random_chunks(
+                indoc! {"
+                    Let me write this file for you:
+
+                    ```
+                    Hello world
+                    ```
+
+                "},
+                &mut parser,
+                &mut rng
+            ),
+            "Hello world".to_string()
+        );
+    }
+
+    #[gpui::test(iterations = 100)]
+    fn test_language_name_on_fences(mut rng: StdRng) {
+        let mut parser = CreateFileParser::new();
+        assert_eq!(
+            parse_random_chunks(
+                indoc! {"
+                    ```rust
+                    Hello world
+                    ```
+
+                "},
+                &mut parser,
+                &mut rng
+            ),
+            "Hello world".to_string()
+        );
+    }
+
+    #[gpui::test(iterations = 100)]
+    fn test_leave_suffix(mut rng: StdRng) {
+        let mut parser = CreateFileParser::new();
+        assert_eq!(
+            parse_random_chunks(
+                indoc! {"
+                    Let me write this file for you:
+
+                    ```
+                    Hello world
+                    ```
+
+                    The end
+                "},
+                &mut parser,
+                &mut rng
+            ),
+            // This output is marlformed, so we're doing our best effort
+            "Hello world\n```\n\nThe end\n".to_string()
+        );
+    }
+
+    #[gpui::test(iterations = 100)]
+    fn test_inner_fences(mut rng: StdRng) {
+        let mut parser = CreateFileParser::new();
+        assert_eq!(
+            parse_random_chunks(
+                indoc! {"
+                    Let me write this file for you:
+
+                    ```
+                    ```
+                    Hello world
+                    ```
+                    ```
+                "},
+                &mut parser,
+                &mut rng
+            ),
+            // This output is marlformed, so we're doing our best effort
+            "```\nHello world\n```\n".to_string()
+        );
+    }
+
+    fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
+        let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
+        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
+        chunk_indices.sort();
+        chunk_indices.push(input.len());
+
+        let chunk_indices = chunk_indices
+            .into_iter()
+            .map(Some)
+            .chain(vec![None])
+            .collect::<Vec<Option<usize>>>();
+
+        let mut edit = String::default();
+        let mut last_ix = 0;
+        for chunk_ix in chunk_indices {
+            let mut chunk = None;
+            if let Some(chunk_ix) = chunk_ix {
+                chunk = Some(&input[last_ix..chunk_ix]);
+                last_ix = chunk_ix;
+            }
+
+            for event in parser.push(chunk) {
+                match event {
+                    CreateFileParserEvent::NewTextChunk { chunk } => {
+                        edit.push_str(&chunk);
+                    }
+                }
+            }
+        }
+        edit
+    }
+}

crates/assistant_tools/src/edit_agent/evals.rs 🔗

@@ -163,6 +163,15 @@ fn eval_delete_run_git_blame() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_translate_doc_comments() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |
+    //  gemini-2.5-pro-preview-03-25   |  1.0
+    //  gemini-2.5-flash-preview-04-17 |
+    //  gpt-4.1                        |
     let input_file_path = "root/canvas.rs";
     let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs");
     let edit_description = "Translate all doc comments to Italian";
@@ -216,6 +225,15 @@ fn eval_translate_doc_comments() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |  0.98
+    //  gemini-2.5-pro-preview-03-25   |  0.99
+    //  gemini-2.5-flash-preview-04-17 |
+    //  gpt-4.1                        |
     let input_file_path = "root/lib.rs";
     let input_file_content =
         include_str!("evals/fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs");
@@ -332,6 +350,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_disable_cursor_blinking() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |
+    //  gemini-2.5-pro-preview-03-25   |  1.0
+    //  gemini-2.5-flash-preview-04-17 |
+    //  gpt-4.1                        |
     let input_file_path = "root/editor.rs";
     let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
     let edit_description = "Comment out the call to `BlinkManager::enable`";
@@ -406,6 +433,15 @@ fn eval_disable_cursor_blinking() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_from_pixels_constructor() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |
+    //  gemini-2.5-pro-preview-03-25   |  0.94
+    //  gemini-2.5-flash-preview-04-17 |
+    //  gpt-4.1                        |
     let input_file_path = "root/canvas.rs";
     let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs");
     let edit_description = "Implement from_pixels constructor and add tests.";
@@ -597,11 +633,20 @@ fn eval_from_pixels_constructor() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_zode() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |  1.0
+    //  gemini-2.5-pro-preview-03-25   |  1.0
+    //  gemini-2.5-flash-preview-04-17 |  1.0
+    //  gpt-4.1                        |  1.0
     let input_file_path = "root/zode.py";
     let input_content = None;
     let edit_description = "Create the main Zode CLI script";
     eval(
-        200,
+        50,
         1.,
         EvalInput::from_conversation(
             vec![
@@ -694,6 +739,15 @@ fn eval_zode() {
 #[test]
 #[cfg_attr(not(feature = "eval"), ignore)]
 fn eval_add_overwrite_test() {
+    // Results for 2025-05-22
+    //
+    //  Model                          | Pass rate
+    // ============================================
+    //
+    //  claude-3.7-sonnet              |  0.16
+    //  gemini-2.5-pro-preview-03-25   |  0.35
+    //  gemini-2.5-flash-preview-04-17 |
+    //  gpt-4.1                        |
     let input_file_path = "root/action_log.rs";
     let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs");
     let edit_description = "Add a new test for overwriting a file in action_log.rs";
@@ -920,14 +974,11 @@ fn eval_create_empty_file() {
     // thoughts into it. This issue is not specific to empty files, but
     // it's easier to reproduce with them.
     //
+    // Results for 2025-05-21:
     //
     //  Model                          | Pass rate
     // ============================================
     //
-    // --------------------------------------------
-    //           Prompt version: 2025-05-21
-    // --------------------------------------------
-    //
     //  claude-3.7-sonnet              |  1.00
     //  gemini-2.5-pro-preview-03-25   |  1.00
     //  gemini-2.5-flash-preview-04-17 |  1.00
@@ -1430,7 +1481,7 @@ impl EditAgentTest {
                     model.provider_id() == selected_model.provider
                         && model.id() == selected_model.model
                 })
-                .unwrap();
+                .expect("Model not found");
             let provider = models.provider(&model.provider_id()).unwrap();
             (provider, model)
         })?;

crates/assistant_tools/src/templates/create_file_prompt.hbs 🔗

@@ -1,8 +1,10 @@
 You are an expert engineer and your task is to write a new file from scratch.
 
-You MUST respond directly with the file's content, without explanations, additional text or triple backticks.
+You MUST respond with the file's content wrapped in triple backticks (```).
+The backticks should be on their own line.
 The text you output will be saved verbatim as the content of the file.
-Tool calls have been disabled. You MUST start your response directly with the file's new content.
+Tool calls have been disabled.
+Start your response with ```.
 
 <file_path>
 {{path}}

crates/assistant_tools/src/templates/edit_file_prompt.hbs 🔗

@@ -43,7 +43,8 @@ NEW TEXT 3 HERE
 - Always close all tags properly
 
 
-{{!-- This example is important for Gemini 2.5 --}}
+{{!-- The following example adds almost 10% pass rate for Gemini 2.5.
+Claude and gpt-4.1 don't really need it. --}}
 <example>
 <edits>