Cargo.lock 🔗
@@ -683,6 +683,7 @@ dependencies = [
"language_models",
"log",
"markdown",
+ "once_cell",
"open",
"paths",
"portable-pty",
Oleksiy Syvokon created
This change instructs models to wrap new file content in Markdown fences
and introduces a parser for this format. The reasons are:
1. This is the format we put a lot of effort into explaining in the
system prompt.
2. Gemini really prefers to do it.
3. It adds an option for a model to think before writing the content.
The `eval_zode` pass rate for Gemini models goes from 0% to 100%. Other
models were already at 100%; this hasn't changed.
Release Notes:
- N/A
Cargo.lock | 1
crates/assistant_tools/Cargo.toml | 1
crates/assistant_tools/src/edit_agent.rs | 109 ++-
crates/assistant_tools/src/edit_agent/create_file_parser.rs | 218 +++++++
crates/assistant_tools/src/edit_agent/evals.rs | 63 +
crates/assistant_tools/src/templates/create_file_prompt.hbs | 6
crates/assistant_tools/src/templates/edit_file_prompt.hbs | 3
7 files changed, 356 insertions(+), 45 deletions(-)
@@ -683,6 +683,7 @@ dependencies = [
"language_models",
"log",
"markdown",
+ "once_cell",
"open",
"paths",
"portable-pty",
@@ -62,6 +62,7 @@ which.workspace = true
workspace-hack.workspace = true
workspace.workspace = true
zed_llm_client.workspace = true
+once_cell = "1.21.3"
[dev-dependencies]
client = { workspace = true, features = ["test-support"] }
@@ -1,3 +1,4 @@
+mod create_file_parser;
mod edit_parser;
#[cfg(test)]
mod evals;
@@ -6,6 +7,7 @@ use crate::{Template, Templates};
use aho_corasick::AhoCorasick;
use anyhow::Result;
use assistant_tool::ActionLog;
+use create_file_parser::{CreateFileParser, CreateFileParserEvent};
use edit_parser::{EditParser, EditParserEvent, EditParserMetrics};
use futures::{
Stream, StreamExt,
@@ -123,16 +125,16 @@ impl EditAgent {
mpsc::UnboundedReceiver<EditAgentOutputEvent>,
) {
let (output_events_tx, output_events_rx) = mpsc::unbounded();
+ let (parse_task, parse_rx) = Self::parse_create_file_chunks(edit_chunks, cx);
let this = self.clone();
let task = cx.spawn(async move |cx| {
this.action_log
.update(cx, |log, cx| log.buffer_created(buffer.clone(), cx))?;
- let output = this
- .overwrite_with_chunks_internal(buffer, edit_chunks, output_events_tx, cx)
- .await;
+ this.overwrite_with_chunks_internal(buffer, parse_rx, output_events_tx, cx)
+ .await?;
this.project
.update(cx, |project, cx| project.set_agent_location(None, cx))?;
- output
+ parse_task.await
});
(task, output_events_rx)
}
@@ -140,10 +142,10 @@ impl EditAgent {
async fn overwrite_with_chunks_internal(
&self,
buffer: Entity<Buffer>,
- edit_chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
+ mut parse_rx: UnboundedReceiver<Result<CreateFileParserEvent>>,
output_events_tx: mpsc::UnboundedSender<EditAgentOutputEvent>,
cx: &mut AsyncApp,
- ) -> Result<EditAgentOutput> {
+ ) -> Result<()> {
cx.update(|cx| {
buffer.update(cx, |buffer, cx| buffer.set_text("", cx));
self.action_log.update(cx, |log, cx| {
@@ -163,34 +165,31 @@ impl EditAgent {
.ok();
})?;
- let mut raw_edits = String::new();
- pin_mut!(edit_chunks);
- while let Some(chunk) = edit_chunks.next().await {
- let chunk = chunk?;
- raw_edits.push_str(&chunk);
- cx.update(|cx| {
- buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
- self.action_log
- .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
- self.project.update(cx, |project, cx| {
- project.set_agent_location(
- Some(AgentLocation {
- buffer: buffer.downgrade(),
- position: language::Anchor::MAX,
- }),
- cx,
- )
- });
- })?;
- output_events_tx
- .unbounded_send(EditAgentOutputEvent::Edited)
- .ok();
+ while let Some(event) = parse_rx.next().await {
+ match event? {
+ CreateFileParserEvent::NewTextChunk { chunk } => {
+ cx.update(|cx| {
+ buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
+ self.action_log
+ .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
+ self.project.update(cx, |project, cx| {
+ project.set_agent_location(
+ Some(AgentLocation {
+ buffer: buffer.downgrade(),
+ position: language::Anchor::MAX,
+ }),
+ cx,
+ )
+ });
+ })?;
+ output_events_tx
+ .unbounded_send(EditAgentOutputEvent::Edited)
+ .ok();
+ }
+ }
}
- Ok(EditAgentOutput {
- raw_edits,
- parser_metrics: EditParserMetrics::default(),
- })
+ Ok(())
}
pub fn edit(
@@ -435,6 +434,44 @@ impl EditAgent {
(output, rx)
}
+    /// Spawns a background task that runs the raw completion stream through a
+    /// [`CreateFileParser`] and forwards every parser event over a channel.
+    ///
+    /// Returns the task together with the receiving end of that channel. The
+    /// task resolves to an [`EditAgentOutput`] holding the concatenated raw
+    /// model output and default parser metrics. Stream errors are forwarded
+    /// to the receiver as `Err` items; the task itself fails only when an
+    /// event can no longer be sent.
+    fn parse_create_file_chunks(
+        chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
+        cx: &mut AsyncApp,
+    ) -> (
+        Task<Result<EditAgentOutput>>,
+        UnboundedReceiver<Result<CreateFileParserEvent>>,
+    ) {
+        let (events_tx, events_rx) = mpsc::unbounded();
+        let parse_task = cx.background_spawn(async move {
+            pin_mut!(chunks);
+
+            let mut parser = CreateFileParser::new();
+            let mut raw_edits = String::new();
+            while let Some(next) = chunks.next().await {
+                let text = match next {
+                    Ok(text) => text,
+                    Err(error) => {
+                        // Hand the failure to the consumer and keep reading.
+                        events_tx.unbounded_send(Err(error.into()))?;
+                        continue;
+                    }
+                };
+                raw_edits.push_str(&text);
+                for event in parser.push(Some(&text)) {
+                    events_tx.unbounded_send(Ok(event))?;
+                }
+            }
+
+            // The stream is exhausted: `None` asks the parser to flush what is left.
+            for event in parser.push(None) {
+                events_tx.unbounded_send(Ok(event))?;
+            }
+
+            Ok(EditAgentOutput {
+                raw_edits,
+                parser_metrics: EditParserMetrics::default(),
+            })
+        });
+        (parse_task, events_rx)
+    }
+
fn reindent_new_text_chunks(
delta: IndentDelta,
mut stream: impl Unpin + Stream<Item = Result<EditParserEvent>>,
@@ -1138,7 +1175,7 @@ mod tests {
})
);
- chunks_tx.unbounded_send("jkl\n").unwrap();
+ chunks_tx.unbounded_send("```\njkl\n").unwrap();
cx.run_until_parked();
assert_eq!(
drain_events(&mut events),
@@ -1146,7 +1183,7 @@ mod tests {
);
assert_eq!(
buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
- "jkl\n"
+ "jkl"
);
assert_eq!(
project.read_with(cx, |project, _| project.agent_location()),
@@ -1164,7 +1201,7 @@ mod tests {
);
assert_eq!(
buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
- "jkl\nmno\n"
+ "jkl\nmno"
);
assert_eq!(
project.read_with(cx, |project, _| project.agent_location()),
@@ -1174,7 +1211,7 @@ mod tests {
})
);
- chunks_tx.unbounded_send("pqr").unwrap();
+ chunks_tx.unbounded_send("pqr\n```").unwrap();
cx.run_until_parked();
assert_eq!(
drain_events(&mut events),
@@ -0,0 +1,218 @@
+use once_cell::sync::Lazy;
+use regex::Regex;
+use smallvec::SmallVec;
+use util::debug_panic;
+
+/// Opening Markdown fence: an optional preceding newline, three backticks, an
+/// optional language tag (any run of non-whitespace) and the terminating newline.
+///
+/// Declared `static`, not `const`: a `const` is instantiated afresh at every use
+/// site, so each use would build a new `Lazy` and recompile the regex.
+static START_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n?```\S*\n").unwrap());
+
+/// Closing Markdown fence: a newline, three backticks, then nothing but
+/// whitespace up to the end of the input.
+static END_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n```\s*$").unwrap());
+
+/// Event returned by [`CreateFileParser::push`].
+#[derive(Debug)]
+pub enum CreateFileParserEvent {
+ /// A piece of parsed file content.
+ NewTextChunk { chunk: String },
+}
+
+/// Incremental parser that extracts file content from streamed text in which
+/// the content is wrapped in a Markdown code fence.
+#[derive(Debug)]
+pub struct CreateFileParser {
+ /// Current state of the parser's state machine.
+ state: ParserState,
+ /// Input received so far that has been neither discarded nor emitted.
+ buffer: String,
+}
+
+/// States of the [`CreateFileParser`] state machine.
+#[derive(Debug, PartialEq)]
+enum ParserState {
+ /// The opening fence has not been found yet.
+ Pending,
+ /// Past the opening fence; buffered text is emitted as it arrives.
+ WithinText,
+ /// End of input was signalled via `push(None)`; the remainder is flushed.
+ Finishing,
+ /// The final flush has completed.
+ Finished,
+}
+
+impl CreateFileParser {
+    /// Creates a parser that is waiting for the opening fence.
+    pub fn new() -> Self {
+        CreateFileParser {
+            state: ParserState::Pending,
+            buffer: String::new(),
+        }
+    }
+
+    /// Feeds the next piece of input to the parser and returns the events it produced.
+    ///
+    /// `Some(text)` appends `text` to the internal buffer. `None` signals the end
+    /// of the input and triggers the final flush.
+    ///
+    /// * While pending, everything up to and including the first opening fence is
+    ///   discarded.
+    /// * Within the text, the buffer is emitted except for trailing backticks,
+    ///   newlines and spaces, which are held back because they may belong to the
+    ///   closing fence.
+    /// * On finishing, a closing fence at the very end of the buffer is removed;
+    ///   any remaining text is emitted with a trailing newline appended if missing.
+    ///
+    /// The parser must not be used again after `push(None)`.
+    pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
+        match chunk {
+            Some(chunk) => self.buffer.push_str(chunk),
+            None => self.state = ParserState::Finishing,
+        }
+
+        let mut edit_events = SmallVec::new();
+        loop {
+            match self.state {
+                ParserState::Pending => {
+                    if let Some(m) = START_MARKER.find(&self.buffer) {
+                        self.buffer.drain(..m.end());
+                        // Loop again so that text following the fence is handled
+                        // within this same call.
+                        self.state = ParserState::WithinText;
+                    } else {
+                        break;
+                    }
+                }
+                ParserState::WithinText => {
+                    let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
+                    let text_len = text.len();
+
+                    if text_len > 0 {
+                        edit_events.push(CreateFileParserEvent::NewTextChunk {
+                            chunk: self.buffer.drain(..text_len).collect(),
+                        });
+                    }
+                    break;
+                }
+                ParserState::Finishing => {
+                    if let Some(m) = END_MARKER.find(&self.buffer) {
+                        self.buffer.drain(m.start()..);
+                    }
+                    if !self.buffer.is_empty() {
+                        if !self.buffer.ends_with('\n') {
+                            self.buffer.push('\n');
+                        }
+                        edit_events.push(CreateFileParserEvent::NewTextChunk {
+                            chunk: self.buffer.drain(..).collect(),
+                        });
+                    }
+                    self.state = ParserState::Finished;
+                    break;
+                }
+                ParserState::Finished => {
+                    debug_panic!("Can't call parser after finishing");
+                    // NOTE(review): `debug_panic!` presumably only logs in release
+                    // builds. Without this `break` the loop would then never
+                    // terminate, because the state stays `Finished`.
+                    break;
+                }
+            }
+        }
+        edit_events
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use indoc::indoc;
+ use rand::prelude::*;
+ use std::cmp;
+
+ // A bare fenced block yields only the text between the fences.
+ #[gpui::test(iterations = 100)]
+ fn test_happy_path(mut rng: StdRng) {
+ let mut parser = CreateFileParser::new();
+ assert_eq!(
+ parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
+ "Hello world".to_string()
+ );
+ }
+
+ // Prose before the opening fence is discarded.
+ #[gpui::test(iterations = 100)]
+ fn test_cut_prefix(mut rng: StdRng) {
+ let mut parser = CreateFileParser::new();
+ assert_eq!(
+ parse_random_chunks(
+ indoc! {"
+ Let me write this file for you:
+
+ ```
+ Hello world
+ ```
+
+ "},
+ &mut parser,
+ &mut rng
+ ),
+ "Hello world".to_string()
+ );
+ }
+
+ // A language tag on the opening fence is accepted and dropped.
+ #[gpui::test(iterations = 100)]
+ fn test_language_name_on_fences(mut rng: StdRng) {
+ let mut parser = CreateFileParser::new();
+ assert_eq!(
+ parse_random_chunks(
+ indoc! {"
+ ```rust
+ Hello world
+ ```
+
+ "},
+ &mut parser,
+ &mut rng
+ ),
+ "Hello world".to_string()
+ );
+ }
+
+ // Text after the closing fence is kept, together with the fence itself.
+ #[gpui::test(iterations = 100)]
+ fn test_leave_suffix(mut rng: StdRng) {
+ let mut parser = CreateFileParser::new();
+ assert_eq!(
+ parse_random_chunks(
+ indoc! {"
+ Let me write this file for you:
+
+ ```
+ Hello world
+ ```
+
+ The end
+ "},
+ &mut parser,
+ &mut rng
+ ),
+ // This output is malformed, so we're doing our best effort
+ "Hello world\n```\n\nThe end\n".to_string()
+ );
+ }
+
+ // With nested fences only the outermost pair is stripped.
+ #[gpui::test(iterations = 100)]
+ fn test_inner_fences(mut rng: StdRng) {
+ let mut parser = CreateFileParser::new();
+ assert_eq!(
+ parse_random_chunks(
+ indoc! {"
+ Let me write this file for you:
+
+ ```
+ ```
+ Hello world
+ ```
+ ```
+ "},
+ &mut parser,
+ &mut rng
+ ),
+ // This output is malformed, so we're doing our best effort
+ "```\nHello world\n```\n".to_string()
+ );
+ }
+
+ // Splits `input` at randomly chosen byte offsets, feeds the pieces to `parser`
+ // one at a time followed by a final `None`, and returns the concatenation of
+ // all emitted text chunks. The inputs used above are ASCII, so every offset
+ // is a valid `str` slice boundary.
+ fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
+ let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
+ let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
+ chunk_indices.sort();
+ // Make sure the last chunk reaches the end of the input.
+ chunk_indices.push(input.len());
+
+ // The trailing `None` stands for the end-of-input call to `push`.
+ let chunk_indices = chunk_indices
+ .into_iter()
+ .map(Some)
+ .chain(vec![None])
+ .collect::<Vec<Option<usize>>>();
+
+ let mut edit = String::default();
+ let mut last_ix = 0;
+ for chunk_ix in chunk_indices {
+ let mut chunk = None;
+ if let Some(chunk_ix) = chunk_ix {
+ chunk = Some(&input[last_ix..chunk_ix]);
+ last_ix = chunk_ix;
+ }
+
+ for event in parser.push(chunk) {
+ match event {
+ CreateFileParserEvent::NewTextChunk { chunk } => {
+ edit.push_str(&chunk);
+ }
+ }
+ }
+ }
+ edit
+ }
+}
@@ -163,6 +163,15 @@ fn eval_delete_run_git_blame() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_translate_doc_comments() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet |
+ // gemini-2.5-pro-preview-03-25 | 1.0
+ // gemini-2.5-flash-preview-04-17 |
+ // gpt-4.1 |
let input_file_path = "root/canvas.rs";
let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs");
let edit_description = "Translate all doc comments to Italian";
@@ -216,6 +225,15 @@ fn eval_translate_doc_comments() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet | 0.98
+ // gemini-2.5-pro-preview-03-25 | 0.99
+ // gemini-2.5-flash-preview-04-17 |
+ // gpt-4.1 |
let input_file_path = "root/lib.rs";
let input_file_content =
include_str!("evals/fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs");
@@ -332,6 +350,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_disable_cursor_blinking() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet |
+ // gemini-2.5-pro-preview-03-25 | 1.0
+ // gemini-2.5-flash-preview-04-17 |
+ // gpt-4.1 |
let input_file_path = "root/editor.rs";
let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
let edit_description = "Comment out the call to `BlinkManager::enable`";
@@ -406,6 +433,15 @@ fn eval_disable_cursor_blinking() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_from_pixels_constructor() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet |
+ // gemini-2.5-pro-preview-03-25 | 0.94
+ // gemini-2.5-flash-preview-04-17 |
+ // gpt-4.1 |
let input_file_path = "root/canvas.rs";
let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs");
let edit_description = "Implement from_pixels constructor and add tests.";
@@ -597,11 +633,20 @@ fn eval_from_pixels_constructor() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_zode() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet | 1.0
+ // gemini-2.5-pro-preview-03-25 | 1.0
+ // gemini-2.5-flash-preview-04-17 | 1.0
+ // gpt-4.1 | 1.0
let input_file_path = "root/zode.py";
let input_content = None;
let edit_description = "Create the main Zode CLI script";
eval(
- 200,
+ 50,
1.,
EvalInput::from_conversation(
vec![
@@ -694,6 +739,15 @@ fn eval_zode() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_add_overwrite_test() {
+ // Results for 2025-05-22
+ //
+ // Model | Pass rate
+ // ============================================
+ //
+ // claude-3.7-sonnet | 0.16
+ // gemini-2.5-pro-preview-03-25 | 0.35
+ // gemini-2.5-flash-preview-04-17 |
+ // gpt-4.1 |
let input_file_path = "root/action_log.rs";
let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs");
let edit_description = "Add a new test for overwriting a file in action_log.rs";
@@ -920,14 +974,11 @@ fn eval_create_empty_file() {
// thoughts into it. This issue is not specific to empty files, but
// it's easier to reproduce with them.
//
+ // Results for 2025-05-21:
//
// Model | Pass rate
// ============================================
//
- // --------------------------------------------
- // Prompt version: 2025-05-21
- // --------------------------------------------
- //
// claude-3.7-sonnet | 1.00
// gemini-2.5-pro-preview-03-25 | 1.00
// gemini-2.5-flash-preview-04-17 | 1.00
@@ -1430,7 +1481,7 @@ impl EditAgentTest {
model.provider_id() == selected_model.provider
&& model.id() == selected_model.model
})
- .unwrap();
+ .expect("Model not found");
let provider = models.provider(&model.provider_id()).unwrap();
(provider, model)
})?;
@@ -1,8 +1,10 @@
You are an expert engineer and your task is to write a new file from scratch.
-You MUST respond directly with the file's content, without explanations, additional text or triple backticks.
+You MUST respond with the file's content wrapped in triple backticks (```).
+The backticks should be on their own line.
The text you output will be saved verbatim as the content of the file.
-Tool calls have been disabled. You MUST start your response directly with the file's new content.
+Tool calls have been disabled.
+Start your response with ```.
<file_path>
{{path}}
@@ -43,7 +43,8 @@ NEW TEXT 3 HERE
- Always close all tags properly
-{{!-- This example is important for Gemini 2.5 --}}
+{{!-- The following example adds almost 10% pass rate for Gemini 2.5.
+Claude and gpt-4.1 don't really need it. --}}
<example>
<edits>