Detailed changes
@@ -49,7 +49,7 @@ pub use crate::context::{ContextLoadResult, LoadedContext};
pub use crate::inline_assistant::InlineAssistant;
use crate::slash_command_settings::SlashCommandSettings;
pub use crate::thread::{Message, MessageSegment, Thread, ThreadEvent};
-pub use crate::thread_store::{TextThreadStore, ThreadStore};
+pub use crate::thread_store::{SerializedThread, TextThreadStore, ThreadStore};
pub use agent_diff::{AgentDiffPane, AgentDiffToolbar};
pub use context_store::ContextStore;
pub use ui::preview::{all_agent_previews, get_agent_preview};
@@ -458,7 +458,7 @@ impl Thread {
tools: Entity<ToolWorkingSet>,
prompt_builder: Arc<PromptBuilder>,
project_context: SharedProjectContext,
- window: &mut Window,
+ window: Option<&mut Window>, // None in headless mode
cx: &mut Context<Self>,
) -> Self {
let next_message_id = MessageId(
@@ -386,6 +386,25 @@ impl ThreadStore {
})
}
+ pub fn create_thread_from_serialized(
+ &mut self,
+ serialized: SerializedThread,
+ cx: &mut Context<Self>,
+ ) -> Entity<Thread> {
+ cx.new(|cx| {
+ Thread::deserialize(
+ ThreadId::new(),
+ serialized,
+ self.project.clone(),
+ self.tools.clone(),
+ self.prompt_builder.clone(),
+ self.project_context.clone(),
+ None,
+ cx,
+ )
+ })
+ }
+
pub fn open_thread(
&self,
id: &ThreadId,
@@ -411,7 +430,7 @@ impl ThreadStore {
this.tools.clone(),
this.prompt_builder.clone(),
this.project_context.clone(),
- window,
+ Some(window),
cx,
)
})
@@ -54,15 +54,19 @@ impl ToolUseState {
/// Constructs a [`ToolUseState`] from the given list of [`SerializedMessage`]s.
///
/// Accepts a function to filter the tools that should be used to populate the state.
+ ///
+ /// If `window` is `None` (e.g., when in headless mode or when running evals),
+ /// tool cards won't be deserialized
pub fn from_serialized_messages(
tools: Entity<ToolWorkingSet>,
messages: &[SerializedMessage],
project: Entity<Project>,
- window: &mut Window,
+ window: Option<&mut Window>, // None in headless mode
cx: &mut App,
) -> Self {
let mut this = Self::new(tools);
let mut tool_names_by_id = HashMap::default();
+ let mut window = window;
for message in messages {
match message.role {
@@ -107,12 +111,17 @@ impl ToolUseState {
},
);
- if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
- if let Some(output) = tool_result.output.clone() {
- if let Some(card) =
- tool.deserialize_card(output, project.clone(), window, cx)
- {
- this.tool_result_cards.insert(tool_use_id, card);
+ if let Some(window) = &mut window {
+ if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
+ if let Some(output) = tool_result.output.clone() {
+ if let Some(card) = tool.deserialize_card(
+ output,
+ project.clone(),
+ window,
+ cx,
+ ) {
+ this.tool_result_cards.insert(tool_use_id, card);
+ }
}
}
}
@@ -42,7 +42,7 @@ use crate::list_directory_tool::ListDirectoryTool;
use crate::now_tool::NowTool;
use crate::thinking_tool::ThinkingTool;
-pub use edit_file_tool::EditFileToolInput;
+pub use edit_file_tool::{EditFileMode, EditFileToolInput};
pub use find_path_tool::FindPathToolInput;
pub use open_tool::OpenTool;
pub use read_file_tool::{ReadFileTool, ReadFileToolInput};
@@ -1,5 +1,9 @@
use super::*;
-use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput};
+use crate::{
+ ReadFileToolInput,
+ edit_file_tool::{EditFileMode, EditFileToolInput},
+ grep_tool::GrepToolInput,
+};
use Role::*;
use anyhow::anyhow;
use assistant_tool::ToolRegistry;
@@ -71,7 +75,7 @@ fn eval_extract_handle_command_output() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -127,7 +131,7 @@ fn eval_delete_run_git_blame() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -182,7 +186,7 @@ fn eval_translate_doc_comments() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -297,7 +301,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -372,7 +376,7 @@ fn eval_disable_cursor_blinking() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -566,7 +570,7 @@ fn eval_from_pixels_constructor() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
)],
),
@@ -643,7 +647,7 @@ fn eval_zode() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: true,
+ mode: EditFileMode::Create,
},
),
],
@@ -888,7 +892,7 @@ fn eval_add_overwrite_test() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
},
),
],
@@ -76,12 +76,22 @@ pub struct EditFileToolInput {
/// </example>
pub path: PathBuf,
- /// If true, this tool will recreate the file from scratch.
- /// If false, this tool will produce granular edits to an existing file.
+ /// The mode of operation on the file. Possible values:
+ /// - 'edit': Make granular edits to an existing file.
+ /// - 'create': Create a new file if it doesn't exist.
+ /// - 'overwrite': Replace the entire contents of an existing file.
///
- /// When a file already exists or you just created it, always prefer editing
+ /// When a file already exists or you just created it, prefer editing
/// it as opposed to recreating it from scratch.
- pub create_or_overwrite: bool,
+ pub mode: EditFileMode,
+}
+
+#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum EditFileMode {
+ Edit,
+ Create,
+ Overwrite,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -195,7 +205,11 @@ impl Tool for EditFileTool {
.as_ref()
.map_or(false, |file| file.disk_state().exists())
})?;
- if !input.create_or_overwrite && !exists {
+ let create_or_overwrite = match input.mode {
+ EditFileMode::Create | EditFileMode::Overwrite => true,
+ _ => false,
+ };
+ if !create_or_overwrite && !exists {
return Err(anyhow!("{} not found", input.path.display()));
}
@@ -207,7 +221,7 @@ impl Tool for EditFileTool {
})
.await;
- let (output, mut events) = if input.create_or_overwrite {
+ let (output, mut events) = if create_or_overwrite {
edit_agent.overwrite(
buffer.clone(),
input.display_description.clone(),
@@ -876,7 +890,7 @@ mod tests {
let input = serde_json::to_value(EditFileToolInput {
display_description: "Some edit".into(),
path: "root/nonexistent_file.txt".into(),
- create_or_overwrite: false,
+ mode: EditFileMode::Edit,
})
.unwrap();
Arc::new(EditFileTool)
@@ -711,9 +711,9 @@ fn print_report(
.values()
.flat_map(|results| {
results.iter().map(|(example, _)| {
- let absolute_path = example.run_directory.join("last.messages.json");
- pathdiff::diff_paths(&absolute_path, run_dir)
- .unwrap_or_else(|| absolute_path.clone())
+ let absolute_path = run_dir.join(example.run_directory.join("last.messages.json"));
+ let cwd = std::env::current_dir().expect("Can't get current dir");
+ pathdiff::diff_paths(&absolute_path, cwd).unwrap_or_else(|| absolute_path.clone())
})
})
.collect::<Vec<_>>();
@@ -48,6 +48,7 @@ pub struct ExampleMetadata {
pub language_server: Option<LanguageServer>,
pub max_assertions: Option<usize>,
pub profile_id: AgentProfileId,
+ pub existing_thread_json: Option<String>,
}
#[derive(Clone, Debug)]
@@ -477,12 +478,16 @@ impl Response {
tool_name: &'static str,
cx: &mut ExampleContext,
) -> Result<&ToolUse> {
- let result = self.messages.iter().find_map(|msg| {
+ let result = self.find_tool_call(tool_name);
+ cx.assert_some(result, format!("called `{}`", tool_name))
+ }
+
+ pub fn find_tool_call(&self, tool_name: &str) -> Option<&ToolUse> {
+ self.messages.iter().rev().find_map(|msg| {
msg.tool_use
.iter()
.find(|tool_use| tool_use.name == tool_name)
- });
- cx.assert_some(result, format!("called `{}`", tool_name))
+ })
}
#[allow(dead_code)]
@@ -21,6 +21,7 @@ impl Example for AddArgToTraitMethod {
}),
max_assertions: None,
profile_id: AgentProfileId::default(),
+ existing_thread_json: None,
}
}
@@ -22,6 +22,7 @@ impl Example for CodeBlockCitations {
}),
max_assertions: None,
profile_id: AgentProfileId::default(),
+ existing_thread_json: None,
}
}
@@ -1,7 +1,7 @@
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
use anyhow::Result;
use assistant_settings::AgentProfileId;
-use assistant_tools::EditFileToolInput;
+use assistant_tools::{EditFileMode, EditFileToolInput};
use async_trait::async_trait;
pub struct CommentTranslation;
@@ -16,6 +16,7 @@ impl Example for CommentTranslation {
language_server: None,
max_assertions: Some(1),
profile_id: AgentProfileId::default(),
+ existing_thread_json: None,
}
}
@@ -35,7 +36,7 @@ impl Example for CommentTranslation {
for tool_use in thread.tool_uses_for_message(message.id, cx) {
if tool_use.name == "edit_file" {
let input: EditFileToolInput = serde_json::from_value(tool_use.input)?;
- if input.create_or_overwrite {
+ if !matches!(input.mode, EditFileMode::Edit) {
create_or_overwrite_count += 1;
}
}
@@ -18,6 +18,7 @@ impl Example for FileSearchExample {
language_server: None,
max_assertions: Some(3),
profile_id: AgentProfileId::default(),
+ existing_thread_json: None,
}
}
@@ -16,6 +16,7 @@ mod add_arg_to_trait_method;
mod code_block_citations;
mod comment_translation;
mod file_search;
+mod overwrite_file;
mod planets;
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
@@ -25,6 +26,7 @@ pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
Rc::new(code_block_citations::CodeBlockCitations),
Rc::new(planets::Planets),
Rc::new(comment_translation::CommentTranslation),
+ Rc::new(overwrite_file::FileOverwriteExample),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {
@@ -45,6 +47,7 @@ impl DeclarativeExample {
pub fn load(example_path: &Path) -> Result<Self> {
let name = Self::name_from_path(example_path);
let base: ExampleToml = toml::from_str(&fs::read_to_string(&example_path)?)?;
+ let example_dir = example_path.parent().unwrap();
let language_server = if base.require_lsp {
Some(crate::example::LanguageServer {
@@ -63,6 +66,14 @@ impl DeclarativeExample {
AgentProfileId::default()
};
+ let existing_thread_json = if let Some(path) = base.existing_thread_path {
+ let content = fs::read_to_string(example_dir.join(&path))
+ .unwrap_or_else(|_| panic!("Failed to read existing thread file: {}", path));
+ Some(content)
+ } else {
+ None
+ };
+
let metadata = ExampleMetadata {
name,
url: base.url,
@@ -70,6 +81,7 @@ impl DeclarativeExample {
language_server,
max_assertions: None,
profile_id,
+ existing_thread_json,
};
Ok(DeclarativeExample {
@@ -110,6 +122,8 @@ pub struct ExampleToml {
pub diff_assertions: BTreeMap<String, String>,
#[serde(default)]
pub thread_assertions: BTreeMap<String, String>,
+ #[serde(default)]
+ pub existing_thread_path: Option<String>,
}
#[async_trait(?Send)]
@@ -0,0 +1,49 @@
+use anyhow::Result;
+use assistant_settings::AgentProfileId;
+use assistant_tools::{EditFileMode, EditFileToolInput};
+use async_trait::async_trait;
+
+use crate::example::{Example, ExampleContext, ExampleMetadata};
+
+pub struct FileOverwriteExample;
+
+/*
+This eval tests a fix for a destructive behavior of the `edit_file` tool.
+Previously, it would rewrite existing files too aggressively, which often
+resulted in content loss.
+
+Pass rate before the fix: 10%
+Pass rate after the fix: 100%
+*/
+
+#[async_trait(?Send)]
+impl Example for FileOverwriteExample {
+ fn meta(&self) -> ExampleMetadata {
+ let thread_json = include_str!("threads/overwrite-file.json");
+
+ ExampleMetadata {
+ name: "file_overwrite".to_string(),
+ url: "https://github.com/zed-industries/zed.git".to_string(),
+ revision: "023a60806a8cc82e73bd8d88e63b4b07fc7a0040".to_string(),
+ language_server: None,
+ max_assertions: Some(1),
+ profile_id: AgentProfileId::default(),
+ existing_thread_json: Some(thread_json.to_string()),
+ }
+ }
+
+ async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
+ let response = cx.run_turns(1).await?;
+ let file_overwritten = if let Some(tool_use) = response.find_tool_call("edit_file") {
+ let input = tool_use.parse_input::<EditFileToolInput>()?;
+ match input.mode {
+ EditFileMode::Edit => false,
+ EditFileMode::Create | EditFileMode::Overwrite => true,
+ }
+ } else {
+ false
+ };
+
+ cx.assert(!file_overwritten, "File should be edited, not overwritten")
+ }
+}
@@ -18,6 +18,7 @@ impl Example for Planets {
language_server: None,
max_assertions: None,
profile_id: AgentProfileId::default(),
+ existing_thread_json: None,
}
}
@@ -0,0 +1,262 @@
+{
+ "completion_mode": "normal",
+ "cumulative_token_usage": {
+ "cache_creation_input_tokens": 18383,
+ "cache_read_input_tokens": 97250,
+ "input_tokens": 45,
+ "output_tokens": 776
+ },
+ "detailed_summary_state": "NotGenerated",
+ "exceeded_window_error": null,
+ "initial_project_snapshot": {
+ "timestamp": "2025-05-08T14:31:16.701157512Z",
+ "unsaved_buffer_paths": [],
+ "worktree_snapshots": [
+ {
+ "git_state": {
+ "current_branch": null,
+ "diff": "diff --git a/crates/language_model_selector/src/language_model_selector.rs b/crates/language_model_selector/src/language_model_selector.rs\nindex 6775bee98a..e25c9e1415 100644\n--- a/crates/language_model_selector/src/language_model_selector.rs\n+++ b/crates/language_model_selector/src/language_model_selector.rs\n@@ -410,7 +410,8 @@ impl ModelMatcher {\n }\n \n pub fn is_match(self: &Self, info: &ModelInfo) -> bool {\n- self.matched_ids.contains(&info.model.id().0)\n+ let q = (info.model.provider_id(), info.model.id());\n+ self.matched_models.contains(&q)\n }\n }\n \n",
+ "head_sha": "9245656485e58a5d6d717d82209bc8c57cb9c539",
+ "remote_url": "git@github.com:zed-industries/zed.git"
+ },
+ "worktree_path": "/home/silver/develop/zed"
+ }
+ ]
+ },
+ "messages": [
+ {
@@ -1,4 +1,4 @@
-use agent::{Message, MessageSegment, ThreadStore};
+use agent::{Message, MessageSegment, SerializedThread, ThreadStore};
use anyhow::{Context, Result, anyhow, bail};
use assistant_tool::ToolWorkingSet;
use client::proto::LspWorkProgress;
@@ -312,7 +312,14 @@ impl ExampleInstance {
thread_store.update(cx, |thread_store, cx| thread_store.load_profile_by_id(profile_id, cx)).expect("Failed to load profile");
let thread =
- thread_store.update(cx, |thread_store, cx| thread_store.create_thread(cx))?;
+ thread_store.update(cx, |thread_store, cx| {
+ if let Some(json) = &meta.existing_thread_json {
+ let serialized = SerializedThread::from_json(json.as_bytes()).expect("Can't read serialized thread");
+ thread_store.create_thread_from_serialized(serialized, cx)
+ } else {
+ thread_store.create_thread(cx)
+ }
+ })?;
thread.update(cx, |thread, _cx| {