1use agent_settings::AgentProfileId;
2use anyhow::Result;
3use async_trait::async_trait;
4
5use crate::example::{Example, ExampleContext, ExampleMetadata};
6
/// Eval example that checks the `edit_file` tool edits an existing file
/// instead of creating or overwriting it.
///
/// The type carries no state; all behavior lives in its `Example` impl.
#[derive(Debug, Clone, Copy, Default)]
pub struct FileOverwriteExample;
8
9/*
10This eval tests a fix for a destructive behavior of the `edit_file` tool.
11Previously, it would rewrite existing files too aggressively, which often
12resulted in content loss.
13
Model           | Pass rate
----------------|----------
Sonnet 3.7      | 100%
Gemini 2.5 Pro  |  80%
18*/
19
20#[async_trait(?Send)]
21impl Example for FileOverwriteExample {
22 fn meta(&self) -> ExampleMetadata {
23 let thread_json = include_str!("threads/overwrite-file.json");
24
25 ExampleMetadata {
26 name: "file_overwrite".to_string(),
27 url: "https://github.com/zed-industries/zed.git".to_string(),
28 revision: "023a60806a8cc82e73bd8d88e63b4b07fc7a0040".to_string(),
29 language_server: None,
30 max_assertions: Some(1),
31 profile_id: AgentProfileId::default(),
32 existing_thread_json: Some(thread_json.to_string()),
33 max_turns: None,
34 }
35 }
36
37 async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
38 let response = cx.run_turns(1).await?;
39 let file_overwritten = if let Some(tool_use) = response.find_tool_call("edit_file") {
40 let input = tool_use.parse_input::<EditFileToolInput>()?;
41 match input.mode {
42 EditFileMode::Edit => false,
43 EditFileMode::Create | EditFileMode::Overwrite => {
44 input.path.ends_with("src/language_model_selector.rs")
45 }
46 }
47 } else {
48 false
49 };
50
51 cx.assert(!file_overwritten, "File should be edited, not overwritten")
52 }
53}