overwrite_file.rs

 1use agent_settings::AgentProfileId;
 2use anyhow::Result;
 3use async_trait::async_trait;
 4
 5use crate::example::{Example, ExampleContext, ExampleMetadata};
 6
 7pub struct FileOverwriteExample;
 8
/*
This eval tests a fix for a destructive behavior of the `edit_file` tool.
Previously, it would rewrite existing files too aggressively, which often
resulted in content loss.

Model           | Pass rate
----------------|----------
Sonnet 3.7      | 100%
Gemini 2.5 Pro  |  80%
*/
19
20#[async_trait(?Send)]
21impl Example for FileOverwriteExample {
22    fn meta(&self) -> ExampleMetadata {
23        let thread_json = include_str!("threads/overwrite-file.json");
24
25        ExampleMetadata {
26            name: "file_overwrite".to_string(),
27            url: "https://github.com/zed-industries/zed.git".to_string(),
28            revision: "023a60806a8cc82e73bd8d88e63b4b07fc7a0040".to_string(),
29            language_server: None,
30            max_assertions: Some(1),
31            profile_id: AgentProfileId::default(),
32            existing_thread_json: Some(thread_json.to_string()),
33            max_turns: None,
34        }
35    }
36
37    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
38        let response = cx.run_turns(1).await?;
39        let file_overwritten = if let Some(tool_use) = response.find_tool_call("edit_file") {
40            let input = tool_use.parse_input::<EditFileToolInput>()?;
41            match input.mode {
42                EditFileMode::Edit => false,
43                EditFileMode::Create | EditFileMode::Overwrite => {
44                    input.path.ends_with("src/language_model_selector.rs")
45                }
46            }
47        } else {
48            false
49        };
50
51        cx.assert(!file_overwritten, "File should be edited, not overwritten")
52    }
53}