overwrite_file.rs

 1use agent::{EditFileMode, EditFileToolInput};
 2use agent_settings::AgentProfileId;
 3use anyhow::Result;
 4use async_trait::async_trait;
 5
 6use crate::example::{Example, ExampleContext, ExampleMetadata};
 7
/// Eval example that checks whether the agent's `edit_file` tool call edits
/// an existing file in place instead of creating or overwriting it.
pub struct FileOverwriteExample;
 9
10/*
11This eval tests a fix for a destructive behavior of the `edit_file` tool.
12Previously, it would rewrite existing files too aggressively, which often
13resulted in content loss.
14
15Model           | Pass rate
16----------------|----------
17Sonnet 3.7      | 100%
18Gemini 2.5 Pro  |  80%
19*/
20
21#[async_trait(?Send)]
22impl Example for FileOverwriteExample {
23    fn meta(&self) -> ExampleMetadata {
24        let thread_json = include_str!("threads/overwrite-file.json");
25
26        ExampleMetadata {
27            name: "file_overwrite".to_string(),
28            url: "https://github.com/zed-industries/zed.git".to_string(),
29            revision: "023a60806a8cc82e73bd8d88e63b4b07fc7a0040".to_string(),
30            language_server: None,
31            max_assertions: Some(1),
32            profile_id: AgentProfileId::default(),
33            existing_thread_json: Some(thread_json.to_string()),
34            max_turns: None,
35        }
36    }
37
38    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
39        let response = cx.proceed_with_max_turns(1).await?;
40        let tool_use = response.expect_tool_call("edit_file", cx)?;
41        let input = tool_use.parse_input::<EditFileToolInput>()?;
42        let file_overwritten = match input.mode {
43            EditFileMode::Edit => false,
44            EditFileMode::Create | EditFileMode::Overwrite => {
45                input.path.ends_with("src/language_model_selector.rs")
46            }
47        };
48
49        cx.assert(!file_overwritten, "File should be edited, not overwritten")
50    }
51}