Encourage editing over re-creating a file from scratch (#29870)

Antonio Scandurra created

I also introduced a new eval to prove the encouragement actually makes a
difference.

Release Notes:

- Improved agent behavior when streaming edits, encouraging it to edit
files as opposed to creating them from scratch

Change summary

crates/assistant_tools/src/assistant_tools.rs          |  1 
crates/assistant_tools/src/streaming_edit_file_tool.rs |  3 
crates/eval/src/examples/comment_translation.rs        | 61 ++++++++++++
crates/eval/src/examples/mod.rs                        |  2 
4 files changed, 67 insertions(+)

Detailed changes

crates/assistant_tools/src/assistant_tools.rs 🔗

@@ -54,6 +54,7 @@ pub use edit_file_tool::{EditFileTool, EditFileToolInput};
 pub use find_path_tool::FindPathToolInput;
 pub use open_tool::OpenTool;
 pub use read_file_tool::ReadFileToolInput;
+pub use streaming_edit_file_tool::StreamingEditFileToolInput;
 pub use terminal_tool::TerminalTool;
 
 pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {

crates/assistant_tools/src/streaming_edit_file_tool.rs 🔗

@@ -61,6 +61,9 @@ pub struct StreamingEditFileToolInput {
 
     /// If true, this tool will recreate the file from scratch.
     /// If false, this tool will produce granular edits to an existing file.
+    ///
+    /// When a file already exists or you just created it, always prefer editing
+    /// it as opposed to recreating it from scratch.
     pub create_or_overwrite: bool,
 }
 

crates/eval/src/examples/comment_translation.rs 🔗

@@ -0,0 +1,61 @@
+use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
+use anyhow::Result;
+use assistant_tools::StreamingEditFileToolInput;
+use async_trait::async_trait;
+
+pub struct CommentTranslation;
+
+#[async_trait(?Send)]
+impl Example for CommentTranslation {
+    fn meta(&self) -> ExampleMetadata {
+        ExampleMetadata {
+            name: "comment_translation".to_string(),
+            url: "https://github.com/servo/font-kit.git".to_string(),
+            revision: "504d084e29bce4f60614bc702e91af7f7d9e60ad".to_string(),
+            language_server: None,
+            max_assertions: Some(1),
+        }
+    }
+
+    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
+        cx.push_user_message(r#"
+            Edit the following files and translate all their comments to italian, in this exact order:
+
+            - font-kit/src/family.rs
+            - font-kit/src/canvas.rs
+            - font-kit/src/error.rs
+        "#);
+        cx.run_to_end().await?;
+
+        let mut create_or_overwrite_count = 0;
+        cx.agent_thread().read_with(cx, |thread, cx| {
+            for message in thread.messages() {
+                for tool_use in thread.tool_uses_for_message(message.id, cx) {
+                    if tool_use.name == "edit_file" {
+                        let input: StreamingEditFileToolInput =
+                            serde_json::from_value(tool_use.input)?;
+                        if input.create_or_overwrite {
+                            create_or_overwrite_count += 1;
+                        }
+                    }
+                }
+            }
+
+            anyhow::Ok(())
+        })??;
+        cx.assert_eq(create_or_overwrite_count, 0, "no_creation_or_overwrite")?;
+
+        Ok(())
+    }
+
+    fn diff_assertions(&self) -> Vec<JudgeAssertion> {
+        vec![JudgeAssertion {
+            id: "comments_translated".to_string(),
+            description: concat!(
+                "- Only `family.rs`, `canvas.rs` and `error.rs` should have changed.\n",
+                "- Their doc comments should have been all translated to Italian."
+            )
+            .into(),
+        }]
+    }
+}

crates/eval/src/examples/mod.rs 🔗

@@ -13,6 +13,7 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
 
 mod add_arg_to_trait_method;
 mod code_block_citations;
+mod comment_translation;
 mod file_search;
 mod planets;
 
@@ -22,6 +23,7 @@ pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
         Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
         Rc::new(code_block_citations::CodeBlockCitations),
         Rc::new(planets::Planets),
+        Rc::new(comment_translation::CommentTranslation),
     ];
 
     for example_path in list_declarative_examples(examples_dir).unwrap() {