Add eval for open_tool (#29801)

Richard Feldman created

Also update the `open` tool's description to say that it should only be used when the user explicitly requests it.

Release Notes:

- N/A

Change summary

crates/assistant_tools/src/assistant_tools.rs       | 10 -
crates/assistant_tools/src/open_tool/description.md |  3 
crates/eval/src/eval.rs                             | 13 +-
crates/eval/src/examples/mod.rs                     |  2 
crates/eval/src/examples/planets.rs                 | 73 +++++++++++++++
5 files changed, 90 insertions(+), 11 deletions(-)

Detailed changes

crates/assistant_tools/src/assistant_tools.rs 🔗

@@ -48,27 +48,25 @@ use crate::code_action_tool::CodeActionTool;
 use crate::code_symbols_tool::CodeSymbolsTool;
 use crate::contents_tool::ContentsTool;
 use crate::create_directory_tool::CreateDirectoryTool;
-use crate::create_file_tool::CreateFileTool;
 use crate::delete_path_tool::DeletePathTool;
 use crate::diagnostics_tool::DiagnosticsTool;
-use crate::edit_file_tool::EditFileTool;
 use crate::fetch_tool::FetchTool;
 use crate::find_path_tool::FindPathTool;
 use crate::grep_tool::GrepTool;
 use crate::list_directory_tool::ListDirectoryTool;
 use crate::now_tool::NowTool;
-use crate::open_tool::OpenTool;
 use crate::read_file_tool::ReadFileTool;
 use crate::rename_tool::RenameTool;
 use crate::streaming_edit_file_tool::StreamingEditFileTool;
 use crate::symbol_info_tool::SymbolInfoTool;
-use crate::terminal_tool::TerminalTool;
 use crate::thinking_tool::ThinkingTool;
 
-pub use create_file_tool::CreateFileToolInput;
-pub use edit_file_tool::EditFileToolInput;
+pub use create_file_tool::{CreateFileTool, CreateFileToolInput};
+pub use edit_file_tool::{EditFileTool, EditFileToolInput};
 pub use find_path_tool::FindPathToolInput;
+pub use open_tool::OpenTool;
 pub use read_file_tool::ReadFileToolInput;
+pub use terminal_tool::TerminalTool;
 
 pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
     assistant_tool::init(cx);

crates/assistant_tools/src/open_tool/description.md 🔗

@@ -4,3 +4,6 @@ This tool opens a file or URL with the default application associated with it on
 - On Linux, it uses something like `xdg-open`, `gio open`, `gnome-open`, `kde-open`, `wslview` as appropriate
 
 For example, it can open a web browser with a URL, open a PDF file with the default PDF viewer, etc.
+
+You MUST ONLY use this tool when the user has explicitly requested opening something. You MUST NEVER assume that
+the user would like for you to use this tool.

crates/eval/src/eval.rs 🔗

@@ -169,11 +169,14 @@ fn main() {
                     continue;
                 }
 
-                if meta.language_server.map_or(false, |language| {
-                    !languages.contains(&language.file_extension)
-                }) {
-                    skipped.push(meta.name);
-                    continue;
+                if let Some(language) = meta.language_server {
+                    if !languages.contains(&language.file_extension) {
+                        panic!(
+                            "Eval for {:?} could not be run because no language server was found for extension {:?}",
+                            meta.name,
+                            language.file_extension
+                        );
+                    }
                 }
 
                 // TODO: This creates a worktree per repetition. Ideally these examples should

crates/eval/src/examples/mod.rs 🔗

@@ -14,12 +14,14 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
 mod add_arg_to_trait_method;
 mod code_block_citations;
 mod file_search;
+mod planets;
 
 pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
     let mut threads: Vec<Rc<dyn Example>> = vec![
         Rc::new(file_search::FileSearchExample),
         Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
         Rc::new(code_block_citations::CodeBlockCitations),
+        Rc::new(planets::Planets),
     ];
 
     for example_path in list_declarative_examples(examples_dir).unwrap() {

crates/eval/src/examples/planets.rs 🔗

@@ -0,0 +1,73 @@
+use anyhow::Result;
+use assistant_tool::Tool;
+use assistant_tools::{OpenTool, TerminalTool};
+use async_trait::async_trait;
+
+use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
+
+pub struct Planets;
+
+#[async_trait(?Send)]
+impl Example for Planets {
+    fn meta(&self) -> ExampleMetadata {
+        ExampleMetadata {
+            name: "planets".to_string(),
+            url: "https://github.com/roc-lang/roc".to_string(), // This commit in this repo is just the Apache2 license,
+            revision: "59e49c75214f60b4dc4a45092292061c8c26ce27".to_string(), // so effectively a blank project.
+            language_server: None,
+            max_assertions: None,
+        }
+    }
+
+    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
+        cx.push_user_message(
+            r#"
+            Make a plain JavaScript web page which renders an animated 3D solar system.
+            Let me drag to rotate the camera around.
+            Do not use npm.
+            "#
+            .to_string(),
+        );
+
+        let response = cx.run_to_end().await?;
+        let mut open_tool_uses = 0;
+        let mut terminal_tool_uses = 0;
+
+        for tool_use in response.tool_uses() {
+            if tool_use.name == OpenTool.name() {
+                open_tool_uses += 1;
+            } else if tool_use.name == TerminalTool.name() {
+                terminal_tool_uses += 1;
+            }
+        }
+
+        // The open tool should only be used when requested, which it was not.
+        cx.assert_eq(open_tool_uses, 0, "`open` tool was not used")
+            .ok();
+        // No reason to use the terminal if not using npm.
+        cx.assert_eq(terminal_tool_uses, 0, "`terminal` tool was not used")
+            .ok();
+
+        Ok(())
+    }
+
+    fn diff_assertions(&self) -> Vec<JudgeAssertion> {
+        vec![
+            JudgeAssertion {
+                id: "animated solar system".to_string(),
+                description: "This page should render a solar system, and it should be animated."
+                    .to_string(),
+            },
+            JudgeAssertion {
+                id: "drag to rotate camera".to_string(),
+                description: "The user can drag to rotate the camera around.".to_string(),
+            },
+            JudgeAssertion {
+                id: "plain JavaScript".to_string(),
+                description:
+                    "The code base uses plain JavaScript and no npm, along with HTML and CSS."
+                        .to_string(),
+            },
+        ]
+    }
+}