eval: Fix stalling on tool confirmation (#28786)

Agus Zubiaga and Oleksiy created 9 months ago

The `always_allow_tool_actions` setting was overridden with the
default each time we loaded an example project, causing examples to
stall whenever they ran a tool that needed confirmation. There's now a
separate `runner_settings.json` file where we can configure the
environment for the eval.

Release Notes:

- N/A

---------

Co-authored-by: Oleksiy <oleksiy@zed.dev>

Change summary

Cargo.lock                       |  1 -
crates/eval/Cargo.toml           |  1 -
crates/eval/runner_settings.json |  6 ++++++
crates/eval/src/eval.rs          | 14 +++++---------
crates/eval/src/example.rs       | 27 +++++++++++++++++++++++++--
5 files changed, 36 insertions(+), 13 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -4877,7 +4877,6 @@ version = "0.1.0"
 dependencies = [
  "agent",
  "anyhow",
- "assistant_settings",
  "assistant_tool",
  "assistant_tools",
  "async-watch",

crates/eval/Cargo.toml 🔗

@@ -8,7 +8,6 @@ edition.workspace = true
 agent.workspace = true
 anyhow.workspace = true
 async-watch.workspace = true
-assistant_settings.workspace = true
 assistant_tool.workspace = true
 assistant_tools.workspace = true
 chrono.workspace = true

crates/eval/runner_settings.json 🔗

@@ -0,0 +1,6 @@
+{
+  "assistant": {
+    "always_allow_tool_actions": true,
+    "version": "2"
+  }
+}

crates/eval/src/eval.rs 🔗

@@ -1,6 +1,5 @@
 mod example;
 
-use assistant_settings::AssistantSettings;
 use client::{Client, ProxySettings, UserStore};
 pub(crate) use example::*;
 
@@ -10,7 +9,7 @@ use clap::Parser;
 use extension::ExtensionHostProxy;
 use futures::future;
 use gpui::http_client::{Uri, read_proxy_from_env};
-use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task};
+use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal};
 use gpui_tokio::Tokio;
 use language::LanguageRegistry;
 use language_model::{
@@ -390,13 +389,10 @@ pub fn init(cx: &mut App) -> Arc<AgentAppState> {
     let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
     agent::init(fs.clone(), client.clone(), prompt_builder.clone(), cx);
 
-    AssistantSettings::override_global(
-        AssistantSettings {
-            always_allow_tool_actions: true,
-            ..AssistantSettings::get_global(cx).clone()
-        },
-        cx,
-    );
+    SettingsStore::update_global(cx, |store, cx| {
+        store.set_user_settings(include_str!("../runner_settings.json"), cx)
+    })
+    .unwrap();
 
     Arc::new(AgentAppState {
         languages,

crates/eval/src/example.rs 🔗

@@ -330,7 +330,11 @@ impl Example {
                                 Ok(StopReason::MaxTokens) => {
                                     return Err(anyhow!("Exceeded maximum tokens"));
                                 }
-                                Ok(StopReason::ToolUse) => {}
+                                Ok(StopReason::ToolUse) => {
+                                    if std::env::var("ZED_EVAL_DEBUG").is_ok() {
+                                        println!("{}StopReason: Tool use", log_prefix);
+                                    }
+                                }
                                 Err(error) => {
                                     return Err(anyhow!(error.clone()));
                                 }
@@ -371,7 +375,20 @@ impl Example {
                                     }
                                 })?;
                             }
-                            _ => {}
+                            ThreadEvent::ToolConfirmationNeeded => {
+                                panic!("{}Bug: Tool confirmation should not be required in eval", log_prefix);
+                            },
+                            ThreadEvent::StreamedCompletion |
+                            ThreadEvent::MessageAdded(_) |
+                            ThreadEvent::MessageEdited(_) |
+                            ThreadEvent::MessageDeleted(_) |
+                            ThreadEvent::SummaryChanged |
+                            ThreadEvent::SummaryGenerated |
+                            ThreadEvent::CheckpointChanged => {
+                                if std::env::var("ZED_EVAL_DEBUG").is_ok() {
+                                    println!("{}Event: {:#?}", log_prefix, event);
+                                }
+                            }
                         }
 
                         output_file.flush().log_err();
@@ -387,16 +404,22 @@ impl Example {
 
             event_handler_task.await?;
 
+            println!("{}Stopped", this.log_prefix);
+
             if let Some((_, lsp_store)) = lsp_open_handle_and_store.as_ref() {
                 wait_for_lang_server(lsp_store, this.log_prefix.clone(), cx).await?;
             }
 
+            println!("{}Getting repository diff", this.log_prefix);
             let repository_diff = this.repository_diff().await?;
+
+            println!("{}Getting diagnostics", this.log_prefix);
             let diagnostics = cx
                 .update(move |cx| {
                     cx.spawn(async move |cx| query_lsp_diagnostics(project, cx).await)
                 })?
                 .await?;
+            println!("{}Got diagnostics", this.log_prefix);
 
             drop(subscription);
             drop(lsp_open_handle_and_store);