Cargo.lock 🔗
@@ -5912,6 +5912,7 @@ dependencies = [
"clap",
"client",
"ctrlc",
+ "db",
"debug_adapter_extension",
"env_logger 0.11.8",
"extension",
Ben Brandt created
## Context
Fixes some issues I ran into when running `eval_cli` on a remote machine.
## Self-Review Checklist
<!-- Check before requesting review: -->
- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable
Release Notes:
- N/A
Cargo.lock | 1
crates/eval_cli/Cargo.toml | 1
crates/eval_cli/src/headless.rs | 4 +++
crates/eval_cli/src/main.rs | 33 +++++++++++++++++++++++----
crates/eval_cli/zed_eval/agent.py | 21 ++++++++++++++++-
crates/eval_cli/zed_eval/install.sh.j2 | 6 +++++
6 files changed, 59 insertions(+), 7 deletions(-)
@@ -5912,6 +5912,7 @@ dependencies = [
"clap",
"client",
"ctrlc",
+ "db",
"debug_adapter_extension",
"env_logger 0.11.8",
"extension",
@@ -21,6 +21,7 @@ anyhow.workspace = true
clap.workspace = true
client.workspace = true
ctrlc = { version = "3.5", features = ["termination"] }
+db.workspace = true
debug_adapter_extension.workspace = true
env_logger.workspace = true
extension.workspace = true
@@ -2,6 +2,7 @@ use std::path::PathBuf;
use std::sync::Arc;
use client::{Client, ProxySettings, UserStore};
+use db::AppDatabase;
use extension::ExtensionHostProxy;
use fs::RealFs;
use gpui::http_client::read_proxy_from_env;
@@ -61,6 +62,9 @@ pub fn init(cx: &mut App) -> Arc<AgentCliAppState> {
let client = Client::production(cx);
cx.set_http_client(client.http_client());
+ let app_db = AppDatabase::new();
+ cx.set_global(app_db);
+
let git_binary_path = None;
let fs = Arc::new(RealFs::new(
git_binary_path,
@@ -82,8 +82,21 @@ struct Args {
timeout: Option<u64>,
/// Directory for output artifacts (result.json, thread.md, thread.json).
- #[arg(long, default_value = "/logs/agent")]
+ #[arg(long, default_value = ".")]
output_dir: PathBuf,
+
+ /// Disable staff mode (staff mode is enabled by default).
+ #[arg(long)]
+ no_staff: bool,
+
+ /// Reasoning effort level for models that support thinking (low, medium, high).
+ /// Defaults to "high" whenever thinking is enabled; ignored when thinking is disabled.
+ #[arg(long)]
+ reasoning_effort: Option<String>,
+
+ /// Enable or disable extended thinking. Defaults to model auto-detection if omitted.
+ #[arg(long)]
+ thinking: Option<bool>,
}
enum AgentOutcome {
@@ -154,7 +167,7 @@ fn main() {
app.run(move |cx| {
let app_state = headless::init(cx);
- cx.set_staff(true);
+ cx.set_staff(!args.no_staff);
let auth_tasks = LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
registry
@@ -166,6 +179,8 @@ fn main() {
let model_name = args.model.clone();
let timeout = args.timeout;
+ let thinking_override = args.thinking;
+ let reasoning_effort = args.reasoning_effort.clone();
cx.spawn(async move |cx| {
futures::future::join_all(auth_tasks).await;
@@ -178,6 +193,8 @@ fn main() {
&instruction,
&model_name,
timeout,
+ thinking_override,
+ reasoning_effort.as_deref(),
Some(&output_dir),
cx,
)
@@ -257,6 +274,8 @@ async fn run_agent(
instruction: &str,
model_name: &str,
timeout: Option<u64>,
+ thinking_override: Option<bool>,
+ reasoning_effort: Option<&str>,
output_dir: Option<&std::path::Path>,
cx: &mut AsyncApp,
) -> (Result<AgentOutcome>, Option<language_model::TokenUsage>) {
@@ -292,10 +311,14 @@ async fn run_agent(
anyhow::Ok(())
})?;
- let (enable_thinking, effort) = if supports_thinking {
- (true, "\"high\"")
+ let enable_thinking = thinking_override.unwrap_or(supports_thinking);
+ let effort = if enable_thinking {
+ match reasoning_effort {
+ Some(level) => format!("\"{level}\""),
+ None => "\"high\"".to_string(),
+ }
} else {
- (false, "null")
+ "null".to_string()
};
let provider_id = selected.provider.0.to_string();
let model_id = selected.model.0.to_string();
@@ -144,15 +144,32 @@ class ZedAgent(BaseInstalledAgent):
if timeout:
parts.append(f"--timeout {timeout}")
+ staff = self._extra_env.get("EVAL_CLI_STAFF")
+ if staff and staff.lower() == "false":
+ parts.append("--no-staff")
+
+ reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
+ if reasoning_effort:
+ parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
+
+ enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
+ if enable_thinking:
+ if enable_thinking.lower() == "true":
+ parts.append("--enable-thinking")
+ elif enable_thinking.lower() == "false":
+ parts.append("--disable-thinking")
+
parts.append(f"--instruction {escaped_instruction}")
- eval_cli_command = " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
+ eval_cli_command = (
+ " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
+ )
patch_command = (
"cd /testbed && "
"git add -A && "
"git diff --cached HEAD > /logs/agent/patch.diff && "
- "echo \"Patch size: $(wc -c < /logs/agent/patch.diff) bytes\""
+ 'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
)
return [
@@ -22,6 +22,12 @@ apt-get install -y --no-install-recommends \
curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
apt-get install -y --no-install-recommends nodejs
+# Preinstall basedpyright in Zed's language server cache to avoid first-run npm install latency.
+ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"
+BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"
+mkdir -p "$BASEDPYRIGHT_DIR"
+npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright
+
# Install uv (needed for running Python tests in SWE-bench tasks).
curl -LsSf https://astral.sh/uv/install.sh | sh
. "$HOME/.local/bin/env"