eval_cli: Simplify build setup for more datasets (#52686)

Ben Brandt created 1 week ago

Cleans up the build setup and simplifies it considerably so that we can use
the binary in more eval Docker containers.

Release Notes:

- N/A

Change summary

crates/agent_ui/Cargo.toml               |   3 
crates/agent_ui/src/conversation_view.rs |   2 
crates/eval_cli/Dockerfile               |  55 ++---
crates/eval_cli/script/build-linux       |   8 
crates/eval_cli/zed_eval/agent.py        | 217 ++++++++++++++++++++++---
crates/eval_cli/zed_eval/install.sh.j2   |  55 ------
crates/sidebar/Cargo.toml                |   2 
crates/zed/Cargo.toml                    |   2 
8 files changed, 218 insertions(+), 126 deletions(-)

Detailed changes

crates/agent_ui/Cargo.toml 🔗

@@ -23,6 +23,7 @@ test-support = [
     "workspace/test-support",
     "agent/test-support",
 ]
+audio = ["dep:audio"]
 unit-eval = []
 
 [dependencies]
@@ -38,7 +39,7 @@ heapless.workspace = true
 assistant_text_thread.workspace = true
 assistant_slash_command.workspace = true
 assistant_slash_commands.workspace = true
-audio.workspace = true
+audio = { workspace = true, optional = true }
 base64.workspace = true
 buffer_diff.workspace = true
 chrono.workspace = true

crates/agent_ui/src/conversation_view.rs 🔗

@@ -13,6 +13,7 @@ use agent_servers::AgentServerDelegate;
 use agent_servers::{AgentServer, GEMINI_TERMINAL_AUTH_METHOD_ID};
 use agent_settings::{AgentProfileId, AgentSettings};
 use anyhow::{Result, anyhow};
+#[cfg(feature = "audio")]
 use audio::{Audio, Sound};
 use buffer_diff::BufferDiff;
 use client::zed_urls;
@@ -2278,6 +2279,7 @@ impl ConversationView {
         window: &mut Window,
         cx: &mut Context<Self>,
     ) {
+        #[cfg(feature = "audio")]
         self.play_notification_sound(window, cx);
         self.show_notification(caption, icon, window, cx);
     }

crates/eval_cli/Dockerfile 🔗

@@ -7,55 +7,44 @@
 # Or use the helper script:
 #   crates/eval_cli/script/build-linux
 
-FROM rust:1.93.1-bookworm AS builder
+FROM rust:1.93 AS builder
 
 WORKDIR /app
 
-# Install build dependencies (subset of script/linux needed for headless GPUI).
+    # Pre-install the toolchain specified in rust-toolchain.toml so it is cached.
+RUN rustup toolchain install 1.93 --profile minimal \
+    --component rustfmt --component clippy --component rust-analyzer --component rust-src \
+    --target wasm32-wasip2 --target wasm32-unknown-unknown --target x86_64-unknown-linux-musl --target x86_64-unknown-linux-gnu
+
+# Install build tools.  cmake + build-essential are needed for vendored C
+# libraries (libgit2-sys, zstd-sys, libsqlite3-sys).  No audio/GUI -dev
+# packages required — eval-cli runs headless with those features disabled.
+#
+# cargo-zigbuild cross-compiles against a specific glibc version (2.31 =
+# Debian Bullseye / Ubuntu Focal) so the resulting binary is portable to
+# any Linux distro with glibc >= 2.31.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     cmake \
-    clang \
-    g++ \
-    libasound2-dev \
-    libfontconfig-dev \
-    libgit2-dev \
-    libglib2.0-dev \
-    libssl-dev \
-    libwayland-dev \
-    libx11-xcb-dev \
-    libxkbcommon-x11-dev \
-    libzstd-dev \
-    libsqlite3-dev \
     build-essential \
     curl \
+    xz-utils \
     && rm -rf /var/lib/apt/lists/*
 
-# Install wild linker for faster linking (built from source to match bookworm's glibc).
-RUN cargo install --locked wild-linker --version 0.8.0 --root /usr/local
+RUN mkdir -p /opt/zig \
+    && curl -fsSL https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz \
+    | tar -xJ -C /opt/zig --strip-components=1 \
+    && ln -s /opt/zig/zig /usr/local/bin/zig
 
-# Download WASI SDK (needed by some dependencies).
-ARG TARGETARCH
-RUN mkdir -p /app/target && \
-    WASI_ARCH=$([ "$TARGETARCH" = "arm64" ] && echo "arm64" || echo "x86_64") && \
-    curl -L "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/wasi-sdk-25.0-${WASI_ARCH}-linux.tar.gz" \
-    | tar -xz -C /app/target && \
-    mv /app/target/wasi-sdk-25.0-${WASI_ARCH}-linux /app/target/wasi-sdk
-
-# Pre-install the toolchain specified in rust-toolchain.toml so it is cached.
-RUN rustup toolchain install 1.93 --profile minimal \
-    --component rustfmt --component clippy --component rust-analyzer --component rust-src \
-    --target wasm32-wasip2 --target wasm32-unknown-unknown --target x86_64-unknown-linux-musl
+RUN cargo install --locked cargo-zigbuild
 
 COPY . .
 
-ENV CC=clang CXX=clang++
-ENV RUSTFLAGS="-C linker=clang -C link-arg=--ld-path=wild"
-
 RUN --mount=type=cache,target=/usr/local/cargo/registry \
     --mount=type=cache,target=/usr/local/cargo/git \
     --mount=type=cache,target=/app/target \
-    cargo build --release --package eval_cli && \
-    cp /app/target/release/eval-cli /eval-cli && \
+    cargo zigbuild --release --package eval_cli \
+        --target x86_64-unknown-linux-gnu.2.31 && \
+    cp /app/target/x86_64-unknown-linux-gnu/release/eval-cli /eval-cli && \
     strip /eval-cli
 
 FROM scratch

crates/eval_cli/script/build-linux 🔗

@@ -1,8 +1,10 @@
 #!/usr/bin/env bash
 #
 # Build eval-cli for x86_64 Linux from any host (macOS, Linux, etc.)
-# using Docker. The resulting binary is placed at the path printed on
-# completion (default: target/eval-cli).
+# using Docker + cargo-zigbuild. Targets glibc 2.31 (Debian Bullseye /
+# Ubuntu Focal) so the binary is portable to any modern Linux distro.
+# The resulting binary is placed at the path printed on completion
+# (default: target/eval-cli).
 #
 # Usage:
 #   crates/eval_cli/script/build-linux [--output PATH]
@@ -36,7 +38,7 @@ cd "$REPO_ROOT"
 
 IMAGE_TAG="eval-cli-builder"
 
-echo "Building eval-cli for x86_64-unknown-linux-gnu..."
+echo "Building eval-cli for x86_64-unknown-linux-gnu (glibc >= 2.31)..."
 echo "  Repo root: $REPO_ROOT"
 echo "  Output:    $OUTPUT"
 echo ""

crates/eval_cli/zed_eval/agent.py 🔗

@@ -22,7 +22,7 @@ import os
 import shlex
 from pathlib import Path
 
-from harbor.agents.installed.base import BaseInstalledAgent, ExecInput
+from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
 from harbor.environments.base import BaseEnvironment
 from harbor.models.agent.context import AgentContext
 
@@ -51,12 +51,143 @@ class ZedAgent(BaseInstalledAgent):
     def name() -> str:
         return "zed"
 
-    @property
-    def _install_agent_template_path(self) -> Path:
-        return Path(__file__).parent / "install.sh.j2"
+    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
+        """Detect the repo working directory inside the container.
+
+        Checks, in order:
+          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
+          2. ``/app``      (SWE-bench Pro)
+          3. ``/testbed``  (SWE-bench Verified)
+          4. ``/repo``
+          5. First git repo found under ``/`` (max depth 3)
+        """
+        override = self._extra_env.get("EVAL_CLI_WORKDIR")
+        if override:
+            return override
+
+        result = await self.exec_as_agent(
+            environment,
+            command=(
+                "for d in /app /testbed /repo; do "
+                '  if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
+                "done; "
+                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
+                '| head -1 | sed "s|/.git$||"'
+            ),
+        )
+        workdir = result.stdout.strip()
+        if not workdir:
+            raise RuntimeError(
+                "Could not find a git repository in the container. "
+                "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
+            )
+        return workdir
+
+    async def install(self, environment: BaseEnvironment) -> None:
+        await self.exec_as_root(
+            environment,
+            command=(
+                "apt-get update && "
+                "apt-get install -y --no-install-recommends "
+                "ca-certificates "
+                "curl "
+                "git"
+            ),
+            env={"DEBIAN_FRONTEND": "noninteractive"},
+        )
+
+        await self.exec_as_root(
+            environment,
+            command=(
+                "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && "
+                "apt-get install -y --no-install-recommends nodejs"
+            ),
+            env={"DEBIAN_FRONTEND": "noninteractive"},
+        )
+
+        # Pre-install default LSPs so Zed doesn't have to download them at
+        # runtime.  Each gets its own subdirectory under $ZED_DATA_DIR/languages.
+        await self.exec_as_agent(
+            environment,
+            command=(
+                "set -euo pipefail; "
+                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
+                # basedpyright (Python - default type checker)
+                'BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"; '
+                'mkdir -p "$BASEDPYRIGHT_DIR"; '
+                'npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright; '
+                # typescript-language-server (TypeScript/JS - default LSP)
+                'TSSERVER_DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
+                'mkdir -p "$TSSERVER_DIR"; '
+                'npm install --prefix "$TSSERVER_DIR" --save-exact typescript typescript-language-server; '
+                # vtsls (VS Code TypeScript language features)
+                'VTSLS_DIR="$ZED_DATA_DIR/languages/vtsls"; '
+                'mkdir -p "$VTSLS_DIR"; '
+                'npm install --prefix "$VTSLS_DIR" --save-exact @vtsls/language-server typescript; '
+                # tailwindcss-language-server
+                'TAILWIND_DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
+                'mkdir -p "$TAILWIND_DIR"; '
+                'npm install --prefix "$TAILWIND_DIR" --save-exact @tailwindcss/language-server'
+            ),
+        )
 
-    async def setup(self, environment: BaseEnvironment) -> None:
-        await environment.exec(command="mkdir -p /installed-agent")
+        # eslint LSP (downloaded from zed-industries/vscode-eslint GitHub release,
+        # then compiled — this mirrors what Zed does at runtime).
+        await self.exec_as_agent(
+            environment,
+            command=(
+                "set -euo pipefail; "
+                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
+                'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
+                'mkdir -p "$ESLINT_DIR"; '
+                'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
+                '| tar -xz -C "$ESLINT_DIR"; '
+                'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
+                'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
+            ),
+        )
+
+        # gopls (Go - default LSP).  Only install when Go is present in the
+        # container (i.e. Go-related SWE-bench tasks).
+        await self.exec_as_agent(
+            environment,
+            command=(
+                "if command -v go >/dev/null 2>&1; then "
+                "go install golang.org/x/tools/gopls@latest; "
+                "fi"
+            ),
+        )
+
+        await self.exec_as_agent(
+            environment,
+            command=(
+                "curl -LsSf https://astral.sh/uv/install.sh | sh && "
+                '. "$HOME/.local/bin/env"'
+            ),
+        )
+
+        agent_home_result = await self.exec_as_agent(
+            environment,
+            command='printf %s "$HOME"',
+        )
+        agent_home = agent_home_result.stdout.strip()
+        if not agent_home:
+            raise RuntimeError("Could not determine agent home directory")
+
+        await self.exec_as_root(
+            environment,
+            command=(
+                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
+                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
+            ),
+        )
+
+        # Install a modern ruff so `ruff server` works without --preview.
+        # This also makes it available as a CLI tool for the agent.
+        await self.exec_as_agent(
+            environment,
+            command=('export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff'),
+        )
 
         if self._binary_path:
             binary = Path(self._binary_path)
@@ -69,18 +200,29 @@ class ZedAgent(BaseInstalledAgent):
                 source_path=binary,
                 target_path="/usr/local/bin/eval-cli",
             )
-            await environment.exec(command="chmod +x /usr/local/bin/eval-cli")
-
-        await super().setup(environment)
+            await self.exec_as_root(
+                environment,
+                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
+            )
+            return
 
-    @property
-    def _template_variables(self) -> dict[str, str]:
-        variables = super()._template_variables
-        if self._binary_path:
-            variables["binary_uploaded"] = "true"
         if self._download_url:
-            variables["download_url"] = self._download_url
-        return variables
+            await self.exec_as_root(
+                environment,
+                command=(
+                    f"curl -fsSL {shlex.quote(self._download_url)} "
+                    "-o /usr/local/bin/eval-cli && "
+                    "chmod +x /usr/local/bin/eval-cli && "
+                    "eval-cli --help"
+                ),
+            )
+            return
+
+        raise ValueError(
+            "No eval-cli binary provided. "
+            "Either pass binary_path=/path/to/target/release/eval-cli "
+            "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
+        )
 
     def populate_context_post_run(self, context: AgentContext) -> None:
         result_data = None
@@ -131,18 +273,27 @@ class ZedAgent(BaseInstalledAgent):
 
         return env
 
-    def create_run_agent_commands(self, instruction: str) -> list[ExecInput]:
+    @with_prompt_template
+    async def run(
+        self, instruction: str, environment: BaseEnvironment, context: AgentContext
+    ) -> None:
         escaped_instruction = shlex.quote(instruction)
         env = self._get_api_env()
 
-        parts = ["eval-cli", "--workdir /testbed", "--output-dir /logs/agent"]
+        workdir = await self._detect_workdir(environment)
+
+        parts = [
+            "eval-cli",
+            f"--workdir {shlex.quote(workdir)}",
+            "--output-dir /logs/agent",
+        ]
 
         if self.model_name:
-            parts.append(f"--model {self.model_name}")
+            parts.append(f"--model {shlex.quote(self.model_name)}")
 
         timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
         if timeout:
-            parts.append(f"--timeout {timeout}")
+            parts.append(f"--timeout {shlex.quote(timeout)}")
 
         staff = self._extra_env.get("EVAL_CLI_STAFF")
         if staff and staff.lower() == "false":
@@ -161,18 +312,20 @@ class ZedAgent(BaseInstalledAgent):
 
         parts.append(f"--instruction {escaped_instruction}")
 
-        eval_cli_command = (
-            " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
+        await self.exec_as_agent(
+            environment,
+            command=(
+                " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
+            ),
+            env=env,
         )
 
-        patch_command = (
-            "cd /testbed && "
-            "git add -A && "
-            "git diff --cached HEAD > /logs/agent/patch.diff && "
-            'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
+        await self.exec_as_agent(
+            environment,
+            command=(
+                "git add -A && "
+                "git diff --cached HEAD > /logs/agent/patch.diff && "
+                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
+            ),
+            cwd=workdir,
         )
-
-        return [
-            ExecInput(command=eval_cli_command, env=env),
-            ExecInput(command=patch_command),
-        ]

crates/eval_cli/zed_eval/install.sh.j2 🔗

@@ -1,55 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-# Install runtime dependencies needed by the eval-cli binary (dynamically linked
-# against glibc + these shared libraries from its GPUI/terminal/language stacks).
-apt-get update
-apt-get install -y --no-install-recommends \
-    ca-certificates \
-    curl \
-    git \
-    libasound2 \
-    libfontconfig1 \
-    libglib2.0-0 \
-    libsqlite3-0 \
-    libssl3 \
-    libwayland-client0 \
-    libx11-xcb1 \
-    libxkbcommon-x11-0 \
-    libzstd1
-
-# Install Node.js 22 LTS (needed by language servers like basedpyright).
-curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
-apt-get install -y --no-install-recommends nodejs
-
-# Preinstall basedpyright in Zed's language server cache to avoid first-run npm install latency.
-ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"
-BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"
-mkdir -p "$BASEDPYRIGHT_DIR"
-npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright
-
-# Install uv (needed for running Python tests in SWE-bench tasks).
-curl -LsSf https://astral.sh/uv/install.sh | sh
-. "$HOME/.local/bin/env"
-ln -sf "$HOME/.local/bin/uv" /usr/local/bin/uv
-ln -sf "$HOME/.local/bin/uvx" /usr/local/bin/uvx
-
-{% if binary_uploaded is defined %}
-# Binary was uploaded directly via setup() — just verify it works.
-eval-cli --help
-{% elif download_url is defined %}
-curl -fsSL "{{ download_url }}" -o /usr/local/bin/eval-cli
-chmod +x /usr/local/bin/eval-cli
-eval-cli --help
-{% else %}
-echo "ERROR: No eval-cli binary provided."
-echo ""
-echo "Either pass binary_path= to upload a local build:"
-echo "  --ae binary_path=/path/to/target/release/eval-cli"
-echo ""
-echo "Or set download_url= / EVAL_CLI_DOWNLOAD_URL:"
-echo "  --ae download_url=https://example.com/eval-cli"
-exit 1
-{% endif %}
-
-echo "INSTALL_SUCCESS"

crates/sidebar/Cargo.toml 🔗

@@ -20,7 +20,7 @@ action_log.workspace = true
 agent.workspace = true
 agent-client-protocol.workspace = true
 agent_settings.workspace = true
-agent_ui.workspace = true
+agent_ui = { workspace = true, features = ["audio"] }
 anyhow.workspace = true
 chrono.workspace = true
 collections.workspace = true

crates/zed/Cargo.toml 🔗

@@ -68,7 +68,7 @@ activity_indicator.workspace = true
 agent.workspace = true
 agent-client-protocol.workspace = true
 agent_settings.workspace = true
-agent_ui.workspace = true
+agent_ui = { workspace = true, features = ["audio"] }
 anyhow.workspace = true
 askpass.workspace = true
 assets.workspace = true