agent.py

  1"""Harbor agent wrapper for Zed's eval-cli binary.
  2
  3Usage:
  4    # Build eval-cli locally first:
  5    cargo build --release -p eval_cli
  6
  7    # Run via Harbor with a local binary:
  8    harbor run -d "dataset@version" \
  9        --agent-import-path zed_eval.agent:ZedAgent \
 10        --ae binary_path=/path/to/target/release/eval-cli \
 11        --agent-model anthropic/claude-sonnet-4-6-latest
 12
 13    # Or with a download URL (for CI):
 14    harbor run -d "dataset@version" \
 15        --agent-import-path zed_eval.agent:ZedAgent \
 16        --ae download_url=https://example.com/eval-cli \
 17        --agent-model anthropic/claude-sonnet-4-6-latest
 18"""
 19
 20import json
 21import os
 22import shlex
 23from pathlib import Path
 24
 25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
 26from harbor.environments.base import BaseEnvironment
 27from harbor.models.agent.context import AgentContext
 28
 29
 30class ZedAgent(BaseInstalledAgent):
 31    """Runs Zed's headless AI agent (eval-cli) to solve tasks.
 32
 33    The eval-cli binary boots a headless GPUI application and uses the same
 34    NativeAgent + AcpThread pipeline as the production Zed editor, driving
 35    the full agentic loop (tool calls, subagents, retries) without a GUI.
 36    """
 37
 38    def __init__(
 39        self,
 40        logs_dir: Path,
 41        binary_path: str | None = None,
 42        download_url: str | None = None,
 43        *args,
 44        **kwargs,
 45    ):
 46        super().__init__(logs_dir, *args, **kwargs)
 47        self._binary_path = binary_path
 48        self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")
 49
 50    @staticmethod
 51    def name() -> str:
 52        return "zed"
 53
 54    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
 55        """Detect the working directory inside the container.
 56
 57        Checks, in order:
 58          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
 59          2. Well-known dirs with a ``.git`` subdirectory (SWE-bench style)
 60          3. First git repo found under ``/`` (max depth 3)
 61          4. Well-known dirs that exist at all (terminal-bench style)
 62          5. The container's default working directory (``pwd``)
 63        """
 64        override = self._extra_env.get("EVAL_CLI_WORKDIR")
 65        if override:
 66            return override
 67
 68        # First: try to find a git repo (SWE-bench, etc.)
 69        result = await self.exec_as_agent(
 70            environment,
 71            command=(
 72                "for d in /app /testbed /repo; do "
 73                '  if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
 74                "done; "
 75                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
 76                '| head -1 | sed "s|/.git$||"'
 77            ),
 78        )
 79        workdir = (result.stdout or "").strip()
 80        if workdir:
 81            return workdir
 82
 83        # Fallback: use the first well-known directory that exists,
 84        # even without .git (terminal-bench containers aren't git repos).
 85        result = await self.exec_as_agent(
 86            environment,
 87            command=(
 88                "for d in /app /testbed /repo /root /home; do "
 89                '  if [ -d "$d" ]; then echo "$d"; exit 0; fi; '
 90                "done; "
 91                "pwd"
 92            ),
 93        )
 94        workdir = (result.stdout or "").strip()
 95        if workdir:
 96            return workdir
 97
 98        raise RuntimeError(
 99            "Could not detect a working directory in the container. "
100            "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
101        )
102
103    async def install(self, environment: BaseEnvironment) -> None:
104        # Detect the package manager and install base dependencies.
105        # Supports Debian/Ubuntu (apt-get), Alpine (apk), and
106        # Fedora/RHEL/CentOS (dnf/yum).
107        await self.exec_as_root(
108            environment,
109            command=(
110                "if command -v apt-get >/dev/null 2>&1; then "
111                "  apt-get update && "
112                "  apt-get install -y --no-install-recommends ca-certificates curl git; "
113                "elif command -v apk >/dev/null 2>&1; then "
114                "  apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; "
115                "elif command -v dnf >/dev/null 2>&1; then "
116                "  dnf install -y ca-certificates curl git; "
117                "elif command -v yum >/dev/null 2>&1; then "
118                "  yum install -y ca-certificates curl git; "
119                "else "
120                "  echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; "
121                "fi"
122            ),
123            env={"DEBIAN_FRONTEND": "noninteractive"},
124        )
125
126        # ── Non-essential tooling ─────────────────────────────────────
127        # Everything below here (Node.js, LSPs, uv/ruff) is nice-to-have.
128        # If any step fails (e.g. musl incompatibility, network issues),
129        # log a warning and continue — the agent can still work without
130        # pre-installed language servers.
131
132        await self._install_node(environment)
133        await self._install_lsps(environment)
134        await self._install_uv_and_ruff(environment)
135
136        if self._binary_path:
137            binary = Path(self._binary_path)
138            if not binary.exists():
139                raise FileNotFoundError(
140                    f"eval-cli binary not found at {binary}. "
141                    "Build it with: cargo build --release -p eval_cli"
142                )
143            await environment.upload_file(
144                source_path=binary,
145                target_path="/usr/local/bin/eval-cli",
146            )
147            await self.exec_as_root(
148                environment,
149                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
150            )
151            return
152
153        if self._download_url:
154            await self.exec_as_root(
155                environment,
156                command=(
157                    f"curl -fsSL {shlex.quote(self._download_url)} "
158                    "-o /usr/local/bin/eval-cli && "
159                    "chmod +x /usr/local/bin/eval-cli && "
160                    "eval-cli --help"
161                ),
162            )
163            return
164
165        raise ValueError(
166            "No eval-cli binary provided. "
167            "Either pass binary_path=/path/to/target/release/eval-cli "
168            "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
169        )
170
171    async def _install_node(self, environment: BaseEnvironment) -> None:
172        """Install Node.js from official binary tarballs.
173
174        Uses the musl build on Alpine and the glibc build elsewhere.
175        Skips if node is already on PATH.
176        """
177        try:
178            await self.exec_as_root(
179                environment,
180                command=(
181                    "if command -v node >/dev/null 2>&1; then "
182                    '  echo "Node.js already available: $(node --version)"; '
183                    "else "
184                    "  NODE_VER=v22.14.0; "
185                    "  ARCH=$(uname -m); "
186                    '  case "$ARCH" in '
187                    "    x86_64)  NODE_ARCH=x64  ;; "
188                    "    aarch64) NODE_ARCH=arm64 ;; "
189                    '    *)       echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; '
190                    "  esac; "
191                    "  if ldd /bin/sh 2>&1 | grep -qi musl; then "
192                    '    NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; '
193                    "  else "
194                    '    NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; '
195                    "  fi; "
196                    '  echo "Downloading Node.js from $NODE_URL"; '
197                    '  curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; '
198                    '  echo "Installed Node.js $(node --version)"; '
199                    "fi"
200                ),
201            )
202        except Exception as exc:
203            self.logger.warning("Node.js installation failed (non-fatal): %s", exc)
204
205    async def _install_lsps(self, environment: BaseEnvironment) -> None:
206        """Pre-install language servers so Zed doesn't download them at runtime.
207
208        Each LSP is installed independently so one failure doesn't block the rest.
209        """
210        # npm-based LSPs — skip all if npm is not available.
211        try:
212            await self.exec_as_agent(
213                environment,
214                command="command -v npm >/dev/null 2>&1",
215            )
216        except Exception:
217            self.logger.warning("npm not available — skipping npm-based LSP installs")
218            return
219
220        lsp_installs = [
221            (
222                "basedpyright",
223                'DIR="$ZED_DATA_DIR/languages/basedpyright"; '
224                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact basedpyright',
225            ),
226            (
227                "typescript-language-server",
228                'DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
229                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact typescript typescript-language-server',
230            ),
231            (
232                "vtsls",
233                'DIR="$ZED_DATA_DIR/languages/vtsls"; '
234                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @vtsls/language-server typescript',
235            ),
236            (
237                "tailwindcss-language-server",
238                'DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
239                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @tailwindcss/language-server',
240            ),
241        ]
242
243        for name, cmd in lsp_installs:
244            try:
245                await self.exec_as_agent(
246                    environment,
247                    command=(
248                        'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
249                        + cmd
250                    ),
251                )
252            except Exception as exc:
253                self.logger.warning(
254                    "LSP install '%s' failed (non-fatal): %s", name, exc
255                )
256
257        # eslint — downloaded from GitHub and compiled separately.
258        try:
259            await self.exec_as_agent(
260                environment,
261                command=(
262                    "set -euo pipefail; "
263                    'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
264                    'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
265                    'mkdir -p "$ESLINT_DIR"; '
266                    'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
267                    '| tar -xz -C "$ESLINT_DIR"; '
268                    'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
269                    'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
270                ),
271            )
272        except Exception as exc:
273            self.logger.warning("eslint LSP install failed (non-fatal): %s", exc)
274
275        # gopls — only when Go is present.  Guarded by a 120s timeout so slow
276        # compilation can never eat the full setup budget.
277        gopls_script = (
278            "if command -v go >/dev/null 2>&1; then "
279            "if go install golang.org/x/tools/gopls@latest 2>/dev/null; then "
280            "echo 'Installed gopls@latest'; "
281            "else "
282            '  MY_GO=$(go env GOVERSION | sed "s/^go//"); '
283            "  for v in $(curl -fsSL "
284            "https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null"
285            " | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do "
286            "    NEED=$(curl -fsSL "
287            '"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"'
288            " 2>/dev/null | awk '/^go /{print $2; exit}'); "
289            '    if [ -n "$NEED" ] '
290            '    && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" '
291            '         | sort -V | head -1)" = "$NEED" ]; then '
292            '      echo "Installing gopls $v (compatible with Go $MY_GO)"; '
293            '      go install "golang.org/x/tools/gopls@$v" && break; '
294            "    fi; "
295            "  done; "
296            "fi; "
297            "fi"
298        )
299        try:
300            await self.exec_as_agent(
301                environment,
302                command=(
303                    "timeout 120 bash -c "
304                    + shlex.quote(gopls_script)
305                    + " || echo 'WARNING: gopls installation timed out or failed -- skipping'"
306                ),
307            )
308        except Exception as exc:
309            self.logger.warning("gopls install failed (non-fatal): %s", exc)
310
311    async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None:
312        """Install uv and ruff for Python tooling."""
313        try:
314            await self.exec_as_agent(
315                environment,
316                command=(
317                    "curl -LsSf https://astral.sh/uv/install.sh | sh && "
318                    '. "$HOME/.local/bin/env"'
319                ),
320            )
321
322            agent_home_result = await self.exec_as_agent(
323                environment,
324                command='printf %s "$HOME"',
325            )
326            agent_home = agent_home_result.stdout.strip()
327            if not agent_home:
328                self.logger.warning(
329                    "Could not determine agent home directory — skipping uv symlinks"
330                )
331                return
332
333            await self.exec_as_root(
334                environment,
335                command=(
336                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
337                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
338                ),
339            )
340
341            await self.exec_as_agent(
342                environment,
343                command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff',
344            )
345        except Exception as exc:
346            self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc)
347
348    def populate_context_post_run(self, context: AgentContext) -> None:
349        result_data = None
350        for json_file in self.logs_dir.rglob("result.json"):
351            try:
352                result_data = json.loads(json_file.read_text())
353                break
354            except (json.JSONDecodeError, OSError):
355                continue
356
357        if result_data is None:
358            self.logger.warning("Could not find or parse result.json from eval-cli")
359            return
360
361        if result_data.get("input_tokens") is not None:
362            context.n_input_tokens = result_data["input_tokens"]
363        if result_data.get("output_tokens") is not None:
364            context.n_output_tokens = result_data["output_tokens"]
365        if result_data.get("cache_read_input_tokens") is not None:
366            context.n_cache_tokens = result_data["cache_read_input_tokens"]
367
368        context.metadata = {
369            "status": result_data.get("status"),
370            "duration_secs": result_data.get("duration_secs"),
371            "model": result_data.get("model"),
372        }
373
374    def _get_api_env(self) -> dict[str, str]:
375        env: dict[str, str] = {}
376        if not self.model_name or "/" not in self.model_name:
377            return env
378
379        provider = self.model_name.split("/", 1)[0]
380        provider_env_map = {
381            "anthropic": "ANTHROPIC_API_KEY",
382            "openai": "OPENAI_API_KEY",
383            "google": "GEMINI_API_KEY",
384            "gemini": "GEMINI_API_KEY",
385            "deepseek": "DEEPSEEK_API_KEY",
386            "mistral": "MISTRAL_API_KEY",
387        }
388
389        env_var = provider_env_map.get(provider)
390        if env_var:
391            api_key = os.environ.get(env_var, "")
392            if api_key:
393                env[env_var] = api_key
394
395        return env
396
397    @with_prompt_template
398    async def run(
399        self, instruction: str, environment: BaseEnvironment, context: AgentContext
400    ) -> None:
401        escaped_instruction = shlex.quote(instruction)
402        env = self._get_api_env()
403
404        workdir = await self._detect_workdir(environment)
405
406        parts = [
407            "eval-cli",
408            f"--workdir {shlex.quote(workdir)}",
409            "--output-dir /logs/agent",
410        ]
411
412        if self.model_name:
413            parts.append(f"--model {shlex.quote(self.model_name)}")
414
415        timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
416        if timeout:
417            parts.append(f"--timeout {shlex.quote(timeout)}")
418
419        staff = self._extra_env.get("EVAL_CLI_STAFF")
420        if staff and staff.lower() == "false":
421            parts.append("--no-staff")
422
423        reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
424        if reasoning_effort:
425            parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
426
427        enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
428        if enable_thinking:
429            if enable_thinking.lower() == "true":
430                parts.append("--enable-thinking")
431            elif enable_thinking.lower() == "false":
432                parts.append("--disable-thinking")
433
434        parts.append(f"--instruction {escaped_instruction}")
435
436        await self.exec_as_agent(
437            environment,
438            command=(
439                " ".join(parts) + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;"
440                " then stdbuf -oL tee /logs/agent/eval-cli.txt;"
441                " else tee /logs/agent/eval-cli.txt; fi"
442            ),
443            env=env,
444        )
445
446        # Only generate a patch if the workdir is a git repo
447        # (SWE-bench style). Terminal-bench containers aren't git repos.
448        await self.exec_as_agent(
449            environment,
450            command=(
451                'if [ -d ".git" ]; then '
452                "git add -A && "
453                "git diff --cached HEAD > /logs/agent/patch.diff && "
454                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"; '
455                "else "
456                'echo "No git repo found, skipping patch generation"; '
457                "fi"
458            ),
459            cwd=workdir,
460        )