agent.py

  1"""Harbor agent wrapper for Zed's eval-cli binary.
  2
  3Usage:
  4    # Build eval-cli locally first:
  5    cargo build --release -p eval_cli
  6
  7    # Run via Harbor with a local binary:
  8    harbor run -d "dataset@version" \
  9        --agent-import-path zed_eval.agent:ZedAgent \
 10        --ae binary_path=/path/to/target/release/eval-cli \
 11        --agent-model anthropic/claude-sonnet-4-6-latest
 12
 13    # Or with a download URL (for CI):
 14    harbor run -d "dataset@version" \
 15        --agent-import-path zed_eval.agent:ZedAgent \
 16        --ae download_url=https://example.com/eval-cli \
 17        --agent-model anthropic/claude-sonnet-4-6-latest
 18"""
 19
 20import json
 21import os
 22import shlex
 23from pathlib import Path
 24
 25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
 26from harbor.environments.base import BaseEnvironment
 27from harbor.models.agent.context import AgentContext
 28
 29
 30class ZedAgent(BaseInstalledAgent):
 31    """Runs Zed's headless AI agent (eval-cli) to solve tasks.
 32
 33    The eval-cli binary boots a headless GPUI application and uses the same
 34    NativeAgent + AcpThread pipeline as the production Zed editor, driving
 35    the full agentic loop (tool calls, subagents, retries) without a GUI.
 36    """
 37
 38    def __init__(
 39        self,
 40        logs_dir: Path,
 41        binary_path: str | None = None,
 42        download_url: str | None = None,
 43        *args,
 44        **kwargs,
 45    ):
 46        super().__init__(logs_dir, *args, **kwargs)
 47        self._binary_path = binary_path
 48        self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")
 49
 50    @staticmethod
 51    def name() -> str:
 52        return "zed"
 53
 54    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
 55        """Detect the repo working directory inside the container.
 56
 57        Checks, in order:
 58          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
 59          2. ``/app``      (SWE-bench Pro)
 60          3. ``/testbed``  (SWE-bench Verified)
 61          4. ``/repo``
 62          5. First git repo found under ``/`` (max depth 3)
 63        """
 64        override = self._extra_env.get("EVAL_CLI_WORKDIR")
 65        if override:
 66            return override
 67
 68        result = await self.exec_as_agent(
 69            environment,
 70            command=(
 71                "for d in /app /testbed /repo; do "
 72                '  if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
 73                "done; "
 74                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
 75                '| head -1 | sed "s|/.git$||"'
 76            ),
 77        )
 78        workdir = result.stdout.strip()
 79        if not workdir:
 80            raise RuntimeError(
 81                "Could not find a git repository in the container. "
 82                "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
 83            )
 84        return workdir
 85
 86    async def install(self, environment: BaseEnvironment) -> None:
 87        # Detect the package manager and install base dependencies.
 88        # Supports Debian/Ubuntu (apt-get), Alpine (apk), and
 89        # Fedora/RHEL/CentOS (dnf/yum).
 90        await self.exec_as_root(
 91            environment,
 92            command=(
 93                "if command -v apt-get >/dev/null 2>&1; then "
 94                "  apt-get update && "
 95                "  apt-get install -y --no-install-recommends ca-certificates curl git; "
 96                "elif command -v apk >/dev/null 2>&1; then "
 97                "  apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; "
 98                "elif command -v dnf >/dev/null 2>&1; then "
 99                "  dnf install -y ca-certificates curl git; "
100                "elif command -v yum >/dev/null 2>&1; then "
101                "  yum install -y ca-certificates curl git; "
102                "else "
103                "  echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; "
104                "fi"
105            ),
106            env={"DEBIAN_FRONTEND": "noninteractive"},
107        )
108
109        # ── Non-essential tooling ─────────────────────────────────────
110        # Everything below here (Node.js, LSPs, uv/ruff) is nice-to-have.
111        # If any step fails (e.g. musl incompatibility, network issues),
112        # log a warning and continue — the agent can still work without
113        # pre-installed language servers.
114
115        await self._install_node(environment)
116        await self._install_lsps(environment)
117        await self._install_uv_and_ruff(environment)
118
119        if self._binary_path:
120            binary = Path(self._binary_path)
121            if not binary.exists():
122                raise FileNotFoundError(
123                    f"eval-cli binary not found at {binary}. "
124                    "Build it with: cargo build --release -p eval_cli"
125                )
126            await environment.upload_file(
127                source_path=binary,
128                target_path="/usr/local/bin/eval-cli",
129            )
130            await self.exec_as_root(
131                environment,
132                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
133            )
134            return
135
136        if self._download_url:
137            await self.exec_as_root(
138                environment,
139                command=(
140                    f"curl -fsSL {shlex.quote(self._download_url)} "
141                    "-o /usr/local/bin/eval-cli && "
142                    "chmod +x /usr/local/bin/eval-cli && "
143                    "eval-cli --help"
144                ),
145            )
146            return
147
148        raise ValueError(
149            "No eval-cli binary provided. "
150            "Either pass binary_path=/path/to/target/release/eval-cli "
151            "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
152        )
153
154    async def _install_node(self, environment: BaseEnvironment) -> None:
155        """Install Node.js from official binary tarballs.
156
157        Uses the musl build on Alpine and the glibc build elsewhere.
158        Skips if node is already on PATH.
159        """
160        try:
161            await self.exec_as_root(
162                environment,
163                command=(
164                    "if command -v node >/dev/null 2>&1; then "
165                    '  echo "Node.js already available: $(node --version)"; '
166                    "else "
167                    "  NODE_VER=v22.14.0; "
168                    "  ARCH=$(uname -m); "
169                    '  case "$ARCH" in '
170                    "    x86_64)  NODE_ARCH=x64  ;; "
171                    "    aarch64) NODE_ARCH=arm64 ;; "
172                    '    *)       echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; '
173                    "  esac; "
174                    "  if ldd /bin/sh 2>&1 | grep -qi musl; then "
175                    '    NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; '
176                    "  else "
177                    '    NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; '
178                    "  fi; "
179                    '  echo "Downloading Node.js from $NODE_URL"; '
180                    '  curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; '
181                    '  echo "Installed Node.js $(node --version)"; '
182                    "fi"
183                ),
184            )
185        except Exception as exc:
186            self.logger.warning("Node.js installation failed (non-fatal): %s", exc)
187
188    async def _install_lsps(self, environment: BaseEnvironment) -> None:
189        """Pre-install language servers so Zed doesn't download them at runtime.
190
191        Each LSP is installed independently so one failure doesn't block the rest.
192        """
193        # npm-based LSPs — skip all if npm is not available.
194        try:
195            await self.exec_as_agent(
196                environment,
197                command="command -v npm >/dev/null 2>&1",
198            )
199        except Exception:
200            self.logger.warning("npm not available — skipping npm-based LSP installs")
201            return
202
203        lsp_installs = [
204            (
205                "basedpyright",
206                'DIR="$ZED_DATA_DIR/languages/basedpyright"; '
207                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact basedpyright',
208            ),
209            (
210                "typescript-language-server",
211                'DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
212                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact typescript typescript-language-server',
213            ),
214            (
215                "vtsls",
216                'DIR="$ZED_DATA_DIR/languages/vtsls"; '
217                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @vtsls/language-server typescript',
218            ),
219            (
220                "tailwindcss-language-server",
221                'DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
222                'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @tailwindcss/language-server',
223            ),
224        ]
225
226        for name, cmd in lsp_installs:
227            try:
228                await self.exec_as_agent(
229                    environment,
230                    command=(
231                        'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
232                        + cmd
233                    ),
234                )
235            except Exception as exc:
236                self.logger.warning(
237                    "LSP install '%s' failed (non-fatal): %s", name, exc
238                )
239
240        # eslint — downloaded from GitHub and compiled separately.
241        try:
242            await self.exec_as_agent(
243                environment,
244                command=(
245                    "set -euo pipefail; "
246                    'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
247                    'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
248                    'mkdir -p "$ESLINT_DIR"; '
249                    'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
250                    '| tar -xz -C "$ESLINT_DIR"; '
251                    'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
252                    'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
253                ),
254            )
255        except Exception as exc:
256            self.logger.warning("eslint LSP install failed (non-fatal): %s", exc)
257
258        # gopls — only when Go is present.  Guarded by a 120s timeout so slow
259        # compilation can never eat the full setup budget.
260        gopls_script = (
261            "if command -v go >/dev/null 2>&1; then "
262            "if go install golang.org/x/tools/gopls@latest 2>/dev/null; then "
263            "echo 'Installed gopls@latest'; "
264            "else "
265            '  MY_GO=$(go env GOVERSION | sed "s/^go//"); '
266            "  for v in $(curl -fsSL "
267            "https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null"
268            " | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do "
269            "    NEED=$(curl -fsSL "
270            '"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"'
271            " 2>/dev/null | awk '/^go /{print $2; exit}'); "
272            '    if [ -n "$NEED" ] '
273            '    && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" '
274            '         | sort -V | head -1)" = "$NEED" ]; then '
275            '      echo "Installing gopls $v (compatible with Go $MY_GO)"; '
276            '      go install "golang.org/x/tools/gopls@$v" && break; '
277            "    fi; "
278            "  done; "
279            "fi; "
280            "fi"
281        )
282        try:
283            await self.exec_as_agent(
284                environment,
285                command=(
286                    "timeout 120 bash -c "
287                    + shlex.quote(gopls_script)
288                    + " || echo 'WARNING: gopls installation timed out or failed -- skipping'"
289                ),
290            )
291        except Exception as exc:
292            self.logger.warning("gopls install failed (non-fatal): %s", exc)
293
294    async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None:
295        """Install uv and ruff for Python tooling."""
296        try:
297            await self.exec_as_agent(
298                environment,
299                command=(
300                    "curl -LsSf https://astral.sh/uv/install.sh | sh && "
301                    '. "$HOME/.local/bin/env"'
302                ),
303            )
304
305            agent_home_result = await self.exec_as_agent(
306                environment,
307                command='printf %s "$HOME"',
308            )
309            agent_home = agent_home_result.stdout.strip()
310            if not agent_home:
311                self.logger.warning(
312                    "Could not determine agent home directory — skipping uv symlinks"
313                )
314                return
315
316            await self.exec_as_root(
317                environment,
318                command=(
319                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
320                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
321                ),
322            )
323
324            await self.exec_as_agent(
325                environment,
326                command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff',
327            )
328        except Exception as exc:
329            self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc)
330
331    def populate_context_post_run(self, context: AgentContext) -> None:
332        result_data = None
333        for json_file in self.logs_dir.rglob("result.json"):
334            try:
335                result_data = json.loads(json_file.read_text())
336                break
337            except (json.JSONDecodeError, OSError):
338                continue
339
340        if result_data is None:
341            self.logger.warning("Could not find or parse result.json from eval-cli")
342            return
343
344        if result_data.get("input_tokens") is not None:
345            context.n_input_tokens = result_data["input_tokens"]
346        if result_data.get("output_tokens") is not None:
347            context.n_output_tokens = result_data["output_tokens"]
348        if result_data.get("cache_read_input_tokens") is not None:
349            context.n_cache_tokens = result_data["cache_read_input_tokens"]
350
351        context.metadata = {
352            "status": result_data.get("status"),
353            "duration_secs": result_data.get("duration_secs"),
354            "model": result_data.get("model"),
355        }
356
357    def _get_api_env(self) -> dict[str, str]:
358        env: dict[str, str] = {}
359        if not self.model_name or "/" not in self.model_name:
360            return env
361
362        provider = self.model_name.split("/", 1)[0]
363        provider_env_map = {
364            "anthropic": "ANTHROPIC_API_KEY",
365            "openai": "OPENAI_API_KEY",
366            "google": "GEMINI_API_KEY",
367            "gemini": "GEMINI_API_KEY",
368            "deepseek": "DEEPSEEK_API_KEY",
369            "mistral": "MISTRAL_API_KEY",
370        }
371
372        env_var = provider_env_map.get(provider)
373        if env_var:
374            api_key = os.environ.get(env_var, "")
375            if api_key:
376                env[env_var] = api_key
377
378        return env
379
380    @with_prompt_template
381    async def run(
382        self, instruction: str, environment: BaseEnvironment, context: AgentContext
383    ) -> None:
384        escaped_instruction = shlex.quote(instruction)
385        env = self._get_api_env()
386
387        workdir = await self._detect_workdir(environment)
388
389        parts = [
390            "eval-cli",
391            f"--workdir {shlex.quote(workdir)}",
392            "--output-dir /logs/agent",
393        ]
394
395        if self.model_name:
396            parts.append(f"--model {shlex.quote(self.model_name)}")
397
398        timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
399        if timeout:
400            parts.append(f"--timeout {shlex.quote(timeout)}")
401
402        staff = self._extra_env.get("EVAL_CLI_STAFF")
403        if staff and staff.lower() == "false":
404            parts.append("--no-staff")
405
406        reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
407        if reasoning_effort:
408            parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
409
410        enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
411        if enable_thinking:
412            if enable_thinking.lower() == "true":
413                parts.append("--enable-thinking")
414            elif enable_thinking.lower() == "false":
415                parts.append("--disable-thinking")
416
417        parts.append(f"--instruction {escaped_instruction}")
418
419        await self.exec_as_agent(
420            environment,
421            command=(
422                " ".join(parts) + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;"
423                " then stdbuf -oL tee /logs/agent/eval-cli.txt;"
424                " else tee /logs/agent/eval-cli.txt; fi"
425            ),
426            env=env,
427        )
428
429        await self.exec_as_agent(
430            environment,
431            command=(
432                "git add -A && "
433                "git diff --cached HEAD > /logs/agent/patch.diff && "
434                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
435            ),
436            cwd=workdir,
437        )