agent.py

  1"""Harbor agent wrapper for Zed's eval-cli binary.
  2
  3Usage:
  4    # Build eval-cli locally first:
  5    cargo build --release -p eval_cli
  6
  7    # Run via Harbor with a local binary:
  8    harbor run -d "dataset@version" \
  9        --agent-import-path zed_eval.agent:ZedAgent \
 10        --ae binary_path=/path/to/target/release/eval-cli \
 11        --agent-model anthropic/claude-sonnet-4-6-latest
 12
 13    # Or with a download URL (for CI):
 14    harbor run -d "dataset@version" \
 15        --agent-import-path zed_eval.agent:ZedAgent \
 16        --ae download_url=https://example.com/eval-cli \
 17        --agent-model anthropic/claude-sonnet-4-6-latest
 18"""
 19
 20import json
 21import os
 22import shlex
 23from pathlib import Path
 24
 25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
 26from harbor.environments.base import BaseEnvironment
 27from harbor.models.agent.context import AgentContext
 28
 29
 30class ZedAgent(BaseInstalledAgent):
 31    """Runs Zed's headless AI agent (eval-cli) to solve tasks.
 32
 33    The eval-cli binary boots a headless GPUI application and uses the same
 34    NativeAgent + AcpThread pipeline as the production Zed editor, driving
 35    the full agentic loop (tool calls, subagents, retries) without a GUI.
 36    """
 37
 38    def __init__(
 39        self,
 40        logs_dir: Path,
 41        binary_path: str | None = None,
 42        download_url: str | None = None,
 43        *args,
 44        **kwargs,
 45    ):
 46        super().__init__(logs_dir, *args, **kwargs)
 47        self._binary_path = binary_path
 48        self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")
 49
 50    @staticmethod
 51    def name() -> str:
 52        return "zed"
 53
 54    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
 55        """Detect the repo working directory inside the container.
 56
 57        Checks, in order:
 58          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
 59          2. ``/app``      (SWE-bench Pro)
 60          3. ``/testbed``  (SWE-bench Verified)
 61          4. ``/repo``
 62          5. First git repo found under ``/`` (max depth 3)
 63        """
 64        override = self._extra_env.get("EVAL_CLI_WORKDIR")
 65        if override:
 66            return override
 67
 68        result = await self.exec_as_agent(
 69            environment,
 70            command=(
 71                "for d in /app /testbed /repo; do "
 72                '  if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
 73                "done; "
 74                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
 75                '| head -1 | sed "s|/.git$||"'
 76            ),
 77        )
 78        workdir = result.stdout.strip()
 79        if not workdir:
 80            raise RuntimeError(
 81                "Could not find a git repository in the container. "
 82                "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
 83            )
 84        return workdir
 85
 86    async def install(self, environment: BaseEnvironment) -> None:
 87        await self.exec_as_root(
 88            environment,
 89            command=(
 90                "apt-get update && "
 91                "apt-get install -y --no-install-recommends "
 92                "ca-certificates "
 93                "curl "
 94                "git"
 95            ),
 96            env={"DEBIAN_FRONTEND": "noninteractive"},
 97        )
 98
 99        await self.exec_as_root(
100            environment,
101            command=(
102                "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && "
103                "apt-get install -y --no-install-recommends nodejs"
104            ),
105            env={"DEBIAN_FRONTEND": "noninteractive"},
106        )
107
108        # Pre-install default LSPs so Zed doesn't have to download them at
109        # runtime.  Each gets its own subdirectory under $ZED_DATA_DIR/languages.
110        await self.exec_as_agent(
111            environment,
112            command=(
113                "set -euo pipefail; "
114                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
115                # basedpyright (Python - default type checker)
116                'BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"; '
117                'mkdir -p "$BASEDPYRIGHT_DIR"; '
118                'npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright; '
119                # typescript-language-server (TypeScript/JS - default LSP)
120                'TSSERVER_DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
121                'mkdir -p "$TSSERVER_DIR"; '
122                'npm install --prefix "$TSSERVER_DIR" --save-exact typescript typescript-language-server; '
123                # vtsls (VS Code TypeScript language features)
124                'VTSLS_DIR="$ZED_DATA_DIR/languages/vtsls"; '
125                'mkdir -p "$VTSLS_DIR"; '
126                'npm install --prefix "$VTSLS_DIR" --save-exact @vtsls/language-server typescript; '
127                # tailwindcss-language-server
128                'TAILWIND_DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
129                'mkdir -p "$TAILWIND_DIR"; '
130                'npm install --prefix "$TAILWIND_DIR" --save-exact @tailwindcss/language-server'
131            ),
132        )
133
134        # eslint LSP (downloaded from zed-industries/vscode-eslint GitHub release,
135        # then compiled — this mirrors what Zed does at runtime).
136        await self.exec_as_agent(
137            environment,
138            command=(
139                "set -euo pipefail; "
140                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
141                'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
142                'mkdir -p "$ESLINT_DIR"; '
143                'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
144                '| tar -xz -C "$ESLINT_DIR"; '
145                'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
146                'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
147            ),
148        )
149
150        # gopls (Go - default LSP).  Only install when Go is present in the
151        # container (i.e. Go-related SWE-bench tasks).
152        await self.exec_as_agent(
153            environment,
154            command=(
155                "if command -v go >/dev/null 2>&1; then "
156                "go install golang.org/x/tools/gopls@latest; "
157                "fi"
158            ),
159        )
160
161        await self.exec_as_agent(
162            environment,
163            command=(
164                "curl -LsSf https://astral.sh/uv/install.sh | sh && "
165                '. "$HOME/.local/bin/env"'
166            ),
167        )
168
169        agent_home_result = await self.exec_as_agent(
170            environment,
171            command='printf %s "$HOME"',
172        )
173        agent_home = agent_home_result.stdout.strip()
174        if not agent_home:
175            raise RuntimeError("Could not determine agent home directory")
176
177        await self.exec_as_root(
178            environment,
179            command=(
180                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
181                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
182            ),
183        )
184
185        # Install a modern ruff so `ruff server` works without --preview.
186        # This also makes it available as a CLI tool for the agent.
187        await self.exec_as_agent(
188            environment,
189            command=('export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff'),
190        )
191
192        if self._binary_path:
193            binary = Path(self._binary_path)
194            if not binary.exists():
195                raise FileNotFoundError(
196                    f"eval-cli binary not found at {binary}. "
197                    "Build it with: cargo build --release -p eval_cli"
198                )
199            await environment.upload_file(
200                source_path=binary,
201                target_path="/usr/local/bin/eval-cli",
202            )
203            await self.exec_as_root(
204                environment,
205                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
206            )
207            return
208
209        if self._download_url:
210            await self.exec_as_root(
211                environment,
212                command=(
213                    f"curl -fsSL {shlex.quote(self._download_url)} "
214                    "-o /usr/local/bin/eval-cli && "
215                    "chmod +x /usr/local/bin/eval-cli && "
216                    "eval-cli --help"
217                ),
218            )
219            return
220
221        raise ValueError(
222            "No eval-cli binary provided. "
223            "Either pass binary_path=/path/to/target/release/eval-cli "
224            "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
225        )
226
227    def populate_context_post_run(self, context: AgentContext) -> None:
228        result_data = None
229        for json_file in self.logs_dir.rglob("result.json"):
230            try:
231                result_data = json.loads(json_file.read_text())
232                break
233            except (json.JSONDecodeError, OSError):
234                continue
235
236        if result_data is None:
237            self.logger.warning("Could not find or parse result.json from eval-cli")
238            return
239
240        if result_data.get("input_tokens") is not None:
241            context.n_input_tokens = result_data["input_tokens"]
242        if result_data.get("output_tokens") is not None:
243            context.n_output_tokens = result_data["output_tokens"]
244        if result_data.get("cache_read_input_tokens") is not None:
245            context.n_cache_tokens = result_data["cache_read_input_tokens"]
246
247        context.metadata = {
248            "status": result_data.get("status"),
249            "duration_secs": result_data.get("duration_secs"),
250            "model": result_data.get("model"),
251        }
252
253    def _get_api_env(self) -> dict[str, str]:
254        env: dict[str, str] = {}
255        if not self.model_name or "/" not in self.model_name:
256            return env
257
258        provider = self.model_name.split("/", 1)[0]
259        provider_env_map = {
260            "anthropic": "ANTHROPIC_API_KEY",
261            "openai": "OPENAI_API_KEY",
262            "google": "GEMINI_API_KEY",
263            "gemini": "GEMINI_API_KEY",
264            "deepseek": "DEEPSEEK_API_KEY",
265            "mistral": "MISTRAL_API_KEY",
266        }
267
268        env_var = provider_env_map.get(provider)
269        if env_var:
270            api_key = os.environ.get(env_var, "")
271            if api_key:
272                env[env_var] = api_key
273
274        return env
275
276    @with_prompt_template
277    async def run(
278        self, instruction: str, environment: BaseEnvironment, context: AgentContext
279    ) -> None:
280        escaped_instruction = shlex.quote(instruction)
281        env = self._get_api_env()
282
283        workdir = await self._detect_workdir(environment)
284
285        parts = [
286            "eval-cli",
287            f"--workdir {shlex.quote(workdir)}",
288            "--output-dir /logs/agent",
289        ]
290
291        if self.model_name:
292            parts.append(f"--model {shlex.quote(self.model_name)}")
293
294        timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
295        if timeout:
296            parts.append(f"--timeout {shlex.quote(timeout)}")
297
298        staff = self._extra_env.get("EVAL_CLI_STAFF")
299        if staff and staff.lower() == "false":
300            parts.append("--no-staff")
301
302        reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
303        if reasoning_effort:
304            parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
305
306        enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
307        if enable_thinking:
308            if enable_thinking.lower() == "true":
309                parts.append("--enable-thinking")
310            elif enable_thinking.lower() == "false":
311                parts.append("--disable-thinking")
312
313        parts.append(f"--instruction {escaped_instruction}")
314
315        await self.exec_as_agent(
316            environment,
317            command=(
318                " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
319            ),
320            env=env,
321        )
322
323        await self.exec_as_agent(
324            environment,
325            command=(
326                "git add -A && "
327                "git diff --cached HEAD > /logs/agent/patch.diff && "
328                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
329            ),
330            cwd=workdir,
331        )