1"""Harbor agent wrapper for Zed's eval-cli binary.
2
3Usage:
4 # Build eval-cli locally first:
5 cargo build --release -p eval_cli
6
7 # Run via Harbor with a local binary:
8 harbor run -d "dataset@version" \
9 --agent-import-path zed_eval.agent:ZedAgent \
10 --ae binary_path=/path/to/target/release/eval-cli \
11 --agent-model anthropic/claude-sonnet-4-6-latest
12
13 # Or with a download URL (for CI):
14 harbor run -d "dataset@version" \
15 --agent-import-path zed_eval.agent:ZedAgent \
16 --ae download_url=https://example.com/eval-cli \
17 --agent-model anthropic/claude-sonnet-4-6-latest
18"""
19
20import json
21import os
22import shlex
23from pathlib import Path
24
25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
26from harbor.environments.base import BaseEnvironment
27from harbor.models.agent.context import AgentContext
28
29
30class ZedAgent(BaseInstalledAgent):
31 """Runs Zed's headless AI agent (eval-cli) to solve tasks.
32
33 The eval-cli binary boots a headless GPUI application and uses the same
34 NativeAgent + AcpThread pipeline as the production Zed editor, driving
35 the full agentic loop (tool calls, subagents, retries) without a GUI.
36 """
37
38 def __init__(
39 self,
40 logs_dir: Path,
41 binary_path: str | None = None,
42 download_url: str | None = None,
43 *args,
44 **kwargs,
45 ):
46 super().__init__(logs_dir, *args, **kwargs)
47 self._binary_path = binary_path
48 self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")
49
50 @staticmethod
51 def name() -> str:
52 return "zed"
53
54 async def _detect_workdir(self, environment: BaseEnvironment) -> str:
55 """Detect the working directory inside the container.
56
57 Checks, in order:
58 1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
59 2. Well-known dirs with a ``.git`` subdirectory (SWE-bench style)
60 3. First git repo found under ``/`` (max depth 3)
61 4. Well-known dirs that exist at all (terminal-bench style)
62 5. The container's default working directory (``pwd``)
63 """
64 override = self._extra_env.get("EVAL_CLI_WORKDIR")
65 if override:
66 return override
67
68 # First: try to find a git repo (SWE-bench, etc.)
69 result = await self.exec_as_agent(
70 environment,
71 command=(
72 "for d in /app /testbed /repo; do "
73 ' if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
74 "done; "
75 "find / -maxdepth 3 -name .git -type d 2>/dev/null "
76 '| head -1 | sed "s|/.git$||"'
77 ),
78 )
79 workdir = (result.stdout or "").strip()
80 if workdir:
81 return workdir
82
83 # Fallback: use the first well-known directory that exists,
84 # even without .git (terminal-bench containers aren't git repos).
85 result = await self.exec_as_agent(
86 environment,
87 command=(
88 "for d in /app /testbed /repo /root /home; do "
89 ' if [ -d "$d" ]; then echo "$d"; exit 0; fi; '
90 "done; "
91 "pwd"
92 ),
93 )
94 workdir = (result.stdout or "").strip()
95 if workdir:
96 return workdir
97
98 raise RuntimeError(
99 "Could not detect a working directory in the container. "
100 "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
101 )
102
103 async def install(self, environment: BaseEnvironment) -> None:
104 # Detect the package manager and install base dependencies.
105 # Supports Debian/Ubuntu (apt-get), Alpine (apk), and
106 # Fedora/RHEL/CentOS (dnf/yum).
107 await self.exec_as_root(
108 environment,
109 command=(
110 "if command -v apt-get >/dev/null 2>&1; then "
111 " apt-get update && "
112 " apt-get install -y --no-install-recommends ca-certificates curl git; "
113 "elif command -v apk >/dev/null 2>&1; then "
114 " apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; "
115 "elif command -v dnf >/dev/null 2>&1; then "
116 " dnf install -y ca-certificates curl git; "
117 "elif command -v yum >/dev/null 2>&1; then "
118 " yum install -y ca-certificates curl git; "
119 "else "
120 " echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; "
121 "fi"
122 ),
123 env={"DEBIAN_FRONTEND": "noninteractive"},
124 )
125
126 # ── Non-essential tooling ─────────────────────────────────────
127 # Everything below here (Node.js, LSPs, uv/ruff) is nice-to-have.
128 # If any step fails (e.g. musl incompatibility, network issues),
129 # log a warning and continue — the agent can still work without
130 # pre-installed language servers.
131
132 await self._install_node(environment)
133 await self._install_lsps(environment)
134 await self._install_uv_and_ruff(environment)
135
136 if self._binary_path:
137 binary = Path(self._binary_path)
138 if not binary.exists():
139 raise FileNotFoundError(
140 f"eval-cli binary not found at {binary}. "
141 "Build it with: cargo build --release -p eval_cli"
142 )
143 await environment.upload_file(
144 source_path=binary,
145 target_path="/usr/local/bin/eval-cli",
146 )
147 await self.exec_as_root(
148 environment,
149 command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
150 )
151 return
152
153 if self._download_url:
154 await self.exec_as_root(
155 environment,
156 command=(
157 f"curl -fsSL {shlex.quote(self._download_url)} "
158 "-o /usr/local/bin/eval-cli && "
159 "chmod +x /usr/local/bin/eval-cli && "
160 "eval-cli --help"
161 ),
162 )
163 return
164
165 raise ValueError(
166 "No eval-cli binary provided. "
167 "Either pass binary_path=/path/to/target/release/eval-cli "
168 "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
169 )
170
171 async def _install_node(self, environment: BaseEnvironment) -> None:
172 """Install Node.js from official binary tarballs.
173
174 Uses the musl build on Alpine and the glibc build elsewhere.
175 Skips if node is already on PATH.
176 """
177 try:
178 await self.exec_as_root(
179 environment,
180 command=(
181 "if command -v node >/dev/null 2>&1; then "
182 ' echo "Node.js already available: $(node --version)"; '
183 "else "
184 " NODE_VER=v22.14.0; "
185 " ARCH=$(uname -m); "
186 ' case "$ARCH" in '
187 " x86_64) NODE_ARCH=x64 ;; "
188 " aarch64) NODE_ARCH=arm64 ;; "
189 ' *) echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; '
190 " esac; "
191 " if ldd /bin/sh 2>&1 | grep -qi musl; then "
192 ' NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; '
193 " else "
194 ' NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; '
195 " fi; "
196 ' echo "Downloading Node.js from $NODE_URL"; '
197 ' curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; '
198 ' echo "Installed Node.js $(node --version)"; '
199 "fi"
200 ),
201 )
202 except Exception as exc:
203 self.logger.warning("Node.js installation failed (non-fatal): %s", exc)
204
205 async def _install_lsps(self, environment: BaseEnvironment) -> None:
206 """Pre-install language servers so Zed doesn't download them at runtime.
207
208 Each LSP is installed independently so one failure doesn't block the rest.
209 """
210 # npm-based LSPs — skip all if npm is not available.
211 try:
212 await self.exec_as_agent(
213 environment,
214 command="command -v npm >/dev/null 2>&1",
215 )
216 except Exception:
217 self.logger.warning("npm not available — skipping npm-based LSP installs")
218 return
219
220 lsp_installs = [
221 (
222 "basedpyright",
223 'DIR="$ZED_DATA_DIR/languages/basedpyright"; '
224 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact basedpyright',
225 ),
226 (
227 "typescript-language-server",
228 'DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
229 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact typescript typescript-language-server',
230 ),
231 (
232 "vtsls",
233 'DIR="$ZED_DATA_DIR/languages/vtsls"; '
234 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @vtsls/language-server typescript',
235 ),
236 (
237 "tailwindcss-language-server",
238 'DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
239 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @tailwindcss/language-server',
240 ),
241 ]
242
243 for name, cmd in lsp_installs:
244 try:
245 await self.exec_as_agent(
246 environment,
247 command=(
248 'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
249 + cmd
250 ),
251 )
252 except Exception as exc:
253 self.logger.warning(
254 "LSP install '%s' failed (non-fatal): %s", name, exc
255 )
256
257 # eslint — downloaded from GitHub and compiled separately.
258 try:
259 await self.exec_as_agent(
260 environment,
261 command=(
262 "set -euo pipefail; "
263 'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
264 'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
265 'mkdir -p "$ESLINT_DIR"; '
266 'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
267 '| tar -xz -C "$ESLINT_DIR"; '
268 'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
269 'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
270 ),
271 )
272 except Exception as exc:
273 self.logger.warning("eslint LSP install failed (non-fatal): %s", exc)
274
275 # gopls — only when Go is present. Guarded by a 120s timeout so slow
276 # compilation can never eat the full setup budget.
277 gopls_script = (
278 "if command -v go >/dev/null 2>&1; then "
279 "if go install golang.org/x/tools/gopls@latest 2>/dev/null; then "
280 "echo 'Installed gopls@latest'; "
281 "else "
282 ' MY_GO=$(go env GOVERSION | sed "s/^go//"); '
283 " for v in $(curl -fsSL "
284 "https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null"
285 " | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do "
286 " NEED=$(curl -fsSL "
287 '"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"'
288 " 2>/dev/null | awk '/^go /{print $2; exit}'); "
289 ' if [ -n "$NEED" ] '
290 ' && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" '
291 ' | sort -V | head -1)" = "$NEED" ]; then '
292 ' echo "Installing gopls $v (compatible with Go $MY_GO)"; '
293 ' go install "golang.org/x/tools/gopls@$v" && break; '
294 " fi; "
295 " done; "
296 "fi; "
297 "fi"
298 )
299 try:
300 await self.exec_as_agent(
301 environment,
302 command=(
303 "timeout 120 bash -c "
304 + shlex.quote(gopls_script)
305 + " || echo 'WARNING: gopls installation timed out or failed -- skipping'"
306 ),
307 )
308 except Exception as exc:
309 self.logger.warning("gopls install failed (non-fatal): %s", exc)
310
311 async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None:
312 """Install uv and ruff for Python tooling."""
313 try:
314 await self.exec_as_agent(
315 environment,
316 command=(
317 "curl -LsSf https://astral.sh/uv/install.sh | sh && "
318 '. "$HOME/.local/bin/env"'
319 ),
320 )
321
322 agent_home_result = await self.exec_as_agent(
323 environment,
324 command='printf %s "$HOME"',
325 )
326 agent_home = agent_home_result.stdout.strip()
327 if not agent_home:
328 self.logger.warning(
329 "Could not determine agent home directory — skipping uv symlinks"
330 )
331 return
332
333 await self.exec_as_root(
334 environment,
335 command=(
336 f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
337 f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
338 ),
339 )
340
341 await self.exec_as_agent(
342 environment,
343 command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff',
344 )
345 except Exception as exc:
346 self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc)
347
348 def populate_context_post_run(self, context: AgentContext) -> None:
349 result_data = None
350 for json_file in self.logs_dir.rglob("result.json"):
351 try:
352 result_data = json.loads(json_file.read_text())
353 break
354 except (json.JSONDecodeError, OSError):
355 continue
356
357 if result_data is None:
358 self.logger.warning("Could not find or parse result.json from eval-cli")
359 return
360
361 if result_data.get("input_tokens") is not None:
362 context.n_input_tokens = result_data["input_tokens"]
363 if result_data.get("output_tokens") is not None:
364 context.n_output_tokens = result_data["output_tokens"]
365 if result_data.get("cache_read_input_tokens") is not None:
366 context.n_cache_tokens = result_data["cache_read_input_tokens"]
367
368 context.metadata = {
369 "status": result_data.get("status"),
370 "duration_secs": result_data.get("duration_secs"),
371 "model": result_data.get("model"),
372 }
373
374 def _get_api_env(self) -> dict[str, str]:
375 env: dict[str, str] = {}
376 if not self.model_name or "/" not in self.model_name:
377 return env
378
379 provider = self.model_name.split("/", 1)[0]
380 provider_env_map = {
381 "anthropic": "ANTHROPIC_API_KEY",
382 "openai": "OPENAI_API_KEY",
383 "google": "GEMINI_API_KEY",
384 "gemini": "GEMINI_API_KEY",
385 "deepseek": "DEEPSEEK_API_KEY",
386 "mistral": "MISTRAL_API_KEY",
387 }
388
389 env_var = provider_env_map.get(provider)
390 if env_var:
391 api_key = os.environ.get(env_var, "")
392 if api_key:
393 env[env_var] = api_key
394
395 return env
396
397 @with_prompt_template
398 async def run(
399 self, instruction: str, environment: BaseEnvironment, context: AgentContext
400 ) -> None:
401 escaped_instruction = shlex.quote(instruction)
402 env = self._get_api_env()
403
404 workdir = await self._detect_workdir(environment)
405
406 parts = [
407 "eval-cli",
408 f"--workdir {shlex.quote(workdir)}",
409 "--output-dir /logs/agent",
410 ]
411
412 if self.model_name:
413 parts.append(f"--model {shlex.quote(self.model_name)}")
414
415 timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
416 if timeout:
417 parts.append(f"--timeout {shlex.quote(timeout)}")
418
419 staff = self._extra_env.get("EVAL_CLI_STAFF")
420 if staff and staff.lower() == "false":
421 parts.append("--no-staff")
422
423 reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
424 if reasoning_effort:
425 parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
426
427 enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
428 if enable_thinking:
429 if enable_thinking.lower() == "true":
430 parts.append("--enable-thinking")
431 elif enable_thinking.lower() == "false":
432 parts.append("--disable-thinking")
433
434 parts.append(f"--instruction {escaped_instruction}")
435
436 await self.exec_as_agent(
437 environment,
438 command=(
439 " ".join(parts) + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;"
440 " then stdbuf -oL tee /logs/agent/eval-cli.txt;"
441 " else tee /logs/agent/eval-cli.txt; fi"
442 ),
443 env=env,
444 )
445
446 # Only generate a patch if the workdir is a git repo
447 # (SWE-bench style). Terminal-bench containers aren't git repos.
448 await self.exec_as_agent(
449 environment,
450 command=(
451 'if [ -d ".git" ]; then '
452 "git add -A && "
453 "git diff --cached HEAD > /logs/agent/patch.diff && "
454 'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"; '
455 "else "
456 'echo "No git repo found, skipping patch generation"; '
457 "fi"
458 ),
459 cwd=workdir,
460 )