1"""Harbor agent wrapper for Zed's eval-cli binary.
2
3Usage:
4 # Build eval-cli locally first:
5 cargo build --release -p eval_cli
6
7 # Run via Harbor with a local binary:
8 harbor run -d "dataset@version" \
9 --agent-import-path zed_eval.agent:ZedAgent \
10 --ae binary_path=/path/to/target/release/eval-cli \
11 --agent-model anthropic/claude-sonnet-4-6-latest
12
13 # Or with a download URL (for CI):
14 harbor run -d "dataset@version" \
15 --agent-import-path zed_eval.agent:ZedAgent \
16 --ae download_url=https://example.com/eval-cli \
17 --agent-model anthropic/claude-sonnet-4-6-latest
18"""
19
20import json
21import os
22import shlex
23from pathlib import Path
24
25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
26from harbor.environments.base import BaseEnvironment
27from harbor.models.agent.context import AgentContext
28
29
class ZedAgent(BaseInstalledAgent):
    """Runs Zed's headless AI agent (eval-cli) to solve tasks.

    The eval-cli binary boots a headless GPUI application and uses the same
    NativeAgent + AcpThread pipeline as the production Zed editor, driving
    the full agentic loop (tool calls, subagents, retries) without a GUI.

    The binary is supplied either as a local file (``binary_path``) that is
    uploaded into the container, or as a URL (``download_url`` or the
    ``EVAL_CLI_DOWNLOAD_URL`` environment variable) fetched with curl.
    """

    # Maps the provider prefix of ``model_name`` ("provider/model") to the
    # host environment variable that holds that provider's API key.
    _PROVIDER_API_KEY_VARS = {
        "anthropic": "ANTHROPIC_API_KEY",
        "openai": "OPENAI_API_KEY",
        "google": "GEMINI_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "deepseek": "DEEPSEEK_API_KEY",
        "mistral": "MISTRAL_API_KEY",
    }

    # Shell prelude that resolves the Zed data directory used by the LSP
    # installers below.
    _ZED_DATA_DIR_PRELUDE = (
        'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
    )

    # (directory name under "$ZED_DATA_DIR/languages", npm packages to install)
    _NPM_LSP_PACKAGES = (
        ("basedpyright", "basedpyright"),
        ("typescript-language-server", "typescript typescript-language-server"),
        ("vtsls", "@vtsls/language-server typescript"),
        ("tailwindcss-language-server", "@tailwindcss/language-server"),
    )

    def __init__(
        self,
        logs_dir: Path,
        binary_path: str | None = None,
        download_url: str | None = None,
        *args,
        **kwargs,
    ):
        """Store where the eval-cli binary comes from.

        Args:
            logs_dir: Forwarded to the base agent.
            binary_path: Host path of a locally built eval-cli binary.
            download_url: URL to download eval-cli from; falls back to the
                ``EVAL_CLI_DOWNLOAD_URL`` environment variable when empty.
            *args: Forwarded to the base agent.
            **kwargs: Forwarded to the base agent.
        """
        super().__init__(logs_dir, *args, **kwargs)
        self._binary_path = binary_path
        self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")

    @staticmethod
    def name() -> str:
        """Return the agent's identifier."""
        return "zed"

    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
        """Detect the repo working directory inside the container.

        Checks, in order:
          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
          2. ``/app`` (SWE-bench Pro)
          3. ``/testbed`` (SWE-bench Verified)
          4. ``/repo``
          5. First git repo found under ``/`` (max depth 3)

        Raises:
            RuntimeError: No git repository could be located.
        """
        override = self._extra_env.get("EVAL_CLI_WORKDIR")
        if override:
            return override

        result = await self.exec_as_agent(
            environment,
            command=(
                "for d in /app /testbed /repo; do "
                ' if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
                "done; "
                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
                '| head -1 | sed "s|/.git$||"'
            ),
        )
        # Guard against a missing stdout so "nothing found" always surfaces
        # as the RuntimeError below rather than an AttributeError.
        workdir = (result.stdout or "").strip()
        if not workdir:
            raise RuntimeError(
                "Could not find a git repository in the container. "
                "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
            )
        return workdir

    def _resolve_local_binary(self) -> Path | None:
        """Validate the configured eval-cli source.

        Returns:
            The host path of the local binary, or ``None`` when the binary
            must be downloaded from ``self._download_url`` instead.

        Raises:
            FileNotFoundError: ``binary_path`` was given but does not exist.
            ValueError: Neither a binary path nor a download URL is set.
        """
        if self._binary_path:
            binary = Path(self._binary_path)
            if not binary.exists():
                raise FileNotFoundError(
                    f"eval-cli binary not found at {binary}. "
                    "Build it with: cargo build --release -p eval_cli"
                )
            return binary

        if self._download_url:
            return None

        raise ValueError(
            "No eval-cli binary provided. "
            "Either pass binary_path=/path/to/target/release/eval-cli "
            "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
        )

    async def install(self, environment: BaseEnvironment) -> None:
        """Prepare the container and install the eval-cli binary.

        Installs base packages, then best-effort tooling (Node.js, language
        servers, uv/ruff), and finally places eval-cli at
        ``/usr/local/bin/eval-cli`` and smoke-tests it with ``--help``.

        Raises:
            FileNotFoundError: ``binary_path`` was given but does not exist.
            ValueError: Neither a binary path nor a download URL is set.
        """
        # Validate the configuration first so a misconfigured run fails
        # immediately instead of after all the package installs below.
        local_binary = self._resolve_local_binary()

        # Detect the package manager and install base dependencies.
        # Supports Debian/Ubuntu (apt-get), Alpine (apk), and
        # Fedora/RHEL/CentOS (dnf/yum).
        await self.exec_as_root(
            environment,
            command=(
                "if command -v apt-get >/dev/null 2>&1; then "
                " apt-get update && "
                " apt-get install -y --no-install-recommends ca-certificates curl git; "
                "elif command -v apk >/dev/null 2>&1; then "
                " apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; "
                "elif command -v dnf >/dev/null 2>&1; then "
                " dnf install -y ca-certificates curl git; "
                "elif command -v yum >/dev/null 2>&1; then "
                " yum install -y ca-certificates curl git; "
                "else "
                " echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; "
                "fi"
            ),
            env={"DEBIAN_FRONTEND": "noninteractive"},
        )

        # ── Non-essential tooling ─────────────────────────────────────
        # Everything in these three steps (Node.js, LSPs, uv/ruff) is
        # nice-to-have. If any step fails (e.g. musl incompatibility,
        # network issues), it logs a warning and continues — the agent can
        # still work without pre-installed language servers.
        await self._install_node(environment)
        await self._install_lsps(environment)
        await self._install_uv_and_ruff(environment)

        if local_binary is not None:
            await environment.upload_file(
                source_path=local_binary,
                target_path="/usr/local/bin/eval-cli",
            )
            await self.exec_as_root(
                environment,
                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
            )
            return

        # _resolve_local_binary() guarantees a download URL on this path.
        await self.exec_as_root(
            environment,
            command=(
                f"curl -fsSL {shlex.quote(self._download_url)} "
                "-o /usr/local/bin/eval-cli && "
                "chmod +x /usr/local/bin/eval-cli && "
                "eval-cli --help"
            ),
        )

    async def _install_node(self, environment: BaseEnvironment) -> None:
        """Install Node.js from official binary tarballs.

        Uses the musl build on Alpine and the glibc build elsewhere.
        Skips if node is already on PATH. Failures are logged, not raised.
        """
        try:
            await self.exec_as_root(
                environment,
                command=(
                    "if command -v node >/dev/null 2>&1; then "
                    ' echo "Node.js already available: $(node --version)"; '
                    "else "
                    " NODE_VER=v22.14.0; "
                    " ARCH=$(uname -m); "
                    ' case "$ARCH" in '
                    " x86_64) NODE_ARCH=x64 ;; "
                    " aarch64) NODE_ARCH=arm64 ;; "
                    ' *) echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; '
                    " esac; "
                    " if ldd /bin/sh 2>&1 | grep -qi musl; then "
                    ' NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; '
                    " else "
                    ' NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; '
                    " fi; "
                    ' echo "Downloading Node.js from $NODE_URL"; '
                    ' curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; '
                    ' echo "Installed Node.js $(node --version)"; '
                    "fi"
                ),
            )
        except Exception as exc:
            self.logger.warning("Node.js installation failed (non-fatal): %s", exc)

    async def _install_lsps(self, environment: BaseEnvironment) -> None:
        """Pre-install language servers so Zed doesn't download them at runtime.

        Each LSP is installed independently so one failure doesn't block the
        rest. The npm-based servers (including eslint, which is built with
        npm) are skipped when npm is unavailable; gopls only needs Go, so it
        is attempted regardless of npm.
        """
        if await self._npm_available(environment):
            await self._install_npm_lsps(environment)
            await self._install_eslint_lsp(environment)
        else:
            self.logger.warning("npm not available — skipping npm-based LSP installs")

        await self._install_gopls(environment)

    async def _npm_available(self, environment: BaseEnvironment) -> bool:
        """Return whether ``npm`` is on the agent user's PATH.

        Assumes ``exec_as_agent`` raises when the command exits non-zero,
        which is how the probe's result is observed.
        """
        try:
            await self.exec_as_agent(
                environment,
                command="command -v npm >/dev/null 2>&1",
            )
        except Exception:
            return False
        return True

    async def _install_npm_lsps(self, environment: BaseEnvironment) -> None:
        """Install each npm-distributed language server into its own directory."""
        for name, packages in self._NPM_LSP_PACKAGES:
            command = (
                self._ZED_DATA_DIR_PRELUDE
                + f'DIR="$ZED_DATA_DIR/languages/{name}"; '
                + f'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact {packages}'
            )
            try:
                await self.exec_as_agent(environment, command=command)
            except Exception as exc:
                self.logger.warning(
                    "LSP install '%s' failed (non-fatal): %s", name, exc
                )

    async def _install_eslint_lsp(self, environment: BaseEnvironment) -> None:
        """Download Zed's vscode-eslint fork from GitHub and compile it."""
        try:
            await self.exec_as_agent(
                environment,
                command=(
                    "set -euo pipefail; "
                    + self._ZED_DATA_DIR_PRELUDE
                    + 'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
                    'mkdir -p "$ESLINT_DIR"; '
                    'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
                    '| tar -xz -C "$ESLINT_DIR"; '
                    'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
                    'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
                ),
            )
        except Exception as exc:
            self.logger.warning("eslint LSP install failed (non-fatal): %s", exc)

    async def _install_gopls(self, environment: BaseEnvironment) -> None:
        """Install gopls when Go is present in the container.

        Tries ``gopls@latest`` first; if that fails, walks the five newest
        released versions and installs the first whose ``go`` directive is
        not newer than the local toolchain. Guarded by a 120s timeout so
        slow compilation can never eat the full setup budget.
        """
        gopls_script = (
            "if command -v go >/dev/null 2>&1; then "
            "if go install golang.org/x/tools/gopls@latest 2>/dev/null; then "
            "echo 'Installed gopls@latest'; "
            "else "
            ' MY_GO=$(go env GOVERSION | sed "s/^go//"); '
            " for v in $(curl -fsSL "
            "https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null"
            " | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do "
            " NEED=$(curl -fsSL "
            '"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"'
            " 2>/dev/null | awk '/^go /{print $2; exit}'); "
            ' if [ -n "$NEED" ] '
            ' && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" '
            ' | sort -V | head -1)" = "$NEED" ]; then '
            ' echo "Installing gopls $v (compatible with Go $MY_GO)"; '
            ' go install "golang.org/x/tools/gopls@$v" && break; '
            " fi; "
            " done; "
            "fi; "
            "fi"
        )
        try:
            await self.exec_as_agent(
                environment,
                command=(
                    "timeout 120 bash -c "
                    + shlex.quote(gopls_script)
                    + " || echo 'WARNING: gopls installation timed out or failed -- skipping'"
                ),
            )
        except Exception as exc:
            self.logger.warning("gopls install failed (non-fatal): %s", exc)

    async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None:
        """Install uv and ruff for Python tooling.

        uv is installed for the agent user, symlinked into ``/usr/local/bin``
        as root, and then used to install ruff. Failures are logged, not
        raised.
        """
        try:
            await self.exec_as_agent(
                environment,
                command=(
                    "curl -LsSf https://astral.sh/uv/install.sh | sh && "
                    '. "$HOME/.local/bin/env"'
                ),
            )

            # The symlinks are created as root, so the agent user's home has
            # to be resolved in the agent's own shell first.
            agent_home_result = await self.exec_as_agent(
                environment,
                command='printf %s "$HOME"',
            )
            agent_home = (agent_home_result.stdout or "").strip()
            if not agent_home:
                self.logger.warning(
                    "Could not determine agent home directory — skipping uv symlinks"
                )
                return

            await self.exec_as_root(
                environment,
                command=(
                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
                    f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
                ),
            )

            await self.exec_as_agent(
                environment,
                command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff',
            )
        except Exception as exc:
            self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc)

    def populate_context_post_run(self, context: AgentContext) -> None:
        """Copy token usage and run metadata from ``result.json`` into *context*.

        Searches ``self.logs_dir`` recursively and uses the first
        ``result.json`` (in sorted path order) that parses to a JSON object.
        Unreadable, undecodable, malformed, or non-object files are skipped.
        Token counters are only set when present; ``context.metadata`` is
        replaced with the run's status, duration and model.
        """
        result_data = None
        # Sorted so the chosen file is deterministic when several exist.
        for json_file in sorted(self.logs_dir.rglob("result.json")):
            try:
                # Reading bytes lets json detect the encoding; ValueError
                # covers both JSONDecodeError and UnicodeDecodeError.
                parsed = json.loads(json_file.read_bytes())
            except (ValueError, OSError):
                continue
            if isinstance(parsed, dict):
                result_data = parsed
                break

        if result_data is None:
            self.logger.warning("Could not find or parse result.json from eval-cli")
            return

        if result_data.get("input_tokens") is not None:
            context.n_input_tokens = result_data["input_tokens"]
        if result_data.get("output_tokens") is not None:
            context.n_output_tokens = result_data["output_tokens"]
        if result_data.get("cache_read_input_tokens") is not None:
            context.n_cache_tokens = result_data["cache_read_input_tokens"]

        context.metadata = {
            "status": result_data.get("status"),
            "duration_secs": result_data.get("duration_secs"),
            "model": result_data.get("model"),
        }

    def _get_api_env(self) -> dict[str, str]:
        """Return the API-key environment variable for the model's provider.

        The provider is the part of ``self.model_name`` before the first
        ``/``. Returns an empty dict when the model name has no provider
        prefix, the provider is unknown, or the key is unset or empty on the
        host.
        """
        env: dict[str, str] = {}
        if not self.model_name or "/" not in self.model_name:
            return env

        provider = self.model_name.split("/", 1)[0]
        env_var = self._PROVIDER_API_KEY_VARS.get(provider)
        if env_var:
            api_key = os.environ.get(env_var, "")
            if api_key:
                env[env_var] = api_key

        return env

    def _build_eval_cli_command(self, instruction: str, workdir: str) -> str:
        """Assemble the shell-quoted eval-cli invocation.

        Optional flags are driven by the ``EVAL_CLI_TIMEOUT``,
        ``EVAL_CLI_STAFF``, ``EVAL_CLI_REASONING_EFFORT`` and
        ``EVAL_CLI_ENABLE_THINKING`` extra-env values.
        """
        parts = [
            "eval-cli",
            f"--workdir {shlex.quote(workdir)}",
            "--output-dir /logs/agent",
        ]

        if self.model_name:
            parts.append(f"--model {shlex.quote(self.model_name)}")

        timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
        if timeout:
            parts.append(f"--timeout {shlex.quote(timeout)}")

        # Only an explicit "false" adds --no-staff.
        staff = self._extra_env.get("EVAL_CLI_STAFF")
        if staff and staff.lower() == "false":
            parts.append("--no-staff")

        reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
        if reasoning_effort:
            parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")

        # Values other than "true"/"false" add no thinking flag at all.
        enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
        if enable_thinking:
            if enable_thinking.lower() == "true":
                parts.append("--enable-thinking")
            elif enable_thinking.lower() == "false":
                parts.append("--disable-thinking")

        parts.append(f"--instruction {shlex.quote(instruction)}")
        return " ".join(parts)

    @with_prompt_template
    async def run(
        self, instruction: str, environment: BaseEnvironment, context: AgentContext
    ) -> None:
        """Run eval-cli on *instruction* and capture the resulting patch.

        Output is teed to ``/logs/agent/eval-cli.txt``; afterwards all
        changes in the detected working directory are staged and written to
        ``/logs/agent/patch.diff``.
        """
        env = self._get_api_env()
        workdir = await self._detect_workdir(environment)
        eval_cli_command = self._build_eval_cli_command(instruction, workdir)

        # Prefer line-buffered tee when stdbuf exists so the log streams.
        # NOTE(review): the pipeline's exit status is tee's unless the
        # harness enables pipefail, so a failing eval-cli presumably does not
        # abort here and the patch below is still collected — confirm.
        await self.exec_as_agent(
            environment,
            command=(
                eval_cli_command + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;"
                " then stdbuf -oL tee /logs/agent/eval-cli.txt;"
                " else tee /logs/agent/eval-cli.txt; fi"
            ),
            env=env,
        )

        await self.exec_as_agent(
            environment,
            command=(
                "git add -A && "
                "git diff --cached HEAD > /logs/agent/patch.diff && "
                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
            ),
            cwd=workdir,
        )