"""Harbor agent wrapper for Zed's eval-cli binary.

Usage:
    # Build eval-cli locally first:
    cargo build --release -p eval_cli

    # Run via Harbor with a local binary:
    harbor run -d "dataset@version" \
        --agent-import-path zed_eval.agent:ZedAgent \
        --ae binary_path=/path/to/target/release/eval-cli \
        --agent-model anthropic/claude-sonnet-4-6-latest

    # Or with a download URL (for CI):
    harbor run -d "dataset@version" \
        --agent-import-path zed_eval.agent:ZedAgent \
        --ae download_url=https://example.com/eval-cli \
        --agent-model anthropic/claude-sonnet-4-6-latest
"""
19
20import json
21import os
22import shlex
23from pathlib import Path
24
25from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
26from harbor.environments.base import BaseEnvironment
27from harbor.models.agent.context import AgentContext
28
29
class ZedAgent(BaseInstalledAgent):
    """Runs Zed's headless AI agent (eval-cli) to solve tasks.

    The eval-cli binary boots a headless GPUI application and uses the same
    NativeAgent + AcpThread pipeline as the production Zed editor, driving
    the full agentic loop (tool calls, subagents, retries) without a GUI.
    """

    def __init__(
        self,
        logs_dir: Path,
        binary_path: str | None = None,
        download_url: str | None = None,
        *args,
        **kwargs,
    ):
        """Store where the eval-cli binary comes from.

        Args:
            logs_dir: Directory forwarded to the base agent; also searched
                for ``result.json`` in ``populate_context_post_run``.
            binary_path: Host path of a locally built eval-cli binary. Takes
                precedence over ``download_url`` during ``install``.
            download_url: URL to fetch eval-cli from inside the container.
                Falls back to the ``EVAL_CLI_DOWNLOAD_URL`` environment
                variable when not given.
            *args: Forwarded unchanged to the base class.
            **kwargs: Forwarded unchanged to the base class.
        """
        super().__init__(logs_dir, *args, **kwargs)
        self._binary_path = binary_path
        self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")

    @staticmethod
    def name() -> str:
        """Return the identifier of this agent (``"zed"``)."""
        return "zed"

    async def _detect_workdir(self, environment: BaseEnvironment) -> str:
        """Detect the repo working directory inside the container.

        Checks, in order:
          1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
          2. ``/app`` (SWE-bench Pro)
          3. ``/testbed`` (SWE-bench Verified)
          4. ``/repo``
          5. First git repo found under ``/`` (max depth 3)

        Returns:
            The path of the detected working directory.

        Raises:
            RuntimeError: If no override is set and no git repository is found.
        """
        override = self._extra_env.get("EVAL_CLI_WORKDIR")
        if override:
            return override

        result = await self.exec_as_agent(
            environment,
            command=(
                "for d in /app /testbed /repo; do "
                '  if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
                "done; "
                "find / -maxdepth 3 -name .git -type d 2>/dev/null "
                '| head -1 | sed "s|/.git$||"'
            ),
        )
        # Guard against a missing stdout so the failure surfaces as the
        # descriptive RuntimeError below rather than an AttributeError.
        workdir = (result.stdout or "").strip()
        if not workdir:
            raise RuntimeError(
                "Could not find a git repository in the container. "
                "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
            )
        return workdir

    async def install(self, environment: BaseEnvironment) -> None:
        """Provision the container and install the eval-cli binary.

        Installs base packages, Node.js, a set of pre-built language servers,
        uv and ruff, then places eval-cli at ``/usr/local/bin/eval-cli`` either
        by uploading ``binary_path`` or by downloading ``download_url``.

        Raises:
            FileNotFoundError: If ``binary_path`` is set but is not a file.
            ValueError: If neither ``binary_path`` nor a download URL is set.
        """
        # Validate the binary source up front so a misconfigured run fails
        # immediately instead of after the lengthy provisioning steps below.
        local_binary: Path | None = None
        if self._binary_path:
            local_binary = Path(self._binary_path)
            if not local_binary.is_file():
                raise FileNotFoundError(
                    f"eval-cli binary not found at {local_binary}. "
                    "Build it with: cargo build --release -p eval_cli"
                )
        elif not self._download_url:
            raise ValueError(
                "No eval-cli binary provided. "
                "Either pass binary_path=/path/to/target/release/eval-cli "
                "or set download_url=/EVAL_CLI_DOWNLOAD_URL."
            )

        await self.exec_as_root(
            environment,
            command=(
                "apt-get update && "
                "apt-get install -y --no-install-recommends "
                "ca-certificates "
                "curl "
                "git"
            ),
            env={"DEBIAN_FRONTEND": "noninteractive"},
        )

        await self.exec_as_root(
            environment,
            command=(
                "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && "
                "apt-get install -y --no-install-recommends nodejs"
            ),
            env={"DEBIAN_FRONTEND": "noninteractive"},
        )

        # Pre-install default LSPs so Zed doesn't have to download them at
        # runtime. Each gets its own subdirectory under $ZED_DATA_DIR/languages.
        await self.exec_as_agent(
            environment,
            command=(
                "set -euo pipefail; "
                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
                # basedpyright (Python - default type checker)
                'BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"; '
                'mkdir -p "$BASEDPYRIGHT_DIR"; '
                'npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright; '
                # typescript-language-server (TypeScript/JS - default LSP)
                'TSSERVER_DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
                'mkdir -p "$TSSERVER_DIR"; '
                'npm install --prefix "$TSSERVER_DIR" --save-exact typescript typescript-language-server; '
                # vtsls (VS Code TypeScript language features)
                'VTSLS_DIR="$ZED_DATA_DIR/languages/vtsls"; '
                'mkdir -p "$VTSLS_DIR"; '
                'npm install --prefix "$VTSLS_DIR" --save-exact @vtsls/language-server typescript; '
                # tailwindcss-language-server
                'TAILWIND_DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
                'mkdir -p "$TAILWIND_DIR"; '
                'npm install --prefix "$TAILWIND_DIR" --save-exact @tailwindcss/language-server'
            ),
        )

        # eslint LSP (downloaded from zed-industries/vscode-eslint GitHub release,
        # then compiled — this mirrors what Zed does at runtime).
        await self.exec_as_agent(
            environment,
            command=(
                "set -euo pipefail; "
                'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
                'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
                'mkdir -p "$ESLINT_DIR"; '
                'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
                '| tar -xz -C "$ESLINT_DIR"; '
                'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
                'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
            ),
        )

        # gopls (Go - default LSP). Only install when Go is present in the
        # container (i.e. Go-related SWE-bench tasks).
        await self.exec_as_agent(
            environment,
            command=(
                "if command -v go >/dev/null 2>&1; then "
                "go install golang.org/x/tools/gopls@latest; "
                "fi"
            ),
        )

        await self.exec_as_agent(
            environment,
            command=(
                "curl -LsSf https://astral.sh/uv/install.sh | sh && "
                '. "$HOME/.local/bin/env"'
            ),
        )

        # Ask the container for the agent's $HOME so the uv binaries installed
        # there can be linked into /usr/local/bin by root.
        agent_home_result = await self.exec_as_agent(
            environment,
            command='printf %s "$HOME"',
        )
        agent_home = (agent_home_result.stdout or "").strip()
        if not agent_home:
            raise RuntimeError("Could not determine agent home directory")

        await self.exec_as_root(
            environment,
            command=(
                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
                f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
            ),
        )

        # Install a modern ruff so `ruff server` works without --preview.
        # This also makes it available as a CLI tool for the agent.
        await self.exec_as_agent(
            environment,
            command=('export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff'),
        )

        if local_binary is not None:
            # A local binary takes precedence over any download URL.
            await environment.upload_file(
                source_path=local_binary,
                target_path="/usr/local/bin/eval-cli",
            )
            await self.exec_as_root(
                environment,
                command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
            )
            return

        # The up-front validation guarantees a download URL is set here.
        await self.exec_as_root(
            environment,
            command=(
                f"curl -fsSL {shlex.quote(self._download_url)} "
                "-o /usr/local/bin/eval-cli && "
                "chmod +x /usr/local/bin/eval-cli && "
                "eval-cli --help"
            ),
        )

    def populate_context_post_run(self, context: AgentContext) -> None:
        """Copy token counts and run metadata from ``result.json`` into *context*.

        Searches ``self.logs_dir`` recursively and uses the first
        ``result.json`` (in sorted path order) that parses to a JSON object.
        Logs a warning and leaves *context* untouched when none is usable.
        """
        result_data = None
        # Sort so the chosen file is deterministic when several exist.
        for json_file in sorted(self.logs_dir.rglob("result.json")):
            try:
                parsed = json.loads(json_file.read_text(encoding="utf-8"))
            except (ValueError, OSError):
                # ValueError covers both json.JSONDecodeError and
                # UnicodeDecodeError; try the next candidate.
                continue
            if isinstance(parsed, dict):
                result_data = parsed
                break
            # Valid JSON that is not an object cannot hold the expected keys.

        if result_data is None:
            self.logger.warning("Could not find or parse result.json from eval-cli")
            return

        if result_data.get("input_tokens") is not None:
            context.n_input_tokens = result_data["input_tokens"]
        if result_data.get("output_tokens") is not None:
            context.n_output_tokens = result_data["output_tokens"]
        if result_data.get("cache_read_input_tokens") is not None:
            context.n_cache_tokens = result_data["cache_read_input_tokens"]

        context.metadata = {
            "status": result_data.get("status"),
            "duration_secs": result_data.get("duration_secs"),
            "model": result_data.get("model"),
        }

    def _get_api_env(self) -> dict[str, str]:
        """Return the API-key environment variable for the model's provider.

        The provider is the part of ``self.model_name`` before the first
        ``/``. The result is empty when the model name has no provider
        prefix, the provider is not recognised, or the corresponding
        variable is unset or empty in the host environment.
        """
        env: dict[str, str] = {}
        if not self.model_name or "/" not in self.model_name:
            return env

        provider = self.model_name.split("/", 1)[0]
        provider_env_map = {
            "anthropic": "ANTHROPIC_API_KEY",
            "openai": "OPENAI_API_KEY",
            "google": "GEMINI_API_KEY",
            "gemini": "GEMINI_API_KEY",
            "deepseek": "DEEPSEEK_API_KEY",
            "mistral": "MISTRAL_API_KEY",
        }

        env_var = provider_env_map.get(provider)
        if env_var:
            api_key = os.environ.get(env_var, "")
            if api_key:
                env[env_var] = api_key

        return env

    @with_prompt_template
    async def run(
        self, instruction: str, environment: BaseEnvironment, context: AgentContext
    ) -> None:
        """Run eval-cli on *instruction* and capture the resulting patch.

        Builds the eval-cli command line from the model name and the
        ``EVAL_CLI_*`` extra-env options, runs it in the detected working
        directory with output teed to ``/logs/agent/eval-cli.txt``, then
        stages all changes and writes the diff against ``HEAD`` to
        ``/logs/agent/patch.diff``.

        Raises:
            RuntimeError: If the working directory cannot be detected.
        """
        escaped_instruction = shlex.quote(instruction)
        env = self._get_api_env()

        workdir = await self._detect_workdir(environment)

        parts = [
            "eval-cli",
            f"--workdir {shlex.quote(workdir)}",
            "--output-dir /logs/agent",
        ]

        if self.model_name:
            parts.append(f"--model {shlex.quote(self.model_name)}")

        timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
        if timeout:
            parts.append(f"--timeout {shlex.quote(timeout)}")

        # Only an explicit "false" adds --no-staff; any other value adds nothing.
        staff = self._extra_env.get("EVAL_CLI_STAFF")
        if staff and staff.lower() == "false":
            parts.append("--no-staff")

        reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
        if reasoning_effort:
            parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")

        # Tri-state: "true" / "false" map to explicit flags; anything else
        # (including unset) passes no thinking flag at all.
        enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
        if enable_thinking:
            if enable_thinking.lower() == "true":
                parts.append("--enable-thinking")
            elif enable_thinking.lower() == "false":
                parts.append("--disable-thinking")

        parts.append(f"--instruction {escaped_instruction}")

        await self.exec_as_agent(
            environment,
            command=(
                " ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
            ),
            env=env,
        )

        # Stage everything (including new files) so the diff against HEAD
        # captures the agent's full set of changes.
        await self.exec_as_agent(
            environment,
            command=(
                "git add -A && "
                "git diff --cached HEAD > /logs/agent/patch.diff && "
                'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
            ),
            cwd=workdir,
        )