name: Run Agent Eval

# Triggers:
#   - schedule: once a day at 00:00 (GitHub evaluates cron expressions in UTC).
#   - pull_request: PRs targeting any branch. The `run_eval` job additionally
#     requires the `run-eval` label, so unlabeled PRs only run the no-op job.
#   - workflow_dispatch: manual runs.
on:
  schedule:
    - cron: "0 0 * * *"

  pull_request:
    branches:
      - "**"
    # `labeled` is listed so that adding a label to an already-open PR
    # triggers the workflow without needing a new push.
    types: [opened, synchronize, reopened, labeled]

  workflow_dispatch:

concurrency:
  # Allow only one in-flight run per non-`main` ref.
  #
  # The group key is `<workflow>-<ref_name>-<suffix>`:
  #   - on `main` the suffix is the commit SHA, so runs for different commits
  #     land in different groups and do not cancel each other;
  #   - on any other ref the suffix is the constant `anysha`, so all runs for
  #     that ref share one group and a newer run cancels the one in progress.
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true

# Workflow-level environment: visible to every step of every job below.
# Numeric-looking values are quoted so they are unambiguously strings.
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  RUST_BACKTRACE: "1"
  # NOTE(review): these secrets are exposed to all jobs, including the no-op
  # job. Consider scoping them to the `run_eval` job if nothing else needs them.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
  ZED_EVAL_TELEMETRY: "1"

jobs:
  # This is a no-op job that we run to prevent GitHub from marking the workflow
  # as failed for PRs that don't have the `run-eval` label.
  noop:
    name: No-op
    runs-on: ubuntu-latest
    if: github.repository_owner == 'zed-industries'
    steps:
      - name: No-op
        run: echo "Nothing to do"

  # Builds and runs the `eval` package. Runs only in the `zed-industries`
  # organization, and for pull requests only when the PR carries the
  # `run-eval` label; scheduled and manually dispatched runs always qualify.
  run_eval:
    timeout-minutes: 60
    name: Run Agent Eval
    if: >
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on:
      - buildjet-16vcpu-ubuntu-2204
    steps:
      # Appending to the file named by $GITHUB_PATH extends PATH for all
      # subsequent steps. The variable is quoted so the redirect target is
      # not subject to word splitting or globbing.
      - name: Add Rust to the PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Checkout repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: false

      - name: Cache dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
        with:
          # Only runs on `main` write the cache; other refs restore from it.
          save-if: ${{ github.ref == 'refs/heads/main' }}
          cache-provider: "buildjet"

      - name: Install Linux dependencies
        run: ./script/linux

      # Installs the CI cargo configuration one directory above the checkout.
      # The "Clean CI config file" step below removes it again.
      - name: Configure CI
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # Built as a separate step so compile failures are reported apart from
      # eval failures.
      - name: Compile eval
        run: cargo build --package=eval

      - name: Run eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1

      # The Linux runner is not stateful, so in theory this cleanup is
      # unnecessary. It is kept as a precaution: if we ever switch to a
      # stateful Linux runner, a leftover config file from a previous run
      # could cause problems, and it would be easy to forget to add the
      # cleanup at that point.
      - name: Clean CI config file
        if: always()
        run: rm -rf ./../.cargo