1name: Run Agent Eval
2
3on:
4 schedule:
5 - cron: "0 * * * *"
6 push:
7 branches:
8 - main
9 - "v[0-9]+.[0-9]+.x"
10 tags:
11 - "v*"
12
13 pull_request:
14 branches:
15 - "**"
16 types: [opened, synchronize, reopened, labeled]
17
18 workflow_dispatch:
19
20concurrency:
21 # Allow only one workflow per any non-`main` branch.
22 group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
23 cancel-in-progress: true
24
25env:
26 CARGO_TERM_COLOR: always
27 CARGO_INCREMENTAL: 0
28 RUST_BACKTRACE: 1
29 ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
30 ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
31 ZED_EVAL_TELEMETRY: 1
32
33jobs:
34 run_eval:
35 timeout-minutes: 60
36 name: Run Agent Eval
37 if: >
38 github.repository_owner == 'zed-industries' &&
39 (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
40 runs-on:
41 - buildjet-16vcpu-ubuntu-2204
42 steps:
43 - name: Add Rust to the PATH
44 run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
45
46 - name: Checkout repo
47 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
48 with:
49 clean: false
50
51 - name: Cache dependencies
52 uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
53 with:
54 save-if: ${{ github.ref == 'refs/heads/main' }}
55 cache-provider: "buildjet"
56
57 - name: Install Linux dependencies
58 run: ./script/linux
59
60 - name: Configure CI
61 run: |
62 mkdir -p ./../.cargo
63 cp ./.cargo/ci-config.toml ./../.cargo/config.toml
64
65 - name: Compile eval
66 run: cargo build --package=eval
67
68 - name: Run eval
69 run: cargo run --package=eval -- --repetitions=3 --concurrency=1
70
71 # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
72 # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
73 # to clean up the config file, I’ve included the cleanup code here as a precaution.
74 # While it’s not strictly necessary at this moment, I believe it’s better to err on the side of caution.
75 - name: Clean CI config file
76 if: always()
77 run: rm -rf ./../.cargo