# Runs the agent eval on a nightly schedule, on manual dispatch, and on pull
# requests (the job itself only proceeds for PRs labeled `run-eval`).
name: Run Agent Eval

on:
  # Nightly at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 0 * * *"

  # Pull requests targeting any branch. `labeled` is listed so that adding a
  # label to an existing PR starts a run; `opened` is intentionally not listed,
  # so merely opening a PR does not trigger this workflow.
  pull_request:
    branches:
      - "**"
    types:
      - synchronize
      - reopened
      - labeled

  # Manual runs.
  workflow_dispatch:
13
# Allow only one in-flight run of this workflow per non-`main` ref.
#
# The group key ends in the commit SHA when the ref is `main`, so every commit
# on `main` gets its own group and its run is never cancelled by a later one.
# For any other ref the suffix is the constant `anysha`, so a newer run on the
# same ref cancels the one still in progress.
concurrency:
  # The folded scalar joins the two lines below with a single space, which
  # lands inside the `${{ }}` expression and leaves the group name unchanged.
  group: >-
    ${{ github.workflow }}-${{ github.ref_name }}-${{
    github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true
18
# Workflow-level environment: every step of every job in this file inherits
# these variables.
env:
  # Cargo / Rust toolchain behavior.
  CARGO_TERM_COLOR: always # always emit colored Cargo output
  CARGO_INCREMENTAL: "0" # turn off incremental compilation
  RUST_BACKTRACE: "1" # print a backtrace when a Rust program panics

  # Settings consumed by the eval run.
  # NOTE(review): both secrets are visible to all steps because they are
  # declared at workflow level — confirm that is intended.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
  # Presumably switches on telemetry reporting in the eval binary — TODO confirm.
  ZED_EVAL_TELEMETRY: "1"
26
jobs:
  # Builds the `eval` package and runs it with 8 repetitions, one at a time.
  run_eval:
    timeout-minutes: 60
    name: Run Agent Eval
    # Run only in repositories owned by `zed-industries`. Pull-request runs
    # additionally require the `run-eval` label on the PR; scheduled and
    # manually dispatched runs skip the label check.
    if: >-
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on:
      - namespace-profile-16x32-ubuntu-2204
    # Least privilege: the steps below only read the checked-out sources, so
    # the GITHUB_TOKEN is restricted to read-only access to repository contents.
    permissions:
      contents: read
    steps:
      # Make `$HOME/.cargo/bin` available on PATH for all subsequent steps.
      - name: Add Rust to the PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Checkout repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: false
          # No later step talks to the repository over git, so do not leave the
          # token in the local git config where the build and eval could read it.
          persist-credentials: false

      - name: Cache dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
        with:
          # Every run may restore the cache, but only runs on `main` save it.
          save-if: ${{ github.ref == 'refs/heads/main' }}
          # cache-provider: "buildjet"

      - name: Install Linux dependencies
        run: ./script/linux

      # Install the CI Cargo configuration into `.cargo/config.toml` in the
      # parent of the checkout directory; Cargo also reads configuration from
      # parent directories of the working directory.
      - name: Configure CI
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # Built in its own step so compile failures are reported separately from
      # failures of the eval run itself.
      - name: Compile eval
        run: cargo build --package=eval

      - name: Run eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1

      # The Linux runner is not stateful, so in theory this cleanup is not
      # needed. It is kept as a precaution: if we ever switch to a stateful
      # Linux runner, a leftover CI config file would otherwise leak into later
      # runs. `if: always()` makes the cleanup run even when an earlier step
      # failed.
      - name: Clean CI config file
        if: always()
        run: rm -rf ./../.cargo