# Generated from xtask::workflows::run_agent_evals
# Rebuild with `cargo xtask workflows`.
# Workflow name.
name: run_agent_evals

# Environment variables defined at workflow level, so every job and step sees them.
env:
  # Cargo / Rust settings: always colorize Cargo output, turn off incremental
  # compilation, and print a backtrace when a Rust program panics.
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: '0'
  RUST_BACKTRACE: '1'

  # Values read from repository secrets.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}

  # Quoted so the value is the string "1" rather than an integer.
  # NOTE(review): presumably switches on telemetry in the eval binary - confirm in the eval crate.
  ZED_EVAL_TELEMETRY: '1'
# Events that start this workflow.
on:
  # Pull-request activity. The job's own `if:` additionally requires the
  # 'run-eval' label before anything runs for these events.
  pull_request:
    # Only these activity types start a run.
    types: [synchronize, reopened, labeled]
    # '**' matches every branch name, so no branch is filtered out.
    branches: ['**']

  # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

  # Manual runs; no inputs are declared.
  workflow_dispatch: {}
jobs:
  # Builds the `eval` package and then runs it.
  # NOTE: the file header states this workflow is generated from
  # xtask::workflows::run_agent_evals; mirror any manual change in the generator.
  agent_evals:
    # Run only when the repository owner is zed-industries. Pull-request events
    # additionally need the 'run-eval' label; other events are not label-gated.
    if: |
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on: namespace-profile-16x32-ubuntu-2204
    # Least privilege: restrict GITHUB_TOKEN to read-only repository contents
    # instead of inheriting the repository default scopes.
    # NOTE(review): confirm the scripts and the eval binary need no other token scope.
    permissions:
      contents: read
    timeout-minutes: 60
    # Every `run` step uses `bash -euxo pipefail`: stop on the first failing
    # command, treat unset variables as errors, trace each command, and fail a
    # pipeline when any stage fails.
    steps:
      - name: steps::checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          clean: false

      # The cache is only saved for runs on refs/heads/main.
      - name: steps::cache_rust_dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6
        with:
          save-if: ${{ github.ref == 'refs/heads/main' }}

      - name: steps::setup_linux
        run: ./script/linux
        shell: bash -euxo pipefail {0}

      - name: steps::install_mold
        run: ./script/install-mold
        shell: bash -euxo pipefail {0}

      # Copies the CI Cargo config into a `.cargo` directory one level above the
      # checkout (Cargo also searches parent directories for configuration).
      - name: steps::setup_cargo_config
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml
        shell: bash -euxo pipefail {0}

      # Build in its own step, ahead of the step that runs the evals.
      - name: cargo build --package=eval
        run: cargo build --package=eval
        shell: bash -euxo pipefail {0}

      # Arguments after `--` are passed to the eval binary itself.
      - name: run_agent_evals::agent_evals::run_eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1
        shell: bash -euxo pipefail {0}

      # `always()` makes this run even if an earlier step failed, so the
      # `.cargo` directory created above the checkout is always removed.
      - name: steps::cleanup_cargo_config
        if: always()
        run: |
          rm -rf ./../.cargo
        shell: bash -euxo pipefail {0}
# Runs that share a group key are serialized, and a newer run cancels one that
# is still in progress.
# Group key = workflow name + ref name + a third part that is the commit SHA
# when the ref name is 'main' and the literal 'anysha' otherwise. On 'main'
# each commit therefore gets its own group; any other ref shares one group.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}