# run_agent_evals.yml

# Generated from xtask::workflows::run_agent_evals
# Rebuild with `cargo xtask workflows`.
#
# NOTE(review): because this file is generated, hand edits (including the
# `permissions` block below) presumably have to be mirrored in the xtask
# generator or the next rebuild will drop them -- confirm.
name: run_agent_evals

# Workflow-level environment: every step of every job sees these values.
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: '0'
  RUST_BACKTRACE: '1'
  # Both values below are read from repository secrets.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
  ZED_EVAL_TELEMETRY: '1'

# Triggers:
#   * pull_request -- on new commits, reopen, or a label being added, for a
#     pull request targeting any branch (the job-level `if` further requires
#     the `run-eval` label);
#   * schedule     -- once a day at 00:00;
#   * workflow_dispatch -- manual runs, no inputs.
on:
  pull_request:
    types:
    - synchronize
    - reopened
    - labeled
    branches:
    - '**'
  schedule:
  - cron: 0 0 * * *
  workflow_dispatch: {}

# Least privilege for the automatically provided GITHUB_TOKEN: no step visible
# in this file uses the token for anything beyond checking out the repository.
# NOTE(review): the ./script/* helpers and the eval binary are opaque from
# here -- verify none of them needs a broader scope.
permissions:
  contents: read

jobs:
  agent_evals:
    # Run only inside the zed-industries organization. Scheduled and manual
    # runs always pass; pull_request runs additionally need the `run-eval`
    # label on the pull request.
    if: |
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on: namespace-profile-16x32-ubuntu-2204
    steps:
    - name: steps::checkout_repo
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
      with:
        clean: false
    # NOTE(review): unlike the checkout action above, this action is
    # referenced by a mutable tag rather than a commit SHA; consider pinning.
    - name: steps::cache_rust_dependencies_namespace
      uses: namespacelabs/nscloud-cache-action@v1
      with:
        cache: rust
    # Every `run` step uses `bash -euxo pipefail`, so a failing command, an
    # unset variable, or a failing pipeline stage aborts the step, and each
    # command is echoed to the log.
    - name: steps::setup_linux
      run: ./script/linux
      shell: bash -euxo pipefail {0}
    - name: steps::install_mold
      run: ./script/install-mold
      shell: bash -euxo pipefail {0}
    - name: steps::download_wasi_sdk
      run: ./script/download-wasi-sdk
      shell: bash -euxo pipefail {0}
    # Copies the CI cargo configuration into a `.cargo` directory one level
    # above the working directory; steps::cleanup_cargo_config removes it.
    - name: steps::setup_cargo_config
      run: |
        mkdir -p ./../.cargo
        cp ./.cargo/ci-config.toml ./../.cargo/config.toml
      shell: bash -euxo pipefail {0}
    # Build as a separate step so compile failures are reported apart from
    # eval failures.
    - name: cargo build --package=eval
      run: cargo build --package=eval
      shell: bash -euxo pipefail {0}
    - name: run_agent_evals::agent_evals::run_eval
      run: cargo run --package=eval -- --repetitions=8 --concurrency=1
      shell: bash -euxo pipefail {0}
    # `always()` makes the cleanup run even when an earlier step failed or
    # the run was cancelled, so the parent-directory config is not left behind.
    - name: steps::cleanup_cargo_config
      if: always()
      run: |
        rm -rf ./../.cargo
      shell: bash -euxo pipefail {0}
    timeout-minutes: 60

# One concurrency group per workflow + ref. On `main` the commit SHA is part
# of the group name, so runs for different commits do not cancel each other;
# on any other ref the literal 'anysha' is used, so a newer run cancels the
# in-progress run for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true