1# Generated from xtask::workflows::run_agent_evals
2# Rebuild with `cargo xtask workflows`.
name: run_agent_evals

# Workflow-level environment: visible to every step of every job below.
env:
  CARGO_TERM_COLOR: "always"
  # Quoted so these stay strings instead of being parsed as integers.
  CARGO_INCREMENTAL: "0"
  RUST_BACKTRACE: "1"
  # Credentials are read from the `secrets` context.
  ANTHROPIC_API_KEY: "${{ secrets.ANTHROPIC_API_KEY }}"
  ZED_CLIENT_CHECKSUM_SEED: "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}"
  ZED_EVAL_TELEMETRY: "1"
  # Copied from the workflow_dispatch input; the run_eval step passes it to
  # the eval binary as --model "${MODEL_NAME}".
  MODEL_NAME: "${{ inputs.model_name }}"
# Manual trigger only: the run is started through workflow_dispatch, and the
# caller must supply the model name (a required string input).
on:
  workflow_dispatch:
    inputs:
      model_name:
        type: string
        required: true
        description: model_name
jobs:
  # Builds the `eval` package, then runs it against the model in MODEL_NAME.
  agent_evals:
    runs-on: namespace-profile-16x32-ubuntu-2204
    # Limit for the whole job: 600 minutes (10 hours).
    timeout-minutes: 600
    steps:
      # --- Checkout, cache and toolchain setup ---
      - name: steps::checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          # Skip the git clean/reset that checkout otherwise performs
          # before fetching.
          clean: false

      - name: steps::cache_rust_dependencies_namespace
        uses: namespacelabs/nscloud-cache-action@v1
        with:
          cache: rust

      - name: steps::setup_linux
        shell: bash -euxo pipefail {0}
        run: ./script/linux

      - name: steps::install_mold
        shell: bash -euxo pipefail {0}
        run: ./script/install-mold

      - name: steps::download_wasi_sdk
        shell: bash -euxo pipefail {0}
        run: ./script/download-wasi-sdk

      # Copies the CI cargo config into a .cargo directory one level above
      # the checkout; the cleanup step at the end removes it again.
      - name: steps::setup_cargo_config
        shell: bash -euxo pipefail {0}
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # --- Build and run the evals ---
      - name: cargo build --package=eval
        shell: bash -euxo pipefail {0}
        run: cargo build --package=eval

      # MODEL_NAME comes from the workflow-level env and is passed through the
      # shell variable rather than interpolated into the script text.
      - name: run_agent_evals::agent_evals::run_eval
        shell: bash -euxo pipefail {0}
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"

      # --- Teardown ---
      # `always()` makes this run even when an earlier step failed, so the
      # copied cargo config is removed either way.
      - name: steps::cleanup_cargo_config
        if: always()
        shell: bash -euxo pipefail {0}
        run: |
          rm -rf ./../.cargo
# Runs that share a concurrency group cancel one another (cancel-in-progress).
# The group is keyed on workflow, ref, commit (on main only; other refs use the
# literal 'anysha') and the requested model. Including the model means that
# dispatching an eval for a different model does not cancel one already running
# on the same ref; re-dispatching the same model still replaces the older run.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}-${{ inputs.model_name }}
  cancel-in-progress: true