run_agent_evals.yml

 1# Generated from xtask::workflows::run_agent_evals
 2# Rebuild with `cargo xtask workflows`.
 3name: run_agent_evals
 4env:
 5  CARGO_TERM_COLOR: always
 6  CARGO_INCREMENTAL: '0'
 7  RUST_BACKTRACE: '1'
 8  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
 9  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
10  GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
11  GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
12  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
13  ZED_EVAL_TELEMETRY: '1'
14  MODEL_NAME: ${{ inputs.model_name }}
# Manual trigger only: each run is dispatched with the model to evaluate.
on:
  workflow_dispatch:
    inputs:
      model_name:
        # Exposed to the job as MODEL_NAME and forwarded to the eval binary's
        # --model flag.
        description: 'Name of the model to evaluate (passed to eval as --model)'
        required: true
        type: string
jobs:
  # Builds the `eval` package and runs it against the dispatched model.
  agent_evals:
    runs-on: namespace-profile-16x32-ubuntu-2204
    timeout-minutes: 600
    steps:
      # Check out the repository at a commit-pinned version of the action.
      - name: steps::checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          clean: false

      # Rust cache provided by the Namespace cache action, plus ~/.rustup.
      - name: steps::cache_rust_dependencies_namespace
        uses: namespacelabs/nscloud-cache-action@v1
        with:
          cache: rust
          path: ~/.rustup

      # Repository-provided setup scripts, run in order.
      - name: steps::setup_linux
        shell: bash -euxo pipefail {0}
        run: ./script/linux

      - name: steps::install_mold
        shell: bash -euxo pipefail {0}
        run: ./script/install-mold

      - name: steps::download_wasi_sdk
        shell: bash -euxo pipefail {0}
        run: ./script/download-wasi-sdk

      # Copy the CI cargo config into a .cargo directory one level above the
      # checkout; the cleanup step at the end removes that directory again.
      - name: steps::setup_cargo_config
        shell: bash -euxo pipefail {0}
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # Compile first so build failures surface separately from eval failures.
      - name: cargo build --package=eval
        shell: bash -euxo pipefail {0}
        run: cargo build --package=eval

      # Run the evals; the model comes from the MODEL_NAME environment
      # variable. The folded scalar below parses to a single-line command.
      - name: run_agent_evals::agent_evals::run_eval
        shell: bash -euxo pipefail {0}
        env:
          # Provider credentials declared on this step.
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
          GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
        run: >-
          cargo run --package=eval --
          --repetitions=8 --concurrency=1
          --model "${MODEL_NAME}"

      # `always()` makes this run even when an earlier step failed, so the
      # copied cargo config never outlives the job.
      - name: steps::cleanup_cargo_config
        if: always()
        shell: bash -euxo pipefail {0}
        run: |
          rm -rf ./../.cargo
# Runs are grouped by workflow, ref, commit SHA (on main only) and the
# dispatched model name. Re-dispatching the same model on the same ref
# supersedes the older run, while an eval for a different model no longer
# cancels one that is already in progress.
# NOTE(review): if evals must be strictly serialised (e.g. to cap API spend),
# drop the trailing model_name component again.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}-${{ inputs.model_name }}
  cancel-in-progress: true