# eval.yml

 1name: Run Agent Eval
 2
 3on:
 4  schedule:
 5    - cron: "0 * * * *"
 6  push:
 7    branches:
 8      - main
 9      - "v[0-9]+.[0-9]+.x"
10    tags:
11      - "v*"
12
13  pull_request:
14    branches:
15      - "**"
16    types: [opened, synchronize, reopened, labeled]
17
18  workflow_dispatch:
19
concurrency:
  # A newly queued run cancels any run still in progress in the same group.
  cancel-in-progress: true
  # Group key: <workflow name>-<ref name>-<suffix>, where the suffix is the
  # commit SHA when the ref name is `main` and the constant `anysha` otherwise.
  # Each commit on `main` therefore gets its own group, while every other ref
  # shares a single group across all of its commits.
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}

# Workflow-level environment: visible to every step of every job below.
env:
  # Cargo / Rust settings.
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  RUST_BACKTRACE: "1"

  # Values injected from repository secrets.
  # NOTE(review): because these are set at workflow level they are exposed to
  # all steps, not only the one that runs the eval - consider narrowing.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}

  # Presumably switches on telemetry reporting in the eval binary - confirm.
  ZED_EVAL_TELEMETRY: "1"

jobs:
  # Single job: prepare the runner, build the `eval` package, run it, and
  # always remove the temporary Cargo config afterwards.
  run_eval:
    timeout-minutes: 60
    name: Run Agent Eval
    # Least privilege for GITHUB_TOKEN: the only consumer in this job is the
    # checkout action, which needs read access to repository contents. No
    # `run` step is handed the token.
    permissions:
      contents: read
    # Only run inside the zed-industries organisation. For pull_request events
    # additionally require the `run-eval` label; all other events pass through.
    if: >
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on:
      - buildjet-16vcpu-ubuntu-2204
    steps:
      # Appending to the file named by $GITHUB_PATH extends PATH for all
      # subsequent steps. The variable is quoted so the redirect target is
      # never word-split or glob-expanded.
      - name: Add Rust to the PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      # `clean: false` keeps untracked/ignored files already present in the
      # workspace instead of wiping them before fetching.
      - name: Checkout repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: false

      # The cache is restored on every run but only saved from `main`.
      - name: Cache dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
        with:
          save-if: ${{ github.ref == 'refs/heads/main' }}
          cache-provider: "buildjet"

      - name: Install Linux dependencies
        run: ./script/linux

      # Install the CI-specific Cargo configuration one directory above the
      # checkout; the final step of this job removes it again.
      - name: Configure CI
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # Build first so compile failures are reported separately from eval
      # failures.
      - name: Compile eval
        run: cargo build --package=eval

      - name: Run eval
        run: cargo run --package=eval

      # The Linux runner is not stateful, so in theory this cleanup is not
      # needed. It is kept as a precaution: if a stateful Linux runner is
      # adopted later and nobody remembers to add cleanup for the config file
      # created in "Configure CI", this step already covers it.
      # `if: always()` makes it run even when an earlier step failed.
      - name: Clean CI config file
        if: always()
        run: rm -rf ./../.cargo