Detailed changes
@@ -1,71 +0,0 @@
-name: Run Agent Eval
-
-on:
- schedule:
- - cron: "0 0 * * *"
-
- pull_request:
- branches:
- - "**"
- types: [synchronize, reopened, labeled]
-
- workflow_dispatch:
-
-concurrency:
- # Allow only one workflow per any non-`main` branch.
- group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
- cancel-in-progress: true
-
-env:
- CARGO_TERM_COLOR: always
- CARGO_INCREMENTAL: 0
- RUST_BACKTRACE: 1
- ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
- ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
- ZED_EVAL_TELEMETRY: 1
-
-jobs:
- run_eval:
- timeout-minutes: 60
- name: Run Agent Eval
- if: >
- github.repository_owner == 'zed-industries' &&
- (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
- runs-on:
- - namespace-profile-16x32-ubuntu-2204
- steps:
- - name: Add Rust to the PATH
- run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
-
- - name: Checkout repo
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
- with:
- clean: false
-
- - name: Cache dependencies
- uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
- with:
- save-if: ${{ github.ref == 'refs/heads/main' }}
- # cache-provider: "buildjet"
-
- - name: Install Linux dependencies
- run: ./script/linux
-
- - name: Configure CI
- run: |
- mkdir -p ./../.cargo
- cp ./.cargo/ci-config.toml ./../.cargo/config.toml
-
- - name: Compile eval
- run: cargo build --package=eval
-
- - name: Run eval
- run: cargo run --package=eval -- --repetitions=8 --concurrency=1
-
- # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
- # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
- # to clean up the config file, I've included the cleanup code here as a precaution.
- # While it's not strictly necessary at this moment, I believe it's better to err on the side of caution.
- - name: Clean CI config file
- if: always()
- run: rm -rf ./../.cargo
@@ -0,0 +1,62 @@
+# Generated from xtask::workflows::run_agent_evals
+# Rebuild with `cargo xtask workflows`.
+name: run_agent_evals
+env:
+ CARGO_TERM_COLOR: always
+ CARGO_INCREMENTAL: '0'
+ RUST_BACKTRACE: '1'
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
+ ZED_EVAL_TELEMETRY: '1'
+on:
+ pull_request:
+ types:
+ - synchronize
+ - reopened
+ - labeled
+ branches:
+ - '**'
+ schedule:
+ - cron: 0 0 * * *
+ workflow_dispatch: {}
+jobs:
+ agent_evals:
+ if: |
+ github.repository_owner == 'zed-industries' &&
+ (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
+ runs-on: namespace-profile-16x32-ubuntu-2204
+ steps:
+ - name: steps::checkout_repo
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+ with:
+ clean: false
+ - name: steps::cache_rust_dependencies
+ uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6
+ with:
+ save-if: ${{ github.ref == 'refs/heads/main' }}
+ - name: steps::setup_linux
+ run: ./script/linux
+ shell: bash -euxo pipefail {0}
+ - name: steps::install_mold
+ run: ./script/install-mold
+ shell: bash -euxo pipefail {0}
+ - name: steps::setup_cargo_config
+ run: |
+ mkdir -p ./../.cargo
+ cp ./.cargo/ci-config.toml ./../.cargo/config.toml
+ shell: bash -euxo pipefail {0}
+ - name: cargo build --package=eval
+ run: cargo build --package=eval
+ shell: bash -euxo pipefail {0}
+ - name: run_agent_evals::agent_evals::run_eval
+ run: cargo run --package=eval -- --repetitions=8 --concurrency=1
+ shell: bash -euxo pipefail {0}
+ - name: steps::cleanup_cargo_config
+ if: always()
+ run: |
+ rm -rf ./../.cargo
+ shell: bash -euxo pipefail {0}
+ timeout-minutes: 60
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
+ cancel-in-progress: true
@@ -0,0 +1,63 @@
+# Generated from xtask::workflows::run_agent_evals
+# Rebuild with `cargo xtask workflows`.
+name: run_agent_evals
+env:
+ CARGO_TERM_COLOR: always
+ CARGO_INCREMENTAL: '0'
+ RUST_BACKTRACE: '1'
+ ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
+on:
+ schedule:
+ - cron: 47 1 * * 2
+ workflow_dispatch: {}
+jobs:
+ unit_evals:
+ runs-on: namespace-profile-16x32-ubuntu-2204
+ steps:
+ - name: steps::checkout_repo
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+ with:
+ clean: false
+ - name: steps::setup_cargo_config
+ run: |
+ mkdir -p ./../.cargo
+ cp ./.cargo/ci-config.toml ./../.cargo/config.toml
+ shell: bash -euxo pipefail {0}
+ - name: steps::cache_rust_dependencies
+ uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6
+ with:
+ save-if: ${{ github.ref == 'refs/heads/main' }}
+ - name: steps::setup_linux
+ run: ./script/linux
+ shell: bash -euxo pipefail {0}
+ - name: steps::install_mold
+ run: ./script/install-mold
+ shell: bash -euxo pipefail {0}
+ - name: steps::cargo_install_nextest
+ run: cargo install cargo-nextest --locked
+ shell: bash -euxo pipefail {0}
+ - name: steps::clear_target_dir_if_large
+ run: ./script/clear-target-dir-if-larger-than 100
+ shell: bash -euxo pipefail {0}
+ - name: ./script/run-unit-evals
+ run: ./script/run-unit-evals
+ shell: bash -euxo pipefail {0}
+ env:
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ - name: run_agent_evals::unit_evals::send_failure_to_slack
+ if: ${{ failure() }}
+ uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
+ with:
+ method: chat.postMessage
+ token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
+ payload: |
+ channel: C04UDRNNJFQ
+ text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
+ - name: steps::cleanup_cargo_config
+ if: always()
+ run: |
+ rm -rf ./../.cargo
+ shell: bash -euxo pipefail {0}
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
+ cancel-in-progress: true
@@ -1,86 +0,0 @@
-name: Run Unit Evals
-
-on:
- schedule:
- # GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
- - cron: "47 1 * * 2"
- workflow_dispatch:
-
-concurrency:
- # Allow only one workflow per any non-`main` branch.
- group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
- cancel-in-progress: true
-
-env:
- CARGO_TERM_COLOR: always
- CARGO_INCREMENTAL: 0
- RUST_BACKTRACE: 1
- ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
-
-jobs:
- unit_evals:
- if: github.repository_owner == 'zed-industries'
- timeout-minutes: 60
- name: Run unit evals
- runs-on:
- - namespace-profile-16x32-ubuntu-2204
- steps:
- - name: Add Rust to the PATH
- run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
-
- - name: Checkout repo
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
- with:
- clean: false
-
- - name: Cache dependencies
- uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
- with:
- save-if: ${{ github.ref == 'refs/heads/main' }}
- # cache-provider: "buildjet"
-
- - name: Install Linux dependencies
- run: ./script/linux
-
- - name: Configure CI
- run: |
- mkdir -p ./../.cargo
- cp ./.cargo/ci-config.toml ./../.cargo/config.toml
-
- - name: Install Rust
- shell: bash -euxo pipefail {0}
- run: |
- cargo install cargo-nextest --locked
-
- - name: Install Node
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
- with:
- node-version: "18"
-
- - name: Limit target directory size
- shell: bash -euxo pipefail {0}
- run: script/clear-target-dir-if-larger-than 100
-
- - name: Run unit evals
- shell: bash -euxo pipefail {0}
- run: cargo nextest run --workspace --no-fail-fast --features unit-eval --no-capture -E 'test(::eval_)'
- env:
- ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-
- - name: Send failure message to Slack channel if needed
- if: ${{ failure() }}
- uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
- with:
- method: chat.postMessage
- token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
- payload: |
- channel: C04UDRNNJFQ
- text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
-
- # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
- # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
- # to clean up the config file, I've included the cleanup code here as a precaution.
- # While it's not strictly necessary at this moment, I believe it's better to err on the side of caution.
- - name: Clean CI config file
- if: always()
- run: rm -rf ./../.cargo
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+# Stop on the first failing command, unset variable or failed pipeline stage,
+# and trace every command as it is executed.
+set -o errexit -o nounset -o xtrace -o pipefail
+
+# Run the unit evals: every test in the workspace whose name matches `::eval_`,
+# built with the `unit-eval` feature. Output is not captured and the run keeps
+# going after the first failing test.
+cargo nextest run \
+    --workspace \
+    --no-fail-fast \
+    --features unit-eval \
+    --no-capture \
+    -E 'test(::eval_)'
@@ -10,6 +10,7 @@ mod release_nightly;
mod run_bundling;
mod release;
+mod run_agent_evals;
mod run_tests;
mod runners;
mod steps;
@@ -28,6 +29,8 @@ pub fn run_workflows(_: GenerateWorkflowArgs) -> Result<()> {
("run_tests.yml", run_tests::run_tests()),
("release.yml", release::release()),
("compare_perf.yml", compare_perf::compare_perf()),
+ ("run_unit_evals.yml", run_agent_evals::run_unit_evals()),
+ ("run_agent_evals.yml", run_agent_evals::run_agent_evals()),
];
fs::create_dir_all(dir)
.with_context(|| format!("Failed to create directory: {}", dir.display()))?;
@@ -0,0 +1,113 @@
+use gh_workflow::{
+ Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
+ WorkflowDispatch,
+};
+
+use crate::tasks::workflows::{
+ runners::{self, Platform},
+ steps::{self, FluentBuilder as _, NamedJob, named, setup_cargo_config},
+ vars,
+};
+
+/// Builds the agent-evals workflow.
+///
+/// It is triggered daily (cron `0 0 * * *`), on pull requests against any
+/// branch when they are synchronized, reopened or labeled, and manually via
+/// `workflow_dispatch`. The workflow carries the cargo/eval environment
+/// variables and a single job, the one returned by `agent_evals()`.
+pub(crate) fn run_agent_evals() -> Workflow {
+    let eval_job = agent_evals();
+
+    // Pull-request trigger: any branch, limited to the three event types below.
+    let pull_request = PullRequest::default().add_branch("**").types([
+        PullRequestType::Synchronize,
+        PullRequestType::Reopened,
+        PullRequestType::Labeled,
+    ]);
+
+    let triggers = Event::default()
+        .schedule([Schedule::default().cron("0 0 * * *")])
+        .pull_request(pull_request)
+        .workflow_dispatch(WorkflowDispatch::default());
+
+    named::workflow()
+        .on(triggers)
+        .concurrency(vars::one_workflow_per_non_main_branch())
+        .add_env(("CARGO_TERM_COLOR", "always"))
+        .add_env(("CARGO_INCREMENTAL", 0))
+        .add_env(("RUST_BACKTRACE", 1))
+        .add_env(("ANTHROPIC_API_KEY", "${{ secrets.ANTHROPIC_API_KEY }}"))
+        .add_env((
+            "ZED_CLIENT_CHECKSUM_SEED",
+            "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}",
+        ))
+        .add_env(("ZED_EVAL_TELEMETRY", 1))
+        .add_job(eval_job.name, eval_job.job)
+}
+
+/// Builds the job that compiles the `eval` package and then runs it.
+///
+/// The job condition restricts it to the `zed-industries` organisation and,
+/// for `pull_request` events, to pull requests carrying the `run-eval` label;
+/// other event types are not filtered by label. The job runs on
+/// `runners::LINUX_DEFAULT` with a 60 minute timeout.
+fn agent_evals() -> NamedJob {
+ // Step that runs the eval binary with 8 repetitions and a concurrency of 1.
+ // NOTE(review): the generated workflow names this step
+ // `run_agent_evals::agent_evals::run_eval`, so `named::bash` presumably
+ // derives the name from the enclosing function path. Keep this a nested fn
+ // unless that is confirmed not to matter.
+ fn run_eval() -> Step<Run> {
+ named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
+ }
+
+ named::job(
+ Job::default()
+ .cond(Expression::new(indoc::indoc!{r#"
+ github.repository_owner == 'zed-industries' &&
+ (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
+ "#}))
+ .runs_on(runners::LINUX_DEFAULT)
+ .timeout_minutes(60_u32)
+ .add_step(steps::checkout_repo())
+ .add_step(steps::cache_rust_dependencies())
+ // Shows up as the `steps::setup_linux` and `steps::install_mold` steps in
+ // the generated workflow.
+ .map(steps::install_linux_dependencies)
+ .add_step(setup_cargo_config(Platform::Linux))
+ // Build first so compile time is reported separately from the eval run.
+ .add_step(steps::script("cargo build --package=eval"))
+ .add_step(run_eval())
+ // Emitted with `if: always()` in the generated workflow, so the cargo
+ // config is removed even when an earlier step fails.
+ .add_step(steps::cleanup_cargo_config(Platform::Linux))
+ )
+}
+
+/// Builds the unit-evals workflow that is written to `run_unit_evals.yml`.
+///
+/// Triggers: a weekly schedule (cron `47 1 * * 2`) and manual
+/// `workflow_dispatch`. The workflow carries the cargo environment variables
+/// and a single job, the one returned by `unit_evals()`.
+///
+/// The workflow name is set explicitly. This function shares a module with
+/// `run_agent_evals`, and without the override both workflows were emitted
+/// with the name `run_agent_evals`. The concurrency group is keyed on
+/// `github.workflow` and uses `cancel-in-progress`, so two workflows sharing
+/// a name can cancel each other on the same ref, and they cannot be told
+/// apart in the Actions UI.
+pub(crate) fn run_unit_evals() -> Workflow {
+    let unit_evals = unit_evals();
+
+    named::workflow()
+        .name("run_unit_evals")
+        .on(Event::default()
+            .schedule([
+                // GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
+                Schedule::default().cron("47 1 * * 2"),
+            ])
+            .workflow_dispatch(WorkflowDispatch::default()))
+        .concurrency(vars::one_workflow_per_non_main_branch())
+        .add_env(("CARGO_TERM_COLOR", "always"))
+        .add_env(("CARGO_INCREMENTAL", 0))
+        .add_env(("RUST_BACKTRACE", 1))
+        .add_env((
+            "ZED_CLIENT_CHECKSUM_SEED",
+            "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}",
+        ))
+        .add_job(unit_evals.name, unit_evals.job)
+}
+
+/// Builds the job that runs the unit evals and reports failures to Slack.
+///
+/// The job is restricted to the `zed-industries` organisation and capped at
+/// 60 minutes. Both settings were present in the hand-written workflow this
+/// generator replaces: the owner check keeps the scheduled run from executing
+/// in forks, and the timeout stops a hung eval from occupying the runner for
+/// longer than an hour.
+fn unit_evals() -> NamedJob {
+    // Posts a link to the failing run to the Slack channel. The `failure()`
+    // condition means it only runs when an earlier step has failed.
+    fn send_failure_to_slack() -> Step<Use> {
+        named::uses(
+            "slackapi",
+            "slack-github-action",
+            "b0fa283ad8fea605de13dc3f449259339835fc52",
+        )
+        .if_condition(Expression::new("${{ failure() }}"))
+        .add_with(("method", "chat.postMessage"))
+        .add_with(("token", "${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}"))
+        .add_with(("payload", indoc::indoc!{r#"
+            channel: C04UDRNNJFQ
+            text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
+        "#}))
+    }
+
+    named::job(
+        Job::default()
+            .cond(Expression::new(
+                "github.repository_owner == 'zed-industries'",
+            ))
+            .runs_on(runners::LINUX_DEFAULT)
+            .timeout_minutes(60_u32)
+            .add_step(steps::checkout_repo())
+            .add_step(steps::setup_cargo_config(Platform::Linux))
+            .add_step(steps::cache_rust_dependencies())
+            .map(steps::install_linux_dependencies)
+            .add_step(steps::cargo_install_nextest(Platform::Linux))
+            .add_step(steps::clear_target_dir_if_large(Platform::Linux))
+            .add_step(
+                // The API key is scoped to this one step rather than the whole workflow.
+                steps::script("./script/run-unit-evals")
+                    .add_env(("ANTHROPIC_API_KEY", "${{ secrets.ANTHROPIC_API_KEY }}")),
+            )
+            .add_step(send_failure_to_slack())
+            // Emitted with `if: always()` in the generated workflow, so the cargo
+            // config is removed even when an earlier step fails.
+            .add_step(steps::cleanup_cargo_config(Platform::Linux)),
+    )
+}