@@ -51,6 +51,11 @@ jobs:
- name: run_agent_evals::agent_evals::run_eval
run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
shell: bash -euxo pipefail {0}
+ env:
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
+ GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
- name: steps::cleanup_cargo_config
if: always()
run: |
@@ -0,0 +1,69 @@
+# Generated from xtask::workflows::run_cron_unit_evals
+# Rebuild with `cargo xtask workflows`.
+name: run_cron_unit_evals
+env:
+ CARGO_TERM_COLOR: always
+ CARGO_INCREMENTAL: '0'
+ RUST_BACKTRACE: '1'
+ ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
+on:
+ schedule:
+ - cron: 47 1 * * 2
+ workflow_dispatch: {}
+jobs:
+ cron_unit_evals:
+ runs-on: namespace-profile-16x32-ubuntu-2204
+ steps:
+ - name: steps::checkout_repo
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+ with:
+ clean: false
+ - name: steps::setup_cargo_config
+ run: |
+ mkdir -p ./../.cargo
+ cp ./.cargo/ci-config.toml ./../.cargo/config.toml
+ shell: bash -euxo pipefail {0}
+ - name: steps::cache_rust_dependencies_namespace
+ uses: namespacelabs/nscloud-cache-action@v1
+ with:
+ cache: rust
+ - name: steps::setup_linux
+ run: ./script/linux
+ shell: bash -euxo pipefail {0}
+ - name: steps::install_mold
+ run: ./script/install-mold
+ shell: bash -euxo pipefail {0}
+ - name: steps::download_wasi_sdk
+ run: ./script/download-wasi-sdk
+ shell: bash -euxo pipefail {0}
+ - name: steps::cargo_install_nextest
+ run: cargo install cargo-nextest --locked
+ shell: bash -euxo pipefail {0}
+ - name: steps::clear_target_dir_if_large
+ run: ./script/clear-target-dir-if-larger-than 250
+ shell: bash -euxo pipefail {0}
+ - name: ./script/run-unit-evals
+ run: ./script/run-unit-evals
+ shell: bash -euxo pipefail {0}
+ env:
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
+ GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
+ - name: steps::cleanup_cargo_config
+ if: always()
+ run: |
+ rm -rf ./../.cargo
+ shell: bash -euxo pipefail {0}
+ - name: run_agent_evals::cron_unit_evals::send_failure_to_slack
+ if: ${{ failure() }}
+ uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
+ with:
+ method: chat.postMessage
+ token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
+ payload: |
+ channel: C04UDRNNJFQ
+ text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
+ cancel-in-progress: true
@@ -6,12 +6,21 @@ env:
CARGO_INCREMENTAL: '0'
RUST_BACKTRACE: '1'
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
+ ZED_EVAL_TELEMETRY: '1'
+ MODEL_NAME: ${{ inputs.model_name }}
on:
- schedule:
- - cron: 47 1 * * 2
- workflow_dispatch: {}
+ workflow_dispatch:
+ inputs:
+ model_name:
+ description: model_name
+ required: true
+ type: string
+ commit_sha:
+ description: commit_sha
+ required: true
+ type: string
jobs:
- unit_evals:
+ run_unit_evals:
runs-on: namespace-profile-16x32-ubuntu-2204
steps:
- name: steps::checkout_repo
@@ -47,6 +56,10 @@ jobs:
shell: bash -euxo pipefail {0}
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
+ GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
+ UNIT_EVAL_COMMIT: ${{ inputs.commit_sha }}
- name: run_agent_evals::unit_evals::send_failure_to_slack
if: ${{ failure() }}
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
@@ -2,4 +2,8 @@
set -euxo pipefail
+# Optionally evaluate a specific commit instead of the one that is already checked out.
+if [ -n "${UNIT_EVAL_COMMIT:-}" ]; then
+    # The CI checkout step does not request extra history, and actions/checkout fetches
+    # only a single commit by default, so the requested commit is usually not available
+    # locally. Fetch it first; skip the fetch when the object is already present.
+    if ! git cat-file -e "${UNIT_EVAL_COMMIT}^{commit}" 2>/dev/null; then
+        git fetch origin "$UNIT_EVAL_COMMIT"
+    fi
+    git checkout "$UNIT_EVAL_COMMIT"
+fi
+
GPUI_TEST_TIMEOUT=1500 cargo nextest run --workspace --no-fail-fast --features unit-eval --no-capture -E 'test(::eval_)'
@@ -33,6 +33,10 @@ pub fn run_workflows(_: GenerateWorkflowArgs) -> Result<()> {
("cherry_pick.yml", cherry_pick::cherry_pick()),
("compare_perf.yml", compare_perf::compare_perf()),
("run_unit_evals.yml", run_agent_evals::run_unit_evals()),
+ (
+ "run_cron_unit_evals.yml",
+ run_agent_evals::run_cron_unit_evals(),
+ ),
("run_agent_evals.yml", run_agent_evals::run_agent_evals()),
("after_release.yml", after_release::after_release()),
];
@@ -28,6 +28,36 @@ pub(crate) fn run_agent_evals() -> Workflow {
.add_job(agent_evals.name, agent_evals.job)
}
+/// Builds the manually dispatched `run_unit_evals` workflow.
+///
+/// The workflow declares two `workflow_dispatch` inputs, `model_name` and `commit_sha`.
+/// `model_name` is exported as the workflow-level `MODEL_NAME` environment variable, and
+/// `commit_sha` is handed to `unit_evals`, which forwards it to the eval script step.
+pub(crate) fn run_unit_evals() -> Workflow {
+    let model_name = Input::string("model_name", None);
+    let commit_sha = Input::string("commit_sha", None);
+
+    // Manual dispatch is the only trigger; both inputs are registered on it.
+    let dispatch = WorkflowDispatch::default()
+        .add_input(model_name.name, model_name.input())
+        .add_input(commit_sha.name, commit_sha.input());
+    let trigger = Event::default().workflow_dispatch(dispatch);
+
+    // `named::job` is called directly in this function on purpose, exactly as before.
+    let job = named::job(unit_evals(Some(&commit_sha)));
+
+    named::workflow()
+        .name("run_unit_evals")
+        .on(trigger)
+        .concurrency(vars::one_workflow_per_non_main_branch())
+        .add_env(("CARGO_TERM_COLOR", "always"))
+        .add_env(("CARGO_INCREMENTAL", 0))
+        .add_env(("RUST_BACKTRACE", 1))
+        .add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
+        .add_env(("ZED_EVAL_TELEMETRY", 1))
+        .add_env(("MODEL_NAME", model_name.to_string()))
+        .add_job(job.name, job.job)
+}
+
+/// Returns `step` with the model-provider credentials added to its environment.
+///
+/// The variables are added in a fixed order: `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`,
+/// `GOOGLE_AI_API_KEY`, then `GOOGLE_CLOUD_PROJECT`.
+fn add_api_keys(step: Step<Run>) -> Step<Run> {
+    let step = step.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY));
+    let step = step.add_env(("OPENAI_API_KEY", vars::OPENAI_API_KEY));
+    let step = step.add_env(("GOOGLE_AI_API_KEY", vars::GOOGLE_AI_API_KEY));
+    step.add_env(("GOOGLE_CLOUD_PROJECT", vars::GOOGLE_CLOUD_PROJECT))
+}
+
fn agent_evals() -> NamedJob {
fn run_eval() -> Step<Run> {
named::bash(
@@ -44,16 +74,16 @@ fn agent_evals() -> NamedJob {
.map(steps::install_linux_dependencies)
.add_step(setup_cargo_config(Platform::Linux))
.add_step(steps::script("cargo build --package=eval"))
- .add_step(run_eval())
+ .add_step(add_api_keys(run_eval()))
.add_step(steps::cleanup_cargo_config(Platform::Linux)),
)
}
-pub(crate) fn run_unit_evals() -> Workflow {
- let unit_evals = unit_evals();
+pub(crate) fn run_cron_unit_evals() -> Workflow {
+ let unit_evals = cron_unit_evals();
named::workflow()
- .name("run_unit_evals")
+ .name("run_cron_unit_evals")
.on(Event::default()
.schedule([
// GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
@@ -68,7 +98,7 @@ pub(crate) fn run_unit_evals() -> Workflow {
.add_job(unit_evals.name, unit_evals.job)
}
-fn unit_evals() -> NamedJob {
+fn cron_unit_evals() -> NamedJob {
fn send_failure_to_slack() -> Step<Use> {
named::uses(
"slackapi",
@@ -84,20 +114,39 @@ fn unit_evals() -> NamedJob {
"#}))
}
- named::job(
- Job::default()
- .runs_on(runners::LINUX_DEFAULT)
- .add_step(steps::checkout_repo())
- .add_step(steps::setup_cargo_config(Platform::Linux))
- .add_step(steps::cache_rust_dependencies_namespace())
- .map(steps::install_linux_dependencies)
- .add_step(steps::cargo_install_nextest(Platform::Linux))
- .add_step(steps::clear_target_dir_if_large(Platform::Linux))
- .add_step(
- steps::script("./script/run-unit-evals")
- .add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY)),
- )
- .add_step(send_failure_to_slack())
- .add_step(steps::cleanup_cargo_config(Platform::Linux)),
- )
+ named::job(unit_evals(None).add_step(send_failure_to_slack()))
+}
+
+/// Builds the job that runs the unit evals through `./script/run-unit-evals`.
+///
+/// When `commit` is `Some`, that input is exposed to the script step as the
+/// `UNIT_EVAL_COMMIT` environment variable; with `None` the variable is not set.
+///
+/// The job intentionally contains no Slack notification step. `cron_unit_evals` appends
+/// its own `send_failure_to_slack` step to the job returned here, so adding one in this
+/// function as well made the scheduled workflow post every failure twice. Callers that
+/// want a failure notification add the step themselves.
+///
+/// The checked-in workflow files are generated from this code; rebuild them with
+/// `cargo xtask workflows` after changing it.
+fn unit_evals(commit: Option<&Input>) -> Job {
+    let script_step = add_api_keys(steps::script("./script/run-unit-evals"));
+
+    Job::default()
+        .runs_on(runners::LINUX_DEFAULT)
+        .add_step(steps::checkout_repo())
+        .add_step(steps::setup_cargo_config(Platform::Linux))
+        .add_step(steps::cache_rust_dependencies_namespace())
+        .map(steps::install_linux_dependencies)
+        .add_step(steps::cargo_install_nextest(Platform::Linux))
+        .add_step(steps::clear_target_dir_if_large(Platform::Linux))
+        // Only the manually dispatched workflow pins a commit for the script to check out.
+        .add_step(match commit {
+            Some(commit) => script_step.add_env(("UNIT_EVAL_COMMIT", commit)),
+            None => script_step,
+        })
+        .add_step(steps::cleanup_cargo_config(Platform::Linux))
+}