@@ -13,6 +13,12 @@ on:
jobs:
cron_unit_evals:
runs-on: namespace-profile-16x32-ubuntu-2204
+ strategy:
+ matrix:
+ model:
+ - anthropic/claude-sonnet-4-5-latest
+ - anthropic/claude-opus-4-5-latest
+ fail-fast: false
steps:
- name: steps::checkout_repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -49,6 +55,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
+ ZED_AGENT_MODEL: ${{ matrix.model }}
- name: steps::cleanup_cargo_config
if: always()
run: |
@@ -21017,6 +21017,7 @@ dependencies = [
"indexmap",
"indoc",
"serde",
+ "serde_json",
"toml 0.8.23",
"toml_edit 0.22.27",
]
@@ -1,4 +1,7 @@
-use gh_workflow::{Event, Expression, Job, Run, Schedule, Step, Use, Workflow, WorkflowDispatch};
+use gh_workflow::{
+ Event, Expression, Job, Run, Schedule, Step, Strategy, Use, Workflow, WorkflowDispatch,
+};
+use serde_json::json;
use crate::tasks::workflows::{
runners::{self, Platform},
@@ -114,7 +117,31 @@ fn cron_unit_evals() -> NamedJob {
"#}))
}
- named::job(unit_evals(None).add_step(send_failure_to_slack()))
+ named::job(cron_unit_evals_job().add_step(send_failure_to_slack()))
+}
+
+/// Model identifiers the cron unit-eval job is run against.
+///
+/// `cron_unit_evals_job` uses this slice as the values of the `model` matrix axis, and the
+/// matrix value in turn is handed to the eval script as `ZED_AGENT_MODEL`.
+const UNIT_EVAL_MODELS: &[&str] = &[
+ "anthropic/claude-sonnet-4-5-latest",
+ "anthropic/claude-opus-4-5-latest",
+];
+
+/// Builds the job that `cron_unit_evals` wraps: unit evals fanned out over
+/// [`UNIT_EVAL_MODELS`].
+///
+/// The job runs on `runners::LINUX_DEFAULT`, declares a `model` matrix axis with
+/// `fail_fast(false)`, and exposes the current matrix value to
+/// `./script/run-unit-evals` through the `ZED_AGENT_MODEL` environment variable.
+fn cron_unit_evals_job() -> Job {
+    // The eval script receives the API keys plus the model of the current matrix entry.
+    let run_evals = add_api_keys(steps::script("./script/run-unit-evals"))
+        .add_env(("ZED_AGENT_MODEL", "${{ matrix.model }}"));
+
+    // One matrix entry per configured model.
+    let model_matrix = Strategy::default()
+        .fail_fast(false)
+        .matrix(json!({ "model": UNIT_EVAL_MODELS }));
+
+    Job::default()
+        .runs_on(runners::LINUX_DEFAULT)
+        .strategy(model_matrix)
+        .add_step(steps::checkout_repo())
+        .add_step(steps::setup_cargo_config(Platform::Linux))
+        .add_step(steps::cache_rust_dependencies_namespace())
+        .map(steps::install_linux_dependencies)
+        .add_step(steps::cargo_install_nextest())
+        .add_step(steps::clear_target_dir_if_large(Platform::Linux))
+        .add_step(run_evals)
+        .add_step(steps::cleanup_cargo_config(Platform::Linux))
}
fn unit_evals(commit: Option<&WorkflowInput>) -> Job {