1use gh_workflow::{Event, Expression, Job, Run, Step, Strategy, Use, Workflow, WorkflowDispatch};
2use serde_json::json;
3
4use crate::tasks::workflows::{
5 runners::{self, Platform},
6 steps::{self, FluentBuilder as _, NamedJob, named},
7 vars::{self, WorkflowInput},
8};
9
10pub(crate) fn run_unit_evals() -> Workflow {
11 let model_name = WorkflowInput::string("model_name", None);
12 let commit_sha = WorkflowInput::string("commit_sha", None);
13
14 let unit_evals = named::job(unit_evals(Some(&commit_sha)));
15
16 named::workflow()
17 .name("run_unit_evals")
18 .on(Event::default().workflow_dispatch(
19 WorkflowDispatch::default()
20 .add_input(model_name.name, model_name.input())
21 .add_input(commit_sha.name, commit_sha.input()),
22 ))
23 .concurrency(vars::allow_concurrent_runs())
24 .add_env(("CARGO_TERM_COLOR", "always"))
25 .add_env(("CARGO_INCREMENTAL", 0))
26 .add_env(("RUST_BACKTRACE", 1))
27 .add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
28 .add_env(("ZED_EVAL_TELEMETRY", 1))
29 .add_env(("MODEL_NAME", model_name.to_string()))
30 .add_job(unit_evals.name, unit_evals.job)
31}
32
33fn add_api_keys(step: Step<Run>) -> Step<Run> {
34 step.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY))
35 .add_env(("OPENAI_API_KEY", vars::OPENAI_API_KEY))
36 .add_env(("GOOGLE_AI_API_KEY", vars::GOOGLE_AI_API_KEY))
37 .add_env(("GOOGLE_CLOUD_PROJECT", vars::GOOGLE_CLOUD_PROJECT))
38}
39
40pub(crate) fn run_cron_unit_evals() -> Workflow {
41 let unit_evals = cron_unit_evals();
42
43 named::workflow()
44 .name("run_cron_unit_evals")
45 .on(Event::default()
46 // .schedule([
47 // // GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
48 // Schedule::default().cron("47 1 * * 2"),
49 // ])
50 .workflow_dispatch(WorkflowDispatch::default()))
51 .concurrency(vars::one_workflow_per_non_main_branch())
52 .add_env(("CARGO_TERM_COLOR", "always"))
53 .add_env(("CARGO_INCREMENTAL", 0))
54 .add_env(("RUST_BACKTRACE", 1))
55 .add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
56 .add_job(unit_evals.name, unit_evals.job)
57}
58
59fn cron_unit_evals() -> NamedJob {
60 fn send_failure_to_slack() -> Step<Use> {
61 named::uses(
62 "slackapi",
63 "slack-github-action",
64 "b0fa283ad8fea605de13dc3f449259339835fc52",
65 )
66 .if_condition(Expression::new("${{ failure() }}"))
67 .add_with(("method", "chat.postMessage"))
68 .add_with(("token", vars::SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN))
69 .add_with(("payload", indoc::indoc!{r#"
70 channel: C04UDRNNJFQ
71 text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
72 "#}))
73 }
74
75 named::job(cron_unit_evals_job().add_step(send_failure_to_slack()))
76}
77
/// Models the cron unit-eval job fans out over: `cron_unit_evals_job` uses
/// this list as the `model` dimension of its job matrix.
// NOTE(review): entries look like `<provider>/<model>` identifiers — confirm
// the exact format expected by `./script/run-unit-evals`.
const UNIT_EVAL_MODELS: &[&str] = &[
 "anthropic/claude-sonnet-4-5-latest",
 "anthropic/claude-opus-4-5-latest",
 "google/gemini-3.1-pro",
 "openai/gpt-5",
];
84
85fn cron_unit_evals_job() -> Job {
86 let script_step = add_api_keys(steps::script("./script/run-unit-evals"))
87 .add_env(("ZED_AGENT_MODEL", "${{ matrix.model }}"));
88
89 Job::default()
90 .runs_on(runners::LINUX_DEFAULT)
91 .strategy(Strategy::default().fail_fast(false).matrix(json!({
92 "model": UNIT_EVAL_MODELS
93 })))
94 .add_step(steps::checkout_repo())
95 .add_step(steps::setup_cargo_config(Platform::Linux))
96 .add_step(steps::cache_rust_dependencies_namespace())
97 .map(steps::install_linux_dependencies)
98 .add_step(steps::cargo_install_nextest())
99 .add_step(steps::clear_target_dir_if_large(Platform::Linux))
100 .add_step(steps::setup_sccache(Platform::Linux))
101 .add_step(script_step)
102 .add_step(steps::show_sccache_stats(Platform::Linux))
103 .add_step(steps::cleanup_cargo_config(Platform::Linux))
104}
105
106fn unit_evals(commit: Option<&WorkflowInput>) -> Job {
107 let script_step = add_api_keys(steps::script("./script/run-unit-evals"));
108
109 Job::default()
110 .runs_on(runners::LINUX_DEFAULT)
111 .add_step(steps::checkout_repo())
112 .add_step(steps::setup_cargo_config(Platform::Linux))
113 .add_step(steps::cache_rust_dependencies_namespace())
114 .map(steps::install_linux_dependencies)
115 .add_step(steps::cargo_install_nextest())
116 .add_step(steps::clear_target_dir_if_large(Platform::Linux))
117 .add_step(steps::setup_sccache(Platform::Linux))
118 .add_step(match commit {
119 Some(commit) => script_step.add_env(("UNIT_EVAL_COMMIT", commit)),
120 None => script_step,
121 })
122 .add_step(steps::show_sccache_stats(Platform::Linux))
123 .add_step(steps::cleanup_cargo_config(Platform::Linux))
124}