1use gh_workflow::{
2 Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
3 WorkflowDispatch,
4};
5
6use crate::tasks::workflows::{
7 runners::{self, Platform},
8 steps::{self, FluentBuilder as _, NamedJob, named, setup_cargo_config},
9 vars,
10};
11
12pub(crate) fn run_agent_evals() -> Workflow {
13 let agent_evals = agent_evals();
14
15 named::workflow()
16 .on(Event::default()
17 .schedule([Schedule::default().cron("0 0 * * *")])
18 .pull_request(PullRequest::default().add_branch("**").types([
19 PullRequestType::Synchronize,
20 PullRequestType::Reopened,
21 PullRequestType::Labeled,
22 ]))
23 .workflow_dispatch(WorkflowDispatch::default()))
24 .concurrency(vars::one_workflow_per_non_main_branch())
25 .add_env(("CARGO_TERM_COLOR", "always"))
26 .add_env(("CARGO_INCREMENTAL", 0))
27 .add_env(("RUST_BACKTRACE", 1))
28 .add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY))
29 .add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
30 .add_env(("ZED_EVAL_TELEMETRY", 1))
31 .add_job(agent_evals.name, agent_evals.job)
32}
33
34fn agent_evals() -> NamedJob {
35 fn run_eval() -> Step<Run> {
36 named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
37 }
38
39 named::job(
40 Job::default()
41 .cond(Expression::new(indoc::indoc!{r#"
42 github.repository_owner == 'zed-industries' &&
43 (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
44 "#}))
45 .runs_on(runners::LINUX_DEFAULT)
46 .timeout_minutes(60_u32)
47 .add_step(steps::checkout_repo())
48 .add_step(steps::cache_rust_dependencies_namespace())
49 .map(steps::install_linux_dependencies)
50 .add_step(setup_cargo_config(Platform::Linux))
51 .add_step(steps::script("cargo build --package=eval"))
52 .add_step(run_eval())
53 .add_step(steps::cleanup_cargo_config(Platform::Linux))
54 )
55}
56
57pub(crate) fn run_unit_evals() -> Workflow {
58 let unit_evals = unit_evals();
59
60 named::workflow()
61 .on(Event::default()
62 .schedule([
63 // GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
64 Schedule::default().cron("47 1 * * 2"),
65 ])
66 .workflow_dispatch(WorkflowDispatch::default()))
67 .concurrency(vars::one_workflow_per_non_main_branch())
68 .add_env(("CARGO_TERM_COLOR", "always"))
69 .add_env(("CARGO_INCREMENTAL", 0))
70 .add_env(("RUST_BACKTRACE", 1))
71 .add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
72 .add_job(unit_evals.name, unit_evals.job)
73}
74
75fn unit_evals() -> NamedJob {
76 fn send_failure_to_slack() -> Step<Use> {
77 named::uses(
78 "slackapi",
79 "slack-github-action",
80 "b0fa283ad8fea605de13dc3f449259339835fc52",
81 )
82 .if_condition(Expression::new("${{ failure() }}"))
83 .add_with(("method", "chat.postMessage"))
84 .add_with(("token", vars::SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN))
85 .add_with(("payload", indoc::indoc!{r#"
86 channel: C04UDRNNJFQ
87 text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
88 "#}))
89 }
90
91 named::job(
92 Job::default()
93 .runs_on(runners::LINUX_DEFAULT)
94 .add_step(steps::checkout_repo())
95 .add_step(steps::setup_cargo_config(Platform::Linux))
96 .add_step(steps::cache_rust_dependencies_namespace())
97 .map(steps::install_linux_dependencies)
98 .add_step(steps::cargo_install_nextest(Platform::Linux))
99 .add_step(steps::clear_target_dir_if_large(Platform::Linux))
100 .add_step(
101 steps::script("./script/run-unit-evals")
102 .add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY)),
103 )
104 .add_step(send_failure_to_slack())
105 .add_step(steps::cleanup_cargo_config(Platform::Linux)),
106 )
107}