1use gh_workflow::{
2 Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
3 WorkflowDispatch,
4};
5
6use crate::tasks::workflows::{
7 runners::{self, Platform},
8 steps::{self, FluentBuilder as _, NamedJob, named, setup_cargo_config},
9 vars,
10};
11
12pub(crate) fn run_agent_evals() -> Workflow {
13 let agent_evals = agent_evals();
14
15 named::workflow()
16 .on(Event::default()
17 .schedule([Schedule::default().cron("0 0 * * *")])
18 .pull_request(PullRequest::default().add_branch("**").types([
19 PullRequestType::Synchronize,
20 PullRequestType::Reopened,
21 PullRequestType::Labeled,
22 ]))
23 .workflow_dispatch(WorkflowDispatch::default()))
24 .concurrency(vars::one_workflow_per_non_main_branch())
25 .add_env(("CARGO_TERM_COLOR", "always"))
26 .add_env(("CARGO_INCREMENTAL", 0))
27 .add_env(("RUST_BACKTRACE", 1))
28 .add_env(("ANTHROPIC_API_KEY", "${{ secrets.ANTHROPIC_API_KEY }}"))
29 .add_env((
30 "ZED_CLIENT_CHECKSUM_SEED",
31 "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}",
32 ))
33 .add_env(("ZED_EVAL_TELEMETRY", 1))
34 .add_job(agent_evals.name, agent_evals.job)
35}
36
37fn agent_evals() -> NamedJob {
38 fn run_eval() -> Step<Run> {
39 named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
40 }
41
42 named::job(
43 Job::default()
44 .cond(Expression::new(indoc::indoc!{r#"
45 github.repository_owner == 'zed-industries' &&
46 (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
47 "#}))
48 .runs_on(runners::LINUX_DEFAULT)
49 .timeout_minutes(60_u32)
50 .add_step(steps::checkout_repo())
51 .add_step(steps::cache_rust_dependencies())
52 .map(steps::install_linux_dependencies)
53 .add_step(setup_cargo_config(Platform::Linux))
54 .add_step(steps::script("cargo build --package=eval"))
55 .add_step(run_eval())
56 .add_step(steps::cleanup_cargo_config(Platform::Linux))
57 )
58}
59
60pub(crate) fn run_unit_evals() -> Workflow {
61 let unit_evals = unit_evals();
62
63 named::workflow()
64 .on(Event::default()
65 .schedule([
66 // GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
67 Schedule::default().cron("47 1 * * 2"),
68 ])
69 .workflow_dispatch(WorkflowDispatch::default()))
70 .concurrency(vars::one_workflow_per_non_main_branch())
71 .add_env(("CARGO_TERM_COLOR", "always"))
72 .add_env(("CARGO_INCREMENTAL", 0))
73 .add_env(("RUST_BACKTRACE", 1))
74 .add_env((
75 "ZED_CLIENT_CHECKSUM_SEED",
76 "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}",
77 ))
78 .add_job(unit_evals.name, unit_evals.job)
79}
80
81fn unit_evals() -> NamedJob {
82 fn send_failure_to_slack() -> Step<Use> {
83 named::uses(
84 "slackapi",
85 "slack-github-action",
86 "b0fa283ad8fea605de13dc3f449259339835fc52",
87 )
88 .if_condition(Expression::new("${{ failure() }}"))
89 .add_with(("method", "chat.postMessage"))
90 .add_with(("token", "${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}"))
91 .add_with(("payload", indoc::indoc!{r#"
92 channel: C04UDRNNJFQ
93 text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
94 "#}))
95 }
96
97 named::job(
98 Job::default()
99 .runs_on(runners::LINUX_DEFAULT)
100 .add_step(steps::checkout_repo())
101 .add_step(steps::setup_cargo_config(Platform::Linux))
102 .add_step(steps::cache_rust_dependencies())
103 .map(steps::install_linux_dependencies)
104 .add_step(steps::cargo_install_nextest(Platform::Linux))
105 .add_step(steps::clear_target_dir_if_large(Platform::Linux))
106 .add_step(
107 steps::script("./script/run-unit-evals")
108 .add_env(("ANTHROPIC_API_KEY", "${{ secrets.ANTHROPIC_API_KEY }}")),
109 )
110 .add_step(send_failure_to_slack())
111 .add_step(steps::cleanup_cargo_config(Platform::Linux)),
112 )
113}