@@ -8,22 +8,16 @@ env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
ZED_EVAL_TELEMETRY: '1'
+ MODEL_NAME: ${{ inputs.model_name }}
# Workflow triggers. In this diff the pull_request and schedule triggers are removed
# ("-" lines) and the bare `workflow_dispatch: {}` is replaced ("+" lines) by a
# manual dispatch that declares one input.
on:
- pull_request:
- types:
- - synchronize
- - reopened
- - labeled
- branches:
- - '**'
- schedule:
- - cron: 0 0 * * *
- workflow_dispatch: {}
+ workflow_dispatch:
+ inputs:
# Required string input; the env section of this workflow exposes it to the job as
# MODEL_NAME via `${{ inputs.model_name }}`.
+ model_name:
+ description: model_name
+ required: true
+ type: string
# Single job, `agent_evals`. The removed `if:` limited it to the `zed-industries`
# repository owner and, for pull_request events, to PRs carrying the `run-eval` label.
jobs:
agent_evals:
- if: |
- github.repository_owner == 'zed-industries' &&
- (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
runs-on: namespace-profile-16x32-ubuntu-2204
steps:
- name: steps::checkout_repo
# Hunk boundary: the steps between checkout and the eval build are not shown in this diff.
@@ -52,14 +46,14 @@ jobs:
run: cargo build --package=eval
shell: bash -euxo pipefail {0}
- name: run_agent_evals::agent_evals::run_eval
# The eval run now receives the model through the MODEL_NAME env var; the double quotes
# make the shell pass the value as a single argument.
# NOTE(review): presumably the env var is used instead of inlining the input expression
# to avoid script injection — confirm.
- run: cargo run --package=eval -- --repetitions=8 --concurrency=1
+ run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
shell: bash -euxo pipefail {0}
- name: steps::cleanup_cargo_config
# `always()` keeps this cleanup step running even when an earlier step failed.
if: always()
run: |
rm -rf ./../.cargo
shell: bash -euxo pipefail {0}
# Job timeout is raised from 60 to 600 minutes.
- timeout-minutes: 60
+ timeout-minutes: 600
# Runs are grouped by workflow name and ref name, plus the commit SHA on `main` (the
# literal 'anysha' on every other ref); a newer run in the same group cancels the one
# in progress.
# NOTE(review): the group key does not include the `model_name` input, so two manual
# dispatches on the same ref (and, on `main`, the same commit) for different models
# would share a group and the later one would cancel the earlier — confirm this is intended.
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
@@ -1,26 +1,19 @@
-use gh_workflow::{
- Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
- WorkflowDispatch,
-};
+use gh_workflow::{Event, Expression, Job, Run, Schedule, Step, Use, Workflow, WorkflowDispatch};
use crate::tasks::workflows::{
runners::{self, Platform},
steps::{self, FluentBuilder as _, NamedJob, named, setup_cargo_config},
- vars,
+ vars::{self, Input},
};
// Builds the `run_agent_evals` workflow. In this diff the schedule and pull_request
// triggers are removed; the workflow is dispatched manually with a `model_name` input,
// which is also exported to the workflow as the `MODEL_NAME` env var.
pub(crate) fn run_agent_evals() -> Workflow {
let agent_evals = agent_evals();
// Dispatch input, used below both to declare the trigger input and to set the env var.
+ let model_name = Input::string("model_name", None);
named::workflow()
- .on(Event::default()
- .schedule([Schedule::default().cron("0 0 * * *")])
- .pull_request(PullRequest::default().add_branch("**").types([
- PullRequestType::Synchronize,
- PullRequestType::Reopened,
- PullRequestType::Labeled,
- ]))
- .workflow_dispatch(WorkflowDispatch::default()))
+ .on(Event::default().workflow_dispatch(
+ WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
+ ))
.concurrency(vars::one_workflow_per_non_main_branch())
.add_env(("CARGO_TERM_COLOR", "always"))
.add_env(("CARGO_INCREMENTAL", 0))
// Hunk boundary: any builder calls between the two hunks are not shown in this diff.
@@ -28,29 +21,28 @@ pub(crate) fn run_agent_evals() -> Workflow {
.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY))
.add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
.add_env(("ZED_EVAL_TELEMETRY", 1))
// NOTE(review): presumably `model_name.to_string()` renders the `${{ inputs.model_name }}`
// expression seen in the generated YAML — confirm against `vars::Input`.
+ .add_env(("MODEL_NAME", model_name.to_string()))
.add_job(agent_evals.name, agent_evals.job)
}
// Builds the `agent_evals` job. Steps are added in this order: `steps::checkout_repo()`,
// `steps::cache_rust_dependencies_namespace()`, `steps::install_linux_dependencies`,
// `setup_cargo_config(Platform::Linux)`, a `cargo build --package=eval` script, `run_eval()`,
// and finally `steps::cleanup_cargo_config(Platform::Linux)`.
fn agent_evals() -> NamedJob {
// Step that runs the eval package. The added version appends `--model "${MODEL_NAME}"`,
// reading the model from the environment; the quotes keep it a single shell argument.
fn run_eval() -> Step<Run> {
- named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
+ named::bash(
+ "cargo run --package=eval -- --repetitions=8 --concurrency=1 --model \"${MODEL_NAME}\"",
+ )
}
named::job(
Job::default()
// Removed: the condition that limited the job to the `zed-industries` owner and, for
// pull_request events, to PRs labeled `run-eval`.
- .cond(Expression::new(indoc::indoc!{r#"
- github.repository_owner == 'zed-industries' &&
- (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
- "#}))
.runs_on(runners::LINUX_DEFAULT)
// Timeout raised from 60 to 600 minutes (`60_u32 * 10`).
- .timeout_minutes(60_u32)
+ .timeout_minutes(60_u32 * 10)
.add_step(steps::checkout_repo())
.add_step(steps::cache_rust_dependencies_namespace())
.map(steps::install_linux_dependencies)
.add_step(setup_cargo_config(Platform::Linux))
.add_step(steps::script("cargo build --package=eval"))
.add_step(run_eval())
// Only a trailing comma is added on this line.
- .add_step(steps::cleanup_cargo_config(Platform::Linux))
+ .add_step(steps::cleanup_cargo_config(Platform::Linux)),
)
}