From da3bab18fecbf132afb8472265df91a7a9d7dc73 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 1 Dec 2025 11:41:15 -0500 Subject: [PATCH] Unit eval GPT-5 and Gemini 3 Pro (#43916) Follow-up to #43907 Release Notes: - N/A --- .github/workflows/run_cron_unit_evals.yml | 2 ++ tooling/xtask/src/tasks/workflows/run_agent_evals.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/run_cron_unit_evals.yml b/.github/workflows/run_cron_unit_evals.yml index 5f1d13b5b8311a67ebff207d1211acffbdff5d6e..cdfb51cc5b351d1079369aef3abfa845ca7d0428 100644 --- a/.github/workflows/run_cron_unit_evals.yml +++ b/.github/workflows/run_cron_unit_evals.yml @@ -18,6 +18,8 @@ jobs: model: - anthropic/claude-sonnet-4-5-latest - anthropic/claude-opus-4-5-latest + - google/gemini-3-pro + - openai/gpt-5 fail-fast: false steps: - name: steps::checkout_repo diff --git a/tooling/xtask/src/tasks/workflows/run_agent_evals.rs b/tooling/xtask/src/tasks/workflows/run_agent_evals.rs index 34a7c6885db061191f5c3eac447838439708fbfe..667ea6a90bf0ab07f8c07cec87fcbd832db02109 100644 --- a/tooling/xtask/src/tasks/workflows/run_agent_evals.rs +++ b/tooling/xtask/src/tasks/workflows/run_agent_evals.rs @@ -123,6 +123,8 @@ fn cron_unit_evals() -> NamedJob { const UNIT_EVAL_MODELS: &[&str] = &[ "anthropic/claude-sonnet-4-5-latest", "anthropic/claude-opus-4-5-latest", + "google/gemini-3-pro", + "openai/gpt-5", ]; fn cron_unit_evals_job() -> Job {