diff --git a/crates/agent/src/edit_agent/evals.rs b/crates/agent/src/edit_agent/evals.rs index 0cf5c2e934f0c0cf33982fecf7a409d32245e381..5c30aa46c2fc802edf8e7d6b050af8465adc226f 100644 --- a/crates/agent/src/edit_agent/evals.rs +++ b/crates/agent/src/edit_agent/evals.rs @@ -88,7 +88,6 @@ fn eval_extract_handle_command_output() { // claude-sonnet-4 | 0.97 (2025-06-14) // gemini-2.5-pro-06-05 | 0.98 (2025-06-16) // gemini-2.5-flash | 0.11 (2025-05-22) - // gpt-4.1 | 1.00 (2025-05-22) let input_file_path = "root/blame.rs"; let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs"); @@ -164,7 +163,6 @@ fn eval_delete_run_git_blame() { // claude-sonnet-4 | 0.96 (2025-06-14) // gemini-2.5-pro-06-05 | 1.0 (2025-06-16) // gemini-2.5-flash | - // gpt-4.1 | let input_file_path = "root/blame.rs"; let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs"); @@ -230,7 +228,6 @@ fn eval_translate_doc_comments() { // claude-sonnet-4 | 1.0 (2025-06-14) // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22) // gemini-2.5-flash-preview-04-17 | - // gpt-4.1 | let input_file_path = "root/canvas.rs"; let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs"); @@ -295,7 +292,6 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { // claude-sonnet-4 | 0.11 (2025-06-14) // gemini-2.5-pro-preview-latest | 0.99 (2025-06-16) // gemini-2.5-flash-preview-04-17 | - // gpt-4.1 | let input_file_path = "root/lib.rs"; let input_file_content = @@ -419,7 +415,6 @@ fn eval_disable_cursor_blinking() { // claude-sonnet-4 | 0.81 (2025-07-14) // gemini-2.5-pro | 0.95 (2025-07-14) // gemini-2.5-flash-preview-04-17 | 0.78 (2025-07-14) - // gpt-4.1 | 0.00 (2025-07-14) (follows edit_description too literally) let input_file_path = "root/editor.rs"; let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs"); @@ -509,7 +504,6 @@ fn eval_from_pixels_constructor() { // claude-4.0-sonnet | 
2025-06-14 | 0.99 // claude-3.7-sonnet | 2025-06-14 | 0.88 // gemini-2.5-pro-preview-06-05 | 2025-06-16 | 0.98 - // gpt-4.1 | let input_file_path = "root/canvas.rs"; let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs"); @@ -718,7 +712,6 @@ fn eval_zode() { // claude-sonnet-4 | 1.0 (2025-06-14) // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22) // gemini-2.5-flash-preview-04-17 | 1.0 (2025-05-22) - // gpt-4.1 | 1.0 (2025-05-22) let input_file_path = "root/zode.py"; let input_content = None; @@ -823,7 +816,6 @@ fn eval_add_overwrite_test() { // claude-sonnet-4 | 0.07 (2025-06-14) // gemini-2.5-pro-preview-03-25 | 0.35 (2025-05-22) // gemini-2.5-flash-preview-04-17 | - // gpt-4.1 | let input_file_path = "root/action_log.rs"; let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs"); @@ -1057,11 +1049,6 @@ fn eval_create_empty_file() { // claude-sonnet-4 | 1.00 (2025-06-14) // gemini-2.5-pro-preview-03-25 | 1.00 (2025-05-21) // gemini-2.5-flash-preview-04-17 | 1.00 (2025-05-21) - // gpt-4.1 | 1.00 (2025-05-21) - // - // - // TODO: gpt-4.1-mini errored 38 times: - // "data did not match any variant of untagged enum ResponseStreamResult" let input_file_content = None; let expected_output_content = String::new(); diff --git a/crates/agent_ui/src/acp/model_selector.rs b/crates/agent_ui/src/acp/model_selector.rs index 2de72d7bba2919e3519a3a0b3892c8bef7de43f3..6ac2c2ce0657365e461422d32233ee6f75589dba 100644 --- a/crates/agent_ui/src/acp/model_selector.rs +++ b/crates/agent_ui/src/acp/model_selector.rs @@ -632,36 +632,27 @@ mod tests { vec![ "Claude 3.7 Sonnet", "Claude 3.7 Sonnet Thinking", - "gpt-4.1", - "gpt-4.1-nano", + "gpt-5", + "gpt-5-mini", ], ), - ("openai", vec!["gpt-3.5-turbo", "gpt-4.1", "gpt-4.1-nano"]), + ("openai", vec!["gpt-3.5-turbo", "gpt-5", "gpt-5-mini"]), ("ollama", vec!["mistral", "deepseek"]), ]); // Results should preserve models order whenever possible. 
- // In the case below, `zed/gpt-4.1` and `openai/gpt-4.1` have identical - // similarity scores, but `zed/gpt-4.1` was higher in the models list, + // In the case below, `zed/gpt-5-mini` and `openai/gpt-5-mini` have identical + // similarity scores, but `zed/gpt-5-mini` was higher in the models list, // so it should appear first in the results. - let results = fuzzy_search(models.clone(), "41".into(), cx.executor()).await; + let results = fuzzy_search(models.clone(), "mini".into(), cx.executor()).await; assert_models_eq( results, - vec![ - ("zed", vec!["gpt-4.1", "gpt-4.1-nano"]), - ("openai", vec!["gpt-4.1", "gpt-4.1-nano"]), - ], + vec![("zed", vec!["gpt-5-mini"]), ("openai", vec!["gpt-5-mini"])], ); - // Fuzzy search - let results = fuzzy_search(models.clone(), "4n".into(), cx.executor()).await; - assert_models_eq( - results, - vec![ - ("zed", vec!["gpt-4.1-nano"]), - ("openai", vec!["gpt-4.1-nano"]), - ], - ); + // Fuzzy search - test with specific model name + let results = fuzzy_search(models.clone(), "mistral".into(), cx.executor()).await; + assert_models_eq(results, vec![("ollama", vec!["mistral"])]); } #[gpui::test] diff --git a/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs b/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs index 719ff77761562b972ef0ebd8ff6c0f2cf316d6e7..a3a389ac0a068d92112ee98caacb2986c499ad86 100644 --- a/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs +++ b/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs @@ -117,7 +117,7 @@ impl ModelInput { let model_name = single_line_input( "Model Name", - "e.g. gpt-4o, claude-opus-4, gemini-2.5-pro", + "e.g. 
gpt-5, claude-opus-4, gemini-2.5-pro", None, base_tab_index + 1, window, diff --git a/crates/agent_ui/src/language_model_selector.rs b/crates/agent_ui/src/language_model_selector.rs index 3a6505a2c1ad73574735abc076ff80d44af9a869..e3216466bd721a5fae61899834fcfb0cfd590891 100644 --- a/crates/agent_ui/src/language_model_selector.rs +++ b/crates/agent_ui/src/language_model_selector.rs @@ -752,11 +752,11 @@ mod tests { let models = create_models(vec![ ("zed", "Claude 3.7 Sonnet"), ("zed", "Claude 3.7 Sonnet Thinking"), - ("zed", "gpt-4.1"), - ("zed", "gpt-4.1-nano"), + ("zed", "gpt-5"), + ("zed", "gpt-5-mini"), ("openai", "gpt-3.5-turbo"), - ("openai", "gpt-4.1"), - ("openai", "gpt-4.1-nano"), + ("openai", "gpt-5"), + ("openai", "gpt-5-mini"), ("ollama", "mistral"), ("ollama", "deepseek"), ]); @@ -767,14 +767,14 @@ mod tests { ); // The order of models should be maintained, case doesn't matter - let results = matcher.exact_search("GPT-4.1"); + let results = matcher.exact_search("GPT-5"); assert_models_eq( results, vec![ - "zed/gpt-4.1", - "zed/gpt-4.1-nano", - "openai/gpt-4.1", - "openai/gpt-4.1-nano", + "zed/gpt-5", + "zed/gpt-5-mini", + "openai/gpt-5", + "openai/gpt-5-mini", ], ); } @@ -784,11 +784,11 @@ mod tests { let models = create_models(vec![ ("zed", "Claude 3.7 Sonnet"), ("zed", "Claude 3.7 Sonnet Thinking"), - ("zed", "gpt-4.1"), - ("zed", "gpt-4.1-nano"), + ("zed", "gpt-5"), + ("zed", "gpt-5-mini"), ("openai", "gpt-3.5-turbo"), - ("openai", "gpt-4.1"), - ("openai", "gpt-4.1-nano"), + ("openai", "gpt-5"), + ("openai", "gpt-5-mini"), ("ollama", "mistral"), ("ollama", "deepseek"), ]); @@ -799,27 +799,19 @@ mod tests { ); // Results should preserve models order whenever possible. 
- // In the case below, `zed/gpt-4.1` and `openai/gpt-4.1` have identical - // similarity scores, but `zed/gpt-4.1` was higher in the models list, + // In the case below, `zed/gpt-5-mini` and `openai/gpt-5-mini` have identical + // similarity scores, but `zed/gpt-5-mini` was higher in the models list, // so it should appear first in the results. - let results = matcher.fuzzy_search("41"); - assert_models_eq( - results, - vec![ - "zed/gpt-4.1", - "openai/gpt-4.1", - "zed/gpt-4.1-nano", - "openai/gpt-4.1-nano", - ], - ); + let results = matcher.fuzzy_search("mini"); + assert_models_eq(results, vec!["zed/gpt-5-mini", "openai/gpt-5-mini"]); // Model provider should be searchable as well let results = matcher.fuzzy_search("ol"); // meaning "ollama" assert_models_eq(results, vec!["ollama/mistral", "ollama/deepseek"]); - // Fuzzy search - let results = matcher.fuzzy_search("z4n"); - assert_models_eq(results, vec!["zed/gpt-4.1-nano"]); + // Fuzzy search - query "thinking" should match only the Thinking variant + let results = matcher.fuzzy_search("thinking"); + assert_models_eq(results, vec!["zed/Claude 3.7 Sonnet Thinking"]); } #[gpui::test] diff --git a/crates/eval/src/examples/grep_params_escapement.rs b/crates/eval/src/examples/grep_params_escapement.rs index 57086a1b9bd217e04072754539ddea20aa38c7a8..d4ba25cfcba60c66aa4a3b7fd1d93d778df1d9e8 100644 --- a/crates/eval/src/examples/grep_params_escapement.rs +++ b/crates/eval/src/examples/grep_params_escapement.rs @@ -15,7 +15,7 @@ This eval checks that the model doesn't use HTML escapement for characters like original +system_prompt change +tool description claude-opus-4 89% 92% 97%+ claude-sonnet-4 100% - gpt-4.1-mini 100% + gpt-5-mini 100% gemini-2.5-pro 98% */ diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index d66861a8955819153134811d464929cfa8423d2c..a98dda194752dc74d896e3b76118453aa96e08a9 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ 
b/crates/language_models/src/provider/open_ai.rs @@ -300,10 +300,7 @@ impl LanguageModel for OpenAiLanguageModel { fn supports_images(&self) -> bool { use open_ai::Model; match &self.model { - Model::FourOmni - | Model::FourOmniMini - | Model::FourPointOne - | Model::FourPointOneMini + Model::FourOmniMini | Model::FourPointOneNano | Model::Five | Model::FiveCodex @@ -313,8 +310,7 @@ impl LanguageModel for OpenAiLanguageModel { | Model::FivePointTwo | Model::FivePointTwoCodex | Model::O1 - | Model::O3 - | Model::O4Mini => true, + | Model::O3 => true, Model::ThreePointFiveTurbo | Model::Four | Model::FourTurbo @@ -1155,7 +1151,7 @@ pub fn count_open_ai_tokens( match model { Model::Custom { max_tokens, .. } => { let model = if max_tokens >= 100_000 { - // If the max tokens is 100k or more, it is likely the o200k_base tokenizer from gpt4o + // If the max tokens is 100k or more, it likely uses the o200k_base tokenizer "gpt-4o" } else { // Otherwise fallback to gpt-4, since only cl100k_base and o200k_base are @@ -1171,15 +1167,11 @@ pub fn count_open_ai_tokens( Model::ThreePointFiveTurbo | Model::Four | Model::FourTurbo - | Model::FourOmni | Model::FourOmniMini - | Model::FourPointOne - | Model::FourPointOneMini | Model::FourPointOneNano | Model::O1 | Model::O3 | Model::O3Mini - | Model::O4Mini | Model::Five | Model::FiveCodex | Model::FiveMini diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs index 073217e777c39f374560c208923848ea88e11a6a..158ec689788a21216f16ffd14e34771d68f544e9 100644 --- a/crates/open_ai/src/open_ai.rs +++ b/crates/open_ai/src/open_ai.rs @@ -63,15 +63,8 @@ pub enum Model { Four, #[serde(rename = "gpt-4-turbo")] FourTurbo, - #[serde(rename = "gpt-4o")] - #[default] - FourOmni, #[serde(rename = "gpt-4o-mini")] FourOmniMini, - #[serde(rename = "gpt-4.1")] - FourPointOne, - #[serde(rename = "gpt-4.1-mini")] - FourPointOneMini, #[serde(rename = "gpt-4.1-nano")] FourPointOneNano, #[serde(rename = "o1")] @@ -80,13 +73,12 @@ pub enum 
Model { O3Mini, #[serde(rename = "o3")] O3, - #[serde(rename = "o4-mini")] - O4Mini, #[serde(rename = "gpt-5")] Five, #[serde(rename = "gpt-5-codex")] FiveCodex, #[serde(rename = "gpt-5-mini")] + #[default] FiveMini, #[serde(rename = "gpt-5-nano")] FiveNano, @@ -116,8 +108,7 @@ const fn default_supports_chat_completions() -> bool { impl Model { pub fn default_fast() -> Self { - // TODO: Replace with FiveMini since all other models are deprecated - Self::FourPointOneMini + Self::FiveMini } pub fn from_id(id: &str) -> Result { @@ -125,15 +116,11 @@ impl Model { "gpt-3.5-turbo" => Ok(Self::ThreePointFiveTurbo), "gpt-4" => Ok(Self::Four), "gpt-4-turbo-preview" => Ok(Self::FourTurbo), - "gpt-4o" => Ok(Self::FourOmni), "gpt-4o-mini" => Ok(Self::FourOmniMini), - "gpt-4.1" => Ok(Self::FourPointOne), - "gpt-4.1-mini" => Ok(Self::FourPointOneMini), "gpt-4.1-nano" => Ok(Self::FourPointOneNano), "o1" => Ok(Self::O1), "o3-mini" => Ok(Self::O3Mini), "o3" => Ok(Self::O3), - "o4-mini" => Ok(Self::O4Mini), "gpt-5" => Ok(Self::Five), "gpt-5-codex" => Ok(Self::FiveCodex), "gpt-5-mini" => Ok(Self::FiveMini), @@ -150,15 +137,11 @@ impl Model { Self::ThreePointFiveTurbo => "gpt-3.5-turbo", Self::Four => "gpt-4", Self::FourTurbo => "gpt-4-turbo", - Self::FourOmni => "gpt-4o", Self::FourOmniMini => "gpt-4o-mini", - Self::FourPointOne => "gpt-4.1", - Self::FourPointOneMini => "gpt-4.1-mini", Self::FourPointOneNano => "gpt-4.1-nano", Self::O1 => "o1", Self::O3Mini => "o3-mini", Self::O3 => "o3", - Self::O4Mini => "o4-mini", Self::Five => "gpt-5", Self::FiveCodex => "gpt-5-codex", Self::FiveMini => "gpt-5-mini", @@ -175,15 +158,11 @@ impl Model { Self::ThreePointFiveTurbo => "gpt-3.5-turbo", Self::Four => "gpt-4", Self::FourTurbo => "gpt-4-turbo", - Self::FourOmni => "gpt-4o", Self::FourOmniMini => "gpt-4o-mini", - Self::FourPointOne => "gpt-4.1", - Self::FourPointOneMini => "gpt-4.1-mini", Self::FourPointOneNano => "gpt-4.1-nano", Self::O1 => "o1", Self::O3Mini => "o3-mini", Self::O3 => 
"o3", - Self::O4Mini => "o4-mini", Self::Five => "gpt-5", Self::FiveCodex => "gpt-5-codex", Self::FiveMini => "gpt-5-mini", @@ -191,9 +170,7 @@ impl Model { Self::FivePointOne => "gpt-5.1", Self::FivePointTwo => "gpt-5.2", Self::FivePointTwoCodex => "gpt-5.2-codex", - Self::Custom { - name, display_name, .. - } => display_name.as_ref().unwrap_or(name), + Self::Custom { display_name, .. } => display_name.as_deref().unwrap_or(&self.id()), } } @@ -202,15 +179,11 @@ impl Model { Self::ThreePointFiveTurbo => 16_385, Self::Four => 8_192, Self::FourTurbo => 128_000, - Self::FourOmni => 128_000, Self::FourOmniMini => 128_000, - Self::FourPointOne => 1_047_576, - Self::FourPointOneMini => 1_047_576, Self::FourPointOneNano => 1_047_576, Self::O1 => 200_000, Self::O3Mini => 200_000, Self::O3 => 200_000, - Self::O4Mini => 200_000, Self::Five => 272_000, Self::FiveCodex => 272_000, Self::FiveMini => 272_000, @@ -230,15 +203,11 @@ impl Model { Self::ThreePointFiveTurbo => Some(4_096), Self::Four => Some(8_192), Self::FourTurbo => Some(4_096), - Self::FourOmni => Some(16_384), Self::FourOmniMini => Some(16_384), - Self::FourPointOne => Some(32_768), - Self::FourPointOneMini => Some(32_768), Self::FourPointOneNano => Some(32_768), Self::O1 => Some(100_000), Self::O3Mini => Some(100_000), Self::O3 => Some(100_000), - Self::O4Mini => Some(100_000), Self::Five => Some(128_000), Self::FiveCodex => Some(128_000), Self::FiveMini => Some(128_000), @@ -277,10 +246,7 @@ impl Model { Self::ThreePointFiveTurbo | Self::Four | Self::FourTurbo - | Self::FourOmni | Self::FourOmniMini - | Self::FourPointOne - | Self::FourPointOneMini | Self::FourPointOneNano | Self::Five | Self::FiveCodex @@ -289,7 +255,7 @@ impl Model { | Self::FivePointTwo | Self::FivePointTwoCodex | Self::FiveNano => true, - Self::O1 | Self::O3 | Self::O3Mini | Self::O4Mini | Model::Custom { .. } => false, + Self::O1 | Self::O3 | Self::O3Mini | Model::Custom { .. } => false, } }