From 6a749380aa45d2c2e6eb3307e67ab6739cc0a1ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Houl=C3=A9?= <13155277+tomhoule@users.noreply.github.com> Date: Thu, 26 Feb 2026 21:19:41 +0100 Subject: [PATCH] Add fast mode toggle in agent panel (#49714) This is a staff-only toggle for now, since the consequences of activating it are not obvious and quite dire (tokens cost 6 times more). Also, persist thinking, thinking effort and fast mode in DbThread so the thinking mode toggle and thinking effort are persisted. Release Notes: - Agent: The thinking mode toggle and thinking effort are now persisted when selecting a thread from history. --- assets/icons/fast_forward.svg | 4 ++ assets/icons/fast_forward_off.svg | 5 ++ assets/keymaps/default-linux.json | 1 + assets/keymaps/default-macos.json | 1 + assets/keymaps/default-windows.json | 1 + crates/agent/src/db.rs | 16 +++++ crates/agent/src/edit_agent.rs | 1 + crates/agent/src/thread.rs | 33 +++++---- crates/agent/src/thread_store.rs | 3 + crates/agent_ui/src/agent_ui.rs | 2 + crates/agent_ui/src/buffer_codegen.rs | 2 + crates/agent_ui/src/connection_view.rs | 4 +- .../src/connection_view/thread_view.rs | 67 ++++++++++++++++++- .../agent_ui/src/terminal_inline_assistant.rs | 1 + crates/anthropic/src/anthropic.rs | 10 +++ .../assistant_text_thread/src/text_thread.rs | 1 + .../cloud_llm_client/src/cloud_llm_client.rs | 2 + .../src/anthropic_client.rs | 3 + crates/eval/src/instance.rs | 1 + crates/git_ui/src/git_panel.rs | 1 + crates/icons/src/icons.rs | 2 + crates/language_model/src/language_model.rs | 4 ++ crates/language_model/src/request.rs | 27 ++++++++ .../language_models/src/provider/anthropic.rs | 3 + crates/language_models/src/provider/cloud.rs | 4 ++ .../src/provider/copilot_chat.rs | 1 + .../language_models/src/provider/mistral.rs | 2 + .../language_models/src/provider/open_ai.rs | 3 + crates/rules_library/src/rules_library.rs | 1 + 29 files changed, 190 insertions(+), 16 deletions(-) create mode 100644 
assets/icons/fast_forward.svg create mode 100644 assets/icons/fast_forward_off.svg diff --git a/assets/icons/fast_forward.svg b/assets/icons/fast_forward.svg new file mode 100644 index 0000000000000000000000000000000000000000..240bc65aca3558561bb52f2f8c5e860d38596223 --- /dev/null +++ b/assets/icons/fast_forward.svg @@ -0,0 +1,4 @@ + + + + diff --git a/assets/icons/fast_forward_off.svg b/assets/icons/fast_forward_off.svg new file mode 100644 index 0000000000000000000000000000000000000000..8ea7c41c6582b031f066f590dd425641945aadc9 --- /dev/null +++ b/assets/icons/fast_forward_off.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/assets/keymaps/default-linux.json b/assets/keymaps/default-linux.json index f3247e936f2b6d2d5ee5275304ea445729046afa..9b8f2d337b1f1073bca818cf0b9c66773a3ce4e9 100644 --- a/assets/keymaps/default-linux.json +++ b/assets/keymaps/default-linux.json @@ -333,6 +333,7 @@ "ctrl-alt-k": "agent::ToggleThinkingMode", "ctrl-alt-'": "agent::ToggleThinkingEffortMenu", "ctrl-'": "agent::CycleThinkingEffort", + "ctrl-alt-.": "agent::ToggleFastMode", }, }, { diff --git a/assets/keymaps/default-macos.json b/assets/keymaps/default-macos.json index 77e01368462cdfcce24cf1cba39d6a2a11cdcce0..5f210cb4da35f9909767035c941289ee24a2ee3f 100644 --- a/assets/keymaps/default-macos.json +++ b/assets/keymaps/default-macos.json @@ -377,6 +377,7 @@ "cmd-alt-k": "agent::ToggleThinkingMode", "cmd-alt-'": "agent::ToggleThinkingEffortMenu", "ctrl-'": "agent::CycleThinkingEffort", + "cmd-alt-.": "agent::ToggleFastMode", }, }, { diff --git a/assets/keymaps/default-windows.json b/assets/keymaps/default-windows.json index 51b221c8389d1588d80a8186ddceb68e8cb025c7..19f75f858cd45192c4cf30dd6bd0799046c26268 100644 --- a/assets/keymaps/default-windows.json +++ b/assets/keymaps/default-windows.json @@ -335,6 +335,7 @@ "ctrl-alt-k": "agent::ToggleThinkingMode", "ctrl-alt-'": "agent::ToggleThinkingEffortMenu", "ctrl-'": "agent::CycleThinkingEffort", + "ctrl-alt-.": "agent::ToggleFastMode", }, }, { 
diff --git a/crates/agent/src/db.rs b/crates/agent/src/db.rs index fa4b37dba3e789b499bfe5db4f0b76ccf12e5a09..7dba2f078adac47b951dcec9dd30883fdea618ad 100644 --- a/crates/agent/src/db.rs +++ b/crates/agent/src/db.rs @@ -8,6 +8,7 @@ use collections::{HashMap, IndexMap}; use futures::{FutureExt, future::Shared}; use gpui::{BackgroundExecutor, Global, Task}; use indoc::indoc; +use language_model::Speed; use parking_lot::Mutex; use serde::{Deserialize, Serialize}; use sqlez::{ @@ -53,6 +54,12 @@ pub struct DbThread { pub imported: bool, #[serde(default)] pub subagent_context: Option, + #[serde(default)] + pub speed: Option, + #[serde(default)] + pub thinking_enabled: bool, + #[serde(default)] + pub thinking_effort: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -91,6 +98,9 @@ impl SharedThread { profile: None, imported: true, subagent_context: None, + speed: None, + thinking_enabled: false, + thinking_effort: None, } } @@ -265,6 +275,9 @@ impl DbThread { profile: thread.profile, imported: false, subagent_context: None, + speed: None, + thinking_enabled: false, + thinking_effort: None, }) } } @@ -570,6 +583,9 @@ mod tests { profile: None, imported: false, subagent_context: None, + speed: None, + thinking_enabled: false, + thinking_effort: None, } } diff --git a/crates/agent/src/edit_agent.rs b/crates/agent/src/edit_agent.rs index 9f2f2f1877e20620373b1a7aacbf2f7b3a407bfd..288a3178f3c4501ae9de65d19624b66cbda2548d 100644 --- a/crates/agent/src/edit_agent.rs +++ b/crates/agent/src/edit_agent.rs @@ -749,6 +749,7 @@ impl EditAgent { temperature: None, thinking_allowed: self.thinking_allowed, thinking_effort: None, + speed: None, }; Ok(self.model.stream_completion_text(request, cx).await?.stream) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index cfac50aba7daa9bf799b561bb06f14309bcf53dd..9fccf441a54bb1c3705ad1db6b6cc1ac079e38ff 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -40,7 +40,8 @@ use language_model::{ 
LanguageModelImage, LanguageModelProviderId, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage, LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent, LanguageModelToolSchemaFormat, LanguageModelToolUse, - LanguageModelToolUseId, Role, SelectedModel, StopReason, TokenUsage, ZED_CLOUD_PROVIDER_ID, + LanguageModelToolUseId, Role, SelectedModel, Speed, StopReason, TokenUsage, + ZED_CLOUD_PROVIDER_ID, }; use project::Project; use prompt_store::ProjectContext; @@ -884,6 +885,7 @@ pub struct Thread { summarization_model: Option>, thinking_enabled: bool, thinking_effort: Option, + speed: Option, prompt_capabilities_tx: watch::Sender, pub(crate) prompt_capabilities_rx: watch::Receiver, pub(crate) project: Entity, @@ -977,6 +979,7 @@ impl Thread { model, summarization_model: None, thinking_enabled: enable_thinking, + speed: None, thinking_effort, prompt_capabilities_tx, prompt_capabilities_rx, @@ -1134,10 +1137,6 @@ impl Thread { let profile_id = db_thread .profile .unwrap_or_else(|| settings.default_profile.clone()); - let thinking_effort = settings - .default_model - .as_ref() - .and_then(|model| model.effort.clone()); let mut model = LanguageModelRegistry::global(cx).update(cx, |registry, cx| { db_thread @@ -1166,12 +1165,6 @@ impl Thread { watch::channel(Self::prompt_capabilities(model.as_deref())); let action_log = cx.new(|_| ActionLog::new(project.clone())); - // TODO: We should serialize the user's configured thinking parameter on `DbThread` - // rather than deriving it from the model's capability. A user may have explicitly - // toggled thinking off for a model that supports it, and we'd lose that preference here. 
- let enable_thinking = model - .as_deref() - .is_some_and(|model| model.supports_thinking()); Self { id, @@ -1199,8 +1192,9 @@ impl Thread { templates, model, summarization_model: None, - thinking_enabled: enable_thinking, - thinking_effort, + thinking_enabled: db_thread.thinking_enabled, + thinking_effort: db_thread.thinking_effort, + speed: db_thread.speed, project, action_log, updated_at: db_thread.updated_at, @@ -1230,6 +1224,9 @@ impl Thread { profile: Some(self.profile_id.clone()), imported: self.imported, subagent_context: self.subagent_context.clone(), + speed: self.speed, + thinking_enabled: self.thinking_enabled, + thinking_effort: self.thinking_effort.clone(), }; cx.background_spawn(async move { @@ -1318,6 +1315,15 @@ impl Thread { cx.notify(); } + pub fn speed(&self) -> Option { + self.speed + } + + pub fn set_speed(&mut self, speed: Speed, cx: &mut Context) { + self.speed = Some(speed); + cx.notify(); + } + pub fn last_message(&self) -> Option { if let Some(message) = self.pending_message.clone() { Some(Message::Agent(message)) @@ -2485,6 +2491,7 @@ impl Thread { temperature: AgentSettings::temperature_for_model(model, cx), thinking_allowed: self.thinking_enabled, thinking_effort: self.thinking_effort.clone(), + speed: self.speed(), }; log::debug!("Completion request built successfully"); diff --git a/crates/agent/src/thread_store.rs b/crates/agent/src/thread_store.rs index d5526b0953cb4342fcbf3b13a883385dfcf609ea..3769355bc8d3495f614ccd6787bb3a33d58e8f2f 100644 --- a/crates/agent/src/thread_store.rs +++ b/crates/agent/src/thread_store.rs @@ -133,6 +133,9 @@ mod tests { profile: None, imported: false, subagent_context: None, + speed: None, + thinking_enabled: false, + thinking_effort: None, } } diff --git a/crates/agent_ui/src/agent_ui.rs b/crates/agent_ui/src/agent_ui.rs index 1eca5a12c12cfc5e96faa83239735a1a1c9522cd..967b53bd200e6dc8e863a86602b2ac5f590406e2 100644 --- a/crates/agent_ui/src/agent_ui.rs +++ b/crates/agent_ui/src/agent_ui.rs @@ -160,6 
+160,8 @@ actions!( CycleThinkingEffort, /// Toggles the thinking effort selector menu open or closed. ToggleThinkingEffortMenu, + /// Toggles fast mode for models that support it. + ToggleFastMode, ] ); diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs index 0376fda47e0b20820e19cf9cc2b09493b06898b8..4f7bf084b7e96a14e6ecaafb04adfdbb6712e574 100644 --- a/crates/agent_ui/src/buffer_codegen.rs +++ b/crates/agent_ui/src/buffer_codegen.rs @@ -547,6 +547,7 @@ impl CodegenAlternative { messages, thinking_allowed: false, thinking_effort: None, + speed: None, } })) } @@ -626,6 +627,7 @@ impl CodegenAlternative { messages: vec![request_message], thinking_allowed: false, thinking_effort: None, + speed: None, } })) } diff --git a/crates/agent_ui/src/connection_view.rs b/crates/agent_ui/src/connection_view.rs index 8d38a15544f193e6b8a7aa458a24720d19163cd5..df06ed2bae7f77cfb366f3499097ab8c43bdf78c 100644 --- a/crates/agent_ui/src/connection_view.rs +++ b/crates/agent_ui/src/connection_view.rs @@ -75,8 +75,8 @@ use crate::{ ClearMessageQueue, CycleFavoriteModels, CycleModeSelector, CycleThinkingEffort, EditFirstQueuedMessage, ExpandMessageEditor, Follow, KeepAll, NewThread, OpenAddContextMenu, OpenAgentDiff, OpenHistory, RejectAll, RejectOnce, RemoveFirstQueuedMessage, - SelectPermissionGranularity, SendImmediately, SendNextQueuedMessage, ToggleProfileSelector, - ToggleThinkingEffortMenu, ToggleThinkingMode, UndoLastReject, + SelectPermissionGranularity, SendImmediately, SendNextQueuedMessage, ToggleFastMode, + ToggleProfileSelector, ToggleThinkingEffortMenu, ToggleThinkingMode, UndoLastReject, }; const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30); diff --git a/crates/agent_ui/src/connection_view/thread_view.rs b/crates/agent_ui/src/connection_view/thread_view.rs index ac605c7990359c90d172d083388be39476cd9656..499b11e5c08bd9b2c811e4cf5119bf7f71663c4b 100644 --- a/crates/agent_ui/src/connection_view/thread_view.rs +++ 
b/crates/agent_ui/src/connection_view/thread_view.rs @@ -1,6 +1,6 @@ use cloud_api_types::{SubmitAgentThreadFeedbackBody, SubmitAgentThreadFeedbackCommentsBody}; use gpui::{Corner, List}; -use language_model::LanguageModelEffortLevel; +use language_model::{LanguageModelEffortLevel, Speed}; use settings::update_settings_file; use ui::{ButtonLike, SplitButton, SplitButtonStyle, Tab}; @@ -2526,6 +2526,7 @@ impl ThreadView { .gap_0p5() .child(self.render_add_context_button(cx)) .child(self.render_follow_toggle(cx)) + .children(self.render_fast_mode_control(cx)) .children(self.render_thinking_control(cx)), ) .child( @@ -2950,6 +2951,49 @@ impl ThreadView { } } + fn fast_mode_available(&self, cx: &Context) -> bool { + if !cx.is_staff() { + return false; + } + self.as_native_thread(cx) + .and_then(|thread| thread.read(cx).model()) + .map(|model| model.supports_fast_mode()) + .unwrap_or(false) + } + + fn render_fast_mode_control(&self, cx: &mut Context) -> Option { + if !self.fast_mode_available(cx) { + return None; + } + + let thread = self.as_native_thread(cx)?.read(cx); + + let (tooltip_label, color, icon) = if matches!(thread.speed(), Some(Speed::Fast)) { + ("Disable Fast Mode", Color::Muted, IconName::FastForward) + } else { + ( + "Enable Fast Mode", + Color::Custom(cx.theme().colors().icon_disabled.opacity(0.8)), + IconName::FastForwardOff, + ) + }; + + let focus_handle = self.message_editor.focus_handle(cx); + + Some( + IconButton::new("fast-mode", icon) + .icon_size(IconSize::Small) + .icon_color(color) + .tooltip(move |_, cx| { + Tooltip::for_action_in(tooltip_label, &ToggleFastMode, &focus_handle, cx) + }) + .on_click(cx.listener(move |this, _, _window, cx| { + this.toggle_fast_mode(cx); + })) + .into_any_element(), + ) + } + fn render_thinking_control(&self, cx: &mut Context) -> Option { let thread = self.as_native_thread(cx)?.read(cx); let model = thread.model()?; @@ -7089,6 +7133,24 @@ impl ThreadView { }); } + fn toggle_fast_mode(&mut self, cx: &mut Context) 
{ + if !self.fast_mode_available(cx) { + return; + } + let Some(thread) = self.as_native_thread(cx) else { + return; + }; + thread.update(cx, |thread, cx| { + thread.set_speed( + thread + .speed() + .map(|speed| speed.toggle()) + .unwrap_or(Speed::Fast), + cx, + ); + }); + } + fn cycle_thinking_effort(&mut self, cx: &mut Context) { let Some(thread) = self.as_native_thread(cx) else { return; @@ -7193,6 +7255,9 @@ impl Render for ThreadView { .on_action(cx.listener(Self::handle_select_permission_granularity)) .on_action(cx.listener(Self::open_permission_dropdown)) .on_action(cx.listener(Self::open_add_context_menu)) + .on_action(cx.listener(|this, _: &ToggleFastMode, _window, cx| { + this.toggle_fast_mode(cx); + })) .on_action(cx.listener(|this, _: &ToggleThinkingMode, _window, cx| { if let Some(thread) = this.as_native_thread(cx) { thread.update(cx, |thread, cx| { diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs index 2d424c3e1a8ffd33c6933bd50991596bb07a44b2..3df3c1faaed9e02b659bc75b09257e81e96ebc03 100644 --- a/crates/agent_ui/src/terminal_inline_assistant.rs +++ b/crates/agent_ui/src/terminal_inline_assistant.rs @@ -276,6 +276,7 @@ impl TerminalInlineAssistant { temperature, thinking_allowed: false, thinking_effort: None, + speed: None, } })) } diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs index 56baf4b58fe9ac568ea22012234510ff617fab25..6bff2be4c15841de597309b626e768bbf79e880a 100644 --- a/crates/anthropic/src/anthropic.rs +++ b/crates/anthropic/src/anthropic.rs @@ -977,6 +977,8 @@ pub struct Request { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub stop_sequences: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] + pub speed: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] pub temperature: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub top_k: Option, @@ -984,6 +986,14 @@ pub struct Request { 
pub top_p: Option, } +#[derive(Debug, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Speed { + #[default] + Standard, + Fast, +} + #[derive(Debug, Serialize, Deserialize)] struct StreamingRequest { #[serde(flatten)] diff --git a/crates/assistant_text_thread/src/text_thread.rs b/crates/assistant_text_thread/src/text_thread.rs index 18b37808b936e354614f6681bbcb263b184f832c..34007868f9f128fa80f09f884ccbaf57ffd103c1 100644 --- a/crates/assistant_text_thread/src/text_thread.rs +++ b/crates/assistant_text_thread/src/text_thread.rs @@ -2275,6 +2275,7 @@ impl TextThread { temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)), thinking_allowed: true, thinking_effort: None, + speed: None, }; for message in self.messages(cx) { if message.status != MessageStatus::Done { diff --git a/crates/cloud_llm_client/src/cloud_llm_client.rs b/crates/cloud_llm_client/src/cloud_llm_client.rs index f69c279b24718b76fe2ae10f066d57324fe03461..62986b311e07c8b4439246e36c44318d23012e0b 100644 --- a/crates/cloud_llm_client/src/cloud_llm_client.rs +++ b/crates/cloud_llm_client/src/cloud_llm_client.rs @@ -302,6 +302,8 @@ pub struct LanguageModel { pub supports_tools: bool, pub supports_images: bool, pub supports_thinking: bool, + #[serde(default)] + pub supports_fast_mode: bool, pub supported_effort_levels: Vec, #[serde(default)] pub supports_streaming_tools: bool, diff --git a/crates/edit_prediction_cli/src/anthropic_client.rs b/crates/edit_prediction_cli/src/anthropic_client.rs index 784fa711b0058e3d2884460f6ca6f5300fc44a9a..869635c53a15e5c3f6cdaca7632a3e99f0b0bec1 100644 --- a/crates/edit_prediction_cli/src/anthropic_client.rs +++ b/crates/edit_prediction_cli/src/anthropic_client.rs @@ -50,6 +50,7 @@ impl PlainLlmClient { metadata: None, output_config: None, stop_sequences: Vec::new(), + speed: None, temperature: None, top_k: None, top_p: None, @@ -89,6 +90,7 @@ impl PlainLlmClient { metadata: None, output_config: None, 
stop_sequences: Vec::new(), + speed: None, temperature: None, top_k: None, top_p: None, @@ -578,6 +580,7 @@ impl BatchingLlmClient { temperature: None, top_k: None, top_p: None, + speed: None, }; let custom_id = format!("req_hash_{}", hash); diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index 59593578f1ffc512447f08fd728c6619943d6b6e..54e6ab0b925191c16885b8b8ed89369039c467f6 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -564,6 +564,7 @@ impl ExampleInstance { stop: Vec::new(), thinking_allowed: true, thinking_effort: None, + speed: None, }; let model = model.clone(); diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs index 1c8c09d7fdeaa51b8780f29aa13028355864924f..b042d66ce9ac5c45af2e5701da2d83db3c3ab907 100644 --- a/crates/git_ui/src/git_panel.rs +++ b/crates/git_ui/src/git_panel.rs @@ -2786,6 +2786,7 @@ impl GitPanel { temperature, thinking_allowed: false, thinking_effort: None, + speed: None, }; let stream = model.stream_completion_text(request, cx); diff --git a/crates/icons/src/icons.rs b/crates/icons/src/icons.rs index d6356f831ea9bbbaec5313da1a5b56f101471411..5738d763fcf7ff50b67f5a77acb918250a537124 100644 --- a/crates/icons/src/icons.rs +++ b/crates/icons/src/icons.rs @@ -112,6 +112,8 @@ pub enum IconName { ExpandUp, ExpandVertical, Eye, + FastForward, + FastForwardOff, File, FileCode, FileDiff, diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs index 313a7a3b4d94726a2e6619eddd0fd14e5e4c30e4..c403774499c9dcb384e93cf19367dc28e336aa60 100644 --- a/crates/language_model/src/language_model.rs +++ b/crates/language_model/src/language_model.rs @@ -613,6 +613,10 @@ pub trait LanguageModel: Send + Sync { false } + fn supports_fast_mode(&self) -> bool { + false + } + /// Returns the list of supported effort levels that can be used when thinking. 
fn supported_effort_levels(&self) -> Vec { Vec::new() diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs index cb2f6a27de65739bb684626ce5bd985a187bf28f..9be3002deae758ee99432842a31e3b90754ada0f 100644 --- a/crates/language_model/src/request.rs +++ b/crates/language_model/src/request.rs @@ -453,6 +453,33 @@ pub struct LanguageModelRequest { pub temperature: Option, pub thinking_allowed: bool, pub thinking_effort: Option, + pub speed: Option, +} + +#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum Speed { + #[default] + Standard, + Fast, +} + +impl Speed { + pub fn toggle(self) -> Self { + match self { + Speed::Standard => Speed::Fast, + Speed::Fast => Speed::Standard, + } + } +} + +impl From for anthropic::Speed { + fn from(speed: Speed) -> Self { + match speed { + Speed::Standard => anthropic::Speed::Standard, + Speed::Fast => anthropic::Speed::Fast, + } + } } #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs index 5b7ad62e0e66977465502d61f3db3707274a9718..d3bd129248406211e43e69fc5880310a9dedbc97 100644 --- a/crates/language_models/src/provider/anthropic.rs +++ b/crates/language_models/src/provider/anthropic.rs @@ -725,6 +725,7 @@ pub fn into_anthropic( metadata: None, output_config: None, stop_sequences: Vec::new(), + speed: request.speed.map(From::from), temperature: request.temperature.or(Some(default_temperature)), top_k: None, top_p: None, @@ -1105,6 +1106,7 @@ mod tests { tool_choice: None, thinking_allowed: true, thinking_effort: None, + speed: None, }; let anthropic_request = into_anthropic( @@ -1167,6 +1169,7 @@ mod tests { tools: vec![], tool_choice: None, thinking_allowed: true, + speed: None, }; request.messages.push(LanguageModelRequestMessage { role: Role::Assistant, diff --git a/crates/language_models/src/provider/cloud.rs 
b/crates/language_models/src/provider/cloud.rs index f822b89916a60c32b5f076580f960d47c6a1463c..19009013bf84ad9751e9ed0de2d3338b279a258e 100644 --- a/crates/language_models/src/provider/cloud.rs +++ b/crates/language_models/src/provider/cloud.rs @@ -571,6 +571,10 @@ impl LanguageModel for CloudLanguageModel { self.model.supports_thinking } + fn supports_fast_mode(&self) -> bool { + self.model.supports_fast_mode + } + fn supported_effort_levels(&self) -> Vec { self.model .supported_effort_levels diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs index 55ca0e526243dbbcb9504ea3948b192d79a02da1..4363430f865de63ed5fec0d6b40b085d9413fc2a 100644 --- a/crates/language_models/src/provider/copilot_chat.rs +++ b/crates/language_models/src/provider/copilot_chat.rs @@ -930,6 +930,7 @@ fn into_copilot_responses( temperature, thinking_allowed: _, thinking_effort: _, + speed: _, } = request; let mut input_items: Vec = Vec::new(); diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs index 3cc583ddde1cb03a4fd312b36f4358c0fbf3b4c1..02d46dcaa7ce7acc76d85c93cad610a7d2489bf0 100644 --- a/crates/language_models/src/provider/mistral.rs +++ b/crates/language_models/src/provider/mistral.rs @@ -883,6 +883,7 @@ mod tests { stop: vec![], thinking_allowed: true, thinking_effort: None, + speed: Default::default(), }; let (mistral_request, affinity) = @@ -919,6 +920,7 @@ mod tests { stop: vec![], thinking_allowed: true, thinking_effort: None, + speed: None, }; let (mistral_request, _) = into_mistral(request, mistral::Model::Pixtral12BLatest, None); diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index 40cc67098a76d0430f597feb8f1045859863486a..7fb65df0a534c7600f7315fd85d7adda0d66314a 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -555,6 +555,7 @@ 
pub fn into_open_ai_response( temperature, thinking_allowed: _, thinking_effort: _, + speed: _, } = request; let mut input_items = Vec::new(); @@ -1435,6 +1436,7 @@ mod tests { temperature: None, thinking_allowed: true, thinking_effort: None, + speed: None, }; // Validate that all models are supported by tiktoken-rs @@ -1573,6 +1575,7 @@ mod tests { temperature: None, thinking_allowed: false, thinking_effort: None, + speed: None, }; let response = into_open_ai_response( diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs index b3aa0301f204e97e6b1acda2a5cff4479b51c590..a89657e29680ccfd759fe63efcc837d883ef7590 100644 --- a/crates/rules_library/src/rules_library.rs +++ b/crates/rules_library/src/rules_library.rs @@ -1106,6 +1106,7 @@ impl RulesLibrary { temperature: None, thinking_allowed: true, thinking_effort: None, + speed: None, }, cx, )