Add fast mode toggle in agent panel (#49714)

Tom Houlé created

This is a staff-only toggle for now, since the consequences of
activating it are not obvious and are quite dire (tokens cost 6 times
more).

Also, persist the thinking mode, thinking effort, and fast mode settings
in DbThread so they are restored when a thread is reopened.

Release Notes:

- Agent: The thinking mode toggle and thinking effort are now persisted
when selecting a thread from history.

Change summary

assets/icons/fast_forward.svg                       |  4 
assets/icons/fast_forward_off.svg                   |  5 +
assets/keymaps/default-linux.json                   |  1 
assets/keymaps/default-macos.json                   |  1 
assets/keymaps/default-windows.json                 |  1 
crates/agent/src/db.rs                              | 16 +++
crates/agent/src/edit_agent.rs                      |  1 
crates/agent/src/thread.rs                          | 33 ++++--
crates/agent/src/thread_store.rs                    |  3 
crates/agent_ui/src/agent_ui.rs                     |  2 
crates/agent_ui/src/buffer_codegen.rs               |  2 
crates/agent_ui/src/connection_view.rs              |  4 
crates/agent_ui/src/connection_view/thread_view.rs  | 67 ++++++++++++++
crates/agent_ui/src/terminal_inline_assistant.rs    |  1 
crates/anthropic/src/anthropic.rs                   | 10 ++
crates/assistant_text_thread/src/text_thread.rs     |  1 
crates/cloud_llm_client/src/cloud_llm_client.rs     |  2 
crates/edit_prediction_cli/src/anthropic_client.rs  |  3 
crates/eval/src/instance.rs                         |  1 
crates/git_ui/src/git_panel.rs                      |  1 
crates/icons/src/icons.rs                           |  2 
crates/language_model/src/language_model.rs         |  4 
crates/language_model/src/request.rs                | 27 ++++++
crates/language_models/src/provider/anthropic.rs    |  3 
crates/language_models/src/provider/cloud.rs        |  4 
crates/language_models/src/provider/copilot_chat.rs |  1 
crates/language_models/src/provider/mistral.rs      |  2 
crates/language_models/src/provider/open_ai.rs      |  3 
crates/rules_library/src/rules_library.rs           |  1 
29 files changed, 190 insertions(+), 16 deletions(-)

Detailed changes

assets/icons/fast_forward.svg 🔗

@@ -0,0 +1,4 @@
+<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M8 4.4366C8.00005 4.20171 8.06974 3.97211 8.20026 3.77683C8.33078 3.58154 8.51627 3.42934 8.73328 3.33946C8.95029 3.24958 9.18908 3.22605 9.41946 3.27186C9.64983 3.31767 9.86146 3.43076 10.0276 3.59683L13.591 7.16022C13.8136 7.38297 13.9387 7.68503 13.9387 8C13.9387 8.31496 13.8136 8.61702 13.591 8.83977L10.0276 12.4032C9.86146 12.5692 9.64983 12.6823 9.41946 12.7281C9.18908 12.7739 8.95029 12.7504 8.73328 12.6605C8.51627 12.5707 8.33078 12.4185 8.20026 12.2232C8.06974 12.0279 8.00005 11.7983 8 11.5634V4.4366Z" fill="#C6CAD0" fill-opacity="0.15" stroke="#C6CAD0" stroke-width="1.06902" stroke-linecap="round" stroke-linejoin="round"/>
+<path d="M2.061 4.4366C2.06105 4.20171 2.13075 3.97211 2.26127 3.77683C2.39179 3.58154 2.57728 3.42934 2.79429 3.33946C3.0113 3.24958 3.25008 3.22605 3.48046 3.27186C3.71084 3.31767 3.92246 3.43076 4.08858 3.59683L7.65197 7.16022C7.87465 7.38297 7.99974 7.68503 7.99974 8C7.99974 8.31496 7.87465 8.61702 7.65197 8.83977L4.08858 12.4032C3.92246 12.5692 3.71084 12.6823 3.48046 12.7281C3.25008 12.7739 3.0113 12.7504 2.79429 12.6605C2.57728 12.5707 2.39179 12.4185 2.26127 12.2232C2.13075 12.0279 2.06105 11.7983 2.061 11.5634V4.4366Z" fill="#C6CAD0" fill-opacity="0.15" stroke="#C6CAD0" stroke-width="1.06902" stroke-linecap="round" stroke-linejoin="round"/>
+</svg>

assets/icons/fast_forward_off.svg 🔗

@@ -0,0 +1,5 @@
+<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M8 10.3715V11.5634C8.00005 11.7983 8.06974 12.0279 8.20026 12.2232C8.33078 12.4185 8.51627 12.5707 8.73328 12.6605C8.95029 12.7504 9.18908 12.7739 9.41946 12.7281C9.64983 12.6823 9.86146 12.5692 10.0276 12.4032L10.2748 12.1559M8 5.38478V4.4366C8.00005 4.20171 8.06974 3.97211 8.20026 3.77683C8.33078 3.58154 8.51627 3.42934 8.73328 3.33946C8.95029 3.24958 9.18908 3.22605 9.41946 3.27186C9.64983 3.31767 9.86146 3.43076 10.0276 3.59683L13.591 7.16022C13.8136 7.38297 13.9387 7.68503 13.9387 8C13.9387 8.31496 13.8136 8.61702 13.591 8.83977L12.7618 9.66893" stroke="#C6CAD0" stroke-width="1.06902" stroke-linecap="round" stroke-linejoin="round"/>
+<path d="M2.061 6.99803V11.5634C2.06105 11.7983 2.13075 12.0279 2.26127 12.2232C2.39179 12.4185 2.57728 12.5707 2.79429 12.6605C3.0113 12.7504 3.25008 12.7739 3.48046 12.7281C3.71084 12.6823 3.92246 12.5692 4.08858 12.4032L6.46394 10.0278" stroke="#C6CAD0" stroke-width="1.06902" stroke-linecap="round" stroke-linejoin="round"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M14.1225 13.809C14.0341 13.9146 13.877 13.9289 13.7711 13.8409L1.19311 3.4002C1.08659 3.31177 1.07221 3.15361 1.16104 3.04742L1.87752 2.191C1.96588 2.08539 2.123 2.07111 2.22895 2.15905L14.8069 12.5998C14.9134 12.6882 14.9278 12.8464 14.839 12.9526L14.1225 13.809Z" fill="#C6CAD0"/>
+</svg>

assets/keymaps/default-linux.json 🔗

@@ -333,6 +333,7 @@
       "ctrl-alt-k": "agent::ToggleThinkingMode",
       "ctrl-alt-'": "agent::ToggleThinkingEffortMenu",
       "ctrl-'": "agent::CycleThinkingEffort",
+      "ctrl-alt-.": "agent::ToggleFastMode",
     },
   },
   {

assets/keymaps/default-macos.json 🔗

@@ -377,6 +377,7 @@
       "cmd-alt-k": "agent::ToggleThinkingMode",
       "cmd-alt-'": "agent::ToggleThinkingEffortMenu",
       "ctrl-'": "agent::CycleThinkingEffort",
+      "cmd-alt-.": "agent::ToggleFastMode",
     },
   },
   {

assets/keymaps/default-windows.json 🔗

@@ -335,6 +335,7 @@
       "ctrl-alt-k": "agent::ToggleThinkingMode",
       "ctrl-alt-'": "agent::ToggleThinkingEffortMenu",
       "ctrl-'": "agent::CycleThinkingEffort",
+      "ctrl-alt-.": "agent::ToggleFastMode",
     },
   },
   {

crates/agent/src/db.rs 🔗

@@ -8,6 +8,7 @@ use collections::{HashMap, IndexMap};
 use futures::{FutureExt, future::Shared};
 use gpui::{BackgroundExecutor, Global, Task};
 use indoc::indoc;
+use language_model::Speed;
 use parking_lot::Mutex;
 use serde::{Deserialize, Serialize};
 use sqlez::{
@@ -53,6 +54,12 @@ pub struct DbThread {
     pub imported: bool,
     #[serde(default)]
     pub subagent_context: Option<crate::SubagentContext>,
+    #[serde(default)]
+    pub speed: Option<Speed>,
+    #[serde(default)]
+    pub thinking_enabled: bool,
+    #[serde(default)]
+    pub thinking_effort: Option<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -91,6 +98,9 @@ impl SharedThread {
             profile: None,
             imported: true,
             subagent_context: None,
+            speed: None,
+            thinking_enabled: false,
+            thinking_effort: None,
         }
     }
 
@@ -265,6 +275,9 @@ impl DbThread {
             profile: thread.profile,
             imported: false,
             subagent_context: None,
+            speed: None,
+            thinking_enabled: false,
+            thinking_effort: None,
         })
     }
 }
@@ -570,6 +583,9 @@ mod tests {
             profile: None,
             imported: false,
             subagent_context: None,
+            speed: None,
+            thinking_enabled: false,
+            thinking_effort: None,
         }
     }
 

crates/agent/src/edit_agent.rs 🔗

@@ -749,6 +749,7 @@ impl EditAgent {
             temperature: None,
             thinking_allowed: self.thinking_allowed,
             thinking_effort: None,
+            speed: None,
         };
 
         Ok(self.model.stream_completion_text(request, cx).await?.stream)

crates/agent/src/thread.rs 🔗

@@ -40,7 +40,8 @@ use language_model::{
     LanguageModelImage, LanguageModelProviderId, LanguageModelRegistry, LanguageModelRequest,
     LanguageModelRequestMessage, LanguageModelRequestTool, LanguageModelToolResult,
     LanguageModelToolResultContent, LanguageModelToolSchemaFormat, LanguageModelToolUse,
-    LanguageModelToolUseId, Role, SelectedModel, StopReason, TokenUsage, ZED_CLOUD_PROVIDER_ID,
+    LanguageModelToolUseId, Role, SelectedModel, Speed, StopReason, TokenUsage,
+    ZED_CLOUD_PROVIDER_ID,
 };
 use project::Project;
 use prompt_store::ProjectContext;
@@ -884,6 +885,7 @@ pub struct Thread {
     summarization_model: Option<Arc<dyn LanguageModel>>,
     thinking_enabled: bool,
     thinking_effort: Option<String>,
+    speed: Option<Speed>,
     prompt_capabilities_tx: watch::Sender<acp::PromptCapabilities>,
     pub(crate) prompt_capabilities_rx: watch::Receiver<acp::PromptCapabilities>,
     pub(crate) project: Entity<Project>,
@@ -977,6 +979,7 @@ impl Thread {
             model,
             summarization_model: None,
             thinking_enabled: enable_thinking,
+            speed: None,
             thinking_effort,
             prompt_capabilities_tx,
             prompt_capabilities_rx,
@@ -1134,10 +1137,6 @@ impl Thread {
         let profile_id = db_thread
             .profile
             .unwrap_or_else(|| settings.default_profile.clone());
-        let thinking_effort = settings
-            .default_model
-            .as_ref()
-            .and_then(|model| model.effort.clone());
 
         let mut model = LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
             db_thread
@@ -1166,12 +1165,6 @@ impl Thread {
             watch::channel(Self::prompt_capabilities(model.as_deref()));
 
         let action_log = cx.new(|_| ActionLog::new(project.clone()));
-        // TODO: We should serialize the user's configured thinking parameter on `DbThread`
-        // rather than deriving it from the model's capability. A user may have explicitly
-        // toggled thinking off for a model that supports it, and we'd lose that preference here.
-        let enable_thinking = model
-            .as_deref()
-            .is_some_and(|model| model.supports_thinking());
 
         Self {
             id,
@@ -1199,8 +1192,9 @@ impl Thread {
             templates,
             model,
             summarization_model: None,
-            thinking_enabled: enable_thinking,
-            thinking_effort,
+            thinking_enabled: db_thread.thinking_enabled,
+            thinking_effort: db_thread.thinking_effort,
+            speed: db_thread.speed,
             project,
             action_log,
             updated_at: db_thread.updated_at,
@@ -1230,6 +1224,9 @@ impl Thread {
             profile: Some(self.profile_id.clone()),
             imported: self.imported,
             subagent_context: self.subagent_context.clone(),
+            speed: self.speed,
+            thinking_enabled: self.thinking_enabled,
+            thinking_effort: self.thinking_effort.clone(),
         };
 
         cx.background_spawn(async move {
@@ -1318,6 +1315,15 @@ impl Thread {
         cx.notify();
     }
 
+    pub fn speed(&self) -> Option<Speed> {
+        self.speed
+    }
+
+    pub fn set_speed(&mut self, speed: Speed, cx: &mut Context<Self>) {
+        self.speed = Some(speed);
+        cx.notify();
+    }
+
     pub fn last_message(&self) -> Option<Message> {
         if let Some(message) = self.pending_message.clone() {
             Some(Message::Agent(message))
@@ -2485,6 +2491,7 @@ impl Thread {
             temperature: AgentSettings::temperature_for_model(model, cx),
             thinking_allowed: self.thinking_enabled,
             thinking_effort: self.thinking_effort.clone(),
+            speed: self.speed(),
         };
 
         log::debug!("Completion request built successfully");

crates/agent/src/thread_store.rs 🔗

@@ -133,6 +133,9 @@ mod tests {
             profile: None,
             imported: false,
             subagent_context: None,
+            speed: None,
+            thinking_enabled: false,
+            thinking_effort: None,
         }
     }
 

crates/agent_ui/src/agent_ui.rs 🔗

@@ -160,6 +160,8 @@ actions!(
         CycleThinkingEffort,
         /// Toggles the thinking effort selector menu open or closed.
         ToggleThinkingEffortMenu,
+        /// Toggles fast mode for models that support it.
+        ToggleFastMode,
     ]
 );
 

crates/agent_ui/src/buffer_codegen.rs 🔗

@@ -547,6 +547,7 @@ impl CodegenAlternative {
                 messages,
                 thinking_allowed: false,
                 thinking_effort: None,
+                speed: None,
             }
         }))
     }
@@ -626,6 +627,7 @@ impl CodegenAlternative {
                 messages: vec![request_message],
                 thinking_allowed: false,
                 thinking_effort: None,
+                speed: None,
             }
         }))
     }

crates/agent_ui/src/connection_view.rs 🔗

@@ -75,8 +75,8 @@ use crate::{
     ClearMessageQueue, CycleFavoriteModels, CycleModeSelector, CycleThinkingEffort,
     EditFirstQueuedMessage, ExpandMessageEditor, Follow, KeepAll, NewThread, OpenAddContextMenu,
     OpenAgentDiff, OpenHistory, RejectAll, RejectOnce, RemoveFirstQueuedMessage,
-    SelectPermissionGranularity, SendImmediately, SendNextQueuedMessage, ToggleProfileSelector,
-    ToggleThinkingEffortMenu, ToggleThinkingMode, UndoLastReject,
+    SelectPermissionGranularity, SendImmediately, SendNextQueuedMessage, ToggleFastMode,
+    ToggleProfileSelector, ToggleThinkingEffortMenu, ToggleThinkingMode, UndoLastReject,
 };
 
 const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);

crates/agent_ui/src/connection_view/thread_view.rs 🔗

@@ -1,6 +1,6 @@
 use cloud_api_types::{SubmitAgentThreadFeedbackBody, SubmitAgentThreadFeedbackCommentsBody};
 use gpui::{Corner, List};
-use language_model::LanguageModelEffortLevel;
+use language_model::{LanguageModelEffortLevel, Speed};
 use settings::update_settings_file;
 use ui::{ButtonLike, SplitButton, SplitButtonStyle, Tab};
 
@@ -2526,6 +2526,7 @@ impl ThreadView {
                             .gap_0p5()
                             .child(self.render_add_context_button(cx))
                             .child(self.render_follow_toggle(cx))
+                            .children(self.render_fast_mode_control(cx))
                             .children(self.render_thinking_control(cx)),
                     )
                     .child(
@@ -2950,6 +2951,49 @@ impl ThreadView {
         }
     }
 
+    fn fast_mode_available(&self, cx: &Context<Self>) -> bool {
+        if !cx.is_staff() {
+            return false;
+        }
+        self.as_native_thread(cx)
+            .and_then(|thread| thread.read(cx).model())
+            .map(|model| model.supports_fast_mode())
+            .unwrap_or(false)
+    }
+
+    fn render_fast_mode_control(&self, cx: &mut Context<Self>) -> Option<AnyElement> {
+        if !self.fast_mode_available(cx) {
+            return None;
+        }
+
+        let thread = self.as_native_thread(cx)?.read(cx);
+
+        let (tooltip_label, color, icon) = if matches!(thread.speed(), Some(Speed::Fast)) {
+            ("Disable Fast Mode", Color::Muted, IconName::FastForward)
+        } else {
+            (
+                "Enable Fast Mode",
+                Color::Custom(cx.theme().colors().icon_disabled.opacity(0.8)),
+                IconName::FastForwardOff,
+            )
+        };
+
+        let focus_handle = self.message_editor.focus_handle(cx);
+
+        Some(
+            IconButton::new("fast-mode", icon)
+                .icon_size(IconSize::Small)
+                .icon_color(color)
+                .tooltip(move |_, cx| {
+                    Tooltip::for_action_in(tooltip_label, &ToggleFastMode, &focus_handle, cx)
+                })
+                .on_click(cx.listener(move |this, _, _window, cx| {
+                    this.toggle_fast_mode(cx);
+                }))
+                .into_any_element(),
+        )
+    }
+
     fn render_thinking_control(&self, cx: &mut Context<Self>) -> Option<AnyElement> {
         let thread = self.as_native_thread(cx)?.read(cx);
         let model = thread.model()?;
@@ -7089,6 +7133,24 @@ impl ThreadView {
         });
     }
 
+    fn toggle_fast_mode(&mut self, cx: &mut Context<Self>) {
+        if !self.fast_mode_available(cx) {
+            return;
+        }
+        let Some(thread) = self.as_native_thread(cx) else {
+            return;
+        };
+        thread.update(cx, |thread, cx| {
+            thread.set_speed(
+                thread
+                    .speed()
+                    .map(|speed| speed.toggle())
+                    .unwrap_or(Speed::Fast),
+                cx,
+            );
+        });
+    }
+
     fn cycle_thinking_effort(&mut self, cx: &mut Context<Self>) {
         let Some(thread) = self.as_native_thread(cx) else {
             return;
@@ -7193,6 +7255,9 @@ impl Render for ThreadView {
             .on_action(cx.listener(Self::handle_select_permission_granularity))
             .on_action(cx.listener(Self::open_permission_dropdown))
             .on_action(cx.listener(Self::open_add_context_menu))
+            .on_action(cx.listener(|this, _: &ToggleFastMode, _window, cx| {
+                this.toggle_fast_mode(cx);
+            }))
             .on_action(cx.listener(|this, _: &ToggleThinkingMode, _window, cx| {
                 if let Some(thread) = this.as_native_thread(cx) {
                     thread.update(cx, |thread, cx| {

crates/anthropic/src/anthropic.rs 🔗

@@ -977,6 +977,8 @@ pub struct Request {
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub stop_sequences: Vec<String>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub speed: Option<Speed>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
     pub temperature: Option<f32>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub top_k: Option<u32>,
@@ -984,6 +986,14 @@ pub struct Request {
     pub top_p: Option<f32>,
 }
 
+#[derive(Debug, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Speed {
+    #[default]
+    Standard,
+    Fast,
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 struct StreamingRequest {
     #[serde(flatten)]

crates/assistant_text_thread/src/text_thread.rs 🔗

@@ -2275,6 +2275,7 @@ impl TextThread {
             temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
             thinking_allowed: true,
             thinking_effort: None,
+            speed: None,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {

crates/cloud_llm_client/src/cloud_llm_client.rs 🔗

@@ -302,6 +302,8 @@ pub struct LanguageModel {
     pub supports_tools: bool,
     pub supports_images: bool,
     pub supports_thinking: bool,
+    #[serde(default)]
+    pub supports_fast_mode: bool,
     pub supported_effort_levels: Vec<SupportedEffortLevel>,
     #[serde(default)]
     pub supports_streaming_tools: bool,

crates/edit_prediction_cli/src/anthropic_client.rs 🔗

@@ -50,6 +50,7 @@ impl PlainLlmClient {
             metadata: None,
             output_config: None,
             stop_sequences: Vec::new(),
+            speed: None,
             temperature: None,
             top_k: None,
             top_p: None,
@@ -89,6 +90,7 @@ impl PlainLlmClient {
             metadata: None,
             output_config: None,
             stop_sequences: Vec::new(),
+            speed: None,
             temperature: None,
             top_k: None,
             top_p: None,
@@ -578,6 +580,7 @@ impl BatchingLlmClient {
                     temperature: None,
                     top_k: None,
                     top_p: None,
+                    speed: None,
                 };
 
                 let custom_id = format!("req_hash_{}", hash);

crates/eval/src/instance.rs 🔗

@@ -564,6 +564,7 @@ impl ExampleInstance {
                 stop: Vec::new(),
                 thinking_allowed: true,
                 thinking_effort: None,
+                speed: None,
             };
 
             let model = model.clone();

crates/git_ui/src/git_panel.rs 🔗

@@ -2786,6 +2786,7 @@ impl GitPanel {
                     temperature,
                     thinking_allowed: false,
                     thinking_effort: None,
+                    speed: None,
                 };
 
                 let stream = model.stream_completion_text(request, cx);

crates/icons/src/icons.rs 🔗

@@ -112,6 +112,8 @@ pub enum IconName {
     ExpandUp,
     ExpandVertical,
     Eye,
+    FastForward,
+    FastForwardOff,
     File,
     FileCode,
     FileDiff,

crates/language_model/src/language_model.rs 🔗

@@ -613,6 +613,10 @@ pub trait LanguageModel: Send + Sync {
         false
     }
 
+    fn supports_fast_mode(&self) -> bool {
+        false
+    }
+
     /// Returns the list of supported effort levels that can be used when thinking.
     fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
         Vec::new()

crates/language_model/src/request.rs 🔗

@@ -453,6 +453,33 @@ pub struct LanguageModelRequest {
     pub temperature: Option<f32>,
     pub thinking_allowed: bool,
     pub thinking_effort: Option<String>,
+    pub speed: Option<Speed>,
+}
+
+#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum Speed {
+    #[default]
+    Standard,
+    Fast,
+}
+
+impl Speed {
+    pub fn toggle(self) -> Self {
+        match self {
+            Speed::Standard => Speed::Fast,
+            Speed::Fast => Speed::Standard,
+        }
+    }
+}
+
+impl From<Speed> for anthropic::Speed {
+    fn from(speed: Speed) -> Self {
+        match speed {
+            Speed::Standard => anthropic::Speed::Standard,
+            Speed::Fast => anthropic::Speed::Fast,
+        }
+    }
 }
 
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]

crates/language_models/src/provider/anthropic.rs 🔗

@@ -725,6 +725,7 @@ pub fn into_anthropic(
         metadata: None,
         output_config: None,
         stop_sequences: Vec::new(),
+        speed: request.speed.map(From::from),
         temperature: request.temperature.or(Some(default_temperature)),
         top_k: None,
         top_p: None,
@@ -1105,6 +1106,7 @@ mod tests {
             tool_choice: None,
             thinking_allowed: true,
             thinking_effort: None,
+            speed: None,
         };
 
         let anthropic_request = into_anthropic(
@@ -1167,6 +1169,7 @@ mod tests {
             tools: vec![],
             tool_choice: None,
             thinking_allowed: true,
+            speed: None,
         };
         request.messages.push(LanguageModelRequestMessage {
             role: Role::Assistant,

crates/language_models/src/provider/cloud.rs 🔗

@@ -571,6 +571,10 @@ impl LanguageModel for CloudLanguageModel {
         self.model.supports_thinking
     }
 
+    fn supports_fast_mode(&self) -> bool {
+        self.model.supports_fast_mode
+    }
+
     fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
         self.model
             .supported_effort_levels

crates/language_models/src/provider/mistral.rs 🔗

@@ -883,6 +883,7 @@ mod tests {
             stop: vec![],
             thinking_allowed: true,
             thinking_effort: None,
+            speed: Default::default(),
         };
 
         let (mistral_request, affinity) =
@@ -919,6 +920,7 @@ mod tests {
             stop: vec![],
             thinking_allowed: true,
             thinking_effort: None,
+            speed: None,
         };
 
         let (mistral_request, _) = into_mistral(request, mistral::Model::Pixtral12BLatest, None);

crates/language_models/src/provider/open_ai.rs 🔗

@@ -555,6 +555,7 @@ pub fn into_open_ai_response(
         temperature,
         thinking_allowed: _,
         thinking_effort: _,
+        speed: _,
     } = request;
 
     let mut input_items = Vec::new();
@@ -1435,6 +1436,7 @@ mod tests {
             temperature: None,
             thinking_allowed: true,
             thinking_effort: None,
+            speed: None,
         };
 
         // Validate that all models are supported by tiktoken-rs
@@ -1573,6 +1575,7 @@ mod tests {
             temperature: None,
             thinking_allowed: false,
             thinking_effort: None,
+            speed: None,
         };
 
         let response = into_open_ai_response(