Detailed changes
@@ -1284,6 +1284,7 @@ impl Thread {
tool_choice: None,
stop: Vec::new(),
temperature: AgentSettings::temperature_for_model(&model, cx),
+ thinking_allowed: true,
};
let available_tools = self.available_tools(cx, model.clone());
@@ -1449,6 +1450,7 @@ impl Thread {
tool_choice: None,
stop: Vec::new(),
temperature: AgentSettings::temperature_for_model(model, cx),
+ thinking_allowed: false,
};
for message in &self.messages {
@@ -1461,6 +1461,7 @@ impl ActiveThread {
&configured_model.model,
cx,
),
+ thinking_allowed: true,
};
Some(configured_model.model.count_tokens(request, cx))
@@ -475,6 +475,7 @@ impl CodegenAlternative {
stop: Vec::new(),
temperature,
messages: vec![request_message],
+ thinking_allowed: false,
}
}))
}
@@ -1454,6 +1454,7 @@ impl MessageEditor {
tool_choice: None,
stop: vec![],
temperature: AgentSettings::temperature_for_model(&model.model, cx),
+ thinking_allowed: true,
};
Some(model.model.count_tokens(request, cx))
@@ -297,6 +297,7 @@ impl TerminalInlineAssistant {
tool_choice: None,
stop: Vec::new(),
temperature,
+ thinking_allowed: false,
}
}))
}
@@ -2293,6 +2293,7 @@ impl AssistantContext {
tool_choice: None,
stop: Vec::new(),
temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
+ thinking_allowed: true,
};
for message in self.messages(cx) {
if message.status != MessageStatus::Done {
@@ -719,6 +719,7 @@ impl EditAgent {
tools,
stop: Vec::new(),
temperature: None,
+ thinking_allowed: false,
};
Ok(self.model.stream_completion_text(request, cx).await?.stream)
@@ -1263,6 +1263,7 @@ impl EvalAssertion {
content: vec![prompt.into()],
cache: false,
}],
+ thinking_allowed: true,
..Default::default()
};
let mut response = retry_on_rate_limit(async || {
@@ -1599,6 +1600,7 @@ impl EditAgentTest {
let conversation = LanguageModelRequest {
messages,
tools,
+ thinking_allowed: true,
..Default::default()
};
@@ -594,6 +594,7 @@ impl ExampleInstance {
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
+ thinking_allowed: true,
};
let model = model.clone();
@@ -1830,6 +1830,7 @@ impl GitPanel {
tool_choice: None,
stop: Vec::new(),
temperature,
+ thinking_allowed: false,
};
let stream = model.stream_completion_text(request, &cx);
@@ -391,6 +391,7 @@ pub struct LanguageModelRequest {
pub tool_choice: Option<LanguageModelToolChoice>,
pub stop: Vec<String>,
pub temperature: Option<f32>,
+ pub thinking_allowed: bool,
}
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
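
Several call sites above fill the request with `..Default::default()` and then set `thinking_allowed: true` explicitly, which implies the struct derives `Default` and the flag defaults to `false`. A trimmed sketch (hypothetical reduced field set, not the full struct) of how call sites opt in or out:

```rust
/// Trimmed stand-in for the real request type, which also carries
/// messages, tools, tool_choice, stop, temperature, etc.
#[derive(Debug, Default)]
struct LanguageModelRequest {
    thinking_allowed: bool, // bool::default() is false, so thinking is opt-in
}

fn main() {
    // Interactive chat threads opt in to extended thinking...
    let chat = LanguageModelRequest {
        thinking_allowed: true,
        ..Default::default()
    };
    // ...while utility requests (commit messages, summaries, inline
    // terminal assists, edit streaming) leave it off.
    let utility = LanguageModelRequest::default();
    assert!(chat.thinking_allowed && !utility.thinking_allowed);
}
```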
@@ -663,7 +663,9 @@ pub fn into_anthropic(
} else {
Some(anthropic::StringOrContents::String(system_message))
},
- thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+ thinking: if request.thinking_allowed
+ && let AnthropicModelMode::Thinking { budget_tokens } = mode
+ {
Some(anthropic::Thinking::Enabled { budget_tokens })
} else {
None
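
The Anthropic, Bedrock, and OpenRouter conversions all gate the thinking budget with a let-chain (`if cond && let Pat = expr`, stabilized in Rust 1.88 on edition 2024). A self-contained sketch of the same shape, using a stand-in mode enum rather than the real provider types:

```rust
// Stand-in for the provider mode enums (AnthropicModelMode, BedrockModelMode,
// and OpenRouterModelMode all share this shape).
enum ModelMode {
    Default,
    Thinking { budget_tokens: Option<u64> },
}

fn thinking_budget(thinking_allowed: bool, mode: ModelMode) -> Option<Option<u64>> {
    // The `let` pattern participates in the boolean condition: thinking is
    // enabled only when the request allows it AND the mode carries a budget.
    if thinking_allowed
        && let ModelMode::Thinking { budget_tokens } = mode
    {
        Some(budget_tokens)
    } else {
        None
    }
}

fn main() {
    let on = ModelMode::Thinking { budget_tokens: Some(4_096) };
    assert_eq!(thinking_budget(true, on), Some(Some(4_096)));
    assert_eq!(
        thinking_budget(false, ModelMode::Thinking { budget_tokens: Some(4_096) }),
        None
    );
    assert_eq!(thinking_budget(true, ModelMode::Default), None);
}
```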
@@ -1108,6 +1110,7 @@ mod tests {
temperature: None,
tools: vec![],
tool_choice: None,
+ thinking_allowed: true,
};
let anthropic_request = into_anthropic(
@@ -799,7 +799,9 @@ pub fn into_bedrock(
max_tokens: max_output_tokens,
system: Some(system_message),
tools: Some(tool_config),
- thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
+ thinking: if request.thinking_allowed
+ && let BedrockModelMode::Thinking { budget_tokens } = mode
+ {
Some(bedrock::Thinking::Enabled { budget_tokens })
} else {
None
@@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
let use_cloud = cx
.update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
.unwrap_or(false);
+ let thinking_allowed = request.thinking_allowed;
match self.model.provider {
zed_llm_client::LanguageModelProvider::Anthropic => {
let request = into_anthropic(
@@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel {
self.model.id.to_string(),
1.0,
self.model.max_output_tokens as u64,
- if self.model.id.0.ends_with("-thinking") {
+ if thinking_allowed && self.model.id.0.ends_with("-thinking") {
AnthropicModelMode::Thinking {
budget_tokens: Some(4_096),
}
@@ -559,11 +559,11 @@ pub fn into_google(
stop_sequences: Some(request.stop),
max_output_tokens: None,
temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
- thinking_config: match mode {
- GoogleModelMode::Thinking { budget_tokens } => {
+ thinking_config: match (request.thinking_allowed, mode) {
+ (true, GoogleModelMode::Thinking { budget_tokens }) => {
budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
}
- GoogleModelMode::Default => None,
+ _ => None,
},
top_p: None,
top_k: None,
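
The Google conversion reaches the same gate through an exhaustive tuple match instead of a let-chain, with a wildcard arm covering both "thinking disallowed" and "default mode". A standalone sketch of that variant (stand-in types again, not the real `google_ai` definitions):

```rust
struct ThinkingConfig {
    thinking_budget: u64,
}

enum GoogleModelMode {
    Default,
    Thinking { budget_tokens: Option<u64> },
}

fn thinking_config(thinking_allowed: bool, mode: GoogleModelMode) -> Option<ThinkingConfig> {
    match (thinking_allowed, mode) {
        // Only the (allowed, thinking-capable) pair yields a config, and even
        // then only when a budget is actually present.
        (true, GoogleModelMode::Thinking { budget_tokens }) => {
            budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
        }
        _ => None,
    }
}

fn main() {
    assert!(thinking_config(false, GoogleModelMode::Thinking { budget_tokens: Some(1_024) }).is_none());
    assert!(thinking_config(true, GoogleModelMode::Default).is_none());
    assert_eq!(
        thinking_config(true, GoogleModelMode::Thinking { budget_tokens: Some(1_024) })
            .map(|c| c.thinking_budget),
        Some(1_024)
    );
}
```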
@@ -911,6 +911,7 @@ mod tests {
intent: None,
mode: None,
stop: vec![],
+ thinking_allowed: true,
};
let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@@ -943,6 +944,7 @@ mod tests {
intent: None,
mode: None,
stop: vec![],
+ thinking_allowed: true,
};
let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);
@@ -334,7 +334,10 @@ impl OllamaLanguageModel {
temperature: request.temperature.or(Some(1.0)),
..Default::default()
}),
- think: self.model.supports_thinking,
+ think: self
+ .model
+ .supports_thinking
+ .map(|supports_thinking| supports_thinking && request.thinking_allowed),
tools: request.tools.into_iter().map(tool_into_ollama).collect(),
}
}
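
Ollama's `supports_thinking` is an `Option<bool>` (None when the model's capability is unknown), so the request flag is folded in with `map` rather than a bare `&&`. A minimal sketch of the resulting behavior (stand-in free function, not the real method):

```rust
fn think_field(supports_thinking: Option<bool>, thinking_allowed: bool) -> Option<bool> {
    // None propagates: if we don't know whether the model can think, the
    // `think` field stays unset rather than being forced to false.
    supports_thinking.map(|supports| supports && thinking_allowed)
}

fn main() {
    assert_eq!(think_field(None, true), None); // capability unknown: omit the field
    assert_eq!(think_field(Some(true), true), Some(true)); // capable and allowed
    assert_eq!(think_field(Some(true), false), Some(false)); // disabled per request
    assert_eq!(think_field(Some(false), true), Some(false)); // model can't think anyway
}
```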
@@ -999,6 +999,7 @@ mod tests {
tool_choice: None,
stop: vec![],
temperature: None,
+ thinking_allowed: true,
};
// Validate that all models are supported by tiktoken-rs
@@ -523,7 +523,9 @@ pub fn into_open_router(
None
},
usage: open_router::RequestUsage { include: true },
- reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+ reasoning: if request.thinking_allowed
+ && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
+ {
Some(open_router::Reasoning {
effort: None,
max_tokens: budget_tokens,
@@ -981,6 +981,7 @@ impl RulesLibrary {
tool_choice: None,
stop: Vec::new(),
temperature: None,
+ thinking_allowed: true,
},
cx,
)
@@ -570,6 +570,7 @@ impl SummaryIndex {
tool_choice: None,
stop: Vec::new(),
temperature: None,
+ thinking_allowed: true,
};
let code_len = code.len();