Detailed changes
@@ -942,6 +942,7 @@ impl LanguageModel for CloudLanguageModel {
model.id(),
model.supports_parallel_tool_calls(),
None,
+ None,
);
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {
@@ -14,7 +14,7 @@ use language_model::{
RateLimiter, Role, StopReason, TokenUsage,
};
use menu;
-use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
+use open_ai::{ImageUrl, Model, ReasoningEffort, ResponseStreamEvent, stream_completion};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -45,6 +45,7 @@ pub struct AvailableModel {
pub max_tokens: u64,
pub max_output_tokens: Option<u64>,
pub max_completion_tokens: Option<u64>,
+ pub reasoning_effort: Option<ReasoningEffort>,
}
pub struct OpenAiLanguageModelProvider {
@@ -213,6 +214,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
max_tokens: model.max_tokens,
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
+ reasoning_effort: model.reasoning_effort.clone(),
},
);
}
@@ -369,6 +371,7 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
+ self.model.reasoning_effort(),
);
let completions = self.stream_completion(request, cx);
async move {
@@ -384,6 +387,7 @@ pub fn into_open_ai(
model_id: &str,
supports_parallel_tool_calls: bool,
max_output_tokens: Option<u64>,
+ reasoning_effort: Option<ReasoningEffort>,
) -> open_ai::Request {
let stream = !model_id.starts_with("o1-");
@@ -490,6 +494,7 @@ pub fn into_open_ai(
LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
LanguageModelToolChoice::None => open_ai::ToolChoice::None,
}),
+ reasoning_effort,
}
}
@@ -355,7 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
LanguageModelCompletionError,
>,
> {
- let request = into_open_ai(request, &self.model.name, true, self.max_output_tokens());
+ let request = into_open_ai(
+ request,
+ &self.model.name,
+ true,
+ self.max_output_tokens(),
+ None,
+ );
let completions = self.stream_completion(request, cx);
async move {
let mapper = OpenAiEventMapper::new();
@@ -356,6 +356,7 @@ impl LanguageModel for VercelLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
+ None,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -360,6 +360,7 @@ impl LanguageModel for XAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
+ None,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -89,11 +89,13 @@ pub enum Model {
max_tokens: u64,
max_output_tokens: Option<u64>,
max_completion_tokens: Option<u64>,
+ reasoning_effort: Option<ReasoningEffort>,
},
}
impl Model {
pub fn default_fast() -> Self {
+ // TODO: Replace with FiveMini since all other models are deprecated
Self::FourPointOneMini
}
@@ -206,6 +208,15 @@ impl Model {
}
}
+ pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
+ match self {
+ Self::Custom {
+ reasoning_effort, ..
+ } => reasoning_effort.to_owned(),
+ _ => None,
+ }
+ }
+
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
///
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
@@ -246,6 +257,8 @@ pub struct Request {
pub tools: Vec<ToolDefinition>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub reasoning_effort: Option<ReasoningEffort>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -257,6 +269,16 @@ pub enum ToolChoice {
Other(ToolDefinition),
}
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum ReasoningEffort {
+ Minimal,
+ Low,
+ Medium,
+ High,
+}
+
#[derive(Clone, Deserialize, Serialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ToolDefinition {