language_models: Add thinking support for ollama (#31665)

Umesh Yadav and Bennet Bo Fenner created 9 months ago

This PR updates how we handle Ollama responses, leveraging the new
[v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release.
Previously, thinking text was embedded within the model's main content,
leading to it appearing directly in the agent's response. Now, thinking
content is provided as a separate parameter, allowing us to display it
correctly within the agent panel, similar to other providers. I have
tested this with qwen3:8b and works nicely. ~~We can release this once
the ollama is release is stable.~~ It's released now as stable.

<img width="433" alt="image"
src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434"
/>


Release Notes:

- Add thinking support for ollama

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>

Change summary

crates/agent_settings/src/agent_settings.rs   |  1 
crates/language_models/src/provider/ollama.rs | 34 ++++++++++++++++----
crates/ollama/src/ollama.rs                   | 11 ++++++
3 files changed, 39 insertions(+), 7 deletions(-)

Detailed changes

crates/agent_settings/src/agent_settings.rs 🔗

@@ -372,6 +372,7 @@ impl AgentSettingsContent {
                                 None,
                                 None,
                                 Some(language_model.supports_tools()),
+                                None,
                             )),
                             api_url,
                         });

crates/language_models/src/provider/ollama.rs 🔗

@@ -6,7 +6,7 @@ use http_client::HttpClient;
 use language_model::{
     AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
     LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
-    LanguageModelToolUseId, StopReason,
+    LanguageModelToolUseId, MessageContent, StopReason,
 };
 use language_model::{
     LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
@@ -54,6 +54,8 @@ pub struct AvailableModel {
     pub keep_alive: Option<KeepAlive>,
     /// Whether the model supports tools
     pub supports_tools: Option<bool>,
+    /// Whether to enable think mode
+    pub supports_thinking: Option<bool>,
 }
 
 pub struct OllamaLanguageModelProvider {
@@ -99,6 +101,7 @@ impl State {
                             None,
                             None,
                             Some(capabilities.supports_tools()),
+                            Some(capabilities.supports_thinking()),
                         );
                         Ok(ollama_model)
                     }
@@ -219,6 +222,7 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
                     max_tokens: model.max_tokens,
                     keep_alive: model.keep_alive.clone(),
                     supports_tools: model.supports_tools,
+                    supports_thinking: model.supports_thinking,
                 },
             );
         }
@@ -282,10 +286,18 @@ impl OllamaLanguageModel {
                     Role::User => ChatMessage::User {
                         content: msg.string_contents(),
                     },
-                    Role::Assistant => ChatMessage::Assistant {
-                        content: msg.string_contents(),
-                        tool_calls: None,
-                    },
+                    Role::Assistant => {
+                        let content = msg.string_contents();
+                        let thinking = msg.content.into_iter().find_map(|content| match content {
+                            MessageContent::Thinking { text, .. } if !text.is_empty() => Some(text),
+                            _ => None,
+                        });
+                        ChatMessage::Assistant {
+                            content,
+                            tool_calls: None,
+                            thinking,
+                        }
+                    }
                     Role::System => ChatMessage::System {
                         content: msg.string_contents(),
                     },
@@ -299,6 +311,7 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
+            think: self.model.supports_thinking,
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
@@ -433,8 +446,15 @@ fn map_to_language_model_completion_events(
                 ChatMessage::Assistant {
                     content,
                     tool_calls,
+                    thinking,
                 } => {
-                    // Check for tool calls
+                    if let Some(text) = thinking {
+                        events.push(Ok(LanguageModelCompletionEvent::Thinking {
+                            text,
+                            signature: None,
+                        }));
+                    }
+
                     if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                         match tool_call {
                             OllamaToolCall::Function(function) => {
@@ -455,7 +475,7 @@ fn map_to_language_model_completion_events(
                                 state.used_tools = true;
                             }
                         }
-                    } else {
+                    } else if !content.is_empty() {
                         events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                     }
                 }

crates/ollama/src/ollama.rs 🔗

@@ -38,6 +38,7 @@ pub struct Model {
     pub max_tokens: usize,
     pub keep_alive: Option<KeepAlive>,
     pub supports_tools: Option<bool>,
+    pub supports_thinking: Option<bool>,
 }
 
 fn get_max_tokens(name: &str) -> usize {
@@ -67,6 +68,7 @@ impl Model {
         display_name: Option<&str>,
         max_tokens: Option<usize>,
         supports_tools: Option<bool>,
+        supports_thinking: Option<bool>,
     ) -> Self {
         Self {
             name: name.to_owned(),
@@ -76,6 +78,7 @@ impl Model {
             max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
             keep_alive: Some(KeepAlive::indefinite()),
             supports_tools,
+            supports_thinking,
         }
     }
 
@@ -98,6 +101,7 @@ pub enum ChatMessage {
     Assistant {
         content: String,
         tool_calls: Option<Vec<OllamaToolCall>>,
+        thinking: Option<String>,
     },
     User {
         content: String,
@@ -140,6 +144,7 @@ pub struct ChatRequest {
     pub keep_alive: KeepAlive,
     pub options: Option<ChatOptions>,
     pub tools: Vec<OllamaTool>,
+    pub think: Option<bool>,
 }
 
 impl ChatRequest {
@@ -215,6 +220,10 @@ impl ModelShow {
         // .contains expects &String, which would require an additional allocation
         self.capabilities.iter().any(|v| v == "tools")
     }
+
+    pub fn supports_thinking(&self) -> bool {
+        self.capabilities.iter().any(|v| v == "thinking")
+    }
 }
 
 pub async fn complete(
@@ -459,9 +468,11 @@ mod tests {
             ChatMessage::Assistant {
                 content,
                 tool_calls,
+                thinking,
             } => {
                 assert!(content.is_empty());
                 assert!(tool_calls.is_some_and(|v| !v.is_empty()));
+                assert!(thinking.is_none());
             }
             _ => panic!("Deserialized wrong role"),
         }