lmstudio: Fix streaming not working in v0.3.15 (#30013)

Authored by Umesh Yadav and Peter Tripp

Closes #29781

Tested with llama3, gemma3, and qwen3.

This is a breaking change: once these changes land, future versions of Zed will require LM Studio >= 0.3.15. For context on why this is a breaking change, see issue #29781.

What this PR does not try to solve:

* Tool calling and thinking-text rendering. These will be raised in a separate PR, as they are not required here to make streaming work.


https://github.com/user-attachments/assets/945f9c73-6323-4a88-92e2-2219b760a249

Release Notes:

- lmstudio: Fixed Zed support for LMStudio >= v0.3.15 (breaking change -- older versions are no longer supported).

---------

Co-authored-by: Peter Tripp <peter@zed.dev>

Change summary

crates/language_models/src/provider/lmstudio.rs | 76 +++++++++++-------
crates/lmstudio/src/lmstudio.rs                 |  8 ++
2 files changed, 54 insertions(+), 30 deletions(-)

Detailed changes

crates/language_models/src/provider/lmstudio.rs

@@ -330,41 +330,23 @@ impl LanguageModel for LmStudioLanguageModel {
 
         let future = self.request_limiter.stream(async move {
             let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
-            let stream = response
-                .filter_map(|response| async move {
-                    match response {
-                        Ok(fragment) => {
-                            // Skip empty deltas
-                            if fragment.choices[0].delta.is_object()
-                                && fragment.choices[0].delta.as_object().unwrap().is_empty()
-                            {
-                                return None;
-                            }
 
-                            // Try to parse the delta as ChatMessage
-                            if let Ok(chat_message) = serde_json::from_value::<ChatMessage>(
-                                fragment.choices[0].delta.clone(),
-                            ) {
-                                let content = match chat_message {
-                                    ChatMessage::User { content } => content,
-                                    ChatMessage::Assistant { content, .. } => {
-                                        content.unwrap_or_default()
-                                    }
-                                    ChatMessage::System { content } => content,
-                                };
-                                if !content.is_empty() {
-                                    Some(Ok(content))
-                                } else {
-                                    None
-                                }
-                            } else {
-                                None
-                            }
-                        }
+            // Create a stream mapper to handle content across multiple deltas
+            let stream_mapper = LmStudioStreamMapper::new();
+
+            let stream = response
+                .map(move |response| {
+                    response.and_then(|fragment| stream_mapper.process_fragment(fragment))
+                })
+                .filter_map(|result| async move {
+                    match result {
+                        Ok(Some(content)) => Some(Ok(content)),
+                        Ok(None) => None,
                         Err(error) => Some(Err(error)),
                     }
                 })
                 .boxed();
+
             Ok(stream)
         });
 
@@ -382,6 +364,40 @@ impl LanguageModel for LmStudioLanguageModel {
     }
 }
 
+// This will be more useful when we implement tool calling. Currently keeping it empty.
+struct LmStudioStreamMapper {}
+
+impl LmStudioStreamMapper {
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn process_fragment(&self, fragment: lmstudio::ChatResponse) -> Result<Option<String>> {
+        // Most of the time, there will be only one choice
+        let Some(choice) = fragment.choices.first() else {
+            return Ok(None);
+        };
+
+        // Extract the delta content
+        if let Ok(delta) =
+            serde_json::from_value::<lmstudio::ResponseMessageDelta>(choice.delta.clone())
+        {
+            if let Some(content) = delta.content {
+                if !content.is_empty() {
+                    return Ok(Some(content));
+                }
+            }
+        }
+
+        // If there's a finish_reason, we're done
+        if choice.finish_reason.is_some() {
+            return Ok(None);
+        }
+
+        Ok(None)
+    }
+}
+
 struct ConfigurationView {
     state: gpui::Entity<State>,
     loading_models_task: Option<Task<()>>,

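For reference, here is a minimal, self-contained sketch of the parsing path the new `LmStudioStreamMapper` takes. The `ChatResponse`, `ChoiceDelta`, and `ResponseMessageDelta` structs below are simplified stand-ins for the real `lmstudio` types (trimmed to the fields the streaming path touches), and the JSON fragments are illustrative, not captured from LM Studio:

```rust
use serde::Deserialize;
use serde_json::json;

// Simplified stand-ins for the lmstudio crate's response types.
#[derive(Deserialize)]
struct ChatResponse {
    choices: Vec<ChoiceDelta>,
}

#[derive(Deserialize)]
struct ChoiceDelta {
    delta: serde_json::Value,
    finish_reason: Option<String>,
}

#[derive(Deserialize)]
struct ResponseMessageDelta {
    content: Option<String>,
}

// Mirrors LmStudioStreamMapper::process_fragment: emit non-empty
// content, swallow everything else (empty deltas, finish markers).
fn process_fragment(fragment: &ChatResponse) -> Option<String> {
    let choice = fragment.choices.first()?;
    if let Ok(delta) =
        serde_json::from_value::<ResponseMessageDelta>(choice.delta.clone())
    {
        if let Some(content) = delta.content {
            if !content.is_empty() {
                return Some(content);
            }
        }
    }
    // A finish_reason marks the end of the stream; nothing more to emit.
    if choice.finish_reason.is_some() {
        return None;
    }
    None
}

fn main() {
    // Illustrative fragments: one carrying text, one terminating the stream.
    let fragments = [
        json!({"choices": [{"delta": {"role": "assistant", "content": "Hel"}, "finish_reason": null}]}),
        json!({"choices": [{"delta": {}, "finish_reason": "stop"}]}),
    ];
    for value in fragments {
        let fragment: ChatResponse = serde_json::from_value(value).unwrap();
        println!("{:?}", process_fragment(&fragment)); // Some("Hel"), then None
    }
}
```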
crates/lmstudio/src/lmstudio.rs

@@ -221,6 +221,14 @@ pub enum CompatibilityType {
     Mlx,
 }
 
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct ResponseMessageDelta {
+    pub role: Option<Role>,
+    pub content: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tool_calls: Option<Vec<ToolCallChunk>>,
+}
+
 pub async fn complete(
     client: &dyn HttpClient,
     api_url: &str,