lmstudio: Fix streaming not working in v0.3.15 (#30013)
Umesh Yadav and Peter Tripp created 7 months ago
Closes #29781
Tested with llama3, gemma3, and qwen3.
This is a breaking change: once these changes land, future versions of Zed will require LM Studio >= 0.3.15. For context on why it's breaking, see issue #29781.
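My understanding of the failure (see #29781 for the full discussion; the shapes below are illustrative, not captured wire traffic): as of 0.3.15 most streamed chunks carry a content-only delta with no `role` field, while the old code parsed each delta as the role-tagged `ChatMessage` enum, so those chunks failed to parse and the stream looked empty. A minimal sketch of that mismatch:

```rust
use serde::Deserialize;
use serde_json::json;

// Hypothetical stand-in for the old parse target; the real code used
// lmstudio::ChatMessage, an enum with User/Assistant/System variants
// (presumably tagged by the `role` field, as sketched here).
#[derive(Deserialize, Debug)]
#[serde(tag = "role", rename_all = "lowercase")]
enum TaggedDelta {
    Assistant { content: Option<String> },
}

fn main() {
    // A 0.3.15-style streamed delta: text, but no `role` field.
    let delta = json!({ "content": "Hello" });
    // The role-tagged parse rejects it, so the old code dropped the text.
    assert!(serde_json::from_value::<TaggedDelta>(delta).is_err());
}
```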
What this doesn't try to solve:
* Tool calling and thinking-text rendering. I'll raise a separate PR for these, since they aren't required to get streaming working.
https://github.com/user-attachments/assets/945f9c73-6323-4a88-92e2-2219b760a249
Release Notes:
- lmstudio: Fixed Zed support for LMStudio >= v0.3.15 (breaking change -- older versions are no longer supported).
---------
Co-authored-by: Peter Tripp <peter@zed.dev>
Change summary
crates/language_models/src/provider/lmstudio.rs | 76 +++++++++++-------
crates/lmstudio/src/lmstudio.rs | 8 ++
2 files changed, 54 insertions(+), 30 deletions(-)
Detailed changes
@@ -330,41 +330,23 @@ impl LanguageModel for LmStudioLanguageModel {
let future = self.request_limiter.stream(async move {
let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
- let stream = response
- .filter_map(|response| async move {
- match response {
- Ok(fragment) => {
- // Skip empty deltas
- if fragment.choices[0].delta.is_object()
- && fragment.choices[0].delta.as_object().unwrap().is_empty()
- {
- return None;
- }
- // Try to parse the delta as ChatMessage
- if let Ok(chat_message) = serde_json::from_value::<ChatMessage>(
- fragment.choices[0].delta.clone(),
- ) {
- let content = match chat_message {
- ChatMessage::User { content } => content,
- ChatMessage::Assistant { content, .. } => {
- content.unwrap_or_default()
- }
- ChatMessage::System { content } => content,
- };
- if !content.is_empty() {
- Some(Ok(content))
- } else {
- None
- }
- } else {
- None
- }
- }
+ // Create a stream mapper to handle content across multiple deltas
+ let stream_mapper = LmStudioStreamMapper::new();
+
+ let stream = response
+ .map(move |response| {
+ response.and_then(|fragment| stream_mapper.process_fragment(fragment))
+ })
+ .filter_map(|result| async move {
+ match result {
+ Ok(Some(content)) => Some(Ok(content)),
+ Ok(None) => None,
Err(error) => Some(Err(error)),
}
})
.boxed();
+
Ok(stream)
});
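As an aside for reviewers: the new pipeline is just `map` (run the mapper, staying inside `Result`) followed by `filter_map` (drop the `Ok(None)` chunks). A minimal, self-contained sketch of that shape with hypothetical stand-in types (the real stream comes from `stream_chat_completion`):

```rust
use futures::{executor::block_on, stream, StreamExt};

// Hypothetical stand-ins for the real fragment and mapper types.
struct Fragment {
    content: Option<String>,
}

struct Mapper;

impl Mapper {
    // Mirrors the shape of LmStudioStreamMapper::process_fragment:
    // Ok(Some(text)) emits, Ok(None) skips the chunk, Err propagates.
    fn process(&self, f: Fragment) -> Result<Option<String>, String> {
        Ok(f.content.filter(|c| !c.is_empty()))
    }
}

fn main() {
    let mapper = Mapper;
    let chunks = stream::iter(vec![
        Ok(Fragment { content: Some("Hel".into()) }),
        Ok(Fragment { content: None }), // e.g. an empty or role-only delta
        Ok(Fragment { content: Some("lo".into()) }),
    ]);
    let collected: Vec<Result<String, String>> = block_on(
        chunks
            .map(move |r: Result<Fragment, String>| {
                r.and_then(|f| mapper.process(f))
            })
            .filter_map(|r| async move {
                match r {
                    Ok(Some(text)) => Some(Ok(text)),
                    Ok(None) => None,
                    Err(e) => Some(Err(e)),
                }
            })
            .collect(),
    );
    let expected: Vec<Result<String, String>> =
        vec![Ok("Hel".into()), Ok("lo".into())];
    assert_eq!(collected, expected);
}
```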
@@ -382,6 +364,40 @@ impl LanguageModel for LmStudioLanguageModel {
}
}
+// This will be more useful when we implement tool calling. Currently keeping it empty.
+struct LmStudioStreamMapper {}
+
+impl LmStudioStreamMapper {
+ fn new() -> Self {
+ Self {}
+ }
+
+ fn process_fragment(&self, fragment: lmstudio::ChatResponse) -> Result<Option<String>> {
+ // Most of the time, there will be only one choice
+ let Some(choice) = fragment.choices.first() else {
+ return Ok(None);
+ };
+
+ // Extract the delta content
+ if let Ok(delta) =
+ serde_json::from_value::<lmstudio::ResponseMessageDelta>(choice.delta.clone())
+ {
+ if let Some(content) = delta.content {
+ if !content.is_empty() {
+ return Ok(Some(content));
+ }
+ }
+ }
+
+ // If there's a finish_reason, we're done
+ if choice.finish_reason.is_some() {
+ return Ok(None);
+ }
+
+ Ok(None)
+ }
+}
+
struct ConfigurationView {
state: gpui::Entity<State>,
loading_models_task: Option<Task<()>>,
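For reviewers, the decision table `process_fragment` implements, distilled into a tiny self-contained sketch (hand-rolled stand-in logic, not the real types):

```rust
// Stand-in for the per-fragment decisions: emit non-empty content, skip
// everything else (empty choices, empty deltas, finish_reason chunks).
fn decide(choices_empty: bool, content: Option<&str>) -> Option<String> {
    if choices_empty {
        return None; // no choices => nothing to emit
    }
    match content {
        Some(text) if !text.is_empty() => Some(text.to_string()),
        _ => None, // empty / absent content (incl. finish_reason chunks)
    }
}

fn main() {
    assert_eq!(decide(true, Some("ignored")), None);
    assert_eq!(decide(false, Some("")), None);
    assert_eq!(decide(false, None), None);
    assert_eq!(decide(false, Some("Hi")), Some("Hi".to_string()));
}
```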
@@ -221,6 +221,14 @@ pub enum CompatibilityType {
Mlx,
}
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct ResponseMessageDelta {
+ pub role: Option<Role>,
+ pub content: Option<String>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub tool_calls: Option<Vec<ToolCallChunk>>,
+}
+
pub async fn complete(
client: &dyn HttpClient,
api_url: &str,
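For completeness, a round-trip sketch of how those serde attributes behave, on a simplified stand-in (Role and ToolCallChunk are reduced to Strings here): every field is `Option`, so a role-less, tool-less delta still deserializes, which is the property the streaming fix relies on.

```rust
use serde::{Deserialize, Serialize};

// Simplified stand-in for lmstudio::ResponseMessageDelta.
#[derive(Serialize, Deserialize, Debug)]
struct Delta {
    role: Option<String>,
    content: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<String>>,
}

fn main() {
    // `default` (and serde's Option handling) let a delta without `role`
    // or `tool_calls` deserialize cleanly...
    let d: Delta = serde_json::from_str(r#"{"content":"Hi"}"#).unwrap();
    assert_eq!(d.role, None);
    assert_eq!(d.tool_calls, None);
    // ...and `skip_serializing_if` drops `tool_calls` again on the way out.
    assert_eq!(
        serde_json::to_string(&d).unwrap(),
        r#"{"role":null,"content":"Hi"}"#
    );
}
```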