From 905d28cc5406860984db6dce1e947773a2c68bfb Mon Sep 17 00:00:00 2001 From: Elier <87112922+Oft3r@users.noreply.github.com> Date: Tue, 17 Mar 2026 05:38:14 -0500 Subject: [PATCH] Add stream_options.include_usage for OpenAI-compatible API token usage (#45812) ## Summary This PR enables token usage reporting in streaming responses for OpenAI-compatible APIs (OpenAI, xAI/Grok, OpenRouter, etc). ## Problem Currently, the token counter UI in the Agent Panel doesn't display usage for some OpenAI-compatible providers because they don't return usage data during streaming by default. According to OpenAI's API documentation, the `stream_options.include_usage` parameter must be set to `true` to receive usage statistics in streaming responses. ## Solution - Added StreamOptions struct with `include_usage` field to the open_ai crate - Added `stream_options` field to the Request struct - Automatically set `stream_options: { include_usage: true }` when `stream: true` - Updated edit_prediction requests with `stream_options: None` (non-streaming) ## Testing Tested with xAI Grok models - token counter now correctly shows usage after sending a message. 
## References - [OpenAI Chat Completions API - stream_options](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options) - [xAI API Documentation](https://docs.x.ai/api) --- crates/edit_prediction/src/mercury.rs | 1 + crates/edit_prediction_cli/src/openai_client.rs | 2 ++ crates/language_models/src/provider/open_ai.rs | 5 +++++ crates/open_ai/src/open_ai.rs | 15 +++++++++++++++ 4 files changed, 23 insertions(+) diff --git a/crates/edit_prediction/src/mercury.rs b/crates/edit_prediction/src/mercury.rs index 71362f4c873ca7b6f89030392449916cdc297b8e..df47a38062344512a784c6d2feb563e9848afb27 100644 --- a/crates/edit_prediction/src/mercury.rs +++ b/crates/edit_prediction/src/mercury.rs @@ -137,6 +137,7 @@ impl Mercury { content: open_ai::MessageContent::Plain(prompt), }], stream: false, + stream_options: None, max_completion_tokens: None, stop: vec![], temperature: None, diff --git a/crates/edit_prediction_cli/src/openai_client.rs b/crates/edit_prediction_cli/src/openai_client.rs index c9947e16099c7923e6c948045eda8ca08ff625cf..6bc9c2d77c0d6be6e2955182ebbce096be422945 100644 --- a/crates/edit_prediction_cli/src/openai_client.rs +++ b/crates/edit_prediction_cli/src/openai_client.rs @@ -40,6 +40,7 @@ impl PlainOpenAiClient { model: model.to_string(), messages, stream: false, + stream_options: None, max_completion_tokens: Some(max_tokens), stop: Vec::new(), temperature: None, @@ -490,6 +491,7 @@ impl BatchingOpenAiClient { model: serializable_request.model, messages, stream: false, + stream_options: None, max_completion_tokens: Some(serializable_request.max_tokens), stop: Vec::new(), temperature: None, diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index c1ebf76e0b0678d35a5e013e87f9efd9488a4e8d..e033be8ee234156bc2452c11fad438be70a7f143 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -506,6 +506,11 @@ pub fn 
into_open_ai( model: model_id.into(), messages, stream, + stream_options: if stream { + Some(open_ai::StreamOptions::default()) + } else { + None + }, stop: request.stop, temperature: request.temperature.or(Some(1.0)), max_completion_tokens: max_output_tokens, diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs index 25946591e320df4e2d58e8dd0341d7f27451cc89..c4a3e078d76eb028b90e5b80fe95b1281b795f34 100644 --- a/crates/open_ai/src/open_ai.rs +++ b/crates/open_ai/src/open_ai.rs @@ -295,12 +295,27 @@ impl Model { } } +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamOptions { + pub include_usage: bool, +} + +impl Default for StreamOptions { + fn default() -> Self { + Self { + include_usage: true, + } + } +} + #[derive(Debug, Serialize, Deserialize)] pub struct Request { pub model: String, pub messages: Vec<RequestMessage>, pub stream: bool, #[serde(default, skip_serializing_if = "Option::is_none")] + pub stream_options: Option<StreamOptions>, + #[serde(default, skip_serializing_if = "Option::is_none")] pub max_completion_tokens: Option<u64>, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub stop: Vec<String>,