From 9c5fc6ecbd832efd9964e25db73ae26a2c8f330c Mon Sep 17 00:00:00 2001
From: Mikayla Maki
Date: Wed, 14 Jan 2026 14:29:56 -0800
Subject: [PATCH] Split token display for OpenAI (#46829)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This feature cost $15.

Up -> Tokens we're sending to the model.
Down -> Tokens we've received from the model.

[Screenshot: split input/output token counts in the agent panel, 2026-01-14 12:31 PM]

Release Notes:

- Changed the display of tokens for OpenAI models to reflect the input/output limits.

---------

Co-authored-by: Claude Opus 4.5
---
 crates/acp_thread/src/acp_thread.rs | 1 +
 crates/agent/src/tests/mod.rs | 4 +
 crates/agent/src/thread.rs | 1 +
 crates/agent_ui/src/acp/thread_view.rs | 145 +++++++++++++-----
 crates/language_model/src/language_model.rs | 6 +
 crates/language_models/src/provider/cloud.rs | 9 ++
 .../language_models/src/provider/open_ai.rs | 4 +
 .../src/provider/open_ai_compatible.rs | 4 +
 8 files changed, 137 insertions(+), 37 deletions(-)

diff --git a/crates/acp_thread/src/acp_thread.rs b/crates/acp_thread/src/acp_thread.rs
index 86ce2fa53f74f215a23258c3903d65ebff03f72d..5d95563e14831880ec34db5a413143740cbe9963 100644
--- a/crates/acp_thread/src/acp_thread.rs
+++ b/crates/acp_thread/src/acp_thread.rs
@@ -904,6 +904,7 @@ impl PlanEntry {
 pub struct TokenUsage {
     pub max_tokens: u64,
     pub used_tokens: u64,
+    pub input_tokens: u64,
     pub output_tokens: u64,
 }
diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs
index 107d6a7baafef1e61e63d48b1ca8fe645a219de9..b78290d41c6e31ad7b76fb4014c507bf2ca6c64e 100644
--- a/crates/agent/src/tests/mod.rs
+++ b/crates/agent/src/tests/mod.rs
@@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 32_000 + 16_000,
             max_tokens: 1_000_000,
+            input_tokens: 32_000,
             output_tokens: 16_000,
         })
     );
@@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 40_000 + 20_000,
             max_tokens: 1_000_000,
+            input_tokens: 40_000,
             output_tokens: 20_000,
         })
     );
@@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 32_000 + 16_000,
             max_tokens: 1_000_000,
+            input_tokens: 32_000,
             output_tokens: 16_000,
         })
     );
@@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 40_000 + 20_000,
             max_tokens: 1_000_000,
+            input_tokens: 40_000,
             output_tokens: 20_000,
         })
     );
diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index b9d82754bf523ee0ba6e4a60cb0d5b1cce120554..31ad5b9125b2bbd2cefa37e857f41a0b54942bee 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1291,6 +1291,7 @@ impl Thread {
         Some(acp_thread::TokenUsage {
             max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
             used_tokens: usage.total_tokens(),
+            input_tokens: usage.input_tokens,
             output_tokens: usage.output_tokens,
         })
     }
diff --git a/crates/agent_ui/src/acp/thread_view.rs b/crates/agent_ui/src/acp/thread_view.rs
index cf601ada3e731888f4cc35a7fc4d02dd959728ff..cd065d701f9f018595cb082f247271a0c9fdfa75 100644
--- a/crates/agent_ui/src/acp/thread_view.rs
+++ b/crates/agent_ui/src/acp/thread_view.rs
@@ -79,10 +79,9 @@ use crate::{
     ToggleBurnMode, ToggleProfileSelector,
 };
 
-/// Maximum number of lines to show for a collapsed terminal command preview.
 const MAX_COLLAPSED_LINES: usize = 3;
-const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
-const TOKEN_THRESHOLD: u64 = 1;
+const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
+const TOKEN_THRESHOLD: u64 = 250;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum ThreadFeedback {
@@ -6058,46 +6057,118 @@ impl AcpThreadView {
             .is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
     }
 
+    fn supports_split_token_display(&self, cx: &App) -> bool {
+        self.as_native_thread(cx)
+            .and_then(|thread| thread.read(cx).model())
+            .is_some_and(|model| model.supports_split_token_display())
+    }
+
     fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
         let thread = self.thread()?.read(cx);
         let usage = thread.token_usage()?;
         let is_generating = thread.status() != ThreadStatus::Idle;
+        let show_split = self.supports_split_token_display(cx);
 
-        let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
-        let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+        let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
+        let token_label = |text: String, animation_id: &'static str| {
+            Label::new(text)
+                .size(LabelSize::Small)
+                .color(Color::Muted)
+                .map(|label| {
+                    if is_generating {
+                        label
+                            .with_animation(
+                                animation_id,
+                                Animation::new(Duration::from_secs(2))
+                                    .repeat()
+                                    .with_easing(pulsating_between(0.3, 0.8)),
+                                |label, delta| label.alpha(delta),
+                            )
+                            .into_any()
+                    } else {
+                        label.into_any_element()
+                    }
+                })
+        };
 
-        Some(
-            h_flex()
-                .flex_shrink_0()
-                .gap_0p5()
-                .mr_1p5()
-                .child(
-                    Label::new(used)
-                        .size(LabelSize::Small)
-                        .color(Color::Muted)
-                        .map(|label| {
-                            if is_generating {
-                                label
-                                    .with_animation(
-                                        "used-tokens-label",
-                                        Animation::new(Duration::from_secs(2))
-                                            .repeat()
-                                            .with_easing(pulsating_between(0.3, 0.8)),
-                                        |label, delta| label.alpha(delta),
-                                    )
-                                    .into_any()
-                            } else {
-                                label.into_any_element()
-                            }
-                        }),
-                )
-                .child(
-                    Label::new("/")
-                        .size(LabelSize::Small)
-                        .color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
-                )
-                .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
-        )
+        if show_split {
+            let max_output_tokens = self
+                .as_native_thread(cx)
+                .and_then(|thread| thread.read(cx).model())
+                .and_then(|model| model.max_output_tokens())
+                .unwrap_or(0);
+
+            let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
+            let input_max = crate::text_thread_editor::humanize_token_count(
+                usage.max_tokens.saturating_sub(max_output_tokens),
+            );
+            let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
+            let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_1()
+                    .mr_1p5()
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowUp)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(input, "input-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(input_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    )
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowDown)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(output, "output-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(output_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    ),
+            )
+        } else {
+            let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
+            let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_0p5()
+                    .mr_1p5()
+                    .child(token_label(used, "used-tokens-label"))
+                    .child(
+                        Label::new("/")
+                            .size(LabelSize::Small)
+                            .color(separator_color),
+                    )
+                    .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
+            )
+        }
     }
 
     fn toggle_burn_mode(
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 56a970404419ec6042c463d26c2844eb0904f829..a42b2ca60896e0314562d146eea708fa07a4c255 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
         false
     }
 
+    /// Returns whether this model/provider reports accurate split input/output token counts.
+    /// When true, the UI may show separate input/output token indicators.
+    fn supports_split_token_display(&self) -> bool {
+        false
+    }
+
     fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
         LanguageModelToolSchemaFormat::JsonSchema
     }
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index ff8d01ec717c732bfd25dac024a33e48787e706d..2183520f412f5557de3dd11536019b2fdb1059ea 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
         self.model.supports_max_mode
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        use cloud_llm_client::LanguageModelProvider::*;
+        matches!(self.model.provider, OpenAi)
+    }
+
     fn telemetry_id(&self) -> String {
         format!("zed.dev/{}", self.model.id)
     }
@@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
             .map(|max_token_count| max_token_count as u64)
     }
 
+    fn max_output_tokens(&self) -> Option<u64> {
+        Some(self.model.max_output_tokens as u64)
+    }
+
     fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
         match &self.model.provider {
             cloud_llm_client::LanguageModelProvider::Anthropic => {
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 0acf61c7e118a1c8e08269eb50dc6be54a9dde10..c9fd8bfae4abf903412c286b1ea24a2d1ebb17d8 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
         }
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
     fn telemetry_id(&self) -> String {
         format!("openai/{}", self.model.id())
     }
diff --git a/crates/language_models/src/provider/open_ai_compatible.rs b/crates/language_models/src/provider/open_ai_compatible.rs
index 2bc6b0dc284ea66960c9557d3e1253a4e59afd62..d47ea26c594ab0abb5c859ed549d43e0ed3f859b 100644
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@@ -319,6 +319,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
         }
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
     fn telemetry_id(&self) -> String {
         format!("openai/{}", self.model.name)
     }
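
Note for reviewers: the split display divides the model's context window into an input budget and an output reservation, so the up arrow reads `input_tokens / (max_tokens - max_output_tokens)` and the down arrow reads `output_tokens / max_output_tokens`. The sketch below restates that arithmetic outside of gpui; it is a minimal example, where `TokenUsage` mirrors the struct in `crates/acp_thread` and `humanize` is a rough stand-in for `crate::text_thread_editor::humanize_token_count`, whose exact formatting is not part of this patch.

```rust
/// Mirrors acp_thread::TokenUsage as extended by this patch.
struct TokenUsage {
    max_tokens: u64,    // total context window for the model
    used_tokens: u64,   // input + output, driving the combined fallback display
    input_tokens: u64,  // tokens we're sending to the model (up arrow)
    output_tokens: u64, // tokens we've received from the model (down arrow)
}

/// Rough humanizer: 48_000 -> "48.0k". The real helper may format differently.
fn humanize(n: u64) -> String {
    if n >= 1_000_000 {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}k", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}

/// Builds the two "used / max" fragments shown next to the arrows.
/// saturating_sub guards against a max_output_tokens larger than the
/// window, exactly as render_token_usage does above.
fn split_display(usage: &TokenUsage, max_output_tokens: u64) -> (String, String) {
    let input_max = usage.max_tokens.saturating_sub(max_output_tokens);
    (
        format!("{} / {}", humanize(usage.input_tokens), humanize(input_max)),
        format!("{} / {}", humanize(usage.output_tokens), humanize(max_output_tokens)),
    )
}

fn main() {
    // Token counts borrowed from the test expectations above; the 400k
    // window and 128k output reservation are illustrative, not a real model's.
    let usage = TokenUsage {
        max_tokens: 400_000,
        used_tokens: 32_000 + 16_000,
        input_tokens: 32_000,
        output_tokens: 16_000,
    };

    // Combined fallback, as before this patch: "48.0k / 400.0k".
    println!("{} / {}", humanize(usage.used_tokens), humanize(usage.max_tokens));

    // Split display: prints "up 32.0k / 272.0k   down 16.0k / 128.0k".
    let (up, down) = split_display(&usage, 128_000);
    println!("up {up}   down {down}");
}
```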
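For other provider implementations: the UI only splits when `supports_split_token_display()` returns true, which per the new doc comment should mean the backend reports accurate per-direction counts; everything else keeps the combined `used / max` readout. Below is a sketch of what opting in looks like for a hypothetical provider, written against a pared-down stand-in for the trait (the real `LanguageModel` trait in `crates/language_model` has many more required methods).

```rust
/// Pared-down stand-in for the LanguageModel trait; only the two
/// methods this patch touches are shown.
trait LanguageModel {
    /// Mirrors the default added in this patch: providers must opt in.
    fn supports_split_token_display(&self) -> bool {
        false
    }

    /// The output reservation the UI subtracts from the context window.
    fn max_output_tokens(&self) -> Option<u64> {
        None
    }
}

/// Hypothetical OpenAI-compatible backend with trustworthy token counts.
struct MyCompatibleModel {
    max_output_tokens: u64,
}

impl LanguageModel for MyCompatibleModel {
    fn supports_split_token_display(&self) -> bool {
        true // this backend reports accurate input/output token usage
    }

    fn max_output_tokens(&self) -> Option<u64> {
        Some(self.max_output_tokens)
    }
}

fn main() {
    let model = MyCompatibleModel { max_output_tokens: 16_384 };
    assert!(model.supports_split_token_display());
    assert_eq!(model.max_output_tokens(), Some(16_384));
    println!("split display enabled, output cap {:?}", model.max_output_tokens());
}
```

Keeping the trait default at false and gating per provider makes sense here: a backend that only reports a combined total would otherwise drive the split readout with estimated numbers, which is presumably what the "accurate split input/output token counts" wording in the doc comment is guarding against.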