Split token display for OpenAI (#46829)

Mikayla Maki and Claude Opus 4.5 created

This feature cost $15.

Up -> Tokens we're sending to the model
Down -> Tokens we've received from the model.

<img width="377" height="69" alt="Screenshot 2026-01-14 at 12 31 01 PM"
src="https://github.com/user-attachments/assets/fc15824f-de5d-466b-8cc1-329f3c1940bb"
/>



Release Notes:

- Changed the display of tokens for OpenAI models to reflect the
input/output limits.

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>

Change summary

crates/acp_thread/src/acp_thread.rs                       |   1 
crates/agent/src/tests/mod.rs                             |   4 
crates/agent/src/thread.rs                                |   1 
crates/agent_ui/src/acp/thread_view.rs                    | 145 ++++++--
crates/language_model/src/language_model.rs               |   6 
crates/language_models/src/provider/cloud.rs              |   9 
crates/language_models/src/provider/open_ai.rs            |   4 
crates/language_models/src/provider/open_ai_compatible.rs |   4 
8 files changed, 137 insertions(+), 37 deletions(-)

Detailed changes

crates/acp_thread/src/acp_thread.rs 🔗

@@ -904,6 +904,7 @@ impl PlanEntry {
 pub struct TokenUsage {
     pub max_tokens: u64,
     pub used_tokens: u64,
+    pub input_tokens: u64,
     pub output_tokens: u64,
 }
 

crates/agent/src/tests/mod.rs 🔗

@@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
             Some(acp_thread::TokenUsage {
                 used_tokens: 32_000 + 16_000,
                 max_tokens: 1_000_000,
+                input_tokens: 32_000,
                 output_tokens: 16_000,
             })
         );
@@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
             Some(acp_thread::TokenUsage {
                 used_tokens: 40_000 + 20_000,
                 max_tokens: 1_000_000,
+                input_tokens: 40_000,
                 output_tokens: 20_000,
             })
         );
@@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
                 Some(acp_thread::TokenUsage {
                     used_tokens: 32_000 + 16_000,
                     max_tokens: 1_000_000,
+                    input_tokens: 32_000,
                     output_tokens: 16_000,
                 })
             );
@@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
             Some(acp_thread::TokenUsage {
                 used_tokens: 40_000 + 20_000,
                 max_tokens: 1_000_000,
+                input_tokens: 40_000,
                 output_tokens: 20_000,
             })
         );

crates/agent/src/thread.rs 🔗

@@ -1291,6 +1291,7 @@ impl Thread {
         Some(acp_thread::TokenUsage {
             max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
             used_tokens: usage.total_tokens(),
+            input_tokens: usage.input_tokens,
             output_tokens: usage.output_tokens,
         })
     }

crates/agent_ui/src/acp/thread_view.rs 🔗

@@ -79,10 +79,9 @@ use crate::{
     ToggleBurnMode, ToggleProfileSelector,
 };
 
-/// Maximum number of lines to show for a collapsed terminal command preview.
 const MAX_COLLAPSED_LINES: usize = 3;
-const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
-const TOKEN_THRESHOLD: u64 = 1;
+const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
+const TOKEN_THRESHOLD: u64 = 250;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum ThreadFeedback {
@@ -6058,46 +6057,118 @@ impl AcpThreadView {
             .is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
     }
 
+    fn supports_split_token_display(&self, cx: &App) -> bool {
+        self.as_native_thread(cx)
+            .and_then(|thread| thread.read(cx).model())
+            .is_some_and(|model| model.supports_split_token_display())
+    }
+
     fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
         let thread = self.thread()?.read(cx);
         let usage = thread.token_usage()?;
         let is_generating = thread.status() != ThreadStatus::Idle;
+        let show_split = self.supports_split_token_display(cx);
 
-        let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
-        let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+        let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
+        let token_label = |text: String, animation_id: &'static str| {
+            Label::new(text)
+                .size(LabelSize::Small)
+                .color(Color::Muted)
+                .map(|label| {
+                    if is_generating {
+                        label
+                            .with_animation(
+                                animation_id,
+                                Animation::new(Duration::from_secs(2))
+                                    .repeat()
+                                    .with_easing(pulsating_between(0.3, 0.8)),
+                                |label, delta| label.alpha(delta),
+                            )
+                            .into_any()
+                    } else {
+                        label.into_any_element()
+                    }
+                })
+        };
 
-        Some(
-            h_flex()
-                .flex_shrink_0()
-                .gap_0p5()
-                .mr_1p5()
-                .child(
-                    Label::new(used)
-                        .size(LabelSize::Small)
-                        .color(Color::Muted)
-                        .map(|label| {
-                            if is_generating {
-                                label
-                                    .with_animation(
-                                        "used-tokens-label",
-                                        Animation::new(Duration::from_secs(2))
-                                            .repeat()
-                                            .with_easing(pulsating_between(0.3, 0.8)),
-                                        |label, delta| label.alpha(delta),
-                                    )
-                                    .into_any()
-                            } else {
-                                label.into_any_element()
-                            }
-                        }),
-                )
-                .child(
-                    Label::new("/")
-                        .size(LabelSize::Small)
-                        .color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
-                )
-                .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
-        )
+        if show_split {
+            let max_output_tokens = self
+                .as_native_thread(cx)
+                .and_then(|thread| thread.read(cx).model())
+                .and_then(|model| model.max_output_tokens())
+                .unwrap_or(0);
+
+            let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
+            let input_max = crate::text_thread_editor::humanize_token_count(
+                usage.max_tokens.saturating_sub(max_output_tokens),
+            );
+            let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
+            let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_1()
+                    .mr_1p5()
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowUp)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(input, "input-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(input_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    )
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowDown)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(output, "output-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(output_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    ),
+            )
+        } else {
+            let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
+            let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_0p5()
+                    .mr_1p5()
+                    .child(token_label(used, "used-tokens-label"))
+                    .child(
+                        Label::new("/")
+                            .size(LabelSize::Small)
+                            .color(separator_color),
+                    )
+                    .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
+            )
+        }
     }
 
     fn toggle_burn_mode(

crates/language_model/src/language_model.rs 🔗

@@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
         false
     }
 
+    /// Returns whether this model/provider reports accurate split input/output token counts.
+    /// When true, the UI may show separate input/output token indicators.
+    fn supports_split_token_display(&self) -> bool {
+        false
+    }
+
     fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
         LanguageModelToolSchemaFormat::JsonSchema
     }

crates/language_models/src/provider/cloud.rs 🔗

@@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
         self.model.supports_max_mode
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        use cloud_llm_client::LanguageModelProvider::*;
+        matches!(self.model.provider, OpenAi)
+    }
+
     fn telemetry_id(&self) -> String {
         format!("zed.dev/{}", self.model.id)
     }
@@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
             .map(|max_token_count| max_token_count as u64)
     }
 
+    fn max_output_tokens(&self) -> Option<u64> {
+        Some(self.model.max_output_tokens as u64)
+    }
+
     fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
         match &self.model.provider {
             cloud_llm_client::LanguageModelProvider::Anthropic => {

crates/language_models/src/provider/open_ai.rs 🔗

@@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
         }
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
     fn telemetry_id(&self) -> String {
         format!("openai/{}", self.model.id())
     }