From 9c5fc6ecbd832efd9964e25db73ae26a2c8f330c Mon Sep 17 00:00:00 2001
From: Mikayla Maki
Date: Wed, 14 Jan 2026 14:29:56 -0800
Subject: [PATCH] Split token display for OpenAI (#46829)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This feature cost $15.

Up -> Tokens we're sending to the model.
Down -> Tokens we've received from the model.

[Screenshot: split input/output token counts in the agent panel, 2026-01-14 12:31 PM]

Release Notes:

- Changed the display of tokens for OpenAI models to reflect the input/output limits.

---------

Co-authored-by: Claude Opus 4.5
---
 crates/acp_thread/src/acp_thread.rs | 1 +
 crates/agent/src/tests/mod.rs | 4 +
 crates/agent/src/thread.rs | 1 +
 crates/agent_ui/src/acp/thread_view.rs | 145 +++++++++++++-----
 crates/language_model/src/language_model.rs | 6 +
 crates/language_models/src/provider/cloud.rs | 9 ++
 .../language_models/src/provider/open_ai.rs | 4 +
 .../src/provider/open_ai_compatible.rs | 4 +
 8 files changed, 137 insertions(+), 37 deletions(-)

diff --git a/crates/acp_thread/src/acp_thread.rs b/crates/acp_thread/src/acp_thread.rs
index 86ce2fa53f74f215a23258c3903d65ebff03f72d..5d95563e14831880ec34db5a413143740cbe9963 100644
--- a/crates/acp_thread/src/acp_thread.rs
+++ b/crates/acp_thread/src/acp_thread.rs
@@ -904,6 +904,7 @@ impl PlanEntry {
 pub struct TokenUsage {
     pub max_tokens: u64,
     pub used_tokens: u64,
+    pub input_tokens: u64,
     pub output_tokens: u64,
 }
diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs
index 107d6a7baafef1e61e63d48b1ca8fe645a219de9..b78290d41c6e31ad7b76fb4014c507bf2ca6c64e 100644
--- a/crates/agent/src/tests/mod.rs
+++ b/crates/agent/src/tests/mod.rs
@@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 32_000 + 16_000,
             max_tokens: 1_000_000,
+            input_tokens: 32_000,
             output_tokens: 16_000,
         })
     );
@@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 40_000 + 20_000,
             max_tokens: 1_000_000,
+            input_tokens: 40_000,
             output_tokens: 20_000,
         })
     );
@@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 32_000 + 16_000,
             max_tokens: 1_000_000,
+            input_tokens: 32_000,
             output_tokens: 16_000,
         })
     );
@@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
         Some(acp_thread::TokenUsage {
             used_tokens: 40_000 + 20_000,
             max_tokens: 1_000_000,
+            input_tokens: 40_000,
             output_tokens: 20_000,
         })
     );
diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index b9d82754bf523ee0ba6e4a60cb0d5b1cce120554..31ad5b9125b2bbd2cefa37e857f41a0b54942bee 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1291,6 +1291,7 @@ impl Thread {
         Some(acp_thread::TokenUsage {
             max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
             used_tokens: usage.total_tokens(),
+            input_tokens: usage.input_tokens,
             output_tokens: usage.output_tokens,
         })
     }
diff --git a/crates/agent_ui/src/acp/thread_view.rs b/crates/agent_ui/src/acp/thread_view.rs
index cf601ada3e731888f4cc35a7fc4d02dd959728ff..cd065d701f9f018595cb082f247271a0c9fdfa75 100644
--- a/crates/agent_ui/src/acp/thread_view.rs
+++ b/crates/agent_ui/src/acp/thread_view.rs
@@ -79,10 +79,9 @@ use crate::{
     ToggleBurnMode, ToggleProfileSelector,
 };
 
-/// Maximum number of lines to show for a collapsed terminal command preview.
 const MAX_COLLAPSED_LINES: usize = 3;
-const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
-const TOKEN_THRESHOLD: u64 = 1;
+const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
+const TOKEN_THRESHOLD: u64 = 250;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum ThreadFeedback {
@@ -6058,46 +6057,118 @@ impl AcpThreadView {
             .is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
     }
 
+    fn supports_split_token_display(&self, cx: &App) -> bool {
+        self.as_native_thread(cx)
+            .and_then(|thread| thread.read(cx).model())
+            .is_some_and(|model| model.supports_split_token_display())
+    }
+
     fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
         let thread = self.thread()?.read(cx);
         let usage = thread.token_usage()?;
         let is_generating = thread.status() != ThreadStatus::Idle;
+        let show_split = self.supports_split_token_display(cx);
 
-        let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
-        let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+        let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
+        let token_label = |text: String, animation_id: &'static str| {
+            Label::new(text)
+                .size(LabelSize::Small)
+                .color(Color::Muted)
+                .map(|label| {
+                    if is_generating {
+                        label
+                            .with_animation(
+                                animation_id,
+                                Animation::new(Duration::from_secs(2))
+                                    .repeat()
+                                    .with_easing(pulsating_between(0.3, 0.8)),
+                                |label, delta| label.alpha(delta),
+                            )
+                            .into_any()
+                    } else {
+                        label.into_any_element()
+                    }
+                })
+        };
 
-        Some(
-            h_flex()
-                .flex_shrink_0()
-                .gap_0p5()
-                .mr_1p5()
-                .child(
-                    Label::new(used)
-                        .size(LabelSize::Small)
-                        .color(Color::Muted)
-                        .map(|label| {
-                            if is_generating {
-                                label
-                                    .with_animation(
-                                        "used-tokens-label",
-                                        Animation::new(Duration::from_secs(2))
-                                            .repeat()
-                                            .with_easing(pulsating_between(0.3, 0.8)),
-                                        |label, delta| label.alpha(delta),
-                                    )
-                                    .into_any()
-                            } else {
-                                label.into_any_element()
-                            }
-                        }),
-                )
-                .child(
-                    Label::new("/")
-                        .size(LabelSize::Small)
-                        .color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
-                )
-                .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
-        )
+        if show_split {
+            let max_output_tokens = self
+                .as_native_thread(cx)
+                .and_then(|thread| thread.read(cx).model())
+                .and_then(|model| model.max_output_tokens())
+                .unwrap_or(0);
+
+            let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
+            let input_max = crate::text_thread_editor::humanize_token_count(
+                usage.max_tokens.saturating_sub(max_output_tokens),
+            );
+            let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
+            let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_1()
+                    .mr_1p5()
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowUp)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(input, "input-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(input_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    )
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowDown)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(output, "output-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(output_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    ),
+            )
+        } else {
+            let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
+            let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_0p5()
+                    .mr_1p5()
+                    .child(token_label(used, "used-tokens-label"))
+                    .child(
+                        Label::new("/")
+                            .size(LabelSize::Small)
+                            .color(separator_color),
+                    )
+                    .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
+            )
+        }
     }
 
     fn toggle_burn_mode(
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 56a970404419ec6042c463d26c2844eb0904f829..a42b2ca60896e0314562d146eea708fa07a4c255 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
         false
     }
 
+    /// Returns whether this model/provider reports accurate split input/output token counts.
+    /// When true, the UI may show separate input/output token indicators.
+    fn supports_split_token_display(&self) -> bool {
+        false
+    }
+
     fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
         LanguageModelToolSchemaFormat::JsonSchema
     }
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index ff8d01ec717c732bfd25dac024a33e48787e706d..2183520f412f5557de3dd11536019b2fdb1059ea 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
         self.model.supports_max_mode
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        use cloud_llm_client::LanguageModelProvider::*;
+        matches!(self.model.provider, OpenAi)
+    }
+
     fn telemetry_id(&self) -> String {
         format!("zed.dev/{}", self.model.id)
     }
@@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
             .map(|max_token_count| max_token_count as u64)
     }
 
+    fn max_output_tokens(&self) -> Option<u64> {
+        Some(self.model.max_output_tokens as u64)
+    }
+
     fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
         match &self.model.provider {
             cloud_llm_client::LanguageModelProvider::Anthropic => {
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 0acf61c7e118a1c8e08269eb50dc6be54a9dde10..c9fd8bfae4abf903412c286b1ea24a2d1ebb17d8 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
         }
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
     fn telemetry_id(&self) -> String {
         format!("openai/{}", self.model.id())
     }
diff --git a/crates/language_models/src/provider/open_ai_compatible.rs b/crates/language_models/src/provider/open_ai_compatible.rs
index 2bc6b0dc284ea66960c9557d3e1253a4e59afd62..d47ea26c594ab0abb5c859ed549d43e0ed3f859b 100644
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@@ -319,6 +319,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
         }
     }
 
+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
     fn telemetry_id(&self) -> String {
         format!("openai/{}", self.model.name)
     }
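
Note for reviewers: the split display divides the model's context window into an input budget and an output reservation, so the up arrow reads `input_tokens / (max_tokens - max_output_tokens)` and the down arrow reads `output_tokens / max_output_tokens`. The sketch below restates that arithmetic outside of gpui; it is a minimal example, where `TokenUsage` mirrors the struct in `crates/acp_thread` and `humanize` is a rough stand-in for `crate::text_thread_editor::humanize_token_count`, whose exact formatting is not part of this patch.

```rust
/// Mirrors acp_thread::TokenUsage as extended by this patch.
struct TokenUsage {
    max_tokens: u64,    // total context window for the model
    used_tokens: u64,   // input + output, driving the combined fallback display
    input_tokens: u64,  // tokens we're sending to the model (up arrow)
    output_tokens: u64, // tokens we've received from the model (down arrow)
}

/// Rough humanizer: 48_000 -> "48.0k". The real helper may format differently.
fn humanize(n: u64) -> String {
    if n >= 1_000_000 {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}k", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}

/// Builds the two "used / max" fragments shown next to the arrows.
/// saturating_sub guards against a max_output_tokens larger than the
/// window, exactly as render_token_usage does above.
fn split_display(usage: &TokenUsage, max_output_tokens: u64) -> (String, String) {
    let input_max = usage.max_tokens.saturating_sub(max_output_tokens);
    (
        format!("{} / {}", humanize(usage.input_tokens), humanize(input_max)),
        format!("{} / {}", humanize(usage.output_tokens), humanize(max_output_tokens)),
    )
}

fn main() {
    // Token counts borrowed from the test expectations above; the 400k
    // window and 128k output reservation are illustrative, not a real model's.
    let usage = TokenUsage {
        max_tokens: 400_000,
        used_tokens: 32_000 + 16_000,
        input_tokens: 32_000,
        output_tokens: 16_000,
    };

    // Combined fallback, as before this patch: "48.0k / 400.0k".
    println!("{} / {}", humanize(usage.used_tokens), humanize(usage.max_tokens));

    // Split display: prints "up 32.0k / 272.0k   down 16.0k / 128.0k".
    let (up, down) = split_display(&usage, 128_000);
    println!("up {up}   down {down}");
}
```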
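For other provider implementations: the UI only splits when `supports_split_token_display()` returns true, which per the new doc comment should mean the backend reports accurate per-direction counts; everything else keeps the combined `used / max` readout. Below is a sketch of what opting in looks like for a hypothetical provider, written against a pared-down stand-in for the trait (the real `LanguageModel` trait in `crates/language_model` has many more required methods).

```rust
/// Pared-down stand-in for the LanguageModel trait; only the two
/// methods this patch touches are shown.
trait LanguageModel {
    /// Mirrors the default added in this patch: providers must opt in.
    fn supports_split_token_display(&self) -> bool {
        false
    }

    /// The output reservation the UI subtracts from the context window.
    fn max_output_tokens(&self) -> Option<u64> {
        None
    }
}

/// Hypothetical OpenAI-compatible backend with trustworthy token counts.
struct MyCompatibleModel {
    max_output_tokens: u64,
}

impl LanguageModel for MyCompatibleModel {
    fn supports_split_token_display(&self) -> bool {
        true // this backend reports accurate input/output token usage
    }

    fn max_output_tokens(&self) -> Option<u64> {
        Some(self.max_output_tokens)
    }
}

fn main() {
    let model = MyCompatibleModel { max_output_tokens: 16_384 };
    assert!(model.supports_split_token_display());
    assert_eq!(model.max_output_tokens(), Some(16_384));
    println!("split display enabled, output cap {:?}", model.max_output_tokens());
}
```

Keeping the trait default at false and gating per provider makes sense here: a backend that only reports a combined total would otherwise drive the split readout with estimated numbers, which is presumably what the "accurate split input/output token counts" wording in the doc comment is guarding against.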