Created by Mikayla Maki and Claude Opus 4.5. This feature cost $15.

Up -> Tokens we're sending to the model.
Down -> Tokens we've received from the model.
<img width="377" height="69" alt="Screenshot 2026-01-14 at 12 31 01 PM"
src="https://github.com/user-attachments/assets/fc15824f-de5d-466b-8cc1-329f3c1940bb"
/>
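To make the numbers concrete (these are made up, not taken from the screenshot): for a model with a 1M-token context window and a 128K max-output allowance, the header would read roughly "up 3.4K / 872K, down 1.2K / 128K": input tokens used over the input budget (context window minus the max output allowance), and output tokens used over the max output allowance.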
Release Notes:
- Changed the display of tokens for OpenAI models to reflect the
input/output limits.
---------
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
crates/acp_thread/src/acp_thread.rs | 1
crates/agent/src/tests/mod.rs | 4
crates/agent/src/thread.rs | 1
crates/agent_ui/src/acp/thread_view.rs | 145 ++++++--
crates/language_model/src/language_model.rs | 6
crates/language_models/src/provider/cloud.rs | 9
crates/language_models/src/provider/open_ai.rs | 4
crates/language_models/src/provider/open_ai_compatible.rs | 4
8 files changed, 137 insertions(+), 37 deletions(-)
crates/acp_thread/src/acp_thread.rs
@@ -904,6 +904,7 @@ impl PlanEntry {
pub struct TokenUsage {
pub max_tokens: u64,
pub used_tokens: u64,
+ pub input_tokens: u64,
pub output_tokens: u64,
}
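Not part of the diff: a minimal sketch of the invariant the new field appears to satisfy, using the values the updated tests below assert (used_tokens stays the sum of input and output tokens).

```rust
// Illustrative mirror of acp_thread::TokenUsage after this change.
struct TokenUsage {
    max_tokens: u64,
    used_tokens: u64, // input_tokens + output_tokens
    input_tokens: u64,
    output_tokens: u64,
}

fn main() {
    // Values taken from the test hunks below.
    let usage = TokenUsage {
        max_tokens: 1_000_000,
        used_tokens: 32_000 + 16_000,
        input_tokens: 32_000,
        output_tokens: 16_000,
    };
    assert_eq!(usage.used_tokens, usage.input_tokens + usage.output_tokens);
}
```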
crates/agent/src/tests/mod.rs
@@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 32_000 + 16_000,
max_tokens: 1_000_000,
+ input_tokens: 32_000,
output_tokens: 16_000,
})
);
@@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 40_000 + 20_000,
max_tokens: 1_000_000,
+ input_tokens: 40_000,
output_tokens: 20_000,
})
);
@@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 32_000 + 16_000,
max_tokens: 1_000_000,
+ input_tokens: 32_000,
output_tokens: 16_000,
})
);
@@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 40_000 + 20_000,
max_tokens: 1_000_000,
+ input_tokens: 40_000,
output_tokens: 20_000,
})
);
crates/agent/src/thread.rs
@@ -1291,6 +1291,7 @@ impl Thread {
Some(acp_thread::TokenUsage {
max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
used_tokens: usage.total_tokens(),
+ input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
})
}
crates/agent_ui/src/acp/thread_view.rs
@@ -79,10 +79,9 @@ use crate::{
ToggleBurnMode, ToggleProfileSelector,
};
-/// Maximum number of lines to show for a collapsed terminal command preview.
const MAX_COLLAPSED_LINES: usize = 3;
-const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
-const TOKEN_THRESHOLD: u64 = 1;
+const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
+const TOKEN_THRESHOLD: u64 = 250;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum ThreadFeedback {
@@ -6058,46 +6057,118 @@ impl AcpThreadView {
.is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
}
+ fn supports_split_token_display(&self, cx: &App) -> bool {
+ self.as_native_thread(cx)
+ .and_then(|thread| thread.read(cx).model())
+ .is_some_and(|model| model.supports_split_token_display())
+ }
+
fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
let thread = self.thread()?.read(cx);
let usage = thread.token_usage()?;
let is_generating = thread.status() != ThreadStatus::Idle;
+ let show_split = self.supports_split_token_display(cx);
- let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
- let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+ let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
+ let token_label = |text: String, animation_id: &'static str| {
+ Label::new(text)
+ .size(LabelSize::Small)
+ .color(Color::Muted)
+ .map(|label| {
+ if is_generating {
+ label
+ .with_animation(
+ animation_id,
+ Animation::new(Duration::from_secs(2))
+ .repeat()
+ .with_easing(pulsating_between(0.3, 0.8)),
+ |label, delta| label.alpha(delta),
+ )
+ .into_any()
+ } else {
+ label.into_any_element()
+ }
+ })
+ };
- Some(
- h_flex()
- .flex_shrink_0()
- .gap_0p5()
- .mr_1p5()
- .child(
- Label::new(used)
- .size(LabelSize::Small)
- .color(Color::Muted)
- .map(|label| {
- if is_generating {
- label
- .with_animation(
- "used-tokens-label",
- Animation::new(Duration::from_secs(2))
- .repeat()
- .with_easing(pulsating_between(0.3, 0.8)),
- |label, delta| label.alpha(delta),
- )
- .into_any()
- } else {
- label.into_any_element()
- }
- }),
- )
- .child(
- Label::new("/")
- .size(LabelSize::Small)
- .color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
- )
- .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
- )
+ if show_split {
+ let max_output_tokens = self
+ .as_native_thread(cx)
+ .and_then(|thread| thread.read(cx).model())
+ .and_then(|model| model.max_output_tokens())
+ .unwrap_or(0);
+
+ let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
+ let input_max = crate::text_thread_editor::humanize_token_count(
+ usage.max_tokens.saturating_sub(max_output_tokens),
+ );
+ let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
+ let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
+
+ Some(
+ h_flex()
+ .flex_shrink_0()
+ .gap_1()
+ .mr_1p5()
+ .child(
+ h_flex()
+ .gap_0p5()
+ .child(
+ Icon::new(IconName::ArrowUp)
+ .size(IconSize::XSmall)
+ .color(Color::Muted),
+ )
+ .child(token_label(input, "input-tokens-label"))
+ .child(
+ Label::new("/")
+ .size(LabelSize::Small)
+ .color(separator_color),
+ )
+ .child(
+ Label::new(input_max)
+ .size(LabelSize::Small)
+ .color(Color::Muted),
+ ),
+ )
+ .child(
+ h_flex()
+ .gap_0p5()
+ .child(
+ Icon::new(IconName::ArrowDown)
+ .size(IconSize::XSmall)
+ .color(Color::Muted),
+ )
+ .child(token_label(output, "output-tokens-label"))
+ .child(
+ Label::new("/")
+ .size(LabelSize::Small)
+ .color(separator_color),
+ )
+ .child(
+ Label::new(output_max)
+ .size(LabelSize::Small)
+ .color(Color::Muted),
+ ),
+ ),
+ )
+ } else {
+ let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
+ let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+
+ Some(
+ h_flex()
+ .flex_shrink_0()
+ .gap_0p5()
+ .mr_1p5()
+ .child(token_label(used, "used-tokens-label"))
+ .child(
+ Label::new("/")
+ .size(LabelSize::Small)
+ .color(separator_color),
+ )
+ .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
+ )
+ }
}
fn toggle_burn_mode(
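For reference, a standalone sketch of the denominator arithmetic used by the split display in render_token_usage above; the helper name is mine, not from the diff. The input budget is the context window minus the model's maximum output allowance, and a missing max_output_tokens() falls back to zero.

```rust
/// Sketch only: the split-display limits derived from a model's context
/// window and (optional) maximum output allowance.
fn split_limits(max_tokens: u64, max_output_tokens: Option<u64>) -> (u64, u64) {
    let max_output = max_output_tokens.unwrap_or(0);
    // Input budget: whatever remains after reserving the max output allowance.
    let input_max = max_tokens.saturating_sub(max_output);
    (input_max, max_output)
}

fn main() {
    // A 1M-token window with a hypothetical 128K output cap.
    assert_eq!(split_limits(1_000_000, Some(128_000)), (872_000, 128_000));
    // A model reporting no max_output_tokens shows the full window as the
    // input budget and 0 as the output limit.
    assert_eq!(split_limits(1_000_000, None), (1_000_000, 0));
}
```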
crates/language_model/src/language_model.rs
@@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
false
}
+ /// Returns whether this model/provider reports accurate split input/output token counts.
+ /// When true, the UI may show separate input/output token indicators.
+ fn supports_split_token_display(&self) -> bool {
+ false
+ }
+
fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
LanguageModelToolSchemaFormat::JsonSchema
}
crates/language_models/src/provider/cloud.rs
@@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
self.model.supports_max_mode
}
+ fn supports_split_token_display(&self) -> bool {
+ use cloud_llm_client::LanguageModelProvider::*;
+ matches!(self.model.provider, OpenAi)
+ }
+
fn telemetry_id(&self) -> String {
format!("zed.dev/{}", self.model.id)
}
@@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
.map(|max_token_count| max_token_count as u64)
}
+ fn max_output_tokens(&self) -> Option<u64> {
+ Some(self.model.max_output_tokens as u64)
+ }
+
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
match &self.model.provider {
cloud_llm_client::LanguageModelProvider::Anthropic => {
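Not part of the diff: a sketch of how the two cloud.rs additions are meant to be consumed together by UI code. It assumes the LanguageModel trait from language_model.rs above is in scope, and the helper name is made up.

```rust
use std::sync::Arc;

use language_model::LanguageModel; // trait extended in the hunk above

/// Returns the output-token denominator for the split display, but only for
/// models that claim accurate split input/output counts.
fn split_output_limit(model: &Arc<dyn LanguageModel>) -> Option<u64> {
    model
        .supports_split_token_display()
        .then(|| model.max_output_tokens())
        .flatten()
}
```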
crates/language_models/src/provider/open_ai.rs
@@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
}
}
+ fn supports_split_token_display(&self) -> bool {
+ true
+ }
+
fn telemetry_id(&self) -> String {
format!("openai/{}", self.model.id())
}
crates/language_models/src/provider/open_ai_compatible.rs
@@ -319,6 +319,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
}
}
+ fn supports_split_token_display(&self) -> bool {
+ true
+ }
+
fn telemetry_id(&self) -> String {
format!("openai/{}", self.model.name)
}