agent: Add token count in the thread view (#28037)

Danilo Leal and Agus Zubiaga created

This PR adds the token count to the active thread view. It doesn't
behaves quite like Assistant 1 where it updates as you type, though; it
updates after you submit the message.

<img
src="https://github.com/user-attachments/assets/82d2a180-554a-43ee-b776-3743359b609b"
width="700" />

---

Release Notes:

- agent: Add token count in the thread view

---------

Co-authored-by: Agus Zubiaga <hi@aguz.me>

Change summary

crates/agent/src/assistant_panel.rs                   | 95 ++++++++++--
crates/agent/src/message_editor.rs                    |  6 
crates/agent/src/thread.rs                            | 40 ++++-
crates/assistant_context_editor/src/context_editor.rs | 12 +
crates/language_model/src/language_model.rs           |  6 
5 files changed, 132 insertions(+), 27 deletions(-)

Detailed changes

crates/agent/src/assistant_panel.rs πŸ”—

@@ -1,5 +1,6 @@
 use std::path::PathBuf;
 use std::sync::Arc;
+use std::time::Duration;
 
 use anyhow::{Result, anyhow};
 use assistant_context_editor::{
@@ -14,9 +15,9 @@ use client::zed_urls;
 use editor::{Editor, MultiBuffer};
 use fs::Fs;
 use gpui::{
-    Action, AnyElement, App, AsyncWindowContext, Corner, Entity, EventEmitter, FocusHandle,
-    Focusable, FontWeight, KeyContext, Pixels, Subscription, Task, UpdateGlobal, WeakEntity,
-    action_with_deprecated_aliases, prelude::*,
+    Action, Animation, AnimationExt as _, AnyElement, App, AsyncWindowContext, Corner, Entity,
+    EventEmitter, FocusHandle, Focusable, FontWeight, KeyContext, Pixels, Subscription, Task,
+    UpdateGlobal, WeakEntity, action_with_deprecated_aliases, prelude::*, pulsating_between,
 };
 use language::LanguageRegistry;
 use language_model::{LanguageModelProviderTosView, LanguageModelRegistry};
@@ -38,7 +39,7 @@ use crate::active_thread::ActiveThread;
 use crate::assistant_configuration::{AssistantConfiguration, AssistantConfigurationEvent};
 use crate::history_store::{HistoryEntry, HistoryStore};
 use crate::message_editor::MessageEditor;
-use crate::thread::{Thread, ThreadError, ThreadId};
+use crate::thread::{Thread, ThreadError, ThreadId, TokenUsageRatio};
 use crate::thread_history::{PastContext, PastThread, ThreadHistory};
 use crate::thread_store::ThreadStore;
 use crate::{
@@ -715,18 +716,21 @@ impl Panel for AssistantPanel {
 
 impl AssistantPanel {
     fn render_toolbar(&self, _window: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
-        let thread = self.thread.read(cx);
-        let is_empty = thread.is_empty();
+        let active_thread = self.thread.read(cx);
+        let thread = active_thread.thread().read(cx);
+        let token_usage = thread.total_token_usage(cx);
+        let thread_id = thread.id().clone();
 
-        let thread_id = thread.thread().read(cx).id().clone();
+        let is_generating = thread.is_generating();
+        let is_empty = active_thread.is_empty();
         let focus_handle = self.focus_handle(cx);
 
         let title = match self.active_view {
             ActiveView::Thread => {
                 if is_empty {
-                    thread.summary_or_default(cx)
+                    active_thread.summary_or_default(cx)
                 } else {
-                    thread
+                    active_thread
                         .summary(cx)
                         .unwrap_or_else(|| SharedString::from("Loading Summary…"))
                 }
@@ -742,6 +746,12 @@ impl AssistantPanel {
             ActiveView::Configuration => "Settings".into(),
         };
 
+        let show_token_count = match self.active_view {
+            ActiveView::Thread => !is_empty,
+            ActiveView::PromptEditor => self.context_editor.is_some(),
+            _ => false,
+        };
+
         h_flex()
             .id("assistant-toolbar")
             .h(Tab::container_height(cx))
@@ -764,12 +774,67 @@ impl AssistantPanel {
                     .pl_2()
                     .gap_2()
                     .bg(cx.theme().colors().tab_bar_background)
-                    .children(if matches!(self.active_view, ActiveView::PromptEditor) {
-                        self.context_editor
-                            .as_ref()
-                            .and_then(|editor| render_remaining_tokens(editor, cx))
-                    } else {
-                        None
+                    .when(show_token_count, |parent| match self.active_view {
+                        ActiveView::Thread => {
+                            if token_usage.total == 0 {
+                                return parent;
+                            }
+
+                            let token_color = match token_usage.ratio {
+                                TokenUsageRatio::Normal => Color::Muted,
+                                TokenUsageRatio::Warning => Color::Warning,
+                                TokenUsageRatio::Exceeded => Color::Error,
+                            };
+
+                            parent.child(
+                                h_flex()
+                                    .gap_0p5()
+                                    .child(
+                                        Label::new(assistant_context_editor::humanize_token_count(
+                                            token_usage.total,
+                                        ))
+                                        .size(LabelSize::Small)
+                                        .color(token_color)
+                                        .map(|label| {
+                                            if is_generating {
+                                                label
+                                                    .with_animation(
+                                                        "used-tokens-label",
+                                                        Animation::new(Duration::from_secs(2))
+                                                            .repeat()
+                                                            .with_easing(pulsating_between(
+                                                                0.6, 1.,
+                                                            )),
+                                                        |label, delta| label.alpha(delta),
+                                                    )
+                                                    .into_any()
+                                            } else {
+                                                label.into_any_element()
+                                            }
+                                        }),
+                                    )
+                                    .child(
+                                        Label::new("/").size(LabelSize::Small).color(Color::Muted),
+                                    )
+                                    .child(
+                                        Label::new(assistant_context_editor::humanize_token_count(
+                                            token_usage.max,
+                                        ))
+                                        .size(LabelSize::Small)
+                                        .color(Color::Muted),
+                                    ),
+                            )
+                        }
+                        ActiveView::PromptEditor => {
+                            let Some(editor) = self.context_editor.as_ref() else {
+                                return parent;
+                            };
+                            let Some(element) = render_remaining_tokens(editor, cx) else {
+                                return parent;
+                            };
+                            parent.child(element)
+                        }
+                        _ => parent,
                     })
                     .child(
                         h_flex()

crates/agent/src/message_editor.rs πŸ”—

@@ -28,7 +28,7 @@ use crate::context_picker::{ConfirmBehavior, ContextPicker, ContextPickerComplet
 use crate::context_store::{ContextStore, refresh_context_store_text};
 use crate::context_strip::{ContextStrip, ContextStripEvent, SuggestContextKind};
 use crate::profile_selector::ProfileSelector;
-use crate::thread::{RequestKind, Thread};
+use crate::thread::{RequestKind, Thread, TokenUsageRatio};
 use crate::thread_store::ThreadStore;
 use crate::{
     AgentDiff, Chat, ChatMode, NewThread, OpenAgentDiff, RemoveAllContext, ThreadEvent,
@@ -338,7 +338,7 @@ impl Render for MessageEditor {
 
         let thread = self.thread.read(cx);
         let is_generating = thread.is_generating();
-        let is_too_long = thread.is_getting_too_long(cx);
+        let total_token_usage = thread.total_token_usage(cx);
         let is_model_selected = self.is_model_selected(cx);
         let is_editor_empty = self.is_editor_empty(cx);
         let needs_confirmation =
@@ -788,7 +788,7 @@ impl Render for MessageEditor {
                             ),
                     )
             )
-            .when(is_too_long, |parent| {
+            .when(total_token_usage.ratio != TokenUsageRatio::Normal, |parent| {
                 parent.child(
                     h_flex()
                         .p_2()

crates/agent/src/thread.rs πŸ”—

@@ -214,6 +214,21 @@ pub enum DetailedSummaryState {
     },
 }
 
+#[derive(Default)]
+pub struct TotalTokenUsage {
+    pub total: usize,
+    pub max: usize,
+    pub ratio: TokenUsageRatio,
+}
+
+#[derive(Default, PartialEq, Eq)]
+pub enum TokenUsageRatio {
+    #[default]
+    Normal,
+    Warning,
+    Exceeded,
+}
+
 /// A thread of conversation with the LLM.
 pub struct Thread {
     id: ThreadId,
@@ -1723,26 +1738,33 @@ impl Thread {
         self.cumulative_token_usage.clone()
     }
 
-    pub fn is_getting_too_long(&self, cx: &App) -> bool {
+    pub fn total_token_usage(&self, cx: &App) -> TotalTokenUsage {
         let model_registry = LanguageModelRegistry::read_global(cx);
         let Some(model) = model_registry.active_model() else {
-            return false;
+            return TotalTokenUsage::default();
         };
 
-        let max_tokens = model.max_token_count();
-
-        let current_usage =
-            self.cumulative_token_usage.input_tokens + self.cumulative_token_usage.output_tokens;
+        let max = model.max_token_count();
 
         #[cfg(debug_assertions)]
         let warning_threshold: f32 = std::env::var("ZED_THREAD_WARNING_THRESHOLD")
-            .unwrap_or("0.9".to_string())
+            .unwrap_or("0.8".to_string())
             .parse()
             .unwrap();
         #[cfg(not(debug_assertions))]
-        let warning_threshold: f32 = 0.9;
+        let warning_threshold: f32 = 0.8;
+
+        let total = self.cumulative_token_usage.total_tokens() as usize;
+
+        let ratio = if total >= max {
+            TokenUsageRatio::Exceeded
+        } else if total as f32 / max as f32 >= warning_threshold {
+            TokenUsageRatio::Warning
+        } else {
+            TokenUsageRatio::Normal
+        };
 
-        current_usage as f32 >= (max_tokens as f32 * warning_threshold)
+        TotalTokenUsage { total, max, ratio }
     }
 
     pub fn deny_tool_use(

crates/assistant_context_editor/src/context_editor.rs πŸ”—

@@ -3703,6 +3703,18 @@ pub fn humanize_token_count(count: usize) -> String {
                 format!("{}.{}k", thousands, hundreds)
             }
         }
+        1_000_000..=9_999_999 => {
+            let millions = count / 1_000_000;
+            let hundred_thousands = (count % 1_000_000 + 50_000) / 100_000;
+            if hundred_thousands == 0 {
+                format!("{}M", millions)
+            } else if hundred_thousands == 10 {
+                format!("{}M", millions + 1)
+            } else {
+                format!("{}.{}M", millions, hundred_thousands)
+            }
+        }
+        10_000_000.. => format!("{}M", (count + 500_000) / 1_000_000),
         _ => format!("{}k", (count + 500) / 1000),
     }
 }

crates/language_model/src/language_model.rs πŸ”—

@@ -95,6 +95,12 @@ pub struct TokenUsage {
     pub cache_read_input_tokens: u32,
 }
 
+impl TokenUsage {
+    pub fn total_tokens(&self) -> u32 {
+        self.input_tokens + self.output_tokens
+    }
+}
+
 impl Add<TokenUsage> for TokenUsage {
     type Output = Self;