agent: Report usage from thread summarization requests (#29012)

Marshall Bowers created

This PR makes thread summarization also report its model request usage.
This prevents the usage count from appearing to jump by 2 the next time
a message is sent after summarization.

Release Notes:

- N/A

Change summary

crates/agent/src/thread.rs                  | 11 +++++++-
crates/language_model/src/language_model.rs | 28 +++++++++++++++++-----
2 files changed, 30 insertions(+), 9 deletions(-)

Detailed changes

crates/agent/src/thread.rs 🔗

@@ -1302,8 +1302,15 @@ impl Thread {
 
         self.pending_summary = cx.spawn(async move |this, cx| {
             async move {
-                let stream = model.model.stream_completion_text(request, &cx);
-                let mut messages = stream.await?;
+                let stream = model.model.stream_completion_text_with_usage(request, &cx);
+                let (mut messages, usage) = stream.await?;
+
+                if let Some(usage) = usage {
+                    this.update(cx, |_thread, cx| {
+                        cx.emit(ThreadEvent::UsageUpdated(usage));
+                    })
+                    .ok();
+                }
 
                 let mut new_summary = String::new();
                 while let Some(message) = messages.stream.next().await {

crates/language_model/src/language_model.rs 🔗

@@ -262,10 +262,21 @@ pub trait LanguageModel: Send + Sync {
         request: LanguageModelRequest,
         cx: &AsyncApp,
     ) -> BoxFuture<'static, Result<LanguageModelTextStream>> {
-        let events = self.stream_completion(request, cx);
+        self.stream_completion_text_with_usage(request, cx)
+            .map(|result| result.map(|(stream, _usage)| stream))
+            .boxed()
+    }
+
+    fn stream_completion_text_with_usage(
+        &self,
+        request: LanguageModelRequest,
+        cx: &AsyncApp,
+    ) -> BoxFuture<'static, Result<(LanguageModelTextStream, Option<RequestUsage>)>> {
+        let future = self.stream_completion_with_usage(request, cx);
 
         async move {
-            let mut events = events.await?.fuse();
+            let (events, usage) = future.await?;
+            let mut events = events.fuse();
             let mut message_id = None;
             let mut first_item_text = None;
             let last_token_usage = Arc::new(Mutex::new(TokenUsage::default()));
@@ -305,11 +316,14 @@ pub trait LanguageModel: Send + Sync {
                 }))
                 .boxed();
 
-            Ok(LanguageModelTextStream {
-                message_id,
-                stream,
-                last_token_usage,
-            })
+            Ok((
+                LanguageModelTextStream {
+                    message_id,
+                    stream,
+                    last_token_usage,
+                },
+                usage,
+            ))
         }
         .boxed()
     }