Report response latency and errors when using (inline) assistant (#11806)

Created by Antonio Scandurra, Nathan, and David

Release Notes:

- N/A

Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: David <davidsp@anthropic.com>

Change summary

crates/assistant/src/assistant_panel.rs         | 119 +++++++------
crates/assistant/src/codegen.rs                 | 163 +++++++++++-------
crates/client/src/telemetry.rs                  |   4 
crates/collab/src/api/events.rs                 |   6 
crates/telemetry_events/src/telemetry_events.rs |   4 
5 files changed, 174 insertions(+), 122 deletions(-)

Detailed changes

crates/assistant/src/assistant_panel.rs 🔗

@@ -8,6 +8,7 @@ use crate::{
     Split, ToggleFocus, ToggleHistory, ToggleIncludeConversation,
 };
 use anyhow::{anyhow, Result};
+use client::telemetry::Telemetry;
 use collections::{hash_map, HashMap, HashSet, VecDeque};
 use editor::{
     actions::{MoveDown, MoveUp},
@@ -38,7 +39,15 @@ use parking_lot::Mutex;
 use project::Project;
 use search::{buffer_search::DivRegistrar, BufferSearchBar};
 use settings::Settings;
-use std::{cmp, fmt::Write, iter, ops::Range, path::PathBuf, sync::Arc, time::Duration};
+use std::{
+    cmp,
+    fmt::Write,
+    iter,
+    ops::Range,
+    path::PathBuf,
+    sync::Arc,
+    time::{Duration, Instant},
+};
 use telemetry_events::AssistantKind;
 use theme::ThemeSettings;
 use ui::{popover_menu, prelude::*, ButtonLike, ContextMenu, Tab, TabBar, Tooltip};
@@ -86,6 +95,7 @@ pub struct AssistantPanel {
     toolbar: View<Toolbar>,
     languages: Arc<LanguageRegistry>,
     fs: Arc<dyn Fs>,
+    telemetry: Arc<Telemetry>,
     _subscriptions: Vec<Subscription>,
     next_inline_assist_id: usize,
     pending_inline_assists: HashMap<usize, PendingInlineAssist>,
@@ -179,6 +189,7 @@ impl AssistantPanel {
                         toolbar,
                         languages: workspace.app_state().languages.clone(),
                         fs: workspace.app_state().fs.clone(),
+                        telemetry: workspace.client().telemetry().clone(),
                         width: None,
                         height: None,
                         _subscriptions: subscriptions,
@@ -347,9 +358,16 @@ impl AssistantPanel {
         };
 
         let inline_assist_id = post_inc(&mut self.next_inline_assist_id);
+        let telemetry = self.telemetry.clone();
 
-        let codegen =
-            cx.new_model(|cx| Codegen::new(editor.read(cx).buffer().clone(), codegen_kind, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(
+                editor.read(cx).buffer().clone(),
+                codegen_kind,
+                Some(telemetry),
+                cx,
+            )
+        });
 
         let measurements = Arc::new(Mutex::new(BlockMeasurements::default()));
         let inline_assistant = cx.new_view(|cx| {
@@ -360,7 +378,6 @@ impl AssistantPanel {
                 show_include_conversation && self.include_conversation_in_next_inline_assist,
                 self.inline_prompt_history.clone(),
                 codegen.clone(),
-                self.workspace.clone(),
                 cx,
             )
         });
@@ -1039,6 +1056,7 @@ impl AssistantPanel {
         let fs = self.fs.clone();
         let workspace = self.workspace.clone();
         let languages = self.languages.clone();
+        let telemetry = self.telemetry.clone();
         cx.spawn(|this, mut cx| async move {
             let saved_conversation = SavedConversation::load(&path, fs.as_ref()).await?;
             let model = this.update(&mut cx, |this, _| this.model.clone())?;
@@ -1047,6 +1065,7 @@ impl AssistantPanel {
                 model,
                 path.clone(),
                 languages,
+                Some(telemetry),
                 &mut cx,
             )
             .await?;
@@ -1345,6 +1364,7 @@ pub struct Conversation {
     pending_save: Task<Result<()>>,
     path: Option<PathBuf>,
     _subscriptions: Vec<Subscription>,
+    telemetry: Option<Arc<Telemetry>>,
 }
 
 #[derive(Default)]
@@ -1381,6 +1401,7 @@ impl Conversation {
     fn new(
         model: LanguageModel,
         language_registry: Arc<LanguageRegistry>,
+        telemetry: Option<Arc<Telemetry>>,
         cx: &mut ModelContext<Self>,
     ) -> Self {
         let markdown = language_registry.language_for_name("Markdown");
@@ -1415,6 +1436,7 @@ impl Conversation {
             pending_save: Task::ready(Ok(())),
             path: None,
             buffer,
+            telemetry,
         };
 
         let message = MessageAnchor {
@@ -1461,6 +1483,7 @@ impl Conversation {
         model: LanguageModel,
         path: PathBuf,
         language_registry: Arc<LanguageRegistry>,
+        telemetry: Option<Arc<Telemetry>>,
         cx: &mut AsyncAppContext,
     ) -> Result<Model<Self>> {
         let id = match saved_conversation.id {
@@ -1513,6 +1536,7 @@ impl Conversation {
                 pending_save: Task::ready(Ok(())),
                 path: Some(path),
                 buffer,
+                telemetry,
             };
             this.count_remaining_tokens(cx);
             this
@@ -1806,10 +1830,15 @@ impl Conversation {
             let task = cx.spawn({
                 |this, mut cx| async move {
                     let assistant_message_id = assistant_message.id;
+                    let mut response_latency = None;
                     let stream_completion = async {
+                        let request_start = Instant::now();
                         let mut messages = stream.await?;
 
                         while let Some(message) = messages.next().await {
+                            if response_latency.is_none() {
+                                response_latency = Some(request_start.elapsed());
+                            }
                             let text = message?;
 
                             this.update(&mut cx, |this, cx| {
@@ -1848,16 +1877,26 @@ impl Conversation {
                         if let Some(metadata) =
                             this.messages_metadata.get_mut(&assistant_message.id)
                         {
-                            match result {
-                                Ok(_) => {
-                                    metadata.status = MessageStatus::Done;
-                                }
-                                Err(error) => {
-                                    metadata.status = MessageStatus::Error(SharedString::from(
-                                        error.to_string().trim().to_string(),
-                                    ));
-                                }
+                            let error_message = result
+                                .err()
+                                .map(|error| error.to_string().trim().to_string());
+                            if let Some(error_message) = error_message.as_ref() {
+                                metadata.status =
+                                    MessageStatus::Error(SharedString::from(error_message.clone()));
+                            } else {
+                                metadata.status = MessageStatus::Done;
+                            }
+
+                            if let Some(telemetry) = this.telemetry.as_ref() {
+                                telemetry.report_assistant_event(
+                                    this.id.clone(),
+                                    AssistantKind::Panel,
+                                    this.model.telemetry_id(),
+                                    response_latency,
+                                    error_message,
+                                );
                             }
+
                             cx.emit(ConversationEvent::MessagesEdited);
                         }
                     })
@@ -2278,7 +2317,9 @@ impl ConversationEditor {
         workspace: View<Workspace>,
         cx: &mut ViewContext<Self>,
     ) -> Self {
-        let conversation = cx.new_model(|cx| Conversation::new(model, language_registry, cx));
+        let telemetry = workspace.read(cx).client().telemetry().clone();
+        let conversation =
+            cx.new_model(|cx| Conversation::new(model, language_registry, Some(telemetry), cx));
         Self::for_conversation(conversation, fs, workspace, cx)
     }
 
@@ -2318,15 +2359,6 @@ impl ConversationEditor {
     }
 
     fn assist(&mut self, _: &Assist, cx: &mut ViewContext<Self>) {
-        self.conversation.update(cx, |conversation, cx| {
-            report_assistant_event(
-                self.workspace.clone(),
-                Some(conversation),
-                AssistantKind::Panel,
-                cx,
-            )
-        });
-
         let cursors = self.cursors(cx);
 
         let user_messages = self.conversation.update(cx, |conversation, cx| {
@@ -2864,7 +2896,6 @@ enum InlineAssistantEvent {
 struct InlineAssistant {
     id: usize,
     prompt_editor: View<Editor>,
-    workspace: WeakView<Workspace>,
     confirmed: bool,
     show_include_conversation: bool,
     include_conversation: bool,
@@ -2945,7 +2976,6 @@ impl InlineAssistant {
         include_conversation: bool,
         prompt_history: VecDeque<String>,
         codegen: Model<Codegen>,
-        workspace: WeakView<Workspace>,
         cx: &mut ViewContext<Self>,
     ) -> Self {
         let prompt_editor = cx.new_view(|cx| {
@@ -2967,7 +2997,6 @@ impl InlineAssistant {
         Self {
             id,
             prompt_editor,
-            workspace,
             confirmed: false,
             show_include_conversation,
             include_conversation,
@@ -3016,8 +3045,6 @@ impl InlineAssistant {
         if self.confirmed {
             cx.emit(InlineAssistantEvent::Dismissed);
         } else {
-            report_assistant_event(self.workspace.clone(), None, AssistantKind::Inline, cx);
-
             let prompt = self.prompt_editor.read(cx).text(cx);
             self.prompt_editor
                 .update(cx, |editor, _cx| editor.set_read_only(true));
@@ -3147,30 +3174,6 @@ fn merge_ranges(ranges: &mut Vec<Range<Anchor>>, buffer: &MultiBufferSnapshot) {
     }
 }
 
-fn report_assistant_event(
-    workspace: WeakView<Workspace>,
-    conversation: Option<&Conversation>,
-    assistant_kind: AssistantKind,
-    cx: &mut AppContext,
-) {
-    let Some(workspace) = workspace.upgrade() else {
-        return;
-    };
-
-    let client = workspace.read(cx).project().read(cx).client();
-    let telemetry = client.telemetry();
-
-    let conversation_id = conversation.and_then(|conversation| conversation.id.clone());
-    let model_id = conversation
-        .map(|c| c.model.telemetry_id())
-        .unwrap_or_else(|| {
-            CompletionProvider::global(cx)
-                .default_model()
-                .telemetry_id()
-        });
-    telemetry.report_assistant_event(conversation_id, assistant_kind, model_id)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -3187,7 +3190,7 @@ mod tests {
         let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
 
         let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
         let buffer = conversation.read(cx).buffer.clone();
 
         let message_1 = conversation.read(cx).message_anchors[0].clone();
@@ -3319,7 +3322,7 @@ mod tests {
         let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
 
         let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
         let buffer = conversation.read(cx).buffer.clone();
 
         let message_1 = conversation.read(cx).message_anchors[0].clone();
@@ -3418,7 +3421,7 @@ mod tests {
         init(cx);
         let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
         let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
         let buffer = conversation.read(cx).buffer.clone();
 
         let message_1 = conversation.read(cx).message_anchors[0].clone();
@@ -3502,8 +3505,9 @@ mod tests {
         cx.set_global(CompletionProvider::Fake(FakeCompletionProvider::default()));
         cx.update(init);
         let registry = Arc::new(LanguageRegistry::test(cx.executor()));
-        let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry.clone(), cx));
+        let conversation = cx.new_model(|cx| {
+            Conversation::new(LanguageModel::default(), registry.clone(), None, cx)
+        });
         let buffer = conversation.read_with(cx, |conversation, _| conversation.buffer.clone());
         let message_0 =
             conversation.read_with(cx, |conversation, _| conversation.message_anchors[0].id);
@@ -3542,6 +3546,7 @@ mod tests {
             LanguageModel::default(),
             Default::default(),
             registry.clone(),
+            None,
             &mut cx.to_async(),
         )
         .await

crates/assistant/src/codegen.rs 🔗

@@ -3,12 +3,13 @@ use crate::{
     CompletionProvider, LanguageModelRequest,
 };
 use anyhow::Result;
+use client::telemetry::Telemetry;
 use editor::{Anchor, MultiBuffer, MultiBufferSnapshot, ToOffset, ToPoint};
 use futures::{channel::mpsc, SinkExt, Stream, StreamExt};
 use gpui::{EventEmitter, Model, ModelContext, Task};
 use language::{Rope, TransactionId};
 use multi_buffer::MultiBufferRow;
-use std::{cmp, future, ops::Range};
+use std::{cmp, future, ops::Range, sync::Arc, time::Instant};
 
 pub enum Event {
     Finished,
@@ -30,13 +31,19 @@ pub struct Codegen {
     error: Option<anyhow::Error>,
     generation: Task<()>,
     idle: bool,
+    telemetry: Option<Arc<Telemetry>>,
     _subscription: gpui::Subscription,
 }
 
 impl EventEmitter<Event> for Codegen {}
 
 impl Codegen {
-    pub fn new(buffer: Model<MultiBuffer>, kind: CodegenKind, cx: &mut ModelContext<Self>) -> Self {
+    pub fn new(
+        buffer: Model<MultiBuffer>,
+        kind: CodegenKind,
+        telemetry: Option<Arc<Telemetry>>,
+        cx: &mut ModelContext<Self>,
+    ) -> Self {
         let snapshot = buffer.read(cx).snapshot(cx);
         Self {
             buffer: buffer.clone(),
@@ -47,6 +54,7 @@ impl Codegen {
             error: Default::default(),
             idle: true,
             generation: Task::ready(()),
+            telemetry,
             _subscription: cx.subscribe(&buffer, Self::handle_buffer_event),
         }
     }
@@ -103,7 +111,9 @@ impl Codegen {
             .next()
             .unwrap_or_else(|| snapshot.indent_size_for_line(MultiBufferRow(selection_start.row)));
 
+        let model_telemetry_id = prompt.model.telemetry_id();
         let response = CompletionProvider::global(cx).complete(prompt);
+        let telemetry = self.telemetry.clone();
         self.generation = cx.spawn(|this, mut cx| {
             async move {
                 let generate = async {
@@ -111,68 +121,89 @@ impl Codegen {
 
                     let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
                     let diff = cx.background_executor().spawn(async move {
-                        let chunks = strip_invalid_spans_from_codeblock(response.await?);
-                        futures::pin_mut!(chunks);
-                        let mut diff = StreamingDiff::new(selected_text.to_string());
-
-                        let mut new_text = String::new();
-                        let mut base_indent = None;
-                        let mut line_indent = None;
-                        let mut first_line = true;
-
-                        while let Some(chunk) = chunks.next().await {
-                            let chunk = chunk?;
-
-                            let mut lines = chunk.split('\n').peekable();
-                            while let Some(line) = lines.next() {
-                                new_text.push_str(line);
-                                if line_indent.is_none() {
-                                    if let Some(non_whitespace_ch_ix) =
-                                        new_text.find(|ch: char| !ch.is_whitespace())
-                                    {
-                                        line_indent = Some(non_whitespace_ch_ix);
-                                        base_indent = base_indent.or(line_indent);
-
-                                        let line_indent = line_indent.unwrap();
-                                        let base_indent = base_indent.unwrap();
-                                        let indent_delta = line_indent as i32 - base_indent as i32;
-                                        let mut corrected_indent_len = cmp::max(
-                                            0,
-                                            suggested_line_indent.len as i32 + indent_delta,
-                                        )
-                                            as usize;
-                                        if first_line {
-                                            corrected_indent_len = corrected_indent_len
-                                                .saturating_sub(selection_start.column as usize);
+                        let mut response_latency = None;
+                        let request_start = Instant::now();
+                        let diff = async {
+                            let chunks = strip_invalid_spans_from_codeblock(response.await?);
+                            futures::pin_mut!(chunks);
+                            let mut diff = StreamingDiff::new(selected_text.to_string());
+
+                            let mut new_text = String::new();
+                            let mut base_indent = None;
+                            let mut line_indent = None;
+                            let mut first_line = true;
+
+                            while let Some(chunk) = chunks.next().await {
+                                if response_latency.is_none() {
+                                    response_latency = Some(request_start.elapsed());
+                                }
+                                let chunk = chunk?;
+
+                                let mut lines = chunk.split('\n').peekable();
+                                while let Some(line) = lines.next() {
+                                    new_text.push_str(line);
+                                    if line_indent.is_none() {
+                                        if let Some(non_whitespace_ch_ix) =
+                                            new_text.find(|ch: char| !ch.is_whitespace())
+                                        {
+                                            line_indent = Some(non_whitespace_ch_ix);
+                                            base_indent = base_indent.or(line_indent);
+
+                                            let line_indent = line_indent.unwrap();
+                                            let base_indent = base_indent.unwrap();
+                                            let indent_delta =
+                                                line_indent as i32 - base_indent as i32;
+                                            let mut corrected_indent_len = cmp::max(
+                                                0,
+                                                suggested_line_indent.len as i32 + indent_delta,
+                                            )
+                                                as usize;
+                                            if first_line {
+                                                corrected_indent_len = corrected_indent_len
+                                                    .saturating_sub(
+                                                        selection_start.column as usize,
+                                                    );
+                                            }
+
+                                            let indent_char = suggested_line_indent.char();
+                                            let mut indent_buffer = [0; 4];
+                                            let indent_str =
+                                                indent_char.encode_utf8(&mut indent_buffer);
+                                            new_text.replace_range(
+                                                ..line_indent,
+                                                &indent_str.repeat(corrected_indent_len),
+                                            );
                                         }
-
-                                        let indent_char = suggested_line_indent.char();
-                                        let mut indent_buffer = [0; 4];
-                                        let indent_str =
-                                            indent_char.encode_utf8(&mut indent_buffer);
-                                        new_text.replace_range(
-                                            ..line_indent,
-                                            &indent_str.repeat(corrected_indent_len),
-                                        );
                                     }
-                                }
 
-                                if line_indent.is_some() {
-                                    hunks_tx.send(diff.push_new(&new_text)).await?;
-                                    new_text.clear();
-                                }
+                                    if line_indent.is_some() {
+                                        hunks_tx.send(diff.push_new(&new_text)).await?;
+                                        new_text.clear();
+                                    }
 
-                                if lines.peek().is_some() {
-                                    hunks_tx.send(diff.push_new("\n")).await?;
-                                    line_indent = None;
-                                    first_line = false;
+                                    if lines.peek().is_some() {
+                                        hunks_tx.send(diff.push_new("\n")).await?;
+                                        line_indent = None;
+                                        first_line = false;
+                                    }
                                 }
                             }
+                            hunks_tx.send(diff.push_new(&new_text)).await?;
+                            hunks_tx.send(diff.finish()).await?;
+
+                            anyhow::Ok(())
+                        };
+
+                        let error_message = diff.await.err().map(|error| error.to_string());
+                        if let Some(telemetry) = telemetry {
+                            telemetry.report_assistant_event(
+                                None,
+                                telemetry_events::AssistantKind::Inline,
+                                model_telemetry_id,
+                                response_latency,
+                                error_message,
+                            );
                         }
-                        hunks_tx.send(diff.push_new(&new_text)).await?;
-                        hunks_tx.send(diff.finish()).await?;
-
-                        anyhow::Ok(())
                     });
 
                     while let Some(hunks) = hunks_rx.next().await {
@@ -235,7 +266,8 @@ impl Codegen {
                         })?;
                     }
 
-                    diff.await?;
+                    diff.await;
+
                     anyhow::Ok(())
                 };
 
@@ -396,8 +428,9 @@ mod tests {
             let snapshot = buffer.snapshot(cx);
             snapshot.anchor_before(Point::new(1, 0))..snapshot.anchor_after(Point::new(4, 5))
         });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Transform { range }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Transform { range }, None, cx)
+        });
 
         let request = LanguageModelRequest::default();
         codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@@ -454,8 +487,9 @@ mod tests {
             let snapshot = buffer.snapshot(cx);
             snapshot.anchor_before(Point::new(1, 6))
         });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
+        });
 
         let request = LanguageModelRequest::default();
         codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@@ -512,8 +546,9 @@ mod tests {
             let snapshot = buffer.snapshot(cx);
             snapshot.anchor_before(Point::new(1, 2))
         });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
+        });
 
         let request = LanguageModelRequest::default();
         codegen.update(cx, |codegen, cx| codegen.start(request, cx));

crates/client/src/telemetry.rs 🔗

@@ -261,11 +261,15 @@ impl Telemetry {
         conversation_id: Option<String>,
         kind: AssistantKind,
         model: String,
+        response_latency: Option<Duration>,
+        error_message: Option<String>,
     ) {
         let event = Event::Assistant(AssistantEvent {
             conversation_id,
             kind,
             model: model.to_string(),
+            response_latency,
+            error_message,
         });
 
         self.report_event(event)

crates/collab/src/api/events.rs 🔗

@@ -785,6 +785,8 @@ pub struct AssistantEventRow {
     conversation_id: String,
     kind: String,
     model: String,
+    response_latency_in_ms: Option<i64>,
+    error_message: Option<String>,
 }
 
 impl AssistantEventRow {
@@ -811,6 +813,10 @@ impl AssistantEventRow {
             conversation_id: event.conversation_id.unwrap_or_default(),
             kind: event.kind.to_string(),
             model: event.model,
+            response_latency_in_ms: event
+                .response_latency
+                .map(|latency| latency.as_millis() as i64),
+            error_message: event.error_message,
         }
     }
 }

crates/telemetry_events/src/telemetry_events.rs 🔗

@@ -1,6 +1,6 @@
 use semantic_version::SemanticVersion;
 use serde::{Deserialize, Serialize};
-use std::{fmt::Display, sync::Arc};
+use std::{fmt::Display, sync::Arc, time::Duration};
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct EventRequestBody {
@@ -93,6 +93,8 @@ pub struct AssistantEvent {
     pub conversation_id: Option<String>,
     pub kind: AssistantKind,
     pub model: String,
+    pub response_latency: Option<Duration>,
+    pub error_message: Option<String>,
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]