diff --git a/crates/cloud_llm_client/src/cloud_llm_client.rs b/crates/cloud_llm_client/src/cloud_llm_client.rs index 9ed82365ea910dd910226f70e242d68388b41796..d2d25ff5b84ef524f4e573a13149b26fe32fc4a5 100644 --- a/crates/cloud_llm_client/src/cloud_llm_client.rs +++ b/crates/cloud_llm_client/src/cloud_llm_client.rs @@ -144,6 +144,8 @@ pub struct AcceptEditPredictionBody { pub request_id: String, #[serde(default, skip_serializing_if = "Option::is_none")] pub model_version: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub e2e_latency_ms: Option, } #[derive(Debug, Clone, Deserialize)] @@ -164,6 +166,8 @@ pub struct EditPredictionRejection { pub was_shown: bool, #[serde(default, skip_serializing_if = "Option::is_none")] pub model_version: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub e2e_latency_ms: Option, } #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] diff --git a/crates/edit_prediction/src/edit_prediction.rs b/crates/edit_prediction/src/edit_prediction.rs index c7497fa11da3c7ec6a260aa6fe388d019e8fe24a..cfc5c7efe348b7238813853bbf3e5fd70047340d 100644 --- a/crates/edit_prediction/src/edit_prediction.rs +++ b/crates/edit_prediction/src/edit_prediction.rs @@ -385,6 +385,7 @@ impl ProjectState { EditPredictionRejectReason::Canceled, false, None, + None, cx, ); }) @@ -413,6 +414,7 @@ struct CurrentEditPrediction { pub prediction: EditPrediction, pub was_shown: bool, pub shown_with: Option, + pub e2e_latency: std::time::Duration, } impl CurrentEditPrediction { @@ -506,12 +508,14 @@ impl std::ops::Deref for BufferEditPrediction<'_> { } #[derive(Clone)] + struct PendingSettledPrediction { request_id: EditPredictionId, editable_anchor_range: Range, example: Option, enqueued_at: Instant, last_edit_at: Instant, + e2e_latency: std::time::Duration, } struct RegisteredBuffer { @@ -1686,6 +1690,7 @@ impl EditPredictionStore { request_id = pending_prediction.request_id.0.clone(), settled_editable_region, example = pending_prediction.example.take(), + e2e_latency = pending_prediction.e2e_latency.as_millis(), ); return false; @@ -1715,6 +1720,7 @@ impl EditPredictionStore { edited_buffer_snapshot: &BufferSnapshot, editable_offset_range: Range, example: Option, + e2e_latency: std::time::Duration, cx: &mut Context, ) { let this = &mut *self; @@ -1729,6 +1735,7 @@ impl EditPredictionStore { editable_anchor_range: edited_buffer_snapshot .anchor_range_around(editable_offset_range), example, + e2e_latency, enqueued_at: now, last_edit_at: now, }); @@ -1751,6 +1758,7 @@ impl EditPredictionStore { reason, prediction.was_shown, model_version, + Some(prediction.e2e_latency), cx, ); } @@ -1812,6 +1820,7 @@ impl EditPredictionStore { reason: EditPredictionRejectReason, was_shown: bool, model_version: Option, + e2e_latency: Option, cx: &App, ) { match self.edit_prediction_model { @@ -1835,6 +1844,7 @@ impl EditPredictionStore { reason, was_shown, model_version, + e2e_latency_ms: e2e_latency.map(|latency| latency.as_millis()), }, organization_id, }) @@ -2008,6 +2018,7 @@ impl EditPredictionStore { EditPredictionResult { id: prediction_result.id, prediction: Err(EditPredictionRejectReason::CurrentPreferred), + e2e_latency: prediction_result.e2e_latency, } }, PredictionRequestedBy::DiagnosticsUpdate, @@ -2205,6 +2216,7 @@ impl EditPredictionStore { prediction, was_shown: false, shown_with: None, + e2e_latency: prediction_result.e2e_latency, }; if let Some(current_prediction) = @@ -2225,6 +2237,7 @@ impl EditPredictionStore { EditPredictionRejectReason::CurrentPreferred, false, new_prediction.prediction.model_version, + Some(new_prediction.e2e_latency), cx, ); None @@ -2239,6 +2252,7 @@ impl EditPredictionStore { reject_reason, false, None, + Some(prediction_result.e2e_latency), cx, ); None diff --git a/crates/edit_prediction/src/edit_prediction_tests.rs b/crates/edit_prediction/src/edit_prediction_tests.rs index 74688f64effc4c4e371d4516b25c6ce55b317dbb..5daa7ee4a0dea1384e002acefe1fb4b47d0d5f91 100644 --- a/crates/edit_prediction/src/edit_prediction_tests.rs +++ b/crates/edit_prediction/src/edit_prediction_tests.rs @@ -1323,6 +1323,7 @@ async fn test_empty_prediction(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Empty, was_shown: false, model_version: None, + e2e_latency_ms: Some(0), }] ); } @@ -1384,6 +1385,7 @@ async fn test_interpolated_empty(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::InterpolatedEmpty, was_shown: false, model_version: None, + e2e_latency_ms: Some(0), }] ); } @@ -1477,6 +1479,7 @@ async fn test_replace_current(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Replaced, was_shown: false, model_version: None, + e2e_latency_ms: Some(0), }] ); } @@ -1572,6 +1575,7 @@ async fn test_current_preferred(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::CurrentPreferred, was_shown: false, model_version: None, + e2e_latency_ms: Some(0), }] ); } @@ -1664,6 +1668,7 @@ async fn test_cancel_earlier_pending_requests(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Canceled, was_shown: false, model_version: None, + e2e_latency_ms: None, }] ); } @@ -1795,12 +1800,14 @@ async fn test_cancel_second_on_third_request(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Canceled, was_shown: false, model_version: None, + e2e_latency_ms: None, }, EditPredictionRejection { request_id: first_id, reason: EditPredictionRejectReason::Replaced, was_shown: false, model_version: None, + e2e_latency_ms: Some(0), } ] ); @@ -1963,6 +1970,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { EditPredictionRejectReason::Discarded, false, None, + None, cx, ); ep_store.reject_prediction( @@ -1970,6 +1978,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { EditPredictionRejectReason::Canceled, true, None, + None, cx, ); }); @@ -1989,6 +1998,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Discarded, was_shown: false, model_version: None, + e2e_latency_ms: None } ); assert_eq!( @@ -1998,6 +2008,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { reason: EditPredictionRejectReason::Canceled, was_shown: true, model_version: None, + e2e_latency_ms: None } ); @@ -2009,6 +2020,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { EditPredictionRejectReason::Discarded, false, None, + None, cx, ); } @@ -2041,6 +2053,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { EditPredictionRejectReason::Discarded, false, None, + None, cx, ); }); @@ -2061,6 +2074,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) { EditPredictionRejectReason::Discarded, false, None, + None, cx, ); }); @@ -2394,8 +2408,6 @@ async fn test_edit_prediction_basic_interpolation(cx: &mut TestAppContext) { can_collect_data: false, repo_url: None, }, - buffer_snapshotted_at: Instant::now(), - response_received_at: Instant::now(), model_version: None, }; @@ -3115,6 +3127,7 @@ async fn test_edit_prediction_settled(cx: &mut TestAppContext) { &snapshot_a, editable_region_a.clone(), None, + Duration::from_secs(0), cx, ); }); @@ -3178,6 +3191,7 @@ async fn test_edit_prediction_settled(cx: &mut TestAppContext) { &snapshot_b2, editable_region_b.clone(), None, + Duration::from_secs(0), cx, ); }); diff --git a/crates/edit_prediction/src/fim.rs b/crates/edit_prediction/src/fim.rs index 8de58b9b2e52502519a362d9502ddc1b3cdffde4..46586eb3796026c764ff8659734c564e368681b9 100644 --- a/crates/edit_prediction/src/fim.rs +++ b/crates/edit_prediction/src/fim.rs @@ -19,10 +19,8 @@ struct FimRequestOutput { request_id: String, edits: Vec<(std::ops::Range, Arc)>, snapshot: BufferSnapshot, - response_received_at: Instant, inputs: ZetaPromptInput, buffer: Entity, - buffer_snapshotted_at: Instant, } pub fn request_prediction( @@ -47,7 +45,7 @@ pub fn request_prediction( let http_client = cx.http_client(); let cursor_point = position.to_point(&snapshot); - let buffer_snapshotted_at = Instant::now(); + let request_start = cx.background_executor().now(); let Some(settings) = (match provider { settings::EditPredictionProvider::Ollama => settings.ollama.clone(), @@ -119,7 +117,7 @@ pub fn request_prediction( log::debug!( "fim: completion received ({:.2}s)", - (response_received_at - buffer_snapshotted_at).as_secs_f64() + (response_received_at - request_start).as_secs_f64() ); let completion: Arc = clean_fim_completion(&response_text).into(); @@ -135,10 +133,8 @@ pub fn request_prediction( request_id, edits, snapshot, - response_received_at, inputs, buffer, - buffer_snapshotted_at, }) }); @@ -151,10 +147,9 @@ pub fn request_prediction( &output.snapshot, output.edits.into(), None, - output.buffer_snapshotted_at, - output.response_received_at, output.inputs, None, + cx.background_executor().now() - request_start, cx, ) .await, diff --git a/crates/edit_prediction/src/mercury.rs b/crates/edit_prediction/src/mercury.rs index b80498c4ddccfffab02e77ceb20e6e9cf68851f4..71362f4c873ca7b6f89030392449916cdc297b8e 100644 --- a/crates/edit_prediction/src/mercury.rs +++ b/crates/edit_prediction/src/mercury.rs @@ -14,7 +14,7 @@ use language::{ToOffset, ToPoint as _}; use language_model::{ApiKeyState, EnvVar, env_var}; use release_channel::AppVersion; use serde::{Deserialize, Serialize}; -use std::{mem, ops::Range, path::Path, sync::Arc, time::Instant}; +use std::{mem, ops::Range, path::Path, sync::Arc}; use zeta_prompt::ZetaPromptInput; const MERCURY_API_URL: &str = "https://api.inceptionlabs.ai/v1/edit/completions"; @@ -67,7 +67,7 @@ impl Mercury { let http_client = cx.http_client(); let cursor_point = position.to_point(&snapshot); - let buffer_snapshotted_at = Instant::now(); + let request_start = cx.background_executor().now(); let active_buffer = buffer.clone(); let result = cx.background_spawn(async move { @@ -171,7 +171,6 @@ impl Mercury { .await .context("Failed to read response body")?; - let response_received_at = Instant::now(); if !response.status().is_success() { if response.status() == StatusCode::PAYMENT_REQUIRED { anyhow::bail!(MercuryPaymentRequiredError( @@ -222,7 +221,7 @@ impl Mercury { ); } - anyhow::Ok((id, edits, snapshot, response_received_at, inputs)) + anyhow::Ok((id, edits, snapshot, inputs)) }); cx.spawn(async move |ep_store, cx| { @@ -240,7 +239,7 @@ impl Mercury { cx.notify(); })?; - let (id, edits, old_snapshot, response_received_at, inputs) = result?; + let (id, edits, old_snapshot, inputs) = result?; anyhow::Ok(Some( EditPredictionResult::new( EditPredictionId(id.into()), @@ -248,10 +247,9 @@ impl Mercury { &old_snapshot, edits.into(), None, - buffer_snapshotted_at, - response_received_at, inputs, None, + cx.background_executor().now() - request_start, cx, ) .await, diff --git a/crates/edit_prediction/src/prediction.rs b/crates/edit_prediction/src/prediction.rs index 0db47b0ec93b69ceebeee1989d8196642385bdd0..ef2bf2deafb7309f4871a921061ab114fa280e2f 100644 --- a/crates/edit_prediction/src/prediction.rs +++ b/crates/edit_prediction/src/prediction.rs @@ -1,8 +1,4 @@ -use std::{ - ops::Range, - sync::Arc, - time::{Duration, Instant}, -}; +use std::{ops::Range, sync::Arc}; use cloud_llm_client::EditPredictionRejectReason; use edit_prediction_types::{PredictedCursorPosition, interpolate_edits}; @@ -29,6 +25,7 @@ impl std::fmt::Display for EditPredictionId { pub struct EditPredictionResult { pub id: EditPredictionId, pub prediction: Result, + pub e2e_latency: std::time::Duration, } impl EditPredictionResult { @@ -38,15 +35,15 @@ impl EditPredictionResult { edited_buffer_snapshot: &BufferSnapshot, edits: Arc<[(Range, Arc)]>, cursor_position: Option, - buffer_snapshotted_at: Instant, - response_received_at: Instant, inputs: ZetaPromptInput, model_version: Option, + e2e_latency: std::time::Duration, cx: &mut AsyncApp, ) -> Self { if edits.is_empty() { return Self { id, + e2e_latency, prediction: Err(EditPredictionRejectReason::Empty), }; } @@ -62,6 +59,7 @@ impl EditPredictionResult { else { return Self { id, + e2e_latency, prediction: Err(EditPredictionRejectReason::InterpolatedEmpty), }; }; @@ -70,6 +68,7 @@ impl EditPredictionResult { Self { id: id.clone(), + e2e_latency, prediction: Ok(EditPrediction { id, edits, @@ -78,8 +77,6 @@ impl EditPredictionResult { edit_preview, inputs, buffer: edited_buffer.clone(), - buffer_snapshotted_at, - response_received_at, model_version, }), } @@ -94,8 +91,6 @@ pub struct EditPrediction { pub snapshot: BufferSnapshot, pub edit_preview: EditPreview, pub buffer: Entity, - pub buffer_snapshotted_at: Instant, - pub response_received_at: Instant, pub inputs: zeta_prompt::ZetaPromptInput, pub model_version: Option, } @@ -111,10 +106,6 @@ impl EditPrediction { pub fn targets_buffer(&self, buffer: &Buffer) -> bool { self.snapshot.remote_id() == buffer.remote_id() } - - pub fn latency(&self) -> Duration { - self.response_received_at - self.buffer_snapshotted_at - } } impl std::fmt::Debug for EditPrediction { @@ -169,8 +160,6 @@ mod tests { can_collect_data: false, repo_url: None, }, - buffer_snapshotted_at: Instant::now(), - response_received_at: Instant::now(), }; cx.update(|cx| { diff --git a/crates/edit_prediction/src/sweep_ai.rs b/crates/edit_prediction/src/sweep_ai.rs index 99ddd9b86d238c2e56331f52f9fad51438ee1f71..93a9a34340cfe0b55e40d35bb4c8980dff983fa5 100644 --- a/crates/edit_prediction/src/sweep_ai.rs +++ b/crates/edit_prediction/src/sweep_ai.rs @@ -21,7 +21,6 @@ use std::{ ops::Range, path::Path, sync::Arc, - time::Instant, }; const SWEEP_API_URL: &str = "https://autocomplete.sweep.dev/backend/next_edit_autocomplete"; @@ -50,6 +49,7 @@ impl SweepAi { .sweep .privacy_mode; let debug_info = self.debug_info.clone(); + let request_start = cx.background_executor().now(); self.api_token.update(cx, |key_state, cx| { _ = key_state.load_if_needed(SWEEP_CREDENTIALS_URL, |s| s, cx); }); @@ -90,8 +90,6 @@ impl SweepAi { .take(3) .collect::>(); - let buffer_snapshotted_at = Instant::now(); - let result = cx.background_spawn(async move { let text = inputs.snapshot.text(); @@ -255,7 +253,6 @@ impl SweepAi { let mut body = String::new(); response.body_mut().read_to_string(&mut body).await?; - let response_received_at = Instant::now(); if !response.status().is_success() { let message = format!( "Request failed with status: {:?}\nBody: {}", @@ -289,19 +286,13 @@ impl SweepAi { }) .collect::>(); - anyhow::Ok(( - response.autocomplete_id, - edits, - inputs.snapshot, - response_received_at, - ep_inputs, - )) + anyhow::Ok((response.autocomplete_id, edits, inputs.snapshot, ep_inputs)) }); let buffer = inputs.buffer.clone(); cx.spawn(async move |cx| { - let (id, edits, old_snapshot, response_received_at, inputs) = result.await?; + let (id, edits, old_snapshot, inputs) = result.await?; anyhow::Ok(Some( EditPredictionResult::new( EditPredictionId(id.into()), @@ -309,10 +300,9 @@ impl SweepAi { &old_snapshot, edits.into(), None, - buffer_snapshotted_at, - response_received_at, inputs, None, + cx.background_executor().now() - request_start, cx, ) .await, diff --git a/crates/edit_prediction/src/zeta.rs b/crates/edit_prediction/src/zeta.rs index fc3ed81c78737f4ba4c8b7aa5131232b2b007b87..e7d38df5c8e99b86303ca72a715e10acf22eb9b1 100644 --- a/crates/edit_prediction/src/zeta.rs +++ b/crates/edit_prediction/src/zeta.rs @@ -22,7 +22,7 @@ use ui::SharedString; use workspace::notifications::{ErrorMessagePrompt, NotificationId, show_app_notification}; use zeta_prompt::{ParsedOutput, ZetaPromptInput}; -use std::{env, ops::Range, path::Path, sync::Arc, time::Instant}; +use std::{env, ops::Range, path::Path, sync::Arc}; use zeta_prompt::{ CURSOR_MARKER, ZetaFormat, format_zeta_prompt, get_prefill, parse_zeta2_model_output, prompt_input_contains_special_tokens, stop_tokens_for_format, @@ -63,7 +63,7 @@ pub fn request_prediction_with_zeta( }; let http_client = cx.http_client(); - let buffer_snapshotted_at = Instant::now(); + let request_start = cx.background_executor().now(); let raw_config = store.zeta2_raw_config().cloned(); let preferred_experiment = store.preferred_experiment().map(|s| s.to_owned()); let open_ai_compatible_api_key = load_open_ai_compatible_api_key_if_needed(provider, cx); @@ -100,7 +100,6 @@ pub fn request_prediction_with_zeta( snapshot: BufferSnapshot, edits: Vec<(Range, Arc)>, cursor_position: Option, - received_response_at: Instant, editable_range_in_buffer: Range, model_version: Option, } @@ -295,8 +294,6 @@ pub fn request_prediction_with_zeta( return Ok((None, None)); }; - let received_response_at = Instant::now(); - log::trace!("Got edit prediction response"); let Some(ParsedOutput { @@ -358,7 +355,6 @@ pub fn request_prediction_with_zeta( snapshot: snapshot.clone(), edits, cursor_position, - received_response_at, editable_range_in_buffer, model_version, }), @@ -369,6 +365,7 @@ pub fn request_prediction_with_zeta( }); cx.spawn(async move |this, cx| { + let request_duration = cx.background_executor().now() - request_start; let Some((id, prediction)) = handle_api_response(&this, request_task.await, cx)? else { return Ok(None); }; @@ -379,13 +376,13 @@ pub fn request_prediction_with_zeta( snapshot: edited_buffer_snapshot, edits, cursor_position, - received_response_at, editable_range_in_buffer, model_version, }) = prediction else { return Ok(Some(EditPredictionResult { id, + e2e_latency: request_duration, prediction: Err(EditPredictionRejectReason::Empty), })); }; @@ -423,6 +420,7 @@ pub fn request_prediction_with_zeta( &edited_buffer_snapshot, editable_range_in_buffer, example_spec, + request_duration, cx, ); }) @@ -438,10 +436,9 @@ pub fn request_prediction_with_zeta( &edited_buffer_snapshot, edits.into(), cursor_position, - buffer_snapshotted_at, - received_response_at, inputs, model_version, + request_duration, cx, ) .await, @@ -590,6 +587,7 @@ pub(crate) fn edit_prediction_accepted( let request_id = current_prediction.prediction.id.to_string(); let model_version = current_prediction.prediction.model_version; + let e2e_latency = current_prediction.e2e_latency; let require_auth = custom_accept_url.is_none(); let client = store.client.clone(); let llm_token = store.llm_token.clone(); @@ -615,6 +613,7 @@ pub(crate) fn edit_prediction_accepted( serde_json::to_string(&AcceptEditPredictionBody { request_id: request_id.clone(), model_version: model_version.clone(), + e2e_latency_ms: Some(e2e_latency.as_millis()), })? .into(), ); diff --git a/crates/edit_prediction_ui/src/rate_prediction_modal.rs b/crates/edit_prediction_ui/src/rate_prediction_modal.rs index b2e7209c1a7e9dd403ed0ee70336119ef0f1bdc9..15cccc777feb0a999724f2b4405fc11df8c5f252 100644 --- a/crates/edit_prediction_ui/src/rate_prediction_modal.rs +++ b/crates/edit_prediction_ui/src/rate_prediction_modal.rs @@ -13,7 +13,7 @@ use project::{ }; use settings::Settings as _; use std::rc::Rc; -use std::{fmt::Write, sync::Arc, time::Duration}; +use std::{fmt::Write, sync::Arc}; use theme::ThemeSettings; use ui::{ ContextMenu, DropdownMenu, KeyBinding, List, ListItem, ListItemSpacing, PopoverMenuHandle, @@ -850,30 +850,18 @@ impl RatePredictionsModal { .gap_3() .child(Icon::new(icon_name).color(icon_color).size(IconSize::Small)) .child( - v_flex() - .child( - h_flex() - .gap_1() - .child(Label::new(file_name).size(LabelSize::Small)) - .when_some(file_path, |this, p| { - this.child( - Label::new(p) - .size(LabelSize::Small) - .color(Color::Muted), - ) - }), - ) - .child( - Label::new(format!( - "{} ago, {:.2?}", - format_time_ago( - completion.response_received_at.elapsed() - ), - completion.latency() - )) - .color(Color::Muted) - .size(LabelSize::XSmall), - ), + v_flex().child( + h_flex() + .gap_1() + .child(Label::new(file_name).size(LabelSize::Small)) + .when_some(file_path, |this, p| { + this.child( + Label::new(p) + .size(LabelSize::Small) + .color(Color::Muted), + ) + }), + ), ), ) .tooltip(Tooltip::text(tooltip_text)) @@ -977,23 +965,6 @@ impl Focusable for RatePredictionsModal { impl ModalView for RatePredictionsModal {} -fn format_time_ago(elapsed: Duration) -> String { - let seconds = elapsed.as_secs(); - if seconds < 120 { - "1 minute".to_string() - } else if seconds < 3600 { - format!("{} minutes", seconds / 60) - } else if seconds < 7200 { - "1 hour".to_string() - } else if seconds < 86400 { - format!("{} hours", seconds / 3600) - } else if seconds < 172800 { - "1 day".to_string() - } else { - format!("{} days", seconds / 86400) - } -} - struct FeedbackCompletionProvider; impl FeedbackCompletionProvider {