Detailed changes
@@ -144,6 +144,8 @@ pub struct AcceptEditPredictionBody {
pub request_id: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_version: Option<String>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub e2e_latency_ms: Option<u128>,
}
#[derive(Debug, Clone, Deserialize)]
@@ -164,6 +166,8 @@ pub struct EditPredictionRejection {
pub was_shown: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_version: Option<String>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub e2e_latency_ms: Option<u128>,
}
#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
@@ -385,6 +385,7 @@ impl ProjectState {
EditPredictionRejectReason::Canceled,
false,
None,
+ None,
cx,
);
})
@@ -413,6 +414,7 @@ struct CurrentEditPrediction {
pub prediction: EditPrediction,
pub was_shown: bool,
pub shown_with: Option<edit_prediction_types::SuggestionDisplayType>,
+ pub e2e_latency: std::time::Duration,
}
impl CurrentEditPrediction {
@@ -506,12 +508,14 @@ impl std::ops::Deref for BufferEditPrediction<'_> {
}
#[derive(Clone)]
+
struct PendingSettledPrediction {
request_id: EditPredictionId,
editable_anchor_range: Range<Anchor>,
example: Option<ExampleSpec>,
enqueued_at: Instant,
last_edit_at: Instant,
+ e2e_latency: std::time::Duration,
}
struct RegisteredBuffer {
@@ -1686,6 +1690,7 @@ impl EditPredictionStore {
request_id = pending_prediction.request_id.0.clone(),
settled_editable_region,
example = pending_prediction.example.take(),
+ e2e_latency = pending_prediction.e2e_latency.as_millis(),
);
return false;
@@ -1715,6 +1720,7 @@ impl EditPredictionStore {
edited_buffer_snapshot: &BufferSnapshot,
editable_offset_range: Range<usize>,
example: Option<ExampleSpec>,
+ e2e_latency: std::time::Duration,
cx: &mut Context<Self>,
) {
let this = &mut *self;
@@ -1729,6 +1735,7 @@ impl EditPredictionStore {
editable_anchor_range: edited_buffer_snapshot
.anchor_range_around(editable_offset_range),
example,
+ e2e_latency,
enqueued_at: now,
last_edit_at: now,
});
@@ -1751,6 +1758,7 @@ impl EditPredictionStore {
reason,
prediction.was_shown,
model_version,
+ Some(prediction.e2e_latency),
cx,
);
}
@@ -1812,6 +1820,7 @@ impl EditPredictionStore {
reason: EditPredictionRejectReason,
was_shown: bool,
model_version: Option<String>,
+ e2e_latency: Option<std::time::Duration>,
cx: &App,
) {
match self.edit_prediction_model {
@@ -1835,6 +1844,7 @@ impl EditPredictionStore {
reason,
was_shown,
model_version,
+ e2e_latency_ms: e2e_latency.map(|latency| latency.as_millis()),
},
organization_id,
})
@@ -2008,6 +2018,7 @@ impl EditPredictionStore {
EditPredictionResult {
id: prediction_result.id,
prediction: Err(EditPredictionRejectReason::CurrentPreferred),
+ e2e_latency: prediction_result.e2e_latency,
}
},
PredictionRequestedBy::DiagnosticsUpdate,
@@ -2205,6 +2216,7 @@ impl EditPredictionStore {
prediction,
was_shown: false,
shown_with: None,
+ e2e_latency: prediction_result.e2e_latency,
};
if let Some(current_prediction) =
@@ -2225,6 +2237,7 @@ impl EditPredictionStore {
EditPredictionRejectReason::CurrentPreferred,
false,
new_prediction.prediction.model_version,
+ Some(new_prediction.e2e_latency),
cx,
);
None
@@ -2239,6 +2252,7 @@ impl EditPredictionStore {
reject_reason,
false,
None,
+ Some(prediction_result.e2e_latency),
cx,
);
None
@@ -1323,6 +1323,7 @@ async fn test_empty_prediction(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Empty,
was_shown: false,
model_version: None,
+ e2e_latency_ms: Some(0),
}]
);
}
@@ -1384,6 +1385,7 @@ async fn test_interpolated_empty(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::InterpolatedEmpty,
was_shown: false,
model_version: None,
+ e2e_latency_ms: Some(0),
}]
);
}
@@ -1477,6 +1479,7 @@ async fn test_replace_current(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Replaced,
was_shown: false,
model_version: None,
+ e2e_latency_ms: Some(0),
}]
);
}
@@ -1572,6 +1575,7 @@ async fn test_current_preferred(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::CurrentPreferred,
was_shown: false,
model_version: None,
+ e2e_latency_ms: Some(0),
}]
);
}
@@ -1664,6 +1668,7 @@ async fn test_cancel_earlier_pending_requests(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Canceled,
was_shown: false,
model_version: None,
+ e2e_latency_ms: None,
}]
);
}
@@ -1795,12 +1800,14 @@ async fn test_cancel_second_on_third_request(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Canceled,
was_shown: false,
model_version: None,
+ e2e_latency_ms: None,
},
EditPredictionRejection {
request_id: first_id,
reason: EditPredictionRejectReason::Replaced,
was_shown: false,
model_version: None,
+ e2e_latency_ms: Some(0),
}
]
);
@@ -1963,6 +1970,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
EditPredictionRejectReason::Discarded,
false,
None,
+ None,
cx,
);
ep_store.reject_prediction(
@@ -1970,6 +1978,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
EditPredictionRejectReason::Canceled,
true,
None,
+ None,
cx,
);
});
@@ -1989,6 +1998,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Discarded,
was_shown: false,
model_version: None,
+ e2e_latency_ms: None
}
);
assert_eq!(
@@ -1998,6 +2008,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
reason: EditPredictionRejectReason::Canceled,
was_shown: true,
model_version: None,
+ e2e_latency_ms: None
}
);
@@ -2009,6 +2020,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
EditPredictionRejectReason::Discarded,
false,
None,
+ None,
cx,
);
}
@@ -2041,6 +2053,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
EditPredictionRejectReason::Discarded,
false,
None,
+ None,
cx,
);
});
@@ -2061,6 +2074,7 @@ async fn test_rejections_flushing(cx: &mut TestAppContext) {
EditPredictionRejectReason::Discarded,
false,
None,
+ None,
cx,
);
});
@@ -2394,8 +2408,6 @@ async fn test_edit_prediction_basic_interpolation(cx: &mut TestAppContext) {
can_collect_data: false,
repo_url: None,
},
- buffer_snapshotted_at: Instant::now(),
- response_received_at: Instant::now(),
model_version: None,
};
@@ -3115,6 +3127,7 @@ async fn test_edit_prediction_settled(cx: &mut TestAppContext) {
&snapshot_a,
editable_region_a.clone(),
None,
+ Duration::from_secs(0),
cx,
);
});
@@ -3178,6 +3191,7 @@ async fn test_edit_prediction_settled(cx: &mut TestAppContext) {
&snapshot_b2,
editable_region_b.clone(),
None,
+ Duration::from_secs(0),
cx,
);
});
@@ -19,10 +19,8 @@ struct FimRequestOutput {
request_id: String,
edits: Vec<(std::ops::Range<Anchor>, Arc<str>)>,
snapshot: BufferSnapshot,
- response_received_at: Instant,
inputs: ZetaPromptInput,
buffer: Entity<Buffer>,
- buffer_snapshotted_at: Instant,
}
pub fn request_prediction(
@@ -47,7 +45,7 @@ pub fn request_prediction(
let http_client = cx.http_client();
let cursor_point = position.to_point(&snapshot);
- let buffer_snapshotted_at = Instant::now();
+ let request_start = cx.background_executor().now();
let Some(settings) = (match provider {
settings::EditPredictionProvider::Ollama => settings.ollama.clone(),
@@ -119,7 +117,7 @@ pub fn request_prediction(
log::debug!(
"fim: completion received ({:.2}s)",
- (response_received_at - buffer_snapshotted_at).as_secs_f64()
+ (response_received_at - request_start).as_secs_f64()
);
let completion: Arc<str> = clean_fim_completion(&response_text).into();
@@ -135,10 +133,8 @@ pub fn request_prediction(
request_id,
edits,
snapshot,
- response_received_at,
inputs,
buffer,
- buffer_snapshotted_at,
})
});
@@ -151,10 +147,9 @@ pub fn request_prediction(
&output.snapshot,
output.edits.into(),
None,
- output.buffer_snapshotted_at,
- output.response_received_at,
output.inputs,
None,
+ cx.background_executor().now() - request_start,
cx,
)
.await,
@@ -14,7 +14,7 @@ use language::{ToOffset, ToPoint as _};
use language_model::{ApiKeyState, EnvVar, env_var};
use release_channel::AppVersion;
use serde::{Deserialize, Serialize};
-use std::{mem, ops::Range, path::Path, sync::Arc, time::Instant};
+use std::{mem, ops::Range, path::Path, sync::Arc};
use zeta_prompt::ZetaPromptInput;
const MERCURY_API_URL: &str = "https://api.inceptionlabs.ai/v1/edit/completions";
@@ -67,7 +67,7 @@ impl Mercury {
let http_client = cx.http_client();
let cursor_point = position.to_point(&snapshot);
- let buffer_snapshotted_at = Instant::now();
+ let request_start = cx.background_executor().now();
let active_buffer = buffer.clone();
let result = cx.background_spawn(async move {
@@ -171,7 +171,6 @@ impl Mercury {
.await
.context("Failed to read response body")?;
- let response_received_at = Instant::now();
if !response.status().is_success() {
if response.status() == StatusCode::PAYMENT_REQUIRED {
anyhow::bail!(MercuryPaymentRequiredError(
@@ -222,7 +221,7 @@ impl Mercury {
);
}
- anyhow::Ok((id, edits, snapshot, response_received_at, inputs))
+ anyhow::Ok((id, edits, snapshot, inputs))
});
cx.spawn(async move |ep_store, cx| {
@@ -240,7 +239,7 @@ impl Mercury {
cx.notify();
})?;
- let (id, edits, old_snapshot, response_received_at, inputs) = result?;
+ let (id, edits, old_snapshot, inputs) = result?;
anyhow::Ok(Some(
EditPredictionResult::new(
EditPredictionId(id.into()),
@@ -248,10 +247,9 @@ impl Mercury {
&old_snapshot,
edits.into(),
None,
- buffer_snapshotted_at,
- response_received_at,
inputs,
None,
+ cx.background_executor().now() - request_start,
cx,
)
.await,
@@ -1,8 +1,4 @@
-use std::{
- ops::Range,
- sync::Arc,
- time::{Duration, Instant},
-};
+use std::{ops::Range, sync::Arc};
use cloud_llm_client::EditPredictionRejectReason;
use edit_prediction_types::{PredictedCursorPosition, interpolate_edits};
@@ -29,6 +25,7 @@ impl std::fmt::Display for EditPredictionId {
pub struct EditPredictionResult {
pub id: EditPredictionId,
pub prediction: Result<EditPrediction, EditPredictionRejectReason>,
+ pub e2e_latency: std::time::Duration,
}
impl EditPredictionResult {
@@ -38,15 +35,15 @@ impl EditPredictionResult {
edited_buffer_snapshot: &BufferSnapshot,
edits: Arc<[(Range<Anchor>, Arc<str>)]>,
cursor_position: Option<PredictedCursorPosition>,
- buffer_snapshotted_at: Instant,
- response_received_at: Instant,
inputs: ZetaPromptInput,
model_version: Option<String>,
+ e2e_latency: std::time::Duration,
cx: &mut AsyncApp,
) -> Self {
if edits.is_empty() {
return Self {
id,
+ e2e_latency,
prediction: Err(EditPredictionRejectReason::Empty),
};
}
@@ -62,6 +59,7 @@ impl EditPredictionResult {
else {
return Self {
id,
+ e2e_latency,
prediction: Err(EditPredictionRejectReason::InterpolatedEmpty),
};
};
@@ -70,6 +68,7 @@ impl EditPredictionResult {
Self {
id: id.clone(),
+ e2e_latency,
prediction: Ok(EditPrediction {
id,
edits,
@@ -78,8 +77,6 @@ impl EditPredictionResult {
edit_preview,
inputs,
buffer: edited_buffer.clone(),
- buffer_snapshotted_at,
- response_received_at,
model_version,
}),
}
@@ -94,8 +91,6 @@ pub struct EditPrediction {
pub snapshot: BufferSnapshot,
pub edit_preview: EditPreview,
pub buffer: Entity<Buffer>,
- pub buffer_snapshotted_at: Instant,
- pub response_received_at: Instant,
pub inputs: zeta_prompt::ZetaPromptInput,
pub model_version: Option<String>,
}
@@ -111,10 +106,6 @@ impl EditPrediction {
pub fn targets_buffer(&self, buffer: &Buffer) -> bool {
self.snapshot.remote_id() == buffer.remote_id()
}
-
- pub fn latency(&self) -> Duration {
- self.response_received_at - self.buffer_snapshotted_at
- }
}
impl std::fmt::Debug for EditPrediction {
@@ -169,8 +160,6 @@ mod tests {
can_collect_data: false,
repo_url: None,
},
- buffer_snapshotted_at: Instant::now(),
- response_received_at: Instant::now(),
};
cx.update(|cx| {
@@ -21,7 +21,6 @@ use std::{
ops::Range,
path::Path,
sync::Arc,
- time::Instant,
};
const SWEEP_API_URL: &str = "https://autocomplete.sweep.dev/backend/next_edit_autocomplete";
@@ -50,6 +49,7 @@ impl SweepAi {
.sweep
.privacy_mode;
let debug_info = self.debug_info.clone();
+ let request_start = cx.background_executor().now();
self.api_token.update(cx, |key_state, cx| {
_ = key_state.load_if_needed(SWEEP_CREDENTIALS_URL, |s| s, cx);
});
@@ -90,8 +90,6 @@ impl SweepAi {
.take(3)
.collect::<Vec<_>>();
- let buffer_snapshotted_at = Instant::now();
-
let result = cx.background_spawn(async move {
let text = inputs.snapshot.text();
@@ -255,7 +253,6 @@ impl SweepAi {
let mut body = String::new();
response.body_mut().read_to_string(&mut body).await?;
- let response_received_at = Instant::now();
if !response.status().is_success() {
let message = format!(
"Request failed with status: {:?}\nBody: {}",
@@ -289,19 +286,13 @@ impl SweepAi {
})
.collect::<Vec<_>>();
- anyhow::Ok((
- response.autocomplete_id,
- edits,
- inputs.snapshot,
- response_received_at,
- ep_inputs,
- ))
+ anyhow::Ok((response.autocomplete_id, edits, inputs.snapshot, ep_inputs))
});
let buffer = inputs.buffer.clone();
cx.spawn(async move |cx| {
- let (id, edits, old_snapshot, response_received_at, inputs) = result.await?;
+ let (id, edits, old_snapshot, inputs) = result.await?;
anyhow::Ok(Some(
EditPredictionResult::new(
EditPredictionId(id.into()),
@@ -309,10 +300,9 @@ impl SweepAi {
&old_snapshot,
edits.into(),
None,
- buffer_snapshotted_at,
- response_received_at,
inputs,
None,
+ cx.background_executor().now() - request_start,
cx,
)
.await,
@@ -22,7 +22,7 @@ use ui::SharedString;
use workspace::notifications::{ErrorMessagePrompt, NotificationId, show_app_notification};
use zeta_prompt::{ParsedOutput, ZetaPromptInput};
-use std::{env, ops::Range, path::Path, sync::Arc, time::Instant};
+use std::{env, ops::Range, path::Path, sync::Arc};
use zeta_prompt::{
CURSOR_MARKER, ZetaFormat, format_zeta_prompt, get_prefill, parse_zeta2_model_output,
prompt_input_contains_special_tokens, stop_tokens_for_format,
@@ -63,7 +63,7 @@ pub fn request_prediction_with_zeta(
};
let http_client = cx.http_client();
- let buffer_snapshotted_at = Instant::now();
+ let request_start = cx.background_executor().now();
let raw_config = store.zeta2_raw_config().cloned();
let preferred_experiment = store.preferred_experiment().map(|s| s.to_owned());
let open_ai_compatible_api_key = load_open_ai_compatible_api_key_if_needed(provider, cx);
@@ -100,7 +100,6 @@ pub fn request_prediction_with_zeta(
snapshot: BufferSnapshot,
edits: Vec<(Range<Anchor>, Arc<str>)>,
cursor_position: Option<PredictedCursorPosition>,
- received_response_at: Instant,
editable_range_in_buffer: Range<usize>,
model_version: Option<String>,
}
@@ -295,8 +294,6 @@ pub fn request_prediction_with_zeta(
return Ok((None, None));
};
- let received_response_at = Instant::now();
-
log::trace!("Got edit prediction response");
let Some(ParsedOutput {
@@ -358,7 +355,6 @@ pub fn request_prediction_with_zeta(
snapshot: snapshot.clone(),
edits,
cursor_position,
- received_response_at,
editable_range_in_buffer,
model_version,
}),
@@ -369,6 +365,7 @@ pub fn request_prediction_with_zeta(
});
cx.spawn(async move |this, cx| {
let Some((id, prediction)) = handle_api_response(&this, request_task.await, cx)? else {
return Ok(None);
};
+ let request_duration = cx.background_executor().now() - request_start;
@@ -379,13 +376,13 @@ pub fn request_prediction_with_zeta(
snapshot: edited_buffer_snapshot,
edits,
cursor_position,
- received_response_at,
editable_range_in_buffer,
model_version,
}) = prediction
else {
return Ok(Some(EditPredictionResult {
id,
+ e2e_latency: request_duration,
prediction: Err(EditPredictionRejectReason::Empty),
}));
};
@@ -423,6 +420,7 @@ pub fn request_prediction_with_zeta(
&edited_buffer_snapshot,
editable_range_in_buffer,
example_spec,
+ request_duration,
cx,
);
})
@@ -438,10 +436,9 @@ pub fn request_prediction_with_zeta(
&edited_buffer_snapshot,
edits.into(),
cursor_position,
- buffer_snapshotted_at,
- received_response_at,
inputs,
model_version,
+ request_duration,
cx,
)
.await,
@@ -590,6 +587,7 @@ pub(crate) fn edit_prediction_accepted(
let request_id = current_prediction.prediction.id.to_string();
let model_version = current_prediction.prediction.model_version;
+ let e2e_latency = current_prediction.e2e_latency;
let require_auth = custom_accept_url.is_none();
let client = store.client.clone();
let llm_token = store.llm_token.clone();
@@ -615,6 +613,7 @@ pub(crate) fn edit_prediction_accepted(
serde_json::to_string(&AcceptEditPredictionBody {
request_id: request_id.clone(),
model_version: model_version.clone(),
+ e2e_latency_ms: Some(e2e_latency.as_millis()),
})?
.into(),
);
@@ -13,7 +13,7 @@ use project::{
};
use settings::Settings as _;
use std::rc::Rc;
-use std::{fmt::Write, sync::Arc, time::Duration};
+use std::{fmt::Write, sync::Arc};
use theme::ThemeSettings;
use ui::{
ContextMenu, DropdownMenu, KeyBinding, List, ListItem, ListItemSpacing, PopoverMenuHandle,
@@ -850,30 +850,18 @@ impl RatePredictionsModal {
.gap_3()
.child(Icon::new(icon_name).color(icon_color).size(IconSize::Small))
.child(
- v_flex()
- .child(
- h_flex()
- .gap_1()
- .child(Label::new(file_name).size(LabelSize::Small))
- .when_some(file_path, |this, p| {
- this.child(
- Label::new(p)
- .size(LabelSize::Small)
- .color(Color::Muted),
- )
- }),
- )
- .child(
- Label::new(format!(
- "{} ago, {:.2?}",
- format_time_ago(
- completion.response_received_at.elapsed()
- ),
- completion.latency()
- ))
- .color(Color::Muted)
- .size(LabelSize::XSmall),
- ),
+ v_flex().child(
+ h_flex()
+ .gap_1()
+ .child(Label::new(file_name).size(LabelSize::Small))
+ .when_some(file_path, |this, p| {
+ this.child(
+ Label::new(p)
+ .size(LabelSize::Small)
+ .color(Color::Muted),
+ )
+ }),
+ ),
),
)
.tooltip(Tooltip::text(tooltip_text))
@@ -977,23 +965,6 @@ impl Focusable for RatePredictionsModal {
impl ModalView for RatePredictionsModal {}
-fn format_time_ago(elapsed: Duration) -> String {
- let seconds = elapsed.as_secs();
- if seconds < 120 {
- "1 minute".to_string()
- } else if seconds < 3600 {
- format!("{} minutes", seconds / 60)
- } else if seconds < 7200 {
- "1 hour".to_string()
- } else if seconds < 86400 {
- format!("{} hours", seconds / 3600)
- } else if seconds < 172800 {
- "1 day".to_string()
- } else {
- format!("{} days", seconds / 86400)
- }
-}
-
struct FeedbackCompletionProvider;
impl FeedbackCompletionProvider {