diff --git a/crates/edit_prediction/src/capture_example.rs b/crates/edit_prediction/src/capture_example.rs index 232081c579f3e0c01d33d04d1bfebdeb50621cfd..1d5557472f5e9cc79aceb28797bb028989a71aa4 100644 --- a/crates/edit_prediction/src/capture_example.rs +++ b/crates/edit_prediction/src/capture_example.rs @@ -1,16 +1,19 @@ use crate::{ - EditPredictionStore, StoredEvent, + EditPredictionExampleCaptureFeatureFlag, EditPredictionStore, StoredEvent, cursor_excerpt::editable_and_context_ranges_for_cursor_position, example_spec::ExampleSpec, }; use anyhow::Result; use buffer_diff::BufferDiffSnapshot; use collections::HashMap; +use feature_flags::FeatureFlagAppExt as _; use gpui::{App, Entity, Task}; use language::{Buffer, ToPoint as _}; use project::{Project, WorktreeId}; use std::{collections::hash_map, fmt::Write as _, path::Path, sync::Arc}; use text::BufferSnapshot as TextBufferSnapshot; +pub(crate) const DEFAULT_EXAMPLE_CAPTURE_RATE_PER_10K_PREDICTIONS: u16 = 10; + pub fn capture_example( project: Entity<Project>, buffer: Entity<Buffer>, @@ -189,6 +192,15 @@ fn generate_timestamp_name() -> String { } } +pub(crate) fn should_sample_edit_prediction_example_capture(cx: &App) -> bool { + let capture_rate = language::language_settings::all_language_settings(None, cx) + .edit_predictions + .example_capture_rate + .unwrap_or(DEFAULT_EXAMPLE_CAPTURE_RATE_PER_10K_PREDICTIONS); + cx.has_flag::<EditPredictionExampleCaptureFeatureFlag>() + && rand::random::<u16>() % 10_000 < capture_rate +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/edit_prediction/src/edit_prediction.rs b/crates/edit_prediction/src/edit_prediction.rs index 1c3035d178f86f172f0457ed08dfd4246a626783..1bd9bd8b57350cb8f0b97bb1c426a5051b5d9d95 100644 --- a/crates/edit_prediction/src/edit_prediction.rs +++ b/crates/edit_prediction/src/edit_prediction.rs @@ -68,6 +68,7 @@ pub mod zeta2; #[cfg(test)] mod edit_prediction_tests; +use crate::capture_example::should_sample_edit_prediction_example_capture; use crate::license_detection::LicenseDetectionWatcher; use 
crate::mercury::Mercury; use crate::onboarding_modal::ZedPredictModal; @@ -141,6 +142,16 @@ impl FeatureFlag for Zeta2FeatureFlag { } } +pub struct EditPredictionExampleCaptureFeatureFlag; + +impl FeatureFlag for EditPredictionExampleCaptureFeatureFlag { + const NAME: &'static str = "edit-prediction-example-capture"; + + fn enabled_for_staff() -> bool { + true + } +} + #[derive(Clone)] struct EditPredictionStoreGlobal(Entity<EditPredictionStore>); @@ -1628,6 +1639,26 @@ impl EditPredictionStore { debug_tx, }; + let can_collect_example = snapshot + .file() + .is_some_and(|file| self.can_collect_file(&project, file, cx)) + && self.can_collect_events(&inputs.events); + + if can_collect_example && should_sample_edit_prediction_example_capture(cx) { + if let Some(example_task) = capture_example::capture_example( + project.clone(), + active_buffer.clone(), + position, + cx, + ) { + cx.spawn(async move |_this, _cx| { + let example = example_task.await?; + telemetry::event!("Edit Prediction Example Captured", example = example); + anyhow::Ok(()) + }) + .detach_and_log_err(cx); + } + } let task = match self.edit_prediction_model { EditPredictionModel::Zeta1 => zeta1::request_prediction_with_zeta1(self, inputs, cx), EditPredictionModel::Zeta2 => zeta2::request_prediction_with_zeta2(self, inputs, cx), diff --git a/crates/language/src/language_settings.rs b/crates/language/src/language_settings.rs index cd2219b18ec8fda1d8783aaffa0917bfb4ddb041..e7129a5f1abad4fb7b1dbda6c36decc40c5e3153 100644 --- a/crates/language/src/language_settings.rs +++ b/crates/language/src/language_settings.rs @@ -393,6 +393,7 @@ pub struct EditPredictionSettings { /// This setting has no effect if globally disabled. 
pub enabled_in_text_threads: bool, pub examples_dir: Option<Arc<Path>>, + pub example_capture_rate: Option<u16>, } impl EditPredictionSettings { @@ -701,6 +702,7 @@ impl settings::Settings for AllLanguageSettings { codestral: codestral_settings, enabled_in_text_threads, examples_dir: edit_predictions.examples_dir, + example_capture_rate: edit_predictions.example_capture_rate, }, defaults: default_language_settings, languages, diff --git a/crates/settings/src/settings_content/language.rs b/crates/settings/src/settings_content/language.rs index 47e6da5d7b44d687c6c6cddbda8167efeb2e5b34..a23b9df1eede0121e29188f8d188595b6014dae2 100644 --- a/crates/settings/src/settings_content/language.rs +++ b/crates/settings/src/settings_content/language.rs @@ -169,6 +169,8 @@ pub struct EditPredictionSettingsContent { pub enabled_in_text_threads: Option<bool>, /// The directory where manually captured edit prediction examples are stored. pub examples_dir: Option<Arc<Path>>, + /// The number of edit prediction examples captured per ten thousand predictions. + pub example_capture_rate: Option<u16>, } #[with_fallible_options] diff --git a/docs/src/ai/ai-improvement.md b/docs/src/ai/ai-improvement.md index 24a19d206296338707ec10e56e5d251cea284971..565d872102d3d34541b6cffacebdee15960eb63b 100644 --- a/docs/src/ai/ai-improvement.md +++ b/docs/src/ai/ai-improvement.md @@ -93,6 +93,7 @@ For open source projects where you have opted-in, Zed may store copies of reques This data includes: +- sampled edit prediction examples (cursor context + recent diffs/edits) for offline evaluation - the edit prediction - a portion of the buffer content around the cursor - a few recent edits