Improve collection of edit prediction examples (#46010)

Max Brunsfeld created 6 days ago

Release Notes:

- N/A

Change summary

crates/edit_prediction/src/capture_example.rs    | 14 +++++++
crates/edit_prediction/src/edit_prediction.rs    | 31 ++++++++++++++++++
crates/language/src/language_settings.rs         |  2 +
crates/settings/src/settings_content/language.rs |  2 +
docs/src/ai/ai-improvement.md                    |  1 
5 files changed, 49 insertions(+), 1 deletion(-)

Detailed changes

crates/edit_prediction/src/capture_example.rs 🔗

@@ -1,16 +1,19 @@
 use crate::{
-    EditPredictionStore, StoredEvent,
+    EditPredictionExampleCaptureFeatureFlag, EditPredictionStore, StoredEvent,
     cursor_excerpt::editable_and_context_ranges_for_cursor_position, example_spec::ExampleSpec,
 };
 use anyhow::Result;
 use buffer_diff::BufferDiffSnapshot;
 use collections::HashMap;
+use feature_flags::FeatureFlagAppExt as _;
 use gpui::{App, Entity, Task};
 use language::{Buffer, ToPoint as _};
 use project::{Project, WorktreeId};
 use std::{collections::hash_map, fmt::Write as _, path::Path, sync::Arc};
 use text::BufferSnapshot as TextBufferSnapshot;
 
+/// Fallback capture rate, in examples per 10,000 predictions, used by
+/// `should_sample_edit_prediction_example_capture` when the `example_capture_rate`
+/// setting is unset.
+pub(crate) const DEFAULT_EXAMPLE_CAPTURE_RATE_PER_10K_PREDICTIONS: u16 = 10;
+
 pub fn capture_example(
     project: Entity<Project>,
     buffer: Entity<Buffer>,
@@ -189,6 +192,15 @@ fn generate_timestamp_name() -> String {
     }
 }
 
+/// Decides whether the current edit prediction should also be captured as an example.
+///
+/// Returns `true` only when `EditPredictionExampleCaptureFeatureFlag` is enabled and a
+/// random roll in `0..10_000` falls below the configured `example_capture_rate`
+/// (falling back to `DEFAULT_EXAMPLE_CAPTURE_RATE_PER_10K_PREDICTIONS` when unset). A
+/// rate of `N` therefore samples about `N` of every 10,000 calls, and a rate of 10,000
+/// or more always samples.
+pub(crate) fn should_sample_edit_prediction_example_capture(cx: &App) -> bool {
+    let capture_rate = language::language_settings::all_language_settings(None, cx)
+        .edit_predictions
+        .example_capture_rate
+        .unwrap_or(DEFAULT_EXAMPLE_CAPTURE_RATE_PER_10K_PREDICTIONS);
+    // Roll from the full `u32` range: reducing a `u16` modulo 10,000 maps seven inputs to
+    // each of 0..=5535 but only six to each of 5536..=9999, which inflates low rates by ~7%.
+    cx.has_flag::<EditPredictionExampleCaptureFeatureFlag>()
+        && rand::random::<u32>() % 10_000 < u32::from(capture_rate)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

crates/edit_prediction/src/edit_prediction.rs 🔗

@@ -68,6 +68,7 @@ pub mod zeta2;
 #[cfg(test)]
 mod edit_prediction_tests;
 
+use crate::capture_example::should_sample_edit_prediction_example_capture;
 use crate::license_detection::LicenseDetectionWatcher;
 use crate::mercury::Mercury;
 use crate::onboarding_modal::ZedPredictModal;
@@ -141,6 +142,16 @@ impl FeatureFlag for Zeta2FeatureFlag {
     }
 }
 
+/// Feature flag gating sampled capture of edit prediction examples; it is checked with
+/// `cx.has_flag` in `should_sample_edit_prediction_example_capture`.
+pub struct EditPredictionExampleCaptureFeatureFlag;
+
+impl FeatureFlag for EditPredictionExampleCaptureFeatureFlag {
+    // Identifier for this flag (presumably the key the flag service matches on — confirm).
+    const NAME: &'static str = "edit-prediction-example-capture";
+
+    // NOTE(review): presumably enables the flag for staff without explicit enrollment —
+    // confirm against the `FeatureFlag` trait documentation.
+    fn enabled_for_staff() -> bool {
+        true
+    }
+}
+
 #[derive(Clone)]
 struct EditPredictionStoreGlobal(Entity<EditPredictionStore>);
 
@@ -1628,6 +1639,26 @@ impl EditPredictionStore {
             debug_tx,
         };
 
+        let can_collect_example = snapshot
+            .file()
+            .is_some_and(|file| self.can_collect_file(&project, file, cx))
+            && self.can_collect_events(&inputs.events);
+
+        if can_collect_example && should_sample_edit_prediction_example_capture(cx) {
+            if let Some(example_task) = capture_example::capture_example(
+                project.clone(),
+                active_buffer.clone(),
+                position,
+                cx,
+            ) {
+                cx.spawn(async move |_this, _cx| {
+                    let example = example_task.await?;
+                    telemetry::event!("Edit Prediction Example Captured", example = example);
+                    anyhow::Ok(())
+                })
+                .detach_and_log_err(cx);
+            }
+        }
         let task = match self.edit_prediction_model {
             EditPredictionModel::Zeta1 => zeta1::request_prediction_with_zeta1(self, inputs, cx),
             EditPredictionModel::Zeta2 => zeta2::request_prediction_with_zeta2(self, inputs, cx),

crates/language/src/language_settings.rs 🔗

@@ -393,6 +393,7 @@ pub struct EditPredictionSettings {
     /// This setting has no effect if globally disabled.
     pub enabled_in_text_threads: bool,
     pub examples_dir: Option<Arc<Path>>,
+    pub example_capture_rate: Option<u16>,
 }
 
 impl EditPredictionSettings {
@@ -701,6 +702,7 @@ impl settings::Settings for AllLanguageSettings {
                 codestral: codestral_settings,
                 enabled_in_text_threads,
                 examples_dir: edit_predictions.examples_dir,
+                example_capture_rate: edit_predictions.example_capture_rate,
             },
             defaults: default_language_settings,
             languages,

crates/settings/src/settings_content/language.rs 🔗

@@ -169,6 +169,8 @@ pub struct EditPredictionSettingsContent {
     pub enabled_in_text_threads: Option<bool>,
     /// The directory where manually captured edit prediction examples are stored.
     pub examples_dir: Option<Arc<Path>>,
+    /// The number of edit prediction examples captured per ten thousand predictions.
+    pub example_capture_rate: Option<u16>,
 }
 
 #[with_fallible_options]

docs/src/ai/ai-improvement.md 🔗

@@ -93,6 +93,7 @@ For open source projects where you have opted-in, Zed may store copies of reques
 
 This data includes:
 
+- sampled edit prediction examples (cursor context + recent diffs/edits) for offline evaluation
 - the edit prediction
 - a portion of the buffer content around the cursor
 - a few recent edits