ep: Include more context (#50533)

Ben Kunkle and Max created

Closes #ISSUE

Before you mark this PR as ready for review, make sure that you have:
- [ ] Added solid test coverage and/or screenshots from manual
testing
- [ ] Done a self-review taking into account security and performance
aspects
- [ ] Aligned any UI changes with the [UI
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)

Release Notes:

- N/A *or* Added/Fixed/Improved ...

---------

Co-authored-by: Max <max@zed.dev>

Change summary

crates/edit_prediction/src/cursor_excerpt.rs  | 29 +++++++++++------
crates/edit_prediction/src/edit_prediction.rs | 35 +++++++++++++++++++-
crates/edit_prediction/src/zeta.rs            | 10 +----
crates/zeta_prompt/src/zeta_prompt.rs         |  6 +++
4 files changed, 60 insertions(+), 20 deletions(-)

Detailed changes

crates/edit_prediction/src/cursor_excerpt.rs 🔗

@@ -13,6 +13,7 @@ pub fn compute_excerpt_ranges(
     let editable_150 = compute_editable_range(snapshot, position, 150);
     let editable_180 = compute_editable_range(snapshot, position, 180);
     let editable_350 = compute_editable_range(snapshot, position, 350);
+    let editable_512 = compute_editable_range(snapshot, position, 512);
 
     let editable_150_context_350 =
         expand_context_syntactically_then_linewise(snapshot, editable_150.clone(), 350);
@@ -20,17 +21,20 @@ pub fn compute_excerpt_ranges(
         expand_context_syntactically_then_linewise(snapshot, editable_180.clone(), 350);
     let editable_350_context_150 =
         expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 150);
+    let editable_350_context_512 =
+        expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 512);
+    let editable_350_context_1024 =
+        expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 1024);
+    let context_4096 = expand_context_syntactically_then_linewise(
+        snapshot,
+        editable_350_context_1024.clone(),
+        4096 - 1024,
+    );
+    let context_8192 =
+        expand_context_syntactically_then_linewise(snapshot, context_4096.clone(), 8192 - 4096);
 
-    let full_start_row = editable_150_context_350
-        .start
-        .row
-        .min(editable_180_context_350.start.row)
-        .min(editable_350_context_150.start.row);
-    let full_end_row = editable_150_context_350
-        .end
-        .row
-        .max(editable_180_context_350.end.row)
-        .max(editable_350_context_150.end.row);
+    let full_start_row = context_8192.start.row;
+    let full_end_row = context_8192.end.row;
 
     let full_context =
         Point::new(full_start_row, 0)..Point::new(full_end_row, snapshot.line_len(full_end_row));
@@ -47,9 +51,14 @@ pub fn compute_excerpt_ranges(
         editable_150: to_offset(&editable_150),
         editable_180: to_offset(&editable_180),
         editable_350: to_offset(&editable_350),
+        editable_512: Some(to_offset(&editable_512)),
         editable_150_context_350: to_offset(&editable_150_context_350),
         editable_180_context_350: to_offset(&editable_180_context_350),
         editable_350_context_150: to_offset(&editable_350_context_150),
+        editable_350_context_512: Some(to_offset(&editable_350_context_512)),
+        editable_350_context_1024: Some(to_offset(&editable_350_context_1024)),
+        context_4096: Some(to_offset(&context_4096)),
+        context_8192: Some(to_offset(&context_8192)),
     };
 
     (full_context, full_context_offset_range, ranges)

crates/edit_prediction/src/edit_prediction.rs 🔗

@@ -173,6 +173,8 @@ pub struct EditPredictionModelInput {
     trigger: PredictEditsRequestTrigger,
     diagnostic_search_range: Range<Point>,
     debug_tx: Option<mpsc::UnboundedSender<DebugEvent>>,
+    can_collect_data: bool,
+    is_open_source: bool,
     pub user_actions: Vec<UserActionRecord>,
 }
 
@@ -2060,7 +2062,7 @@ impl EditPredictionStore {
         let stored_events = project_state.events(cx);
         let has_events = !stored_events.is_empty();
         let events: Vec<Arc<zeta_prompt::Event>> =
-            stored_events.into_iter().map(|e| e.event).collect();
+            stored_events.iter().map(|e| e.event.clone()).collect();
         let debug_tx = project_state.debug_tx.clone();
 
         let snapshot = active_buffer.read(cx).snapshot();
@@ -2094,9 +2096,23 @@ impl EditPredictionStore {
 
         let related_files = self.context_for_project(&project, cx);
 
+        let is_open_source = snapshot
+            .file()
+            .map_or(false, |file| self.is_file_open_source(&project, file, cx))
+            && events.iter().all(|event| event.in_open_source_repo())
+            && related_files.iter().all(|file| file.in_open_source_repo);
+
+        let can_collect_data = !cfg!(test)
+            && is_open_source
+            && self.is_data_collection_enabled(cx)
+            && matches!(
+                self.edit_prediction_model,
+                EditPredictionModel::Zeta1 | EditPredictionModel::Zeta2
+            );
+
         let inputs = EditPredictionModelInput {
             project: project.clone(),
-            buffer: active_buffer,
+            buffer: active_buffer.clone(),
             snapshot: snapshot,
             position,
             events,
@@ -2106,8 +2122,23 @@ impl EditPredictionStore {
             diagnostic_search_range: diagnostic_search_range,
             debug_tx,
             user_actions,
+            can_collect_data,
+            is_open_source,
         };
 
+        if can_collect_data && rand::random_ratio(1, 1000) {
+            if let Some(task) = capture_example(
+                project.clone(),
+                active_buffer,
+                position,
+                stored_events,
+                false,
+                cx,
+            ) {
+                task.detach();
+            }
+        }
+
         let task = match self.edit_prediction_model {
             EditPredictionModel::Zeta1 => zeta::request_prediction_with_zeta(
                 self,

crates/edit_prediction/src/zeta.rs 🔗

@@ -35,6 +35,8 @@ pub fn request_prediction_with_zeta(
         debug_tx,
         trigger,
         project,
+        can_collect_data,
+        is_open_source,
         ..
     }: EditPredictionModelInput,
     preferred_model: Option<EditPredictionModelKind>,
@@ -63,14 +65,6 @@ pub fn request_prediction_with_zeta(
     let llm_token = store.llm_token.clone();
     let app_version = AppVersion::global(cx);
 
-    let is_open_source = snapshot
-        .file()
-        .map_or(false, |file| store.is_file_open_source(&project, file, cx))
-        && events.iter().all(|event| event.in_open_source_repo())
-        && related_files.iter().all(|file| file.in_open_source_repo);
-
-    let can_collect_data = is_open_source && store.is_data_collection_enabled(cx);
-
     let request_task = cx.background_spawn({
         async move {
             let zeta_version = raw_config

crates/zeta_prompt/src/zeta_prompt.rs 🔗

@@ -36,12 +36,18 @@ pub struct ExcerptRanges {
     pub editable_180: Range<usize>,
     /// Editable region computed with a 350-token budget.
     pub editable_350: Range<usize>,
+    /// Editable region computed with a 512-token budget.
+    pub editable_512: Option<Range<usize>>,
     /// Context boundary when using editable_150 with 350 tokens of additional context.
     pub editable_150_context_350: Range<usize>,
     /// Context boundary when using editable_180 with 350 tokens of additional context.
     pub editable_180_context_350: Range<usize>,
     /// Context boundary when using editable_350 with 150 tokens of additional context.
     pub editable_350_context_150: Range<usize>,
+    pub editable_350_context_512: Option<Range<usize>>,
+    pub editable_350_context_1024: Option<Range<usize>>,
+    pub context_4096: Option<Range<usize>>,
+    pub context_8192: Option<Range<usize>>,
 }
 
 #[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]