Omit large edits from ep history (#51938)

Ben Kunkle created

## Context

We're seeing issues where large edits (e.g. generated ones, agentically
or otherwise) or edits in files with very long lines cause edit
prediction (ep) requests to fail because the request body exceeds the
endpoint size limit. The failures persist until the offending edits are
flushed from the edit history. These edits are large enough that they
would be omitted from the final prompt anyway due to budgeting, so it is
safe to drop them client-side.

## How to Review

<!-- Help reviewers focus their attention:
- For small PRs: note what to focus on (e.g., "error handling in
foo.rs")
- For large PRs (>400 LOC): provide a guided tour — numbered list of
files/commits to read in order. (The `large-pr` label is applied
automatically.)
     - See the review process guidelines for comment conventions -->

## Self-Review Checklist

<!-- Check before requesting review: -->
- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- Fixed an issue where large changes to buffers, or edits in buffers
with extremely long lines, would cause edit prediction requests to fail.

Change summary

crates/edit_prediction/src/edit_prediction.rs       | 23 ++++
crates/edit_prediction/src/edit_prediction_tests.rs | 75 +++++++++++++++
2 files changed, 98 insertions(+)

Detailed changes

crates/edit_prediction/src/edit_prediction.rs 🔗

@@ -102,6 +102,7 @@ actions!(
 /// Maximum number of events to track.
 const EVENT_COUNT_MAX: usize = 10;
 const CHANGE_GROUPING_LINE_SPAN: u32 = 8;
+const EDIT_HISTORY_DIFF_SIZE_LIMIT: usize = 2048 * 3; // ~2048 tokens or ~50% of typical prompt budget
 const COLLABORATOR_EDIT_LOCALITY_CONTEXT_TOKENS: usize = 512;
 const LAST_CHANGE_GROUPING_TIME: Duration = Duration::from_secs(1);
 const ZED_PREDICT_DATA_COLLECTION_CHOICE: &str = "zed_predict_data_collection_choice";
@@ -724,6 +725,12 @@ fn compute_diff_between_snapshots_in_range(
     let old_edit_range = old_start_line_offset..old_end_line_offset;
     let new_edit_range = new_start_line_offset..new_end_line_offset;
 
+    if new_edit_range.len() > EDIT_HISTORY_DIFF_SIZE_LIMIT
+        || old_edit_range.len() > EDIT_HISTORY_DIFF_SIZE_LIMIT
+    {
+        return None;
+    }
+
     let old_region_text: String = old_snapshot.text_for_range(old_edit_range).collect();
     let new_region_text: String = new_snapshot.text_for_range(new_edit_range).collect();
 
@@ -1410,8 +1417,24 @@ impl EditPredictionStore {
             return;
         }
 
+        let is_recordable_history_edit =
+            compute_diff_between_snapshots_in_range(&old_snapshot, &new_snapshot, &edit_range)
+                .is_some();
+
         let events = &mut project_state.events;
 
+        if !is_recordable_history_edit {
+            if let Some(event) = project_state.last_event.take() {
+                if let Some(event) = event.finalize(&project_state.license_detection_watchers, cx) {
+                    if events.len() + 1 >= EVENT_COUNT_MAX {
+                        events.pop_front();
+                    }
+                    events.push_back(event);
+                }
+            }
+            return;
+        }
+
         if let Some(last_event) = project_state.last_event.as_mut() {
             let is_next_snapshot_of_same_buffer = old_snapshot.remote_id()
                 == last_event.new_snapshot.remote_id()

crates/edit_prediction/src/edit_prediction_tests.rs 🔗

@@ -1012,6 +1012,81 @@ async fn test_irrelevant_collaborator_edits_in_different_files_are_omitted_from_
     assert!(events.is_empty());
 }
 
+#[gpui::test]
+async fn test_large_edits_are_omitted_from_history(cx: &mut TestAppContext) {
+    let (ep_store, _requests) = init_test_with_fake_client(cx);
+    let fs = FakeFs::new(cx.executor());
+    fs.insert_tree(
+        "/root",
+        json!({
+            "foo.rs": (0..20)
+                .map(|i| format!("line {i}\n"))
+                .collect::<String>()
+        }),
+    )
+    .await;
+    let project = Project::test(fs, vec![path!("/root").as_ref()], cx).await;
+
+    let buffer = project
+        .update(cx, |project, cx| {
+            let path = project.find_project_path(path!("root/foo.rs"), cx).unwrap();
+            project.set_active_path(Some(path.clone()), cx);
+            project.open_buffer(path, cx)
+        })
+        .await
+        .unwrap();
+
+    let cursor = buffer.read_with(cx, |buffer, _cx| buffer.anchor_before(Point::new(1, 0)));
+
+    ep_store.update(cx, |ep_store, cx| {
+        ep_store.register_buffer(&buffer, &project, cx);
+        let _ = ep_store.prediction_at(&buffer, Some(cursor), &project, cx);
+    });
+
+    buffer.update(cx, |buffer, cx| {
+        buffer.edit(vec![(0..6, "LOCAL ZERO")], None, cx);
+    });
+
+    let (collaborator, mut collaborator_version) = make_collaborator_replica(&buffer, cx);
+
+    let (line_three_start, line_three_len) = collaborator.read_with(cx, |buffer, _cx| {
+        (Point::new(3, 0).to_offset(buffer), buffer.line_len(3))
+    });
+    let large_edit = "X".repeat(EDIT_HISTORY_DIFF_SIZE_LIMIT + 1);
+
+    apply_collaborator_edit(
+        &collaborator,
+        &buffer,
+        &mut collaborator_version,
+        line_three_start..line_three_start + line_three_len as usize,
+        &large_edit,
+        cx,
+    )
+    .await;
+
+    buffer.update(cx, |buffer, cx| {
+        let line_seven_start = Point::new(7, 0).to_offset(buffer);
+        let line_seven_end = Point::new(7, 6).to_offset(buffer);
+        buffer.edit(
+            vec![(line_seven_start..line_seven_end, "LOCAL SEVEN")],
+            None,
+            cx,
+        );
+    });
+
+    let events = ep_store.update(cx, |ep_store, cx| {
+        ep_store.edit_history_for_project(&project, cx)
+    });
+
+    let rendered_events = render_events_with_predicted(&events);
+
+    assert_eq!(rendered_events.len(), 2);
+    assert!(rendered_events[0].contains("+LOCAL ZERO"));
+    assert!(!rendered_events[0].contains(&large_edit));
+    assert!(rendered_events[1].contains("+LOCAL SEVEN"));
+    assert!(!rendered_events[1].contains(&large_edit));
+}
+
 #[gpui::test]
 async fn test_predicted_flag_coalescing(cx: &mut TestAppContext) {
     let (ep_store, _requests) = init_test_with_fake_client(cx);