From 1c80e273e8b2cfaccd5520630bd5fdf3391ff7ec Mon Sep 17 00:00:00 2001 From: Ben Kunkle Date: Thu, 19 Mar 2026 09:21:52 -0500 Subject: [PATCH] Omit large edits from ep history (#51938) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context We're seeing issues where large edits (think generated, agentically or otherwise) or edits in files with very long lines cause edit prediction (ep) requests to fail until they are flushed from the history, because the request body exceeds the endpoint size limit. These edits are large enough that they would be omitted from the final prompt anyway due to budgeting, so it is safe to drop them client-side. ## How to Review ## Self-Review Checklist - [x] I've reviewed my own diff for quality, security, and reliability - [x] Unsafe blocks (if any) have justifying comments - [x] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [x] Tests cover the new/changed behavior - [x] Performance impact has been considered and is acceptable Release Notes: - Fixed an issue where large changes to buffers, or edits in buffers with extremely long lines, would cause edit prediction requests to fail --- crates/edit_prediction/src/edit_prediction.rs | 23 ++++++ .../src/edit_prediction_tests.rs | 75 +++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/crates/edit_prediction/src/edit_prediction.rs b/crates/edit_prediction/src/edit_prediction.rs index da2392d757675bb1993d2dacdf963fe24aa91430..bd1bc7a3303a6f80094fd8261e90a2c5e113803d 100644 --- a/crates/edit_prediction/src/edit_prediction.rs +++ b/crates/edit_prediction/src/edit_prediction.rs @@ -102,6 +102,7 @@ actions!( /// Maximum number of events to track. 
const EVENT_COUNT_MAX: usize = 10; const CHANGE_GROUPING_LINE_SPAN: u32 = 8; +const EDIT_HISTORY_DIFF_SIZE_LIMIT: usize = 2048 * 3; // ~2048 tokens or ~50% of typical prompt budget const COLLABORATOR_EDIT_LOCALITY_CONTEXT_TOKENS: usize = 512; const LAST_CHANGE_GROUPING_TIME: Duration = Duration::from_secs(1); const ZED_PREDICT_DATA_COLLECTION_CHOICE: &str = "zed_predict_data_collection_choice"; @@ -724,6 +725,12 @@ fn compute_diff_between_snapshots_in_range( let old_edit_range = old_start_line_offset..old_end_line_offset; let new_edit_range = new_start_line_offset..new_end_line_offset; + if new_edit_range.len() > EDIT_HISTORY_DIFF_SIZE_LIMIT + || old_edit_range.len() > EDIT_HISTORY_DIFF_SIZE_LIMIT + { + return None; + } + let old_region_text: String = old_snapshot.text_for_range(old_edit_range).collect(); let new_region_text: String = new_snapshot.text_for_range(new_edit_range).collect(); @@ -1410,8 +1417,24 @@ impl EditPredictionStore { return; } + let is_recordable_history_edit = + compute_diff_between_snapshots_in_range(&old_snapshot, &new_snapshot, &edit_range) + .is_some(); + let events = &mut project_state.events; + if !is_recordable_history_edit { + if let Some(event) = project_state.last_event.take() { + if let Some(event) = event.finalize(&project_state.license_detection_watchers, cx) { + if events.len() + 1 >= EVENT_COUNT_MAX { + events.pop_front(); + } + events.push_back(event); + } + } + return; + } + if let Some(last_event) = project_state.last_event.as_mut() { let is_next_snapshot_of_same_buffer = old_snapshot.remote_id() == last_event.new_snapshot.remote_id() diff --git a/crates/edit_prediction/src/edit_prediction_tests.rs b/crates/edit_prediction/src/edit_prediction_tests.rs index 257dd255cbb6ae3a2ddb02019e56886d41676de2..7583ba629bc2c490c5f8e8dd83218c200025fe7c 100644 --- a/crates/edit_prediction/src/edit_prediction_tests.rs +++ b/crates/edit_prediction/src/edit_prediction_tests.rs @@ -1012,6 +1012,81 @@ async fn 
test_irrelevant_collaborator_edits_in_different_files_are_omitted_from_ assert!(events.is_empty()); } +#[gpui::test] +async fn test_large_edits_are_omitted_from_history(cx: &mut TestAppContext) { + let (ep_store, _requests) = init_test_with_fake_client(cx); + let fs = FakeFs::new(cx.executor()); + fs.insert_tree( + "/root", + json!({ + "foo.rs": (0..20) + .map(|i| format!("line {i}\n")) + .collect::<String>() + }), + ) + .await; + let project = Project::test(fs, vec![path!("/root").as_ref()], cx).await; + + let buffer = project + .update(cx, |project, cx| { + let path = project.find_project_path(path!("root/foo.rs"), cx).unwrap(); + project.set_active_path(Some(path.clone()), cx); + project.open_buffer(path, cx) + }) + .await + .unwrap(); + + let cursor = buffer.read_with(cx, |buffer, _cx| buffer.anchor_before(Point::new(1, 0))); + + ep_store.update(cx, |ep_store, cx| { + ep_store.register_buffer(&buffer, &project, cx); + let _ = ep_store.prediction_at(&buffer, Some(cursor), &project, cx); + }); + + buffer.update(cx, |buffer, cx| { + buffer.edit(vec![(0..6, "LOCAL ZERO")], None, cx); + }); + + let (collaborator, mut collaborator_version) = make_collaborator_replica(&buffer, cx); + + let (line_three_start, line_three_len) = collaborator.read_with(cx, |buffer, _cx| { + (Point::new(3, 0).to_offset(buffer), buffer.line_len(3)) + }); + let large_edit = "X".repeat(EDIT_HISTORY_DIFF_SIZE_LIMIT + 1); + + apply_collaborator_edit( + &collaborator, + &buffer, + &mut collaborator_version, + line_three_start..line_three_start + line_three_len as usize, + &large_edit, + cx, + ) + .await; + + buffer.update(cx, |buffer, cx| { + let line_seven_start = Point::new(7, 0).to_offset(buffer); + let line_seven_end = Point::new(7, 6).to_offset(buffer); + buffer.edit( + vec![(line_seven_start..line_seven_end, "LOCAL SEVEN")], + None, + cx, + ); + }); + + let events = ep_store.update(cx, |ep_store, cx| { + ep_store.edit_history_for_project(&project, cx) + }); + + let rendered_events = 
render_events_with_predicted(&events); + + assert_eq!(rendered_events.len(), 2); + assert!(rendered_events[0].contains("+LOCAL ZERO")); + assert!(!rendered_events[0].contains(&large_edit)); + assert!(rendered_events[1].contains("+LOCAL SEVEN")); + assert!(!rendered_events[1].contains(&large_edit)); +} + #[gpui::test] async fn test_predicted_flag_coalescing(cx: &mut TestAppContext) { let (ep_store, _requests) = init_test_with_fake_client(cx);