From a2fed658100743724b33961f8f72df289bccfed1 Mon Sep 17 00:00:00 2001 From: "zed-zippy[bot]" <234243425+zed-zippy[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 12:13:54 +0000 Subject: [PATCH] editor: Improve `buffer_into_editor_highlights` performance (#49349) (cherry-pick to preview) (#49352) Cherry-pick of #49349 to preview ---- This brings the runtime down to logarithmic scaling of excerpts / linear scaling of tokens. Release Notes: - N/A *or* Added/Fixed/Improved ... Co-authored-by: Lukas Wirth --- crates/editor/src/semantic_tokens.rs | 51 +++---- crates/multi_buffer/src/multi_buffer.rs | 127 ++++++++++++++++-- .../project/src/lsp_store/semantic_tokens.rs | 12 ++ 3 files changed, 155 insertions(+), 35 deletions(-) diff --git a/crates/editor/src/semantic_tokens.rs b/crates/editor/src/semantic_tokens.rs index fc309f38568e8e0ba21f99d62c616beb00aa097f..14e18632bfdb8d8fc815f685db41bf79c3934cc3 100644 --- a/crates/editor/src/semantic_tokens.rs +++ b/crates/editor/src/semantic_tokens.rs @@ -5,6 +5,7 @@ use futures::future::join_all; use gpui::{ App, Context, FontStyle, FontWeight, HighlightStyle, StrikethroughStyle, Task, UnderlineStyle, }; +use itertools::Itertools; use language::language_settings::language_settings; use project::{ lsp_store::{ @@ -227,7 +228,6 @@ impl Editor { return; } let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx); - let all_excerpts = editor.buffer().read(cx).excerpt_ids(); for (buffer_id, query_version, tokens) in all_semantic_tokens { let tokens = match tokens { @@ -281,7 +281,6 @@ impl Editor { token_highlights.extend(buffer_into_editor_highlights( &server_tokens, stylizer, - &all_excerpts, &multi_buffer_snapshot, &mut interner, cx, @@ -312,34 +311,36 @@ impl Editor { fn buffer_into_editor_highlights<'a, 'b>( buffer_tokens: &'a [BufferSemanticToken], stylizer: &'a SemanticTokenStylizer, - all_excerpts: &'a [multi_buffer::ExcerptId], multi_buffer_snapshot: &'a multi_buffer::MultiBufferSnapshot, interner: &'b mut HighlightStyleInterner, cx: &'a App, ) -> impl Iterator + use<'a, 'b> { - buffer_tokens.iter().filter_map(|token| { - let multi_buffer_start = all_excerpts.iter().find_map(|&excerpt_id| { - multi_buffer_snapshot.anchor_in_excerpt(excerpt_id, token.range.start) - })?; - let multi_buffer_end = all_excerpts.iter().find_map(|&excerpt_id| { - multi_buffer_snapshot.anchor_in_excerpt(excerpt_id, token.range.end) - })?; - - let style = convert_token( - stylizer, - cx.theme().syntax(), - token.token_type, - token.token_modifiers, - )?; - let style = interner.intern(style); - Some(SemanticTokenHighlight { - range: multi_buffer_start..multi_buffer_end, - style, - token_type: token.token_type, - token_modifiers: token.token_modifiers, - server_id: stylizer.server_id(), + multi_buffer_snapshot + .text_anchors_to_visible_anchors( + buffer_tokens + .iter() + .flat_map(|token| [token.range.start, token.range.end]), + ) + .into_iter() + .tuples::<(_, _)>() + .zip(buffer_tokens) + .filter_map(|((multi_buffer_start, multi_buffer_end), token)| { + let range = multi_buffer_start?..multi_buffer_end?; + let style = convert_token( + stylizer, + cx.theme().syntax(), + token.token_type, + token.token_modifiers, + )?; + let style = interner.intern(style); + Some(SemanticTokenHighlight { + range, + style, + token_type: token.token_type, + token_modifiers: token.token_modifiers, + server_id: stylizer.server_id(), + }) }) - }) } fn convert_token( diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index 4e7210fa49a642a2e7a3710d0792f4f8f741add0..a5266874a0d24c61816212ab7fa100c01d458567 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -515,6 +515,7 @@ struct BufferState { buffer: Entity, last_version: RefCell, last_non_text_state_update_count: Cell, + // Note, any changes to this field value require updating snapshot.buffer_locators as well excerpts: Vec, _subscriptions: [gpui::Subscription; 2], } @@ -606,18 +607,18 @@ impl DiffState { #[derive(Clone, Default)] pub struct MultiBufferSnapshot { excerpts: SumTree, + buffer_locators: TreeMap>, diffs: TreeMap, diff_transforms: SumTree, + excerpt_ids: SumTree, + replaced_excerpts: TreeMap, non_text_state_update_count: usize, edit_count: usize, is_dirty: bool, has_deleted_file: bool, has_conflict: bool, has_inverted_diff: bool, - /// immutable fields singleton: bool, - excerpt_ids: SumTree, - replaced_excerpts: TreeMap, trailing_excerpt_update_count: usize, all_diff_hunks_expanded: bool, show_deleted_hunks: bool, @@ -1202,7 +1203,7 @@ impl MultiBuffer { } Self { snapshot: RefCell::new(self.snapshot.borrow().clone()), - buffers: buffers, + buffers, excerpts_by_path: Default::default(), paths_by_excerpt: Default::default(), diffs: diff_bases, @@ -1878,6 +1879,9 @@ impl MultiBuffer { } new_excerpt_ids.push(ExcerptIdMapping { id, locator }, ()); } + snapshot + .buffer_locators + .insert(buffer_id, buffer_state.excerpts.iter().cloned().collect()); let edit_end = ExcerptDimension(new_excerpts.summary().text.len); @@ -1922,6 +1926,7 @@ impl MultiBuffer { self.paths_by_excerpt.clear(); let MultiBufferSnapshot { excerpts, + buffer_locators, diffs: _, diff_transforms: _, non_text_state_update_count: _, @@ -1939,6 +1944,7 @@ impl MultiBuffer { use_extended_diff_range: _, show_headers: _, } = self.snapshot.get_mut(); + buffer_locators.clear(); let start = ExcerptDimension(MultiBufferOffset::ZERO); let prev_len = ExcerptDimension(excerpts.summary().text.len); *excerpts = Default::default(); @@ -1979,8 +1985,8 @@ impl MultiBuffer { let mut excerpts = Vec::new(); let snapshot = self.read(cx); let mut cursor = snapshot.excerpts.cursor::>(()); - if let Some(locators) = self.buffers.get(&buffer_id).map(|state| &state.excerpts) { - for locator in locators { + if let Some(locators) = snapshot.buffer_locators.get(&buffer_id) { + for locator in &**locators { cursor.seek_forward(&Some(locator), Bias::Left); if let Some(excerpt) = cursor.item() && excerpt.locator == *locator @@ -2002,11 +2008,11 @@ impl MultiBuffer { .diff_transforms .cursor::>>(()); diff_transforms.next(); - let locators = self - .buffers + let locators = snapshot + .buffer_locators .get(&buffer_id) .into_iter() - .flat_map(|state| &state.excerpts); + .flat_map(|v| &**v); let mut result = Vec::new(); for locator in locators { excerpts.seek_forward(&Some(locator), Bias::Left); @@ -2111,6 +2117,7 @@ impl MultiBuffer { pub fn buffer_point_to_anchor( &self, + // todo(lw): We shouldn't need this? buffer: &Entity, point: Point, cx: &App, @@ -2140,6 +2147,7 @@ impl MultiBuffer { pub fn buffer_anchor_to_anchor( &self, + // todo(lw): We shouldn't need this? buffer: &Entity, anchor: text::Anchor, cx: &App, @@ -2176,6 +2184,7 @@ impl MultiBuffer { let mut edits = Vec::new(); let mut excerpt_ids = ids.iter().copied().peekable(); let mut removed_buffer_ids = Vec::new(); + let mut removed_excerpts_for_buffers = HashSet::default(); while let Some(excerpt_id) = excerpt_ids.next() { self.paths_by_excerpt.remove(&excerpt_id); @@ -2192,6 +2201,7 @@ impl MultiBuffer { // Skip over the removed excerpt. 'remove_excerpts: loop { if let Some(buffer_state) = self.buffers.get_mut(&excerpt.buffer_id) { + removed_excerpts_for_buffers.insert(excerpt.buffer_id); buffer_state.excerpts.retain(|l| l != &excerpt.locator); if buffer_state.excerpts.is_empty() { log::debug!( @@ -2239,6 +2249,18 @@ impl MultiBuffer { let changed_trailing_excerpt = suffix.is_empty(); new_excerpts.append(suffix, ()); drop(cursor); + for buffer_id in removed_excerpts_for_buffers { + match self.buffers.get(&buffer_id) { + Some(buffer_state) => { + snapshot + .buffer_locators + .insert(buffer_id, buffer_state.excerpts.iter().cloned().collect()); + } + None => { + snapshot.buffer_locators.remove(&buffer_id); + } + } + } snapshot.excerpts = new_excerpts; for buffer_id in &removed_buffer_ids { self.diffs.remove(buffer_id); @@ -2993,6 +3015,7 @@ impl MultiBuffer { ) -> Vec> { let MultiBufferSnapshot { excerpts, + buffer_locators: _, diffs: buffer_diff, diff_transforms: _, non_text_state_update_count, @@ -4017,6 +4040,90 @@ impl MultiBufferSnapshot { .map(|excerpt| excerpt.buffer_id) } + /// Resolves the given [`text::Anchor`]s to [`crate::Anchor`]s if the anchor is within a visible excerpt. + /// + /// The passed in anchors must be ordered. + pub fn text_anchors_to_visible_anchors( + &self, + anchors: impl IntoIterator, + ) -> Vec> { + let mut cursor = self.excerpts.cursor::>(()); + let mut anchors = anchors.into_iter().peekable(); + let mut result = Vec::new(); + 'anchors: while let Some(anchor) = anchors.peek() { + let Some(buffer_id) = anchor.buffer_id else { + anchors.next(); + result.push(None); + continue 'anchors; + }; + let mut same_buffer_anchors = + anchors.peeking_take_while(|a| a.buffer_id.is_some_and(|b| buffer_id == b)); + + if let Some(locators) = self.buffer_locators.get(&buffer_id) { + let Some(mut next) = same_buffer_anchors.next() else { + continue 'anchors; + }; + 'excerpts: for locator in locators.iter() { + if cursor.seek_forward(&Some(locator), Bias::Left) + && let Some(excerpt) = cursor.item() + { + loop { + // anchor is before the first excerpt + if excerpt + .range + .context + .start + .cmp(&next, &excerpt.buffer) + .is_gt() + { + // so we skip it and try the next anchor + result.push(None); + match same_buffer_anchors.next() { + Some(anchor) => next = anchor, + None => continue 'anchors, + } + // anchor is within the excerpt + } else if excerpt + .range + .context + .end + .cmp(&next, &excerpt.buffer) + .is_ge() + { + // record it and all following anchors that are within + result.push(Some(Anchor::in_buffer(excerpt.id, next))); + result.extend( + same_buffer_anchors + .peeking_take_while(|a| { + excerpt + .range + .context + .end + .cmp(a, &excerpt.buffer) + .is_ge() + }) + .map(|a| Some(Anchor::in_buffer(excerpt.id, a))), + ); + match same_buffer_anchors.next() { + Some(anchor) => next = anchor, + None => continue 'anchors, + } + // anchor is after the excerpt, try the next one + } else { + continue 'excerpts; + } + } + } + } + // account for `next` + result.push(None); + } + result.extend(same_buffer_anchors.map(|_| None)); + } + + result + } + pub fn ranges_to_buffer_ranges( &self, ranges: impl Iterator>, @@ -5649,7 +5756,7 @@ impl MultiBufferSnapshot { } } - /// Wraps the [`text::Anchor`] in a [`multi_buffer::Anchor`] if this multi-buffer is a singleton. + /// Wraps the [`text::Anchor`] in a [`crate::Anchor`] if this multi-buffer is a singleton. pub fn as_singleton_anchor(&self, text_anchor: text::Anchor) -> Option { let (excerpt, buffer, _) = self.as_singleton()?; if text_anchor.buffer_id.is_none_or(|id| id == buffer) { diff --git a/crates/project/src/lsp_store/semantic_tokens.rs b/crates/project/src/lsp_store/semantic_tokens.rs index 46434d7fe69af25ad3dd12e435b635e3a58d1d91..7c110c72ae713f57040488b8d2e5ffe816e6e631 100644 --- a/crates/project/src/lsp_store/semantic_tokens.rs +++ b/crates/project/src/lsp_store/semantic_tokens.rs @@ -414,6 +414,9 @@ pub struct TokenType(pub u32); #[derive(Debug, Clone)] pub struct BufferSemanticToken { + /// The range of the token in the buffer. + /// + /// Guaranteed to contain a buffer id. pub range: Range, pub token_type: TokenType, pub token_modifiers: u32, @@ -525,6 +528,7 @@ async fn raw_to_buffer_semantic_tokens( ) -> HashMap> { let mut res = HashMap::default(); for (&server_id, server_tokens) in &raw_tokens.servers { + let mut last = 0; // We don't do `collect` here due to the filter map not pre-allocating // we'd rather over allocate here than not since we have to re-allocate into an arc slice anyways let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5); @@ -543,7 +547,15 @@ async fn raw_to_buffer_semantic_tokens( let start = buffer_snapshot .as_rope() .offset_utf16_to_offset(start_offset); + if start < last { + return None; + } + let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset); + if end < last { + return None; + } + last = end; if start == end { return None;