editor: Improve `buffer_into_editor_highlights` performance (#49349) (cherry-pick to preview) (#49352)

zed-zippy[bot] and Lukas Wirth created

Cherry-pick of #49349 to preview

----
This makes the runtime of `buffer_into_editor_highlights` scale
logarithmically with the number of excerpts and linearly with the number
of tokens.

Release Notes:

- N/A

Co-authored-by: Lukas Wirth <lukas@zed.dev>

Change summary

crates/editor/src/semantic_tokens.rs            |  51 +++---
crates/multi_buffer/src/multi_buffer.rs         | 127 +++++++++++++++++-
crates/project/src/lsp_store/semantic_tokens.rs |  12 +
3 files changed, 155 insertions(+), 35 deletions(-)

Detailed changes

crates/editor/src/semantic_tokens.rs 🔗

@@ -5,6 +5,7 @@ use futures::future::join_all;
 use gpui::{
     App, Context, FontStyle, FontWeight, HighlightStyle, StrikethroughStyle, Task, UnderlineStyle,
 };
+use itertools::Itertools;
 use language::language_settings::language_settings;
 use project::{
     lsp_store::{
@@ -227,7 +228,6 @@ impl Editor {
                     return;
                 }
                 let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx);
-                let all_excerpts = editor.buffer().read(cx).excerpt_ids();
 
                 for (buffer_id, query_version, tokens) in all_semantic_tokens {
                     let tokens = match tokens {
@@ -281,7 +281,6 @@ impl Editor {
                                 token_highlights.extend(buffer_into_editor_highlights(
                                     &server_tokens,
                                     stylizer,
-                                    &all_excerpts,
                                     &multi_buffer_snapshot,
                                     &mut interner,
                                     cx,
@@ -312,34 +311,36 @@ impl Editor {
 fn buffer_into_editor_highlights<'a, 'b>(
     buffer_tokens: &'a [BufferSemanticToken],
     stylizer: &'a SemanticTokenStylizer,
-    all_excerpts: &'a [multi_buffer::ExcerptId],
     multi_buffer_snapshot: &'a multi_buffer::MultiBufferSnapshot,
     interner: &'b mut HighlightStyleInterner,
     cx: &'a App,
 ) -> impl Iterator<Item = SemanticTokenHighlight> + use<'a, 'b> {
-    buffer_tokens.iter().filter_map(|token| {
-        let multi_buffer_start = all_excerpts.iter().find_map(|&excerpt_id| {
-            multi_buffer_snapshot.anchor_in_excerpt(excerpt_id, token.range.start)
-        })?;
-        let multi_buffer_end = all_excerpts.iter().find_map(|&excerpt_id| {
-            multi_buffer_snapshot.anchor_in_excerpt(excerpt_id, token.range.end)
-        })?;
-
-        let style = convert_token(
-            stylizer,
-            cx.theme().syntax(),
-            token.token_type,
-            token.token_modifiers,
-        )?;
-        let style = interner.intern(style);
-        Some(SemanticTokenHighlight {
-            range: multi_buffer_start..multi_buffer_end,
-            style,
-            token_type: token.token_type,
-            token_modifiers: token.token_modifiers,
-            server_id: stylizer.server_id(),
+    multi_buffer_snapshot
+        .text_anchors_to_visible_anchors(
+            buffer_tokens
+                .iter()
+                .flat_map(|token| [token.range.start, token.range.end]),
+        )
+        .into_iter()
+        .tuples::<(_, _)>()
+        .zip(buffer_tokens)
+        .filter_map(|((multi_buffer_start, multi_buffer_end), token)| {
+            let range = multi_buffer_start?..multi_buffer_end?;
+            let style = convert_token(
+                stylizer,
+                cx.theme().syntax(),
+                token.token_type,
+                token.token_modifiers,
+            )?;
+            let style = interner.intern(style);
+            Some(SemanticTokenHighlight {
+                range,
+                style,
+                token_type: token.token_type,
+                token_modifiers: token.token_modifiers,
+                server_id: stylizer.server_id(),
+            })
         })
-    })
 }
 
 fn convert_token(

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -515,6 +515,7 @@ struct BufferState {
     buffer: Entity<Buffer>,
     last_version: RefCell<clock::Global>,
     last_non_text_state_update_count: Cell<usize>,
+    // Note, any changes to this field value require updating snapshot.buffer_locators as well
     excerpts: Vec<Locator>,
     _subscriptions: [gpui::Subscription; 2],
 }
@@ -606,18 +607,18 @@ impl DiffState {
 #[derive(Clone, Default)]
 pub struct MultiBufferSnapshot {
     excerpts: SumTree<Excerpt>,
+    buffer_locators: TreeMap<BufferId, Arc<[Locator]>>,
     diffs: TreeMap<BufferId, DiffStateSnapshot>,
     diff_transforms: SumTree<DiffTransform>,
+    excerpt_ids: SumTree<ExcerptIdMapping>,
+    replaced_excerpts: TreeMap<ExcerptId, ExcerptId>,
     non_text_state_update_count: usize,
     edit_count: usize,
     is_dirty: bool,
     has_deleted_file: bool,
     has_conflict: bool,
     has_inverted_diff: bool,
-    /// immutable fields
     singleton: bool,
-    excerpt_ids: SumTree<ExcerptIdMapping>,
-    replaced_excerpts: TreeMap<ExcerptId, ExcerptId>,
     trailing_excerpt_update_count: usize,
     all_diff_hunks_expanded: bool,
     show_deleted_hunks: bool,
@@ -1202,7 +1203,7 @@ impl MultiBuffer {
         }
         Self {
             snapshot: RefCell::new(self.snapshot.borrow().clone()),
-            buffers: buffers,
+            buffers,
             excerpts_by_path: Default::default(),
             paths_by_excerpt: Default::default(),
             diffs: diff_bases,
@@ -1878,6 +1879,9 @@ impl MultiBuffer {
             }
             new_excerpt_ids.push(ExcerptIdMapping { id, locator }, ());
         }
+        snapshot
+            .buffer_locators
+            .insert(buffer_id, buffer_state.excerpts.iter().cloned().collect());
 
         let edit_end = ExcerptDimension(new_excerpts.summary().text.len);
 
@@ -1922,6 +1926,7 @@ impl MultiBuffer {
         self.paths_by_excerpt.clear();
         let MultiBufferSnapshot {
             excerpts,
+            buffer_locators,
             diffs: _,
             diff_transforms: _,
             non_text_state_update_count: _,
@@ -1939,6 +1944,7 @@ impl MultiBuffer {
             use_extended_diff_range: _,
             show_headers: _,
         } = self.snapshot.get_mut();
+        buffer_locators.clear();
         let start = ExcerptDimension(MultiBufferOffset::ZERO);
         let prev_len = ExcerptDimension(excerpts.summary().text.len);
         *excerpts = Default::default();
@@ -1979,8 +1985,8 @@ impl MultiBuffer {
         let mut excerpts = Vec::new();
         let snapshot = self.read(cx);
         let mut cursor = snapshot.excerpts.cursor::<Option<&Locator>>(());
-        if let Some(locators) = self.buffers.get(&buffer_id).map(|state| &state.excerpts) {
-            for locator in locators {
+        if let Some(locators) = snapshot.buffer_locators.get(&buffer_id) {
+            for locator in &**locators {
                 cursor.seek_forward(&Some(locator), Bias::Left);
                 if let Some(excerpt) = cursor.item()
                     && excerpt.locator == *locator
@@ -2002,11 +2008,11 @@ impl MultiBuffer {
             .diff_transforms
             .cursor::<Dimensions<ExcerptPoint, OutputDimension<Point>>>(());
         diff_transforms.next();
-        let locators = self
-            .buffers
+        let locators = snapshot
+            .buffer_locators
             .get(&buffer_id)
             .into_iter()
-            .flat_map(|state| &state.excerpts);
+            .flat_map(|v| &**v);
         let mut result = Vec::new();
         for locator in locators {
             excerpts.seek_forward(&Some(locator), Bias::Left);
@@ -2111,6 +2117,7 @@ impl MultiBuffer {
 
     pub fn buffer_point_to_anchor(
         &self,
+        // todo(lw): We shouldn't need this?
         buffer: &Entity<Buffer>,
         point: Point,
         cx: &App,
@@ -2140,6 +2147,7 @@ impl MultiBuffer {
 
     pub fn buffer_anchor_to_anchor(
         &self,
+        // todo(lw): We shouldn't need this?
         buffer: &Entity<Buffer>,
         anchor: text::Anchor,
         cx: &App,
@@ -2176,6 +2184,7 @@ impl MultiBuffer {
         let mut edits = Vec::new();
         let mut excerpt_ids = ids.iter().copied().peekable();
         let mut removed_buffer_ids = Vec::new();
+        let mut removed_excerpts_for_buffers = HashSet::default();
 
         while let Some(excerpt_id) = excerpt_ids.next() {
             self.paths_by_excerpt.remove(&excerpt_id);
@@ -2192,6 +2201,7 @@ impl MultiBuffer {
                 // Skip over the removed excerpt.
                 'remove_excerpts: loop {
                     if let Some(buffer_state) = self.buffers.get_mut(&excerpt.buffer_id) {
+                        removed_excerpts_for_buffers.insert(excerpt.buffer_id);
                         buffer_state.excerpts.retain(|l| l != &excerpt.locator);
                         if buffer_state.excerpts.is_empty() {
                             log::debug!(
@@ -2239,6 +2249,18 @@ impl MultiBuffer {
         let changed_trailing_excerpt = suffix.is_empty();
         new_excerpts.append(suffix, ());
         drop(cursor);
+        for buffer_id in removed_excerpts_for_buffers {
+            match self.buffers.get(&buffer_id) {
+                Some(buffer_state) => {
+                    snapshot
+                        .buffer_locators
+                        .insert(buffer_id, buffer_state.excerpts.iter().cloned().collect());
+                }
+                None => {
+                    snapshot.buffer_locators.remove(&buffer_id);
+                }
+            }
+        }
         snapshot.excerpts = new_excerpts;
         for buffer_id in &removed_buffer_ids {
             self.diffs.remove(buffer_id);
@@ -2993,6 +3015,7 @@ impl MultiBuffer {
     ) -> Vec<Edit<MultiBufferOffset>> {
         let MultiBufferSnapshot {
             excerpts,
+            buffer_locators: _,
             diffs: buffer_diff,
             diff_transforms: _,
             non_text_state_update_count,
@@ -4017,6 +4040,90 @@ impl MultiBufferSnapshot {
             .map(|excerpt| excerpt.buffer_id)
     }
 
+    /// Resolves the given [`text::Anchor`]s to [`crate::Anchor`]s if the anchor is within a visible excerpt.
+    ///
+    /// The passed in anchors must be ordered.
+    pub fn text_anchors_to_visible_anchors(
+        &self,
+        anchors: impl IntoIterator<Item = text::Anchor>,
+    ) -> Vec<Option<Anchor>> {
+        let mut cursor = self.excerpts.cursor::<Option<&Locator>>(());
+        let mut anchors = anchors.into_iter().peekable();
+        let mut result = Vec::new();
+        'anchors: while let Some(anchor) = anchors.peek() {
+            let Some(buffer_id) = anchor.buffer_id else {
+                anchors.next();
+                result.push(None);
+                continue 'anchors;
+            };
+            let mut same_buffer_anchors =
+                anchors.peeking_take_while(|a| a.buffer_id.is_some_and(|b| buffer_id == b));
+
+            if let Some(locators) = self.buffer_locators.get(&buffer_id) {
+                let Some(mut next) = same_buffer_anchors.next() else {
+                    continue 'anchors;
+                };
+                'excerpts: for locator in locators.iter() {
+                    if cursor.seek_forward(&Some(locator), Bias::Left)
+                        && let Some(excerpt) = cursor.item()
+                    {
+                        loop {
+                            // anchor is before the first excerpt
+                            if excerpt
+                                .range
+                                .context
+                                .start
+                                .cmp(&next, &excerpt.buffer)
+                                .is_gt()
+                            {
+                                // so we skip it and try the next anchor
+                                result.push(None);
+                                match same_buffer_anchors.next() {
+                                    Some(anchor) => next = anchor,
+                                    None => continue 'anchors,
+                                }
+                            // anchor is within the excerpt
+                            } else if excerpt
+                                .range
+                                .context
+                                .end
+                                .cmp(&next, &excerpt.buffer)
+                                .is_ge()
+                            {
+                                // record it and all following anchors that are within
+                                result.push(Some(Anchor::in_buffer(excerpt.id, next)));
+                                result.extend(
+                                    same_buffer_anchors
+                                        .peeking_take_while(|a| {
+                                            excerpt
+                                                .range
+                                                .context
+                                                .end
+                                                .cmp(a, &excerpt.buffer)
+                                                .is_ge()
+                                        })
+                                        .map(|a| Some(Anchor::in_buffer(excerpt.id, a))),
+                                );
+                                match same_buffer_anchors.next() {
+                                    Some(anchor) => next = anchor,
+                                    None => continue 'anchors,
+                                }
+                            // anchor is after the excerpt, try the next one
+                            } else {
+                                continue 'excerpts;
+                            }
+                        }
+                    }
+                }
+                // account for `next`
+                result.push(None);
+            }
+            result.extend(same_buffer_anchors.map(|_| None));
+        }
+
+        result
+    }
+
     pub fn ranges_to_buffer_ranges<T: ToOffset>(
         &self,
         ranges: impl Iterator<Item = Range<T>>,
@@ -5649,7 +5756,7 @@ impl MultiBufferSnapshot {
         }
     }
 
-    /// Wraps the [`text::Anchor`] in a [`multi_buffer::Anchor`] if this multi-buffer is a singleton.
+    /// Wraps the [`text::Anchor`] in a [`crate::Anchor`] if this multi-buffer is a singleton.
     pub fn as_singleton_anchor(&self, text_anchor: text::Anchor) -> Option<Anchor> {
         let (excerpt, buffer, _) = self.as_singleton()?;
         if text_anchor.buffer_id.is_none_or(|id| id == buffer) {

crates/project/src/lsp_store/semantic_tokens.rs 🔗

@@ -414,6 +414,9 @@ pub struct TokenType(pub u32);
 
 #[derive(Debug, Clone)]
 pub struct BufferSemanticToken {
+    /// The range of the token in the buffer.
+    ///
+    /// Guaranteed to contain a buffer id.
     pub range: Range<Anchor>,
     pub token_type: TokenType,
     pub token_modifiers: u32,
@@ -525,6 +528,7 @@ async fn raw_to_buffer_semantic_tokens(
 ) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
     let mut res = HashMap::default();
     for (&server_id, server_tokens) in &raw_tokens.servers {
+        let mut last = 0;
         // We don't do `collect` here due to the filter map not pre-allocating
         // we'd rather over allocate here than not since we have to re-allocate into an arc slice anyways
         let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
@@ -543,7 +547,15 @@ async fn raw_to_buffer_semantic_tokens(
                 let start = buffer_snapshot
                     .as_rope()
                     .offset_utf16_to_offset(start_offset);
+                if start < last {
+                    return None;
+                }
+
                 let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
+                if end < last {
+                    return None;
+                }
+                last = end;
 
                 if start == end {
                     return None;