editor: Some more semantic highlighting perf work (#49491)

Created by Lukas Wirth

- Reduce some re-allocations in `text_anchors_to_visible_anchors` and
`refresh_semantic_tokens`
- Move `raw_to_buffer_semantic_tokens` work onto a background thread

Release Notes:

- Improved semantic highlighting performance.

Change summary

crates/editor/src/semantic_tokens.rs            | 167 ++++++++++--------
crates/multi_buffer/src/multi_buffer.rs         |   5 
crates/project/src/lsp_store/semantic_tokens.rs |  83 +++++---
3 files changed, 142 insertions(+), 113 deletions(-)

Detailed changes

crates/editor/src/semantic_tokens.rs

@@ -204,8 +204,10 @@ impl Editor {
                     buffers_to_query
                         .into_iter()
                         .filter_map(|(buffer_id, buffer)| {
-                            let known_version =
-                                editor.semantic_token_state.fetched_for_buffers.get(&buffer_id);
+                            let known_version = editor
+                                .semantic_token_state
+                                .fetched_for_buffers
+                                .get(&buffer_id);
                             let query_version = buffer.read(cx).version();
                             if known_version.is_some_and(|known_version| {
                                 !query_version.changed_since(known_version)
@@ -224,90 +226,99 @@ impl Editor {
             };
 
             let all_semantic_tokens = join_all(all_semantic_tokens_task).await;
-            editor.update(cx, |editor, cx| {
-                editor.display_map.update(cx, |display_map, _| {
-                    for buffer_id in invalidate_semantic_highlights_for_buffers {
-                        display_map.invalidate_semantic_highlights(buffer_id);
-                        editor.semantic_token_state.invalidate_buffer(&buffer_id);
-                    }
-                });
-
-
-                if all_semantic_tokens.is_empty() {
-                    return;
-                }
-                let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx);
-
-                for (buffer_id, query_version, tokens) in all_semantic_tokens {
-                    let tokens = match tokens {
-                        Ok(BufferSemanticTokens { tokens: Some(tokens) }) => {
-                            tokens
-                        },
-                        Ok(BufferSemanticTokens { tokens: None }) => {
-                            editor.display_map.update(cx, |display_map, _| {
-                                display_map.invalidate_semantic_highlights(buffer_id);
-                            });
-                            continue;
-                        },
-                        Err(e) => {
-                            log::error!("Failed to fetch semantic tokens for buffer {buffer_id:?}: {e:#}");
-                            continue;
-                        },
-                    };
+            editor
+                .update(cx, |editor, cx| {
+                    editor.display_map.update(cx, |display_map, _| {
+                        for buffer_id in invalidate_semantic_highlights_for_buffers {
+                            display_map.invalidate_semantic_highlights(buffer_id);
+                            editor.semantic_token_state.invalidate_buffer(&buffer_id);
+                        }
+                    });
 
-                    match editor.semantic_token_state.fetched_for_buffers.entry(buffer_id) {
-                        hash_map::Entry::Occupied(mut o) => {
-                            if query_version.changed_since(o.get()) {
-                                o.insert(query_version);
-                            } else {
+                    if all_semantic_tokens.is_empty() {
+                        return;
+                    }
+                    let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx);
+
+                    for (buffer_id, query_version, tokens) in all_semantic_tokens {
+                        let tokens = match tokens {
+                            Ok(BufferSemanticTokens {
+                                tokens: Some(tokens),
+                            }) => tokens,
+                            Ok(BufferSemanticTokens { tokens: None }) => {
+                                editor.display_map.update(cx, |display_map, _| {
+                                    display_map.invalidate_semantic_highlights(buffer_id);
+                                });
                                 continue;
                             }
-                        },
-                        hash_map::Entry::Vacant(v) => {
-                            v.insert(query_version);
-                        },
-                    }
-
-                    let language_name = editor
-                        .buffer()
-                        .read(cx)
-                        .buffer(buffer_id)
-                        .and_then(|buf| buf.read(cx).language().map(|l| l.name()));
-
-                    editor.display_map.update(cx, |display_map, cx| {
-                        project.read(cx).lsp_store().update(cx, |lsp_store, cx| {
-                            let mut token_highlights = Vec::new();
-                            let mut interner = HighlightStyleInterner::default();
-                            for (server_id, server_tokens) in tokens {
-                                let Some(stylizer) = lsp_store.get_or_create_token_stylizer(
-                                    server_id,
-                                    language_name.as_ref(),
-                                    cx,
-                                )
-                                else {
+                            Err(e) => {
+                                log::error!(
+                                    "Failed to fetch semantic tokens for buffer \
+                                    {buffer_id:?}: {e:#}"
+                                );
+                                continue;
+                            }
+                        };
+
+                        match editor
+                            .semantic_token_state
+                            .fetched_for_buffers
+                            .entry(buffer_id)
+                        {
+                            hash_map::Entry::Occupied(mut o) => {
+                                if query_version.changed_since(o.get()) {
+                                    o.insert(query_version);
+                                } else {
                                     continue;
-                                };
-                                token_highlights.extend(buffer_into_editor_highlights(
-                                    &server_tokens,
-                                    stylizer,
-                                    &multi_buffer_snapshot,
-                                    &mut interner,
-                                    cx,
-                                ));
+                                }
                             }
-
-                            token_highlights.sort_by(|a, b| {
-                                a.range.start.cmp(&b.range.start, &multi_buffer_snapshot)
+                            hash_map::Entry::Vacant(v) => {
+                                v.insert(query_version);
+                            }
+                        }
+
+                        let language_name = editor
+                            .buffer()
+                            .read(cx)
+                            .buffer(buffer_id)
+                            .and_then(|buf| buf.read(cx).language().map(|l| l.name()));
+
+                        editor.display_map.update(cx, |display_map, cx| {
+                            project.read(cx).lsp_store().update(cx, |lsp_store, cx| {
+                                let mut token_highlights = Vec::new();
+                                let mut interner = HighlightStyleInterner::default();
+                                for (server_id, server_tokens) in tokens {
+                                    let Some(stylizer) = lsp_store.get_or_create_token_stylizer(
+                                        server_id,
+                                        language_name.as_ref(),
+                                        cx,
+                                    ) else {
+                                        continue;
+                                    };
+                                    token_highlights.reserve(2 * server_tokens.len());
+                                    token_highlights.extend(buffer_into_editor_highlights(
+                                        &server_tokens,
+                                        stylizer,
+                                        &multi_buffer_snapshot,
+                                        &mut interner,
+                                        cx,
+                                    ));
+                                }
+
+                                token_highlights.sort_by(|a, b| {
+                                    a.range.start.cmp(&b.range.start, &multi_buffer_snapshot)
+                                });
+                                display_map.semantic_token_highlights.insert(
+                                    buffer_id,
+                                    (Arc::from(token_highlights), Arc::new(interner)),
+                                );
                             });
-                            display_map
-                                .semantic_token_highlights
-                                .insert(buffer_id, (Arc::from(token_highlights), Arc::new(interner)));
                         });
-                    });
-                }
+                    }
 
-                cx.notify();
-            }).ok();
+                    cx.notify();
+                })
+                .ok();
         });
     }
 }
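
The `token_highlights.reserve(2 * server_tokens.len())` line above is the re-allocation fix in this file: the same Vec is extended once per language server, so reserving before each `extend` avoids repeated growth mid-loop. A minimal sketch of the pattern, with illustrative names and an assumed two-entries-per-token expansion (not Zed's actual types):

    fn collect_highlights(server_batches: &[Vec<u32>]) -> Vec<(u32, u32)> {
        let mut highlights = Vec::new();
        for batch in server_batches {
            // Reserve the worst case up front so the extend() below
            // never reallocates partway through a batch.
            highlights.reserve(2 * batch.len());
            // Each token expands into up to two highlight entries,
            // matching the 2x reserve factor.
            highlights.extend(batch.iter().flat_map(|&t| [(t, 0), (t, 1)]));
        }
        highlights
    }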

crates/multi_buffer/src/multi_buffer.rs

@@ -4109,9 +4109,10 @@ impl MultiBufferSnapshot {
         &self,
         anchors: impl IntoIterator<Item = text::Anchor>,
     ) -> Vec<Option<Anchor>> {
+        let anchors = anchors.into_iter();
+        let mut result = Vec::with_capacity(anchors.size_hint().0);
+        let mut anchors = anchors.peekable();
         let mut cursor = self.excerpts.cursor::<Option<&Locator>>(());
-        let mut anchors = anchors.into_iter().peekable();
-        let mut result = Vec::new();
         'anchors: while let Some(anchor) = anchors.peek() {
             let Some(buffer_id) = anchor.buffer_id else {
                 anchors.next();
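
This hunk is the `text_anchors_to_visible_anchors` fix from the summary: the result Vec now starts at the input iterator's lower size bound instead of growing from empty. A standalone sketch of the same move (illustrative names, not the real signature):

    fn to_options<I: IntoIterator<Item = u32>>(items: I) -> Vec<Option<u32>> {
        let items = items.into_iter();
        // size_hint().0 is a lower bound, so reserving it is always
        // safe; for slices and Vecs it equals the exact length.
        let mut result = Vec::with_capacity(items.size_hint().0);
        let mut items = items.peekable();
        while let Some(&item) = items.peek() {
            items.next();
            result.push((item % 2 == 0).then_some(item));
        }
        result
    }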

crates/project/src/lsp_store/semantic_tokens.rs

@@ -9,7 +9,6 @@ use futures::{
     future::{Shared, join_all},
 };
 use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
-use itertools::Itertools;
 use language::{Buffer, LanguageName, language_settings::all_language_settings};
 use lsp::{AdapterServerCapabilities, LanguageServerId};
 use rpc::{TypedEnvelope, proto};
@@ -169,7 +168,13 @@ impl LspStore {
                             (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
                         })
                         .map_err(Arc::new)?;
-                    Some(raw_to_buffer_semantic_tokens(raw_tokens, &buffer_snapshot).await)
+                    Some(
+                        cx.background_spawn(raw_to_buffer_semantic_tokens(
+                            raw_tokens,
+                            buffer_snapshot.text.clone(),
+                        ))
+                        .await,
+                    )
                 } else {
                     lsp_store.update(cx, |lsp_store, cx| {
                         if let Some(current_lsp_data) =
@@ -524,7 +529,7 @@ impl SemanticTokenStylizer {
 
 async fn raw_to_buffer_semantic_tokens(
     raw_tokens: RawSemanticTokens,
-    buffer_snapshot: &text::BufferSnapshot,
+    buffer_snapshot: text::BufferSnapshot,
 ) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
     let mut res = HashMap::default();
     for (&server_id, server_tokens) in &raw_tokens.servers {
@@ -532,46 +537,58 @@ async fn raw_to_buffer_semantic_tokens(
        // We don't `collect` here because filter_map doesn't pre-allocate;
        // we'd rather over-allocate than under-allocate, since we have to re-allocate into an Arc slice anyway
         let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
+        let mut tokens = server_tokens.tokens();
         // 5000 was chosen by profiling, on a decent machine this will take about 1ms per chunk
         // This is to avoid blocking the main thread for hundreds of milliseconds at a time for very big files
        // If we ever change the below code to not query the underlying rope 6 times per token we can bump this up
-        for chunk in server_tokens.tokens().chunks(5000).into_iter() {
-            buffer_tokens.extend(chunk.filter_map(|token| {
-                let start = Unclipped(PointUtf16::new(token.line, token.start));
-                let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
-                let start_offset = buffer_snapshot
-                    .as_rope()
-                    .point_utf16_to_offset_utf16(clipped_start);
-                let end_offset = start_offset + OffsetUtf16(token.length as usize);
-
-                let start = buffer_snapshot
-                    .as_rope()
-                    .offset_utf16_to_offset(start_offset);
-                if start < last {
-                    return None;
-                }
+        const CHUNK_LEN: usize = 5000;
+        loop {
+            let mut changed = false;
+            let chunk = tokens
+                .by_ref()
+                .take(CHUNK_LEN)
+                .inspect(|_| changed = true)
+                .filter_map(|token| {
+                    let start = Unclipped(PointUtf16::new(token.line, token.start));
+                    let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
+                    let start_offset = buffer_snapshot
+                        .as_rope()
+                        .point_utf16_to_offset_utf16(clipped_start);
+                    let end_offset = start_offset + OffsetUtf16(token.length as usize);
+
+                    let start = buffer_snapshot
+                        .as_rope()
+                        .offset_utf16_to_offset(start_offset);
+                    if start < last {
+                        return None;
+                    }
 
-                let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
-                if end < last {
-                    return None;
-                }
-                last = end;
+                    let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
+                    if end < last {
+                        return None;
+                    }
+                    last = end;
 
-                if start == end {
-                    return None;
-                }
+                    if start == end {
+                        return None;
+                    }
 
-                Some(BufferSemanticToken {
-                    range: buffer_snapshot.anchor_before(start)..buffer_snapshot.anchor_after(end),
-                    token_type: token.token_type,
-                    token_modifiers: token.token_modifiers,
-                })
-            }));
+                    Some(BufferSemanticToken {
+                        range: buffer_snapshot.anchor_before(start)
+                            ..buffer_snapshot.anchor_after(end),
+                        token_type: token.token_type,
+                        token_modifiers: token.token_modifiers,
+                    })
+                });
+            buffer_tokens.extend(chunk);
+
+            if !changed {
+                break;
+            }
             yield_now().await;
         }
 
         res.insert(server_id, buffer_tokens.into());
-        yield_now().await;
     }
     res
 }
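
Taken together, the two lsp_store changes move the token conversion off the main thread and keep it cooperative once there: `cx.background_spawn` runs `raw_to_buffer_semantic_tokens` on the background executor, and the rewritten loop drains the token iterator `CHUNK_LEN` items at a time, yielding between chunks so one huge file cannot monopolize that thread. A runnable sketch of the combined pattern, assuming `futures_lite::future::yield_now` and `smol::spawn` stand in for Zed's actual executor hooks:

    use futures_lite::future::yield_now;

    const CHUNK_LEN: usize = 5000;

    async fn process_in_chunks(items: Vec<u64>) -> Vec<u64> {
        let mut out = Vec::with_capacity(items.len());
        let mut iter = items.into_iter();
        loop {
            // by_ref() lets each pass drain the next CHUNK_LEN items
            // while the outer loop keeps ownership of the iterator.
            // inspect() records whether this pass saw anything, which
            // doubles as the termination check: extend() alone can't
            // tell an empty chunk from a fully filtered-out one.
            let mut saw_any = false;
            out.extend(
                iter.by_ref()
                    .take(CHUNK_LEN)
                    .inspect(|_| saw_any = true)
                    .filter(|n| n % 2 == 0),
            );
            if !saw_any {
                break;
            }
            // Hand the thread back to the executor between chunks.
            yield_now().await;
        }
        out
    }

    // The caller awaits the returned task, so the conversion itself
    // never runs on the spawning thread.
    fn spawn_conversion(items: Vec<u64>) -> smol::Task<Vec<u64>> {
        smol::spawn(process_in_chunks(items))
    }

This also accounts for the dropped trailing `yield_now().await` after `res.insert`: a per-server yield is presumably redundant once every chunk already yields.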