From d09f817c866ec9205215ae8809c2fee154abe771 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 18 Feb 2026 17:25:58 +0100 Subject: [PATCH] Cherry-pick: editor: Some more semantic highlighting perf work (#49495) Release Notes: - N/A *or* Added/Fixed/Improved ... --- crates/editor/src/semantic_tokens.rs | 165 ++++++++++-------- crates/multi_buffer/src/multi_buffer.rs | 5 +- .../project/src/lsp_store/semantic_tokens.rs | 83 +++++---- 3 files changed, 141 insertions(+), 112 deletions(-) diff --git a/crates/editor/src/semantic_tokens.rs b/crates/editor/src/semantic_tokens.rs index 14e18632bfdb8d8fc815f685db41bf79c3934cc3..b27710db96678321c919546f41444b34b1db06da 100644 --- a/crates/editor/src/semantic_tokens.rs +++ b/crates/editor/src/semantic_tokens.rs @@ -196,8 +196,10 @@ impl Editor { buffers_to_query .into_iter() .filter_map(|(buffer_id, buffer)| { - let known_version = - editor.semantic_token_state.fetched_for_buffers.get(&buffer_id); + let known_version = editor + .semantic_token_state + .fetched_for_buffers + .get(&buffer_id); let query_version = buffer.read(cx).version(); if known_version.is_some_and(|known_version| { !query_version.changed_since(known_version) @@ -216,89 +218,98 @@ impl Editor { }; let all_semantic_tokens = join_all(all_semantic_tokens_task).await; - editor.update(cx, |editor, cx| { - editor.display_map.update(cx, |display_map, _| { - for buffer_id in invalidate_semantic_highlights_for_buffers { - display_map.invalidate_semantic_highlights(buffer_id); - } - }); - - - if all_semantic_tokens.is_empty() { - return; - } - let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx); - - for (buffer_id, query_version, tokens) in all_semantic_tokens { - let tokens = match tokens { - Ok(BufferSemanticTokens { tokens: Some(tokens) }) => { - tokens - }, - Ok(BufferSemanticTokens { tokens: None }) => { - editor.display_map.update(cx, |display_map, _| { - display_map.invalidate_semantic_highlights(buffer_id); - }); - continue; - }, - Err(e) => { - log::error!("Failed to fetch semantic tokens for buffer {buffer_id:?}: {e:#}"); - continue; - }, - }; + editor + .update(cx, |editor, cx| { + editor.display_map.update(cx, |display_map, _| { + for buffer_id in invalidate_semantic_highlights_for_buffers { + display_map.invalidate_semantic_highlights(buffer_id); + } + }); - match editor.semantic_token_state.fetched_for_buffers.entry(buffer_id) { - hash_map::Entry::Occupied(mut o) => { - if query_version.changed_since(o.get()) { - o.insert(query_version); - } else { + if all_semantic_tokens.is_empty() { + return; + } + let multi_buffer_snapshot = editor.buffer().read(cx).snapshot(cx); + + for (buffer_id, query_version, tokens) in all_semantic_tokens { + let tokens = match tokens { + Ok(BufferSemanticTokens { + tokens: Some(tokens), + }) => tokens, + Ok(BufferSemanticTokens { tokens: None }) => { + editor.display_map.update(cx, |display_map, _| { + display_map.invalidate_semantic_highlights(buffer_id); + }); continue; } - }, - hash_map::Entry::Vacant(v) => { - v.insert(query_version); - }, - } - - let language_name = editor - .buffer() - .read(cx) - .buffer(buffer_id) - .and_then(|buf| buf.read(cx).language().map(|l| l.name())); - - editor.display_map.update(cx, |display_map, cx| { - project.read(cx).lsp_store().update(cx, |lsp_store, cx| { - let mut token_highlights = Vec::new(); - let mut interner = HighlightStyleInterner::default(); - for (server_id, server_tokens) in tokens { - let Some(stylizer) = lsp_store.get_or_create_token_stylizer( - server_id, - language_name.as_ref(), 
- cx, - ) - else { + Err(e) => { + log::error!( + "Failed to fetch semantic tokens for buffer \ + {buffer_id:?}: {e:#}" + ); + continue; + } + }; + + match editor + .semantic_token_state + .fetched_for_buffers + .entry(buffer_id) + { + hash_map::Entry::Occupied(mut o) => { + if query_version.changed_since(o.get()) { + o.insert(query_version); + } else { continue; - }; - token_highlights.extend(buffer_into_editor_highlights( - &server_tokens, - stylizer, - &multi_buffer_snapshot, - &mut interner, - cx, - )); + } } - - token_highlights.sort_by(|a, b| { - a.range.start.cmp(&b.range.start, &multi_buffer_snapshot) + hash_map::Entry::Vacant(v) => { + v.insert(query_version); + } + } + + let language_name = editor + .buffer() + .read(cx) + .buffer(buffer_id) + .and_then(|buf| buf.read(cx).language().map(|l| l.name())); + + editor.display_map.update(cx, |display_map, cx| { + project.read(cx).lsp_store().update(cx, |lsp_store, cx| { + let mut token_highlights = Vec::new(); + let mut interner = HighlightStyleInterner::default(); + for (server_id, server_tokens) in tokens { + let Some(stylizer) = lsp_store.get_or_create_token_stylizer( + server_id, + language_name.as_ref(), + cx, + ) else { + continue; + }; + token_highlights.reserve(2 * server_tokens.len()); + token_highlights.extend(buffer_into_editor_highlights( + &server_tokens, + stylizer, + &multi_buffer_snapshot, + &mut interner, + cx, + )); + } + + token_highlights.sort_by(|a, b| { + a.range.start.cmp(&b.range.start, &multi_buffer_snapshot) + }); + display_map.semantic_token_highlights.insert( + buffer_id, + (Arc::from(token_highlights), Arc::new(interner)), + ); }); - display_map - .semantic_token_highlights - .insert(buffer_id, (Arc::from(token_highlights), Arc::new(interner))); }); - }); - } + } - cx.notify(); - }).ok(); + cx.notify(); + }) + .ok(); }); } diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index dfa403e7dadbfb5f6e00825af33cc1a6b194a6cc..587f817b71f7338b93336296d53f2a48409ba79e 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -4110,9 +4110,10 @@ impl MultiBufferSnapshot { &self, anchors: impl IntoIterator, ) -> Vec> { + let anchors = anchors.into_iter(); + let mut result = Vec::with_capacity(anchors.size_hint().0); + let mut anchors = anchors.peekable(); let mut cursor = self.excerpts.cursor::>(()); - let mut anchors = anchors.into_iter().peekable(); - let mut result = Vec::new(); 'anchors: while let Some(anchor) = anchors.peek() { let Some(buffer_id) = anchor.buffer_id else { anchors.next(); diff --git a/crates/project/src/lsp_store/semantic_tokens.rs b/crates/project/src/lsp_store/semantic_tokens.rs index 7c110c72ae713f57040488b8d2e5ffe816e6e631..00e4a4c1890278526edc0174a8b6fcf9652226f5 100644 --- a/crates/project/src/lsp_store/semantic_tokens.rs +++ b/crates/project/src/lsp_store/semantic_tokens.rs @@ -9,7 +9,6 @@ use futures::{ future::{Shared, join_all}, }; use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task}; -use itertools::Itertools; use language::{Buffer, LanguageName, language_settings::all_language_settings}; use lsp::{AdapterServerCapabilities, LanguageServerId}; use rpc::{TypedEnvelope, proto}; @@ -169,7 +168,13 @@ impl LspStore { (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot) }) .map_err(Arc::new)?; - Some(raw_to_buffer_semantic_tokens(raw_tokens, &buffer_snapshot).await) + Some( + cx.background_spawn(raw_to_buffer_semantic_tokens( + raw_tokens, + 
buffer_snapshot.text.clone(), + )) + .await, + ) } else { lsp_store.update(cx, |lsp_store, cx| { if let Some(current_lsp_data) = @@ -524,7 +529,7 @@ impl SemanticTokenStylizer { async fn raw_to_buffer_semantic_tokens( raw_tokens: RawSemanticTokens, - buffer_snapshot: &text::BufferSnapshot, + buffer_snapshot: text::BufferSnapshot, ) -> HashMap> { let mut res = HashMap::default(); for (&server_id, server_tokens) in &raw_tokens.servers { @@ -532,46 +537,58 @@ async fn raw_to_buffer_semantic_tokens( // We don't do `collect` here due to the filter map not pre-allocating // we'd rather over allocate here than not since we have to re-allocate into an arc slice anyways let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5); + let mut tokens = server_tokens.tokens(); // 5000 was chosen by profiling, on a decent machine this will take about 1ms per chunk // This is to avoid blocking the main thread for hundreds of milliseconds at a time for very big files // If we every change the below code to not query the underlying rope 6 times per token we can bump this up - for chunk in server_tokens.tokens().chunks(5000).into_iter() { - buffer_tokens.extend(chunk.filter_map(|token| { - let start = Unclipped(PointUtf16::new(token.line, token.start)); - let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left); - let start_offset = buffer_snapshot - .as_rope() - .point_utf16_to_offset_utf16(clipped_start); - let end_offset = start_offset + OffsetUtf16(token.length as usize); - - let start = buffer_snapshot - .as_rope() - .offset_utf16_to_offset(start_offset); - if start < last { - return None; - } + const CHUNK_LEN: usize = 5000; + loop { + let mut changed = false; + let chunk = tokens + .by_ref() + .take(CHUNK_LEN) + .inspect(|_| changed = true) + .filter_map(|token| { + let start = Unclipped(PointUtf16::new(token.line, token.start)); + let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left); + let start_offset = buffer_snapshot + .as_rope() + .point_utf16_to_offset_utf16(clipped_start); + let end_offset = start_offset + OffsetUtf16(token.length as usize); + + let start = buffer_snapshot + .as_rope() + .offset_utf16_to_offset(start_offset); + if start < last { + return None; + } - let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset); - if end < last { - return None; - } - last = end; + let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset); + if end < last { + return None; + } + last = end; - if start == end { - return None; - } + if start == end { + return None; + } - Some(BufferSemanticToken { - range: buffer_snapshot.anchor_before(start)..buffer_snapshot.anchor_after(end), - token_type: token.token_type, - token_modifiers: token.token_modifiers, - }) - })); + Some(BufferSemanticToken { + range: buffer_snapshot.anchor_before(start) + ..buffer_snapshot.anchor_after(end), + token_type: token.token_type, + token_modifiers: token.token_modifiers, + }) + }); + buffer_tokens.extend(chunk); + + if !changed { + break; + } yield_now().await; } res.insert(server_id, buffer_tokens.into()); - yield_now().await; } res }
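
The bulk of the win in raw_to_buffer_semantic_tokens comes from converting tokens in fixed-size chunks and yielding to the executor between chunks, so one very large file cannot pin a background task for hundreds of milliseconds at a stretch. Below is a minimal standalone sketch of that pattern; the token types, the placeholder offset math, and the use of futures-lite's yield_now are assumptions for illustration, not the patch's actual types or executor.

    // Chunked conversion with cooperative yielding. CHUNK_LEN mirrors the
    // patch's profiling-derived 5000 (roughly 1ms of work per chunk).
    use futures_lite::future::yield_now;

    struct RawToken { line: u32, start: u32, len: u32 }
    struct Converted { start: usize, end: usize }

    async fn convert_in_chunks(mut raw: impl Iterator<Item = RawToken>) -> Vec<Converted> {
        const CHUNK_LEN: usize = 5000;
        let mut out = Vec::new();
        loop {
            // `inspect` flips `saw_any` as soon as the chunk produces an item,
            // so iterator exhaustion is detected without cloning or peeking.
            let mut saw_any = false;
            out.extend(
                raw.by_ref()
                    .take(CHUNK_LEN)
                    .inspect(|_| saw_any = true)
                    .map(|t| Converted {
                        // Placeholder mapping; the real code clips the point and
                        // resolves UTF-16 offsets against the rope.
                        start: t.line as usize * 1_000 + t.start as usize,
                        end: t.line as usize * 1_000 + (t.start + t.len) as usize,
                    }),
            );
            if !saw_any {
                break; // source iterator drained, no extra yield needed
            }
            // Let other tasks on the executor run before the next chunk.
            yield_now().await;
        }
        out
    }

The saw_any flag is what lets the new loop replace itertools' chunks adapter (hence the dropped `use itertools::Itertools;`) while keeping the same yield point between chunks.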
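
Two smaller allocations are also hoisted: summaries_for_anchors in multi_buffer.rs now sizes its result from the iterator's size_hint before wrapping it in Peekable, and the editor pre-reserves 2 * server_tokens.len() highlight slots per server before extending. A sketch of the size_hint variant, with an illustrative element type and loop body standing in for the excerpt-cursor walk:

    // Size the result vector from the source iterator's lower bound before
    // turning it into `peekable`, instead of growing the Vec from empty.
    fn collect_summaries<I: IntoIterator<Item = u64>>(anchors: I) -> Vec<u64> {
        let anchors = anchors.into_iter();
        // `size_hint().0` is a lower bound, so a well-behaved iterator never
        // causes an over-allocation here, and slices/Vecs report exact counts.
        let mut result = Vec::with_capacity(anchors.size_hint().0);
        let mut anchors = anchors.peekable();
        while let Some(&anchor) = anchors.peek() {
            // The real code advances excerpt cursors here; the sketch copies.
            result.push(anchor);
            anchors.next();
        }
        result
    }

The point is only to read size_hint on the plain iterator first, since Peekable still forwards the hint but the original code never consulted it at all.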
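
The other structural change in lsp_store/semantic_tokens.rs is that raw_to_buffer_semantic_tokens now takes the text snapshot by value (a cheap clone of the rope) and runs inside cx.background_spawn(...), so the conversion happens off the calling task and no borrow is held across the await. The sketch below uses plain threads and an Arc-backed Snapshot as stand-ins for gpui's executor and the real rope snapshot; both are assumptions for illustration, not the crate's API.

    use std::sync::Arc;
    use std::thread;

    // Snapshots of persistent structures (ropes, B-trees) clone by bumping
    // reference counts, so handing an owned copy to a worker is cheap.
    #[derive(Clone)]
    struct Snapshot {
        text: Arc<String>,
    }

    fn spawn_conversion(snapshot: Snapshot, raw_lens: Vec<usize>) -> thread::JoinHandle<usize> {
        // `move` transfers the owned snapshot into the worker; the caller keeps
        // its own clone and can continue editing the live buffer meanwhile.
        thread::spawn(move || {
            raw_lens
                .into_iter()
                .filter(|&len| len <= snapshot.text.len())
                .count()
        })
    }

    fn main() {
        let snapshot = Snapshot { text: Arc::new("fn main() {}".to_owned()) };
        let tokens = spawn_conversion(snapshot.clone(), vec![3, 99, 5]);
        println!("kept {} tokens", tokens.join().unwrap());
    }

Passing the snapshot by value rather than by reference is what allows the conversion future to be 'static and therefore eligible for background_spawn in the first place.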