From 418b51d6bcf58b20e6a07ec13b37793f45a4ec57 Mon Sep 17 00:00:00 2001 From: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com> Date: Wed, 18 Feb 2026 08:53:26 +0100 Subject: [PATCH] editor: Optimize TabMap chunks generation (#48378) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the next chunk is generated by traversing all characters of the current folded chunk until a tab is found. Since we already have bitmasks for characters and tabs, we can also propagate the newlines bitmap from the rope and use it to make this computation O(1) in all cases. I haven’t run benchmarks yet. Release Notes: - N/A --- crates/editor/src/display_map/block_map.rs | 4 + .../src/display_map/custom_highlights.rs | 3 + crates/editor/src/display_map/fold_map.rs | 4 + crates/editor/src/display_map/inlay_map.rs | 7 + crates/editor/src/display_map/tab_map.rs | 142 ++++++++++-------- crates/editor/src/display_map/wrap_map.rs | 3 + crates/language/src/buffer.rs | 5 + crates/multi_buffer/src/multi_buffer.rs | 6 + crates/rope/src/chunk.rs | 6 + crates/rope/src/rope.rs | 4 + 10 files changed, 125 insertions(+), 59 deletions(-) diff --git a/crates/editor/src/display_map/block_map.rs b/crates/editor/src/display_map/block_map.rs index de9d94fdb94b08c05efb582f2f56d4b8078b1ead..000114704a6f06d3df019e527925a86c5c817626 100644 --- a/crates/editor/src/display_map/block_map.rs +++ b/crates/editor/src/display_map/block_map.rs @@ -2635,9 +2635,11 @@ impl<'a> Iterator for BlockChunks<'a> { self.input_chunk.text = suffix; self.input_chunk.tabs >>= prefix_bytes.saturating_sub(1); self.input_chunk.chars >>= prefix_bytes.saturating_sub(1); + self.input_chunk.newlines >>= prefix_bytes.saturating_sub(1); let mut tabs = self.input_chunk.tabs; let mut chars = self.input_chunk.chars; + let mut newlines = self.input_chunk.newlines; if self.masked { // Not great for multibyte text because to keep cursor math correct we @@ -2647,12 +2649,14 @@ impl<'a> Iterator for BlockChunks<'a> { prefix = unsafe { std::str::from_utf8_unchecked(&BULLETS[..bullet_len]) }; chars = 1u128.unbounded_shl(bullet_len as u32).wrapping_sub(1); tabs = 0; + newlines = 0; } let chunk = Chunk { text: prefix, tabs, chars, + newlines, ..self.input_chunk.clone() }; diff --git a/crates/editor/src/display_map/custom_highlights.rs b/crates/editor/src/display_map/custom_highlights.rs index 61a6d68713a8bff755f6458adced7d8b6dcddf4d..39eabef2f9627b8088dc826ec64379bf76a6c9fa 100644 --- a/crates/editor/src/display_map/custom_highlights.rs +++ b/crates/editor/src/display_map/custom_highlights.rs @@ -195,15 +195,18 @@ impl<'a> Iterator for CustomHighlightsChunks<'a> { let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1); let chars = chunk.chars & mask; let tabs = chunk.tabs & mask; + let newlines = chunk.newlines & mask; let mut prefix = Chunk { text: prefix, chars, tabs, + newlines, ..chunk.clone() }; chunk.chars = chunk.chars.unbounded_shr(split_idx as u32); chunk.tabs = chunk.tabs.unbounded_shr(split_idx as u32); + chunk.newlines = chunk.newlines.unbounded_shr(split_idx as u32); chunk.text = suffix; if !self.active_highlights.is_empty() { prefix.highlight_style = self diff --git a/crates/editor/src/display_map/fold_map.rs b/crates/editor/src/display_map/fold_map.rs index 61447c37fbedf7eafab7bac360c36349e1b76d0b..efb7abad6a169546c0d13de29870f939ced93eaa 100644 --- a/crates/editor/src/display_map/fold_map.rs +++ b/crates/editor/src/display_map/fold_map.rs @@ -1400,6 +1400,8 @@ pub struct Chunk<'a> { pub tabs: u128, /// Bitmap of character locations in chunk pub chars: u128, + /// Bitmap of newline locations in chunk + pub newlines: u128, } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -1564,6 +1566,7 @@ impl<'a> Iterator for FoldChunks<'a> { chunk.tabs = (chunk.tabs >> bit_start) & mask; chunk.chars = (chunk.chars >> bit_start) & mask; + chunk.newlines = (chunk.newlines >> bit_start) & mask; if chunk_end == transform_end { self.transform_cursor.next(); @@ -1577,6 +1580,7 @@ impl<'a> Iterator for FoldChunks<'a> { text: chunk.text, tabs: chunk.tabs, chars: chunk.chars, + newlines: chunk.newlines, syntax_highlight_id: chunk.syntax_highlight_id, highlight_style: chunk.highlight_style, diagnostic_severity: chunk.diagnostic_severity, diff --git a/crates/editor/src/display_map/inlay_map.rs b/crates/editor/src/display_map/inlay_map.rs index aa149901cba6d6c65989fe63b16d53b03f0c3e7e..8bc66123c41a1e7af4e2814bc79ea80c97219f14 100644 --- a/crates/editor/src/display_map/inlay_map.rs +++ b/crates/editor/src/display_map/inlay_map.rs @@ -296,9 +296,11 @@ impl<'a> Iterator for InlayChunks<'a> { let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1); let chars = chunk.chars & mask; let tabs = chunk.tabs & mask; + let newlines = chunk.newlines & mask; chunk.chars = chunk.chars.unbounded_shr(split_index as u32); chunk.tabs = chunk.tabs.unbounded_shr(split_index as u32); + chunk.newlines = chunk.newlines.unbounded_shr(split_index as u32); chunk.text = suffix; InlayChunk { @@ -306,6 +308,7 @@ impl<'a> Iterator for InlayChunks<'a> { text: prefix, chars, tabs, + newlines, ..chunk.clone() }, renderer: None, @@ -422,6 +425,7 @@ impl<'a> Iterator for InlayChunks<'a> { text: inlay_chunk, chars, tabs, + newlines, } = self .inlay_chunk .get_or_insert_with(|| inlay_chunks.next().unwrap()); @@ -446,9 +450,11 @@ impl<'a> Iterator for InlayChunks<'a> { let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1); let new_chars = *chars & mask; let new_tabs = *tabs & mask; + let new_newlines = *newlines & mask; *chars = chars.unbounded_shr(split_index as u32); *tabs = tabs.unbounded_shr(split_index as u32); + *newlines = newlines.unbounded_shr(split_index as u32); if inlay_chunk.is_empty() { self.inlay_chunk = None; @@ -461,6 +467,7 @@ impl<'a> Iterator for InlayChunks<'a> { text: chunk, chars: new_chars, tabs: new_tabs, + newlines: new_newlines, highlight_style, is_inlay: true, ..Chunk::default() diff --git a/crates/editor/src/display_map/tab_map.rs b/crates/editor/src/display_map/tab_map.rs index 11db0a4f12105c607a30e98e69157c6c2c2b32f0..45c52db37ee1e9e0e1e25b745a42a7338e2e714e 100644 --- a/crates/editor/src/display_map/tab_map.rs +++ b/crates/editor/src/display_map/tab_map.rs @@ -5,7 +5,7 @@ use super::{ use language::Point; use multi_buffer::MultiBufferSnapshot; -use std::{cmp, mem, num::NonZeroU32, ops::Range}; +use std::{cmp, num::NonZeroU32, ops::Range}; use sum_tree::Bias; const MAX_EXPANSION_COLUMN: u32 = 256; @@ -280,6 +280,7 @@ impl TabSnapshot { chunk: Chunk { text: unsafe { std::str::from_utf8_unchecked(&SPACES[..to_next_stop as usize]) }, is_tab: true, + chars: 1u128.unbounded_shl(to_next_stop) - 1, ..Default::default() }, inside_leading_tab: to_next_stop > 0, @@ -608,6 +609,9 @@ impl<'a> Iterator for TabChunks<'a> { self.chunk = chunk; if self.inside_leading_tab { self.chunk.text = &self.chunk.text[1..]; + self.chunk.tabs >>= 1; + self.chunk.chars >>= 1; + self.chunk.newlines >>= 1; self.inside_leading_tab = false; self.input_column += 1; } @@ -616,72 +620,92 @@ impl<'a> Iterator for TabChunks<'a> { } } - //todo(improve performance by using tab cursor) - for (ix, c) in self.chunk.text.char_indices() { - match c { - '\t' if ix > 0 => { - let (prefix, suffix) = self.chunk.text.split_at(ix); - - let mask = 1u128.unbounded_shl(ix as u32).wrapping_sub(1); - let chars = self.chunk.chars & mask; - let tabs = self.chunk.tabs & mask; - self.chunk.tabs = self.chunk.tabs.unbounded_shr(ix as u32); - self.chunk.chars = self.chunk.chars.unbounded_shr(ix as u32); - self.chunk.text = suffix; - return Some(Chunk { - text: prefix, - chars, - tabs, - ..self.chunk.clone() - }); - } - '\t' => { - self.chunk.text = &self.chunk.text[1..]; - self.chunk.tabs >>= 1; - self.chunk.chars >>= 1; - let tab_size = if self.input_column < self.max_expansion_column { - self.tab_size.get() - } else { - 1 - }; - let mut len = tab_size - self.column % tab_size; - let next_output_position = cmp::min( - self.output_position + Point::new(0, len), - self.max_output_position, - ); - len = next_output_position.column - self.output_position.column; - self.column += len; - self.input_column += 1; - self.output_position = next_output_position; - return Some(Chunk { - text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) }, - is_tab: true, - chars: 1u128.unbounded_shl(len) - 1, - tabs: 0, - ..self.chunk.clone() - }); - } - '\n' => { - self.column = 0; - self.input_column = 0; - self.output_position += Point::new(1, 0); - } - _ => { - self.column += 1; - if !self.inside_leading_tab { - self.input_column += c.len_utf8() as u32; - } - self.output_position.column += c.len_utf8() as u32; - } + let first_tab_ix = if self.chunk.tabs != 0 { + self.chunk.tabs.trailing_zeros() as usize + } else { + self.chunk.text.len() + }; + + if first_tab_ix == 0 { + self.chunk.text = &self.chunk.text[1..]; + self.chunk.tabs >>= 1; + self.chunk.chars >>= 1; + self.chunk.newlines >>= 1; + + let tab_size = if self.input_column < self.max_expansion_column { + self.tab_size.get() + } else { + 1 + }; + let mut len = tab_size - self.column % tab_size; + let next_output_position = cmp::min( + self.output_position + Point::new(0, len), + self.max_output_position, + ); + len = next_output_position.column - self.output_position.column; + self.column += len; + self.input_column += 1; + self.output_position = next_output_position; + + return Some(Chunk { + text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) }, + is_tab: true, + chars: 1u128.unbounded_shl(len) - 1, + tabs: 0, + newlines: 0, + ..self.chunk.clone() + }); + } + + let prefix_len = first_tab_ix; + let (prefix, suffix) = self.chunk.text.split_at(prefix_len); + + let mask = 1u128.unbounded_shl(prefix_len as u32).wrapping_sub(1); + let prefix_chars = self.chunk.chars & mask; + let prefix_tabs = self.chunk.tabs & mask; + let prefix_newlines = self.chunk.newlines & mask; + + self.chunk.text = suffix; + self.chunk.tabs = self.chunk.tabs.unbounded_shr(prefix_len as u32); + self.chunk.chars = self.chunk.chars.unbounded_shr(prefix_len as u32); + self.chunk.newlines = self.chunk.newlines.unbounded_shr(prefix_len as u32); + + let newline_count = prefix_newlines.count_ones(); + if newline_count > 0 { + let last_newline_bit = 128 - prefix_newlines.leading_zeros(); + let chars_after_last_newline = + prefix_chars.unbounded_shr(last_newline_bit).count_ones(); + let bytes_after_last_newline = prefix_len as u32 - last_newline_bit; + + self.column = chars_after_last_newline; + self.input_column = bytes_after_last_newline; + self.output_position = Point::new( + self.output_position.row + newline_count, + bytes_after_last_newline, + ); + } else { + let char_count = prefix_chars.count_ones(); + self.column += char_count; + if !self.inside_leading_tab { + self.input_column += prefix_len as u32; } + self.output_position.column += prefix_len as u32; } - Some(mem::take(&mut self.chunk)) + Some(Chunk { + text: prefix, + chars: prefix_chars, + tabs: prefix_tabs, + newlines: prefix_newlines, + ..self.chunk.clone() + }) } } #[cfg(test)] mod tests { + use std::mem; + use super::*; use crate::{ MultiBuffer, diff --git a/crates/editor/src/display_map/wrap_map.rs b/crates/editor/src/display_map/wrap_map.rs index 8c6e7df41ef954e626b5a74fab12376caa02b813..9ea2064ebed1e9bc630e971f494bdbfe92df0002 100644 --- a/crates/editor/src/display_map/wrap_map.rs +++ b/crates/editor/src/display_map/wrap_map.rs @@ -1079,14 +1079,17 @@ impl<'a> Iterator for WrapChunks<'a> { let mask = 1u128.unbounded_shl(input_len as u32).wrapping_sub(1); let chars = self.input_chunk.chars & mask; let tabs = self.input_chunk.tabs & mask; + let newlines = self.input_chunk.newlines & mask; self.input_chunk.tabs = self.input_chunk.tabs.unbounded_shr(input_len as u32); self.input_chunk.chars = self.input_chunk.chars.unbounded_shr(input_len as u32); + self.input_chunk.newlines = self.input_chunk.newlines.unbounded_shr(input_len as u32); self.input_chunk.text = suffix; Some(Chunk { text: prefix, chars, tabs, + newlines, ..self.input_chunk.clone() }) } diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 2721c1fc552ad8293dbe72c34b42159788948164..1449052983a49a539201360ec48dd37c04a4ccae 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -576,6 +576,8 @@ pub struct Chunk<'a> { pub tabs: u128, /// Bitmap of character indices in this chunk pub chars: u128, + /// Bitmap of newline indices in this chunk + pub newlines: u128, /// Whether this chunk of text is marked as unnecessary. pub is_unnecessary: bool, /// Whether this chunk of text was originally a tab character. @@ -5663,6 +5665,7 @@ impl<'a> Iterator for BufferChunks<'a> { text: chunk, chars: chars_map, tabs, + newlines, }) = self.chunks.peek_with_bitmaps() { let chunk_start = self.range.start; @@ -5684,6 +5687,7 @@ impl<'a> Iterator for BufferChunks<'a> { let mask = 1u128.unbounded_shl(bit_end as u32).wrapping_sub(1); let tabs = (tabs >> bit_start) & mask; let chars = (chars_map >> bit_start) & mask; + let newlines = (newlines >> bit_start) & mask; self.range.start = chunk_end; if self.range.start == self.chunks.offset() + chunk.len() { @@ -5698,6 +5702,7 @@ impl<'a> Iterator for BufferChunks<'a> { is_unnecessary: self.current_code_is_unnecessary(), tabs, chars, + newlines, ..Chunk::default() }) } else { diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index 321d0adaff3e81e1c50fb1113750203a063a0eec..274e6771e49a7677b804437c70fae384fbd03ce9 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -8371,15 +8371,18 @@ impl<'a> Iterator for MultiBufferChunks<'a> { let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1); let chars = chunk.chars & mask; let tabs = chunk.tabs & mask; + let newlines = chunk.newlines & mask; chunk.text = after; chunk.chars = chunk.chars >> split_idx; chunk.tabs = chunk.tabs >> split_idx; + chunk.newlines = chunk.newlines >> split_idx; Some(Chunk { text: before, chars, tabs, + newlines, ..chunk.clone() }) } else { @@ -8424,6 +8427,7 @@ impl<'a> Iterator for MultiBufferChunks<'a> { Chunk { text: "\n", chars: 1u128, + newlines: 1u128, ..Default::default() } }; @@ -8521,10 +8525,12 @@ impl<'a> Iterator for ExcerptChunks<'a> { if self.has_footer { let text = "\n"; let chars = 0b1; + let newlines = 0b1; self.has_footer = false; return Some(Chunk { text, chars, + newlines, ..Default::default() }); } diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index 1f94465282c1de01f5604a1f435831238afe64bc..e5a3ed045a7e44e2208941e908718bdf7ee5b00a 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -137,10 +137,16 @@ impl Chunk { self.chars } + #[inline(always)] pub fn tabs(&self) -> Bitmap { self.tabs } + #[inline(always)] + pub fn newlines(&self) -> Bitmap { + self.newlines + } + #[inline(always)] pub fn is_char_boundary(&self, offset: usize) -> bool { (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len() diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index fba7b96aca83fa05c0d6f3e7992ad7443ec7958a..7ab273be7bfa3fa84a608c69174cfcc6a038eac5 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -704,6 +704,8 @@ pub struct ChunkBitmaps<'a> { pub chars: Bitmap, /// Bitmap of tab locations in text. LSB ordered pub tabs: Bitmap, + /// Bitmap of newlines location in text. LSB ordered + pub newlines: Bitmap, } #[derive(Clone)] @@ -916,11 +918,13 @@ impl<'a> Chunks<'a> { // Shift the tabs to align with our slice window let shifted_tabs = chunk.tabs() >> chunk_start_offset; let shifted_chars = chunk.chars() >> chunk_start_offset; + let shifted_newlines = chunk.newlines() >> chunk_start_offset; Some(ChunkBitmaps { text: slice_text, chars: shifted_chars, tabs: shifted_tabs, + newlines: shifted_newlines, }) }