editor: Optimize TabMap chunks generation (#48378)

Marco Mihai Condrache created 1 month ago

Currently, `TabChunks` generates the next chunk by traversing every
character of the current folded chunk until a tab is found. Since chunks
already carry bitmaps for characters and tabs, we can also propagate the
newline bitmap from the rope and use bit operations on these bitmaps to
locate the next tab and update the column and row state, making this
computation O(1) in all cases.

I haven’t run benchmarks yet.

Release Notes:

- N/A

Change summary

crates/editor/src/display_map/block_map.rs         |   4 
crates/editor/src/display_map/custom_highlights.rs |   3 
crates/editor/src/display_map/fold_map.rs          |   4 
crates/editor/src/display_map/inlay_map.rs         |   7 
crates/editor/src/display_map/tab_map.rs           | 142 +++++++++------
crates/editor/src/display_map/wrap_map.rs          |   3 
crates/language/src/buffer.rs                      |   5 
crates/multi_buffer/src/multi_buffer.rs            |   6 
crates/rope/src/chunk.rs                           |   6 
crates/rope/src/rope.rs                            |   4 
10 files changed, 125 insertions(+), 59 deletions(-)

Detailed changes

crates/editor/src/display_map/block_map.rs 🔗

@@ -2635,9 +2635,11 @@ impl<'a> Iterator for BlockChunks<'a> {
         self.input_chunk.text = suffix;
         self.input_chunk.tabs >>= prefix_bytes.saturating_sub(1);
         self.input_chunk.chars >>= prefix_bytes.saturating_sub(1);
+        self.input_chunk.newlines >>= prefix_bytes.saturating_sub(1);
 
         let mut tabs = self.input_chunk.tabs;
         let mut chars = self.input_chunk.chars;
+        let mut newlines = self.input_chunk.newlines;
 
         if self.masked {
             // Not great for multibyte text because to keep cursor math correct we
@@ -2647,12 +2649,14 @@ impl<'a> Iterator for BlockChunks<'a> {
             prefix = unsafe { std::str::from_utf8_unchecked(&BULLETS[..bullet_len]) };
             chars = 1u128.unbounded_shl(bullet_len as u32).wrapping_sub(1);
             tabs = 0;
+            newlines = 0;
         }
 
         let chunk = Chunk {
             text: prefix,
             tabs,
             chars,
+            newlines,
             ..self.input_chunk.clone()
         };

crates/editor/src/display_map/custom_highlights.rs 🔗

@@ -195,15 +195,18 @@ impl<'a> Iterator for CustomHighlightsChunks<'a> {
         let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1);
         let chars = chunk.chars & mask;
         let tabs = chunk.tabs & mask;
+        let newlines = chunk.newlines & mask;
         let mut prefix = Chunk {
             text: prefix,
             chars,
             tabs,
+            newlines,
             ..chunk.clone()
         };
 
         chunk.chars = chunk.chars.unbounded_shr(split_idx as u32);
         chunk.tabs = chunk.tabs.unbounded_shr(split_idx as u32);
+        chunk.newlines = chunk.newlines.unbounded_shr(split_idx as u32);
         chunk.text = suffix;
         if !self.active_highlights.is_empty() {
             prefix.highlight_style = self

crates/editor/src/display_map/fold_map.rs 🔗

@@ -1400,6 +1400,8 @@ pub struct Chunk<'a> {
     pub tabs: u128,
     /// Bitmap of character locations in chunk
     pub chars: u128,
+    /// Bitmap of newline locations in chunk
+    pub newlines: u128,
 }
 
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
@@ -1564,6 +1566,7 @@ impl<'a> Iterator for FoldChunks<'a> {
 
             chunk.tabs = (chunk.tabs >> bit_start) & mask;
             chunk.chars = (chunk.chars >> bit_start) & mask;
+            chunk.newlines = (chunk.newlines >> bit_start) & mask;
 
             if chunk_end == transform_end {
                 self.transform_cursor.next();
@@ -1577,6 +1580,7 @@ impl<'a> Iterator for FoldChunks<'a> {
                 text: chunk.text,
                 tabs: chunk.tabs,
                 chars: chunk.chars,
+                newlines: chunk.newlines,
                 syntax_highlight_id: chunk.syntax_highlight_id,
                 highlight_style: chunk.highlight_style,
                 diagnostic_severity: chunk.diagnostic_severity,

crates/editor/src/display_map/inlay_map.rs 🔗

@@ -296,9 +296,11 @@ impl<'a> Iterator for InlayChunks<'a> {
                 let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1);
                 let chars = chunk.chars & mask;
                 let tabs = chunk.tabs & mask;
+                let newlines = chunk.newlines & mask;
 
                 chunk.chars = chunk.chars.unbounded_shr(split_index as u32);
                 chunk.tabs = chunk.tabs.unbounded_shr(split_index as u32);
+                chunk.newlines = chunk.newlines.unbounded_shr(split_index as u32);
                 chunk.text = suffix;
 
                 InlayChunk {
@@ -306,6 +308,7 @@ impl<'a> Iterator for InlayChunks<'a> {
                         text: prefix,
                         chars,
                         tabs,
+                        newlines,
                         ..chunk.clone()
                     },
                     renderer: None,
@@ -422,6 +425,7 @@ impl<'a> Iterator for InlayChunks<'a> {
                     text: inlay_chunk,
                     chars,
                     tabs,
+                    newlines,
                 } = self
                     .inlay_chunk
                     .get_or_insert_with(|| inlay_chunks.next().unwrap());
@@ -446,9 +450,11 @@ impl<'a> Iterator for InlayChunks<'a> {
                 let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1);
                 let new_chars = *chars & mask;
                 let new_tabs = *tabs & mask;
+                let new_newlines = *newlines & mask;
 
                 *chars = chars.unbounded_shr(split_index as u32);
                 *tabs = tabs.unbounded_shr(split_index as u32);
+                *newlines = newlines.unbounded_shr(split_index as u32);
 
                 if inlay_chunk.is_empty() {
                     self.inlay_chunk = None;
@@ -461,6 +467,7 @@ impl<'a> Iterator for InlayChunks<'a> {
                         text: chunk,
                         chars: new_chars,
                         tabs: new_tabs,
+                        newlines: new_newlines,
                         highlight_style,
                         is_inlay: true,
                         ..Chunk::default()

crates/editor/src/display_map/tab_map.rs 🔗

@@ -5,7 +5,7 @@ use super::{
 
 use language::Point;
 use multi_buffer::MultiBufferSnapshot;
-use std::{cmp, mem, num::NonZeroU32, ops::Range};
+use std::{cmp, num::NonZeroU32, ops::Range};
 use sum_tree::Bias;
 
 const MAX_EXPANSION_COLUMN: u32 = 256;
@@ -280,6 +280,7 @@ impl TabSnapshot {
             chunk: Chunk {
                 text: unsafe { std::str::from_utf8_unchecked(&SPACES[..to_next_stop as usize]) },
                 is_tab: true,
+                chars: 1u128.unbounded_shl(to_next_stop) - 1,
                 ..Default::default()
             },
             inside_leading_tab: to_next_stop > 0,
@@ -608,6 +609,9 @@ impl<'a> Iterator for TabChunks<'a> {
                 self.chunk = chunk;
                 if self.inside_leading_tab {
                     self.chunk.text = &self.chunk.text[1..];
+                    self.chunk.tabs >>= 1;
+                    self.chunk.chars >>= 1;
+                    self.chunk.newlines >>= 1;
                     self.inside_leading_tab = false;
                     self.input_column += 1;
                 }
@@ -616,72 +620,92 @@ impl<'a> Iterator for TabChunks<'a> {
             }
         }
 
-        //todo(improve performance by using tab cursor)
-        for (ix, c) in self.chunk.text.char_indices() {
-            match c {
-                '\t' if ix > 0 => {
-                    let (prefix, suffix) = self.chunk.text.split_at(ix);
-
-                    let mask = 1u128.unbounded_shl(ix as u32).wrapping_sub(1);
-                    let chars = self.chunk.chars & mask;
-                    let tabs = self.chunk.tabs & mask;
-                    self.chunk.tabs = self.chunk.tabs.unbounded_shr(ix as u32);
-                    self.chunk.chars = self.chunk.chars.unbounded_shr(ix as u32);
-                    self.chunk.text = suffix;
-                    return Some(Chunk {
-                        text: prefix,
-                        chars,
-                        tabs,
-                        ..self.chunk.clone()
-                    });
-                }
-                '\t' => {
-                    self.chunk.text = &self.chunk.text[1..];
-                    self.chunk.tabs >>= 1;
-                    self.chunk.chars >>= 1;
-                    let tab_size = if self.input_column < self.max_expansion_column {
-                        self.tab_size.get()
-                    } else {
-                        1
-                    };
-                    let mut len = tab_size - self.column % tab_size;
-                    let next_output_position = cmp::min(
-                        self.output_position + Point::new(0, len),
-                        self.max_output_position,
-                    );
-                    len = next_output_position.column - self.output_position.column;
-                    self.column += len;
-                    self.input_column += 1;
-                    self.output_position = next_output_position;
-                    return Some(Chunk {
-                        text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) },
-                        is_tab: true,
-                        chars: 1u128.unbounded_shl(len) - 1,
-                        tabs: 0,
-                        ..self.chunk.clone()
-                    });
-                }
-                '\n' => {
-                    self.column = 0;
-                    self.input_column = 0;
-                    self.output_position += Point::new(1, 0);
-                }
-                _ => {
-                    self.column += 1;
-                    if !self.inside_leading_tab {
-                        self.input_column += c.len_utf8() as u32;
-                    }
-                    self.output_position.column += c.len_utf8() as u32;
-                }
+        let first_tab_ix = if self.chunk.tabs != 0 {
+            self.chunk.tabs.trailing_zeros() as usize
+        } else {
+            self.chunk.text.len()
+        };
+
+        if first_tab_ix == 0 {
+            self.chunk.text = &self.chunk.text[1..];
+            self.chunk.tabs >>= 1;
+            self.chunk.chars >>= 1;
+            self.chunk.newlines >>= 1;
+
+            let tab_size = if self.input_column < self.max_expansion_column {
+                self.tab_size.get()
+            } else {
+                1
+            };
+            let mut len = tab_size - self.column % tab_size;
+            let next_output_position = cmp::min(
+                self.output_position + Point::new(0, len),
+                self.max_output_position,
+            );
+            len = next_output_position.column - self.output_position.column;
+            self.column += len;
+            self.input_column += 1;
+            self.output_position = next_output_position;
+
+            return Some(Chunk {
+                text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) },
+                is_tab: true,
+                chars: 1u128.unbounded_shl(len) - 1,
+                tabs: 0,
+                newlines: 0,
+                ..self.chunk.clone()
+            });
+        }
+
+        let prefix_len = first_tab_ix;
+        let (prefix, suffix) = self.chunk.text.split_at(prefix_len);
+
+        let mask = 1u128.unbounded_shl(prefix_len as u32).wrapping_sub(1);
+        let prefix_chars = self.chunk.chars & mask;
+        let prefix_tabs = self.chunk.tabs & mask;
+        let prefix_newlines = self.chunk.newlines & mask;
+
+        self.chunk.text = suffix;
+        self.chunk.tabs = self.chunk.tabs.unbounded_shr(prefix_len as u32);
+        self.chunk.chars = self.chunk.chars.unbounded_shr(prefix_len as u32);
+        self.chunk.newlines = self.chunk.newlines.unbounded_shr(prefix_len as u32);
+
+        let newline_count = prefix_newlines.count_ones();
+        if newline_count > 0 {
+            let last_newline_bit = 128 - prefix_newlines.leading_zeros();
+            let chars_after_last_newline =
+                prefix_chars.unbounded_shr(last_newline_bit).count_ones();
+            let bytes_after_last_newline = prefix_len as u32 - last_newline_bit;
+
+            self.column = chars_after_last_newline;
+            self.input_column = bytes_after_last_newline;
+            self.output_position = Point::new(
+                self.output_position.row + newline_count,
+                bytes_after_last_newline,
+            );
+        } else {
+            let char_count = prefix_chars.count_ones();
+            self.column += char_count;
+            if !self.inside_leading_tab {
+                self.input_column += prefix_len as u32;
             }
+            self.output_position.column += prefix_len as u32;
         }
 
-        Some(mem::take(&mut self.chunk))
+        Some(Chunk {
+            text: prefix,
+            chars: prefix_chars,
+            tabs: prefix_tabs,
+            newlines: prefix_newlines,
+            ..self.chunk.clone()
+        })
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use std::mem;
+
     use super::*;
     use crate::{
         MultiBuffer,

crates/editor/src/display_map/wrap_map.rs 🔗

@@ -1079,14 +1079,17 @@ impl<'a> Iterator for WrapChunks<'a> {
         let mask = 1u128.unbounded_shl(input_len as u32).wrapping_sub(1);
         let chars = self.input_chunk.chars & mask;
         let tabs = self.input_chunk.tabs & mask;
+        let newlines = self.input_chunk.newlines & mask;
         self.input_chunk.tabs = self.input_chunk.tabs.unbounded_shr(input_len as u32);
         self.input_chunk.chars = self.input_chunk.chars.unbounded_shr(input_len as u32);
+        self.input_chunk.newlines = self.input_chunk.newlines.unbounded_shr(input_len as u32);
 
         self.input_chunk.text = suffix;
         Some(Chunk {
             text: prefix,
             chars,
             tabs,
+            newlines,
             ..self.input_chunk.clone()
         })
     }

crates/language/src/buffer.rs 🔗

@@ -576,6 +576,8 @@ pub struct Chunk<'a> {
     pub tabs: u128,
     /// Bitmap of character indices in this chunk
     pub chars: u128,
+    /// Bitmap of newline indices in this chunk
+    pub newlines: u128,
     /// Whether this chunk of text is marked as unnecessary.
     pub is_unnecessary: bool,
     /// Whether this chunk of text was originally a tab character.
@@ -5663,6 +5665,7 @@ impl<'a> Iterator for BufferChunks<'a> {
             text: chunk,
             chars: chars_map,
             tabs,
+            newlines,
         }) = self.chunks.peek_with_bitmaps()
         {
             let chunk_start = self.range.start;
@@ -5684,6 +5687,7 @@ impl<'a> Iterator for BufferChunks<'a> {
             let mask = 1u128.unbounded_shl(bit_end as u32).wrapping_sub(1);
             let tabs = (tabs >> bit_start) & mask;
             let chars = (chars_map >> bit_start) & mask;
+            let newlines = (newlines >> bit_start) & mask;
 
             self.range.start = chunk_end;
             if self.range.start == self.chunks.offset() + chunk.len() {
@@ -5698,6 +5702,7 @@ impl<'a> Iterator for BufferChunks<'a> {
                 is_unnecessary: self.current_code_is_unnecessary(),
                 tabs,
                 chars,
+                newlines,
                 ..Chunk::default()
             })
         } else {

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -8371,15 +8371,18 @@ impl<'a> Iterator for MultiBufferChunks<'a> {
                     let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1);
                     let chars = chunk.chars & mask;
                     let tabs = chunk.tabs & mask;
+                    let newlines = chunk.newlines & mask;
 
                     chunk.text = after;
                     chunk.chars = chunk.chars >> split_idx;
                     chunk.tabs = chunk.tabs >> split_idx;
+                    chunk.newlines = chunk.newlines >> split_idx;
 
                     Some(Chunk {
                         text: before,
                         chars,
                         tabs,
+                        newlines,
                         ..chunk.clone()
                     })
                 } else {
@@ -8424,6 +8427,7 @@ impl<'a> Iterator for MultiBufferChunks<'a> {
                     Chunk {
                         text: "\n",
                         chars: 1u128,
+                        newlines: 1u128,
                         ..Default::default()
                     }
                 };
@@ -8521,10 +8525,12 @@ impl<'a> Iterator for ExcerptChunks<'a> {
         if self.has_footer {
             let text = "\n";
             let chars = 0b1;
+            let newlines = 0b1;
             self.has_footer = false;
             return Some(Chunk {
                 text,
                 chars,
+                newlines,
                 ..Default::default()
             });
         }

crates/rope/src/chunk.rs 🔗

@@ -137,10 +137,16 @@ impl Chunk {
         self.chars
     }
 
+    #[inline(always)]
     pub fn tabs(&self) -> Bitmap {
         self.tabs
     }
 
+    #[inline(always)]
+    pub fn newlines(&self) -> Bitmap {
+        self.newlines
+    }
+
     #[inline(always)]
     pub fn is_char_boundary(&self, offset: usize) -> bool {
         (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()

crates/rope/src/rope.rs 🔗

@@ -704,6 +704,8 @@ pub struct ChunkBitmaps<'a> {
     pub chars: Bitmap,
     /// Bitmap of tab locations in text. LSB ordered
     pub tabs: Bitmap,
+    /// Bitmap of newlines location in text. LSB ordered
+    pub newlines: Bitmap,
 }
 
 #[derive(Clone)]
@@ -916,11 +918,13 @@ impl<'a> Chunks<'a> {
         // Shift the tabs to align with our slice window
         let shifted_tabs = chunk.tabs() >> chunk_start_offset;
         let shifted_chars = chunk.chars() >> chunk_start_offset;
+        let shifted_newlines = chunk.newlines() >> chunk_start_offset;
 
         Some(ChunkBitmaps {
             text: slice_text,
             chars: shifted_chars,
             tabs: shifted_tabs,
+            newlines: shifted_newlines,
         })
     }