editor: Use unbounded shifts for chunk bitmaps (#40879)

Lukas Wirth created

This simplifies some of the bitmap-splitting code and makes it more correct
in other places (I believe some of these shifts might have overflowed, causing panics reported in Sentry).

Release Notes:

- N/A *or* Added/Fixed/Improved ...

Change summary

Cargo.lock                                         |   1 
crates/editor/Cargo.toml                           |   1 
crates/editor/src/display_map/block_map.rs         |   8 
crates/editor/src/display_map/custom_highlights.rs |  32 +--
crates/editor/src/display_map/fold_map.rs          |   9 
crates/editor/src/display_map/inlay_map.rs         |  43 ++---
crates/editor/src/display_map/tab_map.rs           |  89 +++++-------
crates/editor/src/display_map/wrap_map.rs          |  17 -
crates/language/src/buffer.rs                      |  28 +--
crates/multi_buffer/src/multi_buffer.rs            |   8 
crates/rope/src/chunk.rs                           | 111 ++++++++-------
crates/rope/src/rope.rs                            |  47 +-----
12 files changed, 168 insertions(+), 226 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -5354,6 +5354,7 @@ dependencies = [
  "rand 0.9.2",
  "regex",
  "release_channel",
+ "rope",
  "rpc",
  "schemars 1.0.4",
  "serde",

crates/editor/Cargo.toml 🔗

@@ -64,6 +64,7 @@ project.workspace = true
 rand.workspace = true
 regex.workspace = true
 rpc.workspace = true
+rope.workspace = true
 schemars.workspace = true
 serde.workspace = true
 serde_json.workspace = true

crates/editor/src/display_map/block_map.rs 🔗

@@ -26,8 +26,8 @@ use sum_tree::{Bias, ContextLessSummary, Dimensions, SumTree, TreeMap};
 use text::{BufferId, Edit};
 use ui::ElementId;
 
-const NEWLINES: &[u8; u128::BITS as usize] = &[b'\n'; _];
-const BULLETS: &[u8; u128::BITS as usize] = &[b'*'; _];
+const NEWLINES: &[u8; rope::Chunk::MASK_BITS] = &[b'\n'; _];
+const BULLETS: &[u8; rope::Chunk::MASK_BITS] = &[b'*'; _];
 
 /// Tracks custom blocks such as diagnostics that should be displayed within buffer.
 ///
@@ -1783,11 +1783,11 @@ impl<'a> Iterator for BlockChunks<'a> {
 
         if self.masked {
             // Not great for multibyte text because to keep cursor math correct we
-            // need to have the same number of bytes in the input as output.
+            // need to have the same number of chars in the input as output.
             let chars_count = prefix.chars().count();
             let bullet_len = chars_count;
             prefix = unsafe { std::str::from_utf8_unchecked(&BULLETS[..bullet_len]) };
-            chars = 1u128.unbounded_shl(bullet_len as u32) - 1;
+            chars = 1u128.unbounded_shl(bullet_len as u32).wrapping_sub(1);
             tabs = 0;
         }
 

crates/editor/src/display_map/custom_highlights.rs 🔗

@@ -132,37 +132,31 @@ impl<'a> Iterator for CustomHighlightsChunks<'a> {
             }
         }
 
-        let chunk = self
-            .buffer_chunk
-            .get_or_insert_with(|| self.buffer_chunks.next().unwrap_or_default());
-        if chunk.text.is_empty() {
+        let chunk = match &mut self.buffer_chunk {
+            Some(it) => it,
+            slot => slot.insert(self.buffer_chunks.next()?),
+        };
+        while chunk.text.is_empty() {
             *chunk = self.buffer_chunks.next()?;
         }
 
         let split_idx = chunk.text.len().min(next_highlight_endpoint - self.offset);
         let (prefix, suffix) = chunk.text.split_at(split_idx);
-
-        let (chars, tabs) = if split_idx == 128 {
-            let output = (chunk.chars, chunk.tabs);
-            chunk.chars = 0;
-            chunk.tabs = 0;
-            output
-        } else {
-            let mask = (1 << split_idx) - 1;
-            let output = (chunk.chars & mask, chunk.tabs & mask);
-            chunk.chars = chunk.chars >> split_idx;
-            chunk.tabs = chunk.tabs >> split_idx;
-            output
-        };
-
-        chunk.text = suffix;
         self.offset += prefix.len();
+
+        let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1);
+        let chars = chunk.chars & mask;
+        let tabs = chunk.tabs & mask;
         let mut prefix = Chunk {
             text: prefix,
             chars,
             tabs,
             ..chunk.clone()
         };
+
+        chunk.chars = chunk.chars.unbounded_shr(split_idx as u32);
+        chunk.tabs = chunk.tabs.unbounded_shr(split_idx as u32);
+        chunk.text = suffix;
         if !self.active_highlights.is_empty() {
             prefix.highlight_style = self
                 .active_highlights

crates/editor/src/display_map/fold_map.rs 🔗

@@ -1436,14 +1436,15 @@ impl<'a> Iterator for FoldChunks<'a> {
             let transform_end = self.transform_cursor.end().1;
             let chunk_end = buffer_chunk_end.min(transform_end);
 
-            chunk.text = &chunk.text
-                [(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0];
+            let bit_start = (self.inlay_offset - buffer_chunk_start).0;
+            let bit_end = (chunk_end - buffer_chunk_start).0;
+            chunk.text = &chunk.text[bit_start..bit_end];
 
             let bit_end = (chunk_end - buffer_chunk_start).0;
             let mask = 1u128.unbounded_shl(bit_end as u32).wrapping_sub(1);
 
-            chunk.tabs = (chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0) & mask;
-            chunk.chars = (chunk.chars >> (self.inlay_offset - buffer_chunk_start).0) & mask;
+            chunk.tabs = (chunk.tabs >> bit_start) & mask;
+            chunk.chars = (chunk.chars >> bit_start) & mask;
 
             if chunk_end == transform_end {
                 self.transform_cursor.next();

crates/editor/src/display_map/inlay_map.rs 🔗

@@ -325,21 +325,16 @@ impl<'a> Iterator for InlayChunks<'a> {
                 };
 
                 let (prefix, suffix) = chunk.text.split_at(split_index);
+                self.output_offset.0 += prefix.len();
 
-                let (chars, tabs) = if split_index == 128 {
-                    let output = (chunk.chars, chunk.tabs);
-                    chunk.chars = 0;
-                    chunk.tabs = 0;
-                    output
-                } else {
-                    let mask = (1 << split_index) - 1;
-                    let output = (chunk.chars & mask, chunk.tabs & mask);
-                    chunk.chars = chunk.chars >> split_index;
-                    chunk.tabs = chunk.tabs >> split_index;
-                    output
-                };
+                let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1);
+                let chars = chunk.chars & mask;
+                let tabs = chunk.tabs & mask;
+
+                chunk.chars = chunk.chars.unbounded_shr(split_index as u32);
+                chunk.tabs = chunk.tabs.unbounded_shr(split_index as u32);
                 chunk.text = suffix;
-                self.output_offset.0 += prefix.len();
+
                 InlayChunk {
                     chunk: Chunk {
                         text: prefix,
@@ -457,18 +452,12 @@ impl<'a> Iterator for InlayChunks<'a> {
                 let (chunk, remainder) = inlay_chunk.split_at(split_index);
                 *inlay_chunk = remainder;
 
-                let (chars, tabs) = if split_index == 128 {
-                    let output = (*chars, *tabs);
-                    *chars = 0;
-                    *tabs = 0;
-                    output
-                } else {
-                    let mask = (1 << split_index as u32) - 1;
-                    let output = (*chars & mask, *tabs & mask);
-                    *chars = *chars >> split_index;
-                    *tabs = *tabs >> split_index;
-                    output
-                };
+                let mask = 1u128.unbounded_shl(split_index as u32).wrapping_sub(1);
+                let new_chars = *chars & mask;
+                let new_tabs = *tabs & mask;
+
+                *chars = chars.unbounded_shr(split_index as u32);
+                *tabs = tabs.unbounded_shr(split_index as u32);
 
                 if inlay_chunk.is_empty() {
                     self.inlay_chunk = None;
@@ -479,8 +468,8 @@ impl<'a> Iterator for InlayChunks<'a> {
                 InlayChunk {
                     chunk: Chunk {
                         text: chunk,
-                        chars,
-                        tabs,
+                        chars: new_chars,
+                        tabs: new_tabs,
                         highlight_style,
                         is_inlay: true,
                         ..Chunk::default()

crates/editor/src/display_map/tab_map.rs 🔗

@@ -11,7 +11,7 @@ use sum_tree::Bias;
 const MAX_EXPANSION_COLUMN: u32 = 256;
 
 // Handles a tab width <= 128
-const SPACES: &[u8; u128::BITS as usize] = &[b' '; _];
+const SPACES: &[u8; rope::Chunk::MASK_BITS] = &[b' '; _];
 const MAX_TABS: NonZeroU32 = NonZeroU32::new(SPACES.len() as u32).unwrap();
 
 /// Keeps track of hard tabs in a text buffer.
@@ -569,56 +569,47 @@ impl<'a> Iterator for TabChunks<'a> {
         //todo(improve performance by using tab cursor)
         for (ix, c) in self.chunk.text.char_indices() {
             match c {
+                '\t' if ix > 0 => {
+                    let (prefix, suffix) = self.chunk.text.split_at(ix);
+
+                    let mask = 1u128.unbounded_shl(ix as u32).wrapping_sub(1);
+                    let chars = self.chunk.chars & mask;
+                    let tabs = self.chunk.tabs & mask;
+                    self.chunk.tabs = self.chunk.tabs.unbounded_shr(ix as u32);
+                    self.chunk.chars = self.chunk.chars.unbounded_shr(ix as u32);
+                    self.chunk.text = suffix;
+                    return Some(Chunk {
+                        text: prefix,
+                        chars,
+                        tabs,
+                        ..self.chunk.clone()
+                    });
+                }
                 '\t' => {
-                    if ix > 0 {
-                        let (prefix, suffix) = self.chunk.text.split_at(ix);
-
-                        let (chars, tabs) = if ix == 128 {
-                            let output = (self.chunk.chars, self.chunk.tabs);
-                            self.chunk.chars = 0;
-                            self.chunk.tabs = 0;
-                            output
-                        } else {
-                            let mask = (1 << ix) - 1;
-                            let output = (self.chunk.chars & mask, self.chunk.tabs & mask);
-                            self.chunk.chars = self.chunk.chars >> ix;
-                            self.chunk.tabs = self.chunk.tabs >> ix;
-                            output
-                        };
-
-                        self.chunk.text = suffix;
-                        return Some(Chunk {
-                            text: prefix,
-                            chars,
-                            tabs,
-                            ..self.chunk.clone()
-                        });
+                    self.chunk.text = &self.chunk.text[1..];
+                    self.chunk.tabs >>= 1;
+                    self.chunk.chars >>= 1;
+                    let tab_size = if self.input_column < self.max_expansion_column {
+                        self.tab_size.get()
                     } else {
-                        self.chunk.text = &self.chunk.text[1..];
-                        self.chunk.tabs >>= 1;
-                        self.chunk.chars >>= 1;
-                        let tab_size = if self.input_column < self.max_expansion_column {
-                            self.tab_size.get()
-                        } else {
-                            1
-                        };
-                        let mut len = tab_size - self.column % tab_size;
-                        let next_output_position = cmp::min(
-                            self.output_position + Point::new(0, len),
-                            self.max_output_position,
-                        );
-                        len = next_output_position.column - self.output_position.column;
-                        self.column += len;
-                        self.input_column += 1;
-                        self.output_position = next_output_position;
-                        return Some(Chunk {
-                            text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) },
-                            is_tab: true,
-                            chars: 1u128.unbounded_shl(len) - 1,
-                            tabs: 0,
-                            ..self.chunk.clone()
-                        });
-                    }
+                        1
+                    };
+                    let mut len = tab_size - self.column % tab_size;
+                    let next_output_position = cmp::min(
+                        self.output_position + Point::new(0, len),
+                        self.max_output_position,
+                    );
+                    len = next_output_position.column - self.output_position.column;
+                    self.column += len;
+                    self.input_column += 1;
+                    self.output_position = next_output_position;
+                    return Some(Chunk {
+                        text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) },
+                        is_tab: true,
+                        chars: 1u128.unbounded_shl(len) - 1,
+                        tabs: 0,
+                        ..self.chunk.clone()
+                    });
                 }
                 '\n' => {
                     self.column = 0;

crates/editor/src/display_map/wrap_map.rs 🔗

@@ -972,18 +972,11 @@ impl<'a> Iterator for WrapChunks<'a> {
 
         let (prefix, suffix) = self.input_chunk.text.split_at(input_len);
 
-        let (chars, tabs) = if input_len == 128 {
-            let output = (self.input_chunk.chars, self.input_chunk.tabs);
-            self.input_chunk.chars = 0;
-            self.input_chunk.tabs = 0;
-            output
-        } else {
-            let mask = (1 << input_len) - 1;
-            let output = (self.input_chunk.chars & mask, self.input_chunk.tabs & mask);
-            self.input_chunk.chars = self.input_chunk.chars >> input_len;
-            self.input_chunk.tabs = self.input_chunk.tabs >> input_len;
-            output
-        };
+        let mask = 1u128.unbounded_shl(input_len as u32).wrapping_sub(1);
+        let chars = self.input_chunk.chars & mask;
+        let tabs = self.input_chunk.tabs & mask;
+        self.input_chunk.tabs = self.input_chunk.tabs.unbounded_shr(input_len as u32);
+        self.input_chunk.chars = self.input_chunk.chars.unbounded_shr(input_len as u32);
 
         self.input_chunk.text = suffix;
         Some(Chunk {

crates/language/src/buffer.rs 🔗

@@ -506,15 +506,15 @@ pub struct Chunk<'a> {
     pub highlight_style: Option<HighlightStyle>,
     /// The severity of diagnostic associated with this chunk, if any.
     pub diagnostic_severity: Option<DiagnosticSeverity>,
-    /// Whether this chunk of text is marked as unnecessary.
-    pub is_unnecessary: bool,
-    /// Whether this chunk of text was originally a tab character.
-    pub is_tab: bool,
     /// A bitset of which characters are tabs in this string.
     pub tabs: u128,
     /// Bitmap of character indices in this chunk
     pub chars: u128,
+    /// Whether this chunk of text is marked as unnecessary.
+    pub is_unnecessary: bool,
     /// Whether this chunk of text was originally a tab character.
+    pub is_tab: bool,
+    /// Whether this chunk of text was originally an inlay.
     pub is_inlay: bool,
     /// Whether to underline the corresponding text range in the editor.
     pub underline: bool,
@@ -4982,7 +4982,7 @@ impl<'a> Iterator for BufferChunks<'a> {
             text: chunk,
             chars: chars_map,
             tabs,
-        }) = self.chunks.peek_tabs()
+        }) = self.chunks.peek_with_bitmaps()
         {
             let chunk_start = self.range.start;
             let mut chunk_end = (self.chunks.offset() + chunk.len())
@@ -4995,18 +4995,14 @@ impl<'a> Iterator for BufferChunks<'a> {
                 chunk_end = chunk_end.min(*parent_capture_end);
                 highlight_id = Some(*parent_highlight_id);
             }
-
-            let slice =
-                &chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
+            let bit_start = chunk_start - self.chunks.offset();
             let bit_end = chunk_end - self.chunks.offset();
 
-            let mask = if bit_end >= 128 {
-                u128::MAX
-            } else {
-                (1u128 << bit_end) - 1
-            };
-            let tabs = (tabs >> (chunk_start - self.chunks.offset())) & mask;
-            let chars_map = (chars_map >> (chunk_start - self.chunks.offset())) & mask;
+            let slice = &chunk[bit_start..bit_end];
+
+            let mask = 1u128.unbounded_shl(bit_end as u32).wrapping_sub(1);
+            let tabs = (tabs >> bit_start) & mask;
+            let chars = (chars_map >> bit_start) & mask;
 
             self.range.start = chunk_end;
             if self.range.start == self.chunks.offset() + chunk.len() {
@@ -5020,7 +5016,7 @@ impl<'a> Iterator for BufferChunks<'a> {
                 diagnostic_severity: self.current_diagnostic_severity(),
                 is_unnecessary: self.current_code_is_unnecessary(),
                 tabs,
-                chars: chars_map,
+                chars,
                 ..Chunk::default()
             })
         } else {

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -51,7 +51,7 @@ use text::{
 use theme::SyntaxTheme;
 use util::{post_inc, rel_path::RelPath};
 
-const NEWLINES: &[u8] = &[b'\n'; u8::MAX as usize];
+const NEWLINES: &[u8] = &[b'\n'; rope::Chunk::MASK_BITS];
 
 #[derive(Debug, Default, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct ExcerptId(u32);
@@ -7730,7 +7730,7 @@ impl<'a> Iterator for MultiBufferChunks<'a> {
                     let split_idx = diff_transform_end - self.range.start;
                     let (before, after) = chunk.text.split_at(split_idx);
                     self.range.start = diff_transform_end;
-                    let mask = (1 << split_idx) - 1;
+                    let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1);
                     let chars = chunk.chars & mask;
                     let tabs = chunk.tabs & mask;
 
@@ -7882,7 +7882,9 @@ impl<'a> Iterator for ExcerptChunks<'a> {
 
         if self.footer_height > 0 {
             let text = unsafe { str::from_utf8_unchecked(&NEWLINES[..self.footer_height]) };
-            let chars = (1 << self.footer_height) - 1;
+            let chars = 1u128
+                .unbounded_shl(self.footer_height as u32)
+                .wrapping_sub(1);
             self.footer_height = 0;
             return Some(Chunk {
                 text,

crates/rope/src/chunk.rs 🔗

@@ -5,29 +5,36 @@ use sum_tree::Bias;
 use unicode_segmentation::GraphemeCursor;
 use util::debug_panic;
 
-pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
-pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
+#[cfg(not(all(test, not(rust_analyzer))))]
+pub(crate) type Bitmap = u128;
+#[cfg(all(test, not(rust_analyzer)))]
+pub(crate) type Bitmap = u16;
+
+pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
+pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
 
 #[derive(Clone, Debug, Default)]
 pub struct Chunk {
     /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
     /// text.
-    chars: u128,
+    chars: Bitmap,
     /// The number of set bits is the number of UTF-16 code units it would take to represent the
     /// text.
     ///
     /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
     /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
     /// than two UTF-16 code units.)
-    chars_utf16: u128,
+    chars_utf16: Bitmap,
     /// If bit[i] is set, then the character at index i is an ascii newline.
-    newlines: u128,
+    newlines: Bitmap,
     /// If bit[i] is set, then the character at index i is an ascii tab.
-    pub tabs: u128,
+    tabs: Bitmap,
     pub text: ArrayString<MAX_BASE>,
 }
 
 impl Chunk {
+    pub const MASK_BITS: usize = Bitmap::BITS as usize;
+
     #[inline(always)]
     pub fn new(text: &str) -> Self {
         let mut this = Chunk::default();
@@ -41,9 +48,9 @@ impl Chunk {
             let ix = self.text.len() + char_ix;
             self.chars |= 1 << ix;
             self.chars_utf16 |= 1 << ix;
-            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
-            self.newlines |= ((c == '\n') as u128) << ix;
-            self.tabs |= ((c == '\t') as u128) << ix;
+            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
+            self.newlines |= ((c == '\n') as Bitmap) << ix;
+            self.tabs |= ((c == '\t') as Bitmap) << ix;
         }
         self.text.push_str(text);
     }
@@ -79,17 +86,21 @@ impl Chunk {
     }
 
     #[inline(always)]
-    pub fn chars(&self) -> u128 {
+    pub fn chars(&self) -> Bitmap {
         self.chars
     }
+
+    pub fn tabs(&self) -> Bitmap {
+        self.tabs
+    }
 }
 
 #[derive(Clone, Copy, Debug)]
 pub struct ChunkSlice<'a> {
-    chars: u128,
-    chars_utf16: u128,
-    newlines: u128,
-    tabs: u128,
+    chars: Bitmap,
+    chars_utf16: Bitmap,
+    newlines: Bitmap,
+    tabs: Bitmap,
     text: &'a str,
 }
 
@@ -129,7 +140,7 @@ impl<'a> ChunkSlice<'a> {
             };
             (left, right)
         } else {
-            let mask = (1u128 << mid) - 1;
+            let mask = ((1 as Bitmap) << mid) - 1;
             let (left_text, right_text) = self.text.split_at(mid);
             let left = ChunkSlice {
                 chars: self.chars & mask,
@@ -151,17 +162,15 @@ impl<'a> ChunkSlice<'a> {
 
     #[inline(always)]
     pub fn slice(self, range: Range<usize>) -> Self {
-        let mask = if range.end == MAX_BASE {
-            u128::MAX
-        } else {
-            debug_assert!(
-                self.is_char_boundary(range.end),
-                "Invalid range end {} in {:?}",
-                range.end,
-                self
-            );
-            (1u128 << range.end) - 1
-        };
+        debug_assert!(
+            self.is_char_boundary(range.end),
+            "Invalid range end {} in {:?}",
+            range.end,
+            self
+        );
+        let mask = (1 as Bitmap)
+            .unbounded_shl(range.end as u32)
+            .wrapping_sub(1);
         if range.start == MAX_BASE {
             Self {
                 chars: 0,
@@ -220,7 +229,7 @@ impl<'a> ChunkSlice<'a> {
     #[inline(always)]
     pub fn lines(&self) -> Point {
         let row = self.newlines.count_ones();
-        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
+        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
         Point::new(row, column)
     }
 
@@ -230,7 +239,7 @@ impl<'a> ChunkSlice<'a> {
         if self.newlines == 0 {
             self.chars.count_ones()
         } else {
-            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
+            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
             (self.chars & mask).count_ones()
         }
     }
@@ -241,7 +250,7 @@ impl<'a> ChunkSlice<'a> {
         if self.newlines == 0 {
             self.chars.count_ones()
         } else {
-            let mask = !(u128::MAX >> self.newlines.leading_zeros());
+            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
             (self.chars & mask).count_ones()
         }
     }
@@ -252,7 +261,7 @@ impl<'a> ChunkSlice<'a> {
         if self.newlines == 0 {
             self.chars_utf16.count_ones()
         } else {
-            let mask = !(u128::MAX >> self.newlines.leading_zeros());
+            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
             (self.chars_utf16 & mask).count_ones()
         }
     }
@@ -295,13 +304,9 @@ impl<'a> ChunkSlice<'a> {
 
     #[inline(always)]
     pub fn offset_to_point(&self, offset: usize) -> Point {
-        let mask = if offset == MAX_BASE {
-            u128::MAX
-        } else {
-            (1u128 << offset) - 1
-        };
+        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
         let row = (self.newlines & mask).count_ones();
-        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
+        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
         let column = (offset - newline_ix as usize) as u32;
         Point::new(row, column)
     }
@@ -332,11 +337,7 @@ impl<'a> ChunkSlice<'a> {
 
     #[inline(always)]
     pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
-        let mask = if offset == MAX_BASE {
-            u128::MAX
-        } else {
-            (1u128 << offset) - 1
-        };
+        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
         OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
     }
 
@@ -345,7 +346,11 @@ impl<'a> ChunkSlice<'a> {
         if target.0 == 0 {
             0
         } else {
-            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
+            #[cfg(not(test))]
+            let chars_utf16 = self.chars_utf16;
+            #[cfg(test)]
+            let chars_utf16 = self.chars_utf16 as u128;
+            let ix = nth_set_bit(chars_utf16, target.0) + 1;
             if ix == MAX_BASE {
                 MAX_BASE
             } else {
@@ -360,13 +365,9 @@ impl<'a> ChunkSlice<'a> {
 
     #[inline(always)]
     pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
-        let mask = if offset == MAX_BASE {
-            u128::MAX
-        } else {
-            (1u128 << offset) - 1
-        };
+        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
         let row = (self.newlines & mask).count_ones();
-        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
+        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
         let column = if newline_ix as usize == MAX_BASE {
             0
         } else {
@@ -520,7 +521,11 @@ impl<'a> ChunkSlice<'a> {
     #[inline(always)]
     fn offset_range_for_row(&self, row: u32) -> Range<usize> {
         let row_start = if row > 0 {
-            nth_set_bit(self.newlines, row as usize) + 1
+            #[cfg(not(test))]
+            let newlines = self.newlines;
+            #[cfg(test)]
+            let newlines = self.newlines as u128;
+            nth_set_bit(newlines, row as usize) + 1
         } else {
             0
         };
@@ -545,8 +550,8 @@ impl<'a> ChunkSlice<'a> {
 }
 
 pub struct Tabs {
-    tabs: u128,
-    chars: u128,
+    tabs: Bitmap,
+    chars: Bitmap,
 }
 
 #[derive(Debug, PartialEq, Eq)]
@@ -647,8 +652,8 @@ mod tests {
         // Verify Chunk::chars() bitmap
         let expected_chars = char_offsets(&text)
             .into_iter()
-            .inspect(|i| assert!(*i < 128))
-            .fold(0u128, |acc, i| acc | (1 << i));
+            .inspect(|i| assert!(*i < MAX_BASE))
+            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
         assert_eq!(chunk.chars(), expected_chars);
 
         for _ in 0..10 {

crates/rope/src/rope.rs 🔗

@@ -4,7 +4,6 @@ mod point;
 mod point_utf16;
 mod unclipped;
 
-use chunk::Chunk;
 use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
 use smallvec::SmallVec;
 use std::{
@@ -14,12 +13,14 @@ use std::{
 };
 use sum_tree::{Bias, Dimension, Dimensions, SumTree};
 
-pub use chunk::ChunkSlice;
+pub use chunk::{Chunk, ChunkSlice};
 pub use offset_utf16::OffsetUtf16;
 pub use point::Point;
 pub use point_utf16::PointUtf16;
 pub use unclipped::Unclipped;
 
+use crate::chunk::Bitmap;
+
 #[derive(Clone, Default)]
 pub struct Rope {
     chunks: SumTree<Chunk>,
@@ -676,9 +677,9 @@ pub struct ChunkBitmaps<'a> {
     /// A slice of text up to 128 bytes in size
     pub text: &'a str,
     /// Bitmap of character locations in text. LSB ordered
-    pub chars: u128,
+    pub chars: Bitmap,
     /// Bitmap of tab locations in text. LSB ordered
-    pub tabs: u128,
+    pub tabs: Bitmap,
 }
 
 #[derive(Clone)]
@@ -850,39 +851,6 @@ impl<'a> Chunks<'a> {
         self.offset < initial_offset && self.offset == 0
     }
 
-    /// Returns bitmaps that represent character positions and tab positions
-    pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
-        if !self.offset_is_valid() {
-            return None;
-        }
-
-        let chunk = self.chunks.item()?;
-        let chunk_start = *self.chunks.start();
-        let slice_range = if self.reversed {
-            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
-            let slice_end = self.offset - chunk_start;
-            slice_start..slice_end
-        } else {
-            let slice_start = self.offset - chunk_start;
-            let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
-            slice_start..slice_end
-        };
-
-        // slice range has a bounds between 0 and 128 in non test builds
-        // We use a non wrapping sub because we want to overflow in the case where slice_range.end == 128
-        // because that represents a full chunk and the bitmask shouldn't remove anything
-        let bitmask = (1u128.unbounded_shl(slice_range.end as u32)).wrapping_sub(1);
-
-        let chars = (chunk.chars() & bitmask) >> slice_range.start;
-        let tabs = (chunk.tabs & bitmask) >> slice_range.start;
-
-        Some(ChunkBitmaps {
-            text: &chunk.text[slice_range],
-            chars,
-            tabs,
-        })
-    }
-
     pub fn peek(&self) -> Option<&'a str> {
         if !self.offset_is_valid() {
             return None;
@@ -903,7 +871,8 @@ impl<'a> Chunks<'a> {
         Some(&chunk.text[slice_range])
     }
 
-    pub fn peek_tabs(&self) -> Option<ChunkBitmaps<'a>> {
+    /// Returns bitmaps that represent character positions and tab positions
+    pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
         if !self.offset_is_valid() {
             return None;
         }
@@ -923,7 +892,7 @@ impl<'a> Chunks<'a> {
         let slice_text = &chunk.text[slice_range];
 
         // Shift the tabs to align with our slice window
-        let shifted_tabs = chunk.tabs >> chunk_start_offset;
+        let shifted_tabs = chunk.tabs() >> chunk_start_offset;
         let shifted_chars = chunk.chars() >> chunk_start_offset;
 
         Some(ChunkBitmaps {