From 370d4ce2007fb6bdac2e12b7c706b97396394fec Mon Sep 17 00:00:00 2001 From: Adam Richardson <38476863+AdamWRichardson@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:16:16 +0000 Subject: [PATCH] rope: Micro optimize the creation of masks (#41132) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using Compiler Explorer, I saw that the compiler wasn't clever enough to optimise away the branches in the masking code. I thought the compiler would have a better chance if we always branched, which [turned out to be the case](https://godbolt.org/z/PM594Pz18). Running the benchmarks, the biggest benefit I saw was: ``` push/65536 time: [2.9067 ms 2.9243 ms 2.9417 ms] thrpt: [21.246 MiB/s 21.373 MiB/s 21.502 MiB/s] change: time: [-8.3452% -7.2617% -6.2009%] (p = 0.00 < 0.05) thrpt: [+6.6108% +7.8303% +9.1050%] Performance has improved. ``` But I did also see some regressions: ``` slice/4096 time: [66.195 µs 66.815 µs 67.448 µs] thrpt: [57.915 MiB/s 58.464 MiB/s 59.012 MiB/s] change: time: [+3.7131% +5.1698% +6.6971%] (p = 0.00 < 0.05) thrpt: [-6.2768% -4.9157% -3.5802%] Performance has regressed. 
``` Release Notes: - N/A --- crates/rope/src/chunk.rs | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index 6e17c35d7c770c429fa32725a38bca94a9e1dfc2..4c1e4cd68560f15274722ff1d8249205300c4e68 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -32,6 +32,16 @@ pub struct Chunk { pub text: ArrayString, } +#[inline(always)] +const fn saturating_shl_mask(offset: u32) -> Bitmap { + (1 as Bitmap).unbounded_shl(offset).wrapping_sub(1) +} + +#[inline(always)] +const fn saturating_shr_mask(offset: u32) -> Bitmap { + !Bitmap::MAX.unbounded_shr(offset) +} + impl Chunk { pub const MASK_BITS: usize = Bitmap::BITS as usize; @@ -291,34 +301,19 @@ impl<'a> ChunkSlice<'a> { /// Get number of chars in first line #[inline(always)] pub fn first_line_chars(&self) -> u32 { - if self.newlines == 0 { - self.chars.count_ones() - } else { - let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1; - (self.chars & mask).count_ones() - } + (self.chars & saturating_shl_mask(self.newlines.trailing_zeros())).count_ones() } /// Get number of chars in last line #[inline(always)] pub fn last_line_chars(&self) -> u32 { - if self.newlines == 0 { - self.chars.count_ones() - } else { - let mask = !(Bitmap::MAX >> self.newlines.leading_zeros()); - (self.chars & mask).count_ones() - } + (self.chars & saturating_shr_mask(self.newlines.leading_zeros())).count_ones() } /// Get number of UTF-16 code units in last line #[inline(always)] pub fn last_line_len_utf16(&self) -> u32 { - if self.newlines == 0 { - self.chars_utf16.count_ones() - } else { - let mask = !(Bitmap::MAX >> self.newlines.leading_zeros()); - (self.chars_utf16 & mask).count_ones() - } + (self.chars_utf16 & saturating_shr_mask(self.newlines.leading_zeros())).count_ones() } /// Get the longest row in the chunk and its length in characters. 
@@ -492,8 +487,8 @@ impl<'a> ChunkSlice<'a> { #[inline(always)] pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 { - let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1); - let row = (self.newlines & mask).count_ones(); + let mask = saturating_shl_mask(offset as u32); + let row = (self.newlines & saturating_shl_mask(offset as u32)).count_ones(); let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros(); let column = if newline_ix as usize == MAX_BASE { 0