Avoid buffering line content to compute indent guides (#15167)

Antonio Scandurra , Nathan , Bennet , and Thorsten created

Release Notes:

- Improved performance when computing indent guides for buffers with
extremely long lines.

---------

Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: Bennet <bennet@zed.dev>
Co-authored-by: Thorsten <thorsten@zed.dev>

Change summary

Cargo.lock                    |   2 
crates/rope/Cargo.toml        |   2 
crates/rope/src/rope.rs       | 354 ++++++++++++++++++++++++++++++++++--
crates/sum_tree/src/cursor.rs |  14 
crates/text/src/text.rs       |  88 +++++++-
5 files changed, 410 insertions(+), 50 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8863,6 +8863,8 @@ version = "0.1.0"
 dependencies = [
  "arrayvec",
  "criterion",
+ "ctor",
+ "env_logger",
  "gpui",
  "log",
  "rand 0.8.5",

crates/rope/Cargo.toml 🔗

@@ -20,6 +20,8 @@ unicode-segmentation.workspace = true
 util.workspace = true
 
 [dev-dependencies]
+ctor.workspace = true
+env_logger.workspace = true
 gpui = { workspace = true, features = ["test-support"] }
 rand.workspace = true
 util = { workspace = true, features = ["test-support"] }

crates/rope/src/rope.rs 🔗

@@ -557,33 +557,49 @@ impl<'a> Cursor<'a> {
 pub struct Chunks<'a> {
     chunks: sum_tree::Cursor<'a, Chunk, usize>,
     range: Range<usize>,
+    offset: usize,
     reversed: bool,
 }
 
 impl<'a> Chunks<'a> {
     pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
         let mut chunks = rope.chunks.cursor();
-        if reversed {
+        let offset = if reversed {
             chunks.seek(&range.end, Bias::Left, &());
+            range.end
         } else {
             chunks.seek(&range.start, Bias::Right, &());
-        }
+            range.start
+        };
         Self {
             chunks,
             range,
+            offset,
             reversed,
         }
     }
 
-    pub fn offset(&self) -> usize {
+    fn offset_is_valid(&self) -> bool {
         if self.reversed {
-            self.range.end.min(self.chunks.end(&()))
+            if self.offset <= self.range.start || self.offset > self.range.end {
+                return false;
+            }
         } else {
-            self.range.start.max(*self.chunks.start())
+            if self.offset < self.range.start || self.offset >= self.range.end {
+                return false;
+            }
         }
+
+        true
     }
 
-    pub fn seek(&mut self, offset: usize) {
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    pub fn seek(&mut self, mut offset: usize) {
+        offset = offset.clamp(self.range.start, self.range.end);
+
         let bias = if self.reversed {
             Bias::Left
         } else {
@@ -596,26 +612,123 @@ impl<'a> Chunks<'a> {
             self.chunks.seek(&offset, bias, &());
         }
 
-        if self.reversed {
-            self.range.end = offset;
-        } else {
-            self.range.start = offset;
+        self.offset = offset;
+    }
+
+    /// Moves this cursor to the start of the next line in the rope.
+    ///
+    /// This method advances the cursor to the beginning of the next line.
+    /// If the cursor is already at the end of the rope, this method does nothing.
+    /// Reversed chunks iterators are not currently supported and will panic.
+    ///
+    /// Returns `true` if the cursor was successfully moved to the next line start,
+    /// or `false` if the cursor was already at the end of the rope.
+    pub fn next_line(&mut self) -> bool {
+        assert!(!self.reversed);
+
+        let mut found = false;
+        if let Some(chunk) = self.peek() {
+            if let Some(newline_ix) = chunk.find('\n') {
+                self.offset += newline_ix + 1;
+                found = self.offset <= self.range.end;
+            } else {
+                self.chunks
+                    .search_forward(|summary| summary.text.lines.row > 0, &());
+                self.offset = *self.chunks.start();
+
+                if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
+                    self.offset += newline_ix + 1;
+                    found = self.offset <= self.range.end;
+                } else {
+                    self.offset = self.chunks.end(&());
+                }
+            }
+
+            if self.offset == self.chunks.end(&()) {
+                self.next();
+            }
+        }
+
+        if self.offset > self.range.end {
+            self.offset = cmp::min(self.offset, self.range.end);
+            self.chunks.seek(&self.offset, Bias::Right, &());
         }
+
+        found
+    }
+
+    /// Move this cursor to the preceding position in the rope that starts a new line.
+    /// Reversed chunks iterators are not currently supported and will panic.
+    ///
+    /// If this cursor is not on the start of a line, it will be moved to the start of
+    /// its current line. Otherwise it will be moved to the start of the previous line.
+    /// It updates the cursor's position and returns true if a previous line was found,
+    /// or false if the cursor was already at the start of the rope.
+    pub fn prev_line(&mut self) -> bool {
+        assert!(!self.reversed);
+
+        let initial_offset = self.offset;
+
+        if self.offset == *self.chunks.start() {
+            self.chunks.prev(&());
+        }
+
+        if let Some(chunk) = self.chunks.item() {
+            let mut end_ix = self.offset - *self.chunks.start();
+            if chunk.0.as_bytes()[end_ix - 1] == b'\n' {
+                end_ix -= 1;
+            }
+
+            if let Some(newline_ix) = chunk.0[..end_ix].rfind('\n') {
+                self.offset = *self.chunks.start() + newline_ix + 1;
+                if self.offset_is_valid() {
+                    return true;
+                }
+            }
+        }
+
+        self.chunks
+            .search_backward(|summary| summary.text.lines.row > 0, &());
+        self.offset = *self.chunks.start();
+        if let Some(chunk) = self.chunks.item() {
+            if let Some(newline_ix) = chunk.0.rfind('\n') {
+                self.offset += newline_ix + 1;
+                if self.offset_is_valid() {
+                    if self.offset == self.chunks.end(&()) {
+                        self.chunks.next(&());
+                    }
+
+                    return true;
+                }
+            }
+        }
+
+        if !self.offset_is_valid() || self.chunks.item().is_none() {
+            self.offset = self.range.start;
+            self.chunks.seek(&self.offset, Bias::Right, &());
+        }
+
+        self.offset < initial_offset && self.offset == 0
     }
 
     pub fn peek(&self) -> Option<&'a str> {
-        let chunk = self.chunks.item()?;
-        if self.reversed && self.range.start >= self.chunks.end(&()) {
+        if !self.offset_is_valid() {
             return None;
         }
+
+        let chunk = self.chunks.item()?;
         let chunk_start = *self.chunks.start();
-        if self.range.end <= chunk_start {
-            return None;
-        }
+        let slice_range = if self.reversed {
+            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
+            let slice_end = self.offset - chunk_start;
+            slice_start..slice_end
+        } else {
+            let slice_start = self.offset - chunk_start;
+            let slice_end = cmp::min(self.chunks.end(&()), self.range.end) - chunk_start;
+            slice_start..slice_end
+        };
 
-        let start = self.range.start.saturating_sub(chunk_start);
-        let end = self.range.end - chunk_start;
-        Some(&chunk.0[start..chunk.0.len().min(end)])
+        Some(&chunk.0[slice_range])
     }
 
     pub fn lines(self) -> Lines<'a> {
@@ -633,15 +746,16 @@ impl<'a> Iterator for Chunks<'a> {
     type Item = &'a str;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let result = self.peek();
-        if result.is_some() {
-            if self.reversed {
-                self.chunks.prev(&());
-            } else {
-                self.chunks.next(&());
-            }
+        let chunk = self.peek()?;
+        if self.reversed {
+            self.chunks.prev(&());
+            self.offset -= chunk.len();
+        } else {
+            self.chunks.next(&());
+            self.offset += chunk.len();
         }
-        result
+
+        Some(chunk)
     }
 }
 
@@ -1299,6 +1413,13 @@ mod tests {
     use util::RandomCharIter;
     use Bias::{Left, Right};
 
+    #[ctor::ctor]
+    fn init_logger() {
+        if std::env::var("RUST_LOG").is_ok() {
+            env_logger::init();
+        }
+    }
+
     #[test]
     fn test_all_4_byte_chars() {
         let mut rope = Rope::new();
@@ -1355,6 +1476,50 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_prev_next_line() {
+        let rope = Rope::from("abc\ndef\nghi\njkl");
+
+        let mut chunks = rope.chunks();
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
+
+        assert!(chunks.next_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
+
+        assert!(chunks.next_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
+
+        assert!(chunks.next_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
+
+        assert!(!chunks.next_line());
+        assert_eq!(chunks.peek(), None);
+
+        assert!(chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
+
+        assert!(chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
+
+        assert!(chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
+
+        assert!(chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
+
+        assert!(!chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
+
+        // Only return true when the cursor has moved to the start of a line
+        let mut chunks = rope.chunks_in_range(5..7);
+        chunks.seek(6);
+        assert!(!chunks.prev_line());
+        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
+
+        assert!(!chunks.next_line());
+        assert_eq!(chunks.peek(), None);
+    }
+
     #[test]
     fn test_lines() {
         let rope = Rope::from("abc\ndefg\nhi");
@@ -1438,6 +1603,143 @@ mod tests {
                         .collect::<String>(),
                     &expected[start_ix..end_ix]
                 );
+
+                let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
+                    .match_indices('\n')
+                    .map(|(index, _)| start_ix + index + 1)
+                    .collect();
+
+                let mut chunks = actual.chunks_in_range(start_ix..end_ix);
+
+                let mut actual_line_starts = Vec::new();
+                while chunks.next_line() {
+                    actual_line_starts.push(chunks.offset());
+                }
+                assert_eq!(
+                    actual_line_starts,
+                    expected_line_starts,
+                    "actual line starts != expected line starts when using next_line() for {:?} ({:?})",
+                    &expected[start_ix..end_ix],
+                    start_ix..end_ix
+                );
+
+                if start_ix < end_ix
+                    && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
+                {
+                    expected_line_starts.insert(0, start_ix);
+                }
+                // Remove the last index if it starts at the end of the range.
+                if expected_line_starts.last() == Some(&end_ix) {
+                    expected_line_starts.pop();
+                }
+
+                let mut actual_line_starts = Vec::new();
+                while chunks.prev_line() {
+                    actual_line_starts.push(chunks.offset());
+                }
+                actual_line_starts.reverse();
+                assert_eq!(
+                    actual_line_starts,
+                    expected_line_starts,
+                    "actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
+                    &expected[start_ix..end_ix],
+                    start_ix..end_ix
+                );
+
+                // Check that next_line/prev_line work correctly from random positions
+                let mut random_offset = rng.gen_range(start_ix..=end_ix);
+                while !expected.is_char_boundary(random_offset) {
+                    random_offset -= 1;
+                }
+                chunks.seek(random_offset);
+                if rng.gen() {
+                    let expected_next_line_start = expected[random_offset..end_ix]
+                        .find('\n')
+                        .map(|newline_ix| random_offset + newline_ix + 1);
+
+                    let moved = chunks.next_line();
+                    assert_eq!(
+                        moved,
+                        expected_next_line_start.is_some(),
+                        "unexpected result from next_line after seeking to {} in range {:?} ({:?})",
+                        random_offset,
+                        start_ix..end_ix,
+                        &expected[start_ix..end_ix]
+                    );
+                    if let Some(expected_next_line_start) = expected_next_line_start {
+                        assert_eq!(
+                            chunks.offset(),
+                            expected_next_line_start,
+                            "invalid position after seeking to {} in range {:?} ({:?})",
+                            random_offset,
+                            start_ix..end_ix,
+                            &expected[start_ix..end_ix]
+                        );
+                    } else {
+                        assert_eq!(
+                            chunks.offset(),
+                            end_ix,
+                            "invalid position after seeking to {} in range {:?} ({:?})",
+                            random_offset,
+                            start_ix..end_ix,
+                            &expected[start_ix..end_ix]
+                        );
+                    }
+                } else {
+                    let search_end =
+                        if random_offset > 0 && expected.as_bytes()[random_offset - 1] == b'\n' {
+                            random_offset - 1
+                        } else {
+                            random_offset
+                        };
+
+                    let expected_prev_line_start = expected[..search_end]
+                        .rfind('\n')
+                        .and_then(|newline_ix| {
+                            let line_start_ix = newline_ix + 1;
+                            if line_start_ix >= start_ix {
+                                Some(line_start_ix)
+                            } else {
+                                None
+                            }
+                        })
+                        .or_else(|| {
+                            if random_offset > 0 && start_ix == 0 {
+                                Some(0)
+                            } else {
+                                None
+                            }
+                        });
+
+                    let moved = chunks.prev_line();
+                    assert_eq!(
+                        moved,
+                        expected_prev_line_start.is_some(),
+                        "unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
+                        random_offset,
+                        start_ix..end_ix,
+                        &expected[start_ix..end_ix]
+                    );
+                    if let Some(expected_prev_line_start) = expected_prev_line_start {
+                        assert_eq!(
+                            chunks.offset(),
+                            expected_prev_line_start,
+                            "invalid position after seeking to {} in range {:?} ({:?})",
+                            random_offset,
+                            start_ix..end_ix,
+                            &expected[start_ix..end_ix]
+                        );
+                    } else {
+                        assert_eq!(
+                            chunks.offset(),
+                            start_ix,
+                            "invalid position after seeking to {} in range {:?} ({:?})",
+                            random_offset,
+                            start_ix..end_ix,
+                            &expected[start_ix..end_ix]
+                        );
+                    }
+                }
             }
 
             let mut offset_utf16 = OffsetUtf16(0);

crates/sum_tree/src/cursor.rs 🔗

@@ -178,11 +178,11 @@ where
 
     #[track_caller]
     pub fn prev(&mut self, cx: &<T::Summary as Summary>::Context) {
-        self.prev_internal(|_| true, cx)
+        self.search_backward(|_| true, cx)
     }
 
     #[track_caller]
-    fn prev_internal<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
+    pub fn search_backward<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
     where
         F: FnMut(&T::Summary) -> bool,
     {
@@ -249,11 +249,11 @@ where
 
     #[track_caller]
     pub fn next(&mut self, cx: &<T::Summary as Summary>::Context) {
-        self.next_internal(|_| true, cx)
+        self.search_forward(|_| true, cx)
     }
 
     #[track_caller]
-    fn next_internal<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
+    pub fn search_forward<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
     where
         F: FnMut(&T::Summary) -> bool,
     {
@@ -658,11 +658,11 @@ where
     }
 
     pub fn next(&mut self, cx: &<T::Summary as Summary>::Context) {
-        self.cursor.next_internal(&mut self.filter_node, cx);
+        self.cursor.search_forward(&mut self.filter_node, cx);
     }
 
     pub fn prev(&mut self, cx: &<T::Summary as Summary>::Context) {
-        self.cursor.prev_internal(&mut self.filter_node, cx);
+        self.cursor.search_backward(&mut self.filter_node, cx);
     }
 }
 
@@ -681,7 +681,7 @@ where
         }
 
         if let Some(item) = self.item() {
-            self.cursor.next_internal(&mut self.filter_node, &());
+            self.cursor.search_forward(&mut self.filter_node, &());
             Some(item)
         } else {
             None

crates/text/src/text.rs 🔗

@@ -542,6 +542,35 @@ pub struct LineIndent {
 }
 
 impl LineIndent {
+    pub fn from_chunks(chunks: &mut Chunks) -> Self {
+        let mut tabs = 0;
+        let mut spaces = 0;
+        let mut line_blank = true;
+
+        'outer: while let Some(chunk) = chunks.peek() {
+            for ch in chunk.chars() {
+                if ch == '\t' {
+                    tabs += 1;
+                } else if ch == ' ' {
+                    spaces += 1;
+                } else {
+                    if ch != '\n' {
+                        line_blank = false;
+                    }
+                    break 'outer;
+                }
+            }
+
+            chunks.next();
+        }
+
+        Self {
+            tabs,
+            spaces,
+            line_blank,
+        }
+    }
+
     /// Constructs a new `LineIndent` which only contains spaces.
     pub fn spaces(spaces: u32) -> Self {
         Self {
@@ -1983,39 +2012,64 @@ impl BufferSnapshot {
         row_range: Range<u32>,
     ) -> impl Iterator<Item = (u32, LineIndent)> + '_ {
         let start = Point::new(row_range.start, 0).to_offset(self);
-        let end = Point::new(row_range.end, 0).to_offset(self);
+        let end = Point::new(row_range.end - 1, self.line_len(row_range.end - 1)).to_offset(self);
 
-        let mut lines = self.as_rope().chunks_in_range(start..end).lines();
+        let mut chunks = self.as_rope().chunks_in_range(start..end);
         let mut row = row_range.start;
+        let mut done = start == end;
         std::iter::from_fn(move || {
-            if let Some(line) = lines.next() {
-                let indent = LineIndent::from(line);
-                row += 1;
-                Some((row - 1, indent))
-            } else {
+            if done {
                 None
+            } else {
+                let indent = (row, LineIndent::from_chunks(&mut chunks));
+                done = !chunks.next_line();
+                row += 1;
+                Some(indent)
             }
         })
     }
 
+    /// Returns the line indents in the given row range, exclusive of end row, in reversed order.
     pub fn reversed_line_indents_in_row_range(
         &self,
         row_range: Range<u32>,
     ) -> impl Iterator<Item = (u32, LineIndent)> + '_ {
         let start = Point::new(row_range.start, 0).to_offset(self);
-        let end = Point::new(row_range.end, 0)
-            .to_offset(self)
-            .saturating_sub(1);
 
-        let mut lines = self.as_rope().reversed_chunks_in_range(start..end).lines();
-        let mut row = row_range.end;
+        let end_point;
+        let end;
+        if row_range.end > row_range.start {
+            end_point = Point::new(row_range.end - 1, self.line_len(row_range.end - 1));
+            end = end_point.to_offset(self);
+        } else {
+            end_point = Point::new(row_range.start, 0);
+            end = start;
+        };
+
+        let mut chunks = self.as_rope().chunks_in_range(start..end);
+        // Move the cursor to the start of the last line if it's not empty.
+        chunks.seek(end);
+        if end_point.column > 0 {
+            chunks.prev_line();
+        }
+
+        let mut row = end_point.row;
+        let mut done = start == end;
         std::iter::from_fn(move || {
-            if let Some(line) = lines.next() {
-                let indent = LineIndent::from(line);
-                row = row.saturating_sub(1);
-                Some((row, indent))
-            } else {
+            if done {
                 None
+            } else {
+                let initial_offset = chunks.offset();
+                let indent = (row, LineIndent::from_chunks(&mut chunks));
+                if chunks.offset() > initial_offset {
+                    chunks.prev_line();
+                }
+                done = !chunks.prev_line();
+                if !done {
+                    row -= 1;
+                }
+
+                Some(indent)
             }
         })
     }