Fix the bracket highlights for overly large ranges

Kirill Bulatov created

Change summary

crates/editor/src/bracket_colorization.rs | 128 +++++++--------
crates/language/src/buffer.rs             | 193 +++++++++++++++++++-----
crates/language/src/buffer_tests.rs       | 132 +++++++++++++++++
3 files changed, 343 insertions(+), 110 deletions(-)

Detailed changes

crates/editor/src/bracket_colorization.rs 🔗

@@ -118,9 +118,11 @@ impl Editor {
                     .ok();
             }
 
-            let viewport_start_row =
-                        buffer_snapshot.offset_to_point(buffer_range.start).row;
-                    let viewport_end_row = buffer_snapshot.offset_to_point(buffer_range.end).row;
+            let buffer_range = buffer_range.start..buffer_range.end;
+                    let large_block_pairs =
+                        buffer_snapshot.bracket_pairs_for_large_enclosing_blocks(&buffer_range);
+
+                    let large_block_depth = large_block_pairs.len();
 
                     let (bracket_matches_by_accent, updated_chunks) = bracket_matches_by_accent.await;
 
@@ -161,24 +163,22 @@ fn compute_bracket_ranges(
     anchors_in_multi_buffer: &impl Fn(ExcerptId, [text::Anchor; 4]) -> Option<[Option<Anchor>; 4]>,
 ) -> Vec<(usize, Vec<Range<Anchor>>)> {
     buffer_snapshot
-        .fetch_bracket_ranges(buffer_range.start..buffer_range.end, Some(fetched_chunks))
+        .fetch_bracket_ranges(buffer_range, Some(fetched_chunks))
         .into_iter()
         .flat_map(|(chunk_range, pairs)| {
-            if fetched_chunks.contains(&chunk_range) {
-                return Vec::new();
+            if fetched_chunks.insert(chunk_range) {
+                pairs
+                            } else {
+                                Vec::new()
                             }
-                            // Only claim chunks overlapping the viewport as
-                            // fetched.  Boundary chunks pulled in by
-                            // `extend_range_for_enclosing_brackets` contribute
-                            // their bracket pairs but must remain re-queryable
-            // when the user scrolls to them.
-                            if chunk_range.start <= viewport_end_row
-                                && chunk_range.end > viewport_start_row
-                            {
-                                fetched_chunks.insert(chunk_range);
+                        })
+                        .map(move |mut pair| {
+                            if let Some(idx) = pair.color_index.as_mut() {
+                                *idx += large_block_depth;
                             }
-            pairs
-        })
+                            pair
+                        })
+        .chain(large_block_pairs)
         .filter_map(|pair| {
             let color_index = pair.color_index?;
 
@@ -660,8 +660,9 @@ fn process_data«1()1» «1{
 
     #[gpui::test]
     async fn test_bracket_colorization_large_block(cx: &mut gpui::TestAppContext) {
-        // Each `//\n` is 3 bytes; 6000 lines ≈ 18 KB, exceeding MAX_BYTES_TO_QUERY (16 KB).
-        let comment_lines = 6000;
+        // Each padded comment line is 27 bytes; 620 lines = 16740 bytes,
+        // just over MAX_BYTES_TO_QUERY (16 KB) with head/tail overhead.
+        let comment_lines = 620;
 
         init_test(cx, |language_settings| {
             language_settings.defaults.colorize_brackets = Some(true);
@@ -689,33 +690,28 @@ mod foo {
             comment_lines,
         ));
 
+        let colored_head = "mod foo «1{\n\
+                            \x20   fn process_data_1«2()2» «2{\n\
+                            \x20       let map: Option«3<Vec«4<«5()5»>4»>3» = None;\n\
+                            \x20   }2»";
+        let uncolored_tail = "    fn process_data_2() {\n\
+                              \x20       let map: Option<Vec<()>> = None;\n\
+                              \x20   }\n\
+                              }1»";
+        let colored_tail = "    fn process_data_2«2()2» «2{\n\
+                            \x20       let map: Option«3<Vec«4<«5()5»>4»>3» = None;\n\
+                            \x20   }2»\n\
+                            }1»";
+
         cx.executor().advance_clock(Duration::from_millis(100));
         cx.executor().run_until_parked();
+        let markup = bracket_colors_markup(&mut cx);
+        let relevant = filter_bracket_relevant_lines(&markup);
         assert_eq!(
-            &separate_with_comment_lines(
-                indoc! {r#"
-mod foo «1{
-    fn process_data_1«2()2» «2{
-        let map: Option«3<Vec«4<«5()5»>4»>3» = None;
-    }2»
-"#},
-                indoc! {r#"
-    fn process_data_2«2()2» «2{
-        let map: Option«3<Vec«4<«5()5»>4»>3» = None;
-    }2»
-}1»
-
-1 hsla(207.80, 16.20%, 69.19%, 1.00)
-2 hsla(29.00, 54.00%, 65.88%, 1.00)
-3 hsla(286.00, 51.00%, 75.25%, 1.00)
-4 hsla(187.00, 47.00%, 59.22%, 1.00)
-5 hsla(355.00, 65.00%, 75.94%, 1.00)
-"#},
-                comment_lines,
-            ),
-            &bracket_colors_markup(&mut cx),
-            "Top chunk: brackets should be colorized even when the enclosing \
-             block exceeds MAX_BYTES_TO_QUERY"
+            relevant,
+            format!("{colored_head}\n{uncolored_tail}"),
+            "Top chunk: visible brackets should be colorized even when the \
+             enclosing block exceeds MAX_BYTES_TO_QUERY"
         );
 
         cx.update_editor(|editor, window, cx| {
@@ -724,29 +720,11 @@ mod foo «1{
         });
         cx.executor().advance_clock(Duration::from_millis(100));
         cx.executor().run_until_parked();
+        let markup = bracket_colors_markup(&mut cx);
+        let relevant = filter_bracket_relevant_lines(&markup);
         assert_eq!(
-            &separate_with_comment_lines(
-                indoc! {r#"
-mod foo «1{
-    fn process_data_1«2()2» «2{
-        let map: Option«3<Vec«4<«5()5»>4»>3» = None;
-    }2»
-"#},
-                indoc! {r#"
-    fn process_data_2«2()2» «2{
-        let map: Option«3<Vec«4<«5()5»>4»>3» = None;
-    }2»
-}1»
-
-1 hsla(207.80, 16.20%, 69.19%, 1.00)
-2 hsla(29.00, 54.00%, 65.88%, 1.00)
-3 hsla(286.00, 51.00%, 75.25%, 1.00)
-4 hsla(187.00, 47.00%, 59.22%, 1.00)
-5 hsla(355.00, 65.00%, 75.94%, 1.00)
-"#},
-                comment_lines,
-            ),
-            &bracket_colors_markup(&mut cx),
+            relevant,
+            format!("{colored_head}\n{colored_tail}"),
             "After scrolling to bottom, both chunks should have bracket \
              highlights across a large block"
         );
@@ -1562,10 +1540,26 @@ mod foo «1{
         );
     }
 
+    fn filter_bracket_relevant_lines(markup: &str) -> String {
+        markup
+            .lines()
+            .filter(|line| {
+                let trimmed = line.trim();
+                !trimmed.is_empty()
+                    && !trimmed.starts_with("//")
+                    && !trimmed.starts_with("hsla(")
+                    && !trimmed.chars().next().is_some_and(|c| c.is_ascii_digit())
+            })
+            .collect::<Vec<_>>()
+            .join("\n")
+    }
+
     fn separate_with_comment_lines(head: &str, tail: &str, comment_lines: usize) -> String {
         let mut result = head.to_string();
-        result.push_str("\n");
-        result.push_str(&"//\n".repeat(comment_lines));
+        result.push('\n');
+        for _ in 0..comment_lines {
+            result.push_str("// padding padding padding\n");
+        }
         result.push_str(tail);
         result
     }

crates/language/src/buffer.rs 🔗

@@ -4593,16 +4593,10 @@ impl BufferSnapshot {
     ) -> HashMap<Range<BufferRow>, Vec<BracketMatch<usize>>> {
         let mut all_bracket_matches = HashMap::default();
 
-        let (query_ranges, max_bytes_to_query) = self.extend_range_for_enclosing_brackets(&range);
-        let point_ranges = query_ranges
-            .iter()
-            .map(|r| r.to_point(self))
-            .collect::<Vec<_>>();
-
         for chunk in self
             .tree_sitter_data
             .chunks
-            .applicable_chunks(&point_ranges)
+            .applicable_chunks(&[range.to_point(self)])
         {
             if known_chunks.is_some_and(|chunks| chunks.contains(&chunk.row_range())) {
                 continue;
@@ -4625,7 +4619,7 @@ impl BufferSnapshot {
                 chunk_range.clone(),
                 &self.text,
                 TreeSitterOptions {
-                    max_bytes_to_query: Some(max_bytes_to_query),
+                    max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
                     max_start_depth: None,
                 },
                 |grammar| grammar.brackets_config.as_ref().map(|c| &c.query),
@@ -4823,69 +4817,182 @@ impl BufferSnapshot {
         all_bracket_matches
     }
 
-    /// Walk the syntax tree upward from `range` and return a set of byte
-    /// ranges to query (plus the `max_bytes_to_query` limit) for bracket
-    /// matching.
+    /// Walk the syntax tree upward from `range` and find bracket pairs for
+    /// enclosing nodes whose byte extent exceeds `MAX_BYTES_TO_QUERY`.
     ///
-    /// When the cursor sits inside a block whose byte extent exceeds
-    /// `MAX_BYTES_TO_QUERY`, the default containing-byte-range causes
-    /// tree-sitter's query cursor to skip its bracket children.  Rather than
-    /// expanding to the entire block (which would pull in every intermediate
-    /// chunk — catastrophic for huge files), we add small windows around the
-    /// block's start and end where bracket tokens actually live.
-    fn extend_range_for_enclosing_brackets(
+    /// Tree-sitter's `set_containing_byte_range` requires all captured nodes
+    /// to be fully contained within the range.  When a block is larger than
+    /// `MAX_BYTES_TO_QUERY`, the open and close brackets land in different
+    /// containing-range windows, so the query never returns the pair.
+    ///
+    /// Instead of expanding the query range (which is catastrophically slow),
+    /// we walk the syntax tree directly: for each oversized enclosing node we
+    /// inspect its first and last children to find bracket tokens, then count
+    /// ancestor brackets for the nesting depth (color_index).
+    pub fn bracket_pairs_for_large_enclosing_blocks(
         &self,
         range: &Range<usize>,
-    ) -> (Vec<Range<usize>>, usize) {
-        let mut ranges = vec![range.clone()];
-        let mut max_bytes = MAX_BYTES_TO_QUERY;
+    ) -> Vec<BracketMatch<usize>> {
+        const BRACKET_PAIRS: &[(u8, u8)] =
+            &[(b'{', b'}'), (b'(', b')'), (b'[', b']'), (b'<', b'>')];
+
+        let mut result = Vec::new();
 
         for layer in self
             .syntax
             .layers_for_range(range.clone(), &self.text, true)
         {
+            let depth = layer.depth;
             let mut cursor = layer.node().walk();
-            if !Self::goto_node_enclosing_range(&mut cursor, range, false) {
-                continue;
-            }
+            // Descend to the deepest node covering `range.start` so the
+            // upward walk visits every bracket-carrying ancestor (e.g.
+            // `declaration_list` inside `mod_item`).  Unlike
+            // `goto_node_enclosing_range`, this only needs a single point
+            // inside the block, so it works even when the viewport extends
+            // past the enclosing node.
+            while cursor.goto_first_child_for_byte(range.start).is_some() {}
+
+            let mut seen = HashSet::default();
             loop {
                 let node = cursor.node();
-                let node_range = node.byte_range();
-                // Skip the syntax-layer root — it spans the whole document
-                // and never carries brackets itself.
-                if node_range.len() > max_bytes && node.parent().is_some() {
-                    let window = MAX_BYTES_TO_QUERY;
-                    ranges.push(
-                        node_range.start
-                            ..node_range.start.saturating_add(window).min(node_range.end),
-                    );
-                    ranges.push(
-                        node_range.end.saturating_sub(window).max(node_range.start)..node_range.end,
-                    );
-                    // The containing byte range is centered on each chunk's
-                    // midpoint, so we need 2× the block span to guarantee
-                    // every boundary chunk's window covers both brackets.
-                    max_bytes = max_bytes.max(node_range.len().saturating_mul(2));
-                }
+
+                // `goto_first_child_for_byte` follows one path (e.g.
+                // `impl_item` → `impl` keyword), so bracket-carrying siblings
+                // like `declaration_list` are only reachable as children of an
+                // ancestor.  Check every oversized child, recursing into those
+                // without their own pair (`impl_item` → `declaration_list`).
+                Self::collect_large_bracket_children(
+                    node,
+                    &self.text,
+                    BRACKET_PAIRS,
+                    depth,
+                    &mut seen,
+                    &mut result,
+                );
+
                 if !cursor.goto_parent() {
                     break;
                 }
             }
         }
 
-        (ranges, max_bytes)
+        result
+    }
+
+    fn collect_large_bracket_children(
+        node: tree_sitter::Node,
+        text: &text::BufferSnapshot,
+        bracket_pairs: &[(u8, u8)],
+        syntax_layer_depth: usize,
+        seen: &mut HashSet<(usize, usize)>,
+        result: &mut Vec<BracketMatch<usize>>,
+    ) {
+        for child_idx in 0..node.child_count() as u32 {
+            let Some(child) = node.child(child_idx) else {
+                continue;
+            };
+            let child_range = child.byte_range();
+            if child_range.len() <= MAX_BYTES_TO_QUERY {
+                continue;
+            }
+            if child.parent().is_none() {
+                continue;
+            }
+            if !seen.insert((child_range.start, child_range.end)) {
+                continue;
+            }
+            if let Some((open_range, close_range)) =
+                Self::find_bracket_children(child, text, bracket_pairs)
+            {
+                let nesting_depth =
+                    Self::count_bracket_ancestors_from_node(child, text, bracket_pairs);
+                result.push(BracketMatch {
+                    open_range,
+                    close_range,
+                    syntax_layer_depth,
+                    newline_only: false,
+                    color_index: Some(nesting_depth),
+                });
+            } else {
+                // Brackets may be nested deeper (e.g. `impl_item` has
+                // `declaration_list` as a child, which in turn holds `{ }`).
+                Self::collect_large_bracket_children(
+                    child,
+                    text,
+                    bracket_pairs,
+                    syntax_layer_depth,
+                    seen,
+                    result,
+                );
+            }
+        }
+    }
+
+    /// Check whether `node` has a first and last child that form a matching
+    /// bracket pair, returning their byte ranges if so.
+    fn find_bracket_children(
+        node: tree_sitter::Node,
+        text: &text::BufferSnapshot,
+        bracket_pairs: &[(u8, u8)],
+    ) -> Option<(Range<usize>, Range<usize>)> {
+        let child_count = node.child_count();
+        if child_count < 2 {
+            return None;
+        }
+        let first = node.child(0)?;
+        let last = node.child((child_count - 1) as u32)?;
+        if first.byte_range().len() != 1 || last.byte_range().len() != 1 {
+            return None;
+        }
+        let open_byte = *text
+            .as_rope()
+            .bytes_in_range(first.byte_range())
+            .next()?
+            .first()?;
+        let close_byte = *text
+            .as_rope()
+            .bytes_in_range(last.byte_range())
+            .next()?
+            .first()?;
+        bracket_pairs
+            .iter()
+            .any(|&(o, c)| o == open_byte && c == close_byte)
+            .then(|| (first.byte_range(), last.byte_range()))
+    }
+
+    /// Count how many ancestor nodes above `node` also carry bracket
+    /// children, giving the nesting depth for colorization.
+    fn count_bracket_ancestors_from_node(
+        node: tree_sitter::Node,
+        text: &text::BufferSnapshot,
+        bracket_pairs: &[(u8, u8)],
+    ) -> usize {
+        let mut count = 0;
+        let mut current = node;
+        while let Some(parent) = current.parent() {
+            if parent.parent().is_none() {
+                break;
+            }
+            if Self::find_bracket_children(parent, text, bracket_pairs).is_some() {
+                count += 1;
+            }
+            current = parent;
+        }
+        count
     }
 
     pub fn all_bracket_ranges(
         &self,
         range: Range<usize>,
     ) -> impl Iterator<Item = BracketMatch<usize>> {
+        let large_block_pairs = self.bracket_pairs_for_large_enclosing_blocks(&range);
         self.fetch_bracket_ranges(range.clone(), None)
             .into_values()
             .flatten()
+            .chain(large_block_pairs)
             .filter(move |bracket_match| {
                 let bracket_range = bracket_match.open_range.start..bracket_match.close_range.end;
-                bracket_range.overlaps(&range)
+                bracket_range.overlaps(&range) || bracket_range.contains_inclusive(&range)
             })
             .dedup_by(|a, b| a.open_range == b.open_range && a.close_range == b.close_range)
     }

crates/language/src/buffer_tests.rs 🔗

@@ -1425,6 +1425,138 @@ fn test_enclosing_bracket_ranges_large_block(cx: &mut App) {
     );
 }
 
+#[gpui::test]
+fn test_bracket_pairs_for_large_enclosing_blocks(cx: &mut App) {
+    use crate::syntax_map::MAX_BYTES_TO_QUERY;
+
+    // Build a source that looks like real code: uses, structs, and other
+    // items before a large impl block — similar to editor.rs.
+    let mut source = String::from(
+        "use std::collections::HashMap;\n\
+         use std::sync::Arc;\n\
+         \n\
+         pub struct Foo {\n\
+         \x20   field_a: i32,\n\
+         \x20   field_b: String,\n\
+         }\n\
+         \n\
+         pub struct Bar {\n\
+         \x20   items: Vec<Foo>,\n\
+         }\n\
+         \n",
+    );
+    let impl_start = source.len();
+    source.push_str("impl Foo {\n");
+    let fn_body = "        let x = 1;\n        let y = 2;\n        x + y\n";
+    let mut fn_count = 0;
+    while source.len() < MAX_BYTES_TO_QUERY + 1000 {
+        writeln!(
+            source,
+            "    fn func_{fn_count}() -> i32 {{\n{fn_body}    }}"
+        )
+        .unwrap();
+        fn_count += 1;
+    }
+    source.push_str("}\n");
+
+    let buffer = cx.new(|cx| Buffer::local(source.clone(), cx).with_language(rust_lang(), cx));
+    let snapshot = buffer.update(cx, |buffer, _cx| buffer.snapshot());
+
+    let open_brace = source[impl_start..].find('{').unwrap() + impl_start;
+    let close_brace = source.rfind('}').unwrap();
+
+    // Query from a viewport near the beginning of the impl block.
+    let viewport_start = open_brace + 1;
+    let viewport_end = (viewport_start + 500).min(source.len());
+    let pairs = snapshot.bracket_pairs_for_large_enclosing_blocks(&(viewport_start..viewport_end));
+    assert_eq!(
+        pairs.len(),
+        1,
+        "should find exactly one large enclosing bracket pair from top viewport"
+    );
+    assert_eq!(pairs[0].open_range, open_brace..open_brace + 1);
+    assert_eq!(pairs[0].close_range, close_brace..close_brace + 1);
+    assert_eq!(pairs[0].color_index, Some(0), "outermost block has depth 0");
+
+    // Query from a viewport in the middle of the impl block.
+    let middle = source.len() / 2;
+    let pairs = snapshot.bracket_pairs_for_large_enclosing_blocks(&(middle..middle + 500));
+    assert_eq!(
+        pairs.len(),
+        1,
+        "should find exactly one large enclosing bracket pair from middle viewport"
+    );
+    assert_eq!(pairs[0].open_range, open_brace..open_brace + 1);
+    assert_eq!(pairs[0].close_range, close_brace..close_brace + 1);
+
+    // Query from a viewport near the end of the impl block.
+    let near_end = close_brace.saturating_sub(200);
+    let pairs = snapshot.bracket_pairs_for_large_enclosing_blocks(&(near_end..close_brace + 1));
+    assert_eq!(
+        pairs.len(),
+        1,
+        "should find exactly one large enclosing bracket pair from bottom viewport"
+    );
+    assert_eq!(pairs[0].open_range, open_brace..open_brace + 1);
+    assert_eq!(pairs[0].close_range, close_brace..close_brace + 1);
+
+    // Viewport that extends past the closing brace should still find the pair
+    // (the viewport may include trailing content after `}`).
+    let pairs = snapshot.bracket_pairs_for_large_enclosing_blocks(&(near_end..source.len()));
+    assert_eq!(
+        pairs.len(),
+        1,
+        "should find the pair even when viewport extends past the block"
+    );
+    assert_eq!(pairs[0].open_range, open_brace..open_brace + 1);
+    assert_eq!(pairs[0].close_range, close_brace..close_brace + 1);
+}
+
+#[gpui::test]
+fn test_bracket_pairs_for_large_block_viewport_before_block(cx: &mut App) {
+    use crate::syntax_map::MAX_BYTES_TO_QUERY;
+
+    // Simulate a viewport that starts a few lines BEFORE `impl Foo {`,
+    // e.g. the user sees the closing `}` of the previous item and then
+    // `impl Foo {`.  `goto_first_child_for_byte(range.start)` descends
+    // into the previous item, so the `impl_item` node is only reachable
+    // as a sibling — and brackets live on its `declaration_list` child,
+    // one level deeper.
+    let preamble = "struct Bar {\n    field: i32,\n}\n\n";
+    let mut source = String::from(preamble);
+    let impl_start = source.len();
+    source.push_str("impl Foo {\n");
+    let fn_body = "        let x = 1;\n        let y = 2;\n        x + y\n";
+    let mut fn_count = 0;
+    while source.len() < impl_start + MAX_BYTES_TO_QUERY + 1000 {
+        writeln!(
+            source,
+            "    fn func_{fn_count}() -> i32 {{\n{fn_body}    }}"
+        )
+        .unwrap();
+        fn_count += 1;
+    }
+    source.push_str("}\n");
+
+    let buffer = cx.new(|cx| Buffer::local(source.clone(), cx).with_language(rust_lang(), cx));
+    let snapshot = buffer.update(cx, |buffer, _cx| buffer.snapshot());
+
+    let open_brace = source[impl_start..].find('{').unwrap() + impl_start;
+    let close_brace = source.rfind('}').unwrap();
+
+    // Viewport starts inside the preamble (before the impl block).
+    let viewport_start = preamble.len().saturating_sub(10);
+    let viewport_end = open_brace + 200;
+    let pairs = snapshot.bracket_pairs_for_large_enclosing_blocks(&(viewport_start..viewport_end));
+    assert_eq!(
+        pairs.len(),
+        1,
+        "should find the impl bracket pair even when viewport starts before the block"
+    );
+    assert_eq!(pairs[0].open_range, open_brace..open_brace + 1);
+    assert_eq!(pairs[0].close_range, close_brace..close_brace + 1);
+}
+
 #[gpui::test]
 fn test_enclosing_bracket_ranges_where_brackets_are_not_outermost_children(cx: &mut App) {
     let mut assert = |selection_text, bracket_pair_texts| {