Fix enclosing brackets not being found for overly large ranges

Kirill Bulatov created

Change summary

crates/language/src/buffer.rs       | 65 ++++++++++++++++++++++++++++++
crates/language/src/buffer_tests.rs | 56 ++++++++++++++++++++++++++
2 files changed, 119 insertions(+), 2 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -33,6 +33,7 @@ use gpui::{
     Task, TextStyle,
 };
 
+use itertools::Itertools;
 use lsp::{LanguageServerId, NumberOrString};
 use parking_lot::Mutex;
 use serde::{Deserialize, Serialize};
@@ -4592,10 +4593,16 @@ impl BufferSnapshot {
     ) -> HashMap<Range<BufferRow>, Vec<BracketMatch<usize>>> {
         let mut all_bracket_matches = HashMap::default();
 
+        let (query_ranges, max_bytes_to_query) = self.extend_range_for_enclosing_brackets(&range);
+        let point_ranges = query_ranges
+            .iter()
+            .map(|r| r.to_point(self))
+            .collect::<Vec<_>>();
+
         for chunk in self
             .tree_sitter_data
             .chunks
-            .applicable_chunks(&[range.to_point(self)])
+            .applicable_chunks(&point_ranges)
         {
             if known_chunks.is_some_and(|chunks| chunks.contains(&chunk.row_range())) {
                 continue;
@@ -4618,7 +4625,7 @@ impl BufferSnapshot {
                 chunk_range.clone(),
                 &self.text,
                 TreeSitterOptions {
-                    max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
+                    max_bytes_to_query: Some(max_bytes_to_query),
                     max_start_depth: None,
                 },
                 |grammar| grammar.brackets_config.as_ref().map(|c| &c.query),
@@ -4816,6 +4823,59 @@ impl BufferSnapshot {
         all_bracket_matches
     }
 
+    /// Walk the syntax tree upward from `range` and return a set of byte
+    /// ranges to query (plus the `max_bytes_to_query` limit) for bracket
+    /// matching.
+    ///
+    /// When the cursor sits inside a block whose byte extent exceeds
+    /// `MAX_BYTES_TO_QUERY`, the default containing-byte-range causes
+    /// tree-sitter's query cursor to skip its bracket children.  Rather than
+    /// expanding to the entire block (which would pull in every intermediate
+    /// chunk — catastrophic for huge files), we add small windows around the
+    /// block's start and end where bracket tokens actually live.
+    fn extend_range_for_enclosing_brackets(
+        &self,
+        range: &Range<usize>,
+    ) -> (Vec<Range<usize>>, usize) {
+        let mut ranges = vec![range.clone()];
+        let mut max_bytes = MAX_BYTES_TO_QUERY;
+
+        for layer in self
+            .syntax
+            .layers_for_range(range.clone(), &self.text, true)
+        {
+            let mut cursor = layer.node().walk();
+            if !Self::goto_node_enclosing_range(&mut cursor, range, false) {
+                continue;
+            }
+            loop {
+                let node = cursor.node();
+                let node_range = node.byte_range();
+                // Skip the syntax-layer root — it spans the whole document
+                // and never carries brackets itself.
+                if node_range.len() > max_bytes && node.parent().is_some() {
+                    let window = MAX_BYTES_TO_QUERY;
+                    ranges.push(
+                        node_range.start
+                            ..node_range.start.saturating_add(window).min(node_range.end),
+                    );
+                    ranges.push(
+                        node_range.end.saturating_sub(window).max(node_range.start)..node_range.end,
+                    );
+                    // The containing byte range is centered on each chunk's
+                    // midpoint, so we need 2× the block span to guarantee
+                    // every boundary chunk's window covers both brackets.
+                    max_bytes = max_bytes.max(node_range.len().saturating_mul(2));
+                }
+                if !cursor.goto_parent() {
+                    break;
+                }
+            }
+        }
+
+        (ranges, max_bytes)
+    }
+
     pub fn all_bracket_ranges(
         &self,
         range: Range<usize>,
@@ -4827,6 +4887,7 @@ impl BufferSnapshot {
                 let bracket_range = bracket_match.open_range.start..bracket_match.close_range.end;
                 bracket_range.overlaps(&range)
             })
+            .dedup_by(|a, b| a.open_range == b.open_range && a.close_range == b.close_range)
     }
 
     /// Returns bracket range pairs overlapping or adjacent to `range`

crates/language/src/buffer_tests.rs 🔗

@@ -13,6 +13,7 @@ use regex::RegexBuilder;
 use settings::SettingsStore;
 use settings::{AllLanguageSettingsContent, LanguageSettingsContent};
 use std::collections::BTreeSet;
+use std::fmt::Write as _;
 use std::{
     env,
     ops::Range,
@@ -1369,6 +1370,61 @@ fn test_enclosing_bracket_ranges(cx: &mut App) {
     );
 }
 
+#[gpui::test]
+fn test_enclosing_bracket_ranges_large_block(cx: &mut App) {
+    // Build a buffer with an impl block large enough that the distance between
+    // `{` and `}` exceeds MAX_BYTES_TO_QUERY (16 KB). Each comment line is
+    // 26 bytes including the newline, so ~630 lines already reach the limit.
+    let comment_line_count = 1000;
+    let mut source = String::from("impl Foo {\n");
+    for i in 0..comment_line_count {
+        writeln!(source, "    // line {i:04}  padding").unwrap();
+    }
+    source.push_str("}\n");
+
+    let buffer = cx.new(|cx| Buffer::local(source.clone(), cx).with_language(rust_lang(), cx));
+    let snapshot = buffer.update(cx, |buffer, _cx| buffer.snapshot());
+
+    let open_brace = source.find('{').unwrap();
+    let close_brace = source.rfind('}').unwrap();
+
+    // Cursor right after the opening brace — should find the enclosing pair.
+    let cursor = open_brace + 1;
+    let pairs = snapshot
+        .enclosing_bracket_ranges(cursor..cursor)
+        .map(|pair| (pair.open_range, pair.close_range))
+        .collect::<Vec<_>>();
+    assert_eq!(
+        pairs,
+        vec![(open_brace..open_brace + 1, close_brace..close_brace + 1)],
+        "enclosing_bracket_ranges should find the bracket pair even when \
+         open and close are in different row-chunks"
+    );
+
+    // Cursor at the opening brace itself.
+    let pairs = snapshot
+        .enclosing_bracket_ranges(open_brace..open_brace)
+        .map(|pair| (pair.open_range, pair.close_range))
+        .collect::<Vec<_>>();
+    assert_eq!(
+        pairs,
+        vec![(open_brace..open_brace + 1, close_brace..close_brace + 1)],
+        "cursor at the opening brace should also find the pair"
+    );
+
+    // Cursor somewhere in the middle of the block.
+    let middle = source.len() / 2;
+    let pairs = snapshot
+        .enclosing_bracket_ranges(middle..middle)
+        .map(|pair| (pair.open_range, pair.close_range))
+        .collect::<Vec<_>>();
+    assert_eq!(
+        pairs,
+        vec![(open_brace..open_brace + 1, close_brace..close_brace + 1)],
+        "cursor in the middle of a large block should find the enclosing pair"
+    );
+}
+
 #[gpui::test]
 fn test_enclosing_bracket_ranges_where_brackets_are_not_outermost_children(cx: &mut App) {
     let mut assert = |selection_text, bracket_pair_texts| {