language: Fix slow Tree-sitter parsing (#52674)

Om Chillure and Max Brunsfeld created

#### Context

When tree-sitter parses a file with broken syntax (e.g. a large partial
SQL `VALUES` clause, or any language where a large chunk becomes
invalid), it can produce a single `ERROR` node spanning thousands of
lines. On every render frame, Zed queries this tree for syntax
highlights via `SyntaxMapCaptures`. Previously, only `set_byte_range`
was applied to the query cursor. That limits which captures are
*returned*, but tree-sitter still had to *traverse* the entire `ERROR`
subtree to find them, causing O(file size) work per frame and making
scrolling and editing visibly laggy.

The fix applies `set_containing_byte_range` to the highlight query
cursor, mirroring what `SyntaxMapMatches` already does for indentation
and bracket queries. This tells tree-sitter to skip subtrees that extend
far beyond the visible window, reducing traversal to the visible range
plus a small amount of surrounding context (1 KiB on each side for
highlights).

**Note:** This fix eliminates the main freeze/stall caused by full-tree
traversal. A small amount of lag may still occur on very large broken
files, as tree-sitter still needs to parse the error-recovery structure.
Further improvements would require deeper changes to tree-sitter's query
execution or incremental parsing.


#### Closes #52390

#### How to Review

Small change — focus on
[syntax_map.rs:1119-1123](crates/language/src/syntax_map.rs#L1119) (the
fix) and the `containing_range` helper below it.
Compare with the existing `SyntaxMapMatches::new` path (line ~1255),
which uses the same pattern.

#### Self-Review Checklist

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

#### Video 
[Screencast from 2026-03-26
14-19-19.webm](https://github.com/user-attachments/assets/6628492a-f013-438a-836a-2740f6e2f266)


#### Note: Reopens previous work from closed PR #52475 (fork was
deleted)

Release Notes:

- Fixed laggy scrolling and editing in files with large broken syntax
regions (e.g. incomplete SQL `VALUES` clauses or large invalid blocks in
any language)

---------

Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>

Change summary

crates/language/src/buffer.rs     | 26 +++++++++----
crates/language/src/syntax_map.rs | 62 +++++++++++++++++++++++++-------
2 files changed, 65 insertions(+), 23 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -8,7 +8,7 @@ use crate::{
     outline::OutlineItem,
     row_chunk::RowChunks,
     syntax_map::{
-        MAX_BYTES_TO_QUERY, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures,
+        MAX_CONTEXT_BYTES, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures,
         SyntaxMapMatch, SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint,
     },
     task_context::RunnableRange,
@@ -3441,7 +3441,7 @@ impl BufferSnapshot {
             range.clone(),
             &self.text,
             TreeSitterOptions {
-                max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
+                max_context_bytes: Some(MAX_CONTEXT_BYTES),
                 max_start_depth: None,
             },
             |grammar| Some(&grammar.indents_config.as_ref()?.query),
@@ -3722,12 +3722,20 @@ impl BufferSnapshot {
 
     #[ztracing::instrument(skip_all)]
     fn get_highlights(&self, range: Range<usize>) -> (SyntaxMapCaptures<'_>, Vec<HighlightMap>) {
-        let captures = self.syntax.captures(range, &self.text, |grammar| {
-            grammar
-                .highlights_config
-                .as_ref()
-                .map(|config| &config.query)
-        });
+        let captures = self.syntax.captures_with_options(
+            range,
+            &self.text,
+            TreeSitterOptions {
+                max_context_bytes: Some(1024),
+                max_start_depth: None,
+            },
+            |grammar| {
+                grammar
+                    .highlights_config
+                    .as_ref()
+                    .map(|config| &config.query)
+            },
+        );
         let highlight_maps = captures
             .grammars()
             .iter()
@@ -4620,7 +4628,7 @@ impl BufferSnapshot {
                 chunk_range.clone(),
                 &self.text,
                 TreeSitterOptions {
-                    max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
+                    max_context_bytes: Some(MAX_CONTEXT_BYTES),
                     max_start_depth: None,
                 },
                 |grammar| grammar.brackets_config.as_ref().map(|c| &c.query),

crates/language/src/syntax_map.rs 🔗

@@ -24,7 +24,14 @@ use tree_sitter::{
     QueryPredicateArg,
 };
 
-pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024;
+/// Default amount of byte context to allow on each side of the query range
+/// when restricting a `QueryCursor` via `set_containing_byte_range`.
+///
+/// Tree-sitter walks the subtree of the root node that's intersected by the
+/// containing range, so keeping this bounded matters when a file contains a
+/// large malformed region (e.g. an ERROR node that covers thousands of
+/// lines).
+pub const MAX_CONTEXT_BYTES: usize = 8 * 1024;
 
 pub struct SyntaxMap {
     snapshot: SyntaxSnapshot,
@@ -954,6 +961,7 @@ impl SyntaxSnapshot {
             }]
             .into_iter(),
             query,
+            TreeSitterOptions::default(),
         )
     }
 
@@ -968,6 +976,23 @@ impl SyntaxSnapshot {
             buffer.as_rope(),
             self.layers_for_range(range, buffer, true),
             query,
+            TreeSitterOptions::default(),
+        )
+    }
+
+    pub fn captures_with_options<'a>(
+        &'a self,
+        range: Range<usize>,
+        buffer: &'a BufferSnapshot,
+        options: TreeSitterOptions,
+        query: fn(&Grammar) -> Option<&Query>,
+    ) -> SyntaxMapCaptures<'a> {
+        SyntaxMapCaptures::new(
+            range.clone(),
+            buffer.as_rope(),
+            self.layers_for_range(range, buffer, true),
+            query,
+            options,
         )
     }
 
@@ -1099,6 +1124,7 @@ impl<'a> SyntaxMapCaptures<'a> {
         text: &'a Rope,
         layers: impl Iterator<Item = SyntaxLayer<'a>>,
         query: fn(&Grammar) -> Option<&Query>,
+        options: TreeSitterOptions,
     ) -> Self {
         let mut result = Self {
             layers: Vec::new(),
@@ -1124,6 +1150,12 @@ impl<'a> SyntaxMapCaptures<'a> {
                 )
             };
 
+            // Force the query cursor to skip over nodes outside of a certain context
+            // range, to limit the worst-case performance of queries.
+            if let Some(max_context_bytes) = options.max_context_bytes {
+                cursor.set_containing_byte_range(containing_range(&range, max_context_bytes));
+            }
+
             cursor.set_byte_range(range.clone());
             let captures = cursor.captures(query, layer.node(), TextProvider(text));
             let grammar_index = result
@@ -1219,18 +1251,26 @@ impl<'a> SyntaxMapCaptures<'a> {
 #[derive(Default)]
 pub struct TreeSitterOptions {
     pub max_start_depth: Option<u32>,
-    pub max_bytes_to_query: Option<usize>,
+    /// When `Some(n)`, restricts the query cursor's containing byte range to
+    /// the query range extended by `n` bytes on each side. Matches whose nodes
+    /// don't all fall within that extended range are skipped, allowing
+    /// tree-sitter to avoid walking large subtrees that lie outside it.
+    pub max_context_bytes: Option<usize>,
 }
 
 impl TreeSitterOptions {
     pub fn max_start_depth(max_start_depth: u32) -> Self {
         Self {
             max_start_depth: Some(max_start_depth),
-            max_bytes_to_query: None,
+            max_context_bytes: None,
         }
     }
 }
 
+fn containing_range(range: &Range<usize>, max_context_bytes: usize) -> Range<usize> {
+    range.start.saturating_sub(max_context_bytes)..range.end.saturating_add(max_context_bytes)
+}
+
 impl<'a> SyntaxMapMatches<'a> {
     fn new(
         range: Range<usize>,
@@ -1260,12 +1300,8 @@ impl<'a> SyntaxMapMatches<'a> {
             };
             cursor.set_max_start_depth(options.max_start_depth);
 
-            if let Some(max_bytes_to_query) = options.max_bytes_to_query {
-                let midpoint = (range.start + range.end) / 2;
-                let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2);
-                let containing_range_end =
-                    containing_range_start.saturating_add(max_bytes_to_query);
-                cursor.set_containing_byte_range(containing_range_start..containing_range_end);
+            if let Some(max_context_bytes) = options.max_context_bytes {
+                cursor.set_containing_byte_range(containing_range(&range, max_context_bytes));
             }
 
             cursor.set_byte_range(range.clone());
@@ -1857,11 +1893,9 @@ impl<'a> SyntaxLayer<'a> {
         let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
 
         let mut query_cursor = QueryCursorHandle::new();
-        query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
-        query_cursor.set_containing_byte_range(
-            offset.saturating_sub(MAX_BYTES_TO_QUERY / 2)
-                ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2),
-        );
+        let range = offset.saturating_sub(1)..offset.saturating_add(1);
+        query_cursor.set_byte_range(range.clone());
+        query_cursor.set_containing_byte_range(containing_range(&range, MAX_CONTEXT_BYTES));
 
         let mut smallest_match: Option<(u32, Range<usize>)> = None;
         let mut matches = query_cursor.matches(&config.query, self.node(), text);