From 6cdf954e2ce0e1a6b83ce116b81258d3b512adf1 Mon Sep 17 00:00:00 2001 From: Om Chillure Date: Wed, 22 Apr 2026 05:23:49 +0530 Subject: [PATCH] language: Fix slow Tree-sitter parsing (#52674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #### Context When tree-sitter parses a file with broken syntax (e.g. a large partial SQL `VALUES` clause, or any language where a large chunk becomes invalid), it can produce a single `ERROR` node spanning thousands of lines. On every render frame, Zed queries this tree for syntax highlights via `SyntaxMapCaptures`. Previously, only `set_byte_range` was applied to the query cursor — this limits which captures are *returned*, but tree-sitter still had to *traverse* the entire ERROR subtree to find them, causing O(file size) work per frame and making scrolling/editing visibly laggy. The fix applies `set_containing_byte_range` to the highlight query cursor, mirroring what `SyntaxMapMatches` already does for indentation and bracket queries. This tells tree-sitter to skip subtrees that extend far beyond the visible window, reducing traversal to the visible range plus a bounded amount of surrounding context (1024 bytes on each side for highlight queries). **Note:** This fix eliminates the main freeze/stall caused by full-tree traversal. A small amount of lag may still occur on very large broken files, as tree-sitter still needs to parse the error-recovery structure. Further improvements would require deeper changes to tree-sitter's query execution or incremental parsing. #### Closes #52390 #### How to Review Small change — focus on [syntax_map.rs:1119-1123](crates/language/src/syntax_map.rs#L1119) (the fix) and the shared `containing_range` helper below it. Compare with the existing `SyntaxMapMatches::new` path (line ~1255), which uses the same pattern. 
#### Self-Review Checklist - [x] I've reviewed my own diff for quality, security, and reliability - [x] Unsafe blocks (if any) have justifying comments - [x] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [x] Tests cover the new/changed behavior - [x] Performance impact has been considered and is acceptable #### Video [Screencast from 2026-03-26 14-19-19.webm](https://github.com/user-attachments/assets/6628492a-f013-438a-836a-2740f6e2f266) #### Note : Reopens previous work from closed PR #52475 (fork was deleted) Release Notes: - Fixed laggy scrolling and editing in files with large broken syntax regions (e.g. incomplete SQL `VALUES` clauses or large invalid blocks in any language) --------- Co-authored-by: Max Brunsfeld --- crates/language/src/buffer.rs | 26 ++++++++----- crates/language/src/syntax_map.rs | 62 ++++++++++++++++++++++++------- 2 files changed, 65 insertions(+), 23 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 3770e4ccf13a53dc88c454f48c0fce3e386598fd..4eb017902c7349955ba6d55b89f23944b7575b7e 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -8,7 +8,7 @@ use crate::{ outline::OutlineItem, row_chunk::RowChunks, syntax_map::{ - MAX_BYTES_TO_QUERY, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, + MAX_CONTEXT_BYTES, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxMapMatch, SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint, }, task_context::RunnableRange, @@ -3441,7 +3441,7 @@ impl BufferSnapshot { range.clone(), &self.text, TreeSitterOptions { - max_bytes_to_query: Some(MAX_BYTES_TO_QUERY), + max_context_bytes: Some(MAX_CONTEXT_BYTES), max_start_depth: None, }, |grammar| Some(&grammar.indents_config.as_ref()?.query), @@ -3722,12 +3722,20 @@ impl BufferSnapshot { #[ztracing::instrument(skip_all)] fn get_highlights(&self, range: Range) -> 
(SyntaxMapCaptures<'_>, Vec) { - let captures = self.syntax.captures(range, &self.text, |grammar| { - grammar - .highlights_config - .as_ref() - .map(|config| &config.query) - }); + let captures = self.syntax.captures_with_options( + range, + &self.text, + TreeSitterOptions { + max_context_bytes: Some(1024), + max_start_depth: None, + }, + |grammar| { + grammar + .highlights_config + .as_ref() + .map(|config| &config.query) + }, + ); let highlight_maps = captures .grammars() .iter() @@ -4620,7 +4628,7 @@ impl BufferSnapshot { chunk_range.clone(), &self.text, TreeSitterOptions { - max_bytes_to_query: Some(MAX_BYTES_TO_QUERY), + max_context_bytes: Some(MAX_CONTEXT_BYTES), max_start_depth: None, }, |grammar| grammar.brackets_config.as_ref().map(|c| &c.query), diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index b73276ffd92be8915e2272b5242770fc52854af1..503675b8211de5127fdde660fa379c7e48c3d2d1 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -24,7 +24,14 @@ use tree_sitter::{ QueryPredicateArg, }; -pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024; +/// Default amount of byte context to allow on each side of the query range +/// when restricting a `QueryCursor` via `set_containing_byte_range`. +/// +/// Tree-sitter walks the subtree of the root node that's intersected by the +/// containing range, so keeping this bounded matters when a file contains a +/// large malformed region (e.g. an ERROR node that covers thousands of +/// lines). 
+pub const MAX_CONTEXT_BYTES: usize = 8 * 1024; pub struct SyntaxMap { snapshot: SyntaxSnapshot, @@ -954,6 +961,7 @@ impl SyntaxSnapshot { }] .into_iter(), query, + TreeSitterOptions::default(), ) } @@ -968,6 +976,23 @@ impl SyntaxSnapshot { buffer.as_rope(), self.layers_for_range(range, buffer, true), query, + TreeSitterOptions::default(), + ) + } + + pub fn captures_with_options<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + options: TreeSitterOptions, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures<'a> { + SyntaxMapCaptures::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer, true), + query, + options, ) } @@ -1099,6 +1124,7 @@ impl<'a> SyntaxMapCaptures<'a> { text: &'a Rope, layers: impl Iterator>, query: fn(&Grammar) -> Option<&Query>, + options: TreeSitterOptions, ) -> Self { let mut result = Self { layers: Vec::new(), @@ -1124,6 +1150,12 @@ impl<'a> SyntaxMapCaptures<'a> { ) }; + // Force the query cursor to skip over nodes outside of a certain context + // range, to limit the worst-case performance of queries. + if let Some(max_context_bytes) = options.max_context_bytes { + cursor.set_containing_byte_range(containing_range(&range, max_context_bytes)); + } + cursor.set_byte_range(range.clone()); let captures = cursor.captures(query, layer.node(), TextProvider(text)); let grammar_index = result @@ -1219,18 +1251,26 @@ impl<'a> SyntaxMapCaptures<'a> { #[derive(Default)] pub struct TreeSitterOptions { pub max_start_depth: Option, - pub max_bytes_to_query: Option, + /// When `Some(n)`, restricts the query cursor's containing byte range to + /// the query range extended by `n` bytes on each side. Matches whose nodes + /// don't all fall within that extended range are skipped, allowing + /// tree-sitter to avoid walking large subtrees that lie outside it. 
+ pub max_context_bytes: Option, } impl TreeSitterOptions { pub fn max_start_depth(max_start_depth: u32) -> Self { Self { max_start_depth: Some(max_start_depth), - max_bytes_to_query: None, + max_context_bytes: None, } } } +fn containing_range(range: &Range, max_context_bytes: usize) -> Range { + range.start.saturating_sub(max_context_bytes)..range.end.saturating_add(max_context_bytes) +} + impl<'a> SyntaxMapMatches<'a> { fn new( range: Range, @@ -1260,12 +1300,8 @@ impl<'a> SyntaxMapMatches<'a> { }; cursor.set_max_start_depth(options.max_start_depth); - if let Some(max_bytes_to_query) = options.max_bytes_to_query { - let midpoint = (range.start + range.end) / 2; - let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2); - let containing_range_end = - containing_range_start.saturating_add(max_bytes_to_query); - cursor.set_containing_byte_range(containing_range_start..containing_range_end); + if let Some(max_context_bytes) = options.max_context_bytes { + cursor.set_containing_byte_range(containing_range(&range, max_context_bytes)); } cursor.set_byte_range(range.clone()); @@ -1857,11 +1893,9 @@ impl<'a> SyntaxLayer<'a> { let config = self.language.grammar.as_ref()?.override_config.as_ref()?; let mut query_cursor = QueryCursorHandle::new(); - query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1)); - query_cursor.set_containing_byte_range( - offset.saturating_sub(MAX_BYTES_TO_QUERY / 2) - ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2), - ); + let range = offset.saturating_sub(1)..offset.saturating_add(1); + query_cursor.set_byte_range(range.clone()); + query_cursor.set_containing_byte_range(containing_range(&range, MAX_CONTEXT_BYTES)); let mut smallest_match: Option<(u32, Range)> = None; let mut matches = query_cursor.matches(&config.query, self.node(), text);