diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 3770e4ccf13a53dc88c454f48c0fce3e386598fd..4eb017902c7349955ba6d55b89f23944b7575b7e 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -8,7 +8,7 @@ use crate::{ outline::OutlineItem, row_chunk::RowChunks, syntax_map::{ - MAX_BYTES_TO_QUERY, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, + MAX_CONTEXT_BYTES, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxMapMatch, SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint, }, task_context::RunnableRange, @@ -3441,7 +3441,7 @@ impl BufferSnapshot { range.clone(), &self.text, TreeSitterOptions { - max_bytes_to_query: Some(MAX_BYTES_TO_QUERY), + max_context_bytes: Some(MAX_CONTEXT_BYTES), max_start_depth: None, }, |grammar| Some(&grammar.indents_config.as_ref()?.query), @@ -3722,12 +3722,20 @@ impl BufferSnapshot { #[ztracing::instrument(skip_all)] fn get_highlights(&self, range: Range) -> (SyntaxMapCaptures<'_>, Vec) { - let captures = self.syntax.captures(range, &self.text, |grammar| { - grammar - .highlights_config - .as_ref() - .map(|config| &config.query) - }); + let captures = self.syntax.captures_with_options( + range, + &self.text, + TreeSitterOptions { + max_context_bytes: Some(1024), + max_start_depth: None, + }, + |grammar| { + grammar + .highlights_config + .as_ref() + .map(|config| &config.query) + }, + ); let highlight_maps = captures .grammars() .iter() @@ -4620,7 +4628,7 @@ impl BufferSnapshot { chunk_range.clone(), &self.text, TreeSitterOptions { - max_bytes_to_query: Some(MAX_BYTES_TO_QUERY), + max_context_bytes: Some(MAX_CONTEXT_BYTES), max_start_depth: None, }, |grammar| grammar.brackets_config.as_ref().map(|c| &c.query), diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index b73276ffd92be8915e2272b5242770fc52854af1..503675b8211de5127fdde660fa379c7e48c3d2d1 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -24,7 +24,14 @@ use tree_sitter::{ QueryPredicateArg, }; -pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024; +/// Default amount of byte context to allow on each side of the query range +/// when restricting a `QueryCursor` via `set_containing_byte_range`. +/// +/// Tree-sitter walks the subtree of the root node that's intersected by the +/// containing range, so keeping this bounded matters when a file contains a +/// large malformed region (e.g. an ERROR node that covers thousands of +/// lines). +pub const MAX_CONTEXT_BYTES: usize = 8 * 1024; pub struct SyntaxMap { snapshot: SyntaxSnapshot, @@ -954,6 +961,7 @@ impl SyntaxSnapshot { }] .into_iter(), query, + TreeSitterOptions::default(), ) } @@ -968,6 +976,23 @@ impl SyntaxSnapshot { buffer.as_rope(), self.layers_for_range(range, buffer, true), query, + TreeSitterOptions::default(), + ) + } + + pub fn captures_with_options<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + options: TreeSitterOptions, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures<'a> { + SyntaxMapCaptures::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer, true), + query, + options, ) } @@ -1099,6 +1124,7 @@ impl<'a> SyntaxMapCaptures<'a> { text: &'a Rope, layers: impl Iterator>, query: fn(&Grammar) -> Option<&Query>, + options: TreeSitterOptions, ) -> Self { let mut result = Self { layers: Vec::new(), @@ -1124,6 +1150,12 @@ impl<'a> SyntaxMapCaptures<'a> { ) }; + // Force the query cursor to skip over nodes outside of a certain context + // range, to limit the worst-case performance of queries. + if let Some(max_context_bytes) = options.max_context_bytes { + cursor.set_containing_byte_range(containing_range(&range, max_context_bytes)); + } + cursor.set_byte_range(range.clone()); let captures = cursor.captures(query, layer.node(), TextProvider(text)); let grammar_index = result @@ -1219,18 +1251,26 @@ impl<'a> SyntaxMapCaptures<'a> { #[derive(Default)] pub struct TreeSitterOptions { pub max_start_depth: Option, - pub max_bytes_to_query: Option, + /// When `Some(n)`, restricts the query cursor's containing byte range to + /// the query range extended by `n` bytes on each side. Matches whose nodes + /// don't all fall within that extended range are skipped, allowing + /// tree-sitter to avoid walking large subtrees that lie outside it. + pub max_context_bytes: Option, } impl TreeSitterOptions { pub fn max_start_depth(max_start_depth: u32) -> Self { Self { max_start_depth: Some(max_start_depth), - max_bytes_to_query: None, + max_context_bytes: None, } } } +fn containing_range(range: &Range, max_context_bytes: usize) -> Range { + range.start.saturating_sub(max_context_bytes)..range.end.saturating_add(max_context_bytes) +} + impl<'a> SyntaxMapMatches<'a> { fn new( range: Range, @@ -1260,12 +1300,8 @@ impl<'a> SyntaxMapMatches<'a> { }; cursor.set_max_start_depth(options.max_start_depth); - if let Some(max_bytes_to_query) = options.max_bytes_to_query { - let midpoint = (range.start + range.end) / 2; - let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2); - let containing_range_end = - containing_range_start.saturating_add(max_bytes_to_query); - cursor.set_containing_byte_range(containing_range_start..containing_range_end); + if let Some(max_context_bytes) = options.max_context_bytes { + cursor.set_containing_byte_range(containing_range(&range, max_context_bytes)); } cursor.set_byte_range(range.clone()); @@ -1857,11 +1893,9 @@ impl<'a> SyntaxLayer<'a> { let config = self.language.grammar.as_ref()?.override_config.as_ref()?; let mut query_cursor = QueryCursorHandle::new(); - query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1)); - query_cursor.set_containing_byte_range( - offset.saturating_sub(MAX_BYTES_TO_QUERY / 2) - ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2), - ); + let range = offset.saturating_sub(1)..offset.saturating_add(1); + query_cursor.set_byte_range(range.clone()); + query_cursor.set_containing_byte_range(containing_range(&range, MAX_CONTEXT_BYTES)); let mut smallest_match: Option<(u32, Range)> = None; let mut matches = query_cursor.matches(&config.query, self.node(), text);