Fix slow tree-sitter query execution by limiting the range that queries search (#39416)

Max Brunsfeld and Kirill Bulatov created

Part of https://github.com/zed-industries/zed/issues/39594
Closes https://github.com/zed-industries/zed/issues/4701
Closes https://github.com/zed-industries/zed/issues/42861
Closes https://github.com/zed-industries/zed/issues/44503
~Depends on https://github.com/tree-sitter/tree-sitter/pull/4919~

Release Notes:

- Fixed some performance bottlenecks related to syntax analysis when
editing very large files

---------

Co-authored-by: Kirill Bulatov <kirill@zed.dev>

Change summary

crates/language/src/buffer.rs     | 30 ++++++++++++++++++++----------
crates/language/src/syntax_map.rs | 21 ++++++++++++++++++++-
2 files changed, 40 insertions(+), 11 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -8,8 +8,8 @@ use crate::{
     outline::OutlineItem,
     row_chunk::RowChunks,
     syntax_map::{
-        SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxMapMatch,
-        SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint,
+        MAX_BYTES_TO_QUERY, SyntaxLayer, SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures,
+        SyntaxMapMatch, SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint,
     },
     task_context::RunnableRange,
     text_diff::text_diff,
@@ -3222,9 +3222,15 @@ impl BufferSnapshot {
         let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0);
         let end = Point::new(row_range.end, 0);
         let range = (start..end).to_offset(&self.text);
-        let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| {
-            Some(&grammar.indents_config.as_ref()?.query)
-        });
+        let mut matches = self.syntax.matches_with_options(
+            range.clone(),
+            &self.text,
+            TreeSitterOptions {
+                max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
+                max_start_depth: None,
+            },
+            |grammar| Some(&grammar.indents_config.as_ref()?.query),
+        );
         let indent_configs = matches
             .grammars()
             .iter()
@@ -4335,11 +4341,15 @@ impl BufferSnapshot {
             let mut opens = Vec::new();
             let mut color_pairs = Vec::new();
 
-            let mut matches = self
-                .syntax
-                .matches(chunk_range.clone(), &self.text, |grammar| {
-                    grammar.brackets_config.as_ref().map(|c| &c.query)
-                });
+            let mut matches = self.syntax.matches_with_options(
+                chunk_range.clone(),
+                &self.text,
+                TreeSitterOptions {
+                    max_bytes_to_query: Some(MAX_BYTES_TO_QUERY),
+                    max_start_depth: None,
+                },
+                |grammar| grammar.brackets_config.as_ref().map(|c| &c.query),
+            );
             let configs = matches
                 .grammars()
                 .iter()

crates/language/src/syntax_map.rs 🔗

@@ -21,6 +21,8 @@ use sum_tree::{Bias, Dimensions, SeekTarget, SumTree};
 use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
 use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};
 
+pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024;
+
 pub struct SyntaxMap {
     snapshot: SyntaxSnapshot,
     language_registry: Option<Arc<LanguageRegistry>>,
@@ -1096,12 +1098,15 @@ impl<'a> SyntaxMapCaptures<'a> {
 
 #[derive(Default)]
 pub struct TreeSitterOptions {
-    max_start_depth: Option<u32>,
+    pub max_start_depth: Option<u32>,
+    pub max_bytes_to_query: Option<usize>,
 }
+
 impl TreeSitterOptions {
     pub fn max_start_depth(max_start_depth: u32) -> Self {
         Self {
             max_start_depth: Some(max_start_depth),
+            max_bytes_to_query: None,
         }
     }
 }
@@ -1135,6 +1140,14 @@ impl<'a> SyntaxMapMatches<'a> {
             };
             cursor.set_max_start_depth(options.max_start_depth);
 
+            if let Some(max_bytes_to_query) = options.max_bytes_to_query {
+                let midpoint = (range.start + range.end) / 2;
+                let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2);
+                let containing_range_end =
+                    containing_range_start.saturating_add(max_bytes_to_query);
+                cursor.set_containing_byte_range(containing_range_start..containing_range_end);
+            }
+
             cursor.set_byte_range(range.clone());
             let matches = cursor.matches(query, layer.node(), TextProvider(text));
             let grammar_index = result
@@ -1642,6 +1655,10 @@ impl<'a> SyntaxLayer<'a> {
 
         let mut query_cursor = QueryCursorHandle::new();
         query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
+        query_cursor.set_containing_byte_range(
+            offset.saturating_sub(MAX_BYTES_TO_QUERY / 2)
+                ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2),
+        );
 
         let mut smallest_match: Option<(u32, Range<usize>)> = None;
         let mut matches = query_cursor.matches(&config.query, self.node(), text);
@@ -1928,6 +1945,8 @@ impl Drop for QueryCursorHandle {
         let mut cursor = self.0.take().unwrap();
         cursor.set_byte_range(0..usize::MAX);
         cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
+        cursor.set_containing_byte_range(0..usize::MAX);
+        cursor.set_containing_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
         QUERY_CURSORS.lock().push(cursor)
     }
 }