Use SyntaxMap in Buffer

Max Brunsfeld created

Change summary

crates/language/src/buffer.rs                | 614 +++++++++------------
crates/language/src/language.rs              | 144 ++++
crates/language/src/syntax_map.rs            | 571 ++++++++++++++-----
crates/language/src/tests.rs                 |   4 
crates/project/src/project.rs                |   1 
crates/zed/src/languages/rust/injections.scm |   6 
6 files changed, 819 insertions(+), 521 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -6,13 +6,15 @@ pub use crate::{
 use crate::{
     diagnostic_set::{DiagnosticEntry, DiagnosticGroup},
     outline::OutlineItem,
+    syntax_map::{
+        SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxSnapshot, ToTreeSitterPoint,
+    },
     CodeLabel, Outline,
 };
 use anyhow::{anyhow, Result};
 use clock::ReplicaId;
 use futures::FutureExt as _;
 use gpui::{fonts::HighlightStyle, AppContext, Entity, ModelContext, MutableAppContext, Task};
-use lazy_static::lazy_static;
 use parking_lot::Mutex;
 use settings::Settings;
 use similar::{ChangeTag, TextDiff};
@@ -25,7 +27,7 @@ use std::{
     future::Future,
     iter::{self, Iterator, Peekable},
     mem,
-    ops::{Deref, DerefMut, Range},
+    ops::{Deref, Range},
     path::{Path, PathBuf},
     str,
     sync::Arc,
@@ -36,7 +38,6 @@ use sum_tree::TreeMap;
 use text::operation_queue::OperationQueue;
 pub use text::{Buffer as TextBuffer, BufferSnapshot as TextBufferSnapshot, Operation as _, *};
 use theme::SyntaxTheme;
-use tree_sitter::{InputEdit, QueryCursor, Tree};
 use util::TryFutureExt as _;
 
 #[cfg(any(test, feature = "test-support"))]
@@ -44,10 +45,6 @@ pub use {tree_sitter_rust, tree_sitter_typescript};
 
 pub use lsp::DiagnosticSeverity;
 
-lazy_static! {
-    static ref QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Default::default();
-}
-
 pub struct Buffer {
     text: TextBuffer,
     file: Option<Arc<dyn File>>,
@@ -60,7 +57,7 @@ pub struct Buffer {
     autoindent_requests: Vec<Arc<AutoindentRequest>>,
     pending_autoindent: Option<Task<()>>,
     sync_parse_timeout: Duration,
-    syntax_tree: Mutex<Option<SyntaxTree>>,
+    syntax_map: Mutex<SyntaxMap>,
     parsing_in_background: bool,
     parse_count: usize,
     diagnostics: DiagnosticSet,
@@ -75,7 +72,7 @@ pub struct Buffer {
 
 pub struct BufferSnapshot {
     text: text::BufferSnapshot,
-    tree: Option<Tree>,
+    syntax: SyntaxSnapshot,
     file: Option<Arc<dyn File>>,
     diagnostics: DiagnosticSet,
     diagnostics_update_count: usize,
@@ -221,14 +218,6 @@ pub trait LocalFile: File {
     );
 }
 
-pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
-
-#[derive(Clone)]
-struct SyntaxTree {
-    tree: Tree,
-    version: clock::Global,
-}
-
 #[derive(Clone, Debug)]
 pub enum AutoindentMode {
     /// Indent each line of inserted text.
@@ -268,14 +257,11 @@ struct IndentSuggestion {
     delta: Ordering,
 }
 
-pub(crate) struct TextProvider<'a>(pub(crate) &'a Rope);
-
 struct BufferChunkHighlights<'a> {
-    captures: tree_sitter::QueryCaptures<'a, 'a, TextProvider<'a>>,
-    next_capture: Option<(tree_sitter::QueryMatch<'a, 'a>, usize)>,
+    captures: SyntaxMapCaptures<'a>,
+    next_capture: Option<SyntaxMapCapture<'a>>,
     stack: Vec<(usize, HighlightId)>,
-    highlight_map: HighlightMap,
-    _query_cursor: QueryCursorHandle,
+    highlight_maps: Vec<HighlightMap>,
 }
 
 pub struct BufferChunks<'a> {
@@ -456,7 +442,7 @@ impl Buffer {
             was_dirty_before_starting_transaction: None,
             text: buffer,
             file,
-            syntax_tree: Mutex::new(None),
+            syntax_map: Mutex::new(SyntaxMap::new()),
             parsing_in_background: false,
             parse_count: 0,
             sync_parse_timeout: Duration::from_millis(1),
@@ -477,7 +463,7 @@ impl Buffer {
     pub fn snapshot(&self) -> BufferSnapshot {
         BufferSnapshot {
             text: self.text.snapshot(),
-            tree: self.syntax_tree(),
+            syntax: self.syntax_map(),
             file: self.file.clone(),
             remote_selections: self.remote_selections.clone(),
             diagnostics: self.diagnostics.clone(),
@@ -533,11 +519,17 @@ impl Buffer {
     }
 
     pub fn set_language(&mut self, language: Option<Arc<Language>>, cx: &mut ModelContext<Self>) {
-        *self.syntax_tree.lock() = None;
+        self.syntax_map.lock().clear();
         self.language = language;
         self.reparse(cx);
     }
 
+    pub fn set_language_registry(&mut self, language_registry: Arc<LanguageRegistry>) {
+        self.syntax_map
+            .lock()
+            .set_language_registry(language_registry);
+    }
+
     pub fn did_save(
         &mut self,
         version: clock::Global,
@@ -682,13 +674,10 @@ impl Buffer {
         self.file_update_count
     }
 
-    pub(crate) fn syntax_tree(&self) -> Option<Tree> {
-        if let Some(syntax_tree) = self.syntax_tree.lock().as_mut() {
-            self.interpolate_tree(syntax_tree);
-            Some(syntax_tree.tree.clone())
-        } else {
-            None
-        }
+    pub(crate) fn syntax_map(&self) -> SyntaxSnapshot {
+        let mut syntax_map = self.syntax_map.lock();
+        syntax_map.interpolate(&self.text_snapshot());
+        syntax_map.snapshot()
     }
 
     #[cfg(any(test, feature = "test-support"))]
@@ -706,35 +695,49 @@ impl Buffer {
             return false;
         }
 
-        if let Some(grammar) = self.grammar().cloned() {
-            let old_tree = self.syntax_tree();
-            let text = self.as_rope().clone();
+        if let Some(language) = self.language.clone() {
+            let text = self.text_snapshot();
             let parsed_version = self.version();
+
+            let mut syntax_map;
+            let language_registry;
+            let syntax_map_version;
+            {
+                let mut map = self.syntax_map.lock();
+                map.interpolate(&text);
+                language_registry = map.language_registry();
+                syntax_map = map.snapshot();
+                syntax_map_version = map.parsed_version();
+            }
             let parse_task = cx.background().spawn({
-                let grammar = grammar.clone();
-                async move { grammar.parse_text(&text, old_tree) }
+                let language = language.clone();
+                async move {
+                    syntax_map.reparse(&syntax_map_version, &text, language_registry, language);
+                    syntax_map
+                }
             });
 
             match cx
                 .background()
                 .block_with_timeout(self.sync_parse_timeout, parse_task)
             {
-                Ok(new_tree) => {
-                    self.did_finish_parsing(new_tree, parsed_version, cx);
+                Ok(new_syntax_map) => {
+                    self.did_finish_parsing(new_syntax_map, parsed_version, cx);
                     return true;
                 }
                 Err(parse_task) => {
                     self.parsing_in_background = true;
                     cx.spawn(move |this, mut cx| async move {
-                        let new_tree = parse_task.await;
+                        let new_syntax_map = parse_task.await;
                         this.update(&mut cx, move |this, cx| {
-                            let grammar_changed = this
-                                .grammar()
-                                .map_or(true, |curr_grammar| !Arc::ptr_eq(&grammar, curr_grammar));
+                            let grammar_changed =
+                                this.language.as_ref().map_or(true, |current_language| {
+                                    !Arc::ptr_eq(&language, current_language)
+                                });
                             let parse_again =
                                 this.version.changed_since(&parsed_version) || grammar_changed;
                             this.parsing_in_background = false;
-                            this.did_finish_parsing(new_tree, parsed_version, cx);
+                            this.did_finish_parsing(new_syntax_map, parsed_version, cx);
 
                             if parse_again && this.reparse(cx) {}
                         });
@@ -746,30 +749,14 @@ impl Buffer {
         false
     }
 
-    fn interpolate_tree(&self, tree: &mut SyntaxTree) {
-        for edit in self.edits_since::<(usize, Point)>(&tree.version) {
-            let (bytes, lines) = edit.flatten();
-            tree.tree.edit(&InputEdit {
-                start_byte: bytes.new.start,
-                old_end_byte: bytes.new.start + bytes.old.len(),
-                new_end_byte: bytes.new.end,
-                start_position: lines.new.start.to_ts_point(),
-                old_end_position: (lines.new.start + (lines.old.end - lines.old.start))
-                    .to_ts_point(),
-                new_end_position: lines.new.end.to_ts_point(),
-            });
-        }
-        tree.version = self.version();
-    }
-
     fn did_finish_parsing(
         &mut self,
-        tree: Tree,
+        syntax_map: SyntaxSnapshot,
         version: clock::Global,
         cx: &mut ModelContext<Self>,
     ) {
         self.parse_count += 1;
-        *self.syntax_tree.lock() = Some(SyntaxTree { tree, version });
+        self.syntax_map.lock().did_parse(syntax_map, version);
         self.request_autoindent(cx);
         cx.emit(Event::Reparsed);
         cx.notify();
@@ -808,10 +795,7 @@ impl Buffer {
     fn compute_autoindents(&self) -> Option<impl Future<Output = BTreeMap<u32, IndentSize>>> {
         let max_rows_between_yields = 100;
         let snapshot = self.snapshot();
-        if snapshot.language.is_none()
-            || snapshot.tree.is_none()
-            || self.autoindent_requests.is_empty()
-        {
+        if snapshot.syntax.is_empty() || self.autoindent_requests.is_empty() {
             return None;
         }
 
@@ -1310,10 +1294,6 @@ impl Buffer {
         cx.notify();
     }
 
-    fn grammar(&self) -> Option<&Arc<Grammar>> {
-        self.language.as_ref().and_then(|l| l.grammar.as_ref())
-    }
-
     pub fn apply_ops<I: IntoIterator<Item = Operation>>(
         &mut self,
         ops: I,
@@ -1654,32 +1634,30 @@ impl BufferSnapshot {
         let prev_non_blank_row = self.prev_non_blank_row(row_range.start);
 
         // Find the suggested indentation ranges based on the syntax tree.
-        let indents_query = grammar.indents_query.as_ref()?;
-        let mut query_cursor = QueryCursorHandle::new();
-        let indent_capture_ix = indents_query.capture_index_for_name("indent");
-        let end_capture_ix = indents_query.capture_index_for_name("end");
-        query_cursor.set_point_range(
-            Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0).to_ts_point()
-                ..Point::new(row_range.end, 0).to_ts_point(),
-        );
+        let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0);
+        let end = Point::new(row_range.end, 0);
+        let range = (start..end).to_offset(&self.text);
+        let mut matches = self.syntax.matches(range, &self.text, |grammar| {
+            Some(&grammar.indents_config.as_ref()?.query)
+        });
 
         let mut indent_ranges = Vec::<Range<Point>>::new();
-        for mat in query_cursor.matches(
-            indents_query,
-            self.tree.as_ref()?.root_node(),
-            TextProvider(self.as_rope()),
-        ) {
+        while let Some(mat) = matches.peek() {
             let mut start: Option<Point> = None;
             let mut end: Option<Point> = None;
-            for capture in mat.captures {
-                if Some(capture.index) == indent_capture_ix {
-                    start.get_or_insert(Point::from_ts_point(capture.node.start_position()));
-                    end.get_or_insert(Point::from_ts_point(capture.node.end_position()));
-                } else if Some(capture.index) == end_capture_ix {
-                    end = Some(Point::from_ts_point(capture.node.start_position()));
+
+            if let Some(config) = &grammar.indents_config {
+                for capture in mat.captures {
+                    if capture.index == config.indent_capture_ix {
+                        start.get_or_insert(Point::from_ts_point(capture.node.start_position()));
+                        end.get_or_insert(Point::from_ts_point(capture.node.end_position()));
+                    } else if Some(capture.index) == config.end_capture_ix {
+                        end = Some(Point::from_ts_point(capture.node.start_position()));
+                    }
                 }
             }
 
+            matches.advance();
             if let Some((start, end)) = start.zip(end) {
                 if start.row == end.row {
                     continue;
@@ -1811,10 +1789,18 @@ impl BufferSnapshot {
     pub fn chunks<T: ToOffset>(&self, range: Range<T>, language_aware: bool) -> BufferChunks {
         let range = range.start.to_offset(self)..range.end.to_offset(self);
 
-        let mut tree = None;
+        let mut syntax = None;
         let mut diagnostic_endpoints = Vec::new();
         if language_aware {
-            tree = self.tree.as_ref();
+            let captures = self.syntax.captures(range.clone(), &self.text, |grammar| {
+                grammar.highlights_query.as_ref()
+            });
+            let highlight_maps = captures
+                .grammars()
+                .into_iter()
+                .map(|grammar| grammar.highlight_map())
+                .collect();
+            syntax = Some((captures, highlight_maps));
             for entry in self.diagnostics_in_range::<_, usize>(range.clone(), false) {
                 diagnostic_endpoints.push(DiagnosticEndpoint {
                     offset: entry.range.start,
@@ -1833,13 +1819,7 @@ impl BufferSnapshot {
                 .sort_unstable_by_key(|endpoint| (endpoint.offset, !endpoint.is_start));
         }
 
-        BufferChunks::new(
-            self.text.as_rope(),
-            range,
-            tree,
-            self.grammar(),
-            diagnostic_endpoints,
-        )
+        BufferChunks::new(self.text.as_rope(), range, syntax, diagnostic_endpoints)
     }
 
     pub fn for_each_line(&self, range: Range<Point>, mut callback: impl FnMut(u32, &str)) {
@@ -1865,12 +1845,6 @@ impl BufferSnapshot {
         self.language.as_ref()
     }
 
-    fn grammar(&self) -> Option<&Arc<Grammar>> {
-        self.language
-            .as_ref()
-            .and_then(|language| language.grammar.as_ref())
-    }
-
     pub fn surrounding_word<T: ToOffset>(&self, start: T) -> (Range<usize>, Option<CharKind>) {
         let mut start = start.to_offset(self);
         let mut end = start;
@@ -1901,61 +1875,71 @@ impl BufferSnapshot {
     }
 
     pub fn range_for_syntax_ancestor<T: ToOffset>(&self, range: Range<T>) -> Option<Range<usize>> {
-        let tree = self.tree.as_ref()?;
         let range = range.start.to_offset(self)..range.end.to_offset(self);
-        let mut cursor = tree.root_node().walk();
-
-        // Descend to the first leaf that touches the start of the range,
-        // and if the range is non-empty, extends beyond the start.
-        while cursor.goto_first_child_for_byte(range.start).is_some() {
-            if !range.is_empty() && cursor.node().end_byte() == range.start {
-                cursor.goto_next_sibling();
+        let mut result: Option<Range<usize>> = None;
+        'outer: for (_, _, node) in self.syntax.layers_for_range(range.clone(), &self.text) {
+            let mut cursor = node.walk();
+
+            // Descend to the first leaf that touches the start of the range,
+            // and if the range is non-empty, extends beyond the start.
+            while cursor.goto_first_child_for_byte(range.start).is_some() {
+                if !range.is_empty() && cursor.node().end_byte() == range.start {
+                    cursor.goto_next_sibling();
+                }
             }
-        }
 
-        // Ascend to the smallest ancestor that strictly contains the range.
-        loop {
-            let node_range = cursor.node().byte_range();
-            if node_range.start <= range.start
-                && node_range.end >= range.end
-                && node_range.len() > range.len()
-            {
-                break;
-            }
-            if !cursor.goto_parent() {
-                break;
+            // Ascend to the smallest ancestor that strictly contains the range.
+            loop {
+                let node_range = cursor.node().byte_range();
+                if node_range.start <= range.start
+                    && node_range.end >= range.end
+                    && node_range.len() > range.len()
+                {
+                    break;
+                }
+                if !cursor.goto_parent() {
+                    continue 'outer;
+                }
             }
-        }
 
-        let left_node = cursor.node();
+            let left_node = cursor.node();
+            let mut layer_result = left_node.byte_range();
 
-        // For an empty range, try to find another node immediately to the right of the range.
-        if left_node.end_byte() == range.start {
-            let mut right_node = None;
-            while !cursor.goto_next_sibling() {
-                if !cursor.goto_parent() {
-                    break;
+            // For an empty range, try to find another node immediately to the right of the range.
+            if left_node.end_byte() == range.start {
+                let mut right_node = None;
+                while !cursor.goto_next_sibling() {
+                    if !cursor.goto_parent() {
+                        break;
+                    }
                 }
-            }
 
-            while cursor.node().start_byte() == range.start {
-                right_node = Some(cursor.node());
-                if !cursor.goto_first_child() {
-                    break;
+                while cursor.node().start_byte() == range.start {
+                    right_node = Some(cursor.node());
+                    if !cursor.goto_first_child() {
+                        break;
+                    }
+                }
+
+                // If there is a candidate node on both sides of the (empty) range, then
+                // decide between the two by favoring a named node over an anonymous token.
+                // If both nodes are the same in that regard, favor the right one.
+                if let Some(right_node) = right_node {
+                    if right_node.is_named() || !left_node.is_named() {
+                        layer_result = right_node.byte_range();
+                    }
                 }
             }
 
-            // If there is a candidate node on both sides of the (empty) range, then
-            // decide between the two by favoring a named node over an anonymous token.
-            // If both nodes are the same in that regard, favor the right one.
-            if let Some(right_node) = right_node {
-                if right_node.is_named() || !left_node.is_named() {
-                    return Some(right_node.byte_range());
+            if let Some(previous_result) = &result {
+                if previous_result.len() < layer_result.len() {
+                    continue;
                 }
             }
+            result = Some(layer_result);
         }
 
-        Some(left_node.byte_range())
+        result
     }
 
     pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option<Outline<Anchor>> {
@@ -1985,109 +1969,107 @@ impl BufferSnapshot {
         range: Range<usize>,
         theme: Option<&SyntaxTheme>,
     ) -> Option<Vec<OutlineItem<Anchor>>> {
-        let tree = self.tree.as_ref()?;
-        let grammar = self
-            .language
-            .as_ref()
-            .and_then(|language| language.grammar.as_ref())?;
-
-        let outline_query = grammar.outline_query.as_ref()?;
-        let mut cursor = QueryCursorHandle::new();
-        cursor.set_byte_range(range.clone());
-        let matches = cursor.matches(
-            outline_query,
-            tree.root_node(),
-            TextProvider(self.as_rope()),
-        );
+        let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| {
+            grammar.outline_config.as_ref().map(|c| &c.query)
+        });
+        let configs = matches
+            .grammars()
+            .iter()
+            .map(|g| g.outline_config.as_ref().unwrap())
+            .collect::<Vec<_>>();
 
         let mut chunks = self.chunks(0..self.len(), true);
+        let mut stack = Vec::<Range<usize>>::new();
+        let mut items = Vec::new();
+        while let Some(mat) = matches.peek() {
+            let config = &configs[mat.grammar_index];
+            let item_node = mat.captures.iter().find_map(|cap| {
+                if cap.index == config.item_capture_ix {
+                    Some(cap.node)
+                } else {
+                    None
+                }
+            })?;
 
-        let item_capture_ix = outline_query.capture_index_for_name("item")?;
-        let name_capture_ix = outline_query.capture_index_for_name("name")?;
-        let context_capture_ix = outline_query
-            .capture_index_for_name("context")
-            .unwrap_or(u32::MAX);
+            let item_range = item_node.byte_range();
+            if item_range.end < range.start || item_range.start > range.end {
+                matches.advance();
+                continue;
+            }
 
-        let mut stack = Vec::<Range<usize>>::new();
-        let items = matches
-            .filter_map(|mat| {
-                let item_node = mat.nodes_for_capture_index(item_capture_ix).next()?;
-                let item_range = item_node.start_byte()..item_node.end_byte();
-                if item_range.end < range.start || item_range.start > range.end {
-                    return None;
+            // TODO - move later, after processing captures
+
+            let mut text = String::new();
+            let mut name_ranges = Vec::new();
+            let mut highlight_ranges = Vec::new();
+            for capture in mat.captures {
+                let node_is_name;
+                if capture.index == config.name_capture_ix {
+                    node_is_name = true;
+                } else if Some(capture.index) == config.context_capture_ix {
+                    node_is_name = false;
+                } else {
+                    continue;
                 }
-                let mut text = String::new();
-                let mut name_ranges = Vec::new();
-                let mut highlight_ranges = Vec::new();
 
-                for capture in mat.captures {
-                    let node_is_name;
-                    if capture.index == name_capture_ix {
-                        node_is_name = true;
-                    } else if capture.index == context_capture_ix {
-                        node_is_name = false;
-                    } else {
-                        continue;
+                let range = capture.node.start_byte()..capture.node.end_byte();
+                if !text.is_empty() {
+                    text.push(' ');
+                }
+                if node_is_name {
+                    let mut start = text.len();
+                    let end = start + range.len();
+
+                    // When multiple names are captured, then the matcheable text
+                    // includes the whitespace in between the names.
+                    if !name_ranges.is_empty() {
+                        start -= 1;
                     }
 
-                    let range = capture.node.start_byte()..capture.node.end_byte();
-                    if !text.is_empty() {
-                        text.push(' ');
-                    }
-                    if node_is_name {
-                        let mut start = text.len();
-                        let end = start + range.len();
-
-                        // When multiple names are captured, then the matcheable text
-                        // includes the whitespace in between the names.
-                        if !name_ranges.is_empty() {
-                            start -= 1;
-                        }
+                    name_ranges.push(start..end);
+                }
 
-                        name_ranges.push(start..end);
+                let mut offset = range.start;
+                chunks.seek(offset);
+                for mut chunk in chunks.by_ref() {
+                    if chunk.text.len() > range.end - offset {
+                        chunk.text = &chunk.text[0..(range.end - offset)];
+                        offset = range.end;
+                    } else {
+                        offset += chunk.text.len();
                     }
-
-                    let mut offset = range.start;
-                    chunks.seek(offset);
-                    for mut chunk in chunks.by_ref() {
-                        if chunk.text.len() > range.end - offset {
-                            chunk.text = &chunk.text[0..(range.end - offset)];
-                            offset = range.end;
-                        } else {
-                            offset += chunk.text.len();
-                        }
-                        let style = chunk
-                            .syntax_highlight_id
-                            .zip(theme)
-                            .and_then(|(highlight, theme)| highlight.style(theme));
-                        if let Some(style) = style {
-                            let start = text.len();
-                            let end = start + chunk.text.len();
-                            highlight_ranges.push((start..end, style));
-                        }
-                        text.push_str(chunk.text);
-                        if offset >= range.end {
-                            break;
-                        }
+                    let style = chunk
+                        .syntax_highlight_id
+                        .zip(theme)
+                        .and_then(|(highlight, theme)| highlight.style(theme));
+                    if let Some(style) = style {
+                        let start = text.len();
+                        let end = start + chunk.text.len();
+                        highlight_ranges.push((start..end, style));
+                    }
+                    text.push_str(chunk.text);
+                    if offset >= range.end {
+                        break;
                     }
                 }
+            }
 
-                while stack.last().map_or(false, |prev_range| {
-                    prev_range.start > item_range.start || prev_range.end < item_range.end
-                }) {
-                    stack.pop();
-                }
-                stack.push(item_range.clone());
-
-                Some(OutlineItem {
-                    depth: stack.len() - 1,
-                    range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end),
-                    text,
-                    highlight_ranges,
-                    name_ranges,
-                })
+            matches.advance();
+            while stack.last().map_or(false, |prev_range| {
+                prev_range.start > item_range.start || prev_range.end < item_range.end
+            }) {
+                stack.pop();
+            }
+            stack.push(item_range.clone());
+
+            items.push(OutlineItem {
+                depth: stack.len() - 1,
+                range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end),
+                text,
+                highlight_ranges,
+                name_ranges,
             })
-            .collect::<Vec<_>>();
+        }
         Some(items)
     }
 
@@ -2095,28 +2077,48 @@ impl BufferSnapshot {
         &self,
         range: Range<T>,
     ) -> Option<(Range<usize>, Range<usize>)> {
-        let (grammar, tree) = self.grammar().zip(self.tree.as_ref())?;
-        let brackets_query = grammar.brackets_query.as_ref()?;
-        let open_capture_ix = brackets_query.capture_index_for_name("open")?;
-        let close_capture_ix = brackets_query.capture_index_for_name("close")?;
-
         // Find bracket pairs that *inclusively* contain the given range.
         let range = range.start.to_offset(self).saturating_sub(1)..range.end.to_offset(self) + 1;
-        let mut cursor = QueryCursorHandle::new();
-        let matches = cursor.set_byte_range(range).matches(
-            brackets_query,
-            tree.root_node(),
-            TextProvider(self.as_rope()),
-        );
+        let mut matches = self.syntax.matches(range, &self.text, |grammar| {
+            grammar.brackets_config.as_ref().map(|c| &c.query)
+        });
+        let configs = matches
+            .grammars()
+            .iter()
+            .map(|grammar| grammar.brackets_config.as_ref().unwrap())
+            .collect::<Vec<_>>();
 
         // Get the ranges of the innermost pair of brackets.
-        matches
-            .filter_map(|mat| {
-                let open = mat.nodes_for_capture_index(open_capture_ix).next()?;
-                let close = mat.nodes_for_capture_index(close_capture_ix).next()?;
-                Some((open.byte_range(), close.byte_range()))
-            })
-            .min_by_key(|(open_range, close_range)| close_range.end - open_range.start)
+        let mut result: Option<(Range<usize>, Range<usize>)> = None;
+        while let Some(mat) = matches.peek() {
+            let mut open = None;
+            let mut close = None;
+            let config = &configs[mat.grammar_index];
+            for capture in mat.captures {
+                if capture.index == config.open_capture_ix {
+                    open = Some(capture.node.byte_range());
+                } else if capture.index == config.close_capture_ix {
+                    close = Some(capture.node.byte_range());
+                }
+            }
+
+            matches.advance();
+
+            if let Some((open, close)) = open.zip(close) {
+                let len = close.end - open.start;
+
+                if let Some((existing_open, existing_close)) = &result {
+                    let existing_len = existing_close.end - existing_open.start;
+                    if len > existing_len {
+                        continue;
+                    }
+                }
+
+                result = Some((open, close));
+            }
+        }
+
+        result
     }
 
     #[allow(clippy::type_complexity)]
@@ -2228,7 +2230,7 @@ impl Clone for BufferSnapshot {
     fn clone(&self) -> Self {
         Self {
             text: self.text.clone(),
-            tree: self.tree.clone(),
+            syntax: self.syntax.clone(),
             file: self.file.clone(),
             remote_selections: self.remote_selections.clone(),
             diagnostics: self.diagnostics.clone(),
@@ -2249,56 +2251,23 @@ impl Deref for BufferSnapshot {
     }
 }
 
-impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> {
-    type I = ByteChunks<'a>;
-
-    fn text(&mut self, node: tree_sitter::Node) -> Self::I {
-        ByteChunks(self.0.chunks_in_range(node.byte_range()))
-    }
-}
-
-pub(crate) struct ByteChunks<'a>(rope::Chunks<'a>);
-
-impl<'a> Iterator for ByteChunks<'a> {
-    type Item = &'a [u8];
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.0.next().map(str::as_bytes)
-    }
-}
-
 unsafe impl<'a> Send for BufferChunks<'a> {}
 
 impl<'a> BufferChunks<'a> {
     pub(crate) fn new(
         text: &'a Rope,
         range: Range<usize>,
-        tree: Option<&'a Tree>,
-        grammar: Option<&'a Arc<Grammar>>,
+        syntax: Option<(SyntaxMapCaptures<'a>, Vec<HighlightMap>)>,
         diagnostic_endpoints: Vec<DiagnosticEndpoint>,
     ) -> Self {
         let mut highlights = None;
-        if let Some((grammar, tree)) = grammar.zip(tree) {
-            if let Some(highlights_query) = grammar.highlights_query.as_ref() {
-                let mut query_cursor = QueryCursorHandle::new();
-
-                // TODO - add a Tree-sitter API to remove the need for this.
-                let cursor = unsafe {
-                    std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
-                };
-                let captures = cursor.set_byte_range(range.clone()).captures(
-                    highlights_query,
-                    tree.root_node(),
-                    TextProvider(text),
-                );
-                highlights = Some(BufferChunkHighlights {
-                    captures,
-                    next_capture: None,
-                    stack: Default::default(),
-                    highlight_map: grammar.highlight_map(),
-                    _query_cursor: query_cursor,
-                })
-            }
+        if let Some((captures, highlight_maps)) = syntax {
+            highlights = Some(BufferChunkHighlights {
+                captures,
+                next_capture: None,
+                stack: Default::default(),
+                highlight_maps,
+            })
         }
 
         let diagnostic_endpoints = diagnostic_endpoints.into_iter().peekable();
@@ -2324,14 +2293,13 @@ impl<'a> BufferChunks<'a> {
             highlights
                 .stack
                 .retain(|(end_offset, _)| *end_offset > offset);
-            if let Some((mat, capture_ix)) = &highlights.next_capture {
-                let capture = mat.captures[*capture_ix as usize];
+            if let Some(capture) = &highlights.next_capture {
                 if offset >= capture.node.start_byte() {
                     let next_capture_end = capture.node.end_byte();
                     if offset < next_capture_end {
                         highlights.stack.push((
                             next_capture_end,
-                            highlights.highlight_map.get(capture.index),
+                            highlights.highlight_maps[capture.grammar_index].get(capture.index),
                         ));
                     }
                     highlights.next_capture.take();
@@ -2407,13 +2375,13 @@ impl<'a> Iterator for BufferChunks<'a> {
                 highlights.next_capture = highlights.captures.next();
             }
 
-            while let Some((mat, capture_ix)) = highlights.next_capture.as_ref() {
-                let capture = mat.captures[*capture_ix as usize];
+            while let Some(capture) = highlights.next_capture.as_ref() {
                 if self.range.start < capture.node.start_byte() {
                     next_capture_start = capture.node.start_byte();
                     break;
                 } else {
-                    let highlight_id = highlights.highlight_map.get(capture.index);
+                    let highlight_id =
+                        highlights.highlight_maps[capture.grammar_index].get(capture.index);
                     highlights
                         .stack
                         .push((capture.node.end_byte(), highlight_id));
@@ -2465,52 +2433,6 @@ impl<'a> Iterator for BufferChunks<'a> {
     }
 }
 
-impl QueryCursorHandle {
-    pub(crate) fn new() -> Self {
-        let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
-        cursor.set_match_limit(64);
-        QueryCursorHandle(Some(cursor))
-    }
-}
-
-impl Deref for QueryCursorHandle {
-    type Target = QueryCursor;
-
-    fn deref(&self) -> &Self::Target {
-        self.0.as_ref().unwrap()
-    }
-}
-
-impl DerefMut for QueryCursorHandle {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.0.as_mut().unwrap()
-    }
-}
-
-impl Drop for QueryCursorHandle {
-    fn drop(&mut self) {
-        let mut cursor = self.0.take().unwrap();
-        cursor.set_byte_range(0..usize::MAX);
-        cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
-        QUERY_CURSORS.lock().push(cursor)
-    }
-}
-
-pub(crate) trait ToTreeSitterPoint {
-    fn to_ts_point(self) -> tree_sitter::Point;
-    fn from_ts_point(point: tree_sitter::Point) -> Self;
-}
-
-impl ToTreeSitterPoint for Point {
-    fn to_ts_point(self) -> tree_sitter::Point {
-        tree_sitter::Point::new(self.row as usize, self.column as usize)
-    }
-
-    fn from_ts_point(point: tree_sitter::Point) -> Self {
-        Point::new(point.row as u32, point.column as u32)
-    }
-}
-
 impl operation_queue::Operation for Operation {
     fn lamport_timestamp(&self) -> clock::Lamport {
         match self {

crates/language/src/language.rs 🔗

@@ -30,8 +30,12 @@ use std::{
     ops::Range,
     path::{Path, PathBuf},
     str,
-    sync::Arc,
+    sync::{
+        atomic::{AtomicUsize, Ordering::SeqCst},
+        Arc,
+    },
 };
+use syntax_map::SyntaxSnapshot;
 use theme::{SyntaxTheme, Theme};
 use tree_sitter::{self, Query};
 use util::ResultExt;
@@ -50,6 +54,7 @@ thread_local! {
 }
 
 lazy_static! {
+    pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
     pub static ref PLAIN_TEXT: Arc<Language> = Arc::new(Language::new(
         LanguageConfig {
             name: "Plain Text".into(),
@@ -286,15 +291,29 @@ pub struct Language {
 }
 
+/// A tree-sitter language together with the queries configured for it.
 pub struct Grammar {
+    /// Identifier drawn from the global `NEXT_GRAMMAR_ID` counter when the grammar is
+    /// constructed in `Language::new`; exposed through `Grammar::id`.
+    id: usize,
     pub(crate) ts_language: tree_sitter::Language,
     pub(crate) highlights_query: Option<Query>,
-    pub(crate) brackets_query: Option<Query>,
-    pub(crate) indents_query: Option<Query>,
-    pub(crate) outline_query: Option<Query>,
+    /// Set by `Language::with_brackets_query`; `None` until a usable query is supplied.
+    pub(crate) brackets_config: Option<BracketConfig>,
+    /// Set by `Language::with_indents_query`; `None` until a usable query is supplied.
+    pub(crate) indents_config: Option<IndentConfig>,
+    /// Set by `Language::with_outline_query`; `None` until a usable query is supplied.
+    pub(crate) outline_config: Option<OutlineConfig>,
     pub(crate) injection_config: Option<InjectionConfig>,
     pub(crate) highlight_map: Mutex<HighlightMap>,
 }
 
+/// Indentation query plus the capture indices resolved from it by
+/// `Language::with_indents_query`.
+struct IndentConfig {
+    query: Query,
+    /// Index of the query's `indent` capture (required).
+    indent_capture_ix: u32,
+    /// Index of the query's `end` capture, if the query declares one.
+    end_capture_ix: Option<u32>,
+}
+
+/// Outline query plus the capture indices resolved from it by
+/// `Language::with_outline_query`.
+struct OutlineConfig {
+    query: Query,
+    /// Index of the query's `item` capture (required).
+    item_capture_ix: u32,
+    /// Index of the query's `name` capture (required).
+    name_capture_ix: u32,
+    /// Index of the query's `context` capture, if the query declares one.
+    context_capture_ix: Option<u32>,
+}
+
 struct InjectionConfig {
     query: Query,
     content_capture_ix: u32,
@@ -302,6 +321,12 @@ struct InjectionConfig {
     languages_by_pattern_ix: Vec<Option<Box<str>>>,
 }
 
+/// Bracket-matching query plus the capture indices resolved from it by
+/// `Language::with_brackets_query`.
+struct BracketConfig {
+    query: Query,
+    /// Index of the query's `open` capture.
+    open_capture_ix: u32,
+    /// Index of the query's `close` capture.
+    close_capture_ix: u32,
+}
+
 #[derive(Clone)]
 pub enum LanguageServerBinaryStatus {
     CheckingForUpdate,
@@ -499,6 +524,13 @@ impl LanguageRegistry {
     }
 }
 
+/// Test-only `Default` for `LanguageRegistry`; it simply delegates to `LanguageRegistry::test()`.
+#[cfg(any(test, feature = "test-support"))]
+impl Default for LanguageRegistry {
+    fn default() -> Self {
+        Self::test()
+    }
+}
+
 async fn get_server_binary_path(
     adapter: Arc<CachedLspAdapter>,
     language: Arc<Language>,
@@ -576,10 +608,11 @@ impl Language {
             config,
             grammar: ts_language.map(|ts_language| {
                 Arc::new(Grammar {
+                    id: NEXT_GRAMMAR_ID.fetch_add(1, SeqCst),
                     highlights_query: None,
-                    brackets_query: None,
-                    indents_query: None,
-                    outline_query: None,
+                    brackets_config: None,
+                    outline_config: None,
+                    indents_config: None,
                     injection_config: None,
                     ts_language,
                     highlight_map: Default::default(),
@@ -604,19 +637,70 @@ impl Language {
 
+    /// Compiles `source` as this language's bracket query and records the indices of its
+    /// `open` and `close` captures.
+    ///
+    /// Returns an error if the query fails to compile. If either capture is missing, the
+    /// query is dropped, no `brackets_config` is installed, and `Ok(self)` is still returned.
     pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
         let grammar = self.grammar_mut();
-        grammar.brackets_query = Some(Query::new(grammar.ts_language, source)?);
+        let query = Query::new(grammar.ts_language, source)?;
+        let mut open_capture_ix = None;
+        let mut close_capture_ix = None;
+        get_capture_indices(
+            &query,
+            &mut [
+                ("open", &mut open_capture_ix),
+                ("close", &mut close_capture_ix),
+            ],
+        );
+        // Both captures are required for a usable bracket configuration.
+        if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) {
+            grammar.brackets_config = Some(BracketConfig {
+                query,
+                open_capture_ix,
+                close_capture_ix,
+            });
+        }
         Ok(self)
     }
 
+    /// Compiles `source` as this language's indentation query and records the indices of
+    /// its `indent` and (optional) `end` captures.
+    ///
+    /// Returns an error if the query fails to compile. If the `indent` capture is missing,
+    /// the query is dropped, no `indents_config` is installed, and `Ok(self)` is still returned.
     pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
         let grammar = self.grammar_mut();
-        grammar.indents_query = Some(Query::new(grammar.ts_language, source)?);
+        let query = Query::new(grammar.ts_language, source)?;
+        let mut indent_capture_ix = None;
+        let mut end_capture_ix = None;
+        get_capture_indices(
+            &query,
+            &mut [
+                ("indent", &mut indent_capture_ix),
+                ("end", &mut end_capture_ix),
+            ],
+        );
+        // Only `indent` is required; `end` stays optional in the stored config.
+        if let Some(indent_capture_ix) = indent_capture_ix {
+            grammar.indents_config = Some(IndentConfig {
+                query,
+                indent_capture_ix,
+                end_capture_ix,
+            });
+        }
         Ok(self)
     }
 
+    /// Compiles `source` as this language's outline query and records the indices of its
+    /// `item`, `name` and (optional) `context` captures.
+    ///
+    /// Returns an error if the query fails to compile. If `item` or `name` is missing, the
+    /// query is dropped, no `outline_config` is installed, and `Ok(self)` is still returned.
     pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
         let grammar = self.grammar_mut();
-        grammar.outline_query = Some(Query::new(grammar.ts_language, source)?);
+        let query = Query::new(grammar.ts_language, source)?;
+        let mut item_capture_ix = None;
+        let mut name_capture_ix = None;
+        let mut context_capture_ix = None;
+        get_capture_indices(
+            &query,
+            &mut [
+                ("item", &mut item_capture_ix),
+                ("name", &mut name_capture_ix),
+                ("context", &mut context_capture_ix),
+            ],
+        );
+        // `item` and `name` are required; `context` stays optional in the stored config.
+        if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
+            grammar.outline_config = Some(OutlineConfig {
+                query,
+                item_capture_ix,
+                name_capture_ix,
+                context_capture_ix,
+            });
+        }
         Ok(self)
     }
 
@@ -625,13 +709,13 @@ impl Language {
         let query = Query::new(grammar.ts_language, source)?;
         let mut language_capture_ix = None;
         let mut content_capture_ix = None;
-        for (ix, name) in query.capture_names().iter().enumerate() {
-            *match name.as_str() {
-                "language" => &mut language_capture_ix,
-                "content" => &mut content_capture_ix,
-                _ => continue,
-            } = Some(ix as u32);
-        }
+        get_capture_indices(
+            &query,
+            &mut [
+                ("language", &mut language_capture_ix),
+                ("content", &mut content_capture_ix),
+            ],
+        );
         let languages_by_pattern_ix = (0..query.pattern_count())
             .map(|ix| {
                 query.property_settings(ix).iter().find_map(|setting| {
@@ -729,9 +813,16 @@ impl Language {
         let mut result = Vec::new();
         if let Some(grammar) = &self.grammar {
             let tree = grammar.parse_text(text, None);
+            let captures = SyntaxSnapshot::single_tree_captures(
+                range.clone(),
+                text,
+                &tree,
+                grammar,
+                |grammar| grammar.highlights_query.as_ref(),
+            );
+            let highlight_maps = vec![grammar.highlight_map()];
             let mut offset = 0;
-            for chunk in BufferChunks::new(text, range, Some(&tree), self.grammar.as_ref(), vec![])
-            {
+            for chunk in BufferChunks::new(text, range, Some((captures, highlight_maps)), vec![]) {
                 let end_offset = offset + chunk.text.len();
                 if let Some(highlight_id) = chunk.syntax_highlight_id {
                     if !highlight_id.is_default() {
@@ -771,6 +862,10 @@ impl Language {
 }
 
 impl Grammar {
+    /// Returns the identifier assigned to this grammar when it was constructed.
+    pub fn id(&self) -> usize {
+        self.id
+    }
+
     fn parse_text(&self, text: &Rope, old_tree: Option<Tree>) -> Tree {
         PARSER.with(|parser| {
             let mut parser = parser.borrow_mut();
@@ -870,6 +965,17 @@ impl LspAdapter for Arc<FakeLspAdapter> {
     }
 }
 
+/// Resolves named captures of `query` to their capture indices.
+///
+/// `captures` pairs each capture name of interest with the slot that receives its index.
+/// Capture names declared by the query but not listed are ignored, and slots whose name
+/// does not occur in the query are left untouched.
+fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option<u32>)]) {
+    for (ix, name) in query.capture_names().iter().enumerate() {
+        // Only the first listed entry with this name receives the index.
+        if let Some((_, slot)) = captures.iter_mut().find(|(wanted, _)| wanted == name) {
+            **slot = Some(ix as u32);
+        }
+    }
+}
+
+/// Builds an `lsp::Position` from the row and column of a `PointUtf16`.
 pub fn point_to_lsp(point: PointUtf16) -> lsp::Position {
     lsp::Position::new(point.row, point.column)
 }

crates/language/src/syntax_map.rs 🔗

@@ -1,26 +1,28 @@
-use crate::{
-    Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider,
-    ToTreeSitterPoint,
-};
+use crate::{Grammar, InjectionConfig, Language, LanguageRegistry};
+use lazy_static::lazy_static;
+use parking_lot::Mutex;
 use std::{
     borrow::Cow,
     cell::RefCell,
     cmp::{Ordering, Reverse},
     collections::BinaryHeap,
-    iter::Peekable,
-    ops::{DerefMut, Range},
+    ops::{Deref, DerefMut, Range},
     sync::Arc,
 };
 use sum_tree::{Bias, SeekTarget, SumTree};
-use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
+use text::{rope, Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
 use tree_sitter::{
-    Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree,
+    Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
 };
 
 thread_local! {
     static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
 }
 
+lazy_static! {
+    static ref QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Default::default();
+}
+
 #[derive(Default)]
 pub struct SyntaxMap {
     parsed_version: clock::Global,
@@ -34,39 +36,51 @@ pub struct SyntaxSnapshot {
     layers: SumTree<SyntaxLayer>,
 }
 
+/// Merged stream of query captures across several syntax layers.
+#[derive(Default)]
 pub struct SyntaxMapCaptures<'a> {
+    /// One entry per layer. The first `active_layer_count` entries still have a pending
+    /// capture and are kept ordered by their sort key; exhausted layers follow.
     layers: Vec<SyntaxMapCapturesLayer<'a>>,
+    /// Number of leading entries in `layers` that still have a pending capture.
+    active_layer_count: usize,
+    /// Distinct grammars seen so far; `SyntaxMapCapture::grammar_index` indexes this list.
+    grammars: Vec<&'a Grammar>,
 }
 
+/// Merged stream of query matches across several syntax layers.
+#[derive(Default)]
 pub struct SyntaxMapMatches<'a> {
+    /// One entry per layer. The first `active_layer_count` entries still have a pending
+    /// match and are kept ordered by their sort key; exhausted layers follow.
     layers: Vec<SyntaxMapMatchesLayer<'a>>,
+    /// Number of leading entries in `layers` that still have a pending match.
+    active_layer_count: usize,
+    /// Distinct grammars seen so far; `SyntaxMapMatch::grammar_index` indexes this list.
+    grammars: Vec<&'a Grammar>,
 }
 
+/// A single capture produced by `SyntaxMapCaptures`.
+#[derive(Debug)]
 pub struct SyntaxMapCapture<'a> {
-    pub grammar: &'a Grammar,
+    /// Depth of the syntax layer the capture came from.
     pub depth: usize,
+    /// The captured syntax node.
     pub node: Node<'a>,
+    /// The capture's index within its query.
     pub index: u32,
+    /// Position of the layer's grammar in `SyntaxMapCaptures::grammars()`.
+    pub grammar_index: usize,
 }
 
+/// A single query match produced by `SyntaxMapMatches`.
+#[derive(Debug)]
 pub struct SyntaxMapMatch<'a> {
-    pub grammar: &'a Grammar,
+    /// Depth of the syntax layer the match came from.
     pub depth: usize,
+    /// Index of the query pattern that matched.
     pub pattern_index: usize,
+    /// Captures belonging to this match.
     pub captures: &'a [QueryCapture<'a>],
+    /// Position of the layer's grammar in `SyntaxMapMatches::grammars()`.
+    pub grammar_index: usize,
 }
 
+/// Per-layer state of a `SyntaxMapCaptures` stream.
 struct SyntaxMapCapturesLayer<'a> {
     depth: usize,
-    captures: Peekable<QueryCaptures<'a, 'a, TextProvider<'a>>>,
-    grammar: &'a Grammar,
+    /// Underlying tree-sitter captures iterator for this layer.
+    captures: QueryCaptures<'a, 'a, TextProvider<'a>>,
+    /// Capture most recently pulled from `captures`; `None` once the layer is exhausted.
+    next_capture: Option<QueryCapture<'a>>,
+    /// Position of this layer's grammar in the owning stream's `grammars` list.
+    grammar_index: usize,
+    // NOTE(review): presumably held so the cursor backing `captures` stays alive as long as
+    // the iterator — confirm against `SyntaxMapCaptures::new`.
     _query_cursor: QueryCursorHandle,
 }
 
+/// Per-layer state of a `SyntaxMapMatches` stream.
 struct SyntaxMapMatchesLayer<'a> {
     depth: usize,
-    matches: Peekable<QueryMatches<'a, 'a, TextProvider<'a>>>,
-    grammar: &'a Grammar,
+    /// Pattern index of the pending match; only meaningful while `has_next` is true.
+    next_pattern_index: usize,
+    /// Captures of the pending match, copied out of the iterator; the buffer is reused
+    /// between matches.
+    next_captures: Vec<QueryCapture<'a>>,
+    /// Whether a pending match is currently stored in the two fields above.
+    has_next: bool,
+    /// Underlying tree-sitter matches iterator for this layer.
+    matches: QueryMatches<'a, 'a, TextProvider<'a>>,
+    /// Position of this layer's grammar in the owning stream's `grammars` list.
+    grammar_index: usize,
+    // NOTE(review): presumably held so the cursor backing `matches` stays alive as long as
+    // the iterator — confirm against `SyntaxMapMatches::new`.
     _query_cursor: QueryCursorHandle,
 }
 
@@ -80,6 +94,7 @@ struct SyntaxLayer {
 
 #[derive(Debug, Clone)]
 struct SyntaxLayerSummary {
+    min_depth: usize,
     max_depth: usize,
     range: Range<Anchor>,
     last_layer_range: Range<Anchor>,
@@ -110,6 +125,12 @@ struct ChangedRegion {
 #[derive(Default)]
 struct ChangeRegionSet(Vec<ChangedRegion>);
 
+/// Supplies node text to tree-sitter queries by reading from the wrapped `Rope`.
+struct TextProvider<'a>(&'a Rope);
+
+/// Wraps a rope chunk iterator so that its string chunks are yielded as byte slices.
+struct ByteChunks<'a>(rope::Chunks<'a>);
+
+/// Owns a `QueryCursor` taken from the shared `QUERY_CURSORS` pool (see
+/// `QueryCursorHandle::new`).
+struct QueryCursorHandle(Option<QueryCursor>);
+
 impl SyntaxMap {
     pub fn new() -> Self {
         Self::default()
@@ -123,11 +144,20 @@ impl SyntaxMap {
         self.snapshot.clone()
     }
 
+    /// Returns a clone of the language registry handle, if one has been set.
+    pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
+        self.language_registry.clone()
+    }
+
+    /// Returns a copy of the buffer version recorded by the most recent parse.
+    pub fn parsed_version(&self) -> clock::Global {
+        self.parsed_version.clone()
+    }
+
+    /// Interpolates the snapshot from the last interpolated version up to `text`, then
+    /// records `text.version` as the new interpolated version.
     pub fn interpolate(&mut self, text: &BufferSnapshot) {
         self.snapshot.interpolate(&self.interpolated_version, text);
         self.interpolated_version = text.version.clone();
     }
 
+    #[cfg(test)]
     pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
         if !self.interpolated_version.observed_all(&text.version) {
             self.interpolate(text);
@@ -141,9 +171,22 @@ impl SyntaxMap {
         );
         self.parsed_version = text.version.clone();
     }
+
+    /// Installs `snapshot` as the current syntax snapshot and records `version` as the
+    /// parsed version. The interpolated version is not touched here.
+    pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) {
+        self.parsed_version = version;
+        self.snapshot = snapshot;
+    }
+
+    /// Replaces the current snapshot with an empty one. The recorded parsed and
+    /// interpolated versions are left as they are.
+    pub fn clear(&mut self) {
+        self.snapshot = SyntaxSnapshot::default();
+    }
 }
 
 impl SyntaxSnapshot {
+    /// Returns `true` when the snapshot contains no syntax layers.
+    pub fn is_empty(&self) -> bool {
+        self.layers.is_empty()
+    }
+
     pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
         let edits = text
             .edits_since::<(usize, Point)>(&from_version)
@@ -429,14 +472,105 @@ impl SyntaxSnapshot {
         self.layers = layers;
     }
 
+    /// Creates a capture stream over a single, standalone syntax tree.
+    ///
+    /// The tree's root node is registered as the only layer, at depth 0, for `grammar`.
+    /// `query` selects which of the grammar's queries to run, `text` is the rope the tree
+    /// was parsed from, and `range` is the byte range passed on to the capture stream.
+    pub fn single_tree_captures<'a>(
+        range: Range<usize>,
+        text: &'a Rope,
+        tree: &'a Tree,
+        grammar: &'a Grammar,
+        query: fn(&Grammar) -> Option<&Query>,
+    ) -> SyntaxMapCaptures<'a> {
+        // `range` is owned and not used again, so it is moved instead of cloned.
+        SyntaxMapCaptures::new(
+            range,
+            text,
+            [(grammar, 0, tree.root_node())].into_iter(),
+            query,
+        )
+    }
+
+    /// Creates a capture stream over every syntax layer that `layers_for_range` returns
+    /// for `range`.
+    ///
+    /// `query` picks the query to run for each layer's grammar; `buffer` supplies both the
+    /// text and the anchor resolution for the layers.
     pub fn captures<'a>(
         &'a self,
         range: Range<usize>,
         buffer: &'a BufferSnapshot,
-        query: impl Fn(&Grammar) -> Option<&Query>,
+        query: fn(&Grammar) -> Option<&Query>,
     ) -> SyntaxMapCaptures {
-        let mut result = SyntaxMapCaptures { layers: Vec::new() };
-        for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) {
+        SyntaxMapCaptures::new(
+            range.clone(),
+            buffer.as_rope(),
+            self.layers_for_range(range, buffer).into_iter(),
+            query,
+        )
+    }
+
+    /// Creates a match stream over every syntax layer that `layers_for_range` returns
+    /// for `range`.
+    ///
+    /// `query` picks the query to run for each layer's grammar; `buffer` supplies both the
+    /// text and the anchor resolution for the layers.
+    pub fn matches<'a>(
+        &'a self,
+        range: Range<usize>,
+        buffer: &'a BufferSnapshot,
+        query: fn(&Grammar) -> Option<&Query>,
+    ) -> SyntaxMapMatches {
+        let rope = buffer.as_rope();
+        let layers = self.layers_for_range(range.clone(), buffer);
+        SyntaxMapMatches::new(range, rope, layers.into_iter(), query)
+    }
+
+    /// Test helper: returns the layers for the whole buffer (`0..buffer.len()`).
+    #[cfg(test)]
+    pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, usize, Node)> {
+        self.layers_for_range(0..buffer.len(), buffer)
+    }
+
+    /// Collects `(grammar, depth, root node)` for the syntax layers selected for `range`.
+    ///
+    /// Layers whose language has no grammar are left out. Each root node is offset to the
+    /// layer's start position in `buffer`.
+    pub fn layers_for_range<'a, T: ToOffset>(
+        &self,
+        range: Range<T>,
+        buffer: &BufferSnapshot,
+    ) -> Vec<(&Grammar, usize, Node)> {
+        let range_start = buffer.anchor_before(range.start.to_offset(buffer));
+        let range_end = buffer.anchor_after(range.end.to_offset(buffer));
+
+        let mut cursor = self.layers.filter::<_, ()>(|summary| {
+            // A summary that spans more than one depth is always accepted; the range test
+            // is only applied to single-depth summaries.
+            if summary.max_depth > summary.min_depth {
+                return true;
+            }
+            let ends_before_range = summary.range.end.cmp(&range_start, buffer).is_lt();
+            let starts_after_range = summary.range.start.cmp(&range_end, buffer).is_gt();
+            !(ends_before_range || starts_after_range)
+        });
+
+        let mut layers = Vec::new();
+        cursor.next(buffer);
+        while let Some(layer) = cursor.item() {
+            if let Some(grammar) = layer.language.grammar.as_deref() {
+                let layer_start = &layer.range.start;
+                let root = layer.tree.root_node_with_offset(
+                    layer_start.to_offset(buffer),
+                    layer_start.to_point(buffer).to_ts_point(),
+                );
+                layers.push((grammar, layer.depth, root));
+            }
+            cursor.next(buffer);
+        }
+
+        layers
+    }
+}
+
+impl<'a> SyntaxMapCaptures<'a> {
+    fn new(
+        range: Range<usize>,
+        text: &'a Rope,
+        layers: impl Iterator<Item = (&'a Grammar, usize, Node<'a>)>,
+        query: fn(&Grammar) -> Option<&Query>,
+    ) -> Self {
+        let mut result = Self {
+            layers: Vec::new(),
+            grammars: Vec::new(),
+            active_layer_count: 0,
+        };
+        for (grammar, depth, node) in layers {
             let query = if let Some(query) = query(grammar) {
                 query
             } else {
@@ -451,39 +585,107 @@ impl SyntaxSnapshot {
             };
 
             cursor.set_byte_range(range.clone());
-            let captures = cursor.captures(query, node, TextProvider(buffer.as_rope()));
+            let captures = cursor.captures(query, node, TextProvider(text));
+            let grammar_index = result
+                .grammars
+                .iter()
+                .position(|g| g.id == grammar.id())
+                .unwrap_or_else(|| {
+                    result.grammars.push(grammar);
+                    result.grammars.len() - 1
+                });
             let mut layer = SyntaxMapCapturesLayer {
                 depth,
-                grammar,
-                captures: captures.peekable(),
+                grammar_index,
+                next_capture: None,
+                captures,
                 _query_cursor: query_cursor,
             };
 
-            if let Some(key) = layer.sort_key() {
-                let mut ix = 0;
-                while let Some(next_layer) = result.layers.get_mut(ix) {
-                    if let Some(next_key) = next_layer.sort_key() {
-                        if key > next_key {
-                            ix += 1;
-                            continue;
-                        }
-                    }
-                    break;
-                }
+            layer.advance();
+            if layer.next_capture.is_some() {
+                let key = layer.sort_key();
+                let ix = match result.layers[..result.active_layer_count]
+                    .binary_search_by_key(&key, |layer| layer.sort_key())
+                {
+                    Ok(ix) | Err(ix) => ix,
+                };
                 result.layers.insert(ix, layer);
+                result.active_layer_count += 1;
+            } else {
+                result.layers.push(layer);
             }
         }
+
         result
     }
 
-    pub fn matches<'a>(
-        &'a self,
+    /// Returns the distinct grammars of this stream; `SyntaxMapCapture::grammar_index`
+    /// is an index into this slice.
+    pub fn grammars(&self) -> &[&'a Grammar] {
+        &self.grammars
+    }
+
+    /// Returns the next capture without consuming it, or `None` when no active layer
+    /// remains.
+    pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
+        let head = self.layers[..self.active_layer_count].first()?;
+        head.next_capture.map(|capture| SyntaxMapCapture {
+            depth: head.depth,
+            grammar_index: head.grammar_index,
+            index: capture.index,
+            node: capture.node,
+        })
+    }
+
+    /// Consumes the capture at the front of the stream.
+    ///
+    /// Returns `false` (and does nothing) when no active layer remains, `true` otherwise.
+    pub fn advance(&mut self) -> bool {
+        let active = self.active_layer_count;
+        let head = match self.layers[..active].first_mut() {
+            Some(head) => head,
+            None => return false,
+        };
+
+        head.advance();
+        if head.next_capture.is_none() {
+            // The front layer ran dry: move it behind the remaining active layers and
+            // shrink the active prefix.
+            self.layers[..active].rotate_left(1);
+            self.active_layer_count = active - 1;
+        } else {
+            // Re-insert the front layer just before the first active layer whose key is
+            // greater than its new key.
+            let head_key = head.sort_key();
+            let later_offset = self.layers[1..active]
+                .iter()
+                .position(|other| head_key < other.sort_key())
+                .unwrap_or(active - 1);
+            self.layers[..later_offset + 1].rotate_left(1);
+        }
+
+        true
+    }
+
+    /// Restricts every layer's captures iterator to `range` and rebuilds the layer order.
+    ///
+    /// A layer keeps its pending capture only if that capture ends after `range.start`;
+    /// otherwise the layer is advanced to its next capture. Afterwards the layers are
+    /// re-sorted and the active prefix is recomputed.
+    pub fn set_byte_range(&mut self, range: Range<usize>) {
+        for layer in self.layers.iter_mut() {
+            layer.captures.set_byte_range(range.clone());
+            let pending_still_relevant = layer
+                .next_capture
+                .as_ref()
+                .map_or(false, |capture| capture.node.end_byte() > range.start);
+            if !pending_still_relevant {
+                layer.advance();
+            }
+        }
+        self.layers.sort_unstable_by_key(|layer| layer.sort_key());
+        // Exhausted layers sort last, so the active prefix is the leading run of layers
+        // that still have a pending capture.
+        self.active_layer_count = self
+            .layers
+            .iter()
+            .take_while(|layer| layer.next_capture.is_some())
+            .count();
+    }
+}
+
+impl<'a> SyntaxMapMatches<'a> {
+    fn new(
         range: Range<usize>,
-        buffer: &'a BufferSnapshot,
-        query: impl Fn(&Grammar) -> Option<&Query>,
-    ) -> SyntaxMapMatches {
-        let mut result = SyntaxMapMatches { layers: Vec::new() };
-        for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) {
+        text: &'a Rope,
+        layers: impl Iterator<Item = (&'a Grammar, usize, Node<'a>)>,
+        query: fn(&Grammar) -> Option<&Query>,
+    ) -> Self {
+        let mut result = Self::default();
+        for (grammar, depth, node) in layers {
             let query = if let Some(query) = query(grammar) {
                 query
             } else {
@@ -498,135 +700,132 @@ impl SyntaxSnapshot {
             };
 
             cursor.set_byte_range(range.clone());
-            let matches = cursor.matches(query, node, TextProvider(buffer.as_rope()));
+            let matches = cursor.matches(query, node, TextProvider(text));
+            let grammar_index = result
+                .grammars
+                .iter()
+                .position(|g| g.id == grammar.id())
+                .unwrap_or_else(|| {
+                    result.grammars.push(grammar);
+                    result.grammars.len() - 1
+                });
             let mut layer = SyntaxMapMatchesLayer {
                 depth,
-                grammar,
-                matches: matches.peekable(),
+                grammar_index,
+                matches,
+                next_pattern_index: 0,
+                next_captures: Vec::new(),
+                has_next: false,
                 _query_cursor: query_cursor,
             };
 
-            if let Some(key) = layer.sort_key() {
-                let mut ix = 0;
-                while let Some(next_layer) = result.layers.get_mut(ix) {
-                    if let Some(next_key) = next_layer.sort_key() {
-                        if key > next_key {
-                            ix += 1;
-                            continue;
-                        }
-                    }
-                    break;
-                }
+            layer.advance();
+            if layer.has_next {
+                let key = layer.sort_key();
+                let ix = match result.layers[..result.active_layer_count]
+                    .binary_search_by_key(&key, |layer| layer.sort_key())
+                {
+                    Ok(ix) | Err(ix) => ix,
+                };
                 result.layers.insert(ix, layer);
+                result.active_layer_count += 1;
+            } else {
+                result.layers.push(layer);
             }
         }
         result
     }
 
-    pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> {
-        self.layers
-            .iter()
-            .filter_map(|layer| {
-                if let Some(grammar) = &layer.language.grammar {
-                    Some((
-                        grammar.as_ref(),
-                        layer.tree.root_node_with_offset(
-                            layer.range.start.to_offset(buffer),
-                            layer.range.start.to_point(buffer).to_ts_point(),
-                        ),
-                    ))
-                } else {
-                    None
-                }
-            })
-            .collect()
+    /// Returns the distinct grammars of this stream; `SyntaxMapMatch::grammar_index`
+    /// is an index into this slice.
+    pub fn grammars(&self) -> &[&'a Grammar] {
+        &self.grammars
     }
 
-    pub fn layers_for_range<'a, T: ToOffset>(
-        &self,
-        range: Range<T>,
-        buffer: &BufferSnapshot,
-    ) -> Vec<(&Grammar, usize, Node)> {
-        let start = buffer.anchor_before(range.start.to_offset(buffer));
-        let end = buffer.anchor_after(range.end.to_offset(buffer));
+    /// Returns the next match without consuming it, or `None` when the front layer has
+    /// no pending match (or there are no layers at all).
+    pub fn peek(&self) -> Option<SyntaxMapMatch> {
+        let head = self.layers.first().filter(|layer| layer.has_next)?;
+        Some(SyntaxMapMatch {
+            depth: head.depth,
+            grammar_index: head.grammar_index,
+            pattern_index: head.next_pattern_index,
+            captures: &head.next_captures,
+        })
+    }
 
-        let mut cursor = self.layers.filter::<_, ()>(|summary| {
-            let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
-            let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
-            !is_before_start && !is_after_end
-        });
+    /// Consumes the match at the front of the stream.
+    ///
+    /// Returns `false` (and does nothing) when no active layer remains, `true` otherwise.
+    pub fn advance(&mut self) -> bool {
+        // Only the first `active_layer_count` layers still have a pending match. Looking at
+        // `self.layers.first_mut()` instead would pick an exhausted layer once the active
+        // prefix is empty and then panic in the rotation / counter decrement below.
+        let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
+            layer
+        } else {
+            return false;
+        };
 
-        let mut result = Vec::new();
-        cursor.next(buffer);
-        while let Some(layer) = cursor.item() {
-            if let Some(grammar) = &layer.language.grammar {
-                result.push((
-                    grammar.as_ref(),
-                    layer.depth,
-                    layer.tree.root_node_with_offset(
-                        layer.range.start.to_offset(buffer),
-                        layer.range.start.to_point(buffer).to_ts_point(),
-                    ),
-                ));
-            }
-            cursor.next(buffer)
+        layer.advance();
+        if layer.has_next {
+            // Re-insert the front layer just before the first active layer whose key is
+            // greater than its new key.
+            let key = layer.sort_key();
+            let i = 1 + self.layers[1..self.active_layer_count]
+                .iter()
+                .position(|later_layer| key < later_layer.sort_key())
+                .unwrap_or(self.active_layer_count - 1);
+            self.layers[0..i].rotate_left(1);
+        } else {
+            // The front layer ran dry: move it behind the remaining active layers and
+            // shrink the active prefix.
+            self.layers[0..self.active_layer_count].rotate_left(1);
+            self.active_layer_count -= 1;
         }
 
-        result
+        true
     }
 }
 
-impl<'a> Iterator for SyntaxMapCaptures<'a> {
-    type Item = SyntaxMapCapture<'a>;
+impl<'a> SyntaxMapCapturesLayer<'a> {
+    /// Pulls the next capture from the underlying iterator into `next_capture`
+    /// (`None` once the iterator is exhausted).
+    fn advance(&mut self) {
+        self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]);
+    }
 
-    fn next(&mut self) -> Option<Self::Item> {
-        let layer = self.layers.first_mut()?;
-        let (mat, ix) = layer.captures.next()?;
-
-        let capture = mat.captures[ix as usize];
-        let grammar = layer.grammar;
-        let depth = layer.depth;
-
-        if let Some(key) = layer.sort_key() {
-            let mut i = 1;
-            while let Some(later_layer) = self.layers.get_mut(i) {
-                if let Some(later_key) = later_layer.sort_key() {
-                    if key > later_key {
-                        i += 1;
-                        continue;
-                    }
-                }
-                break;
-            }
-            if i > 1 {
-                self.layers[0..i].rotate_left(1);
-            }
+    /// Ordering key of the pending capture: start byte ascending, then end byte
+    /// descending (longer ranges first), then layer depth ascending. A layer without a
+    /// pending capture gets `(usize::MAX, Reverse(0), usize::MAX)`.
+    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
+        if let Some(capture) = &self.next_capture {
+            let range = capture.node.byte_range();
+            (range.start, Reverse(range.end), self.depth)
         } else {
-            self.layers.remove(0);
+            (usize::MAX, Reverse(0), usize::MAX)
         }
-
-        Some(SyntaxMapCapture {
-            grammar,
-            depth,
-            node: capture.node,
-            index: capture.index,
-        })
     }
 }
 
-impl<'a> SyntaxMapCapturesLayer<'a> {
-    fn sort_key(&mut self) -> Option<(usize, Reverse<usize>, usize)> {
-        let (mat, ix) = self.captures.peek()?;
-        let range = &mat.captures[*ix].node.byte_range();
-        Some((range.start, Reverse(range.end), self.depth))
+impl<'a> SyntaxMapMatchesLayer<'a> {
+    /// Pulls the next match from the underlying iterator, copying its captures and
+    /// pattern index into this layer; clears `has_next` once the iterator is exhausted.
+    fn advance(&mut self) {
+        if let Some(mat) = self.matches.next() {
+            // Reuse the existing buffer rather than allocating a new one per match.
+            self.next_captures.clear();
+            self.next_captures.extend_from_slice(&mat.captures);
+            self.next_pattern_index = mat.pattern_index;
+            self.has_next = true;
+        } else {
+            self.has_next = false;
+        }
+    }
+
+    /// Ordering key of the pending match: start byte of its first capture ascending, then
+    /// end byte of its last capture descending, then layer depth ascending. A layer with
+    /// no pending match — or a pending match that has no captures — gets
+    /// `(usize::MAX, Reverse(0), usize::MAX)`.
+    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
+        if self.has_next {
+            let captures = &self.next_captures;
+            if let Some((first, last)) = captures.first().zip(captures.last()) {
+                return (
+                    first.node.start_byte(),
+                    Reverse(last.node.end_byte()),
+                    self.depth,
+                );
+            }
+        }
+        (usize::MAX, Reverse(0), usize::MAX)
     }
 }
 
-impl<'a> SyntaxMapMatchesLayer<'a> {
-    fn sort_key(&mut self) -> Option<(usize, Reverse<usize>, usize)> {
-        let mat = self.matches.peek()?;
-        let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte();
-        Some((range.start, Reverse(range.end), self.depth))
+/// Iterator adapter over `peek` + `advance`: yields the front capture and then moves past it.
+impl<'a> Iterator for SyntaxMapCaptures<'a> {
+    type Item = SyntaxMapCapture<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let result = self.peek();
+        // `advance` returns `false` without doing anything when no active layer remains,
+        // so calling it after a `None` peek is harmless.
+        self.advance();
+        result
     }
 }
 
@@ -864,6 +1063,7 @@ impl Default for SyntaxLayerSummary {
     fn default() -> Self {
         Self {
             max_depth: 0,
+            min_depth: 0,
             range: Anchor::MAX..Anchor::MIN,
             last_layer_range: Anchor::MIN..Anchor::MAX,
         }
@@ -875,7 +1075,8 @@ impl sum_tree::Summary for SyntaxLayerSummary {
 
     fn add_summary(&mut self, other: &Self, buffer: &Self::Context) {
         if other.max_depth > self.max_depth {
-            *self = other.clone();
+            self.max_depth = other.max_depth;
+            self.range = other.range.clone();
         } else {
             if other.range.start.cmp(&self.range.start, buffer).is_lt() {
                 self.range.start = other.range.start;
@@ -883,8 +1084,8 @@ impl sum_tree::Summary for SyntaxLayerSummary {
             if other.range.end.cmp(&self.range.end, buffer).is_gt() {
                 self.range.end = other.range.end;
             }
-            self.last_layer_range = other.last_layer_range.clone();
         }
+        self.last_layer_range = other.last_layer_range.clone();
     }
 }
 
@@ -927,6 +1128,7 @@ impl sum_tree::Item for SyntaxLayer {
 
     fn summary(&self) -> Self::Summary {
         SyntaxLayerSummary {
+            min_depth: self.depth,
             max_depth: self.depth,
             range: self.range.clone(),
             last_layer_range: self.range.clone(),
@@ -944,12 +1146,73 @@ impl std::fmt::Debug for SyntaxLayer {
     }
 }
 
+impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { // Supplies node text to tree-sitter from the wrapped value in `self.0`.
+    type I = ByteChunks<'a>;
+
+    fn text(&mut self, node: tree_sitter::Node) -> Self::I { // Returns the text covered by `node` as a chunk iterator.
+        ByteChunks(self.0.chunks_in_range(node.byte_range())) // only the node's byte range is requested
+    }
+}
+
+impl<'a> Iterator for ByteChunks<'a> { // Adapts the inner chunk iterator so it yields byte slices.
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next().map(str::as_bytes) // each `&str` chunk is exposed as its bytes; ends when the inner iterator ends
+    }
+}
+
+impl QueryCursorHandle { // Handle around a `QueryCursor` obtained from the `QUERY_CURSORS` pool.
+    pub(crate) fn new() -> Self { // Takes a cursor from the pool, or creates a new one if the pool is empty.
+        let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
+        cursor.set_match_limit(64); // applied to every cursor handed out, whether pooled or new
+        QueryCursorHandle(Some(cursor)) // held in an `Option` so `drop` can `take()` it out
+    }
+}
+
+impl Deref for QueryCursorHandle { // Lets the handle be used as a `&QueryCursor`.
+    type Target = QueryCursor;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref().unwrap() // panics if the inner `Option` is `None`
+    }
+}
+
+impl DerefMut for QueryCursorHandle { // Lets the handle be used as a `&mut QueryCursor`.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.0.as_mut().unwrap() // panics if the inner `Option` is `None`
+    }
+}
+
+impl Drop for QueryCursorHandle { // Puts the cursor back into the `QUERY_CURSORS` pool when the handle is dropped.
+    fn drop(&mut self) {
+        let mut cursor = self.0.take().unwrap(); // move the cursor out, leaving `None` behind
+        cursor.set_byte_range(0..usize::MAX); // widen the byte range back to the full span
+        cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); // likewise for the point range
+        QUERY_CURSORS.lock().push(cursor) // make it available to the next `QueryCursorHandle::new`
+    }
+}
+
+pub(crate) trait ToTreeSitterPoint { // Conversions between the implementing type and `tree_sitter::Point`.
+    fn to_ts_point(self) -> tree_sitter::Point; // consumes `self` and returns the tree-sitter point
+    fn from_ts_point(point: tree_sitter::Point) -> Self; // builds `Self` from a tree-sitter point
+}
+
+impl ToTreeSitterPoint for Point { // Maps `row`/`column` one-to-one onto the tree-sitter point's row/column.
+    fn to_ts_point(self) -> tree_sitter::Point {
+        tree_sitter::Point::new(self.row as usize, self.column as usize) // casts both fields to `usize`
+    }
+
+    fn from_ts_point(point: tree_sitter::Point) -> Self {
+        Point::new(point.row as u32, point.column as u32) // NOTE(review): `as u32` would truncate values above u32::MAX, assuming the fields are `usize` - confirm
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::LanguageConfig;
     use text::{Buffer, Point};
-    use tree_sitter::Query;
     use unindent::Unindent as _;
     use util::test::marked_text_ranges;
 
@@ -1298,13 +1561,13 @@ mod tests {
                 mutated_layers.into_iter().zip(reference_layers.into_iter())
             {
                 assert_eq!(
-                    edited_layer.1.to_sexp(),
-                    reference_layer.1.to_sexp(),
+                    edited_layer.2.to_sexp(),
+                    reference_layer.2.to_sexp(),
                     "different layer at step {i}"
                 );
                 assert_eq!(
-                    edited_layer.1.range(),
-                    reference_layer.1.range(),
+                    edited_layer.2.range(),
+                    reference_layer.2.range(),
                     "different layer at step {i}"
                 );
             }
@@ -1377,16 +1640,16 @@ mod tests {
         marked_string: &str,
     ) {
         let mut actual_ranges = Vec::<Range<usize>>::new();
-        for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| {
+        let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| {
             grammar.highlights_query.as_ref()
-        }) {
-            let name = &capture
-                .grammar
-                .highlights_query
-                .as_ref()
-                .unwrap()
-                .capture_names()[capture.index as usize];
-            dbg!(capture.node, capture.index, name);
+        });
+        let queries = captures
+            .grammars()
+            .iter()
+            .map(|grammar| grammar.highlights_query.as_ref().unwrap())
+            .collect::<Vec<_>>();
+        for capture in captures {
+            let name = &queries[capture.grammar_index].capture_names()[capture.index as usize];
             if highlight_query_capture_names.contains(&name.as_str()) {
                 actual_ranges.push(capture.node.byte_range());
             }

crates/language/src/tests.rs 🔗

@@ -1407,7 +1407,9 @@ fn json_lang() -> Language {
 
 fn get_tree_sexp(buffer: &ModelHandle<Buffer>, cx: &gpui::TestAppContext) -> String {
     buffer.read_with(cx, |buffer, _| {
-        buffer.syntax_tree().unwrap().root_node().to_sexp()
+        let syntax_map = buffer.syntax_map(); // test helper now reads the tree through the buffer's syntax map
+        let layers = syntax_map.layers(buffer.as_text_snapshot());
+        layers[0].2.to_sexp() // S-expression of the first layer's third tuple field; indexing panics if there are no layers
     })
 }
 

crates/project/src/project.rs 🔗

@@ -2056,6 +2056,7 @@ impl Project {
         let full_path = buffer.read(cx).file()?.full_path(cx);
         let language = self.languages.select_language(&full_path)?;
         buffer.update(cx, |buffer, cx| {
+            buffer.set_language_registry(self.languages.clone());
             buffer.set_language(Some(language.clone()), cx);
         });