Merge pull request #2182 from zed-industries/faster-injections

Max Brunsfeld created

Fix syntax-related performance problems on gigantic files

Change summary

Cargo.lock                        |  2 
Cargo.toml                        |  2 
crates/language/src/buffer.rs     | 34 ++++++++++++++++++++++----------
crates/language/src/syntax_map.rs | 27 +++++++++++++++++++++++++
4 files changed, 51 insertions(+), 14 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -7004,7 +7004,7 @@ dependencies = [
 [[package]]
 name = "tree-sitter"
 version = "0.20.9"
-source = "git+https://github.com/tree-sitter/tree-sitter?rev=36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da#36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da"
+source = "git+https://github.com/tree-sitter/tree-sitter?rev=c51896d32dcc11a38e41f36e3deb1a6a9c4f4b14#c51896d32dcc11a38e41f36e3deb1a6a9c4f4b14"
 dependencies = [
  "cc",
  "regex",

Cargo.toml 🔗

@@ -69,7 +69,7 @@ serde_json = { version = "1.0", features = ["preserve_order", "raw_value"] }
 rand = { version = "0.8" }
 
 [patch.crates-io]
-tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da" }
+tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "c51896d32dcc11a38e41f36e3deb1a6a9c4f4b14" }
 async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
 
 # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457

crates/language/src/buffer.rs 🔗

@@ -2247,7 +2247,6 @@ impl BufferSnapshot {
             .map(|g| g.outline_config.as_ref().unwrap())
             .collect::<Vec<_>>();
 
-        let mut chunks = self.chunks(0..self.len(), true);
         let mut stack = Vec::<Range<usize>>::new();
         let mut items = Vec::new();
         while let Some(mat) = matches.peek() {
@@ -2266,9 +2265,7 @@ impl BufferSnapshot {
                 continue;
             }
 
-            let mut text = String::new();
-            let mut name_ranges = Vec::new();
-            let mut highlight_ranges = Vec::new();
+            let mut buffer_ranges = Vec::new();
             for capture in mat.captures {
                 let node_is_name;
                 if capture.index == config.name_capture_ix {
@@ -2286,12 +2283,27 @@ impl BufferSnapshot {
                         range.start + self.line_len(start.row as u32) as usize - start.column;
                 }
 
+                buffer_ranges.push((range, node_is_name));
+            }
+
+            if buffer_ranges.is_empty() {
+                continue;
+            }
+
+            let mut text = String::new();
+            let mut highlight_ranges = Vec::new();
+            let mut name_ranges = Vec::new();
+            let mut chunks = self.chunks(
+                buffer_ranges.first().unwrap().0.start..buffer_ranges.last().unwrap().0.end,
+                true,
+            );
+            for (buffer_range, is_name) in buffer_ranges {
                 if !text.is_empty() {
                     text.push(' ');
                 }
-                if node_is_name {
+                if is_name {
                     let mut start = text.len();
-                    let end = start + range.len();
+                    let end = start + buffer_range.len();
 
                     // When multiple names are captured, then the matcheable text
                     // includes the whitespace in between the names.
@@ -2302,12 +2314,12 @@ impl BufferSnapshot {
                     name_ranges.push(start..end);
                 }
 
-                let mut offset = range.start;
+                let mut offset = buffer_range.start;
                 chunks.seek(offset);
                 for mut chunk in chunks.by_ref() {
-                    if chunk.text.len() > range.end - offset {
-                        chunk.text = &chunk.text[0..(range.end - offset)];
-                        offset = range.end;
+                    if chunk.text.len() > buffer_range.end - offset {
+                        chunk.text = &chunk.text[0..(buffer_range.end - offset)];
+                        offset = buffer_range.end;
                     } else {
                         offset += chunk.text.len();
                     }
@@ -2321,7 +2333,7 @@ impl BufferSnapshot {
                         highlight_ranges.push((start..end, style));
                     }
                     text.push_str(chunk.text);
-                    if offset >= range.end {
+                    if offset >= buffer_range.end {
                         break;
                     }
                 }

crates/language/src/syntax_map.rs 🔗

@@ -608,6 +608,31 @@ impl SyntaxSnapshot {
         self.layers = layers;
         self.interpolated_version = text.version.clone();
         self.parsed_version = text.version.clone();
+        #[cfg(debug_assertions)]
+        self.check_invariants(text);
+    }
+
+    #[cfg(debug_assertions)]
+    fn check_invariants(&self, text: &BufferSnapshot) {
+        let mut max_depth = 0;
+        let mut prev_range: Option<Range<Anchor>> = None;
+        for layer in self.layers.iter() {
+            if layer.depth == max_depth {
+                if let Some(prev_range) = prev_range {
+                    match layer.range.start.cmp(&prev_range.start, text) {
+                        Ordering::Less => panic!("layers out of order"),
+                        Ordering::Equal => {
+                            assert!(layer.range.end.cmp(&prev_range.end, text).is_ge())
+                        }
+                        Ordering::Greater => {}
+                    }
+                }
+            } else if layer.depth < max_depth {
+                panic!("layers out of order")
+            }
+            max_depth = layer.depth;
+            prev_range = Some(layer.range.clone());
+        }
     }
 
     pub fn single_tree_captures<'a>(
@@ -1419,7 +1444,7 @@ impl sum_tree::Summary for SyntaxLayerSummary {
             self.max_depth = other.max_depth;
             self.range = other.range.clone();
         } else {
-            if other.range.start.cmp(&self.range.start, buffer).is_lt() {
+            if self.range == (Anchor::MAX..Anchor::MAX) {
                 self.range.start = other.range.start;
             }
             if other.range.end.cmp(&self.range.end, buffer).is_gt() {