From f6a817a0f387767b0a6c3206980c8185cf0fa02b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 5 Aug 2022 14:58:45 -0700 Subject: [PATCH 01/22] Start work on a SyntaxMap data structure --- crates/language/src/buffer.rs | 2 +- crates/language/src/language.rs | 44 ++ crates/language/src/syntax_map.rs | 418 +++++++++++++++++++ crates/sum_tree/src/cursor.rs | 6 +- crates/sum_tree/src/sum_tree.rs | 2 + crates/zed/src/languages.rs | 5 + crates/zed/src/languages/rust/injections.scm | 3 + 7 files changed, 476 insertions(+), 4 deletions(-) create mode 100644 crates/language/src/syntax_map.rs create mode 100644 crates/zed/src/languages/rust/injections.scm diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 7c616762d80d62577909faab33090f29a63917fa..b7a1bd30fcdf83ed83f40233a292aa18dbe44df7 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -2496,7 +2496,7 @@ impl Drop for QueryCursorHandle { } } -trait ToTreeSitterPoint { +pub(crate) trait ToTreeSitterPoint { fn to_ts_point(self) -> tree_sitter::Point; fn from_ts_point(point: tree_sitter::Point) -> Self; } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index fbcc983df9074663d7226cd91a718e55a8b74b0d..8dcfc8fffdb083b962d903d85247859225f4dd85 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -3,6 +3,7 @@ mod diagnostic_set; mod highlight_map; mod outline; pub mod proto; +mod syntax_map; #[cfg(test)] mod tests; @@ -290,9 +291,17 @@ pub struct Grammar { pub(crate) brackets_query: Option, pub(crate) indents_query: Option, pub(crate) outline_query: Option, + pub(crate) injection_config: Option, pub(crate) highlight_map: Mutex, } +struct InjectionConfig { + query: Query, + content_capture_ix: u32, + language_capture_ix: Option, + languages_by_pattern_ix: Vec>>, +} + #[derive(Clone)] pub enum LanguageServerBinaryStatus { CheckingForUpdate, @@ -571,6 +580,7 @@ impl Language { brackets_query: None, indents_query: None, outline_query: None, + injection_config: None, ts_language, highlight_map: Default::default(), }) @@ -610,6 +620,40 @@ impl Language { Ok(self) } + pub fn with_injection_query(mut self, source: &str) -> Result { + let grammar = self.grammar_mut(); + let query = Query::new(grammar.ts_language, source)?; + let mut language_capture_ix = None; + let mut content_capture_ix = None; + for (ix, name) in query.capture_names().iter().enumerate() { + *match name.as_str() { + "language" => &mut language_capture_ix, + "content" => &mut content_capture_ix, + _ => continue, + } = Some(ix as u32); + } + let languages_by_pattern_ix = (0..query.pattern_count()) + .map(|ix| { + query.property_settings(ix).iter().find_map(|setting| { + if setting.key.as_ref() == "language" { + return setting.value.clone(); + } else { + None + } + }) + }) + .collect(); + if let Some(content_capture_ix) = content_capture_ix { + grammar.injection_config = Some(InjectionConfig { + query, + language_capture_ix, + content_capture_ix, + languages_by_pattern_ix, + }); + } + Ok(self) + } + fn grammar_mut(&mut self) -> &mut Grammar { Arc::get_mut(self.grammar.as_mut().unwrap()).unwrap() } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs new file mode 100644 index 0000000000000000000000000000000000000000..01ff0e50a17baf6d87e9e0f29af817064837636e --- /dev/null +++ b/crates/language/src/syntax_map.rs @@ -0,0 +1,418 @@ +use crate::{ + Grammar, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, +}; +use 
collections::VecDeque; +use gpui::executor::Background; +use std::{borrow::Cow, cell::RefCell, cmp::Ordering, ops::Range, sync::Arc}; +use sum_tree::{SeekTarget, SumTree}; +use text::{Anchor, BufferSnapshot, Point, Rope, ToOffset}; +use tree_sitter::{Parser, Tree}; +use util::post_inc; + +thread_local! { + static PARSER: RefCell = RefCell::new(Parser::new()); +} + +#[derive(Default)] +pub struct SyntaxMap { + next_layer_id: usize, + snapshot: SyntaxMapSnapshot, +} + +#[derive(Clone, Default)] +pub struct SyntaxMapSnapshot { + version: clock::Global, + layers: SumTree, +} + +#[derive(Clone)] +struct SyntaxLayer { + id: usize, + parent_id: Option, + range: SyntaxLayerRange, + tree: tree_sitter::Tree, + language: Arc, +} + +#[derive(Debug, Clone)] +struct SyntaxLayerSummary { + range: Range, + last_layer_range: Range, +} + +#[derive(Clone, Debug)] +struct SyntaxLayerRange(Range); + +impl SyntaxMap { + pub fn new( + executor: Arc, + registry: Arc, + language: Arc, + text: BufferSnapshot, + prev_set: Option, + ) -> Self { + let mut next_layer_id = 0; + let mut layers = Vec::new(); + let mut injections = VecDeque::<(Option, _, Vec)>::new(); + + injections.push_back((None, language, vec![])); + while let Some((parent_id, language, ranges)) = injections.pop_front() { + if let Some(grammar) = &language.grammar.as_deref() { + let id = post_inc(&mut next_layer_id); + let range = if let Some((first, last)) = ranges.first().zip(ranges.last()) { + text.anchor_before(first.start_byte)..text.anchor_after(last.end_byte) + } else { + Anchor::MIN..Anchor::MAX + }; + let tree = Self::parse_text(grammar, text.as_rope(), None, ranges); + Self::get_injections(grammar, &text, &tree, id, ®istry, &mut injections); + layers.push(SyntaxLayer { + id, + parent_id, + range: SyntaxLayerRange(range), + tree, + language, + }); + } + } + + layers.sort_unstable_by(|a, b| SeekTarget::cmp(&a.range, &b.range, &text)); + + Self { + next_layer_id, + snapshot: SyntaxMapSnapshot { + layers: SumTree::from_iter(layers, &text), + version: text.version, + }, + } + } + + pub fn snapshot(&self) -> SyntaxMapSnapshot { + self.snapshot.clone() + } + + fn interpolate(&mut self, text: &BufferSnapshot) { + let edits = text + .edits_since::<(Point, usize)>(&self.version) + .map(|edit| { + let (lines, bytes) = edit.flatten(); + tree_sitter::InputEdit { + start_byte: bytes.new.start, + old_end_byte: bytes.new.start + bytes.old.len(), + new_end_byte: bytes.new.end, + start_position: lines.new.start.to_ts_point(), + old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) + .to_ts_point(), + new_end_position: lines.new.end.to_ts_point(), + } + }) + .collect::>(); + if edits.is_empty() { + return; + } + } + + fn get_injections( + grammar: &Grammar, + text: &BufferSnapshot, + tree: &Tree, + id: usize, + registry: &Arc, + output: &mut VecDeque<(Option, Arc, Vec)>, + ) { + let config = if let Some(config) = &grammar.injection_config { + config + } else { + return; + }; + + let mut query_cursor = QueryCursorHandle::new(); + for mat in query_cursor.matches( + &config.query, + tree.root_node(), + TextProvider(text.as_rope()), + ) { + let content_ranges = mat + .nodes_for_capture_index(config.content_capture_ix) + .map(|node| node.range()) + .collect::>(); + if content_ranges.is_empty() { + continue; + } + let language_name = config.languages_by_pattern_ix[mat.pattern_index] + .as_ref() + .map(|s| Cow::Borrowed(s.as_ref())) + .or_else(|| { + let ix = config.language_capture_ix?; + let node = mat.nodes_for_capture_index(ix).next()?; + 
Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) + }); + if let Some(language_name) = language_name { + if let Some(language) = registry.get_language(language_name.as_ref()) { + output.push_back((Some(id), language, content_ranges)) + } + } + } + } + + fn parse_text( + grammar: &Grammar, + text: &Rope, + old_tree: Option, + ranges: Vec, + ) -> Tree { + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + let mut chunks = text.chunks_in_range(0..text.len()); + parser + .set_included_ranges(&ranges) + .expect("overlapping ranges"); + parser + .set_language(grammar.ts_language) + .expect("incompatible grammar"); + parser + .parse_with( + &mut move |offset, _| { + chunks.seek(offset); + chunks.next().unwrap_or("").as_bytes() + }, + old_tree.as_ref(), + ) + .expect("invalid language") + }) + } +} + +impl SyntaxMapSnapshot { + pub fn layers_for_range<'a, T: ToOffset>( + &self, + range: Range, + buffer: &BufferSnapshot, + ) -> Vec<(Tree, &Grammar)> { + let start = buffer.anchor_before(range.start.to_offset(buffer)); + let end = buffer.anchor_after(range.end.to_offset(buffer)); + + let mut cursor = self.layers.filter::<_, ()>(|summary| { + let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); + let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); + !is_before_start && !is_after_end + }); + + let mut result = Vec::new(); + cursor.next(buffer); + while let Some(item) = cursor.item() { + if let Some(grammar) = &item.language.grammar { + result.push((item.tree.clone(), grammar.as_ref())); + } + cursor.next(buffer) + } + + result + } +} + +impl std::ops::Deref for SyntaxMap { + type Target = SyntaxMapSnapshot; + + fn deref(&self) -> &Self::Target { + &self.snapshot + } +} + +impl Default for SyntaxLayerSummary { + fn default() -> Self { + Self { + range: Anchor::MAX..Anchor::MIN, + last_layer_range: Anchor::MIN..Anchor::MAX, + } + } +} + +impl sum_tree::Summary for SyntaxLayerSummary { + type Context = BufferSnapshot; + + fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { + if other.range.start.cmp(&self.range.start, buffer).is_lt() { + self.range.start = other.range.start; + } + if other.range.end.cmp(&self.range.end, buffer).is_gt() { + self.range.end = other.range.end; + } + self.last_layer_range = other.last_layer_range.clone(); + } +} + +impl Default for SyntaxLayerRange { + fn default() -> Self { + Self(Anchor::MIN..Anchor::MAX) + } +} + +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerRange> for SyntaxLayerRange { + fn cmp(&self, cursor_location: &Self, buffer: &BufferSnapshot) -> Ordering { + self.0 + .start + .cmp(&cursor_location.0.start, buffer) + .then_with(|| cursor_location.0.end.cmp(&self.0.end, buffer)) + } +} + +impl<'a> sum_tree::Dimension<'a, SyntaxLayerSummary> for SyntaxLayerRange { + fn add_summary( + &mut self, + summary: &'a SyntaxLayerSummary, + _: &::Context, + ) { + self.0 = summary.last_layer_range.clone(); + } +} + +impl sum_tree::Item for SyntaxLayer { + type Summary = SyntaxLayerSummary; + + fn summary(&self) -> Self::Summary { + SyntaxLayerSummary { + range: self.range.0.clone(), + last_layer_range: self.range.0.clone(), + } + } +} + +impl std::fmt::Debug for SyntaxLayer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SyntaxLayer") + .field("id", &self.id) + .field("parent_id", &self.parent_id) + .field("range", &self.range) + .field("tree", &self.tree) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::LanguageConfig; + use 
gpui::MutableAppContext; + use text::{Buffer, Point}; + use unindent::Unindent as _; + + #[gpui::test] + fn test_syntax_map(cx: &mut MutableAppContext) { + let buffer = Buffer::new( + 0, + 0, + r#" + fn a() { + assert_eq!( + b(vec![C {}]), + vec![d.e], + ); + println!("{}", f(|_| true)); + } + "# + .unindent(), + ); + + let executor = cx.background().clone(); + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + let snapshot = buffer.snapshot(); + registry.add(language.clone()); + + let syntax_map = SyntaxMap::new(executor, registry, language, snapshot.clone(), None); + + let layers = syntax_map.layers_for_range(Point::new(0, 0)..Point::new(0, 1), &snapshot); + assert_layers( + &layers, + &["(source_file (function_item name: (identifier)..."], + ); + + let layers = syntax_map.layers_for_range(Point::new(2, 0)..Point::new(2, 0), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ... (block (expression_statement (macro_invocation...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(2, 14)..Point::new(2, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (struct_expression ...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(3, 14)..Point::new(3, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (field_expression ...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(5, 12)..Point::new(5, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(call_expression ... (arguments (closure_expression ...", + ], + ); + } + + fn rust_lang() -> Language { + Language::new( + LanguageConfig { + name: "Rust".into(), + path_suffixes: vec!["rs".to_string()], + ..Default::default() + }, + Some(tree_sitter_rust::language()), + ) + .with_injection_query( + r#" + (macro_invocation + (token_tree) @content + (#set! 
"language" "rust")) + "#, + ) + .unwrap() + } + + fn assert_layers(layers: &[(Tree, &Grammar)], expected_layers: &[&str]) { + assert_eq!( + layers.len(), + expected_layers.len(), + "wrong number of layers" + ); + for (i, (layer, expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { + let actual_s_exp = layer.0.root_node().to_sexp(); + assert!( + string_contains_sequence( + &actual_s_exp, + &expected_s_exp.split("...").collect::>() + ), + "layer {i}:\n\nexpected: {expected_s_exp}\nactual: {actual_s_exp}", + ); + } + } + + pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { + let mut last_part_end = 0; + for part in parts { + if let Some(start_ix) = text[last_part_end..].find(part) { + last_part_end = start_ix + part.len(); + } else { + return false; + } + } + true + } +} diff --git a/crates/sum_tree/src/cursor.rs b/crates/sum_tree/src/cursor.rs index 09f253d43288f12e45d4eb0163c3933b84442a24..52200d64cf781bd9f07f9222891711bbed720a15 100644 --- a/crates/sum_tree/src/cursor.rs +++ b/crates/sum_tree/src/cursor.rs @@ -608,9 +608,9 @@ where impl<'a, F, T, S, U> Iterator for FilterCursor<'a, F, T, U> where - F: Fn(&T::Summary) -> bool, + F: FnMut(&T::Summary) -> bool, T: Item, - S: Summary, + S: Summary, //Context for the summary must be unit type, as .next() doesn't take arguments U: Dimension<'a, T::Summary>, { type Item = &'a T; @@ -621,7 +621,7 @@ where } if let Some(item) = self.item() { - self.cursor.next_internal(&self.filter_node, &()); + self.cursor.next_internal(&mut self.filter_node, &()); Some(item) } else { None diff --git a/crates/sum_tree/src/sum_tree.rs b/crates/sum_tree/src/sum_tree.rs index fdfd5d9de2b6d75dda4f70ea8fa55e027c4422a2..cb05dff9673579bc51383cfaf67881560dbe0ef8 100644 --- a/crates/sum_tree/src/sum_tree.rs +++ b/crates/sum_tree/src/sum_tree.rs @@ -168,6 +168,8 @@ impl SumTree { Cursor::new(self) } + /// Note: If the summary type requires a non `()` context, then the filter cursor + /// that is returned cannot be used with Rust's iterators. pub fn filter<'a, F, U>(&'a self, filter_node: F) -> FilterCursor where F: FnMut(&T::Summary) -> bool, diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index 8dc20bdbd16dda3709d3a199c43494bb9b6ef892..b7057bdd13169bb8a7b226f3b0e1e7a511d93b1f 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -128,6 +128,11 @@ pub(crate) fn language( .with_outline_query(query.as_ref()) .expect("failed to load outline query"); } + if let Some(query) = load_query(name, "/injections") { + language = language + .with_injection_query(query.as_ref()) + .expect("failed to load injection query"); + } if let Some(lsp_adapter) = lsp_adapter { language = language.with_lsp_adapter(lsp_adapter) } diff --git a/crates/zed/src/languages/rust/injections.scm b/crates/zed/src/languages/rust/injections.scm new file mode 100644 index 0000000000000000000000000000000000000000..9d8c03c8893b5acbfa5c6c0bc4703010c87b65a1 --- /dev/null +++ b/crates/zed/src/languages/rust/injections.scm @@ -0,0 +1,3 @@ +(macro_invocation + (token_tree) @content) + (#set! 
"language" "rust")) \ No newline at end of file From 02f8705f2e5c8a1b9ca47dd4a61d03d7dd6f8b60 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Aug 2022 18:01:04 -0700 Subject: [PATCH 02/22] Start work on handling edits in the SyntaxMap --- crates/language/src/syntax_map.rs | 819 +++++++++++++++++++++++------- 1 file changed, 630 insertions(+), 189 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 01ff0e50a17baf6d87e9e0f29af817064837636e..41ea5790479ecf7fddc88aac984068e896e584e5 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,11 +1,13 @@ use crate::{ - Grammar, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, + Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, + ToTreeSitterPoint, }; -use collections::VecDeque; -use gpui::executor::Background; -use std::{borrow::Cow, cell::RefCell, cmp::Ordering, ops::Range, sync::Arc}; -use sum_tree::{SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, Point, Rope, ToOffset}; +use collections::HashMap; +use std::{ + borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, +}; +use sum_tree::{Bias, SeekTarget, SumTree}; +use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset}; use tree_sitter::{Parser, Tree}; use util::post_inc; @@ -15,175 +17,399 @@ thread_local! { #[derive(Default)] pub struct SyntaxMap { - next_layer_id: usize, - snapshot: SyntaxMapSnapshot, + version: clock::Global, + snapshot: SyntaxSnapshot, + language_registry: Option>, } #[derive(Clone, Default)] -pub struct SyntaxMapSnapshot { - version: clock::Global, +pub struct SyntaxSnapshot { layers: SumTree, } #[derive(Clone)] struct SyntaxLayer { - id: usize, - parent_id: Option, - range: SyntaxLayerRange, + depth: usize, + range: Range, tree: tree_sitter::Tree, language: Arc, } #[derive(Debug, Clone)] struct SyntaxLayerSummary { + max_depth: usize, range: Range, last_layer_range: Range, } #[derive(Clone, Debug)] -struct SyntaxLayerRange(Range); +struct Depth(usize); -impl SyntaxMap { - pub fn new( - executor: Arc, - registry: Arc, +#[derive(Clone, Debug)] +struct MaxPosition(Anchor); + +enum ReparseStep { + CreateLayer { + depth: usize, language: Arc, - text: BufferSnapshot, - prev_set: Option, - ) -> Self { - let mut next_layer_id = 0; - let mut layers = Vec::new(); - let mut injections = VecDeque::<(Option, _, Vec)>::new(); - - injections.push_back((None, language, vec![])); - while let Some((parent_id, language, ranges)) = injections.pop_front() { - if let Some(grammar) = &language.grammar.as_deref() { - let id = post_inc(&mut next_layer_id); - let range = if let Some((first, last)) = ranges.first().zip(ranges.last()) { - text.anchor_before(first.start_byte)..text.anchor_after(last.end_byte) - } else { - Anchor::MIN..Anchor::MAX - }; - let tree = Self::parse_text(grammar, text.as_rope(), None, ranges); - Self::get_injections(grammar, &text, &tree, id, ®istry, &mut injections); - layers.push(SyntaxLayer { - id, - parent_id, - range: SyntaxLayerRange(range), - tree, - language, - }); - } - } + ranges: Vec, + }, + EnterChangedRange { + id: usize, + depth: usize, + range: Range, + }, + LeaveChangedRange { + id: usize, + depth: usize, + range: Range, + }, +} - layers.sort_unstable_by(|a, b| SeekTarget::cmp(&a.range, &b.range, &text)); +impl SyntaxMap { + pub fn new() -> Self { + Self::default() + } - Self { - next_layer_id, - snapshot: SyntaxMapSnapshot { - 
layers: SumTree::from_iter(layers, &text), - version: text.version, - }, - } + pub fn set_language_registry(&mut self, registry: Arc) { + self.language_registry = Some(registry); } - pub fn snapshot(&self) -> SyntaxMapSnapshot { + pub fn snapshot(&self) -> SyntaxSnapshot { self.snapshot.clone() } - fn interpolate(&mut self, text: &BufferSnapshot) { + pub fn interpolate(&mut self, text: &BufferSnapshot) { + self.snapshot.interpolate(&self.version, text); + self.version = text.version.clone(); + } + + pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { + self.version = text.version.clone(); + self.snapshot + .reparse(self.language_registry.clone(), language, text); + } +} + +// Assumptions: +// * The maximum depth is small (< 5) +// * For a given depth, the number of layers that touch a given range +// is small (usually only 1) + +// |change| +// 0 (............................................................) +// 1 (...............................................) +// 1 (................) +// 1 (.......) +// 2 (....) +// 2 (....) +// 2 (.......) +// 2 (...) +// 2 (.........) +// 2 (...) +// 3 (.) +// 3 (.) +// 3 (..) +// 3 (..) +// 3 (..) +// 3 (.) + +impl SyntaxSnapshot { + pub fn interpolate(&mut self, current_version: &clock::Global, text: &BufferSnapshot) { let edits = text - .edits_since::<(Point, usize)>(&self.version) - .map(|edit| { - let (lines, bytes) = edit.flatten(); - tree_sitter::InputEdit { - start_byte: bytes.new.start, - old_end_byte: bytes.new.start + bytes.old.len(), - new_end_byte: bytes.new.end, - start_position: lines.new.start.to_ts_point(), - old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) - .to_ts_point(), - new_end_position: lines.new.end.to_ts_point(), - } - }) + .edits_since::<(usize, Point)>(¤t_version) .collect::>(); if edits.is_empty() { return; } + + let mut layers = SumTree::new(); + let max_depth = self.layers.summary().max_depth; + let mut cursor = self.layers.cursor::(); + cursor.next(&text); + + for depth in 0..max_depth { + let mut edits = &edits[..]; + layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); + + while let Some(layer) = cursor.item() { + let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ + &layer.range.start, + &layer.range.end, + ]); + let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); + let start_byte = layer_range.start.0; + let start_point = layer_range.start.1; + + // Preserve any layers at this depth that precede the first edit. + let first_edit = if let Some(edit) = edits.first() { + edit + } else { + break; + }; + if first_edit.new.start.0 > layer_range.end.0 { + layers.push_tree( + cursor.slice( + &( + Depth(depth), + MaxPosition(text.anchor_before(first_edit.new.start.0)), + ), + Bias::Left, + text, + ), + text, + ); + continue; + } + + // Preserve any layers at this depth that follow the last edit. + let last_edit = edits.last().unwrap(); + if last_edit.new.end.0 < layer_range.start.0 { + break; + } + + let mut layer = layer.clone(); + for (i, edit) in edits.iter().enumerate().rev() { + // Ignore any edits that start after the end of this layer. + if edit.new.start.0 > layer_range.end.0 { + continue; + } + + // Ignore edits that end before the start of this layer, and don't consider them + // for any subsequent layers at this same depth. + if edit.new.end.0 <= start_byte { + edits = &edits[i + 1..]; + break; + } + + // Apply any edits that intersect this layer to the layer's syntax tree. 
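                    // A layer's tree stores positions relative to the layer's own start,
                    // while buffer edits are expressed in absolute buffer coordinates, so
                    // the edit is rebased by subtracting start_byte / start_point before
                    // being passed to Tree::edit. An edit that begins before the layer's
                    // start appears to be clamped: its overlap with the layer is reported
                    // as a deletion at offset zero (the else branch), after which no
                    // earlier edits need to be considered for this layer.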
+ if edit.new.start.0 >= start_byte { + layer.tree.edit(&tree_sitter::InputEdit { + start_byte: edit.new.start.0 - start_byte, + old_end_byte: edit.new.start.0 - start_byte + + (edit.old.end.0 - edit.old.start.0), + new_end_byte: edit.new.end.0 - start_byte, + start_position: (edit.new.start.1 - start_point).to_ts_point(), + old_end_position: (edit.new.start.1 - start_point + + (edit.old.end.1 - edit.old.start.1)) + .to_ts_point(), + new_end_position: (edit.new.end.1 - start_point).to_ts_point(), + }); + } else { + layer.tree.edit(&tree_sitter::InputEdit { + start_byte: 0, + old_end_byte: edit.new.end.0 - start_byte, + new_end_byte: 0, + start_position: Default::default(), + old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + new_end_position: Default::default(), + }); + break; + } + } + + layers.push(layer, text); + cursor.next(text); + } + } + + layers.push_tree(cursor.suffix(&text), &text); + drop(cursor); + self.layers = layers; } - fn get_injections( - grammar: &Grammar, + pub fn reparse( + &mut self, + registry: Option>, + language: Arc, text: &BufferSnapshot, - tree: &Tree, - id: usize, - registry: &Arc, - output: &mut VecDeque<(Option, Arc, Vec)>, ) { - let config = if let Some(config) = &grammar.injection_config { - config - } else { - return; - }; + let mut cursor = self.layers.cursor::(); + cursor.next(&text); + let mut layers = SumTree::new(); + + let mut next_change_id = 0; + let mut current_changes = HashMap::default(); + let mut queue = BinaryHeap::new(); + queue.push(ReparseStep::CreateLayer { + depth: 0, + language: language.clone(), + ranges: Vec::new(), + }); - let mut query_cursor = QueryCursorHandle::new(); - for mat in query_cursor.matches( - &config.query, - tree.root_node(), - TextProvider(text.as_rope()), - ) { - let content_ranges = mat - .nodes_for_capture_index(config.content_capture_ix) - .map(|node| node.range()) - .collect::>(); - if content_ranges.is_empty() { - continue; - } - let language_name = config.languages_by_pattern_ix[mat.pattern_index] - .as_ref() - .map(|s| Cow::Borrowed(s.as_ref())) - .or_else(|| { - let ix = config.language_capture_ix?; - let node = mat.nodes_for_capture_index(ix).next()?; - Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) - }); - if let Some(language_name) = language_name { - if let Some(language) = registry.get_language(language_name.as_ref()) { - output.push_back((Some(id), language, content_ranges)) + while let Some(step) = queue.pop() { + match step { + ReparseStep::CreateLayer { + depth, + language, + ranges, + } => { + let range; + let start_point; + let start_byte; + let end_byte; + if let Some((first, last)) = ranges.first().zip(ranges.last()) { + start_point = first.start_point; + start_byte = first.start_byte; + end_byte = last.end_byte; + range = text.anchor_before(start_byte)..text.anchor_after(end_byte); + } else { + start_point = Point::zero().to_ts_point(); + start_byte = 0; + end_byte = text.len(); + range = Anchor::MIN..Anchor::MAX; + }; + + let target = (Depth(depth), range.clone()); + if target.cmp(cursor.start(), &text).is_gt() { + if current_changes.is_empty() { + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, &text); + } else { + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.start.cmp(&range.end, text).is_ge() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + } + } + + let mut old_layer = 
cursor.item(); + if let Some(layer) = old_layer { + if layer.range.to_offset(text) == (start_byte..end_byte) { + cursor.next(&text); + } else { + old_layer = None; + } + } + + let grammar = if let Some(grammar) = language.grammar.as_deref() { + grammar + } else { + continue; + }; + + let tree; + let changed_ranges; + if let Some(old_layer) = old_layer { + tree = parse_text( + grammar, + text.as_rope(), + Some(old_layer.tree.clone()), + ranges, + ); + + changed_ranges = old_layer + .tree + .changed_ranges(&tree) + .map(|r| r.start_byte..r.end_byte) + .collect(); + } else { + tree = parse_text(grammar, text.as_rope(), None, ranges); + changed_ranges = vec![0..end_byte - start_byte]; + } + + layers.push( + SyntaxLayer { + depth, + range, + tree: tree.clone(), + language: language.clone(), + }, + &text, + ); + + if let (Some((config, registry)), false) = ( + grammar.injection_config.as_ref().zip(registry.as_ref()), + changed_ranges.is_empty(), + ) { + let depth = depth + 1; + queue.extend(changed_ranges.iter().flat_map(|range| { + let id = post_inc(&mut next_change_id); + let range = start_byte + range.start..start_byte + range.end; + [ + ReparseStep::EnterChangedRange { + id, + depth, + range: range.clone(), + }, + ReparseStep::LeaveChangedRange { + id, + depth, + range: range.clone(), + }, + ] + })); + + get_injections( + config, + text, + &tree, + registry, + depth, + start_byte, + Point::from_ts_point(start_point), + &changed_ranges, + &mut queue, + ); + } + } + ReparseStep::EnterChangedRange { id, depth, range } => { + let range = text.anchor_before(range.start)..text.anchor_after(range.end); + if current_changes.is_empty() { + let target = (Depth(depth), range.start..Anchor::MAX); + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, text); + } else { + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.end.cmp(&range.start, text).is_gt() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + } + + current_changes.insert(id, range); + } + ReparseStep::LeaveChangedRange { id, depth, range } => { + let range = text.anchor_before(range.start)..text.anchor_after(range.end); + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.start.cmp(&range.end, text).is_ge() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + + current_changes.remove(&id); } } } - } - fn parse_text( - grammar: &Grammar, - text: &Rope, - old_tree: Option, - ranges: Vec, - ) -> Tree { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - let mut chunks = text.chunks_in_range(0..text.len()); - parser - .set_included_ranges(&ranges) - .expect("overlapping ranges"); - parser - .set_language(grammar.ts_language) - .expect("incompatible grammar"); - parser - .parse_with( - &mut move |offset, _| { - chunks.seek(offset); - chunks.next().unwrap_or("").as_bytes() - }, - old_tree.as_ref(), - ) - .expect("invalid language") - }) + let slice = cursor.suffix(&text); + layers.push_tree(slice, &text); + drop(cursor); + self.layers = layers; } -} -impl SyntaxMapSnapshot { pub fn layers_for_range<'a, T: ToOffset>( &self, range: Range, @@ -211,17 +437,184 @@ impl SyntaxMapSnapshot { } } +fn parse_text( + grammar: &Grammar, + text: &Rope, + old_tree: Option, + mut ranges: Vec, +) -> Tree { + let (start_byte, start_point) = ranges + 
.first() + .map(|range| (range.start_byte, Point::from_ts_point(range.start_point))) + .unwrap_or_default(); + + for range in &mut ranges { + range.start_byte -= start_byte; + range.end_byte -= start_byte; + range.start_point = (Point::from_ts_point(range.start_point) - start_point).to_ts_point(); + range.end_point = (Point::from_ts_point(range.end_point) - start_point).to_ts_point(); + } + + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + let mut chunks = text.chunks_in_range(start_byte..text.len()); + parser + .set_included_ranges(&ranges) + .expect("overlapping ranges"); + parser + .set_language(grammar.ts_language) + .expect("incompatible grammar"); + parser + .parse_with( + &mut move |offset, _| { + chunks.seek(start_byte + offset); + chunks.next().unwrap_or("").as_bytes() + }, + old_tree.as_ref(), + ) + .expect("invalid language") + }) +} + +fn get_injections( + config: &InjectionConfig, + text: &BufferSnapshot, + tree: &Tree, + language_registry: &LanguageRegistry, + depth: usize, + start_byte: usize, + start_point: Point, + query_ranges: &[Range], + stack: &mut BinaryHeap, +) -> bool { + let mut result = false; + let mut query_cursor = QueryCursorHandle::new(); + let mut prev_match = None; + for query_range in query_ranges { + query_cursor.set_byte_range(query_range.start..query_range.end); + for mat in query_cursor.matches( + &config.query, + tree.root_node(), + TextProvider(text.as_rope()), + ) { + let content_ranges = mat + .nodes_for_capture_index(config.content_capture_ix) + .map(|node| tree_sitter::Range { + start_byte: start_byte + node.start_byte(), + end_byte: start_byte + node.end_byte(), + start_point: (start_point + Point::from_ts_point(node.start_position())) + .to_ts_point(), + end_point: (start_point + Point::from_ts_point(node.end_position())) + .to_ts_point(), + }) + .collect::>(); + if content_ranges.is_empty() { + continue; + } + + // Avoid duplicate matches if two changed ranges intersect the same injection. 
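            // An injection node can intersect more than one changed range, in which
            // case the same match would be yielded once per range. The pattern index
            // and content byte range of the previous match are remembered so that
            // such back-to-back duplicates can be skipped.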
+ let content_range = + content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte; + if let Some((last_pattern_ix, last_range)) = &prev_match { + if mat.pattern_index == *last_pattern_ix && content_range == *last_range { + continue; + } + } + prev_match = Some((mat.pattern_index, content_range)); + + let language_name = config.languages_by_pattern_ix[mat.pattern_index] + .as_ref() + .map(|s| Cow::Borrowed(s.as_ref())) + .or_else(|| { + let ix = config.language_capture_ix?; + let node = mat.nodes_for_capture_index(ix).next()?; + Some(Cow::Owned( + text.text_for_range( + start_byte + node.start_byte()..start_byte + node.end_byte(), + ) + .collect(), + )) + }); + + if let Some(language_name) = language_name { + if let Some(language) = language_registry.get_language(language_name.as_ref()) { + result = true; + stack.push(ReparseStep::CreateLayer { + depth, + language, + ranges: content_ranges, + }) + } + } + } + } + result +} + +fn layer_is_changed( + layer: &SyntaxLayer, + text: &BufferSnapshot, + changed_ranges: &HashMap>, +) -> bool { + changed_ranges.values().any(|range| { + let is_before_layer = range.end.cmp(&layer.range.start, text).is_le(); + let is_after_layer = range.start.cmp(&layer.range.end, text).is_ge(); + !is_before_layer && !is_after_layer + }) +} + impl std::ops::Deref for SyntaxMap { - type Target = SyntaxMapSnapshot; + type Target = SyntaxSnapshot; fn deref(&self) -> &Self::Target { &self.snapshot } } +impl ReparseStep { + fn sort_key(&self) -> (usize, Range) { + match self { + ReparseStep::CreateLayer { depth, ranges, .. } => ( + *depth, + ranges.first().map_or(0, |r| r.start_byte) + ..ranges.last().map_or(usize::MAX, |r| r.end_byte), + ), + ReparseStep::EnterChangedRange { depth, range, .. } => { + (*depth, range.start..usize::MAX) + } + ReparseStep::LeaveChangedRange { depth, range, .. 
} => (*depth, range.end..usize::MAX), + } + } +} + +impl PartialEq for ReparseStep { + fn eq(&self, _: &Self) -> bool { + false + } +} + +impl Eq for ReparseStep {} + +impl PartialOrd for ReparseStep { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl Ord for ReparseStep { + fn cmp(&self, other: &Self) -> Ordering { + let (depth_a, range_a) = self.sort_key(); + let (depth_b, range_b) = other.sort_key(); + Ord::cmp(&depth_b, &depth_a) + .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) + .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { + max_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, } @@ -232,38 +625,49 @@ impl sum_tree::Summary for SyntaxLayerSummary { type Context = BufferSnapshot; fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { - if other.range.start.cmp(&self.range.start, buffer).is_lt() { - self.range.start = other.range.start; - } - if other.range.end.cmp(&self.range.end, buffer).is_gt() { - self.range.end = other.range.end; + if other.max_depth > self.max_depth { + *self = other.clone(); + } else { + if other.range.start.cmp(&self.range.start, buffer).is_lt() { + self.range.start = other.range.start; + } + if other.range.end.cmp(&self.range.end, buffer).is_gt() { + self.range.end = other.range.end; + } + self.last_layer_range = other.last_layer_range.clone(); } - self.last_layer_range = other.last_layer_range.clone(); } } -impl Default for SyntaxLayerRange { - fn default() -> Self { - Self(Anchor::MIN..Anchor::MAX) +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for Depth { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, _: &BufferSnapshot) -> Ordering { + Ord::cmp(&self.0, &cursor_location.max_depth) } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerRange> for SyntaxLayerRange { - fn cmp(&self, cursor_location: &Self, buffer: &BufferSnapshot) -> Ordering { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, MaxPosition) { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { self.0 - .start - .cmp(&cursor_location.0.start, buffer) - .then_with(|| cursor_location.0.end.cmp(&self.0.end, buffer)) + .cmp(&cursor_location, text) + .then_with(|| (self.1).0.cmp(&cursor_location.range.end, text)) } } -impl<'a> sum_tree::Dimension<'a, SyntaxLayerSummary> for SyntaxLayerRange { - fn add_summary( - &mut self, - summary: &'a SyntaxLayerSummary, - _: &::Context, - ) { - self.0 = summary.last_layer_range.clone(); +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Range) { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { + self.0 + .cmp(&cursor_location, buffer) + .then_with(|| { + self.1 + .start + .cmp(&cursor_location.last_layer_range.start, buffer) + }) + .then_with(|| { + cursor_location + .last_layer_range + .end + .cmp(&self.1.end, buffer) + }) } } @@ -272,8 +676,9 @@ impl sum_tree::Item for SyntaxLayer { fn summary(&self) -> Self::Summary { SyntaxLayerSummary { - range: self.range.0.clone(), - last_layer_range: self.range.0.clone(), + max_depth: self.depth, + range: self.range.clone(), + last_layer_range: self.range.clone(), } } } @@ -281,8 +686,7 @@ impl sum_tree::Item for SyntaxLayer { impl std::fmt::Debug for SyntaxLayer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SyntaxLayer") - .field("id", 
&self.id) - .field("parent_id", &self.parent_id) + .field("depth", &self.depth) .field("range", &self.range) .field("tree", &self.tree) .finish() @@ -293,13 +697,16 @@ impl std::fmt::Debug for SyntaxLayer { mod tests { use super::*; use crate::LanguageConfig; - use gpui::MutableAppContext; use text::{Buffer, Point}; use unindent::Unindent as _; #[gpui::test] - fn test_syntax_map(cx: &mut MutableAppContext) { - let buffer = Buffer::new( + fn test_syntax_map_layers_for_range() { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + + let mut buffer = Buffer::new( 0, 0, r#" @@ -314,57 +721,80 @@ mod tests { .unindent(), ); - let executor = cx.background().clone(); - let registry = Arc::new(LanguageRegistry::test()); - let language = Arc::new(rust_lang()); - let snapshot = buffer.snapshot(); - registry.add(language.clone()); - - let syntax_map = SyntaxMap::new(executor, registry, language, snapshot.clone(), None); - - let layers = syntax_map.layers_for_range(Point::new(0, 0)..Point::new(0, 1), &snapshot); - assert_layers( - &layers, - &["(source_file (function_item name: (identifier)..."], - ); + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(language.clone(), &buffer); - let layers = syntax_map.layers_for_range(Point::new(2, 0)..Point::new(2, 0), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 0)..Point::new(2, 0), &[ "...(function_item ... (block (expression_statement (macro_invocation...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(2, 14)..Point::new(2, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), &[ "...(function_item ...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", "...(array_expression (struct_expression ...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(3, 14)..Point::new(3, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 14)..Point::new(3, 16), &[ "...(function_item ...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", "...(array_expression (field_expression ...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(5, 12)..Point::new(5, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(5, 12)..Point::new(5, 16), &[ "...(function_item ...", "...(call_expression ... (arguments (closure_expression ...", ], ); + + // Replace a vec! macro invocation with a plain slice, removing a syntactic layer. + let macro_name_range = range_for_text(&buffer, "vec!"); + buffer.edit([(macro_name_range, "&")]); + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (reference_expression value: (array_expression...", + ], + ); + + // Put the vec! macro back, adding back the syntactic layer. 
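        // Undo restores the macro text; after interpolating and reparsing, the
        // injected Rust layer for the macro's token tree should reappear in the
        // layers asserted below.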
+ buffer.undo(); + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (struct_expression ...", + ], + ); } fn rust_lang() -> Language { @@ -386,7 +816,18 @@ mod tests { .unwrap() } - fn assert_layers(layers: &[(Tree, &Grammar)], expected_layers: &[&str]) { + fn range_for_text(buffer: &Buffer, text: &str) -> Range { + let start = buffer.as_rope().to_string().find(text).unwrap(); + start..start + text.len() + } + + fn assert_layers_for_range( + syntax_map: &SyntaxMap, + buffer: &BufferSnapshot, + range: Range, + expected_layers: &[&str], + ) { + let layers = syntax_map.layers_for_range(range, &buffer); assert_eq!( layers.len(), expected_layers.len(), From 5209e2d68cc1db6e9d39c43432cce6595102e69a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 19 Aug 2022 17:43:33 -0700 Subject: [PATCH 03/22] Fix off-by-one in handling edits, start on more edit unit tests --- crates/language/src/syntax_map.rs | 142 ++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 8 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 41ea5790479ecf7fddc88aac984068e896e584e5..fe84265ec2a479c43f64d58fe5baf8cf11c191ea 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -7,7 +7,7 @@ use std::{ borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset}; +use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{Parser, Tree}; use util::post_inc; @@ -128,7 +128,7 @@ impl SyntaxSnapshot { let mut cursor = self.layers.cursor::(); cursor.next(&text); - for depth in 0..max_depth { + for depth in 0..=max_depth { let mut edits = &edits[..]; layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); @@ -410,11 +410,31 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, &Tree, (usize, Point))> { + self.layers + .iter() + .filter_map(|layer| { + if let Some(grammar) = &layer.language.grammar { + Some(( + grammar.as_ref(), + &layer.tree, + ( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer), + ), + )) + } else { + None + } + }) + .collect() + } + pub fn layers_for_range<'a, T: ToOffset>( &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(Tree, &Grammar)> { + ) -> Vec<(&Grammar, &Tree, (usize, Point))> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -426,9 +446,16 @@ impl SyntaxSnapshot { let mut result = Vec::new(); cursor.next(buffer); - while let Some(item) = cursor.item() { - if let Some(grammar) = &item.language.grammar { - result.push((item.tree.clone(), grammar.as_ref())); + while let Some(layer) = cursor.item() { + if let Some(grammar) = &layer.language.grammar { + result.push(( + grammar.as_ref(), + &layer.tree, + ( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer), + ), + )); } cursor.next(buffer) } @@ -698,7 +725,9 @@ mod tests { use super::*; use crate::LanguageConfig; use text::{Buffer, Point}; + use tree_sitter::Query; use unindent::Unindent as _; 
+ use util::test::marked_text_ranges; #[gpui::test] fn test_syntax_map_layers_for_range() { @@ -797,6 +826,47 @@ mod tests { ); } + #[gpui::test] + fn test_syntax_map_edits() { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + registry.add(language.clone()); + + let mut buffer = Buffer::new(0, 0, "".into()); + syntax_map.reparse(language.clone(), &buffer); + + edit_buffer_n( + &mut buffer, + &[ + "«fn a() { dbg }»", + "fn a() { dbg«!» }", + "fn a() { dbg!«()» }", + "fn a() { dbg!(«b») }", + "fn a() { dbg!(b«.») }", + "fn a() { dbg!(b.«c») }", + "fn a() { dbg!(b.c«()») }", + "fn a() { dbg!(b.c(«vec»)) }", + "fn a() { dbg!(b.c(vec«!»)) }", + "fn a() { dbg!(b.c(vec!«[]»)) }", + "fn a() { dbg!(b.c(vec![«d»])) }", + "fn a() { dbg!(b.c(vec![d«.»])) }", + "fn a() { dbg!(b.c(vec![d.«e»])) }", + ], + ); + + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_node_ranges( + &syntax_map, + &buffer, + "(field_identifier) @_", + "fn a() { dbg!(b.«c»(vec![d.«e»])) }", + ); + } + fn rust_lang() -> Language { Language::new( LanguageConfig { @@ -833,8 +903,10 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, (layer, expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { - let actual_s_exp = layer.0.root_node().to_sexp(); + for (i, ((_, tree, _), expected_s_exp)) in + layers.iter().zip(expected_layers.iter()).enumerate() + { + let actual_s_exp = tree.root_node().to_sexp(); assert!( string_contains_sequence( &actual_s_exp, @@ -845,6 +917,60 @@ mod tests { } } + fn assert_node_ranges( + syntax_map: &SyntaxMap, + buffer: &BufferSnapshot, + query: &str, + marked_string: &str, + ) { + let mut cursor = QueryCursorHandle::new(); + let mut actual_ranges = Vec::>::new(); + for (grammar, tree, (start_byte, _)) in syntax_map.layers(buffer) { + let query = Query::new(grammar.ts_language, query).unwrap(); + for (mat, ix) in + cursor.captures(&query, tree.root_node(), TextProvider(buffer.as_rope())) + { + let range = mat.captures[ix].node.byte_range(); + actual_ranges.push(start_byte + range.start..start_byte + range.end); + } + } + + let (text, expected_ranges) = marked_text_ranges(marked_string, false); + assert_eq!(text, buffer.text()); + assert_eq!(actual_ranges, expected_ranges); + } + + fn edit_buffer_n(buffer: &mut Buffer, marked_strings: &[&str]) { + for marked_string in marked_strings { + edit_buffer(buffer, marked_string); + } + } + + fn edit_buffer(buffer: &mut Buffer, marked_string: &str) { + let old_text = buffer.text(); + let (new_text, mut ranges) = marked_text_ranges(marked_string, false); + assert_eq!(ranges.len(), 1); + + let inserted_range = ranges.pop().unwrap(); + let inserted_text = new_text[inserted_range.clone()].to_string(); + let deleted_len = (inserted_range.len() as isize + old_text.len() as isize + - new_text.len() as isize) as usize; + let deleted_range = inserted_range.start..inserted_range.start + deleted_len; + + assert_eq!( + old_text[..deleted_range.start], + new_text[..inserted_range.start], + "invalid edit", + ); + assert_eq!( + old_text[deleted_range.end..], + new_text[inserted_range.end..], + "invalid edit", + ); + + buffer.edit([(deleted_range, inserted_text)]); + } + pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { let mut last_part_end = 0; for part in parts { From e8548e7732e4f48d9f5c5249d75fca0cb0ec2441 Mon Sep 17 00:00:00 2001 From: 
Max Brunsfeld Date: Sun, 21 Aug 2022 11:36:17 -0700 Subject: [PATCH 04/22] Restructure handling of changed regions when reparsing --- crates/language/src/syntax_map.rs | 462 +++++++++++++++--------------- 1 file changed, 231 insertions(+), 231 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index fe84265ec2a479c43f64d58fe5baf8cf11c191ea..71ac4d29590a5f32d3742a9d498ccde43b4e3a09 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -2,14 +2,12 @@ use crate::{ Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, }; -use collections::HashMap; use std::{ borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{Parser, Tree}; -use util::post_inc; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); @@ -42,28 +40,26 @@ struct SyntaxLayerSummary { last_layer_range: Range, } -#[derive(Clone, Debug)] -struct Depth(usize); +#[derive(Debug)] +struct DepthAndRange(usize, Range); -#[derive(Clone, Debug)] -struct MaxPosition(Anchor); +#[derive(Debug)] +struct DepthAndMaxPosition(usize, Anchor); -enum ReparseStep { - CreateLayer { - depth: usize, - language: Arc, - ranges: Vec, - }, - EnterChangedRange { - id: usize, - depth: usize, - range: Range, - }, - LeaveChangedRange { - id: usize, - depth: usize, - range: Range, - }, +#[derive(Debug)] +struct DepthAndRangeOrMaxPosition(usize, Range, Anchor); + +struct ReparseStep { + depth: usize, + language: Arc, + ranges: Vec, + range: Range, +} + +#[derive(Debug, PartialEq, Eq)] +struct ChangedRegion { + depth: usize, + range: Range, } impl SyntaxMap { @@ -130,7 +126,16 @@ impl SyntaxSnapshot { for depth in 0..=max_depth { let mut edits = &edits[..]; - layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); + if cursor.start().max_depth < depth { + layers.push_tree( + cursor.slice( + &DepthAndRange(depth, Anchor::MIN..Anchor::MAX), + Bias::Left, + text, + ), + text, + ); + } while let Some(layer) = cursor.item() { let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ @@ -150,10 +155,7 @@ impl SyntaxSnapshot { if first_edit.new.start.0 > layer_range.end.0 { layers.push_tree( cursor.slice( - &( - Depth(depth), - MaxPosition(text.anchor_before(first_edit.new.start.0)), - ), + &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)), Bias::Left, text, ), @@ -183,8 +185,8 @@ impl SyntaxSnapshot { } // Apply any edits that intersect this layer to the layer's syntax tree. 
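                    // In the restructured version below, both branches only build the
                    // InputEdit (tree_edit); it is applied once via layer.tree.edit, and
                    // the loop still stops after handling an edit that begins before the
                    // layer's start, so the behavior appears unchanged.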
- if edit.new.start.0 >= start_byte { - layer.tree.edit(&tree_sitter::InputEdit { + let tree_edit = if edit.new.start.0 >= start_byte { + tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte + (edit.old.end.0 - edit.old.start.0), @@ -194,16 +196,20 @@ impl SyntaxSnapshot { + (edit.old.end.1 - edit.old.start.1)) .to_ts_point(), new_end_position: (edit.new.end.1 - start_point).to_ts_point(), - }); + } } else { - layer.tree.edit(&tree_sitter::InputEdit { + tree_sitter::InputEdit { start_byte: 0, old_end_byte: edit.new.end.0 - start_byte, new_end_byte: 0, start_position: Default::default(), old_end_position: (edit.new.end.1 - start_point).to_ts_point(), new_end_position: Default::default(), - }); + } + }; + + layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { break; } } @@ -228,184 +234,157 @@ impl SyntaxSnapshot { cursor.next(&text); let mut layers = SumTree::new(); - let mut next_change_id = 0; - let mut current_changes = HashMap::default(); + let mut changed_regions = Vec::::new(); let mut queue = BinaryHeap::new(); - queue.push(ReparseStep::CreateLayer { + queue.push(ReparseStep { depth: 0, language: language.clone(), ranges: Vec::new(), + range: Anchor::MIN..Anchor::MAX, }); - while let Some(step) = queue.pop() { - match step { - ReparseStep::CreateLayer { - depth, - language, - ranges, - } => { - let range; - let start_point; - let start_byte; - let end_byte; - if let Some((first, last)) = ranges.first().zip(ranges.last()) { - start_point = first.start_point; - start_byte = first.start_byte; - end_byte = last.end_byte; - range = text.anchor_before(start_byte)..text.anchor_after(end_byte); - } else { - start_point = Point::zero().to_ts_point(); - start_byte = 0; - end_byte = text.len(); - range = Anchor::MIN..Anchor::MAX; - }; - - let target = (Depth(depth), range.clone()); - if target.cmp(cursor.start(), &text).is_gt() { - if current_changes.is_empty() { - let slice = cursor.slice(&target, Bias::Left, text); - layers.push_tree(slice, &text); - } else { - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.start.cmp(&range.end, text).is_ge() - { - break; - } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); - } - } + loop { + let step = queue.pop(); + let (depth, range) = if let Some(step) = &step { + (step.depth, step.range.clone()) + } else { + (cursor.start().max_depth, Anchor::MAX..Anchor::MAX) + }; + + let target = DepthAndRange(depth, range.clone()); + if target.cmp(cursor.start(), &text).is_gt() { + let change_start_anchor = changed_regions + .first() + .map_or(Anchor::MAX, |region| region.range.start); + let seek_target = + DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor); + let slice = cursor.slice(&seek_target, Bias::Left, text); + layers.push_tree(slice, &text); + + while let Some(layer) = cursor.item() { + if target.cmp(&cursor.end(text), text).is_le() { + break; } - - let mut old_layer = cursor.item(); - if let Some(layer) = old_layer { - if layer.range.to_offset(text) == (start_byte..end_byte) { - cursor.next(&text); - } else { - old_layer = None; + if layer_is_changed(layer, text, &changed_regions) { + let region = ChangedRegion { + depth: depth + 1, + range: layer.range.clone(), + }; + if let Err(i) = + changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) + { + changed_regions.insert(i, region); } - } - - let grammar = if let Some(grammar) = 
language.grammar.as_deref() { - grammar } else { - continue; - }; - - let tree; - let changed_ranges; - if let Some(old_layer) = old_layer { - tree = parse_text( - grammar, - text.as_rope(), - Some(old_layer.tree.clone()), - ranges, - ); - - changed_ranges = old_layer - .tree - .changed_ranges(&tree) - .map(|r| r.start_byte..r.end_byte) - .collect(); - } else { - tree = parse_text(grammar, text.as_rope(), None, ranges); - changed_ranges = vec![0..end_byte - start_byte]; + layers.push(layer.clone(), text); } - layers.push( - SyntaxLayer { - depth, - range, - tree: tree.clone(), - language: language.clone(), - }, - &text, - ); - - if let (Some((config, registry)), false) = ( - grammar.injection_config.as_ref().zip(registry.as_ref()), - changed_ranges.is_empty(), - ) { - let depth = depth + 1; - queue.extend(changed_ranges.iter().flat_map(|range| { - let id = post_inc(&mut next_change_id); - let range = start_byte + range.start..start_byte + range.end; - [ - ReparseStep::EnterChangedRange { - id, - depth, - range: range.clone(), - }, - ReparseStep::LeaveChangedRange { - id, - depth, - range: range.clone(), - }, - ] - })); - - get_injections( - config, - text, - &tree, - registry, - depth, - start_byte, - Point::from_ts_point(start_point), - &changed_ranges, - &mut queue, - ); - } + cursor.next(text); } - ReparseStep::EnterChangedRange { id, depth, range } => { - let range = text.anchor_before(range.start)..text.anchor_after(range.end); - if current_changes.is_empty() { - let target = (Depth(depth), range.start..Anchor::MAX); - let slice = cursor.slice(&target, Bias::Left, text); - layers.push_tree(slice, text); - } else { - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.end.cmp(&range.start, text).is_gt() - { - break; - } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); - } - } - current_changes.insert(id, range); + changed_regions.retain(|region| { + region.depth > depth + || (region.depth == depth + && region.range.end.cmp(&range.start, text).is_gt()) + }); + } + + let (ranges, language) = if let Some(step) = step { + (step.ranges, step.language) + } else { + break; + }; + + let start_point; + let start_byte; + let end_byte; + if let Some((first, last)) = ranges.first().zip(ranges.last()) { + start_point = first.start_point; + start_byte = first.start_byte; + end_byte = last.end_byte; + } else { + start_point = Point::zero().to_ts_point(); + start_byte = 0; + end_byte = text.len(); + }; + + let mut old_layer = cursor.item(); + if let Some(layer) = old_layer { + if layer.range.to_offset(text) == (start_byte..end_byte) { + cursor.next(&text); + } else { + old_layer = None; } - ReparseStep::LeaveChangedRange { id, depth, range } => { - let range = text.anchor_before(range.start)..text.anchor_after(range.end); - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.start.cmp(&range.end, text).is_ge() - { - break; - } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); - } + } + + let grammar = if let Some(grammar) = language.grammar.as_deref() { + grammar + } else { + continue; + }; + + let tree; + let changed_ranges; + if let Some(old_layer) = old_layer { + tree = parse_text( + grammar, + text.as_rope(), + Some(old_layer.tree.clone()), + ranges, + ); - current_changes.remove(&id); + changed_ranges = old_layer + .tree + .changed_ranges(&tree) + .map(|r| 
r.start_byte..r.end_byte) + .collect(); + } else { + tree = parse_text(grammar, text.as_rope(), None, ranges); + changed_ranges = vec![0..end_byte - start_byte]; + } + + layers.push( + SyntaxLayer { + depth, + range, + tree: tree.clone(), + language: language.clone(), + }, + &text, + ); + + if let (Some((config, registry)), false) = ( + grammar.injection_config.as_ref().zip(registry.as_ref()), + changed_ranges.is_empty(), + ) { + let depth = depth + 1; + + for range in &changed_ranges { + let region = ChangedRegion { + depth, + range: text.anchor_before(range.start)..text.anchor_after(range.end), + }; + if let Err(i) = + changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) + { + changed_regions.insert(i, region); + } } + + get_injections( + config, + text, + &tree, + registry, + depth, + start_byte, + Point::from_ts_point(start_point), + &changed_ranges, + &mut queue, + ); } } - let slice = cursor.suffix(&text); - layers.push_tree(slice, &text); drop(cursor); self.layers = layers; } @@ -512,7 +491,7 @@ fn get_injections( start_byte: usize, start_point: Point, query_ranges: &[Range], - stack: &mut BinaryHeap, + queue: &mut BinaryHeap, ) -> bool { let mut result = false; let mut query_cursor = QueryCursorHandle::new(); @@ -547,7 +526,7 @@ fn get_injections( continue; } } - prev_match = Some((mat.pattern_index, content_range)); + prev_match = Some((mat.pattern_index, content_range.clone())); let language_name = config.languages_by_pattern_ix[mat.pattern_index] .as_ref() @@ -566,10 +545,13 @@ fn get_injections( if let Some(language_name) = language_name { if let Some(language) = language_registry.get_language(language_name.as_ref()) { result = true; - stack.push(ReparseStep::CreateLayer { + let range = text.anchor_before(content_range.start) + ..text.anchor_after(content_range.end); + queue.push(ReparseStep { depth, language, ranges: content_ranges, + range, }) } } @@ -581,11 +563,11 @@ fn get_injections( fn layer_is_changed( layer: &SyntaxLayer, text: &BufferSnapshot, - changed_ranges: &HashMap>, + changed_regions: &[ChangedRegion], ) -> bool { - changed_ranges.values().any(|range| { - let is_before_layer = range.end.cmp(&layer.range.start, text).is_le(); - let is_after_layer = range.start.cmp(&layer.range.end, text).is_ge(); + changed_regions.iter().any(|region| { + let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); + let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); !is_before_layer && !is_after_layer }) } @@ -598,22 +580,6 @@ impl std::ops::Deref for SyntaxMap { } } -impl ReparseStep { - fn sort_key(&self) -> (usize, Range) { - match self { - ReparseStep::CreateLayer { depth, ranges, .. } => ( - *depth, - ranges.first().map_or(0, |r| r.start_byte) - ..ranges.last().map_or(usize::MAX, |r| r.end_byte), - ), - ReparseStep::EnterChangedRange { depth, range, .. } => { - (*depth, range.start..usize::MAX) - } - ReparseStep::LeaveChangedRange { depth, range, .. 
} => (*depth, range.end..usize::MAX), - } - } -} - impl PartialEq for ReparseStep { fn eq(&self, _: &Self) -> bool { false @@ -630,14 +596,32 @@ impl PartialOrd for ReparseStep { impl Ord for ReparseStep { fn cmp(&self, other: &Self) -> Ordering { - let (depth_a, range_a) = self.sort_key(); - let (depth_b, range_b) = other.sort_key(); - Ord::cmp(&depth_b, &depth_a) + let range_a = self.range(); + let range_b = other.range(); + Ord::cmp(&other.depth, &self.depth) .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) } } +impl ReparseStep { + fn range(&self) -> Range { + let start = self.ranges.first().map_or(0, |r| r.start_byte); + let end = self.ranges.last().map_or(0, |r| r.end_byte); + start..end + } +} + +impl ChangedRegion { + fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { + let range_a = &self.range; + let range_b = &other.range; + Ord::cmp(&self.depth, &other.depth) + .then_with(|| range_a.start.cmp(&range_b.start, buffer)) + .then_with(|| range_b.end.cmp(&range_a.end, buffer)) + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { @@ -666,29 +650,45 @@ impl sum_tree::Summary for SyntaxLayerSummary { } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for Depth { - fn cmp(&self, cursor_location: &SyntaxLayerSummary, _: &BufferSnapshot) -> Ordering { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRange { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { Ord::cmp(&self.0, &cursor_location.max_depth) + .then_with(|| { + self.1 + .start + .cmp(&cursor_location.last_layer_range.start, buffer) + }) + .then_with(|| { + cursor_location + .last_layer_range + .end + .cmp(&self.1.end, buffer) + }) } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, MaxPosition) { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { - self.0 - .cmp(&cursor_location, text) - .then_with(|| (self.1).0.cmp(&cursor_location.range.end, text)) + Ord::cmp(&self.0, &cursor_location.max_depth) + .then_with(|| self.1.cmp(&cursor_location.range.end, text)) } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Range) { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - self.0 - .cmp(&cursor_location, buffer) - .then_with(|| { - self.1 - .start - .cmp(&cursor_location.last_layer_range.start, buffer) - }) + let cmp = Ord::cmp(&self.0, &cursor_location.max_depth); + if cmp.is_ne() { + return cmp; + } + + let cmp = self.2.cmp(&cursor_location.range.end, buffer); + if cmp.is_gt() { + return Ordering::Greater; + } + + self.1 + .start + .cmp(&cursor_location.last_layer_range.start, buffer) .then_with(|| { cursor_location .last_layer_range From 58fda5ac1c0c05377f2a846adb88b8b335ef0732 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 22 Aug 2022 14:49:16 -0700 Subject: [PATCH 05/22] Test more editing patterns of SyntaxMap, fix bugs --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/language/src/syntax_map.rs | 530 +++++++++++++++++++----------- 3 files changed, 339 insertions(+), 195 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 680e40a7f9fbdf69c63689c428af5c57714a6d79..2a6d594f667bf07e39a2fbafdc3f78d91033b09f 100644 --- 
a/Cargo.lock
+++ b/Cargo.lock
@@ -5842,7 +5842,7 @@ dependencies = [
 [[package]]
 name = "tree-sitter"
 version = "0.20.8"
-source = "git+https://github.com/tree-sitter/tree-sitter?rev=1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0#1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0"
+source = "git+https://github.com/tree-sitter/tree-sitter?rev=477b6677537e89c7bdff14ce84dad6d23a6415bb#477b6677537e89c7bdff14ce84dad6d23a6415bb"
 dependencies = [
  "cc",
  "regex",
diff --git a/Cargo.toml b/Cargo.toml
index 74c36d7006ce8fdb5afc968c13718ae82a5f3d9a..b4df3fd101913dad11bac7660602c7cce4e6464d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ default-members = ["crates/zed"]
 resolver = "2"
 
 [patch.crates-io]
-tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0" }
+tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "477b6677537e89c7bdff14ce84dad6d23a6415bb" }
 async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
 
 # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457
diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs
index 71ac4d29590a5f32d3742a9d498ccde43b4e3a09..e6198ccb5bd7a2b77e78474e201b20d03333494e 100644
--- a/crates/language/src/syntax_map.rs
+++ b/crates/language/src/syntax_map.rs
@@ -7,7 +7,7 @@ use std::{
 };
 use sum_tree::{Bias, SeekTarget, SumTree};
 use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
-use tree_sitter::{Parser, Tree};
+use tree_sitter::{Node, Parser, Tree};
 
 thread_local! {
     static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
@@ -15,7 +15,8 @@ thread_local! {
 
 #[derive(Default)]
 pub struct SyntaxMap {
-    version: clock::Global,
+    parsed_version: clock::Global,
+    interpolated_version: clock::Global,
     snapshot: SyntaxSnapshot,
     language_registry: Option<Arc<LanguageRegistry>>,
 }
@@ -40,14 +41,14 @@ struct SyntaxLayerSummary {
     last_layer_range: Range<Anchor>,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 struct DepthAndRange(usize, Range<Anchor>);
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 struct DepthAndMaxPosition(usize, Anchor);
 
-#[derive(Debug)]
-struct DepthAndRangeOrMaxPosition(usize, Range<Anchor>, Anchor);
+#[derive(Clone, Debug)]
+struct DepthAndRangeOrMaxPosition(DepthAndRange, DepthAndMaxPosition);
 
 struct ReparseStep {
     depth: usize,
@@ -76,44 +77,29 @@ impl SyntaxMap {
     }
 
     pub fn interpolate(&mut self, text: &BufferSnapshot) {
-        self.snapshot.interpolate(&self.version, text);
-        self.version = text.version.clone();
+        self.snapshot.interpolate(&self.interpolated_version, text);
+        self.interpolated_version = text.version.clone();
     }
 
     pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
-        self.version = text.version.clone();
-        self.snapshot
-            .reparse(self.language_registry.clone(), language, text);
+        if !self.interpolated_version.observed_all(&text.version) {
+            self.interpolate(text);
+        }
+
+        self.snapshot.reparse(
+            &self.parsed_version,
+            text,
+            self.language_registry.clone(),
+            language,
+        );
+        self.parsed_version = text.version.clone();
    }
 }
 
-// Assumptions:
-// * The maximum depth is small (< 5)
-// * For a given depth, the number of layers that touch a given range
-// is small (usually only 1)
-
-// |change|
-// 0 (............................................................)
-// 1 (...............................................)
-// 1 (................)
-// 1 (.......)
-// 2 (....)
-// 2 (....)
-// 2 (.......)
-// 2 (...)
-// 2 (.........)
-// 2 (...)
-// 3 (.)
-// 3 (.)
-// 3 (..)
-// 3 (..)
-// 3 (..)
-// 3 (.)
-
 impl SyntaxSnapshot {
-    pub fn interpolate(&mut self, current_version: &clock::Global, text: &BufferSnapshot) {
+    pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
         let edits = text
-            .edits_since::<(usize, Point)>(&current_version)
+            .edits_since::<(usize, Point)>(&from_version)
             .collect::<Vec<_>>();
         if edits.is_empty() {
             return;
@@ -152,16 +138,9 @@ impl SyntaxSnapshot {
             } else {
                 break;
             };
-            if first_edit.new.start.0 > layer_range.end.0 {
-                layers.push_tree(
-                    cursor.slice(
-                        &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)),
-                        Bias::Left,
-                        text,
-                    ),
-                    text,
-                );
-                continue;
+            let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0));
+            if target.cmp(&cursor.start(), text).is_gt() {
+                layers.push_tree(cursor.slice(&target, Bias::Left, text), text);
             }
 
             // Preserve any layers at this depth that follow the last edit.
@@ -226,10 +205,17 @@ impl SyntaxSnapshot {
 
     pub fn reparse(
         &mut self,
+        from_version: &clock::Global,
+        text: &BufferSnapshot,
         registry: Option<Arc<LanguageRegistry>>,
         language: Arc<Language>,
-        text: &BufferSnapshot,
     ) {
+        let edits = text.edits_since::<usize>(from_version).collect::<Vec<_>>();
+        if edits.is_empty() {
+            return;
+        }
+
+        let max_depth = self.layers.summary().max_depth;
         let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
         cursor.next(&text);
         let mut layers = SumTree::new();
@@ -248,44 +234,55 @@ impl SyntaxSnapshot {
             let (depth, range) = if let Some(step) = &step {
                 (step.depth, step.range.clone())
             } else {
-                (cursor.start().max_depth, Anchor::MAX..Anchor::MAX)
+                (max_depth + 1, Anchor::MAX..Anchor::MAX)
             };
 
             let target = DepthAndRange(depth, range.clone());
-            if target.cmp(cursor.start(), &text).is_gt() {
-                let change_start_anchor = changed_regions
-                    .first()
-                    .map_or(Anchor::MAX, |region| region.range.start);
-                let seek_target =
-                    DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor);
-                let slice = cursor.slice(&seek_target, Bias::Left, text);
-                layers.push_tree(slice, &text);
-
-                while let Some(layer) = cursor.item() {
-                    if target.cmp(&cursor.end(text), text).is_le() {
+            let mut done = cursor.item().is_none();
+            while !done && target.cmp(cursor.start(), &text).is_gt() {
+                let bounded_target = DepthAndRangeOrMaxPosition(
+                    target.clone(),
+                    changed_regions
+                        .first()
+                        .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| {
+                            DepthAndMaxPosition(region.depth, region.range.start)
+                        }),
+                );
+                if bounded_target.cmp(&cursor.start(), &text).is_gt() {
+                    let slice = cursor.slice(&bounded_target, Bias::Left, text);
+                    layers.push_tree(slice, &text);
+                }
+
+                while target.cmp(&cursor.end(text), text).is_gt() {
+                    let layer = if let Some(layer) = cursor.item() {
+                        layer
+                    } else {
                         break;
-                    }
+                    };
+
                     if layer_is_changed(layer, text, &changed_regions) {
-                        let region = ChangedRegion {
+                        ChangedRegion {
                             depth: depth + 1,
                             range: layer.range.clone(),
-                        };
-                        if let Err(i) =
-                            changed_regions.binary_search_by(|probe| probe.cmp(&region, text))
-                        {
-                            changed_regions.insert(i, region);
                         }
+                        .insert(text, &mut changed_regions);
                     } else {
                         layers.push(layer.clone(), text);
                     }
-                    cursor.next(text);
                 }
 
+                done = true;
                 changed_regions.retain(|region| {
-                    region.depth > depth
+                    if region.depth > depth
                         || (region.depth == depth
                             && region.range.end.cmp(&range.start, text).is_gt())
+                    {
+                        true
+                    } else {
+                        done = false;
+                        false
+                    }
                 });
             }
 
@@ -332,15 +329,19 @@ impl SyntaxSnapshot {
                     Some(old_layer.tree.clone()),
                     ranges,
                 );
-
-                changed_ranges = old_layer
-                    .tree
-                    .changed_ranges(&tree)
-                    .map(|r|
r.start_byte..r.end_byte) - .collect(); + changed_ranges = join_ranges( + edits + .iter() + .map(|e| e.new.clone()) + .filter(|range| range.start < end_byte && range.end > start_byte), + old_layer + .tree + .changed_ranges(&tree) + .map(|r| start_byte + r.start_byte..start_byte + r.end_byte), + ); } else { tree = parse_text(grammar, text.as_rope(), None, ranges); - changed_ranges = vec![0..end_byte - start_byte]; + changed_ranges = vec![start_byte..end_byte]; } layers.push( @@ -358,27 +359,19 @@ impl SyntaxSnapshot { changed_ranges.is_empty(), ) { let depth = depth + 1; - for range in &changed_ranges { - let region = ChangedRegion { + ChangedRegion { depth, range: text.anchor_before(range.start)..text.anchor_after(range.end), - }; - if let Err(i) = - changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) - { - changed_regions.insert(i, region); } + .insert(text, &mut changed_regions); } - get_injections( config, text, - &tree, + tree.root_node_with_offset(start_byte, start_point), registry, depth, - start_byte, - Point::from_ts_point(start_point), &changed_ranges, &mut queue, ); @@ -389,17 +382,16 @@ impl SyntaxSnapshot { self.layers = layers; } - pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, &Tree, (usize, Point))> { + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { self.layers .iter() .filter_map(|layer| { if let Some(grammar) = &layer.language.grammar { Some(( grammar.as_ref(), - &layer.tree, - ( + layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer), + layer.range.start.to_point(buffer).to_ts_point(), ), )) } else { @@ -413,7 +405,7 @@ impl SyntaxSnapshot { &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(&Grammar, &Tree, (usize, Point))> { + ) -> Vec<(&Grammar, Node)> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -429,10 +421,9 @@ impl SyntaxSnapshot { if let Some(grammar) = &layer.language.grammar { result.push(( grammar.as_ref(), - &layer.tree, - ( + layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer), + layer.range.start.to_point(buffer).to_ts_point(), ), )); } @@ -443,6 +434,38 @@ impl SyntaxSnapshot { } } +fn join_ranges( + a: impl Iterator>, + b: impl Iterator>, +) -> Vec> { + let mut result = Vec::>::new(); + let mut a = a.peekable(); + let mut b = b.peekable(); + loop { + let range = match (a.peek(), b.peek()) { + (Some(range_a), Some(range_b)) => { + if range_a.start < range_b.start { + a.next().unwrap() + } else { + b.next().unwrap() + } + } + (None, Some(_)) => b.next().unwrap(), + (Some(_), None) => a.next().unwrap(), + (None, None) => break, + }; + + if let Some(last) = result.last_mut() { + if range.start <= last.end { + last.end = last.end.max(range.end); + continue; + } + } + result.push(range); + } + result +} + fn parse_text( grammar: &Grammar, text: &Rope, @@ -485,11 +508,9 @@ fn parse_text( fn get_injections( config: &InjectionConfig, text: &BufferSnapshot, - tree: &Tree, + node: Node, language_registry: &LanguageRegistry, depth: usize, - start_byte: usize, - start_point: Point, query_ranges: &[Range], queue: &mut BinaryHeap, ) -> bool { @@ -498,21 +519,10 @@ fn get_injections( let mut prev_match = None; for query_range in query_ranges { query_cursor.set_byte_range(query_range.start..query_range.end); - for mat in query_cursor.matches( - &config.query, - tree.root_node(), - TextProvider(text.as_rope()), - ) { + 
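// Editor's aside — an illustrative sketch, not part of the patch: each changed
// range is queried separately here, so the same injection match can be reported
// by two adjacent query ranges. The `prev_match` check just below de-duplicates
// such consecutive repeats, in the same spirit as this stand-alone helper (the
// helper name is invented purely for illustration):
fn dedup_consecutive<T: PartialEq>(items: Vec<T>) -> Vec<T> {
    let mut result = Vec::new();
    for item in items {
        // Skip an item identical to the one we just kept.
        if result.last() != Some(&item) {
            result.push(item);
        }
    }
    result
}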
for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { let content_ranges = mat .nodes_for_capture_index(config.content_capture_ix) - .map(|node| tree_sitter::Range { - start_byte: start_byte + node.start_byte(), - end_byte: start_byte + node.end_byte(), - start_point: (start_point + Point::from_ts_point(node.start_position())) - .to_ts_point(), - end_point: (start_point + Point::from_ts_point(node.end_position())) - .to_ts_point(), - }) + .map(|node| node.range()) .collect::>(); if content_ranges.is_empty() { continue; @@ -534,12 +544,7 @@ fn get_injections( .or_else(|| { let ix = config.language_capture_ix?; let node = mat.nodes_for_capture_index(ix).next()?; - Some(Cow::Owned( - text.text_for_range( - start_byte + node.start_byte()..start_byte + node.end_byte(), - ) - .collect(), - )) + Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) }); if let Some(language_name) = language_name { @@ -566,9 +571,10 @@ fn layer_is_changed( changed_regions: &[ChangedRegion], ) -> bool { changed_regions.iter().any(|region| { + let same_depth = region.depth == layer.depth; let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); - !is_before_layer && !is_after_layer + same_depth && !is_before_layer && !is_after_layer }) } @@ -613,6 +619,12 @@ impl ReparseStep { } impl ChangedRegion { + fn insert(self, text: &BufferSnapshot, set: &mut Vec) { + if let Err(ix) = set.binary_search_by(|probe| probe.cmp(&self, text)) { + set.insert(ix, self); + } + } + fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { let range_a = &self.range; let range_b = &other.range; @@ -676,25 +688,11 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxP impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - let cmp = Ord::cmp(&self.0, &cursor_location.max_depth); - if cmp.is_ne() { - return cmp; - } - - let cmp = self.2.cmp(&cursor_location.range.end, buffer); - if cmp.is_gt() { - return Ordering::Greater; + if self.1.cmp(cursor_location, buffer).is_le() { + return Ordering::Less; + } else { + self.0.cmp(cursor_location, buffer) } - - self.1 - .start - .cmp(&cursor_location.last_layer_range.start, buffer) - .then_with(|| { - cursor_location - .last_layer_range - .end - .cmp(&self.1.end, buffer) - }) } } @@ -827,46 +825,188 @@ mod tests { } #[gpui::test] - fn test_syntax_map_edits() { - let registry = Arc::new(LanguageRegistry::test()); - let language = Arc::new(rust_lang()); - let mut syntax_map = SyntaxMap::new(); - syntax_map.set_language_registry(registry.clone()); - registry.add(language.clone()); + fn test_typing_multiple_new_injections() { + let (buffer, syntax_map) = test_edit_sequence(&[ + "fn a() { dbg }", + "fn a() { dbg«!» }", + "fn a() { dbg!«()» }", + "fn a() { dbg!(«b») }", + "fn a() { dbg!(b«.») }", + "fn a() { dbg!(b.«c») }", + "fn a() { dbg!(b.c«()») }", + "fn a() { dbg!(b.c(«vec»)) }", + "fn a() { dbg!(b.c(vec«!»)) }", + "fn a() { dbg!(b.c(vec!«[]»)) }", + "fn a() { dbg!(b.c(vec![«d»])) }", + "fn a() { dbg!(b.c(vec![d«.»])) }", + "fn a() { dbg!(b.c(vec![d.«e»])) }", + ]); - let mut buffer = Buffer::new(0, 0, "".into()); - syntax_map.reparse(language.clone(), &buffer); + assert_node_ranges( + &syntax_map, + &buffer, + "(field_identifier) @_", + "fn a() { dbg!(b.«c»(vec![d.«e»])) }", + ); + } - 
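// Editor's aside — an illustrative sketch, not part of the patch: the tests
// below describe each editing step with « » markers around freshly inserted
// text (a ˇ marker in later tests denotes a caret and is not handled here).
// The real tests rely on the existing `marked_text_ranges` utility; a helper
// roughly equivalent to this invented one recovers the unmarked text and the
// byte ranges of the inserted spans:
fn parse_marked_text(marked: &str) -> (String, Vec<std::ops::Range<usize>>) {
    let mut text = String::new();
    let mut ranges = Vec::new();
    let mut start = None;
    for ch in marked.chars() {
        match ch {
            '«' => start = Some(text.len()),
            '»' => ranges.push(start.take().unwrap()..text.len()),
            _ => text.push(ch),
        }
    }
    (text, ranges)
}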
edit_buffer_n( - &mut buffer, - &[ - "«fn a() { dbg }»", - "fn a() { dbg«!» }", - "fn a() { dbg!«()» }", - "fn a() { dbg!(«b») }", - "fn a() { dbg!(b«.») }", - "fn a() { dbg!(b.«c») }", - "fn a() { dbg!(b.c«()») }", - "fn a() { dbg!(b.c(«vec»)) }", - "fn a() { dbg!(b.c(vec«!»)) }", - "fn a() { dbg!(b.c(vec!«[]»)) }", - "fn a() { dbg!(b.c(vec![«d»])) }", - "fn a() { dbg!(b.c(vec![d«.»])) }", - "fn a() { dbg!(b.c(vec![d.«e»])) }", - ], + #[gpui::test] + fn test_pasting_new_injection_line_between_others() { + let (buffer, syntax_map) = test_edit_sequence(&[ + " + fn a() { + b!(B {}); + c!(C {}); + d!(D {}); + e!(E {}); + f!(F {}); + } + ", + " + fn a() { + b!(B {}); + c!(C {}); + «g!(G {}); + »d!(D {}); + e!(E {}); + f!(F {}); + } + ", + ]); + + assert_node_ranges( + &syntax_map, + &buffer, + "(struct_expression) @_", + " + fn a() { + b!(«B {}»); + c!(«C {}»); + g!(«G {}»); + d!(«D {}»); + e!(«E {}»); + f!(«F {}»); + } + ", ); + } - syntax_map.interpolate(&buffer); - syntax_map.reparse(language.clone(), &buffer); + #[gpui::test] + fn test_joining_injections_with_child_injections() { + let (buffer, syntax_map) = test_edit_sequence(&[ + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ); + e!( + f![seven.eight], + ); + } + ", + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ˇ f![seven.eight], + ); + } + ", + ]); assert_node_ranges( &syntax_map, &buffer, "(field_identifier) @_", - "fn a() { dbg!(b.«c»(vec![d.«e»])) }", + " + fn a() { + b!( + c![one.«two».«three»], + d![four.«five».«six»], + f![seven.«eight»], + ); + } + ", ); } + #[gpui::test] + fn test_editing_edges_of_injection() { + test_edit_sequence(&[ + " + fn a() { + b!(c!()) + } + ", + " + fn a() { + «d»!(c!()) + } + ", + " + fn a() { + «e»d!(c!()) + } + ", + " + fn a() { + ed!«[»c!()«]» + } + ", + ]); + } + + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + let mut buffer = Buffer::new(0, 0, Default::default()); + + let mut mutated_syntax_map = SyntaxMap::new(); + mutated_syntax_map.set_language_registry(registry.clone()); + mutated_syntax_map.reparse(language.clone(), &buffer); + + for (i, marked_string) in steps.into_iter().enumerate() { + edit_buffer(&mut buffer, &marked_string.unindent()); + + // Reparse the syntax map + mutated_syntax_map.interpolate(&buffer); + mutated_syntax_map.reparse(language.clone(), &buffer); + + // Create a second syntax map from scratch + let mut reference_syntax_map = SyntaxMap::new(); + reference_syntax_map.set_language_registry(registry.clone()); + reference_syntax_map.reparse(language.clone(), &buffer); + + // Compare the mutated syntax map to the new syntax map + let mutated_layers = mutated_syntax_map.layers(&buffer); + let reference_layers = reference_syntax_map.layers(&buffer); + assert_eq!( + mutated_layers.len(), + reference_layers.len(), + "wrong number of layers at step {i}" + ); + for (edited_layer, reference_layer) in + mutated_layers.into_iter().zip(reference_layers.into_iter()) + { + assert_eq!( + edited_layer.1.to_sexp(), + reference_layer.1.to_sexp(), + "different layer at step {i}" + ); + assert_eq!( + edited_layer.1.range(), + reference_layer.1.range(), + "different layer at step {i}" + ); + } + } + + (buffer, mutated_syntax_map) + } + fn rust_lang() -> Language { Language::new( LanguageConfig { @@ -903,10 +1043,10 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, ((_, tree, _), 
expected_s_exp)) in + for (i, ((_, node), expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { - let actual_s_exp = tree.root_node().to_sexp(); + let actual_s_exp = node.to_sexp(); assert!( string_contains_sequence( &actual_s_exp, @@ -925,50 +1065,54 @@ mod tests { ) { let mut cursor = QueryCursorHandle::new(); let mut actual_ranges = Vec::>::new(); - for (grammar, tree, (start_byte, _)) in syntax_map.layers(buffer) { + for (grammar, node) in syntax_map.layers(buffer) { let query = Query::new(grammar.ts_language, query).unwrap(); - for (mat, ix) in - cursor.captures(&query, tree.root_node(), TextProvider(buffer.as_rope())) - { - let range = mat.captures[ix].node.byte_range(); - actual_ranges.push(start_byte + range.start..start_byte + range.end); + for (mat, ix) in cursor.captures(&query, node, TextProvider(buffer.as_rope())) { + actual_ranges.push(mat.captures[ix].node.byte_range()); } } - let (text, expected_ranges) = marked_text_ranges(marked_string, false); + let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false); assert_eq!(text, buffer.text()); assert_eq!(actual_ranges, expected_ranges); } - fn edit_buffer_n(buffer: &mut Buffer, marked_strings: &[&str]) { - for marked_string in marked_strings { - edit_buffer(buffer, marked_string); - } - } - fn edit_buffer(buffer: &mut Buffer, marked_string: &str) { let old_text = buffer.text(); let (new_text, mut ranges) = marked_text_ranges(marked_string, false); - assert_eq!(ranges.len(), 1); + if ranges.is_empty() { + ranges.push(0..new_text.len()); + } - let inserted_range = ranges.pop().unwrap(); - let inserted_text = new_text[inserted_range.clone()].to_string(); - let deleted_len = (inserted_range.len() as isize + old_text.len() as isize - - new_text.len() as isize) as usize; - let deleted_range = inserted_range.start..inserted_range.start + deleted_len; + let mut delta = 0; + let mut edits = Vec::new(); + let mut ranges = ranges.into_iter().peekable(); + + while let Some(inserted_range) = ranges.next() { + let old_start = (inserted_range.start as isize - delta) as usize; + let following_text = if let Some(next_range) = ranges.peek() { + &new_text[inserted_range.end..next_range.start] + } else { + &new_text[inserted_range.end..] + }; + + let inserted_len = inserted_range.len(); + let deleted_len = old_text[old_start..] 
+ .find(following_text) + .expect("invalid edit"); + + let old_range = old_start..old_start + deleted_len; + edits.push((old_range, new_text[inserted_range].to_string())); + delta += inserted_len as isize - deleted_len as isize; + } assert_eq!( - old_text[..deleted_range.start], - new_text[..inserted_range.start], - "invalid edit", - ); - assert_eq!( - old_text[deleted_range.end..], - new_text[inserted_range.end..], - "invalid edit", + old_text.len() as isize + delta, + new_text.len() as isize, + "invalid edit" ); - buffer.edit([(deleted_range, inserted_text)]); + buffer.edit(edits); } pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { From ae9e1338f6c8431ff4fd20c22009a4f46bfe5a30 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 22 Aug 2022 17:52:14 -0700 Subject: [PATCH 06/22] Fix more bugs in syntax map interpolation --- crates/language/src/syntax_map.rs | 179 ++++++++++++++++++------------ 1 file changed, 109 insertions(+), 70 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index e6198ccb5bd7a2b77e78474e201b20d03333494e..a5ffa9964a0a0923e0e92927bdcd086d9baa2280 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -106,96 +106,102 @@ impl SyntaxSnapshot { } let mut layers = SumTree::new(); - let max_depth = self.layers.summary().max_depth; + let mut edits_for_depth = &edits[..]; let mut cursor = self.layers.cursor::(); - cursor.next(&text); + cursor.next(text); - for depth in 0..=max_depth { - let mut edits = &edits[..]; - if cursor.start().max_depth < depth { + 'outer: loop { + let depth = cursor.end(text).max_depth; + + // Preserve any layers at this depth that precede the first edit. + if let Some(first_edit) = edits_for_depth.first() { + let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); + if target.cmp(&cursor.start(), text).is_gt() { + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, text); + } + } + // If this layer follows all of the edits, then preserve it and any + // subsequent layers at this same depth. + else { layers.push_tree( cursor.slice( - &DepthAndRange(depth, Anchor::MIN..Anchor::MAX), + &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), Bias::Left, text, ), text, ); - } + edits_for_depth = &edits[..]; + continue; + }; + + let layer = if let Some(layer) = cursor.item() { + layer + } else { + break; + }; - while let Some(layer) = cursor.item() { - let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ - &layer.range.start, - &layer.range.end, - ]); - let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); - let start_byte = layer_range.start.0; - let start_point = layer_range.start.1; - - // Preserve any layers at this depth that precede the first edit. - let first_edit = if let Some(edit) = edits.first() { - edit + let mut endpoints = text + .summaries_for_anchors::<(usize, Point), _>([&layer.range.start, &layer.range.end]); + let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); + let start_byte = layer_range.start.0; + let start_point = layer_range.start.1; + let end_byte = layer_range.end.0; + + // Ignore edits that end before the start of this layer, and don't consider them + // for any subsequent layers at this same depth. 
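// Editor's aside — an illustrative sketch, not part of the patch: the InputEdit
// constructed below is the buffer-level edit re-expressed relative to the
// layer's own start, since each injected layer's tree believes it begins at
// byte zero. With numbers assumed purely for illustration (a layer starting at
// byte 10 on row 1, column 0, and three bytes inserted at absolute byte 14 on
// that same row), the shift looks like this:
fn example_layer_local_edit() -> tree_sitter::InputEdit {
    use tree_sitter::{InputEdit, Point};
    let layer_start_byte = 10;
    InputEdit {
        start_byte: 14 - layer_start_byte,
        old_end_byte: 14 - layer_start_byte,
        new_end_byte: 17 - layer_start_byte,
        // Positions are shifted the same way: absolute (row 1, col 4) becomes
        // layer-local (row 0, col 4).
        start_position: Point { row: 0, column: 4 },
        old_end_position: Point { row: 0, column: 4 },
        new_end_position: Point { row: 0, column: 7 },
    }
}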
+ loop { + if let Some(edit) = edits_for_depth.first() { + if edit.new.end.0 < start_byte { + edits_for_depth = &edits_for_depth[1..]; + } else { + break; + } } else { - break; - }; - let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); - if target.cmp(&cursor.start(), text).is_gt() { - layers.push_tree(cursor.slice(&target, Bias::Left, text), text); + continue 'outer; } + } - // Preserve any layers at this depth that follow the last edit. - let last_edit = edits.last().unwrap(); - if last_edit.new.end.0 < layer_range.start.0 { + let mut layer = layer.clone(); + for edit in edits_for_depth { + // Ignore any edits that follow this layer. + if edit.new.start.0 > end_byte { break; } - let mut layer = layer.clone(); - for (i, edit) in edits.iter().enumerate().rev() { - // Ignore any edits that start after the end of this layer. - if edit.new.start.0 > layer_range.end.0 { - continue; + // Apply any edits that intersect this layer to the layer's syntax tree. + let tree_edit = if edit.new.start.0 >= start_byte { + tree_sitter::InputEdit { + start_byte: edit.new.start.0 - start_byte, + old_end_byte: edit.new.start.0 - start_byte + + (edit.old.end.0 - edit.old.start.0), + new_end_byte: edit.new.end.0 - start_byte, + start_position: (edit.new.start.1 - start_point).to_ts_point(), + old_end_position: (edit.new.start.1 - start_point + + (edit.old.end.1 - edit.old.start.1)) + .to_ts_point(), + new_end_position: (edit.new.end.1 - start_point).to_ts_point(), } - - // Ignore edits that end before the start of this layer, and don't consider them - // for any subsequent layers at this same depth. - if edit.new.end.0 <= start_byte { - edits = &edits[i + 1..]; - break; + } else { + tree_sitter::InputEdit { + start_byte: 0, + old_end_byte: edit.new.end.0 - start_byte, + new_end_byte: 0, + start_position: Default::default(), + old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + new_end_position: Default::default(), } + }; - // Apply any edits that intersect this layer to the layer's syntax tree. - let tree_edit = if edit.new.start.0 >= start_byte { - tree_sitter::InputEdit { - start_byte: edit.new.start.0 - start_byte, - old_end_byte: edit.new.start.0 - start_byte - + (edit.old.end.0 - edit.old.start.0), - new_end_byte: edit.new.end.0 - start_byte, - start_position: (edit.new.start.1 - start_point).to_ts_point(), - old_end_position: (edit.new.start.1 - start_point - + (edit.old.end.1 - edit.old.start.1)) - .to_ts_point(), - new_end_position: (edit.new.end.1 - start_point).to_ts_point(), - } - } else { - tree_sitter::InputEdit { - start_byte: 0, - old_end_byte: edit.new.end.0 - start_byte, - new_end_byte: 0, - start_position: Default::default(), - old_end_position: (edit.new.end.1 - start_point).to_ts_point(), - new_end_position: Default::default(), - } - }; - - layer.tree.edit(&tree_edit); - if edit.new.start.0 < start_byte { - break; - } + layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { + break; } - - layers.push(layer, text); - cursor.next(text); } + + layers.push(layer, text); + cursor.next(text); } layers.push_tree(cursor.suffix(&text), &text); @@ -958,6 +964,31 @@ mod tests { ]); } + #[gpui::test] + fn test_edits_preceding_and_intersecting_injection() { + test_edit_sequence(&[ + // + "const aaaaaaaaaaaa: B = c!(d(e.f));", + "const aˇa: B = c!(d(eˇ));", + ]); + } + + #[gpui::test] + fn test_non_local_changes_create_injections() { + test_edit_sequence(&[ + " + // a! { + static B: C = d; + // } + ", + " + ˇa! 
{ + static B: C = d; + ˇ} + ", + ]); + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); @@ -1084,12 +1115,20 @@ mod tests { ranges.push(0..new_text.len()); } + assert_eq!( + old_text[..ranges[0].start], + new_text[..ranges[0].start], + "invalid edit" + ); + let mut delta = 0; let mut edits = Vec::new(); let mut ranges = ranges.into_iter().peekable(); while let Some(inserted_range) = ranges.next() { - let old_start = (inserted_range.start as isize - delta) as usize; + let new_start = inserted_range.start; + let old_start = (new_start as isize - delta) as usize; + let following_text = if let Some(next_range) = ranges.peek() { &new_text[inserted_range.end..next_range.start] } else { From 71e17a54ae9d3bd43de2f54733f6e505a138458e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 09:38:03 -0700 Subject: [PATCH 07/22] Fix case where layers were processed linearly when reparsing --- crates/language/src/syntax_map.rs | 208 +++++++++++++++++++++--------- 1 file changed, 145 insertions(+), 63 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a5ffa9964a0a0923e0e92927bdcd086d9baa2280..8800bb5cd885afaa1c5215adb309e4c1398e4094 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -63,6 +63,9 @@ struct ChangedRegion { range: Range, } +#[derive(Default)] +struct ChangeRegionSet(Vec); + impl SyntaxMap { pub fn new() -> Self { Self::default() @@ -124,14 +127,12 @@ impl SyntaxSnapshot { // If this layer follows all of the edits, then preserve it and any // subsequent layers at this same depth. else { - layers.push_tree( - cursor.slice( - &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), - Bias::Left, - text, - ), + let slice = cursor.slice( + &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), + Bias::Left, text, ); + layers.push_tree(slice, text); edits_for_depth = &edits[..]; continue; }; @@ -226,7 +227,7 @@ impl SyntaxSnapshot { cursor.next(&text); let mut layers = SumTree::new(); - let mut changed_regions = Vec::::new(); + let mut changed_regions = ChangeRegionSet::default(); let mut queue = BinaryHeap::new(); queue.push(ReparseStep { depth: 0, @@ -245,18 +246,19 @@ impl SyntaxSnapshot { let target = DepthAndRange(depth, range.clone()); let mut done = cursor.item().is_none(); - while !done && target.cmp(cursor.start(), &text).is_gt() { - let bounded_target = DepthAndRangeOrMaxPosition( - target.clone(), - changed_regions - .first() - .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { - DepthAndMaxPosition(region.depth, region.range.start) - }), - ); + while !done && target.cmp(&cursor.end(text), &text).is_gt() { + done = true; + + let bounded_target = + DepthAndRangeOrMaxPosition(target.clone(), changed_regions.start_position()); if bounded_target.cmp(&cursor.start(), &text).is_gt() { let slice = cursor.slice(&bounded_target, Bias::Left, text); - layers.push_tree(slice, &text); + if !slice.is_empty() { + layers.push_tree(slice, &text); + if changed_regions.prune(cursor.end(text), text) { + done = false; + } + } } while target.cmp(&cursor.end(text), text).is_gt() { @@ -266,30 +268,23 @@ impl SyntaxSnapshot { break; }; - if layer_is_changed(layer, text, &changed_regions) { - ChangedRegion { - depth: depth + 1, - range: layer.range.clone(), - } - .insert(text, &mut changed_regions); + if changed_regions.intersects(&layer, text) { + changed_regions.insert( + ChangedRegion { + depth: 
depth + 1, + range: layer.range.clone(), + }, + text, + ); } else { layers.push(layer.clone(), text); } - cursor.next(text); - } - done = true; - changed_regions.retain(|region| { - if region.depth > depth - || (region.depth == depth - && region.range.end.cmp(&range.start, text).is_gt()) - { - true - } else { + cursor.next(text); + if changed_regions.prune(cursor.end(text), text) { done = false; - false } - }); + } } let (ranges, language) = if let Some(step) = step { @@ -366,11 +361,13 @@ impl SyntaxSnapshot { ) { let depth = depth + 1; for range in &changed_ranges { - ChangedRegion { - depth, - range: text.anchor_before(range.start)..text.anchor_after(range.end), - } - .insert(text, &mut changed_regions); + changed_regions.insert( + ChangedRegion { + depth, + range: text.anchor_before(range.start)..text.anchor_after(range.end), + }, + text, + ); } get_injections( config, @@ -571,19 +568,6 @@ fn get_injections( result } -fn layer_is_changed( - layer: &SyntaxLayer, - text: &BufferSnapshot, - changed_regions: &[ChangedRegion], -) -> bool { - changed_regions.iter().any(|region| { - let same_depth = region.depth == layer.depth; - let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); - let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); - same_depth && !is_before_layer && !is_after_layer - }) -} - impl std::ops::Deref for SyntaxMap { type Target = SyntaxSnapshot; @@ -625,12 +609,6 @@ impl ReparseStep { } impl ChangedRegion { - fn insert(self, text: &BufferSnapshot, set: &mut Vec) { - if let Err(ix) = set.binary_search_by(|probe| probe.cmp(&self, text)) { - set.insert(ix, self); - } - } - fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { let range_a = &self.range; let range_b = &other.range; @@ -640,6 +618,55 @@ impl ChangedRegion { } } +impl ChangeRegionSet { + fn start_position(&self) -> DepthAndMaxPosition { + self.0 + .first() + .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { + DepthAndMaxPosition(region.depth, region.range.start) + }) + } + + fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool { + for region in &self.0 { + if region.depth < layer.depth { + continue; + } + if region.depth > layer.depth { + break; + } + if region.range.end.cmp(&layer.range.start, text).is_le() { + continue; + } + if region.range.start.cmp(&layer.range.end, text).is_ge() { + break; + } + return true; + } + false + } + + fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) { + if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(®ion, text)) { + self.0.insert(ix, region); + } + } + + fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool { + let prev_len = self.0.len(); + self.0.retain(|region| { + region.depth > summary.max_depth + || (region.depth == summary.max_depth + && region + .range + .end + .cmp(&summary.last_layer_range.start, text) + .is_gt()) + }); + self.0.len() < prev_len + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { @@ -866,16 +893,18 @@ mod tests { d!(D {}); e!(E {}); f!(F {}); + g!(G {}); } ", " fn a() { b!(B {}); c!(C {}); - «g!(G {}); - »d!(D {}); - e!(E {}); + d!(D {}); + « h!(H {}); + » e!(E {}); f!(F {}); + g!(G {}); } ", ]); @@ -888,10 +917,11 @@ mod tests { fn a() { b!(«B {}»); c!(«C {}»); - g!(«G {}»); d!(«D {}»); + h!(«H {}»); e!(«E {}»); f!(«F {}»); + g!(«G {}»); } ", ); @@ -989,6 +1019,58 @@ mod tests { ]); } + #[gpui::test] + fn test_creating_many_injections_in_one_edit() { + test_edit_sequence(&[ + " + 
fn a() { + one(Two::three(3)); + four(Five::six(6)); + seven(Eight::nine(9)); + } + ", + " + fn a() { + one«!»(Two::three(3)); + four«!»(Five::six(6)); + seven«!»(Eight::nine(9)); + } + ", + " + fn a() { + one!(Two::three«!»(3)); + four!(Five::six«!»(6)); + seven!(Eight::nine«!»(9)); + } + ", + ]); + } + + #[gpui::test] + fn test_editing_across_injection_boundary() { + test_edit_sequence(&[ + " + fn one() { + two(); + three!( + three.four, + five.six, + ); + } + ", + " + fn one() { + two(); + th«irty_five![» + three.four, + five.six, + « seven.eight, + ];» + } + ", + ]); + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 9113c94371430ef07fb412aa766ae77db7e164a9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 14:26:09 -0700 Subject: [PATCH 08/22] Add SyntaxMap methods for running queries and combining their results --- crates/language/src/syntax_map.rs | 241 +++++++++++++++++++++++++++--- 1 file changed, 224 insertions(+), 17 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 8800bb5cd885afaa1c5215adb309e4c1398e4094..a578d36a382144dc5bece35448254ab974eb26fa 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -3,11 +3,19 @@ use crate::{ ToTreeSitterPoint, }; use std::{ - borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, + borrow::Cow, + cell::RefCell, + cmp::{Ordering, Reverse}, + collections::BinaryHeap, + iter::Peekable, + ops::{DerefMut, Range}, + sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; -use tree_sitter::{Node, Parser, Tree}; +use tree_sitter::{ + Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree, +}; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); @@ -26,6 +34,42 @@ pub struct SyntaxSnapshot { layers: SumTree, } +pub struct SyntaxMapCaptures<'a> { + layers: Vec>, +} + +pub struct SyntaxMapMatches<'a> { + layers: Vec>, +} + +pub struct SyntaxMapCapture<'a> { + pub grammar: &'a Grammar, + pub depth: usize, + pub node: Node<'a>, + pub index: u32, +} + +pub struct SyntaxMapMatch<'a> { + pub grammar: &'a Grammar, + pub depth: usize, + pub pattern_index: usize, + pub captures: &'a [QueryCapture<'a>], +} + +struct SyntaxMapCapturesLayer<'a> { + depth: usize, + captures: Peekable>>, + grammar: &'a Grammar, + _query_cursor: QueryCursorHandle, +} + +struct SyntaxMapMatchesLayer<'a> { + depth: usize, + matches: Peekable>>, + grammar: &'a Grammar, + _query_cursor: QueryCursorHandle, +} + #[derive(Clone)] struct SyntaxLayer { depth: usize, @@ -385,6 +429,100 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn captures<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + query: impl Fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures { + let mut result = SyntaxMapCaptures { layers: Vec::new() }; + for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. 
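// Editor's aside — an illustrative sketch, not part of the patch: the capture
// and match layers assembled below are kept ordered by the key
// (start byte, Reverse(end byte), depth), so an enclosing layer sorts ahead of
// any layer nested inside it, and ties go to the shallower layer. The values
// here are made up solely to demonstrate that ordering:
fn example_layer_ordering() {
    use std::cmp::Reverse;
    let mut keys = vec![
        (5, Reverse(9), 1),  // an injection nested in the outer layer
        (0, Reverse(20), 0), // the outermost layer
        (5, Reverse(9), 2),  // a deeper injection covering the same bytes
    ];
    keys.sort();
    assert_eq!(
        keys,
        vec![(0, Reverse(20), 0), (5, Reverse(9), 1), (5, Reverse(9), 2)]
    );
}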
+ let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let captures = cursor.captures(query, node, TextProvider(buffer.as_rope())); + let mut layer = SyntaxMapCapturesLayer { + depth, + grammar, + captures: captures.peekable(), + _query_cursor: query_cursor, + }; + + if let Some(key) = layer.sort_key() { + let mut ix = 0; + while let Some(next_layer) = result.layers.get_mut(ix) { + if let Some(next_key) = next_layer.sort_key() { + if key > next_key { + ix += 1; + continue; + } + } + break; + } + result.layers.insert(ix, layer); + } + } + result + } + + pub fn matches<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + query: impl Fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapMatches { + let mut result = SyntaxMapMatches { layers: Vec::new() }; + for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let matches = cursor.matches(query, node, TextProvider(buffer.as_rope())); + let mut layer = SyntaxMapMatchesLayer { + depth, + grammar, + matches: matches.peekable(), + _query_cursor: query_cursor, + }; + + if let Some(key) = layer.sort_key() { + let mut ix = 0; + while let Some(next_layer) = result.layers.get_mut(ix) { + if let Some(next_key) = next_layer.sort_key() { + if key > next_key { + ix += 1; + continue; + } + } + break; + } + result.layers.insert(ix, layer); + } + } + result + } + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { self.layers .iter() @@ -408,7 +546,7 @@ impl SyntaxSnapshot { &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(&Grammar, Node)> { + ) -> Vec<(&Grammar, usize, Node)> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -424,6 +562,7 @@ impl SyntaxSnapshot { if let Some(grammar) = &layer.language.grammar { result.push(( grammar.as_ref(), + layer.depth, layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), layer.range.start.to_point(buffer).to_ts_point(), @@ -437,6 +576,60 @@ impl SyntaxSnapshot { } } +impl<'a> Iterator for SyntaxMapCaptures<'a> { + type Item = SyntaxMapCapture<'a>; + + fn next(&mut self) -> Option { + let layer = self.layers.first_mut()?; + let (mat, ix) = layer.captures.next()?; + + let capture = mat.captures[ix as usize]; + let grammar = layer.grammar; + let depth = layer.depth; + + if let Some(key) = layer.sort_key() { + let mut i = 1; + while let Some(later_layer) = self.layers.get_mut(i) { + if let Some(later_key) = later_layer.sort_key() { + if key > later_key { + i += 1; + continue; + } + } + break; + } + if i > 1 { + self.layers[0..i].rotate_left(1); + } + } else { + self.layers.remove(0); + } + + Some(SyntaxMapCapture { + grammar, + depth, + node: capture.node, + index: capture.index, + }) + } +} + +impl<'a> SyntaxMapCapturesLayer<'a> { + fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { + let (mat, ix) = self.captures.peek()?; + let range = &mat.captures[*ix].node.byte_range(); + Some((range.start, Reverse(range.end), self.depth)) + } +} + +impl<'a> SyntaxMapMatchesLayer<'a> { + fn sort_key(&mut self) -> 
Option<(usize, Reverse, usize)> { + let mat = self.matches.peek()?; + let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte(); + Some((range.start, Reverse(range.end), self.depth)) + } +} + fn join_ranges( a: impl Iterator>, b: impl Iterator>, @@ -875,10 +1068,10 @@ mod tests { "fn a() { dbg!(b.c(vec![d.«e»])) }", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(field_identifier) @_", + &["field"], "fn a() { dbg!(b.«c»(vec![d.«e»])) }", ); } @@ -909,10 +1102,10 @@ mod tests { ", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(struct_expression) @_", + &["struct"], " fn a() { b!(«B {}»); @@ -952,10 +1145,10 @@ mod tests { ", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(field_identifier) @_", + &["field"], " fn a() { b!( @@ -1129,6 +1322,13 @@ mod tests { }, Some(tree_sitter_rust::language()), ) + .with_highlights_query( + r#" + (field_identifier) @field + (struct_expression) @struct + "#, + ) + .unwrap() .with_injection_query( r#" (macro_invocation @@ -1156,7 +1356,7 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, ((_, node), expected_s_exp)) in + for (i, ((_, _, node), expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { let actual_s_exp = node.to_sexp(); @@ -1170,18 +1370,25 @@ mod tests { } } - fn assert_node_ranges( + fn assert_capture_ranges( syntax_map: &SyntaxMap, buffer: &BufferSnapshot, - query: &str, + highlight_query_capture_names: &[&str], marked_string: &str, ) { - let mut cursor = QueryCursorHandle::new(); let mut actual_ranges = Vec::>::new(); - for (grammar, node) in syntax_map.layers(buffer) { - let query = Query::new(grammar.ts_language, query).unwrap(); - for (mat, ix) in cursor.captures(&query, node, TextProvider(buffer.as_rope())) { - actual_ranges.push(mat.captures[ix].node.byte_range()); + for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| { + grammar.highlights_query.as_ref() + }) { + let name = &capture + .grammar + .highlights_query + .as_ref() + .unwrap() + .capture_names()[capture.index as usize]; + dbg!(capture.node, capture.index, name); + if highlight_query_capture_names.contains(&name.as_str()) { + actual_ranges.push(capture.node.byte_range()); } } From ced45cbb0a16e0fa45a999d64dc7bc9dbc1e57fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 17:09:13 -0700 Subject: [PATCH 09/22] Use SyntaxMap in Buffer --- crates/language/src/buffer.rs | 614 ++++++++----------- crates/language/src/language.rs | 144 ++++- crates/language/src/syntax_map.rs | 571 ++++++++++++----- crates/language/src/tests.rs | 4 +- crates/project/src/project.rs | 1 + crates/zed/src/languages/rust/injections.scm | 6 +- 6 files changed, 819 insertions(+), 521 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index b7a1bd30fcdf83ed83f40233a292aa18dbe44df7..7b298b74207ee73ef2bf4a50523267d296efa2f1 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -6,13 +6,15 @@ pub use crate::{ use crate::{ diagnostic_set::{DiagnosticEntry, DiagnosticGroup}, outline::OutlineItem, + syntax_map::{ + SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxSnapshot, ToTreeSitterPoint, + }, CodeLabel, Outline, }; use anyhow::{anyhow, Result}; use clock::ReplicaId; use futures::FutureExt as _; use gpui::{fonts::HighlightStyle, AppContext, Entity, ModelContext, MutableAppContext, Task}; -use lazy_static::lazy_static; use parking_lot::Mutex; use 
settings::Settings; use similar::{ChangeTag, TextDiff}; @@ -25,7 +27,7 @@ use std::{ future::Future, iter::{self, Iterator, Peekable}, mem, - ops::{Deref, DerefMut, Range}, + ops::{Deref, Range}, path::{Path, PathBuf}, str, sync::Arc, @@ -36,7 +38,6 @@ use sum_tree::TreeMap; use text::operation_queue::OperationQueue; pub use text::{Buffer as TextBuffer, BufferSnapshot as TextBufferSnapshot, Operation as _, *}; use theme::SyntaxTheme; -use tree_sitter::{InputEdit, QueryCursor, Tree}; use util::TryFutureExt as _; #[cfg(any(test, feature = "test-support"))] @@ -44,10 +45,6 @@ pub use {tree_sitter_rust, tree_sitter_typescript}; pub use lsp::DiagnosticSeverity; -lazy_static! { - static ref QUERY_CURSORS: Mutex> = Default::default(); -} - pub struct Buffer { text: TextBuffer, file: Option>, @@ -60,7 +57,7 @@ pub struct Buffer { autoindent_requests: Vec>, pending_autoindent: Option>, sync_parse_timeout: Duration, - syntax_tree: Mutex>, + syntax_map: Mutex, parsing_in_background: bool, parse_count: usize, diagnostics: DiagnosticSet, @@ -75,7 +72,7 @@ pub struct Buffer { pub struct BufferSnapshot { text: text::BufferSnapshot, - tree: Option, + syntax: SyntaxSnapshot, file: Option>, diagnostics: DiagnosticSet, diagnostics_update_count: usize, @@ -221,14 +218,6 @@ pub trait LocalFile: File { ); } -pub(crate) struct QueryCursorHandle(Option); - -#[derive(Clone)] -struct SyntaxTree { - tree: Tree, - version: clock::Global, -} - #[derive(Clone, Debug)] pub enum AutoindentMode { /// Indent each line of inserted text. @@ -268,14 +257,11 @@ struct IndentSuggestion { delta: Ordering, } -pub(crate) struct TextProvider<'a>(pub(crate) &'a Rope); - struct BufferChunkHighlights<'a> { - captures: tree_sitter::QueryCaptures<'a, 'a, TextProvider<'a>>, - next_capture: Option<(tree_sitter::QueryMatch<'a, 'a>, usize)>, + captures: SyntaxMapCaptures<'a>, + next_capture: Option>, stack: Vec<(usize, HighlightId)>, - highlight_map: HighlightMap, - _query_cursor: QueryCursorHandle, + highlight_maps: Vec, } pub struct BufferChunks<'a> { @@ -456,7 +442,7 @@ impl Buffer { was_dirty_before_starting_transaction: None, text: buffer, file, - syntax_tree: Mutex::new(None), + syntax_map: Mutex::new(SyntaxMap::new()), parsing_in_background: false, parse_count: 0, sync_parse_timeout: Duration::from_millis(1), @@ -477,7 +463,7 @@ impl Buffer { pub fn snapshot(&self) -> BufferSnapshot { BufferSnapshot { text: self.text.snapshot(), - tree: self.syntax_tree(), + syntax: self.syntax_map(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -533,11 +519,17 @@ impl Buffer { } pub fn set_language(&mut self, language: Option>, cx: &mut ModelContext) { - *self.syntax_tree.lock() = None; + self.syntax_map.lock().clear(); self.language = language; self.reparse(cx); } + pub fn set_language_registry(&mut self, language_registry: Arc) { + self.syntax_map + .lock() + .set_language_registry(language_registry); + } + pub fn did_save( &mut self, version: clock::Global, @@ -682,13 +674,10 @@ impl Buffer { self.file_update_count } - pub(crate) fn syntax_tree(&self) -> Option { - if let Some(syntax_tree) = self.syntax_tree.lock().as_mut() { - self.interpolate_tree(syntax_tree); - Some(syntax_tree.tree.clone()) - } else { - None - } + pub(crate) fn syntax_map(&self) -> SyntaxSnapshot { + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&self.text_snapshot()); + syntax_map.snapshot() } #[cfg(any(test, feature = "test-support"))] @@ -706,35 +695,49 @@ impl Buffer { return 
false; } - if let Some(grammar) = self.grammar().cloned() { - let old_tree = self.syntax_tree(); - let text = self.as_rope().clone(); + if let Some(language) = self.language.clone() { + let text = self.text_snapshot(); let parsed_version = self.version(); + + let mut syntax_map; + let language_registry; + let syntax_map_version; + { + let mut map = self.syntax_map.lock(); + map.interpolate(&text); + language_registry = map.language_registry(); + syntax_map = map.snapshot(); + syntax_map_version = map.parsed_version(); + } let parse_task = cx.background().spawn({ - let grammar = grammar.clone(); - async move { grammar.parse_text(&text, old_tree) } + let language = language.clone(); + async move { + syntax_map.reparse(&syntax_map_version, &text, language_registry, language); + syntax_map + } }); match cx .background() .block_with_timeout(self.sync_parse_timeout, parse_task) { - Ok(new_tree) => { - self.did_finish_parsing(new_tree, parsed_version, cx); + Ok(new_syntax_map) => { + self.did_finish_parsing(new_syntax_map, parsed_version, cx); return true; } Err(parse_task) => { self.parsing_in_background = true; cx.spawn(move |this, mut cx| async move { - let new_tree = parse_task.await; + let new_syntax_map = parse_task.await; this.update(&mut cx, move |this, cx| { - let grammar_changed = this - .grammar() - .map_or(true, |curr_grammar| !Arc::ptr_eq(&grammar, curr_grammar)); + let grammar_changed = + this.language.as_ref().map_or(true, |current_language| { + !Arc::ptr_eq(&language, current_language) + }); let parse_again = this.version.changed_since(&parsed_version) || grammar_changed; this.parsing_in_background = false; - this.did_finish_parsing(new_tree, parsed_version, cx); + this.did_finish_parsing(new_syntax_map, parsed_version, cx); if parse_again && this.reparse(cx) {} }); @@ -746,30 +749,14 @@ impl Buffer { false } - fn interpolate_tree(&self, tree: &mut SyntaxTree) { - for edit in self.edits_since::<(usize, Point)>(&tree.version) { - let (bytes, lines) = edit.flatten(); - tree.tree.edit(&InputEdit { - start_byte: bytes.new.start, - old_end_byte: bytes.new.start + bytes.old.len(), - new_end_byte: bytes.new.end, - start_position: lines.new.start.to_ts_point(), - old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) - .to_ts_point(), - new_end_position: lines.new.end.to_ts_point(), - }); - } - tree.version = self.version(); - } - fn did_finish_parsing( &mut self, - tree: Tree, + syntax_map: SyntaxSnapshot, version: clock::Global, cx: &mut ModelContext, ) { self.parse_count += 1; - *self.syntax_tree.lock() = Some(SyntaxTree { tree, version }); + self.syntax_map.lock().did_parse(syntax_map, version); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); @@ -808,10 +795,7 @@ impl Buffer { fn compute_autoindents(&self) -> Option>> { let max_rows_between_yields = 100; let snapshot = self.snapshot(); - if snapshot.language.is_none() - || snapshot.tree.is_none() - || self.autoindent_requests.is_empty() - { + if snapshot.syntax.is_empty() || self.autoindent_requests.is_empty() { return None; } @@ -1310,10 +1294,6 @@ impl Buffer { cx.notify(); } - fn grammar(&self) -> Option<&Arc> { - self.language.as_ref().and_then(|l| l.grammar.as_ref()) - } - pub fn apply_ops>( &mut self, ops: I, @@ -1654,32 +1634,30 @@ impl BufferSnapshot { let prev_non_blank_row = self.prev_non_blank_row(row_range.start); // Find the suggested indentation ranges based on the syntax tree. 
- let indents_query = grammar.indents_query.as_ref()?; - let mut query_cursor = QueryCursorHandle::new(); - let indent_capture_ix = indents_query.capture_index_for_name("indent"); - let end_capture_ix = indents_query.capture_index_for_name("end"); - query_cursor.set_point_range( - Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0).to_ts_point() - ..Point::new(row_range.end, 0).to_ts_point(), - ); + let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0); + let end = Point::new(row_range.end, 0); + let range = (start..end).to_offset(&self.text); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + Some(&grammar.indents_config.as_ref()?.query) + }); let mut indent_ranges = Vec::>::new(); - for mat in query_cursor.matches( - indents_query, - self.tree.as_ref()?.root_node(), - TextProvider(self.as_rope()), - ) { + while let Some(mat) = matches.peek() { let mut start: Option = None; let mut end: Option = None; - for capture in mat.captures { - if Some(capture.index) == indent_capture_ix { - start.get_or_insert(Point::from_ts_point(capture.node.start_position())); - end.get_or_insert(Point::from_ts_point(capture.node.end_position())); - } else if Some(capture.index) == end_capture_ix { - end = Some(Point::from_ts_point(capture.node.start_position())); + + if let Some(config) = &grammar.indents_config { + for capture in mat.captures { + if capture.index == config.indent_capture_ix { + start.get_or_insert(Point::from_ts_point(capture.node.start_position())); + end.get_or_insert(Point::from_ts_point(capture.node.end_position())); + } else if Some(capture.index) == config.end_capture_ix { + end = Some(Point::from_ts_point(capture.node.start_position())); + } } } + matches.advance(); if let Some((start, end)) = start.zip(end) { if start.row == end.row { continue; @@ -1811,10 +1789,18 @@ impl BufferSnapshot { pub fn chunks(&self, range: Range, language_aware: bool) -> BufferChunks { let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut tree = None; + let mut syntax = None; let mut diagnostic_endpoints = Vec::new(); if language_aware { - tree = self.tree.as_ref(); + let captures = self.syntax.captures(range.clone(), &self.text, |grammar| { + grammar.highlights_query.as_ref() + }); + let highlight_maps = captures + .grammars() + .into_iter() + .map(|grammar| grammar.highlight_map()) + .collect(); + syntax = Some((captures, highlight_maps)); for entry in self.diagnostics_in_range::<_, usize>(range.clone(), false) { diagnostic_endpoints.push(DiagnosticEndpoint { offset: entry.range.start, @@ -1833,13 +1819,7 @@ impl BufferSnapshot { .sort_unstable_by_key(|endpoint| (endpoint.offset, !endpoint.is_start)); } - BufferChunks::new( - self.text.as_rope(), - range, - tree, - self.grammar(), - diagnostic_endpoints, - ) + BufferChunks::new(self.text.as_rope(), range, syntax, diagnostic_endpoints) } pub fn for_each_line(&self, range: Range, mut callback: impl FnMut(u32, &str)) { @@ -1865,12 +1845,6 @@ impl BufferSnapshot { self.language.as_ref() } - fn grammar(&self) -> Option<&Arc> { - self.language - .as_ref() - .and_then(|language| language.grammar.as_ref()) - } - pub fn surrounding_word(&self, start: T) -> (Range, Option) { let mut start = start.to_offset(self); let mut end = start; @@ -1901,61 +1875,71 @@ impl BufferSnapshot { } pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { - let tree = self.tree.as_ref()?; let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut cursor = tree.root_node().walk(); - 
- // Descend to the first leaf that touches the start of the range, - // and if the range is non-empty, extends beyond the start. - while cursor.goto_first_child_for_byte(range.start).is_some() { - if !range.is_empty() && cursor.node().end_byte() == range.start { - cursor.goto_next_sibling(); + let mut result: Option> = None; + 'outer: for (_, _, node) in self.syntax.layers_for_range(range.clone(), &self.text) { + let mut cursor = node.walk(); + + // Descend to the first leaf that touches the start of the range, + // and if the range is non-empty, extends beyond the start. + while cursor.goto_first_child_for_byte(range.start).is_some() { + if !range.is_empty() && cursor.node().end_byte() == range.start { + cursor.goto_next_sibling(); + } } - } - // Ascend to the smallest ancestor that strictly contains the range. - loop { - let node_range = cursor.node().byte_range(); - if node_range.start <= range.start - && node_range.end >= range.end - && node_range.len() > range.len() - { - break; - } - if !cursor.goto_parent() { - break; + // Ascend to the smallest ancestor that strictly contains the range. + loop { + let node_range = cursor.node().byte_range(); + if node_range.start <= range.start + && node_range.end >= range.end + && node_range.len() > range.len() + { + break; + } + if !cursor.goto_parent() { + continue 'outer; + } } - } - let left_node = cursor.node(); + let left_node = cursor.node(); + let mut layer_result = left_node.byte_range(); - // For an empty range, try to find another node immediately to the right of the range. - if left_node.end_byte() == range.start { - let mut right_node = None; - while !cursor.goto_next_sibling() { - if !cursor.goto_parent() { - break; + // For an empty range, try to find another node immediately to the right of the range. + if left_node.end_byte() == range.start { + let mut right_node = None; + while !cursor.goto_next_sibling() { + if !cursor.goto_parent() { + break; + } } - } - while cursor.node().start_byte() == range.start { - right_node = Some(cursor.node()); - if !cursor.goto_first_child() { - break; + while cursor.node().start_byte() == range.start { + right_node = Some(cursor.node()); + if !cursor.goto_first_child() { + break; + } + } + + // If there is a candidate node on both sides of the (empty) range, then + // decide between the two by favoring a named node over an anonymous token. + // If both nodes are the same in that regard, favor the right one. + if let Some(right_node) = right_node { + if right_node.is_named() || !left_node.is_named() { + layer_result = right_node.byte_range(); + } } } - // If there is a candidate node on both sides of the (empty) range, then - // decide between the two by favoring a named node over an anonymous token. - // If both nodes are the same in that regard, favor the right one. 
- if let Some(right_node) = right_node { - if right_node.is_named() || !left_node.is_named() { - return Some(right_node.byte_range()); + if let Some(previous_result) = &result { + if previous_result.len() < layer_result.len() { + continue; } } + result = Some(layer_result); } - Some(left_node.byte_range()) + result } pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option> { @@ -1985,109 +1969,107 @@ impl BufferSnapshot { range: Range, theme: Option<&SyntaxTheme>, ) -> Option>> { - let tree = self.tree.as_ref()?; - let grammar = self - .language - .as_ref() - .and_then(|language| language.grammar.as_ref())?; - - let outline_query = grammar.outline_query.as_ref()?; - let mut cursor = QueryCursorHandle::new(); - cursor.set_byte_range(range.clone()); - let matches = cursor.matches( - outline_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| { + grammar.outline_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|g| g.outline_config.as_ref().unwrap()) + .collect::>(); let mut chunks = self.chunks(0..self.len(), true); + let mut stack = Vec::>::new(); + let mut items = Vec::new(); + while let Some(mat) = matches.peek() { + let config = &configs[mat.grammar_index]; + let item_node = mat.captures.iter().find_map(|cap| { + if cap.index == config.item_capture_ix { + Some(cap.node) + } else { + None + } + })?; - let item_capture_ix = outline_query.capture_index_for_name("item")?; - let name_capture_ix = outline_query.capture_index_for_name("name")?; - let context_capture_ix = outline_query - .capture_index_for_name("context") - .unwrap_or(u32::MAX); + let item_range = item_node.byte_range(); + if item_range.end < range.start || item_range.start > range.end { + matches.advance(); + continue; + } - let mut stack = Vec::>::new(); - let items = matches - .filter_map(|mat| { - let item_node = mat.nodes_for_capture_index(item_capture_ix).next()?; - let item_range = item_node.start_byte()..item_node.end_byte(); - if item_range.end < range.start || item_range.start > range.end { - return None; + // TODO - move later, after processing captures + + let mut text = String::new(); + let mut name_ranges = Vec::new(); + let mut highlight_ranges = Vec::new(); + for capture in mat.captures { + let node_is_name; + if capture.index == config.name_capture_ix { + node_is_name = true; + } else if Some(capture.index) == config.context_capture_ix { + node_is_name = false; + } else { + continue; } - let mut text = String::new(); - let mut name_ranges = Vec::new(); - let mut highlight_ranges = Vec::new(); - for capture in mat.captures { - let node_is_name; - if capture.index == name_capture_ix { - node_is_name = true; - } else if capture.index == context_capture_ix { - node_is_name = false; - } else { - continue; + let range = capture.node.start_byte()..capture.node.end_byte(); + if !text.is_empty() { + text.push(' '); + } + if node_is_name { + let mut start = text.len(); + let end = start + range.len(); + + // When multiple names are captured, then the matcheable text + // includes the whitespace in between the names. + if !name_ranges.is_empty() { + start -= 1; } - let range = capture.node.start_byte()..capture.node.end_byte(); - if !text.is_empty() { - text.push(' '); - } - if node_is_name { - let mut start = text.len(); - let end = start + range.len(); - - // When multiple names are captured, then the matcheable text - // includes the whitespace in between the names. 
- if !name_ranges.is_empty() { - start -= 1; - } + name_ranges.push(start..end); + } - name_ranges.push(start..end); + let mut offset = range.start; + chunks.seek(offset); + for mut chunk in chunks.by_ref() { + if chunk.text.len() > range.end - offset { + chunk.text = &chunk.text[0..(range.end - offset)]; + offset = range.end; + } else { + offset += chunk.text.len(); } - - let mut offset = range.start; - chunks.seek(offset); - for mut chunk in chunks.by_ref() { - if chunk.text.len() > range.end - offset { - chunk.text = &chunk.text[0..(range.end - offset)]; - offset = range.end; - } else { - offset += chunk.text.len(); - } - let style = chunk - .syntax_highlight_id - .zip(theme) - .and_then(|(highlight, theme)| highlight.style(theme)); - if let Some(style) = style { - let start = text.len(); - let end = start + chunk.text.len(); - highlight_ranges.push((start..end, style)); - } - text.push_str(chunk.text); - if offset >= range.end { - break; - } + let style = chunk + .syntax_highlight_id + .zip(theme) + .and_then(|(highlight, theme)| highlight.style(theme)); + if let Some(style) = style { + let start = text.len(); + let end = start + chunk.text.len(); + highlight_ranges.push((start..end, style)); + } + text.push_str(chunk.text); + if offset >= range.end { + break; } } + } - while stack.last().map_or(false, |prev_range| { - prev_range.start > item_range.start || prev_range.end < item_range.end - }) { - stack.pop(); - } - stack.push(item_range.clone()); - - Some(OutlineItem { - depth: stack.len() - 1, - range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), - text, - highlight_ranges, - name_ranges, - }) + matches.advance(); + while stack.last().map_or(false, |prev_range| { + prev_range.start > item_range.start || prev_range.end < item_range.end + }) { + stack.pop(); + } + stack.push(item_range.clone()); + + items.push(OutlineItem { + depth: stack.len() - 1, + range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), + text, + highlight_ranges, + name_ranges, }) - .collect::>(); + } Some(items) } @@ -2095,28 +2077,48 @@ impl BufferSnapshot { &self, range: Range, ) -> Option<(Range, Range)> { - let (grammar, tree) = self.grammar().zip(self.tree.as_ref())?; - let brackets_query = grammar.brackets_query.as_ref()?; - let open_capture_ix = brackets_query.capture_index_for_name("open")?; - let close_capture_ix = brackets_query.capture_index_for_name("close")?; - // Find bracket pairs that *inclusively* contain the given range. let range = range.start.to_offset(self).saturating_sub(1)..range.end.to_offset(self) + 1; - let mut cursor = QueryCursorHandle::new(); - let matches = cursor.set_byte_range(range).matches( - brackets_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + grammar.brackets_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|grammar| grammar.brackets_config.as_ref().unwrap()) + .collect::>(); // Get the ranges of the innermost pair of brackets. 
- matches - .filter_map(|mat| { - let open = mat.nodes_for_capture_index(open_capture_ix).next()?; - let close = mat.nodes_for_capture_index(close_capture_ix).next()?; - Some((open.byte_range(), close.byte_range())) - }) - .min_by_key(|(open_range, close_range)| close_range.end - open_range.start) + let mut result: Option<(Range, Range)> = None; + while let Some(mat) = matches.peek() { + let mut open = None; + let mut close = None; + let config = &configs[mat.grammar_index]; + for capture in mat.captures { + if capture.index == config.open_capture_ix { + open = Some(capture.node.byte_range()); + } else if capture.index == config.close_capture_ix { + close = Some(capture.node.byte_range()); + } + } + + matches.advance(); + + if let Some((open, close)) = open.zip(close) { + let len = close.end - open.start; + + if let Some((existing_open, existing_close)) = &result { + let existing_len = existing_close.end - existing_open.start; + if len > existing_len { + continue; + } + } + + result = Some((open, close)); + } + } + + result } #[allow(clippy::type_complexity)] @@ -2228,7 +2230,7 @@ impl Clone for BufferSnapshot { fn clone(&self) -> Self { Self { text: self.text.clone(), - tree: self.tree.clone(), + syntax: self.syntax.clone(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -2249,56 +2251,23 @@ impl Deref for BufferSnapshot { } } -impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { - type I = ByteChunks<'a>; - - fn text(&mut self, node: tree_sitter::Node) -> Self::I { - ByteChunks(self.0.chunks_in_range(node.byte_range())) - } -} - -pub(crate) struct ByteChunks<'a>(rope::Chunks<'a>); - -impl<'a> Iterator for ByteChunks<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - self.0.next().map(str::as_bytes) - } -} - unsafe impl<'a> Send for BufferChunks<'a> {} impl<'a> BufferChunks<'a> { pub(crate) fn new( text: &'a Rope, range: Range, - tree: Option<&'a Tree>, - grammar: Option<&'a Arc>, + syntax: Option<(SyntaxMapCaptures<'a>, Vec)>, diagnostic_endpoints: Vec, ) -> Self { let mut highlights = None; - if let Some((grammar, tree)) = grammar.zip(tree) { - if let Some(highlights_query) = grammar.highlights_query.as_ref() { - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. 
- let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - let captures = cursor.set_byte_range(range.clone()).captures( - highlights_query, - tree.root_node(), - TextProvider(text), - ); - highlights = Some(BufferChunkHighlights { - captures, - next_capture: None, - stack: Default::default(), - highlight_map: grammar.highlight_map(), - _query_cursor: query_cursor, - }) - } + if let Some((captures, highlight_maps)) = syntax { + highlights = Some(BufferChunkHighlights { + captures, + next_capture: None, + stack: Default::default(), + highlight_maps, + }) } let diagnostic_endpoints = diagnostic_endpoints.into_iter().peekable(); @@ -2324,14 +2293,13 @@ impl<'a> BufferChunks<'a> { highlights .stack .retain(|(end_offset, _)| *end_offset > offset); - if let Some((mat, capture_ix)) = &highlights.next_capture { - let capture = mat.captures[*capture_ix as usize]; + if let Some(capture) = &highlights.next_capture { if offset >= capture.node.start_byte() { let next_capture_end = capture.node.end_byte(); if offset < next_capture_end { highlights.stack.push(( next_capture_end, - highlights.highlight_map.get(capture.index), + highlights.highlight_maps[capture.grammar_index].get(capture.index), )); } highlights.next_capture.take(); @@ -2407,13 +2375,13 @@ impl<'a> Iterator for BufferChunks<'a> { highlights.next_capture = highlights.captures.next(); } - while let Some((mat, capture_ix)) = highlights.next_capture.as_ref() { - let capture = mat.captures[*capture_ix as usize]; + while let Some(capture) = highlights.next_capture.as_ref() { if self.range.start < capture.node.start_byte() { next_capture_start = capture.node.start_byte(); break; } else { - let highlight_id = highlights.highlight_map.get(capture.index); + let highlight_id = + highlights.highlight_maps[capture.grammar_index].get(capture.index); highlights .stack .push((capture.node.end_byte(), highlight_id)); @@ -2465,52 +2433,6 @@ impl<'a> Iterator for BufferChunks<'a> { } } -impl QueryCursorHandle { - pub(crate) fn new() -> Self { - let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); - cursor.set_match_limit(64); - QueryCursorHandle(Some(cursor)) - } -} - -impl Deref for QueryCursorHandle { - type Target = QueryCursor; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().unwrap() - } -} - -impl DerefMut for QueryCursorHandle { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().unwrap() - } -} - -impl Drop for QueryCursorHandle { - fn drop(&mut self) { - let mut cursor = self.0.take().unwrap(); - cursor.set_byte_range(0..usize::MAX); - cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); - QUERY_CURSORS.lock().push(cursor) - } -} - -pub(crate) trait ToTreeSitterPoint { - fn to_ts_point(self) -> tree_sitter::Point; - fn from_ts_point(point: tree_sitter::Point) -> Self; -} - -impl ToTreeSitterPoint for Point { - fn to_ts_point(self) -> tree_sitter::Point { - tree_sitter::Point::new(self.row as usize, self.column as usize) - } - - fn from_ts_point(point: tree_sitter::Point) -> Self { - Point::new(point.row as u32, point.column as u32) - } -} - impl operation_queue::Operation for Operation { fn lamport_timestamp(&self) -> clock::Lamport { match self { diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 8dcfc8fffdb083b962d903d85247859225f4dd85..780f6e75b52521bf751f7de1a748b91954001ac3 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -30,8 +30,12 @@ use std::{ 
ops::Range, path::{Path, PathBuf}, str, - sync::Arc, + sync::{ + atomic::{AtomicUsize, Ordering::SeqCst}, + Arc, + }, }; +use syntax_map::SyntaxSnapshot; use theme::{SyntaxTheme, Theme}; use tree_sitter::{self, Query}; use util::ResultExt; @@ -50,6 +54,7 @@ thread_local! { } lazy_static! { + pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default(); pub static ref PLAIN_TEXT: Arc = Arc::new(Language::new( LanguageConfig { name: "Plain Text".into(), @@ -286,15 +291,29 @@ pub struct Language { } pub struct Grammar { + id: usize, pub(crate) ts_language: tree_sitter::Language, pub(crate) highlights_query: Option, - pub(crate) brackets_query: Option, - pub(crate) indents_query: Option, - pub(crate) outline_query: Option, + pub(crate) brackets_config: Option, + pub(crate) indents_config: Option, + pub(crate) outline_config: Option, pub(crate) injection_config: Option, pub(crate) highlight_map: Mutex, } +struct IndentConfig { + query: Query, + indent_capture_ix: u32, + end_capture_ix: Option, +} + +struct OutlineConfig { + query: Query, + item_capture_ix: u32, + name_capture_ix: u32, + context_capture_ix: Option, +} + struct InjectionConfig { query: Query, content_capture_ix: u32, @@ -302,6 +321,12 @@ struct InjectionConfig { languages_by_pattern_ix: Vec>>, } +struct BracketConfig { + query: Query, + open_capture_ix: u32, + close_capture_ix: u32, +} + #[derive(Clone)] pub enum LanguageServerBinaryStatus { CheckingForUpdate, @@ -499,6 +524,13 @@ impl LanguageRegistry { } } +#[cfg(any(test, feature = "test-support"))] +impl Default for LanguageRegistry { + fn default() -> Self { + Self::test() + } +} + async fn get_server_binary_path( adapter: Arc, language: Arc, @@ -576,10 +608,11 @@ impl Language { config, grammar: ts_language.map(|ts_language| { Arc::new(Grammar { + id: NEXT_GRAMMAR_ID.fetch_add(1, SeqCst), highlights_query: None, - brackets_query: None, - indents_query: None, - outline_query: None, + brackets_config: None, + outline_config: None, + indents_config: None, injection_config: None, ts_language, highlight_map: Default::default(), @@ -604,19 +637,70 @@ impl Language { pub fn with_brackets_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.brackets_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut open_capture_ix = None; + let mut close_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("open", &mut open_capture_ix), + ("close", &mut close_capture_ix), + ], + ); + if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) { + grammar.brackets_config = Some(BracketConfig { + query, + open_capture_ix, + close_capture_ix, + }); + } Ok(self) } pub fn with_indents_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.indents_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut indent_capture_ix = None; + let mut end_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("indent", &mut indent_capture_ix), + ("end", &mut end_capture_ix), + ], + ); + if let Some(indent_capture_ix) = indent_capture_ix { + grammar.indents_config = Some(IndentConfig { + query, + indent_capture_ix, + end_capture_ix, + }); + } Ok(self) } pub fn with_outline_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.outline_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, 
source)?; + let mut item_capture_ix = None; + let mut name_capture_ix = None; + let mut context_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("item", &mut item_capture_ix), + ("name", &mut name_capture_ix), + ("context", &mut context_capture_ix), + ], + ); + if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) { + grammar.outline_config = Some(OutlineConfig { + query, + item_capture_ix, + name_capture_ix, + context_capture_ix, + }); + } Ok(self) } @@ -625,13 +709,13 @@ impl Language { let query = Query::new(grammar.ts_language, source)?; let mut language_capture_ix = None; let mut content_capture_ix = None; - for (ix, name) in query.capture_names().iter().enumerate() { - *match name.as_str() { - "language" => &mut language_capture_ix, - "content" => &mut content_capture_ix, - _ => continue, - } = Some(ix as u32); - } + get_capture_indices( + &query, + &mut [ + ("language", &mut language_capture_ix), + ("content", &mut content_capture_ix), + ], + ); let languages_by_pattern_ix = (0..query.pattern_count()) .map(|ix| { query.property_settings(ix).iter().find_map(|setting| { @@ -729,9 +813,16 @@ impl Language { let mut result = Vec::new(); if let Some(grammar) = &self.grammar { let tree = grammar.parse_text(text, None); + let captures = SyntaxSnapshot::single_tree_captures( + range.clone(), + text, + &tree, + grammar, + |grammar| grammar.highlights_query.as_ref(), + ); + let highlight_maps = vec![grammar.highlight_map()]; let mut offset = 0; - for chunk in BufferChunks::new(text, range, Some(&tree), self.grammar.as_ref(), vec![]) - { + for chunk in BufferChunks::new(text, range, Some((captures, highlight_maps)), vec![]) { let end_offset = offset + chunk.text.len(); if let Some(highlight_id) = chunk.syntax_highlight_id { if !highlight_id.is_default() { @@ -771,6 +862,10 @@ impl Language { } impl Grammar { + pub fn id(&self) -> usize { + self.id + } + fn parse_text(&self, text: &Rope, old_tree: Option) -> Tree { PARSER.with(|parser| { let mut parser = parser.borrow_mut(); @@ -870,6 +965,17 @@ impl LspAdapter for Arc { } } +fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option)]) { + for (ix, name) in query.capture_names().iter().enumerate() { + for (capture_name, index) in captures.iter_mut() { + if capture_name == name { + **index = Some(ix as u32); + break; + } + } + } +} + pub fn point_to_lsp(point: PointUtf16) -> lsp::Position { lsp::Position::new(point.row, point.column) } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a578d36a382144dc5bece35448254ab974eb26fa..ca0c28202c3b9d8577d4bb9544e84573cb097212 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,26 +1,28 @@ -use crate::{ - Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, - ToTreeSitterPoint, -}; +use crate::{Grammar, InjectionConfig, Language, LanguageRegistry}; +use lazy_static::lazy_static; +use parking_lot::Mutex; use std::{ borrow::Cow, cell::RefCell, cmp::{Ordering, Reverse}, collections::BinaryHeap, - iter::Peekable, - ops::{DerefMut, Range}, + ops::{Deref, DerefMut, Range}, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; +use text::{rope, Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{ - Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree, + Node, Parser, Query, 
QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree, }; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); } +lazy_static! { + static ref QUERY_CURSORS: Mutex> = Default::default(); +} + #[derive(Default)] pub struct SyntaxMap { parsed_version: clock::Global, @@ -34,39 +36,51 @@ pub struct SyntaxSnapshot { layers: SumTree, } +#[derive(Default)] pub struct SyntaxMapCaptures<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Default)] pub struct SyntaxMapMatches<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Debug)] pub struct SyntaxMapCapture<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub node: Node<'a>, pub index: u32, + pub grammar_index: usize, } +#[derive(Debug)] pub struct SyntaxMapMatch<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub pattern_index: usize, pub captures: &'a [QueryCapture<'a>], + pub grammar_index: usize, } struct SyntaxMapCapturesLayer<'a> { depth: usize, - captures: Peekable>>, - grammar: &'a Grammar, + captures: QueryCaptures<'a, 'a, TextProvider<'a>>, + next_capture: Option>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } struct SyntaxMapMatchesLayer<'a> { depth: usize, - matches: Peekable>>, - grammar: &'a Grammar, + next_pattern_index: usize, + next_captures: Vec>, + has_next: bool, + matches: QueryMatches<'a, 'a, TextProvider<'a>>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } @@ -80,6 +94,7 @@ struct SyntaxLayer { #[derive(Debug, Clone)] struct SyntaxLayerSummary { + min_depth: usize, max_depth: usize, range: Range, last_layer_range: Range, @@ -110,6 +125,12 @@ struct ChangedRegion { #[derive(Default)] struct ChangeRegionSet(Vec); +struct TextProvider<'a>(&'a Rope); + +struct ByteChunks<'a>(rope::Chunks<'a>); + +struct QueryCursorHandle(Option); + impl SyntaxMap { pub fn new() -> Self { Self::default() @@ -123,11 +144,20 @@ impl SyntaxMap { self.snapshot.clone() } + pub fn language_registry(&self) -> Option> { + self.language_registry.clone() + } + + pub fn parsed_version(&self) -> clock::Global { + self.parsed_version.clone() + } + pub fn interpolate(&mut self, text: &BufferSnapshot) { self.snapshot.interpolate(&self.interpolated_version, text); self.interpolated_version = text.version.clone(); } + #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { if !self.interpolated_version.observed_all(&text.version) { self.interpolate(text); @@ -141,9 +171,22 @@ impl SyntaxMap { ); self.parsed_version = text.version.clone(); } + + pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { + self.parsed_version = version; + self.snapshot = snapshot; + } + + pub fn clear(&mut self) { + self.snapshot = SyntaxSnapshot::default(); + } } impl SyntaxSnapshot { + pub fn is_empty(&self) -> bool { + self.layers.is_empty() + } + pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text .edits_since::<(usize, Point)>(&from_version) @@ -429,14 +472,105 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn single_tree_captures<'a>( + range: Range, + text: &'a Rope, + tree: &'a Tree, + grammar: &'a Grammar, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures<'a> { + SyntaxMapCaptures::new( + range.clone(), + text, + [(grammar, 0, tree.root_node())].into_iter(), + query, + ) + } + pub fn captures<'a>( &'a self, range: Range, buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, + query: fn(&Grammar) -> 
Option<&Query>, ) -> SyntaxMapCaptures { - let mut result = SyntaxMapCaptures { layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + SyntaxMapCaptures::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) + } + + pub fn matches<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapMatches { + SyntaxMapMatches::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) + } + + #[cfg(test)] + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, usize, Node)> { + self.layers_for_range(0..buffer.len(), buffer) + } + + pub fn layers_for_range<'a, T: ToOffset>( + &self, + range: Range, + buffer: &BufferSnapshot, + ) -> Vec<(&Grammar, usize, Node)> { + let start = buffer.anchor_before(range.start.to_offset(buffer)); + let end = buffer.anchor_after(range.end.to_offset(buffer)); + + let mut cursor = self.layers.filter::<_, ()>(|summary| { + if summary.max_depth > summary.min_depth { + true + } else { + let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); + let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); + !is_before_start && !is_after_end + } + }); + + let mut result = Vec::new(); + cursor.next(buffer); + while let Some(layer) = cursor.item() { + if let Some(grammar) = &layer.language.grammar { + result.push(( + grammar.as_ref(), + layer.depth, + layer.tree.root_node_with_offset( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer).to_ts_point(), + ), + )); + } + cursor.next(buffer) + } + + result + } +} + +impl<'a> SyntaxMapCaptures<'a> { + fn new( + range: Range, + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self { + layers: Vec::new(), + grammars: Vec::new(), + active_layer_count: 0, + }; + for (grammar, depth, node) in layers { let query = if let Some(query) = query(grammar) { query } else { @@ -451,39 +585,107 @@ impl SyntaxSnapshot { }; cursor.set_byte_range(range.clone()); - let captures = cursor.captures(query, node, TextProvider(buffer.as_rope())); + let captures = cursor.captures(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); let mut layer = SyntaxMapCapturesLayer { depth, - grammar, - captures: captures.peekable(), + grammar_index, + next_capture: None, + captures, _query_cursor: query_cursor, }; - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); } } + result } - pub fn matches<'a>( - &'a self, + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars + } + + pub fn peek(&self) -> Option> { + let layer = self.layers[..self.active_layer_count].first()?; + let capture = layer.next_capture?; + Some(SyntaxMapCapture { + depth: layer.depth, + grammar_index: 
layer.grammar_index, + index: capture.index, + node: capture.node, + }) + } + + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() { + layer + } else { + return false; + }; + + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; + } + + true + } + + pub fn set_byte_range(&mut self, range: Range) { + for layer in &mut self.layers { + layer.captures.set_byte_range(range.clone()); + if let Some(capture) = &layer.next_capture { + if capture.node.end_byte() > range.start { + continue; + } + } + layer.advance(); + } + self.layers.sort_unstable_by_key(|layer| layer.sort_key()); + self.active_layer_count = self + .layers + .iter() + .position(|layer| layer.next_capture.is_none()) + .unwrap_or(self.layers.len()); + } +} + +impl<'a> SyntaxMapMatches<'a> { + fn new( range: Range, - buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, - ) -> SyntaxMapMatches { - let mut result = SyntaxMapMatches { layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self::default(); + for (grammar, depth, node) in layers { let query = if let Some(query) = query(grammar) { query } else { @@ -498,135 +700,132 @@ impl SyntaxSnapshot { }; cursor.set_byte_range(range.clone()); - let matches = cursor.matches(query, node, TextProvider(buffer.as_rope())); + let matches = cursor.matches(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); let mut layer = SyntaxMapMatchesLayer { depth, - grammar, - matches: matches.peekable(), + grammar_index, + matches, + next_pattern_index: 0, + next_captures: Vec::new(), + has_next: false, _query_cursor: query_cursor, }; - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); } } result } - pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { - self.layers - .iter() - .filter_map(|layer| { - if let Some(grammar) = &layer.language.grammar { - Some(( - grammar.as_ref(), - layer.tree.root_node_with_offset( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - )) - } else { - None - } - }) - .collect() + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars } - pub fn layers_for_range<'a, T: ToOffset>( - &self, - range: Range, - buffer: &BufferSnapshot, - ) -> Vec<(&Grammar, usize, Node)> { - let start = buffer.anchor_before(range.start.to_offset(buffer)); - let end = 
buffer.anchor_after(range.end.to_offset(buffer)); + pub fn peek(&self) -> Option { + let layer = self.layers.first()?; + if !layer.has_next { + return None; + } + Some(SyntaxMapMatch { + depth: layer.depth, + grammar_index: layer.grammar_index, + pattern_index: layer.next_pattern_index, + captures: &layer.next_captures, + }) + } - let mut cursor = self.layers.filter::<_, ()>(|summary| { - let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); - let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); - !is_before_start && !is_after_end - }); + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers.first_mut() { + layer + } else { + return false; + }; - let mut result = Vec::new(); - cursor.next(buffer); - while let Some(layer) = cursor.item() { - if let Some(grammar) = &layer.language.grammar { - result.push(( - grammar.as_ref(), - layer.depth, - layer.tree.root_node_with_offset( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - )); - } - cursor.next(buffer) + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; } - result + true } } -impl<'a> Iterator for SyntaxMapCaptures<'a> { - type Item = SyntaxMapCapture<'a>; +impl<'a> SyntaxMapCapturesLayer<'a> { + fn advance(&mut self) { + self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]); + } - fn next(&mut self) -> Option { - let layer = self.layers.first_mut()?; - let (mat, ix) = layer.captures.next()?; - - let capture = mat.captures[ix as usize]; - let grammar = layer.grammar; - let depth = layer.depth; - - if let Some(key) = layer.sort_key() { - let mut i = 1; - while let Some(later_layer) = self.layers.get_mut(i) { - if let Some(later_key) = later_layer.sort_key() { - if key > later_key { - i += 1; - continue; - } - } - break; - } - if i > 1 { - self.layers[0..i].rotate_left(1); - } + fn sort_key(&self) -> (usize, Reverse, usize) { + if let Some(capture) = &self.next_capture { + let range = capture.node.byte_range(); + (range.start, Reverse(range.end), self.depth) } else { - self.layers.remove(0); + (usize::MAX, Reverse(0), usize::MAX) } - - Some(SyntaxMapCapture { - grammar, - depth, - node: capture.node, - index: capture.index, - }) } } -impl<'a> SyntaxMapCapturesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let (mat, ix) = self.captures.peek()?; - let range = &mat.captures[*ix].node.byte_range(); - Some((range.start, Reverse(range.end), self.depth)) +impl<'a> SyntaxMapMatchesLayer<'a> { + fn advance(&mut self) { + if let Some(mat) = self.matches.next() { + self.next_captures.clear(); + self.next_captures.extend_from_slice(&mat.captures); + self.next_pattern_index = mat.pattern_index; + self.has_next = true; + } else { + self.has_next = false; + } + } + + fn sort_key(&self) -> (usize, Reverse, usize) { + if self.has_next { + let captures = &self.next_captures; + if let Some((first, last)) = captures.first().zip(captures.last()) { + return ( + first.node.start_byte(), + Reverse(last.node.end_byte()), + self.depth, + ); + } + } + (usize::MAX, Reverse(0), usize::MAX) } } -impl<'a> SyntaxMapMatchesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let mat = 
self.matches.peek()?; - let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte(); - Some((range.start, Reverse(range.end), self.depth)) +impl<'a> Iterator for SyntaxMapCaptures<'a> { + type Item = SyntaxMapCapture<'a>; + + fn next(&mut self) -> Option { + let result = self.peek(); + self.advance(); + result } } @@ -864,6 +1063,7 @@ impl Default for SyntaxLayerSummary { fn default() -> Self { Self { max_depth: 0, + min_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, } @@ -875,7 +1075,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { if other.max_depth > self.max_depth { - *self = other.clone(); + self.max_depth = other.max_depth; + self.range = other.range.clone(); } else { if other.range.start.cmp(&self.range.start, buffer).is_lt() { self.range.start = other.range.start; @@ -883,8 +1084,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { if other.range.end.cmp(&self.range.end, buffer).is_gt() { self.range.end = other.range.end; } - self.last_layer_range = other.last_layer_range.clone(); } + self.last_layer_range = other.last_layer_range.clone(); } } @@ -927,6 +1128,7 @@ impl sum_tree::Item for SyntaxLayer { fn summary(&self) -> Self::Summary { SyntaxLayerSummary { + min_depth: self.depth, max_depth: self.depth, range: self.range.clone(), last_layer_range: self.range.clone(), @@ -944,12 +1146,73 @@ impl std::fmt::Debug for SyntaxLayer { } } +impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { + type I = ByteChunks<'a>; + + fn text(&mut self, node: tree_sitter::Node) -> Self::I { + ByteChunks(self.0.chunks_in_range(node.byte_range())) + } +} + +impl<'a> Iterator for ByteChunks<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + self.0.next().map(str::as_bytes) + } +} + +impl QueryCursorHandle { + pub(crate) fn new() -> Self { + let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); + cursor.set_match_limit(64); + QueryCursorHandle(Some(cursor)) + } +} + +impl Deref for QueryCursorHandle { + type Target = QueryCursor; + + fn deref(&self) -> &Self::Target { + self.0.as_ref().unwrap() + } +} + +impl DerefMut for QueryCursorHandle { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.as_mut().unwrap() + } +} + +impl Drop for QueryCursorHandle { + fn drop(&mut self) { + let mut cursor = self.0.take().unwrap(); + cursor.set_byte_range(0..usize::MAX); + cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); + QUERY_CURSORS.lock().push(cursor) + } +} + +pub(crate) trait ToTreeSitterPoint { + fn to_ts_point(self) -> tree_sitter::Point; + fn from_ts_point(point: tree_sitter::Point) -> Self; +} + +impl ToTreeSitterPoint for Point { + fn to_ts_point(self) -> tree_sitter::Point { + tree_sitter::Point::new(self.row as usize, self.column as usize) + } + + fn from_ts_point(point: tree_sitter::Point) -> Self { + Point::new(point.row as u32, point.column as u32) + } +} + #[cfg(test)] mod tests { use super::*; use crate::LanguageConfig; use text::{Buffer, Point}; - use tree_sitter::Query; use unindent::Unindent as _; use util::test::marked_text_ranges; @@ -1298,13 +1561,13 @@ mod tests { mutated_layers.into_iter().zip(reference_layers.into_iter()) { assert_eq!( - edited_layer.1.to_sexp(), - reference_layer.1.to_sexp(), + edited_layer.2.to_sexp(), + reference_layer.2.to_sexp(), "different layer at step {i}" ); assert_eq!( - edited_layer.1.range(), - reference_layer.1.range(), + 
edited_layer.2.range(), + reference_layer.2.range(), "different layer at step {i}" ); } @@ -1377,16 +1640,16 @@ mod tests { marked_string: &str, ) { let mut actual_ranges = Vec::>::new(); - for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| { + let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| { grammar.highlights_query.as_ref() - }) { - let name = &capture - .grammar - .highlights_query - .as_ref() - .unwrap() - .capture_names()[capture.index as usize]; - dbg!(capture.node, capture.index, name); + }); + let queries = captures + .grammars() + .iter() + .map(|grammar| grammar.highlights_query.as_ref().unwrap()) + .collect::>(); + for capture in captures { + let name = &queries[capture.grammar_index].capture_names()[capture.index as usize]; if highlight_query_capture_names.contains(&name.as_str()) { actual_ranges.push(capture.node.byte_range()); } diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index 44c15d1a3b7767db6e5b0f56998e08ab74c60b97..ad997753cd939d710b0ab549a9278e6ed06cf263 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -1407,7 +1407,9 @@ fn json_lang() -> Language { fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { buffer.read_with(cx, |buffer, _| { - buffer.syntax_tree().unwrap().root_node().to_sexp() + let syntax_map = buffer.syntax_map(); + let layers = syntax_map.layers(buffer.as_text_snapshot()); + layers[0].2.to_sexp() }) } diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 0f762f822fc8e1f9340a6885a908ed081ffcdc2b..531fdcbe15381d82e9148da79069963e93441342 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -2056,6 +2056,7 @@ impl Project { let full_path = buffer.read(cx).file()?.full_path(cx); let language = self.languages.select_language(&full_path)?; buffer.update(cx, |buffer, cx| { + buffer.set_language_registry(self.languages.clone()); buffer.set_language(Some(language.clone()), cx); }); diff --git a/crates/zed/src/languages/rust/injections.scm b/crates/zed/src/languages/rust/injections.scm index 9d8c03c8893b5acbfa5c6c0bc4703010c87b65a1..57ebea8539345c72145eaa44cffb09845f913406 100644 --- a/crates/zed/src/languages/rust/injections.scm +++ b/crates/zed/src/languages/rust/injections.scm @@ -1,3 +1,7 @@ (macro_invocation - (token_tree) @content) + (token_tree) @content + (#set! "language" "rust")) + +(macro_rule + (token_tree) @content (#set! 
"language" "rust")) \ No newline at end of file From 587175d0ea007875704813fe5beb274d9072c315 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:09:10 -0700 Subject: [PATCH 10/22] Update syntax map's interpolated version when a parse completes --- crates/language/src/syntax_map.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index ca0c28202c3b9d8577d4bb9544e84573cb097212..14055991e6f3223d694ef75f9b4891d23be62e39 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -173,6 +173,7 @@ impl SyntaxMap { } pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { + self.interpolated_version = version.clone(); self.parsed_version = version; self.snapshot = snapshot; } From 3245e4f8d70b88edb37d9758539990207be4bc06 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:10:53 -0700 Subject: [PATCH 11/22] Fix out-of-range panic when requesting outline items at EOF --- crates/language/src/buffer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 7b298b74207ee73ef2bf4a50523267d296efa2f1..80b9cdbaa3cbac70284fb7e70c43fa55a4ea0272 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -1953,8 +1953,10 @@ impl BufferSnapshot { theme: Option<&SyntaxTheme>, ) -> Option>> { let position = position.to_offset(self); - let mut items = - self.outline_items_containing(position.saturating_sub(1)..position + 1, theme)?; + let mut items = self.outline_items_containing( + position.saturating_sub(1)..self.len().min(position + 1), + theme, + )?; let mut prev_depth = None; items.retain(|item| { let result = prev_depth.map_or(true, |prev_depth| item.depth > prev_depth); From b3f4c732648d8677162468ec922e051ef9ad57fd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:11:26 -0700 Subject: [PATCH 12/22] Clean up some of buffer's syntax-related methods --- crates/language/src/buffer.rs | 119 +++++++++++++++++----------------- crates/language/src/tests.rs | 4 +- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 80b9cdbaa3cbac70284fb7e70c43fa55a4ea0272..ca32c8b1fb203417b0d60a33b13f841709d747ca 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -72,7 +72,7 @@ pub struct Buffer { pub struct BufferSnapshot { text: text::BufferSnapshot, - syntax: SyntaxSnapshot, + pub(crate) syntax: SyntaxSnapshot, file: Option>, diagnostics: DiagnosticSet, diagnostics_update_count: usize, @@ -461,9 +461,14 @@ impl Buffer { } pub fn snapshot(&self) -> BufferSnapshot { + let text = self.text.snapshot(); + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&text); + let syntax = syntax_map.snapshot(); + BufferSnapshot { - text: self.text.snapshot(), - syntax: self.syntax_map(), + text, + syntax, file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -674,12 +679,6 @@ impl Buffer { self.file_update_count } - pub(crate) fn syntax_map(&self) -> SyntaxSnapshot { - let mut syntax_map = self.syntax_map.lock(); - syntax_map.interpolate(&self.text_snapshot()); - syntax_map.snapshot() - } - #[cfg(any(test, feature = "test-support"))] pub fn is_parsing(&self) -> bool { self.parsing_in_background @@ -690,73 +689,73 @@ impl Buffer { self.sync_parse_timeout = timeout; } 
- fn reparse(&mut self, cx: &mut ModelContext) -> bool { + fn reparse(&mut self, cx: &mut ModelContext) { if self.parsing_in_background { - return false; + return; } + let language = if let Some(language) = self.language.clone() { + language + } else { + return; + }; - if let Some(language) = self.language.clone() { - let text = self.text_snapshot(); - let parsed_version = self.version(); + let text = self.text_snapshot(); + let parsed_version = self.version(); - let mut syntax_map; - let language_registry; - let syntax_map_version; - { - let mut map = self.syntax_map.lock(); - map.interpolate(&text); - language_registry = map.language_registry(); - syntax_map = map.snapshot(); - syntax_map_version = map.parsed_version(); + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&text); + let language_registry = syntax_map.language_registry(); + let mut syntax_snapshot = syntax_map.snapshot(); + let syntax_map_version = syntax_map.parsed_version(); + drop(syntax_map); + + let parse_task = cx.background().spawn({ + let language = language.clone(); + async move { + syntax_snapshot.reparse(&syntax_map_version, &text, language_registry, language); + syntax_snapshot } - let parse_task = cx.background().spawn({ - let language = language.clone(); - async move { - syntax_map.reparse(&syntax_map_version, &text, language_registry, language); - syntax_map - } - }); - - match cx - .background() - .block_with_timeout(self.sync_parse_timeout, parse_task) - { - Ok(new_syntax_map) => { - self.did_finish_parsing(new_syntax_map, parsed_version, cx); - return true; - } - Err(parse_task) => { - self.parsing_in_background = true; - cx.spawn(move |this, mut cx| async move { - let new_syntax_map = parse_task.await; - this.update(&mut cx, move |this, cx| { - let grammar_changed = - this.language.as_ref().map_or(true, |current_language| { - !Arc::ptr_eq(&language, current_language) - }); - let parse_again = - this.version.changed_since(&parsed_version) || grammar_changed; - this.parsing_in_background = false; - this.did_finish_parsing(new_syntax_map, parsed_version, cx); + }); - if parse_again && this.reparse(cx) {} - }); - }) - .detach(); - } + match cx + .background() + .block_with_timeout(self.sync_parse_timeout, parse_task) + { + Ok(new_syntax_snapshot) => { + self.did_finish_parsing(new_syntax_snapshot, parsed_version, cx); + return; + } + Err(parse_task) => { + self.parsing_in_background = true; + cx.spawn(move |this, mut cx| async move { + let new_syntax_map = parse_task.await; + this.update(&mut cx, move |this, cx| { + let grammar_changed = + this.language.as_ref().map_or(true, |current_language| { + !Arc::ptr_eq(&language, current_language) + }); + let parse_again = + this.version.changed_since(&parsed_version) || grammar_changed; + this.did_finish_parsing(new_syntax_map, parsed_version, cx); + this.parsing_in_background = false; + if parse_again { + this.reparse(cx); + } + }); + }) + .detach(); } } - false } fn did_finish_parsing( &mut self, - syntax_map: SyntaxSnapshot, + syntax_snapshot: SyntaxSnapshot, version: clock::Global, cx: &mut ModelContext, ) { self.parse_count += 1; - self.syntax_map.lock().did_parse(syntax_map, version); + self.syntax_map.lock().did_parse(syntax_snapshot, version); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index ad997753cd939d710b0ab549a9278e6ed06cf263..8a912b9a9b9e981496edefde6b57629f54516509 100644 --- a/crates/language/src/tests.rs +++ 
b/crates/language/src/tests.rs @@ -1407,8 +1407,8 @@ fn json_lang() -> Language { fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { buffer.read_with(cx, |buffer, _| { - let syntax_map = buffer.syntax_map(); - let layers = syntax_map.layers(buffer.as_text_snapshot()); + let snapshot = buffer.snapshot(); + let layers = snapshot.syntax.layers(buffer.as_text_snapshot()); layers[0].2.to_sexp() }) } From 015b6c4a31cfc6e0d34d4989daab8de1c4b04ccf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:29:07 -0700 Subject: [PATCH 13/22] Fix test about auto-indent with no indent query --- crates/language/src/buffer.rs | 24 +++++++++++++----------- crates/language/src/tests.rs | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index ca32c8b1fb203417b0d60a33b13f841709d747ca..7fe62d7cd873b6a2194da3e21a581196a024839a 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -1627,9 +1627,7 @@ impl BufferSnapshot { &self, row_range: Range, ) -> Option> + '_> { - let language = self.language.as_ref()?; - let grammar = language.grammar.as_ref()?; - let config = &language.config; + let config = &self.language.as_ref()?.config; let prev_non_blank_row = self.prev_non_blank_row(row_range.start); // Find the suggested indentation ranges based on the syntax tree. @@ -1639,20 +1637,24 @@ impl BufferSnapshot { let mut matches = self.syntax.matches(range, &self.text, |grammar| { Some(&grammar.indents_config.as_ref()?.query) }); + let indent_configs = matches + .grammars() + .iter() + .map(|grammar| grammar.indents_config.as_ref().unwrap()) + .collect::>(); let mut indent_ranges = Vec::>::new(); while let Some(mat) = matches.peek() { let mut start: Option = None; let mut end: Option = None; - if let Some(config) = &grammar.indents_config { - for capture in mat.captures { - if capture.index == config.indent_capture_ix { - start.get_or_insert(Point::from_ts_point(capture.node.start_position())); - end.get_or_insert(Point::from_ts_point(capture.node.end_position())); - } else if Some(capture.index) == config.end_capture_ix { - end = Some(Point::from_ts_point(capture.node.start_position())); - } + let config = &indent_configs[mat.grammar_index]; + for capture in mat.captures { + if capture.index == config.indent_capture_ix { + start.get_or_insert(Point::from_ts_point(capture.node.start_position())); + end.get_or_insert(Point::from_ts_point(capture.node.end_position())); + } else if Some(capture.index) == config.end_capture_ix { + end = Some(Point::from_ts_point(capture.node.start_position())); } } diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index 8a912b9a9b9e981496edefde6b57629f54516509..f2a33ccbd335d1009141aeac7abf3200ff8c0a2e 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -998,6 +998,7 @@ fn test_autoindent_language_without_indents_query(cx: &mut MutableAppContext) { Arc::new(Language::new( LanguageConfig { name: "Markdown".into(), + auto_indent_using_last_non_empty_line: false, ..Default::default() }, Some(tree_sitter_json::language()), From 61b70b7e2d6c528c89b7df377c088d9e4fc6f8c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:29:25 -0700 Subject: [PATCH 14/22] Fix re-parsing when buffer's language has changed --- crates/language/src/syntax_map.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 
14055991e6f3223d694ef75f9b4891d23be62e39..117699a3515a58b130b8bb6bb7590f60293b505b 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -306,10 +306,6 @@ impl SyntaxSnapshot { language: Arc, ) { let edits = text.edits_since::(from_version).collect::>(); - if edits.is_empty() { - return; - } - let max_depth = self.layers.summary().max_depth; let mut cursor = self.layers.cursor::(); cursor.next(&text); From f96c19b81a730f5bbf2cb586da616fa9d6c2e59a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:37:14 -0700 Subject: [PATCH 15/22] Add injection queries for macro bodies in C and C++ --- crates/zed/src/languages/c/injections.scm | 7 +++++++ crates/zed/src/languages/cpp/injections.scm | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 crates/zed/src/languages/c/injections.scm create mode 100644 crates/zed/src/languages/cpp/injections.scm diff --git a/crates/zed/src/languages/c/injections.scm b/crates/zed/src/languages/c/injections.scm new file mode 100644 index 0000000000000000000000000000000000000000..845a63bd1bd4e700df0fd1eb3c5d10d31e2ab0e4 --- /dev/null +++ b/crates/zed/src/languages/c/injections.scm @@ -0,0 +1,7 @@ +(preproc_def + value: (preproc_arg) @content + (#set! "language" "c")) + +(preproc_function_def + value: (preproc_arg) @content + (#set! "language" "c")) \ No newline at end of file diff --git a/crates/zed/src/languages/cpp/injections.scm b/crates/zed/src/languages/cpp/injections.scm new file mode 100644 index 0000000000000000000000000000000000000000..eca372d577be30c352a2b7f7d93505a3b869e293 --- /dev/null +++ b/crates/zed/src/languages/cpp/injections.scm @@ -0,0 +1,7 @@ +(preproc_def + value: (preproc_arg) @content + (#set! "language" "c++")) + +(preproc_function_def + value: (preproc_arg) @content + (#set! "language" "c++")) \ No newline at end of file From d48380bc48890021b51c601277538261ae514920 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:37:25 -0700 Subject: [PATCH 16/22] Fix pattern order in C++ highlight query Later patterns take precedence in Zed, so function names were previously being highlighted as variables due to the plain `identifier` pattern. --- crates/zed/src/languages/cpp/highlights.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/zed/src/languages/cpp/highlights.scm b/crates/zed/src/languages/cpp/highlights.scm index d579d701879f6a0e700c88a736e91251e9e09392..2dd9188308c874dd3204a3f8fa8f5efa258e1bc5 100644 --- a/crates/zed/src/languages/cpp/highlights.scm +++ b/crates/zed/src/languages/cpp/highlights.scm @@ -1,3 +1,5 @@ +(identifier) @variable + (call_expression function: (qualified_identifier name: (identifier) @function)) @@ -34,8 +36,6 @@ (auto) @type (type_identifier) @type -(identifier) @variable - ((identifier) @constant (#match? 
@constant "^[A-Z][A-Z\\d_]*$")) From 345b266deefc6c26bc527537d73912a0bac92eeb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:49:03 -0700 Subject: [PATCH 17/22] Add missing rust highlights --- crates/zed/src/languages/rust/highlights.scm | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/crates/zed/src/languages/rust/highlights.scm b/crates/zed/src/languages/rust/highlights.scm index d4f571dd526ecd795c6307b14780caea7201d5a0..72482b4073724f7bb664a6f62624bdd2e5eb6e1c 100644 --- a/crates/zed/src/languages/rust/highlights.scm +++ b/crates/zed/src/languages/rust/highlights.scm @@ -1,6 +1,6 @@ (type_identifier) @type (primitive_type) @type.builtin - +(self) @variable.builtin (field_identifier) @property (call_expression @@ -15,6 +15,16 @@ (function_item name: (identifier) @function.definition) (function_signature_item name: (identifier) @function.definition) +(macro_invocation + macro: [ + (identifier) @function.special + (scoped_identifier + name: (identifier) @function.special) + ]) + +(macro_definition + name: (identifier) @function.special.definition) + ; Identifier conventions ; Assume uppercase names are enum constructors @@ -71,6 +81,7 @@ "mod" "move" "pub" + "ref" "return" "static" "struct" @@ -91,6 +102,13 @@ (char_literal) ] @string +[ + (integer_literal) + (float_literal) +] @number + +(boolean_literal) @constant + [ (line_comment) (block_comment) From 1f12186e3ce53c0981a2a84e1cf562413d849782 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 10:58:46 -0700 Subject: [PATCH 18/22] Update to latest tree-sitter commit This is needed for https://github.com/tree-sitter/tree-sitter/pull/1845 --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/language/src/syntax_map.rs | 8 ++++++++ crates/text/src/text.rs | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a6d594f667bf07e39a2fbafdc3f78d91033b09f..0f187a54e63856fcb1ef14749c258e5c2d5edfcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5842,7 +5842,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.8" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=477b6677537e89c7bdff14ce84dad6d23a6415bb#477b6677537e89c7bdff14ce84dad6d23a6415bb" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=366210ae925d7ea0891bc7a0c738f60c77c04d7b#366210ae925d7ea0891bc7a0c738f60c77c04d7b" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index b4df3fd101913dad11bac7660602c7cce4e6464d..31a9118a1ae531fb6ec9250cb11566a633fcf109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ default-members = ["crates/zed"] resolver = "2" [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "477b6677537e89c7bdff14ce84dad6d23a6415bb" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "366210ae925d7ea0891bc7a0c738f60c77c04d7b" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 117699a3515a58b130b8bb6bb7590f60293b505b..e537e5e7937953d2c140d68f20aab28bd3f131a9 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -284,11 +284,19 @@ impl SyntaxSnapshot { }; layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { break; } } + 
debug_assert!( + layer.tree.root_node().end_byte() <= text.len(), + "tree's size {}, is larger than text size {}", + layer.tree.root_node().end_byte(), + text.len(), + ); + layers.push(layer, text); cursor.next(text); } diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 1f2e4e7c7a95321a28e2a0e46885b90923869b87..39812740fb4ba8a5fc001b3b1ab4a86254ea224b 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -2435,7 +2435,7 @@ impl ToOffset for PointUtf16 { impl ToOffset for usize { fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize { - assert!(*self <= snapshot.len(), "offset is out of range"); + assert!(*self <= snapshot.len(), "offset {self} is out of range"); *self } } From 74a2b093ab2ba88e7465fe3c69818020c6da496d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 16:40:18 -0700 Subject: [PATCH 19/22] Start work on randomized test for SyntaxMap, fix discovered bugs --- crates/language/src/syntax_map.rs | 133 ++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index e537e5e7937953d2c140d68f20aab28bd3f131a9..f7d135da1c7f04c06dba120205ceeb529897c88d 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -197,7 +197,7 @@ impl SyntaxSnapshot { } let mut layers = SumTree::new(); - let mut edits_for_depth = &edits[..]; + let mut first_edit_ix_for_depth = 0; let mut cursor = self.layers.cursor::(); cursor.next(text); @@ -205,7 +205,7 @@ impl SyntaxSnapshot { let depth = cursor.end(text).max_depth; // Preserve any layers at this depth that precede the first edit. - if let Some(first_edit) = edits_for_depth.first() { + if let Some(first_edit) = edits.get(first_edit_ix_for_depth) { let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); if target.cmp(&cursor.start(), text).is_gt() { let slice = cursor.slice(&target, Bias::Left, text); @@ -221,7 +221,7 @@ impl SyntaxSnapshot { text, ); layers.push_tree(slice, text); - edits_for_depth = &edits[..]; + first_edit_ix_for_depth = 0; continue; }; @@ -241,9 +241,9 @@ impl SyntaxSnapshot { // Ignore edits that end before the start of this layer, and don't consider them // for any subsequent layers at this same depth. loop { - if let Some(edit) = edits_for_depth.first() { + if let Some(edit) = edits.get(first_edit_ix_for_depth) { if edit.new.end.0 < start_byte { - edits_for_depth = &edits_for_depth[1..]; + first_edit_ix_for_depth += 1; } else { break; } @@ -252,15 +252,21 @@ impl SyntaxSnapshot { } } + let mut old_start_byte = start_byte; + if first_edit_ix_for_depth > 0 { + let edit = &edits[first_edit_ix_for_depth - 1]; + old_start_byte = edit.old.end.0 + (start_byte - edit.new.end.0); + } + let mut layer = layer.clone(); - for edit in edits_for_depth { + for edit in &edits[first_edit_ix_for_depth..] { // Ignore any edits that follow this layer. if edit.new.start.0 > end_byte { break; } // Apply any edits that intersect this layer to the layer's syntax tree. 
- let tree_edit = if edit.new.start.0 >= start_byte { + let tree_edit = if edit.old.start.0 >= old_start_byte { tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte @@ -273,21 +279,18 @@ impl SyntaxSnapshot { new_end_position: (edit.new.end.1 - start_point).to_ts_point(), } } else { + let node = layer.tree.root_node(); tree_sitter::InputEdit { start_byte: 0, - old_end_byte: edit.new.end.0 - start_byte, + old_end_byte: node.end_byte(), new_end_byte: 0, start_position: Default::default(), - old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + old_end_position: node.end_position(), new_end_position: Default::default(), } }; layer.tree.edit(&tree_edit); - - if edit.new.start.0 < start_byte { - break; - } } debug_assert!( @@ -363,7 +366,7 @@ impl SyntaxSnapshot { if changed_regions.intersects(&layer, text) { changed_regions.insert( ChangedRegion { - depth: depth + 1, + depth: layer.depth + 1, range: layer.range.clone(), }, text, @@ -918,7 +921,7 @@ fn get_injections( let mut query_cursor = QueryCursorHandle::new(); let mut prev_match = None; for query_range in query_ranges { - query_cursor.set_byte_range(query_range.start..query_range.end); + query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end); for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { let content_ranges = mat .nodes_for_capture_index(config.content_capture_ix) @@ -1217,6 +1220,8 @@ impl ToTreeSitterPoint for Point { mod tests { use super::*; use crate::LanguageConfig; + use rand::rngs::StdRng; + use std::env; use text::{Buffer, Point}; use unindent::Unindent as _; use util::test::marked_text_ranges; @@ -1532,6 +1537,104 @@ mod tests { ]); } + #[gpui::test] + fn test_removing_injection_by_replacing_across_boundary() { + test_edit_sequence(&[ + " + fn one() { + two!( + three.four, + ); + } + ", + " + fn one() { + t«en + .eleven( + twelve, + » + three.four, + ); + } + ", + ]); + } + + #[gpui::test(iterations = 100)] + fn test_random_syntax_map_edits(mut rng: StdRng) { + let operations = env::var("OPERATIONS") + .map(|i| i.parse().expect("invalid `OPERATIONS` variable")) + .unwrap_or(10); + + let text = r#" + fn test_something() { + let vec = vec![5, 1, 3, 8]; + assert_eq!( + vec + .into_iter() + .map(|i| i * 2) + .collect::>(), + vec![ + 5 * 2, 1 * 2, 3 * 2, 8 * 2 + ], + ); + } + "# + .unindent(); + + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + let mut buffer = Buffer::new(0, 0, text); + + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(language.clone(), &buffer); + + let mut reference_syntax_map = SyntaxMap::new(); + reference_syntax_map.set_language_registry(registry.clone()); + + for i in 0..operations { + buffer.randomly_edit(&mut rng, 2); + log::info!("text:\n{}", buffer.text()); + + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + reference_syntax_map.reparse(language.clone(), &buffer); + assert_eq!( + syntax_map.layers(&buffer).len(), + reference_syntax_map.layers(&buffer).len(), + "wrong number of layers after performing edit {i}" + ); + } + + for i in 0..operations { + let i = operations - i - 1; + buffer.undo(); + log::info!("undoing operation {}", i); + log::info!("text:\n{}", buffer.text()); + + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + 
reference_syntax_map.reparse(language.clone(), &buffer); + assert_eq!( + syntax_map.layers(&buffer).len(), + reference_syntax_map.layers(&buffer).len(), + "wrong number of layers after undoing edit {i}" + ); + } + + let layers = syntax_map.layers(&buffer); + let reference_layers = reference_syntax_map.layers(&buffer); + for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter()) + { + assert_eq!(edited_layer.2.to_sexp(), reference_layer.2.to_sexp()); + assert_eq!(edited_layer.2.range(), reference_layer.2.range()); + } + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 1746ec573a9c51b63ead5674437adcba70039937 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:15:53 -0700 Subject: [PATCH 20/22] Check invariants after interpolating tree in random syntax map test --- crates/language/src/syntax_map.rs | 124 ++++++++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 5 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index f7d135da1c7f04c06dba120205ceeb529897c88d..414516f824ff93c380d4f864ffd475467a59f663 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -159,10 +159,6 @@ impl SyntaxMap { #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { - if !self.interpolated_version.observed_all(&text.version) { - self.interpolate(text); - } - self.snapshot.reparse( &self.parsed_version, text, @@ -170,6 +166,7 @@ impl SyntaxMap { language, ); self.parsed_version = text.version.clone(); + self.interpolated_version = text.version.clone(); } pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { @@ -1580,7 +1577,8 @@ mod tests { ); } "# - .unindent(); + .unindent() + .repeat(2); let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); @@ -1594,10 +1592,18 @@ mod tests { let mut reference_syntax_map = SyntaxMap::new(); reference_syntax_map.set_language_registry(registry.clone()); + log::info!("initial text:\n{}", buffer.text()); + for i in 0..operations { + let prev_buffer = buffer.snapshot(); + let prev_syntax_map = syntax_map.snapshot(); + buffer.randomly_edit(&mut rng, 2); log::info!("text:\n{}", buffer.text()); + syntax_map.interpolate(&buffer); + check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer); + syntax_map.reparse(language.clone(), &buffer); reference_syntax_map.clear(); @@ -1615,6 +1621,7 @@ mod tests { log::info!("undoing operation {}", i); log::info!("text:\n{}", buffer.text()); + syntax_map.interpolate(&buffer); syntax_map.reparse(language.clone(), &buffer); reference_syntax_map.clear(); @@ -1635,6 +1642,113 @@ mod tests { } } + fn check_interpolation( + old_syntax_map: &SyntaxSnapshot, + new_syntax_map: &SyntaxSnapshot, + old_buffer: &BufferSnapshot, + new_buffer: &BufferSnapshot, + ) { + let edits = new_buffer + .edits_since::(&old_buffer.version()) + .collect::>(); + + for (old_layer, new_layer) in old_syntax_map + .layers + .iter() + .zip(new_syntax_map.layers.iter()) + { + assert_eq!(old_layer.range, new_layer.range); + let old_start_byte = old_layer.range.start.to_offset(old_buffer); + let new_start_byte = new_layer.range.start.to_offset(new_buffer); + let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point(); + let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point(); + let old_node = 
old_layer + .tree + .root_node_with_offset(old_start_byte, old_start_point); + let new_node = new_layer + .tree + .root_node_with_offset(new_start_byte, new_start_point); + check_node_edits( + old_layer.depth, + &old_layer.range, + old_node, + new_node, + old_buffer, + new_buffer, + &edits, + ); + } + + fn check_node_edits( + depth: usize, + range: &Range, + old_node: Node, + new_node: Node, + old_buffer: &BufferSnapshot, + new_buffer: &BufferSnapshot, + edits: &[text::Edit], + ) { + assert_eq!(old_node.kind(), new_node.kind()); + + let old_range = old_node.byte_range(); + let new_range = new_node.byte_range(); + + let is_edited = edits + .iter() + .any(|edit| edit.new.start < new_range.end && edit.new.end > new_range.start); + if is_edited { + assert!( + new_node.has_changes(), + concat!( + "failed to mark node as edited.\n", + "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n", + "node kind: {}, old node range: {:?}, new node range: {:?}", + ), + depth, + range.to_offset(old_buffer), + range.to_offset(new_buffer), + new_node.kind(), + old_range, + new_range, + ); + } + + if !new_node.has_changes() { + assert_eq!( + old_buffer + .text_for_range(old_range.clone()) + .collect::(), + new_buffer + .text_for_range(new_range.clone()) + .collect::(), + concat!( + "mismatched text for node\n", + "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n", + "node kind: {}, old node range:{:?}, new node range:{:?}", + ), + depth, + range.to_offset(old_buffer), + range.to_offset(new_buffer), + new_node.kind(), + old_range, + new_range, + ); + } + + for i in 0..new_node.child_count() { + check_node_edits( + depth, + range, + old_node.child(i).unwrap(), + new_node.child(i).unwrap(), + old_buffer, + new_buffer, + edits, + ) + } + } + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 74fd348d22c83ae649e8ae5b6d8e1a931fa3739d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:51:31 -0700 Subject: [PATCH 21/22] Add Buffer::anchored_edits_since method This method returns the anchor range associated with each edit. The anchor ranges allow you to determine how each edit interacts with an existing anchor range that the edit has touched. 
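
A rough sketch of how a caller might consume this iterator (illustration only, not code from this series: `buffer`, `old_version`, and `layer_range` are assumed placeholders, and the shape mirrors how `SyntaxSnapshot::interpolate` uses anchored edits in the final patch of this series):

    // Sketch: decide whether an edit can affect a layer by comparing anchors
    // rather than raw byte offsets, which stay valid as the text keeps changing.
    let snapshot = buffer.snapshot();
    for (edit, edit_range) in snapshot.anchored_edits_since::<(usize, Point)>(&old_version) {
        // `edit` carries old/new offsets and points; `edit_range` is the pair of
        // anchors spanning the fragments that the edit touched.
        if edit_range.end.cmp(&layer_range.start, &snapshot).is_le() {
            continue; // edit ends before the layer starts
        }
        if edit_range.start.cmp(&layer_range.end, &snapshot).is_ge() {
            break; // edit begins after the layer ends
        }
        // ...apply the edit to the layer's syntax tree...
    }
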
--- crates/text/src/text.rs | 76 +++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 39812740fb4ba8a5fc001b3b1ab4a86254ea224b..a7736cc8cc86d548f2fa7e7dcb58705850308ad5 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -382,6 +382,7 @@ struct Edits<'a, D: TextDimension, F: FnMut(&FragmentSummary) -> bool> { old_end: D, new_end: D, range: Range<(&'a Locator, usize)>, + buffer_id: u64, } #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -1917,11 +1918,33 @@ impl BufferSnapshot { self.edits_since_in_range(since, Anchor::MIN..Anchor::MAX) } + pub fn anchored_edits_since<'a, D>( + &'a self, + since: &'a clock::Global, + ) -> impl 'a + Iterator, Range)> + where + D: TextDimension + Ord, + { + self.anchored_edits_since_in_range(since, Anchor::MIN..Anchor::MAX) + } + pub fn edits_since_in_range<'a, D>( &'a self, since: &'a clock::Global, range: Range, ) -> impl 'a + Iterator> + where + D: TextDimension + Ord, + { + self.anchored_edits_since_in_range(since, range) + .map(|item| item.0) + } + + pub fn anchored_edits_since_in_range<'a, D>( + &'a self, + since: &'a clock::Global, + range: Range, + ) -> impl 'a + Iterator, Range)> where D: TextDimension + Ord, { @@ -1961,6 +1984,7 @@ impl BufferSnapshot { old_end: Default::default(), new_end: Default::default(), range: (start_fragment_id, range.start.offset)..(end_fragment_id, range.end.offset), + buffer_id: self.remote_id, } } } @@ -2019,10 +2043,10 @@ impl<'a> RopeBuilder<'a> { } impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator for Edits<'a, D, F> { - type Item = Edit; + type Item = (Edit, Range); fn next(&mut self) -> Option { - let mut pending_edit: Option> = None; + let mut pending_edit: Option = None; let cursor = self.fragments_cursor.as_mut()?; while let Some(fragment) = cursor.item() { @@ -2041,11 +2065,25 @@ impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator fo if pending_edit .as_ref() - .map_or(false, |change| change.new.end < self.new_end) + .map_or(false, |(change, _)| change.new.end < self.new_end) { break; } + let timestamp = fragment.insertion_timestamp.local(); + let start_anchor = Anchor { + timestamp, + offset: fragment.insertion_offset, + bias: Bias::Right, + buffer_id: Some(self.buffer_id), + }; + let end_anchor = Anchor { + timestamp, + offset: fragment.insertion_offset + fragment.len, + bias: Bias::Left, + buffer_id: Some(self.buffer_id), + }; + if !fragment.was_visible(self.since, self.undos) && fragment.visible { let mut visible_end = cursor.end(&None).visible; if fragment.id == *self.range.end.0 { @@ -2058,13 +2096,17 @@ impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator fo let fragment_summary = self.visible_cursor.summary(visible_end); let mut new_end = self.new_end.clone(); new_end.add_assign(&fragment_summary); - if let Some(pending_edit) = pending_edit.as_mut() { - pending_edit.new.end = new_end.clone(); + if let Some((edit, range)) = pending_edit.as_mut() { + edit.new.end = new_end.clone(); + range.end = end_anchor; } else { - pending_edit = Some(Edit { - old: self.old_end.clone()..self.old_end.clone(), - new: self.new_end.clone()..new_end.clone(), - }); + pending_edit = Some(( + Edit { + old: self.old_end.clone()..self.old_end.clone(), + new: self.new_end.clone()..new_end.clone(), + }, + start_anchor..end_anchor, + )); } self.new_end = new_end; @@ -2083,13 +2125,17 @@ impl<'a, D: TextDimension + Ord, F: 
FnMut(&FragmentSummary) -> bool> Iterator fo let fragment_summary = self.deleted_cursor.summary(deleted_end); let mut old_end = self.old_end.clone(); old_end.add_assign(&fragment_summary); - if let Some(pending_edit) = pending_edit.as_mut() { - pending_edit.old.end = old_end.clone(); + if let Some((edit, range)) = pending_edit.as_mut() { + edit.old.end = old_end.clone(); + range.end = end_anchor; } else { - pending_edit = Some(Edit { - old: self.old_end.clone()..old_end.clone(), - new: self.new_end.clone()..self.new_end.clone(), - }); + pending_edit = Some(( + Edit { + old: self.old_end.clone()..old_end.clone(), + new: self.new_end.clone()..self.new_end.clone(), + }, + start_anchor..end_anchor, + )); } self.old_end = old_end; From a38c6015db40a32361c15ae2d6faa9d916fe187f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:52:58 -0700 Subject: [PATCH 22/22] Fix bugs in SyntaxMap::interpolate found by the randomized test --- crates/language/src/syntax_map.rs | 46 ++++++++++++------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 414516f824ff93c380d4f864ffd475467a59f663..d1bf698e52a07c26c38cffc4576f287e416bb0fd 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -187,7 +187,7 @@ impl SyntaxSnapshot { pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text - .edits_since::<(usize, Point)>(&from_version) + .anchored_edits_since::<(usize, Point)>(&from_version) .collect::>(); if edits.is_empty() { return; @@ -195,15 +195,20 @@ impl SyntaxSnapshot { let mut layers = SumTree::new(); let mut first_edit_ix_for_depth = 0; + let mut prev_depth = 0; let mut cursor = self.layers.cursor::(); cursor.next(text); 'outer: loop { let depth = cursor.end(text).max_depth; + if depth > prev_depth { + first_edit_ix_for_depth = 0; + prev_depth = depth; + } // Preserve any layers at this depth that precede the first edit. - if let Some(first_edit) = edits.get(first_edit_ix_for_depth) { - let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); + if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { + let target = DepthAndMaxPosition(depth, edit_range.start); if target.cmp(&cursor.start(), text).is_gt() { let slice = cursor.slice(&target, Bias::Left, text); layers.push_tree(slice, text); @@ -211,14 +216,13 @@ impl SyntaxSnapshot { } // If this layer follows all of the edits, then preserve it and any // subsequent layers at this same depth. - else { + else if cursor.item().is_some() { let slice = cursor.slice( &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), Bias::Left, text, ); layers.push_tree(slice, text); - first_edit_ix_for_depth = 0; continue; }; @@ -227,19 +231,14 @@ impl SyntaxSnapshot { } else { break; }; + let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text); - let mut endpoints = text - .summaries_for_anchors::<(usize, Point), _>([&layer.range.start, &layer.range.end]); - let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); - let start_byte = layer_range.start.0; - let start_point = layer_range.start.1; - let end_byte = layer_range.end.0; // Ignore edits that end before the start of this layer, and don't consider them // for any subsequent layers at this same depth. 
loop { - if let Some(edit) = edits.get(first_edit_ix_for_depth) { - if edit.new.end.0 < start_byte { + if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { + if edit_range.end.cmp(&layer.range.start, text).is_le() { first_edit_ix_for_depth += 1; } else { break; @@ -249,21 +248,15 @@ impl SyntaxSnapshot { } } - let mut old_start_byte = start_byte; - if first_edit_ix_for_depth > 0 { - let edit = &edits[first_edit_ix_for_depth - 1]; - old_start_byte = edit.old.end.0 + (start_byte - edit.new.end.0); - } - let mut layer = layer.clone(); - for edit in &edits[first_edit_ix_for_depth..] { + for (edit, edit_range) in &edits[first_edit_ix_for_depth..] { // Ignore any edits that follow this layer. - if edit.new.start.0 > end_byte { + if edit_range.start.cmp(&layer.range.end, text).is_ge() { break; } // Apply any edits that intersect this layer to the layer's syntax tree. - let tree_edit = if edit.old.start.0 >= old_start_byte { + let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() { tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte @@ -1594,11 +1587,11 @@ mod tests { log::info!("initial text:\n{}", buffer.text()); - for i in 0..operations { + for _ in 0..operations { let prev_buffer = buffer.snapshot(); let prev_syntax_map = syntax_map.snapshot(); - buffer.randomly_edit(&mut rng, 2); + buffer.randomly_edit(&mut rng, 3); log::info!("text:\n{}", buffer.text()); syntax_map.interpolate(&buffer); @@ -1608,11 +1601,6 @@ mod tests { reference_syntax_map.clear(); reference_syntax_map.reparse(language.clone(), &buffer); - assert_eq!( - syntax_map.layers(&buffer).len(), - reference_syntax_map.layers(&buffer).len(), - "wrong number of layers after performing edit {i}" - ); } for i in 0..operations {
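
For context on the mechanism this series drives, here is a minimal standalone sketch (not Zed code) of tree-sitter's incremental-parsing API: edit coordinates are reported to `Tree::edit`, and the edited old tree is handed back to the parser so unchanged subtrees can be reused. It assumes the `tree-sitter` and `tree-sitter-rust` crates as dependencies and works in absolute buffer coordinates, whereas the `SyntaxMap` additionally shifts each field by an injected layer's start byte and point.

    use tree_sitter::{InputEdit, Parser, Point};

    fn main() {
        let mut parser = Parser::new();
        parser.set_language(tree_sitter_rust::language()).unwrap();

        let old_text = "fn one() { two!(three.four); }";
        let mut tree = parser.parse(old_text, None).unwrap();

        // Replace `three` (bytes 16..21) with `thirty` (bytes 16..22) and record
        // the edit on the old tree so its node positions shift accordingly.
        let new_text = "fn one() { two!(thirty.four); }";
        tree.edit(&InputEdit {
            start_byte: 16,
            old_end_byte: 21,
            new_end_byte: 22,
            start_position: Point { row: 0, column: 16 },
            old_end_position: Point { row: 0, column: 21 },
            new_end_position: Point { row: 0, column: 22 },
        });

        // Passing the edited old tree back lets tree-sitter reuse unchanged subtrees.
        let new_tree = parser.parse(new_text, Some(&tree)).unwrap();
        println!("{}", new_tree.root_node().to_sexp());
    }
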