From be6ee3cbffccc7a1f5548094844317ccbe2d1047 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Nov 2022 16:02:29 -0700 Subject: [PATCH 01/12] Start work on ERB language support --- Cargo.lock | 11 ++++++++ crates/language/src/language.rs | 30 ++++++++++++++------- crates/language/src/syntax_map.rs | 2 +- crates/zed/Cargo.toml | 1 + crates/zed/src/languages.rs | 1 + crates/zed/src/languages/erb/config.toml | 8 ++++++ crates/zed/src/languages/erb/injections.scm | 7 +++++ 7 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 crates/zed/src/languages/erb/config.toml create mode 100644 crates/zed/src/languages/erb/injections.scm diff --git a/Cargo.lock b/Cargo.lock index f79a7b851d04598f00db12bf89d3fdc6fb35fa63..29c444bfee4e3316f4515394091cae545b768eef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6426,6 +6426,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-embedded-template" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33817ade928c73a32d4f904a602321e09de9fc24b71d106f3b4b3f8ab30dcc38" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-go" version = "0.19.1" @@ -7719,6 +7729,7 @@ dependencies = [ "tree-sitter-cpp", "tree-sitter-css", "tree-sitter-elixir", + "tree-sitter-embedded-template", "tree-sitter-go", "tree-sitter-html", "tree-sitter-json 0.20.0", diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 5abc89321cfe936de64ef4515c7f7a64b54e0035..4436ab416e2a7d1725f6b8843435ea886a800ebf 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -326,7 +326,13 @@ struct InjectionConfig { query: Query, content_capture_ix: u32, language_capture_ix: Option, - languages_by_pattern_ix: Vec>>, + patterns: Vec, +} + +#[derive(Default, Clone)] +struct InjectionPatternConfig { + language: Option>, + combined: bool, } struct BracketConfig { @@ -730,15 +736,21 @@ impl Language { ("content", &mut content_capture_ix), ], ); - let languages_by_pattern_ix = (0..query.pattern_count()) + let patterns = (0..query.pattern_count()) .map(|ix| { - query.property_settings(ix).iter().find_map(|setting| { - if setting.key.as_ref() == "language" { - return setting.value.clone(); - } else { - None + let mut config = InjectionPatternConfig::default(); + for setting in query.property_settings(ix) { + match setting.key.as_ref() { + "language" => { + config.language = setting.value.clone(); + } + "combined" => { + config.combined = true; + } + _ => {} } - }) + } + config }) .collect(); if let Some(content_capture_ix) = content_capture_ix { @@ -746,7 +758,7 @@ impl Language { query, language_capture_ix, content_capture_ix, - languages_by_pattern_ix, + patterns, }); } Ok(self) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 5dd9c483afff6d148fa8a96597fa60d46f044ec8..3eb15c9c5e3c87e601d5be3d39e4e065a8aa37a8 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -961,7 +961,7 @@ fn get_injections( } prev_match = Some((mat.pattern_index, content_range.clone())); - let language_name = config.languages_by_pattern_ix[mat.pattern_index] + let language_name = config.patterns[mat.pattern_index].language .as_ref() .map(|s| Cow::Borrowed(s.as_ref())) .or_else(|| { diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index cca40cbe5ae729145f20ab38368e1656479bcd18..3bfe10eedc6fb4cb45e8319a582fada27cbe7e04 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -95,6 +95,7 @@ tree-sitter-c = "0.20.1" tree-sitter-cpp = "0.20.0" tree-sitter-css = { git = "https://github.com/tree-sitter/tree-sitter-css", rev = "769203d0f9abe1a9a691ac2b9fe4bb4397a73c51" } tree-sitter-elixir = { git = "https://github.com/elixir-lang/tree-sitter-elixir", rev = "05e3631c6a0701c1fa518b0fee7be95a2ceef5e2" } +tree-sitter-embedded-template = "0.20.0" tree-sitter-go = { git = "https://github.com/tree-sitter/tree-sitter-go", rev = "aeb2f33b366fd78d5789ff104956ce23508b85db" } tree-sitter-json = { git = "https://github.com/tree-sitter/tree-sitter-json", rev = "137e1ce6a02698fc246cdb9c6b886ed1de9a1ed8" } tree-sitter-rust = "0.20.3" diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index c1e17a8e1aa5144550bc04d93516473aa874295f..76bb4394dd5c238963c1680512317c186f60448c 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -117,6 +117,7 @@ pub async fn init(languages: Arc, _executor: Arc) Some(CachedLspAdapter::new(html::HtmlLspAdapter).await), ), ("ruby", tree_sitter_ruby::language(), None), + ("erb", tree_sitter_embedded_template::language(), None), ] { languages.add(language(name, grammar, lsp_adapter)); } diff --git a/crates/zed/src/languages/erb/config.toml b/crates/zed/src/languages/erb/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..280219a1191c1638190a6d52e1d7e717daa665fd --- /dev/null +++ b/crates/zed/src/languages/erb/config.toml @@ -0,0 +1,8 @@ +name = "ERB" +path_suffixes = ["erb"] +autoclose_before = ">})" +brackets = [ + { start = "<", end = ">", close = true, newline = true }, +] + +block_comment = ["<%#", "%>"] \ No newline at end of file diff --git a/crates/zed/src/languages/erb/injections.scm b/crates/zed/src/languages/erb/injections.scm new file mode 100644 index 0000000000000000000000000000000000000000..7a69a818ef31d7fa3822466209b08c15280c6f5b --- /dev/null +++ b/crates/zed/src/languages/erb/injections.scm @@ -0,0 +1,7 @@ +((code) @content + (#set! "language" "ruby") + (#set! "combined")) + +((content) @content + (#set! "language" "html") + (#set! "combined")) From 5efe2ed6d364aaf6a40370d13f3c62129889716a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Nov 2022 14:45:17 -0800 Subject: [PATCH 02/12] Start work on handling combined injections in SyntaxMap --- Cargo.lock | 5 +- Cargo.toml | 2 +- crates/language/Cargo.toml | 1 + crates/language/src/language.rs | 19 ++ crates/language/src/syntax_map.rs | 343 +++++++++++++++++++++++------- 5 files changed, 288 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29c444bfee4e3316f4515394091cae545b768eef..e43c8473eabc81c453efa9454f2c73fb027595da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3005,6 +3005,7 @@ dependencies = [ "text", "theme", "tree-sitter", + "tree-sitter-embedded-template", "tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json 0.19.0", @@ -6381,8 +6382,8 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.20.8" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=366210ae925d7ea0891bc7a0c738f60c77c04d7b#366210ae925d7ea0891bc7a0c738f60c77c04d7b" +version = "0.20.9" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=f0177f216e3f76a5f68e792b6f9e45fd32383eb6#f0177f216e3f76a5f68e792b6f9e45fd32383eb6" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index 205017da1fbc156543b143fc13238780767e7734..a46a56de58aa7e8dd00888db4d55ead52847e362 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,7 +65,7 @@ serde_json = { version = "1.0", features = ["preserve_order", "raw_value"] } rand = { version = "0.8" } [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "366210ae925d7ea0891bc7a0c738f60c77c04d7b" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "f0177f216e3f76a5f68e792b6f9e45fd32383eb6" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 419c7a79a51114a207762af9aab91feec63cfd9d..6c074a2d75e0ef59523bd52dbfaf53bb563025d2 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -72,4 +72,5 @@ tree-sitter-rust = "*" tree-sitter-python = "*" tree-sitter-typescript = "*" tree-sitter-ruby = "*" +tree-sitter-embedded-template = "*" unindent = "0.1.7" diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 4436ab416e2a7d1725f6b8843435ea886a800ebf..5e9319b1289bb3ea0530dbade269f436706b328a 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -28,6 +28,7 @@ use std::{ any::Any, cell::RefCell, fmt::Debug, + hash::Hash, mem, ops::Range, path::{Path, PathBuf}, @@ -643,6 +644,10 @@ impl Language { self.adapter.clone() } + pub fn id(&self) -> Option { + self.grammar.as_ref().map(|g| g.id) + } + pub fn with_highlights_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?); @@ -895,6 +900,20 @@ impl Language { } } +impl Hash for Language { + fn hash(&self, state: &mut H) { + self.id().hash(state) + } +} + +impl PartialEq for Language { + fn eq(&self, other: &Self) -> bool { + self.id().eq(&other.id()) + } +} + +impl Eq for Language {} + impl Debug for Language { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Language") diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 3eb15c9c5e3c87e601d5be3d39e4e065a8aa37a8..be735df9c089111b28c02d5abe0fb00637d6d2f2 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,4 +1,5 @@ use crate::{Grammar, InjectionConfig, Language, LanguageRegistry}; +use collections::HashMap; use lazy_static::lazy_static; use parking_lot::Mutex; use std::{ @@ -90,6 +91,7 @@ struct SyntaxLayer { range: Range, tree: tree_sitter::Tree, language: Arc, + combined: bool, } #[derive(Debug)] @@ -105,22 +107,39 @@ struct SyntaxLayerSummary { max_depth: usize, range: Range, last_layer_range: Range, + last_layer_language: Option, } #[derive(Clone, Debug)] -struct DepthAndRange(usize, Range); +struct SyntaxLayerPosition { + depth: usize, + range: Range, + language: Option, +} #[derive(Clone, Debug)] struct DepthAndMaxPosition(usize, Anchor); #[derive(Clone, Debug)] -struct DepthAndRangeOrMaxPosition(DepthAndRange, DepthAndMaxPosition); +struct SyntaxLayerPositionBeforeChange { + position: SyntaxLayerPosition, + change: DepthAndMaxPosition, +} struct ReparseStep { depth: usize, language: Arc, - ranges: Vec, range: Range, + included_ranges: Vec, + mode: ReparseMode, +} + +enum ReparseMode { + Single, + Combined { + parent_layer_range: Range, + parent_layer_changed_ranges: Vec>, + }, } #[derive(Debug, PartialEq, Eq)] @@ -225,7 +244,11 @@ impl SyntaxSnapshot { // subsequent layers at this same depth. else if cursor.item().is_some() { let slice = cursor.slice( - &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), + &SyntaxLayerPosition { + depth: depth + 1, + range: Anchor::MIN..Anchor::MAX, + language: None, + }, Bias::Left, text, ); @@ -320,28 +343,44 @@ impl SyntaxSnapshot { let mut changed_regions = ChangeRegionSet::default(); let mut queue = BinaryHeap::new(); + let mut combined_injection_ranges = HashMap::default(); queue.push(ReparseStep { depth: 0, language: language.clone(), - ranges: Vec::new(), + included_ranges: vec![tree_sitter::Range { + start_byte: 0, + end_byte: text.len(), + start_point: Point::zero().to_ts_point(), + end_point: text.max_point().to_ts_point(), + }], range: Anchor::MIN..Anchor::MAX, + mode: ReparseMode::Single, }); loop { let step = queue.pop(); - let (depth, range) = if let Some(step) = &step { - (step.depth, step.range.clone()) + let target = if let Some(step) = &step { + SyntaxLayerPosition { + depth: step.depth, + range: step.range.clone(), + language: step.language.id(), + } } else { - (max_depth + 1, Anchor::MAX..Anchor::MAX) + SyntaxLayerPosition { + depth: max_depth + 1, + range: Anchor::MAX..Anchor::MAX, + language: None, + } }; - let target = DepthAndRange(depth, range.clone()); let mut done = cursor.item().is_none(); while !done && target.cmp(&cursor.end(text), &text).is_gt() { done = true; - let bounded_target = - DepthAndRangeOrMaxPosition(target.clone(), changed_regions.start_position()); + let bounded_target = SyntaxLayerPositionBeforeChange { + position: target.clone(), + change: changed_regions.start_position(), + }; if bounded_target.cmp(&cursor.start(), &text).is_gt() { let slice = cursor.slice(&bounded_target, Bias::Left, text); if !slice.is_empty() { @@ -353,11 +392,7 @@ impl SyntaxSnapshot { } while target.cmp(&cursor.end(text), text).is_gt() { - let layer = if let Some(layer) = cursor.item() { - layer - } else { - break; - }; + let Some(layer) = cursor.item() else { break }; if changed_regions.intersects(&layer, text) { changed_regions.insert( @@ -378,70 +413,79 @@ impl SyntaxSnapshot { } } - let (ranges, language) = if let Some(step) = step { - (step.ranges, step.language) - } else { - break; - }; - - let start_point; - let start_byte; - let end_byte; - if let Some((first, last)) = ranges.first().zip(ranges.last()) { - start_point = first.start_point; - start_byte = first.start_byte; - end_byte = last.end_byte; - } else { - start_point = Point::zero().to_ts_point(); - start_byte = 0; - end_byte = text.len(); - }; + let Some(step) = step else { break }; + let (step_start_byte, step_start_point) = + step.range.start.summary::<(usize, Point)>(text); + let step_end_byte = step.range.end.to_offset(text); + let Some(grammar) = step.language.grammar.as_deref() else { continue }; let mut old_layer = cursor.item(); if let Some(layer) = old_layer { - if layer.range.to_offset(text) == (start_byte..end_byte) { + if layer.range.to_offset(text) == (step_start_byte..step_end_byte) + && layer.language.id() == step.language.id() + { cursor.next(&text); } else { old_layer = None; } } - let grammar = if let Some(grammar) = language.grammar.as_deref() { - grammar - } else { - continue; - }; + let mut combined = false; + let mut included_ranges = step.included_ranges; let tree; let changed_ranges; if let Some(old_layer) = old_layer { + if let ReparseMode::Combined { + parent_layer_changed_ranges, + .. + } = step.mode + { + combined = true; + included_ranges = splice_included_ranges( + old_layer.tree.included_ranges(), + &parent_layer_changed_ranges, + &included_ranges, + ); + } + tree = parse_text( grammar, text.as_rope(), + step_start_byte, + step_start_point, + included_ranges, Some(old_layer.tree.clone()), - ranges, ); changed_ranges = join_ranges( edits .iter() .map(|e| e.new.clone()) - .filter(|range| range.start < end_byte && range.end > start_byte), + .filter(|range| range.start < step_end_byte && range.end > step_start_byte), old_layer .tree .changed_ranges(&tree) - .map(|r| start_byte + r.start_byte..start_byte + r.end_byte), + .map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte), ); } else { - tree = parse_text(grammar, text.as_rope(), None, ranges); - changed_ranges = vec![start_byte..end_byte]; + tree = parse_text( + grammar, + text.as_rope(), + step_start_byte, + step_start_point, + included_ranges, + None, + ); + changed_ranges = vec![step_start_byte..step_end_byte]; } layers.push( SyntaxLayer { - depth, - range, + depth: step.depth, + range: step.range, tree: tree.clone(), language: language.clone(), + combined, }, &text, ); @@ -450,11 +494,10 @@ impl SyntaxSnapshot { grammar.injection_config.as_ref().zip(registry.as_ref()), changed_ranges.is_empty(), ) { - let depth = depth + 1; for range in &changed_ranges { changed_regions.insert( ChangedRegion { - depth, + depth: step.depth + 1, range: text.anchor_before(range.start)..text.anchor_after(range.end), }, text, @@ -463,10 +506,11 @@ impl SyntaxSnapshot { get_injections( config, text, - tree.root_node_with_offset(start_byte, start_point), + tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()), registry, - depth, + step.depth + 1, &changed_ranges, + &mut combined_injection_ranges, &mut queue, ); } @@ -547,7 +591,6 @@ impl SyntaxSnapshot { } }); - // let mut result = Vec::new(); cursor.next(buffer); std::iter::from_fn(move || { if let Some(layer) = cursor.item() { @@ -565,8 +608,6 @@ impl SyntaxSnapshot { None } }) - - // result } } @@ -892,14 +933,11 @@ fn join_ranges( fn parse_text( grammar: &Grammar, text: &Rope, - old_tree: Option, + start_byte: usize, + start_point: Point, mut ranges: Vec, + old_tree: Option, ) -> Tree { - let (start_byte, start_point) = ranges - .first() - .map(|range| (range.start_byte, Point::from_ts_point(range.start_point))) - .unwrap_or_default(); - for range in &mut ranges { range.start_byte -= start_byte; range.end_byte -= start_byte; @@ -934,13 +972,16 @@ fn get_injections( node: Node, language_registry: &LanguageRegistry, depth: usize, - query_ranges: &[Range], + changed_ranges: &[Range], + combined_injection_ranges: &mut HashMap, Vec>, queue: &mut BinaryHeap, ) -> bool { let mut result = false; let mut query_cursor = QueryCursorHandle::new(); let mut prev_match = None; - for query_range in query_ranges { + + combined_injection_ranges.clear(); + for query_range in changed_ranges { query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end); for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { let content_ranges = mat @@ -961,7 +1002,9 @@ fn get_injections( } prev_match = Some((mat.pattern_index, content_range.clone())); - let language_name = config.patterns[mat.pattern_index].language + let combined = config.patterns[mat.pattern_index].combined; + let language_name = config.patterns[mat.pattern_index] + .language .as_ref() .map(|s| Cow::Borrowed(s.as_ref())) .or_else(|| { @@ -975,19 +1018,93 @@ fn get_injections( result = true; let range = text.anchor_before(content_range.start) ..text.anchor_after(content_range.end); - queue.push(ReparseStep { - depth, - language, - ranges: content_ranges, - range, - }) + if combined { + combined_injection_ranges + .entry(language.clone()) + .or_default() + .extend(content_ranges); + } else { + queue.push(ReparseStep { + depth, + language, + included_ranges: content_ranges, + range, + mode: ReparseMode::Single, + }); + } } } } } + + for (language, mut included_ranges) in combined_injection_ranges.drain() { + included_ranges.sort_unstable(); + let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte()); + queue.push(ReparseStep { + depth, + language, + range, + included_ranges, + mode: ReparseMode::Combined { + parent_layer_range: node.start_byte()..node.end_byte(), + parent_layer_changed_ranges: changed_ranges.to_vec(), + }, + }) + } + result } +fn splice_included_ranges( + mut ranges: Vec, + changed_ranges: &[Range], + new_ranges: &[tree_sitter::Range], +) -> Vec { + let mut changed_ranges = changed_ranges.into_iter().peekable(); + let mut new_ranges = new_ranges.into_iter().peekable(); + let mut ranges_ix = 0; + loop { + let new_range = new_ranges.peek(); + let mut changed_range = changed_ranges.peek(); + + // process changed ranges before any overlapping new ranges + if let Some((changed, new)) = changed_range.zip(new_range) { + if new.end_byte < changed.start { + changed_range = None; + } + } + + if let Some(changed) = changed_range { + let start_ix = ranges_ix + + match ranges[ranges_ix..].binary_search_by_key(&changed.start, |r| r.end_byte) { + Ok(ix) | Err(ix) => ix, + }; + let end_ix = ranges_ix + + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) { + Ok(ix) | Err(ix) => ix, + }; + if end_ix > start_ix { + ranges.splice(start_ix..end_ix, []); + } + changed_ranges.next(); + ranges_ix = start_ix; + } else if let Some(new_range) = new_range { + let ix = ranges_ix + + match ranges[ranges_ix..] + .binary_search_by_key(&new_range.start_byte, |r| r.start_byte) + { + Ok(ix) | Err(ix) => ix, + }; + ranges.insert(ix, **new_range); + new_ranges.next(); + ranges_ix = ix + 1; + } else { + break; + } + } + ranges +} + impl std::ops::Deref for SyntaxMap { type Target = SyntaxSnapshot; @@ -1017,14 +1134,22 @@ impl Ord for ReparseStep { Ord::cmp(&other.depth, &self.depth) .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) + .then_with(|| self.language.id().cmp(&other.language.id())) } } impl ReparseStep { fn range(&self) -> Range { - let start = self.ranges.first().map_or(0, |r| r.start_byte); - let end = self.ranges.last().map_or(0, |r| r.end_byte); - start..end + if let ReparseMode::Combined { + parent_layer_range, .. + } = &self.mode + { + parent_layer_range.clone() + } else { + let start = self.included_ranges.first().map_or(0, |r| r.start_byte); + let end = self.included_ranges.last().map_or(0, |r| r.end_byte); + start..end + } } } @@ -1094,6 +1219,7 @@ impl Default for SyntaxLayerSummary { min_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, + last_layer_language: None, } } } @@ -1114,14 +1240,15 @@ impl sum_tree::Summary for SyntaxLayerSummary { } } self.last_layer_range = other.last_layer_range.clone(); + self.last_layer_language = other.last_layer_language; } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRange { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - Ord::cmp(&self.0, &cursor_location.max_depth) + Ord::cmp(&self.depth, &cursor_location.max_depth) .then_with(|| { - self.1 + self.range .start .cmp(&cursor_location.last_layer_range.start, buffer) }) @@ -1129,8 +1256,9 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRang cursor_location .last_layer_range .end - .cmp(&self.1.end, buffer) + .cmp(&self.range.end, buffer) }) + .then_with(|| self.language.cmp(&cursor_location.last_layer_language)) } } @@ -1141,12 +1269,14 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxP } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> + for SyntaxLayerPositionBeforeChange +{ fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - if self.1.cmp(cursor_location, buffer).is_le() { + if self.change.cmp(cursor_location, buffer).is_le() { return Ordering::Less; } else { - self.0.cmp(cursor_location, buffer) + self.position.cmp(cursor_location, buffer) } } } @@ -1160,6 +1290,7 @@ impl sum_tree::Item for SyntaxLayer { max_depth: self.depth, range: self.range.clone(), last_layer_range: self.range.clone(), + last_layer_language: self.language.id(), } } } @@ -1246,6 +1377,60 @@ mod tests { use unindent::Unindent as _; use util::test::marked_text_ranges; + #[test] + fn test_splice_included_ranges() { + let ranges = vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)]; + + let new_ranges = splice_included_ranges( + ranges.clone(), + &[54..56, 58..68], + &[ts_range(50..54), ts_range(59..67)], + ); + assert_eq!( + new_ranges, + &[ + ts_range(20..30), + ts_range(50..54), + ts_range(59..67), + ts_range(80..90), + ] + ); + + let new_ranges = splice_included_ranges(ranges.clone(), &[70..71, 91..100], &[]); + assert_eq!( + new_ranges, + &[ts_range(20..30), ts_range(50..60), ts_range(80..90)] + ); + + let new_ranges = + splice_included_ranges(ranges.clone(), &[], &[ts_range(0..2), ts_range(70..75)]); + assert_eq!( + new_ranges, + &[ + ts_range(0..2), + ts_range(20..30), + ts_range(50..60), + ts_range(70..75), + ts_range(80..90) + ] + ); + + fn ts_range(range: Range) -> tree_sitter::Range { + tree_sitter::Range { + start_byte: range.start, + start_point: tree_sitter::Point { + row: 0, + column: range.start, + }, + end_byte: range.end, + end_point: tree_sitter::Point { + row: 0, + column: range.end, + }, + } + } + } + #[gpui::test] fn test_syntax_map_layers_for_range() { let registry = Arc::new(LanguageRegistry::test()); From c838a7d973078f01843e7fe9f5ab20157382656d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Nov 2022 16:58:12 -0800 Subject: [PATCH 03/12] Get combined injections basically working Co-authored-by: Nathan Sobo Co-authored-by: Mikayla Maki --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/language/src/syntax_map.rs | 530 +++++++++++++++++++----------- 3 files changed, 344 insertions(+), 190 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e43c8473eabc81c453efa9454f2c73fb027595da..bc127b24c40cb5a5f589edba88d7273b7c7288f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6383,7 +6383,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.9" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=f0177f216e3f76a5f68e792b6f9e45fd32383eb6#f0177f216e3f76a5f68e792b6f9e45fd32383eb6" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=da6e24de1751aef6a944adfcefb192b751c56f76#da6e24de1751aef6a944adfcefb192b751c56f76" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index a46a56de58aa7e8dd00888db4d55ead52847e362..8ac180fcc12156f7149305651ce5ba5e6a48f246 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,7 +65,7 @@ serde_json = { version = "1.0", features = ["preserve_order", "raw_value"] } rand = { version = "0.8" } [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "f0177f216e3f76a5f68e792b6f9e45fd32383eb6" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "da6e24de1751aef6a944adfcefb192b751c56f76" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index be735df9c089111b28c02d5abe0fb00637d6d2f2..7cfbd9de2fcf6dc900ec0231e97a84351df734d8 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -126,15 +126,15 @@ struct SyntaxLayerPositionBeforeChange { change: DepthAndMaxPosition, } -struct ReparseStep { +struct ParseStep { depth: usize, language: Arc, range: Range, included_ranges: Vec, - mode: ReparseMode, + mode: ParseMode, } -enum ReparseMode { +enum ParseMode { Single, Combined { parent_layer_range: Range, @@ -333,7 +333,7 @@ impl SyntaxSnapshot { from_version: &clock::Global, text: &BufferSnapshot, registry: Option>, - language: Arc, + root_language: Arc, ) { let edits = text.edits_since::(from_version).collect::>(); let max_depth = self.layers.summary().max_depth; @@ -344,9 +344,9 @@ impl SyntaxSnapshot { let mut changed_regions = ChangeRegionSet::default(); let mut queue = BinaryHeap::new(); let mut combined_injection_ranges = HashMap::default(); - queue.push(ReparseStep { + queue.push(ParseStep { depth: 0, - language: language.clone(), + language: root_language.clone(), included_ranges: vec![tree_sitter::Range { start_byte: 0, end_byte: text.len(), @@ -354,7 +354,7 @@ impl SyntaxSnapshot { end_point: text.max_point().to_ts_point(), }], range: Anchor::MIN..Anchor::MAX, - mode: ReparseMode::Single, + mode: ParseMode::Single, }); loop { @@ -394,7 +394,7 @@ impl SyntaxSnapshot { while target.cmp(&cursor.end(text), text).is_gt() { let Some(layer) = cursor.item() else { break }; - if changed_regions.intersects(&layer, text) { + if changed_regions.intersects(&layer, text) && !layer.combined { changed_regions.insert( ChangedRegion { depth: layer.depth + 1, @@ -430,18 +430,17 @@ impl SyntaxSnapshot { } } - let mut combined = false; + let combined = matches!(step.mode, ParseMode::Combined { .. }); let mut included_ranges = step.included_ranges; let tree; let changed_ranges; if let Some(old_layer) = old_layer { - if let ReparseMode::Combined { + if let ParseMode::Combined { parent_layer_changed_ranges, .. } = step.mode { - combined = true; included_ranges = splice_included_ranges( old_layer.tree.included_ranges(), &parent_layer_changed_ranges, @@ -484,7 +483,7 @@ impl SyntaxSnapshot { depth: step.depth, range: step.range, tree: tree.clone(), - language: language.clone(), + language: step.language.clone(), combined, }, &text, @@ -974,13 +973,21 @@ fn get_injections( depth: usize, changed_ranges: &[Range], combined_injection_ranges: &mut HashMap, Vec>, - queue: &mut BinaryHeap, + queue: &mut BinaryHeap, ) -> bool { let mut result = false; let mut query_cursor = QueryCursorHandle::new(); let mut prev_match = None; combined_injection_ranges.clear(); + for pattern in &config.patterns { + if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) { + if let Some(language) = language_registry.get_language(language_name) { + combined_injection_ranges.insert(language, Vec::new()); + } + } + } + for query_range in changed_ranges { query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end); for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { @@ -1020,16 +1027,16 @@ fn get_injections( ..text.anchor_after(content_range.end); if combined { combined_injection_ranges - .entry(language.clone()) - .or_default() + .get_mut(&language.clone()) + .unwrap() .extend(content_ranges); } else { - queue.push(ReparseStep { + queue.push(ParseStep { depth, language, included_ranges: content_ranges, range, - mode: ReparseMode::Single, + mode: ParseMode::Single, }); } } @@ -1040,12 +1047,12 @@ fn get_injections( for (language, mut included_ranges) in combined_injection_ranges.drain() { included_ranges.sort_unstable(); let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte()); - queue.push(ReparseStep { + queue.push(ParseStep { depth, language, range, included_ranges, - mode: ReparseMode::Combined { + mode: ParseMode::Combined { parent_layer_range: node.start_byte()..node.end_byte(), parent_layer_changed_ranges: changed_ranges.to_vec(), }, @@ -1081,7 +1088,8 @@ fn splice_included_ranges( }; let end_ix = ranges_ix + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) { - Ok(ix) | Err(ix) => ix, + Ok(ix) => ix + 1, + Err(ix) => ix, }; if end_ix > start_ix { ranges.splice(start_ix..end_ix, []); @@ -1113,21 +1121,21 @@ impl std::ops::Deref for SyntaxMap { } } -impl PartialEq for ReparseStep { +impl PartialEq for ParseStep { fn eq(&self, _: &Self) -> bool { false } } -impl Eq for ReparseStep {} +impl Eq for ParseStep {} -impl PartialOrd for ReparseStep { +impl PartialOrd for ParseStep { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(&other)) } } -impl Ord for ReparseStep { +impl Ord for ParseStep { fn cmp(&self, other: &Self) -> Ordering { let range_a = self.range(); let range_b = other.range(); @@ -1138,9 +1146,9 @@ impl Ord for ReparseStep { } } -impl ReparseStep { +impl ParseStep { fn range(&self) -> Range { - if let ReparseMode::Combined { + if let ParseMode::Combined { parent_layer_range, .. } = &self.mode { @@ -1415,6 +1423,9 @@ mod tests { ] ); + let new_ranges = splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]); + assert_eq!(new_ranges, &[ts_range(25..55), ts_range(80..90)]); + fn ts_range(range: Range) -> tree_sitter::Range { tree_sitter::Range { start_byte: range.start, @@ -1530,21 +1541,24 @@ mod tests { #[gpui::test] fn test_typing_multiple_new_injections() { - let (buffer, syntax_map) = test_edit_sequence(&[ - "fn a() { dbg }", - "fn a() { dbg«!» }", - "fn a() { dbg!«()» }", - "fn a() { dbg!(«b») }", - "fn a() { dbg!(b«.») }", - "fn a() { dbg!(b.«c») }", - "fn a() { dbg!(b.c«()») }", - "fn a() { dbg!(b.c(«vec»)) }", - "fn a() { dbg!(b.c(vec«!»)) }", - "fn a() { dbg!(b.c(vec!«[]»)) }", - "fn a() { dbg!(b.c(vec![«d»])) }", - "fn a() { dbg!(b.c(vec![d«.»])) }", - "fn a() { dbg!(b.c(vec![d.«e»])) }", - ]); + let (buffer, syntax_map) = test_edit_sequence( + "Rust", + &[ + "fn a() { dbg }", + "fn a() { dbg«!» }", + "fn a() { dbg!«()» }", + "fn a() { dbg!(«b») }", + "fn a() { dbg!(b«.») }", + "fn a() { dbg!(b.«c») }", + "fn a() { dbg!(b.c«()») }", + "fn a() { dbg!(b.c(«vec»)) }", + "fn a() { dbg!(b.c(vec«!»)) }", + "fn a() { dbg!(b.c(vec!«[]»)) }", + "fn a() { dbg!(b.c(vec![«d»])) }", + "fn a() { dbg!(b.c(vec![d«.»])) }", + "fn a() { dbg!(b.c(vec![d.«e»])) }", + ], + ); assert_capture_ranges( &syntax_map, @@ -1556,29 +1570,32 @@ mod tests { #[gpui::test] fn test_pasting_new_injection_line_between_others() { - let (buffer, syntax_map) = test_edit_sequence(&[ - " - fn a() { - b!(B {}); - c!(C {}); - d!(D {}); - e!(E {}); - f!(F {}); - g!(G {}); - } - ", - " - fn a() { - b!(B {}); - c!(C {}); - d!(D {}); - « h!(H {}); - » e!(E {}); - f!(F {}); - g!(G {}); - } - ", - ]); + let (buffer, syntax_map) = test_edit_sequence( + "Rust", + &[ + " + fn a() { + b!(B {}); + c!(C {}); + d!(D {}); + e!(E {}); + f!(F {}); + g!(G {}); + } + ", + " + fn a() { + b!(B {}); + c!(C {}); + d!(D {}); + « h!(H {}); + » e!(E {}); + f!(F {}); + g!(G {}); + } + ", + ], + ); assert_capture_ranges( &syntax_map, @@ -1600,28 +1617,31 @@ mod tests { #[gpui::test] fn test_joining_injections_with_child_injections() { - let (buffer, syntax_map) = test_edit_sequence(&[ - " - fn a() { - b!( - c![one.two.three], - d![four.five.six], - ); - e!( - f![seven.eight], - ); - } - ", - " - fn a() { - b!( - c![one.two.three], - d![four.five.six], - ˇ f![seven.eight], - ); - } - ", - ]); + let (buffer, syntax_map) = test_edit_sequence( + "Rust", + &[ + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ); + e!( + f![seven.eight], + ); + } + ", + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ˇ f![seven.eight], + ); + } + ", + ], + ); assert_capture_ranges( &syntax_map, @@ -1641,128 +1661,193 @@ mod tests { #[gpui::test] fn test_editing_edges_of_injection() { - test_edit_sequence(&[ - " - fn a() { - b!(c!()) - } - ", - " - fn a() { - «d»!(c!()) - } - ", - " - fn a() { - «e»d!(c!()) - } - ", - " - fn a() { - ed!«[»c!()«]» - } + test_edit_sequence( + "Rust", + &[ + " + fn a() { + b!(c!()) + } + ", + " + fn a() { + «d»!(c!()) + } + ", + " + fn a() { + «e»d!(c!()) + } + ", + " + fn a() { + ed!«[»c!()«]» + } ", - ]); + ], + ); } #[gpui::test] fn test_edits_preceding_and_intersecting_injection() { - test_edit_sequence(&[ - // - "const aaaaaaaaaaaa: B = c!(d(e.f));", - "const aˇa: B = c!(d(eˇ));", - ]); + test_edit_sequence( + "Rust", + &[ + // + "const aaaaaaaaaaaa: B = c!(d(e.f));", + "const aˇa: B = c!(d(eˇ));", + ], + ); } #[gpui::test] fn test_non_local_changes_create_injections() { - test_edit_sequence(&[ - " - // a! { - static B: C = d; - // } - ", - " - ˇa! { - static B: C = d; - ˇ} - ", - ]); + test_edit_sequence( + "Rust", + &[ + " + // a! { + static B: C = d; + // } + ", + " + ˇa! { + static B: C = d; + ˇ} + ", + ], + ); } #[gpui::test] fn test_creating_many_injections_in_one_edit() { - test_edit_sequence(&[ - " - fn a() { - one(Two::three(3)); - four(Five::six(6)); - seven(Eight::nine(9)); - } - ", - " - fn a() { - one«!»(Two::three(3)); - four«!»(Five::six(6)); - seven«!»(Eight::nine(9)); - } - ", - " - fn a() { - one!(Two::three«!»(3)); - four!(Five::six«!»(6)); - seven!(Eight::nine«!»(9)); - } - ", - ]); + test_edit_sequence( + "Rust", + &[ + " + fn a() { + one(Two::three(3)); + four(Five::six(6)); + seven(Eight::nine(9)); + } + ", + " + fn a() { + one«!»(Two::three(3)); + four«!»(Five::six(6)); + seven«!»(Eight::nine(9)); + } + ", + " + fn a() { + one!(Two::three«!»(3)); + four!(Five::six«!»(6)); + seven!(Eight::nine«!»(9)); + } + ", + ], + ); } #[gpui::test] fn test_editing_across_injection_boundary() { - test_edit_sequence(&[ - " - fn one() { - two(); - three!( - three.four, - five.six, - ); - } - ", - " - fn one() { - two(); - th«irty_five![» - three.four, - five.six, - « seven.eight, - ];» - } - ", - ]); + test_edit_sequence( + "Rust", + &[ + " + fn one() { + two(); + three!( + three.four, + five.six, + ); + } + ", + " + fn one() { + two(); + th«irty_five![» + three.four, + five.six, + « seven.eight, + ];» + } + ", + ], + ); } #[gpui::test] fn test_removing_injection_by_replacing_across_boundary() { - test_edit_sequence(&[ - " - fn one() { - two!( - three.four, - ); - } - ", + test_edit_sequence( + "Rust", + &[ + " + fn one() { + two!( + three.four, + ); + } + ", + " + fn one() { + t«en + .eleven( + twelve, + » + three.four, + ); + } + ", + ], + ); + } + + #[gpui::test] + fn test_combined_injections() { + let (buffer, syntax_map) = test_edit_sequence( + "ERB", + &[ + " + + <% if @one %> +
+ <% else %> +
+ <% end %> +
+ + ", + " + + <% if @one %> +
+ ˇ else ˇ +
+ <% end %> +
+ + ", + " + + <% if @one «;» end %> +
+ + ", + ], + ); + + assert_capture_ranges( + &syntax_map, + &buffer, + &["tag", "ivar"], " - fn one() { - t«en - .eleven( - twelve, - » - three.four, - ); - } + <«body»> + <% if «@one» ; end %> + + ", - ]); + ); } #[gpui::test(iterations = 100)] @@ -1952,10 +2037,13 @@ mod tests { } } - fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { + fn test_edit_sequence(language_name: &str, steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); - let language = Arc::new(rust_lang()); - registry.add(language.clone()); + registry.add(Arc::new(rust_lang())); + registry.add(Arc::new(ruby_lang())); + registry.add(Arc::new(html_lang())); + registry.add(Arc::new(erb_lang())); + let language = registry.get_language(language_name).unwrap(); let mut buffer = Buffer::new(0, 0, Default::default()); let mut mutated_syntax_map = SyntaxMap::new(); @@ -2001,6 +2089,72 @@ mod tests { (buffer, mutated_syntax_map) } + fn html_lang() -> Language { + Language::new( + LanguageConfig { + name: "HTML".into(), + path_suffixes: vec!["html".to_string()], + ..Default::default() + }, + Some(tree_sitter_html::language()), + ) + .with_highlights_query( + r#" + (tag_name) @tag + (erroneous_end_tag_name) @tag + (attribute_name) @property + "#, + ) + .unwrap() + } + + fn ruby_lang() -> Language { + Language::new( + LanguageConfig { + name: "Ruby".into(), + path_suffixes: vec!["rb".to_string()], + ..Default::default() + }, + Some(tree_sitter_ruby::language()), + ) + .with_highlights_query( + r#" + ["if" "do" "else" "end"] @keyword + (instance_variable) @ivar + "#, + ) + .unwrap() + } + + fn erb_lang() -> Language { + Language::new( + LanguageConfig { + name: "ERB".into(), + path_suffixes: vec!["erb".to_string()], + ..Default::default() + }, + Some(tree_sitter_embedded_template::language()), + ) + .with_highlights_query( + r#" + ["<%" "%>"] @keyword + "#, + ) + .unwrap() + .with_injection_query( + r#" + ((code) @content + (#set! "language" "ruby") + (#set! "combined")) + + ((content) @content + (#set! "language" "html") + (#set! "combined")) + "#, + ) + .unwrap() + } + fn rust_lang() -> Language { Language::new( LanguageConfig { From 86f51ade60f5b7e56c2f03b25163135d4f921809 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 7 Nov 2022 17:32:15 -0800 Subject: [PATCH 04/12] Fix panic in handling edits to combined injections --- crates/language/src/syntax_map.rs | 41 +++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 7cfbd9de2fcf6dc900ec0231e97a84351df734d8..11811b999fb6a5b4c8bf2cfe1a458e9c7f0b4ac5 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1082,15 +1082,34 @@ fn splice_included_ranges( } if let Some(changed) = changed_range { - let start_ix = ranges_ix + let mut start_ix = ranges_ix + match ranges[ranges_ix..].binary_search_by_key(&changed.start, |r| r.end_byte) { Ok(ix) | Err(ix) => ix, }; - let end_ix = ranges_ix + let mut end_ix = ranges_ix + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) { Ok(ix) => ix + 1, Err(ix) => ix, }; + + // If there are empty ranges, then there may be multiple ranges with the same + // start or end. Expand the splice to include any adjacent ranges. That touch + // the changed range. + while start_ix > 0 { + if ranges[start_ix - 1].end_byte == changed.start { + start_ix -= 1; + } else { + break; + } + } + while let Some(range) = ranges.get(end_ix) { + if range.start_byte == changed.end { + end_ix += 1; + } else { + break; + } + } + if end_ix > start_ix { ranges.splice(start_ix..end_ix, []); } @@ -1850,6 +1869,24 @@ mod tests { ); } + #[gpui::test] + fn test_combined_injections_empty_ranges() { + test_edit_sequence( + "ERB", + &[ + " + <% if @one %> + <% else %> + <% end %> + ", + " + <% if @one %> + ˇ<% end %> + ", + ], + ); + } + #[gpui::test(iterations = 100)] fn test_random_syntax_map_edits(mut rng: StdRng) { let operations = env::var("OPERATIONS") From ea42bc3c9b960bdb866b3c382d3277585bcd0e0a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Nov 2022 10:36:44 -0800 Subject: [PATCH 05/12] Rename some sum_tree seek targets in SyntaxMap --- crates/language/src/syntax_map.rs | 66 +++++++++++++++++-------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 11811b999fb6a5b4c8bf2cfe1a458e9c7f0b4ac5..0a8919d6a348631137927cfb9d681bdad70e9f1f 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -91,7 +91,6 @@ struct SyntaxLayer { range: Range, tree: tree_sitter::Tree, language: Arc, - combined: bool, } #[derive(Debug)] @@ -118,12 +117,15 @@ struct SyntaxLayerPosition { } #[derive(Clone, Debug)] -struct DepthAndMaxPosition(usize, Anchor); +struct ChangeStartPosition { + depth: usize, + position: Anchor, +} #[derive(Clone, Debug)] struct SyntaxLayerPositionBeforeChange { position: SyntaxLayerPosition, - change: DepthAndMaxPosition, + change: ChangeStartPosition, } struct ParseStep { @@ -234,9 +236,12 @@ impl SyntaxSnapshot { // Preserve any layers at this depth that precede the first edit. if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { - let target = DepthAndMaxPosition(depth, edit_range.start); - if target.cmp(&cursor.start(), text).is_gt() { - let slice = cursor.slice(&target, Bias::Left, text); + let position = ChangeStartPosition { + depth, + position: edit_range.start, + }; + if position.cmp(&cursor.start(), text).is_gt() { + let slice = cursor.slice(&position, Bias::Left, text); layers.push_tree(slice, text); } } @@ -359,7 +364,7 @@ impl SyntaxSnapshot { loop { let step = queue.pop(); - let target = if let Some(step) = &step { + let position = if let Some(step) = &step { SyntaxLayerPosition { depth: step.depth, range: step.range.clone(), @@ -374,15 +379,15 @@ impl SyntaxSnapshot { }; let mut done = cursor.item().is_none(); - while !done && target.cmp(&cursor.end(text), &text).is_gt() { + while !done && position.cmp(&cursor.end(text), &text).is_gt() { done = true; - let bounded_target = SyntaxLayerPositionBeforeChange { - position: target.clone(), + let bounded_position = SyntaxLayerPositionBeforeChange { + position: position.clone(), change: changed_regions.start_position(), }; - if bounded_target.cmp(&cursor.start(), &text).is_gt() { - let slice = cursor.slice(&bounded_target, Bias::Left, text); + if bounded_position.cmp(&cursor.start(), &text).is_gt() { + let slice = cursor.slice(&bounded_position, Bias::Left, text); if !slice.is_empty() { layers.push_tree(slice, &text); if changed_regions.prune(cursor.end(text), text) { @@ -391,10 +396,10 @@ impl SyntaxSnapshot { } } - while target.cmp(&cursor.end(text), text).is_gt() { + while position.cmp(&cursor.end(text), text).is_gt() { let Some(layer) = cursor.item() else { break }; - if changed_regions.intersects(&layer, text) && !layer.combined { + if changed_regions.intersects(&layer, text) { changed_regions.insert( ChangedRegion { depth: layer.depth + 1, @@ -430,11 +435,9 @@ impl SyntaxSnapshot { } } - let combined = matches!(step.mode, ParseMode::Combined { .. }); - let mut included_ranges = step.included_ranges; - let tree; let changed_ranges; + let mut included_ranges = step.included_ranges; if let Some(old_layer) = old_layer { if let ParseMode::Combined { parent_layer_changed_ranges, @@ -484,7 +487,6 @@ impl SyntaxSnapshot { range: step.range, tree: tree.clone(), language: step.language.clone(), - combined, }, &text, ); @@ -1074,7 +1076,8 @@ fn splice_included_ranges( let new_range = new_ranges.peek(); let mut changed_range = changed_ranges.peek(); - // process changed ranges before any overlapping new ranges + // Remove ranges that have changed before inserting any new ranges + // into those ranges. if let Some((changed, new)) = changed_range.zip(new_range) { if new.end_byte < changed.start { changed_range = None; @@ -1093,7 +1096,7 @@ fn splice_included_ranges( }; // If there are empty ranges, then there may be multiple ranges with the same - // start or end. Expand the splice to include any adjacent ranges. That touch + // start or end. Expand the splice to include any adjacent ranges that touch // the changed range. while start_ix > 0 { if ranges[start_ix - 1].end_byte == changed.start { @@ -1191,12 +1194,17 @@ impl ChangedRegion { } impl ChangeRegionSet { - fn start_position(&self) -> DepthAndMaxPosition { - self.0 - .first() - .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { - DepthAndMaxPosition(region.depth, region.range.start) - }) + fn start_position(&self) -> ChangeStartPosition { + self.0.first().map_or( + ChangeStartPosition { + depth: usize::MAX, + position: Anchor::MAX, + }, + |region| ChangeStartPosition { + depth: region.depth, + position: region.range.start, + }, + ) } fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool { @@ -1289,10 +1297,10 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerP } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for ChangeStartPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { - Ord::cmp(&self.0, &cursor_location.max_depth) - .then_with(|| self.1.cmp(&cursor_location.range.end, text)) + Ord::cmp(&self.depth, &cursor_location.max_depth) + .then_with(|| self.position.cmp(&cursor_location.range.end, text)) } } From 7dcd6c920fafb270aebdeb3785fe50259a0ecafb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Nov 2022 11:29:23 -0800 Subject: [PATCH 06/12] Add randomized test for syntax map with combined injections --- crates/language/src/syntax_map.rs | 108 +++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 0a8919d6a348631137927cfb9d681bdad70e9f1f..711a65c3e9424e59ce1bad597a1bf85ef2bc9560 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -236,12 +236,12 @@ impl SyntaxSnapshot { // Preserve any layers at this depth that precede the first edit. if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { - let position = ChangeStartPosition { + let target = ChangeStartPosition { depth, position: edit_range.start, }; - if position.cmp(&cursor.start(), text).is_gt() { - let slice = cursor.slice(&position, Bias::Left, text); + if target.cmp(&cursor.start(), text).is_gt() { + let slice = cursor.slice(&target, Bias::Left, text); layers.push_tree(slice, text); } } @@ -261,24 +261,17 @@ impl SyntaxSnapshot { continue; }; - let layer = if let Some(layer) = cursor.item() { - layer - } else { - break; - }; + let Some(layer) = cursor.item() else { break }; let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text); // Ignore edits that end before the start of this layer, and don't consider them // for any subsequent layers at this same depth. loop { - if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { - if edit_range.end.cmp(&layer.range.start, text).is_le() { - first_edit_ix_for_depth += 1; - } else { - break; - } + let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) else { continue 'outer }; + if edit_range.end.cmp(&layer.range.start, text).is_le() { + first_edit_ix_for_depth += 1; } else { - continue 'outer; + break; } } @@ -1895,7 +1888,7 @@ mod tests { ); } - #[gpui::test(iterations = 100)] + #[gpui::test(iterations = 50)] fn test_random_syntax_map_edits(mut rng: StdRng) { let operations = env::var("OPERATIONS") .map(|i| i.parse().expect("invalid `OPERATIONS` variable")) @@ -1975,6 +1968,89 @@ mod tests { } } + #[gpui::test(iterations = 50)] + fn test_random_syntax_map_edits_with_combined_injections(mut rng: StdRng) { + let operations = env::var("OPERATIONS") + .map(|i| i.parse().expect("invalid `OPERATIONS` variable")) + .unwrap_or(10); + + let text = r#" +
+ <% if one?(:two) %> +

+ <%= yield :five %> +

+ <% elsif Six.seven(8) %> +

+ <%= yield :five %> +

+ <% else %> + Ok + <% end %> +
+ "# + .unindent() + .repeat(2); + + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(erb_lang()); + registry.add(language.clone()); + registry.add(Arc::new(ruby_lang())); + registry.add(Arc::new(html_lang())); + let mut buffer = Buffer::new(0, 0, text); + + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(language.clone(), &buffer); + + let mut reference_syntax_map = SyntaxMap::new(); + reference_syntax_map.set_language_registry(registry.clone()); + + log::info!("initial text:\n{}", buffer.text()); + + for _ in 0..operations { + let prev_buffer = buffer.snapshot(); + let prev_syntax_map = syntax_map.snapshot(); + + buffer.randomly_edit(&mut rng, 3); + log::info!("text:\n{}", buffer.text()); + + syntax_map.interpolate(&buffer); + check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer); + + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + reference_syntax_map.reparse(language.clone(), &buffer); + } + + for i in 0..operations { + let i = operations - i - 1; + buffer.undo(); + log::info!("undoing operation {}", i); + log::info!("text:\n{}", buffer.text()); + + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + reference_syntax_map.reparse(language.clone(), &buffer); + assert_eq!( + syntax_map.layers(&buffer).len(), + reference_syntax_map.layers(&buffer).len(), + "wrong number of layers after undoing edit {i}" + ); + } + + let layers = syntax_map.layers(&buffer); + let reference_layers = reference_syntax_map.layers(&buffer); + for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter()) + { + assert_eq!(edited_layer.node.to_sexp(), reference_layer.node.to_sexp()); + assert_eq!(edited_layer.node.range(), reference_layer.node.range()); + } + } + fn check_interpolation( old_syntax_map: &SyntaxSnapshot, new_syntax_map: &SyntaxSnapshot, From 2f5004c238cade89ca29fd5c665770e4eb4076aa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 8 Nov 2022 11:29:57 -0800 Subject: [PATCH 07/12] Add highlight query for ERB --- crates/zed/src/languages/erb/highlights.scm | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 crates/zed/src/languages/erb/highlights.scm diff --git a/crates/zed/src/languages/erb/highlights.scm b/crates/zed/src/languages/erb/highlights.scm new file mode 100644 index 0000000000000000000000000000000000000000..91b21d081f9c5883ef69e4c38d05dcf6de25dc42 --- /dev/null +++ b/crates/zed/src/languages/erb/highlights.scm @@ -0,0 +1,12 @@ +(comment_directive) @comment + +[ + "<%#" + "<%" + "<%=" + "<%_" + "<%-" + "%>" + "-%>" + "_%>" +] @keyword \ No newline at end of file From 9ad8731897ec8d070d7ca695c0539d26361577b6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 10 Nov 2022 17:04:40 -0800 Subject: [PATCH 08/12] Fix boundary condition where injection was not found after an edit --- crates/language/src/syntax_map.rs | 33 ++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 711a65c3e9424e59ce1bad597a1bf85ef2bc9560..026e4857c54ef10a48ec814a3d14ddad407874c0 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -984,7 +984,7 @@ fn get_injections( } for query_range in changed_ranges { - query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end); + query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end + 1); for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { let content_ranges = mat .nodes_for_capture_index(config.content_capture_ix) @@ -1888,6 +1888,37 @@ mod tests { ); } + #[gpui::test] + fn test_combined_injections_edit_edges_of_ranges() { + let (buffer, syntax_map) = test_edit_sequence( + "ERB", + &[ + " + <%= one @two %> + <%= three @four %> + ", + " + <%= one @two %ˇ + <%= three @four %> + ", + " + <%= one @two %«>» + <%= three @four %> + ", + ], + ); + + assert_capture_ranges( + &syntax_map, + &buffer, + &["tag", "ivar"], + " + <%= one «@two» %> + <%= three «@four» %> + ", + ); + } + #[gpui::test(iterations = 50)] fn test_random_syntax_map_edits(mut rng: StdRng) { let operations = env::var("OPERATIONS") From 3612c46d6d7748c06dc025b8a398104e96035284 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Nov 2022 16:36:04 -0800 Subject: [PATCH 09/12] Bump tree-sitter for included range bugfix --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc127b24c40cb5a5f589edba88d7273b7c7288f1..17ac0a2283d9cacd6c87ece51829b041a42b4462 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6383,7 +6383,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.9" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=da6e24de1751aef6a944adfcefb192b751c56f76#da6e24de1751aef6a944adfcefb192b751c56f76" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=d07f864815ecb1e0f1f0bab17fec80438eb4c455#d07f864815ecb1e0f1f0bab17fec80438eb4c455" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index 8ac180fcc12156f7149305651ce5ba5e6a48f246..ac8bf018f910d6bde525990ef0df912d41a65708 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,7 +65,7 @@ serde_json = { version = "1.0", features = ["preserve_order", "raw_value"] } rand = { version = "0.8" } [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "da6e24de1751aef6a944adfcefb192b751c56f76" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "d07f864815ecb1e0f1f0bab17fec80438eb4c455" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 From ee66adbb492b06d4ce7acf72e2fbfc280c978288 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 11 Nov 2022 16:43:57 -0800 Subject: [PATCH 10/12] SyntaxMap - Don't ignore deletions at the boundaries of layers --- crates/language/src/syntax_map.rs | 32 +++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 026e4857c54ef10a48ec814a3d14ddad407874c0..d843f5e85b03dc742b013ec4ecf31d24f61bae94 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -453,10 +453,9 @@ impl SyntaxSnapshot { Some(old_layer.tree.clone()), ); changed_ranges = join_ranges( - edits - .iter() - .map(|e| e.new.clone()) - .filter(|range| range.start < step_end_byte && range.end > step_start_byte), + edits.iter().map(|e| e.new.clone()).filter(|range| { + range.start <= step_end_byte && range.end >= step_start_byte + }), old_layer .tree .changed_ranges(&tree) @@ -1919,6 +1918,31 @@ mod tests { ); } + #[gpui::test] + fn test_combined_injections_splitting_some_injections() { + let (_buffer, _syntax_map) = test_edit_sequence( + "ERB", + &[ + r#" + <%A if b(:c) %> + d + <% end %> + eee + <% f %> + "#, + r#" + <%« AAAAAAA %> + hhhhhhh + <%=» if b(:c) %> + d + <% end %> + eee + <% f %> + "#, + ], + ); + } + #[gpui::test(iterations = 50)] fn test_random_syntax_map_edits(mut rng: StdRng) { let operations = env::var("OPERATIONS") From fb356313375c10eccf175d141395310bb2167ad7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2022 16:56:09 -0800 Subject: [PATCH 11/12] Bump tree-sitter after merging included-ranges PR --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17ac0a2283d9cacd6c87ece51829b041a42b4462..158791ac97f7efe9faefcc931b185c1a313c1519 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6383,7 +6383,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.9" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=d07f864815ecb1e0f1f0bab17fec80438eb4c455#d07f864815ecb1e0f1f0bab17fec80438eb4c455" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da#36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index ac8bf018f910d6bde525990ef0df912d41a65708..8e9814c4481c0472033d8818776c5edba946cf6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,7 +65,7 @@ serde_json = { version = "1.0", features = ["preserve_order", "raw_value"] } rand = { version = "0.8" } [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "d07f864815ecb1e0f1f0bab17fec80438eb4c455" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "36b5b6c89e55ad1a502f8b3234bb3e12ec83a5da" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 From b222e8eb5a97ce3023e2bf0ecf90dcce373c9678 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 14 Nov 2022 16:56:21 -0800 Subject: [PATCH 12/12] Use a longer example text in random combined injections test --- crates/language/src/syntax_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index d843f5e85b03dc742b013ec4ecf31d24f61bae94..65d01e949317bf64ab7ee65258df3c0c848e5602 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -2045,7 +2045,7 @@ mod tests {
"# .unindent() - .repeat(2); + .repeat(8); let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(erb_lang());