1use crate::{Grammar, InjectionConfig, Language, LanguageRegistry};
2use collections::HashMap;
3use lazy_static::lazy_static;
4use parking_lot::Mutex;
5use std::{
6 borrow::Cow,
7 cell::RefCell,
8 cmp::{Ordering, Reverse},
9 collections::BinaryHeap,
10 ops::{Deref, DerefMut, Range},
11 sync::Arc,
12};
13use sum_tree::{Bias, SeekTarget, SumTree};
14use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
15use tree_sitter::{
16 Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
17};
18
19thread_local! {
20 static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
21}
22
23lazy_static! {
24 static ref QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Default::default();
25}
26
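/// Tracks the parsed syntax trees ("layers") for a buffer, including layers
/// for languages that are injected into the root language (for example, macro
/// contents in Rust, or Ruby and HTML embedded in ERB). The map remembers
/// which buffer version has been parsed and which version the existing trees
/// have been interpolated to.
///
/// A rough sketch of the intended call sequence, mirroring the tests at the
/// bottom of this file (`rust_lang()` and `buffer` are test fixtures, and
/// `SyntaxMap::reparse` is the test-only helper; production code installs a
/// snapshot built elsewhere via `did_parse`):
///
/// ```ignore
/// let registry = Arc::new(LanguageRegistry::test());
/// let language = Arc::new(rust_lang());
/// registry.add(language.clone());
///
/// let mut syntax_map = SyntaxMap::new();
/// syntax_map.set_language_registry(registry.clone());
/// syntax_map.reparse(language.clone(), &buffer);
///
/// // After editing `buffer`, shift the old trees, then reparse incrementally.
/// syntax_map.interpolate(&buffer);
/// syntax_map.reparse(language, &buffer);
/// ```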
27#[derive(Default)]
28pub struct SyntaxMap {
29 parsed_version: clock::Global,
30 interpolated_version: clock::Global,
31 snapshot: SyntaxSnapshot,
32 language_registry: Option<Arc<LanguageRegistry>>,
33}
34
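/// An immutable, cheaply clonable snapshot of the parsed layers, stored in a
/// `SumTree` ordered by layer depth and then by position in the buffer.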
35#[derive(Clone, Default)]
36pub struct SyntaxSnapshot {
37 layers: SumTree<SyntaxLayer>,
38}
39
40#[derive(Default)]
41pub struct SyntaxMapCaptures<'a> {
42 layers: Vec<SyntaxMapCapturesLayer<'a>>,
43 active_layer_count: usize,
44 grammars: Vec<&'a Grammar>,
45}
46
47#[derive(Default)]
48pub struct SyntaxMapMatches<'a> {
49 layers: Vec<SyntaxMapMatchesLayer<'a>>,
50 active_layer_count: usize,
51 grammars: Vec<&'a Grammar>,
52}
53
54#[derive(Debug)]
55pub struct SyntaxMapCapture<'a> {
56 pub depth: usize,
57 pub node: Node<'a>,
58 pub index: u32,
59 pub grammar_index: usize,
60}
61
62#[derive(Debug)]
63pub struct SyntaxMapMatch<'a> {
64 pub depth: usize,
65 pub pattern_index: usize,
66 pub captures: &'a [QueryCapture<'a>],
67 pub grammar_index: usize,
68}
69
70struct SyntaxMapCapturesLayer<'a> {
71 depth: usize,
72 captures: QueryCaptures<'a, 'a, TextProvider<'a>>,
73 next_capture: Option<QueryCapture<'a>>,
74 grammar_index: usize,
75 _query_cursor: QueryCursorHandle,
76}
77
78struct SyntaxMapMatchesLayer<'a> {
79 depth: usize,
80 next_pattern_index: usize,
81 next_captures: Vec<QueryCapture<'a>>,
82 has_next: bool,
83 matches: QueryMatches<'a, 'a, TextProvider<'a>>,
84 grammar_index: usize,
85 _query_cursor: QueryCursorHandle,
86}
87
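/// A single parsed syntax tree covering `range` of the buffer in one language.
/// The root document's layer has depth 0, and each level of language injection
/// adds one to `depth`.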
88#[derive(Clone)]
89struct SyntaxLayer {
90 depth: usize,
91 range: Range<Anchor>,
92 tree: tree_sitter::Tree,
93 language: Arc<Language>,
94}
95
96#[derive(Debug)]
97pub struct SyntaxLayerInfo<'a> {
98 pub depth: usize,
99 pub node: Node<'a>,
100 pub language: &'a Arc<Language>,
101}
102
103#[derive(Debug, Clone)]
104struct SyntaxLayerSummary {
105 min_depth: usize,
106 max_depth: usize,
107 range: Range<Anchor>,
108 last_layer_range: Range<Anchor>,
109 last_layer_language: Option<usize>,
110}
111
112#[derive(Clone, Debug)]
113struct SyntaxLayerPosition {
114 depth: usize,
115 range: Range<Anchor>,
116 language: Option<usize>,
117}
118
119#[derive(Clone, Debug)]
120struct ChangeStartPosition {
121 depth: usize,
122 position: Anchor,
123}
124
125#[derive(Clone, Debug)]
126struct SyntaxLayerPositionBeforeChange {
127 position: SyntaxLayerPosition,
128 change: ChangeStartPosition,
129}
130
131struct ParseStep {
132 depth: usize,
133 language: Arc<Language>,
134 range: Range<Anchor>,
135 included_ranges: Vec<tree_sitter::Range>,
136 mode: ParseMode,
137}
138
139enum ParseMode {
140 Single,
141 Combined {
142 parent_layer_range: Range<usize>,
143 parent_layer_changed_ranges: Vec<Range<usize>>,
144 },
145}
146
147#[derive(Debug, PartialEq, Eq)]
148struct ChangedRegion {
149 depth: usize,
150 range: Range<Anchor>,
151}
152
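/// The set of regions, grouped by depth, that were invalidated while reparsing
/// shallower layers. Existing deeper layers that intersect one of these
/// regions are discarded instead of being reused; regions are pruned once the
/// reparse has moved past them.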
153#[derive(Default)]
154struct ChangeRegionSet(Vec<ChangedRegion>);
155
156struct TextProvider<'a>(&'a Rope);
157
158struct ByteChunks<'a>(text::Chunks<'a>);
159
160struct QueryCursorHandle(Option<QueryCursor>);
161
162impl SyntaxMap {
163 pub fn new() -> Self {
164 Self::default()
165 }
166
167 pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
168 self.language_registry = Some(registry);
169 }
170
171 pub fn snapshot(&self) -> SyntaxSnapshot {
172 self.snapshot.clone()
173 }
174
175 pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
176 self.language_registry.clone()
177 }
178
179 pub fn parsed_version(&self) -> clock::Global {
180 self.parsed_version.clone()
181 }
182
183 pub fn interpolate(&mut self, text: &BufferSnapshot) {
184 self.snapshot.interpolate(&self.interpolated_version, text);
185 self.interpolated_version = text.version.clone();
186 }
187
188 #[cfg(test)]
189 pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
190 self.snapshot.reparse(
191 &self.parsed_version,
192 text,
193 self.language_registry.clone(),
194 language,
195 );
196 self.parsed_version = text.version.clone();
197 self.interpolated_version = text.version.clone();
198 }
199
200 pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) {
201 self.interpolated_version = version.clone();
202 self.parsed_version = version;
203 self.snapshot = snapshot;
204 }
205
206 pub fn clear(&mut self) {
207 self.snapshot = SyntaxSnapshot::default();
208 }
209}
210
211impl SyntaxSnapshot {
212 pub fn is_empty(&self) -> bool {
213 self.layers.is_empty()
214 }
215
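    /// Bring the existing syntax trees up to date with edits made since
    /// `from_version`, without reparsing. Each affected tree is only adjusted
    /// via `Tree::edit`, so node positions stay consistent with the new text
    /// until the next `reparse`.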
216 pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
217 let edits = text
218 .anchored_edits_since::<(usize, Point)>(&from_version)
219 .collect::<Vec<_>>();
220 if edits.is_empty() {
221 return;
222 }
223
224 let mut layers = SumTree::new();
225 let mut first_edit_ix_for_depth = 0;
226 let mut prev_depth = 0;
227 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
228 cursor.next(text);
229
230 'outer: loop {
231 let depth = cursor.end(text).max_depth;
232 if depth > prev_depth {
233 first_edit_ix_for_depth = 0;
234 prev_depth = depth;
235 }
236
237 // Preserve any layers at this depth that precede the first edit.
238 if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
239 let position = ChangeStartPosition {
240 depth,
241 position: edit_range.start,
242 };
243 if position.cmp(&cursor.start(), text).is_gt() {
244 let slice = cursor.slice(&position, Bias::Left, text);
245 layers.push_tree(slice, text);
246 }
247 }
248 // If this layer follows all of the edits, then preserve it and any
249 // subsequent layers at this same depth.
250 else if cursor.item().is_some() {
251 let slice = cursor.slice(
252 &SyntaxLayerPosition {
253 depth: depth + 1,
254 range: Anchor::MIN..Anchor::MAX,
255 language: None,
256 },
257 Bias::Left,
258 text,
259 );
260 layers.push_tree(slice, text);
261 continue;
262 };
263
264 let layer = if let Some(layer) = cursor.item() {
265 layer
266 } else {
267 break;
268 };
269 let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text);
270
271 // Ignore edits that end before the start of this layer, and don't consider them
272 // for any subsequent layers at this same depth.
273 loop {
274 if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
275 if edit_range.end.cmp(&layer.range.start, text).is_le() {
276 first_edit_ix_for_depth += 1;
277 } else {
278 break;
279 }
280 } else {
281 continue 'outer;
282 }
283 }
284
285 let mut layer = layer.clone();
286 for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
287 // Ignore any edits that follow this layer.
288 if edit_range.start.cmp(&layer.range.end, text).is_ge() {
289 break;
290 }
291
292 // Apply any edits that intersect this layer to the layer's syntax tree.
293 let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
294 tree_sitter::InputEdit {
295 start_byte: edit.new.start.0 - start_byte,
296 old_end_byte: edit.new.start.0 - start_byte
297 + (edit.old.end.0 - edit.old.start.0),
298 new_end_byte: edit.new.end.0 - start_byte,
299 start_position: (edit.new.start.1 - start_point).to_ts_point(),
300 old_end_position: (edit.new.start.1 - start_point
301 + (edit.old.end.1 - edit.old.start.1))
302 .to_ts_point(),
303 new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
304 }
305 } else {
306 let node = layer.tree.root_node();
307 tree_sitter::InputEdit {
308 start_byte: 0,
309 old_end_byte: node.end_byte(),
310 new_end_byte: 0,
311 start_position: Default::default(),
312 old_end_position: node.end_position(),
313 new_end_position: Default::default(),
314 }
315 };
316
317 layer.tree.edit(&tree_edit);
318 }
319
320 debug_assert!(
321 layer.tree.root_node().end_byte() <= text.len(),
                "tree's size {} is larger than text size {}",
323 layer.tree.root_node().end_byte(),
324 text.len(),
325 );
326
327 layers.push(layer, text);
328 cursor.next(text);
329 }
330
331 layers.push_tree(cursor.suffix(&text), &text);
332 drop(cursor);
333 self.layers = layers;
334 }
335
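    /// Reparse the layers invalidated by edits made since `from_version`.
    /// Work is driven by a `BinaryHeap` of `ParseStep`s, starting with the
    /// root language over the whole buffer: unchanged layers are reused from
    /// the previous snapshot, changed layers are reparsed (incrementally when
    /// an old tree is available), and any injections discovered in a reparsed
    /// layer are pushed onto the queue as deeper steps.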
336 pub fn reparse(
337 &mut self,
338 from_version: &clock::Global,
339 text: &BufferSnapshot,
340 registry: Option<Arc<LanguageRegistry>>,
341 root_language: Arc<Language>,
342 ) {
343 let edits = text.edits_since::<usize>(from_version).collect::<Vec<_>>();
344 let max_depth = self.layers.summary().max_depth;
345 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
346 cursor.next(&text);
347 let mut layers = SumTree::new();
348
349 let mut changed_regions = ChangeRegionSet::default();
350 let mut queue = BinaryHeap::new();
351 let mut combined_injection_ranges = HashMap::default();
352 queue.push(ParseStep {
353 depth: 0,
354 language: root_language.clone(),
355 included_ranges: vec![tree_sitter::Range {
356 start_byte: 0,
357 end_byte: text.len(),
358 start_point: Point::zero().to_ts_point(),
359 end_point: text.max_point().to_ts_point(),
360 }],
361 range: Anchor::MIN..Anchor::MAX,
362 mode: ParseMode::Single,
363 });
364
365 loop {
366 let step = queue.pop();
367 let position = if let Some(step) = &step {
368 SyntaxLayerPosition {
369 depth: step.depth,
370 range: step.range.clone(),
371 language: step.language.id(),
372 }
373 } else {
374 SyntaxLayerPosition {
375 depth: max_depth + 1,
376 range: Anchor::MAX..Anchor::MAX,
377 language: None,
378 }
379 };
380
381 let mut done = cursor.item().is_none();
382 while !done && position.cmp(&cursor.end(text), &text).is_gt() {
383 done = true;
384
385 let bounded_position = SyntaxLayerPositionBeforeChange {
386 position: position.clone(),
387 change: changed_regions.start_position(),
388 };
389 if bounded_position.cmp(&cursor.start(), &text).is_gt() {
390 let slice = cursor.slice(&bounded_position, Bias::Left, text);
391 if !slice.is_empty() {
392 layers.push_tree(slice, &text);
393 if changed_regions.prune(cursor.end(text), text) {
394 done = false;
395 }
396 }
397 }
398
399 while position.cmp(&cursor.end(text), text).is_gt() {
400 let Some(layer) = cursor.item() else { break };
401
402 if changed_regions.intersects(&layer, text) {
403 changed_regions.insert(
404 ChangedRegion {
405 depth: layer.depth + 1,
406 range: layer.range.clone(),
407 },
408 text,
409 );
410 } else {
411 layers.push(layer.clone(), text);
412 }
413
414 cursor.next(text);
415 if changed_regions.prune(cursor.end(text), text) {
416 done = false;
417 }
418 }
419 }
420
421 let Some(step) = step else { break };
422 let (step_start_byte, step_start_point) =
423 step.range.start.summary::<(usize, Point)>(text);
424 let step_end_byte = step.range.end.to_offset(text);
425 let Some(grammar) = step.language.grammar.as_deref() else { continue };
426
427 let mut old_layer = cursor.item();
428 if let Some(layer) = old_layer {
429 if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
430 && layer.language.id() == step.language.id()
431 {
432 cursor.next(&text);
433 } else {
434 old_layer = None;
435 }
436 }
437
438 let tree;
439 let changed_ranges;
440 let mut included_ranges = step.included_ranges;
441 if let Some(old_layer) = old_layer {
442 if let ParseMode::Combined {
443 parent_layer_changed_ranges,
444 ..
445 } = step.mode
446 {
447 included_ranges = splice_included_ranges(
448 old_layer.tree.included_ranges(),
449 &parent_layer_changed_ranges,
450 &included_ranges,
451 );
452 }
453
454 tree = parse_text(
455 grammar,
456 text.as_rope(),
457 step_start_byte,
458 step_start_point,
459 included_ranges,
460 Some(old_layer.tree.clone()),
461 );
462 changed_ranges = join_ranges(
463 edits
464 .iter()
465 .map(|e| e.new.clone())
466 .filter(|range| range.start < step_end_byte && range.end > step_start_byte),
467 old_layer
468 .tree
469 .changed_ranges(&tree)
470 .map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte),
471 );
472 } else {
473 tree = parse_text(
474 grammar,
475 text.as_rope(),
476 step_start_byte,
477 step_start_point,
478 included_ranges,
479 None,
480 );
481 changed_ranges = vec![step_start_byte..step_end_byte];
482 }
483
484 layers.push(
485 SyntaxLayer {
486 depth: step.depth,
487 range: step.range,
488 tree: tree.clone(),
489 language: step.language.clone(),
490 },
491 &text,
492 );
493
494 if let (Some((config, registry)), false) = (
495 grammar.injection_config.as_ref().zip(registry.as_ref()),
496 changed_ranges.is_empty(),
497 ) {
498 for range in &changed_ranges {
499 changed_regions.insert(
500 ChangedRegion {
501 depth: step.depth + 1,
502 range: text.anchor_before(range.start)..text.anchor_after(range.end),
503 },
504 text,
505 );
506 }
507 get_injections(
508 config,
509 text,
510 tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()),
511 registry,
512 step.depth + 1,
513 &changed_ranges,
514 &mut combined_injection_ranges,
515 &mut queue,
516 );
517 }
518 }
519
520 drop(cursor);
521 self.layers = layers;
522 }
523
524 pub fn single_tree_captures<'a>(
525 range: Range<usize>,
526 text: &'a Rope,
527 tree: &'a Tree,
528 language: &'a Arc<Language>,
529 query: fn(&Grammar) -> Option<&Query>,
530 ) -> SyntaxMapCaptures<'a> {
531 SyntaxMapCaptures::new(
532 range.clone(),
533 text,
534 [SyntaxLayerInfo {
535 language,
536 depth: 0,
537 node: tree.root_node(),
538 }]
539 .into_iter(),
540 query,
541 )
542 }
543
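    /// Iterate over the captures of `query` from every syntax layer that
    /// intersects `range`, merged into one stream ordered by position in the
    /// buffer.
    ///
    /// A minimal sketch, following `assert_capture_ranges` in the tests below
    /// (`buffer` is assumed to be a `BufferSnapshot`):
    ///
    /// ```ignore
    /// let captures = syntax_map.captures(0..buffer.len(), &buffer, |grammar| {
    ///     grammar.highlights_query.as_ref()
    /// });
    /// for capture in captures {
    ///     // `capture.grammar_index` indexes into `SyntaxMapCaptures::grammars`.
    ///     dbg!(capture.node.byte_range());
    /// }
    /// ```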
544 pub fn captures<'a>(
545 &'a self,
546 range: Range<usize>,
547 buffer: &'a BufferSnapshot,
548 query: fn(&Grammar) -> Option<&Query>,
549 ) -> SyntaxMapCaptures {
550 SyntaxMapCaptures::new(
551 range.clone(),
552 buffer.as_rope(),
553 self.layers_for_range(range, buffer).into_iter(),
554 query,
555 )
556 }
557
558 pub fn matches<'a>(
559 &'a self,
560 range: Range<usize>,
561 buffer: &'a BufferSnapshot,
562 query: fn(&Grammar) -> Option<&Query>,
563 ) -> SyntaxMapMatches {
564 SyntaxMapMatches::new(
565 range.clone(),
566 buffer.as_rope(),
567 self.layers_for_range(range, buffer).into_iter(),
568 query,
569 )
570 }
571
572 #[cfg(test)]
573 pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayerInfo> {
574 self.layers_for_range(0..buffer.len(), buffer).collect()
575 }
576
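    /// Return the syntax layers whose ranges intersect `range`, shallowest
    /// first, with each layer's root node offset so that its positions are
    /// expressed in buffer coordinates.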
577 pub fn layers_for_range<'a, T: ToOffset>(
578 &'a self,
579 range: Range<T>,
580 buffer: &'a BufferSnapshot,
581 ) -> impl 'a + Iterator<Item = SyntaxLayerInfo> {
582 let start = buffer.anchor_before(range.start.to_offset(buffer));
583 let end = buffer.anchor_after(range.end.to_offset(buffer));
584
585 let mut cursor = self.layers.filter::<_, ()>(move |summary| {
586 if summary.max_depth > summary.min_depth {
587 true
588 } else {
589 let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
590 let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
591 !is_before_start && !is_after_end
592 }
593 });
594
595 cursor.next(buffer);
596 std::iter::from_fn(move || {
597 if let Some(layer) = cursor.item() {
598 let info = SyntaxLayerInfo {
599 language: &layer.language,
600 depth: layer.depth,
601 node: layer.tree.root_node_with_offset(
602 layer.range.start.to_offset(buffer),
603 layer.range.start.to_point(buffer).to_ts_point(),
604 ),
605 };
606 cursor.next(buffer);
607 Some(info)
608 } else {
609 None
610 }
611 })
612 }
613}
614
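// The capture and match iterators below perform a k-way merge over the
// per-layer tree-sitter iterators: the first `active_layer_count` entries of
// `layers` are kept sorted by `sort_key` (start byte, then reversed end byte,
// then depth), so the next item is always taken from `layers[0]`, and a layer
// that runs out of results is rotated out of the active region.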
615impl<'a> SyntaxMapCaptures<'a> {
616 fn new(
617 range: Range<usize>,
618 text: &'a Rope,
619 layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
620 query: fn(&Grammar) -> Option<&Query>,
621 ) -> Self {
622 let mut result = Self {
623 layers: Vec::new(),
624 grammars: Vec::new(),
625 active_layer_count: 0,
626 };
627 for SyntaxLayerInfo {
628 language,
629 depth,
630 node,
631 } in layers
632 {
633 let grammar = match &language.grammar {
                Some(grammar) => grammar,
635 None => continue,
636 };
637 let query = match query(&grammar) {
638 Some(query) => query,
639 None => continue,
640 };
641
642 let mut query_cursor = QueryCursorHandle::new();
643
644 // TODO - add a Tree-sitter API to remove the need for this.
645 let cursor = unsafe {
646 std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
647 };
648
649 cursor.set_byte_range(range.clone());
650 let captures = cursor.captures(query, node, TextProvider(text));
651 let grammar_index = result
652 .grammars
653 .iter()
654 .position(|g| g.id == grammar.id())
655 .unwrap_or_else(|| {
656 result.grammars.push(grammar);
657 result.grammars.len() - 1
658 });
659 let mut layer = SyntaxMapCapturesLayer {
660 depth,
661 grammar_index,
662 next_capture: None,
663 captures,
664 _query_cursor: query_cursor,
665 };
666
667 layer.advance();
668 if layer.next_capture.is_some() {
669 let key = layer.sort_key();
670 let ix = match result.layers[..result.active_layer_count]
671 .binary_search_by_key(&key, |layer| layer.sort_key())
672 {
673 Ok(ix) | Err(ix) => ix,
674 };
675 result.layers.insert(ix, layer);
676 result.active_layer_count += 1;
677 } else {
678 result.layers.push(layer);
679 }
680 }
681
682 result
683 }
684
685 pub fn grammars(&self) -> &[&'a Grammar] {
686 &self.grammars
687 }
688
689 pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
690 let layer = self.layers[..self.active_layer_count].first()?;
691 let capture = layer.next_capture?;
692 Some(SyntaxMapCapture {
693 depth: layer.depth,
694 grammar_index: layer.grammar_index,
695 index: capture.index,
696 node: capture.node,
697 })
698 }
699
700 pub fn advance(&mut self) -> bool {
701 let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
702 layer
703 } else {
704 return false;
705 };
706
707 layer.advance();
708 if layer.next_capture.is_some() {
709 let key = layer.sort_key();
710 let i = 1 + self.layers[1..self.active_layer_count]
711 .iter()
712 .position(|later_layer| key < later_layer.sort_key())
713 .unwrap_or(self.active_layer_count - 1);
714 self.layers[0..i].rotate_left(1);
715 } else {
716 self.layers[0..self.active_layer_count].rotate_left(1);
717 self.active_layer_count -= 1;
718 }
719
720 true
721 }
722
723 pub fn set_byte_range(&mut self, range: Range<usize>) {
724 for layer in &mut self.layers {
725 layer.captures.set_byte_range(range.clone());
726 if let Some(capture) = &layer.next_capture {
727 if capture.node.end_byte() > range.start {
728 continue;
729 }
730 }
731 layer.advance();
732 }
733 self.layers.sort_unstable_by_key(|layer| layer.sort_key());
734 self.active_layer_count = self
735 .layers
736 .iter()
737 .position(|layer| layer.next_capture.is_none())
738 .unwrap_or(self.layers.len());
739 }
740}
741
742impl<'a> SyntaxMapMatches<'a> {
743 fn new(
744 range: Range<usize>,
745 text: &'a Rope,
746 layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
747 query: fn(&Grammar) -> Option<&Query>,
748 ) -> Self {
749 let mut result = Self::default();
750 for SyntaxLayerInfo {
751 language,
752 depth,
753 node,
754 } in layers
755 {
756 let grammar = match &language.grammar {
                Some(grammar) => grammar,
758 None => continue,
759 };
760 let query = match query(&grammar) {
761 Some(query) => query,
762 None => continue,
763 };
764
765 let mut query_cursor = QueryCursorHandle::new();
766
767 // TODO - add a Tree-sitter API to remove the need for this.
768 let cursor = unsafe {
769 std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
770 };
771
772 cursor.set_byte_range(range.clone());
773 let matches = cursor.matches(query, node, TextProvider(text));
774 let grammar_index = result
775 .grammars
776 .iter()
777 .position(|g| g.id == grammar.id())
778 .unwrap_or_else(|| {
779 result.grammars.push(grammar);
780 result.grammars.len() - 1
781 });
782 let mut layer = SyntaxMapMatchesLayer {
783 depth,
784 grammar_index,
785 matches,
786 next_pattern_index: 0,
787 next_captures: Vec::new(),
788 has_next: false,
789 _query_cursor: query_cursor,
790 };
791
792 layer.advance();
793 if layer.has_next {
794 let key = layer.sort_key();
795 let ix = match result.layers[..result.active_layer_count]
796 .binary_search_by_key(&key, |layer| layer.sort_key())
797 {
798 Ok(ix) | Err(ix) => ix,
799 };
800 result.layers.insert(ix, layer);
801 result.active_layer_count += 1;
802 } else {
803 result.layers.push(layer);
804 }
805 }
806 result
807 }
808
809 pub fn grammars(&self) -> &[&'a Grammar] {
810 &self.grammars
811 }
812
813 pub fn peek(&self) -> Option<SyntaxMapMatch> {
814 let layer = self.layers.first()?;
815 if !layer.has_next {
816 return None;
817 }
818 Some(SyntaxMapMatch {
819 depth: layer.depth,
820 grammar_index: layer.grammar_index,
821 pattern_index: layer.next_pattern_index,
822 captures: &layer.next_captures,
823 })
824 }
825
826 pub fn advance(&mut self) -> bool {
827 let layer = if let Some(layer) = self.layers.first_mut() {
828 layer
829 } else {
830 return false;
831 };
832
833 layer.advance();
834 if layer.has_next {
835 let key = layer.sort_key();
836 let i = 1 + self.layers[1..self.active_layer_count]
837 .iter()
838 .position(|later_layer| key < later_layer.sort_key())
839 .unwrap_or(self.active_layer_count - 1);
840 self.layers[0..i].rotate_left(1);
841 } else {
842 self.layers[0..self.active_layer_count].rotate_left(1);
843 self.active_layer_count -= 1;
844 }
845
846 true
847 }
848}
849
850impl<'a> SyntaxMapCapturesLayer<'a> {
851 fn advance(&mut self) {
852 self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]);
853 }
854
855 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
856 if let Some(capture) = &self.next_capture {
857 let range = capture.node.byte_range();
858 (range.start, Reverse(range.end), self.depth)
859 } else {
860 (usize::MAX, Reverse(0), usize::MAX)
861 }
862 }
863}
864
865impl<'a> SyntaxMapMatchesLayer<'a> {
866 fn advance(&mut self) {
867 if let Some(mat) = self.matches.next() {
868 self.next_captures.clear();
869 self.next_captures.extend_from_slice(&mat.captures);
870 self.next_pattern_index = mat.pattern_index;
871 self.has_next = true;
872 } else {
873 self.has_next = false;
874 }
875 }
876
877 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
878 if self.has_next {
879 let captures = &self.next_captures;
880 if let Some((first, last)) = captures.first().zip(captures.last()) {
881 return (
882 first.node.start_byte(),
883 Reverse(last.node.end_byte()),
884 self.depth,
885 );
886 }
887 }
888 (usize::MAX, Reverse(0), usize::MAX)
889 }
890}
891
892impl<'a> Iterator for SyntaxMapCaptures<'a> {
893 type Item = SyntaxMapCapture<'a>;
894
895 fn next(&mut self) -> Option<Self::Item> {
896 let result = self.peek();
897 self.advance();
898 result
899 }
900}
901
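/// Merge two sorted sequences of ranges into a single sorted list, coalescing
/// ranges that touch or overlap.
///
/// A small worked example (both inputs are assumed to be sorted by start):
///
/// ```ignore
/// let joined = join_ranges(vec![0..3, 8..10].into_iter(), vec![2..5].into_iter());
/// assert_eq!(joined, vec![0..5, 8..10]);
/// ```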
902fn join_ranges(
903 a: impl Iterator<Item = Range<usize>>,
904 b: impl Iterator<Item = Range<usize>>,
905) -> Vec<Range<usize>> {
906 let mut result = Vec::<Range<usize>>::new();
907 let mut a = a.peekable();
908 let mut b = b.peekable();
909 loop {
910 let range = match (a.peek(), b.peek()) {
911 (Some(range_a), Some(range_b)) => {
912 if range_a.start < range_b.start {
913 a.next().unwrap()
914 } else {
915 b.next().unwrap()
916 }
917 }
918 (None, Some(_)) => b.next().unwrap(),
919 (Some(_), None) => a.next().unwrap(),
920 (None, None) => break,
921 };
922
923 if let Some(last) = result.last_mut() {
924 if range.start <= last.end {
925 last.end = last.end.max(range.end);
926 continue;
927 }
928 }
929 result.push(range);
930 }
931 result
932}
933
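/// Parse `text` with the given grammar, restricted to `ranges`. The ranges are
/// given in buffer coordinates and are translated to be relative to
/// `start_byte`/`start_point`, since an injected layer parses only a slice of
/// the buffer. A thread-local `Parser` is reused across calls.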
934fn parse_text(
935 grammar: &Grammar,
936 text: &Rope,
937 start_byte: usize,
938 start_point: Point,
939 mut ranges: Vec<tree_sitter::Range>,
940 old_tree: Option<Tree>,
941) -> Tree {
942 for range in &mut ranges {
943 range.start_byte -= start_byte;
944 range.end_byte -= start_byte;
945 range.start_point = (Point::from_ts_point(range.start_point) - start_point).to_ts_point();
946 range.end_point = (Point::from_ts_point(range.end_point) - start_point).to_ts_point();
947 }
948
949 PARSER.with(|parser| {
950 let mut parser = parser.borrow_mut();
951 let mut chunks = text.chunks_in_range(start_byte..text.len());
952 parser
953 .set_included_ranges(&ranges)
954 .expect("overlapping ranges");
955 parser
956 .set_language(grammar.ts_language)
957 .expect("incompatible grammar");
958 parser
959 .parse_with(
960 &mut move |offset, _| {
961 chunks.seek(start_byte + offset);
962 chunks.next().unwrap_or("").as_bytes()
963 },
964 old_tree.as_ref(),
965 )
966 .expect("invalid language")
967 })
968}
969
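/// Run the grammar's injection query over the given changed ranges, pushing a
/// `ParseStep` onto `queue` for each injected language that can be resolved
/// through the registry. Ranges matched by "combined" injection patterns (such
/// as the ERB template language in the tests below) are accumulated per
/// language and pushed as a single combined step. Returns whether any
/// injection was found.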
970fn get_injections(
971 config: &InjectionConfig,
972 text: &BufferSnapshot,
973 node: Node,
974 language_registry: &LanguageRegistry,
975 depth: usize,
976 changed_ranges: &[Range<usize>],
977 combined_injection_ranges: &mut HashMap<Arc<Language>, Vec<tree_sitter::Range>>,
978 queue: &mut BinaryHeap<ParseStep>,
979) -> bool {
980 let mut result = false;
981 let mut query_cursor = QueryCursorHandle::new();
982 let mut prev_match = None;
983
984 combined_injection_ranges.clear();
985 for pattern in &config.patterns {
986 if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) {
987 if let Some(language) = language_registry.get_language(language_name) {
988 combined_injection_ranges.insert(language, Vec::new());
989 }
990 }
991 }
992
993 for query_range in changed_ranges {
994 query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end);
995 for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) {
996 let content_ranges = mat
997 .nodes_for_capture_index(config.content_capture_ix)
998 .map(|node| node.range())
999 .collect::<Vec<_>>();
1000 if content_ranges.is_empty() {
1001 continue;
1002 }
1003
1004 // Avoid duplicate matches if two changed ranges intersect the same injection.
1005 let content_range =
1006 content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
1007 if let Some((last_pattern_ix, last_range)) = &prev_match {
1008 if mat.pattern_index == *last_pattern_ix && content_range == *last_range {
1009 continue;
1010 }
1011 }
1012 prev_match = Some((mat.pattern_index, content_range.clone()));
1013
1014 let combined = config.patterns[mat.pattern_index].combined;
1015 let language_name = config.patterns[mat.pattern_index]
1016 .language
1017 .as_ref()
1018 .map(|s| Cow::Borrowed(s.as_ref()))
1019 .or_else(|| {
1020 let ix = config.language_capture_ix?;
1021 let node = mat.nodes_for_capture_index(ix).next()?;
1022 Some(Cow::Owned(text.text_for_range(node.byte_range()).collect()))
1023 });
1024
1025 if let Some(language_name) = language_name {
1026 if let Some(language) = language_registry.get_language(language_name.as_ref()) {
1027 result = true;
1028 let range = text.anchor_before(content_range.start)
1029 ..text.anchor_after(content_range.end);
1030 if combined {
1031 combined_injection_ranges
                            .get_mut(&language)
1033 .unwrap()
1034 .extend(content_ranges);
1035 } else {
1036 queue.push(ParseStep {
1037 depth,
1038 language,
1039 included_ranges: content_ranges,
1040 range,
1041 mode: ParseMode::Single,
1042 });
1043 }
1044 }
1045 }
1046 }
1047 }
1048
1049 for (language, mut included_ranges) in combined_injection_ranges.drain() {
1050 included_ranges.sort_unstable();
1051 let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte());
1052 queue.push(ParseStep {
1053 depth,
1054 language,
1055 range,
1056 included_ranges,
1057 mode: ParseMode::Combined {
1058 parent_layer_range: node.start_byte()..node.end_byte(),
1059 parent_layer_changed_ranges: changed_ranges.to_vec(),
1060 },
1061 })
1062 }
1063
1064 result
1065}
1066
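/// Compute the included ranges for an injection layer after an edit: old
/// ranges that intersect `changed_ranges` are removed, and `new_ranges` are
/// inserted in their place, keeping the result sorted.
///
/// This mirrors the first case of `test_splice_included_ranges` below
/// (`ts_range` builds a single-row `tree_sitter::Range`):
///
/// ```ignore
/// let spliced = splice_included_ranges(
///     vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)],
///     &[54..56, 58..68],
///     &[ts_range(50..54), ts_range(59..67)],
/// );
/// assert_eq!(
///     spliced,
///     &[ts_range(20..30), ts_range(50..54), ts_range(59..67), ts_range(80..90)]
/// );
/// ```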
1067fn splice_included_ranges(
1068 mut ranges: Vec<tree_sitter::Range>,
1069 changed_ranges: &[Range<usize>],
1070 new_ranges: &[tree_sitter::Range],
1071) -> Vec<tree_sitter::Range> {
1072 let mut changed_ranges = changed_ranges.into_iter().peekable();
1073 let mut new_ranges = new_ranges.into_iter().peekable();
1074 let mut ranges_ix = 0;
1075 loop {
1076 let new_range = new_ranges.peek();
1077 let mut changed_range = changed_ranges.peek();
1078
        // If the next new range ends before the next changed range begins, insert
        // the new range first. Otherwise, remove the old ranges that intersect the
        // changed range before inserting any new ranges into it.
1081 if let Some((changed, new)) = changed_range.zip(new_range) {
1082 if new.end_byte < changed.start {
1083 changed_range = None;
1084 }
1085 }
1086
1087 if let Some(changed) = changed_range {
1088 let mut start_ix = ranges_ix
1089 + match ranges[ranges_ix..].binary_search_by_key(&changed.start, |r| r.end_byte) {
1090 Ok(ix) | Err(ix) => ix,
1091 };
1092 let mut end_ix = ranges_ix
1093 + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) {
1094 Ok(ix) => ix + 1,
1095 Err(ix) => ix,
1096 };
1097
1098 // If there are empty ranges, then there may be multiple ranges with the same
1099 // start or end. Expand the splice to include any adjacent ranges that touch
1100 // the changed range.
1101 while start_ix > 0 {
1102 if ranges[start_ix - 1].end_byte == changed.start {
1103 start_ix -= 1;
1104 } else {
1105 break;
1106 }
1107 }
1108 while let Some(range) = ranges.get(end_ix) {
1109 if range.start_byte == changed.end {
1110 end_ix += 1;
1111 } else {
1112 break;
1113 }
1114 }
1115
1116 if end_ix > start_ix {
1117 ranges.splice(start_ix..end_ix, []);
1118 }
1119 changed_ranges.next();
1120 ranges_ix = start_ix;
1121 } else if let Some(new_range) = new_range {
1122 let ix = ranges_ix
1123 + match ranges[ranges_ix..]
1124 .binary_search_by_key(&new_range.start_byte, |r| r.start_byte)
1125 {
1126 Ok(ix) | Err(ix) => ix,
1127 };
1128 ranges.insert(ix, **new_range);
1129 new_ranges.next();
1130 ranges_ix = ix + 1;
1131 } else {
1132 break;
1133 }
1134 }
1135 ranges
1136}
1137
1138impl std::ops::Deref for SyntaxMap {
1139 type Target = SyntaxSnapshot;
1140
1141 fn deref(&self) -> &Self::Target {
1142 &self.snapshot
1143 }
1144}
1145
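// `ParseStep`s are only compared inside the `BinaryHeap`, which relies solely
// on `Ord`, so equality is never reported.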
1146impl PartialEq for ParseStep {
1147 fn eq(&self, _: &Self) -> bool {
1148 false
1149 }
1150}
1151
1152impl Eq for ParseStep {}
1153
1154impl PartialOrd for ParseStep {
1155 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1156 Some(self.cmp(&other))
1157 }
1158}
1159
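// `BinaryHeap` is a max-heap, so the comparisons are reversed where needed:
// steps with the smallest depth are popped first, then the earliest start,
// then the largest end, with the language id as a final tie-breaker.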
1160impl Ord for ParseStep {
1161 fn cmp(&self, other: &Self) -> Ordering {
1162 let range_a = self.range();
1163 let range_b = other.range();
1164 Ord::cmp(&other.depth, &self.depth)
1165 .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
1166 .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
1167 .then_with(|| self.language.id().cmp(&other.language.id()))
1168 }
1169}
1170
1171impl ParseStep {
1172 fn range(&self) -> Range<usize> {
1173 if let ParseMode::Combined {
1174 parent_layer_range, ..
1175 } = &self.mode
1176 {
1177 parent_layer_range.clone()
1178 } else {
1179 let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
1180 let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
1181 start..end
1182 }
1183 }
1184}
1185
1186impl ChangedRegion {
1187 fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
1188 let range_a = &self.range;
1189 let range_b = &other.range;
1190 Ord::cmp(&self.depth, &other.depth)
1191 .then_with(|| range_a.start.cmp(&range_b.start, buffer))
1192 .then_with(|| range_b.end.cmp(&range_a.end, buffer))
1193 }
1194}
1195
1196impl ChangeRegionSet {
1197 fn start_position(&self) -> ChangeStartPosition {
1198 self.0.first().map_or(
1199 ChangeStartPosition {
1200 depth: usize::MAX,
1201 position: Anchor::MAX,
1202 },
1203 |region| ChangeStartPosition {
1204 depth: region.depth,
1205 position: region.range.start,
1206 },
1207 )
1208 }
1209
1210 fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool {
1211 for region in &self.0 {
1212 if region.depth < layer.depth {
1213 continue;
1214 }
1215 if region.depth > layer.depth {
1216 break;
1217 }
1218 if region.range.end.cmp(&layer.range.start, text).is_le() {
1219 continue;
1220 }
1221 if region.range.start.cmp(&layer.range.end, text).is_ge() {
1222 break;
1223 }
1224 return true;
1225 }
1226 false
1227 }
1228
1229 fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
        if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(&region, text)) {
1231 self.0.insert(ix, region);
1232 }
1233 }
1234
1235 fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
1236 let prev_len = self.0.len();
1237 self.0.retain(|region| {
1238 region.depth > summary.max_depth
1239 || (region.depth == summary.max_depth
1240 && region
1241 .range
1242 .end
1243 .cmp(&summary.last_layer_range.start, text)
1244 .is_gt())
1245 });
1246 self.0.len() < prev_len
1247 }
1248}
1249
1250impl Default for SyntaxLayerSummary {
1251 fn default() -> Self {
1252 Self {
1253 max_depth: 0,
1254 min_depth: 0,
1255 range: Anchor::MAX..Anchor::MIN,
1256 last_layer_range: Anchor::MIN..Anchor::MAX,
1257 last_layer_language: None,
1258 }
1259 }
1260}
1261
1262impl sum_tree::Summary for SyntaxLayerSummary {
1263 type Context = BufferSnapshot;
1264
1265 fn add_summary(&mut self, other: &Self, buffer: &Self::Context) {
1266 if other.max_depth > self.max_depth {
1267 self.max_depth = other.max_depth;
1268 self.range = other.range.clone();
1269 } else {
1270 if other.range.start.cmp(&self.range.start, buffer).is_lt() {
1271 self.range.start = other.range.start;
1272 }
1273 if other.range.end.cmp(&self.range.end, buffer).is_gt() {
1274 self.range.end = other.range.end;
1275 }
1276 }
1277 self.last_layer_range = other.last_layer_range.clone();
1278 self.last_layer_language = other.last_layer_language;
1279 }
1280}
1281
1282impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
1283 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1284 Ord::cmp(&self.depth, &cursor_location.max_depth)
1285 .then_with(|| {
1286 self.range
1287 .start
1288 .cmp(&cursor_location.last_layer_range.start, buffer)
1289 })
1290 .then_with(|| {
1291 cursor_location
1292 .last_layer_range
1293 .end
1294 .cmp(&self.range.end, buffer)
1295 })
1296 .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
1297 }
1298}
1299
1300impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for ChangeStartPosition {
1301 fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
1302 Ord::cmp(&self.depth, &cursor_location.max_depth)
1303 .then_with(|| self.position.cmp(&cursor_location.range.end, text))
1304 }
1305}
1306
1307impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary>
1308 for SyntaxLayerPositionBeforeChange
1309{
1310 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
        if self.change.cmp(cursor_location, buffer).is_le() {
            Ordering::Less
        } else {
            self.position.cmp(cursor_location, buffer)
        }
1316 }
1317}
1318
1319impl sum_tree::Item for SyntaxLayer {
1320 type Summary = SyntaxLayerSummary;
1321
1322 fn summary(&self) -> Self::Summary {
1323 SyntaxLayerSummary {
1324 min_depth: self.depth,
1325 max_depth: self.depth,
1326 range: self.range.clone(),
1327 last_layer_range: self.range.clone(),
1328 last_layer_language: self.language.id(),
1329 }
1330 }
1331}
1332
1333impl std::fmt::Debug for SyntaxLayer {
1334 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1335 f.debug_struct("SyntaxLayer")
1336 .field("depth", &self.depth)
1337 .field("range", &self.range)
1338 .field("tree", &self.tree)
1339 .finish()
1340 }
1341}
1342
1343impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> {
1344 type I = ByteChunks<'a>;
1345
1346 fn text(&mut self, node: tree_sitter::Node) -> Self::I {
1347 ByteChunks(self.0.chunks_in_range(node.byte_range()))
1348 }
1349}
1350
1351impl<'a> Iterator for ByteChunks<'a> {
1352 type Item = &'a [u8];
1353
1354 fn next(&mut self) -> Option<Self::Item> {
1355 self.0.next().map(str::as_bytes)
1356 }
1357}
1358
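// Query cursors are pooled in `QUERY_CURSORS` so they can be reused instead of
// being reallocated for every query; `Drop` resets the cursor's byte and point
// ranges and returns it to the pool.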
1359impl QueryCursorHandle {
1360 pub(crate) fn new() -> Self {
1361 let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
1362 cursor.set_match_limit(64);
1363 QueryCursorHandle(Some(cursor))
1364 }
1365}
1366
1367impl Deref for QueryCursorHandle {
1368 type Target = QueryCursor;
1369
1370 fn deref(&self) -> &Self::Target {
1371 self.0.as_ref().unwrap()
1372 }
1373}
1374
1375impl DerefMut for QueryCursorHandle {
1376 fn deref_mut(&mut self) -> &mut Self::Target {
1377 self.0.as_mut().unwrap()
1378 }
1379}
1380
1381impl Drop for QueryCursorHandle {
1382 fn drop(&mut self) {
1383 let mut cursor = self.0.take().unwrap();
1384 cursor.set_byte_range(0..usize::MAX);
1385 cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
1386 QUERY_CURSORS.lock().push(cursor)
1387 }
1388}
1389
1390pub(crate) trait ToTreeSitterPoint {
1391 fn to_ts_point(self) -> tree_sitter::Point;
1392 fn from_ts_point(point: tree_sitter::Point) -> Self;
1393}
1394
1395impl ToTreeSitterPoint for Point {
1396 fn to_ts_point(self) -> tree_sitter::Point {
1397 tree_sitter::Point::new(self.row as usize, self.column as usize)
1398 }
1399
1400 fn from_ts_point(point: tree_sitter::Point) -> Self {
1401 Point::new(point.row as u32, point.column as u32)
1402 }
1403}
1404
1405#[cfg(test)]
1406mod tests {
1407 use super::*;
1408 use crate::LanguageConfig;
1409 use rand::rngs::StdRng;
1410 use std::env;
1411 use text::Buffer;
1412 use unindent::Unindent as _;
1413 use util::test::marked_text_ranges;
1414
1415 #[test]
1416 fn test_splice_included_ranges() {
1417 let ranges = vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)];
1418
1419 let new_ranges = splice_included_ranges(
1420 ranges.clone(),
1421 &[54..56, 58..68],
1422 &[ts_range(50..54), ts_range(59..67)],
1423 );
1424 assert_eq!(
1425 new_ranges,
1426 &[
1427 ts_range(20..30),
1428 ts_range(50..54),
1429 ts_range(59..67),
1430 ts_range(80..90),
1431 ]
1432 );
1433
1434 let new_ranges = splice_included_ranges(ranges.clone(), &[70..71, 91..100], &[]);
1435 assert_eq!(
1436 new_ranges,
1437 &[ts_range(20..30), ts_range(50..60), ts_range(80..90)]
1438 );
1439
1440 let new_ranges =
1441 splice_included_ranges(ranges.clone(), &[], &[ts_range(0..2), ts_range(70..75)]);
1442 assert_eq!(
1443 new_ranges,
1444 &[
1445 ts_range(0..2),
1446 ts_range(20..30),
1447 ts_range(50..60),
1448 ts_range(70..75),
1449 ts_range(80..90)
1450 ]
1451 );
1452
1453 let new_ranges = splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]);
1454 assert_eq!(new_ranges, &[ts_range(25..55), ts_range(80..90)]);
1455
1456 fn ts_range(range: Range<usize>) -> tree_sitter::Range {
1457 tree_sitter::Range {
1458 start_byte: range.start,
1459 start_point: tree_sitter::Point {
1460 row: 0,
1461 column: range.start,
1462 },
1463 end_byte: range.end,
1464 end_point: tree_sitter::Point {
1465 row: 0,
1466 column: range.end,
1467 },
1468 }
1469 }
1470 }
1471
1472 #[gpui::test]
1473 fn test_syntax_map_layers_for_range() {
1474 let registry = Arc::new(LanguageRegistry::test());
1475 let language = Arc::new(rust_lang());
1476 registry.add(language.clone());
1477
1478 let mut buffer = Buffer::new(
1479 0,
1480 0,
1481 r#"
1482 fn a() {
1483 assert_eq!(
1484 b(vec![C {}]),
1485 vec![d.e],
1486 );
1487 println!("{}", f(|_| true));
1488 }
1489 "#
1490 .unindent(),
1491 );
1492
1493 let mut syntax_map = SyntaxMap::new();
1494 syntax_map.set_language_registry(registry.clone());
1495 syntax_map.reparse(language.clone(), &buffer);
1496
1497 assert_layers_for_range(
1498 &syntax_map,
1499 &buffer,
1500 Point::new(2, 0)..Point::new(2, 0),
1501 &[
1502 "...(function_item ... (block (expression_statement (macro_invocation...",
1503 "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
1504 ],
1505 );
1506 assert_layers_for_range(
1507 &syntax_map,
1508 &buffer,
1509 Point::new(2, 14)..Point::new(2, 16),
1510 &[
1511 "...(function_item ...",
1512 "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
1513 "...(array_expression (struct_expression ...",
1514 ],
1515 );
1516 assert_layers_for_range(
1517 &syntax_map,
1518 &buffer,
1519 Point::new(3, 14)..Point::new(3, 16),
1520 &[
1521 "...(function_item ...",
1522 "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
1523 "...(array_expression (field_expression ...",
1524 ],
1525 );
1526 assert_layers_for_range(
1527 &syntax_map,
1528 &buffer,
1529 Point::new(5, 12)..Point::new(5, 16),
1530 &[
1531 "...(function_item ...",
1532 "...(call_expression ... (arguments (closure_expression ...",
1533 ],
1534 );
1535
        // Replace a vec! macro invocation with a borrowed array, removing a syntactic layer.
1537 let macro_name_range = range_for_text(&buffer, "vec!");
1538 buffer.edit([(macro_name_range, "&")]);
1539 syntax_map.interpolate(&buffer);
1540 syntax_map.reparse(language.clone(), &buffer);
1541
1542 assert_layers_for_range(
1543 &syntax_map,
1544 &buffer,
1545 Point::new(2, 14)..Point::new(2, 16),
1546 &[
1547 "...(function_item ...",
1548 "...(tuple_expression (call_expression ... arguments: (arguments (reference_expression value: (array_expression...",
1549 ],
1550 );
1551
1552 // Put the vec! macro back, adding back the syntactic layer.
1553 buffer.undo();
1554 syntax_map.interpolate(&buffer);
1555 syntax_map.reparse(language.clone(), &buffer);
1556
1557 assert_layers_for_range(
1558 &syntax_map,
1559 &buffer,
1560 Point::new(2, 14)..Point::new(2, 16),
1561 &[
1562 "...(function_item ...",
1563 "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
1564 "...(array_expression (struct_expression ...",
1565 ],
1566 );
1567 }
1568
1569 #[gpui::test]
1570 fn test_typing_multiple_new_injections() {
1571 let (buffer, syntax_map) = test_edit_sequence(
1572 "Rust",
1573 &[
1574 "fn a() { dbg }",
1575 "fn a() { dbg«!» }",
1576 "fn a() { dbg!«()» }",
1577 "fn a() { dbg!(«b») }",
1578 "fn a() { dbg!(b«.») }",
1579 "fn a() { dbg!(b.«c») }",
1580 "fn a() { dbg!(b.c«()») }",
1581 "fn a() { dbg!(b.c(«vec»)) }",
1582 "fn a() { dbg!(b.c(vec«!»)) }",
1583 "fn a() { dbg!(b.c(vec!«[]»)) }",
1584 "fn a() { dbg!(b.c(vec![«d»])) }",
1585 "fn a() { dbg!(b.c(vec![d«.»])) }",
1586 "fn a() { dbg!(b.c(vec![d.«e»])) }",
1587 ],
1588 );
1589
1590 assert_capture_ranges(
1591 &syntax_map,
1592 &buffer,
1593 &["field"],
1594 "fn a() { dbg!(b.«c»(vec![d.«e»])) }",
1595 );
1596 }
1597
1598 #[gpui::test]
1599 fn test_pasting_new_injection_line_between_others() {
1600 let (buffer, syntax_map) = test_edit_sequence(
1601 "Rust",
1602 &[
1603 "
1604 fn a() {
1605 b!(B {});
1606 c!(C {});
1607 d!(D {});
1608 e!(E {});
1609 f!(F {});
1610 g!(G {});
1611 }
1612 ",
1613 "
1614 fn a() {
1615 b!(B {});
1616 c!(C {});
1617 d!(D {});
1618 « h!(H {});
1619 » e!(E {});
1620 f!(F {});
1621 g!(G {});
1622 }
1623 ",
1624 ],
1625 );
1626
1627 assert_capture_ranges(
1628 &syntax_map,
1629 &buffer,
1630 &["struct"],
1631 "
1632 fn a() {
1633 b!(«B {}»);
1634 c!(«C {}»);
1635 d!(«D {}»);
1636 h!(«H {}»);
1637 e!(«E {}»);
1638 f!(«F {}»);
1639 g!(«G {}»);
1640 }
1641 ",
1642 );
1643 }
1644
1645 #[gpui::test]
1646 fn test_joining_injections_with_child_injections() {
1647 let (buffer, syntax_map) = test_edit_sequence(
1648 "Rust",
1649 &[
1650 "
1651 fn a() {
1652 b!(
1653 c![one.two.three],
1654 d![four.five.six],
1655 );
1656 e!(
1657 f![seven.eight],
1658 );
1659 }
1660 ",
1661 "
1662 fn a() {
1663 b!(
1664 c![one.two.three],
1665 d![four.five.six],
1666 ˇ f![seven.eight],
1667 );
1668 }
1669 ",
1670 ],
1671 );
1672
1673 assert_capture_ranges(
1674 &syntax_map,
1675 &buffer,
1676 &["field"],
1677 "
1678 fn a() {
1679 b!(
1680 c![one.«two».«three»],
1681 d![four.«five».«six»],
1682 f![seven.«eight»],
1683 );
1684 }
1685 ",
1686 );
1687 }
1688
1689 #[gpui::test]
1690 fn test_editing_edges_of_injection() {
1691 test_edit_sequence(
1692 "Rust",
1693 &[
1694 "
1695 fn a() {
1696 b!(c!())
1697 }
1698 ",
1699 "
1700 fn a() {
1701 «d»!(c!())
1702 }
1703 ",
1704 "
1705 fn a() {
1706 «e»d!(c!())
1707 }
1708 ",
1709 "
1710 fn a() {
1711 ed!«[»c!()«]»
1712 }
1713 ",
1714 ],
1715 );
1716 }
1717
1718 #[gpui::test]
1719 fn test_edits_preceding_and_intersecting_injection() {
1720 test_edit_sequence(
1721 "Rust",
1722 &[
1723 //
1724 "const aaaaaaaaaaaa: B = c!(d(e.f));",
1725 "const aˇa: B = c!(d(eˇ));",
1726 ],
1727 );
1728 }
1729
1730 #[gpui::test]
1731 fn test_non_local_changes_create_injections() {
1732 test_edit_sequence(
1733 "Rust",
1734 &[
1735 "
1736 // a! {
1737 static B: C = d;
1738 // }
1739 ",
1740 "
1741 ˇa! {
1742 static B: C = d;
1743 ˇ}
1744 ",
1745 ],
1746 );
1747 }
1748
1749 #[gpui::test]
1750 fn test_creating_many_injections_in_one_edit() {
1751 test_edit_sequence(
1752 "Rust",
1753 &[
1754 "
1755 fn a() {
1756 one(Two::three(3));
1757 four(Five::six(6));
1758 seven(Eight::nine(9));
1759 }
1760 ",
1761 "
1762 fn a() {
1763 one«!»(Two::three(3));
1764 four«!»(Five::six(6));
1765 seven«!»(Eight::nine(9));
1766 }
1767 ",
1768 "
1769 fn a() {
1770 one!(Two::three«!»(3));
1771 four!(Five::six«!»(6));
1772 seven!(Eight::nine«!»(9));
1773 }
1774 ",
1775 ],
1776 );
1777 }
1778
1779 #[gpui::test]
1780 fn test_editing_across_injection_boundary() {
1781 test_edit_sequence(
1782 "Rust",
1783 &[
1784 "
1785 fn one() {
1786 two();
1787 three!(
1788 three.four,
1789 five.six,
1790 );
1791 }
1792 ",
1793 "
1794 fn one() {
1795 two();
1796 th«irty_five![»
1797 three.four,
1798 five.six,
1799 « seven.eight,
1800 ];»
1801 }
1802 ",
1803 ],
1804 );
1805 }
1806
1807 #[gpui::test]
1808 fn test_removing_injection_by_replacing_across_boundary() {
1809 test_edit_sequence(
1810 "Rust",
1811 &[
1812 "
1813 fn one() {
1814 two!(
1815 three.four,
1816 );
1817 }
1818 ",
1819 "
1820 fn one() {
1821 t«en
1822 .eleven(
1823 twelve,
1824 »
1825 three.four,
1826 );
1827 }
1828 ",
1829 ],
1830 );
1831 }
1832
1833 #[gpui::test]
1834 fn test_combined_injections() {
1835 let (buffer, syntax_map) = test_edit_sequence(
1836 "ERB",
1837 &[
1838 "
1839 <body>
1840 <% if @one %>
1841 <div class=one>
1842 <% else %>
1843 <div class=two>
1844 <% end %>
1845 </div>
1846 </body>
1847 ",
1848 "
1849 <body>
1850 <% if @one %>
1851 <div class=one>
1852 ˇ else ˇ
1853 <div class=two>
1854 <% end %>
1855 </div>
1856 </body>
1857 ",
1858 "
1859 <body>
1860 <% if @one «;» end %>
1861 </div>
1862 </body>
1863 ",
1864 ],
1865 );
1866
1867 assert_capture_ranges(
1868 &syntax_map,
1869 &buffer,
1870 &["tag", "ivar"],
1871 "
1872 <«body»>
1873 <% if «@one» ; end %>
1874 </«div»>
1875 </«body»>
1876 ",
1877 );
1878 }
1879
1880 #[gpui::test]
1881 fn test_combined_injections_empty_ranges() {
1882 test_edit_sequence(
1883 "ERB",
1884 &[
1885 "
1886 <% if @one %>
1887 <% else %>
1888 <% end %>
1889 ",
1890 "
1891 <% if @one %>
1892 ˇ<% end %>
1893 ",
1894 ],
1895 );
1896 }
1897
1898 #[gpui::test(iterations = 100)]
1899 fn test_random_syntax_map_edits(mut rng: StdRng) {
1900 let operations = env::var("OPERATIONS")
1901 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1902 .unwrap_or(10);
1903
1904 let text = r#"
1905 fn test_something() {
1906 let vec = vec![5, 1, 3, 8];
1907 assert_eq!(
1908 vec
1909 .into_iter()
1910 .map(|i| i * 2)
1911 .collect::<Vec<usize>>(),
1912 vec![
1913 5 * 2, 1 * 2, 3 * 2, 8 * 2
1914 ],
1915 );
1916 }
1917 "#
1918 .unindent()
1919 .repeat(2);
1920
1921 let registry = Arc::new(LanguageRegistry::test());
1922 let language = Arc::new(rust_lang());
1923 registry.add(language.clone());
1924 let mut buffer = Buffer::new(0, 0, text);
1925
1926 let mut syntax_map = SyntaxMap::new();
1927 syntax_map.set_language_registry(registry.clone());
1928 syntax_map.reparse(language.clone(), &buffer);
1929
1930 let mut reference_syntax_map = SyntaxMap::new();
1931 reference_syntax_map.set_language_registry(registry.clone());
1932
1933 log::info!("initial text:\n{}", buffer.text());
1934
1935 for _ in 0..operations {
1936 let prev_buffer = buffer.snapshot();
1937 let prev_syntax_map = syntax_map.snapshot();
1938
1939 buffer.randomly_edit(&mut rng, 3);
1940 log::info!("text:\n{}", buffer.text());
1941
1942 syntax_map.interpolate(&buffer);
1943 check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer);
1944
1945 syntax_map.reparse(language.clone(), &buffer);
1946
1947 reference_syntax_map.clear();
1948 reference_syntax_map.reparse(language.clone(), &buffer);
1949 }
1950
1951 for i in 0..operations {
1952 let i = operations - i - 1;
1953 buffer.undo();
1954 log::info!("undoing operation {}", i);
1955 log::info!("text:\n{}", buffer.text());
1956
1957 syntax_map.interpolate(&buffer);
1958 syntax_map.reparse(language.clone(), &buffer);
1959
1960 reference_syntax_map.clear();
1961 reference_syntax_map.reparse(language.clone(), &buffer);
1962 assert_eq!(
1963 syntax_map.layers(&buffer).len(),
1964 reference_syntax_map.layers(&buffer).len(),
1965 "wrong number of layers after undoing edit {i}"
1966 );
1967 }
1968
1969 let layers = syntax_map.layers(&buffer);
1970 let reference_layers = reference_syntax_map.layers(&buffer);
1971 for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter())
1972 {
1973 assert_eq!(edited_layer.node.to_sexp(), reference_layer.node.to_sexp());
1974 assert_eq!(edited_layer.node.range(), reference_layer.node.range());
1975 }
1976 }
1977
1978 fn check_interpolation(
1979 old_syntax_map: &SyntaxSnapshot,
1980 new_syntax_map: &SyntaxSnapshot,
1981 old_buffer: &BufferSnapshot,
1982 new_buffer: &BufferSnapshot,
1983 ) {
1984 let edits = new_buffer
1985 .edits_since::<usize>(&old_buffer.version())
1986 .collect::<Vec<_>>();
1987
1988 for (old_layer, new_layer) in old_syntax_map
1989 .layers
1990 .iter()
1991 .zip(new_syntax_map.layers.iter())
1992 {
1993 assert_eq!(old_layer.range, new_layer.range);
1994 let old_start_byte = old_layer.range.start.to_offset(old_buffer);
1995 let new_start_byte = new_layer.range.start.to_offset(new_buffer);
1996 let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point();
1997 let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point();
1998 let old_node = old_layer
1999 .tree
2000 .root_node_with_offset(old_start_byte, old_start_point);
2001 let new_node = new_layer
2002 .tree
2003 .root_node_with_offset(new_start_byte, new_start_point);
2004 check_node_edits(
2005 old_layer.depth,
2006 &old_layer.range,
2007 old_node,
2008 new_node,
2009 old_buffer,
2010 new_buffer,
2011 &edits,
2012 );
2013 }
2014
2015 fn check_node_edits(
2016 depth: usize,
2017 range: &Range<Anchor>,
2018 old_node: Node,
2019 new_node: Node,
2020 old_buffer: &BufferSnapshot,
2021 new_buffer: &BufferSnapshot,
2022 edits: &[text::Edit<usize>],
2023 ) {
2024 assert_eq!(old_node.kind(), new_node.kind());
2025
2026 let old_range = old_node.byte_range();
2027 let new_range = new_node.byte_range();
2028
2029 let is_edited = edits
2030 .iter()
2031 .any(|edit| edit.new.start < new_range.end && edit.new.end > new_range.start);
2032 if is_edited {
2033 assert!(
2034 new_node.has_changes(),
2035 concat!(
2036 "failed to mark node as edited.\n",
2037 "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
2038 "node kind: {}, old node range: {:?}, new node range: {:?}",
2039 ),
2040 depth,
2041 range.to_offset(old_buffer),
2042 range.to_offset(new_buffer),
2043 new_node.kind(),
2044 old_range,
2045 new_range,
2046 );
2047 }
2048
2049 if !new_node.has_changes() {
2050 assert_eq!(
2051 old_buffer
2052 .text_for_range(old_range.clone())
2053 .collect::<String>(),
2054 new_buffer
2055 .text_for_range(new_range.clone())
2056 .collect::<String>(),
2057 concat!(
2058 "mismatched text for node\n",
2059 "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
2060 "node kind: {}, old node range:{:?}, new node range:{:?}",
2061 ),
2062 depth,
2063 range.to_offset(old_buffer),
2064 range.to_offset(new_buffer),
2065 new_node.kind(),
2066 old_range,
2067 new_range,
2068 );
2069 }
2070
2071 for i in 0..new_node.child_count() {
2072 check_node_edits(
2073 depth,
2074 range,
2075 old_node.child(i).unwrap(),
2076 new_node.child(i).unwrap(),
2077 old_buffer,
2078 new_buffer,
2079 edits,
2080 )
2081 }
2082 }
2083 }
2084
2085 fn test_edit_sequence(language_name: &str, steps: &[&str]) -> (Buffer, SyntaxMap) {
2086 let registry = Arc::new(LanguageRegistry::test());
2087 registry.add(Arc::new(rust_lang()));
2088 registry.add(Arc::new(ruby_lang()));
2089 registry.add(Arc::new(html_lang()));
2090 registry.add(Arc::new(erb_lang()));
2091 let language = registry.get_language(language_name).unwrap();
2092 let mut buffer = Buffer::new(0, 0, Default::default());
2093
2094 let mut mutated_syntax_map = SyntaxMap::new();
2095 mutated_syntax_map.set_language_registry(registry.clone());
2096 mutated_syntax_map.reparse(language.clone(), &buffer);
2097
2098 for (i, marked_string) in steps.into_iter().enumerate() {
2099 edit_buffer(&mut buffer, &marked_string.unindent());
2100
2101 // Reparse the syntax map
2102 mutated_syntax_map.interpolate(&buffer);
2103 mutated_syntax_map.reparse(language.clone(), &buffer);
2104
2105 // Create a second syntax map from scratch
2106 let mut reference_syntax_map = SyntaxMap::new();
2107 reference_syntax_map.set_language_registry(registry.clone());
2108 reference_syntax_map.reparse(language.clone(), &buffer);
2109
2110 // Compare the mutated syntax map to the new syntax map
2111 let mutated_layers = mutated_syntax_map.layers(&buffer);
2112 let reference_layers = reference_syntax_map.layers(&buffer);
2113 assert_eq!(
2114 mutated_layers.len(),
2115 reference_layers.len(),
2116 "wrong number of layers at step {i}"
2117 );
2118 for (edited_layer, reference_layer) in
2119 mutated_layers.into_iter().zip(reference_layers.into_iter())
2120 {
2121 assert_eq!(
2122 edited_layer.node.to_sexp(),
2123 reference_layer.node.to_sexp(),
2124 "different layer at step {i}"
2125 );
2126 assert_eq!(
2127 edited_layer.node.range(),
2128 reference_layer.node.range(),
2129 "different layer at step {i}"
2130 );
2131 }
2132 }
2133
2134 (buffer, mutated_syntax_map)
2135 }
2136
2137 fn html_lang() -> Language {
2138 Language::new(
2139 LanguageConfig {
2140 name: "HTML".into(),
2141 path_suffixes: vec!["html".to_string()],
2142 ..Default::default()
2143 },
2144 Some(tree_sitter_html::language()),
2145 )
2146 .with_highlights_query(
2147 r#"
2148 (tag_name) @tag
2149 (erroneous_end_tag_name) @tag
2150 (attribute_name) @property
2151 "#,
2152 )
2153 .unwrap()
2154 }
2155
2156 fn ruby_lang() -> Language {
2157 Language::new(
2158 LanguageConfig {
2159 name: "Ruby".into(),
2160 path_suffixes: vec!["rb".to_string()],
2161 ..Default::default()
2162 },
2163 Some(tree_sitter_ruby::language()),
2164 )
2165 .with_highlights_query(
2166 r#"
2167 ["if" "do" "else" "end"] @keyword
2168 (instance_variable) @ivar
2169 "#,
2170 )
2171 .unwrap()
2172 }
2173
2174 fn erb_lang() -> Language {
2175 Language::new(
2176 LanguageConfig {
2177 name: "ERB".into(),
2178 path_suffixes: vec!["erb".to_string()],
2179 ..Default::default()
2180 },
2181 Some(tree_sitter_embedded_template::language()),
2182 )
2183 .with_highlights_query(
2184 r#"
2185 ["<%" "%>"] @keyword
2186 "#,
2187 )
2188 .unwrap()
2189 .with_injection_query(
2190 r#"
2191 ((code) @content
2192 (#set! "language" "ruby")
2193 (#set! "combined"))
2194
2195 ((content) @content
2196 (#set! "language" "html")
2197 (#set! "combined"))
2198 "#,
2199 )
2200 .unwrap()
2201 }
2202
2203 fn rust_lang() -> Language {
2204 Language::new(
2205 LanguageConfig {
2206 name: "Rust".into(),
2207 path_suffixes: vec!["rs".to_string()],
2208 ..Default::default()
2209 },
2210 Some(tree_sitter_rust::language()),
2211 )
2212 .with_highlights_query(
2213 r#"
2214 (field_identifier) @field
2215 (struct_expression) @struct
2216 "#,
2217 )
2218 .unwrap()
2219 .with_injection_query(
2220 r#"
2221 (macro_invocation
2222 (token_tree) @content
2223 (#set! "language" "rust"))
2224 "#,
2225 )
2226 .unwrap()
2227 }
2228
2229 fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
2230 let start = buffer.as_rope().to_string().find(text).unwrap();
2231 start..start + text.len()
2232 }
2233
2234 fn assert_layers_for_range(
2235 syntax_map: &SyntaxMap,
2236 buffer: &BufferSnapshot,
2237 range: Range<Point>,
2238 expected_layers: &[&str],
2239 ) {
2240 let layers = syntax_map
2241 .layers_for_range(range, &buffer)
2242 .collect::<Vec<_>>();
2243 assert_eq!(
2244 layers.len(),
2245 expected_layers.len(),
2246 "wrong number of layers"
2247 );
2248 for (i, (SyntaxLayerInfo { node, .. }, expected_s_exp)) in
2249 layers.iter().zip(expected_layers.iter()).enumerate()
2250 {
2251 let actual_s_exp = node.to_sexp();
2252 assert!(
2253 string_contains_sequence(
2254 &actual_s_exp,
2255 &expected_s_exp.split("...").collect::<Vec<_>>()
2256 ),
2257 "layer {i}:\n\nexpected: {expected_s_exp}\nactual: {actual_s_exp}",
2258 );
2259 }
2260 }
2261
2262 fn assert_capture_ranges(
2263 syntax_map: &SyntaxMap,
2264 buffer: &BufferSnapshot,
2265 highlight_query_capture_names: &[&str],
2266 marked_string: &str,
2267 ) {
2268 let mut actual_ranges = Vec::<Range<usize>>::new();
2269 let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| {
2270 grammar.highlights_query.as_ref()
2271 });
2272 let queries = captures
2273 .grammars()
2274 .iter()
2275 .map(|grammar| grammar.highlights_query.as_ref().unwrap())
2276 .collect::<Vec<_>>();
2277 for capture in captures {
2278 let name = &queries[capture.grammar_index].capture_names()[capture.index as usize];
2279 if highlight_query_capture_names.contains(&name.as_str()) {
2280 actual_ranges.push(capture.node.byte_range());
2281 }
2282 }
2283
2284 let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false);
2285 assert_eq!(text, buffer.text());
2286 assert_eq!(actual_ranges, expected_ranges);
2287 }
2288
2289 fn edit_buffer(buffer: &mut Buffer, marked_string: &str) {
2290 let old_text = buffer.text();
2291 let (new_text, mut ranges) = marked_text_ranges(marked_string, false);
2292 if ranges.is_empty() {
2293 ranges.push(0..new_text.len());
2294 }
2295
2296 assert_eq!(
2297 old_text[..ranges[0].start],
2298 new_text[..ranges[0].start],
2299 "invalid edit"
2300 );
2301
2302 let mut delta = 0;
2303 let mut edits = Vec::new();
2304 let mut ranges = ranges.into_iter().peekable();
2305
2306 while let Some(inserted_range) = ranges.next() {
2307 let new_start = inserted_range.start;
2308 let old_start = (new_start as isize - delta) as usize;
2309
2310 let following_text = if let Some(next_range) = ranges.peek() {
2311 &new_text[inserted_range.end..next_range.start]
2312 } else {
2313 &new_text[inserted_range.end..]
2314 };
2315
2316 let inserted_len = inserted_range.len();
2317 let deleted_len = old_text[old_start..]
2318 .find(following_text)
2319 .expect("invalid edit");
2320
2321 let old_range = old_start..old_start + deleted_len;
2322 edits.push((old_range, new_text[inserted_range].to_string()));
2323 delta += inserted_len as isize - deleted_len as isize;
2324 }
2325
2326 assert_eq!(
2327 old_text.len() as isize + delta,
2328 new_text.len() as isize,
2329 "invalid edit"
2330 );
2331
2332 buffer.edit(edits);
2333 }
2334
2335 pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool {
2336 let mut last_part_end = 0;
2337 for part in parts {
2338 if let Some(start_ix) = text[last_part_end..].find(part) {
                last_part_end += start_ix + part.len();
2340 } else {
2341 return false;
2342 }
2343 }
2344 true
2345 }
2346}