use crate::{Grammar, InjectionConfig, Language, LanguageRegistry};
use collections::HashMap;
use lazy_static::lazy_static;
use parking_lot::Mutex;
use std::{
    borrow::Cow,
    cell::RefCell,
    cmp::{Ordering, Reverse},
    collections::BinaryHeap,
    ops::{Deref, DerefMut, Range},
    sync::Arc,
};
use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{
    Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
};

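// A single Tree-sitter `Parser` is kept per thread, and `QueryCursor`s are pooled in
// `QUERY_CURSORS` so that `QueryCursorHandle` can hand them out and return them on drop,
// rather than allocating a new cursor for every query.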
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}

lazy_static! {
    static ref QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Default::default();
}

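// `SyntaxMap` tracks one syntax tree per "layer": the root language at depth zero, plus a
// layer for every language injection (e.g. a macro body or an embedded template language).
// `interpolated_version` records how far the layers have been brought up to date with cheap
// tree edits, while `parsed_version` records the last full reparse.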
#[derive(Default)]
pub struct SyntaxMap {
    parsed_version: clock::Global,
    interpolated_version: clock::Global,
    snapshot: SyntaxSnapshot,
    language_registry: Option<Arc<LanguageRegistry>>,
}

#[derive(Clone, Default)]
pub struct SyntaxSnapshot {
    layers: SumTree<SyntaxLayer>,
}

#[derive(Default)]
pub struct SyntaxMapCaptures<'a> {
    layers: Vec<SyntaxMapCapturesLayer<'a>>,
    active_layer_count: usize,
    grammars: Vec<&'a Grammar>,
}

#[derive(Default)]
pub struct SyntaxMapMatches<'a> {
    layers: Vec<SyntaxMapMatchesLayer<'a>>,
    active_layer_count: usize,
    grammars: Vec<&'a Grammar>,
}

#[derive(Debug)]
pub struct SyntaxMapCapture<'a> {
    pub depth: usize,
    pub node: Node<'a>,
    pub index: u32,
    pub grammar_index: usize,
}

#[derive(Debug)]
pub struct SyntaxMapMatch<'a> {
    pub depth: usize,
    pub pattern_index: usize,
    pub captures: &'a [QueryCapture<'a>],
    pub grammar_index: usize,
}

struct SyntaxMapCapturesLayer<'a> {
    depth: usize,
    captures: QueryCaptures<'a, 'a, TextProvider<'a>>,
    next_capture: Option<QueryCapture<'a>>,
    grammar_index: usize,
    _query_cursor: QueryCursorHandle,
}

struct SyntaxMapMatchesLayer<'a> {
    depth: usize,
    next_pattern_index: usize,
    next_captures: Vec<QueryCapture<'a>>,
    has_next: bool,
    matches: QueryMatches<'a, 'a, TextProvider<'a>>,
    grammar_index: usize,
    _query_cursor: QueryCursorHandle,
}

#[derive(Clone)]
struct SyntaxLayer {
    depth: usize,
    range: Range<Anchor>,
    tree: tree_sitter::Tree,
    language: Arc<Language>,
    combined: bool,
}

#[derive(Debug)]
pub struct SyntaxLayerInfo<'a> {
    pub depth: usize,
    pub node: Node<'a>,
    pub language: &'a Arc<Language>,
}

#[derive(Debug, Clone)]
struct SyntaxLayerSummary {
    min_depth: usize,
    max_depth: usize,
    range: Range<Anchor>,
    last_layer_range: Range<Anchor>,
    last_layer_language: Option<usize>,
}

#[derive(Clone, Debug)]
struct SyntaxLayerPosition {
    depth: usize,
    range: Range<Anchor>,
    language: Option<usize>,
}

#[derive(Clone, Debug)]
struct DepthAndMaxPosition(usize, Anchor);

#[derive(Clone, Debug)]
struct SyntaxLayerPositionBeforeChange {
    position: SyntaxLayerPosition,
    change: DepthAndMaxPosition,
}

struct ParseStep {
    depth: usize,
    language: Arc<Language>,
    range: Range<Anchor>,
    included_ranges: Vec<tree_sitter::Range>,
    mode: ParseMode,
}

enum ParseMode {
    Single,
    Combined {
        parent_layer_range: Range<usize>,
        parent_layer_changed_ranges: Vec<Range<usize>>,
    },
}

#[derive(Debug, PartialEq, Eq)]
struct ChangedRegion {
    depth: usize,
    range: Range<Anchor>,
}

#[derive(Default)]
struct ChangeRegionSet(Vec<ChangedRegion>);

struct TextProvider<'a>(&'a Rope);

struct ByteChunks<'a>(text::Chunks<'a>);

struct QueryCursorHandle(Option<QueryCursor>);

impl SyntaxMap {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
        self.language_registry = Some(registry);
    }

    pub fn snapshot(&self) -> SyntaxSnapshot {
        self.snapshot.clone()
    }

    pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
        self.language_registry.clone()
    }

    pub fn parsed_version(&self) -> clock::Global {
        self.parsed_version.clone()
    }

    pub fn interpolate(&mut self, text: &BufferSnapshot) {
        self.snapshot.interpolate(&self.interpolated_version, text);
        self.interpolated_version = text.version.clone();
    }

    #[cfg(test)]
    pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
        self.snapshot.reparse(
            &self.parsed_version,
            text,
            self.language_registry.clone(),
            language,
        );
        self.parsed_version = text.version.clone();
        self.interpolated_version = text.version.clone();
    }

    pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) {
        self.interpolated_version = version.clone();
        self.parsed_version = version;
        self.snapshot = snapshot;
    }

    pub fn clear(&mut self) {
        self.snapshot = SyntaxSnapshot::default();
    }
}

impl SyntaxSnapshot {
    pub fn is_empty(&self) -> bool {
        self.layers.is_empty()
    }

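    // Bring the existing syntax trees up to date with `text` by applying the edits since
    // `from_version` as Tree-sitter `InputEdit`s. This shifts and invalidates nodes without
    // reparsing; a later call to `reparse` produces the corrected trees.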
    pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
        let edits = text
            .anchored_edits_since::<(usize, Point)>(&from_version)
            .collect::<Vec<_>>();
        if edits.is_empty() {
            return;
        }

        let mut layers = SumTree::new();
        let mut first_edit_ix_for_depth = 0;
        let mut prev_depth = 0;
        let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
        cursor.next(text);

        'outer: loop {
            let depth = cursor.end(text).max_depth;
            if depth > prev_depth {
                first_edit_ix_for_depth = 0;
                prev_depth = depth;
            }

            // Preserve any layers at this depth that precede the first edit.
            if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
                let target = DepthAndMaxPosition(depth, edit_range.start);
                if target.cmp(&cursor.start(), text).is_gt() {
                    let slice = cursor.slice(&target, Bias::Left, text);
                    layers.push_tree(slice, text);
                }
            }
            // If this layer follows all of the edits, then preserve it and any
            // subsequent layers at this same depth.
            else if cursor.item().is_some() {
                let slice = cursor.slice(
                    &SyntaxLayerPosition {
                        depth: depth + 1,
                        range: Anchor::MIN..Anchor::MAX,
                        language: None,
                    },
                    Bias::Left,
                    text,
                );
                layers.push_tree(slice, text);
                continue;
            };

            let layer = if let Some(layer) = cursor.item() {
                layer
            } else {
                break;
            };
            let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text);

            // Ignore edits that end before the start of this layer, and don't consider them
            // for any subsequent layers at this same depth.
            loop {
                if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
                    if edit_range.end.cmp(&layer.range.start, text).is_le() {
                        first_edit_ix_for_depth += 1;
                    } else {
                        break;
                    }
                } else {
                    continue 'outer;
                }
            }

            let mut layer = layer.clone();
            for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
                // Ignore any edits that follow this layer.
                if edit_range.start.cmp(&layer.range.end, text).is_ge() {
                    break;
                }

                // Apply any edits that intersect this layer to the layer's syntax tree.
                let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
                    tree_sitter::InputEdit {
                        start_byte: edit.new.start.0 - start_byte,
                        old_end_byte: edit.new.start.0 - start_byte
                            + (edit.old.end.0 - edit.old.start.0),
                        new_end_byte: edit.new.end.0 - start_byte,
                        start_position: (edit.new.start.1 - start_point).to_ts_point(),
                        old_end_position: (edit.new.start.1 - start_point
                            + (edit.old.end.1 - edit.old.start.1))
                            .to_ts_point(),
                        new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
                    }
                } else {
                    let node = layer.tree.root_node();
                    tree_sitter::InputEdit {
                        start_byte: 0,
                        old_end_byte: node.end_byte(),
                        new_end_byte: 0,
                        start_position: Default::default(),
                        old_end_position: node.end_position(),
                        new_end_position: Default::default(),
                    }
                };

                layer.tree.edit(&tree_edit);
            }

            debug_assert!(
                layer.tree.root_node().end_byte() <= text.len(),
                "tree's size {} is larger than text size {}",
                layer.tree.root_node().end_byte(),
                text.len(),
            );

            layers.push(layer, text);
            cursor.next(text);
        }

        layers.push_tree(cursor.suffix(&text), &text);
        drop(cursor);
        self.layers = layers;
    }

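    // Rebuild the layers for `text`. Parse steps are processed from a priority queue in
    // order of increasing depth; unchanged layers are reused from the existing tree, edited
    // layers are reparsed incrementally, and any injections found in changed ranges are
    // pushed onto the queue as deeper steps.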
    pub fn reparse(
        &mut self,
        from_version: &clock::Global,
        text: &BufferSnapshot,
        registry: Option<Arc<LanguageRegistry>>,
        root_language: Arc<Language>,
    ) {
        let edits = text.edits_since::<usize>(from_version).collect::<Vec<_>>();
        let max_depth = self.layers.summary().max_depth;
        let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
        cursor.next(&text);
        let mut layers = SumTree::new();

        let mut changed_regions = ChangeRegionSet::default();
        let mut queue = BinaryHeap::new();
        let mut combined_injection_ranges = HashMap::default();
        queue.push(ParseStep {
            depth: 0,
            language: root_language.clone(),
            included_ranges: vec![tree_sitter::Range {
                start_byte: 0,
                end_byte: text.len(),
                start_point: Point::zero().to_ts_point(),
                end_point: text.max_point().to_ts_point(),
            }],
            range: Anchor::MIN..Anchor::MAX,
            mode: ParseMode::Single,
        });

        loop {
            let step = queue.pop();
            let target = if let Some(step) = &step {
                SyntaxLayerPosition {
                    depth: step.depth,
                    range: step.range.clone(),
                    language: step.language.id(),
                }
            } else {
                SyntaxLayerPosition {
                    depth: max_depth + 1,
                    range: Anchor::MAX..Anchor::MAX,
                    language: None,
                }
            };

            let mut done = cursor.item().is_none();
            while !done && target.cmp(&cursor.end(text), &text).is_gt() {
                done = true;

                let bounded_target = SyntaxLayerPositionBeforeChange {
                    position: target.clone(),
                    change: changed_regions.start_position(),
                };
                if bounded_target.cmp(&cursor.start(), &text).is_gt() {
                    let slice = cursor.slice(&bounded_target, Bias::Left, text);
                    if !slice.is_empty() {
                        layers.push_tree(slice, &text);
                        if changed_regions.prune(cursor.end(text), text) {
                            done = false;
                        }
                    }
                }

                while target.cmp(&cursor.end(text), text).is_gt() {
                    let Some(layer) = cursor.item() else { break };

                    if changed_regions.intersects(&layer, text) && !layer.combined {
                        changed_regions.insert(
                            ChangedRegion {
                                depth: layer.depth + 1,
                                range: layer.range.clone(),
                            },
                            text,
                        );
                    } else {
                        layers.push(layer.clone(), text);
                    }

                    cursor.next(text);
                    if changed_regions.prune(cursor.end(text), text) {
                        done = false;
                    }
                }
            }

            let Some(step) = step else { break };
            let (step_start_byte, step_start_point) =
                step.range.start.summary::<(usize, Point)>(text);
            let step_end_byte = step.range.end.to_offset(text);
            let Some(grammar) = step.language.grammar.as_deref() else { continue };

            let mut old_layer = cursor.item();
            if let Some(layer) = old_layer {
                if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
                    && layer.language.id() == step.language.id()
                {
                    cursor.next(&text);
                } else {
                    old_layer = None;
                }
            }

            let combined = matches!(step.mode, ParseMode::Combined { .. });
            let mut included_ranges = step.included_ranges;

            let tree;
            let changed_ranges;
            if let Some(old_layer) = old_layer {
                if let ParseMode::Combined {
                    parent_layer_changed_ranges,
                    ..
                } = step.mode
                {
                    included_ranges = splice_included_ranges(
                        old_layer.tree.included_ranges(),
                        &parent_layer_changed_ranges,
                        &included_ranges,
                    );
                }

                tree = parse_text(
                    grammar,
                    text.as_rope(),
                    step_start_byte,
                    step_start_point,
                    included_ranges,
                    Some(old_layer.tree.clone()),
                );
                changed_ranges = join_ranges(
                    edits
                        .iter()
                        .map(|e| e.new.clone())
                        .filter(|range| range.start < step_end_byte && range.end > step_start_byte),
                    old_layer
                        .tree
                        .changed_ranges(&tree)
                        .map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte),
                );
            } else {
                tree = parse_text(
                    grammar,
                    text.as_rope(),
                    step_start_byte,
                    step_start_point,
                    included_ranges,
                    None,
                );
                changed_ranges = vec![step_start_byte..step_end_byte];
            }

            layers.push(
                SyntaxLayer {
                    depth: step.depth,
                    range: step.range,
                    tree: tree.clone(),
                    language: step.language.clone(),
                    combined,
                },
                &text,
            );

            if let (Some((config, registry)), false) = (
                grammar.injection_config.as_ref().zip(registry.as_ref()),
                changed_ranges.is_empty(),
            ) {
                for range in &changed_ranges {
                    changed_regions.insert(
                        ChangedRegion {
                            depth: step.depth + 1,
                            range: text.anchor_before(range.start)..text.anchor_after(range.end),
                        },
                        text,
                    );
                }
                get_injections(
                    config,
                    text,
                    tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()),
                    registry,
                    step.depth + 1,
                    &changed_ranges,
                    &mut combined_injection_ranges,
                    &mut queue,
                );
            }
        }

        drop(cursor);
        self.layers = layers;
    }

    pub fn single_tree_captures<'a>(
        range: Range<usize>,
        text: &'a Rope,
        tree: &'a Tree,
        language: &'a Arc<Language>,
        query: fn(&Grammar) -> Option<&Query>,
    ) -> SyntaxMapCaptures<'a> {
        SyntaxMapCaptures::new(
            range.clone(),
            text,
            [SyntaxLayerInfo {
                language,
                depth: 0,
                node: tree.root_node(),
            }]
            .into_iter(),
            query,
        )
    }

    pub fn captures<'a>(
        &'a self,
        range: Range<usize>,
        buffer: &'a BufferSnapshot,
        query: fn(&Grammar) -> Option<&Query>,
    ) -> SyntaxMapCaptures {
        SyntaxMapCaptures::new(
            range.clone(),
            buffer.as_rope(),
            self.layers_for_range(range, buffer).into_iter(),
            query,
        )
    }

    pub fn matches<'a>(
        &'a self,
        range: Range<usize>,
        buffer: &'a BufferSnapshot,
        query: fn(&Grammar) -> Option<&Query>,
    ) -> SyntaxMapMatches {
        SyntaxMapMatches::new(
            range.clone(),
            buffer.as_rope(),
            self.layers_for_range(range, buffer).into_iter(),
            query,
        )
    }

    #[cfg(test)]
    pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayerInfo> {
        self.layers_for_range(0..buffer.len(), buffer).collect()
    }

    pub fn layers_for_range<'a, T: ToOffset>(
        &'a self,
        range: Range<T>,
        buffer: &'a BufferSnapshot,
    ) -> impl 'a + Iterator<Item = SyntaxLayerInfo> {
        let start = buffer.anchor_before(range.start.to_offset(buffer));
        let end = buffer.anchor_after(range.end.to_offset(buffer));

        let mut cursor = self.layers.filter::<_, ()>(move |summary| {
            if summary.max_depth > summary.min_depth {
                true
            } else {
                let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
                let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
                !is_before_start && !is_after_end
            }
        });

        cursor.next(buffer);
        std::iter::from_fn(move || {
            if let Some(layer) = cursor.item() {
                let info = SyntaxLayerInfo {
                    language: &layer.language,
                    depth: layer.depth,
                    node: layer.tree.root_node_with_offset(
                        layer.range.start.to_offset(buffer),
                        layer.range.start.to_point(buffer).to_ts_point(),
                    ),
                };
                cursor.next(buffer);
                Some(info)
            } else {
                None
            }
        })
    }
}

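// Captures (and matches, below) from all layers are merged into a single stream: the
// per-layer iterators are kept sorted by `sort_key` (start byte, then descending end byte,
// then depth), and `advance` re-sorts only the front layer, so results come out in
// document order across languages.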
impl<'a> SyntaxMapCaptures<'a> {
    fn new(
        range: Range<usize>,
        text: &'a Rope,
        layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
        query: fn(&Grammar) -> Option<&Query>,
    ) -> Self {
        let mut result = Self {
            layers: Vec::new(),
            grammars: Vec::new(),
            active_layer_count: 0,
        };
        for SyntaxLayerInfo {
            language,
            depth,
            node,
        } in layers
        {
            let grammar = match &language.grammar {
                Some(grammar) => grammar,
                None => continue,
            };
            let query = match query(&grammar) {
                Some(query) => query,
                None => continue,
            };

            let mut query_cursor = QueryCursorHandle::new();

            // TODO - add a Tree-sitter API to remove the need for this.
            let cursor = unsafe {
                std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
            };

            cursor.set_byte_range(range.clone());
            let captures = cursor.captures(query, node, TextProvider(text));
            let grammar_index = result
                .grammars
                .iter()
                .position(|g| g.id == grammar.id())
                .unwrap_or_else(|| {
                    result.grammars.push(grammar);
                    result.grammars.len() - 1
                });
            let mut layer = SyntaxMapCapturesLayer {
                depth,
                grammar_index,
                next_capture: None,
                captures,
                _query_cursor: query_cursor,
            };

            layer.advance();
            if layer.next_capture.is_some() {
                let key = layer.sort_key();
                let ix = match result.layers[..result.active_layer_count]
                    .binary_search_by_key(&key, |layer| layer.sort_key())
                {
                    Ok(ix) | Err(ix) => ix,
                };
                result.layers.insert(ix, layer);
                result.active_layer_count += 1;
            } else {
                result.layers.push(layer);
            }
        }

        result
    }

    pub fn grammars(&self) -> &[&'a Grammar] {
        &self.grammars
    }

    pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
        let layer = self.layers[..self.active_layer_count].first()?;
        let capture = layer.next_capture?;
        Some(SyntaxMapCapture {
            depth: layer.depth,
            grammar_index: layer.grammar_index,
            index: capture.index,
            node: capture.node,
        })
    }

    pub fn advance(&mut self) -> bool {
        let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
            layer
        } else {
            return false;
        };

        layer.advance();
        if layer.next_capture.is_some() {
            let key = layer.sort_key();
            let i = 1 + self.layers[1..self.active_layer_count]
                .iter()
                .position(|later_layer| key < later_layer.sort_key())
                .unwrap_or(self.active_layer_count - 1);
            self.layers[0..i].rotate_left(1);
        } else {
            self.layers[0..self.active_layer_count].rotate_left(1);
            self.active_layer_count -= 1;
        }

        true
    }

    pub fn set_byte_range(&mut self, range: Range<usize>) {
        for layer in &mut self.layers {
            layer.captures.set_byte_range(range.clone());
            if let Some(capture) = &layer.next_capture {
                if capture.node.end_byte() > range.start {
                    continue;
                }
            }
            layer.advance();
        }
        self.layers.sort_unstable_by_key(|layer| layer.sort_key());
        self.active_layer_count = self
            .layers
            .iter()
            .position(|layer| layer.next_capture.is_none())
            .unwrap_or(self.layers.len());
    }
}

impl<'a> SyntaxMapMatches<'a> {
    fn new(
        range: Range<usize>,
        text: &'a Rope,
        layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
        query: fn(&Grammar) -> Option<&Query>,
    ) -> Self {
        let mut result = Self::default();
        for SyntaxLayerInfo {
            language,
            depth,
            node,
        } in layers
        {
            let grammar = match &language.grammar {
                Some(grammar) => grammar,
                None => continue,
            };
            let query = match query(&grammar) {
                Some(query) => query,
                None => continue,
            };

            let mut query_cursor = QueryCursorHandle::new();

            // TODO - add a Tree-sitter API to remove the need for this.
            let cursor = unsafe {
                std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
            };

            cursor.set_byte_range(range.clone());
            let matches = cursor.matches(query, node, TextProvider(text));
            let grammar_index = result
                .grammars
                .iter()
                .position(|g| g.id == grammar.id())
                .unwrap_or_else(|| {
                    result.grammars.push(grammar);
                    result.grammars.len() - 1
                });
            let mut layer = SyntaxMapMatchesLayer {
                depth,
                grammar_index,
                matches,
                next_pattern_index: 0,
                next_captures: Vec::new(),
                has_next: false,
                _query_cursor: query_cursor,
            };

            layer.advance();
            if layer.has_next {
                let key = layer.sort_key();
                let ix = match result.layers[..result.active_layer_count]
                    .binary_search_by_key(&key, |layer| layer.sort_key())
                {
                    Ok(ix) | Err(ix) => ix,
                };
                result.layers.insert(ix, layer);
                result.active_layer_count += 1;
            } else {
                result.layers.push(layer);
            }
        }
        result
    }

    pub fn grammars(&self) -> &[&'a Grammar] {
        &self.grammars
    }

    pub fn peek(&self) -> Option<SyntaxMapMatch> {
        let layer = self.layers.first()?;
        if !layer.has_next {
            return None;
        }
        Some(SyntaxMapMatch {
            depth: layer.depth,
            grammar_index: layer.grammar_index,
            pattern_index: layer.next_pattern_index,
            captures: &layer.next_captures,
        })
    }

    pub fn advance(&mut self) -> bool {
        let layer = if let Some(layer) = self.layers.first_mut() {
            layer
        } else {
            return false;
        };

        layer.advance();
        if layer.has_next {
            let key = layer.sort_key();
            let i = 1 + self.layers[1..self.active_layer_count]
                .iter()
                .position(|later_layer| key < later_layer.sort_key())
                .unwrap_or(self.active_layer_count - 1);
            self.layers[0..i].rotate_left(1);
        } else {
            self.layers[0..self.active_layer_count].rotate_left(1);
            self.active_layer_count -= 1;
        }

        true
    }
}

impl<'a> SyntaxMapCapturesLayer<'a> {
    fn advance(&mut self) {
        self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]);
    }

    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
        if let Some(capture) = &self.next_capture {
            let range = capture.node.byte_range();
            (range.start, Reverse(range.end), self.depth)
        } else {
            (usize::MAX, Reverse(0), usize::MAX)
        }
    }
}

impl<'a> SyntaxMapMatchesLayer<'a> {
    fn advance(&mut self) {
        if let Some(mat) = self.matches.next() {
            self.next_captures.clear();
            self.next_captures.extend_from_slice(&mat.captures);
            self.next_pattern_index = mat.pattern_index;
            self.has_next = true;
        } else {
            self.has_next = false;
        }
    }

    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
        if self.has_next {
            let captures = &self.next_captures;
            if let Some((first, last)) = captures.first().zip(captures.last()) {
                return (
                    first.node.start_byte(),
                    Reverse(last.node.end_byte()),
                    self.depth,
                );
            }
        }
        (usize::MAX, Reverse(0), usize::MAX)
    }
}

impl<'a> Iterator for SyntaxMapCaptures<'a> {
    type Item = SyntaxMapCapture<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let result = self.peek();
        self.advance();
        result
    }
}

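// Merge two ascending sequences of ranges into a single sorted list, coalescing any ranges
// that touch or overlap.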
fn join_ranges(
    a: impl Iterator<Item = Range<usize>>,
    b: impl Iterator<Item = Range<usize>>,
) -> Vec<Range<usize>> {
    let mut result = Vec::<Range<usize>>::new();
    let mut a = a.peekable();
    let mut b = b.peekable();
    loop {
        let range = match (a.peek(), b.peek()) {
            (Some(range_a), Some(range_b)) => {
                if range_a.start < range_b.start {
                    a.next().unwrap()
                } else {
                    b.next().unwrap()
                }
            }
            (None, Some(_)) => b.next().unwrap(),
            (Some(_), None) => a.next().unwrap(),
            (None, None) => break,
        };

        if let Some(last) = result.last_mut() {
            if range.start <= last.end {
                last.end = last.end.max(range.end);
                continue;
            }
        }
        result.push(range);
    }
    result
}

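// Parse one layer. `start_byte` and `start_point` locate the layer within the buffer, so the
// included ranges are first translated into layer-relative coordinates; the text itself is
// streamed to Tree-sitter chunk by chunk from the rope rather than being copied.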
fn parse_text(
    grammar: &Grammar,
    text: &Rope,
    start_byte: usize,
    start_point: Point,
    mut ranges: Vec<tree_sitter::Range>,
    old_tree: Option<Tree>,
) -> Tree {
    for range in &mut ranges {
        range.start_byte -= start_byte;
        range.end_byte -= start_byte;
        range.start_point = (Point::from_ts_point(range.start_point) - start_point).to_ts_point();
        range.end_point = (Point::from_ts_point(range.end_point) - start_point).to_ts_point();
    }

    PARSER.with(|parser| {
        let mut parser = parser.borrow_mut();
        let mut chunks = text.chunks_in_range(start_byte..text.len());
        parser
            .set_included_ranges(&ranges)
            .expect("overlapping ranges");
        parser
            .set_language(grammar.ts_language)
            .expect("incompatible grammar");
        parser
            .parse_with(
                &mut move |offset, _| {
                    chunks.seek(start_byte + offset);
                    chunks.next().unwrap_or("").as_bytes()
                },
                old_tree.as_ref(),
            )
            .expect("invalid language")
    })
}

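// Run the grammar's injection query over the changed ranges of a freshly parsed layer and
// enqueue a `ParseStep` for every injected language that is found. Ranges for "combined"
// injections (e.g. ERB templates) are accumulated per language and enqueued as a single
// step covering the whole parent node.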
fn get_injections(
    config: &InjectionConfig,
    text: &BufferSnapshot,
    node: Node,
    language_registry: &LanguageRegistry,
    depth: usize,
    changed_ranges: &[Range<usize>],
    combined_injection_ranges: &mut HashMap<Arc<Language>, Vec<tree_sitter::Range>>,
    queue: &mut BinaryHeap<ParseStep>,
) -> bool {
    let mut result = false;
    let mut query_cursor = QueryCursorHandle::new();
    let mut prev_match = None;

    combined_injection_ranges.clear();
    for pattern in &config.patterns {
        if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) {
            if let Some(language) = language_registry.get_language(language_name) {
                combined_injection_ranges.insert(language, Vec::new());
            }
        }
    }

    for query_range in changed_ranges {
        query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end);
        for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) {
            let content_ranges = mat
                .nodes_for_capture_index(config.content_capture_ix)
                .map(|node| node.range())
                .collect::<Vec<_>>();
            if content_ranges.is_empty() {
                continue;
            }

            // Avoid duplicate matches if two changed ranges intersect the same injection.
            let content_range =
                content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
            if let Some((last_pattern_ix, last_range)) = &prev_match {
                if mat.pattern_index == *last_pattern_ix && content_range == *last_range {
                    continue;
                }
            }
            prev_match = Some((mat.pattern_index, content_range.clone()));

            let combined = config.patterns[mat.pattern_index].combined;
            let language_name = config.patterns[mat.pattern_index]
                .language
                .as_ref()
                .map(|s| Cow::Borrowed(s.as_ref()))
                .or_else(|| {
                    let ix = config.language_capture_ix?;
                    let node = mat.nodes_for_capture_index(ix).next()?;
                    Some(Cow::Owned(text.text_for_range(node.byte_range()).collect()))
                });

            if let Some(language_name) = language_name {
                if let Some(language) = language_registry.get_language(language_name.as_ref()) {
                    result = true;
                    let range = text.anchor_before(content_range.start)
                        ..text.anchor_after(content_range.end);
                    if combined {
                        combined_injection_ranges
                            .get_mut(&language.clone())
                            .unwrap()
                            .extend(content_ranges);
                    } else {
                        queue.push(ParseStep {
                            depth,
                            language,
                            included_ranges: content_ranges,
                            range,
                            mode: ParseMode::Single,
                        });
                    }
                }
            }
        }
    }

    for (language, mut included_ranges) in combined_injection_ranges.drain() {
        included_ranges.sort_unstable_by_key(|range| (range.start_byte, range.end_byte));
        let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte());
        queue.push(ParseStep {
            depth,
            language,
            range,
            included_ranges,
            mode: ParseMode::Combined {
                parent_layer_range: node.start_byte()..node.end_byte(),
                parent_layer_changed_ranges: changed_ranges.to_vec(),
            },
        })
    }

    result
}

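// Update a layer's set of included ranges after an edit: ranges overlapping any of
// `changed_ranges` are removed, and `new_ranges` (the freshly queried injection ranges) are
// spliced in at the corresponding positions. Both inputs are expected to be in ascending
// order.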
fn splice_included_ranges(
    mut ranges: Vec<tree_sitter::Range>,
    changed_ranges: &[Range<usize>],
    new_ranges: &[tree_sitter::Range],
) -> Vec<tree_sitter::Range> {
    let mut changed_ranges = changed_ranges.into_iter().peekable();
    let mut new_ranges = new_ranges.into_iter().peekable();
    let mut ranges_ix = 0;
    loop {
        let new_range = new_ranges.peek();
        let mut changed_range = changed_ranges.peek();

        // Process changed ranges before any overlapping new ranges.
        if let Some((changed, new)) = changed_range.zip(new_range) {
            if new.end_byte < changed.start {
                changed_range = None;
            }
        }

        if let Some(changed) = changed_range {
            let mut start_ix = ranges_ix
                + match ranges[ranges_ix..].binary_search_by_key(&changed.start, |r| r.end_byte) {
                    Ok(ix) | Err(ix) => ix,
                };
            let mut end_ix = ranges_ix
                + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) {
                    Ok(ix) => ix + 1,
                    Err(ix) => ix,
                };

            // If there are empty ranges, then there may be multiple ranges with the same
            // start or end. Expand the splice to include any adjacent ranges that touch
            // the changed range.
            while start_ix > 0 {
                if ranges[start_ix - 1].end_byte == changed.start {
                    start_ix -= 1;
                } else {
                    break;
                }
            }
            while let Some(range) = ranges.get(end_ix) {
                if range.start_byte == changed.end {
                    end_ix += 1;
                } else {
                    break;
                }
            }

            if end_ix > start_ix {
                ranges.splice(start_ix..end_ix, []);
            }
            changed_ranges.next();
            ranges_ix = start_ix;
        } else if let Some(new_range) = new_range {
            let ix = ranges_ix
                + match ranges[ranges_ix..]
                    .binary_search_by_key(&new_range.start_byte, |r| r.start_byte)
                {
                    Ok(ix) | Err(ix) => ix,
                };
            ranges.insert(ix, **new_range);
            new_ranges.next();
            ranges_ix = ix + 1;
        } else {
            break;
        }
    }
    ranges
}

impl std::ops::Deref for SyntaxMap {
    type Target = SyntaxSnapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}

impl PartialEq for ParseStep {
    fn eq(&self, _: &Self) -> bool {
        false
    }
}

impl Eq for ParseStep {}

impl PartialOrd for ParseStep {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(&other))
    }
}

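// `ParseStep`s are popped from a max-heap, so the ordering is reversed on depth (and on
// start position) to make the shallowest, left-most pending step come out first.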
impl Ord for ParseStep {
    fn cmp(&self, other: &Self) -> Ordering {
        let range_a = self.range();
        let range_b = other.range();
        Ord::cmp(&other.depth, &self.depth)
            .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
            .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
            .then_with(|| self.language.id().cmp(&other.language.id()))
    }
}

impl ParseStep {
    fn range(&self) -> Range<usize> {
        if let ParseMode::Combined {
            parent_layer_range, ..
        } = &self.mode
        {
            parent_layer_range.clone()
        } else {
            let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
            let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
            start..end
        }
    }
}

impl ChangedRegion {
    fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
        let range_a = &self.range;
        let range_b = &other.range;
        Ord::cmp(&self.depth, &other.depth)
            .then_with(|| range_a.start.cmp(&range_b.start, buffer))
            .then_with(|| range_b.end.cmp(&range_a.end, buffer))
    }
}

impl ChangeRegionSet {
    fn start_position(&self) -> DepthAndMaxPosition {
        self.0
            .first()
            .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| {
                DepthAndMaxPosition(region.depth, region.range.start)
            })
    }

    fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool {
        for region in &self.0 {
            if region.depth < layer.depth {
                continue;
            }
            if region.depth > layer.depth {
                break;
            }
            if region.range.end.cmp(&layer.range.start, text).is_le() {
                continue;
            }
            if region.range.start.cmp(&layer.range.end, text).is_ge() {
                break;
            }
            return true;
        }
        false
    }

    fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
        if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(&region, text)) {
            self.0.insert(ix, region);
        }
    }

    fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
        let prev_len = self.0.len();
        self.0.retain(|region| {
            region.depth > summary.max_depth
                || (region.depth == summary.max_depth
                    && region
                        .range
                        .end
                        .cmp(&summary.last_layer_range.start, text)
                        .is_gt())
        });
        self.0.len() < prev_len
    }
}

impl Default for SyntaxLayerSummary {
    fn default() -> Self {
        Self {
            max_depth: 0,
            min_depth: 0,
            range: Anchor::MAX..Anchor::MIN,
            last_layer_range: Anchor::MIN..Anchor::MAX,
            last_layer_language: None,
        }
    }
}

impl sum_tree::Summary for SyntaxLayerSummary {
    type Context = BufferSnapshot;

    fn add_summary(&mut self, other: &Self, buffer: &Self::Context) {
        if other.max_depth > self.max_depth {
            self.max_depth = other.max_depth;
            self.range = other.range.clone();
        } else {
            if other.range.start.cmp(&self.range.start, buffer).is_lt() {
                self.range.start = other.range.start;
            }
            if other.range.end.cmp(&self.range.end, buffer).is_gt() {
                self.range.end = other.range.end;
            }
        }
        self.last_layer_range = other.last_layer_range.clone();
        self.last_layer_language = other.last_layer_language;
    }
}

impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
    fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
        Ord::cmp(&self.depth, &cursor_location.max_depth)
            .then_with(|| {
                self.range
                    .start
                    .cmp(&cursor_location.last_layer_range.start, buffer)
            })
            .then_with(|| {
                cursor_location
                    .last_layer_range
                    .end
                    .cmp(&self.range.end, buffer)
            })
            .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
    }
}

impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition {
    fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
        Ord::cmp(&self.0, &cursor_location.max_depth)
            .then_with(|| self.1.cmp(&cursor_location.range.end, text))
    }
}

impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary>
    for SyntaxLayerPositionBeforeChange
{
    fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
        if self.change.cmp(cursor_location, buffer).is_le() {
            return Ordering::Less;
        } else {
            self.position.cmp(cursor_location, buffer)
        }
    }
}

impl sum_tree::Item for SyntaxLayer {
    type Summary = SyntaxLayerSummary;

    fn summary(&self) -> Self::Summary {
        SyntaxLayerSummary {
            min_depth: self.depth,
            max_depth: self.depth,
            range: self.range.clone(),
            last_layer_range: self.range.clone(),
            last_layer_language: self.language.id(),
        }
    }
}

impl std::fmt::Debug for SyntaxLayer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SyntaxLayer")
            .field("depth", &self.depth)
            .field("range", &self.range)
            .field("tree", &self.tree)
            .finish()
    }
}

impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> {
    type I = ByteChunks<'a>;

    fn text(&mut self, node: tree_sitter::Node) -> Self::I {
        ByteChunks(self.0.chunks_in_range(node.byte_range()))
    }
}

impl<'a> Iterator for ByteChunks<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(str::as_bytes)
    }
}

impl QueryCursorHandle {
    pub(crate) fn new() -> Self {
        let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
        cursor.set_match_limit(64);
        QueryCursorHandle(Some(cursor))
    }
}

impl Deref for QueryCursorHandle {
    type Target = QueryCursor;

    fn deref(&self) -> &Self::Target {
        self.0.as_ref().unwrap()
    }
}

impl DerefMut for QueryCursorHandle {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.0.as_mut().unwrap()
    }
}

impl Drop for QueryCursorHandle {
    fn drop(&mut self) {
        let mut cursor = self.0.take().unwrap();
        cursor.set_byte_range(0..usize::MAX);
        cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
        QUERY_CURSORS.lock().push(cursor)
    }
}

pub(crate) trait ToTreeSitterPoint {
    fn to_ts_point(self) -> tree_sitter::Point;
    fn from_ts_point(point: tree_sitter::Point) -> Self;
}

impl ToTreeSitterPoint for Point {
    fn to_ts_point(self) -> tree_sitter::Point {
        tree_sitter::Point::new(self.row as usize, self.column as usize)
    }

    fn from_ts_point(point: tree_sitter::Point) -> Self {
        Point::new(point.row as u32, point.column as u32)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::LanguageConfig;
    use rand::rngs::StdRng;
    use std::env;
    use text::Buffer;
    use unindent::Unindent as _;
    use util::test::marked_text_ranges;

    #[test]
    fn test_splice_included_ranges() {
        let ranges = vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)];

        let new_ranges = splice_included_ranges(
            ranges.clone(),
            &[54..56, 58..68],
            &[ts_range(50..54), ts_range(59..67)],
        );
        assert_eq!(
            new_ranges,
            &[
                ts_range(20..30),
                ts_range(50..54),
                ts_range(59..67),
                ts_range(80..90),
            ]
        );

        let new_ranges = splice_included_ranges(ranges.clone(), &[70..71, 91..100], &[]);
        assert_eq!(
            new_ranges,
            &[ts_range(20..30), ts_range(50..60), ts_range(80..90)]
        );

        let new_ranges =
            splice_included_ranges(ranges.clone(), &[], &[ts_range(0..2), ts_range(70..75)]);
        assert_eq!(
            new_ranges,
            &[
                ts_range(0..2),
                ts_range(20..30),
                ts_range(50..60),
                ts_range(70..75),
                ts_range(80..90)
            ]
        );

        let new_ranges = splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]);
        assert_eq!(new_ranges, &[ts_range(25..55), ts_range(80..90)]);

        fn ts_range(range: Range<usize>) -> tree_sitter::Range {
            tree_sitter::Range {
                start_byte: range.start,
                start_point: tree_sitter::Point {
                    row: 0,
                    column: range.start,
                },
                end_byte: range.end,
                end_point: tree_sitter::Point {
                    row: 0,
                    column: range.end,
                },
            }
        }
    }
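
    // A small, self-contained check of `join_ranges`, added here for illustration: it merges
    // two ascending range lists and coalesces ranges that touch or overlap, which is how
    // buffer edits and Tree-sitter's changed ranges are combined during `reparse`.
    #[test]
    fn test_join_ranges() {
        let joined = join_ranges(
            [0..3, 5..8, 20..21].into_iter(),
            [2..4, 8..10].into_iter(),
        );
        assert_eq!(joined, &[0..4, 5..10, 20..21]);
    }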

    #[gpui::test]
    fn test_syntax_map_layers_for_range() {
        let registry = Arc::new(LanguageRegistry::test());
        let language = Arc::new(rust_lang());
        registry.add(language.clone());

        let mut buffer = Buffer::new(
            0,
            0,
            r#"
            fn a() {
                assert_eq!(
                    b(vec![C {}]),
                    vec![d.e],
                );
                println!("{}", f(|_| true));
            }
            "#
            .unindent(),
        );

        let mut syntax_map = SyntaxMap::new();
        syntax_map.set_language_registry(registry.clone());
        syntax_map.reparse(language.clone(), &buffer);

        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 0)..Point::new(2, 0),
            &[
                "...(function_item ... (block (expression_statement (macro_invocation...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
            ],
        );
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (struct_expression ...",
            ],
        );
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(3, 14)..Point::new(3, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (field_expression ...",
            ],
        );
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(5, 12)..Point::new(5, 16),
            &[
                "...(function_item ...",
                "...(call_expression ... (arguments (closure_expression ...",
            ],
        );

        // Replace a vec! macro invocation with a plain slice, removing a syntactic layer.
        let macro_name_range = range_for_text(&buffer, "vec!");
        buffer.edit([(macro_name_range, "&")]);
        syntax_map.interpolate(&buffer);
        syntax_map.reparse(language.clone(), &buffer);

        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (reference_expression value: (array_expression...",
            ],
        );

        // Put the vec! macro back, adding back the syntactic layer.
        buffer.undo();
        syntax_map.interpolate(&buffer);
        syntax_map.reparse(language.clone(), &buffer);

        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (struct_expression ...",
            ],
        );
    }

    #[gpui::test]
    fn test_typing_multiple_new_injections() {
        let (buffer, syntax_map) = test_edit_sequence(
            "Rust",
            &[
                "fn a() { dbg }",
                "fn a() { dbg«!» }",
                "fn a() { dbg!«()» }",
                "fn a() { dbg!(«b») }",
                "fn a() { dbg!(b«.») }",
                "fn a() { dbg!(b.«c») }",
                "fn a() { dbg!(b.c«()») }",
                "fn a() { dbg!(b.c(«vec»)) }",
                "fn a() { dbg!(b.c(vec«!»)) }",
                "fn a() { dbg!(b.c(vec!«[]»)) }",
                "fn a() { dbg!(b.c(vec![«d»])) }",
                "fn a() { dbg!(b.c(vec![d«.»])) }",
                "fn a() { dbg!(b.c(vec![d.«e»])) }",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["field"],
            "fn a() { dbg!(b.«c»(vec![d.«e»])) }",
        );
    }

    #[gpui::test]
    fn test_pasting_new_injection_line_between_others() {
        let (buffer, syntax_map) = test_edit_sequence(
            "Rust",
            &[
                "
                fn a() {
                    b!(B {});
                    c!(C {});
                    d!(D {});
                    e!(E {});
                    f!(F {});
                    g!(G {});
                }
                ",
                "
                fn a() {
                    b!(B {});
                    c!(C {});
                    d!(D {});
                «    h!(H {});
                »    e!(E {});
                    f!(F {});
                    g!(G {});
                }
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["struct"],
            "
            fn a() {
                b!(«B {}»);
                c!(«C {}»);
                d!(«D {}»);
                h!(«H {}»);
                e!(«E {}»);
                f!(«F {}»);
                g!(«G {}»);
            }
            ",
        );
    }

    #[gpui::test]
    fn test_joining_injections_with_child_injections() {
        let (buffer, syntax_map) = test_edit_sequence(
            "Rust",
            &[
                "
                fn a() {
                    b!(
                        c![one.two.three],
                        d![four.five.six],
                    );
                    e!(
                        f![seven.eight],
                    );
                }
                ",
                "
                fn a() {
                    b!(
                        c![one.two.three],
                        d![four.five.six],
                    ˇ    f![seven.eight],
                    );
                }
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["field"],
            "
            fn a() {
                b!(
                    c![one.«two».«three»],
                    d![four.«five».«six»],
                    f![seven.«eight»],
                );
            }
            ",
        );
    }

    #[gpui::test]
    fn test_editing_edges_of_injection() {
        test_edit_sequence(
            "Rust",
            &[
                "
                fn a() {
                    b!(c!())
                }
                ",
                "
                fn a() {
                    «d»!(c!())
                }
                ",
                "
                fn a() {
                    «e»d!(c!())
                }
                ",
                "
                fn a() {
                    ed!«[»c!()«]»
                }
                ",
            ],
        );
    }

    #[gpui::test]
    fn test_edits_preceding_and_intersecting_injection() {
        test_edit_sequence(
            "Rust",
            &[
                //
                "const aaaaaaaaaaaa: B = c!(d(e.f));",
                "const aˇa: B = c!(d(eˇ));",
            ],
        );
    }

    #[gpui::test]
    fn test_non_local_changes_create_injections() {
        test_edit_sequence(
            "Rust",
            &[
                "
                // a! {
                static B: C = d;
                // }
                ",
                "
                ˇa! {
                static B: C = d;
                ˇ}
                ",
            ],
        );
    }

    #[gpui::test]
    fn test_creating_many_injections_in_one_edit() {
        test_edit_sequence(
            "Rust",
            &[
                "
                fn a() {
                    one(Two::three(3));
                    four(Five::six(6));
                    seven(Eight::nine(9));
                }
                ",
                "
                fn a() {
                    one«!»(Two::three(3));
                    four«!»(Five::six(6));
                    seven«!»(Eight::nine(9));
                }
                ",
                "
                fn a() {
                    one!(Two::three«!»(3));
                    four!(Five::six«!»(6));
                    seven!(Eight::nine«!»(9));
                }
                ",
            ],
        );
    }

    #[gpui::test]
    fn test_editing_across_injection_boundary() {
        test_edit_sequence(
            "Rust",
            &[
                "
                fn one() {
                    two();
                    three!(
                        three.four,
                        five.six,
                    );
                }
                ",
                "
                fn one() {
                    two();
                    th«irty_five![»
                        three.four,
                        five.six,
                    «    seven.eight,
                    ];»
                }
                ",
            ],
        );
    }

    #[gpui::test]
    fn test_removing_injection_by_replacing_across_boundary() {
        test_edit_sequence(
            "Rust",
            &[
                "
                fn one() {
                    two!(
                        three.four,
                    );
                }
                ",
                "
                fn one() {
                    t«en
                        .eleven(
                        twelve,
                    »
                        three.four,
                    );
                }
                ",
            ],
        );
    }

    #[gpui::test]
    fn test_combined_injections() {
        let (buffer, syntax_map) = test_edit_sequence(
            "ERB",
            &[
                "
                <body>
                    <% if @one %>
                        <div class=one>
                    <% else %>
                        <div class=two>
                    <% end %>
                    </div>
                </body>
                ",
                "
                <body>
                    <% if @one %>
                        <div class=one>
                    ˇ else ˇ
                        <div class=two>
                    <% end %>
                    </div>
                </body>
                ",
                "
                <body>
                    <% if @one «;» end %>
                    </div>
                </body>
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["tag", "ivar"],
            "
            <«body»>
                <% if «@one» ; end %>
                </«div»>
            </«body»>
            ",
        );
    }

    #[gpui::test]
    fn test_combined_injections_empty_ranges() {
        test_edit_sequence(
            "ERB",
            &[
                "
                <% if @one %>
                <% else %>
                <% end %>
                ",
                "
                <% if @one %>
                ˇ<% end %>
                ",
            ],
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_random_syntax_map_edits(mut rng: StdRng) {
        let operations = env::var("OPERATIONS")
            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
            .unwrap_or(10);

        let text = r#"
            fn test_something() {
                let vec = vec![5, 1, 3, 8];
                assert_eq!(
                    vec
                        .into_iter()
                        .map(|i| i * 2)
                        .collect::<Vec<usize>>(),
                    vec![
                        5 * 2, 1 * 2, 3 * 2, 8 * 2
                    ],
                );
            }
        "#
        .unindent()
        .repeat(2);

        let registry = Arc::new(LanguageRegistry::test());
        let language = Arc::new(rust_lang());
        registry.add(language.clone());
        let mut buffer = Buffer::new(0, 0, text);

        let mut syntax_map = SyntaxMap::new();
        syntax_map.set_language_registry(registry.clone());
        syntax_map.reparse(language.clone(), &buffer);

        let mut reference_syntax_map = SyntaxMap::new();
        reference_syntax_map.set_language_registry(registry.clone());

        log::info!("initial text:\n{}", buffer.text());

        for _ in 0..operations {
            let prev_buffer = buffer.snapshot();
            let prev_syntax_map = syntax_map.snapshot();

            buffer.randomly_edit(&mut rng, 3);
            log::info!("text:\n{}", buffer.text());

            syntax_map.interpolate(&buffer);
            check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer);

            syntax_map.reparse(language.clone(), &buffer);

            reference_syntax_map.clear();
            reference_syntax_map.reparse(language.clone(), &buffer);
        }

        for i in 0..operations {
            let i = operations - i - 1;
            buffer.undo();
            log::info!("undoing operation {}", i);
            log::info!("text:\n{}", buffer.text());

            syntax_map.interpolate(&buffer);
            syntax_map.reparse(language.clone(), &buffer);

            reference_syntax_map.clear();
            reference_syntax_map.reparse(language.clone(), &buffer);
            assert_eq!(
                syntax_map.layers(&buffer).len(),
                reference_syntax_map.layers(&buffer).len(),
                "wrong number of layers after undoing edit {i}"
            );
        }

        let layers = syntax_map.layers(&buffer);
        let reference_layers = reference_syntax_map.layers(&buffer);
        for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter())
        {
            assert_eq!(edited_layer.node.to_sexp(), reference_layer.node.to_sexp());
            assert_eq!(edited_layer.node.range(), reference_layer.node.range());
        }
    }

    fn check_interpolation(
        old_syntax_map: &SyntaxSnapshot,
        new_syntax_map: &SyntaxSnapshot,
        old_buffer: &BufferSnapshot,
        new_buffer: &BufferSnapshot,
    ) {
        let edits = new_buffer
            .edits_since::<usize>(&old_buffer.version())
            .collect::<Vec<_>>();

        for (old_layer, new_layer) in old_syntax_map
            .layers
            .iter()
            .zip(new_syntax_map.layers.iter())
        {
            assert_eq!(old_layer.range, new_layer.range);
            let old_start_byte = old_layer.range.start.to_offset(old_buffer);
            let new_start_byte = new_layer.range.start.to_offset(new_buffer);
            let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point();
            let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point();
            let old_node = old_layer
                .tree
                .root_node_with_offset(old_start_byte, old_start_point);
            let new_node = new_layer
                .tree
                .root_node_with_offset(new_start_byte, new_start_point);
            check_node_edits(
                old_layer.depth,
                &old_layer.range,
                old_node,
                new_node,
                old_buffer,
                new_buffer,
                &edits,
            );
        }

        fn check_node_edits(
            depth: usize,
            range: &Range<Anchor>,
            old_node: Node,
            new_node: Node,
            old_buffer: &BufferSnapshot,
            new_buffer: &BufferSnapshot,
            edits: &[text::Edit<usize>],
        ) {
            assert_eq!(old_node.kind(), new_node.kind());

            let old_range = old_node.byte_range();
            let new_range = new_node.byte_range();

            let is_edited = edits
                .iter()
                .any(|edit| edit.new.start < new_range.end && edit.new.end > new_range.start);
            if is_edited {
                assert!(
                    new_node.has_changes(),
                    concat!(
                        "failed to mark node as edited.\n",
                        "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
                        "node kind: {}, old node range: {:?}, new node range: {:?}",
                    ),
                    depth,
                    range.to_offset(old_buffer),
                    range.to_offset(new_buffer),
                    new_node.kind(),
                    old_range,
                    new_range,
                );
            }

            if !new_node.has_changes() {
                assert_eq!(
                    old_buffer
                        .text_for_range(old_range.clone())
                        .collect::<String>(),
                    new_buffer
                        .text_for_range(new_range.clone())
                        .collect::<String>(),
                    concat!(
                        "mismatched text for node\n",
                        "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
                        "node kind: {}, old node range:{:?}, new node range:{:?}",
                    ),
                    depth,
                    range.to_offset(old_buffer),
                    range.to_offset(new_buffer),
                    new_node.kind(),
                    old_range,
                    new_range,
                );
            }

            for i in 0..new_node.child_count() {
                check_node_edits(
                    depth,
                    range,
                    old_node.child(i).unwrap(),
                    new_node.child(i).unwrap(),
                    old_buffer,
                    new_buffer,
                    edits,
                )
            }
        }
    }

    fn test_edit_sequence(language_name: &str, steps: &[&str]) -> (Buffer, SyntaxMap) {
        let registry = Arc::new(LanguageRegistry::test());
        registry.add(Arc::new(rust_lang()));
        registry.add(Arc::new(ruby_lang()));
        registry.add(Arc::new(html_lang()));
        registry.add(Arc::new(erb_lang()));
        let language = registry.get_language(language_name).unwrap();
        let mut buffer = Buffer::new(0, 0, Default::default());

        let mut mutated_syntax_map = SyntaxMap::new();
        mutated_syntax_map.set_language_registry(registry.clone());
        mutated_syntax_map.reparse(language.clone(), &buffer);

        for (i, marked_string) in steps.into_iter().enumerate() {
            edit_buffer(&mut buffer, &marked_string.unindent());

            // Reparse the syntax map
            mutated_syntax_map.interpolate(&buffer);
            mutated_syntax_map.reparse(language.clone(), &buffer);

            // Create a second syntax map from scratch
            let mut reference_syntax_map = SyntaxMap::new();
            reference_syntax_map.set_language_registry(registry.clone());
            reference_syntax_map.reparse(language.clone(), &buffer);

            // Compare the mutated syntax map to the new syntax map
            let mutated_layers = mutated_syntax_map.layers(&buffer);
            let reference_layers = reference_syntax_map.layers(&buffer);
            assert_eq!(
                mutated_layers.len(),
                reference_layers.len(),
                "wrong number of layers at step {i}"
            );
            for (edited_layer, reference_layer) in
                mutated_layers.into_iter().zip(reference_layers.into_iter())
            {
                assert_eq!(
                    edited_layer.node.to_sexp(),
                    reference_layer.node.to_sexp(),
                    "different layer at step {i}"
                );
                assert_eq!(
                    edited_layer.node.range(),
                    reference_layer.node.range(),
                    "different layer at step {i}"
                );
            }
        }

        (buffer, mutated_syntax_map)
    }

    fn html_lang() -> Language {
        Language::new(
            LanguageConfig {
                name: "HTML".into(),
                path_suffixes: vec!["html".to_string()],
                ..Default::default()
            },
            Some(tree_sitter_html::language()),
        )
        .with_highlights_query(
            r#"
            (tag_name) @tag
            (erroneous_end_tag_name) @tag
            (attribute_name) @property
            "#,
        )
        .unwrap()
    }

    fn ruby_lang() -> Language {
        Language::new(
            LanguageConfig {
                name: "Ruby".into(),
                path_suffixes: vec!["rb".to_string()],
                ..Default::default()
            },
            Some(tree_sitter_ruby::language()),
        )
        .with_highlights_query(
            r#"
            ["if" "do" "else" "end"] @keyword
            (instance_variable) @ivar
            "#,
        )
        .unwrap()
    }

    fn erb_lang() -> Language {
        Language::new(
            LanguageConfig {
                name: "ERB".into(),
                path_suffixes: vec!["erb".to_string()],
                ..Default::default()
            },
            Some(tree_sitter_embedded_template::language()),
        )
        .with_highlights_query(
            r#"
            ["<%" "%>"] @keyword
            "#,
        )
        .unwrap()
        .with_injection_query(
            r#"
            ((code) @content
             (#set! "language" "ruby")
             (#set! "combined"))

            ((content) @content
             (#set! "language" "html")
             (#set! "combined"))
            "#,
        )
        .unwrap()
    }

    fn rust_lang() -> Language {
        Language::new(
            LanguageConfig {
                name: "Rust".into(),
                path_suffixes: vec!["rs".to_string()],
                ..Default::default()
            },
            Some(tree_sitter_rust::language()),
        )
        .with_highlights_query(
            r#"
            (field_identifier) @field
            (struct_expression) @struct
            "#,
        )
        .unwrap()
        .with_injection_query(
            r#"
            (macro_invocation
                (token_tree) @content
                (#set! "language" "rust"))
            "#,
        )
        .unwrap()
    }

    fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
        let start = buffer.as_rope().to_string().find(text).unwrap();
        start..start + text.len()
    }

    fn assert_layers_for_range(
        syntax_map: &SyntaxMap,
        buffer: &BufferSnapshot,
        range: Range<Point>,
        expected_layers: &[&str],
    ) {
        let layers = syntax_map
            .layers_for_range(range, &buffer)
            .collect::<Vec<_>>();
        assert_eq!(
            layers.len(),
            expected_layers.len(),
            "wrong number of layers"
        );
        for (i, (SyntaxLayerInfo { node, .. }, expected_s_exp)) in
            layers.iter().zip(expected_layers.iter()).enumerate()
        {
            let actual_s_exp = node.to_sexp();
            assert!(
                string_contains_sequence(
                    &actual_s_exp,
                    &expected_s_exp.split("...").collect::<Vec<_>>()
                ),
                "layer {i}:\n\nexpected: {expected_s_exp}\nactual: {actual_s_exp}",
            );
        }
    }

    fn assert_capture_ranges(
        syntax_map: &SyntaxMap,
        buffer: &BufferSnapshot,
        highlight_query_capture_names: &[&str],
        marked_string: &str,
    ) {
        let mut actual_ranges = Vec::<Range<usize>>::new();
        let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| {
            grammar.highlights_query.as_ref()
        });
        let queries = captures
            .grammars()
            .iter()
            .map(|grammar| grammar.highlights_query.as_ref().unwrap())
            .collect::<Vec<_>>();
        for capture in captures {
            let name = &queries[capture.grammar_index].capture_names()[capture.index as usize];
            if highlight_query_capture_names.contains(&name.as_str()) {
                actual_ranges.push(capture.node.byte_range());
            }
        }

        let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false);
        assert_eq!(text, buffer.text());
        assert_eq!(actual_ranges, expected_ranges);
    }

    fn edit_buffer(buffer: &mut Buffer, marked_string: &str) {
        let old_text = buffer.text();
        let (new_text, mut ranges) = marked_text_ranges(marked_string, false);
        if ranges.is_empty() {
            ranges.push(0..new_text.len());
        }

        assert_eq!(
            old_text[..ranges[0].start],
            new_text[..ranges[0].start],
            "invalid edit"
        );

        let mut delta = 0;
        let mut edits = Vec::new();
        let mut ranges = ranges.into_iter().peekable();

        while let Some(inserted_range) = ranges.next() {
            let new_start = inserted_range.start;
            let old_start = (new_start as isize - delta) as usize;

            let following_text = if let Some(next_range) = ranges.peek() {
                &new_text[inserted_range.end..next_range.start]
            } else {
                &new_text[inserted_range.end..]
            };

            let inserted_len = inserted_range.len();
            let deleted_len = old_text[old_start..]
                .find(following_text)
                .expect("invalid edit");

            let old_range = old_start..old_start + deleted_len;
            edits.push((old_range, new_text[inserted_range].to_string()));
            delta += inserted_len as isize - deleted_len as isize;
        }

        assert_eq!(
            old_text.len() as isize + delta,
            new_text.len() as isize,
            "invalid edit"
        );

        buffer.edit(edits);
    }

    pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool {
        let mut last_part_end = 0;
        for part in parts {
            if let Some(start_ix) = text[last_part_end..].find(part) {
                last_part_end += start_ix + part.len();
            } else {
                return false;
            }
        }
        true
    }
}