1use crate::{Grammar, InjectionConfig, Language, LanguageRegistry};
2use collections::HashMap;
3use lazy_static::lazy_static;
4use parking_lot::Mutex;
5use std::{
6 borrow::Cow,
7 cell::RefCell,
8 cmp::{Ordering, Reverse},
9 collections::BinaryHeap,
10 ops::{Deref, DerefMut, Range},
11 sync::Arc,
12};
13use sum_tree::{Bias, SeekTarget, SumTree};
14use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
15use tree_sitter::{
16 Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
17};
18
thread_local! {
    // One tree-sitter parser per thread. `parse_text` borrows it mutably for the
    // duration of each parse and reconfigures its language and included ranges.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
22
lazy_static! {
    // Process-wide, mutex-guarded list of query cursors.
    // NOTE(review): presumably a reuse pool drained/refilled by `QueryCursorHandle`;
    // that impl is not visible in this part of the file — confirm.
    static ref QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Default::default();
}
26
/// Owns the current [`SyntaxSnapshot`] together with the buffer versions it
/// was last parsed and last interpolated against.
#[derive(Default)]
pub struct SyntaxMap {
    /// Buffer version recorded by the most recent `reparse` / `did_parse`.
    parsed_version: clock::Global,
    /// Buffer version recorded by the most recent `interpolate`, `reparse`, or `did_parse`.
    interpolated_version: clock::Global,
    /// The current set of syntax layers.
    snapshot: SyntaxSnapshot,
    /// Registry passed to `SyntaxSnapshot::reparse`; `None` until
    /// `set_language_registry` is called.
    language_registry: Option<Arc<LanguageRegistry>>,
}
34
/// A cloneable collection of syntax layers stored in a [`SumTree`].
#[derive(Clone, Default)]
pub struct SyntaxSnapshot {
    /// All layers of the snapshot, summarized by [`SyntaxLayerSummary`].
    layers: SumTree<SyntaxLayer>,
}
39
/// Iterator-like view over query captures drawn from several syntax layers.
#[derive(Default)]
pub struct SyntaxMapCaptures<'a> {
    /// Per-layer capture state. The first `active_layer_count` entries still have a
    /// pending capture and are kept ordered by `sort_key`; exhausted layers follow.
    layers: Vec<SyntaxMapCapturesLayer<'a>>,
    /// Number of leading entries of `layers` that still have a pending capture.
    active_layer_count: usize,
    /// Distinct grammars (by id) of the layers; `grammar_index` values index into this.
    grammars: Vec<&'a Grammar>,
}
46
/// Iterator-like view over query matches drawn from several syntax layers.
#[derive(Default)]
pub struct SyntaxMapMatches<'a> {
    /// Per-layer match state. The first `active_layer_count` entries still have a
    /// pending match and are kept ordered by `sort_key`; exhausted layers follow.
    layers: Vec<SyntaxMapMatchesLayer<'a>>,
    /// Number of leading entries of `layers` that still have a pending match.
    active_layer_count: usize,
    /// Distinct grammars (by id) of the layers; `grammar_index` values index into this.
    grammars: Vec<&'a Grammar>,
}
53
/// A single capture produced by [`SyntaxMapCaptures`].
#[derive(Debug)]
pub struct SyntaxMapCapture<'a> {
    /// Depth of the layer the capture came from.
    pub depth: usize,
    /// The captured syntax node.
    pub node: Node<'a>,
    /// The capture's index, copied from the underlying `QueryCapture`.
    pub index: u32,
    /// Index into `SyntaxMapCaptures::grammars()` of the layer's grammar.
    pub grammar_index: usize,
}
61
/// A single match produced by [`SyntaxMapMatches`].
#[derive(Debug)]
pub struct SyntaxMapMatch<'a> {
    /// Depth of the layer the match came from.
    pub depth: usize,
    /// Index of the query pattern that matched.
    pub pattern_index: usize,
    /// The captures belonging to this match.
    pub captures: &'a [QueryCapture<'a>],
    /// Index into `SyntaxMapMatches::grammars()` of the layer's grammar.
    pub grammar_index: usize,
}
69
/// Capture-iteration state for one syntax layer.
struct SyntaxMapCapturesLayer<'a> {
    /// Depth of the layer.
    depth: usize,
    /// The underlying tree-sitter capture iterator.
    captures: QueryCaptures<'a, 'a, TextProvider<'a>>,
    /// The capture that `peek` would return next, or `None` when exhausted.
    next_capture: Option<QueryCapture<'a>>,
    /// Index of this layer's grammar in the owning `SyntaxMapCaptures::grammars`.
    grammar_index: usize,
    /// Keeps the cursor handle alive: `captures` was created from a reference to it
    /// whose lifetime was extended with `transmute` in `SyntaxMapCaptures::new`.
    _query_cursor: QueryCursorHandle,
}
77
/// Match-iteration state for one syntax layer.
struct SyntaxMapMatchesLayer<'a> {
    /// Depth of the layer.
    depth: usize,
    /// Pattern index of the pending match (meaningful only while `has_next`).
    next_pattern_index: usize,
    /// Captures of the pending match, copied out of the iterator by `advance`.
    next_captures: Vec<QueryCapture<'a>>,
    /// Whether a pending match is available.
    has_next: bool,
    /// The underlying tree-sitter match iterator.
    matches: QueryMatches<'a, 'a, TextProvider<'a>>,
    /// Index of this layer's grammar in the owning `SyntaxMapMatches::grammars`.
    grammar_index: usize,
    /// Keeps the cursor handle alive: `matches` was created from a reference to it
    /// whose lifetime was extended with `transmute` in `SyntaxMapMatches::new`.
    _query_cursor: QueryCursorHandle,
}
87
/// One parsed tree covering a range of the buffer in a single language.
#[derive(Clone)]
struct SyntaxLayer {
    /// Nesting depth; the root layer is pushed with depth 0 and injections with
    /// their parent's depth plus one.
    depth: usize,
    /// Anchored buffer range the layer covers.
    range: Range<Anchor>,
    /// The parsed tree. Its coordinates are relative to `range.start` (see the
    /// `root_node_with_offset` calls that re-base it).
    tree: tree_sitter::Tree,
    /// Language the layer was parsed with.
    language: Arc<Language>,
    /// True when the layer was produced by a `ParseMode::Combined` step.
    combined: bool,
}
96
/// Borrowed description of a layer handed to callers and to the
/// capture/match constructors.
#[derive(Debug)]
pub struct SyntaxLayerInfo<'a> {
    /// Depth of the layer.
    pub depth: usize,
    /// Root node of the layer's tree.
    pub node: Node<'a>,
    /// Language of the layer.
    pub language: &'a Arc<Language>,
}
103
/// Sum-tree summary for a run of [`SyntaxLayer`]s.
#[derive(Debug, Clone)]
struct SyntaxLayerSummary {
    /// NOTE(review): `add_summary` never updates this field; it is only read by the
    /// filter in `layers_for_range`. Where it is initialised per item is not visible here.
    min_depth: usize,
    /// Greatest layer depth in the summarized run.
    max_depth: usize,
    /// Range spanned by the layers at `max_depth` (reset whenever a deeper summary is added).
    range: Range<Anchor>,
    /// Range of the last layer in the run.
    last_layer_range: Range<Anchor>,
    /// Language id of the last layer in the run.
    last_layer_language: Option<usize>,
}
112
/// Seek target identifying a layer by depth, range, and language id.
#[derive(Clone, Debug)]
struct SyntaxLayerPosition {
    /// Depth to seek to.
    depth: usize,
    /// Range to seek to within that depth.
    range: Range<Anchor>,
    /// Language id used as the final tie-breaker.
    language: Option<usize>,
}
119
/// A `(depth, anchor)` pair used as a seek target into the layer tree.
#[derive(Clone, Debug)]
struct DepthAndMaxPosition(usize, Anchor);
122
/// Seek target combining a layer position with the start of the first changed region.
/// NOTE(review): presumably seeks to whichever comes first; its `SeekTarget` impl is
/// not visible in this part of the file — confirm.
#[derive(Clone, Debug)]
struct SyntaxLayerPositionBeforeChange {
    /// The layer position being sought.
    position: SyntaxLayerPosition,
    /// Start of the earliest pending changed region.
    change: DepthAndMaxPosition,
}
128
/// A unit of parsing work queued by `reparse` and `get_injections`.
struct ParseStep {
    /// Depth of the layer this step will produce.
    depth: usize,
    /// Language to parse with.
    language: Arc<Language>,
    /// Anchored range of the resulting layer.
    range: Range<Anchor>,
    /// Ranges of the buffer handed to the parser, in absolute buffer coordinates
    /// (`parse_text` re-bases them onto the step's start).
    included_ranges: Vec<tree_sitter::Range>,
    /// Whether this is a stand-alone parse or a combined injection.
    mode: ParseMode,
}
136
/// How a [`ParseStep`] relates to its parent layer.
enum ParseMode {
    /// An independent parse of the step's included ranges.
    Single,
    /// A combined injection: one tree parsed over several ranges of the parent layer.
    Combined {
        /// Byte range of the parent layer's root node.
        parent_layer_range: Range<usize>,
        /// Byte ranges of the parent layer that changed in this reparse.
        parent_layer_changed_ranges: Vec<Range<usize>>,
    },
}
144
/// A range at a given depth whose existing layers must be re-examined during `reparse`.
#[derive(Debug, PartialEq, Eq)]
struct ChangedRegion {
    /// Depth at which the change applies.
    depth: usize,
    /// Anchored range of the change.
    range: Range<Anchor>,
}
150
/// Set of [`ChangedRegion`]s kept sorted by `ChangedRegion::cmp` (see `insert`).
#[derive(Default)]
struct ChangeRegionSet(Vec<ChangedRegion>);
153
/// Wraps a rope so it can be passed as the text argument of
/// `QueryCursor::captures` / `QueryCursor::matches`.
struct TextProvider<'a>(&'a Rope);
155
/// Wrapper around a rope chunk iterator.
/// NOTE(review): presumably yields the chunks as byte slices for `TextProvider`;
/// its impl is not visible in this part of the file — confirm.
struct ByteChunks<'a>(text::Chunks<'a>);
157
/// Owning handle to a `QueryCursor`, created with `QueryCursorHandle::new()` and
/// used through `DerefMut`.
/// NOTE(review): presumably returns the cursor to `QUERY_CURSORS` on drop; its
/// impls are not visible in this part of the file — confirm.
struct QueryCursorHandle(Option<QueryCursor>);
159
160impl SyntaxMap {
161 pub fn new() -> Self {
162 Self::default()
163 }
164
165 pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
166 self.language_registry = Some(registry);
167 }
168
169 pub fn snapshot(&self) -> SyntaxSnapshot {
170 self.snapshot.clone()
171 }
172
173 pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
174 self.language_registry.clone()
175 }
176
177 pub fn parsed_version(&self) -> clock::Global {
178 self.parsed_version.clone()
179 }
180
181 pub fn interpolate(&mut self, text: &BufferSnapshot) {
182 self.snapshot.interpolate(&self.interpolated_version, text);
183 self.interpolated_version = text.version.clone();
184 }
185
186 #[cfg(test)]
187 pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
188 self.snapshot.reparse(
189 &self.parsed_version,
190 text,
191 self.language_registry.clone(),
192 language,
193 );
194 self.parsed_version = text.version.clone();
195 self.interpolated_version = text.version.clone();
196 }
197
198 pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) {
199 self.interpolated_version = version.clone();
200 self.parsed_version = version;
201 self.snapshot = snapshot;
202 }
203
204 pub fn clear(&mut self) {
205 self.snapshot = SyntaxSnapshot::default();
206 }
207}
208
209impl SyntaxSnapshot {
/// Returns `true` when the snapshot contains no layers.
pub fn is_empty(&self) -> bool {
    self.layers.is_empty()
}
213
/// Adjusts the existing layers for the buffer edits made since `from_version`
/// without re-parsing anything.
///
/// Layers that no edit touches are copied over unchanged; every layer that an
/// edit intersects has the edit applied to its tree via `Tree::edit`. Returns
/// early, leaving `self` untouched, when there are no edits.
pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
    let edits = text
        .anchored_edits_since::<(usize, Point)>(&from_version)
        .collect::<Vec<_>>();
    if edits.is_empty() {
        return;
    }

    let mut layers = SumTree::new();
    // Index of the first edit that can still affect layers at the current depth.
    let mut first_edit_ix_for_depth = 0;
    let mut prev_depth = 0;
    let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
    cursor.next(text);

    'outer: loop {
        // Each time the cursor reaches a deeper level, start over from the first edit.
        let depth = cursor.end(text).max_depth;
        if depth > prev_depth {
            first_edit_ix_for_depth = 0;
            prev_depth = depth;
        }

        // Preserve any layers at this depth that precede the first edit.
        if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
            let target = DepthAndMaxPosition(depth, edit_range.start);
            if target.cmp(&cursor.start(), text).is_gt() {
                let slice = cursor.slice(&target, Bias::Left, text);
                layers.push_tree(slice, text);
            }
        }
        // If this layer follows all of the edits, then preserve it and any
        // subsequent layers at this same depth.
        else if cursor.item().is_some() {
            let slice = cursor.slice(
                &SyntaxLayerPosition {
                    depth: depth + 1,
                    range: Anchor::MIN..Anchor::MAX,
                    language: None,
                },
                Bias::Left,
                text,
            );
            layers.push_tree(slice, text);
            continue;
        };

        // The cursor ran off the end of the tree: nothing left to process.
        let layer = if let Some(layer) = cursor.item() {
            layer
        } else {
            break;
        };
        let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text);

        // Ignore edits that end before the start of this layer, and don't consider them
        // for any subsequent layers at this same depth.
        loop {
            if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
                if edit_range.end.cmp(&layer.range.start, text).is_le() {
                    first_edit_ix_for_depth += 1;
                } else {
                    break;
                }
            } else {
                // Every edit precedes this layer; the next outer iteration takes the
                // "follows all of the edits" branch above.
                continue 'outer;
            }
        }

        let mut layer = layer.clone();
        for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
            // Ignore any edits that follow this layer.
            if edit_range.start.cmp(&layer.range.end, text).is_ge() {
                break;
            }

            // Apply any edits that intersect this layer to the layer's syntax tree.
            let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
                // The edit starts inside the layer: translate it into coordinates
                // relative to the layer's start.
                tree_sitter::InputEdit {
                    start_byte: edit.new.start.0 - start_byte,
                    old_end_byte: edit.new.start.0 - start_byte
                        + (edit.old.end.0 - edit.old.start.0),
                    new_end_byte: edit.new.end.0 - start_byte,
                    start_position: (edit.new.start.1 - start_point).to_ts_point(),
                    old_end_position: (edit.new.start.1 - start_point
                        + (edit.old.end.1 - edit.old.start.1))
                        .to_ts_point(),
                    new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
                }
            } else {
                // The edit starts before the layer: describe the tree's whole extent
                // as having been replaced by nothing.
                let node = layer.tree.root_node();
                tree_sitter::InputEdit {
                    start_byte: 0,
                    old_end_byte: node.end_byte(),
                    new_end_byte: 0,
                    start_position: Default::default(),
                    old_end_position: node.end_position(),
                    new_end_position: Default::default(),
                }
            };

            layer.tree.edit(&tree_edit);
        }

        debug_assert!(
            layer.tree.root_node().end_byte() <= text.len(),
            "tree's size {}, is larger than text size {}",
            layer.tree.root_node().end_byte(),
            text.len(),
        );

        layers.push(layer, text);
        cursor.next(text);
    }

    // Copy over whatever the cursor has not yet visited, then swap in the new tree.
    layers.push_tree(cursor.suffix(&text), &text);
    drop(cursor);
    self.layers = layers;
}
330
/// Rebuilds the layer tree for `text`, re-parsing what changed since `from_version`.
///
/// Work is driven by a priority queue of [`ParseStep`]s seeded with one step for
/// `root_language` covering the whole buffer. For each step, old layers that precede
/// it are carried over (or dropped when they intersect a changed region), an old
/// layer with the same range and language is reused as the basis for an incremental
/// parse, and — when the grammar has an injection config and a `registry` is given —
/// injections inside the changed ranges are queued as further steps.
pub fn reparse(
    &mut self,
    from_version: &clock::Global,
    text: &BufferSnapshot,
    registry: Option<Arc<LanguageRegistry>>,
    root_language: Arc<Language>,
) {
    let edits = text.edits_since::<usize>(from_version).collect::<Vec<_>>();
    let max_depth = self.layers.summary().max_depth;
    let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
    cursor.next(&text);
    let mut layers = SumTree::new();

    let mut changed_regions = ChangeRegionSet::default();
    let mut queue = BinaryHeap::new();
    // Scratch map reused across `get_injections` calls (it clears it on entry).
    let mut combined_injection_ranges = HashMap::default();
    // Seed the queue with the root layer, spanning the entire buffer.
    queue.push(ParseStep {
        depth: 0,
        language: root_language.clone(),
        included_ranges: vec![tree_sitter::Range {
            start_byte: 0,
            end_byte: text.len(),
            start_point: Point::zero().to_ts_point(),
            end_point: text.max_point().to_ts_point(),
        }],
        range: Anchor::MIN..Anchor::MAX,
        mode: ParseMode::Single,
    });

    loop {
        let step = queue.pop();
        // Position of the next step, or a position past every old layer once the
        // queue is empty (so the remaining old layers get processed one last time).
        let target = if let Some(step) = &step {
            SyntaxLayerPosition {
                depth: step.depth,
                range: step.range.clone(),
                language: step.language.id(),
            }
        } else {
            SyntaxLayerPosition {
                depth: max_depth + 1,
                range: Anchor::MAX..Anchor::MAX,
                language: None,
            }
        };

        // Walk the old layers that come before `target`. The outer loop repeats
        // whenever pruning shrank the changed-region set.
        let mut done = cursor.item().is_none();
        while !done && target.cmp(&cursor.end(text), &text).is_gt() {
            done = true;

            // Bulk-copy old layers up to the target, bounded by the first changed region.
            let bounded_target = SyntaxLayerPositionBeforeChange {
                position: target.clone(),
                change: changed_regions.start_position(),
            };
            if bounded_target.cmp(&cursor.start(), &text).is_gt() {
                let slice = cursor.slice(&bounded_target, Bias::Left, text);
                if !slice.is_empty() {
                    layers.push_tree(slice, &text);
                    if changed_regions.prune(cursor.end(text), text) {
                        done = false;
                    }
                }
            }

            // Then examine the remaining old layers before the target one at a time.
            while target.cmp(&cursor.end(text), text).is_gt() {
                let Some(layer) = cursor.item() else { break };

                if changed_regions.intersects(&layer, text) && !layer.combined {
                    // Drop this old layer and mark its range as changed one level
                    // deeper, so layers nested inside it are re-examined too.
                    changed_regions.insert(
                        ChangedRegion {
                            depth: layer.depth + 1,
                            range: layer.range.clone(),
                        },
                        text,
                    );
                } else {
                    layers.push(layer.clone(), text);
                }

                cursor.next(text);
                if changed_regions.prune(cursor.end(text), text) {
                    done = false;
                }
            }
        }

        let Some(step) = step else { break };
        let (step_start_byte, step_start_point) =
            step.range.start.summary::<(usize, Point)>(text);
        let step_end_byte = step.range.end.to_offset(text);
        // Languages without a grammar produce no layer.
        let Some(grammar) = step.language.grammar.as_deref() else { continue };

        // Reuse the old layer under the cursor only if it has exactly this step's
        // byte range and language; in that case consume it from the cursor.
        let mut old_layer = cursor.item();
        if let Some(layer) = old_layer {
            if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
                && layer.language.id() == step.language.id()
            {
                cursor.next(&text);
            } else {
                old_layer = None;
            }
        }

        let combined = matches!(step.mode, ParseMode::Combined { .. });
        let mut included_ranges = step.included_ranges;

        let tree;
        let changed_ranges;
        if let Some(old_layer) = old_layer {
            // For combined injections, merge the newly found ranges into the old
            // tree's included ranges rather than replacing them wholesale.
            if let ParseMode::Combined {
                parent_layer_changed_ranges,
                ..
            } = step.mode
            {
                included_ranges = splice_included_ranges(
                    old_layer.tree.included_ranges(),
                    &parent_layer_changed_ranges,
                    &included_ranges,
                );
            }

            // Incremental parse based on the old tree.
            tree = parse_text(
                grammar,
                text.as_rope(),
                step_start_byte,
                step_start_point,
                included_ranges,
                Some(old_layer.tree.clone()),
            );
            // Changed ranges = buffer edits overlapping this step, merged with the
            // ranges tree-sitter reports as differing between old and new trees
            // (shifted from layer-relative to absolute offsets).
            changed_ranges = join_ranges(
                edits
                    .iter()
                    .map(|e| e.new.clone())
                    .filter(|range| range.start < step_end_byte && range.end > step_start_byte),
                old_layer
                    .tree
                    .changed_ranges(&tree)
                    .map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte),
            );
        } else {
            // No reusable layer: parse from scratch and treat the whole step as changed.
            tree = parse_text(
                grammar,
                text.as_rope(),
                step_start_byte,
                step_start_point,
                included_ranges,
                None,
            );
            changed_ranges = vec![step_start_byte..step_end_byte];
        }

        layers.push(
            SyntaxLayer {
                depth: step.depth,
                range: step.range,
                tree: tree.clone(),
                language: step.language.clone(),
                combined,
            },
            &text,
        );

        // Only look for injections when the grammar defines them, a registry is
        // available, and something in this layer actually changed.
        if let (Some((config, registry)), false) = (
            grammar.injection_config.as_ref().zip(registry.as_ref()),
            changed_ranges.is_empty(),
        ) {
            for range in &changed_ranges {
                changed_regions.insert(
                    ChangedRegion {
                        depth: step.depth + 1,
                        range: text.anchor_before(range.start)..text.anchor_after(range.end),
                    },
                    text,
                );
            }
            get_injections(
                config,
                text,
                tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()),
                registry,
                step.depth + 1,
                &changed_ranges,
                &mut combined_injection_ranges,
                &mut queue,
            );
        }
    }

    drop(cursor);
    self.layers = layers;
}
521
522 pub fn single_tree_captures<'a>(
523 range: Range<usize>,
524 text: &'a Rope,
525 tree: &'a Tree,
526 language: &'a Arc<Language>,
527 query: fn(&Grammar) -> Option<&Query>,
528 ) -> SyntaxMapCaptures<'a> {
529 SyntaxMapCaptures::new(
530 range.clone(),
531 text,
532 [SyntaxLayerInfo {
533 language,
534 depth: 0,
535 node: tree.root_node(),
536 }]
537 .into_iter(),
538 query,
539 )
540 }
541
542 pub fn captures<'a>(
543 &'a self,
544 range: Range<usize>,
545 buffer: &'a BufferSnapshot,
546 query: fn(&Grammar) -> Option<&Query>,
547 ) -> SyntaxMapCaptures {
548 SyntaxMapCaptures::new(
549 range.clone(),
550 buffer.as_rope(),
551 self.layers_for_range(range, buffer).into_iter(),
552 query,
553 )
554 }
555
556 pub fn matches<'a>(
557 &'a self,
558 range: Range<usize>,
559 buffer: &'a BufferSnapshot,
560 query: fn(&Grammar) -> Option<&Query>,
561 ) -> SyntaxMapMatches {
562 SyntaxMapMatches::new(
563 range.clone(),
564 buffer.as_rope(),
565 self.layers_for_range(range, buffer).into_iter(),
566 query,
567 )
568 }
569
/// Test-only: collects the layers yielded for the whole buffer.
#[cfg(test)]
pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayerInfo> {
    let whole_buffer = 0..buffer.len();
    self.layers_for_range(whole_buffer, buffer)
        .collect::<Vec<_>>()
}
574
575 pub fn layers_for_range<'a, T: ToOffset>(
576 &'a self,
577 range: Range<T>,
578 buffer: &'a BufferSnapshot,
579 ) -> impl 'a + Iterator<Item = SyntaxLayerInfo> {
580 let start = buffer.anchor_before(range.start.to_offset(buffer));
581 let end = buffer.anchor_after(range.end.to_offset(buffer));
582
583 let mut cursor = self.layers.filter::<_, ()>(move |summary| {
584 if summary.max_depth > summary.min_depth {
585 true
586 } else {
587 let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
588 let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
589 !is_before_start && !is_after_end
590 }
591 });
592
593 cursor.next(buffer);
594 std::iter::from_fn(move || {
595 if let Some(layer) = cursor.item() {
596 let info = SyntaxLayerInfo {
597 language: &layer.language,
598 depth: layer.depth,
599 node: layer.tree.root_node_with_offset(
600 layer.range.start.to_offset(buffer),
601 layer.range.start.to_point(buffer).to_ts_point(),
602 ),
603 };
604 cursor.next(buffer);
605 Some(info)
606 } else {
607 None
608 }
609 })
610 }
611}
612
613impl<'a> SyntaxMapCaptures<'a> {
614 fn new(
615 range: Range<usize>,
616 text: &'a Rope,
617 layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
618 query: fn(&Grammar) -> Option<&Query>,
619 ) -> Self {
620 let mut result = Self {
621 layers: Vec::new(),
622 grammars: Vec::new(),
623 active_layer_count: 0,
624 };
625 for SyntaxLayerInfo {
626 language,
627 depth,
628 node,
629 } in layers
630 {
631 let grammar = match &language.grammar {
632 Some(grammer) => grammer,
633 None => continue,
634 };
635 let query = match query(&grammar) {
636 Some(query) => query,
637 None => continue,
638 };
639
640 let mut query_cursor = QueryCursorHandle::new();
641
642 // TODO - add a Tree-sitter API to remove the need for this.
643 let cursor = unsafe {
644 std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
645 };
646
647 cursor.set_byte_range(range.clone());
648 let captures = cursor.captures(query, node, TextProvider(text));
649 let grammar_index = result
650 .grammars
651 .iter()
652 .position(|g| g.id == grammar.id())
653 .unwrap_or_else(|| {
654 result.grammars.push(grammar);
655 result.grammars.len() - 1
656 });
657 let mut layer = SyntaxMapCapturesLayer {
658 depth,
659 grammar_index,
660 next_capture: None,
661 captures,
662 _query_cursor: query_cursor,
663 };
664
665 layer.advance();
666 if layer.next_capture.is_some() {
667 let key = layer.sort_key();
668 let ix = match result.layers[..result.active_layer_count]
669 .binary_search_by_key(&key, |layer| layer.sort_key())
670 {
671 Ok(ix) | Err(ix) => ix,
672 };
673 result.layers.insert(ix, layer);
674 result.active_layer_count += 1;
675 } else {
676 result.layers.push(layer);
677 }
678 }
679
680 result
681 }
682
683 pub fn grammars(&self) -> &[&'a Grammar] {
684 &self.grammars
685 }
686
687 pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
688 let layer = self.layers[..self.active_layer_count].first()?;
689 let capture = layer.next_capture?;
690 Some(SyntaxMapCapture {
691 depth: layer.depth,
692 grammar_index: layer.grammar_index,
693 index: capture.index,
694 node: capture.node,
695 })
696 }
697
698 pub fn advance(&mut self) -> bool {
699 let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
700 layer
701 } else {
702 return false;
703 };
704
705 layer.advance();
706 if layer.next_capture.is_some() {
707 let key = layer.sort_key();
708 let i = 1 + self.layers[1..self.active_layer_count]
709 .iter()
710 .position(|later_layer| key < later_layer.sort_key())
711 .unwrap_or(self.active_layer_count - 1);
712 self.layers[0..i].rotate_left(1);
713 } else {
714 self.layers[0..self.active_layer_count].rotate_left(1);
715 self.active_layer_count -= 1;
716 }
717
718 true
719 }
720
721 pub fn set_byte_range(&mut self, range: Range<usize>) {
722 for layer in &mut self.layers {
723 layer.captures.set_byte_range(range.clone());
724 if let Some(capture) = &layer.next_capture {
725 if capture.node.end_byte() > range.start {
726 continue;
727 }
728 }
729 layer.advance();
730 }
731 self.layers.sort_unstable_by_key(|layer| layer.sort_key());
732 self.active_layer_count = self
733 .layers
734 .iter()
735 .position(|layer| layer.next_capture.is_none())
736 .unwrap_or(self.layers.len());
737 }
738}
739
740impl<'a> SyntaxMapMatches<'a> {
741 fn new(
742 range: Range<usize>,
743 text: &'a Rope,
744 layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
745 query: fn(&Grammar) -> Option<&Query>,
746 ) -> Self {
747 let mut result = Self::default();
748 for SyntaxLayerInfo {
749 language,
750 depth,
751 node,
752 } in layers
753 {
754 let grammar = match &language.grammar {
755 Some(grammer) => grammer,
756 None => continue,
757 };
758 let query = match query(&grammar) {
759 Some(query) => query,
760 None => continue,
761 };
762
763 let mut query_cursor = QueryCursorHandle::new();
764
765 // TODO - add a Tree-sitter API to remove the need for this.
766 let cursor = unsafe {
767 std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut())
768 };
769
770 cursor.set_byte_range(range.clone());
771 let matches = cursor.matches(query, node, TextProvider(text));
772 let grammar_index = result
773 .grammars
774 .iter()
775 .position(|g| g.id == grammar.id())
776 .unwrap_or_else(|| {
777 result.grammars.push(grammar);
778 result.grammars.len() - 1
779 });
780 let mut layer = SyntaxMapMatchesLayer {
781 depth,
782 grammar_index,
783 matches,
784 next_pattern_index: 0,
785 next_captures: Vec::new(),
786 has_next: false,
787 _query_cursor: query_cursor,
788 };
789
790 layer.advance();
791 if layer.has_next {
792 let key = layer.sort_key();
793 let ix = match result.layers[..result.active_layer_count]
794 .binary_search_by_key(&key, |layer| layer.sort_key())
795 {
796 Ok(ix) | Err(ix) => ix,
797 };
798 result.layers.insert(ix, layer);
799 result.active_layer_count += 1;
800 } else {
801 result.layers.push(layer);
802 }
803 }
804 result
805 }
806
807 pub fn grammars(&self) -> &[&'a Grammar] {
808 &self.grammars
809 }
810
811 pub fn peek(&self) -> Option<SyntaxMapMatch> {
812 let layer = self.layers.first()?;
813 if !layer.has_next {
814 return None;
815 }
816 Some(SyntaxMapMatch {
817 depth: layer.depth,
818 grammar_index: layer.grammar_index,
819 pattern_index: layer.next_pattern_index,
820 captures: &layer.next_captures,
821 })
822 }
823
824 pub fn advance(&mut self) -> bool {
825 let layer = if let Some(layer) = self.layers.first_mut() {
826 layer
827 } else {
828 return false;
829 };
830
831 layer.advance();
832 if layer.has_next {
833 let key = layer.sort_key();
834 let i = 1 + self.layers[1..self.active_layer_count]
835 .iter()
836 .position(|later_layer| key < later_layer.sort_key())
837 .unwrap_or(self.active_layer_count - 1);
838 self.layers[0..i].rotate_left(1);
839 } else {
840 self.layers[0..self.active_layer_count].rotate_left(1);
841 self.active_layer_count -= 1;
842 }
843
844 true
845 }
846}
847
848impl<'a> SyntaxMapCapturesLayer<'a> {
849 fn advance(&mut self) {
850 self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]);
851 }
852
853 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
854 if let Some(capture) = &self.next_capture {
855 let range = capture.node.byte_range();
856 (range.start, Reverse(range.end), self.depth)
857 } else {
858 (usize::MAX, Reverse(0), usize::MAX)
859 }
860 }
861}
862
863impl<'a> SyntaxMapMatchesLayer<'a> {
864 fn advance(&mut self) {
865 if let Some(mat) = self.matches.next() {
866 self.next_captures.clear();
867 self.next_captures.extend_from_slice(&mat.captures);
868 self.next_pattern_index = mat.pattern_index;
869 self.has_next = true;
870 } else {
871 self.has_next = false;
872 }
873 }
874
875 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
876 if self.has_next {
877 let captures = &self.next_captures;
878 if let Some((first, last)) = captures.first().zip(captures.last()) {
879 return (
880 first.node.start_byte(),
881 Reverse(last.node.end_byte()),
882 self.depth,
883 );
884 }
885 }
886 (usize::MAX, Reverse(0), usize::MAX)
887 }
888}
889
890impl<'a> Iterator for SyntaxMapCaptures<'a> {
891 type Item = SyntaxMapCapture<'a>;
892
893 fn next(&mut self) -> Option<Self::Item> {
894 let result = self.peek();
895 self.advance();
896 result
897 }
898}
899
/// Merges two streams of byte ranges into one list, coalescing ranges that
/// overlap or touch.
///
/// At each step the range with the smaller start is taken; on a tie the one
/// from `b` goes first. Both inputs are expected to be ordered by start.
fn join_ranges(
    a: impl Iterator<Item = Range<usize>>,
    b: impl Iterator<Item = Range<usize>>,
) -> Vec<Range<usize>> {
    let mut a = a.peekable();
    let mut b = b.peekable();
    let mut merged: Vec<Range<usize>> = Vec::new();

    loop {
        // Decide which stream supplies the next range.
        let take_from_a = match (a.peek(), b.peek()) {
            (Some(head_a), Some(head_b)) => head_a.start < head_b.start,
            (Some(_), None) => true,
            (None, Some(_)) => false,
            (None, None) => break,
        };
        let candidate = if take_from_a { a.next() } else { b.next() };
        let next = match candidate {
            Some(range) => range,
            // Unreachable: the chosen stream was just peeked as non-empty.
            None => break,
        };

        // Fold the range into the previous one when they overlap or touch.
        if let Some(prev) = merged.last_mut() {
            if prev.end >= next.start {
                if next.end > prev.end {
                    prev.end = next.end;
                }
                continue;
            }
        }
        merged.push(next);
    }

    merged
}
931
932fn parse_text(
933 grammar: &Grammar,
934 text: &Rope,
935 start_byte: usize,
936 start_point: Point,
937 mut ranges: Vec<tree_sitter::Range>,
938 old_tree: Option<Tree>,
939) -> Tree {
940 for range in &mut ranges {
941 range.start_byte -= start_byte;
942 range.end_byte -= start_byte;
943 range.start_point = (Point::from_ts_point(range.start_point) - start_point).to_ts_point();
944 range.end_point = (Point::from_ts_point(range.end_point) - start_point).to_ts_point();
945 }
946
947 PARSER.with(|parser| {
948 let mut parser = parser.borrow_mut();
949 let mut chunks = text.chunks_in_range(start_byte..text.len());
950 parser
951 .set_included_ranges(&ranges)
952 .expect("overlapping ranges");
953 parser
954 .set_language(grammar.ts_language)
955 .expect("incompatible grammar");
956 parser
957 .parse_with(
958 &mut move |offset, _| {
959 chunks.seek(start_byte + offset);
960 chunks.next().unwrap_or("").as_bytes()
961 },
962 old_tree.as_ref(),
963 )
964 .expect("invalid language")
965 })
966}
967
968fn get_injections(
969 config: &InjectionConfig,
970 text: &BufferSnapshot,
971 node: Node,
972 language_registry: &LanguageRegistry,
973 depth: usize,
974 changed_ranges: &[Range<usize>],
975 combined_injection_ranges: &mut HashMap<Arc<Language>, Vec<tree_sitter::Range>>,
976 queue: &mut BinaryHeap<ParseStep>,
977) -> bool {
978 let mut result = false;
979 let mut query_cursor = QueryCursorHandle::new();
980 let mut prev_match = None;
981
982 combined_injection_ranges.clear();
983 for pattern in &config.patterns {
984 if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) {
985 if let Some(language) = language_registry.get_language(language_name) {
986 combined_injection_ranges.insert(language, Vec::new());
987 }
988 }
989 }
990
991 for query_range in changed_ranges {
992 query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end);
993 for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) {
994 let content_ranges = mat
995 .nodes_for_capture_index(config.content_capture_ix)
996 .map(|node| node.range())
997 .collect::<Vec<_>>();
998 if content_ranges.is_empty() {
999 continue;
1000 }
1001
1002 // Avoid duplicate matches if two changed ranges intersect the same injection.
1003 let content_range =
1004 content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
1005 if let Some((last_pattern_ix, last_range)) = &prev_match {
1006 if mat.pattern_index == *last_pattern_ix && content_range == *last_range {
1007 continue;
1008 }
1009 }
1010 prev_match = Some((mat.pattern_index, content_range.clone()));
1011
1012 let combined = config.patterns[mat.pattern_index].combined;
1013 let language_name = config.patterns[mat.pattern_index]
1014 .language
1015 .as_ref()
1016 .map(|s| Cow::Borrowed(s.as_ref()))
1017 .or_else(|| {
1018 let ix = config.language_capture_ix?;
1019 let node = mat.nodes_for_capture_index(ix).next()?;
1020 Some(Cow::Owned(text.text_for_range(node.byte_range()).collect()))
1021 });
1022
1023 if let Some(language_name) = language_name {
1024 if let Some(language) = language_registry.get_language(language_name.as_ref()) {
1025 result = true;
1026 let range = text.anchor_before(content_range.start)
1027 ..text.anchor_after(content_range.end);
1028 if combined {
1029 combined_injection_ranges
1030 .get_mut(&language.clone())
1031 .unwrap()
1032 .extend(content_ranges);
1033 } else {
1034 queue.push(ParseStep {
1035 depth,
1036 language,
1037 included_ranges: content_ranges,
1038 range,
1039 mode: ParseMode::Single,
1040 });
1041 }
1042 }
1043 }
1044 }
1045 }
1046
1047 for (language, mut included_ranges) in combined_injection_ranges.drain() {
1048 included_ranges.sort_unstable();
1049 let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte());
1050 queue.push(ParseStep {
1051 depth,
1052 language,
1053 range,
1054 included_ranges,
1055 mode: ParseMode::Combined {
1056 parent_layer_range: node.start_byte()..node.end_byte(),
1057 parent_layer_changed_ranges: changed_ranges.to_vec(),
1058 },
1059 })
1060 }
1061
1062 result
1063}
1064
/// Updates an old tree's included `ranges` for a reparse: old ranges that fall
/// within `changed_ranges` are removed and `new_ranges` are inserted at their
/// sorted positions. Returns the updated list.
///
/// The binary searches require `ranges` to be sorted by byte position; the
/// peek-based merge requires the same of `changed_ranges` and `new_ranges`.
fn splice_included_ranges(
    mut ranges: Vec<tree_sitter::Range>,
    changed_ranges: &[Range<usize>],
    new_ranges: &[tree_sitter::Range],
) -> Vec<tree_sitter::Range> {
    let mut changed_ranges = changed_ranges.into_iter().peekable();
    let mut new_ranges = new_ranges.into_iter().peekable();
    // Lower bound for later searches; everything before it is already final.
    let mut ranges_ix = 0;
    loop {
        let new_range = new_ranges.peek();
        let mut changed_range = changed_ranges.peek();

        // process changed ranges before any overlapping new ranges
        if let Some((changed, new)) = changed_range.zip(new_range) {
            // The pending new range ends before the pending change starts, so it
            // must be inserted first; hold the change back for a later iteration.
            if new.end_byte < changed.start {
                changed_range = None;
            }
        }

        if let Some(changed) = changed_range {
            // First old range whose end is at or after the start of the change.
            let start_ix = ranges_ix
                + match ranges[ranges_ix..].binary_search_by_key(&changed.start, |r| r.end_byte) {
                    Ok(ix) | Err(ix) => ix,
                };
            // One past the last old range whose start is at or before the end of the change.
            let end_ix = ranges_ix
                + match ranges[ranges_ix..].binary_search_by_key(&changed.end, |r| r.start_byte) {
                    Ok(ix) => ix + 1,
                    Err(ix) => ix,
                };
            // Remove the old ranges that the change covers.
            if end_ix > start_ix {
                ranges.splice(start_ix..end_ix, []);
            }
            changed_ranges.next();
            ranges_ix = start_ix;
        } else if let Some(new_range) = new_range {
            // Insert the new range at its sorted position by start byte.
            let ix = ranges_ix
                + match ranges[ranges_ix..]
                    .binary_search_by_key(&new_range.start_byte, |r| r.start_byte)
                {
                    Ok(ix) | Err(ix) => ix,
                };
            ranges.insert(ix, **new_range);
            new_ranges.next();
            ranges_ix = ix + 1;
        } else {
            break;
        }
    }
    ranges
}
1115
/// Lets a `SyntaxMap` be used wherever a `&SyntaxSnapshot` is expected by
/// dereferencing to its current snapshot.
impl std::ops::Deref for SyntaxMap {
    type Target = SyntaxSnapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
1123
1124impl PartialEq for ParseStep {
1125 fn eq(&self, _: &Self) -> bool {
1126 false
1127 }
1128}
1129
// Marker impl needed because `Ord` has `Eq` as a supertrait.
impl Eq for ParseStep {}
1131
1132impl PartialOrd for ParseStep {
1133 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1134 Some(self.cmp(&other))
1135 }
1136}
1137
1138impl Ord for ParseStep {
1139 fn cmp(&self, other: &Self) -> Ordering {
1140 let range_a = self.range();
1141 let range_b = other.range();
1142 Ord::cmp(&other.depth, &self.depth)
1143 .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
1144 .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
1145 .then_with(|| self.language.id().cmp(&other.language.id()))
1146 }
1147}
1148
1149impl ParseStep {
1150 fn range(&self) -> Range<usize> {
1151 if let ParseMode::Combined {
1152 parent_layer_range, ..
1153 } = &self.mode
1154 {
1155 parent_layer_range.clone()
1156 } else {
1157 let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
1158 let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
1159 start..end
1160 }
1161 }
1162}
1163
1164impl ChangedRegion {
1165 fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
1166 let range_a = &self.range;
1167 let range_b = &other.range;
1168 Ord::cmp(&self.depth, &other.depth)
1169 .then_with(|| range_a.start.cmp(&range_b.start, buffer))
1170 .then_with(|| range_b.end.cmp(&range_a.end, buffer))
1171 }
1172}
1173
1174impl ChangeRegionSet {
1175 fn start_position(&self) -> DepthAndMaxPosition {
1176 self.0
1177 .first()
1178 .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| {
1179 DepthAndMaxPosition(region.depth, region.range.start)
1180 })
1181 }
1182
1183 fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool {
1184 for region in &self.0 {
1185 if region.depth < layer.depth {
1186 continue;
1187 }
1188 if region.depth > layer.depth {
1189 break;
1190 }
1191 if region.range.end.cmp(&layer.range.start, text).is_le() {
1192 continue;
1193 }
1194 if region.range.start.cmp(&layer.range.end, text).is_ge() {
1195 break;
1196 }
1197 return true;
1198 }
1199 false
1200 }
1201
1202 fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
1203 if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(®ion, text)) {
1204 self.0.insert(ix, region);
1205 }
1206 }
1207
1208 fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
1209 let prev_len = self.0.len();
1210 self.0.retain(|region| {
1211 region.depth > summary.max_depth
1212 || (region.depth == summary.max_depth
1213 && region
1214 .range
1215 .end
1216 .cmp(&summary.last_layer_range.start, text)
1217 .is_gt())
1218 });
1219 self.0.len() < prev_len
1220 }
1221}
1222
1223impl Default for SyntaxLayerSummary {
1224 fn default() -> Self {
1225 Self {
1226 max_depth: 0,
1227 min_depth: 0,
1228 range: Anchor::MAX..Anchor::MIN,
1229 last_layer_range: Anchor::MIN..Anchor::MAX,
1230 last_layer_language: None,
1231 }
1232 }
1233}
1234
1235impl sum_tree::Summary for SyntaxLayerSummary {
1236 type Context = BufferSnapshot;
1237
1238 fn add_summary(&mut self, other: &Self, buffer: &Self::Context) {
1239 if other.max_depth > self.max_depth {
1240 self.max_depth = other.max_depth;
1241 self.range = other.range.clone();
1242 } else {
1243 if other.range.start.cmp(&self.range.start, buffer).is_lt() {
1244 self.range.start = other.range.start;
1245 }
1246 if other.range.end.cmp(&self.range.end, buffer).is_gt() {
1247 self.range.end = other.range.end;
1248 }
1249 }
1250 self.last_layer_range = other.last_layer_range.clone();
1251 self.last_layer_language = other.last_layer_language;
1252 }
1253}
1254
1255impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
1256 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1257 Ord::cmp(&self.depth, &cursor_location.max_depth)
1258 .then_with(|| {
1259 self.range
1260 .start
1261 .cmp(&cursor_location.last_layer_range.start, buffer)
1262 })
1263 .then_with(|| {
1264 cursor_location
1265 .last_layer_range
1266 .end
1267 .cmp(&self.range.end, buffer)
1268 })
1269 .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
1270 }
1271}
1272
1273impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition {
1274 fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
1275 Ord::cmp(&self.0, &cursor_location.max_depth)
1276 .then_with(|| self.1.cmp(&cursor_location.range.end, text))
1277 }
1278}
1279
1280impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary>
1281 for SyntaxLayerPositionBeforeChange
1282{
1283 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1284 if self.change.cmp(cursor_location, buffer).is_le() {
1285 return Ordering::Less;
1286 } else {
1287 self.position.cmp(cursor_location, buffer)
1288 }
1289 }
1290}
1291
1292impl sum_tree::Item for SyntaxLayer {
1293 type Summary = SyntaxLayerSummary;
1294
1295 fn summary(&self) -> Self::Summary {
1296 SyntaxLayerSummary {
1297 min_depth: self.depth,
1298 max_depth: self.depth,
1299 range: self.range.clone(),
1300 last_layer_range: self.range.clone(),
1301 last_layer_language: self.language.id(),
1302 }
1303 }
1304}
1305
1306impl std::fmt::Debug for SyntaxLayer {
1307 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1308 f.debug_struct("SyntaxLayer")
1309 .field("depth", &self.depth)
1310 .field("range", &self.range)
1311 .field("tree", &self.tree)
1312 .finish()
1313 }
1314}
1315
1316impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> {
1317 type I = ByteChunks<'a>;
1318
1319 fn text(&mut self, node: tree_sitter::Node) -> Self::I {
1320 ByteChunks(self.0.chunks_in_range(node.byte_range()))
1321 }
1322}
1323
1324impl<'a> Iterator for ByteChunks<'a> {
1325 type Item = &'a [u8];
1326
1327 fn next(&mut self) -> Option<Self::Item> {
1328 self.0.next().map(str::as_bytes)
1329 }
1330}
1331
1332impl QueryCursorHandle {
1333 pub(crate) fn new() -> Self {
1334 let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
1335 cursor.set_match_limit(64);
1336 QueryCursorHandle(Some(cursor))
1337 }
1338}
1339
1340impl Deref for QueryCursorHandle {
1341 type Target = QueryCursor;
1342
1343 fn deref(&self) -> &Self::Target {
1344 self.0.as_ref().unwrap()
1345 }
1346}
1347
1348impl DerefMut for QueryCursorHandle {
1349 fn deref_mut(&mut self) -> &mut Self::Target {
1350 self.0.as_mut().unwrap()
1351 }
1352}
1353
1354impl Drop for QueryCursorHandle {
1355 fn drop(&mut self) {
1356 let mut cursor = self.0.take().unwrap();
1357 cursor.set_byte_range(0..usize::MAX);
1358 cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
1359 QUERY_CURSORS.lock().push(cursor)
1360 }
1361}
1362
/// Conversions between a point type and `tree_sitter::Point`.
pub(crate) trait ToTreeSitterPoint {
    /// Converts `self` into a `tree_sitter::Point`.
    fn to_ts_point(self) -> tree_sitter::Point;
    /// Builds a value of the implementing type from a `tree_sitter::Point`.
    fn from_ts_point(point: tree_sitter::Point) -> Self;
}
1367
1368impl ToTreeSitterPoint for Point {
1369 fn to_ts_point(self) -> tree_sitter::Point {
1370 tree_sitter::Point::new(self.row as usize, self.column as usize)
1371 }
1372
1373 fn from_ts_point(point: tree_sitter::Point) -> Self {
1374 Point::new(point.row as u32, point.column as u32)
1375 }
1376}
1377
1378#[cfg(test)]
1379mod tests {
1380 use super::*;
1381 use crate::LanguageConfig;
1382 use rand::rngs::StdRng;
1383 use std::env;
1384 use text::Buffer;
1385 use unindent::Unindent as _;
1386 use util::test::marked_text_ranges;
1387
1388 #[test]
1389 fn test_splice_included_ranges() {
1390 let ranges = vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)];
1391
1392 let new_ranges = splice_included_ranges(
1393 ranges.clone(),
1394 &[54..56, 58..68],
1395 &[ts_range(50..54), ts_range(59..67)],
1396 );
1397 assert_eq!(
1398 new_ranges,
1399 &[
1400 ts_range(20..30),
1401 ts_range(50..54),
1402 ts_range(59..67),
1403 ts_range(80..90),
1404 ]
1405 );
1406
1407 let new_ranges = splice_included_ranges(ranges.clone(), &[70..71, 91..100], &[]);
1408 assert_eq!(
1409 new_ranges,
1410 &[ts_range(20..30), ts_range(50..60), ts_range(80..90)]
1411 );
1412
1413 let new_ranges =
1414 splice_included_ranges(ranges.clone(), &[], &[ts_range(0..2), ts_range(70..75)]);
1415 assert_eq!(
1416 new_ranges,
1417 &[
1418 ts_range(0..2),
1419 ts_range(20..30),
1420 ts_range(50..60),
1421 ts_range(70..75),
1422 ts_range(80..90)
1423 ]
1424 );
1425
1426 let new_ranges = splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]);
1427 assert_eq!(new_ranges, &[ts_range(25..55), ts_range(80..90)]);
1428
1429 fn ts_range(range: Range<usize>) -> tree_sitter::Range {
1430 tree_sitter::Range {
1431 start_byte: range.start,
1432 start_point: tree_sitter::Point {
1433 row: 0,
1434 column: range.start,
1435 },
1436 end_byte: range.end,
1437 end_point: tree_sitter::Point {
1438 row: 0,
1439 column: range.end,
1440 },
1441 }
1442 }
1443 }
1444
    /// Checks which syntax layers `layers_for_range` reports for several
    /// points in a Rust function containing nested macro invocations, and
    /// that a layer disappears and reappears when a `vec!` macro is replaced
    /// and the edit is then undone.
    #[gpui::test]
    fn test_syntax_map_layers_for_range() {
        let registry = Arc::new(LanguageRegistry::test());
        let language = Arc::new(rust_lang());
        registry.add(language.clone());

        // The row/column points used below refer to this text after
        // `unindent()`, so its relative indentation matters.
        let mut buffer = Buffer::new(
            0,
            0,
            r#"
                fn a() {
                    assert_eq!(
                        b(vec![C {}]),
                        vec![d.e],
                    );
                    println!("{}", f(|_| true));
                }
            "#
            .unindent(),
        );

        let mut syntax_map = SyntaxMap::new();
        syntax_map.set_language_registry(registry.clone());
        syntax_map.reparse(language.clone(), &buffer);

        // Start of row 2: the file's layer plus the `assert_eq!` layer.
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 0)..Point::new(2, 0),
            &[
                "...(function_item ... (block (expression_statement (macro_invocation...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
            ],
        );
        // Inside the first `vec![...]`: one additional, deeper layer.
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (struct_expression ...",
            ],
        );
        // Inside the second `vec![...]`.
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(3, 14)..Point::new(3, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (field_expression ...",
            ],
        );
        // Inside the `println!` invocation.
        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(5, 12)..Point::new(5, 16),
            &[
                "...(function_item ...",
                "...(call_expression ... (arguments (closure_expression ...",
            ],
        );

        // Replace a vec! macro invocation with a plain slice, removing a syntactic layer.
        let macro_name_range = range_for_text(&buffer, "vec!");
        buffer.edit([(macro_name_range, "&")]);
        syntax_map.interpolate(&buffer);
        syntax_map.reparse(language.clone(), &buffer);

        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (reference_expression value: (array_expression...",
            ],
        );

        // Put the vec! macro back, adding back the syntactic layer.
        buffer.undo();
        syntax_map.interpolate(&buffer);
        syntax_map.reparse(language.clone(), &buffer);

        assert_layers_for_range(
            &syntax_map,
            &buffer,
            Point::new(2, 14)..Point::new(2, 16),
            &[
                "...(function_item ...",
                "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...",
                "...(array_expression (struct_expression ...",
            ],
        );
    }
1541
1542 #[gpui::test]
1543 fn test_typing_multiple_new_injections() {
1544 let (buffer, syntax_map) = test_edit_sequence(
1545 "Rust",
1546 &[
1547 "fn a() { dbg }",
1548 "fn a() { dbg«!» }",
1549 "fn a() { dbg!«()» }",
1550 "fn a() { dbg!(«b») }",
1551 "fn a() { dbg!(b«.») }",
1552 "fn a() { dbg!(b.«c») }",
1553 "fn a() { dbg!(b.c«()») }",
1554 "fn a() { dbg!(b.c(«vec»)) }",
1555 "fn a() { dbg!(b.c(vec«!»)) }",
1556 "fn a() { dbg!(b.c(vec!«[]»)) }",
1557 "fn a() { dbg!(b.c(vec![«d»])) }",
1558 "fn a() { dbg!(b.c(vec![d«.»])) }",
1559 "fn a() { dbg!(b.c(vec![d.«e»])) }",
1560 ],
1561 );
1562
1563 assert_capture_ranges(
1564 &syntax_map,
1565 &buffer,
1566 &["field"],
1567 "fn a() { dbg!(b.«c»(vec![d.«e»])) }",
1568 );
1569 }
1570
    /// Inserts a new macro-invocation line between existing ones and checks
    /// that `struct` captures are reported for every invocation, including
    /// the new one.
    #[gpui::test]
    fn test_pasting_new_injection_line_between_others() {
        let (buffer, syntax_map) = test_edit_sequence(
            "Rust",
            &[
                "
                    fn a() {
                        b!(B {});
                        c!(C {});
                        d!(D {});
                        e!(E {});
                        f!(F {});
                        g!(G {});
                    }
                ",
                "
                    fn a() {
                        b!(B {});
                        c!(C {});
                        d!(D {});
                    «    h!(H {});
                    »    e!(E {});
                        f!(F {});
                        g!(G {});
                    }
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["struct"],
            "
            fn a() {
                b!(«B {}»);
                c!(«C {}»);
                d!(«D {}»);
                h!(«H {}»);
                e!(«E {}»);
                f!(«F {}»);
                g!(«G {}»);
            }
            ",
        );
    }
1617
    /// Deletes the text between two macro invocations so that the second
    /// one's contents join the first, then checks the `field` captures inside
    /// the nested invocations.
    #[gpui::test]
    fn test_joining_injections_with_child_injections() {
        let (buffer, syntax_map) = test_edit_sequence(
            "Rust",
            &[
                "
                    fn a() {
                        b!(
                            c![one.two.three],
                            d![four.five.six],
                        );
                        e!(
                            f![seven.eight],
                        );
                    }
                ",
                "
                    fn a() {
                        b!(
                            c![one.two.three],
                            d![four.five.six],
                        ˇ    f![seven.eight],
                        );
                    }
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["field"],
            "
            fn a() {
                b!(
                    c![one.«two».«three»],
                    d![four.«five».«six»],
                    f![seven.«eight»],
                );
            }
            ",
        );
    }
1661
    /// Edits the macro name and the delimiters at the edges of an injection.
    /// The only checks are the per-step comparisons made inside
    /// `test_edit_sequence`.
    #[gpui::test]
    fn test_editing_edges_of_injection() {
        test_edit_sequence(
            "Rust",
            &[
                "
                    fn a() {
                        b!(c!())
                    }
                ",
                "
                    fn a() {
                        «d»!(c!())
                    }
                ",
                "
                    fn a() {
                        «e»d!(c!())
                    }
                ",
                "
                    fn a() {
                        ed!«[»c!()«]»
                    }
                ",
            ],
        );
    }
1690
1691 #[gpui::test]
1692 fn test_edits_preceding_and_intersecting_injection() {
1693 test_edit_sequence(
1694 "Rust",
1695 &[
1696 //
1697 "const aaaaaaaaaaaa: B = c!(d(e.f));",
1698 "const aˇa: B = c!(d(eˇ));",
1699 ],
1700 );
1701 }
1702
    /// Removes the comment markers in front of `a! {` and `}` so that the
    /// uncommented lines form a macro invocation around text that was not
    /// itself edited. The only checks are the per-step comparisons made
    /// inside `test_edit_sequence`.
    #[gpui::test]
    fn test_non_local_changes_create_injections() {
        test_edit_sequence(
            "Rust",
            &[
                "
                    // a! {
                        static B: C = d;
                    // }
                ",
                "
                    ˇa! {
                        static B: C = d;
                    ˇ}
                ",
            ],
        );
    }
1721
    /// Turns three function calls into macro invocations in one step, then
    /// turns the three nested calls into macro invocations in the next step.
    /// The only checks are the per-step comparisons made inside
    /// `test_edit_sequence`.
    #[gpui::test]
    fn test_creating_many_injections_in_one_edit() {
        test_edit_sequence(
            "Rust",
            &[
                "
                    fn a() {
                        one(Two::three(3));
                        four(Five::six(6));
                        seven(Eight::nine(9));
                    }
                ",
                "
                    fn a() {
                        one«!»(Two::three(3));
                        four«!»(Five::six(6));
                        seven«!»(Eight::nine(9));
                    }
                ",
                "
                    fn a() {
                        one!(Two::three«!»(3));
                        four!(Five::six«!»(6));
                        seven!(Eight::nine«!»(9));
                    }
                ",
            ],
        );
    }
1751
    /// Replaces text that straddles both the opening and the closing edge of
    /// a macro invocation's arguments. The only checks are the per-step
    /// comparisons made inside `test_edit_sequence`.
    #[gpui::test]
    fn test_editing_across_injection_boundary() {
        test_edit_sequence(
            "Rust",
            &[
                "
                    fn one() {
                        two();
                        three!(
                            three.four,
                            five.six,
                        );
                    }
                ",
                "
                    fn one() {
                        two();
                        th«irty_five![»
                            three.four,
                            five.six,
                        «    seven.eight,
                        ];»
                    }
                ",
            ],
        );
    }
1779
    /// Replaces the start of a macro invocation (`two!(`) with a method-call
    /// chain, so the former macro arguments become ordinary call arguments.
    /// The only checks are the per-step comparisons made inside
    /// `test_edit_sequence`.
    #[gpui::test]
    fn test_removing_injection_by_replacing_across_boundary() {
        test_edit_sequence(
            "Rust",
            &[
                "
                    fn one() {
                        two!(
                            three.four,
                        );
                    }
                ",
                "
                    fn one() {
                        t«en
                            .eleven(
                                twelve,
                            »
                            three.four,
                        );
                    }
                ",
            ],
        );
    }
1805
    /// Edits an ERB template, whose test language injects Ruby and HTML with
    /// the `combined` setting (see `erb_lang`), and checks the `tag` and
    /// `ivar` captures in the final text.
    #[gpui::test]
    fn test_combined_injections() {
        let (buffer, syntax_map) = test_edit_sequence(
            "ERB",
            &[
                "
                    <body>
                        <% if @one %>
                            <div class=one>
                        <% else %>
                            <div class=two>
                        <% end %>
                        </div>
                    </body>
                ",
                "
                    <body>
                        <% if @one %>
                            <div class=one>
                        ˇ else ˇ
                            <div class=two>
                        <% end %>
                        </div>
                    </body>
                ",
                "
                    <body>
                        <% if @one «;» end %>
                        </div>
                    </body>
                ",
            ],
        );

        assert_capture_ranges(
            &syntax_map,
            &buffer,
            &["tag", "ivar"],
            "
                <«body»>
                    <% if «@one» ; end %>
                    </«div»>
                </«body»>
            ",
        );
    }
1852
    /// Randomized test: applies `OPERATIONS` (default 10) rounds of random
    /// edits, checking interpolation after each, then undoes them one by one
    /// while comparing the incrementally maintained syntax map against one
    /// reparsed from scratch.
    #[gpui::test(iterations = 100)]
    fn test_random_syntax_map_edits(mut rng: StdRng) {
        let operations = env::var("OPERATIONS")
            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
            .unwrap_or(10);

        let text = r#"
            fn test_something() {
                let vec = vec![5, 1, 3, 8];
                assert_eq!(
                    vec
                        .into_iter()
                        .map(|i| i * 2)
                        .collect::<Vec<usize>>(),
                    vec![
                        5 * 2, 1 * 2, 3 * 2, 8 * 2
                    ],
                );
            }
        "#
        .unindent()
        .repeat(2);

        let registry = Arc::new(LanguageRegistry::test());
        let language = Arc::new(rust_lang());
        registry.add(language.clone());
        let mut buffer = Buffer::new(0, 0, text);

        let mut syntax_map = SyntaxMap::new();
        syntax_map.set_language_registry(registry.clone());
        syntax_map.reparse(language.clone(), &buffer);

        let mut reference_syntax_map = SyntaxMap::new();
        reference_syntax_map.set_language_registry(registry.clone());

        log::info!("initial text:\n{}", buffer.text());

        for _ in 0..operations {
            // Snapshots taken before the edit, for `check_interpolation`.
            let prev_buffer = buffer.snapshot();
            let prev_syntax_map = syntax_map.snapshot();

            buffer.randomly_edit(&mut rng, 3);
            log::info!("text:\n{}", buffer.text());

            syntax_map.interpolate(&buffer);
            check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer);

            syntax_map.reparse(language.clone(), &buffer);

            // NOTE(review): the reference map is rebuilt here but never
            // compared with `syntax_map` inside this loop; the comparisons
            // only happen in the undo loop and after it. Confirm whether an
            // assertion is missing.
            reference_syntax_map.clear();
            reference_syntax_map.reparse(language.clone(), &buffer);
        }

        for i in 0..operations {
            // Count down so the log and assertion message name the edit
            // being undone.
            let i = operations - i - 1;
            buffer.undo();
            log::info!("undoing operation {}", i);
            log::info!("text:\n{}", buffer.text());

            syntax_map.interpolate(&buffer);
            syntax_map.reparse(language.clone(), &buffer);

            reference_syntax_map.clear();
            reference_syntax_map.reparse(language.clone(), &buffer);
            assert_eq!(
                syntax_map.layers(&buffer).len(),
                reference_syntax_map.layers(&buffer).len(),
                "wrong number of layers after undoing edit {i}"
            );
        }

        // Final state: every layer must match the from-scratch parse in both
        // structure and range. `zip` stops at the shorter list; the layer
        // counts are only asserted equal inside the undo loop above.
        let layers = syntax_map.layers(&buffer);
        let reference_layers = reference_syntax_map.layers(&buffer);
        for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter())
        {
            assert_eq!(edited_layer.node.to_sexp(), reference_layer.node.to_sexp());
            assert_eq!(edited_layer.node.range(), reference_layer.node.range());
        }
    }
1932
    /// Checks the result of interpolating a syntax snapshot across a buffer
    /// edit, before any reparse.
    ///
    /// Layers of the old and new snapshots are paired up in order. Each pair
    /// must have the same anchor range, and their trees are walked in
    /// lockstep by `check_node_edits`.
    fn check_interpolation(
        old_syntax_map: &SyntaxSnapshot,
        new_syntax_map: &SyntaxSnapshot,
        old_buffer: &BufferSnapshot,
        new_buffer: &BufferSnapshot,
    ) {
        // Edits made between the two buffer versions, as byte offsets.
        let edits = new_buffer
            .edits_since::<usize>(&old_buffer.version())
            .collect::<Vec<_>>();

        for (old_layer, new_layer) in old_syntax_map
            .layers
            .iter()
            .zip(new_syntax_map.layers.iter())
        {
            assert_eq!(old_layer.range, new_layer.range);
            // Resolve each layer's start in its own buffer so that node
            // ranges are reported in that buffer's coordinates.
            let old_start_byte = old_layer.range.start.to_offset(old_buffer);
            let new_start_byte = new_layer.range.start.to_offset(new_buffer);
            let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point();
            let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point();
            let old_node = old_layer
                .tree
                .root_node_with_offset(old_start_byte, old_start_point);
            let new_node = new_layer
                .tree
                .root_node_with_offset(new_start_byte, new_start_point);
            check_node_edits(
                old_layer.depth,
                &old_layer.range,
                old_node,
                new_node,
                old_buffer,
                new_buffer,
                &edits,
            );
        }

        /// Recursively compares `old_node` with `new_node`.
        ///
        /// The kinds must match. A node whose new range overlaps an edit must
        /// report `has_changes()`. A node without changes must cover the same
        /// text in both buffers.
        fn check_node_edits(
            depth: usize,
            range: &Range<Anchor>,
            old_node: Node,
            new_node: Node,
            old_buffer: &BufferSnapshot,
            new_buffer: &BufferSnapshot,
            edits: &[text::Edit<usize>],
        ) {
            assert_eq!(old_node.kind(), new_node.kind());

            let old_range = old_node.byte_range();
            let new_range = new_node.byte_range();

            // Strict overlap: an edit that only touches the node's boundary
            // does not count.
            let is_edited = edits
                .iter()
                .any(|edit| edit.new.start < new_range.end && edit.new.end > new_range.start);
            if is_edited {
                assert!(
                    new_node.has_changes(),
                    concat!(
                        "failed to mark node as edited.\n",
                        "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
                        "node kind: {}, old node range: {:?}, new node range: {:?}",
                    ),
                    depth,
                    range.to_offset(old_buffer),
                    range.to_offset(new_buffer),
                    new_node.kind(),
                    old_range,
                    new_range,
                );
            }

            if !new_node.has_changes() {
                assert_eq!(
                    old_buffer
                        .text_for_range(old_range.clone())
                        .collect::<String>(),
                    new_buffer
                        .text_for_range(new_range.clone())
                        .collect::<String>(),
                    concat!(
                        "mismatched text for node\n",
                        "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n",
                        "node kind: {}, old node range:{:?}, new node range:{:?}",
                    ),
                    depth,
                    range.to_offset(old_buffer),
                    range.to_offset(new_buffer),
                    new_node.kind(),
                    old_range,
                    new_range,
                );
            }

            // The old node is expected to have at least as many children as
            // the new one; the `unwrap` on `old_node.child(i)` panics
            // otherwise.
            for i in 0..new_node.child_count() {
                check_node_edits(
                    depth,
                    range,
                    old_node.child(i).unwrap(),
                    new_node.child(i).unwrap(),
                    old_buffer,
                    new_buffer,
                    edits,
                )
            }
        }
    }
2039
2040 fn test_edit_sequence(language_name: &str, steps: &[&str]) -> (Buffer, SyntaxMap) {
2041 let registry = Arc::new(LanguageRegistry::test());
2042 registry.add(Arc::new(rust_lang()));
2043 registry.add(Arc::new(ruby_lang()));
2044 registry.add(Arc::new(html_lang()));
2045 registry.add(Arc::new(erb_lang()));
2046 let language = registry.get_language(language_name).unwrap();
2047 let mut buffer = Buffer::new(0, 0, Default::default());
2048
2049 let mut mutated_syntax_map = SyntaxMap::new();
2050 mutated_syntax_map.set_language_registry(registry.clone());
2051 mutated_syntax_map.reparse(language.clone(), &buffer);
2052
2053 for (i, marked_string) in steps.into_iter().enumerate() {
2054 edit_buffer(&mut buffer, &marked_string.unindent());
2055
2056 // Reparse the syntax map
2057 mutated_syntax_map.interpolate(&buffer);
2058 mutated_syntax_map.reparse(language.clone(), &buffer);
2059
2060 // Create a second syntax map from scratch
2061 let mut reference_syntax_map = SyntaxMap::new();
2062 reference_syntax_map.set_language_registry(registry.clone());
2063 reference_syntax_map.reparse(language.clone(), &buffer);
2064
2065 // Compare the mutated syntax map to the new syntax map
2066 let mutated_layers = mutated_syntax_map.layers(&buffer);
2067 let reference_layers = reference_syntax_map.layers(&buffer);
2068 assert_eq!(
2069 mutated_layers.len(),
2070 reference_layers.len(),
2071 "wrong number of layers at step {i}"
2072 );
2073 for (edited_layer, reference_layer) in
2074 mutated_layers.into_iter().zip(reference_layers.into_iter())
2075 {
2076 assert_eq!(
2077 edited_layer.node.to_sexp(),
2078 reference_layer.node.to_sexp(),
2079 "different layer at step {i}"
2080 );
2081 assert_eq!(
2082 edited_layer.node.range(),
2083 reference_layer.node.range(),
2084 "different layer at step {i}"
2085 );
2086 }
2087 }
2088
2089 (buffer, mutated_syntax_map)
2090 }
2091
2092 fn html_lang() -> Language {
2093 Language::new(
2094 LanguageConfig {
2095 name: "HTML".into(),
2096 path_suffixes: vec!["html".to_string()],
2097 ..Default::default()
2098 },
2099 Some(tree_sitter_html::language()),
2100 )
2101 .with_highlights_query(
2102 r#"
2103 (tag_name) @tag
2104 (erroneous_end_tag_name) @tag
2105 (attribute_name) @property
2106 "#,
2107 )
2108 .unwrap()
2109 }
2110
2111 fn ruby_lang() -> Language {
2112 Language::new(
2113 LanguageConfig {
2114 name: "Ruby".into(),
2115 path_suffixes: vec!["rb".to_string()],
2116 ..Default::default()
2117 },
2118 Some(tree_sitter_ruby::language()),
2119 )
2120 .with_highlights_query(
2121 r#"
2122 ["if" "do" "else" "end"] @keyword
2123 (instance_variable) @ivar
2124 "#,
2125 )
2126 .unwrap()
2127 }
2128
2129 fn erb_lang() -> Language {
2130 Language::new(
2131 LanguageConfig {
2132 name: "ERB".into(),
2133 path_suffixes: vec!["erb".to_string()],
2134 ..Default::default()
2135 },
2136 Some(tree_sitter_embedded_template::language()),
2137 )
2138 .with_highlights_query(
2139 r#"
2140 ["<%" "%>"] @keyword
2141 "#,
2142 )
2143 .unwrap()
2144 .with_injection_query(
2145 r#"
2146 ((code) @content
2147 (#set! "language" "ruby")
2148 (#set! "combined"))
2149
2150 ((content) @content
2151 (#set! "language" "html")
2152 (#set! "combined"))
2153 "#,
2154 )
2155 .unwrap()
2156 }
2157
2158 fn rust_lang() -> Language {
2159 Language::new(
2160 LanguageConfig {
2161 name: "Rust".into(),
2162 path_suffixes: vec!["rs".to_string()],
2163 ..Default::default()
2164 },
2165 Some(tree_sitter_rust::language()),
2166 )
2167 .with_highlights_query(
2168 r#"
2169 (field_identifier) @field
2170 (struct_expression) @struct
2171 "#,
2172 )
2173 .unwrap()
2174 .with_injection_query(
2175 r#"
2176 (macro_invocation
2177 (token_tree) @content
2178 (#set! "language" "rust"))
2179 "#,
2180 )
2181 .unwrap()
2182 }
2183
2184 fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
2185 let start = buffer.as_rope().to_string().find(text).unwrap();
2186 start..start + text.len()
2187 }
2188
2189 fn assert_layers_for_range(
2190 syntax_map: &SyntaxMap,
2191 buffer: &BufferSnapshot,
2192 range: Range<Point>,
2193 expected_layers: &[&str],
2194 ) {
2195 let layers = syntax_map
2196 .layers_for_range(range, &buffer)
2197 .collect::<Vec<_>>();
2198 assert_eq!(
2199 layers.len(),
2200 expected_layers.len(),
2201 "wrong number of layers"
2202 );
2203 for (i, (SyntaxLayerInfo { node, .. }, expected_s_exp)) in
2204 layers.iter().zip(expected_layers.iter()).enumerate()
2205 {
2206 let actual_s_exp = node.to_sexp();
2207 assert!(
2208 string_contains_sequence(
2209 &actual_s_exp,
2210 &expected_s_exp.split("...").collect::<Vec<_>>()
2211 ),
2212 "layer {i}:\n\nexpected: {expected_s_exp}\nactual: {actual_s_exp}",
2213 );
2214 }
2215 }
2216
2217 fn assert_capture_ranges(
2218 syntax_map: &SyntaxMap,
2219 buffer: &BufferSnapshot,
2220 highlight_query_capture_names: &[&str],
2221 marked_string: &str,
2222 ) {
2223 let mut actual_ranges = Vec::<Range<usize>>::new();
2224 let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| {
2225 grammar.highlights_query.as_ref()
2226 });
2227 let queries = captures
2228 .grammars()
2229 .iter()
2230 .map(|grammar| grammar.highlights_query.as_ref().unwrap())
2231 .collect::<Vec<_>>();
2232 for capture in captures {
2233 let name = &queries[capture.grammar_index].capture_names()[capture.index as usize];
2234 if highlight_query_capture_names.contains(&name.as_str()) {
2235 actual_ranges.push(capture.node.byte_range());
2236 }
2237 }
2238
2239 let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false);
2240 assert_eq!(text, buffer.text());
2241 assert_eq!(actual_ranges, expected_ranges);
2242 }
2243
    /// Edits `buffer` so that its text becomes `marked_string` with the
    /// markers removed.
    ///
    /// The ranges returned by `marked_text_ranges` are treated as the newly
    /// inserted text. What each insertion replaces is inferred by locating
    /// the unchanged text that follows it in the old buffer. If there are no
    /// marked ranges, the whole new text is treated as one insertion. Panics
    /// with "invalid edit" when the marked string is inconsistent with the
    /// buffer's current text.
    fn edit_buffer(buffer: &mut Buffer, marked_string: &str) {
        let old_text = buffer.text();
        let (new_text, mut ranges) = marked_text_ranges(marked_string, false);
        if ranges.is_empty() {
            ranges.push(0..new_text.len());
        }

        // Everything before the first insertion must be unchanged.
        assert_eq!(
            old_text[..ranges[0].start],
            new_text[..ranges[0].start],
            "invalid edit"
        );

        // Running difference (new length - old length) of the edits so far;
        // used to map offsets in the new text back to the old text.
        let mut delta = 0;
        let mut edits = Vec::new();
        let mut ranges = ranges.into_iter().peekable();

        while let Some(inserted_range) = ranges.next() {
            let new_start = inserted_range.start;
            let old_start = (new_start as isize - delta) as usize;

            // Unchanged text between this insertion and the next one (or the
            // end of the text).
            let following_text = if let Some(next_range) = ranges.peek() {
                &new_text[inserted_range.end..next_range.start]
            } else {
                &new_text[inserted_range.end..]
            };

            // The old text up to the first occurrence of `following_text` is
            // what this edit deletes.
            let inserted_len = inserted_range.len();
            let deleted_len = old_text[old_start..]
                .find(following_text)
                .expect("invalid edit");

            let old_range = old_start..old_start + deleted_len;
            edits.push((old_range, new_text[inserted_range].to_string()));
            delta += inserted_len as isize - deleted_len as isize;
        }

        // Sanity check: the collected edits account for the full change in
        // length.
        assert_eq!(
            old_text.len() as isize + delta,
            new_text.len() as isize,
            "invalid edit"
        );

        buffer.edit(edits);
    }
2289
2290 pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool {
2291 let mut last_part_end = 0;
2292 for part in parts {
2293 if let Some(start_ix) = text[last_part_end..].find(part) {
2294 last_part_end = start_ix + part.len();
2295 } else {
2296 return false;
2297 }
2298 }
2299 true
2300 }
2301}