1#[cfg(test)]
2mod syntax_map_tests;
3
4use crate::{
5 Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry, QUERY_CURSORS, with_parser,
6};
7use collections::HashMap;
8use futures::FutureExt;
9use gpui::SharedString;
10use std::{
11 borrow::Cow,
12 cmp::{self, Ordering, Reverse},
13 collections::BinaryHeap,
14 fmt, iter,
15 ops::{ControlFlow, Deref, DerefMut, Range},
16 sync::Arc,
17 time::{Duration, Instant},
18};
19use streaming_iterator::StreamingIterator;
20use sum_tree::{Bias, Dimensions, SeekTarget, SumTree};
21use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
22use tree_sitter::{
23 Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches,
24 QueryPredicateArg, Tree,
25};
26
27pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024;
28
29pub struct SyntaxMap {
30 snapshot: SyntaxSnapshot,
31 language_registry: Option<Arc<LanguageRegistry>>,
32}
33
34#[derive(Clone)]
35pub struct SyntaxSnapshot {
36 layers: SumTree<SyntaxLayerEntry>,
37 parsed_version: clock::Global,
38 interpolated_version: clock::Global,
39 language_registry_version: usize,
40 update_count: usize,
41}
42
43#[derive(Default)]
44pub struct SyntaxMapCaptures<'a> {
45 layers: Vec<SyntaxMapCapturesLayer<'a>>,
46 active_layer_count: usize,
47 grammars: Vec<&'a Grammar>,
48}
49
50#[derive(Default)]
51pub struct SyntaxMapMatches<'a> {
52 layers: Vec<SyntaxMapMatchesLayer<'a>>,
53 active_layer_count: usize,
54 grammars: Vec<&'a Grammar>,
55}
56
57#[derive(Debug)]
58pub struct SyntaxMapCapture<'a> {
59 pub node: Node<'a>,
60 pub index: u32,
61 pub grammar_index: usize,
62}
63
64#[derive(Debug)]
65pub struct SyntaxMapMatch<'a> {
66 pub language: Arc<Language>,
67 pub depth: usize,
68 pub pattern_index: usize,
69 pub captures: &'a [QueryCapture<'a>],
70 pub grammar_index: usize,
71}
72
73struct SyntaxMapCapturesLayer<'a> {
74 depth: usize,
75 captures: QueryCaptures<'a, 'a, TextProvider<'a>, &'a [u8]>,
76 next_capture: Option<QueryCapture<'a>>,
77 grammar_index: usize,
78 _query_cursor: QueryCursorHandle,
79}
80
81struct SyntaxMapMatchesLayer<'a> {
82 language: Arc<Language>,
83 depth: usize,
84 next_pattern_index: usize,
85 next_captures: Vec<QueryCapture<'a>>,
86 has_next: bool,
87 matches: QueryMatches<'a, 'a, TextProvider<'a>, &'a [u8]>,
88 query: &'a Query,
89 grammar_index: usize,
90 _query_cursor: QueryCursorHandle,
91}
92
93#[derive(Clone)]
94struct SyntaxLayerEntry {
95 depth: usize,
96 range: Range<Anchor>,
97 content: SyntaxLayerContent,
98}
99
100#[derive(Clone)]
101enum SyntaxLayerContent {
102 Parsed {
103 tree: tree_sitter::Tree,
104 language: Arc<Language>,
105 included_sub_ranges: Option<Vec<Range<Anchor>>>,
106 },
107 Pending {
108 language_name: Arc<str>,
109 },
110}
111
112impl SyntaxLayerContent {
113 fn language_id(&self) -> Option<LanguageId> {
114 match self {
115 SyntaxLayerContent::Parsed { language, .. } => Some(language.id),
116 SyntaxLayerContent::Pending { .. } => None,
117 }
118 }
119
120 fn tree(&self) -> Option<&Tree> {
121 match self {
122 SyntaxLayerContent::Parsed { tree, .. } => Some(tree),
123 SyntaxLayerContent::Pending { .. } => None,
124 }
125 }
126}
127
128/// A layer of syntax highlighting, corresponding to a single syntax
129/// tree in a particular language.
130#[derive(Debug)]
131pub struct SyntaxLayer<'a> {
132 /// The language for this layer.
133 pub language: &'a Arc<Language>,
134 pub included_sub_ranges: Option<&'a [Range<Anchor>]>,
135 pub(crate) depth: usize,
136 tree: &'a Tree,
137 pub(crate) offset: (usize, tree_sitter::Point),
138}
139
140/// A layer of syntax highlighting. Like [SyntaxLayer], but holding
141/// owned data instead of references.
142#[derive(Clone)]
143pub struct OwnedSyntaxLayer {
144 /// The language for this layer.
145 pub language: Arc<Language>,
146 tree: tree_sitter::Tree,
147 pub offset: (usize, tree_sitter::Point),
148}
149
150#[derive(Debug, Clone)]
151struct SyntaxLayerSummary {
152 min_depth: usize,
153 max_depth: usize,
154 range: Range<Anchor>,
155 last_layer_range: Range<Anchor>,
156 last_layer_language: Option<LanguageId>,
157 contains_unknown_injections: bool,
158}
159
160#[derive(Clone, Debug)]
161struct SyntaxLayerPosition {
162 depth: usize,
163 range: Range<Anchor>,
164 language: Option<LanguageId>,
165}
166
167#[derive(Clone, Debug)]
168struct ChangeStartPosition {
169 depth: usize,
170 position: Anchor,
171}
172
173#[derive(Clone, Debug)]
174struct SyntaxLayerPositionBeforeChange {
175 position: SyntaxLayerPosition,
176 change: ChangeStartPosition,
177}
178
179struct ParseStep {
180 depth: usize,
181 language: ParseStepLanguage,
182 range: Range<Anchor>,
183 included_ranges: Vec<tree_sitter::Range>,
184 mode: ParseMode,
185}
186
187#[derive(Debug)]
188enum ParseStepLanguage {
189 Loaded { language: Arc<Language> },
190 Pending { name: Arc<str> },
191}
192
193impl ParseStepLanguage {
194 fn name(&self) -> SharedString {
195 match self {
196 ParseStepLanguage::Loaded { language } => language.name().0,
197 ParseStepLanguage::Pending { name } => name.into(),
198 }
199 }
200
201 fn id(&self) -> Option<LanguageId> {
202 match self {
203 ParseStepLanguage::Loaded { language } => Some(language.id),
204 ParseStepLanguage::Pending { .. } => None,
205 }
206 }
207}
208
209enum ParseMode {
210 Single,
211 Combined {
212 parent_layer_range: Range<usize>,
213 parent_layer_changed_ranges: Vec<Range<usize>>,
214 },
215}
216
217#[derive(Debug, PartialEq, Eq)]
218struct ChangedRegion {
219 depth: usize,
220 range: Range<Anchor>,
221}
222
223#[derive(Default)]
224struct ChangeRegionSet(Vec<ChangedRegion>);
225
226struct TextProvider<'a>(&'a Rope);
227
228struct ByteChunks<'a>(text::Chunks<'a>);
229
230pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
231
232impl SyntaxMap {
233 pub fn new(text: &BufferSnapshot) -> Self {
234 Self {
235 snapshot: SyntaxSnapshot::new(text),
236 language_registry: None,
237 }
238 }
239
240 pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
241 self.language_registry = Some(registry);
242 }
243
244 pub fn snapshot(&self) -> SyntaxSnapshot {
245 self.snapshot.clone()
246 }
247
248 pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
249 self.language_registry.clone()
250 }
251
252 pub fn interpolate(&mut self, text: &BufferSnapshot) {
253 self.snapshot.interpolate(text);
254 }
255
256 #[cfg(test)]
257 pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
258 self.snapshot
259 .reparse(text, self.language_registry.clone(), language);
260 }
261
262 pub fn did_parse(&mut self, snapshot: SyntaxSnapshot) {
263 self.snapshot = snapshot;
264 }
265
266 pub fn clear(&mut self, text: &BufferSnapshot) {
267 let update_count = self.snapshot.update_count + 1;
268 self.snapshot = SyntaxSnapshot::new(text);
269 self.snapshot.update_count = update_count;
270 }
271}
272
273impl SyntaxSnapshot {
274 fn new(text: &BufferSnapshot) -> Self {
275 Self {
276 layers: SumTree::new(text),
277 parsed_version: clock::Global::default(),
278 interpolated_version: clock::Global::default(),
279 language_registry_version: 0,
280 update_count: 0,
281 }
282 }
283
284 pub fn is_empty(&self) -> bool {
285 self.layers.is_empty()
286 }
287
288 pub fn root_language(&self) -> Option<Arc<Language>> {
289 match &self.layers.first()?.content {
290 SyntaxLayerContent::Parsed { language, .. } => Some(language.clone()),
291 SyntaxLayerContent::Pending { .. } => None,
292 }
293 }
294
295 pub fn update_count(&self) -> usize {
296 self.update_count
297 }
298
299 pub fn interpolate(&mut self, text: &BufferSnapshot) {
300 let edits = text
301 .anchored_edits_since::<Dimensions<usize, Point>>(&self.interpolated_version)
302 .collect::<Vec<_>>();
303 self.interpolated_version = text.version().clone();
304
305 if edits.is_empty() {
306 return;
307 }
308
309 let mut layers = SumTree::new(text);
310 let mut first_edit_ix_for_depth = 0;
311 let mut prev_depth = 0;
312 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
313 cursor.next();
314
315 'outer: loop {
316 let depth = cursor.end().max_depth;
317 if depth > prev_depth {
318 first_edit_ix_for_depth = 0;
319 prev_depth = depth;
320 }
321
322 // Preserve any layers at this depth that precede the first edit.
323 if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
324 let target = ChangeStartPosition {
325 depth,
326 position: edit_range.start,
327 };
328 if target.cmp(cursor.start(), text).is_gt() {
329 let slice = cursor.slice(&target, Bias::Left);
330 layers.append(slice, text);
331 }
332 }
333 // If this layer follows all of the edits, then preserve it and any
334 // subsequent layers at this same depth.
335 else if cursor.item().is_some() {
336 let slice = cursor.slice(
337 &SyntaxLayerPosition {
338 depth: depth + 1,
339 range: Anchor::min_max_range_for_buffer(text.remote_id()),
340 language: None,
341 },
342 Bias::Left,
343 );
344 layers.append(slice, text);
345 continue;
346 };
347
348 let Some(layer) = cursor.item() else { break };
349 let Dimensions(start_byte, start_point, _) =
350 layer.range.start.summary::<Dimensions<usize, Point>>(text);
351
352 // Ignore edits that end before the start of this layer, and don't consider them
353 // for any subsequent layers at this same depth.
354 loop {
355 let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) else {
356 continue 'outer;
357 };
358 if edit_range.end.cmp(&layer.range.start, text).is_le() {
359 first_edit_ix_for_depth += 1;
360 } else {
361 break;
362 }
363 }
364
365 let mut layer = layer.clone();
366 if let SyntaxLayerContent::Parsed { tree, .. } = &mut layer.content {
367 for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
368 // Ignore any edits that follow this layer.
369 if edit_range.start.cmp(&layer.range.end, text).is_ge() {
370 break;
371 }
372
373 // Apply any edits that intersect this layer to the layer's syntax tree.
374 let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
375 tree_sitter::InputEdit {
376 start_byte: edit.new.start.0 - start_byte,
377 old_end_byte: edit.new.start.0 - start_byte
378 + (edit.old.end.0 - edit.old.start.0),
379 new_end_byte: edit.new.end.0 - start_byte,
380 start_position: (edit.new.start.1 - start_point).to_ts_point(),
381 old_end_position: (edit.new.start.1 - start_point
382 + (edit.old.end.1 - edit.old.start.1))
383 .to_ts_point(),
384 new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
385 }
386 } else {
387 let node = tree.root_node();
388 tree_sitter::InputEdit {
389 start_byte: 0,
390 old_end_byte: node.end_byte(),
391 new_end_byte: 0,
392 start_position: Default::default(),
393 old_end_position: node.end_position(),
394 new_end_position: Default::default(),
395 }
396 };
397
398 tree.edit(&tree_edit);
399 }
400
401 debug_assert!(
402 tree.root_node().end_byte() <= text.len(),
403 "tree's size {}, is larger than text size {}",
404 tree.root_node().end_byte(),
405 text.len(),
406 );
407 }
408
409 layers.push(layer, text);
410 cursor.next();
411 }
412
413 layers.append(cursor.suffix(), text);
414 drop(cursor);
415 self.layers = layers;
416 }
417
418 pub fn reparse(
419 &mut self,
420 text: &BufferSnapshot,
421 registry: Option<Arc<LanguageRegistry>>,
422 root_language: Arc<Language>,
423 ) {
424 self.reparse_(text, registry, root_language, None).ok();
425 }
426
427 pub fn reparse_with_timeout(
428 &mut self,
429 text: &BufferSnapshot,
430 registry: Option<Arc<LanguageRegistry>>,
431 root_language: Arc<Language>,
432 budget: Duration,
433 ) -> Result<(), ParseTimeout> {
434 self.reparse_(text, registry, root_language, Some(budget))
435 }
436
437 fn reparse_(
438 &mut self,
439 text: &BufferSnapshot,
440 registry: Option<Arc<LanguageRegistry>>,
441 root_language: Arc<Language>,
442 mut budget: Option<Duration>,
443 ) -> Result<(), ParseTimeout> {
444 let budget = &mut budget;
445 let edit_ranges = text
446 .edits_since::<usize>(&self.parsed_version)
447 .map(|edit| edit.new)
448 .collect::<Vec<_>>();
449 self.reparse_with_ranges(
450 text,
451 root_language.clone(),
452 edit_ranges,
453 registry.as_ref(),
454 budget,
455 )?;
456
457 if let Some(registry) = registry
458 && registry.version() != self.language_registry_version
459 {
460 let mut resolved_injection_ranges = Vec::new();
461 let mut cursor = self
462 .layers
463 .filter::<_, ()>(text, |summary| summary.contains_unknown_injections);
464 cursor.next();
465 while let Some(layer) = cursor.item() {
466 let SyntaxLayerContent::Pending { language_name } = &layer.content else {
467 unreachable!()
468 };
469 if registry
470 .language_for_name_or_extension(language_name)
471 .now_or_never()
472 .and_then(|language| language.ok())
473 .is_some()
474 {
475 let range = layer.range.to_offset(text);
476 log::trace!("reparse range {range:?} for language {language_name:?}");
477 resolved_injection_ranges.push(range);
478 }
479
480 cursor.next();
481 }
482 drop(cursor);
483
484 if !resolved_injection_ranges.is_empty() {
485 self.reparse_with_ranges(
486 text,
487 root_language,
488 resolved_injection_ranges,
489 Some(®istry),
490 budget,
491 )?;
492 }
493 self.language_registry_version = registry.version();
494 }
495
496 self.update_count += 1;
497 Ok(())
498 }
499
500 fn reparse_with_ranges(
501 &mut self,
502 text: &BufferSnapshot,
503 root_language: Arc<Language>,
504 invalidated_ranges: Vec<Range<usize>>,
505 registry: Option<&Arc<LanguageRegistry>>,
506 budget: &mut Option<Duration>,
507 ) -> Result<(), ParseTimeout> {
508 log::trace!(
509 "reparse. invalidated ranges:{:?}",
510 LogOffsetRanges(&invalidated_ranges, text),
511 );
512
513 let max_depth = self.layers.summary().max_depth;
514 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
515 cursor.next();
516 let mut layers = SumTree::new(text);
517
518 let mut changed_regions = ChangeRegionSet::default();
519 let mut queue = BinaryHeap::new();
520 let mut combined_injection_ranges = HashMap::default();
521 queue.push(ParseStep {
522 depth: 0,
523 language: ParseStepLanguage::Loaded {
524 language: root_language,
525 },
526 included_ranges: vec![tree_sitter::Range {
527 start_byte: 0,
528 end_byte: text.len(),
529 start_point: Point::zero().to_ts_point(),
530 end_point: text.max_point().to_ts_point(),
531 }],
532 range: Anchor::min_max_range_for_buffer(text.remote_id()),
533 mode: ParseMode::Single,
534 });
535
536 loop {
537 let step = queue.pop();
538 let position = if let Some(step) = &step {
539 log::trace!(
540 "parse step depth:{}, range:{:?}, language:{} ({:?})",
541 step.depth,
542 LogAnchorRange(&step.range, text),
543 step.language.name(),
544 step.language.id(),
545 );
546 SyntaxLayerPosition {
547 depth: step.depth,
548 range: step.range.clone(),
549 language: step.language.id(),
550 }
551 } else {
552 SyntaxLayerPosition {
553 depth: max_depth + 1,
554 range: Anchor::min_max_range_for_buffer(text.remote_id()),
555 language: None,
556 }
557 };
558
559 let mut done = cursor.item().is_none();
560 while !done && position.cmp(&cursor.end(), text).is_gt() {
561 done = true;
562
563 let bounded_position = SyntaxLayerPositionBeforeChange {
564 position: position.clone(),
565 change: changed_regions.start_position(),
566 };
567 if bounded_position.cmp(cursor.start(), text).is_gt() {
568 let slice = cursor.slice(&bounded_position, Bias::Left);
569 if !slice.is_empty() {
570 layers.append(slice, text);
571 if changed_regions.prune(cursor.end(), text) {
572 done = false;
573 }
574 }
575 }
576
577 while position.cmp(&cursor.end(), text).is_gt() {
578 let Some(layer) = cursor.item() else { break };
579
580 if changed_regions.intersects(layer, text) {
581 if let SyntaxLayerContent::Parsed { language, .. } = &layer.content {
582 log::trace!(
583 "discard layer. language:{}, range:{:?}. changed_regions:{:?}",
584 language.name(),
585 LogAnchorRange(&layer.range, text),
586 LogChangedRegions(&changed_regions, text),
587 );
588 }
589
590 changed_regions.insert(
591 ChangedRegion {
592 depth: layer.depth + 1,
593 range: layer.range.clone(),
594 },
595 text,
596 );
597 } else {
598 layers.push(layer.clone(), text);
599 }
600
601 cursor.next();
602 if changed_regions.prune(cursor.end(), text) {
603 done = false;
604 }
605 }
606 }
607
608 let Some(step) = step else { break };
609 let Dimensions(step_start_byte, step_start_point, _) =
610 step.range.start.summary::<Dimensions<usize, Point>>(text);
611 let step_end_byte = step.range.end.to_offset(text);
612
613 let mut old_layer = cursor.item();
614 if let Some(layer) = old_layer {
615 if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
616 && layer.content.language_id() == step.language.id()
617 {
618 cursor.next();
619 } else {
620 old_layer = None;
621 }
622 }
623
624 let content = match step.language {
625 ParseStepLanguage::Loaded { language } => {
626 let Some(grammar) = language.grammar() else {
627 continue;
628 };
629 let tree;
630 let changed_ranges;
631
632 let mut included_ranges = step.included_ranges;
633 let is_combined = matches!(step.mode, ParseMode::Combined { .. });
634
635 for range in &mut included_ranges {
636 range.start_byte -= step_start_byte;
637 range.end_byte -= step_start_byte;
638 range.start_point = (Point::from_ts_point(range.start_point)
639 - step_start_point)
640 .to_ts_point();
641 range.end_point = (Point::from_ts_point(range.end_point)
642 - step_start_point)
643 .to_ts_point();
644 }
645
646 if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) =
647 old_layer.map(|layer| (&layer.content, layer.range.clone()))
648 {
649 log::trace!(
650 "existing layer. language:{}, range:{:?}, included_ranges:{:?}",
651 language.name(),
652 LogAnchorRange(&layer_range, text),
653 LogIncludedRanges(&old_tree.included_ranges())
654 );
655
656 if let ParseMode::Combined {
657 mut parent_layer_changed_ranges,
658 ..
659 } = step.mode
660 {
661 for range in &mut parent_layer_changed_ranges {
662 range.start = range.start.saturating_sub(step_start_byte);
663 range.end = range.end.saturating_sub(step_start_byte);
664 }
665
666 let changed_indices;
667 (included_ranges, changed_indices) = splice_included_ranges(
668 old_tree.included_ranges(),
669 &parent_layer_changed_ranges,
670 &included_ranges,
671 );
672 insert_newlines_between_ranges(
673 changed_indices,
674 &mut included_ranges,
675 text,
676 step_start_byte,
677 step_start_point,
678 );
679 }
680
681 if included_ranges.is_empty() {
682 included_ranges.push(tree_sitter::Range {
683 start_byte: 0,
684 end_byte: 0,
685 start_point: Default::default(),
686 end_point: Default::default(),
687 });
688 }
689
690 log::trace!(
691 "update layer. language:{}, range:{:?}, included_ranges:{:?}",
692 language.name(),
693 LogAnchorRange(&step.range, text),
694 LogIncludedRanges(&included_ranges),
695 );
696
697 let result = parse_text(
698 grammar,
699 text.as_rope(),
700 step_start_byte,
701 &included_ranges,
702 Some(old_tree.clone()),
703 budget,
704 );
705 match result {
706 Ok(t) => tree = t,
707 Err(e) if e.downcast_ref::<ParseTimeout>().is_some() => {
708 return Err(ParseTimeout);
709 }
710 Err(e) => {
711 log::error!("error parsing text: {e:?}");
712 continue;
713 }
714 };
715
716 changed_ranges = join_ranges(
717 invalidated_ranges
718 .iter()
719 .filter(|&range| {
720 range.start <= step_end_byte && range.end >= step_start_byte
721 })
722 .cloned(),
723 old_tree.changed_ranges(&tree).map(|r| {
724 step_start_byte + r.start_byte..step_start_byte + r.end_byte
725 }),
726 );
727 } else {
728 if matches!(step.mode, ParseMode::Combined { .. }) {
729 insert_newlines_between_ranges(
730 0..included_ranges.len(),
731 &mut included_ranges,
732 text,
733 step_start_byte,
734 step_start_point,
735 );
736 }
737
738 if included_ranges.is_empty() {
739 included_ranges.push(tree_sitter::Range {
740 start_byte: 0,
741 end_byte: 0,
742 start_point: Default::default(),
743 end_point: Default::default(),
744 });
745 }
746
747 log::trace!(
748 "create layer. language:{}, range:{:?}, included_ranges:{:?}",
749 language.name(),
750 LogAnchorRange(&step.range, text),
751 LogIncludedRanges(&included_ranges),
752 );
753
754 let result = parse_text(
755 grammar,
756 text.as_rope(),
757 step_start_byte,
758 &included_ranges,
759 None,
760 budget,
761 );
762 match result {
763 Ok(t) => tree = t,
764 Err(e) if e.downcast_ref::<ParseTimeout>().is_some() => {
765 return Err(ParseTimeout);
766 }
767 Err(e) => {
768 log::error!("error parsing text: {e:?}");
769 continue;
770 }
771 };
772 changed_ranges = vec![step_start_byte..step_end_byte];
773 }
774
775 if let (Some((config, registry)), false) = (
776 grammar.injection_config.as_ref().zip(registry.as_ref()),
777 changed_ranges.is_empty(),
778 ) {
779 for range in &changed_ranges {
780 changed_regions.insert(
781 ChangedRegion {
782 depth: step.depth + 1,
783 range: text.anchor_before(range.start)
784 ..text.anchor_after(range.end),
785 },
786 text,
787 );
788 }
789 get_injections(
790 config,
791 text,
792 step.range.clone(),
793 tree.root_node_with_offset(
794 step_start_byte,
795 step_start_point.to_ts_point(),
796 ),
797 registry,
798 step.depth + 1,
799 &changed_ranges,
800 &mut combined_injection_ranges,
801 &mut queue,
802 );
803 }
804
805 let included_sub_ranges: Option<Vec<Range<Anchor>>> = if is_combined {
806 Some(
807 included_ranges
808 .into_iter()
809 .filter(|r| r.start_byte < r.end_byte)
810 .map(|r| {
811 text.anchor_before(r.start_byte + step_start_byte)
812 ..text.anchor_after(r.end_byte + step_start_byte)
813 })
814 .collect(),
815 )
816 } else {
817 None
818 };
819 SyntaxLayerContent::Parsed {
820 tree,
821 language,
822 included_sub_ranges,
823 }
824 }
825 ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
826 language_name: name,
827 },
828 };
829
830 layers.push(
831 SyntaxLayerEntry {
832 depth: step.depth,
833 range: step.range,
834 content,
835 },
836 text,
837 );
838 }
839
840 drop(cursor);
841 self.layers = layers;
842 self.interpolated_version = text.version.clone();
843 self.parsed_version = text.version.clone();
844 #[cfg(debug_assertions)]
845 self.check_invariants(text);
846 Ok(())
847 }
848
849 #[cfg(debug_assertions)]
850 fn check_invariants(&self, text: &BufferSnapshot) {
851 let mut max_depth = 0;
852 let mut prev_layer: Option<(Range<Anchor>, Option<LanguageId>)> = None;
853 for layer in self.layers.iter() {
854 match Ord::cmp(&layer.depth, &max_depth) {
855 Ordering::Less => {
856 panic!("layers out of order")
857 }
858 Ordering::Equal => {
859 if let Some((prev_range, prev_language_id)) = prev_layer {
860 match layer.range.start.cmp(&prev_range.start, text) {
861 Ordering::Less => panic!("layers out of order"),
862 Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) {
863 Ordering::Less => panic!("layers out of order"),
864 Ordering::Equal => {
865 if layer.content.language_id() < prev_language_id {
866 panic!("layers out of order")
867 }
868 }
869 Ordering::Greater => {}
870 },
871 Ordering::Greater => {}
872 }
873 }
874 prev_layer = Some((layer.range.clone(), layer.content.language_id()));
875 }
876 Ordering::Greater => {
877 prev_layer = None;
878 }
879 }
880
881 max_depth = layer.depth;
882 }
883 }
884
885 pub fn single_tree_captures<'a>(
886 range: Range<usize>,
887 text: &'a Rope,
888 tree: &'a Tree,
889 language: &'a Arc<Language>,
890 query: fn(&Grammar) -> Option<&Query>,
891 ) -> SyntaxMapCaptures<'a> {
892 SyntaxMapCaptures::new(
893 range,
894 text,
895 [SyntaxLayer {
896 language,
897 tree,
898 included_sub_ranges: None,
899 depth: 0,
900 offset: (0, tree_sitter::Point::new(0, 0)),
901 }]
902 .into_iter(),
903 query,
904 )
905 }
906
907 pub fn captures<'a>(
908 &'a self,
909 range: Range<usize>,
910 buffer: &'a BufferSnapshot,
911 query: fn(&Grammar) -> Option<&Query>,
912 ) -> SyntaxMapCaptures<'a> {
913 SyntaxMapCaptures::new(
914 range.clone(),
915 buffer.as_rope(),
916 self.layers_for_range(range, buffer, true),
917 query,
918 )
919 }
920
921 pub fn matches<'a>(
922 &'a self,
923 range: Range<usize>,
924 buffer: &'a BufferSnapshot,
925 query: fn(&Grammar) -> Option<&Query>,
926 ) -> SyntaxMapMatches<'a> {
927 SyntaxMapMatches::new(
928 range.clone(),
929 buffer.as_rope(),
930 self.layers_for_range(range, buffer, true),
931 query,
932 TreeSitterOptions::default(),
933 )
934 }
935
936 pub fn matches_with_options<'a>(
937 &'a self,
938 range: Range<usize>,
939 buffer: &'a BufferSnapshot,
940 options: TreeSitterOptions,
941 query: fn(&Grammar) -> Option<&Query>,
942 ) -> SyntaxMapMatches<'a> {
943 SyntaxMapMatches::new(
944 range.clone(),
945 buffer.as_rope(),
946 self.layers_for_range(range, buffer, true),
947 query,
948 options,
949 )
950 }
951
952 #[cfg(test)]
953 pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayer<'a>> {
954 self.layers_for_range(0..buffer.len(), buffer, true)
955 .collect()
956 }
957
958 pub fn layers_for_range<'a, T: ToOffset>(
959 &'a self,
960 range: Range<T>,
961 buffer: &'a BufferSnapshot,
962 include_hidden: bool,
963 ) -> impl 'a + Iterator<Item = SyntaxLayer<'a>> {
964 let start_offset = range.start.to_offset(buffer);
965 let end_offset = range.end.to_offset(buffer);
966 let start = buffer.anchor_before(start_offset);
967 let end = buffer.anchor_after(end_offset);
968
969 let mut cursor = self.layers.filter::<_, ()>(buffer, move |summary| {
970 if summary.max_depth > summary.min_depth {
971 true
972 } else {
973 let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
974 let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
975 !is_before_start && !is_after_end
976 }
977 });
978
979 cursor.next();
980 iter::from_fn(move || {
981 while let Some(layer) = cursor.item() {
982 let mut info = None;
983 if let SyntaxLayerContent::Parsed {
984 tree,
985 language,
986 included_sub_ranges,
987 } = &layer.content
988 {
989 let layer_start_offset = layer.range.start.to_offset(buffer);
990 let layer_start_point = layer.range.start.to_point(buffer).to_ts_point();
991 if include_hidden || !language.config.hidden {
992 info = Some(SyntaxLayer {
993 tree,
994 language,
995 included_sub_ranges: included_sub_ranges.as_deref(),
996 depth: layer.depth,
997 offset: (layer_start_offset, layer_start_point),
998 });
999 }
1000 }
1001 cursor.next();
1002 if info.is_some() {
1003 return info;
1004 }
1005 }
1006 None
1007 })
1008 }
1009
1010 pub fn contains_unknown_injections(&self) -> bool {
1011 self.layers.summary().contains_unknown_injections
1012 }
1013
1014 pub fn language_registry_version(&self) -> usize {
1015 self.language_registry_version
1016 }
1017}
1018
1019impl<'a> SyntaxMapCaptures<'a> {
1020 fn new(
1021 range: Range<usize>,
1022 text: &'a Rope,
1023 layers: impl Iterator<Item = SyntaxLayer<'a>>,
1024 query: fn(&Grammar) -> Option<&Query>,
1025 ) -> Self {
1026 let mut result = Self {
1027 layers: Vec::new(),
1028 grammars: Vec::new(),
1029 active_layer_count: 0,
1030 };
1031 for layer in layers {
1032 let grammar = match &layer.language.grammar {
1033 Some(grammar) => grammar,
1034 None => continue,
1035 };
1036 let query = match query(grammar) {
1037 Some(query) => query,
1038 None => continue,
1039 };
1040
1041 let mut query_cursor = QueryCursorHandle::new();
1042
1043 // TODO - add a Tree-sitter API to remove the need for this.
1044 let cursor = unsafe {
1045 std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1046 query_cursor.deref_mut(),
1047 )
1048 };
1049
1050 cursor.set_byte_range(range.clone());
1051 let captures = cursor.captures(query, layer.node(), TextProvider(text));
1052 let grammar_index = result
1053 .grammars
1054 .iter()
1055 .position(|g| g.id == grammar.id())
1056 .unwrap_or_else(|| {
1057 result.grammars.push(grammar);
1058 result.grammars.len() - 1
1059 });
1060 let mut layer = SyntaxMapCapturesLayer {
1061 depth: layer.depth,
1062 grammar_index,
1063 next_capture: None,
1064 captures,
1065 _query_cursor: query_cursor,
1066 };
1067
1068 layer.advance();
1069 if layer.next_capture.is_some() {
1070 let key = layer.sort_key();
1071 let ix = match result.layers[..result.active_layer_count]
1072 .binary_search_by_key(&key, |layer| layer.sort_key())
1073 {
1074 Ok(ix) | Err(ix) => ix,
1075 };
1076 result.layers.insert(ix, layer);
1077 result.active_layer_count += 1;
1078 } else {
1079 result.layers.push(layer);
1080 }
1081 }
1082
1083 result
1084 }
1085
1086 pub fn grammars(&self) -> &[&'a Grammar] {
1087 &self.grammars
1088 }
1089
1090 pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
1091 let layer = self.layers[..self.active_layer_count].first()?;
1092 let capture = layer.next_capture?;
1093 Some(SyntaxMapCapture {
1094 grammar_index: layer.grammar_index,
1095 index: capture.index,
1096 node: capture.node,
1097 })
1098 }
1099
1100 pub fn advance(&mut self) -> bool {
1101 let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
1102 layer
1103 } else {
1104 return false;
1105 };
1106
1107 layer.advance();
1108 if layer.next_capture.is_some() {
1109 let key = layer.sort_key();
1110 let i = 1 + self.layers[1..self.active_layer_count]
1111 .iter()
1112 .position(|later_layer| key < later_layer.sort_key())
1113 .unwrap_or(self.active_layer_count - 1);
1114 self.layers[0..i].rotate_left(1);
1115 } else {
1116 self.layers[0..self.active_layer_count].rotate_left(1);
1117 self.active_layer_count -= 1;
1118 }
1119
1120 true
1121 }
1122
1123 pub fn set_byte_range(&mut self, range: Range<usize>) {
1124 for layer in &mut self.layers {
1125 layer.captures.set_byte_range(range.clone());
1126 if let Some(capture) = &layer.next_capture
1127 && capture.node.end_byte() > range.start
1128 {
1129 continue;
1130 }
1131 layer.advance();
1132 }
1133 self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1134 self.active_layer_count = self
1135 .layers
1136 .iter()
1137 .position(|layer| layer.next_capture.is_none())
1138 .unwrap_or(self.layers.len());
1139 }
1140}
1141
1142#[derive(Default)]
1143pub struct TreeSitterOptions {
1144 pub max_start_depth: Option<u32>,
1145 pub max_bytes_to_query: Option<usize>,
1146}
1147
1148impl TreeSitterOptions {
1149 pub fn max_start_depth(max_start_depth: u32) -> Self {
1150 Self {
1151 max_start_depth: Some(max_start_depth),
1152 max_bytes_to_query: None,
1153 }
1154 }
1155}
1156
1157impl<'a> SyntaxMapMatches<'a> {
1158 fn new(
1159 range: Range<usize>,
1160 text: &'a Rope,
1161 layers: impl Iterator<Item = SyntaxLayer<'a>>,
1162 query: fn(&Grammar) -> Option<&Query>,
1163 options: TreeSitterOptions,
1164 ) -> Self {
1165 let mut result = Self::default();
1166 for layer in layers {
1167 let grammar = match &layer.language.grammar {
1168 Some(grammar) => grammar,
1169 None => continue,
1170 };
1171 let query = match query(grammar) {
1172 Some(query) => query,
1173 None => continue,
1174 };
1175
1176 let mut query_cursor = QueryCursorHandle::new();
1177
1178 // TODO - add a Tree-sitter API to remove the need for this.
1179 let cursor = unsafe {
1180 std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1181 query_cursor.deref_mut(),
1182 )
1183 };
1184 cursor.set_max_start_depth(options.max_start_depth);
1185
1186 if let Some(max_bytes_to_query) = options.max_bytes_to_query {
1187 let midpoint = (range.start + range.end) / 2;
1188 let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2);
1189 let containing_range_end =
1190 containing_range_start.saturating_add(max_bytes_to_query);
1191 cursor.set_containing_byte_range(containing_range_start..containing_range_end);
1192 }
1193
1194 cursor.set_byte_range(range.clone());
1195 let matches = cursor.matches(query, layer.node(), TextProvider(text));
1196 let grammar_index = result
1197 .grammars
1198 .iter()
1199 .position(|g| g.id == grammar.id())
1200 .unwrap_or_else(|| {
1201 result.grammars.push(grammar);
1202 result.grammars.len() - 1
1203 });
1204 let mut layer = SyntaxMapMatchesLayer {
1205 language: layer.language.clone(),
1206 depth: layer.depth,
1207 grammar_index,
1208 matches,
1209 query,
1210 next_pattern_index: 0,
1211 next_captures: Vec::new(),
1212 has_next: false,
1213 _query_cursor: query_cursor,
1214 };
1215
1216 layer.advance();
1217 if layer.has_next {
1218 let key = layer.sort_key();
1219 let ix = match result.layers[..result.active_layer_count]
1220 .binary_search_by_key(&key, |layer| layer.sort_key())
1221 {
1222 Ok(ix) | Err(ix) => ix,
1223 };
1224 result.layers.insert(ix, layer);
1225 result.active_layer_count += 1;
1226 } else {
1227 result.layers.push(layer);
1228 }
1229 }
1230 result
1231 }
1232
1233 pub fn grammars(&self) -> &[&'a Grammar] {
1234 &self.grammars
1235 }
1236
1237 pub fn peek(&self) -> Option<SyntaxMapMatch<'_>> {
1238 let layer = self.layers.first()?;
1239
1240 if !layer.has_next {
1241 return None;
1242 }
1243
1244 Some(SyntaxMapMatch {
1245 language: layer.language.clone(),
1246 depth: layer.depth,
1247 grammar_index: layer.grammar_index,
1248 pattern_index: layer.next_pattern_index,
1249 captures: &layer.next_captures,
1250 })
1251 }
1252
1253 pub fn advance(&mut self) -> bool {
1254 let layer = if let Some(layer) = self.layers.first_mut() {
1255 layer
1256 } else {
1257 return false;
1258 };
1259
1260 layer.advance();
1261 if layer.has_next {
1262 let key = layer.sort_key();
1263 let i = 1 + self.layers[1..self.active_layer_count]
1264 .iter()
1265 .position(|later_layer| key < later_layer.sort_key())
1266 .unwrap_or(self.active_layer_count - 1);
1267 self.layers[0..i].rotate_left(1);
1268 } else if self.active_layer_count != 0 {
1269 self.layers[0..self.active_layer_count].rotate_left(1);
1270 self.active_layer_count -= 1;
1271 }
1272
1273 true
1274 }
1275
1276 // pub fn set_byte_range(&mut self, range: Range<usize>) {
1277 // for layer in &mut self.layers {
1278 // layer.matches.set_byte_range(range.clone());
1279 // layer.advance();
1280 // }
1281 // self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1282 // self.active_layer_count = self
1283 // .layers
1284 // .iter()
1285 // .position(|layer| !layer.has_next)
1286 // .unwrap_or(self.layers.len());
1287 // }
1288}
1289
1290impl SyntaxMapCapturesLayer<'_> {
1291 fn advance(&mut self) {
1292 self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[*ix]);
1293 }
1294
1295 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1296 if let Some(capture) = &self.next_capture {
1297 let range = capture.node.byte_range();
1298 (range.start, Reverse(range.end), self.depth)
1299 } else {
1300 (usize::MAX, Reverse(0), usize::MAX)
1301 }
1302 }
1303}
1304
1305impl SyntaxMapMatchesLayer<'_> {
1306 fn advance(&mut self) {
1307 loop {
1308 if let Some(mat) = self.matches.next() {
1309 if !satisfies_custom_predicates(self.query, mat) {
1310 continue;
1311 }
1312 self.next_captures.clear();
1313 self.next_captures.extend_from_slice(mat.captures);
1314 self.next_pattern_index = mat.pattern_index;
1315 self.has_next = true;
1316 return;
1317 } else {
1318 self.has_next = false;
1319 return;
1320 }
1321 }
1322 }
1323
1324 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1325 if self.has_next {
1326 let captures = &self.next_captures;
1327 if let Some((first, last)) = captures.first().zip(captures.last()) {
1328 return (
1329 first.node.start_byte(),
1330 Reverse(last.node.end_byte()),
1331 self.depth,
1332 );
1333 }
1334 }
1335 (usize::MAX, Reverse(0), usize::MAX)
1336 }
1337}
1338
1339impl<'a> Iterator for SyntaxMapCaptures<'a> {
1340 type Item = SyntaxMapCapture<'a>;
1341
1342 fn next(&mut self) -> Option<Self::Item> {
1343 let result = self.peek();
1344 self.advance();
1345 result
1346 }
1347}
1348
1349fn satisfies_custom_predicates(query: &Query, mat: &QueryMatch) -> bool {
1350 for predicate in query.general_predicates(mat.pattern_index) {
1351 let satisfied = match predicate.operator.as_ref() {
1352 "has-parent?" => has_parent(&predicate.args, mat),
1353 "not-has-parent?" => !has_parent(&predicate.args, mat),
1354 _ => true,
1355 };
1356 if !satisfied {
1357 return false;
1358 }
1359 }
1360 true
1361}
1362
1363fn has_parent(args: &[QueryPredicateArg], mat: &QueryMatch) -> bool {
1364 let (
1365 Some(QueryPredicateArg::Capture(capture_ix)),
1366 Some(QueryPredicateArg::String(parent_kind)),
1367 ) = (args.first(), args.get(1))
1368 else {
1369 return false;
1370 };
1371
1372 let Some(capture) = mat.captures.iter().find(|c| c.index == *capture_ix) else {
1373 return false;
1374 };
1375
1376 capture
1377 .node
1378 .parent()
1379 .is_some_and(|p| p.kind() == parent_kind.as_ref())
1380}
1381
1382fn join_ranges(
1383 a: impl Iterator<Item = Range<usize>>,
1384 b: impl Iterator<Item = Range<usize>>,
1385) -> Vec<Range<usize>> {
1386 let mut result = Vec::<Range<usize>>::new();
1387 let mut a = a.peekable();
1388 let mut b = b.peekable();
1389 loop {
1390 let range = match (a.peek(), b.peek()) {
1391 (Some(range_a), Some(range_b)) => {
1392 if range_a.start < range_b.start {
1393 a.next().unwrap()
1394 } else {
1395 b.next().unwrap()
1396 }
1397 }
1398 (None, Some(_)) => b.next().unwrap(),
1399 (Some(_), None) => a.next().unwrap(),
1400 (None, None) => break,
1401 };
1402
1403 if let Some(last) = result.last_mut()
1404 && range.start <= last.end
1405 {
1406 last.end = last.end.max(range.end);
1407 continue;
1408 }
1409 result.push(range);
1410 }
1411 result
1412}
1413
1414#[derive(Copy, Clone, Debug)]
1415pub struct ParseTimeout;
1416
1417impl std::error::Error for ParseTimeout {}
1418
1419impl std::fmt::Display for ParseTimeout {
1420 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1421 write!(f, "parse timeout")
1422 }
1423}
1424
1425fn parse_text(
1426 grammar: &Grammar,
1427 text: &Rope,
1428 start_byte: usize,
1429 ranges: &[tree_sitter::Range],
1430 old_tree: Option<Tree>,
1431 parse_budget: &mut Option<Duration>,
1432) -> anyhow::Result<Tree> {
1433 with_parser(|parser| {
1434 let mut timed_out = false;
1435 let now = Instant::now();
1436 let mut progress_callback = parse_budget.map(|budget| {
1437 let timed_out = &mut timed_out;
1438 move |_: &_| {
1439 let elapsed = now.elapsed();
1440 if elapsed > budget {
1441 *timed_out = true;
1442 ControlFlow::Break(())
1443 } else {
1444 ControlFlow::Continue(())
1445 }
1446 }
1447 });
1448
1449 let mut chunks = text.chunks_in_range(start_byte..text.len());
1450 parser.set_included_ranges(ranges)?;
1451 parser.set_language(&grammar.ts_language)?;
1452 parser
1453 .parse_with_options(
1454 &mut move |offset, _| {
1455 chunks.seek(start_byte + offset);
1456 chunks.next().unwrap_or("").as_bytes()
1457 },
1458 old_tree.as_ref(),
1459 progress_callback
1460 .as_mut()
1461 .map(|progress_callback| tree_sitter::ParseOptions {
1462 progress_callback: Some(progress_callback),
1463 }),
1464 )
1465 .inspect(|_| {
1466 if let Some(parse_budget) = parse_budget {
1467 *parse_budget = parse_budget.saturating_sub(now.elapsed());
1468 }
1469 })
1470 .ok_or_else(|| match timed_out {
1471 true => anyhow::anyhow!(ParseTimeout),
1472 false => anyhow::anyhow!("parsing failed"),
1473 })
1474 })
1475}
1476
1477fn get_injections(
1478 config: &InjectionConfig,
1479 text: &BufferSnapshot,
1480 outer_range: Range<Anchor>,
1481 node: Node,
1482 language_registry: &Arc<LanguageRegistry>,
1483 depth: usize,
1484 changed_ranges: &[Range<usize>],
1485 combined_injection_ranges: &mut HashMap<LanguageId, (Arc<Language>, Vec<tree_sitter::Range>)>,
1486 queue: &mut BinaryHeap<ParseStep>,
1487) {
1488 let mut query_cursor = QueryCursorHandle::new();
1489 let mut prev_match = None;
1490
1491 // Ensure that a `ParseStep` is created for every combined injection language, even
1492 // if there currently no matches for that injection.
1493 combined_injection_ranges.clear();
1494 for pattern in &config.patterns {
1495 if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined)
1496 && let Some(language) = language_registry
1497 .language_for_name_or_extension(language_name)
1498 .now_or_never()
1499 .and_then(|language| language.ok())
1500 {
1501 combined_injection_ranges.insert(language.id, (language, Vec::new()));
1502 }
1503 }
1504
1505 for query_range in changed_ranges {
1506 query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end + 1);
1507 let mut matches = query_cursor.matches(&config.query, node, TextProvider(text.as_rope()));
1508 while let Some(mat) = matches.next() {
1509 let content_ranges = mat
1510 .nodes_for_capture_index(config.content_capture_ix)
1511 .map(|node| node.range())
1512 .collect::<Vec<_>>();
1513 if content_ranges.is_empty() {
1514 continue;
1515 }
1516
1517 let content_range =
1518 content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
1519
1520 // Avoid duplicate matches if two changed ranges intersect the same injection.
1521 if let Some((prev_pattern_ix, prev_range)) = &prev_match
1522 && mat.pattern_index == *prev_pattern_ix
1523 && content_range == *prev_range
1524 {
1525 continue;
1526 }
1527
1528 prev_match = Some((mat.pattern_index, content_range.clone()));
1529 let combined = config.patterns[mat.pattern_index].combined;
1530
1531 let mut step_range = content_range.clone();
1532 let language_name =
1533 if let Some(name) = config.patterns[mat.pattern_index].language.as_ref() {
1534 Some(Cow::Borrowed(name.as_ref()))
1535 } else if let Some(language_node) = config
1536 .language_capture_ix
1537 .and_then(|ix| mat.nodes_for_capture_index(ix).next())
1538 {
1539 step_range.start = cmp::min(content_range.start, language_node.start_byte());
1540 step_range.end = cmp::max(content_range.end, language_node.end_byte());
1541 let language_name: String =
1542 text.text_for_range(language_node.byte_range()).collect();
1543
1544 // Enable paths ending in a language extension to represent a language name: e.g. "foo/bar/baz.rs"
1545 if let Some(last_dot_pos) = language_name.rfind('.') {
1546 Some(Cow::Owned(language_name[last_dot_pos + 1..].to_string()))
1547 } else {
1548 Some(Cow::Owned(language_name))
1549 }
1550 } else {
1551 None
1552 };
1553
1554 if let Some(language_name) = language_name {
1555 let language = language_registry
1556 .language_for_name_or_extension(&language_name)
1557 .now_or_never()
1558 .and_then(|language| language.ok());
1559 let range = text.anchor_before(step_range.start)..text.anchor_after(step_range.end);
1560 if let Some(language) = language {
1561 if combined {
1562 combined_injection_ranges
1563 .entry(language.id)
1564 .or_insert_with(|| (language.clone(), vec![]))
1565 .1
1566 .extend(content_ranges);
1567 } else {
1568 queue.push(ParseStep {
1569 depth,
1570 language: ParseStepLanguage::Loaded { language },
1571 included_ranges: content_ranges,
1572 range,
1573 mode: ParseMode::Single,
1574 });
1575 }
1576 } else {
1577 queue.push(ParseStep {
1578 depth,
1579 language: ParseStepLanguage::Pending {
1580 name: language_name.into(),
1581 },
1582 included_ranges: content_ranges,
1583 range,
1584 mode: ParseMode::Single,
1585 });
1586 }
1587 }
1588 }
1589 }
1590
1591 for (_, (language, mut included_ranges)) in combined_injection_ranges.drain() {
1592 included_ranges.sort_unstable_by(|a, b| {
1593 Ord::cmp(&a.start_byte, &b.start_byte).then_with(|| Ord::cmp(&a.end_byte, &b.end_byte))
1594 });
1595 queue.push(ParseStep {
1596 depth,
1597 language: ParseStepLanguage::Loaded { language },
1598 range: outer_range.clone(),
1599 included_ranges,
1600 mode: ParseMode::Combined {
1601 parent_layer_range: node.start_byte()..node.end_byte(),
1602 parent_layer_changed_ranges: changed_ranges.to_vec(),
1603 },
1604 })
1605 }
1606}
1607
1608/// Updates the given list of included `ranges`, removing any ranges that intersect
1609/// `removed_ranges`, and inserting the given `new_ranges`.
1610///
1611/// Returns a new vector of ranges, and the range of the vector that was changed,
1612/// from the previous `ranges` vector.
1613pub(crate) fn splice_included_ranges(
1614 mut ranges: Vec<tree_sitter::Range>,
1615 removed_ranges: &[Range<usize>],
1616 new_ranges: &[tree_sitter::Range],
1617) -> (Vec<tree_sitter::Range>, Range<usize>) {
1618 let mut removed_ranges = removed_ranges.iter().cloned().peekable();
1619 let mut new_ranges = new_ranges.iter().cloned().peekable();
1620 let mut ranges_ix = 0;
1621 let mut changed_portion: Option<Range<usize>> = None;
1622 loop {
1623 let next_new_range = new_ranges.peek();
1624 let next_removed_range = removed_ranges.peek();
1625
1626 let (remove, insert) = match (next_removed_range, next_new_range) {
1627 (None, None) => break,
1628 (Some(_), None) => (removed_ranges.next().unwrap(), None),
1629 (Some(next_removed_range), Some(next_new_range)) => {
1630 if next_removed_range.end < next_new_range.start_byte {
1631 (removed_ranges.next().unwrap(), None)
1632 } else {
1633 let mut start = next_new_range.start_byte;
1634 let mut end = next_new_range.end_byte;
1635
1636 while let Some(next_removed_range) = removed_ranges.peek() {
1637 if next_removed_range.start > next_new_range.end_byte {
1638 break;
1639 }
1640 let next_removed_range = removed_ranges.next().unwrap();
1641 start = cmp::min(start, next_removed_range.start);
1642 end = cmp::max(end, next_removed_range.end);
1643 }
1644
1645 (start..end, Some(new_ranges.next().unwrap()))
1646 }
1647 }
1648 (None, Some(next_new_range)) => (
1649 next_new_range.start_byte..next_new_range.end_byte,
1650 Some(new_ranges.next().unwrap()),
1651 ),
1652 };
1653
1654 let mut start_ix = ranges_ix
1655 + match ranges[ranges_ix..].binary_search_by_key(&remove.start, |r| r.end_byte) {
1656 Ok(ix) => ix,
1657 Err(ix) => ix,
1658 };
1659 let mut end_ix = ranges_ix
1660 + match ranges[ranges_ix..].binary_search_by_key(&remove.end, |r| r.start_byte) {
1661 Ok(ix) => ix + 1,
1662 Err(ix) => ix,
1663 };
1664
1665 // If there are empty ranges, then there may be multiple ranges with the same
1666 // start or end. Expand the splice to include any adjacent ranges that touch
1667 // the changed range.
1668 while start_ix > 0 {
1669 if ranges[start_ix - 1].end_byte == remove.start {
1670 start_ix -= 1;
1671 } else {
1672 break;
1673 }
1674 }
1675 while let Some(range) = ranges.get(end_ix) {
1676 if range.start_byte == remove.end {
1677 end_ix += 1;
1678 } else {
1679 break;
1680 }
1681 }
1682 let changed_start = changed_portion
1683 .as_ref()
1684 .map_or(usize::MAX, |range| range.start)
1685 .min(start_ix);
1686 let changed_end =
1687 changed_portion
1688 .as_ref()
1689 .map_or(0, |range| range.end)
1690 .max(if insert.is_some() {
1691 start_ix + 1
1692 } else {
1693 start_ix
1694 });
1695 changed_portion = Some(changed_start..changed_end);
1696
1697 ranges.splice(start_ix..end_ix, insert);
1698 ranges_ix = start_ix;
1699 }
1700
1701 (ranges, changed_portion.unwrap_or(0..0))
1702}
1703
1704/// Ensure there are newline ranges in between content range that appear on
1705/// different lines. For performance, only iterate through the given range of
1706/// indices. All of the ranges in the array are relative to a given start byte
1707/// and point.
1708fn insert_newlines_between_ranges(
1709 indices: Range<usize>,
1710 ranges: &mut Vec<tree_sitter::Range>,
1711 text: &text::BufferSnapshot,
1712 start_byte: usize,
1713 start_point: Point,
1714) {
1715 let mut ix = indices.end + 1;
1716 while ix > indices.start {
1717 ix -= 1;
1718 if 0 == ix || ix == ranges.len() {
1719 continue;
1720 }
1721
1722 let range_b = ranges[ix];
1723 let range_a = &mut ranges[ix - 1];
1724 if range_a.end_point.column == 0 {
1725 continue;
1726 }
1727
1728 if range_a.end_point.row < range_b.start_point.row {
1729 let end_point = start_point + Point::from_ts_point(range_a.end_point);
1730 let line_end = Point::new(end_point.row, text.line_len(end_point.row));
1731 if end_point.column >= line_end.column {
1732 range_a.end_byte += 1;
1733 range_a.end_point.row += 1;
1734 range_a.end_point.column = 0;
1735 } else {
1736 let newline_offset = text.point_to_offset(line_end);
1737 ranges.insert(
1738 ix,
1739 tree_sitter::Range {
1740 start_byte: newline_offset - start_byte,
1741 end_byte: newline_offset - start_byte + 1,
1742 start_point: (line_end - start_point).to_ts_point(),
1743 end_point: ((line_end - start_point) + Point::new(1, 0)).to_ts_point(),
1744 },
1745 )
1746 }
1747 }
1748 }
1749}
1750
1751impl OwnedSyntaxLayer {
1752 /// Returns the root syntax node for this layer.
1753 pub fn node(&self) -> Node<'_> {
1754 self.tree
1755 .root_node_with_offset(self.offset.0, self.offset.1)
1756 }
1757}
1758
1759impl<'a> SyntaxLayer<'a> {
1760 /// Returns an owned version of this layer.
1761 pub fn to_owned(&self) -> OwnedSyntaxLayer {
1762 OwnedSyntaxLayer {
1763 tree: self.tree.clone(),
1764 offset: self.offset,
1765 language: self.language.clone(),
1766 }
1767 }
1768
1769 /// Returns the root node for this layer.
1770 pub fn node(&self) -> Node<'a> {
1771 self.tree
1772 .root_node_with_offset(self.offset.0, self.offset.1)
1773 }
1774
1775 pub(crate) fn override_id(&self, offset: usize, text: &text::BufferSnapshot) -> Option<u32> {
1776 let text = TextProvider(text.as_rope());
1777 let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
1778
1779 let mut query_cursor = QueryCursorHandle::new();
1780 query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
1781 query_cursor.set_containing_byte_range(
1782 offset.saturating_sub(MAX_BYTES_TO_QUERY / 2)
1783 ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2),
1784 );
1785
1786 let mut smallest_match: Option<(u32, Range<usize>)> = None;
1787 let mut matches = query_cursor.matches(&config.query, self.node(), text);
1788 while let Some(mat) = matches.next() {
1789 for capture in mat.captures {
1790 let Some(override_entry) = config.values.get(&capture.index) else {
1791 continue;
1792 };
1793
1794 let range = capture.node.byte_range();
1795 if override_entry.range_is_inclusive {
1796 if offset < range.start || offset > range.end {
1797 continue;
1798 }
1799 } else if offset <= range.start || offset >= range.end {
1800 continue;
1801 }
1802
1803 if let Some((_, smallest_range)) = &smallest_match {
1804 if range.len() < smallest_range.len() {
1805 smallest_match = Some((capture.index, range))
1806 }
1807 continue;
1808 }
1809
1810 smallest_match = Some((capture.index, range));
1811 }
1812 }
1813
1814 smallest_match.map(|(index, _)| index)
1815 }
1816}
1817
1818impl std::ops::Deref for SyntaxMap {
1819 type Target = SyntaxSnapshot;
1820
1821 fn deref(&self) -> &Self::Target {
1822 &self.snapshot
1823 }
1824}
1825
1826impl PartialEq for ParseStep {
1827 fn eq(&self, _: &Self) -> bool {
1828 false
1829 }
1830}
1831
1832impl Eq for ParseStep {}
1833
1834impl PartialOrd for ParseStep {
1835 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1836 Some(self.cmp(other))
1837 }
1838}
1839
1840impl Ord for ParseStep {
1841 fn cmp(&self, other: &Self) -> Ordering {
1842 let range_a = self.range();
1843 let range_b = other.range();
1844 Ord::cmp(&other.depth, &self.depth)
1845 .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
1846 .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
1847 .then_with(|| other.language.id().cmp(&self.language.id()))
1848 }
1849}
1850
1851impl ParseStep {
1852 fn range(&self) -> Range<usize> {
1853 if let ParseMode::Combined {
1854 parent_layer_range, ..
1855 } = &self.mode
1856 {
1857 parent_layer_range.clone()
1858 } else {
1859 let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
1860 let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
1861 start..end
1862 }
1863 }
1864}
1865
1866impl ChangedRegion {
1867 fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
1868 let range_a = &self.range;
1869 let range_b = &other.range;
1870 Ord::cmp(&self.depth, &other.depth)
1871 .then_with(|| range_a.start.cmp(&range_b.start, buffer))
1872 .then_with(|| range_b.end.cmp(&range_a.end, buffer))
1873 }
1874}
1875
1876impl ChangeRegionSet {
1877 fn start_position(&self) -> ChangeStartPosition {
1878 self.0.first().map_or(
1879 ChangeStartPosition {
1880 depth: usize::MAX,
1881 position: Anchor::MAX,
1882 },
1883 |region| ChangeStartPosition {
1884 depth: region.depth,
1885 position: region.range.start,
1886 },
1887 )
1888 }
1889
1890 fn intersects(&self, layer: &SyntaxLayerEntry, text: &BufferSnapshot) -> bool {
1891 for region in &self.0 {
1892 if region.depth < layer.depth {
1893 continue;
1894 }
1895 if region.depth > layer.depth {
1896 break;
1897 }
1898 if region.range.end.cmp(&layer.range.start, text).is_le() {
1899 continue;
1900 }
1901 if region.range.start.cmp(&layer.range.end, text).is_ge() {
1902 break;
1903 }
1904 return true;
1905 }
1906 false
1907 }
1908
1909 fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
1910 if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(®ion, text)) {
1911 self.0.insert(ix, region);
1912 }
1913 }
1914
1915 fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
1916 let prev_len = self.0.len();
1917 self.0.retain(|region| {
1918 region.depth > summary.max_depth
1919 || (region.depth == summary.max_depth
1920 && region
1921 .range
1922 .end
1923 .cmp(&summary.last_layer_range.start, text)
1924 .is_gt())
1925 });
1926 self.0.len() < prev_len
1927 }
1928}
1929
1930impl Default for SyntaxLayerSummary {
1931 fn default() -> Self {
1932 Self {
1933 max_depth: 0,
1934 min_depth: 0,
1935 range: Anchor::MAX..Anchor::MIN,
1936 last_layer_range: Anchor::MIN..Anchor::MAX,
1937 last_layer_language: None,
1938 contains_unknown_injections: false,
1939 }
1940 }
1941}
1942
1943impl sum_tree::Summary for SyntaxLayerSummary {
1944 type Context<'a> = &'a BufferSnapshot;
1945
1946 fn zero(_cx: &BufferSnapshot) -> Self {
1947 Default::default()
1948 }
1949
1950 fn add_summary(&mut self, other: &Self, buffer: Self::Context<'_>) {
1951 if other.max_depth > self.max_depth {
1952 self.max_depth = other.max_depth;
1953 self.range = other.range.clone();
1954 } else {
1955 if self.range == (Anchor::MAX..Anchor::MAX) {
1956 self.range.start = other.range.start;
1957 }
1958 if other.range.end.cmp(&self.range.end, buffer).is_gt() {
1959 self.range.end = other.range.end;
1960 }
1961 }
1962 self.last_layer_range = other.last_layer_range.clone();
1963 self.last_layer_language = other.last_layer_language;
1964 self.contains_unknown_injections |= other.contains_unknown_injections;
1965 }
1966}
1967
1968impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
1969 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1970 Ord::cmp(&self.depth, &cursor_location.max_depth)
1971 .then_with(|| {
1972 self.range
1973 .start
1974 .cmp(&cursor_location.last_layer_range.start, buffer)
1975 })
1976 .then_with(|| {
1977 cursor_location
1978 .last_layer_range
1979 .end
1980 .cmp(&self.range.end, buffer)
1981 })
1982 .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
1983 }
1984}
1985
1986impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for ChangeStartPosition {
1987 fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
1988 Ord::cmp(&self.depth, &cursor_location.max_depth)
1989 .then_with(|| self.position.cmp(&cursor_location.range.end, text))
1990 }
1991}
1992
1993impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPositionBeforeChange {
1994 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1995 if self.change.cmp(cursor_location, buffer).is_le() {
1996 Ordering::Less
1997 } else {
1998 self.position.cmp(cursor_location, buffer)
1999 }
2000 }
2001}
2002
2003impl sum_tree::Item for SyntaxLayerEntry {
2004 type Summary = SyntaxLayerSummary;
2005
2006 fn summary(&self, _cx: &BufferSnapshot) -> Self::Summary {
2007 SyntaxLayerSummary {
2008 min_depth: self.depth,
2009 max_depth: self.depth,
2010 range: self.range.clone(),
2011 last_layer_range: self.range.clone(),
2012 last_layer_language: self.content.language_id(),
2013 contains_unknown_injections: matches!(self.content, SyntaxLayerContent::Pending { .. }),
2014 }
2015 }
2016}
2017
2018impl std::fmt::Debug for SyntaxLayerEntry {
2019 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2020 f.debug_struct("SyntaxLayer")
2021 .field("depth", &self.depth)
2022 .field("range", &self.range)
2023 .field("tree", &self.content.tree())
2024 .finish()
2025 }
2026}
2027
2028impl<'a> tree_sitter::TextProvider<&'a [u8]> for TextProvider<'a> {
2029 type I = ByteChunks<'a>;
2030
2031 fn text(&mut self, node: tree_sitter::Node) -> Self::I {
2032 ByteChunks(self.0.chunks_in_range(node.byte_range()))
2033 }
2034}
2035
2036impl<'a> Iterator for ByteChunks<'a> {
2037 type Item = &'a [u8];
2038
2039 fn next(&mut self) -> Option<Self::Item> {
2040 self.0.next().map(str::as_bytes)
2041 }
2042}
2043
2044impl QueryCursorHandle {
2045 pub fn new() -> Self {
2046 let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_default();
2047 cursor.set_match_limit(64);
2048 QueryCursorHandle(Some(cursor))
2049 }
2050}
2051
2052impl Deref for QueryCursorHandle {
2053 type Target = QueryCursor;
2054
2055 fn deref(&self) -> &Self::Target {
2056 self.0.as_ref().unwrap()
2057 }
2058}
2059
2060impl DerefMut for QueryCursorHandle {
2061 fn deref_mut(&mut self) -> &mut Self::Target {
2062 self.0.as_mut().unwrap()
2063 }
2064}
2065
2066impl Drop for QueryCursorHandle {
2067 fn drop(&mut self) {
2068 let mut cursor = self.0.take().unwrap();
2069 cursor.set_byte_range(0..usize::MAX);
2070 cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
2071 cursor.set_containing_byte_range(0..usize::MAX);
2072 cursor.set_containing_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
2073 QUERY_CURSORS.lock().push(cursor)
2074 }
2075}
2076
2077pub trait ToTreeSitterPoint {
2078 fn to_ts_point(self) -> tree_sitter::Point;
2079 fn from_ts_point(point: tree_sitter::Point) -> Self;
2080}
2081
2082impl ToTreeSitterPoint for Point {
2083 fn to_ts_point(self) -> tree_sitter::Point {
2084 tree_sitter::Point::new(self.row as usize, self.column as usize)
2085 }
2086
2087 fn from_ts_point(point: tree_sitter::Point) -> Self {
2088 Point::new(point.row as u32, point.column as u32)
2089 }
2090}
2091
2092struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]);
2093struct LogPoint(Point);
2094struct LogAnchorRange<'a>(&'a Range<Anchor>, &'a text::BufferSnapshot);
2095struct LogOffsetRanges<'a>(&'a [Range<usize>], &'a text::BufferSnapshot);
2096struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot);
2097
2098impl fmt::Debug for LogIncludedRanges<'_> {
2099 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2100 f.debug_list()
2101 .entries(self.0.iter().map(|range| {
2102 let start = range.start_point;
2103 let end = range.end_point;
2104 (start.row, start.column)..(end.row, end.column)
2105 }))
2106 .finish()
2107 }
2108}
2109
2110impl fmt::Debug for LogAnchorRange<'_> {
2111 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2112 let range = self.0.to_point(self.1);
2113 (LogPoint(range.start)..LogPoint(range.end)).fmt(f)
2114 }
2115}
2116
2117impl fmt::Debug for LogOffsetRanges<'_> {
2118 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2119 f.debug_list()
2120 .entries(self.0.iter().map(|range| {
2121 LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1))
2122 }))
2123 .finish()
2124 }
2125}
2126
2127impl fmt::Debug for LogChangedRegions<'_> {
2128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2129 f.debug_list()
2130 .entries(
2131 self.0
2132 .0
2133 .iter()
2134 .map(|region| LogAnchorRange(®ion.range, self.1)),
2135 )
2136 .finish()
2137 }
2138}
2139
2140impl fmt::Debug for LogPoint {
2141 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2142 (self.0.row, self.0.column).fmt(f)
2143 }
2144}