1#[cfg(test)]
2mod syntax_map_tests;
3
4use crate::{
5 Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry, QUERY_CURSORS, with_parser,
6};
7use collections::HashMap;
8use futures::FutureExt;
9use gpui::SharedString;
10use std::{
11 borrow::Cow,
12 cmp::{self, Ordering, Reverse},
13 collections::BinaryHeap,
14 fmt, iter,
15 ops::{ControlFlow, Deref, DerefMut, Range},
16 sync::{Arc, LazyLock},
17 time::{Duration, Instant},
18};
19use streaming_iterator::StreamingIterator;
20use sum_tree::{Bias, Dimensions, SeekTarget, SumTree};
21use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
22use tree_sitter::{
23 Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches,
24 QueryPredicateArg,
25};
26
27pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024;
28
29pub struct SyntaxMap {
30 snapshot: SyntaxSnapshot,
31 language_registry: Option<Arc<LanguageRegistry>>,
32}
33
34#[derive(Clone)]
35pub struct SyntaxSnapshot {
36 layers: SumTree<SyntaxLayerEntry>,
37 parsed_version: clock::Global,
38 interpolated_version: clock::Global,
39 language_registry_version: usize,
40 update_count: usize,
41}
42
43// Dropping deep treesitter Trees can be quite slow due to deallocating lots of memory.
44// To avoid blocking the main thread, we offload the drop operation to a background thread.
45impl Drop for SyntaxSnapshot {
46 fn drop(&mut self) {
47 static DROP_TX: LazyLock<std::sync::mpsc::Sender<SumTree<SyntaxLayerEntry>>> =
48 LazyLock::new(|| {
49 let (tx, rx) = std::sync::mpsc::channel();
50 std::thread::Builder::new()
51 .name("SyntaxSnapshot::drop".into())
52 .spawn(move || while let Ok(_) = rx.recv() {})
53 .expect("failed to spawn drop thread");
54 tx
55 });
56 // This does allocate a new Arc, but it's cheap and avoids blocking the main thread without needing to use an `Option` or `MaybeUninit`.
57 let _ = DROP_TX.send(std::mem::replace(
58 &mut self.layers,
59 SumTree::from_summary(Default::default()),
60 ));
61 }
62}
63
64#[derive(Default)]
65pub struct SyntaxMapCaptures<'a> {
66 layers: Vec<SyntaxMapCapturesLayer<'a>>,
67 active_layer_count: usize,
68 grammars: Vec<&'a Grammar>,
69}
70
71#[derive(Default)]
72pub struct SyntaxMapMatches<'a> {
73 layers: Vec<SyntaxMapMatchesLayer<'a>>,
74 active_layer_count: usize,
75 grammars: Vec<&'a Grammar>,
76}
77
78#[derive(Debug)]
79pub struct SyntaxMapCapture<'a> {
80 pub node: Node<'a>,
81 pub index: u32,
82 pub grammar_index: usize,
83}
84
85#[derive(Debug)]
86pub struct SyntaxMapMatch<'a> {
87 pub language: Arc<Language>,
88 pub depth: usize,
89 pub pattern_index: usize,
90 pub captures: &'a [QueryCapture<'a>],
91 pub grammar_index: usize,
92}
93
94struct SyntaxMapCapturesLayer<'a> {
95 depth: usize,
96 captures: QueryCaptures<'a, 'a, TextProvider<'a>, &'a [u8]>,
97 next_capture: Option<QueryCapture<'a>>,
98 grammar_index: usize,
99 _query_cursor: QueryCursorHandle,
100}
101
102struct SyntaxMapMatchesLayer<'a> {
103 language: Arc<Language>,
104 depth: usize,
105 next_pattern_index: usize,
106 next_captures: Vec<QueryCapture<'a>>,
107 has_next: bool,
108 matches: QueryMatches<'a, 'a, TextProvider<'a>, &'a [u8]>,
109 query: &'a Query,
110 grammar_index: usize,
111 _query_cursor: QueryCursorHandle,
112}
113
114#[derive(Clone)]
115struct SyntaxLayerEntry {
116 depth: usize,
117 range: Range<Anchor>,
118 content: SyntaxLayerContent,
119}
120
121#[derive(Clone)]
122enum SyntaxLayerContent {
123 Parsed {
124 tree: tree_sitter::Tree,
125 language: Arc<Language>,
126 included_sub_ranges: Option<Vec<Range<Anchor>>>,
127 },
128 Pending {
129 language_name: Arc<str>,
130 },
131}
132
133impl SyntaxLayerContent {
134 fn language_id(&self) -> Option<LanguageId> {
135 match self {
136 SyntaxLayerContent::Parsed { language, .. } => Some(language.id),
137 SyntaxLayerContent::Pending { .. } => None,
138 }
139 }
140
141 fn tree(&self) -> Option<&tree_sitter::Tree> {
142 match self {
143 SyntaxLayerContent::Parsed { tree, .. } => Some(tree),
144 SyntaxLayerContent::Pending { .. } => None,
145 }
146 }
147}
148
149/// A layer of syntax highlighting, corresponding to a single syntax
150/// tree in a particular language.
151#[derive(Debug)]
152pub struct SyntaxLayer<'a> {
153 /// The language for this layer.
154 pub language: &'a Arc<Language>,
155 pub included_sub_ranges: Option<&'a [Range<Anchor>]>,
156 pub(crate) depth: usize,
157 tree: &'a tree_sitter::Tree,
158 pub(crate) offset: (usize, tree_sitter::Point),
159}
160
161/// A layer of syntax highlighting. Like [SyntaxLayer], but holding
162/// owned data instead of references.
163#[derive(Clone)]
164pub struct OwnedSyntaxLayer {
165 /// The language for this layer.
166 pub language: Arc<Language>,
167 tree: tree_sitter::Tree,
168 pub offset: (usize, tree_sitter::Point),
169}
170
171#[derive(Debug, Clone)]
172struct SyntaxLayerSummary {
173 min_depth: usize,
174 max_depth: usize,
175 range: Range<Anchor>,
176 last_layer_range: Range<Anchor>,
177 last_layer_language: Option<LanguageId>,
178 contains_unknown_injections: bool,
179}
180
181#[derive(Clone, Debug)]
182struct SyntaxLayerPosition {
183 depth: usize,
184 range: Range<Anchor>,
185 language: Option<LanguageId>,
186}
187
188#[derive(Clone, Debug)]
189struct ChangeStartPosition {
190 depth: usize,
191 position: Anchor,
192}
193
194#[derive(Clone, Debug)]
195struct SyntaxLayerPositionBeforeChange {
196 position: SyntaxLayerPosition,
197 change: ChangeStartPosition,
198}
199
200struct ParseStep {
201 depth: usize,
202 language: ParseStepLanguage,
203 range: Range<Anchor>,
204 included_ranges: Vec<tree_sitter::Range>,
205 mode: ParseMode,
206}
207
208#[derive(Debug)]
209enum ParseStepLanguage {
210 Loaded { language: Arc<Language> },
211 Pending { name: Arc<str> },
212}
213
214impl ParseStepLanguage {
215 fn name(&self) -> SharedString {
216 match self {
217 ParseStepLanguage::Loaded { language } => language.name().0,
218 ParseStepLanguage::Pending { name } => name.into(),
219 }
220 }
221
222 fn id(&self) -> Option<LanguageId> {
223 match self {
224 ParseStepLanguage::Loaded { language } => Some(language.id),
225 ParseStepLanguage::Pending { .. } => None,
226 }
227 }
228}
229
230enum ParseMode {
231 Single,
232 Combined {
233 parent_layer_range: Range<usize>,
234 parent_layer_changed_ranges: Vec<Range<usize>>,
235 },
236}
237
238#[derive(Debug, PartialEq, Eq)]
239struct ChangedRegion {
240 depth: usize,
241 range: Range<Anchor>,
242}
243
244#[derive(Default)]
245struct ChangeRegionSet(Vec<ChangedRegion>);
246
247struct TextProvider<'a>(&'a Rope);
248
249struct ByteChunks<'a>(text::Chunks<'a>);
250
251pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
252
253impl SyntaxMap {
254 pub fn new(text: &BufferSnapshot) -> Self {
255 Self {
256 snapshot: SyntaxSnapshot::new(text),
257 language_registry: None,
258 }
259 }
260
261 pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
262 self.language_registry = Some(registry);
263 }
264
265 pub fn snapshot(&self) -> SyntaxSnapshot {
266 self.snapshot.clone()
267 }
268
269 pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
270 self.language_registry.clone()
271 }
272
273 pub fn interpolate(&mut self, text: &BufferSnapshot) {
274 self.snapshot.interpolate(text);
275 }
276
277 #[cfg(test)]
278 pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
279 self.snapshot
280 .reparse(text, self.language_registry.clone(), language);
281 }
282
283 pub fn did_parse(&mut self, snapshot: SyntaxSnapshot) {
284 self.snapshot = snapshot;
285 }
286
287 pub fn clear(&mut self, text: &BufferSnapshot) {
288 let update_count = self.snapshot.update_count + 1;
289 self.snapshot = SyntaxSnapshot::new(text);
290 self.snapshot.update_count = update_count;
291 }
292}
293
294impl SyntaxSnapshot {
295 fn new(text: &BufferSnapshot) -> Self {
296 Self {
297 layers: SumTree::new(text),
298 parsed_version: clock::Global::default(),
299 interpolated_version: clock::Global::default(),
300 language_registry_version: 0,
301 update_count: 0,
302 }
303 }
304
305 pub fn is_empty(&self) -> bool {
306 self.layers.is_empty()
307 }
308
309 pub fn root_language(&self) -> Option<Arc<Language>> {
310 match &self.layers.first()?.content {
311 SyntaxLayerContent::Parsed { language, .. } => Some(language.clone()),
312 SyntaxLayerContent::Pending { .. } => None,
313 }
314 }
315
316 pub fn update_count(&self) -> usize {
317 self.update_count
318 }
319
320 #[ztracing::instrument(skip_all)]
321 pub fn interpolate(&mut self, text: &BufferSnapshot) {
322 let edits = text
323 .anchored_edits_since::<Dimensions<usize, Point>>(&self.interpolated_version)
324 .collect::<Vec<_>>();
325 self.interpolated_version = text.version().clone();
326
327 if edits.is_empty() {
328 return;
329 }
330
331 let mut layers = SumTree::new(text);
332 let mut first_edit_ix_for_depth = 0;
333 let mut prev_depth = 0;
334 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
335 cursor.next();
336
337 'outer: loop {
338 let depth = cursor.end().max_depth;
339 if depth > prev_depth {
340 first_edit_ix_for_depth = 0;
341 prev_depth = depth;
342 }
343
344 // Preserve any layers at this depth that precede the first edit.
345 if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
346 let target = ChangeStartPosition {
347 depth,
348 position: edit_range.start,
349 };
350 if target.cmp(cursor.start(), text).is_gt() {
351 let slice = cursor.slice(&target, Bias::Left);
352 layers.append(slice, text);
353 }
354 }
355 // If this layer follows all of the edits, then preserve it and any
356 // subsequent layers at this same depth.
357 else if cursor.item().is_some() {
358 let slice = cursor.slice(
359 &SyntaxLayerPosition {
360 depth: depth + 1,
361 range: Anchor::min_max_range_for_buffer(text.remote_id()),
362 language: None,
363 },
364 Bias::Left,
365 );
366 layers.append(slice, text);
367 continue;
368 };
369
370 let Some(layer) = cursor.item() else { break };
371 let Dimensions(start_byte, start_point, _) =
372 layer.range.start.summary::<Dimensions<usize, Point>>(text);
373
374 // Ignore edits that end before the start of this layer, and don't consider them
375 // for any subsequent layers at this same depth.
376 loop {
377 let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) else {
378 continue 'outer;
379 };
380 if edit_range.end.cmp(&layer.range.start, text).is_le() {
381 first_edit_ix_for_depth += 1;
382 } else {
383 break;
384 }
385 }
386
387 let mut layer = layer.clone();
388 if let SyntaxLayerContent::Parsed { tree, .. } = &mut layer.content {
389 for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
390 // Ignore any edits that follow this layer.
391 if edit_range.start.cmp(&layer.range.end, text).is_ge() {
392 break;
393 }
394
395 // Apply any edits that intersect this layer to the layer's syntax tree.
396 let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
397 tree_sitter::InputEdit {
398 start_byte: edit.new.start.0 - start_byte,
399 old_end_byte: edit.new.start.0 - start_byte
400 + (edit.old.end.0 - edit.old.start.0),
401 new_end_byte: edit.new.end.0 - start_byte,
402 start_position: (edit.new.start.1 - start_point).to_ts_point(),
403 old_end_position: (edit.new.start.1 - start_point
404 + (edit.old.end.1 - edit.old.start.1))
405 .to_ts_point(),
406 new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
407 }
408 } else {
409 let node = tree.root_node();
410 tree_sitter::InputEdit {
411 start_byte: 0,
412 old_end_byte: node.end_byte(),
413 new_end_byte: 0,
414 start_position: Default::default(),
415 old_end_position: node.end_position(),
416 new_end_position: Default::default(),
417 }
418 };
419
420 tree.edit(&tree_edit);
421 }
422
423 debug_assert!(
424 tree.root_node().end_byte() <= text.len(),
425 "tree's size {}, is larger than text size {}",
426 tree.root_node().end_byte(),
427 text.len(),
428 );
429 }
430
431 layers.push(layer, text);
432 cursor.next();
433 }
434
435 layers.append(cursor.suffix(), text);
436 drop(cursor);
437 self.layers = layers;
438 }
439
440 #[ztracing::instrument(skip_all)]
441 pub fn reparse(
442 &mut self,
443 text: &BufferSnapshot,
444 registry: Option<Arc<LanguageRegistry>>,
445 root_language: Arc<Language>,
446 ) {
447 self.reparse_(text, registry, root_language, None).ok();
448 }
449
450 #[ztracing::instrument(skip_all)]
451 pub fn reparse_with_timeout(
452 &mut self,
453 text: &BufferSnapshot,
454 registry: Option<Arc<LanguageRegistry>>,
455 root_language: Arc<Language>,
456 budget: Duration,
457 ) -> Result<(), ParseTimeout> {
458 self.reparse_(text, registry, root_language, Some(budget))
459 }
460
461 #[ztracing::instrument(skip_all, fields(lang = root_language.config.name.0.as_str()))]
462 fn reparse_(
463 &mut self,
464 text: &BufferSnapshot,
465 registry: Option<Arc<LanguageRegistry>>,
466 root_language: Arc<Language>,
467 mut budget: Option<Duration>,
468 ) -> Result<(), ParseTimeout> {
469 let budget = &mut budget;
470 let edit_ranges = text
471 .edits_since::<usize>(&self.parsed_version)
472 .map(|edit| edit.new)
473 .collect::<Vec<_>>();
474 self.reparse_with_ranges(
475 text,
476 root_language.clone(),
477 edit_ranges,
478 registry.as_ref(),
479 budget,
480 )?;
481
482 if let Some(registry) = registry
483 && registry.version() != self.language_registry_version
484 {
485 let mut resolved_injection_ranges = Vec::new();
486 let mut cursor = self
487 .layers
488 .filter::<_, ()>(text, |summary| summary.contains_unknown_injections);
489 cursor.next();
490 while let Some(layer) = cursor.item() {
491 let SyntaxLayerContent::Pending { language_name } = &layer.content else {
492 unreachable!()
493 };
494 if registry
495 .language_for_name_or_extension(language_name)
496 .now_or_never()
497 .and_then(|language| language.ok())
498 .is_some()
499 {
500 let range = layer.range.to_offset(text);
501 log::trace!("reparse range {range:?} for language {language_name:?}");
502 resolved_injection_ranges.push(range);
503 }
504
505 cursor.next();
506 }
507 drop(cursor);
508
509 if !resolved_injection_ranges.is_empty() {
510 self.reparse_with_ranges(
511 text,
512 root_language,
513 resolved_injection_ranges,
514 Some(®istry),
515 budget,
516 )?;
517 }
518 self.language_registry_version = registry.version();
519 }
520
521 self.update_count += 1;
522 Ok(())
523 }
524
525 #[ztracing::instrument(skip_all)]
526 fn reparse_with_ranges(
527 &mut self,
528 text: &BufferSnapshot,
529 root_language: Arc<Language>,
530 invalidated_ranges: Vec<Range<usize>>,
531 registry: Option<&Arc<LanguageRegistry>>,
532 budget: &mut Option<Duration>,
533 ) -> Result<(), ParseTimeout> {
534 log::trace!(
535 "reparse. invalidated ranges:{:?}",
536 LogOffsetRanges(&invalidated_ranges, text),
537 );
538
539 let max_depth = self.layers.summary().max_depth;
540 let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
541 cursor.next();
542 let mut layers = SumTree::new(text);
543
544 let mut changed_regions = ChangeRegionSet::default();
545 let mut queue = BinaryHeap::new();
546 let mut combined_injection_ranges = HashMap::default();
547 queue.push(ParseStep {
548 depth: 0,
549 language: ParseStepLanguage::Loaded {
550 language: root_language,
551 },
552 included_ranges: vec![tree_sitter::Range {
553 start_byte: 0,
554 end_byte: text.len(),
555 start_point: Point::zero().to_ts_point(),
556 end_point: text.max_point().to_ts_point(),
557 }],
558 range: Anchor::min_max_range_for_buffer(text.remote_id()),
559 mode: ParseMode::Single,
560 });
561
562 loop {
563 let step = queue.pop();
564 let position = if let Some(step) = &step {
565 log::trace!(
566 "parse step depth:{}, range:{:?}, language:{} ({:?})",
567 step.depth,
568 LogAnchorRange(&step.range, text),
569 step.language.name(),
570 step.language.id(),
571 );
572 SyntaxLayerPosition {
573 depth: step.depth,
574 range: step.range.clone(),
575 language: step.language.id(),
576 }
577 } else {
578 SyntaxLayerPosition {
579 depth: max_depth + 1,
580 range: Anchor::min_max_range_for_buffer(text.remote_id()),
581 language: None,
582 }
583 };
584
585 let mut done = cursor.item().is_none();
586 while !done && position.cmp(&cursor.end(), text).is_gt() {
587 done = true;
588
589 let bounded_position = SyntaxLayerPositionBeforeChange {
590 position: position.clone(),
591 change: changed_regions.start_position(),
592 };
593 if bounded_position.cmp(cursor.start(), text).is_gt() {
594 let slice = cursor.slice(&bounded_position, Bias::Left);
595 if !slice.is_empty() {
596 layers.append(slice, text);
597 if changed_regions.prune(cursor.end(), text) {
598 done = false;
599 }
600 }
601 }
602
603 while position.cmp(&cursor.end(), text).is_gt() {
604 let Some(layer) = cursor.item() else { break };
605
606 if changed_regions.intersects(layer, text) {
607 if let SyntaxLayerContent::Parsed { language, .. } = &layer.content {
608 log::trace!(
609 "discard layer. language:{}, range:{:?}. changed_regions:{:?}",
610 language.name(),
611 LogAnchorRange(&layer.range, text),
612 LogChangedRegions(&changed_regions, text),
613 );
614 }
615
616 changed_regions.insert(
617 ChangedRegion {
618 depth: layer.depth + 1,
619 range: layer.range.clone(),
620 },
621 text,
622 );
623 } else {
624 layers.push(layer.clone(), text);
625 }
626
627 cursor.next();
628 if changed_regions.prune(cursor.end(), text) {
629 done = false;
630 }
631 }
632 }
633
634 let Some(step) = step else { break };
635 let Dimensions(step_start_byte, step_start_point, _) =
636 step.range.start.summary::<Dimensions<usize, Point>>(text);
637 let step_end_byte = step.range.end.to_offset(text);
638
639 let mut old_layer = cursor.item();
640 if let Some(layer) = old_layer {
641 if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
642 && layer.content.language_id() == step.language.id()
643 {
644 cursor.next();
645 } else {
646 old_layer = None;
647 }
648 }
649
650 let content = match step.language {
651 ParseStepLanguage::Loaded { language } => {
652 let Some(grammar) = language.grammar() else {
653 continue;
654 };
655 let tree;
656 let changed_ranges;
657
658 let mut included_ranges = step.included_ranges;
659 let is_combined = matches!(step.mode, ParseMode::Combined { .. });
660
661 for range in &mut included_ranges {
662 range.start_byte -= step_start_byte;
663 range.end_byte -= step_start_byte;
664 range.start_point = (Point::from_ts_point(range.start_point)
665 - step_start_point)
666 .to_ts_point();
667 range.end_point = (Point::from_ts_point(range.end_point)
668 - step_start_point)
669 .to_ts_point();
670 }
671
672 if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) =
673 old_layer.map(|layer| (&layer.content, layer.range.clone()))
674 {
675 log::trace!(
676 "existing layer. language:{}, range:{:?}, included_ranges:{:?}",
677 language.name(),
678 LogAnchorRange(&layer_range, text),
679 LogIncludedRanges(&old_tree.included_ranges())
680 );
681
682 if let ParseMode::Combined {
683 mut parent_layer_changed_ranges,
684 ..
685 } = step.mode
686 {
687 for range in &mut parent_layer_changed_ranges {
688 range.start = range.start.saturating_sub(step_start_byte);
689 range.end = range.end.saturating_sub(step_start_byte);
690 }
691
692 let changed_indices;
693 (included_ranges, changed_indices) = splice_included_ranges(
694 old_tree.included_ranges(),
695 &parent_layer_changed_ranges,
696 &included_ranges,
697 );
698 insert_newlines_between_ranges(
699 changed_indices,
700 &mut included_ranges,
701 text,
702 step_start_byte,
703 step_start_point,
704 );
705 }
706
707 if included_ranges.is_empty() {
708 included_ranges.push(tree_sitter::Range {
709 start_byte: 0,
710 end_byte: 0,
711 start_point: Default::default(),
712 end_point: Default::default(),
713 });
714 }
715
716 log::trace!(
717 "update layer. language:{}, range:{:?}, included_ranges:{:?}",
718 language.name(),
719 LogAnchorRange(&step.range, text),
720 LogIncludedRanges(&included_ranges),
721 );
722
723 let result = parse_text(
724 grammar,
725 text.as_rope(),
726 step_start_byte,
727 &included_ranges,
728 Some(old_tree),
729 budget,
730 );
731 match result {
732 Ok(t) => tree = t,
733 Err(e) if e.downcast_ref::<ParseTimeout>().is_some() => {
734 return Err(ParseTimeout);
735 }
736 Err(e) => {
737 log::error!("error parsing text: {e:?}");
738 continue;
739 }
740 };
741
742 changed_ranges = join_ranges(
743 invalidated_ranges
744 .iter()
745 .filter(|&range| {
746 range.start <= step_end_byte && range.end >= step_start_byte
747 })
748 .cloned(),
749 old_tree.changed_ranges(&tree).map(|r| {
750 step_start_byte + r.start_byte..step_start_byte + r.end_byte
751 }),
752 );
753 } else {
754 if matches!(step.mode, ParseMode::Combined { .. }) {
755 insert_newlines_between_ranges(
756 0..included_ranges.len(),
757 &mut included_ranges,
758 text,
759 step_start_byte,
760 step_start_point,
761 );
762 }
763
764 if included_ranges.is_empty() {
765 included_ranges.push(tree_sitter::Range {
766 start_byte: 0,
767 end_byte: 0,
768 start_point: Default::default(),
769 end_point: Default::default(),
770 });
771 }
772
773 log::trace!(
774 "create layer. language:{}, range:{:?}, included_ranges:{:?}",
775 language.name(),
776 LogAnchorRange(&step.range, text),
777 LogIncludedRanges(&included_ranges),
778 );
779
780 let result = parse_text(
781 grammar,
782 text.as_rope(),
783 step_start_byte,
784 &included_ranges,
785 None,
786 budget,
787 );
788 match result {
789 Ok(t) => tree = t,
790 Err(e) if e.downcast_ref::<ParseTimeout>().is_some() => {
791 return Err(ParseTimeout);
792 }
793 Err(e) => {
794 log::error!("error parsing text: {e:?}");
795 continue;
796 }
797 };
798 changed_ranges = vec![step_start_byte..step_end_byte];
799 }
800
801 if let (Some((config, registry)), false) = (
802 grammar.injection_config.as_ref().zip(registry.as_ref()),
803 changed_ranges.is_empty(),
804 ) {
805 // Handle invalidation and reactivation of injections on comment update
806 let mut expanded_ranges: Vec<_> = changed_ranges
807 .iter()
808 .map(|range| {
809 let start_row = range.start.to_point(text).row.saturating_sub(1);
810 let end_row = range.end.to_point(text).row.saturating_add(2);
811 text.point_to_offset(Point::new(start_row, 0))
812 ..text.point_to_offset(Point::new(end_row, 0)).min(text.len())
813 })
814 .collect();
815 expanded_ranges.sort_unstable_by_key(|r| r.start);
816 expanded_ranges.dedup_by(|b, a| {
817 let overlaps = b.start <= a.end;
818 if overlaps {
819 a.end = a.end.max(b.end);
820 }
821 overlaps
822 });
823
824 for range in &expanded_ranges {
825 changed_regions.insert(
826 ChangedRegion {
827 depth: step.depth + 1,
828 range: text.anchor_before(range.start)
829 ..text.anchor_after(range.end),
830 },
831 text,
832 );
833 }
834 get_injections(
835 config,
836 text,
837 step.range.clone(),
838 tree.root_node_with_offset(
839 step_start_byte,
840 step_start_point.to_ts_point(),
841 ),
842 registry,
843 step.depth + 1,
844 &expanded_ranges,
845 &mut combined_injection_ranges,
846 &mut queue,
847 );
848 }
849
850 let included_sub_ranges: Option<Vec<Range<Anchor>>> = if is_combined {
851 Some(
852 included_ranges
853 .into_iter()
854 .filter(|r| r.start_byte < r.end_byte)
855 .map(|r| {
856 text.anchor_before(r.start_byte + step_start_byte)
857 ..text.anchor_after(r.end_byte + step_start_byte)
858 })
859 .collect(),
860 )
861 } else {
862 None
863 };
864 SyntaxLayerContent::Parsed {
865 tree,
866 language,
867 included_sub_ranges,
868 }
869 }
870 ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
871 language_name: name,
872 },
873 };
874
875 layers.push(
876 SyntaxLayerEntry {
877 depth: step.depth,
878 range: step.range,
879 content,
880 },
881 text,
882 );
883 }
884
885 drop(cursor);
886 self.layers = layers;
887 self.interpolated_version = text.version.clone();
888 self.parsed_version = text.version.clone();
889 #[cfg(debug_assertions)]
890 self.check_invariants(text);
891 Ok(())
892 }
893
894 #[cfg(debug_assertions)]
895 fn check_invariants(&self, text: &BufferSnapshot) {
896 let mut max_depth = 0;
897 let mut prev_layer: Option<(Range<Anchor>, Option<LanguageId>)> = None;
898 for layer in self.layers.iter() {
899 match Ord::cmp(&layer.depth, &max_depth) {
900 Ordering::Less => {
901 panic!("layers out of order")
902 }
903 Ordering::Equal => {
904 if let Some((prev_range, prev_language_id)) = prev_layer {
905 match layer.range.start.cmp(&prev_range.start, text) {
906 Ordering::Less => panic!("layers out of order"),
907 Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) {
908 Ordering::Less => panic!("layers out of order"),
909 Ordering::Equal => {
910 if layer.content.language_id() < prev_language_id {
911 panic!("layers out of order")
912 }
913 }
914 Ordering::Greater => {}
915 },
916 Ordering::Greater => {}
917 }
918 }
919 prev_layer = Some((layer.range.clone(), layer.content.language_id()));
920 }
921 Ordering::Greater => {
922 prev_layer = None;
923 }
924 }
925
926 max_depth = layer.depth;
927 }
928 }
929
930 pub fn single_tree_captures<'a>(
931 range: Range<usize>,
932 text: &'a Rope,
933 tree: &'a tree_sitter::Tree,
934 language: &'a Arc<Language>,
935 query: fn(&Grammar) -> Option<&Query>,
936 ) -> SyntaxMapCaptures<'a> {
937 SyntaxMapCaptures::new(
938 range,
939 text,
940 [SyntaxLayer {
941 language,
942 tree,
943 included_sub_ranges: None,
944 depth: 0,
945 offset: (0, tree_sitter::Point::new(0, 0)),
946 }]
947 .into_iter(),
948 query,
949 )
950 }
951
952 pub fn captures<'a>(
953 &'a self,
954 range: Range<usize>,
955 buffer: &'a BufferSnapshot,
956 query: fn(&Grammar) -> Option<&Query>,
957 ) -> SyntaxMapCaptures<'a> {
958 SyntaxMapCaptures::new(
959 range.clone(),
960 buffer.as_rope(),
961 self.layers_for_range(range, buffer, true),
962 query,
963 )
964 }
965
966 pub fn matches<'a>(
967 &'a self,
968 range: Range<usize>,
969 buffer: &'a BufferSnapshot,
970 query: fn(&Grammar) -> Option<&Query>,
971 ) -> SyntaxMapMatches<'a> {
972 SyntaxMapMatches::new(
973 range.clone(),
974 buffer.as_rope(),
975 self.layers_for_range(range, buffer, true),
976 query,
977 TreeSitterOptions::default(),
978 )
979 }
980
981 pub fn matches_with_options<'a>(
982 &'a self,
983 range: Range<usize>,
984 buffer: &'a BufferSnapshot,
985 options: TreeSitterOptions,
986 query: fn(&Grammar) -> Option<&Query>,
987 ) -> SyntaxMapMatches<'a> {
988 SyntaxMapMatches::new(
989 range.clone(),
990 buffer.as_rope(),
991 self.layers_for_range(range, buffer, true),
992 query,
993 options,
994 )
995 }
996
997 pub fn languages<'a>(
998 &'a self,
999 buffer: &'a BufferSnapshot,
1000 include_hidden: bool,
1001 ) -> impl Iterator<Item = &'a Arc<Language>> {
1002 let mut cursor = self.layers.cursor::<()>(buffer);
1003 cursor.next();
1004 iter::from_fn(move || {
1005 while let Some(layer) = cursor.item() {
1006 let mut info = None;
1007 if let SyntaxLayerContent::Parsed { language, .. } = &layer.content {
1008 if include_hidden || !language.config.hidden {
1009 info = Some(language);
1010 }
1011 }
1012 cursor.next();
1013 if info.is_some() {
1014 return info;
1015 }
1016 }
1017 None
1018 })
1019 }
1020
1021 #[cfg(test)]
1022 pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayer<'a>> {
1023 self.layers_for_range(0..buffer.len(), buffer, true)
1024 .collect()
1025 }
1026
1027 pub fn layers_for_range<'a, T: ToOffset>(
1028 &'a self,
1029 range: Range<T>,
1030 buffer: &'a BufferSnapshot,
1031 include_hidden: bool,
1032 ) -> impl 'a + Iterator<Item = SyntaxLayer<'a>> {
1033 let start_offset = range.start.to_offset(buffer);
1034 let end_offset = range.end.to_offset(buffer);
1035 let start = buffer.anchor_before(start_offset);
1036 let end = buffer.anchor_after(end_offset);
1037
1038 let mut cursor = self.layers.filter::<_, ()>(buffer, move |summary| {
1039 if summary.max_depth > summary.min_depth {
1040 true
1041 } else {
1042 let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
1043 let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
1044 !is_before_start && !is_after_end
1045 }
1046 });
1047
1048 cursor.next();
1049 iter::from_fn(move || {
1050 while let Some(layer) = cursor.item() {
1051 let mut info = None;
1052 if let SyntaxLayerContent::Parsed {
1053 tree,
1054 language,
1055 included_sub_ranges,
1056 } = &layer.content
1057 {
1058 let layer_start_offset = layer.range.start.to_offset(buffer);
1059 let layer_start_point = layer.range.start.to_point(buffer).to_ts_point();
1060 if include_hidden || !language.config.hidden {
1061 info = Some(SyntaxLayer {
1062 tree,
1063 language,
1064 included_sub_ranges: included_sub_ranges.as_deref(),
1065 depth: layer.depth,
1066 offset: (layer_start_offset, layer_start_point),
1067 });
1068 }
1069 }
1070 cursor.next();
1071 if info.is_some() {
1072 return info;
1073 }
1074 }
1075 None
1076 })
1077 }
1078
1079 pub fn contains_unknown_injections(&self) -> bool {
1080 self.layers.summary().contains_unknown_injections
1081 }
1082
1083 pub fn language_registry_version(&self) -> usize {
1084 self.language_registry_version
1085 }
1086}
1087
1088impl<'a> SyntaxMapCaptures<'a> {
1089 fn new(
1090 range: Range<usize>,
1091 text: &'a Rope,
1092 layers: impl Iterator<Item = SyntaxLayer<'a>>,
1093 query: fn(&Grammar) -> Option<&Query>,
1094 ) -> Self {
1095 let mut result = Self {
1096 layers: Vec::new(),
1097 grammars: Vec::new(),
1098 active_layer_count: 0,
1099 };
1100 for layer in layers {
1101 let grammar = match &layer.language.grammar {
1102 Some(grammar) => grammar,
1103 None => continue,
1104 };
1105 let query = match query(grammar) {
1106 Some(query) => query,
1107 None => continue,
1108 };
1109
1110 let mut query_cursor = QueryCursorHandle::new();
1111
1112 // TODO - add a Tree-sitter API to remove the need for this.
1113 let cursor = unsafe {
1114 std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1115 query_cursor.deref_mut(),
1116 )
1117 };
1118
1119 cursor.set_byte_range(range.clone());
1120 let captures = cursor.captures(query, layer.node(), TextProvider(text));
1121 let grammar_index = result
1122 .grammars
1123 .iter()
1124 .position(|g| g.id == grammar.id())
1125 .unwrap_or_else(|| {
1126 result.grammars.push(grammar);
1127 result.grammars.len() - 1
1128 });
1129 let mut layer = SyntaxMapCapturesLayer {
1130 depth: layer.depth,
1131 grammar_index,
1132 next_capture: None,
1133 captures,
1134 _query_cursor: query_cursor,
1135 };
1136
1137 layer.advance();
1138 if layer.next_capture.is_some() {
1139 let key = layer.sort_key();
1140 let ix = match result.layers[..result.active_layer_count]
1141 .binary_search_by_key(&key, |layer| layer.sort_key())
1142 {
1143 Ok(ix) | Err(ix) => ix,
1144 };
1145 result.layers.insert(ix, layer);
1146 result.active_layer_count += 1;
1147 } else {
1148 result.layers.push(layer);
1149 }
1150 }
1151
1152 result
1153 }
1154
1155 pub fn grammars(&self) -> &[&'a Grammar] {
1156 &self.grammars
1157 }
1158
1159 pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
1160 let layer = self.layers[..self.active_layer_count].first()?;
1161 let capture = layer.next_capture?;
1162 Some(SyntaxMapCapture {
1163 grammar_index: layer.grammar_index,
1164 index: capture.index,
1165 node: capture.node,
1166 })
1167 }
1168
1169 pub fn advance(&mut self) -> bool {
1170 let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
1171 layer
1172 } else {
1173 return false;
1174 };
1175
1176 layer.advance();
1177 if layer.next_capture.is_some() {
1178 let key = layer.sort_key();
1179 let i = 1 + self.layers[1..self.active_layer_count]
1180 .iter()
1181 .position(|later_layer| key < later_layer.sort_key())
1182 .unwrap_or(self.active_layer_count - 1);
1183 self.layers[0..i].rotate_left(1);
1184 } else {
1185 self.layers[0..self.active_layer_count].rotate_left(1);
1186 self.active_layer_count -= 1;
1187 }
1188
1189 true
1190 }
1191
1192 pub fn set_byte_range(&mut self, range: Range<usize>) {
1193 for layer in &mut self.layers {
1194 layer.captures.set_byte_range(range.clone());
1195 if let Some(capture) = &layer.next_capture
1196 && capture.node.end_byte() > range.start
1197 {
1198 continue;
1199 }
1200 layer.advance();
1201 }
1202 self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1203 self.active_layer_count = self
1204 .layers
1205 .iter()
1206 .position(|layer| layer.next_capture.is_none())
1207 .unwrap_or(self.layers.len());
1208 }
1209}
1210
1211#[derive(Default)]
1212pub struct TreeSitterOptions {
1213 pub max_start_depth: Option<u32>,
1214 pub max_bytes_to_query: Option<usize>,
1215}
1216
1217impl TreeSitterOptions {
1218 pub fn max_start_depth(max_start_depth: u32) -> Self {
1219 Self {
1220 max_start_depth: Some(max_start_depth),
1221 max_bytes_to_query: None,
1222 }
1223 }
1224}
1225
1226impl<'a> SyntaxMapMatches<'a> {
1227 fn new(
1228 range: Range<usize>,
1229 text: &'a Rope,
1230 layers: impl Iterator<Item = SyntaxLayer<'a>>,
1231 query: fn(&Grammar) -> Option<&Query>,
1232 options: TreeSitterOptions,
1233 ) -> Self {
1234 let mut result = Self::default();
1235 for layer in layers {
1236 let grammar = match &layer.language.grammar {
1237 Some(grammar) => grammar,
1238 None => continue,
1239 };
1240 let query = match query(grammar) {
1241 Some(query) => query,
1242 None => continue,
1243 };
1244
1245 let mut query_cursor = QueryCursorHandle::new();
1246
1247 // TODO - add a Tree-sitter API to remove the need for this.
1248 let cursor = unsafe {
1249 std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1250 query_cursor.deref_mut(),
1251 )
1252 };
1253 cursor.set_max_start_depth(options.max_start_depth);
1254
1255 if let Some(max_bytes_to_query) = options.max_bytes_to_query {
1256 let midpoint = (range.start + range.end) / 2;
1257 let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2);
1258 let containing_range_end =
1259 containing_range_start.saturating_add(max_bytes_to_query);
1260 cursor.set_containing_byte_range(containing_range_start..containing_range_end);
1261 }
1262
1263 cursor.set_byte_range(range.clone());
1264 let matches = cursor.matches(query, layer.node(), TextProvider(text));
1265 let grammar_index = result
1266 .grammars
1267 .iter()
1268 .position(|g| g.id == grammar.id())
1269 .unwrap_or_else(|| {
1270 result.grammars.push(grammar);
1271 result.grammars.len() - 1
1272 });
1273 let mut layer = SyntaxMapMatchesLayer {
1274 language: layer.language.clone(),
1275 depth: layer.depth,
1276 grammar_index,
1277 matches,
1278 query,
1279 next_pattern_index: 0,
1280 next_captures: Vec::new(),
1281 has_next: false,
1282 _query_cursor: query_cursor,
1283 };
1284
1285 layer.advance();
1286 if layer.has_next {
1287 let key = layer.sort_key();
1288 let ix = match result.layers[..result.active_layer_count]
1289 .binary_search_by_key(&key, |layer| layer.sort_key())
1290 {
1291 Ok(ix) | Err(ix) => ix,
1292 };
1293 result.layers.insert(ix, layer);
1294 result.active_layer_count += 1;
1295 } else {
1296 result.layers.push(layer);
1297 }
1298 }
1299 result
1300 }
1301
1302 pub fn grammars(&self) -> &[&'a Grammar] {
1303 &self.grammars
1304 }
1305
1306 pub fn peek(&self) -> Option<SyntaxMapMatch<'_>> {
1307 let layer = self.layers.first()?;
1308
1309 if !layer.has_next {
1310 return None;
1311 }
1312
1313 Some(SyntaxMapMatch {
1314 language: layer.language.clone(),
1315 depth: layer.depth,
1316 grammar_index: layer.grammar_index,
1317 pattern_index: layer.next_pattern_index,
1318 captures: &layer.next_captures,
1319 })
1320 }
1321
1322 pub fn advance(&mut self) -> bool {
1323 let layer = if let Some(layer) = self.layers.first_mut() {
1324 layer
1325 } else {
1326 return false;
1327 };
1328
1329 layer.advance();
1330 if layer.has_next {
1331 let key = layer.sort_key();
1332 let i = 1 + self.layers[1..self.active_layer_count]
1333 .iter()
1334 .position(|later_layer| key < later_layer.sort_key())
1335 .unwrap_or(self.active_layer_count - 1);
1336 self.layers[0..i].rotate_left(1);
1337 } else if self.active_layer_count != 0 {
1338 self.layers[0..self.active_layer_count].rotate_left(1);
1339 self.active_layer_count -= 1;
1340 }
1341
1342 true
1343 }
1344
1345 // pub fn set_byte_range(&mut self, range: Range<usize>) {
1346 // for layer in &mut self.layers {
1347 // layer.matches.set_byte_range(range.clone());
1348 // layer.advance();
1349 // }
1350 // self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1351 // self.active_layer_count = self
1352 // .layers
1353 // .iter()
1354 // .position(|layer| !layer.has_next)
1355 // .unwrap_or(self.layers.len());
1356 // }
1357}
1358
1359impl SyntaxMapCapturesLayer<'_> {
1360 fn advance(&mut self) {
1361 self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[*ix]);
1362 }
1363
1364 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1365 if let Some(capture) = &self.next_capture {
1366 let range = capture.node.byte_range();
1367 (range.start, Reverse(range.end), self.depth)
1368 } else {
1369 (usize::MAX, Reverse(0), usize::MAX)
1370 }
1371 }
1372}
1373
1374impl SyntaxMapMatchesLayer<'_> {
1375 fn advance(&mut self) {
1376 loop {
1377 if let Some(mat) = self.matches.next() {
1378 if !satisfies_custom_predicates(self.query, mat) {
1379 continue;
1380 }
1381 self.next_captures.clear();
1382 self.next_captures.extend_from_slice(mat.captures);
1383 self.next_pattern_index = mat.pattern_index;
1384 self.has_next = true;
1385 return;
1386 } else {
1387 self.has_next = false;
1388 return;
1389 }
1390 }
1391 }
1392
1393 fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1394 if self.has_next {
1395 let captures = &self.next_captures;
1396 if let Some((first, last)) = captures.first().zip(captures.last()) {
1397 return (
1398 first.node.start_byte(),
1399 Reverse(last.node.end_byte()),
1400 self.depth,
1401 );
1402 }
1403 }
1404 (usize::MAX, Reverse(0), usize::MAX)
1405 }
1406}
1407
1408impl<'a> Iterator for SyntaxMapCaptures<'a> {
1409 type Item = SyntaxMapCapture<'a>;
1410
1411 fn next(&mut self) -> Option<Self::Item> {
1412 let result = self.peek();
1413 self.advance();
1414 result
1415 }
1416}
1417
1418fn satisfies_custom_predicates(query: &Query, mat: &QueryMatch) -> bool {
1419 for predicate in query.general_predicates(mat.pattern_index) {
1420 let satisfied = match predicate.operator.as_ref() {
1421 "has-parent?" => has_parent(&predicate.args, mat),
1422 "not-has-parent?" => !has_parent(&predicate.args, mat),
1423 _ => true,
1424 };
1425 if !satisfied {
1426 return false;
1427 }
1428 }
1429 true
1430}
1431
1432fn has_parent(args: &[QueryPredicateArg], mat: &QueryMatch) -> bool {
1433 let (
1434 Some(QueryPredicateArg::Capture(capture_ix)),
1435 Some(QueryPredicateArg::String(parent_kind)),
1436 ) = (args.first(), args.get(1))
1437 else {
1438 return false;
1439 };
1440
1441 let Some(capture) = mat.captures.iter().find(|c| c.index == *capture_ix) else {
1442 return false;
1443 };
1444
1445 capture
1446 .node
1447 .parent()
1448 .is_some_and(|p| p.kind() == parent_kind.as_ref())
1449}
1450
1451fn join_ranges(
1452 a: impl Iterator<Item = Range<usize>>,
1453 b: impl Iterator<Item = Range<usize>>,
1454) -> Vec<Range<usize>> {
1455 let mut result = Vec::<Range<usize>>::new();
1456 let mut a = a.peekable();
1457 let mut b = b.peekable();
1458 loop {
1459 let range = match (a.peek(), b.peek()) {
1460 (Some(range_a), Some(range_b)) => {
1461 if range_a.start < range_b.start {
1462 a.next().unwrap()
1463 } else {
1464 b.next().unwrap()
1465 }
1466 }
1467 (None, Some(_)) => b.next().unwrap(),
1468 (Some(_), None) => a.next().unwrap(),
1469 (None, None) => break,
1470 };
1471
1472 if let Some(last) = result.last_mut()
1473 && range.start <= last.end
1474 {
1475 last.end = last.end.max(range.end);
1476 continue;
1477 }
1478 result.push(range);
1479 }
1480 result
1481}
1482
1483#[derive(Copy, Clone, Debug)]
1484pub struct ParseTimeout;
1485
1486impl std::error::Error for ParseTimeout {}
1487
1488impl std::fmt::Display for ParseTimeout {
1489 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1490 write!(f, "parse timeout")
1491 }
1492}
1493
1494#[ztracing::instrument(skip_all)]
1495fn parse_text(
1496 grammar: &Grammar,
1497 text: &Rope,
1498 start_byte: usize,
1499 ranges: &[tree_sitter::Range],
1500 old_tree: Option<&tree_sitter::Tree>,
1501 parse_budget: &mut Option<Duration>,
1502) -> anyhow::Result<tree_sitter::Tree> {
1503 with_parser(|parser| {
1504 let mut timed_out = false;
1505 let now = Instant::now();
1506 let mut progress_callback = parse_budget.map(|budget| {
1507 let timed_out = &mut timed_out;
1508 move |_: &_| {
1509 let elapsed = now.elapsed();
1510 if elapsed > budget {
1511 *timed_out = true;
1512 ControlFlow::Break(())
1513 } else {
1514 ControlFlow::Continue(())
1515 }
1516 }
1517 });
1518
1519 let mut chunks = text.chunks_in_range(start_byte..text.len());
1520 parser.set_included_ranges(ranges)?;
1521 parser.set_language(&grammar.ts_language)?;
1522 parser
1523 .parse_with_options(
1524 &mut move |offset, _| {
1525 chunks.seek(start_byte + offset);
1526 chunks.next().unwrap_or("").as_bytes()
1527 },
1528 old_tree,
1529 progress_callback
1530 .as_mut()
1531 .map(|progress_callback| tree_sitter::ParseOptions {
1532 progress_callback: Some(progress_callback),
1533 }),
1534 )
1535 .inspect(|_| {
1536 if let Some(parse_budget) = parse_budget {
1537 *parse_budget = parse_budget.saturating_sub(now.elapsed());
1538 }
1539 })
1540 .ok_or_else(|| match timed_out {
1541 true => anyhow::anyhow!(ParseTimeout),
1542 false => anyhow::anyhow!("parsing failed"),
1543 })
1544 })
1545}
1546
1547#[ztracing::instrument(skip_all)]
1548fn get_injections(
1549 config: &InjectionConfig,
1550 text: &BufferSnapshot,
1551 outer_range: Range<Anchor>,
1552 node: Node,
1553 language_registry: &Arc<LanguageRegistry>,
1554 depth: usize,
1555 changed_ranges: &[Range<usize>],
1556 combined_injection_ranges: &mut HashMap<LanguageId, (Arc<Language>, Vec<tree_sitter::Range>)>,
1557 queue: &mut BinaryHeap<ParseStep>,
1558) {
1559 let mut query_cursor = QueryCursorHandle::new();
1560 let mut prev_match = None;
1561
1562 // Ensure that a `ParseStep` is created for every combined injection language, even
1563 // if there currently no matches for that injection.
1564 combined_injection_ranges.clear();
1565 for pattern in &config.patterns {
1566 if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined)
1567 && let Some(language) = language_registry
1568 .language_for_name_or_extension(language_name)
1569 .now_or_never()
1570 .and_then(|language| language.ok())
1571 {
1572 combined_injection_ranges.insert(language.id, (language, Vec::new()));
1573 }
1574 }
1575
1576 for query_range in changed_ranges {
1577 query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end + 1);
1578 let mut matches = query_cursor.matches(&config.query, node, TextProvider(text.as_rope()));
1579 while let Some(mat) = matches.next() {
1580 let content_ranges = mat
1581 .nodes_for_capture_index(config.content_capture_ix)
1582 .map(|node| node.range())
1583 .collect::<Vec<_>>();
1584 if content_ranges.is_empty() {
1585 continue;
1586 }
1587
1588 let content_range =
1589 content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
1590
1591 // Avoid duplicate matches if two changed ranges intersect the same injection.
1592 if let Some((prev_pattern_ix, prev_range)) = &prev_match
1593 && mat.pattern_index == *prev_pattern_ix
1594 && content_range == *prev_range
1595 {
1596 continue;
1597 }
1598
1599 prev_match = Some((mat.pattern_index, content_range.clone()));
1600 let combined = config.patterns[mat.pattern_index].combined;
1601
1602 let mut step_range = content_range.clone();
1603 let language_name =
1604 if let Some(name) = config.patterns[mat.pattern_index].language.as_ref() {
1605 Some(Cow::Borrowed(name.as_ref()))
1606 } else if let Some(language_node) = config
1607 .language_capture_ix
1608 .and_then(|ix| mat.nodes_for_capture_index(ix).next())
1609 {
1610 step_range.start = cmp::min(content_range.start, language_node.start_byte());
1611 step_range.end = cmp::max(content_range.end, language_node.end_byte());
1612 let language_name: String =
1613 text.text_for_range(language_node.byte_range()).collect();
1614
1615 // Enable paths ending in a language extension to represent a language name: e.g. "foo/bar/baz.rs"
1616 if let Some(last_dot_pos) = language_name.rfind('.') {
1617 Some(Cow::Owned(language_name[last_dot_pos + 1..].to_string()))
1618 } else {
1619 Some(Cow::Owned(language_name))
1620 }
1621 } else {
1622 None
1623 };
1624
1625 if let Some(language_name) = language_name {
1626 let language = language_registry
1627 .language_for_name_or_extension(&language_name)
1628 .now_or_never()
1629 .and_then(|language| language.ok());
1630 let range = text.anchor_before(step_range.start)..text.anchor_after(step_range.end);
1631 if let Some(language) = language {
1632 if combined {
1633 combined_injection_ranges
1634 .entry(language.id)
1635 .or_insert_with(|| (language.clone(), vec![]))
1636 .1
1637 .extend(content_ranges);
1638 } else {
1639 queue.push(ParseStep {
1640 depth,
1641 language: ParseStepLanguage::Loaded { language },
1642 included_ranges: content_ranges,
1643 range,
1644 mode: ParseMode::Single,
1645 });
1646 }
1647 } else {
1648 queue.push(ParseStep {
1649 depth,
1650 language: ParseStepLanguage::Pending {
1651 name: language_name.into(),
1652 },
1653 included_ranges: content_ranges,
1654 range,
1655 mode: ParseMode::Single,
1656 });
1657 }
1658 }
1659 }
1660 }
1661
1662 for (_, (language, mut included_ranges)) in combined_injection_ranges.drain() {
1663 included_ranges.sort_unstable_by(|a, b| {
1664 Ord::cmp(&a.start_byte, &b.start_byte).then_with(|| Ord::cmp(&a.end_byte, &b.end_byte))
1665 });
1666 queue.push(ParseStep {
1667 depth,
1668 language: ParseStepLanguage::Loaded { language },
1669 range: outer_range.clone(),
1670 included_ranges,
1671 mode: ParseMode::Combined {
1672 parent_layer_range: node.start_byte()..node.end_byte(),
1673 parent_layer_changed_ranges: changed_ranges.to_vec(),
1674 },
1675 })
1676 }
1677}
1678
1679/// Updates the given list of included `ranges`, removing any ranges that intersect
1680/// `removed_ranges`, and inserting the given `new_ranges`.
1681///
1682/// Returns a new vector of ranges, and the range of the vector that was changed,
1683/// from the previous `ranges` vector.
1684pub(crate) fn splice_included_ranges(
1685 mut ranges: Vec<tree_sitter::Range>,
1686 removed_ranges: &[Range<usize>],
1687 new_ranges: &[tree_sitter::Range],
1688) -> (Vec<tree_sitter::Range>, Range<usize>) {
1689 let mut removed_ranges = removed_ranges.iter().cloned().peekable();
1690 let mut new_ranges = new_ranges.iter().cloned().peekable();
1691 let mut ranges_ix = 0;
1692 let mut changed_portion: Option<Range<usize>> = None;
1693 loop {
1694 let next_new_range = new_ranges.peek();
1695 let next_removed_range = removed_ranges.peek();
1696
1697 let (remove, insert) = match (next_removed_range, next_new_range) {
1698 (None, None) => break,
1699 (Some(_), None) => (removed_ranges.next().unwrap(), None),
1700 (Some(next_removed_range), Some(next_new_range)) => {
1701 if next_removed_range.end < next_new_range.start_byte {
1702 (removed_ranges.next().unwrap(), None)
1703 } else {
1704 let mut start = next_new_range.start_byte;
1705 let mut end = next_new_range.end_byte;
1706
1707 while let Some(next_removed_range) = removed_ranges.peek() {
1708 if next_removed_range.start > next_new_range.end_byte {
1709 break;
1710 }
1711 let next_removed_range = removed_ranges.next().unwrap();
1712 start = cmp::min(start, next_removed_range.start);
1713 end = cmp::max(end, next_removed_range.end);
1714 }
1715
1716 (start..end, Some(new_ranges.next().unwrap()))
1717 }
1718 }
1719 (None, Some(next_new_range)) => (
1720 next_new_range.start_byte..next_new_range.end_byte,
1721 Some(new_ranges.next().unwrap()),
1722 ),
1723 };
1724
1725 let mut start_ix = ranges_ix
1726 + match ranges[ranges_ix..].binary_search_by_key(&remove.start, |r| r.end_byte) {
1727 Ok(ix) => ix,
1728 Err(ix) => ix,
1729 };
1730 let mut end_ix = ranges_ix
1731 + match ranges[ranges_ix..].binary_search_by_key(&remove.end, |r| r.start_byte) {
1732 Ok(ix) => ix + 1,
1733 Err(ix) => ix,
1734 };
1735
1736 // If there are empty ranges, then there may be multiple ranges with the same
1737 // start or end. Expand the splice to include any adjacent ranges that touch
1738 // the changed range.
1739 while start_ix > 0 {
1740 if ranges[start_ix - 1].end_byte == remove.start {
1741 start_ix -= 1;
1742 } else {
1743 break;
1744 }
1745 }
1746 while let Some(range) = ranges.get(end_ix) {
1747 if range.start_byte == remove.end {
1748 end_ix += 1;
1749 } else {
1750 break;
1751 }
1752 }
1753 let changed_start = changed_portion
1754 .as_ref()
1755 .map_or(usize::MAX, |range| range.start)
1756 .min(start_ix);
1757 let changed_end =
1758 changed_portion
1759 .as_ref()
1760 .map_or(0, |range| range.end)
1761 .max(if insert.is_some() {
1762 start_ix + 1
1763 } else {
1764 start_ix
1765 });
1766 changed_portion = Some(changed_start..changed_end);
1767
1768 ranges.splice(start_ix..end_ix, insert);
1769 ranges_ix = start_ix;
1770 }
1771
1772 (ranges, changed_portion.unwrap_or(0..0))
1773}
1774
1775/// Ensure there are newline ranges in between content range that appear on
1776/// different lines. For performance, only iterate through the given range of
1777/// indices. All of the ranges in the array are relative to a given start byte
1778/// and point.
1779#[ztracing::instrument(skip_all)]
1780fn insert_newlines_between_ranges(
1781 indices: Range<usize>,
1782 ranges: &mut Vec<tree_sitter::Range>,
1783 text: &text::BufferSnapshot,
1784 start_byte: usize,
1785 start_point: Point,
1786) {
1787 let mut ix = indices.end + 1;
1788 while ix > indices.start {
1789 ix -= 1;
1790 if 0 == ix || ix == ranges.len() {
1791 continue;
1792 }
1793
1794 let range_b = ranges[ix];
1795 let range_a = &mut ranges[ix - 1];
1796 if range_a.end_point.column == 0 {
1797 continue;
1798 }
1799
1800 if range_a.end_point.row < range_b.start_point.row {
1801 let end_point = start_point + Point::from_ts_point(range_a.end_point);
1802 let line_end = Point::new(end_point.row, text.line_len(end_point.row));
1803 if end_point.column >= line_end.column {
1804 range_a.end_byte += 1;
1805 range_a.end_point.row += 1;
1806 range_a.end_point.column = 0;
1807 } else {
1808 let newline_offset = text.point_to_offset(line_end);
1809 ranges.insert(
1810 ix,
1811 tree_sitter::Range {
1812 start_byte: newline_offset - start_byte,
1813 end_byte: newline_offset - start_byte + 1,
1814 start_point: (line_end - start_point).to_ts_point(),
1815 end_point: ((line_end - start_point) + Point::new(1, 0)).to_ts_point(),
1816 },
1817 )
1818 }
1819 }
1820 }
1821}
1822
1823impl OwnedSyntaxLayer {
1824 /// Returns the root syntax node for this layer.
1825 pub fn node(&self) -> Node<'_> {
1826 self.tree
1827 .root_node_with_offset(self.offset.0, self.offset.1)
1828 }
1829}
1830
1831impl<'a> SyntaxLayer<'a> {
1832 /// Returns an owned version of this layer.
1833 pub fn to_owned(&self) -> OwnedSyntaxLayer {
1834 OwnedSyntaxLayer {
1835 tree: self.tree.clone(),
1836 offset: self.offset,
1837 language: self.language.clone(),
1838 }
1839 }
1840
1841 /// Returns the root node for this layer.
1842 pub fn node(&self) -> Node<'a> {
1843 self.tree
1844 .root_node_with_offset(self.offset.0, self.offset.1)
1845 }
1846
1847 pub(crate) fn override_id(&self, offset: usize, text: &text::BufferSnapshot) -> Option<u32> {
1848 let text = TextProvider(text.as_rope());
1849 let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
1850
1851 let mut query_cursor = QueryCursorHandle::new();
1852 query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
1853 query_cursor.set_containing_byte_range(
1854 offset.saturating_sub(MAX_BYTES_TO_QUERY / 2)
1855 ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2),
1856 );
1857
1858 let mut smallest_match: Option<(u32, Range<usize>)> = None;
1859 let mut matches = query_cursor.matches(&config.query, self.node(), text);
1860 while let Some(mat) = matches.next() {
1861 for capture in mat.captures {
1862 let Some(override_entry) = config.values.get(&capture.index) else {
1863 continue;
1864 };
1865
1866 let range = capture.node.byte_range();
1867 if override_entry.range_is_inclusive {
1868 if offset < range.start || offset > range.end {
1869 continue;
1870 }
1871 } else if offset <= range.start || offset >= range.end {
1872 continue;
1873 }
1874
1875 if let Some((_, smallest_range)) = &smallest_match {
1876 if range.len() < smallest_range.len() {
1877 smallest_match = Some((capture.index, range))
1878 }
1879 continue;
1880 }
1881
1882 smallest_match = Some((capture.index, range));
1883 }
1884 }
1885
1886 smallest_match.map(|(index, _)| index)
1887 }
1888}
1889
1890impl std::ops::Deref for SyntaxMap {
1891 type Target = SyntaxSnapshot;
1892
1893 fn deref(&self) -> &Self::Target {
1894 &self.snapshot
1895 }
1896}
1897
1898impl PartialEq for ParseStep {
1899 fn eq(&self, _: &Self) -> bool {
1900 false
1901 }
1902}
1903
1904impl Eq for ParseStep {}
1905
1906impl PartialOrd for ParseStep {
1907 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1908 Some(self.cmp(other))
1909 }
1910}
1911
1912impl Ord for ParseStep {
1913 fn cmp(&self, other: &Self) -> Ordering {
1914 let range_a = self.range();
1915 let range_b = other.range();
1916 Ord::cmp(&other.depth, &self.depth)
1917 .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
1918 .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
1919 .then_with(|| other.language.id().cmp(&self.language.id()))
1920 }
1921}
1922
1923impl ParseStep {
1924 fn range(&self) -> Range<usize> {
1925 if let ParseMode::Combined {
1926 parent_layer_range, ..
1927 } = &self.mode
1928 {
1929 parent_layer_range.clone()
1930 } else {
1931 let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
1932 let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
1933 start..end
1934 }
1935 }
1936}
1937
1938impl ChangedRegion {
1939 fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
1940 let range_a = &self.range;
1941 let range_b = &other.range;
1942 Ord::cmp(&self.depth, &other.depth)
1943 .then_with(|| range_a.start.cmp(&range_b.start, buffer))
1944 .then_with(|| range_b.end.cmp(&range_a.end, buffer))
1945 }
1946}
1947
1948impl ChangeRegionSet {
1949 fn start_position(&self) -> ChangeStartPosition {
1950 self.0.first().map_or(
1951 ChangeStartPosition {
1952 depth: usize::MAX,
1953 position: Anchor::MAX,
1954 },
1955 |region| ChangeStartPosition {
1956 depth: region.depth,
1957 position: region.range.start,
1958 },
1959 )
1960 }
1961
1962 fn intersects(&self, layer: &SyntaxLayerEntry, text: &BufferSnapshot) -> bool {
1963 for region in &self.0 {
1964 if region.depth < layer.depth {
1965 continue;
1966 }
1967 if region.depth > layer.depth {
1968 break;
1969 }
1970 if region.range.end.cmp(&layer.range.start, text).is_le() {
1971 continue;
1972 }
1973 if region.range.start.cmp(&layer.range.end, text).is_ge() {
1974 break;
1975 }
1976 return true;
1977 }
1978 false
1979 }
1980
1981 fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
1982 if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(®ion, text)) {
1983 self.0.insert(ix, region);
1984 }
1985 }
1986
1987 fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
1988 let prev_len = self.0.len();
1989 self.0.retain(|region| {
1990 region.depth > summary.max_depth
1991 || (region.depth == summary.max_depth
1992 && region
1993 .range
1994 .end
1995 .cmp(&summary.last_layer_range.start, text)
1996 .is_gt())
1997 });
1998 self.0.len() < prev_len
1999 }
2000}
2001
2002impl Default for SyntaxLayerSummary {
2003 fn default() -> Self {
2004 Self {
2005 max_depth: 0,
2006 min_depth: 0,
2007 range: Anchor::MAX..Anchor::MIN,
2008 last_layer_range: Anchor::MIN..Anchor::MAX,
2009 last_layer_language: None,
2010 contains_unknown_injections: false,
2011 }
2012 }
2013}
2014
2015impl sum_tree::Summary for SyntaxLayerSummary {
2016 type Context<'a> = &'a BufferSnapshot;
2017
2018 fn zero(_cx: &BufferSnapshot) -> Self {
2019 Default::default()
2020 }
2021
2022 fn add_summary(&mut self, other: &Self, buffer: Self::Context<'_>) {
2023 if other.max_depth > self.max_depth {
2024 self.max_depth = other.max_depth;
2025 self.range = other.range.clone();
2026 } else {
2027 if self.range == (Anchor::MAX..Anchor::MAX) {
2028 self.range.start = other.range.start;
2029 }
2030 if other.range.end.cmp(&self.range.end, buffer).is_gt() {
2031 self.range.end = other.range.end;
2032 }
2033 }
2034 self.last_layer_range = other.last_layer_range.clone();
2035 self.last_layer_language = other.last_layer_language;
2036 self.contains_unknown_injections |= other.contains_unknown_injections;
2037 }
2038}
2039
2040impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
2041 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
2042 Ord::cmp(&self.depth, &cursor_location.max_depth)
2043 .then_with(|| {
2044 self.range
2045 .start
2046 .cmp(&cursor_location.last_layer_range.start, buffer)
2047 })
2048 .then_with(|| {
2049 cursor_location
2050 .last_layer_range
2051 .end
2052 .cmp(&self.range.end, buffer)
2053 })
2054 .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
2055 }
2056}
2057
2058impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for ChangeStartPosition {
2059 fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
2060 Ord::cmp(&self.depth, &cursor_location.max_depth)
2061 .then_with(|| self.position.cmp(&cursor_location.range.end, text))
2062 }
2063}
2064
2065impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPositionBeforeChange {
2066 fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
2067 if self.change.cmp(cursor_location, buffer).is_le() {
2068 Ordering::Less
2069 } else {
2070 self.position.cmp(cursor_location, buffer)
2071 }
2072 }
2073}
2074
2075impl sum_tree::Item for SyntaxLayerEntry {
2076 type Summary = SyntaxLayerSummary;
2077
2078 fn summary(&self, _cx: &BufferSnapshot) -> Self::Summary {
2079 SyntaxLayerSummary {
2080 min_depth: self.depth,
2081 max_depth: self.depth,
2082 range: self.range.clone(),
2083 last_layer_range: self.range.clone(),
2084 last_layer_language: self.content.language_id(),
2085 contains_unknown_injections: matches!(self.content, SyntaxLayerContent::Pending { .. }),
2086 }
2087 }
2088}
2089
2090impl std::fmt::Debug for SyntaxLayerEntry {
2091 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2092 f.debug_struct("SyntaxLayer")
2093 .field("depth", &self.depth)
2094 .field("range", &self.range)
2095 .field("tree", &self.content.tree())
2096 .finish()
2097 }
2098}
2099
2100impl<'a> tree_sitter::TextProvider<&'a [u8]> for TextProvider<'a> {
2101 type I = ByteChunks<'a>;
2102
2103 fn text(&mut self, node: tree_sitter::Node) -> Self::I {
2104 ByteChunks(self.0.chunks_in_range(node.byte_range()))
2105 }
2106}
2107
2108impl<'a> Iterator for ByteChunks<'a> {
2109 type Item = &'a [u8];
2110
2111 fn next(&mut self) -> Option<Self::Item> {
2112 self.0.next().map(str::as_bytes)
2113 }
2114}
2115
2116impl QueryCursorHandle {
2117 pub fn new() -> Self {
2118 let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_default();
2119 cursor.set_match_limit(64);
2120 QueryCursorHandle(Some(cursor))
2121 }
2122}
2123
2124impl Deref for QueryCursorHandle {
2125 type Target = QueryCursor;
2126
2127 fn deref(&self) -> &Self::Target {
2128 self.0.as_ref().unwrap()
2129 }
2130}
2131
2132impl DerefMut for QueryCursorHandle {
2133 fn deref_mut(&mut self) -> &mut Self::Target {
2134 self.0.as_mut().unwrap()
2135 }
2136}
2137
2138impl Drop for QueryCursorHandle {
2139 fn drop(&mut self) {
2140 let mut cursor = self.0.take().unwrap();
2141 cursor.set_byte_range(0..usize::MAX);
2142 cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
2143 cursor.set_containing_byte_range(0..usize::MAX);
2144 cursor.set_containing_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
2145 QUERY_CURSORS.lock().push(cursor)
2146 }
2147}
2148
2149pub trait ToTreeSitterPoint {
2150 fn to_ts_point(self) -> tree_sitter::Point;
2151 fn from_ts_point(point: tree_sitter::Point) -> Self;
2152}
2153
2154impl ToTreeSitterPoint for Point {
2155 fn to_ts_point(self) -> tree_sitter::Point {
2156 tree_sitter::Point::new(self.row as usize, self.column as usize)
2157 }
2158
2159 fn from_ts_point(point: tree_sitter::Point) -> Self {
2160 Point::new(point.row as u32, point.column as u32)
2161 }
2162}
2163
2164struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]);
2165struct LogPoint(Point);
2166struct LogAnchorRange<'a>(&'a Range<Anchor>, &'a text::BufferSnapshot);
2167struct LogOffsetRanges<'a>(&'a [Range<usize>], &'a text::BufferSnapshot);
2168struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot);
2169
2170impl fmt::Debug for LogIncludedRanges<'_> {
2171 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2172 f.debug_list()
2173 .entries(self.0.iter().map(|range| {
2174 let start = range.start_point;
2175 let end = range.end_point;
2176 (start.row, start.column)..(end.row, end.column)
2177 }))
2178 .finish()
2179 }
2180}
2181
2182impl fmt::Debug for LogAnchorRange<'_> {
2183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2184 let range = self.0.to_point(self.1);
2185 (LogPoint(range.start)..LogPoint(range.end)).fmt(f)
2186 }
2187}
2188
2189impl fmt::Debug for LogOffsetRanges<'_> {
2190 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2191 f.debug_list()
2192 .entries(self.0.iter().map(|range| {
2193 LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1))
2194 }))
2195 .finish()
2196 }
2197}
2198
2199impl fmt::Debug for LogChangedRegions<'_> {
2200 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2201 f.debug_list()
2202 .entries(
2203 self.0
2204 .0
2205 .iter()
2206 .map(|region| LogAnchorRange(®ion.range, self.1)),
2207 )
2208 .finish()
2209 }
2210}
2211
2212impl fmt::Debug for LogPoint {
2213 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2214 (self.0.row, self.0.column).fmt(f)
2215 }
2216}