syntax_map.rs

   1#[cfg(test)]
   2mod syntax_map_tests;
   3
   4use crate::{
   5    Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry, QUERY_CURSORS, with_parser,
   6};
   7use anyhow::Context as _;
   8use collections::HashMap;
   9use futures::FutureExt;
  10use gpui::SharedString;
  11use std::{
  12    borrow::Cow,
  13    cmp::{self, Ordering, Reverse},
  14    collections::BinaryHeap,
  15    fmt, iter,
  16    ops::{Deref, DerefMut, Range},
  17    sync::Arc,
  18};
  19use streaming_iterator::StreamingIterator;
  20use sum_tree::{Bias, Dimensions, SeekTarget, SumTree};
  21use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
  22use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};
  23
  24pub const MAX_BYTES_TO_QUERY: usize = 16 * 1024;
  25
  26pub struct SyntaxMap {
  27    snapshot: SyntaxSnapshot,
  28    language_registry: Option<Arc<LanguageRegistry>>,
  29}
  30
  31#[derive(Clone)]
  32pub struct SyntaxSnapshot {
  33    layers: SumTree<SyntaxLayerEntry>,
  34    parsed_version: clock::Global,
  35    interpolated_version: clock::Global,
  36    language_registry_version: usize,
  37    update_count: usize,
  38}
  39
  40#[derive(Default)]
  41pub struct SyntaxMapCaptures<'a> {
  42    layers: Vec<SyntaxMapCapturesLayer<'a>>,
  43    active_layer_count: usize,
  44    grammars: Vec<&'a Grammar>,
  45}
  46
  47#[derive(Default)]
  48pub struct SyntaxMapMatches<'a> {
  49    layers: Vec<SyntaxMapMatchesLayer<'a>>,
  50    active_layer_count: usize,
  51    grammars: Vec<&'a Grammar>,
  52}
  53
  54#[derive(Debug)]
  55pub struct SyntaxMapCapture<'a> {
  56    pub node: Node<'a>,
  57    pub index: u32,
  58    pub grammar_index: usize,
  59}
  60
  61#[derive(Debug)]
  62pub struct SyntaxMapMatch<'a> {
  63    pub language: Arc<Language>,
  64    pub depth: usize,
  65    pub pattern_index: usize,
  66    pub captures: &'a [QueryCapture<'a>],
  67    pub grammar_index: usize,
  68}
  69
  70struct SyntaxMapCapturesLayer<'a> {
  71    depth: usize,
  72    captures: QueryCaptures<'a, 'a, TextProvider<'a>, &'a [u8]>,
  73    next_capture: Option<QueryCapture<'a>>,
  74    grammar_index: usize,
  75    _query_cursor: QueryCursorHandle,
  76}
  77
  78struct SyntaxMapMatchesLayer<'a> {
  79    language: Arc<Language>,
  80    depth: usize,
  81    next_pattern_index: usize,
  82    next_captures: Vec<QueryCapture<'a>>,
  83    has_next: bool,
  84    matches: QueryMatches<'a, 'a, TextProvider<'a>, &'a [u8]>,
  85    grammar_index: usize,
  86    _query_cursor: QueryCursorHandle,
  87}
  88
  89#[derive(Clone)]
  90struct SyntaxLayerEntry {
  91    depth: usize,
  92    range: Range<Anchor>,
  93    content: SyntaxLayerContent,
  94}
  95
  96#[derive(Clone)]
  97enum SyntaxLayerContent {
  98    Parsed {
  99        tree: tree_sitter::Tree,
 100        language: Arc<Language>,
 101        included_sub_ranges: Option<Vec<Range<Anchor>>>,
 102    },
 103    Pending {
 104        language_name: Arc<str>,
 105    },
 106}
 107
 108impl SyntaxLayerContent {
 109    fn language_id(&self) -> Option<LanguageId> {
 110        match self {
 111            SyntaxLayerContent::Parsed { language, .. } => Some(language.id),
 112            SyntaxLayerContent::Pending { .. } => None,
 113        }
 114    }
 115
 116    fn tree(&self) -> Option<&Tree> {
 117        match self {
 118            SyntaxLayerContent::Parsed { tree, .. } => Some(tree),
 119            SyntaxLayerContent::Pending { .. } => None,
 120        }
 121    }
 122}
 123
 124/// A layer of syntax highlighting, corresponding to a single syntax
 125/// tree in a particular language.
 126#[derive(Debug)]
 127pub struct SyntaxLayer<'a> {
 128    /// The language for this layer.
 129    pub language: &'a Arc<Language>,
 130    pub included_sub_ranges: Option<&'a [Range<Anchor>]>,
 131    pub(crate) depth: usize,
 132    tree: &'a Tree,
 133    pub(crate) offset: (usize, tree_sitter::Point),
 134}
 135
 136/// A layer of syntax highlighting. Like [SyntaxLayer], but holding
 137/// owned data instead of references.
 138#[derive(Clone)]
 139pub struct OwnedSyntaxLayer {
 140    /// The language for this layer.
 141    pub language: Arc<Language>,
 142    tree: tree_sitter::Tree,
 143    pub offset: (usize, tree_sitter::Point),
 144}
 145
 146#[derive(Debug, Clone)]
 147struct SyntaxLayerSummary {
 148    min_depth: usize,
 149    max_depth: usize,
 150    range: Range<Anchor>,
 151    last_layer_range: Range<Anchor>,
 152    last_layer_language: Option<LanguageId>,
 153    contains_unknown_injections: bool,
 154}
 155
 156#[derive(Clone, Debug)]
 157struct SyntaxLayerPosition {
 158    depth: usize,
 159    range: Range<Anchor>,
 160    language: Option<LanguageId>,
 161}
 162
 163#[derive(Clone, Debug)]
 164struct ChangeStartPosition {
 165    depth: usize,
 166    position: Anchor,
 167}
 168
 169#[derive(Clone, Debug)]
 170struct SyntaxLayerPositionBeforeChange {
 171    position: SyntaxLayerPosition,
 172    change: ChangeStartPosition,
 173}
 174
 175struct ParseStep {
 176    depth: usize,
 177    language: ParseStepLanguage,
 178    range: Range<Anchor>,
 179    included_ranges: Vec<tree_sitter::Range>,
 180    mode: ParseMode,
 181}
 182
 183#[derive(Debug)]
 184enum ParseStepLanguage {
 185    Loaded { language: Arc<Language> },
 186    Pending { name: Arc<str> },
 187}
 188
 189impl ParseStepLanguage {
 190    fn name(&self) -> SharedString {
 191        match self {
 192            ParseStepLanguage::Loaded { language } => language.name().0,
 193            ParseStepLanguage::Pending { name } => name.into(),
 194        }
 195    }
 196
 197    fn id(&self) -> Option<LanguageId> {
 198        match self {
 199            ParseStepLanguage::Loaded { language } => Some(language.id),
 200            ParseStepLanguage::Pending { .. } => None,
 201        }
 202    }
 203}
 204
 205enum ParseMode {
 206    Single,
 207    Combined {
 208        parent_layer_range: Range<usize>,
 209        parent_layer_changed_ranges: Vec<Range<usize>>,
 210    },
 211}
 212
 213#[derive(Debug, PartialEq, Eq)]
 214struct ChangedRegion {
 215    depth: usize,
 216    range: Range<Anchor>,
 217}
 218
 219#[derive(Default)]
 220struct ChangeRegionSet(Vec<ChangedRegion>);
 221
 222struct TextProvider<'a>(&'a Rope);
 223
 224struct ByteChunks<'a>(text::Chunks<'a>);
 225
 226pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
 227
 228impl SyntaxMap {
 229    pub fn new(text: &BufferSnapshot) -> Self {
 230        Self {
 231            snapshot: SyntaxSnapshot::new(text),
 232            language_registry: None,
 233        }
 234    }
 235
 236    pub fn set_language_registry(&mut self, registry: Arc<LanguageRegistry>) {
 237        self.language_registry = Some(registry);
 238    }
 239
 240    pub fn snapshot(&self) -> SyntaxSnapshot {
 241        self.snapshot.clone()
 242    }
 243
 244    pub fn language_registry(&self) -> Option<Arc<LanguageRegistry>> {
 245        self.language_registry.clone()
 246    }
 247
 248    pub fn interpolate(&mut self, text: &BufferSnapshot) {
 249        self.snapshot.interpolate(text);
 250    }
 251
 252    #[cfg(test)]
 253    pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
 254        self.snapshot
 255            .reparse(text, self.language_registry.clone(), language);
 256    }
 257
 258    pub fn did_parse(&mut self, snapshot: SyntaxSnapshot) {
 259        self.snapshot = snapshot;
 260    }
 261
 262    pub fn clear(&mut self, text: &BufferSnapshot) {
 263        let update_count = self.snapshot.update_count + 1;
 264        self.snapshot = SyntaxSnapshot::new(text);
 265        self.snapshot.update_count = update_count;
 266    }
 267}
 268
 269impl SyntaxSnapshot {
 270    fn new(text: &BufferSnapshot) -> Self {
 271        Self {
 272            layers: SumTree::new(text),
 273            parsed_version: clock::Global::default(),
 274            interpolated_version: clock::Global::default(),
 275            language_registry_version: 0,
 276            update_count: 0,
 277        }
 278    }
 279
 280    pub fn is_empty(&self) -> bool {
 281        self.layers.is_empty()
 282    }
 283
 284    pub fn root_language(&self) -> Option<Arc<Language>> {
 285        match &self.layers.first()?.content {
 286            SyntaxLayerContent::Parsed { language, .. } => Some(language.clone()),
 287            SyntaxLayerContent::Pending { .. } => None,
 288        }
 289    }
 290
 291    pub fn update_count(&self) -> usize {
 292        self.update_count
 293    }
 294
 295    pub fn interpolate(&mut self, text: &BufferSnapshot) {
 296        let edits = text
 297            .anchored_edits_since::<Dimensions<usize, Point>>(&self.interpolated_version)
 298            .collect::<Vec<_>>();
 299        self.interpolated_version = text.version().clone();
 300
 301        if edits.is_empty() {
 302            return;
 303        }
 304
 305        let mut layers = SumTree::new(text);
 306        let mut first_edit_ix_for_depth = 0;
 307        let mut prev_depth = 0;
 308        let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
 309        cursor.next();
 310
 311        'outer: loop {
 312            let depth = cursor.end().max_depth;
 313            if depth > prev_depth {
 314                first_edit_ix_for_depth = 0;
 315                prev_depth = depth;
 316            }
 317
 318            // Preserve any layers at this depth that precede the first edit.
 319            if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) {
 320                let target = ChangeStartPosition {
 321                    depth,
 322                    position: edit_range.start,
 323                };
 324                if target.cmp(cursor.start(), text).is_gt() {
 325                    let slice = cursor.slice(&target, Bias::Left);
 326                    layers.append(slice, text);
 327                }
 328            }
 329            // If this layer follows all of the edits, then preserve it and any
 330            // subsequent layers at this same depth.
 331            else if cursor.item().is_some() {
 332                let slice = cursor.slice(
 333                    &SyntaxLayerPosition {
 334                        depth: depth + 1,
 335                        range: Anchor::min_max_range_for_buffer(text.remote_id()),
 336                        language: None,
 337                    },
 338                    Bias::Left,
 339                );
 340                layers.append(slice, text);
 341                continue;
 342            };
 343
 344            let Some(layer) = cursor.item() else { break };
 345            let Dimensions(start_byte, start_point, _) =
 346                layer.range.start.summary::<Dimensions<usize, Point>>(text);
 347
 348            // Ignore edits that end before the start of this layer, and don't consider them
 349            // for any subsequent layers at this same depth.
 350            loop {
 351                let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) else {
 352                    continue 'outer;
 353                };
 354                if edit_range.end.cmp(&layer.range.start, text).is_le() {
 355                    first_edit_ix_for_depth += 1;
 356                } else {
 357                    break;
 358                }
 359            }
 360
 361            let mut layer = layer.clone();
 362            if let SyntaxLayerContent::Parsed { tree, .. } = &mut layer.content {
 363                for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
 364                    // Ignore any edits that follow this layer.
 365                    if edit_range.start.cmp(&layer.range.end, text).is_ge() {
 366                        break;
 367                    }
 368
 369                    // Apply any edits that intersect this layer to the layer's syntax tree.
 370                    let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
 371                        tree_sitter::InputEdit {
 372                            start_byte: edit.new.start.0 - start_byte,
 373                            old_end_byte: edit.new.start.0 - start_byte
 374                                + (edit.old.end.0 - edit.old.start.0),
 375                            new_end_byte: edit.new.end.0 - start_byte,
 376                            start_position: (edit.new.start.1 - start_point).to_ts_point(),
 377                            old_end_position: (edit.new.start.1 - start_point
 378                                + (edit.old.end.1 - edit.old.start.1))
 379                                .to_ts_point(),
 380                            new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
 381                        }
 382                    } else {
 383                        let node = tree.root_node();
 384                        tree_sitter::InputEdit {
 385                            start_byte: 0,
 386                            old_end_byte: node.end_byte(),
 387                            new_end_byte: 0,
 388                            start_position: Default::default(),
 389                            old_end_position: node.end_position(),
 390                            new_end_position: Default::default(),
 391                        }
 392                    };
 393
 394                    tree.edit(&tree_edit);
 395                }
 396
 397                debug_assert!(
 398                    tree.root_node().end_byte() <= text.len(),
 399                    "tree's size {}, is larger than text size {}",
 400                    tree.root_node().end_byte(),
 401                    text.len(),
 402                );
 403            }
 404
 405            layers.push(layer, text);
 406            cursor.next();
 407        }
 408
 409        layers.append(cursor.suffix(), text);
 410        drop(cursor);
 411        self.layers = layers;
 412    }
 413
 414    pub fn reparse(
 415        &mut self,
 416        text: &BufferSnapshot,
 417        registry: Option<Arc<LanguageRegistry>>,
 418        root_language: Arc<Language>,
 419    ) {
 420        let edit_ranges = text
 421            .edits_since::<usize>(&self.parsed_version)
 422            .map(|edit| edit.new)
 423            .collect::<Vec<_>>();
 424        self.reparse_with_ranges(text, root_language.clone(), edit_ranges, registry.as_ref());
 425
 426        if let Some(registry) = registry
 427            && registry.version() != self.language_registry_version
 428        {
 429            let mut resolved_injection_ranges = Vec::new();
 430            let mut cursor = self
 431                .layers
 432                .filter::<_, ()>(text, |summary| summary.contains_unknown_injections);
 433            cursor.next();
 434            while let Some(layer) = cursor.item() {
 435                let SyntaxLayerContent::Pending { language_name } = &layer.content else {
 436                    unreachable!()
 437                };
 438                if registry
 439                    .language_for_name_or_extension(language_name)
 440                    .now_or_never()
 441                    .and_then(|language| language.ok())
 442                    .is_some()
 443                {
 444                    let range = layer.range.to_offset(text);
 445                    log::trace!("reparse range {range:?} for language {language_name:?}");
 446                    resolved_injection_ranges.push(range);
 447                }
 448
 449                cursor.next();
 450            }
 451            drop(cursor);
 452
 453            if !resolved_injection_ranges.is_empty() {
 454                self.reparse_with_ranges(
 455                    text,
 456                    root_language,
 457                    resolved_injection_ranges,
 458                    Some(&registry),
 459                );
 460            }
 461            self.language_registry_version = registry.version();
 462        }
 463
 464        self.update_count += 1;
 465    }
 466
 467    fn reparse_with_ranges(
 468        &mut self,
 469        text: &BufferSnapshot,
 470        root_language: Arc<Language>,
 471        invalidated_ranges: Vec<Range<usize>>,
 472        registry: Option<&Arc<LanguageRegistry>>,
 473    ) {
 474        log::trace!(
 475            "reparse. invalidated ranges:{:?}",
 476            LogOffsetRanges(&invalidated_ranges, text),
 477        );
 478
 479        let max_depth = self.layers.summary().max_depth;
 480        let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
 481        cursor.next();
 482        let mut layers = SumTree::new(text);
 483
 484        let mut changed_regions = ChangeRegionSet::default();
 485        let mut queue = BinaryHeap::new();
 486        let mut combined_injection_ranges = HashMap::default();
 487        queue.push(ParseStep {
 488            depth: 0,
 489            language: ParseStepLanguage::Loaded {
 490                language: root_language,
 491            },
 492            included_ranges: vec![tree_sitter::Range {
 493                start_byte: 0,
 494                end_byte: text.len(),
 495                start_point: Point::zero().to_ts_point(),
 496                end_point: text.max_point().to_ts_point(),
 497            }],
 498            range: Anchor::min_max_range_for_buffer(text.remote_id()),
 499            mode: ParseMode::Single,
 500        });
 501
 502        loop {
 503            let step = queue.pop();
 504            let position = if let Some(step) = &step {
 505                log::trace!(
 506                    "parse step depth:{}, range:{:?}, language:{} ({:?})",
 507                    step.depth,
 508                    LogAnchorRange(&step.range, text),
 509                    step.language.name(),
 510                    step.language.id(),
 511                );
 512                SyntaxLayerPosition {
 513                    depth: step.depth,
 514                    range: step.range.clone(),
 515                    language: step.language.id(),
 516                }
 517            } else {
 518                SyntaxLayerPosition {
 519                    depth: max_depth + 1,
 520                    range: Anchor::min_max_range_for_buffer(text.remote_id()),
 521                    language: None,
 522                }
 523            };
 524
 525            let mut done = cursor.item().is_none();
 526            while !done && position.cmp(&cursor.end(), text).is_gt() {
 527                done = true;
 528
 529                let bounded_position = SyntaxLayerPositionBeforeChange {
 530                    position: position.clone(),
 531                    change: changed_regions.start_position(),
 532                };
 533                if bounded_position.cmp(cursor.start(), text).is_gt() {
 534                    let slice = cursor.slice(&bounded_position, Bias::Left);
 535                    if !slice.is_empty() {
 536                        layers.append(slice, text);
 537                        if changed_regions.prune(cursor.end(), text) {
 538                            done = false;
 539                        }
 540                    }
 541                }
 542
 543                while position.cmp(&cursor.end(), text).is_gt() {
 544                    let Some(layer) = cursor.item() else { break };
 545
 546                    if changed_regions.intersects(layer, text) {
 547                        if let SyntaxLayerContent::Parsed { language, .. } = &layer.content {
 548                            log::trace!(
 549                                "discard layer. language:{}, range:{:?}. changed_regions:{:?}",
 550                                language.name(),
 551                                LogAnchorRange(&layer.range, text),
 552                                LogChangedRegions(&changed_regions, text),
 553                            );
 554                        }
 555
 556                        changed_regions.insert(
 557                            ChangedRegion {
 558                                depth: layer.depth + 1,
 559                                range: layer.range.clone(),
 560                            },
 561                            text,
 562                        );
 563                    } else {
 564                        layers.push(layer.clone(), text);
 565                    }
 566
 567                    cursor.next();
 568                    if changed_regions.prune(cursor.end(), text) {
 569                        done = false;
 570                    }
 571                }
 572            }
 573
 574            let Some(step) = step else { break };
 575            let Dimensions(step_start_byte, step_start_point, _) =
 576                step.range.start.summary::<Dimensions<usize, Point>>(text);
 577            let step_end_byte = step.range.end.to_offset(text);
 578
 579            let mut old_layer = cursor.item();
 580            if let Some(layer) = old_layer {
 581                if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
 582                    && layer.content.language_id() == step.language.id()
 583                {
 584                    cursor.next();
 585                } else {
 586                    old_layer = None;
 587                }
 588            }
 589
 590            let content = match step.language {
 591                ParseStepLanguage::Loaded { language } => {
 592                    let Some(grammar) = language.grammar() else {
 593                        continue;
 594                    };
 595                    let tree;
 596                    let changed_ranges;
 597
 598                    let mut included_ranges = step.included_ranges;
 599                    let is_combined = matches!(step.mode, ParseMode::Combined { .. });
 600
 601                    for range in &mut included_ranges {
 602                        range.start_byte -= step_start_byte;
 603                        range.end_byte -= step_start_byte;
 604                        range.start_point = (Point::from_ts_point(range.start_point)
 605                            - step_start_point)
 606                            .to_ts_point();
 607                        range.end_point = (Point::from_ts_point(range.end_point)
 608                            - step_start_point)
 609                            .to_ts_point();
 610                    }
 611
 612                    if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) =
 613                        old_layer.map(|layer| (&layer.content, layer.range.clone()))
 614                    {
 615                        log::trace!(
 616                            "existing layer. language:{}, range:{:?}, included_ranges:{:?}",
 617                            language.name(),
 618                            LogAnchorRange(&layer_range, text),
 619                            LogIncludedRanges(&old_tree.included_ranges())
 620                        );
 621
 622                        if let ParseMode::Combined {
 623                            mut parent_layer_changed_ranges,
 624                            ..
 625                        } = step.mode
 626                        {
 627                            for range in &mut parent_layer_changed_ranges {
 628                                range.start = range.start.saturating_sub(step_start_byte);
 629                                range.end = range.end.saturating_sub(step_start_byte);
 630                            }
 631
 632                            let changed_indices;
 633                            (included_ranges, changed_indices) = splice_included_ranges(
 634                                old_tree.included_ranges(),
 635                                &parent_layer_changed_ranges,
 636                                &included_ranges,
 637                            );
 638                            insert_newlines_between_ranges(
 639                                changed_indices,
 640                                &mut included_ranges,
 641                                text,
 642                                step_start_byte,
 643                                step_start_point,
 644                            );
 645                        }
 646
 647                        if included_ranges.is_empty() {
 648                            included_ranges.push(tree_sitter::Range {
 649                                start_byte: 0,
 650                                end_byte: 0,
 651                                start_point: Default::default(),
 652                                end_point: Default::default(),
 653                            });
 654                        }
 655
 656                        log::trace!(
 657                            "update layer. language:{}, range:{:?}, included_ranges:{:?}",
 658                            language.name(),
 659                            LogAnchorRange(&step.range, text),
 660                            LogIncludedRanges(&included_ranges),
 661                        );
 662
 663                        let result = parse_text(
 664                            grammar,
 665                            text.as_rope(),
 666                            step_start_byte,
 667                            &included_ranges,
 668                            Some(old_tree.clone()),
 669                        );
 670                        match result {
 671                            Ok(t) => tree = t,
 672                            Err(e) => {
 673                                log::error!("error parsing text: {:?}", e);
 674                                continue;
 675                            }
 676                        };
 677
 678                        changed_ranges = join_ranges(
 679                            invalidated_ranges
 680                                .iter()
 681                                .filter(|&range| {
 682                                    range.start <= step_end_byte && range.end >= step_start_byte
 683                                })
 684                                .cloned(),
 685                            old_tree.changed_ranges(&tree).map(|r| {
 686                                step_start_byte + r.start_byte..step_start_byte + r.end_byte
 687                            }),
 688                        );
 689                    } else {
 690                        if matches!(step.mode, ParseMode::Combined { .. }) {
 691                            insert_newlines_between_ranges(
 692                                0..included_ranges.len(),
 693                                &mut included_ranges,
 694                                text,
 695                                step_start_byte,
 696                                step_start_point,
 697                            );
 698                        }
 699
 700                        if included_ranges.is_empty() {
 701                            included_ranges.push(tree_sitter::Range {
 702                                start_byte: 0,
 703                                end_byte: 0,
 704                                start_point: Default::default(),
 705                                end_point: Default::default(),
 706                            });
 707                        }
 708
 709                        log::trace!(
 710                            "create layer. language:{}, range:{:?}, included_ranges:{:?}",
 711                            language.name(),
 712                            LogAnchorRange(&step.range, text),
 713                            LogIncludedRanges(&included_ranges),
 714                        );
 715
 716                        let result = parse_text(
 717                            grammar,
 718                            text.as_rope(),
 719                            step_start_byte,
 720                            &included_ranges,
 721                            None,
 722                        );
 723                        match result {
 724                            Ok(t) => tree = t,
 725                            Err(e) => {
 726                                log::error!("error parsing text: {:?}", e);
 727                                continue;
 728                            }
 729                        };
 730                        changed_ranges = vec![step_start_byte..step_end_byte];
 731                    }
 732
 733                    if let (Some((config, registry)), false) = (
 734                        grammar.injection_config.as_ref().zip(registry.as_ref()),
 735                        changed_ranges.is_empty(),
 736                    ) {
 737                        for range in &changed_ranges {
 738                            changed_regions.insert(
 739                                ChangedRegion {
 740                                    depth: step.depth + 1,
 741                                    range: text.anchor_before(range.start)
 742                                        ..text.anchor_after(range.end),
 743                                },
 744                                text,
 745                            );
 746                        }
 747                        get_injections(
 748                            config,
 749                            text,
 750                            step.range.clone(),
 751                            tree.root_node_with_offset(
 752                                step_start_byte,
 753                                step_start_point.to_ts_point(),
 754                            ),
 755                            registry,
 756                            step.depth + 1,
 757                            &changed_ranges,
 758                            &mut combined_injection_ranges,
 759                            &mut queue,
 760                        );
 761                    }
 762
 763                    let included_sub_ranges: Option<Vec<Range<Anchor>>> = if is_combined {
 764                        Some(
 765                            included_ranges
 766                                .into_iter()
 767                                .filter(|r| r.start_byte < r.end_byte)
 768                                .map(|r| {
 769                                    text.anchor_before(r.start_byte + step_start_byte)
 770                                        ..text.anchor_after(r.end_byte + step_start_byte)
 771                                })
 772                                .collect(),
 773                        )
 774                    } else {
 775                        None
 776                    };
 777                    SyntaxLayerContent::Parsed {
 778                        tree,
 779                        language,
 780                        included_sub_ranges,
 781                    }
 782                }
 783                ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
 784                    language_name: name,
 785                },
 786            };
 787
 788            layers.push(
 789                SyntaxLayerEntry {
 790                    depth: step.depth,
 791                    range: step.range,
 792                    content,
 793                },
 794                text,
 795            );
 796        }
 797
 798        drop(cursor);
 799        self.layers = layers;
 800        self.interpolated_version = text.version.clone();
 801        self.parsed_version = text.version.clone();
 802        #[cfg(debug_assertions)]
 803        self.check_invariants(text);
 804    }
 805
 806    #[cfg(debug_assertions)]
 807    fn check_invariants(&self, text: &BufferSnapshot) {
 808        let mut max_depth = 0;
 809        let mut prev_layer: Option<(Range<Anchor>, Option<LanguageId>)> = None;
 810        for layer in self.layers.iter() {
 811            match Ord::cmp(&layer.depth, &max_depth) {
 812                Ordering::Less => {
 813                    panic!("layers out of order")
 814                }
 815                Ordering::Equal => {
 816                    if let Some((prev_range, prev_language_id)) = prev_layer {
 817                        match layer.range.start.cmp(&prev_range.start, text) {
 818                            Ordering::Less => panic!("layers out of order"),
 819                            Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) {
 820                                Ordering::Less => panic!("layers out of order"),
 821                                Ordering::Equal => {
 822                                    if layer.content.language_id() < prev_language_id {
 823                                        panic!("layers out of order")
 824                                    }
 825                                }
 826                                Ordering::Greater => {}
 827                            },
 828                            Ordering::Greater => {}
 829                        }
 830                    }
 831                    prev_layer = Some((layer.range.clone(), layer.content.language_id()));
 832                }
 833                Ordering::Greater => {
 834                    prev_layer = None;
 835                }
 836            }
 837
 838            max_depth = layer.depth;
 839        }
 840    }
 841
 842    pub fn single_tree_captures<'a>(
 843        range: Range<usize>,
 844        text: &'a Rope,
 845        tree: &'a Tree,
 846        language: &'a Arc<Language>,
 847        query: fn(&Grammar) -> Option<&Query>,
 848    ) -> SyntaxMapCaptures<'a> {
 849        SyntaxMapCaptures::new(
 850            range,
 851            text,
 852            [SyntaxLayer {
 853                language,
 854                tree,
 855                included_sub_ranges: None,
 856                depth: 0,
 857                offset: (0, tree_sitter::Point::new(0, 0)),
 858            }]
 859            .into_iter(),
 860            query,
 861        )
 862    }
 863
 864    pub fn captures<'a>(
 865        &'a self,
 866        range: Range<usize>,
 867        buffer: &'a BufferSnapshot,
 868        query: fn(&Grammar) -> Option<&Query>,
 869    ) -> SyntaxMapCaptures<'a> {
 870        SyntaxMapCaptures::new(
 871            range.clone(),
 872            buffer.as_rope(),
 873            self.layers_for_range(range, buffer, true),
 874            query,
 875        )
 876    }
 877
 878    pub fn matches<'a>(
 879        &'a self,
 880        range: Range<usize>,
 881        buffer: &'a BufferSnapshot,
 882        query: fn(&Grammar) -> Option<&Query>,
 883    ) -> SyntaxMapMatches<'a> {
 884        SyntaxMapMatches::new(
 885            range.clone(),
 886            buffer.as_rope(),
 887            self.layers_for_range(range, buffer, true),
 888            query,
 889            TreeSitterOptions::default(),
 890        )
 891    }
 892
 893    pub fn matches_with_options<'a>(
 894        &'a self,
 895        range: Range<usize>,
 896        buffer: &'a BufferSnapshot,
 897        options: TreeSitterOptions,
 898        query: fn(&Grammar) -> Option<&Query>,
 899    ) -> SyntaxMapMatches<'a> {
 900        SyntaxMapMatches::new(
 901            range.clone(),
 902            buffer.as_rope(),
 903            self.layers_for_range(range, buffer, true),
 904            query,
 905            options,
 906        )
 907    }
 908
 909    #[cfg(test)]
 910    pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayer<'a>> {
 911        self.layers_for_range(0..buffer.len(), buffer, true)
 912            .collect()
 913    }
 914
 915    pub fn layers_for_range<'a, T: ToOffset>(
 916        &'a self,
 917        range: Range<T>,
 918        buffer: &'a BufferSnapshot,
 919        include_hidden: bool,
 920    ) -> impl 'a + Iterator<Item = SyntaxLayer<'a>> {
 921        let start_offset = range.start.to_offset(buffer);
 922        let end_offset = range.end.to_offset(buffer);
 923        let start = buffer.anchor_before(start_offset);
 924        let end = buffer.anchor_after(end_offset);
 925
 926        let mut cursor = self.layers.filter::<_, ()>(buffer, move |summary| {
 927            if summary.max_depth > summary.min_depth {
 928                true
 929            } else {
 930                let is_before_start = summary.range.end.cmp(&start, buffer).is_lt();
 931                let is_after_end = summary.range.start.cmp(&end, buffer).is_gt();
 932                !is_before_start && !is_after_end
 933            }
 934        });
 935
 936        cursor.next();
 937        iter::from_fn(move || {
 938            while let Some(layer) = cursor.item() {
 939                let mut info = None;
 940                if let SyntaxLayerContent::Parsed {
 941                    tree,
 942                    language,
 943                    included_sub_ranges,
 944                } = &layer.content
 945                {
 946                    let layer_start_offset = layer.range.start.to_offset(buffer);
 947                    let layer_start_point = layer.range.start.to_point(buffer).to_ts_point();
 948                    if include_hidden || !language.config.hidden {
 949                        info = Some(SyntaxLayer {
 950                            tree,
 951                            language,
 952                            included_sub_ranges: included_sub_ranges.as_deref(),
 953                            depth: layer.depth,
 954                            offset: (layer_start_offset, layer_start_point),
 955                        });
 956                    }
 957                }
 958                cursor.next();
 959                if info.is_some() {
 960                    return info;
 961                }
 962            }
 963            None
 964        })
 965    }
 966
 967    pub fn contains_unknown_injections(&self) -> bool {
 968        self.layers.summary().contains_unknown_injections
 969    }
 970
 971    pub fn language_registry_version(&self) -> usize {
 972        self.language_registry_version
 973    }
 974}
 975
 976impl<'a> SyntaxMapCaptures<'a> {
 977    fn new(
 978        range: Range<usize>,
 979        text: &'a Rope,
 980        layers: impl Iterator<Item = SyntaxLayer<'a>>,
 981        query: fn(&Grammar) -> Option<&Query>,
 982    ) -> Self {
 983        let mut result = Self {
 984            layers: Vec::new(),
 985            grammars: Vec::new(),
 986            active_layer_count: 0,
 987        };
 988        for layer in layers {
 989            let grammar = match &layer.language.grammar {
 990                Some(grammar) => grammar,
 991                None => continue,
 992            };
 993            let query = match query(grammar) {
 994                Some(query) => query,
 995                None => continue,
 996            };
 997
 998            let mut query_cursor = QueryCursorHandle::new();
 999
1000            // TODO - add a Tree-sitter API to remove the need for this.
1001            let cursor = unsafe {
1002                std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1003                    query_cursor.deref_mut(),
1004                )
1005            };
1006
1007            cursor.set_byte_range(range.clone());
1008            let captures = cursor.captures(query, layer.node(), TextProvider(text));
1009            let grammar_index = result
1010                .grammars
1011                .iter()
1012                .position(|g| g.id == grammar.id())
1013                .unwrap_or_else(|| {
1014                    result.grammars.push(grammar);
1015                    result.grammars.len() - 1
1016                });
1017            let mut layer = SyntaxMapCapturesLayer {
1018                depth: layer.depth,
1019                grammar_index,
1020                next_capture: None,
1021                captures,
1022                _query_cursor: query_cursor,
1023            };
1024
1025            layer.advance();
1026            if layer.next_capture.is_some() {
1027                let key = layer.sort_key();
1028                let ix = match result.layers[..result.active_layer_count]
1029                    .binary_search_by_key(&key, |layer| layer.sort_key())
1030                {
1031                    Ok(ix) | Err(ix) => ix,
1032                };
1033                result.layers.insert(ix, layer);
1034                result.active_layer_count += 1;
1035            } else {
1036                result.layers.push(layer);
1037            }
1038        }
1039
1040        result
1041    }
1042
1043    pub fn grammars(&self) -> &[&'a Grammar] {
1044        &self.grammars
1045    }
1046
1047    pub fn peek(&self) -> Option<SyntaxMapCapture<'a>> {
1048        let layer = self.layers[..self.active_layer_count].first()?;
1049        let capture = layer.next_capture?;
1050        Some(SyntaxMapCapture {
1051            grammar_index: layer.grammar_index,
1052            index: capture.index,
1053            node: capture.node,
1054        })
1055    }
1056
1057    pub fn advance(&mut self) -> bool {
1058        let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() {
1059            layer
1060        } else {
1061            return false;
1062        };
1063
1064        layer.advance();
1065        if layer.next_capture.is_some() {
1066            let key = layer.sort_key();
1067            let i = 1 + self.layers[1..self.active_layer_count]
1068                .iter()
1069                .position(|later_layer| key < later_layer.sort_key())
1070                .unwrap_or(self.active_layer_count - 1);
1071            self.layers[0..i].rotate_left(1);
1072        } else {
1073            self.layers[0..self.active_layer_count].rotate_left(1);
1074            self.active_layer_count -= 1;
1075        }
1076
1077        true
1078    }
1079
1080    pub fn set_byte_range(&mut self, range: Range<usize>) {
1081        for layer in &mut self.layers {
1082            layer.captures.set_byte_range(range.clone());
1083            if let Some(capture) = &layer.next_capture
1084                && capture.node.end_byte() > range.start
1085            {
1086                continue;
1087            }
1088            layer.advance();
1089        }
1090        self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1091        self.active_layer_count = self
1092            .layers
1093            .iter()
1094            .position(|layer| layer.next_capture.is_none())
1095            .unwrap_or(self.layers.len());
1096    }
1097}
1098
1099#[derive(Default)]
1100pub struct TreeSitterOptions {
1101    pub max_start_depth: Option<u32>,
1102    pub max_bytes_to_query: Option<usize>,
1103}
1104
1105impl TreeSitterOptions {
1106    pub fn max_start_depth(max_start_depth: u32) -> Self {
1107        Self {
1108            max_start_depth: Some(max_start_depth),
1109            max_bytes_to_query: None,
1110        }
1111    }
1112}
1113
1114impl<'a> SyntaxMapMatches<'a> {
1115    fn new(
1116        range: Range<usize>,
1117        text: &'a Rope,
1118        layers: impl Iterator<Item = SyntaxLayer<'a>>,
1119        query: fn(&Grammar) -> Option<&Query>,
1120        options: TreeSitterOptions,
1121    ) -> Self {
1122        let mut result = Self::default();
1123        for layer in layers {
1124            let grammar = match &layer.language.grammar {
1125                Some(grammar) => grammar,
1126                None => continue,
1127            };
1128            let query = match query(grammar) {
1129                Some(query) => query,
1130                None => continue,
1131            };
1132
1133            let mut query_cursor = QueryCursorHandle::new();
1134
1135            // TODO - add a Tree-sitter API to remove the need for this.
1136            let cursor = unsafe {
1137                std::mem::transmute::<&mut tree_sitter::QueryCursor, &'static mut QueryCursor>(
1138                    query_cursor.deref_mut(),
1139                )
1140            };
1141            cursor.set_max_start_depth(options.max_start_depth);
1142
1143            if let Some(max_bytes_to_query) = options.max_bytes_to_query {
1144                let midpoint = (range.start + range.end) / 2;
1145                let containing_range_start = midpoint.saturating_sub(max_bytes_to_query / 2);
1146                let containing_range_end =
1147                    containing_range_start.saturating_add(max_bytes_to_query);
1148                cursor.set_containing_byte_range(containing_range_start..containing_range_end);
1149            }
1150
1151            cursor.set_byte_range(range.clone());
1152            let matches = cursor.matches(query, layer.node(), TextProvider(text));
1153            let grammar_index = result
1154                .grammars
1155                .iter()
1156                .position(|g| g.id == grammar.id())
1157                .unwrap_or_else(|| {
1158                    result.grammars.push(grammar);
1159                    result.grammars.len() - 1
1160                });
1161            let mut layer = SyntaxMapMatchesLayer {
1162                language: layer.language.clone(),
1163                depth: layer.depth,
1164                grammar_index,
1165                matches,
1166                next_pattern_index: 0,
1167                next_captures: Vec::new(),
1168                has_next: false,
1169                _query_cursor: query_cursor,
1170            };
1171
1172            layer.advance();
1173            if layer.has_next {
1174                let key = layer.sort_key();
1175                let ix = match result.layers[..result.active_layer_count]
1176                    .binary_search_by_key(&key, |layer| layer.sort_key())
1177                {
1178                    Ok(ix) | Err(ix) => ix,
1179                };
1180                result.layers.insert(ix, layer);
1181                result.active_layer_count += 1;
1182            } else {
1183                result.layers.push(layer);
1184            }
1185        }
1186        result
1187    }
1188
1189    pub fn grammars(&self) -> &[&'a Grammar] {
1190        &self.grammars
1191    }
1192
1193    pub fn peek(&self) -> Option<SyntaxMapMatch<'_>> {
1194        let layer = self.layers.first()?;
1195
1196        if !layer.has_next {
1197            return None;
1198        }
1199
1200        Some(SyntaxMapMatch {
1201            language: layer.language.clone(),
1202            depth: layer.depth,
1203            grammar_index: layer.grammar_index,
1204            pattern_index: layer.next_pattern_index,
1205            captures: &layer.next_captures,
1206        })
1207    }
1208
1209    pub fn advance(&mut self) -> bool {
1210        let layer = if let Some(layer) = self.layers.first_mut() {
1211            layer
1212        } else {
1213            return false;
1214        };
1215
1216        layer.advance();
1217        if layer.has_next {
1218            let key = layer.sort_key();
1219            let i = 1 + self.layers[1..self.active_layer_count]
1220                .iter()
1221                .position(|later_layer| key < later_layer.sort_key())
1222                .unwrap_or(self.active_layer_count - 1);
1223            self.layers[0..i].rotate_left(1);
1224        } else if self.active_layer_count != 0 {
1225            self.layers[0..self.active_layer_count].rotate_left(1);
1226            self.active_layer_count -= 1;
1227        }
1228
1229        true
1230    }
1231
1232    // pub fn set_byte_range(&mut self, range: Range<usize>) {
1233    //     for layer in &mut self.layers {
1234    //         layer.matches.set_byte_range(range.clone());
1235    //         layer.advance();
1236    //     }
1237    //     self.layers.sort_unstable_by_key(|layer| layer.sort_key());
1238    //     self.active_layer_count = self
1239    //         .layers
1240    //         .iter()
1241    //         .position(|layer| !layer.has_next)
1242    //         .unwrap_or(self.layers.len());
1243    // }
1244}
1245
1246impl SyntaxMapCapturesLayer<'_> {
1247    fn advance(&mut self) {
1248        self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[*ix]);
1249    }
1250
1251    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1252        if let Some(capture) = &self.next_capture {
1253            let range = capture.node.byte_range();
1254            (range.start, Reverse(range.end), self.depth)
1255        } else {
1256            (usize::MAX, Reverse(0), usize::MAX)
1257        }
1258    }
1259}
1260
1261impl SyntaxMapMatchesLayer<'_> {
1262    fn advance(&mut self) {
1263        if let Some(mat) = self.matches.next() {
1264            self.next_captures.clear();
1265            self.next_captures.extend_from_slice(mat.captures);
1266            self.next_pattern_index = mat.pattern_index;
1267            self.has_next = true;
1268        } else {
1269            self.has_next = false;
1270        }
1271    }
1272
1273    fn sort_key(&self) -> (usize, Reverse<usize>, usize) {
1274        if self.has_next {
1275            let captures = &self.next_captures;
1276            if let Some((first, last)) = captures.first().zip(captures.last()) {
1277                return (
1278                    first.node.start_byte(),
1279                    Reverse(last.node.end_byte()),
1280                    self.depth,
1281                );
1282            }
1283        }
1284        (usize::MAX, Reverse(0), usize::MAX)
1285    }
1286}
1287
1288impl<'a> Iterator for SyntaxMapCaptures<'a> {
1289    type Item = SyntaxMapCapture<'a>;
1290
1291    fn next(&mut self) -> Option<Self::Item> {
1292        let result = self.peek();
1293        self.advance();
1294        result
1295    }
1296}
1297
1298fn join_ranges(
1299    a: impl Iterator<Item = Range<usize>>,
1300    b: impl Iterator<Item = Range<usize>>,
1301) -> Vec<Range<usize>> {
1302    let mut result = Vec::<Range<usize>>::new();
1303    let mut a = a.peekable();
1304    let mut b = b.peekable();
1305    loop {
1306        let range = match (a.peek(), b.peek()) {
1307            (Some(range_a), Some(range_b)) => {
1308                if range_a.start < range_b.start {
1309                    a.next().unwrap()
1310                } else {
1311                    b.next().unwrap()
1312                }
1313            }
1314            (None, Some(_)) => b.next().unwrap(),
1315            (Some(_), None) => a.next().unwrap(),
1316            (None, None) => break,
1317        };
1318
1319        if let Some(last) = result.last_mut()
1320            && range.start <= last.end
1321        {
1322            last.end = last.end.max(range.end);
1323            continue;
1324        }
1325        result.push(range);
1326    }
1327    result
1328}
1329
1330fn parse_text(
1331    grammar: &Grammar,
1332    text: &Rope,
1333    start_byte: usize,
1334    ranges: &[tree_sitter::Range],
1335    old_tree: Option<Tree>,
1336) -> anyhow::Result<Tree> {
1337    with_parser(|parser| {
1338        let mut chunks = text.chunks_in_range(start_byte..text.len());
1339        parser.set_included_ranges(ranges)?;
1340        parser.set_language(&grammar.ts_language)?;
1341        parser
1342            .parse_with_options(
1343                &mut move |offset, _| {
1344                    chunks.seek(start_byte + offset);
1345                    chunks.next().unwrap_or("").as_bytes()
1346                },
1347                old_tree.as_ref(),
1348                None,
1349            )
1350            .context("failed to parse")
1351    })
1352}
1353
1354fn get_injections(
1355    config: &InjectionConfig,
1356    text: &BufferSnapshot,
1357    outer_range: Range<Anchor>,
1358    node: Node,
1359    language_registry: &Arc<LanguageRegistry>,
1360    depth: usize,
1361    changed_ranges: &[Range<usize>],
1362    combined_injection_ranges: &mut HashMap<LanguageId, (Arc<Language>, Vec<tree_sitter::Range>)>,
1363    queue: &mut BinaryHeap<ParseStep>,
1364) {
1365    let mut query_cursor = QueryCursorHandle::new();
1366    let mut prev_match = None;
1367
1368    // Ensure that a `ParseStep` is created for every combined injection language, even
1369    // if there currently no matches for that injection.
1370    combined_injection_ranges.clear();
1371    for pattern in &config.patterns {
1372        if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined)
1373            && let Some(language) = language_registry
1374                .language_for_name_or_extension(language_name)
1375                .now_or_never()
1376                .and_then(|language| language.ok())
1377        {
1378            combined_injection_ranges.insert(language.id, (language, Vec::new()));
1379        }
1380    }
1381
1382    for query_range in changed_ranges {
1383        query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end + 1);
1384        let mut matches = query_cursor.matches(&config.query, node, TextProvider(text.as_rope()));
1385        while let Some(mat) = matches.next() {
1386            let content_ranges = mat
1387                .nodes_for_capture_index(config.content_capture_ix)
1388                .map(|node| node.range())
1389                .collect::<Vec<_>>();
1390            if content_ranges.is_empty() {
1391                continue;
1392            }
1393
1394            let content_range =
1395                content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte;
1396
1397            // Avoid duplicate matches if two changed ranges intersect the same injection.
1398            if let Some((prev_pattern_ix, prev_range)) = &prev_match
1399                && mat.pattern_index == *prev_pattern_ix
1400                && content_range == *prev_range
1401            {
1402                continue;
1403            }
1404
1405            prev_match = Some((mat.pattern_index, content_range.clone()));
1406            let combined = config.patterns[mat.pattern_index].combined;
1407
1408            let mut step_range = content_range.clone();
1409            let language_name =
1410                if let Some(name) = config.patterns[mat.pattern_index].language.as_ref() {
1411                    Some(Cow::Borrowed(name.as_ref()))
1412                } else if let Some(language_node) = config
1413                    .language_capture_ix
1414                    .and_then(|ix| mat.nodes_for_capture_index(ix).next())
1415                {
1416                    step_range.start = cmp::min(content_range.start, language_node.start_byte());
1417                    step_range.end = cmp::max(content_range.end, language_node.end_byte());
1418                    let language_name: String =
1419                        text.text_for_range(language_node.byte_range()).collect();
1420
1421                    // Enable paths ending in a language extension to represent a language name: e.g. "foo/bar/baz.rs"
1422                    if let Some(last_dot_pos) = language_name.rfind('.') {
1423                        Some(Cow::Owned(language_name[last_dot_pos + 1..].to_string()))
1424                    } else {
1425                        Some(Cow::Owned(language_name))
1426                    }
1427                } else {
1428                    None
1429                };
1430
1431            if let Some(language_name) = language_name {
1432                let language = language_registry
1433                    .language_for_name_or_extension(&language_name)
1434                    .now_or_never()
1435                    .and_then(|language| language.ok());
1436                let range = text.anchor_before(step_range.start)..text.anchor_after(step_range.end);
1437                if let Some(language) = language {
1438                    if combined {
1439                        combined_injection_ranges
1440                            .entry(language.id)
1441                            .or_insert_with(|| (language.clone(), vec![]))
1442                            .1
1443                            .extend(content_ranges);
1444                    } else {
1445                        queue.push(ParseStep {
1446                            depth,
1447                            language: ParseStepLanguage::Loaded { language },
1448                            included_ranges: content_ranges,
1449                            range,
1450                            mode: ParseMode::Single,
1451                        });
1452                    }
1453                } else {
1454                    queue.push(ParseStep {
1455                        depth,
1456                        language: ParseStepLanguage::Pending {
1457                            name: language_name.into(),
1458                        },
1459                        included_ranges: content_ranges,
1460                        range,
1461                        mode: ParseMode::Single,
1462                    });
1463                }
1464            }
1465        }
1466    }
1467
1468    for (_, (language, mut included_ranges)) in combined_injection_ranges.drain() {
1469        included_ranges.sort_unstable_by(|a, b| {
1470            Ord::cmp(&a.start_byte, &b.start_byte).then_with(|| Ord::cmp(&a.end_byte, &b.end_byte))
1471        });
1472        queue.push(ParseStep {
1473            depth,
1474            language: ParseStepLanguage::Loaded { language },
1475            range: outer_range.clone(),
1476            included_ranges,
1477            mode: ParseMode::Combined {
1478                parent_layer_range: node.start_byte()..node.end_byte(),
1479                parent_layer_changed_ranges: changed_ranges.to_vec(),
1480            },
1481        })
1482    }
1483}
1484
1485/// Updates the given list of included `ranges`, removing any ranges that intersect
1486/// `removed_ranges`, and inserting the given `new_ranges`.
1487///
1488/// Returns a new vector of ranges, and the range of the vector that was changed,
1489/// from the previous `ranges` vector.
1490pub(crate) fn splice_included_ranges(
1491    mut ranges: Vec<tree_sitter::Range>,
1492    removed_ranges: &[Range<usize>],
1493    new_ranges: &[tree_sitter::Range],
1494) -> (Vec<tree_sitter::Range>, Range<usize>) {
1495    let mut removed_ranges = removed_ranges.iter().cloned().peekable();
1496    let mut new_ranges = new_ranges.iter().cloned().peekable();
1497    let mut ranges_ix = 0;
1498    let mut changed_portion: Option<Range<usize>> = None;
1499    loop {
1500        let next_new_range = new_ranges.peek();
1501        let next_removed_range = removed_ranges.peek();
1502
1503        let (remove, insert) = match (next_removed_range, next_new_range) {
1504            (None, None) => break,
1505            (Some(_), None) => (removed_ranges.next().unwrap(), None),
1506            (Some(next_removed_range), Some(next_new_range)) => {
1507                if next_removed_range.end < next_new_range.start_byte {
1508                    (removed_ranges.next().unwrap(), None)
1509                } else {
1510                    let mut start = next_new_range.start_byte;
1511                    let mut end = next_new_range.end_byte;
1512
1513                    while let Some(next_removed_range) = removed_ranges.peek() {
1514                        if next_removed_range.start > next_new_range.end_byte {
1515                            break;
1516                        }
1517                        let next_removed_range = removed_ranges.next().unwrap();
1518                        start = cmp::min(start, next_removed_range.start);
1519                        end = cmp::max(end, next_removed_range.end);
1520                    }
1521
1522                    (start..end, Some(new_ranges.next().unwrap()))
1523                }
1524            }
1525            (None, Some(next_new_range)) => (
1526                next_new_range.start_byte..next_new_range.end_byte,
1527                Some(new_ranges.next().unwrap()),
1528            ),
1529        };
1530
1531        let mut start_ix = ranges_ix
1532            + match ranges[ranges_ix..].binary_search_by_key(&remove.start, |r| r.end_byte) {
1533                Ok(ix) => ix,
1534                Err(ix) => ix,
1535            };
1536        let mut end_ix = ranges_ix
1537            + match ranges[ranges_ix..].binary_search_by_key(&remove.end, |r| r.start_byte) {
1538                Ok(ix) => ix + 1,
1539                Err(ix) => ix,
1540            };
1541
1542        // If there are empty ranges, then there may be multiple ranges with the same
1543        // start or end. Expand the splice to include any adjacent ranges that touch
1544        // the changed range.
1545        while start_ix > 0 {
1546            if ranges[start_ix - 1].end_byte == remove.start {
1547                start_ix -= 1;
1548            } else {
1549                break;
1550            }
1551        }
1552        while let Some(range) = ranges.get(end_ix) {
1553            if range.start_byte == remove.end {
1554                end_ix += 1;
1555            } else {
1556                break;
1557            }
1558        }
1559        let changed_start = changed_portion
1560            .as_ref()
1561            .map_or(usize::MAX, |range| range.start)
1562            .min(start_ix);
1563        let changed_end =
1564            changed_portion
1565                .as_ref()
1566                .map_or(0, |range| range.end)
1567                .max(if insert.is_some() {
1568                    start_ix + 1
1569                } else {
1570                    start_ix
1571                });
1572        changed_portion = Some(changed_start..changed_end);
1573
1574        ranges.splice(start_ix..end_ix, insert);
1575        ranges_ix = start_ix;
1576    }
1577
1578    (ranges, changed_portion.unwrap_or(0..0))
1579}
1580
1581/// Ensure there are newline ranges in between content range that appear on
1582/// different lines. For performance, only iterate through the given range of
1583/// indices. All of the ranges in the array are relative to a given start byte
1584/// and point.
1585fn insert_newlines_between_ranges(
1586    indices: Range<usize>,
1587    ranges: &mut Vec<tree_sitter::Range>,
1588    text: &text::BufferSnapshot,
1589    start_byte: usize,
1590    start_point: Point,
1591) {
1592    let mut ix = indices.end + 1;
1593    while ix > indices.start {
1594        ix -= 1;
1595        if 0 == ix || ix == ranges.len() {
1596            continue;
1597        }
1598
1599        let range_b = ranges[ix];
1600        let range_a = &mut ranges[ix - 1];
1601        if range_a.end_point.column == 0 {
1602            continue;
1603        }
1604
1605        if range_a.end_point.row < range_b.start_point.row {
1606            let end_point = start_point + Point::from_ts_point(range_a.end_point);
1607            let line_end = Point::new(end_point.row, text.line_len(end_point.row));
1608            if end_point.column >= line_end.column {
1609                range_a.end_byte += 1;
1610                range_a.end_point.row += 1;
1611                range_a.end_point.column = 0;
1612            } else {
1613                let newline_offset = text.point_to_offset(line_end);
1614                ranges.insert(
1615                    ix,
1616                    tree_sitter::Range {
1617                        start_byte: newline_offset - start_byte,
1618                        end_byte: newline_offset - start_byte + 1,
1619                        start_point: (line_end - start_point).to_ts_point(),
1620                        end_point: ((line_end - start_point) + Point::new(1, 0)).to_ts_point(),
1621                    },
1622                )
1623            }
1624        }
1625    }
1626}
1627
1628impl OwnedSyntaxLayer {
1629    /// Returns the root syntax node for this layer.
1630    pub fn node(&self) -> Node<'_> {
1631        self.tree
1632            .root_node_with_offset(self.offset.0, self.offset.1)
1633    }
1634}
1635
1636impl<'a> SyntaxLayer<'a> {
1637    /// Returns an owned version of this layer.
1638    pub fn to_owned(&self) -> OwnedSyntaxLayer {
1639        OwnedSyntaxLayer {
1640            tree: self.tree.clone(),
1641            offset: self.offset,
1642            language: self.language.clone(),
1643        }
1644    }
1645
1646    /// Returns the root node for this layer.
1647    pub fn node(&self) -> Node<'a> {
1648        self.tree
1649            .root_node_with_offset(self.offset.0, self.offset.1)
1650    }
1651
1652    pub(crate) fn override_id(&self, offset: usize, text: &text::BufferSnapshot) -> Option<u32> {
1653        let text = TextProvider(text.as_rope());
1654        let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
1655
1656        let mut query_cursor = QueryCursorHandle::new();
1657        query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
1658        query_cursor.set_containing_byte_range(
1659            offset.saturating_sub(MAX_BYTES_TO_QUERY / 2)
1660                ..offset.saturating_add(MAX_BYTES_TO_QUERY / 2),
1661        );
1662
1663        let mut smallest_match: Option<(u32, Range<usize>)> = None;
1664        let mut matches = query_cursor.matches(&config.query, self.node(), text);
1665        while let Some(mat) = matches.next() {
1666            for capture in mat.captures {
1667                let Some(override_entry) = config.values.get(&capture.index) else {
1668                    continue;
1669                };
1670
1671                let range = capture.node.byte_range();
1672                if override_entry.range_is_inclusive {
1673                    if offset < range.start || offset > range.end {
1674                        continue;
1675                    }
1676                } else if offset <= range.start || offset >= range.end {
1677                    continue;
1678                }
1679
1680                if let Some((_, smallest_range)) = &smallest_match {
1681                    if range.len() < smallest_range.len() {
1682                        smallest_match = Some((capture.index, range))
1683                    }
1684                    continue;
1685                }
1686
1687                smallest_match = Some((capture.index, range));
1688            }
1689        }
1690
1691        smallest_match.map(|(index, _)| index)
1692    }
1693}
1694
1695impl std::ops::Deref for SyntaxMap {
1696    type Target = SyntaxSnapshot;
1697
1698    fn deref(&self) -> &Self::Target {
1699        &self.snapshot
1700    }
1701}
1702
1703impl PartialEq for ParseStep {
1704    fn eq(&self, _: &Self) -> bool {
1705        false
1706    }
1707}
1708
1709impl Eq for ParseStep {}
1710
1711impl PartialOrd for ParseStep {
1712    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1713        Some(self.cmp(other))
1714    }
1715}
1716
1717impl Ord for ParseStep {
1718    fn cmp(&self, other: &Self) -> Ordering {
1719        let range_a = self.range();
1720        let range_b = other.range();
1721        Ord::cmp(&other.depth, &self.depth)
1722            .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
1723            .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
1724            .then_with(|| other.language.id().cmp(&self.language.id()))
1725    }
1726}
1727
1728impl ParseStep {
1729    fn range(&self) -> Range<usize> {
1730        if let ParseMode::Combined {
1731            parent_layer_range, ..
1732        } = &self.mode
1733        {
1734            parent_layer_range.clone()
1735        } else {
1736            let start = self.included_ranges.first().map_or(0, |r| r.start_byte);
1737            let end = self.included_ranges.last().map_or(0, |r| r.end_byte);
1738            start..end
1739        }
1740    }
1741}
1742
1743impl ChangedRegion {
1744    fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
1745        let range_a = &self.range;
1746        let range_b = &other.range;
1747        Ord::cmp(&self.depth, &other.depth)
1748            .then_with(|| range_a.start.cmp(&range_b.start, buffer))
1749            .then_with(|| range_b.end.cmp(&range_a.end, buffer))
1750    }
1751}
1752
1753impl ChangeRegionSet {
1754    fn start_position(&self) -> ChangeStartPosition {
1755        self.0.first().map_or(
1756            ChangeStartPosition {
1757                depth: usize::MAX,
1758                position: Anchor::MAX,
1759            },
1760            |region| ChangeStartPosition {
1761                depth: region.depth,
1762                position: region.range.start,
1763            },
1764        )
1765    }
1766
1767    fn intersects(&self, layer: &SyntaxLayerEntry, text: &BufferSnapshot) -> bool {
1768        for region in &self.0 {
1769            if region.depth < layer.depth {
1770                continue;
1771            }
1772            if region.depth > layer.depth {
1773                break;
1774            }
1775            if region.range.end.cmp(&layer.range.start, text).is_le() {
1776                continue;
1777            }
1778            if region.range.start.cmp(&layer.range.end, text).is_ge() {
1779                break;
1780            }
1781            return true;
1782        }
1783        false
1784    }
1785
1786    fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) {
1787        if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(&region, text)) {
1788            self.0.insert(ix, region);
1789        }
1790    }
1791
1792    fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool {
1793        let prev_len = self.0.len();
1794        self.0.retain(|region| {
1795            region.depth > summary.max_depth
1796                || (region.depth == summary.max_depth
1797                    && region
1798                        .range
1799                        .end
1800                        .cmp(&summary.last_layer_range.start, text)
1801                        .is_gt())
1802        });
1803        self.0.len() < prev_len
1804    }
1805}
1806
1807impl Default for SyntaxLayerSummary {
1808    fn default() -> Self {
1809        Self {
1810            max_depth: 0,
1811            min_depth: 0,
1812            range: Anchor::MAX..Anchor::MIN,
1813            last_layer_range: Anchor::MIN..Anchor::MAX,
1814            last_layer_language: None,
1815            contains_unknown_injections: false,
1816        }
1817    }
1818}
1819
1820impl sum_tree::Summary for SyntaxLayerSummary {
1821    type Context<'a> = &'a BufferSnapshot;
1822
1823    fn zero(_cx: &BufferSnapshot) -> Self {
1824        Default::default()
1825    }
1826
1827    fn add_summary(&mut self, other: &Self, buffer: Self::Context<'_>) {
1828        if other.max_depth > self.max_depth {
1829            self.max_depth = other.max_depth;
1830            self.range = other.range.clone();
1831        } else {
1832            if self.range == (Anchor::MAX..Anchor::MAX) {
1833                self.range.start = other.range.start;
1834            }
1835            if other.range.end.cmp(&self.range.end, buffer).is_gt() {
1836                self.range.end = other.range.end;
1837            }
1838        }
1839        self.last_layer_range = other.last_layer_range.clone();
1840        self.last_layer_language = other.last_layer_language;
1841        self.contains_unknown_injections |= other.contains_unknown_injections;
1842    }
1843}
1844
1845impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPosition {
1846    fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1847        Ord::cmp(&self.depth, &cursor_location.max_depth)
1848            .then_with(|| {
1849                self.range
1850                    .start
1851                    .cmp(&cursor_location.last_layer_range.start, buffer)
1852            })
1853            .then_with(|| {
1854                cursor_location
1855                    .last_layer_range
1856                    .end
1857                    .cmp(&self.range.end, buffer)
1858            })
1859            .then_with(|| self.language.cmp(&cursor_location.last_layer_language))
1860    }
1861}
1862
1863impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for ChangeStartPosition {
1864    fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
1865        Ord::cmp(&self.depth, &cursor_location.max_depth)
1866            .then_with(|| self.position.cmp(&cursor_location.range.end, text))
1867    }
1868}
1869
1870impl SeekTarget<'_, SyntaxLayerSummary, SyntaxLayerSummary> for SyntaxLayerPositionBeforeChange {
1871    fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
1872        if self.change.cmp(cursor_location, buffer).is_le() {
1873            Ordering::Less
1874        } else {
1875            self.position.cmp(cursor_location, buffer)
1876        }
1877    }
1878}
1879
1880impl sum_tree::Item for SyntaxLayerEntry {
1881    type Summary = SyntaxLayerSummary;
1882
1883    fn summary(&self, _cx: &BufferSnapshot) -> Self::Summary {
1884        SyntaxLayerSummary {
1885            min_depth: self.depth,
1886            max_depth: self.depth,
1887            range: self.range.clone(),
1888            last_layer_range: self.range.clone(),
1889            last_layer_language: self.content.language_id(),
1890            contains_unknown_injections: matches!(self.content, SyntaxLayerContent::Pending { .. }),
1891        }
1892    }
1893}
1894
1895impl std::fmt::Debug for SyntaxLayerEntry {
1896    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1897        f.debug_struct("SyntaxLayer")
1898            .field("depth", &self.depth)
1899            .field("range", &self.range)
1900            .field("tree", &self.content.tree())
1901            .finish()
1902    }
1903}
1904
1905impl<'a> tree_sitter::TextProvider<&'a [u8]> for TextProvider<'a> {
1906    type I = ByteChunks<'a>;
1907
1908    fn text(&mut self, node: tree_sitter::Node) -> Self::I {
1909        ByteChunks(self.0.chunks_in_range(node.byte_range()))
1910    }
1911}
1912
1913impl<'a> Iterator for ByteChunks<'a> {
1914    type Item = &'a [u8];
1915
1916    fn next(&mut self) -> Option<Self::Item> {
1917        self.0.next().map(str::as_bytes)
1918    }
1919}
1920
1921impl QueryCursorHandle {
1922    pub fn new() -> Self {
1923        let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_default();
1924        cursor.set_match_limit(64);
1925        QueryCursorHandle(Some(cursor))
1926    }
1927}
1928
1929impl Deref for QueryCursorHandle {
1930    type Target = QueryCursor;
1931
1932    fn deref(&self) -> &Self::Target {
1933        self.0.as_ref().unwrap()
1934    }
1935}
1936
1937impl DerefMut for QueryCursorHandle {
1938    fn deref_mut(&mut self) -> &mut Self::Target {
1939        self.0.as_mut().unwrap()
1940    }
1941}
1942
1943impl Drop for QueryCursorHandle {
1944    fn drop(&mut self) {
1945        let mut cursor = self.0.take().unwrap();
1946        cursor.set_byte_range(0..usize::MAX);
1947        cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
1948        cursor.set_containing_byte_range(0..usize::MAX);
1949        cursor.set_containing_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point());
1950        QUERY_CURSORS.lock().push(cursor)
1951    }
1952}
1953
1954pub trait ToTreeSitterPoint {
1955    fn to_ts_point(self) -> tree_sitter::Point;
1956    fn from_ts_point(point: tree_sitter::Point) -> Self;
1957}
1958
1959impl ToTreeSitterPoint for Point {
1960    fn to_ts_point(self) -> tree_sitter::Point {
1961        tree_sitter::Point::new(self.row as usize, self.column as usize)
1962    }
1963
1964    fn from_ts_point(point: tree_sitter::Point) -> Self {
1965        Point::new(point.row as u32, point.column as u32)
1966    }
1967}
1968
1969struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]);
1970struct LogPoint(Point);
1971struct LogAnchorRange<'a>(&'a Range<Anchor>, &'a text::BufferSnapshot);
1972struct LogOffsetRanges<'a>(&'a [Range<usize>], &'a text::BufferSnapshot);
1973struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot);
1974
1975impl fmt::Debug for LogIncludedRanges<'_> {
1976    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1977        f.debug_list()
1978            .entries(self.0.iter().map(|range| {
1979                let start = range.start_point;
1980                let end = range.end_point;
1981                (start.row, start.column)..(end.row, end.column)
1982            }))
1983            .finish()
1984    }
1985}
1986
1987impl fmt::Debug for LogAnchorRange<'_> {
1988    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1989        let range = self.0.to_point(self.1);
1990        (LogPoint(range.start)..LogPoint(range.end)).fmt(f)
1991    }
1992}
1993
1994impl fmt::Debug for LogOffsetRanges<'_> {
1995    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1996        f.debug_list()
1997            .entries(self.0.iter().map(|range| {
1998                LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1))
1999            }))
2000            .finish()
2001    }
2002}
2003
2004impl fmt::Debug for LogChangedRegions<'_> {
2005    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2006        f.debug_list()
2007            .entries(
2008                self.0
2009                    .0
2010                    .iter()
2011                    .map(|region| LogAnchorRange(&region.range, self.1)),
2012            )
2013            .finish()
2014    }
2015}
2016
2017impl fmt::Debug for LogPoint {
2018    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2019        (self.0.row, self.0.column).fmt(f)
2020    }
2021}