From 46e6b2cff47e6aef786ccfe43c850069ea2d797e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 20 May 2021 16:24:29 -0700 Subject: [PATCH] Start maintaining a syntax tree on buffers --- zed/src/editor/buffer/mod.rs | 152 +++++++++++++++++++++++-- zed/src/editor/buffer/point.rs | 18 +++ zed/src/editor/buffer/rope.rs | 6 +- zed/src/editor/buffer_view.rs | 1 + zed/src/editor/display_map/fold_map.rs | 2 +- zed/src/language.rs | 6 +- 6 files changed, 170 insertions(+), 15 deletions(-) diff --git a/zed/src/editor/buffer/mod.rs b/zed/src/editor/buffer/mod.rs index 9fdf4e0c3620ee20993d0db1fb5f2ac562992043..a6461c07b776bdf69ca71a10d94dd4ae1587a25c 100644 --- a/zed/src/editor/buffer/mod.rs +++ b/zed/src/editor/buffer/mod.rs @@ -9,19 +9,21 @@ pub use rope::{ChunksIter, Rope, TextSummary}; use seahash::SeaHasher; pub use selection::*; use similar::{ChangeTag, TextDiff}; +use tree_sitter::{InputEdit, Parser}; use crate::{ editor::Bias, - language::Language, + language::{Language, Tree}, operation_queue::{self, OperationQueue}, sum_tree::{self, FilterCursor, SeekBias, SumTree}, - time::{self, ReplicaId}, + time::{self, Global, ReplicaId}, worktree::FileHandle, }; use anyhow::{anyhow, Result}; use gpui::{AppContext, Entity, ModelContext, Task}; use lazy_static::lazy_static; use std::{ + cell::RefCell, cmp, hash::BuildHasher, iter::{self, Iterator}, @@ -57,6 +59,10 @@ type HashMap = std::collections::HashMap; #[cfg(not(test))] type HashSet = std::collections::HashSet; +thread_local! { + pub static PARSER: RefCell = RefCell::new(Parser::new()); +} + pub struct Buffer { fragments: SumTree, visible_text: Rope, @@ -70,6 +76,8 @@ pub struct Buffer { history: History, file: Option, language: Option>, + tree: Option, + is_parsing: bool, selections: HashMap>, pub selections_last_update: SelectionsVersion, deferred_ops: OperationQueue, @@ -465,7 +473,7 @@ impl Buffer { ); } - Self { + let mut result = Self { visible_text, deleted_text: Rope::new(), fragments, @@ -476,6 +484,8 @@ impl Buffer { undo_map: Default::default(), history, file, + tree: None, + is_parsing: false, language, saved_mtime, selections: HashMap::default(), @@ -485,7 +495,9 @@ impl Buffer { replica_id, local_clock: time::Local::new(replica_id), lamport_clock: time::Lamport::new(replica_id), - } + }; + result.reparse(ctx); + result } pub fn snapshot(&self) -> Rope { @@ -534,6 +546,109 @@ impl Buffer { ctx.emit(Event::Saved); } + fn reparse(&mut self, ctx: &mut ModelContext) { + // Avoid spawning a new parsing task if the buffer is already being reparsed + // due to an earlier edit. + if self.is_parsing { + return; + } + + if let Some(language) = self.language.clone() { + self.is_parsing = true; + let mut old_text = self.visible_text.clone(); + let mut old_tree = self.tree.clone(); + let mut old_version = self.version(); + let mut had_changes = true; + ctx.spawn(|handle, mut ctx| async move { + while had_changes { + // Parse the current text in a background thread. + let (mut tree, text) = ctx + .background_executor() + .spawn({ + let language = language.clone(); + async move { + let tree = Self::parse_text(&old_text, old_tree, &language); + (tree, old_text) + } + }) + .await; + + // When the parsing completes, check if any new changes have occurred since + // this parse began. If so, edit the new tree to reflect these new changes. + let (has_changes, new_text, new_tree, new_version) = + handle.update(&mut ctx, move |this, ctx| { + let mut delta = 0_isize; + let mut has_further_changes = false; + for Edit { + old_range, + new_range, + } in this.edits_since(old_version) + { + let old_len = old_range.end - old_range.start; + let new_len = new_range.end - new_range.start; + let old_start = (old_range.start as isize + delta) as usize; + tree.edit(&InputEdit { + start_byte: old_start, + old_end_byte: old_start + old_len, + new_end_byte: old_start + new_len, + start_position: text.to_point(old_start).into(), + old_end_position: text.to_point(old_start + old_len).into(), + new_end_position: this + .point_for_offset(old_start + new_len) + .unwrap() + .into(), + }); + delta += new_len as isize - old_len as isize; + has_further_changes = true; + } + + this.tree = Some(tree); + ctx.emit(Event::Reparsed); + + // If there were new changes, then continue the loop, spawning a new + // parsing task. Otherwise, record the fact that parsing is complete. + if has_further_changes { + ( + true, + this.visible_text.clone(), + this.tree.clone(), + this.version(), + ) + } else { + this.is_parsing = false; + (false, Rope::new(), None, Global::new()) + } + }); + + had_changes = has_changes; + old_text = new_text; + old_tree = new_tree; + old_version = new_version; + } + }) + .detach(); + } + } + + fn parse_text(text: &Rope, old_tree: Option, language: &Language) -> Tree { + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + parser + .set_language(language.grammar) + .expect("incompatible grammar"); + let mut chunks = text.chunks_in_range(0..text.len()); + parser + .parse_with( + &mut move |offset, _| { + chunks.seek(offset); + chunks.next().map(str::as_bytes).unwrap_or(&[]) + }, + old_tree.as_ref(), + ) + .unwrap() + }) + } + fn diff(&self, new_text: Arc, ctx: &AppContext) -> Task { // TODO: it would be nice to not allocate here. let old_text = self.text(); @@ -725,6 +840,7 @@ impl Buffer { if self.edits_since(since).next().is_some() { self.did_edit(was_dirty, ctx); + self.reparse(ctx); } } } @@ -746,17 +862,32 @@ impl Buffer { self.start_transaction_at(None, Instant::now())?; let new_text = new_text.into(); + let old_ranges = old_ranges + .into_iter() + .map(|range| range.start.to_offset(self)..range.end.to_offset(self)) + .collect::>>(); + + if let Some(tree) = self.tree.as_mut() { + let new_extent = TextSummary::from(new_text.as_str()).lines; + for old_range in old_ranges.iter().rev() { + let start_position = self.visible_text.to_point(old_range.start); + tree.edit(&InputEdit { + start_byte: old_range.start, + old_end_byte: old_range.end, + new_end_byte: old_range.start + new_text.len(), + start_position: start_position.into(), + old_end_position: self.visible_text.to_point(old_range.end).into(), + new_end_position: (start_position + new_extent).into(), + }); + } + } + let new_text = if new_text.len() > 0 { Some(new_text) } else { None }; - let old_ranges = old_ranges - .into_iter() - .map(|range| range.start.to_offset(self)..range.end.to_offset(self)) - .collect::>>(); - let has_new_text = new_text.is_some(); let ops = self.splice_fragments( old_ranges @@ -1890,6 +2021,8 @@ impl Clone for Buffer { deferred_ops: self.deferred_ops.clone(), file: self.file.clone(), language: self.language.clone(), + tree: self.tree.clone(), + is_parsing: false, deferred_replicas: self.deferred_replicas.clone(), replica_id: self.replica_id, local_clock: self.local_clock.clone(), @@ -1957,6 +2090,7 @@ pub enum Event { Saved, FileHandleChanged, Reloaded, + Reparsed, } impl Entity for Buffer { diff --git a/zed/src/editor/buffer/point.rs b/zed/src/editor/buffer/point.rs index d4ecc69e0c67ec8942312f355de9f22e32e0d1e8..f1fb81ef105ad4bd8c68a8249713ad1e389b3450 100644 --- a/zed/src/editor/buffer/point.rs +++ b/zed/src/editor/buffer/point.rs @@ -98,3 +98,21 @@ impl Ord for Point { } } } + +impl Into for Point { + fn into(self) -> tree_sitter::Point { + tree_sitter::Point { + row: self.row as usize, + column: self.column as usize, + } + } +} + +impl From for Point { + fn from(point: tree_sitter::Point) -> Self { + Self { + row: point.row as u32, + column: point.column as u32, + } + } +} diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 101620f6c269cde2600c2edddb1f2817c1356297..174210c1a9efe8c5c7533bc8234d7753af98b621 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -284,11 +284,13 @@ impl<'a> ChunksIter<'a> { self.range.start.max(*self.chunks.start()) } - pub fn advance_to(&mut self, offset: usize) { + pub fn seek(&mut self, offset: usize) { if offset >= self.chunks.end() { self.chunks.seek_forward(&offset, SeekBias::Right, &()); - self.range.start = offset; + } else { + self.chunks.seek(&offset, SeekBias::Right, &()); } + self.range.start = offset; } pub fn peek(&self) -> Option<&'a str> { diff --git a/zed/src/editor/buffer_view.rs b/zed/src/editor/buffer_view.rs index 5f05defdf63cb112552189833055382b599c172c..4c2469095f1cff79151f941b1cbb472c0d70dc8e 100644 --- a/zed/src/editor/buffer_view.rs +++ b/zed/src/editor/buffer_view.rs @@ -2246,6 +2246,7 @@ impl BufferView { buffer::Event::Saved => ctx.emit(Event::Saved), buffer::Event::FileHandleChanged => ctx.emit(Event::FileHandleChanged), buffer::Event::Reloaded => ctx.emit(Event::FileHandleChanged), + buffer::Event::Reparsed => {} } } } diff --git a/zed/src/editor/display_map/fold_map.rs b/zed/src/editor/display_map/fold_map.rs index 4cfcd8933704892bf8e831df4d03bceea63b2d70..243fabf0a637ac49ce782bd95434d2f634716e39 100644 --- a/zed/src/editor/display_map/fold_map.rs +++ b/zed/src/editor/display_map/fold_map.rs @@ -685,7 +685,7 @@ impl<'a> Iterator for Chunks<'a> { // advance the transform and buffer cursors to the end of the fold. if let Some(display_text) = transform.display_text { self.buffer_offset += transform.summary.buffer.bytes; - self.buffer_chunks.advance_to(self.buffer_offset); + self.buffer_chunks.seek(self.buffer_offset); while self.buffer_offset >= self.transform_cursor.end().buffer.bytes && self.transform_cursor.item().is_some() diff --git a/zed/src/language.rs b/zed/src/language.rs index 666e55c892c2844aa4d9100181b189df117bee77..04bc42a522556751f3e3d86449e61337da1f5e3a 100644 --- a/zed/src/language.rs +++ b/zed/src/language.rs @@ -9,9 +9,9 @@ pub use tree_sitter::{Parser, Tree}; pub struct LanguageDir; pub struct Language { - name: String, - grammar: Grammar, - highlight_query: Query, + pub name: String, + pub grammar: Grammar, + pub highlight_query: Query, path_suffixes: Vec, }