From 9b2bc458e393066c21b2256b2f38ec31b1a8905a Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Fri, 20 Dec 2024 15:42:18 -0700 Subject: [PATCH] Expand diagnostic excerpts using heuristics on syntactic information from TreeSitter (#21942) This is quite experimental and untested in languages other than Rust. It's written to attempt to do something sensible in many languages. Due to its experimental nature, just releasing to staff, and so not including it in release notes. Future release note might be "Improved diagnostic excerpts by using syntactic info to determine the context lines to show." Release Notes: - N/A --- Cargo.lock | 2 + crates/diagnostics/Cargo.toml | 1 + crates/diagnostics/src/diagnostics.rs | 201 ++++++++++++++++++++++-- crates/editor/src/editor.rs | 16 +- crates/editor/src/hover_popover.rs | 2 +- crates/language/src/buffer.rs | 59 ++++++- crates/language/src/buffer_tests.rs | 28 +++- crates/language/src/language.rs | 2 +- crates/language/src/syntax_map.rs | 2 +- crates/multi_buffer/Cargo.toml | 1 + crates/multi_buffer/src/multi_buffer.rs | 13 +- 11 files changed, 284 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 03fa792d90f202dd504d77a03309f4703cc9c686..7bc9f96f956594855400bb52d7ccab600a68951c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3682,6 +3682,7 @@ dependencies = [ "ctor", "editor", "env_logger 0.11.5", + "feature_flags", "gpui", "language", "log", @@ -7697,6 +7698,7 @@ dependencies = [ "sum_tree", "text", "theme", + "tree-sitter", "util", ] diff --git a/crates/diagnostics/Cargo.toml b/crates/diagnostics/Cargo.toml index b851a2733f8805a4dff9208959f1b57c34f9f133..f15792cdbe4dbd1ddf0cdeee750590e5b606128e 100644 --- a/crates/diagnostics/Cargo.toml +++ b/crates/diagnostics/Cargo.toml @@ -18,6 +18,7 @@ collections.workspace = true ctor.workspace = true editor.workspace = true env_logger.workspace = true +feature_flags.workspace = true gpui.workspace = true language.workspace = true log.workspace = true diff --git a/crates/diagnostics/src/diagnostics.rs b/crates/diagnostics/src/diagnostics.rs index 13cb113acbcbc65ef7063f887bf7997aa918174c..d22bfa6ee1bbf3a69fb8ad44aca61a859f3beb79 100644 --- a/crates/diagnostics/src/diagnostics.rs +++ b/crates/diagnostics/src/diagnostics.rs @@ -14,6 +14,7 @@ use editor::{ scroll::Autoscroll, Editor, EditorEvent, ExcerptId, ExcerptRange, MultiBuffer, ToOffset, }; +use feature_flags::FeatureFlagAppExt; use gpui::{ actions, div, svg, AnyElement, AnyView, AppContext, Context, EventEmitter, FocusHandle, FocusableView, Global, HighlightStyle, InteractiveElement, IntoElement, Model, ParentElement, @@ -21,7 +22,8 @@ use gpui::{ WeakView, WindowContext, }; use language::{ - Bias, Buffer, Diagnostic, DiagnosticEntry, DiagnosticSeverity, Point, Selection, SelectionGoal, + Bias, Buffer, BufferRow, BufferSnapshot, Diagnostic, DiagnosticEntry, DiagnosticSeverity, + Point, Selection, SelectionGoal, ToTreeSitterPoint, }; use lsp::LanguageServerId; use project::{DiagnosticSummary, Project, ProjectPath}; @@ -29,9 +31,10 @@ use project_diagnostics_settings::ProjectDiagnosticsSettings; use settings::Settings; use std::{ any::{Any, TypeId}, + cmp, cmp::Ordering, mem, - ops::Range, + ops::{Range, RangeInclusive}, sync::Arc, time::Duration, }; @@ -422,31 +425,28 @@ impl ProjectDiagnosticsEditor { blocks: Default::default(), block_count: 0, }; - let mut pending_range: Option<(Range, usize)> = None; + let mut pending_range: Option<(Range, Range, usize)> = None; let mut is_first_excerpt_for_group = true; for (ix, entry) in group.entries.iter().map(Some).chain([None]).enumerate() { let resolved_entry = entry.map(|e| e.resolve::(&snapshot)); - if let Some((range, start_ix)) = &mut pending_range { - if let Some(entry) = resolved_entry.as_ref() { - if entry.range.start.row <= range.end.row + 1 + self.context * 2 { - range.end = range.end.max(entry.range.end); + let expanded_range = resolved_entry.as_ref().map(|entry| { + context_range_for_entry(entry, self.context, &snapshot, cx) + }); + if let Some((range, context_range, start_ix)) = &mut pending_range { + if let Some(expanded_range) = expanded_range.clone() { + // If the entries are overlapping or next to each-other, merge them into one excerpt. + if context_range.end.row + 1 >= expanded_range.start.row { + context_range.end = context_range.end.max(expanded_range.end); continue; } } - let excerpt_start = - Point::new(range.start.row.saturating_sub(self.context), 0); - let excerpt_end = snapshot.clip_point( - Point::new(range.end.row + self.context, u32::MAX), - Bias::Left, - ); - let excerpt_id = excerpts .insert_excerpts_after( prev_excerpt_id, buffer.clone(), [ExcerptRange { - context: excerpt_start..excerpt_end, + context: context_range.clone(), primary: Some(range.clone()), }], cx, @@ -503,8 +503,9 @@ impl ProjectDiagnosticsEditor { pending_range.take(); } - if let Some(entry) = resolved_entry { - pending_range = Some((entry.range.clone(), ix)); + if let Some(entry) = resolved_entry.as_ref() { + let range = entry.range.clone(); + pending_range = Some((range, expanded_range.unwrap(), ix)); } } @@ -923,3 +924,169 @@ fn compare_diagnostics( }) .then_with(|| old.diagnostic.message.cmp(&new.diagnostic.message)) } + +const DIAGNOSTIC_EXPANSION_ROW_LIMIT: u32 = 32; + +fn context_range_for_entry( + entry: &DiagnosticEntry, + context: u32, + snapshot: &BufferSnapshot, + cx: &AppContext, +) -> Range { + if cx.is_staff() { + if let Some(rows) = heuristic_syntactic_expand( + entry.range.clone(), + DIAGNOSTIC_EXPANSION_ROW_LIMIT, + snapshot, + cx, + ) { + return Range { + start: Point::new(*rows.start(), 0), + end: snapshot.clip_point(Point::new(*rows.end(), u32::MAX), Bias::Left), + }; + } + } + Range { + start: Point::new(entry.range.start.row.saturating_sub(context), 0), + end: snapshot.clip_point( + Point::new(entry.range.end.row + context, u32::MAX), + Bias::Left, + ), + } +} + +/// Expands the input range using syntax information from TreeSitter. This expansion will be limited +/// to the specified `max_row_count`. +/// +/// If there is a containing outline item that is less than `max_row_count`, it will be returned. +/// Otherwise fairly arbitrary heuristics are applied to attempt to return a logical block of code. +fn heuristic_syntactic_expand<'a>( + input_range: Range, + max_row_count: u32, + snapshot: &'a BufferSnapshot, + cx: &'a AppContext, +) -> Option> { + let input_row_count = input_range.end.row - input_range.start.row; + if input_row_count > max_row_count { + return None; + } + + // If the outline node contains the diagnostic and is small enough, just use that. + let outline_range = snapshot.outline_range_containing(input_range.clone()); + if let Some(outline_range) = outline_range.clone() { + // Remove blank lines from start and end + if let Some(start_row) = (outline_range.start.row..outline_range.end.row) + .find(|row| !snapshot.line_indent_for_row(*row).is_line_blank()) + { + if let Some(end_row) = (outline_range.start.row..outline_range.end.row + 1) + .rev() + .find(|row| !snapshot.line_indent_for_row(*row).is_line_blank()) + { + let row_count = end_row.saturating_sub(start_row); + if row_count <= max_row_count { + return Some(RangeInclusive::new( + outline_range.start.row, + outline_range.end.row, + )); + } + } + } + } + + let mut node = snapshot.syntax_ancestor(input_range.clone())?; + loop { + let node_start = Point::from_ts_point(node.start_position()); + let node_end = Point::from_ts_point(node.end_position()); + let node_range = node_start..node_end; + let row_count = node_end.row - node_start.row + 1; + + // Stop if we've exceeded the row count or reached an outline node. Then, find the interval + // of node children which contains the query range. For example, this allows just returning + // the header of a declaration rather than the entire declaration. + if row_count > max_row_count || outline_range == Some(node_range.clone()) { + let mut cursor = node.walk(); + let mut included_child_start = None; + let mut included_child_end = None; + let mut previous_end = node_start; + if cursor.goto_first_child() { + loop { + let child_node = cursor.node(); + let child_range = previous_end..Point::from_ts_point(child_node.end_position()); + if included_child_start.is_none() && child_range.contains(&input_range.start) { + included_child_start = Some(child_range.start); + } + if child_range.contains(&input_range.end) { + included_child_end = Some(child_range.end); + } + previous_end = child_range.end; + if !cursor.goto_next_sibling() { + break; + } + } + } + let end = included_child_end.unwrap_or(node_range.end); + if let Some(start) = included_child_start { + let row_count = end.row - start.row; + if row_count < max_row_count { + return Some(RangeInclusive::new(start.row, end.row)); + } + } + + log::info!( + "Expanding to ancestor started on {} node exceeding row limit of {max_row_count}.", + node.grammar_name() + ); + return None; + } + + let node_name = node.grammar_name(); + let node_row_range = RangeInclusive::new(node_range.start.row, node_range.end.row); + if node_name.ends_with("block") { + return Some(node_row_range); + } else if node_name.ends_with("statement") || node_name.ends_with("declaration") { + // Expand to the nearest dedent or blank line for statements and declarations. + let tab_size = snapshot.settings_at(node_range.start, cx).tab_size.get(); + let indent_level = snapshot + .line_indent_for_row(node_range.start.row) + .len(tab_size); + let rows_remaining = max_row_count.saturating_sub(row_count); + let Some(start_row) = (node_range.start.row.saturating_sub(rows_remaining) + ..node_range.start.row) + .rev() + .find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot)) + else { + return Some(node_row_range); + }; + let rows_remaining = max_row_count.saturating_sub(node_range.end.row - start_row); + let Some(end_row) = (node_range.end.row + 1 + ..cmp::min( + node_range.end.row + rows_remaining + 1, + snapshot.row_count(), + )) + .find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot)) + else { + return Some(node_row_range); + }; + return Some(RangeInclusive::new(start_row, end_row)); + } + + // TODO: doing this instead of walking a cursor as that doesn't work - why? + let Some(parent) = node.parent() else { + log::info!( + "Expanding to ancestor reached the top node, so using default context line count.", + ); + return None; + }; + node = parent; + } +} + +fn is_line_blank_or_indented_less( + indent_level: u32, + row: u32, + tab_size: u32, + snapshot: &BufferSnapshot, +) -> bool { + let line_indent = snapshot.line_indent_for_row(row); + line_indent.is_line_blank() || line_indent.len(tab_size) < indent_level +} diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 36959e055ce83b978c069926c54822fdfccef2c4..20040bd58c6b29e5d73cab6a39c97913b78980d1 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -8774,9 +8774,10 @@ impl Editor { .map(|selection| { let old_range = selection.start..selection.end; let mut new_range = old_range.clone(); - while let Some(containing_range) = - buffer.range_for_syntax_ancestor(new_range.clone()) + let mut new_node = None; + while let Some((node, containing_range)) = buffer.syntax_ancestor(new_range.clone()) { + new_node = Some(node); new_range = containing_range; if !display_map.intersects_fold(new_range.start) && !display_map.intersects_fold(new_range.end) @@ -8785,6 +8786,17 @@ impl Editor { } } + if let Some(node) = new_node { + // Log the ancestor, to support using this action as a way to explore TreeSitter + // nodes. Parent and grandparent are also logged because this operation will not + // visit nodes that have the same range as their parent. + log::info!("Node: {node:?}"); + let parent = node.parent(); + log::info!("Parent: {parent:?}"); + let grandparent = parent.and_then(|x| x.parent()); + log::info!("Grandparent: {grandparent:?}"); + } + selected_larger_node |= new_range != old_range; Selection { id: selection.id, diff --git a/crates/editor/src/hover_popover.rs b/crates/editor/src/hover_popover.rs index 23dce30ca63e5bc4c298af603b899db4e905a9d5..fe3668b2fe2231d694fc2585f92a4764afdfc064 100644 --- a/crates/editor/src/hover_popover.rs +++ b/crates/editor/src/hover_popover.rs @@ -429,7 +429,7 @@ fn show_hover( }) .or_else(|| { let snapshot = &snapshot.buffer_snapshot; - let offset_range = snapshot.range_for_syntax_ancestor(anchor..anchor)?; + let offset_range = snapshot.syntax_ancestor(anchor..anchor)?.1; Some( snapshot.anchor_before(offset_range.start) ..snapshot.anchor_after(offset_range.end), diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 925e648d9f9aa0f703c762497fb905fd35d769f5..a55396e1e3c890a7a56af42db9a701c71160b4d2 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -68,7 +68,7 @@ pub use text::{ use theme::SyntaxTheme; #[cfg(any(test, feature = "test-support"))] use util::RandomCharIter; -use util::{debug_panic, RangeExt}; +use util::{debug_panic, maybe, RangeExt}; #[cfg(any(test, feature = "test-support"))] pub use {tree_sitter_rust, tree_sitter_typescript}; @@ -2923,10 +2923,13 @@ impl BufferSnapshot { (start..end, word_kind) } - /// Returns the range for the closes syntax node enclosing the given range. - pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { + /// Returns the closest syntax node enclosing the given range. + pub fn syntax_ancestor<'a, T: ToOffset>( + &'a self, + range: Range, + ) -> Option> { let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut result: Option> = None; + let mut result: Option> = None; 'outer: for layer in self .syntax .layers_for_range(range.clone(), &self.text, true) @@ -2956,7 +2959,7 @@ impl BufferSnapshot { } let left_node = cursor.node(); - let mut layer_result = left_node.byte_range(); + let mut layer_result = left_node; // For an empty range, try to find another node immediately to the right of the range. if left_node.end_byte() == range.start { @@ -2979,13 +2982,13 @@ impl BufferSnapshot { // If both nodes are the same in that regard, favor the right one. if let Some(right_node) = right_node { if right_node.is_named() || !left_node.is_named() { - layer_result = right_node.byte_range(); + layer_result = right_node; } } } if let Some(previous_result) = &result { - if previous_result.len() < layer_result.len() { + if previous_result.byte_range().len() < layer_result.byte_range().len() { continue; } } @@ -3028,6 +3031,48 @@ impl BufferSnapshot { Some(items) } + pub fn outline_range_containing(&self, range: Range) -> Option> { + let range = range.to_offset(self); + let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| { + grammar.outline_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|g| g.outline_config.as_ref().unwrap()) + .collect::>(); + + while let Some(mat) = matches.peek() { + let config = &configs[mat.grammar_index]; + let containing_item_node = maybe!({ + let item_node = mat.captures.iter().find_map(|cap| { + if cap.index == config.item_capture_ix { + Some(cap.node) + } else { + None + } + })?; + + let item_byte_range = item_node.byte_range(); + if item_byte_range.end < range.start || item_byte_range.start > range.end { + None + } else { + Some(item_node) + } + }); + + if let Some(item_node) = containing_item_node { + return Some( + Point::from_ts_point(item_node.start_position()) + ..Point::from_ts_point(item_node.end_position()), + ); + } + + matches.advance(); + } + None + } + pub fn outline_items_containing( &self, range: Range, diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index a1d1a57f13edbd708ccc713458042e52c6d08e08..4eab416965ef8b956aa5fbf7c108e6a68b91d48d 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -1104,20 +1104,32 @@ fn test_range_for_syntax_ancestor(cx: &mut AppContext) { let snapshot = buffer.snapshot(); assert_eq!( - snapshot.range_for_syntax_ancestor(empty_range_at(text, "|")), - Some(range_of(text, "|")) + snapshot + .syntax_ancestor(empty_range_at(text, "|")) + .unwrap() + .byte_range(), + range_of(text, "|") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|")), - Some(range_of(text, "|c|")) + snapshot + .syntax_ancestor(range_of(text, "|")) + .unwrap() + .byte_range(), + range_of(text, "|c|") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|c|")), - Some(range_of(text, "|c| {}")) + snapshot + .syntax_ancestor(range_of(text, "|c|")) + .unwrap() + .byte_range(), + range_of(text, "|c| {}") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|c| {}")), - Some(range_of(text, "(|c| {})")) + snapshot + .syntax_ancestor(range_of(text, "|c| {}")) + .unwrap() + .byte_range(), + range_of(text, "(|c| {})") ); buffer diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 9832c50482a657f9872961855a0c7c7b91cea503..9e006f72adcf3419b7b9cbaf2c312cd4a6222c2f 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -78,7 +78,7 @@ pub use language_registry::{ }; pub use lsp::LanguageServerId; pub use outline::*; -pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, TreeSitterOptions}; +pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, ToTreeSitterPoint, TreeSitterOptions}; pub use text::{AnchorRangeExt, LineEnding}; pub use tree_sitter::{Node, Parser, Tree, TreeCursor}; diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 76c6dc75e3093ab91c25b58f69ebfccf359d8e89..f51eeb96889b2e62756ebf8f1c731f633521c891 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1845,7 +1845,7 @@ impl Drop for QueryCursorHandle { } } -pub(crate) trait ToTreeSitterPoint { +pub trait ToTreeSitterPoint { fn to_ts_point(self) -> tree_sitter::Point; fn from_ts_point(point: tree_sitter::Point) -> Self; } diff --git a/crates/multi_buffer/Cargo.toml b/crates/multi_buffer/Cargo.toml index 444fe3c75c6cfb5c5b8421cce61fa88defccaecc..42cacc34b14e4e427606cf8e158e40d93ddf9d08 100644 --- a/crates/multi_buffer/Cargo.toml +++ b/crates/multi_buffer/Cargo.toml @@ -39,6 +39,7 @@ smallvec.workspace = true sum_tree.workspace = true text.workspace = true theme.workspace = true +tree-sitter.workspace = true util.workspace = true [dev-dependencies] diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index 1253e82ebc56a4ee1360a43fc5571a2860e7a664..2330d12ee4d1c4b08914fa8f3507bd2debd6a958 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -3920,15 +3920,16 @@ impl MultiBufferSnapshot { }) } - pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { + pub fn syntax_ancestor( + &self, + range: Range, + ) -> Option<(tree_sitter::Node, Range)> { let range = range.start.to_offset(self)..range.end.to_offset(self); let excerpt = self.excerpt_containing(range.clone())?; - - let ancestor_buffer_range = excerpt + let node = excerpt .buffer() - .range_for_syntax_ancestor(excerpt.map_range_to_buffer(range))?; - - Some(excerpt.map_range_from_buffer(ancestor_buffer_range)) + .syntax_ancestor(excerpt.map_range_to_buffer(range))?; + Some((node, excerpt.map_range_from_buffer(node.byte_range()))) } pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option> {