From 69af5261ea046bd533d342fc7456daaf0dcb1c1b Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Wed, 17 Sep 2025 14:59:58 -0600 Subject: [PATCH] Renames + fixes Co-authored-by: Agus --- .../src/declaration.rs | 205 +++++++++++++++++ ..._declaration.rs => declaration_scoring.rs} | 37 ++- .../src/edit_prediction_context.rs | 8 +- crates/edit_prediction_context/src/outline.rs | 12 +- .../edit_prediction_context/src/reference.rs | 2 +- .../{tree_sitter_index.rs => syntax_index.rs} | 212 +----------------- 6 files changed, 242 insertions(+), 234 deletions(-) create mode 100644 crates/edit_prediction_context/src/declaration.rs rename crates/edit_prediction_context/src/{scored_declaration.rs => declaration_scoring.rs} (94%) rename crates/edit_prediction_context/src/{tree_sitter_index.rs => syntax_index.rs} (79%) diff --git a/crates/edit_prediction_context/src/declaration.rs b/crates/edit_prediction_context/src/declaration.rs new file mode 100644 index 0000000000000000000000000000000000000000..9eee5a8273810441a578cf48843cf6c9be9a70f6 --- /dev/null +++ b/crates/edit_prediction_context/src/declaration.rs @@ -0,0 +1,205 @@ +use gpui::{App, WeakEntity}; +use language::{Buffer, BufferSnapshot, LanguageId}; +use project::ProjectEntryId; +use std::borrow::Cow; +use std::ops::{Deref, Range}; +use std::sync::Arc; +use text::{Anchor, Bias, OffsetRangeExt, ToOffset}; + +use crate::outline::OutlineDeclaration; + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Identifier { + pub name: Arc, + pub language_id: LanguageId, +} + +slotmap::new_key_type! { + pub struct DeclarationId; +} + +#[derive(Debug, Clone)] +pub enum Declaration { + File { + project_entry_id: ProjectEntryId, + declaration: FileDeclaration, + }, + Buffer { + buffer: WeakEntity, + declaration: BufferDeclaration, + }, +} + +const ITEM_TEXT_TRUNCATION_LENGTH: usize = 1024; + +impl Declaration { + pub fn identifier(&self) -> &Identifier { + match self { + Declaration::File { declaration, .. } => &declaration.identifier, + Declaration::Buffer { declaration, .. } => &declaration.identifier, + } + } + + pub fn project_entry_id(&self, cx: &App) -> Option { + match self { + Declaration::File { + project_entry_id, .. + } => Some(*project_entry_id), + Declaration::Buffer { buffer, .. } => buffer + .read_with(cx, |buffer, _cx| { + project::File::from_dyn(buffer.file()) + .and_then(|file| file.project_entry_id(cx)) + }) + .ok() + .flatten(), + } + } + + pub fn item_text(&self, cx: &App) -> (Cow<'_, str>, bool) { + match self { + Declaration::File { declaration, .. } => ( + declaration.text.as_ref().into(), + declaration.text_is_truncated, + ), + Declaration::Buffer { + buffer, + declaration, + } => buffer + .read_with(cx, |buffer, _cx| { + let (range, is_truncated) = expand_range_to_line_boundaries_and_truncate( + &declaration.item_range, + ITEM_TEXT_TRUNCATION_LENGTH, + buffer.deref(), + ); + ( + buffer.text_for_range(range).collect::>(), + is_truncated, + ) + }) + .unwrap_or_default(), + } + } + + pub fn signature_text(&self, cx: &App) -> (Cow<'_, str>, bool) { + match self { + Declaration::File { declaration, .. } => ( + declaration.text[declaration.signature_range_in_text.clone()].into(), + declaration.signature_is_truncated, + ), + Declaration::Buffer { + buffer, + declaration, + } => buffer + .read_with(cx, |buffer, _cx| { + let (range, is_truncated) = expand_range_to_line_boundaries_and_truncate( + &declaration.signature_range, + ITEM_TEXT_TRUNCATION_LENGTH, + buffer.deref(), + ); + ( + buffer.text_for_range(range).collect::>(), + is_truncated, + ) + }) + .unwrap_or_default(), + } + } +} + +fn expand_range_to_line_boundaries_and_truncate( + range: &Range, + limit: usize, + buffer: &text::BufferSnapshot, +) -> (Range, bool) { + let mut point_range = range.to_point(buffer); + point_range.start.column = 0; + point_range.end.row += 1; + point_range.end.column = 0; + + let mut item_range = point_range.to_offset(buffer); + let is_truncated = item_range.len() > limit; + if is_truncated { + item_range.end = item_range.start + limit; + } + item_range.end = buffer.clip_offset(item_range.end, Bias::Left); + (item_range, is_truncated) +} + +#[derive(Debug, Clone)] +pub struct FileDeclaration { + pub parent: Option, + pub identifier: Identifier, + /// offset range of the declaration in the file, expanded to line boundaries and truncated + pub item_range_in_file: Range, + /// text of `item_range_in_file` + pub text: Arc, + /// whether `text` was truncated + pub text_is_truncated: bool, + /// offset range of the signature within `text` + pub signature_range_in_text: Range, + /// whether `signature` was truncated + pub signature_is_truncated: bool, +} + +impl FileDeclaration { + pub fn from_outline( + declaration: OutlineDeclaration, + snapshot: &BufferSnapshot, + ) -> FileDeclaration { + let (item_range_in_file, text_is_truncated) = expand_range_to_line_boundaries_and_truncate( + &declaration.item_range, + ITEM_TEXT_TRUNCATION_LENGTH, + snapshot, + ); + + // TODO: consider logging if unexpected + let signature_start = declaration + .signature_range + .start + .saturating_sub(item_range_in_file.start); + let mut signature_end = declaration + .signature_range + .end + .saturating_sub(item_range_in_file.start); + let signature_is_truncated = signature_end > item_range_in_file.len(); + if signature_is_truncated { + signature_end = item_range_in_file.len(); + } + + FileDeclaration { + parent: None, + identifier: declaration.identifier, + signature_range_in_text: signature_start..signature_end, + signature_is_truncated, + text: snapshot + .text_for_range(item_range_in_file.clone()) + .collect::() + .into(), + text_is_truncated, + item_range_in_file, + } + } +} + +#[derive(Debug, Clone)] +pub struct BufferDeclaration { + pub parent: Option, + pub identifier: Identifier, + pub item_range: Range, + pub signature_range: Range, +} + +impl BufferDeclaration { + pub fn from_outline(declaration: OutlineDeclaration, snapshot: &BufferSnapshot) -> Self { + // use of anchor_before is a guess that the proper behavior is to expand to include + // insertions immediately before the declaration, but not for insertions immediately after + Self { + parent: None, + identifier: declaration.identifier, + item_range: snapshot.anchor_before(declaration.item_range.start) + ..snapshot.anchor_before(declaration.item_range.end), + signature_range: snapshot.anchor_before(declaration.signature_range.start) + ..snapshot.anchor_before(declaration.signature_range.end), + } + } +} diff --git a/crates/edit_prediction_context/src/scored_declaration.rs b/crates/edit_prediction_context/src/declaration_scoring.rs similarity index 94% rename from crates/edit_prediction_context/src/scored_declaration.rs rename to crates/edit_prediction_context/src/declaration_scoring.rs index f6a6f0c1d0a3e3bafcb7106cbbbdfadcb90790c3..df6f0f967580d0a3c819eaf2c134eb087a30e7e0 100644 --- a/crates/edit_prediction_context/src/scored_declaration.rs +++ b/crates/edit_prediction_context/src/declaration_scoring.rs @@ -1,16 +1,13 @@ -use collections::HashSet; use gpui::{App, Entity}; use itertools::Itertools as _; use language::BufferSnapshot; -use project::ProjectEntryId; use serde::Serialize; use std::{collections::HashMap, ops::Range}; use strum::EnumIter; use text::{OffsetRangeExt, Point, ToPoint}; use crate::{ - Declaration, EditPredictionExcerpt, EditPredictionExcerptText, TreeSitterIndex, - outline::Identifier, + Declaration, EditPredictionExcerpt, EditPredictionExcerptText, Identifier, SyntaxIndex, reference::{Reference, ReferenceRegion}, text_similarity::{IdentifierOccurrences, jaccard_similarity, weighted_overlap_coefficient}, }; @@ -54,7 +51,7 @@ impl ScoredSnippet { } fn scored_snippets( - index: Entity, + index: Entity, excerpt: &EditPredictionExcerpt, excerpt_text: &EditPredictionExcerptText, identifier_to_references: HashMap>, @@ -66,10 +63,6 @@ fn scored_snippets( IdentifierOccurrences::within_string(&excerpt_text.body); let cursor_point = cursor_offset.to_point(¤t_buffer); - // todo! ask michael why we needed this - // if let Some(cursor_within_excerpt) = cursor_offset.checked_sub(excerpt.range.start) { - // } else { - // }; let start_point = Point::new(cursor_point.row.saturating_sub(2), 0); let end_point = Point::new(cursor_point.row + 1, 0); let adjacent_identifier_occurrences = IdentifierOccurrences::within_string( @@ -91,7 +84,7 @@ fn scored_snippets( .iter() .filter_map(|declaration| match declaration { Declaration::Buffer { - declaration, + declaration: buffer_declaration, buffer, } => { let is_same_file = buffer @@ -100,13 +93,16 @@ fn scored_snippets( if is_same_file { range_intersection( - &declaration.item_range.to_offset(¤t_buffer), + &buffer_declaration.item_range.to_offset(¤t_buffer), &excerpt.range, ) .is_none() .then(|| { - let declaration_line = - declaration.item_range.start.to_point(current_buffer).row; + let declaration_line = buffer_declaration + .item_range + .start + .to_point(current_buffer) + .row; ( true, (cursor_point.row as i32 - declaration_line as i32).abs() @@ -120,7 +116,7 @@ fn scored_snippets( } Declaration::File { .. } => { // We can assume that a file declaration is in a different file, - // because the current onemust be open + // because the current one must be open Some((false, 0, declaration)) } }) @@ -199,9 +195,10 @@ fn score_snippet( .min() .unwrap(); - let item_source_occurrences = IdentifierOccurrences::within_string(&declaration.item_text(cx)); + let item_source_occurrences = + IdentifierOccurrences::within_string(&declaration.item_text(cx).0); let item_signature_occurrences = - IdentifierOccurrences::within_string(&declaration.signature_text(cx)); + IdentifierOccurrences::within_string(&declaration.signature_text(cx).0); let containing_range_vs_item_jaccard = jaccard_similarity( containing_range_identifier_occurrences, &item_source_occurrences, @@ -327,9 +324,7 @@ mod tests { use text::ToOffset; use util::path; - use crate::{ - EditPredictionExcerptOptions, references_in_excerpt, tree_sitter_index::TreeSitterIndex, - }; + use crate::{EditPredictionExcerptOptions, references_in_excerpt}; #[gpui::test] async fn test_call_site(cx: &mut TestAppContext) { @@ -382,7 +377,7 @@ mod tests { async fn init_test( cx: &mut TestAppContext, - ) -> (Entity, Entity, LanguageId) { + ) -> (Entity, Entity, LanguageId) { cx.update(|cx| { let settings_store = SettingsStore::test(cx); cx.set_global(settings_store); @@ -460,7 +455,7 @@ mod tests { let lang_id = lang.id(); language_registry.add(Arc::new(lang)); - let index = cx.new(|cx| TreeSitterIndex::new(&project, cx)); + let index = cx.new(|cx| SyntaxIndex::new(&project, cx)); cx.run_until_parked(); (project, index, lang_id) diff --git a/crates/edit_prediction_context/src/edit_prediction_context.rs b/crates/edit_prediction_context/src/edit_prediction_context.rs index 03102cc62b01888725dfdc73a40897c15cecb46b..ef14896c27d6acbe8f06364ae566ea1b8ef588cc 100644 --- a/crates/edit_prediction_context/src/edit_prediction_context.rs +++ b/crates/edit_prediction_context/src/edit_prediction_context.rs @@ -1,10 +1,12 @@ +mod declaration; +mod declaration_scoring; mod excerpt; mod outline; mod reference; -mod scored_declaration; +mod syntax_index; mod text_similarity; -mod tree_sitter_index; +pub use declaration::{BufferDeclaration, Declaration, FileDeclaration, Identifier}; pub use excerpt::{EditPredictionExcerpt, EditPredictionExcerptOptions, EditPredictionExcerptText}; pub use reference::references_in_excerpt; -pub use tree_sitter_index::{BufferDeclaration, Declaration, FileDeclaration, TreeSitterIndex}; +pub use syntax_index::SyntaxIndex; diff --git a/crates/edit_prediction_context/src/outline.rs b/crates/edit_prediction_context/src/outline.rs index 492352add1fd4c666eab3b12989f9b801d03570f..ec02c869dfae4cb861206cb801c285462e734f36 100644 --- a/crates/edit_prediction_context/src/outline.rs +++ b/crates/edit_prediction_context/src/outline.rs @@ -1,5 +1,7 @@ -use language::{BufferSnapshot, LanguageId, SyntaxMapMatches}; -use std::{cmp::Reverse, ops::Range, sync::Arc}; +use language::{BufferSnapshot, SyntaxMapMatches}; +use std::{cmp::Reverse, ops::Range}; + +use crate::declaration::Identifier; // TODO: // @@ -18,12 +20,6 @@ pub struct OutlineDeclaration { pub signature_range: Range, } -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct Identifier { - pub name: Arc, - pub language_id: LanguageId, -} - pub fn declarations_in_buffer(buffer: &BufferSnapshot) -> Vec { declarations_overlapping_range(0..buffer.len(), buffer) } diff --git a/crates/edit_prediction_context/src/reference.rs b/crates/edit_prediction_context/src/reference.rs index 65d34e73bf20f62b24ac2a654af43fc3b83041a9..ee2fc7ba573c3909b5a650e3ca0ff20155272b9f 100644 --- a/crates/edit_prediction_context/src/reference.rs +++ b/crates/edit_prediction_context/src/reference.rs @@ -3,8 +3,8 @@ use std::collections::HashMap; use std::ops::Range; use crate::{ + declaration::Identifier, excerpt::{EditPredictionExcerpt, EditPredictionExcerptText}, - outline::Identifier, }; #[derive(Debug)] diff --git a/crates/edit_prediction_context/src/tree_sitter_index.rs b/crates/edit_prediction_context/src/syntax_index.rs similarity index 79% rename from crates/edit_prediction_context/src/tree_sitter_index.rs rename to crates/edit_prediction_context/src/syntax_index.rs index 76363c1384069b1508b94aab291ed55ea3fcb1f8..10059a18f9785e2e0574844f70a3bba37723cd2f 100644 --- a/crates/edit_prediction_context/src/tree_sitter_index.rs +++ b/crates/edit_prediction_context/src/syntax_index.rs @@ -1,17 +1,16 @@ use collections::{HashMap, HashSet}; use gpui::{App, AppContext as _, Context, Entity, Task, WeakEntity}; -use language::{Buffer, BufferEvent, BufferSnapshot}; +use language::{Buffer, BufferEvent}; use project::buffer_store::{BufferStore, BufferStoreEvent}; use project::worktree_store::{WorktreeStore, WorktreeStoreEvent}; use project::{PathChange, Project, ProjectEntryId, ProjectPath}; use slotmap::SlotMap; -use std::borrow::Cow; -use std::ops::{Deref, Range}; -use std::sync::Arc; -use text::{Anchor, Bias, OffsetRangeExt, ToOffset}; use util::{ResultExt as _, debug_panic, some_or_debug_panic}; -use crate::outline::{Identifier, OutlineDeclaration, declarations_in_buffer}; +use crate::declaration::{ + BufferDeclaration, Declaration, DeclarationId, FileDeclaration, Identifier, +}; +use crate::outline::declarations_in_buffer; // TODO: // @@ -36,13 +35,7 @@ use crate::outline::{Identifier, OutlineDeclaration, declarations_in_buffer}; // // * Use queue for parsing -const ITEM_TEXT_TRUNCATION_LENGTH: usize = 1024; - -slotmap::new_key_type! { - pub struct DeclarationId; -} - -pub struct TreeSitterIndex { +pub struct SyntaxIndex { declarations: SlotMap, identifiers: HashMap>, files: HashMap, @@ -62,136 +55,7 @@ struct BufferState { task: Option>, } -#[derive(Debug, Clone)] -pub enum Declaration { - File { - project_entry_id: ProjectEntryId, - declaration: FileDeclaration, - }, - Buffer { - buffer: WeakEntity, - declaration: BufferDeclaration, - }, -} - -impl Declaration { - fn identifier(&self) -> &Identifier { - match self { - Declaration::File { declaration, .. } => &declaration.identifier, - Declaration::Buffer { declaration, .. } => &declaration.identifier, - } - } - - pub fn project_entry_id(&self, cx: &App) -> Option { - match self { - Declaration::File { - project_entry_id, .. - } => Some(*project_entry_id), - Declaration::Buffer { buffer, .. } => buffer - .read_with(cx, |buffer, _cx| { - project::File::from_dyn(buffer.file()) - .and_then(|file| file.project_entry_id(cx)) - }) - .ok() - .flatten(), - } - } - - pub fn item_text(&self, cx: &App) -> (Cow<'_, str>, bool) { - match self { - Declaration::File { declaration, .. } => ( - declaration.text.as_ref().into(), - declaration.text_is_truncated, - ), - Declaration::Buffer { - buffer, - declaration, - } => buffer - .read_with(cx, |buffer, _cx| { - let (range, is_truncated) = expand_range_to_line_boundaries_and_truncate( - &declaration.item_range, - ITEM_TEXT_TRUNCATION_LENGTH, - buffer.deref(), - ); - ( - buffer.text_for_range(range).collect::>(), - is_truncated, - ) - }) - .unwrap_or_default(), - } - } - - pub fn signature_text(&self, cx: &App) -> (Cow<'_, str>, bool) { - match self { - Declaration::File { declaration, .. } => ( - declaration.text[declaration.signature_range_in_text.clone()].into(), - declaration.signature_is_truncated, - ), - Declaration::Buffer { - buffer, - declaration, - } => buffer - .read_with(cx, |buffer, _cx| { - let (range, is_truncated) = expand_range_to_line_boundaries_and_truncate( - &declaration.signature_range, - ITEM_TEXT_TRUNCATION_LENGTH, - buffer.deref(), - ); - ( - buffer.text_for_range(range).collect::>(), - is_truncated, - ) - }) - .unwrap_or_default(), - } - } -} - -fn expand_range_to_line_boundaries_and_truncate( - range: &Range, - limit: usize, - buffer: &text::BufferSnapshot, -) -> (Range, bool) { - let mut point_range = range.to_point(buffer); - point_range.start.column = 0; - point_range.end.row += 1; - point_range.end.column = 0; - - let mut item_range = point_range.to_offset(buffer); - let is_truncated = item_range.len() > limit; - if is_truncated { - item_range.end = item_range.start + limit; - } - item_range.end = buffer.clip_offset(item_range.end, Bias::Left); - (item_range, is_truncated) -} - -#[derive(Debug, Clone)] -pub struct FileDeclaration { - pub parent: Option, - pub identifier: Identifier, - /// offset range of the declaration in the file, expanded to line boundaries and truncated - pub item_range_in_file: Range, - /// text of `item_range_in_file` - pub text: Arc, - /// whether `text` was truncated - pub text_is_truncated: bool, - /// offset range of the signature within `text` - pub signature_range_in_text: Range, - /// whether `signature` was truncated - pub signature_is_truncated: bool, -} - -#[derive(Debug, Clone)] -pub struct BufferDeclaration { - pub parent: Option, - pub identifier: Identifier, - pub item_range: Range, - pub signature_range: Range, -} - -impl TreeSitterIndex { +impl SyntaxIndex { pub fn new(project: &Entity, cx: &mut Context) -> Self { let mut this = Self { declarations: SlotMap::with_key(), @@ -579,61 +443,6 @@ impl TreeSitterIndex { } } -impl BufferDeclaration { - pub fn from_outline(declaration: OutlineDeclaration, snapshot: &BufferSnapshot) -> Self { - // use of anchor_before is a guess that the proper behavior is to expand to include - // insertions immediately before the declaration, but not for insertions immediately after - Self { - parent: None, - identifier: declaration.identifier, - item_range: snapshot.anchor_before(declaration.item_range.start) - ..snapshot.anchor_before(declaration.item_range.end), - signature_range: snapshot.anchor_before(declaration.signature_range.start) - ..snapshot.anchor_before(declaration.signature_range.end), - } - } -} - -impl FileDeclaration { - pub fn from_outline( - declaration: OutlineDeclaration, - snapshot: &BufferSnapshot, - ) -> FileDeclaration { - let (item_range_in_file, text_is_truncated) = expand_range_to_line_boundaries_and_truncate( - &declaration.item_range, - ITEM_TEXT_TRUNCATION_LENGTH, - snapshot, - ); - - // TODO: consider logging if unexpected - let signature_start = declaration - .signature_range - .start - .saturating_sub(item_range_in_file.start); - let mut signature_end = declaration - .signature_range - .end - .saturating_sub(item_range_in_file.start); - let signature_is_truncated = signature_end > item_range_in_file.len(); - if signature_is_truncated { - signature_end = item_range_in_file.len(); - } - - FileDeclaration { - parent: None, - identifier: declaration.identifier, - signature_range_in_text: signature_start..signature_end, - signature_is_truncated, - text: snapshot - .text_for_range(item_range_in_file.clone()) - .collect::() - .into(), - text_is_truncated, - item_range_in_file, - } - } -} - #[cfg(test)] mod tests { use super::*; @@ -646,9 +455,10 @@ mod tests { use project::{FakeFs, Project, ProjectItem}; use serde_json::json; use settings::SettingsStore; + use text::OffsetRangeExt as _; use util::path; - use crate::tree_sitter_index::TreeSitterIndex; + use crate::syntax_index::SyntaxIndex; #[gpui::test] async fn test_unopen_indexed_files(cx: &mut TestAppContext) { @@ -863,7 +673,7 @@ mod tests { async fn init_test( cx: &mut TestAppContext, - ) -> (Entity, Entity, LanguageId) { + ) -> (Entity, Entity, LanguageId) { cx.update(|cx| { let settings_store = SettingsStore::test(cx); cx.set_global(settings_store); @@ -941,7 +751,7 @@ mod tests { let lang_id = lang.id(); language_registry.add(Arc::new(lang)); - let index = cx.new(|cx| TreeSitterIndex::new(&project, cx)); + let index = cx.new(|cx| SyntaxIndex::new(&project, cx)); cx.run_until_parked(); (project, index, lang_id)