This fixes a tricky intermittent issue I was seeing, where we failed to
chunk certain files correctly because of the way we reuse Tree-sitter
`Parser` instances across parses.
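The root cause is visible in the `language.rs` hunk below: parsers now live in a global pool, and per-parse state is cleared before reuse, since a parser could otherwise carry `included_ranges` over from an earlier injection parse and silently restrict the next, unrelated parse. A minimal sketch of the pattern, using `std::sync::Mutex` for self-containedness and omitting the WASM grammar-store setup the real `with_parser` does:

```rust
use std::sync::Mutex;
use tree_sitter::Parser;

// A global pool of parsers, shared across threads. This replaces the old
// thread-local `RefCell<Parser>`.
static PARSERS: Mutex<Vec<Parser>> = Mutex::new(Vec::new());

pub fn with_parser<F, R>(func: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    let mut parser = PARSERS.lock().unwrap().pop().unwrap_or_else(Parser::new);
    // Clear per-parse state left over from the previous caller before reuse.
    parser.set_included_ranges(&[]).unwrap();
    let result = func(&mut parser);
    PARSERS.lock().unwrap().push(parser);
    result
}
```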
I've also accounted for leading comments in chunk boundaries, so that
when chunking, items are grouped with their leading comments whenever
possible.
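Concretely, before walking the outline query matches we record each row's byte offset and whether it starts with one of the language's line-comment prefixes, then widen each multi-line item's range upward over the run of comment rows above it. A self-contained sketch of that expansion (the free function and its arguments are illustrative; in `chunking.rs` this logic lives inline in `syntactic_ranges`):

```rust
/// Widen an outline item's byte range so it also covers the run of line
/// comments immediately above it (e.g. doc comments stay with their item).
fn expand_over_leading_comments(
    text: &str,
    mut start_offset: usize,
    mut start_row: usize,
    line_comment_prefixes: &[&str],
) -> usize {
    // Per row: (byte offset of the row's start, whether the row is a comment).
    let mut offset = 0;
    let row_infos: Vec<(usize, bool)> = text
        .split('\n')
        .map(|line| {
            let trimmed = line.trim_start();
            let is_comment = line_comment_prefixes
                .iter()
                .any(|prefix| trimmed.starts_with(*prefix));
            let info = (offset, is_comment);
            offset += line.len() + 1; // +1 for the '\n'
            info
        })
        .collect();

    while start_row > 0 && row_infos[start_row - 1].1 {
        start_offset = row_infos[start_row - 1].0;
        start_row -= 1;
    }
    start_offset
}

#[test]
fn doc_comment_joins_its_item() {
    let text = "/// docs\nfn f() {}";
    // The item starting at row 1 (offset 9) expands to cover the doc comment.
    assert_eq!(expand_over_leading_comments(text, 9, 1, &["//"]), 0);
}
```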
Finally, we've changed the `debug project index` action so that it opens
a simple debug view in a pane, instead of printing paths to the console.
This lets you click into a path and see how it was chunked.
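For anyone poking at the view: clicking a path loads the file from disk and slices out each stored chunk for display. The core of that, condensed from the click handler in `project_index_debug_view.rs` below into a standalone helper (the name `chunk_excerpt` is just for illustration):

```rust
use std::ops::Range;

/// Slice a stored chunk out of freshly loaded file content. The file may have
/// changed since it was indexed, so the stored byte range is clamped to the
/// current length and to UTF-8 character boundaries before slicing.
fn chunk_excerpt(content: &str, range: Range<usize>) -> String {
    let mut start = range.start.min(content.len());
    let mut end = range.end.min(content.len());
    while !content.is_char_boundary(start) {
        start += 1;
    }
    while !content.is_char_boundary(end) {
        end -= 1;
    }
    // Extra guard (beyond the handler below): a degenerate range could end up
    // inverted after clamping.
    content[start..end.max(start)].to_string()
}
```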
Release Notes:
- N/A
---------
Co-authored-by: Marshall <marshall@zed.dev>
Cargo.lock | 3
crates/assistant2/src/assistant2.rs | 35
crates/editor/src/editor.rs | 32
crates/language/src/buffer_tests.rs | 25
crates/language/src/language.rs | 37
crates/language/src/language_registry.rs | 7
crates/language/src/syntax_map.rs | 12
crates/semantic_index/Cargo.toml | 4
crates/semantic_index/src/chunking.rs | 171 ++++--
crates/semantic_index/src/project_index_debug_view.rs | 300 +++++++++++++
crates/semantic_index/src/semantic_index.rs | 79 ++-
11 files changed, 533 insertions(+), 172 deletions(-)
@@ -8713,9 +8713,12 @@ dependencies = [
"sha2 0.10.7",
"smol",
"tempfile",
+ "theme",
"tree-sitter",
+ "ui",
"unindent",
"util",
+ "workspace",
"worktree",
]
@@ -21,7 +21,7 @@ use gpui::{
use language::{language_settings::SoftWrap, LanguageRegistry};
use open_ai::{FunctionContent, ToolCall, ToolCallContent};
use rich_text::RichText;
-use semantic_index::{CloudEmbeddingProvider, ProjectIndex, SemanticIndex};
+use semantic_index::{CloudEmbeddingProvider, ProjectIndex, ProjectIndexDebugView, SemanticIndex};
use serde::Deserialize;
use settings::Settings;
use std::sync::Arc;
@@ -83,6 +83,14 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
workspace.register_action(|workspace, _: &ToggleFocus, cx| {
workspace.toggle_panel_focus::<AssistantPanel>(cx);
});
+ workspace.register_action(|workspace, _: &DebugProjectIndex, cx| {
+ if let Some(panel) = workspace.panel::<AssistantPanel>(cx) {
+ if let Some(index) = panel.read(cx).chat.read(cx).project_index.clone() {
+ let view = cx.new_view(|cx| ProjectIndexDebugView::new(index, cx));
+ workspace.add_item_to_center(Box::new(view), cx);
+ }
+ }
+ });
},
)
.detach();
@@ -107,8 +115,6 @@ impl AssistantPanel {
(workspace.app_state().clone(), workspace.project().clone())
})?;
- let user_store = app_state.user_store.clone();
-
cx.new_view(|cx| {
let project_index = cx.update_global(|semantic_index: &mut SemanticIndex, cx| {
semantic_index.project_index(project.clone(), cx)
@@ -117,7 +123,7 @@ impl AssistantPanel {
let mut tool_registry = ToolRegistry::new();
tool_registry
.register(
- ProjectIndexTool::new(project_index.clone(), app_state.fs.clone()),
+ ProjectIndexTool::new(project_index.clone(), project.read(cx).fs().clone()),
cx,
)
.context("failed to register ProjectIndexTool")
@@ -135,9 +141,9 @@ impl AssistantPanel {
Self::new(
app_state.languages.clone(),
- Arc::new(attachment_store),
Arc::new(tool_registry),
- user_store,
+ Arc::new(attachment_store),
+ app_state.user_store.clone(),
Some(project_index),
workspace,
cx,
@@ -148,8 +154,8 @@ impl AssistantPanel {
pub fn new(
language_registry: Arc<LanguageRegistry>,
- attachment_store: Arc<UserAttachmentStore>,
tool_registry: Arc<ToolRegistry>,
+ attachment_store: Arc<UserAttachmentStore>,
user_store: Model<UserStore>,
project_index: Option<Model<ProjectIndex>>,
workspace: WeakView<Workspace>,
@@ -157,9 +163,9 @@ impl AssistantPanel {
) -> Self {
let chat = cx.new_view(|cx| {
AssistantChat::new(
- language_registry.clone(),
- attachment_store.clone(),
+ language_registry,
tool_registry.clone(),
+ attachment_store,
user_store,
project_index,
workspace,
@@ -257,8 +263,8 @@ struct EditingMessage {
impl AssistantChat {
fn new(
language_registry: Arc<LanguageRegistry>,
- attachment_store: Arc<UserAttachmentStore>,
tool_registry: Arc<ToolRegistry>,
+ attachment_store: Arc<UserAttachmentStore>,
user_store: Model<UserStore>,
project_index: Option<Model<ProjectIndex>>,
workspace: WeakView<Workspace>,
@@ -429,14 +435,6 @@ impl AssistantChat {
}));
}
- fn debug_project_index(&mut self, _: &DebugProjectIndex, cx: &mut ViewContext<Self>) {
- if let Some(index) = &self.project_index {
- index.update(cx, |project_index, cx| {
- project_index.debug(cx).detach_and_log_err(cx)
- });
- }
- }
-
async fn request_completion(
this: WeakView<Self>,
mode: SubmitMode,
@@ -846,7 +844,6 @@ impl Render for AssistantChat {
.key_context("AssistantChat")
.on_action(cx.listener(Self::submit))
.on_action(cx.listener(Self::cancel))
- .on_action(cx.listener(Self::debug_project_index))
.text_color(Color::Default.color(cx))
.child(list(self.list_state.clone()).flex_1())
.child(Composer::new(
@@ -2768,7 +2768,7 @@ impl Editor {
indent.len = cmp::min(indent.len, start_point.column);
let start = selection.start;
let end = selection.end;
- let is_cursor = start == end;
+ let selection_is_empty = start == end;
let language_scope = buffer.language_scope_at(start);
let (comment_delimiter, insert_extra_newline) = if let Some(language) =
&language_scope
@@ -2802,13 +2802,18 @@ impl Editor {
pair_start,
)
});
+
// Comment extension on newline is allowed only for cursor selections
- let comment_delimiter = language.line_comment_prefixes().filter(|_| {
- let is_comment_extension_enabled =
- multi_buffer.settings_at(0, cx).extend_comment_on_newline;
- is_cursor && is_comment_extension_enabled
- });
- let get_comment_delimiter = |delimiters: &[Arc<str>]| {
+ let comment_delimiter = maybe!({
+ if !selection_is_empty {
+ return None;
+ }
+
+ if !multi_buffer.settings_at(0, cx).extend_comment_on_newline {
+ return None;
+ }
+
+ let delimiters = language.line_comment_prefixes();
let max_len_of_delimiter =
delimiters.iter().map(|delimiter| delimiter.len()).max()?;
let (snapshot, range) =
@@ -2837,12 +2842,7 @@ impl Editor {
} else {
None
}
- };
- let comment_delimiter = if let Some(delimiters) = comment_delimiter {
- get_comment_delimiter(delimiters)
- } else {
- None
- };
+ });
(comment_delimiter, insert_extra_newline)
} else {
(None, false)
@@ -7181,10 +7181,8 @@ impl Editor {
}
// If the language has line comments, toggle those.
- if let Some(full_comment_prefixes) = language
- .line_comment_prefixes()
- .filter(|prefixes| !prefixes.is_empty())
- {
+ let full_comment_prefixes = language.line_comment_prefixes();
+ if !full_comment_prefixes.is_empty() {
let first_prefix = full_comment_prefixes
.first()
.expect("prefixes is non-empty");
@@ -1818,7 +1818,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
let snapshot = buffer.snapshot();
let config = snapshot.language_scope_at(0).unwrap();
- assert_eq!(config.line_comment_prefixes().unwrap(), &[Arc::from("// ")]);
+ assert_eq!(config.line_comment_prefixes(), &[Arc::from("// ")]);
// Both bracket pairs are enabled
assert_eq!(
config.brackets().map(|e| e.1).collect::<Vec<_>>(),
@@ -1828,10 +1828,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
let string_config = snapshot
.language_scope_at(text.find("b\"").unwrap())
.unwrap();
- assert_eq!(
- string_config.line_comment_prefixes().unwrap(),
- &[Arc::from("// ")]
- );
+ assert_eq!(string_config.line_comment_prefixes(), &[Arc::from("// ")]);
// Second bracket pair is disabled
assert_eq!(
string_config.brackets().map(|e| e.1).collect::<Vec<_>>(),
@@ -1842,7 +1839,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
let element_config = snapshot
.language_scope_at(text.find("<F>").unwrap())
.unwrap();
- assert_eq!(element_config.line_comment_prefixes(), None);
+ assert_eq!(element_config.line_comment_prefixes(), &[]);
assert_eq!(
element_config.block_comment_delimiters(),
Some((&"{/*".into(), &"*/}".into()))
@@ -1856,10 +1853,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
let tag_config = snapshot
.language_scope_at(text.find(" d=").unwrap() + 1)
.unwrap();
- assert_eq!(
- tag_config.line_comment_prefixes().unwrap(),
- &[Arc::from("// ")]
- );
+ assert_eq!(tag_config.line_comment_prefixes(), &[Arc::from("// ")]);
assert_eq!(
tag_config.brackets().map(|e| e.1).collect::<Vec<_>>(),
&[true, true]
@@ -1870,9 +1864,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
.language_scope_at(text.find('{').unwrap() + 1)
.unwrap();
assert_eq!(
- expression_in_element_config
- .line_comment_prefixes()
- .unwrap(),
+ expression_in_element_config.line_comment_prefixes(),
&[Arc::from("// ")]
);
assert_eq!(
@@ -1988,17 +1980,14 @@ fn test_language_scope_at_with_combined_injections(cx: &mut AppContext) {
let snapshot = buffer.snapshot();
let html_config = snapshot.language_scope_at(Point::new(2, 4)).unwrap();
- assert_eq!(html_config.line_comment_prefixes(), Some(&vec![]));
+ assert_eq!(html_config.line_comment_prefixes(), &[]);
assert_eq!(
html_config.block_comment_delimiters(),
Some((&"<!--".into(), &"-->".into()))
);
let ruby_config = snapshot.language_scope_at(Point::new(3, 12)).unwrap();
- assert_eq!(
- ruby_config.line_comment_prefixes().unwrap(),
- &[Arc::from("# ")]
- );
+ assert_eq!(ruby_config.line_comment_prefixes(), &[Arc::from("# ")]);
assert_eq!(ruby_config.block_comment_delimiters(), None);
buffer
@@ -20,6 +20,7 @@ mod task_context;
mod buffer_tests;
pub mod markdown;
+use crate::language_settings::SoftWrap;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use collections::{HashMap, HashSet};
@@ -41,12 +42,11 @@ use smol::future::FutureExt as _;
use std::num::NonZeroU32;
use std::{
any::Any,
- cell::RefCell,
ffi::OsStr,
fmt::Debug,
hash::Hash,
mem,
- ops::Range,
+ ops::{DerefMut, Range},
path::{Path, PathBuf},
pin::Pin,
str,
@@ -74,8 +74,6 @@ pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer};
pub use text::LineEnding;
pub use tree_sitter::{Node, Parser, Tree, TreeCursor};
-use crate::language_settings::SoftWrap;
-
/// Initializes the `language` crate.
///
/// This should be called before making use of items from the crate.
@@ -83,29 +81,30 @@ pub fn init(cx: &mut AppContext) {
language_settings::init(cx);
}
-thread_local! {
- static PARSER: RefCell<Parser> = {
- let mut parser = Parser::new();
- parser.set_wasm_store(WasmStore::new(WASM_ENGINE.clone()).unwrap()).unwrap();
- RefCell::new(parser)
- };
-}
+static QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Mutex::new(vec![]);
+static PARSERS: Mutex<Vec<Parser>> = Mutex::new(vec![]);
pub fn with_parser<F, R>(func: F) -> R
where
F: FnOnce(&mut Parser) -> R,
{
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
- func(&mut parser)
- })
+ let mut parser = PARSERS.lock().pop().unwrap_or_else(|| {
+ let mut parser = Parser::new();
+ parser
+ .set_wasm_store(WasmStore::new(WASM_ENGINE.clone()).unwrap())
+ .unwrap();
+ parser
+ });
+ parser.set_included_ranges(&[]).unwrap();
+ let result = func(&mut parser);
+ PARSERS.lock().push(parser);
+ result
}
pub fn with_query_cursor<F, R>(func: F) -> R
where
F: FnOnce(&mut QueryCursor) -> R,
{
- use std::ops::DerefMut;
let mut cursor = QueryCursorHandle::new();
func(cursor.deref_mut())
}
@@ -1340,11 +1339,12 @@ impl LanguageScope {
/// Returns the line comment prefixes that are inserted in e.g. line
/// continuations or in the `toggle comments` action.
- pub fn line_comment_prefixes(&self) -> Option<&Vec<Arc<str>>> {
+ pub fn line_comment_prefixes(&self) -> &[Arc<str>] {
Override::as_option(
self.config_override().map(|o| &o.line_comments),
Some(&self.language.config.line_comments),
)
+ .map_or(&[] as &[_], |e| e.as_slice())
}
pub fn block_comment_delimiters(&self) -> Option<(&Arc<str>, &Arc<str>)> {
@@ -1445,8 +1445,7 @@ impl Grammar {
}
fn parse_text(&self, text: &Rope, old_tree: Option<Tree>) -> Tree {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
+ with_parser(|parser| {
parser
.set_language(&self.ts_language)
.expect("incompatible grammar");
@@ -3,8 +3,8 @@ use crate::{
all_language_settings, AllLanguageSettingsContent, LanguageSettingsContent,
},
task_context::ContextProvider,
- CachedLspAdapter, File, Language, LanguageConfig, LanguageId, LanguageMatcher,
- LanguageServerName, LspAdapter, LspAdapterDelegate, PARSER, PLAIN_TEXT,
+ with_parser, CachedLspAdapter, File, Language, LanguageConfig, LanguageId, LanguageMatcher,
+ LanguageServerName, LspAdapter, LspAdapterDelegate, PLAIN_TEXT,
};
use anyhow::{anyhow, Context as _, Result};
use collections::{hash_map, HashMap};
@@ -668,8 +668,7 @@ impl LanguageRegistry {
.file_stem()
.and_then(OsStr::to_str)
.ok_or_else(|| anyhow!("invalid grammar filename"))?;
- anyhow::Ok(PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
+ anyhow::Ok(with_parser(|parser| {
let mut store = parser.take_wasm_store().unwrap();
let grammar = store.load_language(&grammar_name, &wasm_bytes);
parser.set_wasm_store(store).unwrap();
@@ -1,10 +1,11 @@
#[cfg(test)]
mod syntax_map_tests;
-use crate::{Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry};
+use crate::{
+ with_parser, Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry, QUERY_CURSORS,
+};
use collections::HashMap;
use futures::FutureExt;
-use parking_lot::Mutex;
use std::{
borrow::Cow,
cmp::{self, Ordering, Reverse},
@@ -17,10 +18,6 @@ use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};
-use super::PARSER;
-
-static QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Mutex::new(vec![]);
-
#[derive(Default)]
pub struct SyntaxMap {
snapshot: SyntaxSnapshot,
@@ -1177,8 +1174,7 @@ fn parse_text(
ranges: Vec<tree_sitter::Range>,
old_tree: Option<Tree>,
) -> anyhow::Result<Tree> {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
+ with_parser(|parser| {
let mut chunks = text.chunks_in_range(start_byte..text.len());
parser.set_included_ranges(&ranges)?;
parser.set_language(&grammar.ts_language)?;
@@ -37,9 +37,12 @@ serde.workspace = true
serde_json.workspace = true
sha2.workspace = true
smol.workspace = true
+theme.workspace = true
tree-sitter.workspace = true
+ui.workspace = true
util.workspace = true
unindent.workspace = true
+workspace.workspace = true
worktree.workspace = true
[dev-dependencies]
@@ -54,3 +57,4 @@ project = { workspace = true, features = ["test-support"] }
tempfile.workspace = true
util = { workspace = true, features = ["test-support"] }
worktree = { workspace = true, features = ["test-support"] }
+workspace = { workspace = true, features = ["test-support"] }
@@ -1,9 +1,10 @@
-use language::{with_parser, with_query_cursor, Grammar};
+use language::{with_parser, with_query_cursor, Language};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::{
cmp::{self, Reverse},
ops::Range,
+ path::Path,
sync::Arc,
};
use tree_sitter::QueryCapture;
@@ -26,52 +27,95 @@ pub struct Chunk {
pub digest: [u8; 32],
}
-pub fn chunk_text(text: &str, grammar: Option<&Arc<Grammar>>) -> Vec<Chunk> {
- chunk_text_with_size_range(text, grammar, CHUNK_SIZE_RANGE)
+pub fn chunk_text(text: &str, language: Option<&Arc<Language>>, path: &Path) -> Vec<Chunk> {
+ chunk_text_with_size_range(text, language, path, CHUNK_SIZE_RANGE)
}
fn chunk_text_with_size_range(
text: &str,
- grammar: Option<&Arc<Grammar>>,
+ language: Option<&Arc<Language>>,
+ path: &Path,
size_config: ChunkSizeRange,
) -> Vec<Chunk> {
- let mut syntactic_ranges = Vec::new();
-
- if let Some(grammar) = grammar {
- if let Some(outline) = grammar.outline_config.as_ref() {
- let tree = with_parser(|parser| {
- parser.set_language(&grammar.ts_language).log_err()?;
- parser.parse(&text, None)
- });
+ let ranges = syntactic_ranges(text, language, path).unwrap_or_default();
+ chunk_text_with_syntactic_ranges(text, &ranges, size_config)
+}
- if let Some(tree) = tree {
- with_query_cursor(|cursor| {
- // Retrieve a list of ranges of outline items (types, functions, etc) in the document.
- // Omit single-line outline items (e.g. struct fields, constant declarations), because
- // we'll already be attempting to split on lines.
- syntactic_ranges = cursor
- .matches(&outline.query, tree.root_node(), text.as_bytes())
- .filter_map(|mat| {
- mat.captures
- .iter()
- .find_map(|QueryCapture { node, index }| {
- if *index == outline.item_capture_ix {
- if node.end_position().row > node.start_position().row {
- return Some(node.byte_range());
- }
- }
- None
- })
- })
- .collect::<Vec<_>>();
- syntactic_ranges
- .sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
- });
- }
- }
+fn syntactic_ranges(
+ text: &str,
+ language: Option<&Arc<Language>>,
+ path: &Path,
+) -> Option<Vec<Range<usize>>> {
+ let language = language?;
+ let grammar = language.grammar()?;
+ let outline = grammar.outline_config.as_ref()?;
+ let tree = with_parser(|parser| {
+ parser.set_language(&grammar.ts_language).log_err()?;
+ parser.parse(&text, None)
+ });
+
+ let Some(tree) = tree else {
+ log::error!("failed to parse file {path:?} for chunking");
+ return None;
+ };
+
+ struct RowInfo {
+ offset: usize,
+ is_comment: bool,
}
- chunk_text_with_syntactic_ranges(text, &syntactic_ranges, size_config)
+ let scope = language.default_scope();
+ let line_comment_prefixes = scope.line_comment_prefixes();
+ let row_infos = text
+ .split('\n')
+ .map({
+ let mut offset = 0;
+ move |line| {
+ let trimmed = line.trim_start();
+ let is_comment = line_comment_prefixes
+ .iter()
+ .any(|prefix| trimmed.starts_with(prefix.as_ref()));
+ let result = RowInfo { offset, is_comment };
+ offset += line.len() + 1;
+ result
+ }
+ })
+ .collect::<Vec<_>>();
+
+ // Retrieve a list of ranges of outline items (types, functions, etc) in the document.
+ // Omit single-line outline items (e.g. struct fields, constant declarations), because
+ // we'll already be attempting to split on lines.
+ let mut ranges = with_query_cursor(|cursor| {
+ cursor
+ .matches(&outline.query, tree.root_node(), text.as_bytes())
+ .filter_map(|mat| {
+ mat.captures
+ .iter()
+ .find_map(|QueryCapture { node, index }| {
+ if *index == outline.item_capture_ix {
+ let mut start_offset = node.start_byte();
+ let mut start_row = node.start_position().row;
+ let end_offset = node.end_byte();
+ let end_row = node.end_position().row;
+
+ // Expand the range to include any preceding comments.
+ while start_row > 0 && row_infos[start_row - 1].is_comment {
+ start_offset = row_infos[start_row - 1].offset;
+ start_row -= 1;
+ }
+
+ if end_row > start_row {
+ return Some(start_offset..end_offset);
+ }
+ }
+ None
+ })
+ })
+ .collect::<Vec<_>>()
+ });
+
+ ranges.sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
+ Some(ranges)
}
fn chunk_text_with_syntactic_ranges(
@@ -148,7 +192,7 @@ fn chunk_text_with_syntactic_ranges(
if !range.is_empty() {
chunks.push(Chunk {
range: range.clone(),
- digest: Sha256::digest(&text[range.clone()]).into(),
+ digest: Sha256::digest(&text[range]).into(),
});
}
@@ -177,6 +221,8 @@ mod tests {
Self { first_name, last_name, age }
}
+ /// Returns the first name
+ /// something something something
fn first_name(&self) -> &str {
&self.first_name
}
@@ -185,8 +231,8 @@ mod tests {
&self.last_name
}
- fn age(&self) -> usize {
- self.ages
+ fn age(&self) -> u32 {
+ self.age
}
}
"
@@ -194,7 +240,8 @@ mod tests {
let chunks = chunk_text_with_size_range(
&text,
- language.grammar(),
+ Some(&language),
+ Path::new("lib.rs"),
ChunkSizeRange {
min: text.find('}').unwrap(),
max: text.find("Self {").unwrap(),
@@ -209,8 +256,8 @@ mod tests {
&[
"struct Person {", // ...
"impl Person {",
- " fn first_name",
- " fn age",
+ " /// Returns the first name",
+ " fn last_name",
],
);
@@ -227,7 +274,8 @@ mod tests {
let chunks = chunk_text_with_size_range(
&text,
- language.grammar(),
+ Some(&language),
+ Path::new("lib.rs"),
ChunkSizeRange {
min: text.find('{').unwrap(),
max: text.find('V').unwrap(),
@@ -263,7 +311,8 @@ mod tests {
let chunks = chunk_text_with_size_range(
&text,
- language.grammar(),
+ Some(&language),
+ Path::new("lib.rs"),
ChunkSizeRange { min: 32, max: 64 },
);
@@ -331,33 +380,35 @@ mod tests {
#[test]
fn test_chunk_text() {
let text = "a\n".repeat(1000);
- let chunks = chunk_text(&text, None);
+ let chunks = chunk_text(&text, None, Path::new("lib.rs"));
assert_eq!(
chunks.len(),
((2000_f64) / (CHUNK_SIZE_RANGE.max as f64)).ceil() as usize
);
}
- fn rust_language() -> Language {
- Language::new(
- LanguageConfig {
- name: "Rust".into(),
- matcher: LanguageMatcher {
- path_suffixes: vec!["rs".to_string()],
+ fn rust_language() -> Arc<Language> {
+ Arc::new(
+ Language::new(
+ LanguageConfig {
+ name: "Rust".into(),
+ matcher: LanguageMatcher {
+ path_suffixes: vec!["rs".to_string()],
+ ..Default::default()
+ },
..Default::default()
},
- ..Default::default()
- },
- Some(tree_sitter_rust::language()),
- )
- .with_outline_query(
- "
+ Some(tree_sitter_rust::language()),
+ )
+ .with_outline_query(
+ "
(function_item name: (_) @name) @item
(impl_item type: (_) @name) @item
(struct_item name: (_) @name) @item
(field_declaration name: (_) @name) @item
",
+ )
+ .unwrap(),
)
- .unwrap()
}
}
@@ -0,0 +1,300 @@
+use crate::ProjectIndex;
+use gpui::{
+ canvas, div, list, uniform_list, AnyElement, AppContext, CursorStyle, EventEmitter,
+ FocusHandle, FocusableView, IntoElement, ListOffset, ListState, Model, MouseMoveEvent, Render,
+ UniformListScrollHandle, View,
+};
+use project::WorktreeId;
+use settings::Settings;
+use std::{path::Path, sync::Arc};
+use theme::ThemeSettings;
+use ui::prelude::*;
+use workspace::item::{Item, TabContentParams};
+
+pub struct ProjectIndexDebugView {
+ index: Model<ProjectIndex>,
+ rows: Vec<Row>,
+ selected_path: Option<PathState>,
+ hovered_row_ix: Option<usize>,
+ focus_handle: FocusHandle,
+ list_scroll_handle: UniformListScrollHandle,
+ _subscription: gpui::Subscription,
+}
+
+struct PathState {
+ path: Arc<Path>,
+ chunks: Vec<SharedString>,
+ list_state: ListState,
+}
+
+enum Row {
+ Worktree(Arc<Path>),
+ Entry(WorktreeId, Arc<Path>),
+}
+
+impl ProjectIndexDebugView {
+ pub fn new(index: Model<ProjectIndex>, cx: &mut ViewContext<Self>) -> Self {
+ let mut this = Self {
+ rows: Vec::new(),
+ list_scroll_handle: UniformListScrollHandle::new(),
+ selected_path: None,
+ hovered_row_ix: None,
+ focus_handle: cx.focus_handle(),
+ _subscription: cx.subscribe(&index, |this, _, _, cx| this.update_rows(cx)),
+ index,
+ };
+ this.update_rows(cx);
+ this
+ }
+
+ fn update_rows(&mut self, cx: &mut ViewContext<Self>) {
+ let worktree_indices = self.index.read(cx).worktree_indices(cx);
+ cx.spawn(|this, mut cx| async move {
+ let mut rows = Vec::new();
+
+ for index in worktree_indices {
+ let (root_path, worktree_id, worktree_paths) =
+ index.read_with(&cx, |index, cx| {
+ let worktree = index.worktree.read(cx);
+ (worktree.abs_path(), worktree.id(), index.paths(cx))
+ })?;
+ rows.push(Row::Worktree(root_path));
+ rows.extend(
+ worktree_paths
+ .await?
+ .into_iter()
+ .map(|path| Row::Entry(worktree_id, path)),
+ );
+ }
+
+ this.update(&mut cx, |this, cx| {
+ this.rows = rows;
+ cx.notify();
+ })
+ })
+ .detach();
+ }
+
+ fn handle_path_click(
+ &mut self,
+ worktree_id: WorktreeId,
+ file_path: Arc<Path>,
+ cx: &mut ViewContext<Self>,
+ ) -> Option<()> {
+ let project_index = self.index.read(cx);
+ let fs = project_index.fs.clone();
+ let worktree_index = project_index.worktree_index(worktree_id, cx)?.read(cx);
+ let root_path = worktree_index.worktree.read(cx).abs_path();
+ let chunks = worktree_index.chunks_for_path(file_path.clone(), cx);
+
+ cx.spawn(|this, mut cx| async move {
+ let chunks = chunks.await?;
+ let content = fs.load(&root_path.join(&file_path)).await?;
+ let chunks = chunks
+ .into_iter()
+ .map(|chunk| {
+ let mut start = chunk.chunk.range.start.min(content.len());
+ let mut end = chunk.chunk.range.end.min(content.len());
+ while !content.is_char_boundary(start) {
+ start += 1;
+ }
+ while !content.is_char_boundary(end) {
+ end -= 1;
+ }
+ content[start..end].to_string().into()
+ })
+ .collect::<Vec<_>>();
+
+ this.update(&mut cx, |this, cx| {
+ let view = cx.view().downgrade();
+ this.selected_path = Some(PathState {
+ path: file_path,
+ list_state: ListState::new(
+ chunks.len(),
+ gpui::ListAlignment::Top,
+ px(100.),
+ move |ix, cx| {
+ if let Some(view) = view.upgrade() {
+ view.update(cx, |view, cx| view.render_chunk(ix, cx))
+ } else {
+ div().into_any()
+ }
+ },
+ ),
+ chunks,
+ });
+ cx.notify();
+ })
+ })
+ .detach();
+ None
+ }
+
+ fn render_chunk(&mut self, ix: usize, cx: &mut ViewContext<Self>) -> AnyElement {
+ let buffer_font = ThemeSettings::get_global(cx).buffer_font.family.clone();
+ let Some(state) = &self.selected_path else {
+ return div().into_any();
+ };
+
+ let colors = cx.theme().colors();
+ let chunk = &state.chunks[ix];
+
+ div()
+ .text_ui(cx)
+ .w_full()
+ .font_family(buffer_font)
+ .child(
+ h_flex()
+ .justify_between()
+ .child(format!(
+ "chunk {} of {}. length: {}",
+ ix + 1,
+ state.chunks.len(),
+ chunk.len(),
+ ))
+ .child(
+ h_flex()
+ .child(
+ Button::new(("prev", ix), "prev")
+ .disabled(ix == 0)
+ .on_click(cx.listener(move |this, _, _| {
+ this.scroll_to_chunk(ix.saturating_sub(1))
+ })),
+ )
+ .child(
+ Button::new(("next", ix), "next")
+ .disabled(ix + 1 == state.chunks.len())
+ .on_click(
+ cx.listener(move |this, _, _| this.scroll_to_chunk(ix + 1)),
+ ),
+ ),
+ ),
+ )
+ .child(
+ div()
+ .bg(colors.editor_background)
+ .text_xs()
+ .child(chunk.clone()),
+ )
+ .into_any_element()
+ }
+
+ fn scroll_to_chunk(&mut self, ix: usize) {
+ if let Some(state) = self.selected_path.as_mut() {
+ state.list_state.scroll_to(ListOffset {
+ item_ix: ix,
+ offset_in_item: px(0.),
+ })
+ }
+ }
+}
+
+impl Render for ProjectIndexDebugView {
+ fn render(&mut self, cx: &mut gpui::ViewContext<'_, Self>) -> impl IntoElement {
+ if let Some(selected_path) = self.selected_path.as_ref() {
+ v_flex()
+ .child(
+ div()
+ .id("selected-path-name")
+ .child(
+ h_flex()
+ .justify_between()
+ .child(selected_path.path.to_string_lossy().to_string())
+ .child("x"),
+ )
+ .border_b_1()
+ .border_color(cx.theme().colors().border)
+ .cursor(CursorStyle::PointingHand)
+ .on_click(cx.listener(|this, _, cx| {
+ this.selected_path.take();
+ cx.notify();
+ })),
+ )
+ .child(list(selected_path.list_state.clone()).size_full())
+ .size_full()
+ .into_any_element()
+ } else {
+ let mut list = uniform_list(
+ cx.view().clone(),
+ "ProjectIndexDebugView",
+ self.rows.len(),
+ move |this, range, cx| {
+ this.rows[range]
+ .iter()
+ .enumerate()
+ .map(|(ix, row)| match row {
+ Row::Worktree(root_path) => div()
+ .id(ix)
+ .child(Label::new(root_path.to_string_lossy().to_string())),
+ Row::Entry(worktree_id, file_path) => div()
+ .id(ix)
+ .pl_8()
+ .child(Label::new(file_path.to_string_lossy().to_string()))
+ .on_mouse_move(cx.listener(move |this, _: &MouseMoveEvent, cx| {
+ if this.hovered_row_ix != Some(ix) {
+ this.hovered_row_ix = Some(ix);
+ cx.notify();
+ }
+ }))
+ .cursor(CursorStyle::PointingHand)
+ .on_click(cx.listener({
+ let worktree_id = *worktree_id;
+ let file_path = file_path.clone();
+ move |this, _, cx| {
+ this.handle_path_click(worktree_id, file_path.clone(), cx);
+ }
+ })),
+ })
+ .collect()
+ },
+ )
+ .track_scroll(self.list_scroll_handle.clone())
+ .size_full()
+ .text_bg(cx.theme().colors().background)
+ .into_any_element();
+
+ canvas(
+ move |bounds, cx| {
+ list.prepaint_as_root(bounds.origin, bounds.size.into(), cx);
+ list
+ },
+ |_, mut list, cx| list.paint(cx),
+ )
+ .size_full()
+ .into_any_element()
+ }
+ }
+}
+
+impl EventEmitter<()> for ProjectIndexDebugView {}
+
+impl Item for ProjectIndexDebugView {
+ type Event = ();
+
+ fn tab_content(&self, params: TabContentParams, _: &WindowContext<'_>) -> AnyElement {
+ Label::new("Project Index (Debug)")
+ .color(if params.selected {
+ Color::Default
+ } else {
+ Color::Muted
+ })
+ .into_any_element()
+ }
+
+ fn clone_on_split(
+ &self,
+ _: workspace::WorkspaceId,
+ cx: &mut ViewContext<Self>,
+ ) -> Option<View<Self>>
+ where
+ Self: Sized,
+ {
+ Some(cx.new_view(|cx| Self::new(self.index.clone(), cx)))
+ }
+}
+
+impl FocusableView for ProjectIndexDebugView {
+ fn focus_handle(&self, _: &AppContext) -> gpui::FocusHandle {
+ self.focus_handle.clone()
+ }
+}
@@ -1,5 +1,6 @@
mod chunking;
mod embedding;
+mod project_index_debug_view;
use anyhow::{anyhow, Context as _, Result};
use chunking::{chunk_text, Chunk};
@@ -31,6 +32,8 @@ use std::{
use util::ResultExt;
use worktree::LocalSnapshot;
+pub use project_index_debug_view::ProjectIndexDebugView;
+
pub struct SemanticIndex {
embedding_provider: Arc<dyn EmbeddingProvider>,
db_connection: heed::Env,
@@ -397,26 +400,35 @@ impl ProjectIndex {
Ok(result)
}
- pub fn debug(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
- let indices = self
+ pub(crate) fn worktree_index(
+ &self,
+ worktree_id: WorktreeId,
+ cx: &AppContext,
+ ) -> Option<Model<WorktreeIndex>> {
+ for index in self.worktree_indices.values() {
+ if let WorktreeIndexHandle::Loaded { index, .. } = index {
+ if index.read(cx).worktree.read(cx).id() == worktree_id {
+ return Some(index.clone());
+ }
+ }
+ }
+ None
+ }
+
+ pub(crate) fn worktree_indices(&self, cx: &AppContext) -> Vec<Model<WorktreeIndex>> {
+ let mut result = self
.worktree_indices
.values()
- .filter_map(|worktree_index| {
- if let WorktreeIndexHandle::Loaded { index, .. } = worktree_index {
+ .filter_map(|index| {
+ if let WorktreeIndexHandle::Loaded { index, .. } = index {
Some(index.clone())
} else {
None
}
})
.collect::<Vec<_>>();
-
- cx.spawn(|_, mut cx| async move {
- eprintln!("semantic index contents:");
- for index in indices {
- index.update(&mut cx, |index, cx| index.debug(cx))?.await?
- }
- Ok(())
- })
+ result.sort_by_key(|index| index.read(cx).worktree.read(cx).id());
+ result
}
}
@@ -726,10 +738,8 @@ impl WorktreeIndex {
.language_for_file_path(&entry.path)
.await
.ok();
- let grammar =
- language.as_ref().and_then(|language| language.grammar());
let chunked_file = ChunkedFile {
- chunks: chunk_text(&text, grammar),
+ chunks: chunk_text(&text, language.as_ref(), &entry.path),
handle,
path: entry.path,
mtime: entry.mtime,
@@ -861,7 +871,6 @@ impl WorktreeIndex {
db.put(&mut txn, &key, file)?;
}
txn.commit()?;
- eprintln!("committed {:?}", embedded_files.len());
drop(embedded_files);
log::debug!("committed");
@@ -871,18 +880,38 @@ impl WorktreeIndex {
})
}
- fn debug(&mut self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
+ fn paths(&self, cx: &AppContext) -> Task<Result<Vec<Arc<Path>>>> {
let connection = self.db_connection.clone();
let db = self.db;
cx.background_executor().spawn(async move {
let tx = connection
.read_txn()
.context("failed to create read transaction")?;
- for record in db.iter(&tx)? {
- let (key, _) = record?;
- eprintln!("{}", path_for_db_key(key));
- }
- Ok(())
+ let result = db
+ .iter(&tx)?
+ .map(|entry| Ok(entry?.1.path.clone()))
+ .collect::<Result<Vec<Arc<Path>>>>();
+ drop(tx);
+ result
+ })
+ }
+
+ fn chunks_for_path(
+ &self,
+ path: Arc<Path>,
+ cx: &AppContext,
+ ) -> Task<Result<Vec<EmbeddedChunk>>> {
+ let connection = self.db_connection.clone();
+ let db = self.db;
+ cx.background_executor().spawn(async move {
+ let tx = connection
+ .read_txn()
+ .context("failed to create read transaction")?;
+ Ok(db
+ .get(&tx, &db_key_for_path(&path))?
+ .ok_or_else(|| anyhow!("no such path"))?
+ .chunks
+ .clone())
})
}
@@ -927,7 +956,7 @@ struct EmbeddedFile {
chunks: Vec<EmbeddedChunk>,
}
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
struct EmbeddedChunk {
chunk: Chunk,
embedding: Embedding,
@@ -981,10 +1010,6 @@ fn db_key_for_path(path: &Arc<Path>) -> String {
path.to_string_lossy().replace('/', "\0")
}
-fn path_for_db_key(key: &str) -> String {
- key.replace('\0', "/")
-}
-
#[cfg(test)]
mod tests {
use super::*;