Detailed changes
@@ -8134,6 +8134,16 @@ dependencies = [
"tree-sitter",
]
+[[package]]
+name = "tree-sitter-toml"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca517f578a98b23d20780247cc2688407fa81effad5b627a5a364ec3339b53e8"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
[[package]]
name = "tree-sitter-typescript"
version = "0.20.2"
@@ -8508,8 +8518,8 @@ dependencies = [
"theme",
"tiktoken-rs 0.5.0",
"tree-sitter",
- "tree-sitter-javascript",
"tree-sitter-rust",
+ "tree-sitter-toml 0.20.0",
"tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
"unindent",
"util",
@@ -9560,7 +9570,7 @@ dependencies = [
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-scheme",
- "tree-sitter-toml",
+ "tree-sitter-toml 0.5.1",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
"tree-sitter-yaml",
"unindent",
@@ -51,6 +51,6 @@ tempdir.workspace = true
ctor.workspace = true
env_logger.workspace = true
-tree-sitter-javascript = "*"
tree-sitter-typescript = "*"
tree-sitter-rust = "*"
+tree-sitter-toml = "*"
@@ -13,6 +13,9 @@ pub struct Document {
const CODE_CONTEXT_TEMPLATE: &str =
"The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
+/// Template for a document that embeds a whole file. `<path>`, `<language>` and
+/// `<item>` are placeholder tokens meant to be filled in by `_parse_entire_file`.
+const ENTIRE_FILE_TEMPLATE: &str =
+ "The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
+/// Language names whose files `parse_file` hands to `_parse_entire_file` as a
+/// single document instead of going through the grammar-based path.
+pub const PARSEABLE_ENTIRE_FILE_TYPES: [&str; 3] = ["TOML", "YAML", "JSON"];
pub struct CodeContextRetriever {
pub parser: Parser,
@@ -27,12 +30,35 @@ impl CodeContextRetriever {
}
}
+    /// Builds a single [`Document`] spanning the whole file, for file types
+    /// that are embedded in their entirety rather than item by item.
+    ///
+    /// `ENTIRE_FILE_TEMPLATE` is filled in with the file's relative path, the
+    /// language name, and the complete file content. The returned document's
+    /// `range` is `0..content.len()`, its `embedding` is empty, and its `name`
+    /// is the language name.
+    fn _parse_entire_file(
+        &self,
+        relative_path: &Path,
+        language_name: Arc<str>,
+        content: &str,
+    ) -> Result<Vec<Document>> {
+        // The placeholder is `<item>`, angle brackets included. Replacing the
+        // bare word `item` would leave stray `<`/`>` around the content and
+        // would also rewrite any "item" substring in the already-substituted
+        // path or language name (e.g. `items.toml`).
+        // The content goes in last so that placeholder-like text inside the
+        // file body is never expanded by an earlier replacement.
+        let document_span = ENTIRE_FILE_TEMPLATE
+            .replace("<path>", relative_path.to_string_lossy().as_ref())
+            .replace("<language>", language_name.as_ref())
+            .replace("<item>", content);
+
+        Ok(vec![Document {
+            range: 0..content.len(),
+            content: document_span,
+            embedding: Vec::new(),
+            name: language_name.to_string(),
+        }])
+    }
+
pub fn parse_file(
&mut self,
relative_path: &Path,
content: &str,
language: Arc<Language>,
) -> Result<Vec<Document>> {
+ if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) {
+ return self._parse_entire_file(relative_path, language.name(), &content);
+ }
+
let grammar = language
.grammar()
.ok_or_else(|| anyhow!("no grammar for language"))?;
@@ -19,7 +19,7 @@ use gpui::{
use language::{Language, LanguageRegistry};
use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
use parking_lot::Mutex;
-use parsing::{CodeContextRetriever, Document};
+use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES};
use project::{Fs, Project, WorktreeId};
use smol::channel;
use std::{
@@ -537,10 +537,11 @@ impl VectorStore {
.language_for_file(&absolute_path, None)
.await
{
- if language
- .grammar()
- .and_then(|grammar| grammar.embedding_config.as_ref())
- .is_none()
+ if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+ && language
+ .grammar()
+ .and_then(|grammar| grammar.embedding_config.as_ref())
+ .is_none()
{
continue;
}
@@ -56,6 +56,9 @@ async fn test_vector_store(cx: &mut TestAppContext) {
println!(\"bbbb!\");
}
".unindent(),
+ "file3.toml": "
+ ZZZZZZZ = 5
+ ".unindent(),
}
}),
)
@@ -63,7 +66,9 @@ async fn test_vector_store(cx: &mut TestAppContext) {
let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
let rust_language = rust_lang();
+ let toml_language = toml_lang();
languages.add(rust_language);
+ languages.add(toml_language);
let db_dir = tempdir::TempDir::new("vector-store").unwrap();
let db_path = db_dir.path().join("db.sqlite");
@@ -87,7 +92,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
.update(cx, |store, cx| store.index_project(project.clone(), cx))
.await
.unwrap();
- assert_eq!(file_count, 2);
+ assert_eq!(file_count, 3);
cx.foreground().run_until_parked();
store.update(cx, |store, _cx| {
assert_eq!(
@@ -578,3 +583,14 @@ fn rust_lang() -> Arc<Language> {
.unwrap(),
)
}
+
+/// Creates the TOML language used by the tests: named "TOML", associated with
+/// the `toml` path suffix, and backed by the `tree_sitter_toml` grammar.
+fn toml_lang() -> Arc<Language> {
+    let config = LanguageConfig {
+        name: "TOML".into(),
+        path_suffixes: vec!["toml".into()],
+        ..Default::default()
+    };
+    let grammar = tree_sitter_toml::language();
+    Arc::new(Language::new(config, Some(grammar)))
+}