update jsx family of languages for preceding comments and nested exports

KCaverly created

Change summary

Cargo.lock                                        |   2 
crates/vector_store/Cargo.toml                    |   5 
crates/vector_store/src/vector_store_tests.rs     | 242 ++++++++++++++++
crates/zed/src/languages/javascript/embedding.scm | 139 +++++---
crates/zed/src/languages/tsx/embedding.scm        |  85 ++++-
crates/zed/src/languages/typescript/embedding.scm |  85 ++++-
6 files changed, 458 insertions(+), 100 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8508,7 +8508,9 @@ dependencies = [
  "theme",
  "tiktoken-rs 0.5.0",
  "tree-sitter",
+ "tree-sitter-javascript",
  "tree-sitter-rust",
+ "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "unindent",
  "util",
  "workspace",

crates/vector_store/Cargo.toml 🔗

@@ -44,10 +44,13 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
-tree-sitter-rust = "*"
 
 rand.workspace = true
 unindent.workspace = true
 tempdir.workspace = true
 ctor.workspace = true
 env_logger.workspace = true
+
+tree-sitter-javascript = "*"
+tree-sitter-typescript = "*"
+tree-sitter-rust = "*"

crates/vector_store/src/vector_store_tests.rs 🔗

@@ -144,7 +144,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
 }
 
 #[gpui::test]
-async fn test_code_context_retrieval() {
+async fn test_code_context_retrieval_rust() {
     let language = rust_lang();
     let mut retriever = CodeContextRetriever::new();
 
@@ -200,6 +200,142 @@ async fn test_code_context_retrieval() {
     );
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_javascript() {
+    let language = js_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = "
+/* globals importScripts, backend */
+function _authorize() {}
+
+/**
+ * Sometimes the frontend build is way faster than backend.
+ */
+export async function authorizeBank() {
+    _authorize(pushModal, upgradingAccountId, {});
+}
+
+export class SettingsPage {
+    /* This is a test setting */
+    constructor(page) {
+        this.page = page;
+    }
+}
+
+/* This is a test comment */
+class TestClass {}
+
+/* Schema for editor_events in Clickhouse. */
+export interface ClickhouseEditorEvent {
+    installation_id: string
+    operation: string
+}
+";
+
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.js"), &text, language)
+        .unwrap();
+
+    let test_documents = &[
+        Document {
+            name: "function _authorize".into(),
+            range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* globals importScripts, backend */
+                    function _authorize() {}
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "async function authorizeBank".into(),
+            range: text.find("export async").unwrap()..224,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /**
+                     * Sometimes the frontend build is way faster than backend.
+                     */
+                    export async function authorizeBank() {
+                        _authorize(pushModal, upgradingAccountId, {});
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class SettingsPage".into(),
+            range: 226..344,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    export class SettingsPage {
+                        /* This is a test setting */
+                        constructor(page) {
+                            this.page = page;
+                        }
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "constructor".into(),
+            range: 291..342,
+            content: "
+                The below code snippet is from file 'foo.js'
+
+                ```javascript
+                /* This is a test setting */
+                constructor(page) {
+                        this.page = page;
+                    }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class TestClass".into(),
+            range: 375..393,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* This is a test comment */
+                    class TestClass {}
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "interface ClickhouseEditorEvent".into(),
+            range: 441..533,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* Schema for editor_events in Clickhouse. */
+                    export interface ClickhouseEditorEvent {
+                        installation_id: string
+                        operation: string
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+    ];
+
+    for idx in 0..test_documents.len() {
+        assert_eq!(test_documents[idx], parsed_files[idx]);
+    }
+}
+
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
     assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@@ -271,6 +407,110 @@ impl EmbeddingProvider for FakeEmbeddingProvider {
     }
 }
 
+fn js_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "Javascript".into(),
+                path_suffixes: vec!["js".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_typescript::language_tsx()),
+        )
+        .with_embedding_query(
+            &r#"
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (function_declaration
+                        "async"? @name
+                        "function" @name
+                        name: (_) @name)) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (function_declaration
+                    "async"? @name
+                    "function" @name
+                    name: (_) @name) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (class_declaration
+                        "class" @name
+                        name: (_) @name)) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (class_declaration
+                    "class" @name
+                    name: (_) @name) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (method_definition
+                    [
+                        "get"
+                        "set"
+                        "async"
+                        "*"
+                        "static"
+                    ]* @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (interface_declaration
+                        "interface" @name
+                        name: (_) @name)) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (interface_declaration
+                    "interface" @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (enum_declaration
+                        "enum" @name
+                        name: (_) @name)) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (enum_declaration
+                    "enum" @name
+                    name: (_) @name) @item
+                )
+
+                    "#
+            .unindent(),
+        )
+        .unwrap(),
+    )
+}
+
 fn rust_lang() -> Arc<Language> {
     Arc::new(
         Language::new(

crates/zed/src/languages/javascript/embedding.scm 🔗

@@ -1,56 +1,83 @@
-; (internal_module
-;     "namespace" @context
-;     name: (_) @name) @item
-
-(enum_declaration
-    "enum" @context
-    name: (_) @name) @item
-
-(function_declaration
-    "async"? @context
-    "function" @context
-    name: (_) @name) @item
-
-(interface_declaration
-    "interface" @context
-    name: (_) @name) @item
-
-; (program
-;     (export_statement
-;         (lexical_declaration
-;             ["let" "const"] @context
-;             (variable_declarator
-;                 name: (_) @name) @item)))
-
-(program
-    (lexical_declaration
-        ["let" "const"] @context
-        (variable_declarator
-            name: (_) @name) @item))
-
-(class_declaration
-    "class" @context
-    name: (_) @name) @item
-
-(method_definition
-    [
-        "get"
-        "set"
-        "async"
-        "*"
-        "readonly"
-        "static"
-        (override_modifier)
-        (accessibility_modifier)
-        ]* @context
-    name: (_) @name) @item
-
-; (public_field_definition
-;     [
-;         "declare"
-;         "readonly"
-;         "abstract"
-;         "static"
-;         (accessibility_modifier)
-;         ]* @context
-;     name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (class_declaration
+        "class" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "static"
+            ]* @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )

crates/zed/src/languages/tsx/embedding.scm 🔗

@@ -1,33 +1,29 @@
 (
     (comment)* @context
     .
-    (enum_declaration
-        "enum" @context
-        name: (_) @name) @item
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
     )
 
 (
     (comment)* @context
     .
-    [
-        (export_statement
-            (function_declaration
-                "async"? @name
-                "function" @name
-                name: (_) @name)
-            ) @item
-        (function_declaration
-            "async"? @name
-            "function" @name
-            name: (_) @name) @item
-        ])
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
 
 (
     (comment)* @context
     .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
     )
 
 (
@@ -47,10 +43,57 @@
             "set"
             "async"
             "*"
-            "readonly"
             "static"
-            (override_modifier)
-            (accessibility_modifier)
             ]* @name
         name: (_) @name) @item
     )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (type_alias_declaration
+            "type" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (type_alias_declaration
+        "type" @name
+        name: (_) @name) @item)

crates/zed/src/languages/typescript/embedding.scm 🔗

@@ -1,33 +1,29 @@
 (
     (comment)* @context
     .
-    (enum_declaration
-        "enum" @context
-        name: (_) @name) @item
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
     )
 
 (
     (comment)* @context
     .
-    [
-        (export_statement
-            (function_declaration
-                "async"? @name
-                "function" @name
-                name: (_) @name)
-            ) @item
-        (function_declaration
-            "async"? @name
-            "function" @name
-            name: (_) @name) @item
-    ])
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
 
 (
     (comment)* @context
     .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
     )
 
 (
@@ -47,10 +43,57 @@
             "set"
             "async"
             "*"
-            "readonly"
             "static"
-            (override_modifier)
-            (accessibility_modifier)
             ]* @name
         name: (_) @name) @item
     )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (type_alias_declaration
+            "type" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (type_alias_declaration
+        "type" @name
+        name: (_) @name) @item)