Add embedding tree-sitter queries for Elixir

KCaverly created

Change summary

Cargo.lock                                    |  13 +
crates/vector_store/Cargo.toml                |   1 
crates/vector_store/src/vector_store_tests.rs | 182 +++++++++++++++++++++
crates/zed/src/languages/elixir/embedding.scm |  27 +++
4 files changed, 222 insertions(+), 1 deletion(-)

Detailed changes

Cargo.lock 🔗

@@ -7982,6 +7982,16 @@ dependencies = [
  "tree-sitter",
 ]
 
+[[package]]
+name = "tree-sitter-elixir"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
 [[package]]
 name = "tree-sitter-elixir"
 version = "0.1.0"
@@ -8519,6 +8529,7 @@ dependencies = [
  "tiktoken-rs 0.5.0",
  "tree-sitter",
  "tree-sitter-cpp",
+ "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "tree-sitter-rust",
  "tree-sitter-toml 0.20.0",
  "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -9558,7 +9569,7 @@ dependencies = [
  "tree-sitter-c",
  "tree-sitter-cpp",
  "tree-sitter-css",
- "tree-sitter-elixir",
+ "tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
  "tree-sitter-embedded-template",
  "tree-sitter-go",
  "tree-sitter-heex",

crates/vector_store/Cargo.toml 🔗

@@ -55,3 +55,4 @@ tree-sitter-typescript = "*"
 tree-sitter-rust = "*"
 tree-sitter-toml = "*"
 tree-sitter-cpp = "*"
+tree-sitter-elixir = "*"

crates/vector_store/src/vector_store_tests.rs 🔗

@@ -342,6 +342,143 @@ async fn test_code_context_retrieval_javascript() {
     }
 }
 
+/// Parses an Elixir fixture with `CodeContextRetriever` and checks the documents it
+/// produces against two expected `Document`s: one for the enclosing `defmodule` and
+/// one for the `def __build__` function together with its preceding `@doc false`.
+#[gpui::test]
+async fn test_code_context_retrieval_elixir() {
+    let language = elixir_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    // Fixture source. `unindent()` is applied to the literal before parsing, so the
+    // byte ranges asserted below refer to the unindented text.
+    let text = r#"
+defmodule File.Stream do
+    @moduledoc """
+    Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+    The following fields are public:
+
+    * `path`          - the file path
+    * `modes`         - the file modes
+    * `raw`           - a boolean indicating if bin functions should be used
+    * `line_or_bytes` - if reading should read lines or a given number of bytes
+    * `node`          - the node the file belongs to
+
+    """
+
+    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+    @type t :: %__MODULE__{}
+
+    @doc false
+    def __build__(path, modes, line_or_bytes) do
+    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+    modes =
+        case raw do
+        true ->
+            case :lists.keyfind(:read_ahead, 1, modes) do
+            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+            {:read_ahead, _} -> [:raw | modes]
+            false -> [:raw, :read_ahead | modes]
+            end
+
+        false ->
+            modes
+        end
+
+    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+    end
+"#
+    .unindent();
+
+    // Run the retriever over the fixture under the path "foo.ex"; the expected
+    // document contents below embed that file name.
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.ex"), &text, language)
+        .unwrap();
+
+    // Expected documents, in the order they are compared against `parsed_files`.
+    // `embedding` is left empty in both.
+    let test_documents = &[
+        Document{
+            name: "defmodule File.Stream".into(),
+            range: 0..1132,
+            content: r#"
+                The below code snippet is from file 'foo.ex'
+
+                ```elixir
+                defmodule File.Stream do
+                    @moduledoc """
+                    Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+                    The following fields are public:
+
+                    * `path`          - the file path
+                    * `modes`         - the file modes
+                    * `raw`           - a boolean indicating if bin functions should be used
+                    * `line_or_bytes` - if reading should read lines or a given number of bytes
+                    * `node`          - the node the file belongs to
+
+                    """
+
+                    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+                    @type t :: %__MODULE__{}
+
+                    @doc false
+                    def __build__(path, modes, line_or_bytes) do
+                    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+                    modes =
+                        case raw do
+                        true ->
+                            case :lists.keyfind(:read_ahead, 1, modes) do
+                            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+                            {:read_ahead, _} -> [:raw | modes]
+                            false -> [:raw, :read_ahead | modes]
+                            end
+
+                        false ->
+                            modes
+                        end
+
+                    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+                    end
+                ```"#.unindent(),
+            embedding: vec![],
+        },
+        Document {
+        name: "def __build__".into(),
+        range: 574..1132,
+        content: r#"
+The below code snippet is from file 'foo.ex'
+
+```elixir
+@doc false
+def __build__(path, modes, line_or_bytes) do
+    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+    modes =
+        case raw do
+        true ->
+            case :lists.keyfind(:read_ahead, 1, modes) do
+            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+            {:read_ahead, _} -> [:raw | modes]
+            false -> [:raw, :read_ahead | modes]
+            end
+
+        false ->
+            modes
+        end
+
+    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+    end
+```"#
+            .unindent(),
+        embedding: vec![],
+    }];
+
+    // Compare position by position. Only the first `test_documents.len()` entries of
+    // `parsed_files` are inspected, so any additional parsed documents go unchecked.
+    for idx in 0..test_documents.len() {
+        assert_eq!(test_documents[idx], parsed_files[idx]);
+    }
+}
+
 #[gpui::test]
 async fn test_code_context_retrieval_cpp() {
     let language = cpp_lang();
@@ -850,3 +987,48 @@ fn cpp_lang() -> Arc<Language> {
         .unwrap(),
     )
 }
+
+/// Builds the Elixir `Language` fixture used by the tests in this file.
+///
+/// The language is named "Elixir", uses the `tree_sitter_elixir` grammar, and carries
+/// an inline embedding query with two patterns:
+///
+/// * a `@doc` attribute (captured as `@context`) immediately followed by a call whose
+///   target is one of the `def`-family macros (captured as `@item`);
+/// * a `defmodule` / `defprotocol` call (captured as `@item`).
+///
+/// # Panics
+///
+/// Panics if `with_embedding_query` returns an error for the query text.
+fn elixir_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "Elixir".into(),
+                // Elixir sources use `.ex` / `.exs`; the earlier `rs` suffix was a
+                // copy-paste leftover from the Rust fixture.
+                path_suffixes: vec!["ex".into(), "exs".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_elixir::language()),
+        )
+        .with_embedding_query(
+            r#"
+            (
+                (unary_operator
+                    operator: "@"
+                    operand: (call
+                        target: (identifier) @unary
+                        (#match? @unary "^(doc)$"))
+                    ) @context
+                .
+                (call
+                target: (identifier) @name
+                (arguments
+                [
+                (identifier) @name
+                (call
+                target: (identifier) @name)
+                (binary_operator
+                left: (call
+                target: (identifier) @name)
+                operator: "when")
+                ])
+                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
+                )
+
+            (call
+                target: (identifier) @name
+                (arguments (alias) @name)
+                (#match? @name "^(defmodule|defprotocol)$")) @item
+            "#,
+        )
+        .unwrap(),
+    )
+}

crates/zed/src/languages/elixir/embedding.scm 🔗

@@ -0,0 +1,27 @@
+; Embedding query for Elixir.
+;
+; Pattern 1: a `@doc` attribute (captured as @context) immediately followed — via the
+; `.` anchor — by a call whose target is one of the def-family macros. The whole call
+; is captured as @item; the macro name and the defined function's name are both
+; captured as @name.
+; NOTE(review): the `@doc` node is not marked optional, so a definition without a
+; directly preceding `@doc` does not match this pattern — confirm that is intended.
+(
+    (unary_operator
+        operator: "@"
+        operand: (call
+            target: (identifier) @unary
+            (#match? @unary "^(doc)$"))
+        ) @context
+    .
+    (call
+        target: (identifier) @name
+        (arguments
+            ; The first argument may be a bare identifier, a call (name with
+            ; parameters), or a `when`-guarded call.
+            [
+            (identifier) @name
+            (call
+                target: (identifier) @name)
+                (binary_operator
+                    left: (call
+                    target: (identifier) @name)
+                    operator: "when")
+            ])
+        (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
+        )
+
+; Pattern 2: `defmodule` / `defprotocol` calls. The whole call is captured as @item;
+; the macro name and the module alias are both captured as @name.
+    (call
+        target: (identifier) @name
+        (arguments (alias) @name)
+        (#match? @name "^(defmodule|defprotocol)$")) @item