semantic_index_tests.rs

   1use crate::{
   2    db::dot,
   3    embedding::EmbeddingProvider,
   4    parsing::{CodeContextRetriever, Document},
   5    semantic_index_settings::SemanticIndexSettings,
   6    SemanticIndex,
   7};
   8use anyhow::Result;
   9use async_trait::async_trait;
  10use gpui::{Task, TestAppContext};
  11use language::{Language, LanguageConfig, LanguageRegistry};
  12use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
  13use rand::{rngs::StdRng, Rng};
  14use serde_json::json;
  15use settings::SettingsStore;
  16use std::{
  17    path::Path,
  18    sync::{
  19        atomic::{self, AtomicUsize},
  20        Arc,
  21    },
  22};
  23use unindent::Unindent;
  24
  25#[ctor::ctor]
  26fn init_logger() {
  27    if std::env::var("RUST_LOG").is_ok() {
  28        env_logger::init();
  29    }
  30}
  31
  32#[gpui::test]
  33async fn test_semantic_index(cx: &mut TestAppContext) {
  34    cx.update(|cx| {
  35        cx.set_global(SettingsStore::test(cx));
  36        settings::register::<SemanticIndexSettings>(cx);
  37        settings::register::<ProjectSettings>(cx);
  38    });
  39
  40    let fs = FakeFs::new(cx.background());
  41    fs.insert_tree(
  42        "/the-root",
  43        json!({
  44            "src": {
  45                "file1.rs": "
  46                    fn aaa() {
  47                        println!(\"aaaa!\");
  48                    }
  49
  50                    fn zzzzzzzzz() {
  51                        println!(\"SLEEPING\");
  52                    }
  53                ".unindent(),
  54                "file2.rs": "
  55                    fn bbb() {
  56                        println!(\"bbbb!\");
  57                    }
  58                ".unindent(),
  59                "file3.toml": "
  60                    ZZZZZZZ = 5
  61                    ".unindent(),
  62            }
  63        }),
  64    )
  65    .await;
  66
  67    let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
  68    let rust_language = rust_lang();
  69    let toml_language = toml_lang();
  70    languages.add(rust_language);
  71    languages.add(toml_language);
  72
  73    let db_dir = tempdir::TempDir::new("vector-store").unwrap();
  74    let db_path = db_dir.path().join("db.sqlite");
  75
  76    let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
  77    let store = SemanticIndex::new(
  78        fs.clone(),
  79        db_path,
  80        embedding_provider.clone(),
  81        languages,
  82        cx.to_async(),
  83    )
  84    .await
  85    .unwrap();
  86
  87    let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
  88    let worktree_id = project.read_with(cx, |project, cx| {
  89        project.worktrees(cx).next().unwrap().read(cx).id()
  90    });
  91    let (file_count, outstanding_file_count) = store
  92        .update(cx, |store, cx| store.index_project(project.clone(), cx))
  93        .await
  94        .unwrap();
  95    assert_eq!(file_count, 3);
  96    cx.foreground().run_until_parked();
  97    assert_eq!(*outstanding_file_count.borrow(), 0);
  98
  99    let search_results = store
 100        .update(cx, |store, cx| {
 101            store.search_project(project.clone(), "aaaa".to_string(), 5, cx)
 102        })
 103        .await
 104        .unwrap();
 105
 106    assert_eq!(search_results[0].byte_range.start, 0);
 107    assert_eq!(search_results[0].name, "aaa");
 108    assert_eq!(search_results[0].worktree_id, worktree_id);
 109
 110    fs.save(
 111        "/the-root/src/file2.rs".as_ref(),
 112        &"
 113            fn dddd() { println!(\"ddddd!\"); }
 114            struct pqpqpqp {}
 115        "
 116        .unindent()
 117        .into(),
 118        Default::default(),
 119    )
 120    .await
 121    .unwrap();
 122
 123    cx.foreground().run_until_parked();
 124
 125    let prev_embedding_count = embedding_provider.embedding_count();
 126    let (file_count, outstanding_file_count) = store
 127        .update(cx, |store, cx| store.index_project(project.clone(), cx))
 128        .await
 129        .unwrap();
 130    assert_eq!(file_count, 1);
 131
 132    cx.foreground().run_until_parked();
 133    assert_eq!(*outstanding_file_count.borrow(), 0);
 134
 135    assert_eq!(
 136        embedding_provider.embedding_count() - prev_embedding_count,
 137        2
 138    );
 139}
 140
 141#[gpui::test]
 142async fn test_code_context_retrieval_rust() {
 143    let language = rust_lang();
 144    let mut retriever = CodeContextRetriever::new();
 145
 146    let text = "
 147        /// A doc comment
 148        /// that spans multiple lines
 149        fn a() {
 150            b
 151        }
 152
 153        impl C for D {
 154        }
 155    "
 156    .unindent();
 157
 158    let parsed_files = retriever
 159        .parse_file(Path::new("foo.rs"), &text, language)
 160        .unwrap();
 161
 162    assert_eq!(
 163        parsed_files,
 164        &[
 165            Document {
 166                name: "a".into(),
 167                range: text.find("fn a").unwrap()..(text.find("}").unwrap() + 1),
 168                content: "
 169                    The below code snippet is from file 'foo.rs'
 170
 171                    ```rust
 172                    /// A doc comment
 173                    /// that spans multiple lines
 174                    fn a() {
 175                        b
 176                    }
 177                    ```"
 178                .unindent(),
 179                embedding: vec![],
 180            },
 181            Document {
 182                name: "C for D".into(),
 183                range: text.find("impl C").unwrap()..(text.rfind("}").unwrap() + 1),
 184                content: "
 185                    The below code snippet is from file 'foo.rs'
 186
 187                    ```rust
 188                    impl C for D {
 189                    }
 190                    ```"
 191                .unindent(),
 192                embedding: vec![],
 193            }
 194        ]
 195    );
 196}
 197
 198#[gpui::test]
 199async fn test_code_context_retrieval_javascript() {
 200    let language = js_lang();
 201    let mut retriever = CodeContextRetriever::new();
 202
 203    let text = "
 204        /* globals importScripts, backend */
 205        function _authorize() {}
 206
 207        /**
 208         * Sometimes the frontend build is way faster than backend.
 209         */
 210        export async function authorizeBank() {
 211            _authorize(pushModal, upgradingAccountId, {});
 212        }
 213
 214        export class SettingsPage {
 215            /* This is a test setting */
 216            constructor(page) {
 217                this.page = page;
 218            }
 219        }
 220
 221        /* This is a test comment */
 222        class TestClass {}
 223
 224        /* Schema for editor_events in Clickhouse. */
 225        export interface ClickhouseEditorEvent {
 226            installation_id: string
 227            operation: string
 228        }
 229        "
 230    .unindent();
 231
 232    let parsed_files = retriever
 233        .parse_file(Path::new("foo.js"), &text, language)
 234        .unwrap();
 235
 236    let test_documents = &[
 237        Document {
 238            name: "function _authorize".into(),
 239            range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
 240            content: "
 241                    The below code snippet is from file 'foo.js'
 242
 243                    ```javascript
 244                    /* globals importScripts, backend */
 245                    function _authorize() {}
 246                    ```"
 247            .unindent(),
 248            embedding: vec![],
 249        },
 250        Document {
 251            name: "async function authorizeBank".into(),
 252            range: text.find("export async").unwrap()..223,
 253            content: "
 254                    The below code snippet is from file 'foo.js'
 255
 256                    ```javascript
 257                    /**
 258                     * Sometimes the frontend build is way faster than backend.
 259                     */
 260                    export async function authorizeBank() {
 261                        _authorize(pushModal, upgradingAccountId, {});
 262                    }
 263                    ```"
 264            .unindent(),
 265            embedding: vec![],
 266        },
 267        Document {
 268            name: "class SettingsPage".into(),
 269            range: 225..343,
 270            content: "
 271                    The below code snippet is from file 'foo.js'
 272
 273                    ```javascript
 274                    export class SettingsPage {
 275                        /* This is a test setting */
 276                        constructor(page) {
 277                            this.page = page;
 278                        }
 279                    }
 280                    ```"
 281            .unindent(),
 282            embedding: vec![],
 283        },
 284        Document {
 285            name: "constructor".into(),
 286            range: 290..341,
 287            content: "
 288                The below code snippet is from file 'foo.js'
 289
 290                ```javascript
 291                /* This is a test setting */
 292                constructor(page) {
 293                        this.page = page;
 294                    }
 295                ```"
 296            .unindent(),
 297            embedding: vec![],
 298        },
 299        Document {
 300            name: "class TestClass".into(),
 301            range: 374..392,
 302            content: "
 303                    The below code snippet is from file 'foo.js'
 304
 305                    ```javascript
 306                    /* This is a test comment */
 307                    class TestClass {}
 308                    ```"
 309            .unindent(),
 310            embedding: vec![],
 311        },
 312        Document {
 313            name: "interface ClickhouseEditorEvent".into(),
 314            range: 440..532,
 315            content: "
 316                    The below code snippet is from file 'foo.js'
 317
 318                    ```javascript
 319                    /* Schema for editor_events in Clickhouse. */
 320                    export interface ClickhouseEditorEvent {
 321                        installation_id: string
 322                        operation: string
 323                    }
 324                    ```"
 325            .unindent(),
 326            embedding: vec![],
 327        },
 328    ];
 329
 330    for idx in 0..test_documents.len() {
 331        assert_eq!(test_documents[idx], parsed_files[idx]);
 332    }
 333}
 334
 335#[gpui::test]
 336async fn test_code_context_retrieval_elixir() {
 337    let language = elixir_lang();
 338    let mut retriever = CodeContextRetriever::new();
 339
 340    let text = r#"
 341defmodule File.Stream do
 342    @moduledoc """
 343    Defines a `File.Stream` struct returned by `File.stream!/3`.
 344
 345    The following fields are public:
 346
 347    * `path`          - the file path
 348    * `modes`         - the file modes
 349    * `raw`           - a boolean indicating if bin functions should be used
 350    * `line_or_bytes` - if reading should read lines or a given number of bytes
 351    * `node`          - the node the file belongs to
 352
 353    """
 354
 355    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 356
 357    @type t :: %__MODULE__{}
 358
 359    @doc false
 360    def __build__(path, modes, line_or_bytes) do
 361    raw = :lists.keyfind(:encoding, 1, modes) == false
 362
 363    modes =
 364        case raw do
 365        true ->
 366            case :lists.keyfind(:read_ahead, 1, modes) do
 367            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 368            {:read_ahead, _} -> [:raw | modes]
 369            false -> [:raw, :read_ahead | modes]
 370            end
 371
 372        false ->
 373            modes
 374        end
 375
 376    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 377
 378    end
 379"#
 380    .unindent();
 381
 382    let parsed_files = retriever
 383        .parse_file(Path::new("foo.ex"), &text, language)
 384        .unwrap();
 385
 386    let test_documents = &[
 387        Document{
 388            name: "defmodule File.Stream".into(),
 389            range: 0..1132,
 390            content: r#"
 391                The below code snippet is from file 'foo.ex'
 392
 393                ```elixir
 394                defmodule File.Stream do
 395                    @moduledoc """
 396                    Defines a `File.Stream` struct returned by `File.stream!/3`.
 397
 398                    The following fields are public:
 399
 400                    * `path`          - the file path
 401                    * `modes`         - the file modes
 402                    * `raw`           - a boolean indicating if bin functions should be used
 403                    * `line_or_bytes` - if reading should read lines or a given number of bytes
 404                    * `node`          - the node the file belongs to
 405
 406                    """
 407
 408                    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 409
 410                    @type t :: %__MODULE__{}
 411
 412                    @doc false
 413                    def __build__(path, modes, line_or_bytes) do
 414                    raw = :lists.keyfind(:encoding, 1, modes) == false
 415
 416                    modes =
 417                        case raw do
 418                        true ->
 419                            case :lists.keyfind(:read_ahead, 1, modes) do
 420                            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 421                            {:read_ahead, _} -> [:raw | modes]
 422                            false -> [:raw, :read_ahead | modes]
 423                            end
 424
 425                        false ->
 426                            modes
 427                        end
 428
 429                    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 430
 431                    end
 432                ```"#.unindent(),
 433            embedding: vec![],
 434        },
 435        Document {
 436        name: "def __build__".into(),
 437        range: 574..1132,
 438        content: r#"
 439The below code snippet is from file 'foo.ex'
 440
 441```elixir
 442@doc false
 443def __build__(path, modes, line_or_bytes) do
 444    raw = :lists.keyfind(:encoding, 1, modes) == false
 445
 446    modes =
 447        case raw do
 448        true ->
 449            case :lists.keyfind(:read_ahead, 1, modes) do
 450            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 451            {:read_ahead, _} -> [:raw | modes]
 452            false -> [:raw, :read_ahead | modes]
 453            end
 454
 455        false ->
 456            modes
 457        end
 458
 459    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 460
 461    end
 462```"#
 463            .unindent(),
 464        embedding: vec![],
 465    }];
 466
 467    for idx in 0..test_documents.len() {
 468        assert_eq!(test_documents[idx], parsed_files[idx]);
 469    }
 470}
 471
 472#[gpui::test]
 473async fn test_code_context_retrieval_cpp() {
 474    let language = cpp_lang();
 475    let mut retriever = CodeContextRetriever::new();
 476
 477    let text = "
 478    /**
 479     * @brief Main function
 480     * @returns 0 on exit
 481     */
 482    int main() { return 0; }
 483
 484    /**
 485    * This is a test comment
 486    */
 487    class MyClass {       // The class
 488        public:             // Access specifier
 489        int myNum;        // Attribute (int variable)
 490        string myString;  // Attribute (string variable)
 491    };
 492
 493    // This is a test comment
 494    enum Color { red, green, blue };
 495
 496    /** This is a preceeding block comment
 497     * This is the second line
 498     */
 499    struct {           // Structure declaration
 500        int myNum;       // Member (int variable)
 501        string myString; // Member (string variable)
 502    } myStructure;
 503
 504    /**
 505    * @brief Matrix class.
 506    */
 507    template <typename T,
 508              typename = typename std::enable_if<
 509                std::is_integral<T>::value || std::is_floating_point<T>::value,
 510                bool>::type>
 511    class Matrix2 {
 512        std::vector<std::vector<T>> _mat;
 513
 514    public:
 515        /**
 516        * @brief Constructor
 517        * @tparam Integer ensuring integers are being evaluated and not other
 518        * data types.
 519        * @param size denoting the size of Matrix as size x size
 520        */
 521        template <typename Integer,
 522                  typename = typename std::enable_if<std::is_integral<Integer>::value,
 523                  Integer>::type>
 524        explicit Matrix(const Integer size) {
 525            for (size_t i = 0; i < size; ++i) {
 526                _mat.emplace_back(std::vector<T>(size, 0));
 527            }
 528        }
 529    }"
 530    .unindent();
 531
 532    let parsed_files = retriever
 533        .parse_file(Path::new("foo.cpp"), &text, language)
 534        .unwrap();
 535
 536    let test_documents = &[
 537        Document {
 538            name: "int main".into(),
 539            range: 54..78,
 540            content: "
 541                The below code snippet is from file 'foo.cpp'
 542
 543                ```cpp
 544                /**
 545                 * @brief Main function
 546                 * @returns 0 on exit
 547                 */
 548                int main() { return 0; }
 549                ```"
 550            .unindent(),
 551            embedding: vec![],
 552        },
 553        Document {
 554            name: "class MyClass".into(),
 555            range: 112..295,
 556            content: "
 557                The below code snippet is from file 'foo.cpp'
 558
 559                ```cpp
 560                /**
 561                * This is a test comment
 562                */
 563                class MyClass {       // The class
 564                    public:             // Access specifier
 565                    int myNum;        // Attribute (int variable)
 566                    string myString;  // Attribute (string variable)
 567                }
 568                ```"
 569            .unindent(),
 570            embedding: vec![],
 571        },
 572        Document {
 573            name: "enum Color".into(),
 574            range: 324..355,
 575            content: "
 576                The below code snippet is from file 'foo.cpp'
 577
 578                ```cpp
 579                // This is a test comment
 580                enum Color { red, green, blue }
 581                ```"
 582            .unindent(),
 583            embedding: vec![],
 584        },
 585        Document {
 586            name: "struct myStructure".into(),
 587            range: 428..581,
 588            content: "
 589                The below code snippet is from file 'foo.cpp'
 590
 591                ```cpp
 592                /** This is a preceeding block comment
 593                 * This is the second line
 594                 */
 595                struct {           // Structure declaration
 596                    int myNum;       // Member (int variable)
 597                    string myString; // Member (string variable)
 598                } myStructure;
 599                ```"
 600            .unindent(),
 601            embedding: vec![],
 602        },
 603        Document {
 604            name: "class Matrix2".into(),
 605            range: 613..1342,
 606            content: "
 607                The below code snippet is from file 'foo.cpp'
 608
 609                ```cpp
 610                /**
 611                * @brief Matrix class.
 612                */
 613                template <typename T,
 614                          typename = typename std::enable_if<
 615                            std::is_integral<T>::value || std::is_floating_point<T>::value,
 616                            bool>::type>
 617                class Matrix2 {
 618                    std::vector<std::vector<T>> _mat;
 619
 620                public:
 621                    /**
 622                    * @brief Constructor
 623                    * @tparam Integer ensuring integers are being evaluated and not other
 624                    * data types.
 625                    * @param size denoting the size of Matrix as size x size
 626                    */
 627                    template <typename Integer,
 628                              typename = typename std::enable_if<std::is_integral<Integer>::value,
 629                              Integer>::type>
 630                    explicit Matrix(const Integer size) {
 631                        for (size_t i = 0; i < size; ++i) {
 632                            _mat.emplace_back(std::vector<T>(size, 0));
 633                        }
 634                    }
 635                }
 636                ```"
 637            .unindent(),
 638            embedding: vec![],
 639        },
 640    ];
 641
 642    for idx in 0..test_documents.len() {
 643        assert_eq!(test_documents[idx], parsed_files[idx]);
 644    }
 645}
 646
 647#[gpui::test]
 648fn test_dot_product(mut rng: StdRng) {
 649    assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
 650    assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.);
 651
 652    for _ in 0..100 {
 653        let size = 1536;
 654        let mut a = vec![0.; size];
 655        let mut b = vec![0.; size];
 656        for (a, b) in a.iter_mut().zip(b.iter_mut()) {
 657            *a = rng.gen();
 658            *b = rng.gen();
 659        }
 660
 661        assert_eq!(
 662            round_to_decimals(dot(&a, &b), 1),
 663            round_to_decimals(reference_dot(&a, &b), 1)
 664        );
 665    }
 666
 667    fn round_to_decimals(n: f32, decimal_places: i32) -> f32 {
 668        let factor = (10.0 as f32).powi(decimal_places);
 669        (n * factor).round() / factor
 670    }
 671
 672    fn reference_dot(a: &[f32], b: &[f32]) -> f32 {
 673        a.iter().zip(b.iter()).map(|(a, b)| a * b).sum()
 674    }
 675}
 676
 677#[derive(Default)]
 678struct FakeEmbeddingProvider {
 679    embedding_count: AtomicUsize,
 680}
 681
 682impl FakeEmbeddingProvider {
 683    fn embedding_count(&self) -> usize {
 684        self.embedding_count.load(atomic::Ordering::SeqCst)
 685    }
 686}
 687
 688#[async_trait]
 689impl EmbeddingProvider for FakeEmbeddingProvider {
 690    async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
 691        self.embedding_count
 692            .fetch_add(spans.len(), atomic::Ordering::SeqCst);
 693        Ok(spans
 694            .iter()
 695            .map(|span| {
 696                let mut result = vec![1.0; 26];
 697                for letter in span.chars() {
 698                    let letter = letter.to_ascii_lowercase();
 699                    if letter as u32 >= 'a' as u32 {
 700                        let ix = (letter as u32) - ('a' as u32);
 701                        if ix < 26 {
 702                            result[ix as usize] += 1.0;
 703                        }
 704                    }
 705                }
 706
 707                let norm = result.iter().map(|x| x * x).sum::<f32>().sqrt();
 708                for x in &mut result {
 709                    *x /= norm;
 710                }
 711
 712                result
 713            })
 714            .collect())
 715    }
 716}
 717
 718fn js_lang() -> Arc<Language> {
 719    Arc::new(
 720        Language::new(
 721            LanguageConfig {
 722                name: "Javascript".into(),
 723                path_suffixes: vec!["js".into()],
 724                ..Default::default()
 725            },
 726            Some(tree_sitter_typescript::language_tsx()),
 727        )
 728        .with_embedding_query(
 729            &r#"
 730
 731            (
 732                (comment)* @context
 733                .
 734                (export_statement
 735                    (function_declaration
 736                        "async"? @name
 737                        "function" @name
 738                        name: (_) @name)) @item
 739                    )
 740
 741            (
 742                (comment)* @context
 743                .
 744                (function_declaration
 745                    "async"? @name
 746                    "function" @name
 747                    name: (_) @name) @item
 748                    )
 749
 750            (
 751                (comment)* @context
 752                .
 753                (export_statement
 754                    (class_declaration
 755                        "class" @name
 756                        name: (_) @name)) @item
 757                    )
 758
 759            (
 760                (comment)* @context
 761                .
 762                (class_declaration
 763                    "class" @name
 764                    name: (_) @name) @item
 765                    )
 766
 767            (
 768                (comment)* @context
 769                .
 770                (method_definition
 771                    [
 772                        "get"
 773                        "set"
 774                        "async"
 775                        "*"
 776                        "static"
 777                    ]* @name
 778                    name: (_) @name) @item
 779                )
 780
 781            (
 782                (comment)* @context
 783                .
 784                (export_statement
 785                    (interface_declaration
 786                        "interface" @name
 787                        name: (_) @name)) @item
 788                )
 789
 790            (
 791                (comment)* @context
 792                .
 793                (interface_declaration
 794                    "interface" @name
 795                    name: (_) @name) @item
 796                )
 797
 798            (
 799                (comment)* @context
 800                .
 801                (export_statement
 802                    (enum_declaration
 803                        "enum" @name
 804                        name: (_) @name)) @item
 805                )
 806
 807            (
 808                (comment)* @context
 809                .
 810                (enum_declaration
 811                    "enum" @name
 812                    name: (_) @name) @item
 813                )
 814
 815                    "#
 816            .unindent(),
 817        )
 818        .unwrap(),
 819    )
 820}
 821
 822fn rust_lang() -> Arc<Language> {
 823    Arc::new(
 824        Language::new(
 825            LanguageConfig {
 826                name: "Rust".into(),
 827                path_suffixes: vec!["rs".into()],
 828                ..Default::default()
 829            },
 830            Some(tree_sitter_rust::language()),
 831        )
 832        .with_embedding_query(
 833            r#"
 834            (
 835                (line_comment)* @context
 836                .
 837                (enum_item
 838                    name: (_) @name) @item
 839            )
 840
 841            (
 842                (line_comment)* @context
 843                .
 844                (struct_item
 845                    name: (_) @name) @item
 846            )
 847
 848            (
 849                (line_comment)* @context
 850                .
 851                (impl_item
 852                    trait: (_)? @name
 853                    "for"? @name
 854                    type: (_) @name) @item
 855            )
 856
 857            (
 858                (line_comment)* @context
 859                .
 860                (trait_item
 861                    name: (_) @name) @item
 862            )
 863
 864            (
 865                (line_comment)* @context
 866                .
 867                (function_item
 868                    name: (_) @name) @item
 869            )
 870
 871            (
 872                (line_comment)* @context
 873                .
 874                (macro_definition
 875                    name: (_) @name) @item
 876            )
 877
 878            (
 879                (line_comment)* @context
 880                .
 881                (function_signature_item
 882                    name: (_) @name) @item
 883            )
 884            "#,
 885        )
 886        .unwrap(),
 887    )
 888}
 889
 890fn toml_lang() -> Arc<Language> {
 891    Arc::new(Language::new(
 892        LanguageConfig {
 893            name: "TOML".into(),
 894            path_suffixes: vec!["toml".into()],
 895            ..Default::default()
 896        },
 897        Some(tree_sitter_toml::language()),
 898    ))
 899}
 900
 901fn cpp_lang() -> Arc<Language> {
 902    Arc::new(
 903        Language::new(
 904            LanguageConfig {
 905                name: "CPP".into(),
 906                path_suffixes: vec!["cpp".into()],
 907                ..Default::default()
 908            },
 909            Some(tree_sitter_cpp::language()),
 910        )
 911        .with_embedding_query(
 912            r#"
 913            (
 914                (comment)* @context
 915                .
 916                (function_definition
 917                    (type_qualifier)? @name
 918                    type: (_)? @name
 919                    declarator: [
 920                        (function_declarator
 921                            declarator: (_) @name)
 922                        (pointer_declarator
 923                            "*" @name
 924                            declarator: (function_declarator
 925                            declarator: (_) @name))
 926                        (pointer_declarator
 927                            "*" @name
 928                            declarator: (pointer_declarator
 929                                "*" @name
 930                            declarator: (function_declarator
 931                                declarator: (_) @name)))
 932                        (reference_declarator
 933                            ["&" "&&"] @name
 934                            (function_declarator
 935                            declarator: (_) @name))
 936                    ]
 937                    (type_qualifier)? @name) @item
 938                )
 939
 940            (
 941                (comment)* @context
 942                .
 943                (template_declaration
 944                    (class_specifier
 945                        "class" @name
 946                        name: (_) @name)
 947                        ) @item
 948            )
 949
 950            (
 951                (comment)* @context
 952                .
 953                (class_specifier
 954                    "class" @name
 955                    name: (_) @name) @item
 956                )
 957
 958            (
 959                (comment)* @context
 960                .
 961                (enum_specifier
 962                    "enum" @name
 963                    name: (_) @name) @item
 964                )
 965
 966            (
 967                (comment)* @context
 968                .
 969                (declaration
 970                    type: (struct_specifier
 971                    "struct" @name)
 972                    declarator: (_) @name) @item
 973            )
 974
 975            "#,
 976        )
 977        .unwrap(),
 978    )
 979}
 980
 981fn elixir_lang() -> Arc<Language> {
 982    Arc::new(
 983        Language::new(
 984            LanguageConfig {
 985                name: "Elixir".into(),
 986                path_suffixes: vec!["rs".into()],
 987                ..Default::default()
 988            },
 989            Some(tree_sitter_elixir::language()),
 990        )
 991        .with_embedding_query(
 992            r#"
 993            (
 994                (unary_operator
 995                    operator: "@"
 996                    operand: (call
 997                        target: (identifier) @unary
 998                        (#match? @unary "^(doc)$"))
 999                    ) @context
1000                .
1001                (call
1002                target: (identifier) @name
1003                (arguments
1004                [
1005                (identifier) @name
1006                (call
1007                target: (identifier) @name)
1008                (binary_operator
1009                left: (call
1010                target: (identifier) @name)
1011                operator: "when")
1012                ])
1013                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
1014                )
1015
1016            (call
1017                target: (identifier) @name
1018                (arguments (alias) @name)
1019                (#match? @name "^(defmodule|defprotocol)$")) @item
1020            "#,
1021        )
1022        .unwrap(),
1023    )
1024}