vector_store_tests.rs

   1use crate::{
   2    db::dot,
   3    embedding::EmbeddingProvider,
   4    parsing::{CodeContextRetriever, Document},
   5    vector_store_settings::VectorStoreSettings,
   6    VectorStore,
   7};
   8use anyhow::Result;
   9use async_trait::async_trait;
  10use gpui::{Task, TestAppContext};
  11use language::{Language, LanguageConfig, LanguageRegistry};
  12use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
  13use rand::{rngs::StdRng, Rng};
  14use serde_json::json;
  15use settings::SettingsStore;
  16use std::{
  17    path::Path,
  18    sync::{
  19        atomic::{self, AtomicUsize},
  20        Arc,
  21    },
  22};
  23use unindent::Unindent;
  24
  25#[ctor::ctor]
  26fn init_logger() {
  27    if std::env::var("RUST_LOG").is_ok() {
  28        env_logger::init();
  29    }
  30}
  31
  32#[gpui::test]
  33async fn test_vector_store(cx: &mut TestAppContext) {
  34    cx.update(|cx| {
  35        cx.set_global(SettingsStore::test(cx));
  36        settings::register::<VectorStoreSettings>(cx);
  37        settings::register::<ProjectSettings>(cx);
  38    });
  39
  40    let fs = FakeFs::new(cx.background());
  41    fs.insert_tree(
  42        "/the-root",
  43        json!({
  44            "src": {
  45                "file1.rs": "
  46                    fn aaa() {
  47                        println!(\"aaaa!\");
  48                    }
  49
  50                    fn zzzzzzzzz() {
  51                        println!(\"SLEEPING\");
  52                    }
  53                ".unindent(),
  54                "file2.rs": "
  55                    fn bbb() {
  56                        println!(\"bbbb!\");
  57                    }
  58                ".unindent(),
  59                "file3.toml": "
  60                    ZZZZZZZ = 5
  61                    ".unindent(),
  62            }
  63        }),
  64    )
  65    .await;
  66
  67    let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
  68    let rust_language = rust_lang();
  69    let toml_language = toml_lang();
  70    languages.add(rust_language);
  71    languages.add(toml_language);
  72
  73    let db_dir = tempdir::TempDir::new("vector-store").unwrap();
  74    let db_path = db_dir.path().join("db.sqlite");
  75
  76    let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
  77    let store = VectorStore::new(
  78        fs.clone(),
  79        db_path,
  80        embedding_provider.clone(),
  81        languages,
  82        cx.to_async(),
  83    )
  84    .await
  85    .unwrap();
  86
  87    let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
  88    let worktree_id = project.read_with(cx, |project, cx| {
  89        project.worktrees(cx).next().unwrap().read(cx).id()
  90    });
  91    let file_count = store
  92        .update(cx, |store, cx| store.index_project(project.clone(), cx))
  93        .await
  94        .unwrap();
  95    assert_eq!(file_count, 3);
  96    cx.foreground().run_until_parked();
  97    store.update(cx, |store, _cx| {
  98        assert_eq!(
  99            store.remaining_files_to_index_for_project(&project),
 100            Some(0)
 101        );
 102    });
 103
 104    let search_results = store
 105        .update(cx, |store, cx| {
 106            store.search_project(project.clone(), "aaaa".to_string(), 5, cx)
 107        })
 108        .await
 109        .unwrap();
 110
 111    assert_eq!(search_results[0].byte_range.start, 0);
 112    assert_eq!(search_results[0].name, "aaa");
 113    assert_eq!(search_results[0].worktree_id, worktree_id);
 114
 115    fs.save(
 116        "/the-root/src/file2.rs".as_ref(),
 117        &"
 118            fn dddd() { println!(\"ddddd!\"); }
 119            struct pqpqpqp {}
 120        "
 121        .unindent()
 122        .into(),
 123        Default::default(),
 124    )
 125    .await
 126    .unwrap();
 127
 128    cx.foreground().run_until_parked();
 129
 130    let prev_embedding_count = embedding_provider.embedding_count();
 131    let file_count = store
 132        .update(cx, |store, cx| store.index_project(project.clone(), cx))
 133        .await
 134        .unwrap();
 135    assert_eq!(file_count, 1);
 136
 137    cx.foreground().run_until_parked();
 138    store.update(cx, |store, _cx| {
 139        assert_eq!(
 140            store.remaining_files_to_index_for_project(&project),
 141            Some(0)
 142        );
 143    });
 144
 145    assert_eq!(
 146        embedding_provider.embedding_count() - prev_embedding_count,
 147        2
 148    );
 149}
 150
 151#[gpui::test]
 152async fn test_code_context_retrieval_rust() {
 153    let language = rust_lang();
 154    let mut retriever = CodeContextRetriever::new();
 155
 156    let text = "
 157        /// A doc comment
 158        /// that spans multiple lines
 159        fn a() {
 160            b
 161        }
 162
 163        impl C for D {
 164        }
 165    "
 166    .unindent();
 167
 168    let parsed_files = retriever
 169        .parse_file(Path::new("foo.rs"), &text, language)
 170        .unwrap();
 171
 172    assert_eq!(
 173        parsed_files,
 174        &[
 175            Document {
 176                name: "a".into(),
 177                range: text.find("fn a").unwrap()..(text.find("}").unwrap() + 1),
 178                content: "
 179                    The below code snippet is from file 'foo.rs'
 180
 181                    ```rust
 182                    /// A doc comment
 183                    /// that spans multiple lines
 184                    fn a() {
 185                        b
 186                    }
 187                    ```"
 188                .unindent(),
 189                embedding: vec![],
 190            },
 191            Document {
 192                name: "C for D".into(),
 193                range: text.find("impl C").unwrap()..(text.rfind("}").unwrap() + 1),
 194                content: "
 195                    The below code snippet is from file 'foo.rs'
 196
 197                    ```rust
 198                    impl C for D {
 199                    }
 200                    ```"
 201                .unindent(),
 202                embedding: vec![],
 203            }
 204        ]
 205    );
 206}
 207
 208#[gpui::test]
 209async fn test_code_context_retrieval_javascript() {
 210    let language = js_lang();
 211    let mut retriever = CodeContextRetriever::new();
 212
 213    let text = "
 214        /* globals importScripts, backend */
 215        function _authorize() {}
 216
 217        /**
 218         * Sometimes the frontend build is way faster than backend.
 219         */
 220        export async function authorizeBank() {
 221            _authorize(pushModal, upgradingAccountId, {});
 222        }
 223
 224        export class SettingsPage {
 225            /* This is a test setting */
 226            constructor(page) {
 227                this.page = page;
 228            }
 229        }
 230
 231        /* This is a test comment */
 232        class TestClass {}
 233
 234        /* Schema for editor_events in Clickhouse. */
 235        export interface ClickhouseEditorEvent {
 236            installation_id: string
 237            operation: string
 238        }
 239        "
 240    .unindent();
 241
 242    let parsed_files = retriever
 243        .parse_file(Path::new("foo.js"), &text, language)
 244        .unwrap();
 245
 246    let test_documents = &[
 247        Document {
 248            name: "function _authorize".into(),
 249            range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
 250            content: "
 251                    The below code snippet is from file 'foo.js'
 252
 253                    ```javascript
 254                    /* globals importScripts, backend */
 255                    function _authorize() {}
 256                    ```"
 257            .unindent(),
 258            embedding: vec![],
 259        },
 260        Document {
 261            name: "async function authorizeBank".into(),
 262            range: text.find("export async").unwrap()..223,
 263            content: "
 264                    The below code snippet is from file 'foo.js'
 265
 266                    ```javascript
 267                    /**
 268                     * Sometimes the frontend build is way faster than backend.
 269                     */
 270                    export async function authorizeBank() {
 271                        _authorize(pushModal, upgradingAccountId, {});
 272                    }
 273                    ```"
 274            .unindent(),
 275            embedding: vec![],
 276        },
 277        Document {
 278            name: "class SettingsPage".into(),
 279            range: 225..343,
 280            content: "
 281                    The below code snippet is from file 'foo.js'
 282
 283                    ```javascript
 284                    export class SettingsPage {
 285                        /* This is a test setting */
 286                        constructor(page) {
 287                            this.page = page;
 288                        }
 289                    }
 290                    ```"
 291            .unindent(),
 292            embedding: vec![],
 293        },
 294        Document {
 295            name: "constructor".into(),
 296            range: 290..341,
 297            content: "
 298                The below code snippet is from file 'foo.js'
 299
 300                ```javascript
 301                /* This is a test setting */
 302                constructor(page) {
 303                        this.page = page;
 304                    }
 305                ```"
 306            .unindent(),
 307            embedding: vec![],
 308        },
 309        Document {
 310            name: "class TestClass".into(),
 311            range: 374..392,
 312            content: "
 313                    The below code snippet is from file 'foo.js'
 314
 315                    ```javascript
 316                    /* This is a test comment */
 317                    class TestClass {}
 318                    ```"
 319            .unindent(),
 320            embedding: vec![],
 321        },
 322        Document {
 323            name: "interface ClickhouseEditorEvent".into(),
 324            range: 440..532,
 325            content: "
 326                    The below code snippet is from file 'foo.js'
 327
 328                    ```javascript
 329                    /* Schema for editor_events in Clickhouse. */
 330                    export interface ClickhouseEditorEvent {
 331                        installation_id: string
 332                        operation: string
 333                    }
 334                    ```"
 335            .unindent(),
 336            embedding: vec![],
 337        },
 338    ];
 339
 340    for idx in 0..test_documents.len() {
 341        assert_eq!(test_documents[idx], parsed_files[idx]);
 342    }
 343}
 344
 345#[gpui::test]
 346async fn test_code_context_retrieval_elixir() {
 347    let language = elixir_lang();
 348    let mut retriever = CodeContextRetriever::new();
 349
 350    let text = r#"
 351defmodule File.Stream do
 352    @moduledoc """
 353    Defines a `File.Stream` struct returned by `File.stream!/3`.
 354
 355    The following fields are public:
 356
 357    * `path`          - the file path
 358    * `modes`         - the file modes
 359    * `raw`           - a boolean indicating if bin functions should be used
 360    * `line_or_bytes` - if reading should read lines or a given number of bytes
 361    * `node`          - the node the file belongs to
 362
 363    """
 364
 365    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 366
 367    @type t :: %__MODULE__{}
 368
 369    @doc false
 370    def __build__(path, modes, line_or_bytes) do
 371    raw = :lists.keyfind(:encoding, 1, modes) == false
 372
 373    modes =
 374        case raw do
 375        true ->
 376            case :lists.keyfind(:read_ahead, 1, modes) do
 377            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 378            {:read_ahead, _} -> [:raw | modes]
 379            false -> [:raw, :read_ahead | modes]
 380            end
 381
 382        false ->
 383            modes
 384        end
 385
 386    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 387
 388    end
 389"#
 390    .unindent();
 391
 392    let parsed_files = retriever
 393        .parse_file(Path::new("foo.ex"), &text, language)
 394        .unwrap();
 395
 396    let test_documents = &[
 397        Document{
 398            name: "defmodule File.Stream".into(),
 399            range: 0..1132,
 400            content: r#"
 401                The below code snippet is from file 'foo.ex'
 402
 403                ```elixir
 404                defmodule File.Stream do
 405                    @moduledoc """
 406                    Defines a `File.Stream` struct returned by `File.stream!/3`.
 407
 408                    The following fields are public:
 409
 410                    * `path`          - the file path
 411                    * `modes`         - the file modes
 412                    * `raw`           - a boolean indicating if bin functions should be used
 413                    * `line_or_bytes` - if reading should read lines or a given number of bytes
 414                    * `node`          - the node the file belongs to
 415
 416                    """
 417
 418                    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 419
 420                    @type t :: %__MODULE__{}
 421
 422                    @doc false
 423                    def __build__(path, modes, line_or_bytes) do
 424                    raw = :lists.keyfind(:encoding, 1, modes) == false
 425
 426                    modes =
 427                        case raw do
 428                        true ->
 429                            case :lists.keyfind(:read_ahead, 1, modes) do
 430                            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 431                            {:read_ahead, _} -> [:raw | modes]
 432                            false -> [:raw, :read_ahead | modes]
 433                            end
 434
 435                        false ->
 436                            modes
 437                        end
 438
 439                    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 440
 441                    end
 442                ```"#.unindent(),
 443            embedding: vec![],
 444        },
 445        Document {
 446        name: "def __build__".into(),
 447        range: 574..1132,
 448        content: r#"
 449The below code snippet is from file 'foo.ex'
 450
 451```elixir
 452@doc false
 453def __build__(path, modes, line_or_bytes) do
 454    raw = :lists.keyfind(:encoding, 1, modes) == false
 455
 456    modes =
 457        case raw do
 458        true ->
 459            case :lists.keyfind(:read_ahead, 1, modes) do
 460            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 461            {:read_ahead, _} -> [:raw | modes]
 462            false -> [:raw, :read_ahead | modes]
 463            end
 464
 465        false ->
 466            modes
 467        end
 468
 469    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 470
 471    end
 472```"#
 473            .unindent(),
 474        embedding: vec![],
 475    }];
 476
 477    for idx in 0..test_documents.len() {
 478        assert_eq!(test_documents[idx], parsed_files[idx]);
 479    }
 480}
 481
 482#[gpui::test]
 483async fn test_code_context_retrieval_cpp() {
 484    let language = cpp_lang();
 485    let mut retriever = CodeContextRetriever::new();
 486
 487    let text = "
 488    /**
 489     * @brief Main function
 490     * @returns 0 on exit
 491     */
 492    int main() { return 0; }
 493
 494    /**
 495    * This is a test comment
 496    */
 497    class MyClass {       // The class
 498        public:             // Access specifier
 499        int myNum;        // Attribute (int variable)
 500        string myString;  // Attribute (string variable)
 501    };
 502
 503    // This is a test comment
 504    enum Color { red, green, blue };
 505
 506    /** This is a preceeding block comment
 507     * This is the second line
 508     */
 509    struct {           // Structure declaration
 510        int myNum;       // Member (int variable)
 511        string myString; // Member (string variable)
 512    } myStructure;
 513
 514    /**
 515    * @brief Matrix class.
 516    */
 517    template <typename T,
 518              typename = typename std::enable_if<
 519                std::is_integral<T>::value || std::is_floating_point<T>::value,
 520                bool>::type>
 521    class Matrix2 {
 522        std::vector<std::vector<T>> _mat;
 523
 524    public:
 525        /**
 526        * @brief Constructor
 527        * @tparam Integer ensuring integers are being evaluated and not other
 528        * data types.
 529        * @param size denoting the size of Matrix as size x size
 530        */
 531        template <typename Integer,
 532                  typename = typename std::enable_if<std::is_integral<Integer>::value,
 533                  Integer>::type>
 534        explicit Matrix(const Integer size) {
 535            for (size_t i = 0; i < size; ++i) {
 536                _mat.emplace_back(std::vector<T>(size, 0));
 537            }
 538        }
 539    }"
 540    .unindent();
 541
 542    let parsed_files = retriever
 543        .parse_file(Path::new("foo.cpp"), &text, language)
 544        .unwrap();
 545
 546    let test_documents = &[
 547        Document {
 548            name: "int main".into(),
 549            range: 54..78,
 550            content: "
 551                The below code snippet is from file 'foo.cpp'
 552
 553                ```cpp
 554                /**
 555                 * @brief Main function
 556                 * @returns 0 on exit
 557                 */
 558                int main() { return 0; }
 559                ```"
 560            .unindent(),
 561            embedding: vec![],
 562        },
 563        Document {
 564            name: "class MyClass".into(),
 565            range: 112..295,
 566            content: "
 567                The below code snippet is from file 'foo.cpp'
 568
 569                ```cpp
 570                /**
 571                * This is a test comment
 572                */
 573                class MyClass {       // The class
 574                    public:             // Access specifier
 575                    int myNum;        // Attribute (int variable)
 576                    string myString;  // Attribute (string variable)
 577                }
 578                ```"
 579            .unindent(),
 580            embedding: vec![],
 581        },
 582        Document {
 583            name: "enum Color".into(),
 584            range: 324..355,
 585            content: "
 586                The below code snippet is from file 'foo.cpp'
 587
 588                ```cpp
 589                // This is a test comment
 590                enum Color { red, green, blue }
 591                ```"
 592            .unindent(),
 593            embedding: vec![],
 594        },
 595        Document {
 596            name: "struct myStructure".into(),
 597            range: 428..581,
 598            content: "
 599                The below code snippet is from file 'foo.cpp'
 600
 601                ```cpp
 602                /** This is a preceeding block comment
 603                 * This is the second line
 604                 */
 605                struct {           // Structure declaration
 606                    int myNum;       // Member (int variable)
 607                    string myString; // Member (string variable)
 608                } myStructure;
 609                ```"
 610            .unindent(),
 611            embedding: vec![],
 612        },
 613        Document {
 614            name: "class Matrix2".into(),
 615            range: 613..1342,
 616            content: "
 617                The below code snippet is from file 'foo.cpp'
 618
 619                ```cpp
 620                /**
 621                * @brief Matrix class.
 622                */
 623                template <typename T,
 624                          typename = typename std::enable_if<
 625                            std::is_integral<T>::value || std::is_floating_point<T>::value,
 626                            bool>::type>
 627                class Matrix2 {
 628                    std::vector<std::vector<T>> _mat;
 629
 630                public:
 631                    /**
 632                    * @brief Constructor
 633                    * @tparam Integer ensuring integers are being evaluated and not other
 634                    * data types.
 635                    * @param size denoting the size of Matrix as size x size
 636                    */
 637                    template <typename Integer,
 638                              typename = typename std::enable_if<std::is_integral<Integer>::value,
 639                              Integer>::type>
 640                    explicit Matrix(const Integer size) {
 641                        for (size_t i = 0; i < size; ++i) {
 642                            _mat.emplace_back(std::vector<T>(size, 0));
 643                        }
 644                    }
 645                }
 646                ```"
 647            .unindent(),
 648            embedding: vec![],
 649        },
 650    ];
 651
 652    for idx in 0..test_documents.len() {
 653        assert_eq!(test_documents[idx], parsed_files[idx]);
 654    }
 655}
 656
 657#[gpui::test]
 658fn test_dot_product(mut rng: StdRng) {
 659    assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
 660    assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.);
 661
 662    for _ in 0..100 {
 663        let size = 1536;
 664        let mut a = vec![0.; size];
 665        let mut b = vec![0.; size];
 666        for (a, b) in a.iter_mut().zip(b.iter_mut()) {
 667            *a = rng.gen();
 668            *b = rng.gen();
 669        }
 670
 671        assert_eq!(
 672            round_to_decimals(dot(&a, &b), 1),
 673            round_to_decimals(reference_dot(&a, &b), 1)
 674        );
 675    }
 676
 677    fn round_to_decimals(n: f32, decimal_places: i32) -> f32 {
 678        let factor = (10.0 as f32).powi(decimal_places);
 679        (n * factor).round() / factor
 680    }
 681
 682    fn reference_dot(a: &[f32], b: &[f32]) -> f32 {
 683        a.iter().zip(b.iter()).map(|(a, b)| a * b).sum()
 684    }
 685}
 686
 687#[derive(Default)]
 688struct FakeEmbeddingProvider {
 689    embedding_count: AtomicUsize,
 690}
 691
 692impl FakeEmbeddingProvider {
 693    fn embedding_count(&self) -> usize {
 694        self.embedding_count.load(atomic::Ordering::SeqCst)
 695    }
 696}
 697
 698#[async_trait]
 699impl EmbeddingProvider for FakeEmbeddingProvider {
 700    async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
 701        self.embedding_count
 702            .fetch_add(spans.len(), atomic::Ordering::SeqCst);
 703        Ok(spans
 704            .iter()
 705            .map(|span| {
 706                let mut result = vec![1.0; 26];
 707                for letter in span.chars() {
 708                    let letter = letter.to_ascii_lowercase();
 709                    if letter as u32 >= 'a' as u32 {
 710                        let ix = (letter as u32) - ('a' as u32);
 711                        if ix < 26 {
 712                            result[ix as usize] += 1.0;
 713                        }
 714                    }
 715                }
 716
 717                let norm = result.iter().map(|x| x * x).sum::<f32>().sqrt();
 718                for x in &mut result {
 719                    *x /= norm;
 720                }
 721
 722                result
 723            })
 724            .collect())
 725    }
 726}
 727
 728fn js_lang() -> Arc<Language> {
 729    Arc::new(
 730        Language::new(
 731            LanguageConfig {
 732                name: "Javascript".into(),
 733                path_suffixes: vec!["js".into()],
 734                ..Default::default()
 735            },
 736            Some(tree_sitter_typescript::language_tsx()),
 737        )
 738        .with_embedding_query(
 739            &r#"
 740
 741            (
 742                (comment)* @context
 743                .
 744                (export_statement
 745                    (function_declaration
 746                        "async"? @name
 747                        "function" @name
 748                        name: (_) @name)) @item
 749                    )
 750
 751            (
 752                (comment)* @context
 753                .
 754                (function_declaration
 755                    "async"? @name
 756                    "function" @name
 757                    name: (_) @name) @item
 758                    )
 759
 760            (
 761                (comment)* @context
 762                .
 763                (export_statement
 764                    (class_declaration
 765                        "class" @name
 766                        name: (_) @name)) @item
 767                    )
 768
 769            (
 770                (comment)* @context
 771                .
 772                (class_declaration
 773                    "class" @name
 774                    name: (_) @name) @item
 775                    )
 776
 777            (
 778                (comment)* @context
 779                .
 780                (method_definition
 781                    [
 782                        "get"
 783                        "set"
 784                        "async"
 785                        "*"
 786                        "static"
 787                    ]* @name
 788                    name: (_) @name) @item
 789                )
 790
 791            (
 792                (comment)* @context
 793                .
 794                (export_statement
 795                    (interface_declaration
 796                        "interface" @name
 797                        name: (_) @name)) @item
 798                )
 799
 800            (
 801                (comment)* @context
 802                .
 803                (interface_declaration
 804                    "interface" @name
 805                    name: (_) @name) @item
 806                )
 807
 808            (
 809                (comment)* @context
 810                .
 811                (export_statement
 812                    (enum_declaration
 813                        "enum" @name
 814                        name: (_) @name)) @item
 815                )
 816
 817            (
 818                (comment)* @context
 819                .
 820                (enum_declaration
 821                    "enum" @name
 822                    name: (_) @name) @item
 823                )
 824
 825                    "#
 826            .unindent(),
 827        )
 828        .unwrap(),
 829    )
 830}
 831
 832fn rust_lang() -> Arc<Language> {
 833    Arc::new(
 834        Language::new(
 835            LanguageConfig {
 836                name: "Rust".into(),
 837                path_suffixes: vec!["rs".into()],
 838                ..Default::default()
 839            },
 840            Some(tree_sitter_rust::language()),
 841        )
 842        .with_embedding_query(
 843            r#"
 844            (
 845                (line_comment)* @context
 846                .
 847                (enum_item
 848                    name: (_) @name) @item
 849            )
 850
 851            (
 852                (line_comment)* @context
 853                .
 854                (struct_item
 855                    name: (_) @name) @item
 856            )
 857
 858            (
 859                (line_comment)* @context
 860                .
 861                (impl_item
 862                    trait: (_)? @name
 863                    "for"? @name
 864                    type: (_) @name) @item
 865            )
 866
 867            (
 868                (line_comment)* @context
 869                .
 870                (trait_item
 871                    name: (_) @name) @item
 872            )
 873
 874            (
 875                (line_comment)* @context
 876                .
 877                (function_item
 878                    name: (_) @name) @item
 879            )
 880
 881            (
 882                (line_comment)* @context
 883                .
 884                (macro_definition
 885                    name: (_) @name) @item
 886            )
 887
 888            (
 889                (line_comment)* @context
 890                .
 891                (function_signature_item
 892                    name: (_) @name) @item
 893            )
 894            "#,
 895        )
 896        .unwrap(),
 897    )
 898}
 899
 900fn toml_lang() -> Arc<Language> {
 901    Arc::new(Language::new(
 902        LanguageConfig {
 903            name: "TOML".into(),
 904            path_suffixes: vec!["toml".into()],
 905            ..Default::default()
 906        },
 907        Some(tree_sitter_toml::language()),
 908    ))
 909}
 910
 911fn cpp_lang() -> Arc<Language> {
 912    Arc::new(
 913        Language::new(
 914            LanguageConfig {
 915                name: "CPP".into(),
 916                path_suffixes: vec!["cpp".into()],
 917                ..Default::default()
 918            },
 919            Some(tree_sitter_cpp::language()),
 920        )
 921        .with_embedding_query(
 922            r#"
 923            (
 924                (comment)* @context
 925                .
 926                (function_definition
 927                    (type_qualifier)? @name
 928                    type: (_)? @name
 929                    declarator: [
 930                        (function_declarator
 931                            declarator: (_) @name)
 932                        (pointer_declarator
 933                            "*" @name
 934                            declarator: (function_declarator
 935                            declarator: (_) @name))
 936                        (pointer_declarator
 937                            "*" @name
 938                            declarator: (pointer_declarator
 939                                "*" @name
 940                            declarator: (function_declarator
 941                                declarator: (_) @name)))
 942                        (reference_declarator
 943                            ["&" "&&"] @name
 944                            (function_declarator
 945                            declarator: (_) @name))
 946                    ]
 947                    (type_qualifier)? @name) @item
 948                )
 949
 950            (
 951                (comment)* @context
 952                .
 953                (template_declaration
 954                    (class_specifier
 955                        "class" @name
 956                        name: (_) @name)
 957                        ) @item
 958            )
 959
 960            (
 961                (comment)* @context
 962                .
 963                (class_specifier
 964                    "class" @name
 965                    name: (_) @name) @item
 966                )
 967
 968            (
 969                (comment)* @context
 970                .
 971                (enum_specifier
 972                    "enum" @name
 973                    name: (_) @name) @item
 974                )
 975
 976            (
 977                (comment)* @context
 978                .
 979                (declaration
 980                    type: (struct_specifier
 981                    "struct" @name)
 982                    declarator: (_) @name) @item
 983            )
 984
 985            "#,
 986        )
 987        .unwrap(),
 988    )
 989}
 990
 991fn elixir_lang() -> Arc<Language> {
 992    Arc::new(
 993        Language::new(
 994            LanguageConfig {
 995                name: "Elixir".into(),
 996                path_suffixes: vec!["rs".into()],
 997                ..Default::default()
 998            },
 999            Some(tree_sitter_elixir::language()),
1000        )
1001        .with_embedding_query(
1002            r#"
1003            (
1004                (unary_operator
1005                    operator: "@"
1006                    operand: (call
1007                        target: (identifier) @unary
1008                        (#match? @unary "^(doc)$"))
1009                    ) @context
1010                .
1011                (call
1012                target: (identifier) @name
1013                (arguments
1014                [
1015                (identifier) @name
1016                (call
1017                target: (identifier) @name)
1018                (binary_operator
1019                left: (call
1020                target: (identifier) @name)
1021                operator: "when")
1022                ])
1023                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
1024                )
1025
1026            (call
1027                target: (identifier) @name
1028                (arguments (alias) @name)
1029                (#match? @name "^(defmodule|defprotocol)$")) @item
1030            "#,
1031        )
1032        .unwrap(),
1033    )
1034}