semantic_index_tests.rs

   1use crate::{
   2    db::dot,
   3    embedding::EmbeddingProvider,
   4    parsing::{subtract_ranges, CodeContextRetriever, Document},
   5    semantic_index_settings::SemanticIndexSettings,
   6    SearchResult, SemanticIndex,
   7};
   8use anyhow::Result;
   9use async_trait::async_trait;
  10use gpui::{Task, TestAppContext};
  11use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
  12use pretty_assertions::assert_eq;
  13use project::{project_settings::ProjectSettings, search::PathMatcher, FakeFs, Fs, Project};
  14use rand::{rngs::StdRng, Rng};
  15use serde_json::json;
  16use settings::SettingsStore;
  17use std::{
  18    path::Path,
  19    sync::{
  20        atomic::{self, AtomicUsize},
  21        Arc,
  22    },
  23};
  24use unindent::Unindent;
  25
  26#[ctor::ctor]
  27fn init_logger() {
  28    if std::env::var("RUST_LOG").is_ok() {
  29        env_logger::init();
  30    }
  31}
  32
/// End-to-end test of `SemanticIndex`.
///
/// Indexes a fake three-file project, searches it with and without include/exclude
/// path filters, then rewrites one file and checks that re-indexing reports a single
/// file and embeds exactly two additional spans.
#[gpui::test]
async fn test_semantic_index(cx: &mut TestAppContext) {
    // Install a test settings store and register the semantic-index and project settings types.
    cx.update(|cx| {
        cx.set_global(SettingsStore::test(cx));
        settings::register::<SemanticIndexSettings>(cx);
        settings::register::<ProjectSettings>(cx);
    });

    // Fake file system: two Rust files and one TOML file under `/the-root/src`.
    let fs = FakeFs::new(cx.background());
    fs.insert_tree(
        "/the-root",
        json!({
            "src": {
                "file1.rs": "
                    fn aaa() {
                        println!(\"aaaaaaaaaaaa!\");
                    }

                    fn zzzzz() {
                        println!(\"SLEEPING\");
                    }
                ".unindent(),
                "file2.rs": "
                    fn bbb() {
                        println!(\"bbbbbbbbbbbbb!\");
                    }
                ".unindent(),
                "file3.toml": "
                    ZZZZZZZZZZZZZZZZZZ = 5
                ".unindent(),
            }
        }),
    )
    .await;

    // Register the Rust and TOML test languages defined further down in this file.
    let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
    let rust_language = rust_lang();
    let toml_language = toml_lang();
    languages.add(rust_language);
    languages.add(toml_language);

    // Database path handed to the index, located inside a temporary directory.
    let db_dir = tempdir::TempDir::new("vector-store").unwrap();
    let db_path = db_dir.path().join("db.sqlite");

    // The fake provider counts every span it is asked to embed; the count is checked at the end.
    let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
    let store = SemanticIndex::new(
        fs.clone(),
        db_path,
        embedding_provider.clone(),
        languages,
        cx.to_async(),
    )
    .await
    .unwrap();

    // Initial indexing pass: all three files are reported, and once the foreground
    // executor has parked no files remain outstanding.
    let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
    let (file_count, outstanding_file_count) = store
        .update(cx, |store, cx| store.index_project(project.clone(), cx))
        .await
        .unwrap();
    assert_eq!(file_count, 3);
    cx.foreground().run_until_parked();
    assert_eq!(*outstanding_file_count.borrow(), 0);

    // Unfiltered search (both path-matcher lists empty).
    // NOTE(review): the `5` is presumably the maximum number of results — confirm against `search_project`.
    let search_results = store
        .update(cx, |store, cx| {
            store.search_project(
                project.clone(),
                "aaaaaabbbbzz".to_string(),
                5,
                vec![],
                vec![],
                cx,
            )
        })
        .await
        .unwrap();

    // Expected (path, start offset) pairs, in the order the search returns them.
    // Offset 45 in file1.rs is where `fn zzzzz` begins.
    assert_search_results(
        &search_results,
        &[
            (Path::new("src/file1.rs").into(), 0),
            (Path::new("src/file2.rs").into(), 0),
            (Path::new("src/file3.toml").into(), 0),
            (Path::new("src/file1.rs").into(), 45),
        ],
        cx,
    );

    // Test include/exclude file functionality: the same `*.rs` matcher is used once as
    // an include filter and once as an exclude filter.
    let include_files = vec![PathMatcher::new("*.rs").unwrap()];
    let exclude_files = vec![PathMatcher::new("*.rs").unwrap()];
    let rust_only_search_results = store
        .update(cx, |store, cx| {
            store.search_project(
                project.clone(),
                "aaaaaabbbbzz".to_string(),
                5,
                include_files,
                vec![],
                cx,
            )
        })
        .await
        .unwrap();

    // With the include filter, the TOML result disappears.
    assert_search_results(
        &rust_only_search_results,
        &[
            (Path::new("src/file1.rs").into(), 0),
            (Path::new("src/file2.rs").into(), 0),
            (Path::new("src/file1.rs").into(), 45),
        ],
        cx,
    );

    let no_rust_search_results = store
        .update(cx, |store, cx| {
            store.search_project(
                project.clone(),
                "aaaaaabbbbzz".to_string(),
                5,
                vec![],
                exclude_files,
                cx,
            )
        })
        .await
        .unwrap();

    // With the exclude filter, only the TOML result remains.
    assert_search_results(
        &no_rust_search_results,
        &[(Path::new("src/file3.toml").into(), 0)],
        cx,
    );

    // Overwrite file2.rs with new contents (one function and one struct).
    fs.save(
        "/the-root/src/file2.rs".as_ref(),
        &"
            fn dddd() { println!(\"ddddd!\"); }
            struct pqpqpqp {}
        "
        .unindent()
        .into(),
        Default::default(),
    )
    .await
    .unwrap();

    cx.foreground().run_until_parked();

    // Re-index: only one file is reported this time.
    let prev_embedding_count = embedding_provider.embedding_count();
    let (file_count, outstanding_file_count) = store
        .update(cx, |store, cx| store.index_project(project.clone(), cx))
        .await
        .unwrap();
    assert_eq!(file_count, 1);

    cx.foreground().run_until_parked();
    assert_eq!(*outstanding_file_count.borrow(), 0);

    // Exactly two new spans were embedded during the re-index
    // (presumably `fn dddd` and `struct pqpqpqp` — confirm against the parser).
    assert_eq!(
        embedding_provider.embedding_count() - prev_embedding_count,
        2
    );
}
 199
 200#[track_caller]
 201fn assert_search_results(
 202    actual: &[SearchResult],
 203    expected: &[(Arc<Path>, usize)],
 204    cx: &TestAppContext,
 205) {
 206    let actual = actual
 207        .iter()
 208        .map(|search_result| {
 209            search_result.buffer.read_with(cx, |buffer, _cx| {
 210                (
 211                    buffer.file().unwrap().path().clone(),
 212                    search_result.range.start.to_offset(buffer),
 213                )
 214            })
 215        })
 216        .collect::<Vec<_>>();
 217    assert_eq!(actual, expected);
 218}
 219
/// Checks the documents `CodeContextRetriever` extracts from a Rust snippet.
///
/// Per the expectations below: each item is emitted together with its preceding
/// comments/attributes, the `impl E` document has its method bodies replaced by
/// ` /* ... */ `, and each method is additionally emitted as its own document.
#[gpui::test]
async fn test_code_context_retrieval_rust() {
    let language = rust_lang();
    let mut retriever = CodeContextRetriever::new();

    let text = "
        /// A doc comment
        /// that spans multiple lines
        #[gpui::test]
        fn a() {
            b
        }

        impl C for D {
        }

        impl E {
            // This is also a preceding comment
            pub fn function_1() -> Option<()> {
                todo!();
            }

            // This is a preceding comment
            fn function_2() -> Result<()> {
                todo!();
            }
        }
    "
    .unindent();

    let documents = retriever.parse_file(&text, language).unwrap();

    // Each expected entry is (document content, start offset). The offset is looked up
    // at the item keyword itself, even when the content starts with leading comments.
    assert_documents_eq(
        &documents,
        &[
            (
                "
                /// A doc comment
                /// that spans multiple lines
                #[gpui::test]
                fn a() {
                    b
                }"
                .unindent(),
                text.find("fn a").unwrap(),
            ),
            (
                "
                impl C for D {
                }"
                .unindent(),
                text.find("impl C").unwrap(),
            ),
            (
                "
                impl E {
                    // This is also a preceding comment
                    pub fn function_1() -> Option<()> { /* ... */ }

                    // This is a preceding comment
                    fn function_2() -> Result<()> { /* ... */ }
                }"
                .unindent(),
                text.find("impl E").unwrap(),
            ),
            (
                "
                // This is also a preceding comment
                pub fn function_1() -> Option<()> {
                    todo!();
                }"
                .unindent(),
                text.find("pub fn function_1").unwrap(),
            ),
            (
                "
                // This is a preceding comment
                fn function_2() -> Result<()> {
                    todo!();
                }"
                .unindent(),
                text.find("fn function_2").unwrap(),
            ),
        ],
    );
}
 306
/// Checks the documents `CodeContextRetriever` extracts from JSON input.
///
/// Per the expectations below, each input yields a single document in which array
/// contents and string values are collapsed; for a top-level array of objects only
/// the first object is kept.
#[gpui::test]
async fn test_code_context_retrieval_json() {
    let language = json_lang();
    let mut retriever = CodeContextRetriever::new();

    // Case 1: a top-level object with nested arrays, strings and an object.
    let text = r#"
        {
            "array": [1, 2, 3, 4],
            "string": "abcdefg",
            "nested_object": {
                "array_2": [5, 6, 7, 8],
                "string_2": "hijklmnop",
                "boolean": true,
                "none": null
            }
        }
    "#
    .unindent();

    let documents = retriever.parse_file(&text, language.clone()).unwrap();

    // Arrays become `[]` and string values become `""`; booleans and null are kept.
    assert_documents_eq(
        &documents,
        &[(
            r#"
                {
                    "array": [],
                    "string": "",
                    "nested_object": {
                        "array_2": [],
                        "string_2": "",
                        "boolean": true,
                        "none": null
                    }
                }"#
            .unindent(),
            text.find("{").unwrap(),
        )],
    );

    // Case 2: a top-level array of objects.
    let text = r#"
        [
            {
                "name": "somebody",
                "age": 42
            },
            {
                "name": "somebody else",
                "age": 43
            }
        ]
    "#
    .unindent();

    let documents = retriever.parse_file(&text, language.clone()).unwrap();

    // Only the first object survives, with its string value emptied.
    assert_documents_eq(
        &documents,
        &[(
            r#"
            [{
                    "name": "",
                    "age": 42
                }]"#
            .unindent(),
            text.find("[").unwrap(),
        )],
    );
}
 376
 377fn assert_documents_eq(
 378    documents: &[Document],
 379    expected_contents_and_start_offsets: &[(String, usize)],
 380) {
 381    assert_eq!(
 382        documents
 383            .iter()
 384            .map(|document| (document.content.clone(), document.range.start))
 385            .collect::<Vec<_>>(),
 386        expected_contents_and_start_offsets
 387    );
 388}
 389
/// Checks the documents `CodeContextRetriever` extracts from a JavaScript/TypeScript snippet.
///
/// Per the expectations below: functions, classes and interfaces are emitted with
/// their preceding comment, and the class constructor is additionally emitted as
/// its own document.
#[gpui::test]
async fn test_code_context_retrieval_javascript() {
    let language = js_lang();
    let mut retriever = CodeContextRetriever::new();

    let text = "
        /* globals importScripts, backend */
        function _authorize() {}

        /**
         * Sometimes the frontend build is way faster than backend.
         */
        export async function authorizeBank() {
            _authorize(pushModal, upgradingAccountId, {});
        }

        export class SettingsPage {
            /* This is a test setting */
            constructor(page) {
                this.page = page;
            }
        }

        /* This is a test comment */
        class TestClass {}

        /* Schema for editor_events in Clickhouse. */
        export interface ClickhouseEditorEvent {
            installation_id: string
            operation: string
        }
        "
    .unindent();

    let documents = retriever.parse_file(&text, language.clone()).unwrap();

    // Each expected entry is (document content, start offset). The offsets are
    // hard-coded positions in `text`, so editing the fixture above requires updating them.
    // NOTE(review): they presumably point at the item itself, after any leading comment — confirm.
    assert_documents_eq(
        &documents,
        &[
            (
                "
            /* globals importScripts, backend */
            function _authorize() {}"
                    .unindent(),
                37,
            ),
            (
                "
            /**
             * Sometimes the frontend build is way faster than backend.
             */
            export async function authorizeBank() {
                _authorize(pushModal, upgradingAccountId, {});
            }"
                .unindent(),
                131,
            ),
            (
                "
                export class SettingsPage {
                    /* This is a test setting */
                    constructor(page) {
                        this.page = page;
                    }
                }"
                .unindent(),
                225,
            ),
            (
                "
                /* This is a test setting */
                constructor(page) {
                    this.page = page;
                }"
                .unindent(),
                290,
            ),
            (
                "
                /* This is a test comment */
                class TestClass {}"
                    .unindent(),
                374,
            ),
            (
                "
                /* Schema for editor_events in Clickhouse. */
                export interface ClickhouseEditorEvent {
                    installation_id: string
                    operation: string
                }"
                .unindent(),
                440,
            ),
        ],
    )
}
 487
/// Checks the documents `CodeContextRetriever` extracts from an Elixir module.
///
/// Per the expectations below, two documents are produced: the whole `defmodule`
/// at offset 0, and the `__build__` function together with its `@doc false`
/// attribute at offset 574.
#[gpui::test]
async fn test_code_context_retrieval_elixir() {
    let language = elixir_lang();
    let mut retriever = CodeContextRetriever::new();

    let text = r#"
        defmodule File.Stream do
            @moduledoc """
            Defines a `File.Stream` struct returned by `File.stream!/3`.

            The following fields are public:

            * `path`          - the file path
            * `modes`         - the file modes
            * `raw`           - a boolean indicating if bin functions should be used
            * `line_or_bytes` - if reading should read lines or a given number of bytes
            * `node`          - the node the file belongs to

            """

            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil

            @type t :: %__MODULE__{}

            @doc false
            def __build__(path, modes, line_or_bytes) do
            raw = :lists.keyfind(:encoding, 1, modes) == false

            modes =
                case raw do
                true ->
                    case :lists.keyfind(:read_ahead, 1, modes) do
                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
                    {:read_ahead, _} -> [:raw | modes]
                    false -> [:raw, :read_ahead | modes]
                    end

                false ->
                    modes
                end

            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}

            end"#
    .unindent();

    let documents = retriever.parse_file(&text, language.clone()).unwrap();

    // Each expected entry is (document content, start offset); the second offset is a
    // hard-coded position in `text` and must be updated if the fixture above changes.
    assert_documents_eq(
        &documents,
        &[(
            r#"
        defmodule File.Stream do
            @moduledoc """
            Defines a `File.Stream` struct returned by `File.stream!/3`.

            The following fields are public:

            * `path`          - the file path
            * `modes`         - the file modes
            * `raw`           - a boolean indicating if bin functions should be used
            * `line_or_bytes` - if reading should read lines or a given number of bytes
            * `node`          - the node the file belongs to

            """

            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil

            @type t :: %__MODULE__{}

            @doc false
            def __build__(path, modes, line_or_bytes) do
            raw = :lists.keyfind(:encoding, 1, modes) == false

            modes =
                case raw do
                true ->
                    case :lists.keyfind(:read_ahead, 1, modes) do
                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
                    {:read_ahead, _} -> [:raw | modes]
                    false -> [:raw, :read_ahead | modes]
                    end

                false ->
                    modes
                end

            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}

            end"#
                .unindent(),
            0,
        ),(r#"
            @doc false
            def __build__(path, modes, line_or_bytes) do
            raw = :lists.keyfind(:encoding, 1, modes) == false

            modes =
                case raw do
                true ->
                    case :lists.keyfind(:read_ahead, 1, modes) do
                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
                    {:read_ahead, _} -> [:raw | modes]
                    false -> [:raw, :read_ahead | modes]
                    end

                false ->
                    modes
                end

            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}

            end"#.unindent(), 574)],
    );
}
 603
/// Checks the documents `CodeContextRetriever` extracts from a C++ snippet.
///
/// Per the expectations below, six documents are produced: `main`, `MyClass`,
/// `Color`, the anonymous struct, the `Matrix2` template class, and the
/// `Matrix` constructor nested inside that class.
#[gpui::test]
async fn test_code_context_retrieval_cpp() {
    let language = cpp_lang();
    let mut retriever = CodeContextRetriever::new();

    let text = "
    /**
     * @brief Main function
     * @returns 0 on exit
     */
    int main() { return 0; }

    /**
    * This is a test comment
    */
    class MyClass {       // The class
        public:           // Access specifier
        int myNum;        // Attribute (int variable)
        string myString;  // Attribute (string variable)
    };

    // This is a test comment
    enum Color { red, green, blue };

    /** This is a preceding block comment
     * This is the second line
     */
    struct {           // Structure declaration
        int myNum;       // Member (int variable)
        string myString; // Member (string variable)
    } myStructure;

    /**
     * @brief Matrix class.
     */
    template <typename T,
              typename = typename std::enable_if<
                std::is_integral<T>::value || std::is_floating_point<T>::value,
                bool>::type>
    class Matrix2 {
        std::vector<std::vector<T>> _mat;

        public:
            /**
            * @brief Constructor
            * @tparam Integer ensuring integers are being evaluated and not other
            * data types.
            * @param size denoting the size of Matrix as size x size
            */
            template <typename Integer,
                    typename = typename std::enable_if<std::is_integral<Integer>::value,
                    Integer>::type>
            explicit Matrix(const Integer size) {
                for (size_t i = 0; i < size; ++i) {
                    _mat.emplace_back(std::vector<T>(size, 0));
                }
            }
    }"
    .unindent();

    let documents = retriever.parse_file(&text, language.clone()).unwrap();

    // Each expected entry is (document content, start offset); the offsets are hard-coded
    // positions in `text` and must be updated if the fixture above changes.
    // Note that the expected class and enum documents stop at the closing brace (no
    // trailing `;`), while the anonymous struct document includes `} myStructure;`.
    assert_documents_eq(
        &documents,
        &[
            (
                "
        /**
         * @brief Main function
         * @returns 0 on exit
         */
        int main() { return 0; }"
                    .unindent(),
                54,
            ),
            (
                "
                /**
                * This is a test comment
                */
                class MyClass {       // The class
                    public:           // Access specifier
                    int myNum;        // Attribute (int variable)
                    string myString;  // Attribute (string variable)
                }"
                .unindent(),
                112,
            ),
            (
                "
                // This is a test comment
                enum Color { red, green, blue }"
                    .unindent(),
                322,
            ),
            (
                "
                /** This is a preceding block comment
                 * This is the second line
                 */
                struct {           // Structure declaration
                    int myNum;       // Member (int variable)
                    string myString; // Member (string variable)
                } myStructure;"
                    .unindent(),
                425,
            ),
            (
                "
                /**
                 * @brief Matrix class.
                 */
                template <typename T,
                          typename = typename std::enable_if<
                            std::is_integral<T>::value || std::is_floating_point<T>::value,
                            bool>::type>
                class Matrix2 {
                    std::vector<std::vector<T>> _mat;

                    public:
                        /**
                        * @brief Constructor
                        * @tparam Integer ensuring integers are being evaluated and not other
                        * data types.
                        * @param size denoting the size of Matrix as size x size
                        */
                        template <typename Integer,
                                typename = typename std::enable_if<std::is_integral<Integer>::value,
                                Integer>::type>
                        explicit Matrix(const Integer size) {
                            for (size_t i = 0; i < size; ++i) {
                                _mat.emplace_back(std::vector<T>(size, 0));
                            }
                        }
                }"
                .unindent(),
                612,
            ),
            (
                "
                explicit Matrix(const Integer size) {
                    for (size_t i = 0; i < size; ++i) {
                        _mat.emplace_back(std::vector<T>(size, 0));
                    }
                }"
                .unindent(),
                1226,
            ),
        ],
    );
}
 755
 756#[gpui::test]
 757fn test_dot_product(mut rng: StdRng) {
 758    assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
 759    assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.);
 760
 761    for _ in 0..100 {
 762        let size = 1536;
 763        let mut a = vec![0.; size];
 764        let mut b = vec![0.; size];
 765        for (a, b) in a.iter_mut().zip(b.iter_mut()) {
 766            *a = rng.gen();
 767            *b = rng.gen();
 768        }
 769
 770        assert_eq!(
 771            round_to_decimals(dot(&a, &b), 1),
 772            round_to_decimals(reference_dot(&a, &b), 1)
 773        );
 774    }
 775
 776    fn round_to_decimals(n: f32, decimal_places: i32) -> f32 {
 777        let factor = (10.0 as f32).powi(decimal_places);
 778        (n * factor).round() / factor
 779    }
 780
 781    fn reference_dot(a: &[f32], b: &[f32]) -> f32 {
 782        a.iter().zip(b.iter()).map(|(a, b)| a * b).sum()
 783    }
 784}
 785
 786#[derive(Default)]
 787struct FakeEmbeddingProvider {
 788    embedding_count: AtomicUsize,
 789}
 790
 791impl FakeEmbeddingProvider {
 792    fn embedding_count(&self) -> usize {
 793        self.embedding_count.load(atomic::Ordering::SeqCst)
 794    }
 795}
 796
 797#[async_trait]
 798impl EmbeddingProvider for FakeEmbeddingProvider {
 799    async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
 800        self.embedding_count
 801            .fetch_add(spans.len(), atomic::Ordering::SeqCst);
 802        Ok(spans
 803            .iter()
 804            .map(|span| {
 805                let mut result = vec![1.0; 26];
 806                for letter in span.chars() {
 807                    let letter = letter.to_ascii_lowercase();
 808                    if letter as u32 >= 'a' as u32 {
 809                        let ix = (letter as u32) - ('a' as u32);
 810                        if ix < 26 {
 811                            result[ix as usize] += 1.0;
 812                        }
 813                    }
 814                }
 815
 816                let norm = result.iter().map(|x| x * x).sum::<f32>().sqrt();
 817                for x in &mut result {
 818                    *x /= norm;
 819                }
 820
 821                result
 822            })
 823            .collect())
 824    }
 825}
 826
 827fn js_lang() -> Arc<Language> {
 828    Arc::new(
 829        Language::new(
 830            LanguageConfig {
 831                name: "Javascript".into(),
 832                path_suffixes: vec!["js".into()],
 833                ..Default::default()
 834            },
 835            Some(tree_sitter_typescript::language_tsx()),
 836        )
 837        .with_embedding_query(
 838            &r#"
 839
 840            (
 841                (comment)* @context
 842                .
 843                [
 844                (export_statement
 845                    (function_declaration
 846                        "async"? @name
 847                        "function" @name
 848                        name: (_) @name))
 849                (function_declaration
 850                    "async"? @name
 851                    "function" @name
 852                    name: (_) @name)
 853                ] @item
 854            )
 855
 856            (
 857                (comment)* @context
 858                .
 859                [
 860                (export_statement
 861                    (class_declaration
 862                        "class" @name
 863                        name: (_) @name))
 864                (class_declaration
 865                    "class" @name
 866                    name: (_) @name)
 867                ] @item
 868            )
 869
 870            (
 871                (comment)* @context
 872                .
 873                [
 874                (export_statement
 875                    (interface_declaration
 876                        "interface" @name
 877                        name: (_) @name))
 878                (interface_declaration
 879                    "interface" @name
 880                    name: (_) @name)
 881                ] @item
 882            )
 883
 884            (
 885                (comment)* @context
 886                .
 887                [
 888                (export_statement
 889                    (enum_declaration
 890                        "enum" @name
 891                        name: (_) @name))
 892                (enum_declaration
 893                    "enum" @name
 894                    name: (_) @name)
 895                ] @item
 896            )
 897
 898            (
 899                (comment)* @context
 900                .
 901                (method_definition
 902                    [
 903                        "get"
 904                        "set"
 905                        "async"
 906                        "*"
 907                        "static"
 908                    ]* @name
 909                    name: (_) @name) @item
 910            )
 911
 912                    "#
 913            .unindent(),
 914        )
 915        .unwrap(),
 916    )
 917}
 918
 919fn rust_lang() -> Arc<Language> {
 920    Arc::new(
 921        Language::new(
 922            LanguageConfig {
 923                name: "Rust".into(),
 924                path_suffixes: vec!["rs".into()],
 925                collapsed_placeholder: " /* ... */ ".to_string(),
 926                ..Default::default()
 927            },
 928            Some(tree_sitter_rust::language()),
 929        )
 930        .with_embedding_query(
 931            r#"
 932            (
 933                [(line_comment) (attribute_item)]* @context
 934                .
 935                [
 936                    (struct_item
 937                        name: (_) @name)
 938
 939                    (enum_item
 940                        name: (_) @name)
 941
 942                    (impl_item
 943                        trait: (_)? @name
 944                        "for"? @name
 945                        type: (_) @name)
 946
 947                    (trait_item
 948                        name: (_) @name)
 949
 950                    (function_item
 951                        name: (_) @name
 952                        body: (block
 953                            "{" @keep
 954                            "}" @keep) @collapse)
 955
 956                    (macro_definition
 957                        name: (_) @name)
 958                ] @item
 959            )
 960            "#,
 961        )
 962        .unwrap(),
 963    )
 964}
 965
 966fn json_lang() -> Arc<Language> {
 967    Arc::new(
 968        Language::new(
 969            LanguageConfig {
 970                name: "JSON".into(),
 971                path_suffixes: vec!["json".into()],
 972                ..Default::default()
 973            },
 974            Some(tree_sitter_json::language()),
 975        )
 976        .with_embedding_query(
 977            r#"
 978            (document) @item
 979
 980            (array
 981                "[" @keep
 982                .
 983                (object)? @keep
 984                "]" @keep) @collapse
 985
 986            (pair value: (string
 987                "\"" @keep
 988                "\"" @keep) @collapse)
 989            "#,
 990        )
 991        .unwrap(),
 992    )
 993}
 994
 995fn toml_lang() -> Arc<Language> {
 996    Arc::new(Language::new(
 997        LanguageConfig {
 998            name: "TOML".into(),
 999            path_suffixes: vec!["toml".into()],
1000            ..Default::default()
1001        },
1002        Some(tree_sitter_toml::language()),
1003    ))
1004}
1005
1006fn cpp_lang() -> Arc<Language> {
1007    Arc::new(
1008        Language::new(
1009            LanguageConfig {
1010                name: "CPP".into(),
1011                path_suffixes: vec!["cpp".into()],
1012                ..Default::default()
1013            },
1014            Some(tree_sitter_cpp::language()),
1015        )
1016        .with_embedding_query(
1017            r#"
1018            (
1019                (comment)* @context
1020                .
1021                (function_definition
1022                    (type_qualifier)? @name
1023                    type: (_)? @name
1024                    declarator: [
1025                        (function_declarator
1026                            declarator: (_) @name)
1027                        (pointer_declarator
1028                            "*" @name
1029                            declarator: (function_declarator
1030                            declarator: (_) @name))
1031                        (pointer_declarator
1032                            "*" @name
1033                            declarator: (pointer_declarator
1034                                "*" @name
1035                            declarator: (function_declarator
1036                                declarator: (_) @name)))
1037                        (reference_declarator
1038                            ["&" "&&"] @name
1039                            (function_declarator
1040                            declarator: (_) @name))
1041                    ]
1042                    (type_qualifier)? @name) @item
1043                )
1044
1045            (
1046                (comment)* @context
1047                .
1048                (template_declaration
1049                    (class_specifier
1050                        "class" @name
1051                        name: (_) @name)
1052                        ) @item
1053            )
1054
1055            (
1056                (comment)* @context
1057                .
1058                (class_specifier
1059                    "class" @name
1060                    name: (_) @name) @item
1061                )
1062
1063            (
1064                (comment)* @context
1065                .
1066                (enum_specifier
1067                    "enum" @name
1068                    name: (_) @name) @item
1069                )
1070
1071            (
1072                (comment)* @context
1073                .
1074                (declaration
1075                    type: (struct_specifier
1076                    "struct" @name)
1077                    declarator: (_) @name) @item
1078            )
1079
1080            "#,
1081        )
1082        .unwrap(),
1083    )
1084}
1085
1086fn elixir_lang() -> Arc<Language> {
1087    Arc::new(
1088        Language::new(
1089            LanguageConfig {
1090                name: "Elixir".into(),
1091                path_suffixes: vec!["rs".into()],
1092                ..Default::default()
1093            },
1094            Some(tree_sitter_elixir::language()),
1095        )
1096        .with_embedding_query(
1097            r#"
1098            (
1099                (unary_operator
1100                    operator: "@"
1101                    operand: (call
1102                        target: (identifier) @unary
1103                        (#match? @unary "^(doc)$"))
1104                    ) @context
1105                .
1106                (call
1107                target: (identifier) @name
1108                (arguments
1109                [
1110                (identifier) @name
1111                (call
1112                target: (identifier) @name)
1113                (binary_operator
1114                left: (call
1115                target: (identifier) @name)
1116                operator: "when")
1117                ])
1118                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
1119                )
1120
1121            (call
1122                target: (identifier) @name
1123                (arguments (alias) @name)
1124                (#match? @name "^(defmodule|defprotocol)$")) @item
1125            "#,
1126        )
1127        .unwrap(),
1128    )
1129}
1130
1131#[gpui::test]
1132fn test_subtract_ranges() {
1133    // collapsed_ranges: Vec<Range<usize>>, keep_ranges: Vec<Range<usize>>
1134
1135    assert_eq!(
1136        subtract_ranges(&[0..5, 10..21], &[0..1, 4..5]),
1137        vec![1..4, 10..21]
1138    );
1139
1140    assert_eq!(subtract_ranges(&[0..5], &[1..2]), &[0..1, 2..5]);
1141}