semantic_index_tests.rs

   1use crate::{
   2    db::dot,
   3    embedding::EmbeddingProvider,
   4    parsing::{subtract_ranges, CodeContextRetriever, Document},
   5    semantic_index_settings::SemanticIndexSettings,
   6    SearchResult, SemanticIndex,
   7};
   8use anyhow::Result;
   9use async_trait::async_trait;
  10use globset::Glob;
  11use gpui::{Task, TestAppContext};
  12use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
  13use pretty_assertions::assert_eq;
  14use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
  15use rand::{rngs::StdRng, Rng};
  16use serde_json::json;
  17use settings::SettingsStore;
  18use std::{
  19    path::Path,
  20    sync::{
  21        atomic::{self, AtomicUsize},
  22        Arc,
  23    },
  24};
  25use unindent::Unindent;
  26
  27#[ctor::ctor]
  28fn init_logger() {
  29    if std::env::var("RUST_LOG").is_ok() {
  30        env_logger::init();
  31    }
  32}
  33
  34#[gpui::test]
  35async fn test_semantic_index(cx: &mut TestAppContext) {
  36    cx.update(|cx| {
  37        cx.set_global(SettingsStore::test(cx));
  38        settings::register::<SemanticIndexSettings>(cx);
  39        settings::register::<ProjectSettings>(cx);
  40    });
  41
  42    let fs = FakeFs::new(cx.background());
  43    fs.insert_tree(
  44        "/the-root",
  45        json!({
  46            "src": {
  47                "file1.rs": "
  48                    fn aaa() {
  49                        println!(\"aaaaaaaaaaaa!\");
  50                    }
  51
  52                    fn zzzzz() {
  53                        println!(\"SLEEPING\");
  54                    }
  55                ".unindent(),
  56                "file2.rs": "
  57                    fn bbb() {
  58                        println!(\"bbbbbbbbbbbbb!\");
  59                    }
  60                ".unindent(),
  61                "file3.toml": "
  62                    ZZZZZZZZZZZZZZZZZZ = 5
  63                ".unindent(),
  64            }
  65        }),
  66    )
  67    .await;
  68
  69    let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
  70    let rust_language = rust_lang();
  71    let toml_language = toml_lang();
  72    languages.add(rust_language);
  73    languages.add(toml_language);
  74
  75    let db_dir = tempdir::TempDir::new("vector-store").unwrap();
  76    let db_path = db_dir.path().join("db.sqlite");
  77
  78    let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
  79    let store = SemanticIndex::new(
  80        fs.clone(),
  81        db_path,
  82        embedding_provider.clone(),
  83        languages,
  84        cx.to_async(),
  85    )
  86    .await
  87    .unwrap();
  88
  89    let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
  90    let (file_count, outstanding_file_count) = store
  91        .update(cx, |store, cx| store.index_project(project.clone(), cx))
  92        .await
  93        .unwrap();
  94    assert_eq!(file_count, 3);
  95    cx.foreground().run_until_parked();
  96    assert_eq!(*outstanding_file_count.borrow(), 0);
  97
  98    let search_results = store
  99        .update(cx, |store, cx| {
 100            store.search_project(
 101                project.clone(),
 102                "aaaaaabbbbzz".to_string(),
 103                5,
 104                vec![],
 105                vec![],
 106                cx,
 107            )
 108        })
 109        .await
 110        .unwrap();
 111
 112    assert_search_results(
 113        &search_results,
 114        &[
 115            (Path::new("src/file1.rs").into(), 0),
 116            (Path::new("src/file2.rs").into(), 0),
 117            (Path::new("src/file3.toml").into(), 0),
 118            (Path::new("src/file1.rs").into(), 45),
 119        ],
 120        cx,
 121    );
 122
 123    // Test Include Files Functonality
 124    let include_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
 125    let exclude_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
 126    let rust_only_search_results = store
 127        .update(cx, |store, cx| {
 128            store.search_project(
 129                project.clone(),
 130                "aaaaaabbbbzz".to_string(),
 131                5,
 132                include_files,
 133                vec![],
 134                cx,
 135            )
 136        })
 137        .await
 138        .unwrap();
 139
 140    assert_search_results(
 141        &rust_only_search_results,
 142        &[
 143            (Path::new("src/file1.rs").into(), 0),
 144            (Path::new("src/file2.rs").into(), 0),
 145            (Path::new("src/file1.rs").into(), 45),
 146        ],
 147        cx,
 148    );
 149
 150    let no_rust_search_results = store
 151        .update(cx, |store, cx| {
 152            store.search_project(
 153                project.clone(),
 154                "aaaaaabbbbzz".to_string(),
 155                5,
 156                vec![],
 157                exclude_files,
 158                cx,
 159            )
 160        })
 161        .await
 162        .unwrap();
 163
 164    assert_search_results(
 165        &no_rust_search_results,
 166        &[(Path::new("src/file3.toml").into(), 0)],
 167        cx,
 168    );
 169
 170    fs.save(
 171        "/the-root/src/file2.rs".as_ref(),
 172        &"
 173            fn dddd() { println!(\"ddddd!\"); }
 174            struct pqpqpqp {}
 175        "
 176        .unindent()
 177        .into(),
 178        Default::default(),
 179    )
 180    .await
 181    .unwrap();
 182
 183    cx.foreground().run_until_parked();
 184
 185    let prev_embedding_count = embedding_provider.embedding_count();
 186    let (file_count, outstanding_file_count) = store
 187        .update(cx, |store, cx| store.index_project(project.clone(), cx))
 188        .await
 189        .unwrap();
 190    assert_eq!(file_count, 1);
 191
 192    cx.foreground().run_until_parked();
 193    assert_eq!(*outstanding_file_count.borrow(), 0);
 194
 195    assert_eq!(
 196        embedding_provider.embedding_count() - prev_embedding_count,
 197        2
 198    );
 199}
 200
 201#[track_caller]
 202fn assert_search_results(
 203    actual: &[SearchResult],
 204    expected: &[(Arc<Path>, usize)],
 205    cx: &TestAppContext,
 206) {
 207    let actual = actual
 208        .iter()
 209        .map(|search_result| {
 210            search_result.buffer.read_with(cx, |buffer, _cx| {
 211                (
 212                    buffer.file().unwrap().path().clone(),
 213                    search_result.range.start.to_offset(buffer),
 214                )
 215            })
 216        })
 217        .collect::<Vec<_>>();
 218    assert_eq!(actual, expected);
 219}
 220
 221#[gpui::test]
 222async fn test_code_context_retrieval_rust() {
 223    let language = rust_lang();
 224    let mut retriever = CodeContextRetriever::new();
 225
 226    let text = "
 227        /// A doc comment
 228        /// that spans multiple lines
 229        #[gpui::test]
 230        fn a() {
 231            b
 232        }
 233
 234        impl C for D {
 235        }
 236
 237        impl E {
 238            // This is also a preceding comment
 239            pub fn function_1() -> Option<()> {
 240                todo!();
 241            }
 242
 243            // This is a preceding comment
 244            fn function_2() -> Result<()> {
 245                todo!();
 246            }
 247        }
 248    "
 249    .unindent();
 250
 251    let documents = retriever.parse_file(&text, language).unwrap();
 252
 253    assert_documents_eq(
 254        &documents,
 255        &[
 256            (
 257                "
 258                /// A doc comment
 259                /// that spans multiple lines
 260                #[gpui::test]
 261                fn a() {
 262                    b
 263                }"
 264                .unindent(),
 265                text.find("fn a").unwrap(),
 266            ),
 267            (
 268                "
 269                impl C for D {
 270                }"
 271                .unindent(),
 272                text.find("impl C").unwrap(),
 273            ),
 274            (
 275                "
 276                impl E {
 277                    // This is also a preceding comment
 278                    pub fn function_1() -> Option<()> { /* ... */ }
 279
 280                    // This is a preceding comment
 281                    fn function_2() -> Result<()> { /* ... */ }
 282                }"
 283                .unindent(),
 284                text.find("impl E").unwrap(),
 285            ),
 286            (
 287                "
 288                // This is also a preceding comment
 289                pub fn function_1() -> Option<()> {
 290                    todo!();
 291                }"
 292                .unindent(),
 293                text.find("pub fn function_1").unwrap(),
 294            ),
 295            (
 296                "
 297                // This is a preceding comment
 298                fn function_2() -> Result<()> {
 299                    todo!();
 300                }"
 301                .unindent(),
 302                text.find("fn function_2").unwrap(),
 303            ),
 304        ],
 305    );
 306}
 307
 308#[gpui::test]
 309async fn test_code_context_retrieval_json() {
 310    let language = json_lang();
 311    let mut retriever = CodeContextRetriever::new();
 312
 313    let text = r#"
 314        {
 315            "array": [1, 2, 3, 4],
 316            "string": "abcdefg",
 317            "nested_object": {
 318                "array_2": [5, 6, 7, 8],
 319                "string_2": "hijklmnop",
 320                "boolean": true,
 321                "none": null
 322            }
 323        }
 324    "#
 325    .unindent();
 326
 327    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 328
 329    assert_documents_eq(
 330        &documents,
 331        &[(
 332            r#"
 333                {
 334                    "array": [],
 335                    "string": "",
 336                    "nested_object": {
 337                        "array_2": [],
 338                        "string_2": "",
 339                        "boolean": true,
 340                        "none": null
 341                    }
 342                }"#
 343            .unindent(),
 344            text.find("{").unwrap(),
 345        )],
 346    );
 347
 348    let text = r#"
 349        [
 350            {
 351                "name": "somebody",
 352                "age": 42
 353            },
 354            {
 355                "name": "somebody else",
 356                "age": 43
 357            }
 358        ]
 359    "#
 360    .unindent();
 361
 362    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 363
 364    assert_documents_eq(
 365        &documents,
 366        &[(
 367            r#"
 368            [{
 369                    "name": "",
 370                    "age": 42
 371                }]"#
 372            .unindent(),
 373            text.find("[").unwrap(),
 374        )],
 375    );
 376}
 377
 378fn assert_documents_eq(
 379    documents: &[Document],
 380    expected_contents_and_start_offsets: &[(String, usize)],
 381) {
 382    assert_eq!(
 383        documents
 384            .iter()
 385            .map(|document| (document.content.clone(), document.range.start))
 386            .collect::<Vec<_>>(),
 387        expected_contents_and_start_offsets
 388    );
 389}
 390
 391#[gpui::test]
 392async fn test_code_context_retrieval_javascript() {
 393    let language = js_lang();
 394    let mut retriever = CodeContextRetriever::new();
 395
 396    let text = "
 397        /* globals importScripts, backend */
 398        function _authorize() {}
 399
 400        /**
 401         * Sometimes the frontend build is way faster than backend.
 402         */
 403        export async function authorizeBank() {
 404            _authorize(pushModal, upgradingAccountId, {});
 405        }
 406
 407        export class SettingsPage {
 408            /* This is a test setting */
 409            constructor(page) {
 410                this.page = page;
 411            }
 412        }
 413
 414        /* This is a test comment */
 415        class TestClass {}
 416
 417        /* Schema for editor_events in Clickhouse. */
 418        export interface ClickhouseEditorEvent {
 419            installation_id: string
 420            operation: string
 421        }
 422        "
 423    .unindent();
 424
 425    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 426
 427    assert_documents_eq(
 428        &documents,
 429        &[
 430            (
 431                "
 432            /* globals importScripts, backend */
 433            function _authorize() {}"
 434                    .unindent(),
 435                37,
 436            ),
 437            (
 438                "
 439            /**
 440             * Sometimes the frontend build is way faster than backend.
 441             */
 442            export async function authorizeBank() {
 443                _authorize(pushModal, upgradingAccountId, {});
 444            }"
 445                .unindent(),
 446                131,
 447            ),
 448            (
 449                "
 450                export class SettingsPage {
 451                    /* This is a test setting */
 452                    constructor(page) {
 453                        this.page = page;
 454                    }
 455                }"
 456                .unindent(),
 457                225,
 458            ),
 459            (
 460                "
 461                /* This is a test setting */
 462                constructor(page) {
 463                    this.page = page;
 464                }"
 465                .unindent(),
 466                290,
 467            ),
 468            (
 469                "
 470                /* This is a test comment */
 471                class TestClass {}"
 472                    .unindent(),
 473                374,
 474            ),
 475            (
 476                "
 477                /* Schema for editor_events in Clickhouse. */
 478                export interface ClickhouseEditorEvent {
 479                    installation_id: string
 480                    operation: string
 481                }"
 482                .unindent(),
 483                440,
 484            ),
 485        ],
 486    )
 487}
 488
 489#[gpui::test]
 490async fn test_code_context_retrieval_elixir() {
 491    let language = elixir_lang();
 492    let mut retriever = CodeContextRetriever::new();
 493
 494    let text = r#"
 495        defmodule File.Stream do
 496            @moduledoc """
 497            Defines a `File.Stream` struct returned by `File.stream!/3`.
 498
 499            The following fields are public:
 500
 501            * `path`          - the file path
 502            * `modes`         - the file modes
 503            * `raw`           - a boolean indicating if bin functions should be used
 504            * `line_or_bytes` - if reading should read lines or a given number of bytes
 505            * `node`          - the node the file belongs to
 506
 507            """
 508
 509            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 510
 511            @type t :: %__MODULE__{}
 512
 513            @doc false
 514            def __build__(path, modes, line_or_bytes) do
 515            raw = :lists.keyfind(:encoding, 1, modes) == false
 516
 517            modes =
 518                case raw do
 519                true ->
 520                    case :lists.keyfind(:read_ahead, 1, modes) do
 521                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 522                    {:read_ahead, _} -> [:raw | modes]
 523                    false -> [:raw, :read_ahead | modes]
 524                    end
 525
 526                false ->
 527                    modes
 528                end
 529
 530            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 531
 532            end"#
 533    .unindent();
 534
 535    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 536
 537    assert_documents_eq(
 538        &documents,
 539        &[(
 540            r#"
 541        defmodule File.Stream do
 542            @moduledoc """
 543            Defines a `File.Stream` struct returned by `File.stream!/3`.
 544
 545            The following fields are public:
 546
 547            * `path`          - the file path
 548            * `modes`         - the file modes
 549            * `raw`           - a boolean indicating if bin functions should be used
 550            * `line_or_bytes` - if reading should read lines or a given number of bytes
 551            * `node`          - the node the file belongs to
 552
 553            """
 554
 555            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
 556
 557            @type t :: %__MODULE__{}
 558
 559            @doc false
 560            def __build__(path, modes, line_or_bytes) do
 561            raw = :lists.keyfind(:encoding, 1, modes) == false
 562
 563            modes =
 564                case raw do
 565                true ->
 566                    case :lists.keyfind(:read_ahead, 1, modes) do
 567                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 568                    {:read_ahead, _} -> [:raw | modes]
 569                    false -> [:raw, :read_ahead | modes]
 570                    end
 571
 572                false ->
 573                    modes
 574                end
 575
 576            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 577
 578            end"#
 579                .unindent(),
 580            0,
 581        ),(r#"
 582            @doc false
 583            def __build__(path, modes, line_or_bytes) do
 584            raw = :lists.keyfind(:encoding, 1, modes) == false
 585
 586            modes =
 587                case raw do
 588                true ->
 589                    case :lists.keyfind(:read_ahead, 1, modes) do
 590                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
 591                    {:read_ahead, _} -> [:raw | modes]
 592                    false -> [:raw, :read_ahead | modes]
 593                    end
 594
 595                false ->
 596                    modes
 597                end
 598
 599            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
 600
 601            end"#.unindent(), 574)],
 602    );
 603}
 604
 605#[gpui::test]
 606async fn test_code_context_retrieval_cpp() {
 607    let language = cpp_lang();
 608    let mut retriever = CodeContextRetriever::new();
 609
 610    let text = "
 611    /**
 612     * @brief Main function
 613     * @returns 0 on exit
 614     */
 615    int main() { return 0; }
 616
 617    /**
 618    * This is a test comment
 619    */
 620    class MyClass {       // The class
 621        public:           // Access specifier
 622        int myNum;        // Attribute (int variable)
 623        string myString;  // Attribute (string variable)
 624    };
 625
 626    // This is a test comment
 627    enum Color { red, green, blue };
 628
 629    /** This is a preceding block comment
 630     * This is the second line
 631     */
 632    struct {           // Structure declaration
 633        int myNum;       // Member (int variable)
 634        string myString; // Member (string variable)
 635    } myStructure;
 636
 637    /**
 638     * @brief Matrix class.
 639     */
 640    template <typename T,
 641              typename = typename std::enable_if<
 642                std::is_integral<T>::value || std::is_floating_point<T>::value,
 643                bool>::type>
 644    class Matrix2 {
 645        std::vector<std::vector<T>> _mat;
 646
 647        public:
 648            /**
 649            * @brief Constructor
 650            * @tparam Integer ensuring integers are being evaluated and not other
 651            * data types.
 652            * @param size denoting the size of Matrix as size x size
 653            */
 654            template <typename Integer,
 655                    typename = typename std::enable_if<std::is_integral<Integer>::value,
 656                    Integer>::type>
 657            explicit Matrix(const Integer size) {
 658                for (size_t i = 0; i < size; ++i) {
 659                    _mat.emplace_back(std::vector<T>(size, 0));
 660                }
 661            }
 662    }"
 663    .unindent();
 664
 665    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 666
 667    assert_documents_eq(
 668        &documents,
 669        &[
 670            (
 671                "
 672        /**
 673         * @brief Main function
 674         * @returns 0 on exit
 675         */
 676        int main() { return 0; }"
 677                    .unindent(),
 678                54,
 679            ),
 680            (
 681                "
 682                /**
 683                * This is a test comment
 684                */
 685                class MyClass {       // The class
 686                    public:           // Access specifier
 687                    int myNum;        // Attribute (int variable)
 688                    string myString;  // Attribute (string variable)
 689                }"
 690                .unindent(),
 691                112,
 692            ),
 693            (
 694                "
 695                // This is a test comment
 696                enum Color { red, green, blue }"
 697                    .unindent(),
 698                322,
 699            ),
 700            (
 701                "
 702                /** This is a preceding block comment
 703                 * This is the second line
 704                 */
 705                struct {           // Structure declaration
 706                    int myNum;       // Member (int variable)
 707                    string myString; // Member (string variable)
 708                } myStructure;"
 709                    .unindent(),
 710                425,
 711            ),
 712            (
 713                "
 714                /**
 715                 * @brief Matrix class.
 716                 */
 717                template <typename T,
 718                          typename = typename std::enable_if<
 719                            std::is_integral<T>::value || std::is_floating_point<T>::value,
 720                            bool>::type>
 721                class Matrix2 {
 722                    std::vector<std::vector<T>> _mat;
 723
 724                    public:
 725                        /**
 726                        * @brief Constructor
 727                        * @tparam Integer ensuring integers are being evaluated and not other
 728                        * data types.
 729                        * @param size denoting the size of Matrix as size x size
 730                        */
 731                        template <typename Integer,
 732                                typename = typename std::enable_if<std::is_integral<Integer>::value,
 733                                Integer>::type>
 734                        explicit Matrix(const Integer size) {
 735                            for (size_t i = 0; i < size; ++i) {
 736                                _mat.emplace_back(std::vector<T>(size, 0));
 737                            }
 738                        }
 739                }"
 740                .unindent(),
 741                612,
 742            ),
 743            (
 744                "
 745                explicit Matrix(const Integer size) {
 746                    for (size_t i = 0; i < size; ++i) {
 747                        _mat.emplace_back(std::vector<T>(size, 0));
 748                    }
 749                }"
 750                .unindent(),
 751                1226,
 752            ),
 753        ],
 754    );
 755}
 756
 757#[gpui::test]
 758fn test_dot_product(mut rng: StdRng) {
 759    assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
 760    assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.);
 761
 762    for _ in 0..100 {
 763        let size = 1536;
 764        let mut a = vec![0.; size];
 765        let mut b = vec![0.; size];
 766        for (a, b) in a.iter_mut().zip(b.iter_mut()) {
 767            *a = rng.gen();
 768            *b = rng.gen();
 769        }
 770
 771        assert_eq!(
 772            round_to_decimals(dot(&a, &b), 1),
 773            round_to_decimals(reference_dot(&a, &b), 1)
 774        );
 775    }
 776
 777    fn round_to_decimals(n: f32, decimal_places: i32) -> f32 {
 778        let factor = (10.0 as f32).powi(decimal_places);
 779        (n * factor).round() / factor
 780    }
 781
 782    fn reference_dot(a: &[f32], b: &[f32]) -> f32 {
 783        a.iter().zip(b.iter()).map(|(a, b)| a * b).sum()
 784    }
 785}
 786
 787#[derive(Default)]
 788struct FakeEmbeddingProvider {
 789    embedding_count: AtomicUsize,
 790}
 791
 792impl FakeEmbeddingProvider {
 793    fn embedding_count(&self) -> usize {
 794        self.embedding_count.load(atomic::Ordering::SeqCst)
 795    }
 796}
 797
 798#[async_trait]
 799impl EmbeddingProvider for FakeEmbeddingProvider {
 800    async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
 801        self.embedding_count
 802            .fetch_add(spans.len(), atomic::Ordering::SeqCst);
 803        Ok(spans
 804            .iter()
 805            .map(|span| {
 806                let mut result = vec![1.0; 26];
 807                for letter in span.chars() {
 808                    let letter = letter.to_ascii_lowercase();
 809                    if letter as u32 >= 'a' as u32 {
 810                        let ix = (letter as u32) - ('a' as u32);
 811                        if ix < 26 {
 812                            result[ix as usize] += 1.0;
 813                        }
 814                    }
 815                }
 816
 817                let norm = result.iter().map(|x| x * x).sum::<f32>().sqrt();
 818                for x in &mut result {
 819                    *x /= norm;
 820                }
 821
 822                result
 823            })
 824            .collect())
 825    }
 826}
 827
 828fn js_lang() -> Arc<Language> {
 829    Arc::new(
 830        Language::new(
 831            LanguageConfig {
 832                name: "Javascript".into(),
 833                path_suffixes: vec!["js".into()],
 834                ..Default::default()
 835            },
 836            Some(tree_sitter_typescript::language_tsx()),
 837        )
 838        .with_embedding_query(
 839            &r#"
 840
 841            (
 842                (comment)* @context
 843                .
 844                [
 845                (export_statement
 846                    (function_declaration
 847                        "async"? @name
 848                        "function" @name
 849                        name: (_) @name))
 850                (function_declaration
 851                    "async"? @name
 852                    "function" @name
 853                    name: (_) @name)
 854                ] @item
 855            )
 856
 857            (
 858                (comment)* @context
 859                .
 860                [
 861                (export_statement
 862                    (class_declaration
 863                        "class" @name
 864                        name: (_) @name))
 865                (class_declaration
 866                    "class" @name
 867                    name: (_) @name)
 868                ] @item
 869            )
 870
 871            (
 872                (comment)* @context
 873                .
 874                [
 875                (export_statement
 876                    (interface_declaration
 877                        "interface" @name
 878                        name: (_) @name))
 879                (interface_declaration
 880                    "interface" @name
 881                    name: (_) @name)
 882                ] @item
 883            )
 884
 885            (
 886                (comment)* @context
 887                .
 888                [
 889                (export_statement
 890                    (enum_declaration
 891                        "enum" @name
 892                        name: (_) @name))
 893                (enum_declaration
 894                    "enum" @name
 895                    name: (_) @name)
 896                ] @item
 897            )
 898
 899            (
 900                (comment)* @context
 901                .
 902                (method_definition
 903                    [
 904                        "get"
 905                        "set"
 906                        "async"
 907                        "*"
 908                        "static"
 909                    ]* @name
 910                    name: (_) @name) @item
 911            )
 912
 913                    "#
 914            .unindent(),
 915        )
 916        .unwrap(),
 917    )
 918}
 919
 920fn rust_lang() -> Arc<Language> {
 921    Arc::new(
 922        Language::new(
 923            LanguageConfig {
 924                name: "Rust".into(),
 925                path_suffixes: vec!["rs".into()],
 926                collapsed_placeholder: " /* ... */ ".to_string(),
 927                ..Default::default()
 928            },
 929            Some(tree_sitter_rust::language()),
 930        )
 931        .with_embedding_query(
 932            r#"
 933            (
 934                [(line_comment) (attribute_item)]* @context
 935                .
 936                [
 937                    (struct_item
 938                        name: (_) @name)
 939
 940                    (enum_item
 941                        name: (_) @name)
 942
 943                    (impl_item
 944                        trait: (_)? @name
 945                        "for"? @name
 946                        type: (_) @name)
 947
 948                    (trait_item
 949                        name: (_) @name)
 950
 951                    (function_item
 952                        name: (_) @name
 953                        body: (block
 954                            "{" @keep
 955                            "}" @keep) @collapse)
 956
 957                    (macro_definition
 958                        name: (_) @name)
 959                ] @item
 960            )
 961            "#,
 962        )
 963        .unwrap(),
 964    )
 965}
 966
 967fn json_lang() -> Arc<Language> {
 968    Arc::new(
 969        Language::new(
 970            LanguageConfig {
 971                name: "JSON".into(),
 972                path_suffixes: vec!["json".into()],
 973                ..Default::default()
 974            },
 975            Some(tree_sitter_json::language()),
 976        )
 977        .with_embedding_query(
 978            r#"
 979            (document) @item
 980
 981            (array
 982                "[" @keep
 983                .
 984                (object)? @keep
 985                "]" @keep) @collapse
 986
 987            (pair value: (string
 988                "\"" @keep
 989                "\"" @keep) @collapse)
 990            "#,
 991        )
 992        .unwrap(),
 993    )
 994}
 995
 996fn toml_lang() -> Arc<Language> {
 997    Arc::new(Language::new(
 998        LanguageConfig {
 999            name: "TOML".into(),
1000            path_suffixes: vec!["toml".into()],
1001            ..Default::default()
1002        },
1003        Some(tree_sitter_toml::language()),
1004    ))
1005}
1006
1007fn cpp_lang() -> Arc<Language> {
1008    Arc::new(
1009        Language::new(
1010            LanguageConfig {
1011                name: "CPP".into(),
1012                path_suffixes: vec!["cpp".into()],
1013                ..Default::default()
1014            },
1015            Some(tree_sitter_cpp::language()),
1016        )
1017        .with_embedding_query(
1018            r#"
1019            (
1020                (comment)* @context
1021                .
1022                (function_definition
1023                    (type_qualifier)? @name
1024                    type: (_)? @name
1025                    declarator: [
1026                        (function_declarator
1027                            declarator: (_) @name)
1028                        (pointer_declarator
1029                            "*" @name
1030                            declarator: (function_declarator
1031                            declarator: (_) @name))
1032                        (pointer_declarator
1033                            "*" @name
1034                            declarator: (pointer_declarator
1035                                "*" @name
1036                            declarator: (function_declarator
1037                                declarator: (_) @name)))
1038                        (reference_declarator
1039                            ["&" "&&"] @name
1040                            (function_declarator
1041                            declarator: (_) @name))
1042                    ]
1043                    (type_qualifier)? @name) @item
1044                )
1045
1046            (
1047                (comment)* @context
1048                .
1049                (template_declaration
1050                    (class_specifier
1051                        "class" @name
1052                        name: (_) @name)
1053                        ) @item
1054            )
1055
1056            (
1057                (comment)* @context
1058                .
1059                (class_specifier
1060                    "class" @name
1061                    name: (_) @name) @item
1062                )
1063
1064            (
1065                (comment)* @context
1066                .
1067                (enum_specifier
1068                    "enum" @name
1069                    name: (_) @name) @item
1070                )
1071
1072            (
1073                (comment)* @context
1074                .
1075                (declaration
1076                    type: (struct_specifier
1077                    "struct" @name)
1078                    declarator: (_) @name) @item
1079            )
1080
1081            "#,
1082        )
1083        .unwrap(),
1084    )
1085}
1086
1087fn elixir_lang() -> Arc<Language> {
1088    Arc::new(
1089        Language::new(
1090            LanguageConfig {
1091                name: "Elixir".into(),
1092                path_suffixes: vec!["rs".into()],
1093                ..Default::default()
1094            },
1095            Some(tree_sitter_elixir::language()),
1096        )
1097        .with_embedding_query(
1098            r#"
1099            (
1100                (unary_operator
1101                    operator: "@"
1102                    operand: (call
1103                        target: (identifier) @unary
1104                        (#match? @unary "^(doc)$"))
1105                    ) @context
1106                .
1107                (call
1108                target: (identifier) @name
1109                (arguments
1110                [
1111                (identifier) @name
1112                (call
1113                target: (identifier) @name)
1114                (binary_operator
1115                left: (call
1116                target: (identifier) @name)
1117                operator: "when")
1118                ])
1119                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
1120                )
1121
1122            (call
1123                target: (identifier) @name
1124                (arguments (alias) @name)
1125                (#match? @name "^(defmodule|defprotocol)$")) @item
1126            "#,
1127        )
1128        .unwrap(),
1129    )
1130}
1131
1132#[gpui::test]
1133fn test_subtract_ranges() {
1134    // collapsed_ranges: Vec<Range<usize>>, keep_ranges: Vec<Range<usize>>
1135
1136    assert_eq!(
1137        subtract_ranges(&[0..5, 10..21], &[0..1, 4..5]),
1138        vec![1..4, 10..21]
1139    );
1140
1141    assert_eq!(subtract_ranges(&[0..5], &[1..2]), &[0..1, 2..5]);
1142}