syntax_index.rs

  1use collections::{HashMap, HashSet};
  2use gpui::{App, AppContext as _, Context, Entity, Task, WeakEntity};
  3use language::{Buffer, BufferEvent};
  4use project::buffer_store::{BufferStore, BufferStoreEvent};
  5use project::worktree_store::{WorktreeStore, WorktreeStoreEvent};
  6use project::{PathChange, Project, ProjectEntryId, ProjectPath};
  7use slotmap::SlotMap;
  8use util::{ResultExt as _, debug_panic, some_or_debug_panic};
  9
 10use crate::declaration::{
 11    BufferDeclaration, Declaration, DeclarationId, FileDeclaration, Identifier,
 12};
 13use crate::outline::declarations_in_buffer;
 14
 15// TODO:
 16//
 17// * Skip for remote projects
 18
 19// Potential future improvements:
 20//
 21// * Send multiple selected excerpt ranges. Challenge is that excerpt ranges influence which
 22// references are present and their scores.
 23
 24// Potential future optimizations:
 25//
 26// * Cache of buffers for files
 27//
 28// * Parse files directly instead of loading into a Rope. Make SyntaxMap generic to handle embedded
 29// languages? Will also need to find line boundaries, but that can be done by scanning characters in
 30// the flat representation.
 31//
 32// * Use something similar to slotmap without key versions.
 33//
 34// * Concurrent slotmap
 35//
 36// * Use queue for parsing
 37
 38pub struct SyntaxIndex {
 39    declarations: SlotMap<DeclarationId, Declaration>,
 40    identifiers: HashMap<Identifier, HashSet<DeclarationId>>,
 41    files: HashMap<ProjectEntryId, FileState>,
 42    buffers: HashMap<WeakEntity<Buffer>, BufferState>,
 43    project: WeakEntity<Project>,
 44}
 45
 46#[derive(Debug, Default)]
 47struct FileState {
 48    declarations: Vec<DeclarationId>,
 49    task: Option<Task<()>>,
 50}
 51
 52#[derive(Default)]
 53struct BufferState {
 54    declarations: Vec<DeclarationId>,
 55    task: Option<Task<()>>,
 56}
 57
 58impl SyntaxIndex {
 59    pub fn new(project: &Entity<Project>, cx: &mut Context<Self>) -> Self {
 60        let mut this = Self {
 61            declarations: SlotMap::with_key(),
 62            identifiers: HashMap::default(),
 63            project: project.downgrade(),
 64            files: HashMap::default(),
 65            buffers: HashMap::default(),
 66        };
 67
 68        let worktree_store = project.read(cx).worktree_store();
 69        cx.subscribe(&worktree_store, Self::handle_worktree_store_event)
 70            .detach();
 71
 72        for worktree in worktree_store
 73            .read(cx)
 74            .worktrees()
 75            .map(|w| w.read(cx).snapshot())
 76            .collect::<Vec<_>>()
 77        {
 78            for entry in worktree.files(false, 0) {
 79                this.update_file(
 80                    entry.id,
 81                    ProjectPath {
 82                        worktree_id: worktree.id(),
 83                        path: entry.path.clone(),
 84                    },
 85                    cx,
 86                );
 87            }
 88        }
 89
 90        let buffer_store = project.read(cx).buffer_store().clone();
 91        for buffer in buffer_store.read(cx).buffers().collect::<Vec<_>>() {
 92            this.register_buffer(&buffer, cx);
 93        }
 94        cx.subscribe(&buffer_store, Self::handle_buffer_store_event)
 95            .detach();
 96
 97        this
 98    }
 99
100    pub fn declaration(&self, id: DeclarationId) -> Option<&Declaration> {
101        self.declarations.get(id)
102    }
103
104    pub fn declarations_for_identifier<const N: usize>(
105        &self,
106        identifier: &Identifier,
107        cx: &App,
108    ) -> Vec<Declaration> {
109        // make sure to not have a large stack allocation
110        assert!(N < 32);
111
112        let Some(declaration_ids) = self.identifiers.get(&identifier) else {
113            return vec![];
114        };
115
116        let mut result = Vec::with_capacity(N);
117        let mut included_buffer_entry_ids = arrayvec::ArrayVec::<_, N>::new();
118        let mut file_declarations = Vec::new();
119
120        for declaration_id in declaration_ids {
121            let declaration = self.declarations.get(*declaration_id);
122            let Some(declaration) = some_or_debug_panic(declaration) else {
123                continue;
124            };
125            match declaration {
126                Declaration::Buffer { buffer, .. } => {
127                    if let Ok(Some(entry_id)) = buffer.read_with(cx, |buffer, cx| {
128                        project::File::from_dyn(buffer.file()).and_then(|f| f.project_entry_id(cx))
129                    }) {
130                        included_buffer_entry_ids.push(entry_id);
131                        result.push(declaration.clone());
132                        if result.len() == N {
133                            return result;
134                        }
135                    }
136                }
137                Declaration::File {
138                    project_entry_id, ..
139                } => {
140                    if !included_buffer_entry_ids.contains(project_entry_id) {
141                        file_declarations.push(declaration.clone());
142                    }
143                }
144            }
145        }
146
147        for declaration in file_declarations {
148            match declaration {
149                Declaration::File {
150                    project_entry_id, ..
151                } => {
152                    if !included_buffer_entry_ids.contains(&project_entry_id) {
153                        result.push(declaration);
154
155                        if result.len() == N {
156                            return result;
157                        }
158                    }
159                }
160                Declaration::Buffer { .. } => {}
161            }
162        }
163
164        result
165    }
166
167    pub fn file_declaration_count(&self, declaration: &Declaration) -> usize {
168        match declaration {
169            Declaration::File {
170                project_entry_id, ..
171            } => self
172                .files
173                .get(project_entry_id)
174                .map(|file_state| file_state.declarations.len())
175                .unwrap_or_default(),
176            Declaration::Buffer { buffer, .. } => self
177                .buffers
178                .get(buffer)
179                .map(|buffer_state| buffer_state.declarations.len())
180                .unwrap_or_default(),
181        }
182    }
183
184    fn handle_worktree_store_event(
185        &mut self,
186        _worktree_store: Entity<WorktreeStore>,
187        event: &WorktreeStoreEvent,
188        cx: &mut Context<Self>,
189    ) {
190        use WorktreeStoreEvent::*;
191        match event {
192            WorktreeUpdatedEntries(worktree_id, updated_entries_set) => {
193                for (path, entry_id, path_change) in updated_entries_set.iter() {
194                    if let PathChange::Removed = path_change {
195                        self.files.remove(entry_id);
196                    } else {
197                        let project_path = ProjectPath {
198                            worktree_id: *worktree_id,
199                            path: path.clone(),
200                        };
201                        self.update_file(*entry_id, project_path, cx);
202                    }
203                }
204            }
205            WorktreeDeletedEntry(_worktree_id, project_entry_id) => {
206                // TODO: Is this needed?
207                self.files.remove(project_entry_id);
208            }
209            _ => {}
210        }
211    }
212
213    fn handle_buffer_store_event(
214        &mut self,
215        _buffer_store: Entity<BufferStore>,
216        event: &BufferStoreEvent,
217        cx: &mut Context<Self>,
218    ) {
219        use BufferStoreEvent::*;
220        match event {
221            BufferAdded(buffer) => self.register_buffer(buffer, cx),
222            BufferOpened { .. }
223            | BufferChangedFilePath { .. }
224            | BufferDropped { .. }
225            | SharedBufferClosed { .. } => {}
226        }
227    }
228
229    fn register_buffer(&mut self, buffer: &Entity<Buffer>, cx: &mut Context<Self>) {
230        self.buffers
231            .insert(buffer.downgrade(), BufferState::default());
232        let weak_buf = buffer.downgrade();
233        cx.observe_release(buffer, move |this, _buffer, _cx| {
234            this.buffers.remove(&weak_buf);
235        })
236        .detach();
237        cx.subscribe(buffer, Self::handle_buffer_event).detach();
238        self.update_buffer(buffer.clone(), cx);
239    }
240
241    fn handle_buffer_event(
242        &mut self,
243        buffer: Entity<Buffer>,
244        event: &BufferEvent,
245        cx: &mut Context<Self>,
246    ) {
247        match event {
248            BufferEvent::Edited => self.update_buffer(buffer, cx),
249            _ => {}
250        }
251    }
252
253    fn update_buffer(&mut self, buffer: Entity<Buffer>, cx: &Context<Self>) {
254        let mut parse_status = buffer.read(cx).parse_status();
255        let snapshot_task = cx.spawn({
256            let weak_buffer = buffer.downgrade();
257            async move |_, cx| {
258                while *parse_status.borrow() != language::ParseStatus::Idle {
259                    parse_status.changed().await?;
260                }
261                weak_buffer.read_with(cx, |buffer, _cx| buffer.snapshot())
262            }
263        });
264
265        let parse_task = cx.background_spawn(async move {
266            let snapshot = snapshot_task.await?;
267
268            anyhow::Ok(
269                declarations_in_buffer(&snapshot)
270                    .into_iter()
271                    .map(|item| {
272                        (
273                            item.parent_index,
274                            BufferDeclaration::from_outline(item, &snapshot),
275                        )
276                    })
277                    .collect::<Vec<_>>(),
278            )
279        });
280
281        let task = cx.spawn({
282            let weak_buffer = buffer.downgrade();
283            async move |this, cx| {
284                let Ok(declarations) = parse_task.await else {
285                    return;
286                };
287
288                this.update(cx, |this, _cx| {
289                    let buffer_state = this
290                        .buffers
291                        .entry(weak_buffer.clone())
292                        .or_insert_with(Default::default);
293
294                    for old_declaration_id in &buffer_state.declarations {
295                        let Some(declaration) = this.declarations.remove(*old_declaration_id)
296                        else {
297                            debug_panic!("declaration not found");
298                            continue;
299                        };
300                        if let Some(identifier_declarations) =
301                            this.identifiers.get_mut(declaration.identifier())
302                        {
303                            identifier_declarations.remove(old_declaration_id);
304                        }
305                    }
306
307                    let mut new_ids = Vec::with_capacity(declarations.len());
308                    this.declarations.reserve(declarations.len());
309                    for (parent_index, mut declaration) in declarations {
310                        declaration.parent = parent_index
311                            .and_then(|ix| some_or_debug_panic(new_ids.get(ix).copied()));
312
313                        let identifier = declaration.identifier.clone();
314                        let declaration_id = this.declarations.insert(Declaration::Buffer {
315                            buffer: weak_buffer.clone(),
316                            declaration,
317                        });
318                        new_ids.push(declaration_id);
319
320                        this.identifiers
321                            .entry(identifier)
322                            .or_default()
323                            .insert(declaration_id);
324                    }
325
326                    buffer_state.declarations = new_ids;
327                })
328                .ok();
329            }
330        });
331
332        self.buffers
333            .entry(buffer.downgrade())
334            .or_insert_with(Default::default)
335            .task = Some(task);
336    }
337
338    fn update_file(
339        &mut self,
340        entry_id: ProjectEntryId,
341        project_path: ProjectPath,
342        cx: &mut Context<Self>,
343    ) {
344        let Some(project) = self.project.upgrade() else {
345            return;
346        };
347        let project = project.read(cx);
348        let Some(worktree) = project.worktree_for_id(project_path.worktree_id, cx) else {
349            return;
350        };
351        let language_registry = project.languages().clone();
352
353        let snapshot_task = worktree.update(cx, |worktree, cx| {
354            let load_task = worktree.load_file(&project_path.path, cx);
355            cx.spawn(async move |_this, cx| {
356                let loaded_file = load_task.await?;
357                let language = language_registry
358                    .language_for_file_path(&project_path.path)
359                    .await
360                    .log_err();
361
362                let buffer = cx.new(|cx| {
363                    let mut buffer = Buffer::local(loaded_file.text, cx);
364                    buffer.set_language(language, cx);
365                    buffer
366                })?;
367
368                let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
369                while *parse_status.borrow() != language::ParseStatus::Idle {
370                    parse_status.changed().await?;
371                }
372
373                buffer.read_with(cx, |buffer, _cx| buffer.snapshot())
374            })
375        });
376
377        let parse_task = cx.background_spawn(async move {
378            let snapshot = snapshot_task.await?;
379            let declarations = declarations_in_buffer(&snapshot)
380                .into_iter()
381                .map(|item| {
382                    (
383                        item.parent_index,
384                        FileDeclaration::from_outline(item, &snapshot),
385                    )
386                })
387                .collect::<Vec<_>>();
388            anyhow::Ok(declarations)
389        });
390
391        let task = cx.spawn({
392            async move |this, cx| {
393                // TODO: how to handle errors?
394                let Ok(declarations) = parse_task.await else {
395                    return;
396                };
397                this.update(cx, |this, _cx| {
398                    let file_state = this.files.entry(entry_id).or_insert_with(Default::default);
399
400                    for old_declaration_id in &file_state.declarations {
401                        let Some(declaration) = this.declarations.remove(*old_declaration_id)
402                        else {
403                            debug_panic!("declaration not found");
404                            continue;
405                        };
406                        if let Some(identifier_declarations) =
407                            this.identifiers.get_mut(declaration.identifier())
408                        {
409                            identifier_declarations.remove(old_declaration_id);
410                        }
411                    }
412
413                    let mut new_ids = Vec::with_capacity(declarations.len());
414                    this.declarations.reserve(declarations.len());
415
416                    for (parent_index, mut declaration) in declarations {
417                        declaration.parent = parent_index
418                            .and_then(|ix| some_or_debug_panic(new_ids.get(ix).copied()));
419
420                        let identifier = declaration.identifier.clone();
421                        let declaration_id = this.declarations.insert(Declaration::File {
422                            project_entry_id: entry_id,
423                            declaration,
424                        });
425                        new_ids.push(declaration_id);
426
427                        this.identifiers
428                            .entry(identifier)
429                            .or_default()
430                            .insert(declaration_id);
431                    }
432
433                    file_state.declarations = new_ids;
434                })
435                .ok();
436            }
437        });
438
439        self.files
440            .entry(entry_id)
441            .or_insert_with(Default::default)
442            .task = Some(task);
443    }
444}
445
446#[cfg(test)]
447mod tests {
448    use super::*;
449    use std::{path::Path, sync::Arc};
450
451    use futures::channel::oneshot;
452    use gpui::TestAppContext;
453    use indoc::indoc;
454    use language::{Language, LanguageConfig, LanguageId, LanguageMatcher, tree_sitter_rust};
455    use project::{FakeFs, Project, ProjectItem};
456    use serde_json::json;
457    use settings::SettingsStore;
458    use text::OffsetRangeExt as _;
459    use util::path;
460
461    use crate::syntax_index::SyntaxIndex;
462
463    #[gpui::test]
464    async fn test_unopen_indexed_files(cx: &mut TestAppContext) {
465        let (project, index, rust_lang_id) = init_test(cx).await;
466        let main = Identifier {
467            name: "main".into(),
468            language_id: rust_lang_id,
469        };
470
471        index.read_with(cx, |index, cx| {
472            let decls = index.declarations_for_identifier::<8>(&main, cx);
473            assert_eq!(decls.len(), 2);
474
475            let decl = expect_file_decl("c.rs", &decls[0], &project, cx);
476            assert_eq!(decl.identifier, main.clone());
477            assert_eq!(decl.item_range_in_file, 32..280);
478
479            let decl = expect_file_decl("a.rs", &decls[1], &project, cx);
480            assert_eq!(decl.identifier, main);
481            assert_eq!(decl.item_range_in_file, 0..98);
482        });
483    }
484
485    #[gpui::test]
486    async fn test_parents_in_file(cx: &mut TestAppContext) {
487        let (project, index, rust_lang_id) = init_test(cx).await;
488        let test_process_data = Identifier {
489            name: "test_process_data".into(),
490            language_id: rust_lang_id,
491        };
492
493        index.read_with(cx, |index, cx| {
494            let decls = index.declarations_for_identifier::<8>(&test_process_data, cx);
495            assert_eq!(decls.len(), 1);
496
497            let decl = expect_file_decl("c.rs", &decls[0], &project, cx);
498            assert_eq!(decl.identifier, test_process_data);
499
500            let parent_id = decl.parent.unwrap();
501            let parent = index.declaration(parent_id).unwrap();
502            let parent_decl = expect_file_decl("c.rs", &parent, &project, cx);
503            assert_eq!(
504                parent_decl.identifier,
505                Identifier {
506                    name: "tests".into(),
507                    language_id: rust_lang_id
508                }
509            );
510            assert_eq!(parent_decl.parent, None);
511        });
512    }
513
514    #[gpui::test]
515    async fn test_parents_in_buffer(cx: &mut TestAppContext) {
516        let (project, index, rust_lang_id) = init_test(cx).await;
517        let test_process_data = Identifier {
518            name: "test_process_data".into(),
519            language_id: rust_lang_id,
520        };
521
522        let buffer = project
523            .update(cx, |project, cx| {
524                let project_path = project.find_project_path("c.rs", cx).unwrap();
525                project.open_buffer(project_path, cx)
526            })
527            .await
528            .unwrap();
529
530        cx.run_until_parked();
531
532        index.read_with(cx, |index, cx| {
533            let decls = index.declarations_for_identifier::<8>(&test_process_data, cx);
534            assert_eq!(decls.len(), 1);
535
536            let decl = expect_buffer_decl("c.rs", &decls[0], cx);
537            assert_eq!(decl.identifier, test_process_data);
538
539            let parent_id = decl.parent.unwrap();
540            let parent = index.declaration(parent_id).unwrap();
541            let parent_decl = expect_buffer_decl("c.rs", &parent, cx);
542            assert_eq!(
543                parent_decl.identifier,
544                Identifier {
545                    name: "tests".into(),
546                    language_id: rust_lang_id
547                }
548            );
549            assert_eq!(parent_decl.parent, None);
550        });
551
552        drop(buffer);
553    }
554
555    #[gpui::test]
556    async fn test_declarations_limt(cx: &mut TestAppContext) {
557        let (_, index, rust_lang_id) = init_test(cx).await;
558
559        index.read_with(cx, |index, cx| {
560            let decls = index.declarations_for_identifier::<1>(
561                &Identifier {
562                    name: "main".into(),
563                    language_id: rust_lang_id,
564                },
565                cx,
566            );
567            assert_eq!(decls.len(), 1);
568        });
569    }
570
571    #[gpui::test]
572    async fn test_buffer_shadow(cx: &mut TestAppContext) {
573        let (project, index, rust_lang_id) = init_test(cx).await;
574
575        let main = Identifier {
576            name: "main".into(),
577            language_id: rust_lang_id,
578        };
579
580        let buffer = project
581            .update(cx, |project, cx| {
582                let project_path = project.find_project_path("c.rs", cx).unwrap();
583                project.open_buffer(project_path, cx)
584            })
585            .await
586            .unwrap();
587
588        cx.run_until_parked();
589
590        index.read_with(cx, |index, cx| {
591            let decls = index.declarations_for_identifier::<8>(&main, cx);
592            assert_eq!(decls.len(), 2);
593            let decl = expect_buffer_decl("c.rs", &decls[0], cx);
594            assert_eq!(decl.identifier, main);
595            assert_eq!(decl.item_range.to_offset(&buffer.read(cx)), 32..279);
596
597            expect_file_decl("a.rs", &decls[1], &project, cx);
598        });
599
600        // Drop the buffer and wait for release
601        let (release_tx, release_rx) = oneshot::channel();
602        cx.update(|cx| {
603            cx.observe_release(&buffer, |_, _| {
604                release_tx.send(()).ok();
605            })
606            .detach();
607        });
608        drop(buffer);
609        cx.run_until_parked();
610        release_rx.await.ok();
611        cx.run_until_parked();
612
613        index.read_with(cx, |index, cx| {
614            let decls = index.declarations_for_identifier::<8>(&main, cx);
615            assert_eq!(decls.len(), 2);
616            expect_file_decl("c.rs", &decls[0], &project, cx);
617            expect_file_decl("a.rs", &decls[1], &project, cx);
618        });
619    }
620
621    fn expect_buffer_decl<'a>(
622        path: &str,
623        declaration: &'a Declaration,
624        cx: &App,
625    ) -> &'a BufferDeclaration {
626        if let Declaration::Buffer {
627            declaration,
628            buffer,
629        } = declaration
630        {
631            assert_eq!(
632                buffer
633                    .upgrade()
634                    .unwrap()
635                    .read(cx)
636                    .project_path(cx)
637                    .unwrap()
638                    .path
639                    .as_ref(),
640                Path::new(path),
641            );
642            declaration
643        } else {
644            panic!("Expected a buffer declaration, found {:?}", declaration);
645        }
646    }
647
648    fn expect_file_decl<'a>(
649        path: &str,
650        declaration: &'a Declaration,
651        project: &Entity<Project>,
652        cx: &App,
653    ) -> &'a FileDeclaration {
654        if let Declaration::File {
655            declaration,
656            project_entry_id: file,
657        } = declaration
658        {
659            assert_eq!(
660                project
661                    .read(cx)
662                    .path_for_entry(*file, cx)
663                    .unwrap()
664                    .path
665                    .as_ref(),
666                Path::new(path),
667            );
668            declaration
669        } else {
670            panic!("Expected a file declaration, found {:?}", declaration);
671        }
672    }
673
674    async fn init_test(
675        cx: &mut TestAppContext,
676    ) -> (Entity<Project>, Entity<SyntaxIndex>, LanguageId) {
677        cx.update(|cx| {
678            let settings_store = SettingsStore::test(cx);
679            cx.set_global(settings_store);
680            language::init(cx);
681            Project::init_settings(cx);
682        });
683
684        let fs = FakeFs::new(cx.executor());
685        fs.insert_tree(
686            path!("/root"),
687            json!({
688                "a.rs": indoc! {r#"
689                    fn main() {
690                        let x = 1;
691                        let y = 2;
692                        let z = add(x, y);
693                        println!("Result: {}", z);
694                    }
695
696                    fn add(a: i32, b: i32) -> i32 {
697                        a + b
698                    }
699                "#},
700                "b.rs": indoc! {"
701                    pub struct Config {
702                        pub name: String,
703                        pub value: i32,
704                    }
705
706                    impl Config {
707                        pub fn new(name: String, value: i32) -> Self {
708                            Config { name, value }
709                        }
710                    }
711                "},
712                "c.rs": indoc! {r#"
713                    use std::collections::HashMap;
714
715                    fn main() {
716                        let args: Vec<String> = std::env::args().collect();
717                        let data: Vec<i32> = args[1..]
718                            .iter()
719                            .filter_map(|s| s.parse().ok())
720                            .collect();
721                        let result = process_data(data);
722                        println!("{:?}", result);
723                    }
724
725                    fn process_data(data: Vec<i32>) -> HashMap<i32, usize> {
726                        let mut counts = HashMap::new();
727                        for value in data {
728                            *counts.entry(value).or_insert(0) += 1;
729                        }
730                        counts
731                    }
732
733                    #[cfg(test)]
734                    mod tests {
735                        use super::*;
736
737                        #[test]
738                        fn test_process_data() {
739                            let data = vec![1, 2, 2, 3];
740                            let result = process_data(data);
741                            assert_eq!(result.get(&2), Some(&2));
742                        }
743                    }
744                "#}
745            }),
746        )
747        .await;
748        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
749        let language_registry = project.read_with(cx, |project, _| project.languages().clone());
750        let lang = rust_lang();
751        let lang_id = lang.id();
752        language_registry.add(Arc::new(lang));
753
754        let index = cx.new(|cx| SyntaxIndex::new(&project, cx));
755        cx.run_until_parked();
756
757        (project, index, lang_id)
758    }
759
760    fn rust_lang() -> Language {
761        Language::new(
762            LanguageConfig {
763                name: "Rust".into(),
764                matcher: LanguageMatcher {
765                    path_suffixes: vec!["rs".to_string()],
766                    ..Default::default()
767                },
768                ..Default::default()
769            },
770            Some(tree_sitter_rust::LANGUAGE.into()),
771        )
772        .with_outline_query(include_str!("../../languages/src/rust/outline.scm"))
773        .unwrap()
774    }
775}