Start indexing right away when project was already indexed before (#2941)

Antonio Scandurra created

Release notes:
- Improved semantic search to begin indexing in the background as soon as a
workspace is opened, if the project has been indexed previously.

Change summary

crates/search/src/project_search.rs         | 109 ++++++++++------------
crates/semantic_index/src/db.rs             |   7 -
crates/semantic_index/src/semantic_index.rs |  99 +++++++++++++++++++-
3 files changed, 144 insertions(+), 71 deletions(-)

Detailed changes

crates/search/src/project_search.rs 🔗

@@ -20,12 +20,11 @@ use gpui::{
     Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle,
 };
 use menu::Confirm;
-use postage::stream::Stream;
 use project::{
     search::{PathMatcher, SearchInputs, SearchQuery},
     Entry, Project,
 };
-use semantic_index::SemanticIndex;
+use semantic_index::{SemanticIndex, SemanticIndexStatus};
 use smallvec::SmallVec;
 use std::{
     any::{Any, TypeId},
@@ -116,7 +115,7 @@ pub struct ProjectSearchView {
     model: ModelHandle<ProjectSearch>,
     query_editor: ViewHandle<Editor>,
     results_editor: ViewHandle<Editor>,
-    semantic_state: Option<SemanticSearchState>,
+    semantic_state: Option<SemanticState>,
     semantic_permissioned: Option<bool>,
     search_options: SearchOptions,
     panels_with_errors: HashSet<InputPanel>,
@@ -129,9 +128,9 @@ pub struct ProjectSearchView {
     current_mode: SearchMode,
 }
 
-struct SemanticSearchState {
-    pending_file_count: usize,
-    _progress_task: Task<()>,
+struct SemanticState {
+    index_status: SemanticIndexStatus,
+    _subscription: Subscription,
 }
 
 pub struct ProjectSearchBar {
@@ -230,7 +229,7 @@ impl ProjectSearch {
         self.search_id += 1;
         self.match_ranges.clear();
         self.search_history.add(inputs.as_str().to_string());
-        self.no_results = Some(true);
+        self.no_results = None;
         self.pending_search = Some(cx.spawn(|this, mut cx| async move {
             let results = search?.await.log_err()?;
             let matches = results
@@ -238,9 +237,10 @@ impl ProjectSearch {
                 .map(|result| (result.buffer, vec![result.range.start..result.range.start]));
 
             this.update(&mut cx, |this, cx| {
+                this.no_results = Some(true);
                 this.excerpts.update(cx, |excerpts, cx| {
                     excerpts.clear(cx);
-                })
+                });
             });
             for (buffer, ranges) in matches {
                 let mut match_ranges = this.update(&mut cx, |this, cx| {
@@ -315,15 +315,20 @@ impl View for ProjectSearchView {
                 }
             };
 
-            let semantic_status = if let Some(semantic) = &self.semantic_state {
-                if semantic.pending_file_count > 0 {
-                    format!("Remaining files to index: {}", semantic.pending_file_count)
-                } else {
-                    "Indexing complete".to_string()
+            let semantic_status = self.semantic_state.as_ref().and_then(|semantic| {
+                let status = semantic.index_status;
+                match status {
+                    SemanticIndexStatus::Indexed => Some("Indexing complete".to_string()),
+                    SemanticIndexStatus::Indexing { remaining_files } => {
+                        if remaining_files == 0 {
+                            Some(format!("Indexing..."))
+                        } else {
+                            Some(format!("Remaining files to index: {}", remaining_files))
+                        }
+                    }
+                    SemanticIndexStatus::NotIndexed => None,
                 }
-            } else {
-                "Indexing: ...".to_string()
-            };
+            });
 
             let minor_text = if let Some(no_results) = model.no_results {
                 if model.pending_search.is_none() && no_results {
@@ -333,12 +338,16 @@ impl View for ProjectSearchView {
                 }
             } else {
                 match current_mode {
-                    SearchMode::Semantic => vec![
-                        "".to_owned(),
-                        semantic_status,
-                        "Simply explain the code you are looking to find.".to_owned(),
-                        "ex. 'prompt user for permissions to index their project'".to_owned(),
-                    ],
+                    SearchMode::Semantic => {
+                        let mut minor_text = Vec::new();
+                        minor_text.push("".into());
+                        minor_text.extend(semantic_status);
+                        minor_text.push("Simply explain the code you are looking to find.".into());
+                        minor_text.push(
+                            "ex. 'prompt user for permissions to index their project'".into(),
+                        );
+                        minor_text
+                    }
                     _ => vec![
                         "".to_owned(),
                         "Include/exclude specific paths with the filter option.".to_owned(),
@@ -634,41 +643,29 @@ impl ProjectSearchView {
 
             let project = self.model.read(cx).project.clone();
 
-            let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| {
+            semantic_index.update(cx, |semantic_index, cx| {
                 semantic_index
                     .index_project(project.clone(), cx)
                     .detach_and_log_err(cx);
-                semantic_index.pending_file_count(&project).unwrap()
             });
 
-            cx.spawn(|search_view, mut cx| async move {
-                search_view.update(&mut cx, |search_view, cx| {
-                    cx.notify();
-                    let pending_file_count = *pending_file_count_rx.borrow();
-                    search_view.semantic_state = Some(SemanticSearchState {
-                        pending_file_count,
-                        _progress_task: cx.spawn(|search_view, mut cx| async move {
-                            while let Some(count) = pending_file_count_rx.recv().await {
-                                search_view
-                                    .update(&mut cx, |search_view, cx| {
-                                        if let Some(semantic_search_state) =
-                                            &mut search_view.semantic_state
-                                        {
-                                            semantic_search_state.pending_file_count = count;
-                                            cx.notify();
-                                            if count == 0 {
-                                                return;
-                                            }
-                                        }
-                                    })
-                                    .ok();
-                            }
-                        }),
-                    });
-                })?;
-                anyhow::Ok(())
-            })
-            .detach_and_log_err(cx);
+            self.semantic_state = Some(SemanticState {
+                index_status: semantic_index.read(cx).status(&project),
+                _subscription: cx.observe(&semantic_index, Self::semantic_index_changed),
+            });
+            cx.notify();
+        }
+    }
+
+    fn semantic_index_changed(
+        &mut self,
+        semantic_index: ModelHandle<SemanticIndex>,
+        cx: &mut ViewContext<Self>,
+    ) {
+        let project = self.model.read(cx).project.clone();
+        if let Some(semantic_state) = self.semantic_state.as_mut() {
+            semantic_state.index_status = semantic_index.read(cx).status(&project);
+            cx.notify();
         }
     }
 
@@ -867,7 +864,7 @@ impl ProjectSearchView {
         SemanticIndex::global(cx)
             .map(|semantic| {
                 let project = self.model.read(cx).project.clone();
-                semantic.update(cx, |this, cx| this.project_previously_indexed(project, cx))
+                semantic.update(cx, |this, cx| this.project_previously_indexed(&project, cx))
             })
             .unwrap_or(Task::ready(Ok(false)))
     }
@@ -952,11 +949,7 @@ impl ProjectSearchView {
         let mode = self.current_mode;
         match mode {
             SearchMode::Semantic => {
-                if let Some(semantic) = &mut self.semantic_state {
-                    if semantic.pending_file_count > 0 {
-                        return;
-                    }
-
+                if self.semantic_state.is_some() {
                     if let Some(query) = self.build_search_query(cx) {
                         self.model
                             .update(cx, |model, cx| model.semantic_search(query.as_inner(), cx));

crates/semantic_index/src/db.rs 🔗

@@ -18,7 +18,7 @@ use std::{
     path::{Path, PathBuf},
     rc::Rc,
     sync::Arc,
-    time::{Instant, SystemTime},
+    time::SystemTime,
 };
 use util::TryFutureExt;
 
@@ -232,7 +232,6 @@ impl VectorDatabase {
 
             let file_id = db.last_insert_rowid();
 
-            let t0 = Instant::now();
             let mut query = db.prepare(
                 "
                 INSERT INTO spans
@@ -240,10 +239,6 @@ impl VectorDatabase {
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)
                 ",
             )?;
-            log::trace!(
-                "Preparing Query Took: {:?} milliseconds",
-                t0.elapsed().as_millis()
-            );
 
             for span in spans {
                 query.execute(params![

crates/semantic_index/src/semantic_index.rs 🔗

@@ -35,6 +35,7 @@ use util::{
     paths::EMBEDDINGS_DIR,
     ResultExt,
 };
+use workspace::WorkspaceCreated;
 
 const SEMANTIC_INDEX_VERSION: usize = 10;
 const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60);
@@ -57,6 +58,35 @@ pub fn init(
         return;
     }
 
+    cx.subscribe_global::<WorkspaceCreated, _>({
+        move |event, cx| {
+            let Some(semantic_index) = SemanticIndex::global(cx) else {
+                return;
+            };
+            let workspace = &event.0;
+            if let Some(workspace) = workspace.upgrade(cx) {
+                let project = workspace.read(cx).project().clone();
+                if project.read(cx).is_local() {
+                    cx.spawn(|mut cx| async move {
+                        let previously_indexed = semantic_index
+                            .update(&mut cx, |index, cx| {
+                                index.project_previously_indexed(&project, cx)
+                            })
+                            .await?;
+                        if previously_indexed {
+                            semantic_index
+                                .update(&mut cx, |index, cx| index.index_project(project, cx))
+                                .await?;
+                        }
+                        anyhow::Ok(())
+                    })
+                    .detach_and_log_err(cx);
+                }
+            }
+        }
+    })
+    .detach();
+
     cx.spawn(move |mut cx| async move {
         let semantic_index = SemanticIndex::new(
             fs,
@@ -79,6 +109,13 @@ pub fn init(
     .detach();
 }
 
+#[derive(Copy, Clone, Debug)]
+pub enum SemanticIndexStatus {
+    NotIndexed,
+    Indexed,
+    Indexing { remaining_files: usize },
+}
+
 pub struct SemanticIndex {
     fs: Arc<dyn Fs>,
     db: VectorDatabase,
@@ -94,7 +131,9 @@ struct ProjectState {
     worktrees: HashMap<WorktreeId, WorktreeState>,
     pending_file_count_rx: watch::Receiver<usize>,
     pending_file_count_tx: Arc<Mutex<watch::Sender<usize>>>,
+    pending_index: usize,
     _subscription: gpui::Subscription,
+    _observe_pending_file_count: Task<()>,
 }
 
 enum WorktreeState {
@@ -103,6 +142,10 @@ enum WorktreeState {
 }
 
 impl WorktreeState {
+    fn is_registered(&self) -> bool {
+        matches!(self, Self::Registered(_))
+    }
+
     fn paths_changed(
         &mut self,
         changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
@@ -177,14 +220,25 @@ impl JobHandle {
 }
 
 impl ProjectState {
-    fn new(subscription: gpui::Subscription) -> Self {
+    fn new(subscription: gpui::Subscription, cx: &mut ModelContext<SemanticIndex>) -> Self {
         let (pending_file_count_tx, pending_file_count_rx) = watch::channel_with(0);
         let pending_file_count_tx = Arc::new(Mutex::new(pending_file_count_tx));
         Self {
             worktrees: Default::default(),
-            pending_file_count_rx,
+            pending_file_count_rx: pending_file_count_rx.clone(),
             pending_file_count_tx,
+            pending_index: 0,
             _subscription: subscription,
+            _observe_pending_file_count: cx.spawn_weak({
+                let mut pending_file_count_rx = pending_file_count_rx.clone();
+                |this, mut cx| async move {
+                    while let Some(_) = pending_file_count_rx.next().await {
+                        if let Some(this) = this.upgrade(&cx) {
+                            this.update(&mut cx, |_, cx| cx.notify());
+                        }
+                    }
+                }
+            }),
         }
     }
 
@@ -227,6 +281,25 @@ impl SemanticIndex {
             && *RELEASE_CHANNEL != ReleaseChannel::Stable
     }
 
+    pub fn status(&self, project: &ModelHandle<Project>) -> SemanticIndexStatus {
+        if let Some(project_state) = self.projects.get(&project.downgrade()) {
+            if project_state
+                .worktrees
+                .values()
+                .all(|worktree| worktree.is_registered())
+                && project_state.pending_index == 0
+            {
+                SemanticIndexStatus::Indexed
+            } else {
+                SemanticIndexStatus::Indexing {
+                    remaining_files: project_state.pending_file_count_rx.borrow().clone(),
+                }
+            }
+        } else {
+            SemanticIndexStatus::NotIndexed
+        }
+    }
+
     async fn new(
         fs: Arc<dyn Fs>,
         database_path: PathBuf,
@@ -356,7 +429,7 @@ impl SemanticIndex {
 
     pub fn project_previously_indexed(
         &mut self,
-        project: ModelHandle<Project>,
+        project: &ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<bool>> {
         let worktrees_indexed_previously = project
@@ -770,13 +843,15 @@ impl SemanticIndex {
                 }
                 _ => {}
             });
-            self.projects
-                .insert(project.downgrade(), ProjectState::new(subscription));
+            let project_state = ProjectState::new(subscription, cx);
+            self.projects.insert(project.downgrade(), project_state);
             self.project_worktrees_changed(project.clone(), cx);
         }
-        let project_state = &self.projects[&project.downgrade()];
-        let mut pending_file_count_rx = project_state.pending_file_count_rx.clone();
+        let project_state = self.projects.get_mut(&project.downgrade()).unwrap();
+        project_state.pending_index += 1;
+        cx.notify();
 
+        let mut pending_file_count_rx = project_state.pending_file_count_rx.clone();
         let db = self.db.clone();
         let language_registry = self.language_registry.clone();
         let parsing_files_tx = self.parsing_files_tx.clone();
@@ -887,6 +962,16 @@ impl SemanticIndex {
                 })
                 .await;
 
+            this.update(&mut cx, |this, cx| {
+                let project_state = this
+                    .projects
+                    .get_mut(&project.downgrade())
+                    .ok_or_else(|| anyhow!("project was dropped"))?;
+                project_state.pending_index -= 1;
+                cx.notify();
+                anyhow::Ok(())
+            })?;
+
             Ok(())
         })
     }