From 6181ac6bade22e7ed074c47793a7fde298a5536b Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Thu, 13 Jun 2024 16:40:06 -0400 Subject: [PATCH] rustdoc: Index crates progressively (#13011) This PR updates the rustdoc indexing to be more progressive. Rather than waiting until we've crawled the entire crate to begin writing to the database, we instead start writing the docs as we go. This makes it so you can start getting completions while the indexing is still running. Release Notes: - N/A --- .../src/slash_command/rustdoc_command.rs | 2 +- crates/rustdoc/src/{crawler.rs => indexer.rs} | 43 +++++++++++-------- crates/rustdoc/src/rustdoc.rs | 3 +- crates/rustdoc/src/store.rs | 23 ++-------- 4 files changed, 31 insertions(+), 40 deletions(-) rename crates/rustdoc/src/{crawler.rs => indexer.rs} (87%) diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index 827a961a4910785622dd900a4914a5c6c9c7e32e..385e48d67db5f5ed2b0b17d20109b5adbfeb862b 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -10,7 +10,7 @@ use gpui::{AppContext, Model, Task, WeakView}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; -use rustdoc::crawler::LocalProvider; +use rustdoc::LocalProvider; use rustdoc::{convert_rustdoc_to_markdown, RustdocStore}; use ui::{prelude::*, ButtonLike, ElevationIndex}; use workspace::Workspace; diff --git a/crates/rustdoc/src/crawler.rs b/crates/rustdoc/src/indexer.rs similarity index 87% rename from crates/rustdoc/src/crawler.rs rename to crates/rustdoc/src/indexer.rs index 80f3e0fc4adceb87450099f632d22aee52a39f14..f56b2eda1a94a6300e07bf8518b5ff209b4dca81 100644 --- a/crates/rustdoc/src/crawler.rs +++ b/crates/rustdoc/src/indexer.rs @@ -7,9 +7,8 @@ use collections::{HashSet, VecDeque}; use fs::Fs; use futures::AsyncReadExt; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; -use indexmap::IndexMap; -use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind}; +use crate::{convert_rustdoc_to_markdown, RustdocDatabase, RustdocItem, RustdocItemKind}; #[derive(Debug, Clone, Copy)] pub enum RustdocSource { @@ -129,29 +128,32 @@ struct RustdocItemWithHistory { pub history: Vec, } -pub struct CrateDocs { - pub crate_root_markdown: String, - pub items: IndexMap, -} - -pub struct RustdocCrawler { +pub(crate) struct RustdocIndexer { + database: Arc, provider: Box, } -impl RustdocCrawler { - pub fn new(provider: Box) -> Self { - Self { provider } +impl RustdocIndexer { + pub fn new( + database: Arc, + provider: Box, + ) -> Self { + Self { database, provider } } - pub async fn crawl(&self, crate_name: String) -> Result> { + /// Indexes the crate with the given name. + pub async fn index(&self, crate_name: String) -> Result<()> { let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else { - return Ok(None); + return Ok(()); }; let (crate_root_markdown, items) = convert_rustdoc_to_markdown(crate_root_content.as_bytes())?; - let mut docs_by_item = IndexMap::new(); + self.database + .insert(crate_name.clone(), None, crate_root_markdown) + .await?; + let mut seen_items = HashSet::from_iter(items.clone()); let mut items_to_visit: VecDeque = VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { @@ -189,7 +191,13 @@ impl RustdocCrawler { let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - docs_by_item.insert(item.clone(), markdown); + self.database + .insert( + format!("{crate_name}::{}", item.display()), + Some(item), + markdown, + ) + .await?; let parent_item = item; for mut item in referenced_items { @@ -219,9 +227,6 @@ impl RustdocCrawler { } } - Ok(Some(CrateDocs { - crate_root_markdown, - items: docs_by_item, - })) + Ok(()) } } diff --git a/crates/rustdoc/src/rustdoc.rs b/crates/rustdoc/src/rustdoc.rs index a35fdac62a277de73d9dd9ce9499eca2b4b3ae9b..5bf300a5f4338129cc5073aba2bbd07712b533c1 100644 --- a/crates/rustdoc/src/rustdoc.rs +++ b/crates/rustdoc/src/rustdoc.rs @@ -1,8 +1,9 @@ -pub mod crawler; +mod indexer; mod item; mod store; mod to_markdown; +pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource}; pub use crate::item::*; pub use crate::store::*; pub use crate::to_markdown::convert_rustdoc_to_markdown; diff --git a/crates/rustdoc/src/store.rs b/crates/rustdoc/src/store.rs index 9e14d245c8293bea3385af0cc75c35fee347b68c..3372d281b6bb83d1c7b7c5437c0381dde40b0495 100644 --- a/crates/rustdoc/src/store.rs +++ b/crates/rustdoc/src/store.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use util::paths::SUPPORT_DIR; use util::ResultExt; -use crate::crawler::{RustdocCrawler, RustdocProvider}; +use crate::indexer::{RustdocIndexer, RustdocProvider}; use crate::{RustdocItem, RustdocItemKind}; struct GlobalRustdocStore(Arc); @@ -75,25 +75,10 @@ impl RustdocStore { ) -> Task> { let database_future = self.database_future.clone(); self.executor.spawn(async move { - let crawler = RustdocCrawler::new(provider); - - let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else { - return Ok(()); - }; - let database = database_future.await.map_err(|err| anyhow!(err))?; + let indexer = RustdocIndexer::new(database, provider); - database - .insert(crate_name.clone(), None, crate_docs.crate_root_markdown) - .await?; - - for (item, item_docs) in crate_docs.items { - database - .insert(crate_name.clone(), Some(&item), item_docs) - .await?; - } - - Ok(()) + indexer.index(crate_name.clone()).await }) } @@ -151,7 +136,7 @@ impl RustdocDatabaseEntry { } } -struct RustdocDatabase { +pub(crate) struct RustdocDatabase { executor: BackgroundExecutor, env: heed::Env, entries: Database, SerdeBincode>,