From 85acc2be4467a65203452a120063fad53e6833b6 Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Thu, 13 Jun 2024 12:07:26 -0400 Subject: [PATCH] Persist index for `/rustdoc` in LMDB (#12988) This PR updates the `/rustdoc` command with persistence for the documented rustdoc items. Now when you run `/rustdoc --index <crate_name>` it will index the crate and store the results in LMDB. The documented items will then be read from the database when searching using `/rustdoc` and persist across restarts of Zed. Release Notes: - N/A --- Cargo.lock | 4 +- .../src/slash_command/rustdoc_command.rs | 7 +- crates/rustdoc/Cargo.toml | 4 +- crates/rustdoc/src/item.rs | 6 +- crates/rustdoc/src/store.rs | 191 +++++++++++++++--- 5 files changed, 174 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76fc50d64bed80bff6ada8939bd090749343632e..1aad3cc67716641dfd3870b341f935a6812cb749 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8718,13 +8718,15 @@ dependencies = [ "futures 0.3.28", "fuzzy", "gpui", + "heed", "html_to_markdown", "http 0.1.0", "indexmap 1.9.3", "indoc", - "parking_lot", "pretty_assertions", + "serde", "strum", + "util", ] [[package]] diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index baa6568d1c353bab371b3c4079a4e339d46f56cc..827a961a4910785622dd900a4914a5c6c9c7e32e 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -124,10 +124,7 @@ impl SlashCommand for RustdocSlashCommand { let store = RustdocStore::global(cx); cx.background_executor().spawn(async move { let items = store.search(query).await; - Ok(items - .into_iter() - .map(|(crate_name, item)| format!("{crate_name}::{}", item.display())) - .collect()) + Ok(items) }) } @@ -228,7 +225,7 @@ impl SlashCommand for RustdocSlashCommand { .await; if let Ok(item_docs) = item_docs { - anyhow::Ok((RustdocSource::Local, item_docs)) + 
anyhow::Ok((RustdocSource::Local, item_docs.docs().to_owned())) } else { Self::build_message( fs, diff --git a/crates/rustdoc/Cargo.toml b/crates/rustdoc/Cargo.toml index 6aa0f5bb4bb00d6192680fea94002a1aa911cd28..1937204606899052b84d91ee6560b13036b047ac 100644 --- a/crates/rustdoc/Cargo.toml +++ b/crates/rustdoc/Cargo.toml @@ -19,11 +19,13 @@ fs.workspace = true futures.workspace = true fuzzy.workspace = true gpui.workspace = true +heed.workspace = true html_to_markdown.workspace = true http.workspace = true indexmap.workspace = true -parking_lot.workspace = true +serde.workspace = true strum.workspace = true +util.workspace = true [dev-dependencies] indoc.workspace = true diff --git a/crates/rustdoc/src/item.rs b/crates/rustdoc/src/item.rs index 14d414113f2f080010f4741359e25b1753a86406..7d9023ef3e1bcda298e7c1aaabcf9205f333ee23 100644 --- a/crates/rustdoc/src/item.rs +++ b/crates/rustdoc/src/item.rs @@ -1,8 +1,12 @@ use std::sync::Arc; +use serde::{Deserialize, Serialize}; use strum::EnumIter; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)] +#[derive( + Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize, EnumIter, +)] +#[serde(rename_all = "snake_case")] pub enum RustdocItemKind { Mod, Macro, diff --git a/crates/rustdoc/src/store.rs b/crates/rustdoc/src/store.rs index 626a58e1c176e518acc2847aa314bb5165f0e669..9e14d245c8293bea3385af0cc75c35fee347b68c 100644 --- a/crates/rustdoc/src/store.rs +++ b/crates/rustdoc/src/store.rs @@ -1,14 +1,20 @@ +use std::path::PathBuf; use std::sync::atomic::AtomicBool; use std::sync::Arc; use anyhow::{anyhow, Result}; -use collections::HashMap; +use futures::future::{self, BoxFuture, Shared}; +use futures::FutureExt; use fuzzy::StringMatchCandidate; use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal}; -use parking_lot::RwLock; +use heed::types::SerdeBincode; +use heed::Database; +use serde::{Deserialize, Serialize}; +use 
util::paths::SUPPORT_DIR; +use util::ResultExt; use crate::crawler::{RustdocCrawler, RustdocProvider}; -use crate::RustdocItem; +use crate::{RustdocItem, RustdocItemKind}; struct GlobalRustdocStore(Arc); @@ -16,7 +22,7 @@ impl Global for GlobalRustdocStore {} pub struct RustdocStore { executor: BackgroundExecutor, - docs: Arc>>, + database_future: Shared, Arc>>>, } impl RustdocStore { @@ -32,26 +38,34 @@ impl RustdocStore { } pub fn new(executor: BackgroundExecutor) -> Self { + let database_future = executor + .spawn({ + let executor = executor.clone(); + async move { + RustdocDatabase::new(SUPPORT_DIR.join("docs/rust/rustdoc-db.0.mdb"), executor) + } + }) + .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new))) + .boxed() + .shared(); + Self { executor, - docs: Arc::new(RwLock::new(HashMap::default())), + database_future, } } - pub fn load(&self, crate_name: String, item_path: Option) -> Task> { - let item_docs = self - .docs - .read() - .iter() - .find_map(|((item_crate_name, item), item_docs)| { - if item_crate_name == &crate_name && item_path == Some(item.display()) { - Some(item_docs.clone()) - } else { - None - } - }); - - Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found"))) + pub async fn load( + &self, + crate_name: String, + item_path: Option, + ) -> Result { + self.database_future + .clone() + .await + .map_err(|err| anyhow!(err))? + .load(crate_name, item_path) + .await } pub fn index( @@ -59,42 +73,50 @@ impl RustdocStore { crate_name: String, provider: Box, ) -> Task> { - let docs = self.docs.clone(); + let database_future = self.database_future.clone(); self.executor.spawn(async move { let crawler = RustdocCrawler::new(provider); - println!("Indexing {crate_name}"); - let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? 
else { return Ok(()); }; - let mut lock = docs.write(); + let database = database_future.await.map_err(|err| anyhow!(err))?; + + database + .insert(crate_name.clone(), None, crate_docs.crate_root_markdown) + .await?; for (item, item_docs) in crate_docs.items { - lock.insert((crate_name.clone(), item), item_docs); + database + .insert(crate_name.clone(), Some(&item), item_docs) + .await?; } Ok(()) }) } - pub fn search(&self, query: String) -> Task> { + pub fn search(&self, query: String) -> Task> { let executor = self.executor.clone(); - let docs = self.docs.read().clone(); + let database_future = self.database_future.clone(); self.executor.spawn(async move { if query.is_empty() { return Vec::new(); } - let items = docs.keys().collect::>(); + let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else { + return Vec::new(); + }; + + let Some(items) = database.keys().await.log_err() else { + return Vec::new(); + }; let candidates = items .iter() .enumerate() - .map(|(ix, (crate_name, item))| { - StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display())) - }) + .map(|(ix, item_path)| StringMatchCandidate::new(ix, item_path.clone())) .collect::>(); let matches = fuzzy::match_strings( @@ -114,3 +136,112 @@ impl RustdocStore { }) } } + +#[derive(Serialize, Deserialize)] +pub enum RustdocDatabaseEntry { + Crate { docs: String }, + Item { kind: RustdocItemKind, docs: String }, +} + +impl RustdocDatabaseEntry { + pub fn docs(&self) -> &str { + match self { + Self::Crate { docs } | Self::Item { docs, .. } => &docs, + } + } +} + +struct RustdocDatabase { + executor: BackgroundExecutor, + env: heed::Env, + entries: Database, SerdeBincode>, +} + +impl RustdocDatabase { + pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result { + std::fs::create_dir_all(&path)?; + + const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024; + let env = unsafe { + heed::EnvOpenOptions::new() + .map_size(ONE_GB_IN_BYTES) + .max_dbs(1) + .open(path)? 
+ }; + + let mut txn = env.write_txn()?; + let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?; + txn.commit()?; + + Ok(Self { + executor, + env, + entries, + }) + } + + pub fn keys(&self) -> Task>> { + let env = self.env.clone(); + let entries = self.entries; + + self.executor.spawn(async move { + let txn = env.read_txn()?; + let mut iter = entries.iter(&txn)?; + let mut keys = Vec::new(); + while let Some((key, _value)) = iter.next().transpose()? { + keys.push(key); + } + + Ok(keys) + }) + } + + pub fn load( + &self, + crate_name: String, + item_path: Option, + ) -> Task> { + let env = self.env.clone(); + let entries = self.entries; + let item_path = if let Some(item_path) = item_path { + format!("{crate_name}::{item_path}") + } else { + crate_name + }; + + self.executor.spawn(async move { + let txn = env.read_txn()?; + entries + .get(&txn, &item_path)? + .ok_or_else(|| anyhow!("no docs found for {item_path}")) + }) + } + + pub fn insert( + &self, + crate_name: String, + item: Option<&RustdocItem>, + docs: String, + ) -> Task> { + let env = self.env.clone(); + let entries = self.entries; + let (item_path, entry) = if let Some(item) = item { + ( + format!("{crate_name}::{}", item.display()), + RustdocDatabaseEntry::Item { + kind: item.kind, + docs, + }, + ) + } else { + (crate_name, RustdocDatabaseEntry::Crate { docs }) + }; + + self.executor.spawn(async move { + let mut txn = env.write_txn()?; + entries.put(&mut txn, &item_path, &entry)?; + txn.commit()?; + Ok(()) + }) + } +}