Persist index for `/rustdoc` in LMDB (#12988)

Marshall Bowers created

This PR updates the `/rustdoc` command with persistence for the
documented rustdoc items.

Now, when you run `/rustdoc --index <CRATE_NAME>`, it indexes the crate
and stores the results in LMDB.

When searching with `/rustdoc`, the documented items are then read from
the database, so the index persists across restarts of Zed.

Release Notes:

- N/A

Change summary

Cargo.lock                                            |   4 
crates/assistant/src/slash_command/rustdoc_command.rs |   7 
crates/rustdoc/Cargo.toml                             |   4 
crates/rustdoc/src/item.rs                            |   6 
crates/rustdoc/src/store.rs                           | 191 ++++++++++--
5 files changed, 174 insertions(+), 38 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8718,13 +8718,15 @@ dependencies = [
  "futures 0.3.28",
  "fuzzy",
  "gpui",
+ "heed",
  "html_to_markdown",
  "http 0.1.0",
  "indexmap 1.9.3",
  "indoc",
- "parking_lot",
  "pretty_assertions",
+ "serde",
  "strum",
+ "util",
 ]
 
 [[package]]

crates/assistant/src/slash_command/rustdoc_command.rs 🔗

@@ -124,10 +124,7 @@ impl SlashCommand for RustdocSlashCommand {
         let store = RustdocStore::global(cx);
         cx.background_executor().spawn(async move {
             let items = store.search(query).await;
-            Ok(items
-                .into_iter()
-                .map(|(crate_name, item)| format!("{crate_name}::{}", item.display()))
-                .collect())
+            Ok(items)
         })
     }
 
@@ -228,7 +225,7 @@ impl SlashCommand for RustdocSlashCommand {
                     .await;
 
                 if let Ok(item_docs) = item_docs {
-                    anyhow::Ok((RustdocSource::Local, item_docs))
+                    anyhow::Ok((RustdocSource::Local, item_docs.docs().to_owned()))
                 } else {
                     Self::build_message(
                         fs,

crates/rustdoc/Cargo.toml 🔗

@@ -19,11 +19,13 @@ fs.workspace = true
 futures.workspace = true
 fuzzy.workspace = true
 gpui.workspace = true
+heed.workspace = true
 html_to_markdown.workspace = true
 http.workspace = true
 indexmap.workspace = true
-parking_lot.workspace = true
+serde.workspace = true
 strum.workspace = true
+util.workspace = true
 
 [dev-dependencies]
 indoc.workspace = true

crates/rustdoc/src/item.rs 🔗

@@ -1,8 +1,12 @@
 use std::sync::Arc;
 
+use serde::{Deserialize, Serialize};
 use strum::EnumIter;
 
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
+#[derive(
+    Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize, EnumIter,
+)]
+#[serde(rename_all = "snake_case")]
 pub enum RustdocItemKind {
     Mod,
     Macro,

crates/rustdoc/src/store.rs 🔗

@@ -1,14 +1,20 @@
+use std::path::PathBuf;
 use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 
 use anyhow::{anyhow, Result};
-use collections::HashMap;
+use futures::future::{self, BoxFuture, Shared};
+use futures::FutureExt;
 use fuzzy::StringMatchCandidate;
 use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
-use parking_lot::RwLock;
+use heed::types::SerdeBincode;
+use heed::Database;
+use serde::{Deserialize, Serialize};
+use util::paths::SUPPORT_DIR;
+use util::ResultExt;
 
 use crate::crawler::{RustdocCrawler, RustdocProvider};
-use crate::RustdocItem;
+use crate::{RustdocItem, RustdocItemKind};
 
 struct GlobalRustdocStore(Arc<RustdocStore>);
 
@@ -16,7 +22,7 @@ impl Global for GlobalRustdocStore {}
 
 pub struct RustdocStore {
     executor: BackgroundExecutor,
-    docs: Arc<RwLock<HashMap<(String, RustdocItem), String>>>,
+    database_future: Shared<BoxFuture<'static, Result<Arc<RustdocDatabase>, Arc<anyhow::Error>>>>,
 }
 
 impl RustdocStore {
@@ -32,26 +38,34 @@ impl RustdocStore {
     }
 
     pub fn new(executor: BackgroundExecutor) -> Self {
+        let database_future = executor
+            .spawn({
+                let executor = executor.clone();
+                async move {
+                    RustdocDatabase::new(SUPPORT_DIR.join("docs/rust/rustdoc-db.0.mdb"), executor)
+                }
+            })
+            .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
+            .boxed()
+            .shared();
+
         Self {
             executor,
-            docs: Arc::new(RwLock::new(HashMap::default())),
+            database_future,
         }
     }
 
-    pub fn load(&self, crate_name: String, item_path: Option<String>) -> Task<Result<String>> {
-        let item_docs = self
-            .docs
-            .read()
-            .iter()
-            .find_map(|((item_crate_name, item), item_docs)| {
-                if item_crate_name == &crate_name && item_path == Some(item.display()) {
-                    Some(item_docs.clone())
-                } else {
-                    None
-                }
-            });
-
-        Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found")))
+    pub async fn load(
+        &self,
+        crate_name: String,
+        item_path: Option<String>,
+    ) -> Result<RustdocDatabaseEntry> {
+        self.database_future
+            .clone()
+            .await
+            .map_err(|err| anyhow!(err))?
+            .load(crate_name, item_path)
+            .await
     }
 
     pub fn index(
@@ -59,42 +73,50 @@ impl RustdocStore {
         crate_name: String,
         provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
     ) -> Task<Result<()>> {
-        let docs = self.docs.clone();
+        let database_future = self.database_future.clone();
         self.executor.spawn(async move {
             let crawler = RustdocCrawler::new(provider);
 
-            println!("Indexing {crate_name}");
-
             let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
                 return Ok(());
             };
 
-            let mut lock = docs.write();
+            let database = database_future.await.map_err(|err| anyhow!(err))?;
+
+            database
+                .insert(crate_name.clone(), None, crate_docs.crate_root_markdown)
+                .await?;
 
             for (item, item_docs) in crate_docs.items {
-                lock.insert((crate_name.clone(), item), item_docs);
+                database
+                    .insert(crate_name.clone(), Some(&item), item_docs)
+                    .await?;
             }
 
             Ok(())
         })
     }
 
-    pub fn search(&self, query: String) -> Task<Vec<(String, RustdocItem)>> {
+    pub fn search(&self, query: String) -> Task<Vec<String>> {
         let executor = self.executor.clone();
-        let docs = self.docs.read().clone();
+        let database_future = self.database_future.clone();
         self.executor.spawn(async move {
             if query.is_empty() {
                 return Vec::new();
             }
 
-            let items = docs.keys().collect::<Vec<_>>();
+            let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else {
+                return Vec::new();
+            };
+
+            let Some(items) = database.keys().await.log_err() else {
+                return Vec::new();
+            };
 
             let candidates = items
                 .iter()
                 .enumerate()
-                .map(|(ix, (crate_name, item))| {
-                    StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display()))
-                })
+                .map(|(ix, item_path)| StringMatchCandidate::new(ix, item_path.clone()))
                 .collect::<Vec<_>>();
 
             let matches = fuzzy::match_strings(
@@ -114,3 +136,112 @@ impl RustdocStore {
         })
     }
 }
+
+#[derive(Serialize, Deserialize)]
+pub enum RustdocDatabaseEntry {
+    Crate { docs: String },
+    Item { kind: RustdocItemKind, docs: String },
+}
+
+impl RustdocDatabaseEntry {
+    pub fn docs(&self) -> &str {
+        match self {
+            Self::Crate { docs } | Self::Item { docs, .. } => &docs,
+        }
+    }
+}
+
+struct RustdocDatabase {
+    executor: BackgroundExecutor,
+    env: heed::Env,
+    entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,
+}
+
+impl RustdocDatabase {
+    pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
+        std::fs::create_dir_all(&path)?;
+
+        const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024;
+        let env = unsafe {
+            heed::EnvOpenOptions::new()
+                .map_size(ONE_GB_IN_BYTES)
+                .max_dbs(1)
+                .open(path)?
+        };
+
+        let mut txn = env.write_txn()?;
+        let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?;
+        txn.commit()?;
+
+        Ok(Self {
+            executor,
+            env,
+            entries,
+        })
+    }
+
+    pub fn keys(&self) -> Task<Result<Vec<String>>> {
+        let env = self.env.clone();
+        let entries = self.entries;
+
+        self.executor.spawn(async move {
+            let txn = env.read_txn()?;
+            let mut iter = entries.iter(&txn)?;
+            let mut keys = Vec::new();
+            while let Some((key, _value)) = iter.next().transpose()? {
+                keys.push(key);
+            }
+
+            Ok(keys)
+        })
+    }
+
+    pub fn load(
+        &self,
+        crate_name: String,
+        item_path: Option<String>,
+    ) -> Task<Result<RustdocDatabaseEntry>> {
+        let env = self.env.clone();
+        let entries = self.entries;
+        let item_path = if let Some(item_path) = item_path {
+            format!("{crate_name}::{item_path}")
+        } else {
+            crate_name
+        };
+
+        self.executor.spawn(async move {
+            let txn = env.read_txn()?;
+            entries
+                .get(&txn, &item_path)?
+                .ok_or_else(|| anyhow!("no docs found for {item_path}"))
+        })
+    }
+
+    pub fn insert(
+        &self,
+        crate_name: String,
+        item: Option<&RustdocItem>,
+        docs: String,
+    ) -> Task<Result<()>> {
+        let env = self.env.clone();
+        let entries = self.entries;
+        let (item_path, entry) = if let Some(item) = item {
+            (
+                format!("{crate_name}::{}", item.display()),
+                RustdocDatabaseEntry::Item {
+                    kind: item.kind,
+                    docs,
+                },
+            )
+        } else {
+            (crate_name, RustdocDatabaseEntry::Crate { docs })
+        };
+
+        self.executor.spawn(async move {
+            let mut txn = env.write_txn()?;
+            entries.put(&mut txn, &item_path, &entry)?;
+            txn.commit()?;
+            Ok(())
+        })
+    }
+}