assistant: Add MVP for `/rustdoc` using indexed docs (#12952)

Marshall Bowers created

This PR adds an MVP for retrieving docs from an indexed set of docs
using the `/rustdoc` command.

To try this out:

1. Build local docs using `cargo doc`
2. Index the docs for the crate you want to search using `/rustdoc
--index <CRATE_NAME>`
    - Note: Indexing may take a while, depending on the size of the crate
3. Search for docs using `/rustdoc my_crate::path::to::item`
    - You should get completions for the available items

Here are some screenshots of it in action:

<img width="640" alt="Screenshot 2024-06-12 at 6 19 20 PM"
src="https://github.com/zed-industries/zed/assets/1486634/6c49bec9-d084-4dcb-a92c-1b4c557ee9ce">

<img width="636" alt="Screenshot 2024-06-12 at 6 52 56 PM"
src="https://github.com/zed-industries/zed/assets/1486634/636a651c-7d02-48dc-b05c-931f33c49f9c">

Release Notes:

- N/A

Change summary

Cargo.lock                                            |   3 
crates/assistant/src/assistant.rs                     |   2 
crates/assistant/src/slash_command/rustdoc_command.rs | 140 +++++++++++-
crates/rustdoc/Cargo.toml                             |   3 
crates/rustdoc/src/crawler.rs                         |  64 +++--
crates/rustdoc/src/item.rs                            |   7 
crates/rustdoc/src/rustdoc.rs                         |   2 
crates/rustdoc/src/store.rs                           | 116 ++++++++++
8 files changed, 295 insertions(+), 42 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8696,10 +8696,13 @@ dependencies = [
  "collections",
  "fs",
  "futures 0.3.28",
+ "fuzzy",
+ "gpui",
  "html_to_markdown",
  "http 0.1.0",
  "indexmap 1.9.3",
  "indoc",
+ "parking_lot",
  "pretty_assertions",
  "strum",
 ]

crates/assistant/src/assistant.rs 🔗

@@ -21,6 +21,7 @@ pub(crate) use context_store::*;
 use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
 pub(crate) use inline_assistant::*;
 pub(crate) use model_selector::*;
+use rustdoc::RustdocStore;
 use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -286,6 +287,7 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
     register_slash_commands(cx);
     assistant_panel::init(cx);
     inline_assistant::init(client.telemetry().clone(), cx);
+    RustdocStore::init_global(cx);
 
     CommandPaletteFilter::update_global(cx, |filter, _cx| {
         filter.hide_namespace(Assistant::NAMESPACE);

crates/assistant/src/slash_command/rustdoc_command.rs 🔗

@@ -10,7 +10,8 @@ use gpui::{AppContext, Model, Task, WeakView};
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 use language::LspAdapterDelegate;
 use project::{Project, ProjectPath};
-use rustdoc::convert_rustdoc_to_markdown;
+use rustdoc::crawler::LocalProvider;
+use rustdoc::{convert_rustdoc_to_markdown, RustdocStore};
 use ui::{prelude::*, ButtonLike, ElevationIndex};
 use workspace::Workspace;
 
@@ -115,12 +116,19 @@ impl SlashCommand for RustdocSlashCommand {
 
     fn complete_argument(
         &self,
-        _query: String,
+        query: String,
         _cancel: Arc<AtomicBool>,
         _workspace: Option<WeakView<Workspace>>,
-        _cx: &mut AppContext,
+        cx: &mut AppContext,
     ) -> Task<Result<Vec<String>>> {
-        Task::ready(Ok(Vec::new()))
+        let store = RustdocStore::global(cx);
+        cx.background_executor().spawn(async move {
+            let items = store.search(query).await;
+            Ok(items
+                .into_iter()
+                .map(|(crate_name, item)| format!("{crate_name}::{}", item.display()))
+                .collect())
+        })
     }
 
     fn run(
@@ -140,7 +148,67 @@ impl SlashCommand for RustdocSlashCommand {
         let project = workspace.read(cx).project().clone();
         let fs = project.read(cx).fs().clone();
         let http_client = workspace.read(cx).client().http_client();
-        let mut path_components = argument.split("::");
+        let path_to_cargo_toml = Self::path_to_cargo_toml(project, cx);
+
+        let mut item_path = String::new();
+        let mut crate_name_to_index = None;
+
+        let mut args = argument.split(' ').map(|word| word.trim());
+        while let Some(arg) = args.next() {
+            if arg == "--index" {
+                let Some(crate_name) = args.next() else {
+                    return Task::ready(Err(anyhow!("no crate name provided to --index")));
+                };
+                crate_name_to_index = Some(crate_name.to_string());
+                continue;
+            }
+
+            item_path.push_str(arg);
+        }
+
+        if let Some(crate_name_to_index) = crate_name_to_index {
+            let index_task = cx.background_executor().spawn({
+                let rustdoc_store = RustdocStore::global(cx);
+                let fs = fs.clone();
+                let crate_name_to_index = crate_name_to_index.clone();
+                async move {
+                    let cargo_workspace_root = path_to_cargo_toml
+                        .and_then(|path| path.parent().map(|path| path.to_path_buf()))
+                        .ok_or_else(|| anyhow!("no Cargo workspace root found"))?;
+
+                    let provider = Box::new(LocalProvider::new(fs, cargo_workspace_root));
+
+                    rustdoc_store
+                        .index(crate_name_to_index.clone(), provider)
+                        .await?;
+
+                    anyhow::Ok(format!("Indexed {crate_name_to_index}"))
+                }
+            });
+
+            return cx.foreground_executor().spawn(async move {
+                let text = index_task.await?;
+                let range = 0..text.len();
+                Ok(SlashCommandOutput {
+                    text,
+                    sections: vec![SlashCommandOutputSection {
+                        range,
+                        render_placeholder: Arc::new(move |id, unfold, _cx| {
+                            RustdocIndexPlaceholder {
+                                id,
+                                unfold,
+                                source: RustdocSource::Local,
+                                crate_name: SharedString::from(crate_name_to_index.clone()),
+                            }
+                            .into_any_element()
+                        }),
+                    }],
+                    run_commands_in_text: false,
+                })
+            });
+        }
+
+        let mut path_components = item_path.split("::");
         let crate_name = match path_components
             .next()
             .ok_or_else(|| anyhow!("missing crate name"))
@@ -148,29 +216,37 @@ impl SlashCommand for RustdocSlashCommand {
             Ok(crate_name) => crate_name.to_string(),
             Err(err) => return Task::ready(Err(err)),
         };
-        let module_path = path_components.map(ToString::to_string).collect::<Vec<_>>();
-        let path_to_cargo_toml = Self::path_to_cargo_toml(project, cx);
+        let item_path = path_components.map(ToString::to_string).collect::<Vec<_>>();
 
         let text = cx.background_executor().spawn({
+            let rustdoc_store = RustdocStore::global(cx);
             let crate_name = crate_name.clone();
-            let module_path = module_path.clone();
+            let item_path = item_path.clone();
             async move {
-                Self::build_message(
-                    fs,
-                    http_client,
-                    crate_name,
-                    module_path,
-                    path_to_cargo_toml.as_deref(),
-                )
-                .await
+                let item_docs = rustdoc_store
+                    .load(crate_name.clone(), Some(item_path.join("::")))
+                    .await;
+
+                if let Ok(item_docs) = item_docs {
+                    anyhow::Ok((RustdocSource::Local, item_docs))
+                } else {
+                    Self::build_message(
+                        fs,
+                        http_client,
+                        crate_name,
+                        item_path,
+                        path_to_cargo_toml.as_deref(),
+                    )
+                    .await
+                }
             }
         });
 
         let crate_name = SharedString::from(crate_name);
-        let module_path = if module_path.is_empty() {
+        let module_path = if item_path.is_empty() {
             None
         } else {
-            Some(SharedString::from(module_path.join("::")))
+            Some(SharedString::from(item_path.join("::")))
         };
         cx.foreground_executor().spawn(async move {
             let (source, text) = text.await?;
@@ -228,3 +304,31 @@ impl RenderOnce for RustdocPlaceholder {
             .on_click(move |_, cx| unfold(cx))
     }
 }
+
+#[derive(IntoElement)]
+struct RustdocIndexPlaceholder {
+    pub id: ElementId,
+    pub unfold: Arc<dyn Fn(&mut WindowContext)>,
+    pub source: RustdocSource,
+    pub crate_name: SharedString,
+}
+
+impl RenderOnce for RustdocIndexPlaceholder {
+    fn render(self, _cx: &mut WindowContext) -> impl IntoElement {
+        let unfold = self.unfold;
+
+        ButtonLike::new(self.id)
+            .style(ButtonStyle::Filled)
+            .layer(ElevationIndex::ElevatedSurface)
+            .child(Icon::new(IconName::FileRust))
+            .child(Label::new(format!(
+                "rustdoc index ({source}): {crate_name}",
+                crate_name = self.crate_name,
+                source = match self.source {
+                    RustdocSource::Local => "local",
+                    RustdocSource::DocsDotRs => "docs.rs",
+                }
+            )))
+            .on_click(move |_, cx| unfold(cx))
+    }
+}

crates/rustdoc/Cargo.toml 🔗

@@ -17,9 +17,12 @@ async-trait.workspace = true
 collections.workspace = true
 fs.workspace = true
 futures.workspace = true
+fuzzy.workspace = true
+gpui.workspace = true
 html_to_markdown.workspace = true
 http.workspace = true
 indexmap.workspace = true
+parking_lot.workspace = true
 strum.workspace = true
 
 [dev-dependencies]

crates/rustdoc/src/crawler.rs 🔗

@@ -7,6 +7,7 @@ use collections::{HashSet, VecDeque};
 use fs::Fs;
 use futures::AsyncReadExt;
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
+use indexmap::IndexMap;
 
 use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
 
@@ -51,11 +52,12 @@ impl RustdocProvider for LocalProvider {
         let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
         local_cargo_doc_path.push(&crate_name);
         if let Some(item) = item {
-            if !item.path.is_empty() {
-                local_cargo_doc_path.push(item.path.join("/"));
-            }
+            local_cargo_doc_path.push(item.url_path());
+        } else {
+            local_cargo_doc_path.push("index.html");
         }
-        local_cargo_doc_path.push("index.html");
+
+        println!("Fetching {}", local_cargo_doc_path.display());
 
         let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
             return Ok(None);
@@ -120,12 +122,18 @@ impl RustdocProvider for DocsDotRsProvider {
     }
 }
 
-pub struct RustdocItemWithHistory {
+#[derive(Debug)]
+struct RustdocItemWithHistory {
     pub item: RustdocItem,
     #[cfg(debug_assertions)]
     pub history: Vec<String>,
 }
 
+pub struct CrateDocs {
+    pub crate_root_markdown: String,
+    pub items: IndexMap<RustdocItem, String>,
+}
+
 pub struct RustdocCrawler {
     provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
 }
@@ -135,14 +143,16 @@ impl RustdocCrawler {
         Self { provider }
     }
 
-    pub async fn crawl(&self, crate_name: String) -> Result<Option<String>> {
-        let Some(crate_index_content) = self.provider.fetch_page(&crate_name, None).await? else {
+    pub async fn crawl(&self, crate_name: String) -> Result<Option<CrateDocs>> {
+        let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
             return Ok(None);
         };
 
-        let (_markdown, items) = convert_rustdoc_to_markdown(crate_index_content.as_bytes())?;
+        let (crate_root_markdown, items) =
+            convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
 
-        let mut seen_items = HashSet::default();
+        let mut docs_by_item = IndexMap::new();
+        let mut seen_items = HashSet::from_iter(items.clone());
         let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
             VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
                 item,
@@ -152,6 +162,7 @@ impl RustdocCrawler {
 
         while let Some(item_with_history) = items_to_visit.pop_front() {
             let item = &item_with_history.item;
+
             println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
 
             let Some(result) = self
@@ -176,23 +187,27 @@ impl RustdocCrawler {
                 continue;
             };
 
-            let (_markdown, mut items) = convert_rustdoc_to_markdown(result.as_bytes())?;
+            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
 
-            seen_items.insert(item.clone());
+            docs_by_item.insert(item.clone(), markdown);
 
-            for child in &mut items {
-                child.path.extend(item.path.clone());
-                match item.kind {
+            let parent_item = item;
+            for mut item in referenced_items {
+                if seen_items.contains(&item) {
+                    continue;
+                }
+
+                seen_items.insert(item.clone());
+
+                item.path.extend(parent_item.path.clone());
+                match parent_item.kind {
                     RustdocItemKind::Mod => {
-                        child.path.push(item.name.clone());
+                        item.path.push(parent_item.name.clone());
                     }
                     _ => {}
                 }
-            }
 
-            let unseen_items = items
-                .into_iter()
-                .map(|item| RustdocItemWithHistory {
+                items_to_visit.push_back(RustdocItemWithHistory {
                     #[cfg(debug_assertions)]
                     history: {
                         let mut history = item_with_history.history.clone();
@@ -200,12 +215,13 @@ impl RustdocCrawler {
                         history
                     },
                     item,
-                })
-                .filter(|item| !seen_items.contains(&item.item));
-
-            items_to_visit.extend(unseen_items);
+                });
+            }
         }
 
-        Ok(Some(String::new()))
+        Ok(Some(CrateDocs {
+            crate_root_markdown,
+            items: docs_by_item,
+        }))
     }
 }

crates/rustdoc/src/item.rs 🔗

@@ -43,6 +43,13 @@ pub struct RustdocItem {
 }
 
 impl RustdocItem {
+    pub fn display(&self) -> String {
+        let mut path_segments = self.path.clone();
+        path_segments.push(self.name.clone());
+
+        path_segments.join("::")
+    }
+
     pub fn url_path(&self) -> String {
         let name = &self.name;
         let mut path_components = self.path.clone();

crates/rustdoc/src/rustdoc.rs 🔗

@@ -1,6 +1,8 @@
 pub mod crawler;
 mod item;
+mod store;
 mod to_markdown;
 
 pub use crate::item::*;
+pub use crate::store::*;
 pub use crate::to_markdown::convert_rustdoc_to_markdown;

crates/rustdoc/src/store.rs 🔗

@@ -0,0 +1,116 @@
+use std::sync::atomic::AtomicBool;
+use std::sync::Arc;
+
+use anyhow::{anyhow, Result};
+use collections::HashMap;
+use fuzzy::StringMatchCandidate;
+use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
+use parking_lot::RwLock;
+
+use crate::crawler::{RustdocCrawler, RustdocProvider};
+use crate::RustdocItem;
+
+struct GlobalRustdocStore(Arc<RustdocStore>);
+
+impl Global for GlobalRustdocStore {}
+
+pub struct RustdocStore {
+    executor: BackgroundExecutor,
+    docs: Arc<RwLock<HashMap<(String, RustdocItem), String>>>,
+}
+
+impl RustdocStore {
+    pub fn global(cx: &AppContext) -> Arc<Self> {
+        GlobalRustdocStore::global(cx).0.clone()
+    }
+
+    pub fn init_global(cx: &mut AppContext) {
+        GlobalRustdocStore::set_global(
+            cx,
+            GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
+        );
+    }
+
+    pub fn new(executor: BackgroundExecutor) -> Self {
+        Self {
+            executor,
+            docs: Arc::new(RwLock::new(HashMap::default())),
+        }
+    }
+
+    pub fn load(&self, crate_name: String, item_path: Option<String>) -> Task<Result<String>> {
+        let item_docs = self
+            .docs
+            .read()
+            .iter()
+            .find_map(|((item_crate_name, item), item_docs)| {
+                if item_crate_name == &crate_name && item_path == Some(item.display()) {
+                    Some(item_docs.clone())
+                } else {
+                    None
+                }
+            });
+
+        Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found")))
+    }
+
+    pub fn index(
+        &self,
+        crate_name: String,
+        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
+    ) -> Task<Result<()>> {
+        let docs = self.docs.clone();
+        self.executor.spawn(async move {
+            let crawler = RustdocCrawler::new(provider);
+
+            println!("Indexing {crate_name}");
+
+            let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
+                return Ok(());
+            };
+
+            let mut lock = docs.write();
+
+            for (item, item_docs) in crate_docs.items {
+                lock.insert((crate_name.clone(), item), item_docs);
+            }
+
+            Ok(())
+        })
+    }
+
+    pub fn search(&self, query: String) -> Task<Vec<(String, RustdocItem)>> {
+        let executor = self.executor.clone();
+        let docs = self.docs.read().clone();
+        self.executor.spawn(async move {
+            if query.is_empty() {
+                return Vec::new();
+            }
+
+            let items = docs.keys().collect::<Vec<_>>();
+
+            let candidates = items
+                .iter()
+                .enumerate()
+                .map(|(ix, (crate_name, item))| {
+                    StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display()))
+                })
+                .collect::<Vec<_>>();
+
+            let matches = fuzzy::match_strings(
+                &candidates,
+                &query,
+                false,
+                100,
+                &AtomicBool::default(),
+                executor,
+            )
+            .await;
+
+            matches
+                .into_iter()
+                .map(|mat| items[mat.candidate_id].clone())
+                .collect()
+        })
+    }
+}