assistant: Add docs provider for `docs.rs` (#14356)

Marshall Bowers created

This PR adds an indexed docs provider that retrieves documentation from
`docs.rs`, making it available through the `/docs` slash command.

Release Notes:

- N/A

Change summary

crates/assistant/src/slash_command/docs_command.rs |  39 +
crates/indexed_docs/src/providers/rustdoc.rs       | 337 ++++++++-------
crates/indexed_docs/src/store.rs                   |   6 
3 files changed, 204 insertions(+), 178 deletions(-)

Detailed changes

crates/assistant/src/slash_command/docs_command.rs 🔗

@@ -8,7 +8,8 @@ use assistant_slash_command::{
 };
 use gpui::{AppContext, Model, Task, WeakView};
 use indexed_docs::{
-    IndexedDocsRegistry, IndexedDocsStore, LocalProvider, PackageName, ProviderId, RustdocIndexer,
+    DocsDotRsProvider, IndexedDocsRegistry, IndexedDocsStore, LocalRustdocProvider, PackageName,
+    ProviderId,
 };
 use language::LspAdapterDelegate;
 use project::{Project, ProjectPath};
@@ -34,22 +35,22 @@ impl DocsSlashCommand {
         ))
     }
 
-    /// Ensures that the rustdoc provider is registered.
+    /// Ensures that the indexed doc providers for Rust are registered.
     ///
     /// Ideally we would do this sooner, but we need to wait until we're able to
     /// access the workspace so we can read the project.
-    fn ensure_rustdoc_provider_is_registered(
+    fn ensure_rust_doc_providers_are_registered(
         &self,
         workspace: Option<WeakView<Workspace>>,
         cx: &mut AppContext,
     ) {
         let indexed_docs_registry = IndexedDocsRegistry::global(cx);
         if indexed_docs_registry
-            .get_provider_store(ProviderId::rustdoc())
+            .get_provider_store(LocalRustdocProvider::id())
             .is_none()
         {
             let index_provider_deps = maybe!({
-                let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?;
+                let workspace = workspace.clone().ok_or_else(|| anyhow!("no workspace"))?;
                 let workspace = workspace
                     .upgrade()
                     .ok_or_else(|| anyhow!("workspace was dropped"))?;
@@ -63,9 +64,29 @@ impl DocsSlashCommand {
             });
 
             if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
-                indexed_docs_registry.register_provider(Box::new(RustdocIndexer::new(Box::new(
-                    LocalProvider::new(fs, cargo_workspace_root),
-                ))));
+                indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new(
+                    fs,
+                    cargo_workspace_root,
+                )));
+            }
+        }
+
+        if indexed_docs_registry
+            .get_provider_store(DocsDotRsProvider::id())
+            .is_none()
+        {
+            let http_client = maybe!({
+                let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?;
+                let workspace = workspace
+                    .upgrade()
+                    .ok_or_else(|| anyhow!("workspace was dropped"))?;
+                let project = workspace.read(cx).project().clone();
+                anyhow::Ok(project.read(cx).client().http_client().clone())
+            });
+
+            if let Some(http_client) = http_client.log_err() {
+                indexed_docs_registry
+                    .register_provider(Box::new(DocsDotRsProvider::new(http_client)));
             }
         }
     }
@@ -95,7 +116,7 @@ impl SlashCommand for DocsSlashCommand {
         workspace: Option<WeakView<Workspace>>,
         cx: &mut AppContext,
     ) -> Task<Result<Vec<ArgumentCompletion>>> {
-        self.ensure_rustdoc_provider_is_registered(workspace, cx);
+        self.ensure_rust_doc_providers_are_registered(workspace, cx);
 
         let indexed_docs_registry = IndexedDocsRegistry::global(cx);
         let args = DocsSlashCommandArgs::parse(&query);

crates/indexed_docs/src/providers/rustdoc.rs 🔗

@@ -1,6 +1,7 @@
 mod item;
 mod to_markdown;
 
+use futures::future::BoxFuture;
 pub use item::*;
 pub use to_markdown::convert_rustdoc_to_markdown;
 
@@ -11,7 +12,7 @@ use anyhow::{bail, Context, Result};
 use async_trait::async_trait;
 use collections::{HashSet, VecDeque};
 use fs::Fs;
-use futures::AsyncReadExt;
+use futures::{AsyncReadExt, FutureExt};
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 
 use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
@@ -23,29 +24,28 @@ struct RustdocItemWithHistory {
     pub history: Vec<String>,
 }
 
-#[async_trait]
-pub trait RustdocProvider {
-    async fn fetch_page(
-        &self,
-        package: &PackageName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>>;
+pub struct LocalRustdocProvider {
+    fs: Arc<dyn Fs>,
+    cargo_workspace_root: PathBuf,
 }
 
-pub struct RustdocIndexer {
-    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
-}
+impl LocalRustdocProvider {
+    pub fn id() -> ProviderId {
+        ProviderId("rustdoc".into())
+    }
 
-impl RustdocIndexer {
-    pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
-        Self { provider }
+    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
+        Self {
+            fs,
+            cargo_workspace_root,
+        }
     }
 }
 
 #[async_trait]
-impl IndexedDocsProvider for RustdocIndexer {
+impl IndexedDocsProvider for LocalRustdocProvider {
     fn id(&self) -> ProviderId {
-        ProviderId::rustdoc()
+        Self::id()
     }
 
     fn database_path(&self) -> PathBuf {
@@ -53,179 +53,190 @@ impl IndexedDocsProvider for RustdocIndexer {
     }
 
     async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
-        let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
-            return Ok(());
-        };
-
-        let (crate_root_markdown, items) =
-            convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
-
-        database
-            .insert(package.to_string(), crate_root_markdown)
-            .await?;
-
-        let mut seen_items = HashSet::from_iter(items.clone());
-        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
-            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
-                item,
-                #[cfg(debug_assertions)]
-                history: Vec::new(),
-            }));
-
-        while let Some(item_with_history) = items_to_visit.pop_front() {
-            let item = &item_with_history.item;
-
-            let Some(result) = self
-                .provider
-                .fetch_page(&package, Some(&item))
-                .await
-                .with_context(|| {
-                    #[cfg(debug_assertions)]
-                    {
-                        format!(
-                            "failed to fetch {item:?}: {history:?}",
-                            history = item_with_history.history
-                        )
+        index_rustdoc(package, database, {
+            move |crate_name, item| {
+                let fs = self.fs.clone();
+                let cargo_workspace_root = self.cargo_workspace_root.clone();
+                let crate_name = crate_name.clone();
+                let item = item.cloned();
+                async move {
+                    let mut local_cargo_doc_path = cargo_workspace_root.join("target/doc");
+                    local_cargo_doc_path.push(crate_name.as_ref());
+
+                    if !fs.is_dir(&local_cargo_doc_path).await {
+                        bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`");
                     }
 
-                    #[cfg(not(debug_assertions))]
-                    {
-                        format!("failed to fetch {item:?}")
+                    if let Some(item) = item {
+                        local_cargo_doc_path.push(item.url_path());
+                    } else {
+                        local_cargo_doc_path.push("index.html");
                     }
-                })?
-            else {
-                continue;
-            };
 
-            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
+                    let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
+                        return Ok(None);
+                    };
 
-            database
-                .insert(format!("{package}::{}", item.display()), markdown)
-                .await?;
-
-            let parent_item = item;
-            for mut item in referenced_items {
-                if seen_items.contains(&item) {
-                    continue;
-                }
-
-                seen_items.insert(item.clone());
-
-                item.path.extend(parent_item.path.clone());
-                match parent_item.kind {
-                    RustdocItemKind::Mod => {
-                        item.path.push(parent_item.name.clone());
-                    }
-                    _ => {}
+                    Ok(Some(contents))
                 }
-
-                items_to_visit.push_back(RustdocItemWithHistory {
-                    #[cfg(debug_assertions)]
-                    history: {
-                        let mut history = item_with_history.history.clone();
-                        history.push(item.url_path());
-                        history
-                    },
-                    item,
-                });
+                .boxed()
             }
-        }
-
-        Ok(())
+        })
+        .await
     }
 }
 
-pub struct LocalProvider {
-    fs: Arc<dyn Fs>,
-    cargo_workspace_root: PathBuf,
+pub struct DocsDotRsProvider {
+    http_client: Arc<HttpClientWithUrl>,
 }
 
-impl LocalProvider {
-    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
-        Self {
-            fs,
-            cargo_workspace_root,
-        }
+impl DocsDotRsProvider {
+    pub fn id() -> ProviderId {
+        ProviderId("docs-rs".into())
+    }
+
+    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
+        Self { http_client }
     }
 }
 
 #[async_trait]
-impl RustdocProvider for LocalProvider {
-    async fn fetch_page(
-        &self,
-        crate_name: &PackageName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>> {
-        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
-        local_cargo_doc_path.push(crate_name.as_ref());
-
-        if !self.fs.is_dir(&local_cargo_doc_path).await {
-            bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`");
-        }
+impl IndexedDocsProvider for DocsDotRsProvider {
+    fn id(&self) -> ProviderId {
+        Self::id()
+    }
 
-        if let Some(item) = item {
-            local_cargo_doc_path.push(item.url_path());
-        } else {
-            local_cargo_doc_path.push("index.html");
-        }
+    fn database_path(&self) -> PathBuf {
+        paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
+    }
 
-        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
-            return Ok(None);
-        };
+    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
+        index_rustdoc(package, database, {
+            move |crate_name, item| {
+                let http_client = self.http_client.clone();
+                let crate_name = crate_name.clone();
+                let item = item.cloned();
+                async move {
+                    let version = "latest";
+                    let path = format!(
+                        "{crate_name}/{version}/{crate_name}{item_path}",
+                        item_path = item
+                            .map(|item| format!("/{}", item.url_path()))
+                            .unwrap_or_default()
+                    );
+
+                    let mut response = http_client
+                        .get(
+                            &format!("https://docs.rs/{path}"),
+                            AsyncBody::default(),
+                            true,
+                        )
+                        .await?;
+
+                    let mut body = Vec::new();
+                    response
+                        .body_mut()
+                        .read_to_end(&mut body)
+                        .await
+                        .context("error reading docs.rs response body")?;
+
+                    if response.status().is_client_error() {
+                        let text = String::from_utf8_lossy(body.as_slice());
+                        bail!(
+                            "status error {}, response: {text:?}",
+                            response.status().as_u16()
+                        );
+                    }
 
-        Ok(Some(contents))
+                    Ok(Some(String::from_utf8(body)?))
+                }
+                .boxed()
+            }
+        })
+        .await
     }
 }
 
-pub struct DocsDotRsProvider {
-    http_client: Arc<HttpClientWithUrl>,
-}
+async fn index_rustdoc(
+    package: PackageName,
+    database: Arc<IndexedDocsDatabase>,
+    fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
+        + Send
+        + Sync,
+) -> Result<()> {
+    let Some(package_root_content) = fetch_page(&package, None).await? else {
+        return Ok(());
+    };
+
+    let (crate_root_markdown, items) =
+        convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
+
+    database
+        .insert(package.to_string(), crate_root_markdown)
+        .await?;
+
+    let mut seen_items = HashSet::from_iter(items.clone());
+    let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
+        VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
+            item,
+            #[cfg(debug_assertions)]
+            history: Vec::new(),
+        }));
+
+    while let Some(item_with_history) = items_to_visit.pop_front() {
+        let item = &item_with_history.item;
+
+        let Some(result) = fetch_page(&package, Some(&item)).await.with_context(|| {
+            #[cfg(debug_assertions)]
+            {
+                format!(
+                    "failed to fetch {item:?}: {history:?}",
+                    history = item_with_history.history
+                )
+            }
 
-impl DocsDotRsProvider {
-    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
-        Self { http_client }
-    }
-}
+            #[cfg(not(debug_assertions))]
+            {
+                format!("failed to fetch {item:?}")
+            }
+        })?
+        else {
+            continue;
+        };
 
-#[async_trait]
-impl RustdocProvider for DocsDotRsProvider {
-    async fn fetch_page(
-        &self,
-        crate_name: &PackageName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>> {
-        let version = "latest";
-        let path = format!(
-            "{crate_name}/{version}/{crate_name}{item_path}",
-            item_path = item
-                .map(|item| format!("/{}", item.url_path()))
-                .unwrap_or_default()
-        );
-
-        let mut response = self
-            .http_client
-            .get(
-                &format!("https://docs.rs/{path}"),
-                AsyncBody::default(),
-                true,
-            )
+        let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
+
+        database
+            .insert(format!("{package}::{}", item.display()), markdown)
             .await?;
 
-        let mut body = Vec::new();
-        response
-            .body_mut()
-            .read_to_end(&mut body)
-            .await
-            .context("error reading docs.rs response body")?;
-
-        if response.status().is_client_error() {
-            let text = String::from_utf8_lossy(body.as_slice());
-            bail!(
-                "status error {}, response: {text:?}",
-                response.status().as_u16()
-            );
-        }
+        let parent_item = item;
+        for mut item in referenced_items {
+            if seen_items.contains(&item) {
+                continue;
+            }
+
+            seen_items.insert(item.clone());
+
+            item.path.extend(parent_item.path.clone());
+            match parent_item.kind {
+                RustdocItemKind::Mod => {
+                    item.path.push(parent_item.name.clone());
+                }
+                _ => {}
+            }
 
-        Ok(Some(String::from_utf8(body)?))
+            items_to_visit.push_back(RustdocItemWithHistory {
+                #[cfg(debug_assertions)]
+                history: {
+                    let mut history = item_with_history.history.clone();
+                    history.push(item.url_path());
+                    history
+                },
+                item,
+            });
+        }
     }
+
+    Ok(())
 }

crates/indexed_docs/src/store.rs 🔗

@@ -21,12 +21,6 @@ use crate::IndexedDocsRegistry;
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
 pub struct ProviderId(pub Arc<str>);
 
-impl ProviderId {
-    pub fn rustdoc() -> Self {
-        Self("rustdoc".into())
-    }
-}
-
 /// The name of a package.
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
 pub struct PackageName(Arc<str>);