Start work on genericizing `/rustdoc` (#13745)

Marshall Bowers created

This PR begins the process of making the backing infrastructure for the
`/rustdoc` command more generic so that it can be applied to
additional documentation providers.

In this PR we:

- Rename the `rustdoc` crate to `indexed_docs`, a more general-purpose
name
- Start moving rustdoc-specific functionality into
`indexed_docs::providers::rustdoc`
- Add an `IndexedDocsRegistry` to hold multiple `IndexedDocsStore`s (one
per provider)

We haven't yet removed the rustdoc-specific bits in the `DocsIndexer`
(it still calls `convert_rustdoc_to_markdown` and operates on
`RustdocItem`s). That will follow soon.

Release Notes:

- N/A

Change summary

Cargo.lock                                               |  52 +-
Cargo.toml                                               |   4 
crates/assistant/Cargo.toml                              |   4 
crates/assistant/src/assistant.rs                        |  11 
crates/assistant/src/assistant_panel.rs                  |   8 
crates/assistant/src/slash_command/rustdoc_command.rs    |  18 
crates/indexed_docs/Cargo.toml                           |   4 
crates/indexed_docs/LICENSE-GPL                          |   0 
crates/indexed_docs/src/indexed_docs.rs                  |   8 
crates/indexed_docs/src/indexer.rs                       | 122 +++++
crates/indexed_docs/src/providers.rs                     |   1 
crates/indexed_docs/src/providers/rustdoc.rs             | 117 +++++
crates/indexed_docs/src/providers/rustdoc/item.rs        |   0 
crates/indexed_docs/src/providers/rustdoc/to_markdown.rs |   0 
crates/indexed_docs/src/registry.rs                      |  47 ++
crates/indexed_docs/src/store.rs                         | 149 +++---
crates/rustdoc/src/indexer.rs                            | 226 ----------
crates/rustdoc/src/rustdoc.rs                            |   9 
18 files changed, 425 insertions(+), 355 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -391,6 +391,7 @@ dependencies = [
  "heed",
  "html_to_markdown 0.1.0",
  "http 0.1.0",
+ "indexed_docs",
  "indoc",
  "language",
  "log",
@@ -406,7 +407,6 @@ dependencies = [
  "rand 0.8.5",
  "regex",
  "rope",
- "rustdoc",
  "schemars",
  "search",
  "semantic_index",
@@ -5493,6 +5493,31 @@ version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"
 
+[[package]]
+name = "indexed_docs"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "collections",
+ "derive_more",
+ "fs",
+ "futures 0.3.28",
+ "fuzzy",
+ "gpui",
+ "heed",
+ "html_to_markdown 0.1.0",
+ "http 0.1.0",
+ "indexmap 1.9.3",
+ "indoc",
+ "parking_lot",
+ "paths",
+ "pretty_assertions",
+ "serde",
+ "strum",
+ "util",
+]
+
 [[package]]
 name = "indexmap"
 version = "1.9.3"
@@ -9019,31 +9044,6 @@ dependencies = [
  "semver",
 ]
 
-[[package]]
-name = "rustdoc"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "async-trait",
- "collections",
- "derive_more",
- "fs",
- "futures 0.3.28",
- "fuzzy",
- "gpui",
- "heed",
- "html_to_markdown 0.1.0",
- "http 0.1.0",
- "indexmap 1.9.3",
- "indoc",
- "parking_lot",
- "paths",
- "pretty_assertions",
- "serde",
- "strum",
- "util",
-]
-
 [[package]]
 name = "rustix"
 version = "0.37.23"

Cargo.toml 🔗

@@ -45,6 +45,7 @@ members = [
     "crates/html_to_markdown",
     "crates/http",
     "crates/image_viewer",
+    "crates/indexed_docs",
     "crates/inline_completion_button",
     "crates/install_cli",
     "crates/journal",
@@ -82,7 +83,6 @@ members = [
     "crates/rich_text",
     "crates/rope",
     "crates/rpc",
-    "crates/rustdoc",
     "crates/search",
     "crates/semantic_index",
     "crates/semantic_version",
@@ -198,6 +198,7 @@ headless = { path = "crates/headless" }
 html_to_markdown = { path = "crates/html_to_markdown" }
 http = { path = "crates/http" }
 image_viewer = { path = "crates/image_viewer" }
+indexed_docs = { path = "crates/indexed_docs" }
 inline_completion_button = { path = "crates/inline_completion_button" }
 install_cli = { path = "crates/install_cli" }
 journal = { path = "crates/journal" }
@@ -235,7 +236,6 @@ repl = { path = "crates/repl" }
 rich_text = { path = "crates/rich_text" }
 rope = { path = "crates/rope" }
 rpc = { path = "crates/rpc" }
-rustdoc = { path = "crates/rustdoc" }
 search = { path = "crates/search" }
 semantic_index = { path = "crates/semantic_index" }
 semantic_version = { path = "crates/semantic_version" }

crates/assistant/Cargo.toml 🔗

@@ -13,8 +13,8 @@ path = "src/assistant.rs"
 doctest = false
 
 [dependencies]
-anyhow.workspace = true
 anthropic = { workspace = true, features = ["schemars"] }
+anyhow.workspace = true
 assistant_slash_command.workspace = true
 async-watch.workspace = true
 cargo_toml.workspace = true
@@ -32,6 +32,7 @@ gpui.workspace = true
 heed.workspace = true
 html_to_markdown.workspace = true
 http.workspace = true
+indexed_docs.workspace = true
 indoc.workspace = true
 language.workspace = true
 log.workspace = true
@@ -45,7 +46,6 @@ paths.workspace = true
 project.workspace = true
 regex.workspace = true
 rope.workspace = true
-rustdoc.workspace = true
 schemars.workspace = true
 search.workspace = true
 semantic_index.workspace = true

crates/assistant/src/assistant.rs 🔗

@@ -20,9 +20,9 @@ pub(crate) use completion_provider::*;
 pub(crate) use context_store::*;
 use fs::Fs;
 use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
+use indexed_docs::{IndexedDocsRegistry, Provider};
 pub(crate) use inline_assistant::*;
 pub(crate) use model_selector::*;
-use rustdoc::RustdocStore;
 use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -292,7 +292,8 @@ pub fn init(fs: Arc<dyn Fs>, client: Arc<Client>, cx: &mut AppContext) {
     assistant_panel::init(cx);
     inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
     terminal_inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
-    RustdocStore::init_global(cx);
+    IndexedDocsRegistry::init_global(cx);
+    register_indexed_docs_providers(cx);
 
     CommandPaletteFilter::update_global(cx, |filter, _cx| {
         filter.hide_namespace(Assistant::NAMESPACE);
@@ -327,6 +328,12 @@ fn register_slash_commands(cx: &mut AppContext) {
     slash_command_registry.register_command(fetch_command::FetchSlashCommand, false);
 }
 
+fn register_indexed_docs_providers(cx: &mut AppContext) {
+    let indexed_docs_registry = IndexedDocsRegistry::global(cx);
+
+    indexed_docs_registry.register_provider(Provider::rustdoc());
+}
+
 pub fn humanize_token_count(count: usize) -> String {
     match count {
         0..=999 => count.to_string(),

crates/assistant/src/assistant_panel.rs 🔗

@@ -39,6 +39,7 @@ use gpui::{
     Subscription, Task, Transformation, UpdateGlobal, View, ViewContext, VisualContext, WeakView,
     WindowContext,
 };
+use indexed_docs::{IndexedDocsStore, PackageName, ProviderId};
 use language::{
     language_settings::SoftWrap, AnchorRangeExt as _, AutoindentMode, Buffer, LanguageRegistry,
     LspAdapterDelegate, OffsetRangeExt as _, Point, ToOffset as _,
@@ -47,7 +48,6 @@ use multi_buffer::MultiBufferRow;
 use paths::contexts_dir;
 use picker::{Picker, PickerDelegate};
 use project::{Project, ProjectLspAdapterDelegate, ProjectTransaction};
-use rustdoc::{CrateName, RustdocStore};
 use search::{buffer_search::DivRegistrar, BufferSearchBar};
 use settings::Settings;
 use std::{
@@ -3410,7 +3410,9 @@ fn render_rustdoc_slash_command_trailer(
     command: PendingSlashCommand,
     cx: &mut WindowContext,
 ) -> AnyElement {
-    let rustdoc_store = RustdocStore::global(cx);
+    let Some(rustdoc_store) = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx).ok() else {
+        return Empty.into_any();
+    };
 
     let Some((crate_name, _)) = command
         .argument
@@ -3420,7 +3422,7 @@ fn render_rustdoc_slash_command_trailer(
         return Empty.into_any();
     };
 
-    let crate_name = CrateName::from(crate_name);
+    let crate_name = PackageName::from(crate_name);
     if !rustdoc_store.is_indexing(&crate_name) {
         return Empty.into_any();
     }

crates/assistant/src/slash_command/rustdoc_command.rs 🔗

@@ -8,9 +8,12 @@ use fs::Fs;
 use futures::AsyncReadExt;
 use gpui::{AppContext, Model, Task, WeakView};
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
+use indexed_docs::{
+    convert_rustdoc_to_markdown, IndexedDocsStore, LocalProvider, PackageName, ProviderId,
+    RustdocSource,
+};
 use language::LspAdapterDelegate;
 use project::{Project, ProjectPath};
-use rustdoc::{convert_rustdoc_to_markdown, CrateName, LocalProvider, RustdocSource, RustdocStore};
 use ui::prelude::*;
 use util::{maybe, ResultExt};
 use workspace::Workspace;
@@ -21,7 +24,7 @@ impl RustdocSlashCommand {
     async fn build_message(
         fs: Arc<dyn Fs>,
         http_client: Arc<HttpClientWithUrl>,
-        crate_name: CrateName,
+        crate_name: PackageName,
         module_path: Vec<String>,
         path_to_cargo_toml: Option<&Path>,
     ) -> Result<(RustdocSource, String)> {
@@ -127,8 +130,10 @@ impl SlashCommand for RustdocSlashCommand {
             anyhow::Ok((fs, cargo_workspace_root))
         });
 
-        let store = RustdocStore::global(cx);
+        let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
         cx.background_executor().spawn(async move {
+            let store = store?;
+
             if let Some((crate_name, rest)) = query.split_once(':') {
                 if rest.is_empty() {
                     if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
@@ -169,16 +174,17 @@ impl SlashCommand for RustdocSlashCommand {
             .next()
             .ok_or_else(|| anyhow!("missing crate name"))
         {
-            Ok(crate_name) => CrateName::from(crate_name),
+            Ok(crate_name) => PackageName::from(crate_name),
             Err(err) => return Task::ready(Err(err)),
         };
         let item_path = path_components.map(ToString::to_string).collect::<Vec<_>>();
 
         let text = cx.background_executor().spawn({
-            let rustdoc_store = RustdocStore::global(cx);
+            let rustdoc_store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
             let crate_name = crate_name.clone();
             let item_path = item_path.clone();
             async move {
+                let rustdoc_store = rustdoc_store?;
                 let item_docs = rustdoc_store
                     .load(
                         crate_name.clone(),
@@ -191,7 +197,7 @@ impl SlashCommand for RustdocSlashCommand {
                     .await;
 
                 if let Ok(item_docs) = item_docs {
-                    anyhow::Ok((RustdocSource::Index, item_docs.docs().to_owned()))
+                    anyhow::Ok((RustdocSource::Index, item_docs.to_string()))
                 } else {
                     Self::build_message(
                         fs,

crates/rustdoc/Cargo.toml → crates/indexed_docs/Cargo.toml 🔗

@@ -1,5 +1,5 @@
 [package]
-name = "rustdoc"
+name = "indexed_docs"
 version = "0.1.0"
 edition = "2021"
 publish = false
@@ -9,7 +9,7 @@ license = "GPL-3.0-or-later"
 workspace = true
 
 [lib]
-path = "src/rustdoc.rs"
+path = "src/indexed_docs.rs"
 
 [dependencies]
 anyhow.workspace = true

crates/indexed_docs/src/indexed_docs.rs 🔗

@@ -0,0 +1,8 @@
+mod indexer;
+mod providers;
+mod registry;
+mod store;
+
+pub use crate::providers::rustdoc::*;
+pub use crate::registry::*;
+pub use crate::store::*;

crates/indexed_docs/src/indexer.rs 🔗

@@ -0,0 +1,122 @@
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use async_trait::async_trait;
+use collections::{HashSet, VecDeque};
+
+use crate::{
+    convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
+};
+
+#[async_trait]
+pub trait IndexedDocsProvider {
+    async fn fetch_page(
+        &self,
+        package: &PackageName,
+        item: Option<&RustdocItem>,
+    ) -> Result<Option<String>>;
+}
+
+#[derive(Debug)]
+struct RustdocItemWithHistory {
+    pub item: RustdocItem,
+    #[cfg(debug_assertions)]
+    pub history: Vec<String>,
+}
+
+pub(crate) struct DocsIndexer {
+    database: Arc<IndexedDocsDatabase>,
+    provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
+}
+
+impl DocsIndexer {
+    pub fn new(
+        database: Arc<IndexedDocsDatabase>,
+        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
+    ) -> Self {
+        Self { database, provider }
+    }
+
+    /// Indexes the package with the given name.
+    pub async fn index(&self, package: PackageName) -> Result<()> {
+        let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
+            return Ok(());
+        };
+
+        let (crate_root_markdown, items) =
+            convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
+
+        self.database
+            .insert(package.clone(), None, crate_root_markdown)
+            .await?;
+
+        let mut seen_items = HashSet::from_iter(items.clone());
+        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
+            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
+                item,
+                #[cfg(debug_assertions)]
+                history: Vec::new(),
+            }));
+
+        while let Some(item_with_history) = items_to_visit.pop_front() {
+            let item = &item_with_history.item;
+
+            let Some(result) = self
+                .provider
+                .fetch_page(&package, Some(&item))
+                .await
+                .with_context(|| {
+                    #[cfg(debug_assertions)]
+                    {
+                        format!(
+                            "failed to fetch {item:?}: {history:?}",
+                            history = item_with_history.history
+                        )
+                    }
+
+                    #[cfg(not(debug_assertions))]
+                    {
+                        format!("failed to fetch {item:?}")
+                    }
+                })?
+            else {
+                continue;
+            };
+
+            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
+
+            self.database
+                .insert(package.clone(), Some(item), markdown)
+                .await?;
+
+            let parent_item = item;
+            for mut item in referenced_items {
+                if seen_items.contains(&item) {
+                    continue;
+                }
+
+                seen_items.insert(item.clone());
+
+                item.path.extend(parent_item.path.clone());
+                match parent_item.kind {
+                    RustdocItemKind::Mod => {
+                        item.path.push(parent_item.name.clone());
+                    }
+                    _ => {}
+                }
+
+                items_to_visit.push_back(RustdocItemWithHistory {
+                    #[cfg(debug_assertions)]
+                    history: {
+                        let mut history = item_with_history.history.clone();
+                        history.push(item.url_path());
+                        history
+                    },
+                    item,
+                });
+            }
+        }
+
+        Ok(())
+    }
+}

crates/indexed_docs/src/providers/rustdoc.rs 🔗

@@ -0,0 +1,117 @@
+mod item;
+mod to_markdown;
+
+pub use item::*;
+pub use to_markdown::convert_rustdoc_to_markdown;
+
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use anyhow::{bail, Context, Result};
+use async_trait::async_trait;
+use fs::Fs;
+use futures::AsyncReadExt;
+use http::{AsyncBody, HttpClient, HttpClientWithUrl};
+
+use crate::indexer::IndexedDocsProvider;
+use crate::PackageName;
+
+#[derive(Debug, Clone, Copy)]
+pub enum RustdocSource {
+    /// The docs were sourced from Zed's rustdoc index.
+    Index,
+    /// The docs were sourced from local `cargo doc` output.
+    Local,
+    /// The docs were sourced from `docs.rs`.
+    DocsDotRs,
+}
+
+pub struct LocalProvider {
+    fs: Arc<dyn Fs>,
+    cargo_workspace_root: PathBuf,
+}
+
+impl LocalProvider {
+    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
+        Self {
+            fs,
+            cargo_workspace_root,
+        }
+    }
+}
+
+#[async_trait]
+impl IndexedDocsProvider for LocalProvider {
+    async fn fetch_page(
+        &self,
+        crate_name: &PackageName,
+        item: Option<&RustdocItem>,
+    ) -> Result<Option<String>> {
+        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
+        local_cargo_doc_path.push(crate_name.as_ref());
+        if let Some(item) = item {
+            local_cargo_doc_path.push(item.url_path());
+        } else {
+            local_cargo_doc_path.push("index.html");
+        }
+
+        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
+            return Ok(None);
+        };
+
+        Ok(Some(contents))
+    }
+}
+
+pub struct DocsDotRsProvider {
+    http_client: Arc<HttpClientWithUrl>,
+}
+
+impl DocsDotRsProvider {
+    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
+        Self { http_client }
+    }
+}
+
+#[async_trait]
+impl IndexedDocsProvider for DocsDotRsProvider {
+    async fn fetch_page(
+        &self,
+        crate_name: &PackageName,
+        item: Option<&RustdocItem>,
+    ) -> Result<Option<String>> {
+        let version = "latest";
+        let path = format!(
+            "{crate_name}/{version}/{crate_name}{item_path}",
+            item_path = item
+                .map(|item| format!("/{}", item.url_path()))
+                .unwrap_or_default()
+        );
+
+        let mut response = self
+            .http_client
+            .get(
+                &format!("https://docs.rs/{path}"),
+                AsyncBody::default(),
+                true,
+            )
+            .await?;
+
+        let mut body = Vec::new();
+        response
+            .body_mut()
+            .read_to_end(&mut body)
+            .await
+            .context("error reading docs.rs response body")?;
+
+        if response.status().is_client_error() {
+            let text = String::from_utf8_lossy(body.as_slice());
+            bail!(
+                "status error {}, response: {text:?}",
+                response.status().as_u16()
+            );
+        }
+
+        Ok(Some(String::from_utf8(body)?))
+    }
+}

crates/indexed_docs/src/registry.rs 🔗

@@ -0,0 +1,47 @@
+use std::sync::Arc;
+
+use collections::HashMap;
+use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
+use parking_lot::RwLock;
+
+use crate::{IndexedDocsStore, Provider, ProviderId};
+
+struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
+
+impl Global for GlobalIndexedDocsRegistry {}
+
+pub struct IndexedDocsRegistry {
+    executor: BackgroundExecutor,
+    stores_by_provider: RwLock<HashMap<ProviderId, Arc<IndexedDocsStore>>>,
+}
+
+impl IndexedDocsRegistry {
+    pub fn global(cx: &AppContext) -> Arc<Self> {
+        GlobalIndexedDocsRegistry::global(cx).0.clone()
+    }
+
+    pub fn init_global(cx: &mut AppContext) {
+        GlobalIndexedDocsRegistry::set_global(
+            cx,
+            GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))),
+        );
+    }
+
+    pub fn new(executor: BackgroundExecutor) -> Self {
+        Self {
+            executor,
+            stores_by_provider: RwLock::new(HashMap::default()),
+        }
+    }
+
+    pub fn register_provider(&self, provider: Provider) {
+        self.stores_by_provider.write().insert(
+            provider.id.clone(),
+            Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
+        );
+    }
+
+    pub fn get_provider_store(&self, provider_id: ProviderId) -> Option<Arc<IndexedDocsStore>> {
+        self.stores_by_provider.read().get(&provider_id).cloned()
+    }
+}

crates/rustdoc/src/store.rs → crates/indexed_docs/src/store.rs 🔗

@@ -8,59 +8,71 @@ use derive_more::{Deref, Display};
 use futures::future::{self, BoxFuture, Shared};
 use futures::FutureExt;
 use fuzzy::StringMatchCandidate;
-use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
+use gpui::{AppContext, BackgroundExecutor, Task};
 use heed::types::SerdeBincode;
 use heed::Database;
 use parking_lot::RwLock;
 use serde::{Deserialize, Serialize};
 use util::ResultExt;
 
-use crate::indexer::{RustdocIndexer, RustdocProvider};
-use crate::{RustdocItem, RustdocItemKind};
+use crate::indexer::{DocsIndexer, IndexedDocsProvider};
+use crate::{IndexedDocsRegistry, RustdocItem};
 
-/// The name of a crate.
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
-pub struct CrateName(Arc<str>);
+pub struct ProviderId(Arc<str>);
 
-impl From<&str> for CrateName {
-    fn from(value: &str) -> Self {
-        Self(value.into())
+impl ProviderId {
+    pub fn rustdoc() -> Self {
+        Self("rustdoc".into())
     }
 }
 
-struct GlobalRustdocStore(Arc<RustdocStore>);
-
-impl Global for GlobalRustdocStore {}
+pub struct Provider {
+    pub id: ProviderId,
+    pub database_path: PathBuf,
+}
 
-pub struct RustdocStore {
-    executor: BackgroundExecutor,
-    database_future: Shared<BoxFuture<'static, Result<Arc<RustdocDatabase>, Arc<anyhow::Error>>>>,
-    indexing_tasks_by_crate:
-        RwLock<HashMap<CrateName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
+impl Provider {
+    pub fn rustdoc() -> Self {
+        Self {
+            id: ProviderId("rustdoc".into()),
+            database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
+        }
+    }
 }
 
-impl RustdocStore {
-    pub fn global(cx: &AppContext) -> Arc<Self> {
-        GlobalRustdocStore::global(cx).0.clone()
+/// The name of a package.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
+pub struct PackageName(Arc<str>);
+
+impl From<&str> for PackageName {
+    fn from(value: &str) -> Self {
+        Self(value.into())
     }
+}
+
+/// A store for indexed docs.
+pub struct IndexedDocsStore {
+    executor: BackgroundExecutor,
+    database_future:
+        Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
+    indexing_tasks_by_package:
+        RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
+}
 
-    pub fn init_global(cx: &mut AppContext) {
-        GlobalRustdocStore::set_global(
-            cx,
-            GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
-        );
+impl IndexedDocsStore {
+    pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result<Arc<Self>> {
+        let registry = IndexedDocsRegistry::global(cx);
+        registry
+            .get_provider_store(provider.clone())
+            .ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
     }
 
-    pub fn new(executor: BackgroundExecutor) -> Self {
+    pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
         let database_future = executor
             .spawn({
                 let executor = executor.clone();
-                async move {
-                    RustdocDatabase::new(
-                        paths::support_dir().join("docs/rust/rustdoc-db.0.mdb"),
-                        executor,
-                    )
-                }
+                async move { IndexedDocsDatabase::new(provider.database_path, executor) }
             })
             .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
             .boxed()
@@ -69,34 +81,34 @@ impl RustdocStore {
         Self {
             executor,
             database_future,
-            indexing_tasks_by_crate: RwLock::new(HashMap::default()),
+            indexing_tasks_by_package: RwLock::new(HashMap::default()),
         }
     }
 
-    /// Returns whether the crate with the given name is currently being indexed.
-    pub fn is_indexing(&self, crate_name: &CrateName) -> bool {
-        self.indexing_tasks_by_crate.read().contains_key(crate_name)
+    /// Returns whether the package with the given name is currently being indexed.
+    pub fn is_indexing(&self, package: &PackageName) -> bool {
+        self.indexing_tasks_by_package.read().contains_key(package)
     }
 
     pub async fn load(
         &self,
-        crate_name: CrateName,
+        package: PackageName,
         item_path: Option<String>,
-    ) -> Result<RustdocDatabaseEntry> {
+    ) -> Result<MarkdownDocs> {
         self.database_future
             .clone()
             .await
             .map_err(|err| anyhow!(err))?
-            .load(crate_name, item_path)
+            .load(package, item_path)
             .await
     }
 
     pub fn index(
         self: Arc<Self>,
-        crate_name: CrateName,
-        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
+        package: PackageName,
+        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
     ) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
-        if let Some(existing_task) = self.indexing_tasks_by_crate.read().get(&crate_name) {
+        if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
             return existing_task.clone();
         }
 
@@ -104,13 +116,13 @@ impl RustdocStore {
             .executor
             .spawn({
                 let this = self.clone();
-                let crate_name = crate_name.clone();
+                let package = package.clone();
                 async move {
                     let _finally = util::defer({
                         let this = this.clone();
-                        let crate_name = crate_name.clone();
+                        let package = package.clone();
                         move || {
-                            this.indexing_tasks_by_crate.write().remove(&crate_name);
+                            this.indexing_tasks_by_package.write().remove(&package);
                         }
                     });
 
@@ -120,9 +132,9 @@ impl RustdocStore {
                             .clone()
                             .await
                             .map_err(|err| anyhow!(err))?;
-                        let indexer = RustdocIndexer::new(database, provider);
+                        let indexer = DocsIndexer::new(database, provider);
 
-                        indexer.index(crate_name.clone()).await
+                        indexer.index(package.clone()).await
                     };
 
                     index_task.await.map_err(Arc::new)
@@ -130,9 +142,9 @@ impl RustdocStore {
             })
             .shared();
 
-        self.indexing_tasks_by_crate
+        self.indexing_tasks_by_package
             .write()
-            .insert(crate_name, indexing_task.clone());
+            .insert(package, indexing_task.clone());
 
         indexing_task
     }
@@ -177,27 +189,16 @@ impl RustdocStore {
     }
 }
 
-#[derive(Serialize, Deserialize)]
-pub enum RustdocDatabaseEntry {
-    Crate { docs: String },
-    Item { kind: RustdocItemKind, docs: String },
-}
-
-impl RustdocDatabaseEntry {
-    pub fn docs(&self) -> &str {
-        match self {
-            Self::Crate { docs } | Self::Item { docs, .. } => &docs,
-        }
-    }
-}
+#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
+pub struct MarkdownDocs(pub String);
 
-pub(crate) struct RustdocDatabase {
+pub(crate) struct IndexedDocsDatabase {
     executor: BackgroundExecutor,
     env: heed::Env,
-    entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,
+    entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
 }
 
-impl RustdocDatabase {
+impl IndexedDocsDatabase {
     pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
         std::fs::create_dir_all(&path)?;
 
@@ -238,15 +239,15 @@ impl RustdocDatabase {
 
     pub fn load(
         &self,
-        crate_name: CrateName,
+        package: PackageName,
         item_path: Option<String>,
-    ) -> Task<Result<RustdocDatabaseEntry>> {
+    ) -> Task<Result<MarkdownDocs>> {
         let env = self.env.clone();
         let entries = self.entries;
         let item_path = if let Some(item_path) = item_path {
-            format!("{crate_name}::{item_path}")
+            format!("{package}::{item_path}")
         } else {
-            crate_name.to_string()
+            package.to_string()
         };
 
         self.executor.spawn(async move {
@@ -259,22 +260,16 @@ impl RustdocDatabase {
 
     pub fn insert(
         &self,
-        crate_name: CrateName,
+        package: PackageName,
         item: Option<&RustdocItem>,
         docs: String,
     ) -> Task<Result<()>> {
         let env = self.env.clone();
         let entries = self.entries;
         let (item_path, entry) = if let Some(item) = item {
-            (
-                format!("{crate_name}::{}", item.display()),
-                RustdocDatabaseEntry::Item {
-                    kind: item.kind,
-                    docs,
-                },
-            )
+            (format!("{package}::{}", item.display()), MarkdownDocs(docs))
         } else {
-            (crate_name.to_string(), RustdocDatabaseEntry::Crate { docs })
+            (package.to_string(), MarkdownDocs(docs))
         };
 
         self.executor.spawn(async move {

crates/rustdoc/src/indexer.rs 🔗

@@ -1,226 +0,0 @@
-use std::path::PathBuf;
-use std::sync::Arc;
-
-use anyhow::{bail, Context, Result};
-use async_trait::async_trait;
-use collections::{HashSet, VecDeque};
-use fs::Fs;
-use futures::AsyncReadExt;
-use http::{AsyncBody, HttpClient, HttpClientWithUrl};
-
-use crate::{
-    convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind,
-};
-
-#[derive(Debug, Clone, Copy)]
-pub enum RustdocSource {
-    /// The docs were sourced from Zed's rustdoc index.
-    Index,
-    /// The docs were sourced from local `cargo doc` output.
-    Local,
-    /// The docs were sourced from `docs.rs`.
-    DocsDotRs,
-}
-
-#[async_trait]
-pub trait RustdocProvider {
-    async fn fetch_page(
-        &self,
-        crate_name: &CrateName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>>;
-}
-
-pub struct LocalProvider {
-    fs: Arc<dyn Fs>,
-    cargo_workspace_root: PathBuf,
-}
-
-impl LocalProvider {
-    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
-        Self {
-            fs,
-            cargo_workspace_root,
-        }
-    }
-}
-
-#[async_trait]
-impl RustdocProvider for LocalProvider {
-    async fn fetch_page(
-        &self,
-        crate_name: &CrateName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>> {
-        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
-        local_cargo_doc_path.push(crate_name.as_ref());
-        if let Some(item) = item {
-            local_cargo_doc_path.push(item.url_path());
-        } else {
-            local_cargo_doc_path.push("index.html");
-        }
-
-        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
-            return Ok(None);
-        };
-
-        Ok(Some(contents))
-    }
-}
-
-pub struct DocsDotRsProvider {
-    http_client: Arc<HttpClientWithUrl>,
-}
-
-impl DocsDotRsProvider {
-    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
-        Self { http_client }
-    }
-}
-
-#[async_trait]
-impl RustdocProvider for DocsDotRsProvider {
-    async fn fetch_page(
-        &self,
-        crate_name: &CrateName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>> {
-        let version = "latest";
-        let path = format!(
-            "{crate_name}/{version}/{crate_name}{item_path}",
-            item_path = item
-                .map(|item| format!("/{}", item.url_path()))
-                .unwrap_or_default()
-        );
-
-        let mut response = self
-            .http_client
-            .get(
-                &format!("https://docs.rs/{path}"),
-                AsyncBody::default(),
-                true,
-            )
-            .await?;
-
-        let mut body = Vec::new();
-        response
-            .body_mut()
-            .read_to_end(&mut body)
-            .await
-            .context("error reading docs.rs response body")?;
-
-        if response.status().is_client_error() {
-            let text = String::from_utf8_lossy(body.as_slice());
-            bail!(
-                "status error {}, response: {text:?}",
-                response.status().as_u16()
-            );
-        }
-
-        Ok(Some(String::from_utf8(body)?))
-    }
-}
-
-#[derive(Debug)]
-struct RustdocItemWithHistory {
-    pub item: RustdocItem,
-    #[cfg(debug_assertions)]
-    pub history: Vec<String>,
-}
-
-pub(crate) struct RustdocIndexer {
-    database: Arc<RustdocDatabase>,
-    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
-}
-
-impl RustdocIndexer {
-    pub fn new(
-        database: Arc<RustdocDatabase>,
-        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
-    ) -> Self {
-        Self { database, provider }
-    }
-
-    /// Indexes the crate with the given name.
-    pub async fn index(&self, crate_name: CrateName) -> Result<()> {
-        let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
-            return Ok(());
-        };
-
-        let (crate_root_markdown, items) =
-            convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
-
-        self.database
-            .insert(crate_name.clone(), None, crate_root_markdown)
-            .await?;
-
-        let mut seen_items = HashSet::from_iter(items.clone());
-        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
-            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
-                item,
-                #[cfg(debug_assertions)]
-                history: Vec::new(),
-            }));
-
-        while let Some(item_with_history) = items_to_visit.pop_front() {
-            let item = &item_with_history.item;
-
-            let Some(result) = self
-                .provider
-                .fetch_page(&crate_name, Some(&item))
-                .await
-                .with_context(|| {
-                    #[cfg(debug_assertions)]
-                    {
-                        format!(
-                            "failed to fetch {item:?}: {history:?}",
-                            history = item_with_history.history
-                        )
-                    }
-
-                    #[cfg(not(debug_assertions))]
-                    {
-                        format!("failed to fetch {item:?}")
-                    }
-                })?
-            else {
-                continue;
-            };
-
-            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
-
-            self.database
-                .insert(crate_name.clone(), Some(item), markdown)
-                .await?;
-
-            let parent_item = item;
-            for mut item in referenced_items {
-                if seen_items.contains(&item) {
-                    continue;
-                }
-
-                seen_items.insert(item.clone());
-
-                item.path.extend(parent_item.path.clone());
-                match parent_item.kind {
-                    RustdocItemKind::Mod => {
-                        item.path.push(parent_item.name.clone());
-                    }
-                    _ => {}
-                }
-
-                items_to_visit.push_back(RustdocItemWithHistory {
-                    #[cfg(debug_assertions)]
-                    history: {
-                        let mut history = item_with_history.history.clone();
-                        history.push(item.url_path());
-                        history
-                    },
-                    item,
-                });
-            }
-        }
-
-        Ok(())
-    }
-}

crates/rustdoc/src/rustdoc.rs 🔗

@@ -1,9 +0,0 @@
-mod indexer;
-mod item;
-mod store;
-mod to_markdown;
-
-pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource};
-pub use crate::item::*;
-pub use crate::store::*;
-pub use crate::to_markdown::convert_rustdoc_to_markdown;