Allow extensions to define providers for indexing docs (#13755)

Marshall Bowers created

This PR provides extensions with the ability to define providers for
indexing docs.

Release Notes:

- N/A

Change summary

Cargo.lock                                            |   1 
crates/assistant/src/assistant.rs                     |   9 
crates/assistant/src/slash_command/rustdoc_command.rs |  63 ++++--
crates/extension/Cargo.toml                           |   1 
crates/extension/src/extension_docs_indexer.rs        |  57 +++++
crates/extension/src/extension_manifest.rs            |   6 
crates/extension/src/extension_store.rs               |  18 +
crates/extension/src/extension_store_test.rs          |   9 
crates/extension/src/wasm_host/wit.rs                 |  19 +
crates/extension/src/wasm_host/wit/since_v0_0_7.rs    |  22 ++
crates/extension_api/src/extension_api.rs             |  19 +
crates/extension_api/wit/since_v0.0.7/extension.wit   |   9 
crates/indexed_docs/src/indexed_docs.rs               |   1 
crates/indexed_docs/src/indexer.rs                    | 122 ------------
crates/indexed_docs/src/providers/rustdoc.rs          | 127 ++++++++++++
crates/indexed_docs/src/registry.rs                   |   9 
crates/indexed_docs/src/store.rs                      |  86 +++-----
extensions/gleam/extension.toml                       |   2 
extensions/gleam/src/gleam.rs                         |  40 +++
19 files changed, 407 insertions(+), 213 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -3917,6 +3917,7 @@ dependencies = [
  "futures 0.3.28",
  "gpui",
  "http 0.1.0",
+ "indexed_docs",
  "isahc",
  "language",
  "log",

crates/assistant/src/assistant.rs 🔗

@@ -20,7 +20,7 @@ pub(crate) use completion_provider::*;
 pub(crate) use context_store::*;
 use fs::Fs;
 use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
-use indexed_docs::{IndexedDocsRegistry, Provider};
+use indexed_docs::IndexedDocsRegistry;
 pub(crate) use inline_assistant::*;
 pub(crate) use model_selector::*;
 use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
@@ -293,7 +293,6 @@ pub fn init(fs: Arc<dyn Fs>, client: Arc<Client>, cx: &mut AppContext) {
     inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
     terminal_inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
     IndexedDocsRegistry::init_global(cx);
-    register_indexed_docs_providers(cx);
 
     CommandPaletteFilter::update_global(cx, |filter, _cx| {
         filter.hide_namespace(Assistant::NAMESPACE);
@@ -328,12 +327,6 @@ fn register_slash_commands(cx: &mut AppContext) {
     slash_command_registry.register_command(fetch_command::FetchSlashCommand, false);
 }
 
-fn register_indexed_docs_providers(cx: &mut AppContext) {
-    let indexed_docs_registry = IndexedDocsRegistry::global(cx);
-
-    indexed_docs_registry.register_provider(Provider::rustdoc());
-}
-
 pub fn humanize_token_count(count: usize) -> String {
     match count {
         0..=999 => count.to_string(),

crates/assistant/src/slash_command/rustdoc_command.rs 🔗

@@ -9,8 +9,8 @@ use futures::AsyncReadExt;
 use gpui::{AppContext, Model, Task, WeakView};
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 use indexed_docs::{
-    convert_rustdoc_to_markdown, IndexedDocsStore, LocalProvider, PackageName, ProviderId,
-    RustdocSource,
+    convert_rustdoc_to_markdown, IndexedDocsRegistry, IndexedDocsStore, LocalProvider, PackageName,
+    ProviderId, RustdocIndexer, RustdocSource,
 };
 use language::LspAdapterDelegate;
 use project::{Project, ProjectPath};
@@ -90,6 +90,42 @@ impl RustdocSlashCommand {
             project.read(cx).absolute_path(&path, cx)?.as_path(),
         ))
     }
+
+    /// Registers the rustdoc provider with the global [`IndexedDocsRegistry`] if it
+    /// has not been registered yet.
+    ///
+    /// Registration is deferred until a command runs because building the provider
+    /// needs the workspace's project (for its `Fs` and the Cargo workspace root).
+    /// If those cannot be resolved the error is logged and nothing is registered.
+    fn ensure_rustdoc_provider_is_registered(
+        &self,
+        workspace: Option<WeakView<Workspace>>,
+        cx: &mut AppContext,
+    ) {
+        let registry = IndexedDocsRegistry::global(cx);
+        if registry.get_provider_store(ProviderId::rustdoc()).is_some() {
+            // A store already exists for the rustdoc provider; nothing to do.
+            return;
+        }
+
+        let provider_deps = maybe!({
+            let workspace = workspace
+                .ok_or_else(|| anyhow!("no workspace"))?
+                .upgrade()
+                .ok_or_else(|| anyhow!("workspace was dropped"))?;
+            let project = workspace.read(cx).project().clone();
+            let fs = project.read(cx).fs().clone();
+            let cargo_toml_path = Self::path_to_cargo_toml(project, cx);
+            let cargo_workspace_root = cargo_toml_path
+                .and_then(|path| path.parent().map(|parent| parent.to_path_buf()))
+                .ok_or_else(|| anyhow!("no Cargo workspace root found"))?;
+
+            anyhow::Ok((fs, cargo_workspace_root))
+        });
+
+        let Some((fs, cargo_workspace_root)) = provider_deps.log_err() else {
+            return;
+        };
+
+        let local_provider = LocalProvider::new(fs, cargo_workspace_root);
+        registry.register_provider(Box::new(RustdocIndexer::new(Box::new(local_provider))));
+    }
 }
 
 impl SlashCommand for RustdocSlashCommand {
@@ -116,19 +152,7 @@ impl SlashCommand for RustdocSlashCommand {
         workspace: Option<WeakView<Workspace>>,
         cx: &mut AppContext,
     ) -> Task<Result<Vec<String>>> {
-        let index_provider_deps = maybe!({
-            let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?;
-            let workspace = workspace
-                .upgrade()
-                .ok_or_else(|| anyhow!("workspace was dropped"))?;
-            let project = workspace.read(cx).project().clone();
-            let fs = project.read(cx).fs().clone();
-            let cargo_workspace_root = Self::path_to_cargo_toml(project, cx)
-                .and_then(|path| path.parent().map(|path| path.to_path_buf()))
-                .ok_or_else(|| anyhow!("no Cargo workspace root found"))?;
-
-            anyhow::Ok((fs, cargo_workspace_root))
-        });
+        self.ensure_rustdoc_provider_is_registered(workspace, cx);
 
         let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
         cx.background_executor().spawn(async move {
@@ -136,12 +160,9 @@ impl SlashCommand for RustdocSlashCommand {
 
             if let Some((crate_name, rest)) = query.split_once(':') {
                 if rest.is_empty() {
-                    if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
-                        let provider = Box::new(LocalProvider::new(fs, cargo_workspace_root));
-                        // We don't need to hold onto this task, as the `RustdocStore` will hold it
-                        // until it completes.
-                        let _ = store.clone().index(crate_name.into(), provider);
-                    }
+                    // We don't need to hold onto this task, as the `IndexedDocsStore` will hold it
+                    // until it completes.
+                    let _ = store.clone().index(crate_name.into());
                 }
             }
 

crates/extension/Cargo.toml 🔗

@@ -28,6 +28,7 @@ fs.workspace = true
 futures.workspace = true
 gpui.workspace = true
 http.workspace = true
+indexed_docs.workspace = true
 isahc.workspace = true
 language.workspace = true
 log.workspace = true

crates/extension/src/extension_docs_indexer.rs 🔗

@@ -0,0 +1,57 @@
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use anyhow::{anyhow, Result};
+use async_trait::async_trait;
+use futures::FutureExt;
+use indexed_docs::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
+use wasmtime_wasi::WasiView;
+
+use crate::wasm_host::{WasmExtension, WasmHost};
+
+/// An [`IndexedDocsProvider`] whose indexing logic is implemented by a Wasm extension.
+pub struct ExtensionDocsIndexer {
+    /// The extension whose `index_docs` export is called to index a package.
+    pub(crate) extension: WasmExtension,
+    /// The Wasm host; its `work_dir` is the base of this provider's database path.
+    pub(crate) host: Arc<WasmHost>,
+    /// The ID reported by this provider and passed to the extension when indexing.
+    pub(crate) id: ProviderId,
+}
+
+#[async_trait]
+impl IndexedDocsProvider for ExtensionDocsIndexer {
+    /// Returns the provider ID this indexer was created with.
+    fn id(&self) -> ProviderId {
+        self.id.clone()
+    }
+
+    /// Returns `<work_dir>/<extension id>/docs/<provider id>.0.mdb`.
+    fn database_path(&self) -> PathBuf {
+        self.host
+            .work_dir
+            .join(self.extension.manifest.id.as_ref())
+            .join("docs")
+            .join(format!("{}.0.mdb", self.id))
+    }
+
+    /// Indexes `package` by calling the extension's `index_docs` export, handing it
+    /// `database` as a resource.
+    ///
+    /// An error string returned by the extension is converted into an `anyhow` error.
+    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
+        // Cloned up front so the future below owns everything it uses.
+        let provider_id = self.id.clone();
+
+        self.extension
+            .call(|extension, store| {
+                async move {
+                    // NOTE(review): the pushed resource is not removed from the table
+                    // in this code path — confirm it is cleaned up elsewhere.
+                    let database_resource = store.data_mut().table().push(database)?;
+                    let outcome = extension
+                        .call_index_docs(
+                            store,
+                            provider_id.as_ref(),
+                            package.as_ref(),
+                            database_resource,
+                        )
+                        .await?;
+                    outcome.map_err(|err| anyhow!("{err:?}"))?;
+
+                    anyhow::Ok(())
+                }
+                .boxed()
+            })
+            .await
+    }
+}

crates/extension/src/extension_manifest.rs 🔗

@@ -76,6 +76,8 @@ pub struct ExtensionManifest {
     pub language_servers: BTreeMap<LanguageServerName, LanguageServerManifestEntry>,
     #[serde(default)]
     pub slash_commands: BTreeMap<Arc<str>, SlashCommandManifestEntry>,
+    #[serde(default)]
+    pub indexed_docs_providers: BTreeMap<Arc<str>, IndexedDocsProviderEntry>,
 }
 
 #[derive(Clone, Default, PartialEq, Eq, Debug, Deserialize, Serialize)]
@@ -137,6 +139,9 @@ pub struct SlashCommandManifestEntry {
     pub requires_argument: bool,
 }
 
+/// The manifest entry for an indexed docs provider.
+///
+/// It currently has no fields: a provider is identified by its key in
+/// `ExtensionManifest::indexed_docs_providers`.
+#[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)]
+pub struct IndexedDocsProviderEntry {}
+
 impl ExtensionManifest {
     pub async fn load(fs: Arc<dyn Fs>, extension_dir: &Path) -> Result<Self> {
         let extension_name = extension_dir
@@ -200,5 +205,6 @@ fn manifest_from_old_manifest(
             .collect(),
         language_servers: Default::default(),
         slash_commands: BTreeMap::default(),
+        indexed_docs_providers: BTreeMap::default(),
     }
 }

crates/extension/src/extension_store.rs 🔗

@@ -1,4 +1,5 @@
 pub mod extension_builder;
+mod extension_docs_indexer;
 mod extension_lsp_adapter;
 mod extension_manifest;
 mod extension_settings;
@@ -8,6 +9,7 @@ mod wasm_host;
 #[cfg(test)]
 mod extension_store_test;
 
+use crate::extension_docs_indexer::ExtensionDocsIndexer;
 use crate::extension_manifest::SchemaVersion;
 use crate::extension_slash_command::ExtensionSlashCommand;
 use crate::{extension_lsp_adapter::ExtensionLspAdapter, wasm_host::wit};
@@ -32,6 +34,7 @@ use gpui::{
     WeakModel,
 };
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
+use indexed_docs::{IndexedDocsRegistry, ProviderId};
 use language::{
     LanguageConfig, LanguageMatcher, LanguageQueries, LanguageRegistry, QUERY_FILENAME_PREFIXES,
 };
@@ -111,6 +114,7 @@ pub struct ExtensionStore {
     language_registry: Arc<LanguageRegistry>,
     theme_registry: Arc<ThemeRegistry>,
     slash_command_registry: Arc<SlashCommandRegistry>,
+    indexed_docs_registry: Arc<IndexedDocsRegistry>,
     modified_extensions: HashSet<Arc<str>>,
     wasm_host: Arc<WasmHost>,
     wasm_extensions: Vec<(Arc<ExtensionManifest>, WasmExtension)>,
@@ -188,6 +192,7 @@ pub fn init(
             language_registry,
             theme_registry,
             SlashCommandRegistry::global(cx),
+            IndexedDocsRegistry::global(cx),
             cx,
         )
     });
@@ -221,6 +226,7 @@ impl ExtensionStore {
         language_registry: Arc<LanguageRegistry>,
         theme_registry: Arc<ThemeRegistry>,
         slash_command_registry: Arc<SlashCommandRegistry>,
+        indexed_docs_registry: Arc<IndexedDocsRegistry>,
         cx: &mut ModelContext<Self>,
     ) -> Self {
         let work_dir = extensions_dir.join("work");
@@ -252,6 +258,7 @@ impl ExtensionStore {
             language_registry,
             theme_registry,
             slash_command_registry,
+            indexed_docs_registry,
             reload_tx,
             tasks: Vec::new(),
         };
@@ -1192,7 +1199,18 @@ impl ExtensionStore {
                             false,
                         );
                     }
+
+                    for (provider_id, _provider) in &manifest.indexed_docs_providers {
+                        this.indexed_docs_registry.register_provider(Box::new(
+                            ExtensionDocsIndexer {
+                                extension: wasm_extension.clone(),
+                                host: this.wasm_host.clone(),
+                                id: ProviderId(provider_id.clone()),
+                            },
+                        ));
+                    }
                 }
+
                 this.wasm_extensions.extend(wasm_extensions);
                 ThemeSettings::reload_current_theme(cx)
             })

crates/extension/src/extension_store_test.rs 🔗

@@ -12,6 +12,7 @@ use fs::{FakeFs, Fs, RealFs};
 use futures::{io::BufReader, AsyncReadExt, StreamExt};
 use gpui::{Context, SemanticVersion, TestAppContext};
 use http::{FakeHttpClient, Response};
+use indexed_docs::IndexedDocsRegistry;
 use language::{LanguageMatcher, LanguageRegistry, LanguageServerBinaryStatus, LanguageServerName};
 use node_runtime::FakeNodeRuntime;
 use parking_lot::Mutex;
@@ -158,6 +159,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
                         .collect(),
                         language_servers: BTreeMap::default(),
                         slash_commands: BTreeMap::default(),
+                        indexed_docs_providers: BTreeMap::default(),
                     }),
                     dev: false,
                 },
@@ -182,6 +184,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
                         grammars: BTreeMap::default(),
                         language_servers: BTreeMap::default(),
                         slash_commands: BTreeMap::default(),
+                        indexed_docs_providers: BTreeMap::default(),
                     }),
                     dev: false,
                 },
@@ -254,6 +257,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
     let language_registry = Arc::new(LanguageRegistry::test(cx.executor()));
     let theme_registry = Arc::new(ThemeRegistry::new(Box::new(())));
     let slash_command_registry = SlashCommandRegistry::new();
+    let indexed_docs_registry = Arc::new(IndexedDocsRegistry::new(cx.executor()));
     let node_runtime = FakeNodeRuntime::new();
 
     let store = cx.new_model(|cx| {
@@ -267,6 +271,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
             language_registry.clone(),
             theme_registry.clone(),
             slash_command_registry.clone(),
+            indexed_docs_registry.clone(),
             cx,
         )
     });
@@ -339,6 +344,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
                 grammars: BTreeMap::default(),
                 language_servers: BTreeMap::default(),
                 slash_commands: BTreeMap::default(),
+                indexed_docs_providers: BTreeMap::default(),
             }),
             dev: false,
         },
@@ -389,6 +395,7 @@ async fn test_extension_store(cx: &mut TestAppContext) {
             language_registry.clone(),
             theme_registry.clone(),
             slash_command_registry,
+            indexed_docs_registry,
             cx,
         )
     });
@@ -468,6 +475,7 @@ async fn test_extension_store_with_gleam_extension(cx: &mut TestAppContext) {
     let language_registry = project.read_with(cx, |project, _cx| project.languages().clone());
     let theme_registry = Arc::new(ThemeRegistry::new(Box::new(())));
     let slash_command_registry = SlashCommandRegistry::new();
+    let indexed_docs_registry = Arc::new(IndexedDocsRegistry::new(cx.executor()));
     let node_runtime = FakeNodeRuntime::new();
 
     let mut status_updates = language_registry.language_server_binary_statuses();
@@ -558,6 +566,7 @@ async fn test_extension_store_with_gleam_extension(cx: &mut TestAppContext) {
             language_registry.clone(),
             theme_registry.clone(),
             slash_command_registry,
+            indexed_docs_registry,
             cx,
         )
     });

crates/extension/src/wasm_host/wit.rs 🔗

@@ -2,6 +2,7 @@ mod since_v0_0_1;
 mod since_v0_0_4;
 mod since_v0_0_6;
 mod since_v0_0_7;
+use indexed_docs::IndexedDocsDatabase;
 use release_channel::ReleaseChannel;
 use since_v0_0_7 as latest;
 
@@ -289,6 +290,24 @@ impl Extension {
             }
         }
     }
+
+    /// Calls the extension's `index_docs` export for the given provider and package,
+    /// passing `database` as the key-value store resource.
+    ///
+    /// The outer `Result` reports host-side failures; the inner `Result` carries the
+    /// error string returned by the extension itself. Extensions using an API version
+    /// prior to v0.0.7 always yield an outer error.
+    pub async fn call_index_docs(
+        &self,
+        store: &mut Store<WasmState>,
+        provider: &str,
+        package_name: &str,
+        database: Resource<Arc<IndexedDocsDatabase>>,
+    ) -> Result<Result<(), String>> {
+        match self {
+            Extension::V007(ext) => {
+                ext.call_index_docs(store, provider, package_name, database)
+                    .await
+            }
+            // Listed explicitly (no wildcard) so a new version must be handled here.
+            Extension::V001(_) | Extension::V004(_) | Extension::V006(_) => {
+                Err(anyhow!("`index_docs` not available prior to v0.0.7"))
+            }
+        }
+    }
 }
 
 trait ToWasmtimeResult<T> {

crates/extension/src/wasm_host/wit/since_v0_0_7.rs 🔗

@@ -7,6 +7,7 @@ use async_trait::async_trait;
 use futures::AsyncReadExt;
 use futures::{io::BufReader, FutureExt as _};
 use http::AsyncBody;
+use indexed_docs::IndexedDocsDatabase;
 use language::{
     language_settings::AllLanguageSettings, LanguageServerBinaryStatus, LspAdapterDelegate,
 };
@@ -28,6 +29,7 @@ wasmtime::component::bindgen!({
     path: "../extension_api/wit/since_v0.0.7",
     with: {
          "worktree": ExtensionWorktree,
+         "key-value-store": ExtensionKeyValueStore
     },
 });
 
@@ -39,11 +41,31 @@ mod settings {
 
 pub type ExtensionWorktree = Arc<dyn LspAdapterDelegate>;
 
+/// The host-side type backing the WIT `key-value-store` resource.
+pub type ExtensionKeyValueStore = Arc<IndexedDocsDatabase>;
+
 pub fn linker() -> &'static Linker<WasmState> {
     static LINKER: OnceLock<Linker<WasmState>> = OnceLock::new();
     LINKER.get_or_init(|| super::new_linker(Extension::add_to_linker))
 }
 
+#[async_trait]
+impl HostKeyValueStore for WasmState {
+    /// Inserts `value` under `key` into the database behind the `kv_store` resource.
+    ///
+    /// The outer result fails if the resource handle cannot be resolved in the
+    /// resource table; a failed insert is converted via `to_wasmtime_result`.
+    async fn insert(
+        &mut self,
+        kv_store: Resource<ExtensionKeyValueStore>,
+        key: String,
+        value: String,
+    ) -> wasmtime::Result<Result<(), String>> {
+        let kv_store = self.table.get(&kv_store)?;
+        kv_store.insert(key, value).await.to_wasmtime_result()
+    }
+
+    /// Resource destructor for key-value stores; intentionally a no-op.
+    fn drop(&mut self, _kv_store: Resource<ExtensionKeyValueStore>) -> Result<()> {
+        // We only ever hand out borrows of key-value stores.
+        Ok(())
+    }
+}
+
 #[async_trait]
 impl HostWorktree for WasmState {
     async fn id(

crates/extension_api/src/extension_api.rs 🔗

@@ -27,7 +27,7 @@ pub use wit::{
     zed::extension::platform::{current_platform, Architecture, Os},
     zed::extension::slash_command::{SlashCommand, SlashCommandOutput, SlashCommandOutputSection},
     CodeLabel, CodeLabelSpan, CodeLabelSpanLiteral, Command, DownloadedFileType, EnvVars,
-    LanguageServerInstallationStatus, Range, Worktree,
+    KeyValueStore, LanguageServerInstallationStatus, Range, Worktree,
 };
 
 // Undocumented WIT re-exports.
@@ -127,6 +127,15 @@ pub trait Extension: Send + Sync {
     ) -> Result<SlashCommandOutput, String> {
         Err("`run_slash_command` not implemented".to_string())
     }
+
+    /// Indexes the docs for the specified package.
+    ///
+    /// The default implementation returns an error stating that `index_docs` is
+    /// not implemented.
+    fn index_docs(
+        &self,
+        _provider: String,
+        _package: String,
+        _database: &KeyValueStore,
+    ) -> Result<(), String> {
+        Err("`index_docs` not implemented".to_string())
+    }
 }
 
 /// Registers the provided type as a Zed extension.
@@ -249,6 +258,14 @@ impl wit::Guest for Component {
     ) -> Result<SlashCommandOutput, String> {
         extension().run_slash_command(command, argument, worktree)
     }
+
+    // Delegates to the `index_docs` method of the value returned by `extension()`.
+    fn index_docs(
+        provider: String,
+        package: String,
+        database: &KeyValueStore,
+    ) -> Result<(), String> {
+        extension().index_docs(provider, package, database)
+    }
 }
 
 /// The ID of a language server.

crates/extension_api/wit/since_v0.0.7/extension.wit 🔗

@@ -83,6 +83,12 @@ world extension {
         shell-env: func() -> env-vars;
     }
 
+    /// A key-value store.
+    resource key-value-store {
+        /// Inserts an entry under the specified key.
+        insert: func(key: string, value: string) -> result<_, string>;
+    }
+
     /// Returns the command used to start up the language server.
     export language-server-command: func(language-server-id: string, worktree: borrow<worktree>) -> result<command, string>;
 
@@ -128,4 +134,7 @@ world extension {
 
     /// Returns the output from running the provided slash command.
     export run-slash-command: func(command: slash-command, argument: option<string>, worktree: borrow<worktree>) -> result<slash-command-output, string>;
+
+    /// Indexes the docs for the specified package.
+    export index-docs: func(provider-name: string, package-name: string, database: borrow<key-value-store>) -> result<_, string>;
 }

crates/indexed_docs/src/indexer.rs 🔗

@@ -1,122 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::{Context, Result};
-use async_trait::async_trait;
-use collections::{HashSet, VecDeque};
-
-use crate::{
-    convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
-};
-
-#[async_trait]
-pub trait IndexedDocsProvider {
-    async fn fetch_page(
-        &self,
-        package: &PackageName,
-        item: Option<&RustdocItem>,
-    ) -> Result<Option<String>>;
-}
-
-#[derive(Debug)]
-struct RustdocItemWithHistory {
-    pub item: RustdocItem,
-    #[cfg(debug_assertions)]
-    pub history: Vec<String>,
-}
-
-pub(crate) struct DocsIndexer {
-    database: Arc<IndexedDocsDatabase>,
-    provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
-}
-
-impl DocsIndexer {
-    pub fn new(
-        database: Arc<IndexedDocsDatabase>,
-        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
-    ) -> Self {
-        Self { database, provider }
-    }
-
-    /// Indexes the package with the given name.
-    pub async fn index(&self, package: PackageName) -> Result<()> {
-        let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
-            return Ok(());
-        };
-
-        let (crate_root_markdown, items) =
-            convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
-
-        self.database
-            .insert(package.clone(), None, crate_root_markdown)
-            .await?;
-
-        let mut seen_items = HashSet::from_iter(items.clone());
-        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
-            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
-                item,
-                #[cfg(debug_assertions)]
-                history: Vec::new(),
-            }));
-
-        while let Some(item_with_history) = items_to_visit.pop_front() {
-            let item = &item_with_history.item;
-
-            let Some(result) = self
-                .provider
-                .fetch_page(&package, Some(&item))
-                .await
-                .with_context(|| {
-                    #[cfg(debug_assertions)]
-                    {
-                        format!(
-                            "failed to fetch {item:?}: {history:?}",
-                            history = item_with_history.history
-                        )
-                    }
-
-                    #[cfg(not(debug_assertions))]
-                    {
-                        format!("failed to fetch {item:?}")
-                    }
-                })?
-            else {
-                continue;
-            };
-
-            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
-
-            self.database
-                .insert(package.clone(), Some(item), markdown)
-                .await?;
-
-            let parent_item = item;
-            for mut item in referenced_items {
-                if seen_items.contains(&item) {
-                    continue;
-                }
-
-                seen_items.insert(item.clone());
-
-                item.path.extend(parent_item.path.clone());
-                match parent_item.kind {
-                    RustdocItemKind::Mod => {
-                        item.path.push(parent_item.name.clone());
-                    }
-                    _ => {}
-                }
-
-                items_to_visit.push_back(RustdocItemWithHistory {
-                    #[cfg(debug_assertions)]
-                    history: {
-                        let mut history = item_with_history.history.clone();
-                        history.push(item.url_path());
-                        history
-                    },
-                    item,
-                });
-            }
-        }
-
-        Ok(())
-    }
-}

crates/indexed_docs/src/providers/rustdoc.rs 🔗

@@ -9,12 +9,12 @@ use std::sync::Arc;
 
 use anyhow::{bail, Context, Result};
 use async_trait::async_trait;
+use collections::{HashSet, VecDeque};
 use fs::Fs;
 use futures::AsyncReadExt;
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 
-use crate::indexer::IndexedDocsProvider;
-use crate::PackageName;
+use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 
 #[derive(Debug, Clone, Copy)]
 pub enum RustdocSource {
@@ -26,6 +26,125 @@ pub enum RustdocSource {
     DocsDotRs,
 }
 
+/// A rustdoc item queued for indexing.
+///
+/// In debug builds it also carries the URL paths recorded on the way to this item,
+/// which are included in the error context when fetching the item fails.
+#[derive(Debug)]
+struct RustdocItemWithHistory {
+    /// The item to fetch and index.
+    pub item: RustdocItem,
+    /// URL paths accumulated while crawling towards this item (debug builds only).
+    #[cfg(debug_assertions)]
+    pub history: Vec<String>,
+}
+
+/// A source of rustdoc pages for [`RustdocIndexer`].
+#[async_trait]
+pub trait RustdocProvider {
+    /// Fetches the rustdoc page for `item` within `package`.
+    ///
+    /// [`RustdocIndexer`] passes `None` for `item` to request the package's root
+    /// page, and skips the package or item when this returns `Ok(None)`.
+    async fn fetch_page(
+        &self,
+        package: &PackageName,
+        item: Option<&RustdocItem>,
+    ) -> Result<Option<String>>;
+}
+
+/// An [`IndexedDocsProvider`] that indexes rustdoc pages obtained from a
+/// [`RustdocProvider`].
+pub struct RustdocIndexer {
+    /// Where the rustdoc pages are fetched from.
+    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
+}
+
+impl RustdocIndexer {
+    /// Creates an indexer that fetches pages through `provider`.
+    pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
+        Self { provider }
+    }
+}
+
+#[async_trait]
+impl IndexedDocsProvider for RustdocIndexer {
+    /// Returns the rustdoc provider ID.
+    fn id(&self) -> ProviderId {
+        ProviderId::rustdoc()
+    }
+
+    /// Returns the location of the rustdoc database inside the support directory.
+    fn database_path(&self) -> PathBuf {
+        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
+    }
+
+    /// Indexes `package` by crawling its rustdoc pages breadth-first.
+    ///
+    /// The package root page is stored under the package name; every item page is
+    /// stored under `<package>::<item>`. If the provider has no root page for the
+    /// package, nothing is indexed and `Ok(())` is returned. Items whose page is
+    /// missing are skipped; fetch, conversion and insert errors abort indexing.
+    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
+        let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
+            return Ok(());
+        };
+
+        let (crate_root_markdown, items) =
+            convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
+
+        database
+            .insert(package.to_string(), crate_root_markdown)
+            .await?;
+
+        // Items already queued, so that each one is visited at most once.
+        let mut seen_items = HashSet::from_iter(items.clone());
+        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
+            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
+                item,
+                #[cfg(debug_assertions)]
+                history: Vec::new(),
+            }));
+
+        while let Some(item_with_history) = items_to_visit.pop_front() {
+            let item = &item_with_history.item;
+
+            let Some(result) = self
+                .provider
+                .fetch_page(&package, Some(item))
+                .await
+                .with_context(|| {
+                    #[cfg(debug_assertions)]
+                    {
+                        format!(
+                            "failed to fetch {item:?}: {history:?}",
+                            history = item_with_history.history
+                        )
+                    }
+
+                    #[cfg(not(debug_assertions))]
+                    {
+                        format!("failed to fetch {item:?}")
+                    }
+                })?
+            else {
+                continue;
+            };
+
+            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
+
+            database
+                .insert(format!("{package}::{}", item.display()), markdown)
+                .await?;
+
+            let parent_item = item;
+            for mut item in referenced_items {
+                // NOTE(review): de-duplication happens before the path is adjusted
+                // below — confirm that this is the intended identity for an item.
+                if seen_items.contains(&item) {
+                    continue;
+                }
+
+                seen_items.insert(item.clone());
+
+                item.path.extend(parent_item.path.clone());
+                // Items referenced from a module page live inside that module.
+                if matches!(parent_item.kind, RustdocItemKind::Mod) {
+                    item.path.push(parent_item.name.clone());
+                }
+
+                items_to_visit.push_back(RustdocItemWithHistory {
+                    #[cfg(debug_assertions)]
+                    history: {
+                        let mut history = item_with_history.history.clone();
+                        history.push(item.url_path());
+                        history
+                    },
+                    item,
+                });
+            }
+        }
+
+        Ok(())
+    }
+}
+
 pub struct LocalProvider {
     fs: Arc<dyn Fs>,
     cargo_workspace_root: PathBuf,
@@ -41,7 +160,7 @@ impl LocalProvider {
 }
 
 #[async_trait]
-impl IndexedDocsProvider for LocalProvider {
+impl RustdocProvider for LocalProvider {
     async fn fetch_page(
         &self,
         crate_name: &PackageName,
@@ -74,7 +193,7 @@ impl DocsDotRsProvider {
 }
 
 #[async_trait]
-impl IndexedDocsProvider for DocsDotRsProvider {
+impl RustdocProvider for DocsDotRsProvider {
     async fn fetch_page(
         &self,
         crate_name: &PackageName,

crates/indexed_docs/src/registry.rs 🔗

@@ -4,7 +4,7 @@ use collections::HashMap;
 use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
 use parking_lot::RwLock;
 
-use crate::{IndexedDocsStore, Provider, ProviderId};
+use crate::{IndexedDocsProvider, IndexedDocsStore, ProviderId};
 
 struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
 
@@ -34,9 +34,12 @@ impl IndexedDocsRegistry {
         }
     }
 
-    pub fn register_provider(&self, provider: Provider) {
+    pub fn register_provider(
+        &self,
+        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
+    ) {
         self.stores_by_provider.write().insert(
-            provider.id.clone(),
+            provider.id(),
             Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
         );
     }

crates/indexed_docs/src/store.rs 🔗

@@ -3,6 +3,7 @@ use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 
 use anyhow::{anyhow, Result};
+use async_trait::async_trait;
 use collections::HashMap;
 use derive_more::{Deref, Display};
 use futures::future::{self, BoxFuture, Shared};
@@ -15,11 +16,10 @@ use parking_lot::RwLock;
 use serde::{Deserialize, Serialize};
 use util::ResultExt;
 
-use crate::indexer::{DocsIndexer, IndexedDocsProvider};
-use crate::{IndexedDocsRegistry, RustdocItem};
+use crate::IndexedDocsRegistry;
 
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
-pub struct ProviderId(Arc<str>);
+pub struct ProviderId(pub Arc<str>);
 
 impl ProviderId {
     pub fn rustdoc() -> Self {
@@ -27,20 +27,6 @@ impl ProviderId {
     }
 }
 
-pub struct Provider {
-    pub id: ProviderId,
-    pub database_path: PathBuf,
-}
-
-impl Provider {
-    pub fn rustdoc() -> Self {
-        Self {
-            id: ProviderId("rustdoc".into()),
-            database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
-        }
-    }
-}
-
 /// The name of a package.
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
 pub struct PackageName(Arc<str>);
@@ -51,11 +37,24 @@ impl From<&str> for PackageName {
     }
 }
 
+#[async_trait]
+pub trait IndexedDocsProvider {
+    /// Returns the ID of this provider; the registry uses it as the key for this provider's store.
+    fn id(&self) -> ProviderId;
+
+    /// Returns the path to the database for this provider; the store opens it on construction.
+    fn database_path(&self) -> PathBuf;
+
+    /// Indexes the package with the given name (presumably writing docs into `database`; verify).
+    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()>;
+}
+
 /// A store for indexed docs.
 pub struct IndexedDocsStore {
     executor: BackgroundExecutor,
     database_future:
         Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
+    provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
     indexing_tasks_by_package:
         RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
 }
@@ -68,11 +67,15 @@ impl IndexedDocsStore {
             .ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
     }
 
-    pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
+    pub fn new(
+        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
+        executor: BackgroundExecutor,
+    ) -> Self {
         let database_future = executor
             .spawn({
                 let executor = executor.clone();
-                async move { IndexedDocsDatabase::new(provider.database_path, executor) }
+                let database_path = provider.database_path();
+                async move { IndexedDocsDatabase::new(database_path, executor) }
             })
             .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
             .boxed()
@@ -81,6 +84,7 @@ impl IndexedDocsStore {
         Self {
             executor,
             database_future,
+            provider,
             indexing_tasks_by_package: RwLock::new(HashMap::default()),
         }
     }
@@ -95,18 +99,23 @@ impl IndexedDocsStore {
         package: PackageName,
         item_path: Option<String>,
     ) -> Result<MarkdownDocs> {
+        let item_path = if let Some(item_path) = item_path {
+            format!("{package}::{item_path}")
+        } else {
+            package.to_string()
+        };
+
         self.database_future
             .clone()
             .await
             .map_err(|err| anyhow!(err))?
-            .load(package, item_path)
+            .load(item_path)
             .await
     }
 
     pub fn index(
         self: Arc<Self>,
         package: PackageName,
-        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
     ) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
         if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
             return existing_task.clone();
@@ -132,9 +141,7 @@ impl IndexedDocsStore {
                             .clone()
                             .await
                             .map_err(|err| anyhow!(err))?;
-                        let indexer = DocsIndexer::new(database, provider);
-
-                        indexer.index(package.clone()).await
+                        this.provider.index(package, database).await
                     };
 
                     index_task.await.map_err(Arc::new)
@@ -192,7 +199,7 @@ impl IndexedDocsStore {
 #[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
 pub struct MarkdownDocs(pub String);
 
-pub(crate) struct IndexedDocsDatabase {
+pub struct IndexedDocsDatabase {
     executor: BackgroundExecutor,
     env: heed::Env,
     entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
@@ -237,44 +244,25 @@ impl IndexedDocsDatabase {
         })
     }
 
-    pub fn load(
-        &self,
-        package: PackageName,
-        item_path: Option<String>,
-    ) -> Task<Result<MarkdownDocs>> {
+    pub fn load(&self, key: String) -> Task<Result<MarkdownDocs>> {
         let env = self.env.clone();
         let entries = self.entries;
-        let item_path = if let Some(item_path) = item_path {
-            format!("{package}::{item_path}")
-        } else {
-            package.to_string()
-        };
 
         self.executor.spawn(async move {
             let txn = env.read_txn()?;
             entries
-                .get(&txn, &item_path)?
-                .ok_or_else(|| anyhow!("no docs found for {item_path}"))
+                .get(&txn, &key)?
+                .ok_or_else(|| anyhow!("no docs found for {key}"))
         })
     }
 
-    pub fn insert(
-        &self,
-        package: PackageName,
-        item: Option<&RustdocItem>,
-        docs: String,
-    ) -> Task<Result<()>> {
+    pub fn insert(&self, key: String, docs: String) -> Task<Result<()>> {
         let env = self.env.clone();
         let entries = self.entries;
-        let (item_path, entry) = if let Some(item) = item {
-            (format!("{package}::{}", item.display()), MarkdownDocs(docs))
-        } else {
-            (package.to_string(), MarkdownDocs(docs))
-        };
 
         self.executor.spawn(async move {
             let mut txn = env.write_txn()?;
-            entries.put(&mut txn, &item_path, &entry)?;
+            entries.put(&mut txn, &key, &MarkdownDocs(docs))?;
             txn.commit()?;
             Ok(())
         })

extensions/gleam/extension.toml 🔗

@@ -23,3 +23,5 @@ tooltip_text = "Insert Gleam project data"
 description = "Returns Gleam docs."
 requires_argument = true
 tooltip_text = "Insert Gleam docs"
+
+# [indexed_docs_providers.gleam-hexdocs]

extensions/gleam/src/gleam.rs 🔗

@@ -4,10 +4,10 @@ use std::fs;
 use std::rc::Rc;
 use zed::lsp::CompletionKind;
 use zed::{
-    CodeLabel, CodeLabelSpan, LanguageServerId, SlashCommand, SlashCommandOutput,
-    SlashCommandOutputSection,
+    CodeLabel, CodeLabelSpan, HttpRequest, KeyValueStore, LanguageServerId, SlashCommand,
+    SlashCommandOutput, SlashCommandOutputSection,
 };
-use zed_extension_api::{self as zed, fetch, HttpRequest, Result};
+use zed_extension_api::{self as zed, Result};
 
 struct GleamExtension {
     cached_binary_path: Option<String>,
@@ -180,7 +180,7 @@ impl zed::Extension for GleamExtension {
                     .ok_or_else(|| "missing package name".to_string())?;
                 let module_path = components.map(ToString::to_string).collect::<Vec<_>>();
 
-                let response = fetch(&HttpRequest {
+                let response = zed::fetch(&HttpRequest {
                     url: format!(
                         "https://hexdocs.pm/{package_name}{maybe_path}",
                         maybe_path = if !module_path.is_empty() {
@@ -236,6 +236,38 @@ impl zed::Extension for GleamExtension {
             command => Err(format!("unknown slash command: \"{command}\"")),
         }
     }
+
+    /// Indexes the docs for `package` on behalf of the docs provider named `provider`.
+    ///
+    /// For the `gleam-hexdocs` provider this fetches `https://hexdocs.pm/{package}`, converts
+    /// the response body from HTML to Markdown, and inserts the result into `database` keyed
+    /// by the package name.
+    ///
+    /// Returns an error if the fetch, the conversion, or the insert fails, or if `provider`
+    /// is not a provider implemented by this extension.
+    fn index_docs(
+        &self,
+        provider: String,
+        package: String,
+        database: &KeyValueStore,
+    ) -> Result<(), String> {
+        match provider.as_str() {
+            "gleam-hexdocs" => {
+                let response = zed::fetch(&HttpRequest {
+                    url: format!("https://hexdocs.pm/{package}"),
+                })?;
+
+                let mut handlers: Vec<TagHandler> = vec![
+                    Rc::new(RefCell::new(
+                        html_to_markdown::markdown::WebpageChromeRemover,
+                    )),
+                    Rc::new(RefCell::new(html_to_markdown::markdown::ParagraphHandler)),
+                    Rc::new(RefCell::new(html_to_markdown::markdown::HeadingHandler)),
+                    Rc::new(RefCell::new(html_to_markdown::markdown::ListHandler)),
+                    Rc::new(RefCell::new(html_to_markdown::markdown::TableHandler::new())),
+                    Rc::new(RefCell::new(html_to_markdown::markdown::StyledTextHandler)),
+                ];
+
+                let markdown = convert_html_to_markdown(response.body.as_bytes(), &mut handlers)
+                    .map_err(|err| format!("failed to convert docs to Markdown: {err}"))?;
+
+                Ok(database.insert(&package, &markdown)?)
+            }
+            // Report unrecognized providers instead of silently claiming success, matching how
+            // unknown slash commands are rejected elsewhere in this impl.
+            unknown => Err(format!("unknown docs provider: \"{unknown}\"")),
+        }
+    }
 }
 
 zed::register_extension!(GleamExtension);