Promote package suggestions to a first-class concept on `IndexedDocsProvider`s (#16177)

Marshall Bowers created

This PR promotes package suggestions to a first-class concept on the
`IndexedDocsProvider` trait.

This will allow any implementer of `IndexedDocsProvider` to provide a
list of package names to suggest for use with `/docs`.

For the docs.rs provider we use the 250 most popular Rust crates (as
listed on [lib.rs](https://lib.rs/std)), and for the rustdoc provider we
use the packages reported by `cargo metadata` for the Cargo workspace
(the workspace crates as well as their dependency crates).

Release Notes:

- N/A

Change summary

crates/assistant/src/slash_command/docs_command.rs           |  65 -
crates/extension/src/extension_indexed_docs_provider.rs      |   4 
crates/indexed_docs/src/providers/rustdoc.rs                 |  64 
crates/indexed_docs/src/providers/rustdoc/popular_crates.txt | 252 ++++++
crates/indexed_docs/src/store.rs                             |  13 
typos.toml                                                   |   3 
6 files changed, 318 insertions(+), 83 deletions(-)

Detailed changes

crates/assistant/src/slash_command/docs_command.rs 🔗

@@ -72,9 +72,6 @@ impl DocsSlashCommand {
             });
 
             if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
-                // List the workspace crates once to prime the cache.
-                LocalRustdocProvider::list_workspace_crates().ok();
-
                 indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new(
                     fs,
                     cargo_workspace_root,
@@ -232,50 +229,26 @@ impl SlashCommand for DocsSlashCommand {
                         drop(store.clone().index(package.as_str().into()));
                     }
 
-                    let items = store.search(package).await;
-
-                    if provider == LocalRustdocProvider::id() {
-                        let items = build_completions(provider.clone(), items);
-                        let workspace_crates = LocalRustdocProvider::list_workspace_crates()?;
-
-                        let mut all_items = items;
-                        let workspace_crate_completions = workspace_crates
-                            .into_iter()
-                            .filter(|crate_name| {
-                                !all_items
-                                    .iter()
-                                    .any(|item| item.label.as_str() == crate_name.as_ref())
-                            })
-                            .map(|crate_name| ArgumentCompletion {
-                                label: format!("{crate_name} (unindexed)"),
-                                new_text: format!("{provider} {crate_name}"),
-                                run_command: true,
-                            })
-                            .collect::<Vec<_>>();
-                        all_items.extend(workspace_crate_completions);
-                        return Ok(all_items);
-                    }
+                    let suggested_packages = store.clone().suggest_packages().await?;
+                    let search_results = store.search(package).await;
 
-                    if items.is_empty() {
-                        if provider == DocsDotRsProvider::id() {
-                            return Ok(std::iter::once(ArgumentCompletion {
-                                label: format!(
-                                    "Enter a {package_term} name or try one of these:",
-                                    package_term = package_term(&provider)
-                                ),
-                                new_text: provider.to_string(),
-                                run_command: false,
-                            })
-                            .chain(DocsDotRsProvider::AUTO_SUGGESTED_CRATES.into_iter().map(
-                                |crate_name| ArgumentCompletion {
-                                    label: crate_name.to_string(),
-                                    new_text: format!("{provider} {crate_name}"),
-                                    run_command: true,
-                                },
-                            ))
-                            .collect());
-                        }
+                    let mut items = build_completions(provider.clone(), search_results);
+                    let workspace_crate_completions = suggested_packages
+                        .into_iter()
+                        .filter(|package_name| {
+                            !items
+                                .iter()
+                                .any(|item| item.label.as_str() == package_name.as_ref())
+                        })
+                        .map(|package_name| ArgumentCompletion {
+                            label: format!("{package_name} (unindexed)"),
+                            new_text: format!("{provider} {package_name}"),
+                            run_command: true,
+                        })
+                        .collect::<Vec<_>>();
+                    items.extend(workspace_crate_completions);
 
+                    if items.is_empty() {
                         return Ok(vec![ArgumentCompletion {
                             label: format!(
                                 "Enter a {package_term} name.",
@@ -286,7 +259,7 @@ impl SlashCommand for DocsSlashCommand {
                         }]);
                     }
 
-                    Ok(build_completions(provider, items))
+                    Ok(items)
                 }
                 DocsSlashCommandArgs::SearchItemDocs {
                     provider,

crates/extension/src/extension_indexed_docs_provider.rs 🔗

@@ -30,6 +30,10 @@ impl IndexedDocsProvider for ExtensionIndexedDocsProvider {
         database_path
     }
 
+    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
+        Ok(Vec::new())
+    }
+
     async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
         self.extension
             .call({

crates/indexed_docs/src/providers/rustdoc.rs 🔗

@@ -44,12 +44,20 @@ impl LocalRustdocProvider {
             cargo_workspace_root,
         }
     }
+}
+
+#[async_trait]
+impl IndexedDocsProvider for LocalRustdocProvider {
+    fn id(&self) -> ProviderId {
+        Self::id()
+    }
 
-    /// Returns the list of all crates in the Cargo workspace.
-    ///
-    /// Includes the list of workspace crates as well as all dependency crates.
-    pub fn list_workspace_crates() -> Result<Vec<Arc<str>>> {
-        static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<Arc<str>>, Instant)>>> =
+    fn database_path(&self) -> PathBuf {
+        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
+    }
+
+    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
+        static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<PackageName>, Instant)>>> =
             LazyLock::new(|| RwLock::new(None));
 
         if let Some((crates, fetched_at)) = &*WORKSPACE_CRATES.read() {
@@ -59,30 +67,20 @@ impl LocalRustdocProvider {
         }
 
         let workspace = MetadataCommand::new()
+            .manifest_path(self.cargo_workspace_root.join("Cargo.toml"))
             .exec()
             .context("failed to load cargo metadata")?;
 
         let workspace_crates = workspace
             .packages
             .into_iter()
-            .map(|package| package.name.into())
+            .map(|package| PackageName::from(package.name.as_str()))
             .collect::<BTreeSet<_>>();
 
         *WORKSPACE_CRATES.write() = Some((workspace_crates.clone(), Instant::now()));
 
         Ok(workspace_crates.iter().cloned().collect())
     }
-}
-
-#[async_trait]
-impl IndexedDocsProvider for LocalRustdocProvider {
-    fn id(&self) -> ProviderId {
-        Self::id()
-    }
-
-    fn database_path(&self) -> PathBuf {
-        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
-    }
 
     async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
         index_rustdoc(package, database, {
@@ -130,26 +128,6 @@ pub struct DocsDotRsProvider {
 }
 
 impl DocsDotRsProvider {
-    /// The list of crates to auto-suggest for the docs.rs provider when
-    /// the index is empty.
-    ///
-    /// List has been chosen loosely based on [this list](https://lib.rs/std) of
-    /// popular Rust libraries.
-    ///
-    /// Keep this alphabetized.
-    pub const AUTO_SUGGESTED_CRATES: &'static [&'static str] = &[
-        "anyhow",
-        "axum",
-        "chrono",
-        "itertools",
-        "rand",
-        "regex",
-        "serde",
-        "strum",
-        "thiserror",
-        "tokio",
-    ];
-
     pub fn id() -> ProviderId {
         ProviderId("docs-rs".into())
     }
@@ -169,6 +147,18 @@ impl IndexedDocsProvider for DocsDotRsProvider {
         paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
     }
 
+    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
+        static POPULAR_CRATES: LazyLock<Vec<PackageName>> = LazyLock::new(|| {
+            include_str!("./rustdoc/popular_crates.txt")
+                .lines()
+                .filter(|line| !line.starts_with('#'))
+                .map(|line| PackageName::from(line.trim()))
+                .collect()
+        });
+
+        Ok(POPULAR_CRATES.clone())
+    }
+
     async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
         index_rustdoc(package, database, {
             move |crate_name, item| {

crates/indexed_docs/src/providers/rustdoc/popular_crates.txt 🔗

@@ -0,0 +1,252 @@
+# A list of the most popular Rust crates.
+# Sourced from https://lib.rs/std.
+serde
+serde_json
+syn
+clap
+thiserror
+rand
+log
+tokio
+anyhow
+regex
+quote
+proc-macro2
+base64
+itertools
+chrono
+lazy_static
+once_cell
+libc
+reqwest
+futures
+bitflags
+tracing
+url
+bytes
+toml
+tempfile
+uuid
+indexmap
+env_logger
+num-traits
+async-trait
+sha2
+hex
+tracing-subscriber
+http
+parking_lot
+cfg-if
+futures-util
+cc
+hashbrown
+rayon
+hyper
+getrandom
+semver
+strum
+flate2
+tokio-util
+smallvec
+criterion
+paste
+heck
+rand_core
+nom
+rustls
+nix
+glob
+time
+byteorder
+strum_macros
+serde_yaml
+wasm-bindgen
+ahash
+either
+num_cpus
+rand_chacha
+prost
+percent-encoding
+pin-project-lite
+tokio-stream
+bincode
+walkdir
+bindgen
+axum
+windows-sys
+futures-core
+ring
+digest
+num-bigint
+rustls-pemfile
+serde_with
+crossbeam-channel
+tokio-rustls
+hmac
+fastrand
+dirs
+zeroize
+socket2
+pin-project
+tower
+derive_more
+memchr
+toml_edit
+static_assertions
+pretty_assertions
+js-sys
+convert_case
+unicode-width
+pkg-config
+itoa
+colored
+rustc-hash
+darling
+mime
+web-sys
+image
+bytemuck
+which
+sha1
+dashmap
+arrayvec
+fnv
+tonic
+humantime
+libloading
+winapi
+rustc_version
+http-body
+indoc
+num
+home
+serde_urlencoded
+http-body-util
+unicode-segmentation
+num-integer
+webpki-roots
+phf
+futures-channel
+indicatif
+petgraph
+ordered-float
+strsim
+zstd
+console
+encoding_rs
+wasm-bindgen-futures
+urlencoding
+subtle
+crc32fast
+slab
+rustix
+predicates
+spin
+hyper-rustls
+backtrace
+rustversion
+mio
+scopeguard
+proc-macro-error
+hyper-util
+ryu
+prost-types
+textwrap
+memmap2
+zip
+zerocopy
+generic-array
+tar
+pyo3
+async-stream
+quick-xml
+memoffset
+csv
+crossterm
+windows
+num_enum
+tokio-tungstenite
+crossbeam-utils
+async-channel
+lru
+aes
+futures-lite
+tracing-core
+prettyplease
+httparse
+serde_bytes
+tracing-log
+tower-service
+cargo_metadata
+pest
+mime_guess
+tower-http
+data-encoding
+native-tls
+prost-build
+proptest
+derivative
+serial_test
+libm
+half
+futures-io
+bitvec
+rustls-native-certs
+ureq
+object
+anstyle
+tonic-build
+form_urlencoded
+num-derive
+pest_derive
+schemars
+proc-macro-crate
+rstest
+futures-executor
+assert_cmd
+termcolor
+serde_repr
+ctrlc
+sha3
+clap_complete
+flume
+mockall
+ipnet
+aho-corasick
+atty
+signal-hook
+async-std
+filetime
+num-complex
+opentelemetry
+cmake
+arc-swap
+derive_builder
+async-recursion
+dyn-clone
+bumpalo
+fs_extra
+git2
+sysinfo
+shlex
+instant
+approx
+rmp-serde
+rand_distr
+rustls-pki-types
+maplit
+sqlx
+blake3
+hyper-tls
+dotenvy
+jsonwebtoken
+openssl-sys
+crossbeam
+camino
+winreg
+config
+rsa
+bit-vec
+chrono-tz
+async-lock
+bstr

crates/indexed_docs/src/store.rs 🔗

@@ -39,6 +39,13 @@ pub trait IndexedDocsProvider {
     /// Returns the path to the database for this provider.
     fn database_path(&self) -> PathBuf;
 
+    /// Returns a list of packages as suggestions to be included in the search
+    /// results.
+    ///
+    /// This can be used to provide completions for known packages (e.g., from the
+    /// local project or a registry) before a package has been indexed.
+    async fn suggest_packages(&self) -> Result<Vec<PackageName>>;
+
     /// Indexes the package with the given name.
     async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()>;
 }
@@ -122,6 +129,12 @@ impl IndexedDocsStore {
             .await
     }
 
+    pub fn suggest_packages(self: Arc<Self>) -> Task<Result<Vec<PackageName>>> {
+        let this = self.clone();
+        self.executor
+            .spawn(async move { this.provider.suggest_packages().await })
+    }
+
     pub fn index(
         self: Arc<Self>,
         package: PackageName,

typos.toml 🔗

@@ -12,6 +12,9 @@ extend-exclude = [
     "crates/google_ai/src/supported_countries.rs",
     "crates/open_ai/src/supported_countries.rs",
 
+    # Some crate names are flagged as typos.
+    "crates/indexed_docs/src/providers/rustdoc/popular_crates.txt",
+
     # Stripe IDs are flagged as typos.
     "crates/collab/src/db/tests/processed_stripe_event_tests.rs",
     # Not our typos