rustdoc.rs

  1mod item;
  2mod to_markdown;
  3
  4use cargo_metadata::MetadataCommand;
  5use futures::future::BoxFuture;
  6pub use item::*;
  7use parking_lot::RwLock;
  8pub use to_markdown::convert_rustdoc_to_markdown;
  9
 10use std::collections::BTreeSet;
 11use std::path::PathBuf;
 12use std::sync::{Arc, LazyLock};
 13use std::time::{Duration, Instant};
 14
 15use anyhow::{bail, Context, Result};
 16use async_trait::async_trait;
 17use collections::{HashSet, VecDeque};
 18use fs::Fs;
 19use futures::{AsyncReadExt, FutureExt};
 20use http_client::{AsyncBody, HttpClient, HttpClientWithUrl};
 21
 22use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 23
 24#[derive(Debug)]
 25struct RustdocItemWithHistory {
 26    pub item: RustdocItem,
 27    #[cfg(debug_assertions)]
 28    pub history: Vec<String>,
 29}
 30
 31pub struct LocalRustdocProvider {
 32    fs: Arc<dyn Fs>,
 33    cargo_workspace_root: PathBuf,
 34}
 35
 36impl LocalRustdocProvider {
 37    pub fn id() -> ProviderId {
 38        ProviderId("rustdoc".into())
 39    }
 40
 41    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 42        Self {
 43            fs,
 44            cargo_workspace_root,
 45        }
 46    }
 47
 48    /// Returns the list of all crates in the Cargo workspace.
 49    ///
 50    /// Includes the list of workspace crates as well as all dependency crates.
 51    pub fn list_workspace_crates() -> Result<Vec<Arc<str>>> {
 52        static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<Arc<str>>, Instant)>>> =
 53            LazyLock::new(|| RwLock::new(None));
 54
 55        if let Some((crates, fetched_at)) = &*WORKSPACE_CRATES.read() {
 56            if fetched_at.elapsed() < Duration::from_secs(300) {
 57                return Ok(crates.iter().cloned().collect());
 58            }
 59        }
 60
 61        let workspace = MetadataCommand::new()
 62            .exec()
 63            .context("failed to load cargo metadata")?;
 64
 65        let workspace_crates = workspace
 66            .packages
 67            .into_iter()
 68            .map(|package| package.name.into())
 69            .collect::<BTreeSet<_>>();
 70
 71        *WORKSPACE_CRATES.write() = Some((workspace_crates.clone(), Instant::now()));
 72
 73        Ok(workspace_crates.iter().cloned().collect())
 74    }
 75}
 76
 77#[async_trait]
 78impl IndexedDocsProvider for LocalRustdocProvider {
 79    fn id(&self) -> ProviderId {
 80        Self::id()
 81    }
 82
 83    fn database_path(&self) -> PathBuf {
 84        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
 85    }
 86
 87    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
 88        index_rustdoc(package, database, {
 89            move |crate_name, item| {
 90                let fs = self.fs.clone();
 91                let cargo_workspace_root = self.cargo_workspace_root.clone();
 92                let crate_name = crate_name.clone();
 93                let item = item.cloned();
 94                async move {
 95                    let target_doc_path = cargo_workspace_root.join("target/doc");
 96                    let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref());
 97
 98                    if !fs.is_dir(&local_cargo_doc_path).await {
 99                        let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await;
100                        if cargo_doc_exists_at_all {
101                            bail!(
102                                "no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`"
103                            );
104                        } else {
105                            bail!("no cargo doc directory. run `cargo doc`");
106                        }
107                    }
108
109                    if let Some(item) = item {
110                        local_cargo_doc_path.push(item.url_path());
111                    } else {
112                        local_cargo_doc_path.push("index.html");
113                    }
114
115                    let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
116                        return Ok(None);
117                    };
118
119                    Ok(Some(contents))
120                }
121                .boxed()
122            }
123        })
124        .await
125    }
126}
127
128pub struct DocsDotRsProvider {
129    http_client: Arc<HttpClientWithUrl>,
130}
131
132impl DocsDotRsProvider {
133    /// The list of crates to auto-suggest for the docs.rs provider when
134    /// the index is empty.
135    ///
136    /// List has been chosen loosely based on [this list](https://lib.rs/std) of
137    /// popular Rust libraries.
138    ///
139    /// Keep this alphabetized.
140    pub const AUTO_SUGGESTED_CRATES: &'static [&'static str] = &[
141        "anyhow",
142        "axum",
143        "chrono",
144        "itertools",
145        "rand",
146        "regex",
147        "serde",
148        "strum",
149        "thiserror",
150        "tokio",
151    ];
152
153    pub fn id() -> ProviderId {
154        ProviderId("docs-rs".into())
155    }
156
157    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
158        Self { http_client }
159    }
160}
161
162#[async_trait]
163impl IndexedDocsProvider for DocsDotRsProvider {
164    fn id(&self) -> ProviderId {
165        Self::id()
166    }
167
168    fn database_path(&self) -> PathBuf {
169        paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
170    }
171
172    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
173        index_rustdoc(package, database, {
174            move |crate_name, item| {
175                let http_client = self.http_client.clone();
176                let crate_name = crate_name.clone();
177                let item = item.cloned();
178                async move {
179                    let version = "latest";
180                    let path = format!(
181                        "{crate_name}/{version}/{crate_name}{item_path}",
182                        item_path = item
183                            .map(|item| format!("/{}", item.url_path()))
184                            .unwrap_or_default()
185                    );
186
187                    let mut response = http_client
188                        .get(
189                            &format!("https://docs.rs/{path}"),
190                            AsyncBody::default(),
191                            true,
192                        )
193                        .await?;
194
195                    let mut body = Vec::new();
196                    response
197                        .body_mut()
198                        .read_to_end(&mut body)
199                        .await
200                        .context("error reading docs.rs response body")?;
201
202                    if response.status().is_client_error() {
203                        let text = String::from_utf8_lossy(body.as_slice());
204                        bail!(
205                            "status error {}, response: {text:?}",
206                            response.status().as_u16()
207                        );
208                    }
209
210                    Ok(Some(String::from_utf8(body)?))
211                }
212                .boxed()
213            }
214        })
215        .await
216    }
217}
218
219async fn index_rustdoc(
220    package: PackageName,
221    database: Arc<IndexedDocsDatabase>,
222    fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
223        + Send
224        + Sync,
225) -> Result<()> {
226    let Some(package_root_content) = fetch_page(&package, None).await? else {
227        return Ok(());
228    };
229
230    let (crate_root_markdown, items) =
231        convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
232
233    database
234        .insert(package.to_string(), crate_root_markdown)
235        .await?;
236
237    let mut seen_items = HashSet::from_iter(items.clone());
238    let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
239        VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
240            item,
241            #[cfg(debug_assertions)]
242            history: Vec::new(),
243        }));
244
245    while let Some(item_with_history) = items_to_visit.pop_front() {
246        let item = &item_with_history.item;
247
248        let Some(result) = fetch_page(&package, Some(&item)).await.with_context(|| {
249            #[cfg(debug_assertions)]
250            {
251                format!(
252                    "failed to fetch {item:?}: {history:?}",
253                    history = item_with_history.history
254                )
255            }
256
257            #[cfg(not(debug_assertions))]
258            {
259                format!("failed to fetch {item:?}")
260            }
261        })?
262        else {
263            continue;
264        };
265
266        let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
267
268        database
269            .insert(format!("{package}::{}", item.display()), markdown)
270            .await?;
271
272        let parent_item = item;
273        for mut item in referenced_items {
274            if seen_items.contains(&item) {
275                continue;
276            }
277
278            seen_items.insert(item.clone());
279
280            item.path.extend(parent_item.path.clone());
281            match parent_item.kind {
282                RustdocItemKind::Mod => {
283                    item.path.push(parent_item.name.clone());
284                }
285                _ => {}
286            }
287
288            items_to_visit.push_back(RustdocItemWithHistory {
289                #[cfg(debug_assertions)]
290                history: {
291                    let mut history = item_with_history.history.clone();
292                    history.push(item.url_path());
293                    history
294                },
295                item,
296            });
297        }
298    }
299
300    Ok(())
301}