rustdoc.rs

  1mod item;
  2mod to_markdown;
  3
  4use cargo_metadata::MetadataCommand;
  5use futures::future::BoxFuture;
  6pub use item::*;
  7use parking_lot::RwLock;
  8pub use to_markdown::convert_rustdoc_to_markdown;
  9
 10use std::collections::BTreeSet;
 11use std::path::PathBuf;
 12use std::sync::{Arc, LazyLock};
 13use std::time::{Duration, Instant};
 14
 15use anyhow::{Context as _, Result, bail};
 16use async_trait::async_trait;
 17use collections::{HashSet, VecDeque};
 18use fs::Fs;
 19use futures::{AsyncReadExt, FutureExt};
 20use http_client::{AsyncBody, HttpClient, HttpClientWithUrl};
 21
 22use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 23
 24#[derive(Debug)]
 25struct RustdocItemWithHistory {
 26    pub item: RustdocItem,
 27    #[cfg(debug_assertions)]
 28    pub history: Vec<String>,
 29}
 30
 31pub struct LocalRustdocProvider {
 32    fs: Arc<dyn Fs>,
 33    cargo_workspace_root: PathBuf,
 34}
 35
 36impl LocalRustdocProvider {
 37    pub fn id() -> ProviderId {
 38        ProviderId("rustdoc".into())
 39    }
 40
 41    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 42        Self {
 43            fs,
 44            cargo_workspace_root,
 45        }
 46    }
 47}
 48
 49#[async_trait]
 50impl IndexedDocsProvider for LocalRustdocProvider {
 51    fn id(&self) -> ProviderId {
 52        Self::id()
 53    }
 54
 55    fn database_path(&self) -> PathBuf {
 56        paths::data_dir().join("docs/rust/rustdoc-db.1.mdb")
 57    }
 58
 59    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
 60        static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<PackageName>, Instant)>>> =
 61            LazyLock::new(|| RwLock::new(None));
 62
 63        if let Some((crates, fetched_at)) = &*WORKSPACE_CRATES.read() {
 64            if fetched_at.elapsed() < Duration::from_secs(300) {
 65                return Ok(crates.iter().cloned().collect());
 66            }
 67        }
 68
 69        let workspace = MetadataCommand::new()
 70            .manifest_path(self.cargo_workspace_root.join("Cargo.toml"))
 71            .exec()
 72            .context("failed to load cargo metadata")?;
 73
 74        let workspace_crates = workspace
 75            .packages
 76            .into_iter()
 77            .map(|package| PackageName::from(package.name.as_str()))
 78            .collect::<BTreeSet<_>>();
 79
 80        *WORKSPACE_CRATES.write() = Some((workspace_crates.clone(), Instant::now()));
 81
 82        Ok(workspace_crates.into_iter().collect())
 83    }
 84
 85    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
 86        index_rustdoc(package, database, {
 87            move |crate_name, item| {
 88                let fs = self.fs.clone();
 89                let cargo_workspace_root = self.cargo_workspace_root.clone();
 90                let crate_name = crate_name.clone();
 91                let item = item.cloned();
 92                async move {
 93                    let target_doc_path = cargo_workspace_root.join("target/doc");
 94                    let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref().replace('-', "_"));
 95
 96                    if !fs.is_dir(&local_cargo_doc_path).await {
 97                        let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await;
 98                        if cargo_doc_exists_at_all {
 99                            bail!(
100                                "no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`"
101                            );
102                        } else {
103                            bail!("no cargo doc directory. run `cargo doc`");
104                        }
105                    }
106
107                    if let Some(item) = item {
108                        local_cargo_doc_path.push(item.url_path());
109                    } else {
110                        local_cargo_doc_path.push("index.html");
111                    }
112
113                    let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
114                        return Ok(None);
115                    };
116
117                    Ok(Some(contents))
118                }
119                .boxed()
120            }
121        })
122        .await
123    }
124}
125
126pub struct DocsDotRsProvider {
127    http_client: Arc<HttpClientWithUrl>,
128}
129
130impl DocsDotRsProvider {
131    pub fn id() -> ProviderId {
132        ProviderId("docs-rs".into())
133    }
134
135    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
136        Self { http_client }
137    }
138}
139
140#[async_trait]
141impl IndexedDocsProvider for DocsDotRsProvider {
142    fn id(&self) -> ProviderId {
143        Self::id()
144    }
145
146    fn database_path(&self) -> PathBuf {
147        paths::data_dir().join("docs/rust/docs-rs-db.1.mdb")
148    }
149
150    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
151        static POPULAR_CRATES: LazyLock<Vec<PackageName>> = LazyLock::new(|| {
152            include_str!("./rustdoc/popular_crates.txt")
153                .lines()
154                .filter(|line| !line.starts_with('#'))
155                .map(|line| PackageName::from(line.trim()))
156                .collect()
157        });
158
159        Ok(POPULAR_CRATES.clone())
160    }
161
162    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
163        index_rustdoc(package, database, {
164            move |crate_name, item| {
165                let http_client = self.http_client.clone();
166                let crate_name = crate_name.clone();
167                let item = item.cloned();
168                async move {
169                    let version = "latest";
170                    let path = format!(
171                        "{crate_name}/{version}/{crate_name}{item_path}",
172                        item_path = item
173                            .map(|item| format!("/{}", item.url_path()))
174                            .unwrap_or_default()
175                    );
176
177                    let mut response = http_client
178                        .get(
179                            &format!("https://docs.rs/{path}"),
180                            AsyncBody::default(),
181                            true,
182                        )
183                        .await?;
184
185                    let mut body = Vec::new();
186                    response
187                        .body_mut()
188                        .read_to_end(&mut body)
189                        .await
190                        .context("error reading docs.rs response body")?;
191
192                    if response.status().is_client_error() {
193                        let text = String::from_utf8_lossy(body.as_slice());
194                        bail!(
195                            "status error {}, response: {text:?}",
196                            response.status().as_u16()
197                        );
198                    }
199
200                    Ok(Some(String::from_utf8(body)?))
201                }
202                .boxed()
203            }
204        })
205        .await
206    }
207}
208
209async fn index_rustdoc(
210    package: PackageName,
211    database: Arc<IndexedDocsDatabase>,
212    fetch_page: impl Fn(
213        &PackageName,
214        Option<&RustdocItem>,
215    ) -> BoxFuture<'static, Result<Option<String>>>
216    + Send
217    + Sync,
218) -> Result<()> {
219    let Some(package_root_content) = fetch_page(&package, None).await? else {
220        return Ok(());
221    };
222
223    let (crate_root_markdown, items) =
224        convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
225
226    database
227        .insert(package.to_string(), crate_root_markdown)
228        .await?;
229
230    let mut seen_items = HashSet::from_iter(items.clone());
231    let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
232        VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
233            item,
234            #[cfg(debug_assertions)]
235            history: Vec::new(),
236        }));
237
238    while let Some(item_with_history) = items_to_visit.pop_front() {
239        let item = &item_with_history.item;
240
241        let Some(result) = fetch_page(&package, Some(item)).await.with_context(|| {
242            #[cfg(debug_assertions)]
243            {
244                format!(
245                    "failed to fetch {item:?}: {history:?}",
246                    history = item_with_history.history
247                )
248            }
249
250            #[cfg(not(debug_assertions))]
251            {
252                format!("failed to fetch {item:?}")
253            }
254        })?
255        else {
256            continue;
257        };
258
259        let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
260
261        database
262            .insert(format!("{package}::{}", item.display()), markdown)
263            .await?;
264
265        let parent_item = item;
266        for mut item in referenced_items {
267            if seen_items.contains(&item) {
268                continue;
269            }
270
271            seen_items.insert(item.clone());
272
273            item.path.extend(parent_item.path.clone());
274            if parent_item.kind == RustdocItemKind::Mod {
275                item.path.push(parent_item.name.clone());
276            }
277
278            items_to_visit.push_back(RustdocItemWithHistory {
279                #[cfg(debug_assertions)]
280                history: {
281                    let mut history = item_with_history.history.clone();
282                    history.push(item.url_path());
283                    history
284                },
285                item,
286            });
287        }
288    }
289
290    Ok(())
291}