// rustdoc.rs

  1mod item;
  2mod to_markdown;
  3
  4use cargo_metadata::MetadataCommand;
  5use extension_host::DocsDatabase;
  6use futures::future::BoxFuture;
  7pub use item::*;
  8use parking_lot::RwLock;
  9pub use to_markdown::convert_rustdoc_to_markdown;
 10
 11use std::collections::BTreeSet;
 12use std::path::PathBuf;
 13use std::sync::{Arc, LazyLock};
 14use std::time::{Duration, Instant};
 15
 16use anyhow::{bail, Context, Result};
 17use async_trait::async_trait;
 18use collections::{HashSet, VecDeque};
 19use fs::Fs;
 20use futures::{AsyncReadExt, FutureExt};
 21use http_client::{AsyncBody, HttpClient, HttpClientWithUrl};
 22
 23use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 24
 25#[derive(Debug)]
 26struct RustdocItemWithHistory {
 27    pub item: RustdocItem,
 28    #[cfg(debug_assertions)]
 29    pub history: Vec<String>,
 30}
 31
 32pub struct LocalRustdocProvider {
 33    fs: Arc<dyn Fs>,
 34    cargo_workspace_root: PathBuf,
 35}
 36
 37impl LocalRustdocProvider {
 38    pub fn id() -> ProviderId {
 39        ProviderId("rustdoc".into())
 40    }
 41
 42    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 43        Self {
 44            fs,
 45            cargo_workspace_root,
 46        }
 47    }
 48}
 49
 50#[async_trait]
 51impl IndexedDocsProvider for LocalRustdocProvider {
 52    fn id(&self) -> ProviderId {
 53        Self::id()
 54    }
 55
 56    fn database_path(&self) -> PathBuf {
 57        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
 58    }
 59
 60    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
 61        static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<PackageName>, Instant)>>> =
 62            LazyLock::new(|| RwLock::new(None));
 63
 64        if let Some((crates, fetched_at)) = &*WORKSPACE_CRATES.read() {
 65            if fetched_at.elapsed() < Duration::from_secs(300) {
 66                return Ok(crates.iter().cloned().collect());
 67            }
 68        }
 69
 70        let workspace = MetadataCommand::new()
 71            .manifest_path(self.cargo_workspace_root.join("Cargo.toml"))
 72            .exec()
 73            .context("failed to load cargo metadata")?;
 74
 75        let workspace_crates = workspace
 76            .packages
 77            .into_iter()
 78            .map(|package| PackageName::from(package.name.as_str()))
 79            .collect::<BTreeSet<_>>();
 80
 81        *WORKSPACE_CRATES.write() = Some((workspace_crates.clone(), Instant::now()));
 82
 83        Ok(workspace_crates.iter().cloned().collect())
 84    }
 85
 86    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
 87        index_rustdoc(package, database, {
 88            move |crate_name, item| {
 89                let fs = self.fs.clone();
 90                let cargo_workspace_root = self.cargo_workspace_root.clone();
 91                let crate_name = crate_name.clone();
 92                let item = item.cloned();
 93                async move {
 94                    let target_doc_path = cargo_workspace_root.join("target/doc");
 95                    let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref().replace('-', "_"));
 96
 97                    if !fs.is_dir(&local_cargo_doc_path).await {
 98                        let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await;
 99                        if cargo_doc_exists_at_all {
100                            bail!(
101                                "no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`"
102                            );
103                        } else {
104                            bail!("no cargo doc directory. run `cargo doc`");
105                        }
106                    }
107
108                    if let Some(item) = item {
109                        local_cargo_doc_path.push(item.url_path());
110                    } else {
111                        local_cargo_doc_path.push("index.html");
112                    }
113
114                    let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
115                        return Ok(None);
116                    };
117
118                    Ok(Some(contents))
119                }
120                .boxed()
121            }
122        })
123        .await
124    }
125}
126
127pub struct DocsDotRsProvider {
128    http_client: Arc<HttpClientWithUrl>,
129}
130
131impl DocsDotRsProvider {
132    pub fn id() -> ProviderId {
133        ProviderId("docs-rs".into())
134    }
135
136    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
137        Self { http_client }
138    }
139}
140
141#[async_trait]
142impl IndexedDocsProvider for DocsDotRsProvider {
143    fn id(&self) -> ProviderId {
144        Self::id()
145    }
146
147    fn database_path(&self) -> PathBuf {
148        paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
149    }
150
151    async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
152        static POPULAR_CRATES: LazyLock<Vec<PackageName>> = LazyLock::new(|| {
153            include_str!("./rustdoc/popular_crates.txt")
154                .lines()
155                .filter(|line| !line.starts_with('#'))
156                .map(|line| PackageName::from(line.trim()))
157                .collect()
158        });
159
160        Ok(POPULAR_CRATES.clone())
161    }
162
163    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
164        index_rustdoc(package, database, {
165            move |crate_name, item| {
166                let http_client = self.http_client.clone();
167                let crate_name = crate_name.clone();
168                let item = item.cloned();
169                async move {
170                    let version = "latest";
171                    let path = format!(
172                        "{crate_name}/{version}/{crate_name}{item_path}",
173                        item_path = item
174                            .map(|item| format!("/{}", item.url_path()))
175                            .unwrap_or_default()
176                    );
177
178                    let mut response = http_client
179                        .get(
180                            &format!("https://docs.rs/{path}"),
181                            AsyncBody::default(),
182                            true,
183                        )
184                        .await?;
185
186                    let mut body = Vec::new();
187                    response
188                        .body_mut()
189                        .read_to_end(&mut body)
190                        .await
191                        .context("error reading docs.rs response body")?;
192
193                    if response.status().is_client_error() {
194                        let text = String::from_utf8_lossy(body.as_slice());
195                        bail!(
196                            "status error {}, response: {text:?}",
197                            response.status().as_u16()
198                        );
199                    }
200
201                    Ok(Some(String::from_utf8(body)?))
202                }
203                .boxed()
204            }
205        })
206        .await
207    }
208}
209
210async fn index_rustdoc(
211    package: PackageName,
212    database: Arc<dyn DocsDatabase>,
213    fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
214        + Send
215        + Sync,
216) -> Result<()> {
217    let Some(package_root_content) = fetch_page(&package, None).await? else {
218        return Ok(());
219    };
220
221    let (crate_root_markdown, items) =
222        convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
223
224    database
225        .insert(package.to_string(), crate_root_markdown)
226        .await?;
227
228    let mut seen_items = HashSet::from_iter(items.clone());
229    let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
230        VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
231            item,
232            #[cfg(debug_assertions)]
233            history: Vec::new(),
234        }));
235
236    while let Some(item_with_history) = items_to_visit.pop_front() {
237        let item = &item_with_history.item;
238
239        let Some(result) = fetch_page(&package, Some(item)).await.with_context(|| {
240            #[cfg(debug_assertions)]
241            {
242                format!(
243                    "failed to fetch {item:?}: {history:?}",
244                    history = item_with_history.history
245                )
246            }
247
248            #[cfg(not(debug_assertions))]
249            {
250                format!("failed to fetch {item:?}")
251            }
252        })?
253        else {
254            continue;
255        };
256
257        let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
258
259        database
260            .insert(format!("{package}::{}", item.display()), markdown)
261            .await?;
262
263        let parent_item = item;
264        for mut item in referenced_items {
265            if seen_items.contains(&item) {
266                continue;
267            }
268
269            seen_items.insert(item.clone());
270
271            item.path.extend(parent_item.path.clone());
272            if parent_item.kind == RustdocItemKind::Mod {
273                item.path.push(parent_item.name.clone());
274            }
275
276            items_to_visit.push_back(RustdocItemWithHistory {
277                #[cfg(debug_assertions)]
278                history: {
279                    let mut history = item_with_history.history.clone();
280                    history.push(item.url_path());
281                    history
282                },
283                item,
284            });
285        }
286    }
287
288    Ok(())
289}