rustdoc.rs

  1mod item;
  2mod to_markdown;
  3
  4use futures::future::BoxFuture;
  5pub use item::*;
  6pub use to_markdown::convert_rustdoc_to_markdown;
  7
  8use std::path::PathBuf;
  9use std::sync::Arc;
 10
 11use anyhow::{bail, Context, Result};
 12use async_trait::async_trait;
 13use collections::{HashSet, VecDeque};
 14use fs::Fs;
 15use futures::{AsyncReadExt, FutureExt};
 16use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 17
 18use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 19
/// A [`RustdocItem`] queued for indexing, paired (in debug builds) with the
/// trail of references that led to it.
#[derive(Debug)]
struct RustdocItemWithHistory {
    /// The item whose documentation page will be fetched.
    pub item: RustdocItem,
    /// URL paths of the referenced items followed to reach `item`, ending with
    /// `item` itself. Items found on the crate root page start with an empty
    /// history. Only compiled in debug builds, where it is appended to the
    /// error context when fetching the item fails.
    #[cfg(debug_assertions)]
    pub history: Vec<String>,
}
 26
 27pub struct LocalRustdocProvider {
 28    fs: Arc<dyn Fs>,
 29    cargo_workspace_root: PathBuf,
 30}
 31
 32impl LocalRustdocProvider {
 33    pub fn id() -> ProviderId {
 34        ProviderId("rustdoc".into())
 35    }
 36
 37    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 38        Self {
 39            fs,
 40            cargo_workspace_root,
 41        }
 42    }
 43}
 44
 45#[async_trait]
 46impl IndexedDocsProvider for LocalRustdocProvider {
 47    fn id(&self) -> ProviderId {
 48        Self::id()
 49    }
 50
 51    fn database_path(&self) -> PathBuf {
 52        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
 53    }
 54
 55    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
 56        index_rustdoc(package, database, {
 57            move |crate_name, item| {
 58                let fs = self.fs.clone();
 59                let cargo_workspace_root = self.cargo_workspace_root.clone();
 60                let crate_name = crate_name.clone();
 61                let item = item.cloned();
 62                async move {
 63                    let target_doc_path = cargo_workspace_root.join("target/doc");
 64                    let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref());
 65
 66                    if !fs.is_dir(&local_cargo_doc_path).await {
 67                        let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await;
 68                        if cargo_doc_exists_at_all {
 69                            bail!(
 70                                "no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`"
 71                            );
 72                        } else {
 73                            bail!("no cargo doc directory. run `cargo doc`");
 74                        }
 75                    }
 76
 77                    if let Some(item) = item {
 78                        local_cargo_doc_path.push(item.url_path());
 79                    } else {
 80                        local_cargo_doc_path.push("index.html");
 81                    }
 82
 83                    let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
 84                        return Ok(None);
 85                    };
 86
 87                    Ok(Some(contents))
 88                }
 89                .boxed()
 90            }
 91        })
 92        .await
 93    }
 94}
 95
 96pub struct DocsDotRsProvider {
 97    http_client: Arc<HttpClientWithUrl>,
 98}
 99
100impl DocsDotRsProvider {
101    pub fn id() -> ProviderId {
102        ProviderId("docs-rs".into())
103    }
104
105    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
106        Self { http_client }
107    }
108}
109
110#[async_trait]
111impl IndexedDocsProvider for DocsDotRsProvider {
112    fn id(&self) -> ProviderId {
113        Self::id()
114    }
115
116    fn database_path(&self) -> PathBuf {
117        paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
118    }
119
120    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
121        index_rustdoc(package, database, {
122            move |crate_name, item| {
123                let http_client = self.http_client.clone();
124                let crate_name = crate_name.clone();
125                let item = item.cloned();
126                async move {
127                    let version = "latest";
128                    let path = format!(
129                        "{crate_name}/{version}/{crate_name}{item_path}",
130                        item_path = item
131                            .map(|item| format!("/{}", item.url_path()))
132                            .unwrap_or_default()
133                    );
134
135                    let mut response = http_client
136                        .get(
137                            &format!("https://docs.rs/{path}"),
138                            AsyncBody::default(),
139                            true,
140                        )
141                        .await?;
142
143                    let mut body = Vec::new();
144                    response
145                        .body_mut()
146                        .read_to_end(&mut body)
147                        .await
148                        .context("error reading docs.rs response body")?;
149
150                    if response.status().is_client_error() {
151                        let text = String::from_utf8_lossy(body.as_slice());
152                        bail!(
153                            "status error {}, response: {text:?}",
154                            response.status().as_u16()
155                        );
156                    }
157
158                    Ok(Some(String::from_utf8(body)?))
159                }
160                .boxed()
161            }
162        })
163        .await
164    }
165}
166
167async fn index_rustdoc(
168    package: PackageName,
169    database: Arc<IndexedDocsDatabase>,
170    fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
171        + Send
172        + Sync,
173) -> Result<()> {
174    let Some(package_root_content) = fetch_page(&package, None).await? else {
175        return Ok(());
176    };
177
178    let (crate_root_markdown, items) =
179        convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
180
181    database
182        .insert(package.to_string(), crate_root_markdown)
183        .await?;
184
185    let mut seen_items = HashSet::from_iter(items.clone());
186    let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
187        VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
188            item,
189            #[cfg(debug_assertions)]
190            history: Vec::new(),
191        }));
192
193    while let Some(item_with_history) = items_to_visit.pop_front() {
194        let item = &item_with_history.item;
195
196        let Some(result) = fetch_page(&package, Some(&item)).await.with_context(|| {
197            #[cfg(debug_assertions)]
198            {
199                format!(
200                    "failed to fetch {item:?}: {history:?}",
201                    history = item_with_history.history
202                )
203            }
204
205            #[cfg(not(debug_assertions))]
206            {
207                format!("failed to fetch {item:?}")
208            }
209        })?
210        else {
211            continue;
212        };
213
214        let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
215
216        database
217            .insert(format!("{package}::{}", item.display()), markdown)
218            .await?;
219
220        let parent_item = item;
221        for mut item in referenced_items {
222            if seen_items.contains(&item) {
223                continue;
224            }
225
226            seen_items.insert(item.clone());
227
228            item.path.extend(parent_item.path.clone());
229            match parent_item.kind {
230                RustdocItemKind::Mod => {
231                    item.path.push(parent_item.name.clone());
232                }
233                _ => {}
234            }
235
236            items_to_visit.push_back(RustdocItemWithHistory {
237                #[cfg(debug_assertions)]
238                history: {
239                    let mut history = item_with_history.history.clone();
240                    history.push(item.url_path());
241                    history
242                },
243                item,
244            });
245        }
246    }
247
248    Ok(())
249}