rustdoc.rs

  1mod item;
  2mod to_markdown;
  3
  4pub use item::*;
  5pub use to_markdown::convert_rustdoc_to_markdown;
  6
  7use std::path::PathBuf;
  8use std::sync::Arc;
  9
 10use anyhow::{bail, Context, Result};
 11use async_trait::async_trait;
 12use collections::{HashSet, VecDeque};
 13use fs::Fs;
 14use futures::AsyncReadExt;
 15use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 16
 17use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
 18
 19#[derive(Debug)]
 20struct RustdocItemWithHistory {
 21    pub item: RustdocItem,
 22    #[cfg(debug_assertions)]
 23    pub history: Vec<String>,
 24}
 25
 26#[async_trait]
 27pub trait RustdocProvider {
 28    async fn fetch_page(
 29        &self,
 30        package: &PackageName,
 31        item: Option<&RustdocItem>,
 32    ) -> Result<Option<String>>;
 33}
 34
 35pub struct RustdocIndexer {
 36    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
 37}
 38
 39impl RustdocIndexer {
 40    pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
 41        Self { provider }
 42    }
 43}
 44
 45#[async_trait]
 46impl IndexedDocsProvider for RustdocIndexer {
 47    fn id(&self) -> ProviderId {
 48        ProviderId::rustdoc()
 49    }
 50
 51    fn database_path(&self) -> PathBuf {
 52        paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
 53    }
 54
 55    async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
 56        let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
 57            return Ok(());
 58        };
 59
 60        let (crate_root_markdown, items) =
 61            convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
 62
 63        database
 64            .insert(package.to_string(), crate_root_markdown)
 65            .await?;
 66
 67        let mut seen_items = HashSet::from_iter(items.clone());
 68        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
 69            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
 70                item,
 71                #[cfg(debug_assertions)]
 72                history: Vec::new(),
 73            }));
 74
 75        while let Some(item_with_history) = items_to_visit.pop_front() {
 76            let item = &item_with_history.item;
 77
 78            let Some(result) = self
 79                .provider
 80                .fetch_page(&package, Some(&item))
 81                .await
 82                .with_context(|| {
 83                    #[cfg(debug_assertions)]
 84                    {
 85                        format!(
 86                            "failed to fetch {item:?}: {history:?}",
 87                            history = item_with_history.history
 88                        )
 89                    }
 90
 91                    #[cfg(not(debug_assertions))]
 92                    {
 93                        format!("failed to fetch {item:?}")
 94                    }
 95                })?
 96            else {
 97                continue;
 98            };
 99
100            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
101
102            database
103                .insert(format!("{package}::{}", item.display()), markdown)
104                .await?;
105
106            let parent_item = item;
107            for mut item in referenced_items {
108                if seen_items.contains(&item) {
109                    continue;
110                }
111
112                seen_items.insert(item.clone());
113
114                item.path.extend(parent_item.path.clone());
115                match parent_item.kind {
116                    RustdocItemKind::Mod => {
117                        item.path.push(parent_item.name.clone());
118                    }
119                    _ => {}
120                }
121
122                items_to_visit.push_back(RustdocItemWithHistory {
123                    #[cfg(debug_assertions)]
124                    history: {
125                        let mut history = item_with_history.history.clone();
126                        history.push(item.url_path());
127                        history
128                    },
129                    item,
130                });
131            }
132        }
133
134        Ok(())
135    }
136}
137
138pub struct LocalProvider {
139    fs: Arc<dyn Fs>,
140    cargo_workspace_root: PathBuf,
141}
142
143impl LocalProvider {
144    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
145        Self {
146            fs,
147            cargo_workspace_root,
148        }
149    }
150}
151
152#[async_trait]
153impl RustdocProvider for LocalProvider {
154    async fn fetch_page(
155        &self,
156        crate_name: &PackageName,
157        item: Option<&RustdocItem>,
158    ) -> Result<Option<String>> {
159        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
160        local_cargo_doc_path.push(crate_name.as_ref());
161        if let Some(item) = item {
162            local_cargo_doc_path.push(item.url_path());
163        } else {
164            local_cargo_doc_path.push("index.html");
165        }
166
167        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
168            return Ok(None);
169        };
170
171        Ok(Some(contents))
172    }
173}
174
175pub struct DocsDotRsProvider {
176    http_client: Arc<HttpClientWithUrl>,
177}
178
179impl DocsDotRsProvider {
180    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
181        Self { http_client }
182    }
183}
184
185#[async_trait]
186impl RustdocProvider for DocsDotRsProvider {
187    async fn fetch_page(
188        &self,
189        crate_name: &PackageName,
190        item: Option<&RustdocItem>,
191    ) -> Result<Option<String>> {
192        let version = "latest";
193        let path = format!(
194            "{crate_name}/{version}/{crate_name}{item_path}",
195            item_path = item
196                .map(|item| format!("/{}", item.url_path()))
197                .unwrap_or_default()
198        );
199
200        let mut response = self
201            .http_client
202            .get(
203                &format!("https://docs.rs/{path}"),
204                AsyncBody::default(),
205                true,
206            )
207            .await?;
208
209        let mut body = Vec::new();
210        response
211            .body_mut()
212            .read_to_end(&mut body)
213            .await
214            .context("error reading docs.rs response body")?;
215
216        if response.status().is_client_error() {
217            let text = String::from_utf8_lossy(body.as_slice());
218            bail!(
219                "status error {}, response: {text:?}",
220                response.status().as_u16()
221            );
222        }
223
224        Ok(Some(String::from_utf8(body)?))
225    }
226}