indexer.rs

  1use std::path::PathBuf;
  2use std::sync::Arc;
  3
  4use anyhow::{bail, Context, Result};
  5use async_trait::async_trait;
  6use collections::{HashSet, VecDeque};
  7use fs::Fs;
  8use futures::AsyncReadExt;
  9use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 10
 11use crate::{
 12    convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind,
 13};
 14
/// Identifies where a set of rustdoc pages was obtained from.
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
    /// The docs were sourced from local `cargo doc` output.
    Local,
    /// The docs were sourced from `docs.rs`.
    DocsDotRs,
}
 22
/// A source of rustdoc pages for a crate.
///
/// Implementations decide where the page contents come from (for example the
/// local filesystem or an HTTP service).
#[async_trait]
pub trait RustdocProvider {
    /// Fetches the contents of a single rustdoc page.
    ///
    /// * `crate_name` - the crate whose documentation is being requested.
    /// * `item` - the item whose page should be fetched; `None` requests the
    ///   crate's root page.
    ///
    /// Returns `Ok(Some(contents))` with the page contents, `Ok(None)` when
    /// the provider has no page to offer (the indexer then skips the item, or
    /// stops without indexing anything if it was the root page), and `Err`
    /// when fetching failed.
    async fn fetch_page(
        &self,
        crate_name: &CrateName,
        item: Option<&RustdocItem>,
    ) -> Result<Option<String>>;
}
 31
/// A [`RustdocProvider`] that reads pages from a workspace's `target/doc`
/// directory.
pub struct LocalProvider {
    /// Filesystem abstraction used to load the page files.
    fs: Arc<dyn Fs>,
    /// Root of the Cargo workspace; pages are looked up beneath
    /// `<root>/target/doc`.
    cargo_workspace_root: PathBuf,
}
 36
 37impl LocalProvider {
 38    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 39        Self {
 40            fs,
 41            cargo_workspace_root,
 42        }
 43    }
 44}
 45
 46#[async_trait]
 47impl RustdocProvider for LocalProvider {
 48    async fn fetch_page(
 49        &self,
 50        crate_name: &CrateName,
 51        item: Option<&RustdocItem>,
 52    ) -> Result<Option<String>> {
 53        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
 54        local_cargo_doc_path.push(crate_name.as_ref());
 55        if let Some(item) = item {
 56            local_cargo_doc_path.push(item.url_path());
 57        } else {
 58            local_cargo_doc_path.push("index.html");
 59        }
 60
 61        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
 62            return Ok(None);
 63        };
 64
 65        Ok(Some(contents))
 66    }
 67}
 68
/// A [`RustdocProvider`] that downloads pages from `https://docs.rs`.
pub struct DocsDotRsProvider {
    /// HTTP client used to issue the page requests.
    http_client: Arc<HttpClientWithUrl>,
}
 72
 73impl DocsDotRsProvider {
 74    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
 75        Self { http_client }
 76    }
 77}
 78
 79#[async_trait]
 80impl RustdocProvider for DocsDotRsProvider {
 81    async fn fetch_page(
 82        &self,
 83        crate_name: &CrateName,
 84        item: Option<&RustdocItem>,
 85    ) -> Result<Option<String>> {
 86        let version = "latest";
 87        let path = format!(
 88            "{crate_name}/{version}/{crate_name}{item_path}",
 89            item_path = item
 90                .map(|item| format!("/{}", item.url_path()))
 91                .unwrap_or_default()
 92        );
 93
 94        let mut response = self
 95            .http_client
 96            .get(
 97                &format!("https://docs.rs/{path}"),
 98                AsyncBody::default(),
 99                true,
100            )
101            .await?;
102
103        let mut body = Vec::new();
104        response
105            .body_mut()
106            .read_to_end(&mut body)
107            .await
108            .context("error reading docs.rs response body")?;
109
110        if response.status().is_client_error() {
111            let text = String::from_utf8_lossy(body.as_slice());
112            bail!(
113                "status error {}, response: {text:?}",
114                response.status().as_u16()
115            );
116        }
117
118        Ok(Some(String::from_utf8(body)?))
119    }
120}
121
/// A [`RustdocItem`] queued for indexing, together with (in debug builds) the
/// trail of URL paths that led to it.
#[derive(Debug)]
struct RustdocItemWithHistory {
    /// The item whose page is to be fetched and indexed.
    pub item: RustdocItem,
    /// URL paths accumulated while walking from the crate root to this item.
    /// Only present when debug assertions are enabled; it is included in the
    /// error context when fetching the item fails.
    #[cfg(debug_assertions)]
    pub history: Vec<String>,
}
128
/// Crawls a crate's rustdoc pages through a [`RustdocProvider`] and stores the
/// converted Markdown in a [`RustdocDatabase`].
pub(crate) struct RustdocIndexer {
    /// Destination for the converted pages.
    database: Arc<RustdocDatabase>,
    /// Source of the raw rustdoc pages.
    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
133
134impl RustdocIndexer {
135    pub fn new(
136        database: Arc<RustdocDatabase>,
137        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
138    ) -> Self {
139        Self { database, provider }
140    }
141
142    /// Indexes the crate with the given name.
143    pub async fn index(&self, crate_name: CrateName) -> Result<()> {
144        let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
145            return Ok(());
146        };
147
148        let (crate_root_markdown, items) =
149            convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
150
151        self.database
152            .insert(crate_name.clone(), None, crate_root_markdown)
153            .await?;
154
155        let mut seen_items = HashSet::from_iter(items.clone());
156        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
157            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
158                item,
159                #[cfg(debug_assertions)]
160                history: Vec::new(),
161            }));
162
163        while let Some(item_with_history) = items_to_visit.pop_front() {
164            let item = &item_with_history.item;
165
166            let Some(result) = self
167                .provider
168                .fetch_page(&crate_name, Some(&item))
169                .await
170                .with_context(|| {
171                    #[cfg(debug_assertions)]
172                    {
173                        format!(
174                            "failed to fetch {item:?}: {history:?}",
175                            history = item_with_history.history
176                        )
177                    }
178
179                    #[cfg(not(debug_assertions))]
180                    {
181                        format!("failed to fetch {item:?}")
182                    }
183                })?
184            else {
185                continue;
186            };
187
188            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
189
190            self.database
191                .insert(crate_name.clone(), Some(item), markdown)
192                .await?;
193
194            let parent_item = item;
195            for mut item in referenced_items {
196                if seen_items.contains(&item) {
197                    continue;
198                }
199
200                seen_items.insert(item.clone());
201
202                item.path.extend(parent_item.path.clone());
203                match parent_item.kind {
204                    RustdocItemKind::Mod => {
205                        item.path.push(parent_item.name.clone());
206                    }
207                    _ => {}
208                }
209
210                items_to_visit.push_back(RustdocItemWithHistory {
211                    #[cfg(debug_assertions)]
212                    history: {
213                        let mut history = item_with_history.history.clone();
214                        history.push(item.url_path());
215                        history
216                    },
217                    item,
218                });
219            }
220        }
221
222        Ok(())
223    }
224}