indexer.rs

  1use std::path::PathBuf;
  2use std::sync::Arc;
  3
  4use anyhow::{bail, Context, Result};
  5use async_trait::async_trait;
  6use collections::{HashSet, VecDeque};
  7use fs::Fs;
  8use futures::AsyncReadExt;
  9use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 10
 11use crate::{convert_rustdoc_to_markdown, RustdocDatabase, RustdocItem, RustdocItemKind};
 12
/// Identifies where a crate's rustdoc pages were obtained from.
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
    /// The docs were sourced from local `cargo doc` output.
    Local,
    /// The docs were sourced from `docs.rs`.
    DocsDotRs,
}
 20
/// A source of raw rustdoc pages for a crate.
#[async_trait]
pub trait RustdocProvider {
    /// Fetches the contents of a rustdoc page belonging to `crate_name`.
    ///
    /// Passing `None` for `item` requests the crate's root page; passing
    /// `Some(item)` requests the page for that item.
    ///
    /// Returns `Ok(None)` when the provider has no page to offer for the
    /// request, and `Err` when fetching the page failed.
    async fn fetch_page(
        &self,
        crate_name: &str,
        item: Option<&RustdocItem>,
    ) -> Result<Option<String>>;
}
 29
/// A [`RustdocProvider`] that reads pages from `target/doc` inside a Cargo
/// workspace.
pub struct LocalProvider {
    /// Filesystem handle used to load the page files.
    fs: Arc<dyn Fs>,
    /// Workspace root; pages are looked up under its `target/doc` directory.
    cargo_workspace_root: PathBuf,
}
 34
 35impl LocalProvider {
 36    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 37        Self {
 38            fs,
 39            cargo_workspace_root,
 40        }
 41    }
 42}
 43
 44#[async_trait]
 45impl RustdocProvider for LocalProvider {
 46    async fn fetch_page(
 47        &self,
 48        crate_name: &str,
 49        item: Option<&RustdocItem>,
 50    ) -> Result<Option<String>> {
 51        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
 52        local_cargo_doc_path.push(&crate_name);
 53        if let Some(item) = item {
 54            local_cargo_doc_path.push(item.url_path());
 55        } else {
 56            local_cargo_doc_path.push("index.html");
 57        }
 58
 59        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
 60            return Ok(None);
 61        };
 62
 63        Ok(Some(contents))
 64    }
 65}
 66
/// A [`RustdocProvider`] that downloads pages from `docs.rs`.
pub struct DocsDotRsProvider {
    /// HTTP client used to issue the page requests.
    http_client: Arc<HttpClientWithUrl>,
}
 70
 71impl DocsDotRsProvider {
 72    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
 73        Self { http_client }
 74    }
 75}
 76
 77#[async_trait]
 78impl RustdocProvider for DocsDotRsProvider {
 79    async fn fetch_page(
 80        &self,
 81        crate_name: &str,
 82        item: Option<&RustdocItem>,
 83    ) -> Result<Option<String>> {
 84        let version = "latest";
 85        let path = format!(
 86            "{crate_name}/{version}/{crate_name}{item_path}",
 87            item_path = item
 88                .map(|item| format!("/{}", item.url_path()))
 89                .unwrap_or_default()
 90        );
 91
 92        let mut response = self
 93            .http_client
 94            .get(
 95                &format!("https://docs.rs/{path}"),
 96                AsyncBody::default(),
 97                true,
 98            )
 99            .await?;
100
101        let mut body = Vec::new();
102        response
103            .body_mut()
104            .read_to_end(&mut body)
105            .await
106            .context("error reading docs.rs response body")?;
107
108        if response.status().is_client_error() {
109            let text = String::from_utf8_lossy(body.as_slice());
110            bail!(
111                "status error {}, response: {text:?}",
112                response.status().as_u16()
113            );
114        }
115
116        Ok(Some(String::from_utf8(body)?))
117    }
118}
119
/// A queued [`RustdocItem`] together with debugging information about how the
/// indexer reached it.
#[derive(Debug)]
struct RustdocItemWithHistory {
    /// The item whose page should be fetched.
    pub item: RustdocItem,
    /// URL paths of the items visited on the way to this one, ending with this
    /// item's own URL path. Only present in debug builds, where it is included
    /// in the error reported when fetching the item fails.
    #[cfg(debug_assertions)]
    pub history: Vec<String>,
}
126
/// Crawls the rustdoc pages of a crate and stores them as Markdown.
pub(crate) struct RustdocIndexer {
    /// Destination for the converted pages.
    database: Arc<RustdocDatabase>,
    /// Source the raw pages are fetched from.
    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
131
132impl RustdocIndexer {
133    pub fn new(
134        database: Arc<RustdocDatabase>,
135        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
136    ) -> Self {
137        Self { database, provider }
138    }
139
140    /// Indexes the crate with the given name.
141    pub async fn index(&self, crate_name: String) -> Result<()> {
142        let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
143            return Ok(());
144        };
145
146        let (crate_root_markdown, items) =
147            convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
148
149        self.database
150            .insert(crate_name.clone(), None, crate_root_markdown)
151            .await?;
152
153        let mut seen_items = HashSet::from_iter(items.clone());
154        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
155            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
156                item,
157                #[cfg(debug_assertions)]
158                history: Vec::new(),
159            }));
160
161        while let Some(item_with_history) = items_to_visit.pop_front() {
162            let item = &item_with_history.item;
163
164            let Some(result) = self
165                .provider
166                .fetch_page(&crate_name, Some(&item))
167                .await
168                .with_context(|| {
169                    #[cfg(debug_assertions)]
170                    {
171                        format!(
172                            "failed to fetch {item:?}: {history:?}",
173                            history = item_with_history.history
174                        )
175                    }
176
177                    #[cfg(not(debug_assertions))]
178                    {
179                        format!("failed to fetch {item:?}")
180                    }
181                })?
182            else {
183                continue;
184            };
185
186            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
187
188            self.database
189                .insert(crate_name.clone(), Some(item), markdown)
190                .await?;
191
192            let parent_item = item;
193            for mut item in referenced_items {
194                if seen_items.contains(&item) {
195                    continue;
196                }
197
198                seen_items.insert(item.clone());
199
200                item.path.extend(parent_item.path.clone());
201                match parent_item.kind {
202                    RustdocItemKind::Mod => {
203                        item.path.push(parent_item.name.clone());
204                    }
205                    _ => {}
206                }
207
208                items_to_visit.push_back(RustdocItemWithHistory {
209                    #[cfg(debug_assertions)]
210                    history: {
211                        let mut history = item_with_history.history.clone();
212                        history.push(item.url_path());
213                        history
214                    },
215                    item,
216                });
217            }
218        }
219
220        Ok(())
221    }
222}