indexer.rs

  1use std::path::PathBuf;
  2use std::sync::Arc;
  3
  4use anyhow::{bail, Context, Result};
  5use async_trait::async_trait;
  6use collections::{HashSet, VecDeque};
  7use fs::Fs;
  8use futures::AsyncReadExt;
  9use http::{AsyncBody, HttpClient, HttpClientWithUrl};
 10
 11use crate::{
 12    convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind,
 13};
 14
 15#[derive(Debug, Clone, Copy)]
 16pub enum RustdocSource {
 17    /// The docs were sourced from Zed's rustdoc index.
 18    Index,
 19    /// The docs were sourced from local `cargo doc` output.
 20    Local,
 21    /// The docs were sourced from `docs.rs`.
 22    DocsDotRs,
 23}
 24
 25#[async_trait]
 26pub trait RustdocProvider {
 27    async fn fetch_page(
 28        &self,
 29        crate_name: &CrateName,
 30        item: Option<&RustdocItem>,
 31    ) -> Result<Option<String>>;
 32}
 33
 34pub struct LocalProvider {
 35    fs: Arc<dyn Fs>,
 36    cargo_workspace_root: PathBuf,
 37}
 38
 39impl LocalProvider {
 40    pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
 41        Self {
 42            fs,
 43            cargo_workspace_root,
 44        }
 45    }
 46}
 47
 48#[async_trait]
 49impl RustdocProvider for LocalProvider {
 50    async fn fetch_page(
 51        &self,
 52        crate_name: &CrateName,
 53        item: Option<&RustdocItem>,
 54    ) -> Result<Option<String>> {
 55        let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
 56        local_cargo_doc_path.push(crate_name.as_ref());
 57        if let Some(item) = item {
 58            local_cargo_doc_path.push(item.url_path());
 59        } else {
 60            local_cargo_doc_path.push("index.html");
 61        }
 62
 63        let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
 64            return Ok(None);
 65        };
 66
 67        Ok(Some(contents))
 68    }
 69}
 70
 71pub struct DocsDotRsProvider {
 72    http_client: Arc<HttpClientWithUrl>,
 73}
 74
 75impl DocsDotRsProvider {
 76    pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
 77        Self { http_client }
 78    }
 79}
 80
 81#[async_trait]
 82impl RustdocProvider for DocsDotRsProvider {
 83    async fn fetch_page(
 84        &self,
 85        crate_name: &CrateName,
 86        item: Option<&RustdocItem>,
 87    ) -> Result<Option<String>> {
 88        let version = "latest";
 89        let path = format!(
 90            "{crate_name}/{version}/{crate_name}{item_path}",
 91            item_path = item
 92                .map(|item| format!("/{}", item.url_path()))
 93                .unwrap_or_default()
 94        );
 95
 96        let mut response = self
 97            .http_client
 98            .get(
 99                &format!("https://docs.rs/{path}"),
100                AsyncBody::default(),
101                true,
102            )
103            .await?;
104
105        let mut body = Vec::new();
106        response
107            .body_mut()
108            .read_to_end(&mut body)
109            .await
110            .context("error reading docs.rs response body")?;
111
112        if response.status().is_client_error() {
113            let text = String::from_utf8_lossy(body.as_slice());
114            bail!(
115                "status error {}, response: {text:?}",
116                response.status().as_u16()
117            );
118        }
119
120        Ok(Some(String::from_utf8(body)?))
121    }
122}
123
124#[derive(Debug)]
125struct RustdocItemWithHistory {
126    pub item: RustdocItem,
127    #[cfg(debug_assertions)]
128    pub history: Vec<String>,
129}
130
131pub(crate) struct RustdocIndexer {
132    database: Arc<RustdocDatabase>,
133    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
134}
135
136impl RustdocIndexer {
137    pub fn new(
138        database: Arc<RustdocDatabase>,
139        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
140    ) -> Self {
141        Self { database, provider }
142    }
143
144    /// Indexes the crate with the given name.
145    pub async fn index(&self, crate_name: CrateName) -> Result<()> {
146        let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
147            return Ok(());
148        };
149
150        let (crate_root_markdown, items) =
151            convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
152
153        self.database
154            .insert(crate_name.clone(), None, crate_root_markdown)
155            .await?;
156
157        let mut seen_items = HashSet::from_iter(items.clone());
158        let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
159            VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
160                item,
161                #[cfg(debug_assertions)]
162                history: Vec::new(),
163            }));
164
165        while let Some(item_with_history) = items_to_visit.pop_front() {
166            let item = &item_with_history.item;
167
168            let Some(result) = self
169                .provider
170                .fetch_page(&crate_name, Some(&item))
171                .await
172                .with_context(|| {
173                    #[cfg(debug_assertions)]
174                    {
175                        format!(
176                            "failed to fetch {item:?}: {history:?}",
177                            history = item_with_history.history
178                        )
179                    }
180
181                    #[cfg(not(debug_assertions))]
182                    {
183                        format!("failed to fetch {item:?}")
184                    }
185                })?
186            else {
187                continue;
188            };
189
190            let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
191
192            self.database
193                .insert(crate_name.clone(), Some(item), markdown)
194                .await?;
195
196            let parent_item = item;
197            for mut item in referenced_items {
198                if seen_items.contains(&item) {
199                    continue;
200                }
201
202                seen_items.insert(item.clone());
203
204                item.path.extend(parent_item.path.clone());
205                match parent_item.kind {
206                    RustdocItemKind::Mod => {
207                        item.path.push(parent_item.name.clone());
208                    }
209                    _ => {}
210                }
211
212                items_to_visit.push_back(RustdocItemWithHistory {
213                    #[cfg(debug_assertions)]
214                    history: {
215                        let mut history = item_with_history.history.clone();
216                        history.push(item.url_path());
217                        history
218                    },
219                    item,
220                });
221            }
222        }
223
224        Ok(())
225    }
226}