1use std::path::PathBuf;
2use std::sync::Arc;
3
4use anyhow::{bail, Context, Result};
5use async_trait::async_trait;
6use collections::{HashSet, VecDeque};
7use fs::Fs;
8use futures::AsyncReadExt;
9use http::{AsyncBody, HttpClient, HttpClientWithUrl};
10use indexmap::IndexMap;
11
12use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
13
/// Identifies where a crate's rustdoc output was obtained from.
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
    /// The docs were sourced from local `cargo doc` output.
    Local,
    /// The docs were sourced from `docs.rs`.
    DocsDotRs,
}
21
/// A source of rustdoc pages that [`RustdocCrawler`] fetches from.
#[async_trait]
pub trait RustdocProvider {
    /// Fetches the rustdoc page for `item` in the crate named `crate_name`.
    ///
    /// When `item` is `None`, the crate's root page is requested instead.
    ///
    /// `Ok(None)` signals that no page is available: the crawler gives up on
    /// the whole crate when the root page is missing and skips individual
    /// items whose page is missing. `Err` aborts the crawl.
    async fn fetch_page(
        &self,
        crate_name: &str,
        item: Option<&RustdocItem>,
    ) -> Result<Option<String>>;
}
30
/// A [`RustdocProvider`] that reads pages from a Cargo workspace's
/// `target/doc` directory.
pub struct LocalProvider {
    /// Filesystem used to load the generated documentation files.
    fs: Arc<dyn Fs>,
    /// Root of the Cargo workspace; pages are looked up under `target/doc`
    /// beneath this directory.
    cargo_workspace_root: PathBuf,
}
35
36impl LocalProvider {
37 pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
38 Self {
39 fs,
40 cargo_workspace_root,
41 }
42 }
43}
44
45#[async_trait]
46impl RustdocProvider for LocalProvider {
47 async fn fetch_page(
48 &self,
49 crate_name: &str,
50 item: Option<&RustdocItem>,
51 ) -> Result<Option<String>> {
52 let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
53 local_cargo_doc_path.push(&crate_name);
54 if let Some(item) = item {
55 local_cargo_doc_path.push(item.url_path());
56 } else {
57 local_cargo_doc_path.push("index.html");
58 }
59
60 println!("Fetching {}", local_cargo_doc_path.display());
61
62 let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
63 return Ok(None);
64 };
65
66 Ok(Some(contents))
67 }
68}
69
/// A [`RustdocProvider`] that downloads pages from `docs.rs` over HTTP.
pub struct DocsDotRsProvider {
    /// Client used to issue the `docs.rs` requests.
    http_client: Arc<HttpClientWithUrl>,
}
73
74impl DocsDotRsProvider {
75 pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
76 Self { http_client }
77 }
78}
79
80#[async_trait]
81impl RustdocProvider for DocsDotRsProvider {
82 async fn fetch_page(
83 &self,
84 crate_name: &str,
85 item: Option<&RustdocItem>,
86 ) -> Result<Option<String>> {
87 let version = "latest";
88 let path = format!(
89 "{crate_name}/{version}/{crate_name}{item_path}",
90 item_path = item
91 .map(|item| format!("/{}", item.url_path()))
92 .unwrap_or_default()
93 );
94
95 println!("Fetching {}", &format!("https://docs.rs/{path}"));
96
97 let mut response = self
98 .http_client
99 .get(
100 &format!("https://docs.rs/{path}"),
101 AsyncBody::default(),
102 true,
103 )
104 .await?;
105
106 let mut body = Vec::new();
107 response
108 .body_mut()
109 .read_to_end(&mut body)
110 .await
111 .context("error reading docs.rs response body")?;
112
113 if response.status().is_client_error() {
114 let text = String::from_utf8_lossy(body.as_slice());
115 bail!(
116 "status error {}, response: {text:?}",
117 response.status().as_u16()
118 );
119 }
120
121 Ok(Some(String::from_utf8(body)?))
122 }
123}
124
/// An item queued for crawling, together with (in debug builds only) the
/// trail of pages that led to it.
#[derive(Debug)]
struct RustdocItemWithHistory {
    /// The item whose page should be fetched.
    pub item: RustdocItem,
    /// URL paths of the items through which this one was discovered, ending
    /// with this item's own URL path. Empty for items found on the crate root
    /// page. Only used to enrich fetch-failure error messages.
    #[cfg(debug_assertions)]
    pub history: Vec<String>,
}
131
/// The result of crawling a crate's documentation.
pub struct CrateDocs {
    /// Markdown converted from the crate's root page.
    pub crate_root_markdown: String,
    /// Markdown for each successfully fetched item, keyed by item and kept in
    /// the order the items were visited.
    pub items: IndexMap<RustdocItem, String>,
}
136
/// Crawls a crate's rustdoc pages and converts them to Markdown.
pub struct RustdocCrawler {
    /// Where pages are fetched from.
    provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
140
141impl RustdocCrawler {
142 pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
143 Self { provider }
144 }
145
146 pub async fn crawl(&self, crate_name: String) -> Result<Option<CrateDocs>> {
147 let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
148 return Ok(None);
149 };
150
151 let (crate_root_markdown, items) =
152 convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
153
154 let mut docs_by_item = IndexMap::new();
155 let mut seen_items = HashSet::from_iter(items.clone());
156 let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
157 VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
158 item,
159 #[cfg(debug_assertions)]
160 history: Vec::new(),
161 }));
162
163 while let Some(item_with_history) = items_to_visit.pop_front() {
164 let item = &item_with_history.item;
165
166 println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
167
168 let Some(result) = self
169 .provider
170 .fetch_page(&crate_name, Some(&item))
171 .await
172 .with_context(|| {
173 #[cfg(debug_assertions)]
174 {
175 format!(
176 "failed to fetch {item:?}: {history:?}",
177 history = item_with_history.history
178 )
179 }
180
181 #[cfg(not(debug_assertions))]
182 {
183 format!("failed to fetch {item:?}")
184 }
185 })?
186 else {
187 continue;
188 };
189
190 let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
191
192 docs_by_item.insert(item.clone(), markdown);
193
194 let parent_item = item;
195 for mut item in referenced_items {
196 if seen_items.contains(&item) {
197 continue;
198 }
199
200 seen_items.insert(item.clone());
201
202 item.path.extend(parent_item.path.clone());
203 match parent_item.kind {
204 RustdocItemKind::Mod => {
205 item.path.push(parent_item.name.clone());
206 }
207 _ => {}
208 }
209
210 items_to_visit.push_back(RustdocItemWithHistory {
211 #[cfg(debug_assertions)]
212 history: {
213 let mut history = item_with_history.history.clone();
214 history.push(item.url_path());
215 history
216 },
217 item,
218 });
219 }
220 }
221
222 Ok(Some(CrateDocs {
223 crate_root_markdown,
224 items: docs_by_item,
225 }))
226 }
227}