1use std::path::PathBuf;
2use std::sync::Arc;
3
4use anyhow::{bail, Context, Result};
5use async_trait::async_trait;
6use collections::{HashSet, VecDeque};
7use fs::Fs;
8use futures::AsyncReadExt;
9use http::{AsyncBody, HttpClient, HttpClientWithUrl};
10
11use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
12
/// Identifies where a set of rustdoc pages was obtained from.
///
/// This is a fieldless tag, so it derives the cheap comparison and hashing
/// traits in addition to `Debug`/`Clone`/`Copy`; values can be compared with
/// `==` and used as keys in maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RustdocSource {
    /// The docs were sourced from local `cargo doc` output.
    Local,
    /// The docs were sourced from `docs.rs`.
    DocsDotRs,
}
20
21#[async_trait]
22pub trait RustdocProvider {
23 async fn fetch_page(
24 &self,
25 crate_name: &str,
26 item: Option<&RustdocItem>,
27 ) -> Result<Option<String>>;
28}
29
30pub struct LocalProvider {
31 fs: Arc<dyn Fs>,
32 cargo_workspace_root: PathBuf,
33}
34
35impl LocalProvider {
36 pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
37 Self {
38 fs,
39 cargo_workspace_root,
40 }
41 }
42}
43
44#[async_trait]
45impl RustdocProvider for LocalProvider {
46 async fn fetch_page(
47 &self,
48 crate_name: &str,
49 item: Option<&RustdocItem>,
50 ) -> Result<Option<String>> {
51 let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
52 local_cargo_doc_path.push(&crate_name);
53 if let Some(item) = item {
54 if !item.path.is_empty() {
55 local_cargo_doc_path.push(item.path.join("/"));
56 }
57 }
58 local_cargo_doc_path.push("index.html");
59
60 let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
61 return Ok(None);
62 };
63
64 Ok(Some(contents))
65 }
66}
67
68pub struct DocsDotRsProvider {
69 http_client: Arc<HttpClientWithUrl>,
70}
71
72impl DocsDotRsProvider {
73 pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
74 Self { http_client }
75 }
76}
77
78#[async_trait]
79impl RustdocProvider for DocsDotRsProvider {
80 async fn fetch_page(
81 &self,
82 crate_name: &str,
83 item: Option<&RustdocItem>,
84 ) -> Result<Option<String>> {
85 let version = "latest";
86 let path = format!(
87 "{crate_name}/{version}/{crate_name}{item_path}",
88 item_path = item
89 .map(|item| format!("/{}", item.url_path()))
90 .unwrap_or_default()
91 );
92
93 println!("Fetching {}", &format!("https://docs.rs/{path}"));
94
95 let mut response = self
96 .http_client
97 .get(
98 &format!("https://docs.rs/{path}"),
99 AsyncBody::default(),
100 true,
101 )
102 .await?;
103
104 let mut body = Vec::new();
105 response
106 .body_mut()
107 .read_to_end(&mut body)
108 .await
109 .context("error reading docs.rs response body")?;
110
111 if response.status().is_client_error() {
112 let text = String::from_utf8_lossy(body.as_slice());
113 bail!(
114 "status error {}, response: {text:?}",
115 response.status().as_u16()
116 );
117 }
118
119 Ok(Some(String::from_utf8(body)?))
120 }
121}
122
123pub struct RustdocItemWithHistory {
124 pub item: RustdocItem,
125 #[cfg(debug_assertions)]
126 pub history: Vec<String>,
127}
128
129pub struct RustdocCrawler {
130 provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
131}
132
133impl RustdocCrawler {
134 pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
135 Self { provider }
136 }
137
138 pub async fn crawl(&self, crate_name: String) -> Result<Option<String>> {
139 let Some(crate_index_content) = self.provider.fetch_page(&crate_name, None).await? else {
140 return Ok(None);
141 };
142
143 let (_markdown, items) = convert_rustdoc_to_markdown(crate_index_content.as_bytes())?;
144
145 let mut seen_items = HashSet::default();
146 let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
147 VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
148 item,
149 #[cfg(debug_assertions)]
150 history: Vec::new(),
151 }));
152
153 while let Some(item_with_history) = items_to_visit.pop_front() {
154 let item = &item_with_history.item;
155 println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
156
157 let Some(result) = self
158 .provider
159 .fetch_page(&crate_name, Some(&item))
160 .await
161 .with_context(|| {
162 #[cfg(debug_assertions)]
163 {
164 format!(
165 "failed to fetch {item:?}: {history:?}",
166 history = item_with_history.history
167 )
168 }
169
170 #[cfg(not(debug_assertions))]
171 {
172 format!("failed to fetch {item:?}")
173 }
174 })?
175 else {
176 continue;
177 };
178
179 let (_markdown, mut items) = convert_rustdoc_to_markdown(result.as_bytes())?;
180
181 seen_items.insert(item.clone());
182
183 for child in &mut items {
184 child.path.extend(item.path.clone());
185 match item.kind {
186 RustdocItemKind::Mod => {
187 child.path.push(item.name.clone());
188 }
189 _ => {}
190 }
191 }
192
193 let unseen_items = items
194 .into_iter()
195 .map(|item| RustdocItemWithHistory {
196 #[cfg(debug_assertions)]
197 history: {
198 let mut history = item_with_history.history.clone();
199 history.push(item.url_path());
200 history
201 },
202 item,
203 })
204 .filter(|item| !seen_items.contains(&item.item));
205
206 items_to_visit.extend(unseen_items);
207 }
208
209 Ok(Some(String::new()))
210 }
211}