1use std::sync::Arc;
2
3use indexmap::IndexSet;
4use strum::{EnumIter, IntoEnumIterator};
5
6use crate::html_element::HtmlElement;
7use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
8
9pub struct RustdocHeadingHandler;
10
11impl HandleTag for RustdocHeadingHandler {
12 fn should_handle(&self, _tag: &str) -> bool {
13 // We're only handling text, so we don't need to visit any tags.
14 false
15 }
16
17 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
18 if writer.is_inside("h1")
19 || writer.is_inside("h2")
20 || writer.is_inside("h3")
21 || writer.is_inside("h4")
22 || writer.is_inside("h5")
23 || writer.is_inside("h6")
24 {
25 let text = text
26 .trim_matches(|char| char == '\n' || char == '\r' || char == 'ยง')
27 .replace('\n', " ");
28 writer.push_str(&text);
29
30 return HandlerOutcome::Handled;
31 }
32
33 HandlerOutcome::NoOp
34 }
35}
36
37pub struct RustdocCodeHandler;
38
39impl HandleTag for RustdocCodeHandler {
40 fn should_handle(&self, tag: &str) -> bool {
41 match tag {
42 "pre" | "code" => true,
43 _ => false,
44 }
45 }
46
47 fn handle_tag_start(
48 &mut self,
49 tag: &HtmlElement,
50 writer: &mut MarkdownWriter,
51 ) -> StartTagOutcome {
52 match tag.tag.as_str() {
53 "code" => {
54 if !writer.is_inside("pre") {
55 writer.push_str("`");
56 }
57 }
58 "pre" => {
59 let classes = tag.classes();
60 let is_rust = classes.iter().any(|class| class == "rust");
61 let language = is_rust
62 .then(|| "rs")
63 .or_else(|| {
64 classes.iter().find_map(|class| {
65 if let Some((_, language)) = class.split_once("language-") {
66 Some(language.trim())
67 } else {
68 None
69 }
70 })
71 })
72 .unwrap_or("");
73
74 writer.push_str(&format!("\n\n```{language}\n"));
75 }
76 _ => {}
77 }
78
79 StartTagOutcome::Continue
80 }
81
82 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
83 match tag.tag.as_str() {
84 "code" => {
85 if !writer.is_inside("pre") {
86 writer.push_str("`");
87 }
88 }
89 "pre" => writer.push_str("\n```\n"),
90 _ => {}
91 }
92 }
93
94 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
95 if writer.is_inside("pre") {
96 writer.push_str(&text);
97 return HandlerOutcome::Handled;
98 }
99
100 HandlerOutcome::NoOp
101 }
102}
103
104const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
105
106pub struct RustdocItemHandler;
107
108impl RustdocItemHandler {
109 /// Returns whether we're currently inside of an `.item-name` element, which
110 /// rustdoc uses to display Rust items in a list.
111 fn is_inside_item_name(writer: &MarkdownWriter) -> bool {
112 writer
113 .current_element_stack()
114 .iter()
115 .any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS))
116 }
117}
118
119impl HandleTag for RustdocItemHandler {
120 fn should_handle(&self, tag: &str) -> bool {
121 match tag {
122 "div" | "span" => true,
123 _ => false,
124 }
125 }
126
127 fn handle_tag_start(
128 &mut self,
129 tag: &HtmlElement,
130 writer: &mut MarkdownWriter,
131 ) -> StartTagOutcome {
132 match tag.tag.as_str() {
133 "div" | "span" => {
134 if Self::is_inside_item_name(writer) && tag.has_class("stab") {
135 writer.push_str(" [");
136 }
137 }
138 _ => {}
139 }
140
141 StartTagOutcome::Continue
142 }
143
144 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
145 match tag.tag.as_str() {
146 "div" | "span" => {
147 if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) {
148 writer.push_str(": ");
149 }
150
151 if Self::is_inside_item_name(writer) && tag.has_class("stab") {
152 writer.push_str("]");
153 }
154 }
155 _ => {}
156 }
157 }
158
159 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
160 if Self::is_inside_item_name(writer)
161 && !writer.is_inside("span")
162 && !writer.is_inside("code")
163 {
164 writer.push_str(&format!("`{text}`"));
165 return HandlerOutcome::Handled;
166 }
167
168 HandlerOutcome::NoOp
169 }
170}
171
172pub struct RustdocChromeRemover;
173
174impl HandleTag for RustdocChromeRemover {
175 fn should_handle(&self, tag: &str) -> bool {
176 match tag {
177 "head" | "script" | "nav" | "summary" | "button" | "div" | "span" => true,
178 _ => false,
179 }
180 }
181
182 fn handle_tag_start(
183 &mut self,
184 tag: &HtmlElement,
185 _writer: &mut MarkdownWriter,
186 ) -> StartTagOutcome {
187 match tag.tag.as_str() {
188 "head" | "script" | "nav" => return StartTagOutcome::Skip,
189 "summary" => {
190 if tag.has_class("hideme") {
191 return StartTagOutcome::Skip;
192 }
193 }
194 "button" => {
195 if tag.attr("id").as_deref() == Some("copy-path") {
196 return StartTagOutcome::Skip;
197 }
198 }
199 "div" | "span" => {
200 let classes_to_skip = ["nav-container", "sidebar-elems", "out-of-band"];
201 if tag.has_any_classes(&classes_to_skip) {
202 return StartTagOutcome::Skip;
203 }
204 }
205 _ => {}
206 }
207
208 StartTagOutcome::Continue
209 }
210}
211
212#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
213pub enum RustdocItemKind {
214 Mod,
215 Macro,
216 Struct,
217 Enum,
218 Constant,
219 Trait,
220 Function,
221 TypeAlias,
222 AttributeMacro,
223 DeriveMacro,
224}
225
226impl RustdocItemKind {
227 const fn class(&self) -> &'static str {
228 match self {
229 Self::Mod => "mod",
230 Self::Macro => "macro",
231 Self::Struct => "struct",
232 Self::Enum => "enum",
233 Self::Constant => "constant",
234 Self::Trait => "trait",
235 Self::Function => "fn",
236 Self::TypeAlias => "type",
237 Self::AttributeMacro => "attr",
238 Self::DeriveMacro => "derive",
239 }
240 }
241}
242
243#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
244pub struct RustdocItem {
245 pub kind: RustdocItemKind,
246 /// The item path, up until the name of the item.
247 pub path: Vec<Arc<str>>,
248 /// The name of the item.
249 pub name: Arc<str>,
250}
251
252impl RustdocItem {
253 pub fn url_path(&self) -> String {
254 let name = &self.name;
255 let mut path_components = self.path.clone();
256
257 match self.kind {
258 RustdocItemKind::Mod => {
259 path_components.push(name.clone());
260 path_components.push("index.html".into());
261 }
262 RustdocItemKind::Macro
263 | RustdocItemKind::Struct
264 | RustdocItemKind::Enum
265 | RustdocItemKind::Constant
266 | RustdocItemKind::Trait
267 | RustdocItemKind::Function
268 | RustdocItemKind::TypeAlias
269 | RustdocItemKind::AttributeMacro
270 | RustdocItemKind::DeriveMacro => {
271 path_components
272 .push(format!("{kind}.{name}.html", kind = self.kind.class()).into());
273 }
274 }
275
276 path_components.join("/")
277 }
278}
279
280pub struct RustdocItemCollector {
281 pub items: IndexSet<RustdocItem>,
282}
283
284impl RustdocItemCollector {
285 pub fn new() -> Self {
286 Self {
287 items: IndexSet::new(),
288 }
289 }
290
291 fn parse_item(tag: &HtmlElement) -> Option<RustdocItem> {
292 if tag.tag.as_str() != "a" {
293 return None;
294 }
295
296 let href = tag.attr("href")?;
297 if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") {
298 return None;
299 }
300
301 for kind in RustdocItemKind::iter() {
302 if tag.has_class(kind.class()) {
303 let mut parts = href.trim_end_matches("/index.html").split('/');
304
305 if let Some(last_component) = parts.next_back() {
306 let last_component = match last_component.split_once('#') {
307 Some((component, _fragment)) => component,
308 None => last_component,
309 };
310
311 let name = last_component
312 .trim_start_matches(&format!("{}.", kind.class()))
313 .trim_end_matches(".html");
314
315 return Some(RustdocItem {
316 kind,
317 name: name.into(),
318 path: parts.map(Into::into).collect(),
319 });
320 }
321 }
322 }
323
324 None
325 }
326}
327
328impl HandleTag for RustdocItemCollector {
329 fn should_handle(&self, tag: &str) -> bool {
330 tag == "a"
331 }
332
333 fn handle_tag_start(
334 &mut self,
335 tag: &HtmlElement,
336 writer: &mut MarkdownWriter,
337 ) -> StartTagOutcome {
338 match tag.tag.as_str() {
339 "a" => {
340 let is_reexport = writer.current_element_stack().iter().any(|element| {
341 if let Some(id) = element.attr("id") {
342 id.starts_with("reexport.") || id.starts_with("method.")
343 } else {
344 false
345 }
346 });
347
348 if !is_reexport {
349 if let Some(item) = Self::parse_item(tag) {
350 self.items.insert(item);
351 }
352 }
353 }
354 _ => {}
355 }
356
357 StartTagOutcome::Continue
358 }
359}