rustdoc.rs

  1use std::sync::Arc;
  2
  3use indexmap::IndexSet;
  4use strum::{EnumIter, IntoEnumIterator};
  5
  6use crate::html_element::HtmlElement;
  7use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
  8
  9pub struct RustdocHeadingHandler;
 10
 11impl HandleTag for RustdocHeadingHandler {
 12    fn should_handle(&self, _tag: &str) -> bool {
 13        // We're only handling text, so we don't need to visit any tags.
 14        false
 15    }
 16
 17    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
 18        if writer.is_inside("h1")
 19            || writer.is_inside("h2")
 20            || writer.is_inside("h3")
 21            || writer.is_inside("h4")
 22            || writer.is_inside("h5")
 23            || writer.is_inside("h6")
 24        {
 25            let text = text
 26                .trim_matches(|char| char == '\n' || char == '\r' || char == 'ยง')
 27                .replace('\n', " ");
 28            writer.push_str(&text);
 29
 30            return HandlerOutcome::Handled;
 31        }
 32
 33        HandlerOutcome::NoOp
 34    }
 35}
 36
 37pub struct RustdocCodeHandler;
 38
 39impl HandleTag for RustdocCodeHandler {
 40    fn should_handle(&self, tag: &str) -> bool {
 41        match tag {
 42            "pre" | "code" => true,
 43            _ => false,
 44        }
 45    }
 46
 47    fn handle_tag_start(
 48        &mut self,
 49        tag: &HtmlElement,
 50        writer: &mut MarkdownWriter,
 51    ) -> StartTagOutcome {
 52        match tag.tag.as_str() {
 53            "code" => {
 54                if !writer.is_inside("pre") {
 55                    writer.push_str("`");
 56                }
 57            }
 58            "pre" => {
 59                let classes = tag.classes();
 60                let is_rust = classes.iter().any(|class| class == "rust");
 61                let language = is_rust
 62                    .then(|| "rs")
 63                    .or_else(|| {
 64                        classes.iter().find_map(|class| {
 65                            if let Some((_, language)) = class.split_once("language-") {
 66                                Some(language.trim())
 67                            } else {
 68                                None
 69                            }
 70                        })
 71                    })
 72                    .unwrap_or("");
 73
 74                writer.push_str(&format!("\n\n```{language}\n"));
 75            }
 76            _ => {}
 77        }
 78
 79        StartTagOutcome::Continue
 80    }
 81
 82    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
 83        match tag.tag.as_str() {
 84            "code" => {
 85                if !writer.is_inside("pre") {
 86                    writer.push_str("`");
 87                }
 88            }
 89            "pre" => writer.push_str("\n```\n"),
 90            _ => {}
 91        }
 92    }
 93
 94    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
 95        if writer.is_inside("pre") {
 96            writer.push_str(&text);
 97            return HandlerOutcome::Handled;
 98        }
 99
100        HandlerOutcome::NoOp
101    }
102}
103
104const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
105
106pub struct RustdocItemHandler;
107
108impl RustdocItemHandler {
109    /// Returns whether we're currently inside of an `.item-name` element, which
110    /// rustdoc uses to display Rust items in a list.
111    fn is_inside_item_name(writer: &MarkdownWriter) -> bool {
112        writer
113            .current_element_stack()
114            .iter()
115            .any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS))
116    }
117}
118
119impl HandleTag for RustdocItemHandler {
120    fn should_handle(&self, tag: &str) -> bool {
121        match tag {
122            "div" | "span" => true,
123            _ => false,
124        }
125    }
126
127    fn handle_tag_start(
128        &mut self,
129        tag: &HtmlElement,
130        writer: &mut MarkdownWriter,
131    ) -> StartTagOutcome {
132        match tag.tag.as_str() {
133            "div" | "span" => {
134                if Self::is_inside_item_name(writer) && tag.has_class("stab") {
135                    writer.push_str(" [");
136                }
137            }
138            _ => {}
139        }
140
141        StartTagOutcome::Continue
142    }
143
144    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
145        match tag.tag.as_str() {
146            "div" | "span" => {
147                if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) {
148                    writer.push_str(": ");
149                }
150
151                if Self::is_inside_item_name(writer) && tag.has_class("stab") {
152                    writer.push_str("]");
153                }
154            }
155            _ => {}
156        }
157    }
158
159    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
160        if Self::is_inside_item_name(writer)
161            && !writer.is_inside("span")
162            && !writer.is_inside("code")
163        {
164            writer.push_str(&format!("`{text}`"));
165            return HandlerOutcome::Handled;
166        }
167
168        HandlerOutcome::NoOp
169    }
170}
171
172pub struct RustdocChromeRemover;
173
174impl HandleTag for RustdocChromeRemover {
175    fn should_handle(&self, tag: &str) -> bool {
176        match tag {
177            "head" | "script" | "nav" | "summary" | "button" | "div" | "span" => true,
178            _ => false,
179        }
180    }
181
182    fn handle_tag_start(
183        &mut self,
184        tag: &HtmlElement,
185        _writer: &mut MarkdownWriter,
186    ) -> StartTagOutcome {
187        match tag.tag.as_str() {
188            "head" | "script" | "nav" => return StartTagOutcome::Skip,
189            "summary" => {
190                if tag.has_class("hideme") {
191                    return StartTagOutcome::Skip;
192                }
193            }
194            "button" => {
195                if tag.attr("id").as_deref() == Some("copy-path") {
196                    return StartTagOutcome::Skip;
197                }
198            }
199            "div" | "span" => {
200                let classes_to_skip = ["nav-container", "sidebar-elems", "out-of-band"];
201                if tag.has_any_classes(&classes_to_skip) {
202                    return StartTagOutcome::Skip;
203                }
204            }
205            _ => {}
206        }
207
208        StartTagOutcome::Continue
209    }
210}
211
212#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
213pub enum RustdocItemKind {
214    Mod,
215    Macro,
216    Struct,
217    Enum,
218    Constant,
219    Trait,
220    Function,
221    TypeAlias,
222    AttributeMacro,
223    DeriveMacro,
224}
225
226impl RustdocItemKind {
227    const fn class(&self) -> &'static str {
228        match self {
229            Self::Mod => "mod",
230            Self::Macro => "macro",
231            Self::Struct => "struct",
232            Self::Enum => "enum",
233            Self::Constant => "constant",
234            Self::Trait => "trait",
235            Self::Function => "fn",
236            Self::TypeAlias => "type",
237            Self::AttributeMacro => "attr",
238            Self::DeriveMacro => "derive",
239        }
240    }
241}
242
243#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
244pub struct RustdocItem {
245    pub kind: RustdocItemKind,
246    /// The item path, up until the name of the item.
247    pub path: Vec<Arc<str>>,
248    /// The name of the item.
249    pub name: Arc<str>,
250}
251
252impl RustdocItem {
253    pub fn url_path(&self) -> String {
254        let name = &self.name;
255        let mut path_components = self.path.clone();
256
257        match self.kind {
258            RustdocItemKind::Mod => {
259                path_components.push(name.clone());
260                path_components.push("index.html".into());
261            }
262            RustdocItemKind::Macro
263            | RustdocItemKind::Struct
264            | RustdocItemKind::Enum
265            | RustdocItemKind::Constant
266            | RustdocItemKind::Trait
267            | RustdocItemKind::Function
268            | RustdocItemKind::TypeAlias
269            | RustdocItemKind::AttributeMacro
270            | RustdocItemKind::DeriveMacro => {
271                path_components
272                    .push(format!("{kind}.{name}.html", kind = self.kind.class()).into());
273            }
274        }
275
276        path_components.join("/")
277    }
278}
279
280pub struct RustdocItemCollector {
281    pub items: IndexSet<RustdocItem>,
282}
283
284impl RustdocItemCollector {
285    pub fn new() -> Self {
286        Self {
287            items: IndexSet::new(),
288        }
289    }
290
291    fn parse_item(tag: &HtmlElement) -> Option<RustdocItem> {
292        if tag.tag.as_str() != "a" {
293            return None;
294        }
295
296        let href = tag.attr("href")?;
297        if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") {
298            return None;
299        }
300
301        for kind in RustdocItemKind::iter() {
302            if tag.has_class(kind.class()) {
303                let mut parts = href.trim_end_matches("/index.html").split('/');
304
305                if let Some(last_component) = parts.next_back() {
306                    let last_component = match last_component.split_once('#') {
307                        Some((component, _fragment)) => component,
308                        None => last_component,
309                    };
310
311                    let name = last_component
312                        .trim_start_matches(&format!("{}.", kind.class()))
313                        .trim_end_matches(".html");
314
315                    return Some(RustdocItem {
316                        kind,
317                        name: name.into(),
318                        path: parts.map(Into::into).collect(),
319                    });
320                }
321            }
322        }
323
324        None
325    }
326}
327
328impl HandleTag for RustdocItemCollector {
329    fn should_handle(&self, tag: &str) -> bool {
330        tag == "a"
331    }
332
333    fn handle_tag_start(
334        &mut self,
335        tag: &HtmlElement,
336        writer: &mut MarkdownWriter,
337    ) -> StartTagOutcome {
338        match tag.tag.as_str() {
339            "a" => {
340                let is_reexport = writer.current_element_stack().iter().any(|element| {
341                    if let Some(id) = element.attr("id") {
342                        id.starts_with("reexport.") || id.starts_with("method.")
343                    } else {
344                        false
345                    }
346                });
347
348                if !is_reexport {
349                    if let Some(item) = Self::parse_item(tag) {
350                        self.items.insert(item);
351                    }
352                }
353            }
354            _ => {}
355        }
356
357        StartTagOutcome::Continue
358    }
359}