rustdoc_to_markdown.rs

 1//! Provides conversion from rustdoc's HTML output to Markdown.
 2
 3#![deny(missing_docs)]
 4
 5mod markdown_writer;
 6
 7use std::io::Read;
 8
 9use anyhow::{Context, Result};
10use html5ever::driver::ParseOpts;
11use html5ever::parse_document;
12use html5ever::tendril::TendrilSink;
13use html5ever::tree_builder::TreeBuilderOpts;
14use markup5ever_rcdom::RcDom;
15
16use crate::markdown_writer::MarkdownWriter;
17
18/// Converts the provided rustdoc HTML to Markdown.
19pub fn convert_rustdoc_to_markdown(mut html: impl Read) -> Result<String> {
20    let parse_options = ParseOpts {
21        tree_builder: TreeBuilderOpts {
22            drop_doctype: true,
23            ..Default::default()
24        },
25        ..Default::default()
26    };
27    let dom = parse_document(RcDom::default(), parse_options)
28        .from_utf8()
29        .read_from(&mut html)
30        .context("failed to parse rustdoc HTML")?;
31
32    let markdown_writer = MarkdownWriter::new();
33    let markdown = markdown_writer
34        .run(&dom.document)
35        .context("failed to convert rustdoc to HTML")?;
36
37    Ok(markdown)
38}
39
#[cfg(test)]
mod tests {
    use indoc::indoc;
    use pretty_assertions::assert_eq;

    use super::*;

    /// A syntax-highlighted rustdoc example (`<pre><code>` with `<span>`
    /// markup and HTML entities) is expected to come out as a fenced `rs`
    /// code block containing only the plain source text.
    #[test]
    fn test_code_blocks() {
        // Input as rustdoc renders it: highlighting spans plus `&lt;`/`&gt;` entities.
        let rustdoc_html = indoc! {r#"
            <pre class="rust rust-example-rendered"><code><span class="kw">use </span>axum::extract::{Path, Query, Json};
            <span class="kw">use </span>std::collections::HashMap;

            <span class="comment">// `Path` gives you the path parameters and deserializes them.
            </span><span class="kw">async fn </span>path(Path(user_id): Path&lt;u32&gt;) {}

            <span class="comment">// `Query` gives you the query parameters and deserializes them.
            </span><span class="kw">async fn </span>query(Query(params): Query&lt;HashMap&lt;String, String&gt;&gt;) {}

            <span class="comment">// Buffer the request body and deserialize it as JSON into a
            // `serde_json::Value`. `Json` supports any type that implements
            // `serde::Deserialize`.
            </span><span class="kw">async fn </span>json(Json(payload): Json&lt;serde_json::Value&gt;) {}</code></pre>
        "#};
        // Surrounding whitespace is trimmed off the expected text before comparing.
        let expected_markdown = indoc! {"
            ```rs
            use axum::extract::{Path, Query, Json};
            use std::collections::HashMap;

            // `Path` gives you the path parameters and deserializes them.
            async fn path(Path(user_id): Path<u32>) {}

            // `Query` gives you the query parameters and deserializes them.
            async fn query(Query(params): Query<HashMap<String, String>>) {}

            // Buffer the request body and deserialize it as JSON into a
            // `serde_json::Value`. `Json` supports any type that implements
            // `serde::Deserialize`.
            async fn json(Json(payload): Json<serde_json::Value>) {}
            ```
        "}
        .trim();

        let actual_markdown = convert_rustdoc_to_markdown(rustdoc_html.as_bytes()).unwrap();

        assert_eq!(actual_markdown, expected_markdown);
    }
}
88}