rustdoc_to_markdown.rs

  1//! Provides conversion from rustdoc's HTML output to Markdown.
  2
  3#![deny(missing_docs)]
  4
  5mod markdown_writer;
  6
  7use std::io::Read;
  8
  9use anyhow::{Context, Result};
 10use html5ever::driver::ParseOpts;
 11use html5ever::parse_document;
 12use html5ever::tendril::TendrilSink;
 13use html5ever::tree_builder::TreeBuilderOpts;
 14use markup5ever_rcdom::RcDom;
 15
 16use crate::markdown_writer::MarkdownWriter;
 17
 18/// Converts the provided rustdoc HTML to Markdown.
 19pub fn convert_rustdoc_to_markdown(mut html: impl Read) -> Result<String> {
 20    let parse_options = ParseOpts {
 21        tree_builder: TreeBuilderOpts {
 22            drop_doctype: true,
 23            ..Default::default()
 24        },
 25        ..Default::default()
 26    };
 27    let dom = parse_document(RcDom::default(), parse_options)
 28        .from_utf8()
 29        .read_from(&mut html)
 30        .context("failed to parse rustdoc HTML")?;
 31
 32    let markdown_writer = MarkdownWriter::new();
 33    let markdown = markdown_writer
 34        .run(&dom.document)
 35        .context("failed to convert rustdoc to HTML")?;
 36
 37    Ok(markdown)
 38}
 39
 40#[cfg(test)]
 41mod tests {
 42    use indoc::indoc;
 43    use pretty_assertions::assert_eq;
 44
 45    use super::*;
 46
 47    #[test]
 48    fn test_main_heading_buttons_get_removed() {
 49        let html = indoc! {r##"
 50            <div class="main-heading">
 51                <h1>Crate <a class="mod" href="#">serde</a><button id="copy-path" title="Copy item path to clipboard">Copy item path</button></h1>
 52                <span class="out-of-band">
 53                    <a class="src" href="../src/serde/lib.rs.html#1-340">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>−</span>]</button>
 54                </span>
 55            </div>
 56        "##};
 57        let expected = indoc! {"
 58            # Crate serde
 59        "}
 60        .trim();
 61
 62        assert_eq!(
 63            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
 64            expected
 65        )
 66    }
 67
 68    #[test]
 69    fn test_rust_code_block() {
 70        let html = indoc! {r#"
 71            <pre class="rust rust-example-rendered"><code><span class="kw">use </span>axum::extract::{Path, Query, Json};
 72            <span class="kw">use </span>std::collections::HashMap;
 73
 74            <span class="comment">// `Path` gives you the path parameters and deserializes them.
 75            </span><span class="kw">async fn </span>path(Path(user_id): Path&lt;u32&gt;) {}
 76
 77            <span class="comment">// `Query` gives you the query parameters and deserializes them.
 78            </span><span class="kw">async fn </span>query(Query(params): Query&lt;HashMap&lt;String, String&gt;&gt;) {}
 79
 80            <span class="comment">// Buffer the request body and deserialize it as JSON into a
 81            // `serde_json::Value`. `Json` supports any type that implements
 82            // `serde::Deserialize`.
 83            </span><span class="kw">async fn </span>json(Json(payload): Json&lt;serde_json::Value&gt;) {}</code></pre>
 84        "#};
 85        let expected = indoc! {"
 86            ```rs
 87            use axum::extract::{Path, Query, Json};
 88            use std::collections::HashMap;
 89
 90            // `Path` gives you the path parameters and deserializes them.
 91            async fn path(Path(user_id): Path<u32>) {}
 92
 93            // `Query` gives you the query parameters and deserializes them.
 94            async fn query(Query(params): Query<HashMap<String, String>>) {}
 95
 96            // Buffer the request body and deserialize it as JSON into a
 97            // `serde_json::Value`. `Json` supports any type that implements
 98            // `serde::Deserialize`.
 99            async fn json(Json(payload): Json<serde_json::Value>) {}
100            ```
101        "}
102        .trim();
103
104        assert_eq!(
105            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
106            expected
107        )
108    }
109
110    #[test]
111    fn test_toml_code_block() {
112        let html = indoc! {r##"
113            <h2 id="required-dependencies"><a class="doc-anchor" href="#required-dependencies">§</a>Required dependencies</h2>
114            <p>To use axum there are a few dependencies you have to pull in as well:</p>
115            <div class="example-wrap"><pre class="language-toml"><code>[dependencies]
116            axum = &quot;&lt;latest-version&gt;&quot;
117            tokio = { version = &quot;&lt;latest-version&gt;&quot;, features = [&quot;full&quot;] }
118            tower = &quot;&lt;latest-version&gt;&quot;
119            </code></pre></div>
120        "##};
121        let expected = indoc! {r#"
122            ## Required dependencies
123
124            To use axum there are a few dependencies you have to pull in as well:
125
126            ```toml
127            [dependencies]
128            axum = "<latest-version>"
129            tokio = { version = "<latest-version>", features = ["full"] }
130            tower = "<latest-version>"
131
132            ```
133        "#}
134        .trim();
135
136        assert_eq!(
137            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
138            expected
139        )
140    }
141
142    #[test]
143    fn test_item_table() {
144        let html = indoc! {r##"
145            <h2 id="structs" class="section-header">Structs<a href="#structs" class="anchor">§</a></h2>
146            <ul class="item-table">
147            <li><div class="item-name"><a class="struct" href="struct.Error.html" title="struct axum::Error">Error</a></div><div class="desc docblock-short">Errors that can happen when using axum.</div></li>
148            <li><div class="item-name"><a class="struct" href="struct.Extension.html" title="struct axum::Extension">Extension</a></div><div class="desc docblock-short">Extractor and response for extensions.</div></li>
149            <li><div class="item-name"><a class="struct" href="struct.Form.html" title="struct axum::Form">Form</a><span class="stab portability" title="Available on crate feature `form` only"><code>form</code></span></div><div class="desc docblock-short">URL encoded extractor and response.</div></li>
150            <li><div class="item-name"><a class="struct" href="struct.Json.html" title="struct axum::Json">Json</a><span class="stab portability" title="Available on crate feature `json` only"><code>json</code></span></div><div class="desc docblock-short">JSON Extractor / Response.</div></li>
151            <li><div class="item-name"><a class="struct" href="struct.Router.html" title="struct axum::Router">Router</a></div><div class="desc docblock-short">The router type for composing handlers and services.</div></li></ul>
152            <h2 id="functions" class="section-header">Functions<a href="#functions" class="anchor">§</a></h2>
153            <ul class="item-table">
154            <li><div class="item-name"><a class="fn" href="fn.serve.html" title="fn axum::serve">serve</a><span class="stab portability" title="Available on crate feature `tokio` and (crate features `http1` or `http2`) only"><code>tokio</code> and (<code>http1</code> or <code>http2</code>)</span></div><div class="desc docblock-short">Serve the service with the supplied listener.</div></li>
155            </ul>
156        "##};
157        let expected = indoc! {r#"
158            ## Structs
159
160            - `Error`: Errors that can happen when using axum.
161            - `Extension`: Extractor and response for extensions.
162            - `Form` [`form`]: URL encoded extractor and response.
163            - `Json` [`json`]: JSON Extractor / Response.
164            - `Router`: The router type for composing handlers and services.
165
166            ## Functions
167
168            - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener.
169        "#}
170        .trim();
171
172        assert_eq!(
173            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
174            expected
175        )
176    }
177
178    #[test]
179    fn test_table() {
180        let html = indoc! {r##"
181            <h2 id="feature-flags"><a class="doc-anchor" href="#feature-flags">§</a>Feature flags</h2>
182            <p>axum uses a set of <a href="https://doc.rust-lang.org/cargo/reference/features.html#the-features-section">feature flags</a> to reduce the amount of compiled and
183            optional dependencies.</p>
184            <p>The following optional features are available:</p>
185            <div><table><thead><tr><th>Name</th><th>Description</th><th>Default?</th></tr></thead><tbody>
186            <tr><td><code>http1</code></td><td>Enables hyper’s <code>http1</code> feature</td><td>Yes</td></tr>
187            <tr><td><code>http2</code></td><td>Enables hyper’s <code>http2</code> feature</td><td>No</td></tr>
188            <tr><td><code>json</code></td><td>Enables the <a href="struct.Json.html" title="struct axum::Json"><code>Json</code></a> type and some similar convenience functionality</td><td>Yes</td></tr>
189            <tr><td><code>macros</code></td><td>Enables optional utility macros</td><td>No</td></tr>
190            <tr><td><code>matched-path</code></td><td>Enables capturing of every request’s router path and the <a href="extract/struct.MatchedPath.html" title="struct axum::extract::MatchedPath"><code>MatchedPath</code></a> extractor</td><td>Yes</td></tr>
191            <tr><td><code>multipart</code></td><td>Enables parsing <code>multipart/form-data</code> requests with <a href="extract/struct.Multipart.html" title="struct axum::extract::Multipart"><code>Multipart</code></a></td><td>No</td></tr>
192            <tr><td><code>original-uri</code></td><td>Enables capturing of every request’s original URI and the <a href="extract/struct.OriginalUri.html" title="struct axum::extract::OriginalUri"><code>OriginalUri</code></a> extractor</td><td>Yes</td></tr>
193            <tr><td><code>tokio</code></td><td>Enables <code>tokio</code> as a dependency and <code>axum::serve</code>, <code>SSE</code> and <code>extract::connect_info</code> types.</td><td>Yes</td></tr>
194            <tr><td><code>tower-log</code></td><td>Enables <code>tower</code>’s <code>log</code> feature</td><td>Yes</td></tr>
195            <tr><td><code>tracing</code></td><td>Log rejections from built-in extractors</td><td>Yes</td></tr>
196            <tr><td><code>ws</code></td><td>Enables WebSockets support via <a href="extract/ws/index.html" title="mod axum::extract::ws"><code>extract::ws</code></a></td><td>No</td></tr>
197            <tr><td><code>form</code></td><td>Enables the <code>Form</code> extractor</td><td>Yes</td></tr>
198            <tr><td><code>query</code></td><td>Enables the <code>Query</code> extractor</td><td>Yes</td></tr>
199            </tbody></table>
200        "##};
201        let expected = indoc! {r#"
202            ## Feature flags
203
204            axum uses a set of feature flags to reduce the amount of compiled and
205            optional dependencies.The following optional features are available:
206
207            | Name | Description | Default? |
208            | --- | --- | --- |
209            | `http1` | Enables hyper’s `http1` feature | Yes |
210            | `http2` | Enables hyper’s `http2` feature | No |
211            | `json` | Enables the `Json` type and some similar convenience functionality | Yes |
212            | `macros` | Enables optional utility macros | No |
213            | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes |
214            | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No |
215            | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes |
216            | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes |
217            | `tower-log` | Enables `tower`’s `log` feature | Yes |
218            | `tracing` | Log rejections from built-in extractors | Yes |
219            | `ws` | Enables WebSockets support via `extract::ws` | No |
220            | `form` | Enables the `Form` extractor | Yes |
221            | `query` | Enables the `Query` extractor | Yes |
222        "#}
223        .trim();
224
225        assert_eq!(
226            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
227            expected
228        )
229    }
230}