rustdoc_to_markdown.rs

  1//! Provides conversion from rustdoc's HTML output to Markdown.
  2
  3#![deny(missing_docs)]
  4
  5mod html_element;
  6mod markdown_writer;
  7
  8use std::io::Read;
  9
 10use anyhow::{Context, Result};
 11use html5ever::driver::ParseOpts;
 12use html5ever::parse_document;
 13use html5ever::tendril::TendrilSink;
 14use html5ever::tree_builder::TreeBuilderOpts;
 15use markup5ever_rcdom::RcDom;
 16
 17use crate::markdown_writer::MarkdownWriter;
 18
 19/// Converts the provided rustdoc HTML to Markdown.
 20pub fn convert_rustdoc_to_markdown(mut html: impl Read) -> Result<String> {
 21    let parse_options = ParseOpts {
 22        tree_builder: TreeBuilderOpts {
 23            drop_doctype: true,
 24            ..Default::default()
 25        },
 26        ..Default::default()
 27    };
 28    let dom = parse_document(RcDom::default(), parse_options)
 29        .from_utf8()
 30        .read_from(&mut html)
 31        .context("failed to parse rustdoc HTML")?;
 32
 33    let markdown_writer = MarkdownWriter::new();
 34    let markdown = markdown_writer
 35        .run(&dom.document)
 36        .context("failed to convert rustdoc to HTML")?;
 37
 38    Ok(markdown)
 39}
 40
 41#[cfg(test)]
 42mod tests {
 43    use indoc::indoc;
 44    use pretty_assertions::assert_eq;
 45
 46    use super::*;
 47
 48    #[test]
 49    fn test_main_heading_buttons_get_removed() {
 50        let html = indoc! {r##"
 51            <div class="main-heading">
 52                <h1>Crate <a class="mod" href="#">serde</a><button id="copy-path" title="Copy item path to clipboard">Copy item path</button></h1>
 53                <span class="out-of-band">
 54                    <a class="src" href="../src/serde/lib.rs.html#1-340">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>−</span>]</button>
 55                </span>
 56            </div>
 57        "##};
 58        let expected = indoc! {"
 59            # Crate serde
 60        "}
 61        .trim();
 62
 63        assert_eq!(
 64            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
 65            expected
 66        )
 67    }
 68
 69    #[test]
 70    fn test_single_paragraph() {
 71        let html = indoc! {r#"
 72            <p>In particular, the last point is what sets <code>axum</code> apart from other frameworks.
 73            <code>axum</code> doesn’t have its own middleware system but instead uses
 74            <a href="https://docs.rs/tower-service/0.3.2/x86_64-unknown-linux-gnu/tower_service/trait.Service.html" title="trait tower_service::Service"><code>tower::Service</code></a>. This means <code>axum</code> gets timeouts, tracing, compression,
 75            authorization, and more, for free. It also enables you to share middleware with
 76            applications written using <a href="http://crates.io/crates/hyper"><code>hyper</code></a> or <a href="http://crates.io/crates/tonic"><code>tonic</code></a>.</p>
 77        "#};
 78        let expected = indoc! {"
 79            In particular, the last point is what sets `axum` apart from other frameworks. `axum` doesn’t have its own middleware system but instead uses `tower::Service`. This means `axum` gets timeouts, tracing, compression, authorization, and more, for free. It also enables you to share middleware with applications written using `hyper` or `tonic`.
 80        "}
 81        .trim();
 82
 83        assert_eq!(
 84            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
 85            expected
 86        )
 87    }
 88
 89    #[test]
 90    fn test_multiple_paragraphs() {
 91        let html = indoc! {r##"
 92            <h2 id="serde"><a class="doc-anchor" href="#serde">§</a>Serde</h2>
 93            <p>Serde is a framework for <em><strong>ser</strong></em>ializing and <em><strong>de</strong></em>serializing Rust data
 94            structures efficiently and generically.</p>
 95            <p>The Serde ecosystem consists of data structures that know how to serialize
 96            and deserialize themselves along with data formats that know how to
 97            serialize and deserialize other things. Serde provides the layer by which
 98            these two groups interact with each other, allowing any supported data
 99            structure to be serialized and deserialized using any supported data format.</p>
100            <p>See the Serde website <a href="https://serde.rs/">https://serde.rs/</a> for additional documentation and
101            usage examples.</p>
102            <h3 id="design"><a class="doc-anchor" href="#design">§</a>Design</h3>
103            <p>Where many other languages rely on runtime reflection for serializing data,
104            Serde is instead built on Rust’s powerful trait system. A data structure
105            that knows how to serialize and deserialize itself is one that implements
106            Serde’s <code>Serialize</code> and <code>Deserialize</code> traits (or uses Serde’s derive
107            attribute to automatically generate implementations at compile time). This
108            avoids any overhead of reflection or runtime type information. In fact in
109            many situations the interaction between data structure and data format can
110            be completely optimized away by the Rust compiler, leaving Serde
111            serialization to perform the same speed as a handwritten serializer for the
112            specific selection of data structure and data format.</p>
113        "##};
114        let expected = indoc! {"
115            ## Serde
116
117            Serde is a framework for _**ser**_ializing and _**de**_serializing Rust data structures efficiently and generically.
118
119            The Serde ecosystem consists of data structures that know how to serialize and deserialize themselves along with data formats that know how to serialize and deserialize other things. Serde provides the layer by which these two groups interact with each other, allowing any supported data structure to be serialized and deserialized using any supported data format.
120
121            See the Serde website https://serde.rs/ for additional documentation and usage examples.
122
123            ### Design
124
125            Where many other languages rely on runtime reflection for serializing data, Serde is instead built on Rust’s powerful trait system. A data structure that knows how to serialize and deserialize itself is one that implements Serde’s `Serialize` and `Deserialize` traits (or uses Serde’s derive attribute to automatically generate implementations at compile time). This avoids any overhead of reflection or runtime type information. In fact in many situations the interaction between data structure and data format can be completely optimized away by the Rust compiler, leaving Serde serialization to perform the same speed as a handwritten serializer for the specific selection of data structure and data format.
126        "}
127        .trim();
128
129        assert_eq!(
130            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
131            expected
132        )
133    }
134
135    #[test]
136    fn test_styled_text() {
137        let html = indoc! {r#"
138            <p>This text is <strong>bolded</strong>.</p>
139            <p>This text is <em>italicized</em>.</p>
140        "#};
141        let expected = indoc! {"
142            This text is **bolded**.
143
144            This text is _italicized_.
145        "}
146        .trim();
147
148        assert_eq!(
149            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
150            expected
151        )
152    }
153
154    #[test]
155    fn test_rust_code_block() {
156        let html = indoc! {r#"
157            <pre class="rust rust-example-rendered"><code><span class="kw">use </span>axum::extract::{Path, Query, Json};
158            <span class="kw">use </span>std::collections::HashMap;
159
160            <span class="comment">// `Path` gives you the path parameters and deserializes them.
161            </span><span class="kw">async fn </span>path(Path(user_id): Path&lt;u32&gt;) {}
162
163            <span class="comment">// `Query` gives you the query parameters and deserializes them.
164            </span><span class="kw">async fn </span>query(Query(params): Query&lt;HashMap&lt;String, String&gt;&gt;) {}
165
166            <span class="comment">// Buffer the request body and deserialize it as JSON into a
167            // `serde_json::Value`. `Json` supports any type that implements
168            // `serde::Deserialize`.
169            </span><span class="kw">async fn </span>json(Json(payload): Json&lt;serde_json::Value&gt;) {}</code></pre>
170        "#};
171        let expected = indoc! {"
172            ```rs
173            use axum::extract::{Path, Query, Json};
174            use std::collections::HashMap;
175
176            // `Path` gives you the path parameters and deserializes them.
177            async fn path(Path(user_id): Path<u32>) {}
178
179            // `Query` gives you the query parameters and deserializes them.
180            async fn query(Query(params): Query<HashMap<String, String>>) {}
181
182            // Buffer the request body and deserialize it as JSON into a
183            // `serde_json::Value`. `Json` supports any type that implements
184            // `serde::Deserialize`.
185            async fn json(Json(payload): Json<serde_json::Value>) {}
186            ```
187        "}
188        .trim();
189
190        assert_eq!(
191            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
192            expected
193        )
194    }
195
196    #[test]
197    fn test_toml_code_block() {
198        let html = indoc! {r##"
199            <h2 id="required-dependencies"><a class="doc-anchor" href="#required-dependencies">§</a>Required dependencies</h2>
200            <p>To use axum there are a few dependencies you have to pull in as well:</p>
201            <div class="example-wrap"><pre class="language-toml"><code>[dependencies]
202            axum = &quot;&lt;latest-version&gt;&quot;
203            tokio = { version = &quot;&lt;latest-version&gt;&quot;, features = [&quot;full&quot;] }
204            tower = &quot;&lt;latest-version&gt;&quot;
205            </code></pre></div>
206        "##};
207        let expected = indoc! {r#"
208            ## Required dependencies
209
210            To use axum there are a few dependencies you have to pull in as well:
211
212            ```toml
213            [dependencies]
214            axum = "<latest-version>"
215            tokio = { version = "<latest-version>", features = ["full"] }
216            tower = "<latest-version>"
217
218            ```
219        "#}
220        .trim();
221
222        assert_eq!(
223            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
224            expected
225        )
226    }
227
228    #[test]
229    fn test_item_table() {
230        let html = indoc! {r##"
231            <h2 id="structs" class="section-header">Structs<a href="#structs" class="anchor">§</a></h2>
232            <ul class="item-table">
233            <li><div class="item-name"><a class="struct" href="struct.Error.html" title="struct axum::Error">Error</a></div><div class="desc docblock-short">Errors that can happen when using axum.</div></li>
234            <li><div class="item-name"><a class="struct" href="struct.Extension.html" title="struct axum::Extension">Extension</a></div><div class="desc docblock-short">Extractor and response for extensions.</div></li>
235            <li><div class="item-name"><a class="struct" href="struct.Form.html" title="struct axum::Form">Form</a><span class="stab portability" title="Available on crate feature `form` only"><code>form</code></span></div><div class="desc docblock-short">URL encoded extractor and response.</div></li>
236            <li><div class="item-name"><a class="struct" href="struct.Json.html" title="struct axum::Json">Json</a><span class="stab portability" title="Available on crate feature `json` only"><code>json</code></span></div><div class="desc docblock-short">JSON Extractor / Response.</div></li>
237            <li><div class="item-name"><a class="struct" href="struct.Router.html" title="struct axum::Router">Router</a></div><div class="desc docblock-short">The router type for composing handlers and services.</div></li></ul>
238            <h2 id="functions" class="section-header">Functions<a href="#functions" class="anchor">§</a></h2>
239            <ul class="item-table">
240            <li><div class="item-name"><a class="fn" href="fn.serve.html" title="fn axum::serve">serve</a><span class="stab portability" title="Available on crate feature `tokio` and (crate features `http1` or `http2`) only"><code>tokio</code> and (<code>http1</code> or <code>http2</code>)</span></div><div class="desc docblock-short">Serve the service with the supplied listener.</div></li>
241            </ul>
242        "##};
243        let expected = indoc! {r#"
244            ## Structs
245
246            - `Error`: Errors that can happen when using axum.
247            - `Extension`: Extractor and response for extensions.
248            - `Form` [`form`]: URL encoded extractor and response.
249            - `Json` [`json`]: JSON Extractor / Response.
250            - `Router`: The router type for composing handlers and services.
251
252            ## Functions
253
254            - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener.
255        "#}
256        .trim();
257
258        assert_eq!(
259            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
260            expected
261        )
262    }
263
264    #[test]
265    fn test_table() {
266        let html = indoc! {r##"
267            <h2 id="feature-flags"><a class="doc-anchor" href="#feature-flags">§</a>Feature flags</h2>
268            <p>axum uses a set of <a href="https://doc.rust-lang.org/cargo/reference/features.html#the-features-section">feature flags</a> to reduce the amount of compiled and
269            optional dependencies.</p>
270            <p>The following optional features are available:</p>
271            <div><table><thead><tr><th>Name</th><th>Description</th><th>Default?</th></tr></thead><tbody>
272            <tr><td><code>http1</code></td><td>Enables hyper’s <code>http1</code> feature</td><td>Yes</td></tr>
273            <tr><td><code>http2</code></td><td>Enables hyper’s <code>http2</code> feature</td><td>No</td></tr>
274            <tr><td><code>json</code></td><td>Enables the <a href="struct.Json.html" title="struct axum::Json"><code>Json</code></a> type and some similar convenience functionality</td><td>Yes</td></tr>
275            <tr><td><code>macros</code></td><td>Enables optional utility macros</td><td>No</td></tr>
276            <tr><td><code>matched-path</code></td><td>Enables capturing of every request’s router path and the <a href="extract/struct.MatchedPath.html" title="struct axum::extract::MatchedPath"><code>MatchedPath</code></a> extractor</td><td>Yes</td></tr>
277            <tr><td><code>multipart</code></td><td>Enables parsing <code>multipart/form-data</code> requests with <a href="extract/struct.Multipart.html" title="struct axum::extract::Multipart"><code>Multipart</code></a></td><td>No</td></tr>
278            <tr><td><code>original-uri</code></td><td>Enables capturing of every request’s original URI and the <a href="extract/struct.OriginalUri.html" title="struct axum::extract::OriginalUri"><code>OriginalUri</code></a> extractor</td><td>Yes</td></tr>
279            <tr><td><code>tokio</code></td><td>Enables <code>tokio</code> as a dependency and <code>axum::serve</code>, <code>SSE</code> and <code>extract::connect_info</code> types.</td><td>Yes</td></tr>
280            <tr><td><code>tower-log</code></td><td>Enables <code>tower</code>’s <code>log</code> feature</td><td>Yes</td></tr>
281            <tr><td><code>tracing</code></td><td>Log rejections from built-in extractors</td><td>Yes</td></tr>
282            <tr><td><code>ws</code></td><td>Enables WebSockets support via <a href="extract/ws/index.html" title="mod axum::extract::ws"><code>extract::ws</code></a></td><td>No</td></tr>
283            <tr><td><code>form</code></td><td>Enables the <code>Form</code> extractor</td><td>Yes</td></tr>
284            <tr><td><code>query</code></td><td>Enables the <code>Query</code> extractor</td><td>Yes</td></tr>
285            </tbody></table>
286        "##};
287        let expected = indoc! {r#"
288            ## Feature flags
289
290            axum uses a set of feature flags to reduce the amount of compiled and optional dependencies.
291
292            The following optional features are available:
293
294            | Name | Description | Default? |
295            | --- | --- | --- |
296            | `http1` | Enables hyper’s `http1` feature | Yes |
297            | `http2` | Enables hyper’s `http2` feature | No |
298            | `json` | Enables the `Json` type and some similar convenience functionality | Yes |
299            | `macros` | Enables optional utility macros | No |
300            | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes |
301            | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No |
302            | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes |
303            | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes |
304            | `tower-log` | Enables `tower`’s `log` feature | Yes |
305            | `tracing` | Log rejections from built-in extractors | Yes |
306            | `ws` | Enables WebSockets support via `extract::ws` | No |
307            | `form` | Enables the `Form` extractor | Yes |
308            | `query` | Enables the `Query` extractor | Yes |
309        "#}
310        .trim();
311
312        assert_eq!(
313            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
314            expected
315        )
316    }
317}