rustdoc_to_markdown: Add table support (#12488)

Marshall Bowers created

This PR extends `rustdoc_to_markdown` with support for tables:

<img width="1007" alt="Screenshot 2024-05-30 at 12 05 35 PM"
src="https://github.com/zed-industries/zed/assets/1486634/4e9a2a65-8aaa-4df1-98c4-4dd4e7874514">


Release Notes:

- N/A

Change summary

crates/rustdoc_to_markdown/src/markdown_writer.rs     | 49 ++++++++++++
crates/rustdoc_to_markdown/src/rustdoc_to_markdown.rs | 53 +++++++++++++
2 files changed, 102 insertions(+)

Detailed changes

crates/rustdoc_to_markdown/src/markdown_writer.rs 🔗

@@ -30,6 +30,10 @@ enum StartTagOutcome {
 
 pub struct MarkdownWriter {
     current_element_stack: VecDeque<HtmlElement>,
+    /// The number of columns in the current `<table>`.
+    current_table_columns: usize,
+    is_first_th: bool,
+    is_first_td: bool,
     /// The Markdown output.
     markdown: String,
 }
@@ -38,6 +42,9 @@ impl MarkdownWriter {
     pub fn new() -> Self {
         Self {
             current_element_stack: VecDeque::new(),
+            current_table_columns: 0,
+            is_first_th: true,
+            is_first_td: true,
             markdown: String::new(),
         }
     }
@@ -58,6 +65,11 @@ impl MarkdownWriter {
         self.push_str("\n");
     }
 
+    /// Appends a blank line to the end of the Markdown output.
+    fn push_blank_line(&mut self) {
+        self.push_str("\n\n");
+    }
+
     pub fn run(mut self, root_node: &Handle) -> Result<String> {
         self.visit_node(&root_node)?;
         Ok(Self::prettify_markdown(self.markdown))
@@ -166,6 +178,25 @@ impl MarkdownWriter {
             }
             "ul" | "ol" => self.push_newline(),
             "li" => self.push_str("- "),
+            "thead" => self.push_blank_line(),
+            "tr" => self.push_newline(),
+            "th" => {
+                self.current_table_columns += 1;
+                if self.is_first_th {
+                    self.is_first_th = false;
+                } else {
+                    self.push_str(" ");
+                }
+                self.push_str("| ");
+            }
+            "td" => {
+                if self.is_first_td {
+                    self.is_first_td = false;
+                } else {
+                    self.push_str(" ");
+                }
+                self.push_str("| ");
+            }
             "summary" => {
                 if tag.attrs.borrow().iter().any(|attr| {
                     attr.name.local.to_string() == "class" && attr.value.to_string() == "hideme"
@@ -209,6 +240,24 @@ impl MarkdownWriter {
             "pre" => self.push_str("\n```\n"),
             "ul" | "ol" => self.push_newline(),
             "li" => self.push_newline(),
+            "thead" => {
+                self.push_newline();
+                for ix in 0..self.current_table_columns {
+                    if ix > 0 {
+                        self.push_str(" ");
+                    }
+                    self.push_str("| ---");
+                }
+                self.push_str(" |");
+                self.is_first_th = true;
+            }
+            "tr" => {
+                self.push_str(" |");
+                self.is_first_td = true;
+            }
+            "table" => {
+                self.current_table_columns = 0;
+            }
             "div" => {
                 if tag.attrs.borrow().iter().any(|attr| {
                     attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"

crates/rustdoc_to_markdown/src/rustdoc_to_markdown.rs 🔗

@@ -117,4 +117,57 @@ mod tests {
             expected
         )
     }
+
+    #[test]
+    fn test_table() {
+        let html = indoc! {r##"
+            <h2 id="feature-flags"><a class="doc-anchor" href="#feature-flags">§</a>Feature flags</h2>
+            <p>axum uses a set of <a href="https://doc.rust-lang.org/cargo/reference/features.html#the-features-section">feature flags</a> to reduce the amount of compiled and
+            optional dependencies.</p>
+            <p>The following optional features are available:</p>
+            <div><table><thead><tr><th>Name</th><th>Description</th><th>Default?</th></tr></thead><tbody>
+            <tr><td><code>http1</code></td><td>Enables hyper’s <code>http1</code> feature</td><td>Yes</td></tr>
+            <tr><td><code>http2</code></td><td>Enables hyper’s <code>http2</code> feature</td><td>No</td></tr>
+            <tr><td><code>json</code></td><td>Enables the <a href="struct.Json.html" title="struct axum::Json"><code>Json</code></a> type and some similar convenience functionality</td><td>Yes</td></tr>
+            <tr><td><code>macros</code></td><td>Enables optional utility macros</td><td>No</td></tr>
+            <tr><td><code>matched-path</code></td><td>Enables capturing of every request’s router path and the <a href="extract/struct.MatchedPath.html" title="struct axum::extract::MatchedPath"><code>MatchedPath</code></a> extractor</td><td>Yes</td></tr>
+            <tr><td><code>multipart</code></td><td>Enables parsing <code>multipart/form-data</code> requests with <a href="extract/struct.Multipart.html" title="struct axum::extract::Multipart"><code>Multipart</code></a></td><td>No</td></tr>
+            <tr><td><code>original-uri</code></td><td>Enables capturing of every request’s original URI and the <a href="extract/struct.OriginalUri.html" title="struct axum::extract::OriginalUri"><code>OriginalUri</code></a> extractor</td><td>Yes</td></tr>
+            <tr><td><code>tokio</code></td><td>Enables <code>tokio</code> as a dependency and <code>axum::serve</code>, <code>SSE</code> and <code>extract::connect_info</code> types.</td><td>Yes</td></tr>
+            <tr><td><code>tower-log</code></td><td>Enables <code>tower</code>’s <code>log</code> feature</td><td>Yes</td></tr>
+            <tr><td><code>tracing</code></td><td>Log rejections from built-in extractors</td><td>Yes</td></tr>
+            <tr><td><code>ws</code></td><td>Enables WebSockets support via <a href="extract/ws/index.html" title="mod axum::extract::ws"><code>extract::ws</code></a></td><td>No</td></tr>
+            <tr><td><code>form</code></td><td>Enables the <code>Form</code> extractor</td><td>Yes</td></tr>
+            <tr><td><code>query</code></td><td>Enables the <code>Query</code> extractor</td><td>Yes</td></tr>
+            </tbody></table>
+        "##};
+        let expected = indoc! {r#"
+            ## Feature flags
+
+            axum uses a set of feature flags to reduce the amount of compiled and
+            optional dependencies.The following optional features are available:
+
+            | Name | Description | Default? |
+            | --- | --- | --- |
+            | `http1` | Enables hyper’s `http1` feature | Yes |
+            | `http2` | Enables hyper’s `http2` feature | No |
+            | `json` | Enables the `Json` type and some similar convenience functionality | Yes |
+            | `macros` | Enables optional utility macros | No |
+            | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes |
+            | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No |
+            | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes |
+            | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes |
+            | `tower-log` | Enables `tower`’s `log` feature | Yes |
+            | `tracing` | Log rejections from built-in extractors | Yes |
+            | `ws` | Enables WebSockets support via `extract::ws` | No |
+            | `form` | Enables the `Form` extractor | Yes |
+            | `query` | Enables the `Query` extractor | Yes |
+        "#}
+        .trim();
+
+        assert_eq!(
+            convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
+            expected
+        )
+    }
 }