html_to_markdown: Move `TableHandler` out of `rustdoc` (#12697)

Marshall Bowers created

This PR moves the `RustdocTableHandler` out of the `rustdoc` module and
renames it to `TableHandler`, as it doesn't contain anything specific to
rustdoc.

Release Notes:

- N/A

Change summary

crates/html_to_markdown/src/html_to_markdown.rs  |  8 +
crates/html_to_markdown/src/markdown.rs          | 81 ++++++++++++++++++
crates/html_to_markdown/src/structure/rustdoc.rs | 81 ------------------
3 files changed, 86 insertions(+), 84 deletions(-)

Detailed changes

crates/html_to_markdown/src/html_to_markdown.rs 🔗

@@ -16,7 +16,9 @@ use html5ever::tendril::TendrilSink;
 use html5ever::tree_builder::TreeBuilderOpts;
 use markup5ever_rcdom::RcDom;
 
-use crate::markdown::{HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler};
+use crate::markdown::{
+    HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
+};
 use crate::markdown_writer::{HandleTag, MarkdownWriter};
 
 /// Converts the provided HTML to Markdown.
@@ -27,11 +29,11 @@ pub fn convert_html_to_markdown(html: impl Read) -> Result<String> {
         Box::new(ParagraphHandler),
         Box::new(HeadingHandler),
         Box::new(ListHandler),
+        Box::new(TableHandler::new()),
         Box::new(StyledTextHandler),
         Box::new(structure::rustdoc::RustdocChromeRemover),
         Box::new(structure::rustdoc::RustdocHeadingHandler),
         Box::new(structure::rustdoc::RustdocCodeHandler),
-        Box::new(structure::rustdoc::RustdocTableHandler::new()),
         Box::new(structure::rustdoc::RustdocItemHandler),
     ];
 
@@ -51,11 +53,11 @@ pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
         Box::new(ParagraphHandler),
         Box::new(HeadingHandler),
         Box::new(ListHandler),
+        Box::new(TableHandler::new()),
         Box::new(StyledTextHandler),
         Box::new(structure::rustdoc::RustdocChromeRemover),
         Box::new(structure::rustdoc::RustdocHeadingHandler),
         Box::new(structure::rustdoc::RustdocCodeHandler),
-        Box::new(structure::rustdoc::RustdocTableHandler::new()),
         Box::new(structure::rustdoc::RustdocItemHandler),
     ];
 

crates/html_to_markdown/src/markdown.rs 🔗

@@ -101,6 +101,87 @@ impl HandleTag for ListHandler {
     }
 }
 
+pub struct TableHandler {
+    /// The number of columns in the current `<table>`.
+    current_table_columns: usize,
+    is_first_th: bool,
+    is_first_td: bool,
+}
+
+impl TableHandler {
+    pub fn new() -> Self {
+        Self {
+            current_table_columns: 0,
+            is_first_th: true,
+            is_first_td: true,
+        }
+    }
+}
+
+impl HandleTag for TableHandler {
+    fn should_handle(&self, tag: &str) -> bool {
+        match tag {
+            "table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
+            _ => false,
+        }
+    }
+
+    fn handle_tag_start(
+        &mut self,
+        tag: &HtmlElement,
+        writer: &mut MarkdownWriter,
+    ) -> StartTagOutcome {
+        match tag.tag.as_str() {
+            "thead" => writer.push_blank_line(),
+            "tr" => writer.push_newline(),
+            "th" => {
+                self.current_table_columns += 1;
+                if self.is_first_th {
+                    self.is_first_th = false;
+                } else {
+                    writer.push_str(" ");
+                }
+                writer.push_str("| ");
+            }
+            "td" => {
+                if self.is_first_td {
+                    self.is_first_td = false;
+                } else {
+                    writer.push_str(" ");
+                }
+                writer.push_str("| ");
+            }
+            _ => {}
+        }
+
+        StartTagOutcome::Continue
+    }
+
+    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
+        match tag.tag.as_str() {
+            "thead" => {
+                writer.push_newline();
+                for ix in 0..self.current_table_columns {
+                    if ix > 0 {
+                        writer.push_str(" ");
+                    }
+                    writer.push_str("| ---");
+                }
+                writer.push_str(" |");
+                self.is_first_th = true;
+            }
+            "tr" => {
+                writer.push_str(" |");
+                self.is_first_td = true;
+            }
+            "table" => {
+                self.current_table_columns = 0;
+            }
+            _ => {}
+        }
+    }
+}
+
 pub struct StyledTextHandler;
 
 impl HandleTag for StyledTextHandler {

crates/html_to_markdown/src/structure/rustdoc.rs 🔗

@@ -96,87 +96,6 @@ impl HandleTag for RustdocCodeHandler {
     }
 }
 
-pub struct RustdocTableHandler {
-    /// The number of columns in the current `<table>`.
-    current_table_columns: usize,
-    is_first_th: bool,
-    is_first_td: bool,
-}
-
-impl RustdocTableHandler {
-    pub fn new() -> Self {
-        Self {
-            current_table_columns: 0,
-            is_first_th: true,
-            is_first_td: true,
-        }
-    }
-}
-
-impl HandleTag for RustdocTableHandler {
-    fn should_handle(&self, tag: &str) -> bool {
-        match tag {
-            "table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
-            _ => false,
-        }
-    }
-
-    fn handle_tag_start(
-        &mut self,
-        tag: &HtmlElement,
-        writer: &mut MarkdownWriter,
-    ) -> StartTagOutcome {
-        match tag.tag.as_str() {
-            "thead" => writer.push_blank_line(),
-            "tr" => writer.push_newline(),
-            "th" => {
-                self.current_table_columns += 1;
-                if self.is_first_th {
-                    self.is_first_th = false;
-                } else {
-                    writer.push_str(" ");
-                }
-                writer.push_str("| ");
-            }
-            "td" => {
-                if self.is_first_td {
-                    self.is_first_td = false;
-                } else {
-                    writer.push_str(" ");
-                }
-                writer.push_str("| ");
-            }
-            _ => {}
-        }
-
-        StartTagOutcome::Continue
-    }
-
-    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
-        match tag.tag.as_str() {
-            "thead" => {
-                writer.push_newline();
-                for ix in 0..self.current_table_columns {
-                    if ix > 0 {
-                        writer.push_str(" ");
-                    }
-                    writer.push_str("| ---");
-                }
-                writer.push_str(" |");
-                self.is_first_th = true;
-            }
-            "tr" => {
-                writer.push_str(" |");
-                self.is_first_td = true;
-            }
-            "table" => {
-                self.current_table_columns = 0;
-            }
-            _ => {}
-        }
-    }
-}
-
 const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
 
 pub struct RustdocItemHandler;