markdown.rs

  1use crate::html_element::HtmlElement;
  2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
  3
  4pub struct WebpageChromeRemover;
  5
  6impl HandleTag for WebpageChromeRemover {
  7    fn should_handle(&self, tag: &str) -> bool {
  8        matches!(tag, "head" | "script" | "style" | "nav")
  9    }
 10
 11    fn handle_tag_start(
 12        &mut self,
 13        tag: &HtmlElement,
 14        _writer: &mut MarkdownWriter,
 15    ) -> StartTagOutcome {
 16        match tag.tag() {
 17            "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
 18            _ => {}
 19        }
 20
 21        StartTagOutcome::Continue
 22    }
 23}
 24
 25pub struct ParagraphHandler;
 26
 27impl HandleTag for ParagraphHandler {
 28    fn should_handle(&self, _tag: &str) -> bool {
 29        true
 30    }
 31
 32    fn handle_tag_start(
 33        &mut self,
 34        tag: &HtmlElement,
 35        writer: &mut MarkdownWriter,
 36    ) -> StartTagOutcome {
 37        if tag.is_inline() && writer.is_inside("p") {
 38            if let Some(parent) = writer.current_element_stack().iter().last() {
 39                if !(parent.is_inline()
 40                    || writer.markdown.ends_with(' ')
 41                    || writer.markdown.ends_with('\n'))
 42                {
 43                    writer.push_str(" ");
 44                }
 45            }
 46        }
 47
 48        if tag.tag() == "p" {
 49            writer.push_blank_line()
 50        }
 51        StartTagOutcome::Continue
 52    }
 53}
 54
 55pub struct HeadingHandler;
 56
 57impl HandleTag for HeadingHandler {
 58    fn should_handle(&self, tag: &str) -> bool {
 59        matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
 60    }
 61
 62    fn handle_tag_start(
 63        &mut self,
 64        tag: &HtmlElement,
 65        writer: &mut MarkdownWriter,
 66    ) -> StartTagOutcome {
 67        match tag.tag() {
 68            "h1" => writer.push_str("\n\n# "),
 69            "h2" => writer.push_str("\n\n## "),
 70            "h3" => writer.push_str("\n\n### "),
 71            "h4" => writer.push_str("\n\n#### "),
 72            "h5" => writer.push_str("\n\n##### "),
 73            "h6" => writer.push_str("\n\n###### "),
 74            _ => {}
 75        }
 76
 77        StartTagOutcome::Continue
 78    }
 79
 80    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
 81        match tag.tag() {
 82            "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
 83            _ => {}
 84        }
 85    }
 86}
 87
 88pub struct ListHandler;
 89
 90impl HandleTag for ListHandler {
 91    fn should_handle(&self, tag: &str) -> bool {
 92        matches!(tag, "ul" | "ol" | "li")
 93    }
 94
 95    fn handle_tag_start(
 96        &mut self,
 97        tag: &HtmlElement,
 98        writer: &mut MarkdownWriter,
 99    ) -> StartTagOutcome {
100        match tag.tag() {
101            "ul" | "ol" => writer.push_newline(),
102            "li" => writer.push_str("- "),
103            _ => {}
104        }
105
106        StartTagOutcome::Continue
107    }
108
109    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
110        match tag.tag() {
111            "ul" | "ol" => writer.push_newline(),
112            "li" => writer.push_newline(),
113            _ => {}
114        }
115    }
116}
117
/// Tag handler that renders HTML tables as pipe-delimited Markdown tables.
///
/// The handler is stateful: it counts header cells and remembers whether a
/// cell has already been written, so that cell separators can be placed
/// correctly.
pub struct TableHandler {
    /// The number of columns in the current `<table>`.
    current_table_columns: usize,
    /// `true` until a `<th>` has been written since the flag was last reset.
    is_first_th: bool,
    /// `true` until a `<td>` has been written since the flag was last reset.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with no columns counted and both "first cell" flags
    /// set.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for TableHandler {
    /// Same initial state as [`TableHandler::new`]. Written by hand rather
    /// than derived because the boolean flags must start out `true`.
    fn default() -> Self {
        Self {
            current_table_columns: 0,
            is_first_th: true,
            is_first_td: true,
        }
    }
}
140
141impl HandleTag for TableHandler {
142    fn should_handle(&self, tag: &str) -> bool {
143        matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
144    }
145
146    fn handle_tag_start(
147        &mut self,
148        tag: &HtmlElement,
149        writer: &mut MarkdownWriter,
150    ) -> StartTagOutcome {
151        match tag.tag() {
152            "thead" => writer.push_blank_line(),
153            "tr" => writer.push_newline(),
154            "th" => {
155                self.current_table_columns += 1;
156                if self.is_first_th {
157                    self.is_first_th = false;
158                } else {
159                    writer.push_str(" ");
160                }
161                writer.push_str("| ");
162            }
163            "td" => {
164                if self.is_first_td {
165                    self.is_first_td = false;
166                } else {
167                    writer.push_str(" ");
168                }
169                writer.push_str("| ");
170            }
171            _ => {}
172        }
173
174        StartTagOutcome::Continue
175    }
176
177    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
178        match tag.tag() {
179            "thead" => {
180                writer.push_newline();
181                for ix in 0..self.current_table_columns {
182                    if ix > 0 {
183                        writer.push_str(" ");
184                    }
185                    writer.push_str("| ---");
186                }
187                writer.push_str(" |");
188                self.is_first_th = true;
189            }
190            "tr" => {
191                writer.push_str(" |");
192                self.is_first_td = true;
193            }
194            "table" => {
195                self.current_table_columns = 0;
196            }
197            _ => {}
198        }
199    }
200}
201
202pub struct StyledTextHandler;
203
204impl HandleTag for StyledTextHandler {
205    fn should_handle(&self, tag: &str) -> bool {
206        matches!(tag, "strong" | "em")
207    }
208
209    fn handle_tag_start(
210        &mut self,
211        tag: &HtmlElement,
212        writer: &mut MarkdownWriter,
213    ) -> StartTagOutcome {
214        match tag.tag() {
215            "strong" => writer.push_str("**"),
216            "em" => writer.push_str("_"),
217            _ => {}
218        }
219
220        StartTagOutcome::Continue
221    }
222
223    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
224        match tag.tag() {
225            "strong" => writer.push_str("**"),
226            "em" => writer.push_str("_"),
227            _ => {}
228        }
229    }
230}
231
232pub struct CodeHandler;
233
234impl HandleTag for CodeHandler {
235    fn should_handle(&self, tag: &str) -> bool {
236        matches!(tag, "pre" | "code")
237    }
238
239    fn handle_tag_start(
240        &mut self,
241        tag: &HtmlElement,
242        writer: &mut MarkdownWriter,
243    ) -> StartTagOutcome {
244        match tag.tag() {
245            "code" => {
246                if !writer.is_inside("pre") {
247                    writer.push_str("`");
248                }
249            }
250            "pre" => writer.push_str("\n\n```\n"),
251            _ => {}
252        }
253
254        StartTagOutcome::Continue
255    }
256
257    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
258        match tag.tag() {
259            "code" => {
260                if !writer.is_inside("pre") {
261                    writer.push_str("`");
262                }
263            }
264            "pre" => writer.push_str("\n```\n"),
265            _ => {}
266        }
267    }
268
269    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
270        if writer.is_inside("pre") {
271            writer.push_str(text);
272            return HandlerOutcome::Handled;
273        }
274
275        HandlerOutcome::NoOp
276    }
277}