markdown.rs

  1use crate::html_element::HtmlElement;
  2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
  3
  4pub struct WebpageChromeRemover;
  5
  6impl HandleTag for WebpageChromeRemover {
  7    fn should_handle(&self, tag: &str) -> bool {
  8        matches!(tag, "head" | "script" | "style" | "nav")
  9    }
 10
 11    fn handle_tag_start(
 12        &mut self,
 13        tag: &HtmlElement,
 14        _writer: &mut MarkdownWriter,
 15    ) -> StartTagOutcome {
 16        match tag.tag() {
 17            "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
 18            _ => {}
 19        }
 20
 21        StartTagOutcome::Continue
 22    }
 23}
 24
 25pub struct ParagraphHandler;
 26
 27impl HandleTag for ParagraphHandler {
 28    fn should_handle(&self, _tag: &str) -> bool {
 29        true
 30    }
 31
 32    fn handle_tag_start(
 33        &mut self,
 34        tag: &HtmlElement,
 35        writer: &mut MarkdownWriter,
 36    ) -> StartTagOutcome {
 37        if tag.is_inline() && writer.is_inside("p")
 38            && let Some(parent) = writer.current_element_stack().iter().last()
 39                && !(parent.is_inline()
 40                    || writer.markdown.ends_with(' ')
 41                    || writer.markdown.ends_with('\n'))
 42                {
 43                    writer.push_str(" ");
 44                }
 45
 46        if tag.tag() == "p" {
 47            writer.push_blank_line()
 48        }
 49        StartTagOutcome::Continue
 50    }
 51}
 52
 53pub struct HeadingHandler;
 54
 55impl HandleTag for HeadingHandler {
 56    fn should_handle(&self, tag: &str) -> bool {
 57        matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
 58    }
 59
 60    fn handle_tag_start(
 61        &mut self,
 62        tag: &HtmlElement,
 63        writer: &mut MarkdownWriter,
 64    ) -> StartTagOutcome {
 65        match tag.tag() {
 66            "h1" => writer.push_str("\n\n# "),
 67            "h2" => writer.push_str("\n\n## "),
 68            "h3" => writer.push_str("\n\n### "),
 69            "h4" => writer.push_str("\n\n#### "),
 70            "h5" => writer.push_str("\n\n##### "),
 71            "h6" => writer.push_str("\n\n###### "),
 72            _ => {}
 73        }
 74
 75        StartTagOutcome::Continue
 76    }
 77
 78    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
 79        match tag.tag() {
 80            "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
 81            _ => {}
 82        }
 83    }
 84}
 85
 86pub struct ListHandler;
 87
 88impl HandleTag for ListHandler {
 89    fn should_handle(&self, tag: &str) -> bool {
 90        matches!(tag, "ul" | "ol" | "li")
 91    }
 92
 93    fn handle_tag_start(
 94        &mut self,
 95        tag: &HtmlElement,
 96        writer: &mut MarkdownWriter,
 97    ) -> StartTagOutcome {
 98        match tag.tag() {
 99            "ul" | "ol" => writer.push_newline(),
100            "li" => writer.push_str("- "),
101            _ => {}
102        }
103
104        StartTagOutcome::Continue
105    }
106
107    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
108        match tag.tag() {
109            "ul" | "ol" => writer.push_newline(),
110            "li" => writer.push_newline(),
111            _ => {}
112        }
113    }
114}
115
116pub struct TableHandler {
117    /// The number of columns in the current `<table>`.
118    current_table_columns: usize,
119    is_first_th: bool,
120    is_first_td: bool,
121}
122
123impl TableHandler {
124    pub fn new() -> Self {
125        Self {
126            current_table_columns: 0,
127            is_first_th: true,
128            is_first_td: true,
129        }
130    }
131}
132
133impl Default for TableHandler {
134    fn default() -> Self {
135        Self::new()
136    }
137}
138
139impl HandleTag for TableHandler {
140    fn should_handle(&self, tag: &str) -> bool {
141        matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
142    }
143
144    fn handle_tag_start(
145        &mut self,
146        tag: &HtmlElement,
147        writer: &mut MarkdownWriter,
148    ) -> StartTagOutcome {
149        match tag.tag() {
150            "thead" => writer.push_blank_line(),
151            "tr" => writer.push_newline(),
152            "th" => {
153                self.current_table_columns += 1;
154                if self.is_first_th {
155                    self.is_first_th = false;
156                } else {
157                    writer.push_str(" ");
158                }
159                writer.push_str("| ");
160            }
161            "td" => {
162                if self.is_first_td {
163                    self.is_first_td = false;
164                } else {
165                    writer.push_str(" ");
166                }
167                writer.push_str("| ");
168            }
169            _ => {}
170        }
171
172        StartTagOutcome::Continue
173    }
174
175    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
176        match tag.tag() {
177            "thead" => {
178                writer.push_newline();
179                for ix in 0..self.current_table_columns {
180                    if ix > 0 {
181                        writer.push_str(" ");
182                    }
183                    writer.push_str("| ---");
184                }
185                writer.push_str(" |");
186                self.is_first_th = true;
187            }
188            "tr" => {
189                writer.push_str(" |");
190                self.is_first_td = true;
191            }
192            "table" => {
193                self.current_table_columns = 0;
194            }
195            _ => {}
196        }
197    }
198}
199
200pub struct StyledTextHandler;
201
202impl HandleTag for StyledTextHandler {
203    fn should_handle(&self, tag: &str) -> bool {
204        matches!(tag, "strong" | "em")
205    }
206
207    fn handle_tag_start(
208        &mut self,
209        tag: &HtmlElement,
210        writer: &mut MarkdownWriter,
211    ) -> StartTagOutcome {
212        match tag.tag() {
213            "strong" => writer.push_str("**"),
214            "em" => writer.push_str("_"),
215            _ => {}
216        }
217
218        StartTagOutcome::Continue
219    }
220
221    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
222        match tag.tag() {
223            "strong" => writer.push_str("**"),
224            "em" => writer.push_str("_"),
225            _ => {}
226        }
227    }
228}
229
230pub struct CodeHandler;
231
232impl HandleTag for CodeHandler {
233    fn should_handle(&self, tag: &str) -> bool {
234        matches!(tag, "pre" | "code")
235    }
236
237    fn handle_tag_start(
238        &mut self,
239        tag: &HtmlElement,
240        writer: &mut MarkdownWriter,
241    ) -> StartTagOutcome {
242        match tag.tag() {
243            "code" => {
244                if !writer.is_inside("pre") {
245                    writer.push_str("`");
246                }
247            }
248            "pre" => writer.push_str("\n\n```\n"),
249            _ => {}
250        }
251
252        StartTagOutcome::Continue
253    }
254
255    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
256        match tag.tag() {
257            "code" => {
258                if !writer.is_inside("pre") {
259                    writer.push_str("`");
260                }
261            }
262            "pre" => writer.push_str("\n```\n"),
263            _ => {}
264        }
265    }
266
267    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
268        if writer.is_inside("pre") {
269            writer.push_str(text);
270            return HandlerOutcome::Handled;
271        }
272
273        HandlerOutcome::NoOp
274    }
275}