1use crate::html_element::HtmlElement;
  2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
  3
  4pub struct WebpageChromeRemover;
  5
  6impl HandleTag for WebpageChromeRemover {
  7    fn should_handle(&self, tag: &str) -> bool {
  8        matches!(tag, "head" | "script" | "style" | "nav")
  9    }
 10
 11    fn handle_tag_start(
 12        &mut self,
 13        tag: &HtmlElement,
 14        _writer: &mut MarkdownWriter,
 15    ) -> StartTagOutcome {
 16        match tag.tag() {
 17            "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
 18            _ => {}
 19        }
 20
 21        StartTagOutcome::Continue
 22    }
 23}
 24
 25pub struct ParagraphHandler;
 26
 27impl HandleTag for ParagraphHandler {
 28    fn should_handle(&self, _tag: &str) -> bool {
 29        true
 30    }
 31
 32    fn handle_tag_start(
 33        &mut self,
 34        tag: &HtmlElement,
 35        writer: &mut MarkdownWriter,
 36    ) -> StartTagOutcome {
 37        if tag.is_inline()
 38            && writer.is_inside("p")
 39            && let Some(parent) = writer.current_element_stack().iter().last()
 40            && !(parent.is_inline()
 41                || writer.markdown.ends_with(' ')
 42                || writer.markdown.ends_with('\n'))
 43        {
 44            writer.push_str(" ");
 45        }
 46
 47        if tag.tag() == "p" {
 48            writer.push_blank_line()
 49        }
 50        StartTagOutcome::Continue
 51    }
 52}
 53
 54pub struct HeadingHandler;
 55
 56impl HandleTag for HeadingHandler {
 57    fn should_handle(&self, tag: &str) -> bool {
 58        matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
 59    }
 60
 61    fn handle_tag_start(
 62        &mut self,
 63        tag: &HtmlElement,
 64        writer: &mut MarkdownWriter,
 65    ) -> StartTagOutcome {
 66        match tag.tag() {
 67            "h1" => writer.push_str("\n\n# "),
 68            "h2" => writer.push_str("\n\n## "),
 69            "h3" => writer.push_str("\n\n### "),
 70            "h4" => writer.push_str("\n\n#### "),
 71            "h5" => writer.push_str("\n\n##### "),
 72            "h6" => writer.push_str("\n\n###### "),
 73            _ => {}
 74        }
 75
 76        StartTagOutcome::Continue
 77    }
 78
 79    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
 80        match tag.tag() {
 81            "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
 82            _ => {}
 83        }
 84    }
 85}
 86
 87pub struct ListHandler;
 88
 89impl HandleTag for ListHandler {
 90    fn should_handle(&self, tag: &str) -> bool {
 91        matches!(tag, "ul" | "ol" | "li")
 92    }
 93
 94    fn handle_tag_start(
 95        &mut self,
 96        tag: &HtmlElement,
 97        writer: &mut MarkdownWriter,
 98    ) -> StartTagOutcome {
 99        match tag.tag() {
100            "ul" | "ol" => writer.push_newline(),
101            "li" => writer.push_str("- "),
102            _ => {}
103        }
104
105        StartTagOutcome::Continue
106    }
107
108    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
109        match tag.tag() {
110            "ul" | "ol" => writer.push_newline(),
111            "li" => writer.push_newline(),
112            _ => {}
113        }
114    }
115}
116
117pub struct TableHandler {
118    /// The number of columns in the current `<table>`.
119    current_table_columns: usize,
120    is_first_th: bool,
121    is_first_td: bool,
122}
123
124impl TableHandler {
125    pub fn new() -> Self {
126        Self {
127            current_table_columns: 0,
128            is_first_th: true,
129            is_first_td: true,
130        }
131    }
132}
133
134impl Default for TableHandler {
135    fn default() -> Self {
136        Self::new()
137    }
138}
139
140impl HandleTag for TableHandler {
141    fn should_handle(&self, tag: &str) -> bool {
142        matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
143    }
144
145    fn handle_tag_start(
146        &mut self,
147        tag: &HtmlElement,
148        writer: &mut MarkdownWriter,
149    ) -> StartTagOutcome {
150        match tag.tag() {
151            "thead" => writer.push_blank_line(),
152            "tr" => writer.push_newline(),
153            "th" => {
154                self.current_table_columns += 1;
155                if self.is_first_th {
156                    self.is_first_th = false;
157                } else {
158                    writer.push_str(" ");
159                }
160                writer.push_str("| ");
161            }
162            "td" => {
163                if self.is_first_td {
164                    self.is_first_td = false;
165                } else {
166                    writer.push_str(" ");
167                }
168                writer.push_str("| ");
169            }
170            _ => {}
171        }
172
173        StartTagOutcome::Continue
174    }
175
176    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
177        match tag.tag() {
178            "thead" => {
179                writer.push_newline();
180                for ix in 0..self.current_table_columns {
181                    if ix > 0 {
182                        writer.push_str(" ");
183                    }
184                    writer.push_str("| ---");
185                }
186                writer.push_str(" |");
187                self.is_first_th = true;
188            }
189            "tr" => {
190                writer.push_str(" |");
191                self.is_first_td = true;
192            }
193            "table" => {
194                self.current_table_columns = 0;
195            }
196            _ => {}
197        }
198    }
199}
200
201pub struct StyledTextHandler;
202
203impl HandleTag for StyledTextHandler {
204    fn should_handle(&self, tag: &str) -> bool {
205        matches!(tag, "strong" | "em")
206    }
207
208    fn handle_tag_start(
209        &mut self,
210        tag: &HtmlElement,
211        writer: &mut MarkdownWriter,
212    ) -> StartTagOutcome {
213        match tag.tag() {
214            "strong" => writer.push_str("**"),
215            "em" => writer.push_str("_"),
216            _ => {}
217        }
218
219        StartTagOutcome::Continue
220    }
221
222    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
223        match tag.tag() {
224            "strong" => writer.push_str("**"),
225            "em" => writer.push_str("_"),
226            _ => {}
227        }
228    }
229}
230
231pub struct CodeHandler;
232
233impl HandleTag for CodeHandler {
234    fn should_handle(&self, tag: &str) -> bool {
235        matches!(tag, "pre" | "code")
236    }
237
238    fn handle_tag_start(
239        &mut self,
240        tag: &HtmlElement,
241        writer: &mut MarkdownWriter,
242    ) -> StartTagOutcome {
243        match tag.tag() {
244            "code" => {
245                if !writer.is_inside("pre") {
246                    writer.push_str("`");
247                }
248            }
249            "pre" => writer.push_str("\n\n```\n"),
250            _ => {}
251        }
252
253        StartTagOutcome::Continue
254    }
255
256    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
257        match tag.tag() {
258            "code" => {
259                if !writer.is_inside("pre") {
260                    writer.push_str("`");
261                }
262            }
263            "pre" => writer.push_str("\n```\n"),
264            _ => {}
265        }
266    }
267
268    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
269        if writer.is_inside("pre") {
270            writer.push_str(text);
271            return HandlerOutcome::Handled;
272        }
273
274        HandlerOutcome::NoOp
275    }
276}