markdown.rs

  1use crate::html_element::HtmlElement;
  2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
  3
  4pub struct WebpageChromeRemover;
  5
  6impl HandleTag for WebpageChromeRemover {
  7    fn should_handle(&self, tag: &str) -> bool {
  8        match tag {
  9            "head" | "script" | "style" | "nav" => true,
 10            _ => false,
 11        }
 12    }
 13
 14    fn handle_tag_start(
 15        &mut self,
 16        tag: &HtmlElement,
 17        _writer: &mut MarkdownWriter,
 18    ) -> StartTagOutcome {
 19        match tag.tag() {
 20            "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
 21            _ => {}
 22        }
 23
 24        StartTagOutcome::Continue
 25    }
 26}
 27
 28pub struct ParagraphHandler;
 29
 30impl HandleTag for ParagraphHandler {
 31    fn should_handle(&self, _tag: &str) -> bool {
 32        true
 33    }
 34
 35    fn handle_tag_start(
 36        &mut self,
 37        tag: &HtmlElement,
 38        writer: &mut MarkdownWriter,
 39    ) -> StartTagOutcome {
 40        if tag.is_inline() && writer.is_inside("p") {
 41            if let Some(parent) = writer.current_element_stack().iter().last() {
 42                if !parent.is_inline() {
 43                    if !(writer.markdown.ends_with(' ') || writer.markdown.ends_with('\n')) {
 44                        writer.push_str(" ");
 45                    }
 46                }
 47            }
 48        }
 49
 50        match tag.tag() {
 51            "p" => writer.push_blank_line(),
 52            _ => {}
 53        }
 54
 55        StartTagOutcome::Continue
 56    }
 57}
 58
 59pub struct HeadingHandler;
 60
 61impl HandleTag for HeadingHandler {
 62    fn should_handle(&self, tag: &str) -> bool {
 63        match tag {
 64            "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => true,
 65            _ => false,
 66        }
 67    }
 68
 69    fn handle_tag_start(
 70        &mut self,
 71        tag: &HtmlElement,
 72        writer: &mut MarkdownWriter,
 73    ) -> StartTagOutcome {
 74        match tag.tag() {
 75            "h1" => writer.push_str("\n\n# "),
 76            "h2" => writer.push_str("\n\n## "),
 77            "h3" => writer.push_str("\n\n### "),
 78            "h4" => writer.push_str("\n\n#### "),
 79            "h5" => writer.push_str("\n\n##### "),
 80            "h6" => writer.push_str("\n\n###### "),
 81            _ => {}
 82        }
 83
 84        StartTagOutcome::Continue
 85    }
 86
 87    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
 88        match tag.tag() {
 89            "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
 90            _ => {}
 91        }
 92    }
 93}
 94
 95pub struct ListHandler;
 96
 97impl HandleTag for ListHandler {
 98    fn should_handle(&self, tag: &str) -> bool {
 99        match tag {
100            "ul" | "ol" | "li" => true,
101            _ => false,
102        }
103    }
104
105    fn handle_tag_start(
106        &mut self,
107        tag: &HtmlElement,
108        writer: &mut MarkdownWriter,
109    ) -> StartTagOutcome {
110        match tag.tag() {
111            "ul" | "ol" => writer.push_newline(),
112            "li" => writer.push_str("- "),
113            _ => {}
114        }
115
116        StartTagOutcome::Continue
117    }
118
119    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
120        match tag.tag() {
121            "ul" | "ol" => writer.push_newline(),
122            "li" => writer.push_newline(),
123            _ => {}
124        }
125    }
126}
127
/// Handler state for rendering `<table>` markup as a Markdown table.
pub struct TableHandler {
    /// The number of columns in the current `<table>`.
    ///
    /// Incremented once per `<th>` and reset to zero when the table closes.
    current_table_columns: usize,
    /// `true` until a `<th>` has been written in the current header; used to
    /// decide whether a separating space is needed before the next `| `.
    is_first_th: bool,
    /// `true` until a `<td>` has been written in the current row; used to
    /// decide whether a separating space is needed before the next `| `.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with no columns counted and both "first cell" flags
    /// set, i.e. ready for the start of a table.
    pub fn new() -> Self {
        Self {
            current_table_columns: 0,
            is_first_th: true,
            is_first_td: true,
        }
    }
}

impl Default for TableHandler {
    /// Equivalent to [`TableHandler::new`].
    ///
    /// Implemented by hand rather than derived: a derived `Default` would
    /// initialise the two `bool` flags to `false`, but they must start `true`.
    fn default() -> Self {
        Self::new()
    }
}
144
145impl HandleTag for TableHandler {
146    fn should_handle(&self, tag: &str) -> bool {
147        match tag {
148            "table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
149            _ => false,
150        }
151    }
152
153    fn handle_tag_start(
154        &mut self,
155        tag: &HtmlElement,
156        writer: &mut MarkdownWriter,
157    ) -> StartTagOutcome {
158        match tag.tag() {
159            "thead" => writer.push_blank_line(),
160            "tr" => writer.push_newline(),
161            "th" => {
162                self.current_table_columns += 1;
163                if self.is_first_th {
164                    self.is_first_th = false;
165                } else {
166                    writer.push_str(" ");
167                }
168                writer.push_str("| ");
169            }
170            "td" => {
171                if self.is_first_td {
172                    self.is_first_td = false;
173                } else {
174                    writer.push_str(" ");
175                }
176                writer.push_str("| ");
177            }
178            _ => {}
179        }
180
181        StartTagOutcome::Continue
182    }
183
184    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
185        match tag.tag() {
186            "thead" => {
187                writer.push_newline();
188                for ix in 0..self.current_table_columns {
189                    if ix > 0 {
190                        writer.push_str(" ");
191                    }
192                    writer.push_str("| ---");
193                }
194                writer.push_str(" |");
195                self.is_first_th = true;
196            }
197            "tr" => {
198                writer.push_str(" |");
199                self.is_first_td = true;
200            }
201            "table" => {
202                self.current_table_columns = 0;
203            }
204            _ => {}
205        }
206    }
207}
208
209pub struct StyledTextHandler;
210
211impl HandleTag for StyledTextHandler {
212    fn should_handle(&self, tag: &str) -> bool {
213        match tag {
214            "strong" | "em" => true,
215            _ => false,
216        }
217    }
218
219    fn handle_tag_start(
220        &mut self,
221        tag: &HtmlElement,
222        writer: &mut MarkdownWriter,
223    ) -> StartTagOutcome {
224        match tag.tag() {
225            "strong" => writer.push_str("**"),
226            "em" => writer.push_str("_"),
227            _ => {}
228        }
229
230        StartTagOutcome::Continue
231    }
232
233    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
234        match tag.tag() {
235            "strong" => writer.push_str("**"),
236            "em" => writer.push_str("_"),
237            _ => {}
238        }
239    }
240}
241
242pub struct CodeHandler;
243
244impl HandleTag for CodeHandler {
245    fn should_handle(&self, tag: &str) -> bool {
246        match tag {
247            "pre" | "code" => true,
248            _ => false,
249        }
250    }
251
252    fn handle_tag_start(
253        &mut self,
254        tag: &HtmlElement,
255        writer: &mut MarkdownWriter,
256    ) -> StartTagOutcome {
257        match tag.tag() {
258            "code" => {
259                if !writer.is_inside("pre") {
260                    writer.push_str("`");
261                }
262            }
263            "pre" => writer.push_str("\n\n```\n"),
264            _ => {}
265        }
266
267        StartTagOutcome::Continue
268    }
269
270    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
271        match tag.tag() {
272            "code" => {
273                if !writer.is_inside("pre") {
274                    writer.push_str("`");
275                }
276            }
277            "pre" => writer.push_str("\n```\n"),
278            _ => {}
279        }
280    }
281
282    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
283        if writer.is_inside("pre") {
284            writer.push_str(&text);
285            return HandlerOutcome::Handled;
286        }
287
288        HandlerOutcome::NoOp
289    }
290}