1use crate::html_element::HtmlElement;
2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
3
4pub struct WebpageChromeRemover;
5
6impl HandleTag for WebpageChromeRemover {
7 fn should_handle(&self, tag: &str) -> bool {
8 matches!(tag, "head" | "script" | "style" | "nav")
9 }
10
11 fn handle_tag_start(
12 &mut self,
13 tag: &HtmlElement,
14 _writer: &mut MarkdownWriter,
15 ) -> StartTagOutcome {
16 match tag.tag() {
17 "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
18 _ => {}
19 }
20
21 StartTagOutcome::Continue
22 }
23}
24
25pub struct ParagraphHandler;
26
27impl HandleTag for ParagraphHandler {
28 fn should_handle(&self, _tag: &str) -> bool {
29 true
30 }
31
32 fn handle_tag_start(
33 &mut self,
34 tag: &HtmlElement,
35 writer: &mut MarkdownWriter,
36 ) -> StartTagOutcome {
37 if tag.is_inline() && writer.is_inside("p")
38 && let Some(parent) = writer.current_element_stack().iter().last()
39 && !(parent.is_inline()
40 || writer.markdown.ends_with(' ')
41 || writer.markdown.ends_with('\n'))
42 {
43 writer.push_str(" ");
44 }
45
46 if tag.tag() == "p" {
47 writer.push_blank_line()
48 }
49 StartTagOutcome::Continue
50 }
51}
52
53pub struct HeadingHandler;
54
55impl HandleTag for HeadingHandler {
56 fn should_handle(&self, tag: &str) -> bool {
57 matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
58 }
59
60 fn handle_tag_start(
61 &mut self,
62 tag: &HtmlElement,
63 writer: &mut MarkdownWriter,
64 ) -> StartTagOutcome {
65 match tag.tag() {
66 "h1" => writer.push_str("\n\n# "),
67 "h2" => writer.push_str("\n\n## "),
68 "h3" => writer.push_str("\n\n### "),
69 "h4" => writer.push_str("\n\n#### "),
70 "h5" => writer.push_str("\n\n##### "),
71 "h6" => writer.push_str("\n\n###### "),
72 _ => {}
73 }
74
75 StartTagOutcome::Continue
76 }
77
78 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
79 match tag.tag() {
80 "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
81 _ => {}
82 }
83 }
84}
85
86pub struct ListHandler;
87
88impl HandleTag for ListHandler {
89 fn should_handle(&self, tag: &str) -> bool {
90 matches!(tag, "ul" | "ol" | "li")
91 }
92
93 fn handle_tag_start(
94 &mut self,
95 tag: &HtmlElement,
96 writer: &mut MarkdownWriter,
97 ) -> StartTagOutcome {
98 match tag.tag() {
99 "ul" | "ol" => writer.push_newline(),
100 "li" => writer.push_str("- "),
101 _ => {}
102 }
103
104 StartTagOutcome::Continue
105 }
106
107 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
108 match tag.tag() {
109 "ul" | "ol" => writer.push_newline(),
110 "li" => writer.push_newline(),
111 _ => {}
112 }
113 }
114}
115
/// Handler state for rendering HTML tables as Markdown pipe tables.
pub struct TableHandler {
    /// The number of columns in the current `<table>`, counted from its `<th>` cells.
    current_table_columns: usize,
    /// Starts out `true` and is cleared when a `<th>` start tag is handled; consulted
    /// when deciding whether a cell needs a separating space before its `| `.
    is_first_th: bool,
    /// Starts out `true` and is cleared when a `<td>` start tag is handled; consulted
    /// when deciding whether a cell needs a separating space before its `| `.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with no columns counted and both "first cell" flags set.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for TableHandler {
    /// Same initial state as [`TableHandler::new`].
    ///
    /// Written by hand rather than derived because the flags must start as `true`.
    fn default() -> Self {
        Self {
            current_table_columns: 0,
            is_first_th: true,
            is_first_td: true,
        }
    }
}
138
139impl HandleTag for TableHandler {
140 fn should_handle(&self, tag: &str) -> bool {
141 matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
142 }
143
144 fn handle_tag_start(
145 &mut self,
146 tag: &HtmlElement,
147 writer: &mut MarkdownWriter,
148 ) -> StartTagOutcome {
149 match tag.tag() {
150 "thead" => writer.push_blank_line(),
151 "tr" => writer.push_newline(),
152 "th" => {
153 self.current_table_columns += 1;
154 if self.is_first_th {
155 self.is_first_th = false;
156 } else {
157 writer.push_str(" ");
158 }
159 writer.push_str("| ");
160 }
161 "td" => {
162 if self.is_first_td {
163 self.is_first_td = false;
164 } else {
165 writer.push_str(" ");
166 }
167 writer.push_str("| ");
168 }
169 _ => {}
170 }
171
172 StartTagOutcome::Continue
173 }
174
175 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
176 match tag.tag() {
177 "thead" => {
178 writer.push_newline();
179 for ix in 0..self.current_table_columns {
180 if ix > 0 {
181 writer.push_str(" ");
182 }
183 writer.push_str("| ---");
184 }
185 writer.push_str(" |");
186 self.is_first_th = true;
187 }
188 "tr" => {
189 writer.push_str(" |");
190 self.is_first_td = true;
191 }
192 "table" => {
193 self.current_table_columns = 0;
194 }
195 _ => {}
196 }
197 }
198}
199
200pub struct StyledTextHandler;
201
202impl HandleTag for StyledTextHandler {
203 fn should_handle(&self, tag: &str) -> bool {
204 matches!(tag, "strong" | "em")
205 }
206
207 fn handle_tag_start(
208 &mut self,
209 tag: &HtmlElement,
210 writer: &mut MarkdownWriter,
211 ) -> StartTagOutcome {
212 match tag.tag() {
213 "strong" => writer.push_str("**"),
214 "em" => writer.push_str("_"),
215 _ => {}
216 }
217
218 StartTagOutcome::Continue
219 }
220
221 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
222 match tag.tag() {
223 "strong" => writer.push_str("**"),
224 "em" => writer.push_str("_"),
225 _ => {}
226 }
227 }
228}
229
230pub struct CodeHandler;
231
232impl HandleTag for CodeHandler {
233 fn should_handle(&self, tag: &str) -> bool {
234 matches!(tag, "pre" | "code")
235 }
236
237 fn handle_tag_start(
238 &mut self,
239 tag: &HtmlElement,
240 writer: &mut MarkdownWriter,
241 ) -> StartTagOutcome {
242 match tag.tag() {
243 "code" => {
244 if !writer.is_inside("pre") {
245 writer.push_str("`");
246 }
247 }
248 "pre" => writer.push_str("\n\n```\n"),
249 _ => {}
250 }
251
252 StartTagOutcome::Continue
253 }
254
255 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
256 match tag.tag() {
257 "code" => {
258 if !writer.is_inside("pre") {
259 writer.push_str("`");
260 }
261 }
262 "pre" => writer.push_str("\n```\n"),
263 _ => {}
264 }
265 }
266
267 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
268 if writer.is_inside("pre") {
269 writer.push_str(text);
270 return HandlerOutcome::Handled;
271 }
272
273 HandlerOutcome::NoOp
274 }
275}