1use crate::html_element::HtmlElement;
2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
3
4pub struct WebpageChromeRemover;
5
6impl HandleTag for WebpageChromeRemover {
7 fn should_handle(&self, tag: &str) -> bool {
8 match tag {
9 "head" | "script" | "style" | "nav" => true,
10 _ => false,
11 }
12 }
13
14 fn handle_tag_start(
15 &mut self,
16 tag: &HtmlElement,
17 _writer: &mut MarkdownWriter,
18 ) -> StartTagOutcome {
19 match tag.tag() {
20 "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
21 _ => {}
22 }
23
24 StartTagOutcome::Continue
25 }
26}
27
28pub struct ParagraphHandler;
29
30impl HandleTag for ParagraphHandler {
31 fn should_handle(&self, _tag: &str) -> bool {
32 true
33 }
34
35 fn handle_tag_start(
36 &mut self,
37 tag: &HtmlElement,
38 writer: &mut MarkdownWriter,
39 ) -> StartTagOutcome {
40 if tag.is_inline() && writer.is_inside("p") {
41 if let Some(parent) = writer.current_element_stack().iter().last() {
42 if !parent.is_inline() {
43 if !(writer.markdown.ends_with(' ') || writer.markdown.ends_with('\n')) {
44 writer.push_str(" ");
45 }
46 }
47 }
48 }
49
50 match tag.tag() {
51 "p" => writer.push_blank_line(),
52 _ => {}
53 }
54
55 StartTagOutcome::Continue
56 }
57}
58
59pub struct HeadingHandler;
60
61impl HandleTag for HeadingHandler {
62 fn should_handle(&self, tag: &str) -> bool {
63 match tag {
64 "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => true,
65 _ => false,
66 }
67 }
68
69 fn handle_tag_start(
70 &mut self,
71 tag: &HtmlElement,
72 writer: &mut MarkdownWriter,
73 ) -> StartTagOutcome {
74 match tag.tag() {
75 "h1" => writer.push_str("\n\n# "),
76 "h2" => writer.push_str("\n\n## "),
77 "h3" => writer.push_str("\n\n### "),
78 "h4" => writer.push_str("\n\n#### "),
79 "h5" => writer.push_str("\n\n##### "),
80 "h6" => writer.push_str("\n\n###### "),
81 _ => {}
82 }
83
84 StartTagOutcome::Continue
85 }
86
87 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
88 match tag.tag() {
89 "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
90 _ => {}
91 }
92 }
93}
94
95pub struct ListHandler;
96
97impl HandleTag for ListHandler {
98 fn should_handle(&self, tag: &str) -> bool {
99 match tag {
100 "ul" | "ol" | "li" => true,
101 _ => false,
102 }
103 }
104
105 fn handle_tag_start(
106 &mut self,
107 tag: &HtmlElement,
108 writer: &mut MarkdownWriter,
109 ) -> StartTagOutcome {
110 match tag.tag() {
111 "ul" | "ol" => writer.push_newline(),
112 "li" => writer.push_str("- "),
113 _ => {}
114 }
115
116 StartTagOutcome::Continue
117 }
118
119 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
120 match tag.tag() {
121 "ul" | "ol" => writer.push_newline(),
122 "li" => writer.push_newline(),
123 _ => {}
124 }
125 }
126}
127
/// Tag handler that renders HTML tables as pipe-delimited Markdown tables.
///
/// The handler is stateful: it tracks the header width and whether a cell has
/// already been written, so that separators can be placed between cells.
pub struct TableHandler {
    /// The number of columns in the current `<table>`.
    ///
    /// Incremented once per `<th>` start tag and cleared when the table closes.
    current_table_columns: usize,
    /// `true` while no `<th>` has been written since the flag was last reset;
    /// consulted when deciding whether a separating space is needed before a
    /// cell's leading pipe.
    is_first_th: bool,
    /// `true` while no `<td>` has been written since the flag was last reset;
    /// consulted when deciding whether a separating space is needed before a
    /// cell's leading pipe.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with no columns counted and both "first cell" flags
    /// set.
    pub fn new() -> Self {
        TableHandler {
            current_table_columns: 0,
            is_first_th: true,
            is_first_td: true,
        }
    }
}
144
145impl HandleTag for TableHandler {
146 fn should_handle(&self, tag: &str) -> bool {
147 match tag {
148 "table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
149 _ => false,
150 }
151 }
152
153 fn handle_tag_start(
154 &mut self,
155 tag: &HtmlElement,
156 writer: &mut MarkdownWriter,
157 ) -> StartTagOutcome {
158 match tag.tag() {
159 "thead" => writer.push_blank_line(),
160 "tr" => writer.push_newline(),
161 "th" => {
162 self.current_table_columns += 1;
163 if self.is_first_th {
164 self.is_first_th = false;
165 } else {
166 writer.push_str(" ");
167 }
168 writer.push_str("| ");
169 }
170 "td" => {
171 if self.is_first_td {
172 self.is_first_td = false;
173 } else {
174 writer.push_str(" ");
175 }
176 writer.push_str("| ");
177 }
178 _ => {}
179 }
180
181 StartTagOutcome::Continue
182 }
183
184 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
185 match tag.tag() {
186 "thead" => {
187 writer.push_newline();
188 for ix in 0..self.current_table_columns {
189 if ix > 0 {
190 writer.push_str(" ");
191 }
192 writer.push_str("| ---");
193 }
194 writer.push_str(" |");
195 self.is_first_th = true;
196 }
197 "tr" => {
198 writer.push_str(" |");
199 self.is_first_td = true;
200 }
201 "table" => {
202 self.current_table_columns = 0;
203 }
204 _ => {}
205 }
206 }
207}
208
209pub struct StyledTextHandler;
210
211impl HandleTag for StyledTextHandler {
212 fn should_handle(&self, tag: &str) -> bool {
213 match tag {
214 "strong" | "em" => true,
215 _ => false,
216 }
217 }
218
219 fn handle_tag_start(
220 &mut self,
221 tag: &HtmlElement,
222 writer: &mut MarkdownWriter,
223 ) -> StartTagOutcome {
224 match tag.tag() {
225 "strong" => writer.push_str("**"),
226 "em" => writer.push_str("_"),
227 _ => {}
228 }
229
230 StartTagOutcome::Continue
231 }
232
233 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
234 match tag.tag() {
235 "strong" => writer.push_str("**"),
236 "em" => writer.push_str("_"),
237 _ => {}
238 }
239 }
240}
241
242pub struct CodeHandler;
243
244impl HandleTag for CodeHandler {
245 fn should_handle(&self, tag: &str) -> bool {
246 match tag {
247 "pre" | "code" => true,
248 _ => false,
249 }
250 }
251
252 fn handle_tag_start(
253 &mut self,
254 tag: &HtmlElement,
255 writer: &mut MarkdownWriter,
256 ) -> StartTagOutcome {
257 match tag.tag() {
258 "code" => {
259 if !writer.is_inside("pre") {
260 writer.push_str("`");
261 }
262 }
263 "pre" => writer.push_str("\n\n```\n"),
264 _ => {}
265 }
266
267 StartTagOutcome::Continue
268 }
269
270 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
271 match tag.tag() {
272 "code" => {
273 if !writer.is_inside("pre") {
274 writer.push_str("`");
275 }
276 }
277 "pre" => writer.push_str("\n```\n"),
278 _ => {}
279 }
280 }
281
282 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
283 if writer.is_inside("pre") {
284 writer.push_str(&text);
285 return HandlerOutcome::Handled;
286 }
287
288 HandlerOutcome::NoOp
289 }
290}