1use crate::html_element::HtmlElement;
2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
3
4pub struct WebpageChromeRemover;
5
6impl HandleTag for WebpageChromeRemover {
7 fn should_handle(&self, tag: &str) -> bool {
8 matches!(tag, "head" | "script" | "style" | "nav")
9 }
10
11 fn handle_tag_start(
12 &mut self,
13 tag: &HtmlElement,
14 _writer: &mut MarkdownWriter,
15 ) -> StartTagOutcome {
16 match tag.tag() {
17 "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
18 _ => {}
19 }
20
21 StartTagOutcome::Continue
22 }
23}
24
25pub struct ParagraphHandler;
26
27impl HandleTag for ParagraphHandler {
28 fn should_handle(&self, _tag: &str) -> bool {
29 true
30 }
31
32 fn handle_tag_start(
33 &mut self,
34 tag: &HtmlElement,
35 writer: &mut MarkdownWriter,
36 ) -> StartTagOutcome {
37 if tag.is_inline() && writer.is_inside("p") {
38 if let Some(parent) = writer.current_element_stack().iter().last() {
39 if !(parent.is_inline()
40 || writer.markdown.ends_with(' ')
41 || writer.markdown.ends_with('\n'))
42 {
43 writer.push_str(" ");
44 }
45 }
46 }
47
48 if tag.tag() == "p" {
49 writer.push_blank_line()
50 }
51 StartTagOutcome::Continue
52 }
53}
54
55pub struct HeadingHandler;
56
57impl HandleTag for HeadingHandler {
58 fn should_handle(&self, tag: &str) -> bool {
59 matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
60 }
61
62 fn handle_tag_start(
63 &mut self,
64 tag: &HtmlElement,
65 writer: &mut MarkdownWriter,
66 ) -> StartTagOutcome {
67 match tag.tag() {
68 "h1" => writer.push_str("\n\n# "),
69 "h2" => writer.push_str("\n\n## "),
70 "h3" => writer.push_str("\n\n### "),
71 "h4" => writer.push_str("\n\n#### "),
72 "h5" => writer.push_str("\n\n##### "),
73 "h6" => writer.push_str("\n\n###### "),
74 _ => {}
75 }
76
77 StartTagOutcome::Continue
78 }
79
80 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
81 match tag.tag() {
82 "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
83 _ => {}
84 }
85 }
86}
87
88pub struct ListHandler;
89
90impl HandleTag for ListHandler {
91 fn should_handle(&self, tag: &str) -> bool {
92 matches!(tag, "ul" | "ol" | "li")
93 }
94
95 fn handle_tag_start(
96 &mut self,
97 tag: &HtmlElement,
98 writer: &mut MarkdownWriter,
99 ) -> StartTagOutcome {
100 match tag.tag() {
101 "ul" | "ol" => writer.push_newline(),
102 "li" => writer.push_str("- "),
103 _ => {}
104 }
105
106 StartTagOutcome::Continue
107 }
108
109 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
110 match tag.tag() {
111 "ul" | "ol" => writer.push_newline(),
112 "li" => writer.push_newline(),
113 _ => {}
114 }
115 }
116}
117
/// Tag handler that renders HTML tables as pipe-delimited Markdown tables.
pub struct TableHandler {
    /// The number of columns in the current `<table>`, counted from the
    /// `<th>` cells seen so far.
    current_table_columns: usize,
    /// `true` until a `<th>` cell has been written since the flag was last reset.
    is_first_th: bool,
    /// `true` until a `<td>` cell has been written since the flag was last reset.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with no columns counted and both first-cell flags set.
    pub fn new() -> Self {
        TableHandler {
            is_first_th: true,
            is_first_td: true,
            current_table_columns: 0,
        }
    }
}

impl Default for TableHandler {
    /// Equivalent to [`TableHandler::new`].
    fn default() -> Self {
        TableHandler::new()
    }
}
140
141impl HandleTag for TableHandler {
142 fn should_handle(&self, tag: &str) -> bool {
143 matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
144 }
145
146 fn handle_tag_start(
147 &mut self,
148 tag: &HtmlElement,
149 writer: &mut MarkdownWriter,
150 ) -> StartTagOutcome {
151 match tag.tag() {
152 "thead" => writer.push_blank_line(),
153 "tr" => writer.push_newline(),
154 "th" => {
155 self.current_table_columns += 1;
156 if self.is_first_th {
157 self.is_first_th = false;
158 } else {
159 writer.push_str(" ");
160 }
161 writer.push_str("| ");
162 }
163 "td" => {
164 if self.is_first_td {
165 self.is_first_td = false;
166 } else {
167 writer.push_str(" ");
168 }
169 writer.push_str("| ");
170 }
171 _ => {}
172 }
173
174 StartTagOutcome::Continue
175 }
176
177 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
178 match tag.tag() {
179 "thead" => {
180 writer.push_newline();
181 for ix in 0..self.current_table_columns {
182 if ix > 0 {
183 writer.push_str(" ");
184 }
185 writer.push_str("| ---");
186 }
187 writer.push_str(" |");
188 self.is_first_th = true;
189 }
190 "tr" => {
191 writer.push_str(" |");
192 self.is_first_td = true;
193 }
194 "table" => {
195 self.current_table_columns = 0;
196 }
197 _ => {}
198 }
199 }
200}
201
202pub struct StyledTextHandler;
203
204impl HandleTag for StyledTextHandler {
205 fn should_handle(&self, tag: &str) -> bool {
206 matches!(tag, "strong" | "em")
207 }
208
209 fn handle_tag_start(
210 &mut self,
211 tag: &HtmlElement,
212 writer: &mut MarkdownWriter,
213 ) -> StartTagOutcome {
214 match tag.tag() {
215 "strong" => writer.push_str("**"),
216 "em" => writer.push_str("_"),
217 _ => {}
218 }
219
220 StartTagOutcome::Continue
221 }
222
223 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
224 match tag.tag() {
225 "strong" => writer.push_str("**"),
226 "em" => writer.push_str("_"),
227 _ => {}
228 }
229 }
230}
231
232pub struct CodeHandler;
233
234impl HandleTag for CodeHandler {
235 fn should_handle(&self, tag: &str) -> bool {
236 matches!(tag, "pre" | "code")
237 }
238
239 fn handle_tag_start(
240 &mut self,
241 tag: &HtmlElement,
242 writer: &mut MarkdownWriter,
243 ) -> StartTagOutcome {
244 match tag.tag() {
245 "code" => {
246 if !writer.is_inside("pre") {
247 writer.push_str("`");
248 }
249 }
250 "pre" => writer.push_str("\n\n```\n"),
251 _ => {}
252 }
253
254 StartTagOutcome::Continue
255 }
256
257 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
258 match tag.tag() {
259 "code" => {
260 if !writer.is_inside("pre") {
261 writer.push_str("`");
262 }
263 }
264 "pre" => writer.push_str("\n```\n"),
265 _ => {}
266 }
267 }
268
269 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
270 if writer.is_inside("pre") {
271 writer.push_str(text);
272 return HandlerOutcome::Handled;
273 }
274
275 HandlerOutcome::NoOp
276 }
277}