1use crate::html_element::HtmlElement;
2use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
3
4pub struct WebpageChromeRemover;
5
6impl HandleTag for WebpageChromeRemover {
7 fn should_handle(&self, tag: &str) -> bool {
8 matches!(tag, "head" | "script" | "style" | "nav")
9 }
10
11 fn handle_tag_start(
12 &mut self,
13 tag: &HtmlElement,
14 _writer: &mut MarkdownWriter,
15 ) -> StartTagOutcome {
16 match tag.tag() {
17 "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip,
18 _ => {}
19 }
20
21 StartTagOutcome::Continue
22 }
23}
24
25pub struct ParagraphHandler;
26
27impl HandleTag for ParagraphHandler {
28 fn should_handle(&self, _tag: &str) -> bool {
29 true
30 }
31
32 fn handle_tag_start(
33 &mut self,
34 tag: &HtmlElement,
35 writer: &mut MarkdownWriter,
36 ) -> StartTagOutcome {
37 if tag.is_inline()
38 && writer.is_inside("p")
39 && let Some(parent) = writer.current_element_stack().iter().last()
40 && !(parent.is_inline()
41 || writer.markdown.ends_with(' ')
42 || writer.markdown.ends_with('\n'))
43 {
44 writer.push_str(" ");
45 }
46
47 if tag.tag() == "p" {
48 writer.push_blank_line()
49 }
50 StartTagOutcome::Continue
51 }
52}
53
54pub struct HeadingHandler;
55
56impl HandleTag for HeadingHandler {
57 fn should_handle(&self, tag: &str) -> bool {
58 matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
59 }
60
61 fn handle_tag_start(
62 &mut self,
63 tag: &HtmlElement,
64 writer: &mut MarkdownWriter,
65 ) -> StartTagOutcome {
66 match tag.tag() {
67 "h1" => writer.push_str("\n\n# "),
68 "h2" => writer.push_str("\n\n## "),
69 "h3" => writer.push_str("\n\n### "),
70 "h4" => writer.push_str("\n\n#### "),
71 "h5" => writer.push_str("\n\n##### "),
72 "h6" => writer.push_str("\n\n###### "),
73 _ => {}
74 }
75
76 StartTagOutcome::Continue
77 }
78
79 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
80 match tag.tag() {
81 "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
82 _ => {}
83 }
84 }
85}
86
87pub struct ListHandler;
88
89impl HandleTag for ListHandler {
90 fn should_handle(&self, tag: &str) -> bool {
91 matches!(tag, "ul" | "ol" | "li")
92 }
93
94 fn handle_tag_start(
95 &mut self,
96 tag: &HtmlElement,
97 writer: &mut MarkdownWriter,
98 ) -> StartTagOutcome {
99 match tag.tag() {
100 "ul" | "ol" => writer.push_newline(),
101 "li" => writer.push_str("- "),
102 _ => {}
103 }
104
105 StartTagOutcome::Continue
106 }
107
108 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
109 match tag.tag() {
110 "ul" | "ol" => writer.push_newline(),
111 "li" => writer.push_newline(),
112 _ => {}
113 }
114 }
115}
116
/// Handler state for converting HTML tables into pipe-delimited Markdown tables.
pub struct TableHandler {
    /// The number of columns in the current `<table>`.
    current_table_columns: usize,
    /// `true` until the first `<th>` has been opened; reset when a `<thead>` closes.
    is_first_th: bool,
    /// `true` until the first `<td>` has been opened; reset when a `<tr>` closes.
    is_first_td: bool,
}

impl TableHandler {
    /// Creates a handler with a zero column count and both first-cell flags set.
    pub const fn new() -> Self {
        TableHandler {
            is_first_td: true,
            is_first_th: true,
            current_table_columns: 0,
        }
    }
}

impl Default for TableHandler {
    /// Equivalent to [`TableHandler::new`].
    fn default() -> Self {
        TableHandler::new()
    }
}
139
140impl HandleTag for TableHandler {
141 fn should_handle(&self, tag: &str) -> bool {
142 matches!(tag, "table" | "thead" | "tbody" | "tr" | "th" | "td")
143 }
144
145 fn handle_tag_start(
146 &mut self,
147 tag: &HtmlElement,
148 writer: &mut MarkdownWriter,
149 ) -> StartTagOutcome {
150 match tag.tag() {
151 "thead" => writer.push_blank_line(),
152 "tr" => writer.push_newline(),
153 "th" => {
154 self.current_table_columns += 1;
155 if self.is_first_th {
156 self.is_first_th = false;
157 } else {
158 writer.push_str(" ");
159 }
160 writer.push_str("| ");
161 }
162 "td" => {
163 if self.is_first_td {
164 self.is_first_td = false;
165 } else {
166 writer.push_str(" ");
167 }
168 writer.push_str("| ");
169 }
170 _ => {}
171 }
172
173 StartTagOutcome::Continue
174 }
175
176 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
177 match tag.tag() {
178 "thead" => {
179 writer.push_newline();
180 for ix in 0..self.current_table_columns {
181 if ix > 0 {
182 writer.push_str(" ");
183 }
184 writer.push_str("| ---");
185 }
186 writer.push_str(" |");
187 self.is_first_th = true;
188 }
189 "tr" => {
190 writer.push_str(" |");
191 self.is_first_td = true;
192 }
193 "table" => {
194 self.current_table_columns = 0;
195 }
196 _ => {}
197 }
198 }
199}
200
201pub struct StyledTextHandler;
202
203impl HandleTag for StyledTextHandler {
204 fn should_handle(&self, tag: &str) -> bool {
205 matches!(tag, "strong" | "em")
206 }
207
208 fn handle_tag_start(
209 &mut self,
210 tag: &HtmlElement,
211 writer: &mut MarkdownWriter,
212 ) -> StartTagOutcome {
213 match tag.tag() {
214 "strong" => writer.push_str("**"),
215 "em" => writer.push_str("_"),
216 _ => {}
217 }
218
219 StartTagOutcome::Continue
220 }
221
222 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
223 match tag.tag() {
224 "strong" => writer.push_str("**"),
225 "em" => writer.push_str("_"),
226 _ => {}
227 }
228 }
229}
230
231pub struct CodeHandler;
232
233impl HandleTag for CodeHandler {
234 fn should_handle(&self, tag: &str) -> bool {
235 matches!(tag, "pre" | "code")
236 }
237
238 fn handle_tag_start(
239 &mut self,
240 tag: &HtmlElement,
241 writer: &mut MarkdownWriter,
242 ) -> StartTagOutcome {
243 match tag.tag() {
244 "code" => {
245 if !writer.is_inside("pre") {
246 writer.push_str("`");
247 }
248 }
249 "pre" => writer.push_str("\n\n```\n"),
250 _ => {}
251 }
252
253 StartTagOutcome::Continue
254 }
255
256 fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
257 match tag.tag() {
258 "code" => {
259 if !writer.is_inside("pre") {
260 writer.push_str("`");
261 }
262 }
263 "pre" => writer.push_str("\n```\n"),
264 _ => {}
265 }
266 }
267
268 fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
269 if writer.is_inside("pre") {
270 writer.push_str(text);
271 return HandlerOutcome::Handled;
272 }
273
274 HandlerOutcome::NoOp
275 }
276}