1use std::{cell::RefCell, collections::HashMap, mem, ops::Range};
2
3use gpui::{DefiniteLength, FontWeight, SharedString, px, relative};
4use html5ever::{
5 Attribute, LocalName, ParseOpts, local_name, parse_document, tendril::TendrilSink,
6};
7use markup5ever_rcdom::{Node, NodeData, RcDom};
8use pulldown_cmark::{Alignment, HeadingLevel};
9use stacksafe::stacksafe;
10
11use crate::html::html_minifier::{Minifier, MinifierOptions};
12
/// Result of parsing one raw HTML block: the range passed to
/// `parse_html_block` plus the elements recovered from the markup.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlock {
    /// Range supplied by the caller of `parse_html_block`; stored unchanged.
    pub source_range: Range<usize>,
    /// Elements collected while walking the parsed document, in visit order.
    pub children: Vec<ParsedHtmlElement>,
}
19
/// A block-level element produced by the HTML parser.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlElement {
    /// An `<h1>`–`<h6>` heading.
    Heading(ParsedHtmlHeading),
    /// A `<ul>` or `<ol>` list.
    List(ParsedHtmlList),
    /// A `<table>`.
    Table(ParsedHtmlTable),
    /// A `<blockquote>`.
    BlockQuote(ParsedHtmlBlockQuote),
    /// A `<p>` element or a bare text node.
    Paragraph(HtmlParagraph),
    /// An `<img>` encountered outside of a paragraph.
    Image(HtmlImage),
}
30
31impl ParsedHtmlElement {
32 pub fn source_range(&self) -> Option<Range<usize>> {
33 Some(match self {
34 Self::Heading(heading) => heading.source_range.clone(),
35 Self::List(list) => list.source_range.clone(),
36 Self::Table(table) => table.source_range.clone(),
37 Self::BlockQuote(block_quote) => block_quote.source_range.clone(),
38 Self::Paragraph(text) => match text.first()? {
39 HtmlParagraphChunk::Text(text) => text.source_range.clone(),
40 HtmlParagraphChunk::Image(image) => image.source_range.clone(),
41 },
42 Self::Image(image) => image.source_range.clone(),
43 })
44 }
45}
46
/// Inline content of a paragraph-like element: an ordered run of text and
/// image chunks.
pub(crate) type HtmlParagraph = Vec<HtmlParagraphChunk>;
48
/// One piece of inline content inside an [`HtmlParagraph`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum HtmlParagraphChunk {
    /// A run of text together with its highlight and link ranges.
    Text(ParsedHtmlText),
    /// An inline `<img>`.
    Image(HtmlImage),
}
55
/// A parsed `<ul>` or `<ol>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlList {
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Nesting depth taken from the parse context; the default context starts
    /// at 1 and each list item parses its children one level deeper.
    pub depth: u16,
    /// `true` for `<ol>`, `false` for `<ul>`.
    pub ordered: bool,
    /// The `<li>` children that produced content.
    pub items: Vec<ParsedHtmlListItem>,
}
64
/// A single `<li>` entry of a [`ParsedHtmlList`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlListItem {
    /// Copy of the owning list's source range.
    pub source_range: Range<usize>,
    /// Whether the item is numbered or a plain bullet.
    pub item_type: ParsedHtmlListItemType,
    /// Elements parsed from the children of the `<li>`.
    pub content: Vec<ParsedHtmlElement>,
}
72
/// Marker kind of a list item.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlListItemType {
    /// Item of an ordered list, carrying its 1-based number.
    Ordered(u64),
    /// Item of an unordered list.
    Unordered,
}
79
/// A parsed `<h1>`–`<h6>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlHeading {
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Heading level matching the tag name (`h1` maps to `H1`, and so on).
    pub level: HeadingLevel,
    /// Inline content of the heading.
    pub contents: HtmlParagraph,
}
87
/// A parsed `<table>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTable {
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Rows found inside `<thead>`.
    pub header: Vec<ParsedHtmlTableRow>,
    /// Rows found inside `<tbody>`.
    pub body: Vec<ParsedHtmlTableRow>,
    /// Inline content of the `<caption>` element, if the table has one.
    pub caption: Option<HtmlParagraph>,
}
96
/// A single `<th>` or `<td>` cell.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableColumn {
    /// Value of the `colspan` attribute; 1 when missing or unparsable, and
    /// never less than 1.
    pub col_span: usize,
    /// Value of the `rowspan` attribute; 1 when missing or unparsable, and
    /// never less than 1.
    pub row_span: usize,
    /// `true` when the cell is a `<th>`.
    pub is_header: bool,
    /// Inline content of the cell.
    pub children: HtmlParagraph,
    /// Alignment from the `align` attribute; when absent or unrecognized,
    /// header cells are centered and other cells use `Alignment::None`.
    pub alignment: Alignment,
}
106
/// A `<tr>` row of a table.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableRow {
    /// The `<th>` / `<td>` cells of the row, in document order.
    pub columns: Vec<ParsedHtmlTableColumn>,
}
112
/// A parsed `<blockquote>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlockQuote {
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Elements parsed from the children of the blockquote.
    pub children: Vec<ParsedHtmlElement>,
}
119
/// A run of inline text with its styling and link annotations.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlText {
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// The text, built by concatenating adjacent text nodes.
    pub contents: SharedString,
    /// Styles applied to byte ranges of `contents`.
    pub highlights: Vec<(Range<usize>, HtmlHighlightStyle)>,
    /// Link targets (`href` values) applied to byte ranges of `contents`.
    pub links: Vec<(Range<usize>, SharedString)>,
}
128
/// Inline styling derived from tags (`<b>`, `<i>`, `<a>`, ...) or from a
/// `style` attribute.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct HtmlHighlightStyle {
    /// Set by `<i>` or `font-style: italic`.
    pub italic: bool,
    /// Set by `<ins>` or `text-decoration: underline`.
    pub underline: bool,
    /// Set by `<del>` or `text-decoration: line-through`.
    pub strikethrough: bool,
    /// Font weight; set by `<b>` / `<strong>` or the `font-weight` property.
    pub weight: FontWeight,
    /// Set for text inside an `<a>` that has an `href`.
    pub link: bool,
    /// Set by `<em>` or `font-style: oblique`.
    pub oblique: bool,
}
138
/// A parsed `<img>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct HtmlImage {
    /// Value of the `src` attribute.
    pub dest_url: SharedString,
    /// Range passed down from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Value of the `alt` attribute, if present.
    pub alt_text: Option<SharedString>,
    /// Width from the `width` attribute or, failing that, the `width` style
    /// property; `None` when missing or unparsable.
    pub width: Option<DefiniteLength>,
    /// Height from the `height` attribute or, failing that, the `height` style
    /// property; `None` when missing or unparsable.
    pub height: Option<DefiniteLength>,
}
148
149impl HtmlImage {
150 fn new(dest_url: String, source_range: Range<usize>) -> Self {
151 Self {
152 dest_url: dest_url.into(),
153 source_range,
154 alt_text: None,
155 width: None,
156 height: None,
157 }
158 }
159
160 fn set_alt_text(&mut self, alt_text: SharedString) {
161 self.alt_text = Some(alt_text);
162 }
163
164 fn set_width(&mut self, width: DefiniteLength) {
165 self.width = Some(width);
166 }
167
168 fn set_height(&mut self, height: DefiniteLength) {
169 self.height = Some(height);
170 }
171}
172
/// State threaded through the recursive block-level traversal.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth assigned to a list encountered at the current position.
    list_item_depth: u16,
}
177
178impl Default for ParseHtmlNodeContext {
179 fn default() -> Self {
180 Self { list_item_depth: 1 }
181 }
182}
183
184pub(crate) fn parse_html_block(
185 source: &str,
186 source_range: Range<usize>,
187) -> Option<ParsedHtmlBlock> {
188 let bytes = cleanup_html(source);
189 let mut cursor = std::io::Cursor::new(bytes);
190 let dom = parse_document(RcDom::default(), ParseOpts::default())
191 .from_utf8()
192 .read_from(&mut cursor)
193 .ok()?;
194
195 let mut children = Vec::new();
196 parse_html_node(
197 source_range.clone(),
198 &dom.document,
199 &mut children,
200 &ParseHtmlNodeContext::default(),
201 );
202
203 Some(ParsedHtmlBlock {
204 source_range,
205 children,
206 })
207}
208
209fn cleanup_html(source: &str) -> Vec<u8> {
210 let mut writer = std::io::Cursor::new(Vec::new());
211 let mut reader = std::io::Cursor::new(source);
212 let mut minify = Minifier::new(
213 &mut writer,
214 MinifierOptions {
215 omit_doctype: true,
216 collapse_whitespace: true,
217 ..Default::default()
218 },
219 );
220 if let Ok(()) = minify.minify(&mut reader) {
221 writer.into_inner()
222 } else {
223 source.bytes().collect()
224 }
225}
226
227#[stacksafe]
228fn parse_html_node(
229 source_range: Range<usize>,
230 node: &Node,
231 elements: &mut Vec<ParsedHtmlElement>,
232 context: &ParseHtmlNodeContext,
233) {
234 match &node.data {
235 NodeData::Document => {
236 consume_children(source_range, node, elements, context);
237 }
238 NodeData::Text { contents } => {
239 elements.push(ParsedHtmlElement::Paragraph(vec![
240 HtmlParagraphChunk::Text(ParsedHtmlText {
241 source_range,
242 highlights: Vec::default(),
243 links: Vec::default(),
244 contents: contents.borrow().to_string().into(),
245 }),
246 ]));
247 }
248 NodeData::Comment { .. } => {}
249 NodeData::Element { name, attrs, .. } => {
250 let mut styles = if let Some(styles) =
251 html_style_from_html_styles(extract_styles_from_attributes(attrs))
252 {
253 vec![styles]
254 } else {
255 Vec::default()
256 };
257
258 if name.local == local_name!("img") {
259 if let Some(image) = extract_image(source_range, attrs) {
260 elements.push(ParsedHtmlElement::Image(image));
261 }
262 } else if name.local == local_name!("p") {
263 let mut paragraph = HtmlParagraph::new();
264 parse_paragraph(
265 source_range,
266 node,
267 &mut paragraph,
268 &mut styles,
269 &mut Vec::new(),
270 );
271
272 if !paragraph.is_empty() {
273 elements.push(ParsedHtmlElement::Paragraph(paragraph));
274 }
275 } else if matches!(
276 name.local,
277 local_name!("h1")
278 | local_name!("h2")
279 | local_name!("h3")
280 | local_name!("h4")
281 | local_name!("h5")
282 | local_name!("h6")
283 ) {
284 let mut paragraph = HtmlParagraph::new();
285 consume_paragraph(
286 source_range.clone(),
287 node,
288 &mut paragraph,
289 &mut styles,
290 &mut Vec::new(),
291 );
292
293 if !paragraph.is_empty() {
294 elements.push(ParsedHtmlElement::Heading(ParsedHtmlHeading {
295 source_range,
296 level: match name.local {
297 local_name!("h1") => HeadingLevel::H1,
298 local_name!("h2") => HeadingLevel::H2,
299 local_name!("h3") => HeadingLevel::H3,
300 local_name!("h4") => HeadingLevel::H4,
301 local_name!("h5") => HeadingLevel::H5,
302 local_name!("h6") => HeadingLevel::H6,
303 _ => unreachable!(),
304 },
305 contents: paragraph,
306 }));
307 }
308 } else if name.local == local_name!("ul") || name.local == local_name!("ol") {
309 if let Some(list) = extract_html_list(
310 node,
311 name.local == local_name!("ol"),
312 context.list_item_depth,
313 source_range,
314 ) {
315 elements.push(ParsedHtmlElement::List(list));
316 }
317 } else if name.local == local_name!("blockquote") {
318 if let Some(blockquote) = extract_html_blockquote(node, source_range) {
319 elements.push(ParsedHtmlElement::BlockQuote(blockquote));
320 }
321 } else if name.local == local_name!("table") {
322 if let Some(table) = extract_html_table(node, source_range) {
323 elements.push(ParsedHtmlElement::Table(table));
324 }
325 } else {
326 consume_children(source_range, node, elements, context);
327 }
328 }
329 _ => {}
330 }
331}
332
333#[stacksafe]
334fn parse_paragraph(
335 source_range: Range<usize>,
336 node: &Node,
337 paragraph: &mut HtmlParagraph,
338 highlights: &mut Vec<HtmlHighlightStyle>,
339 links: &mut Vec<SharedString>,
340) {
341 fn items_with_range<T>(
342 range: Range<usize>,
343 items: impl IntoIterator<Item = T>,
344 ) -> Vec<(Range<usize>, T)> {
345 items
346 .into_iter()
347 .map(|item| (range.clone(), item))
348 .collect()
349 }
350
351 match &node.data {
352 NodeData::Text { contents } => {
353 if let Some(text) =
354 paragraph
355 .iter_mut()
356 .last()
357 .and_then(|paragraph_chunk| match paragraph_chunk {
358 HtmlParagraphChunk::Text(text) => Some(text),
359 _ => None,
360 })
361 {
362 let mut new_text = text.contents.to_string();
363 new_text.push_str(&contents.borrow());
364
365 text.highlights.extend(items_with_range(
366 text.contents.len()..new_text.len(),
367 mem::take(highlights),
368 ));
369 text.links.extend(items_with_range(
370 text.contents.len()..new_text.len(),
371 mem::take(links),
372 ));
373 text.contents = SharedString::from(new_text);
374 } else {
375 let contents = contents.borrow().to_string();
376 paragraph.push(HtmlParagraphChunk::Text(ParsedHtmlText {
377 source_range,
378 highlights: items_with_range(0..contents.len(), mem::take(highlights)),
379 links: items_with_range(0..contents.len(), mem::take(links)),
380 contents: contents.into(),
381 }));
382 }
383 }
384 NodeData::Element { name, attrs, .. } => {
385 if name.local == local_name!("img") {
386 if let Some(image) = extract_image(source_range, attrs) {
387 paragraph.push(HtmlParagraphChunk::Image(image));
388 }
389 } else if name.local == local_name!("b") || name.local == local_name!("strong") {
390 highlights.push(HtmlHighlightStyle {
391 weight: FontWeight::BOLD,
392 ..Default::default()
393 });
394 consume_paragraph(source_range, node, paragraph, highlights, links);
395 } else if name.local == local_name!("i") {
396 highlights.push(HtmlHighlightStyle {
397 italic: true,
398 ..Default::default()
399 });
400 consume_paragraph(source_range, node, paragraph, highlights, links);
401 } else if name.local == local_name!("em") {
402 highlights.push(HtmlHighlightStyle {
403 oblique: true,
404 ..Default::default()
405 });
406 consume_paragraph(source_range, node, paragraph, highlights, links);
407 } else if name.local == local_name!("del") {
408 highlights.push(HtmlHighlightStyle {
409 strikethrough: true,
410 ..Default::default()
411 });
412 consume_paragraph(source_range, node, paragraph, highlights, links);
413 } else if name.local == local_name!("ins") {
414 highlights.push(HtmlHighlightStyle {
415 underline: true,
416 ..Default::default()
417 });
418 consume_paragraph(source_range, node, paragraph, highlights, links);
419 } else if name.local == local_name!("a") {
420 if let Some(url) = attr_value(attrs, local_name!("href")) {
421 highlights.push(HtmlHighlightStyle {
422 link: true,
423 ..Default::default()
424 });
425 links.push(url.into());
426 }
427 consume_paragraph(source_range, node, paragraph, highlights, links);
428 } else {
429 consume_paragraph(source_range, node, paragraph, highlights, links);
430 }
431 }
432 _ => {}
433 }
434}
435
436fn consume_paragraph(
437 source_range: Range<usize>,
438 node: &Node,
439 paragraph: &mut HtmlParagraph,
440 highlights: &mut Vec<HtmlHighlightStyle>,
441 links: &mut Vec<SharedString>,
442) {
443 for child in node.children.borrow().iter() {
444 parse_paragraph(source_range.clone(), child, paragraph, highlights, links);
445 }
446}
447
448fn parse_table_row(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableRow> {
449 let mut columns = Vec::new();
450
451 if let NodeData::Element { name, .. } = &node.data {
452 if name.local != local_name!("tr") {
453 return None;
454 }
455
456 for child in node.children.borrow().iter() {
457 if let Some(column) = parse_table_column(source_range.clone(), child) {
458 columns.push(column);
459 }
460 }
461 }
462
463 if columns.is_empty() {
464 None
465 } else {
466 Some(ParsedHtmlTableRow { columns })
467 }
468}
469
470fn parse_table_column(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableColumn> {
471 match &node.data {
472 NodeData::Element { name, attrs, .. } => {
473 if !matches!(name.local, local_name!("th") | local_name!("td")) {
474 return None;
475 }
476
477 let mut children = HtmlParagraph::new();
478 consume_paragraph(
479 source_range,
480 node,
481 &mut children,
482 &mut Vec::new(),
483 &mut Vec::new(),
484 );
485
486 let is_header = name.local == local_name!("th");
487
488 Some(ParsedHtmlTableColumn {
489 col_span: std::cmp::max(
490 attr_value(attrs, local_name!("colspan"))
491 .and_then(|span| span.parse().ok())
492 .unwrap_or(1),
493 1,
494 ),
495 row_span: std::cmp::max(
496 attr_value(attrs, local_name!("rowspan"))
497 .and_then(|span| span.parse().ok())
498 .unwrap_or(1),
499 1,
500 ),
501 is_header,
502 children,
503 alignment: attr_value(attrs, local_name!("align"))
504 .and_then(|align| match align.as_str() {
505 "left" => Some(Alignment::Left),
506 "center" => Some(Alignment::Center),
507 "right" => Some(Alignment::Right),
508 _ => None,
509 })
510 .unwrap_or(if is_header {
511 Alignment::Center
512 } else {
513 Alignment::None
514 }),
515 })
516 }
517 _ => None,
518 }
519}
520
521fn consume_children(
522 source_range: Range<usize>,
523 node: &Node,
524 elements: &mut Vec<ParsedHtmlElement>,
525 context: &ParseHtmlNodeContext,
526) {
527 for child in node.children.borrow().iter() {
528 parse_html_node(source_range.clone(), child, elements, context);
529 }
530}
531
532fn attr_value(attrs: &RefCell<Vec<Attribute>>, name: LocalName) -> Option<String> {
533 attrs.borrow().iter().find_map(|attr| {
534 if attr.name.local == name {
535 Some(attr.value.to_string())
536 } else {
537 None
538 }
539 })
540}
541
542fn html_style_from_html_styles(styles: HashMap<String, String>) -> Option<HtmlHighlightStyle> {
543 let mut html_style = HtmlHighlightStyle::default();
544
545 if let Some(text_decoration) = styles.get("text-decoration") {
546 match text_decoration.to_lowercase().as_str() {
547 "underline" => {
548 html_style.underline = true;
549 }
550 "line-through" => {
551 html_style.strikethrough = true;
552 }
553 _ => {}
554 }
555 }
556
557 if let Some(font_style) = styles.get("font-style") {
558 match font_style.to_lowercase().as_str() {
559 "italic" => {
560 html_style.italic = true;
561 }
562 "oblique" => {
563 html_style.oblique = true;
564 }
565 _ => {}
566 }
567 }
568
569 if let Some(font_weight) = styles.get("font-weight") {
570 match font_weight.to_lowercase().as_str() {
571 "bold" => {
572 html_style.weight = FontWeight::BOLD;
573 }
574 "lighter" => {
575 html_style.weight = FontWeight::THIN;
576 }
577 _ => {
578 if let Ok(weight) = font_weight.parse::<f32>() {
579 html_style.weight = FontWeight(weight);
580 }
581 }
582 }
583 }
584
585 if html_style != HtmlHighlightStyle::default() {
586 Some(html_style)
587 } else {
588 None
589 }
590}
591
592fn extract_styles_from_attributes(attrs: &RefCell<Vec<Attribute>>) -> HashMap<String, String> {
593 let mut styles = HashMap::new();
594
595 if let Some(style) = attr_value(attrs, local_name!("style")) {
596 for declaration in style.split(';') {
597 let mut parts = declaration.splitn(2, ':');
598 if let Some((key, value)) = parts.next().zip(parts.next()) {
599 styles.insert(key.trim().to_lowercase(), value.trim().to_string());
600 }
601 }
602 }
603
604 styles
605}
606
607fn extract_image(source_range: Range<usize>, attrs: &RefCell<Vec<Attribute>>) -> Option<HtmlImage> {
608 let src = attr_value(attrs, local_name!("src"))?;
609
610 let mut image = HtmlImage::new(src, source_range);
611
612 if let Some(alt) = attr_value(attrs, local_name!("alt")) {
613 image.set_alt_text(alt.into());
614 }
615
616 let styles = extract_styles_from_attributes(attrs);
617
618 if let Some(width) = attr_value(attrs, local_name!("width"))
619 .or_else(|| styles.get("width").cloned())
620 .and_then(|width| parse_html_element_dimension(&width))
621 {
622 image.set_width(width);
623 }
624
625 if let Some(height) = attr_value(attrs, local_name!("height"))
626 .or_else(|| styles.get("height").cloned())
627 .and_then(|height| parse_html_element_dimension(&height))
628 {
629 image.set_height(height);
630 }
631
632 Some(image)
633}
634
635fn extract_html_list(
636 node: &Node,
637 ordered: bool,
638 depth: u16,
639 source_range: Range<usize>,
640) -> Option<ParsedHtmlList> {
641 let mut items = Vec::with_capacity(node.children.borrow().len());
642
643 for (index, child) in node.children.borrow().iter().enumerate() {
644 if let NodeData::Element { name, .. } = &child.data {
645 if name.local != local_name!("li") {
646 continue;
647 }
648
649 let mut content = Vec::new();
650 consume_children(
651 source_range.clone(),
652 child,
653 &mut content,
654 &ParseHtmlNodeContext {
655 list_item_depth: depth + 1,
656 },
657 );
658
659 if !content.is_empty() {
660 items.push(ParsedHtmlListItem {
661 source_range: source_range.clone(),
662 item_type: if ordered {
663 ParsedHtmlListItemType::Ordered(index as u64 + 1)
664 } else {
665 ParsedHtmlListItemType::Unordered
666 },
667 content,
668 });
669 }
670 }
671 }
672
673 if items.is_empty() {
674 None
675 } else {
676 Some(ParsedHtmlList {
677 source_range,
678 depth,
679 ordered,
680 items,
681 })
682 }
683}
684
685fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
686 if value.ends_with('%') {
687 value
688 .trim_end_matches('%')
689 .parse::<f32>()
690 .ok()
691 .map(|value| relative(value / 100.))
692 } else {
693 value
694 .trim_end_matches("px")
695 .parse()
696 .ok()
697 .map(|value| px(value).into())
698 }
699}
700
701fn extract_html_blockquote(
702 node: &Node,
703 source_range: Range<usize>,
704) -> Option<ParsedHtmlBlockQuote> {
705 let mut children = Vec::new();
706 consume_children(
707 source_range.clone(),
708 node,
709 &mut children,
710 &ParseHtmlNodeContext::default(),
711 );
712
713 if children.is_empty() {
714 None
715 } else {
716 Some(ParsedHtmlBlockQuote {
717 children,
718 source_range,
719 })
720 }
721}
722
723fn extract_html_table(node: &Node, source_range: Range<usize>) -> Option<ParsedHtmlTable> {
724 let mut header_rows = Vec::new();
725 let mut body_rows = Vec::new();
726 let mut caption = None;
727
728 for child in node.children.borrow().iter() {
729 if let NodeData::Element { name, .. } = &child.data {
730 if name.local == local_name!("caption") {
731 let mut paragraph = HtmlParagraph::new();
732 parse_paragraph(
733 source_range.clone(),
734 child,
735 &mut paragraph,
736 &mut Vec::new(),
737 &mut Vec::new(),
738 );
739 caption = Some(paragraph);
740 }
741
742 if name.local == local_name!("thead") {
743 for row in child.children.borrow().iter() {
744 if let Some(row) = parse_table_row(source_range.clone(), row) {
745 header_rows.push(row);
746 }
747 }
748 } else if name.local == local_name!("tbody") {
749 for row in child.children.borrow().iter() {
750 if let Some(row) = parse_table_row(source_range.clone(), row) {
751 body_rows.push(row);
752 }
753 }
754 }
755 }
756 }
757
758 if !header_rows.is_empty() || !body_rows.is_empty() {
759 Some(ParsedHtmlTable {
760 source_range,
761 body: body_rows,
762 header: header_rows,
763 caption,
764 })
765 } else {
766 None
767 }
768}
769
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline tags inside one paragraph collapse into a single text chunk
    /// whose highlights and links cover the right byte ranges.
    #[test]
    fn parses_html_styled_text() {
        let source =
            "<p>Some text <strong>strong</strong> <a href=\"https://example.com\">link</a></p>";
        // Derive the range from the literal instead of hand-counting bytes.
        let parsed = parse_html_block(source, 0..source.len()).unwrap();

        assert_eq!(parsed.children.len(), 1);
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph[0] else {
            panic!("expected text chunk");
        };

        assert_eq!(text.contents.as_ref(), "Some text strong link");
        assert_eq!(
            text.highlights,
            vec![
                (
                    10..16,
                    HtmlHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }
                ),
                (
                    17..21,
                    HtmlHighlightStyle {
                        link: true,
                        ..Default::default()
                    }
                )
            ]
        );
        assert_eq!(
            text.links,
            vec![(17..21, SharedString::from("https://example.com"))]
        );
    }

    /// `colspan` is read from the cell, and rows keep their own cell counts.
    #[test]
    fn parses_html_table_spans() {
        let source = "<table><tbody><tr><td colspan=\"2\">a</td></tr><tr><td>b</td><td>c</td></tr></tbody></table>";
        let parsed = parse_html_block(source, 0..source.len()).unwrap();

        let ParsedHtmlElement::Table(table) = &parsed.children[0] else {
            panic!("expected table");
        };
        assert_eq!(table.body.len(), 2);
        assert_eq!(table.body[0].columns[0].col_span, 2);
        assert_eq!(table.body[1].columns.len(), 2);
    }

    /// Nested lists are kept as explicit list nodes inside the parent item,
    /// one depth level deeper.
    #[test]
    fn parses_html_list_as_explicit_list_node() {
        let source = "<ul><li>parent<ul><li>child</li></ul></li><li>sibling</li></ul>";
        let parsed = parse_html_block(source, 0..source.len()).unwrap();

        assert_eq!(parsed.children.len(), 1);

        let ParsedHtmlElement::List(list) = &parsed.children[0] else {
            panic!("expected list");
        };

        assert!(!list.ordered);
        assert_eq!(list.depth, 1);
        assert_eq!(list.items.len(), 2);

        let first_item = &list.items[0];
        let ParsedHtmlElement::Paragraph(paragraph) = &first_item.content[0] else {
            panic!("expected first item paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph[0] else {
            panic!("expected first item text");
        };
        assert_eq!(text.contents.as_ref(), "parent");

        let ParsedHtmlElement::List(nested_list) = &first_item.content[1] else {
            panic!("expected nested list");
        };
        assert_eq!(nested_list.depth, 2);
        assert_eq!(nested_list.items.len(), 1);

        let ParsedHtmlElement::Paragraph(nested_paragraph) = &nested_list.items[0].content[0]
        else {
            panic!("expected nested item paragraph");
        };
        let HtmlParagraphChunk::Text(nested_text) = &nested_paragraph[0] else {
            panic!("expected nested item text");
        };
        assert_eq!(nested_text.contents.as_ref(), "child");

        let second_item = &list.items[1];
        let ParsedHtmlElement::Paragraph(second_paragraph) = &second_item.content[0] else {
            panic!("expected second item paragraph");
        };
        let HtmlParagraphChunk::Text(second_text) = &second_paragraph[0] else {
            panic!("expected second item text");
        };
        assert_eq!(second_text.contents.as_ref(), "sibling");
    }
}