html_parser.rs

  1use std::{cell::RefCell, collections::HashMap, mem, ops::Range};
  2
  3use gpui::{DefiniteLength, FontWeight, SharedString, px, relative};
  4use html5ever::{
  5    Attribute, LocalName, ParseOpts, local_name, parse_document, tendril::TendrilSink,
  6};
  7use markup5ever_rcdom::{Node, NodeData, RcDom};
  8use pulldown_cmark::{Alignment, HeadingLevel};
  9use stacksafe::stacksafe;
 10
 11use crate::html::html_minifier::{Minifier, MinifierOptions};
 12
/// The result of parsing one block of HTML source with [`parse_html_block`].
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlock {
    /// The range handed to [`parse_html_block`] by the caller, stored unchanged.
    pub source_range: Range<usize>,
    /// Block-level elements collected from the parsed document, in document order.
    pub children: Vec<ParsedHtmlElement>,
}
 19
/// A block-level element produced by the HTML parser.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlElement {
    /// An `<h1>`–`<h6>` element with inline content.
    Heading(ParsedHtmlHeading),
    /// A `<ul>` or `<ol>` element.
    List(ParsedHtmlList),
    /// A `<table>` element.
    Table(ParsedHtmlTable),
    /// A `<blockquote>` element.
    BlockQuote(ParsedHtmlBlockQuote),
    /// A `<p>` element, or a text node found where a block element was expected.
    Paragraph(HtmlParagraph),
    /// An `<img>` element found where a block element was expected.
    Image(HtmlImage),
}
 30
 31impl ParsedHtmlElement {
 32    pub fn source_range(&self) -> Option<Range<usize>> {
 33        Some(match self {
 34            Self::Heading(heading) => heading.source_range.clone(),
 35            Self::List(list) => list.source_range.clone(),
 36            Self::Table(table) => table.source_range.clone(),
 37            Self::BlockQuote(block_quote) => block_quote.source_range.clone(),
 38            Self::Paragraph(text) => match text.first()? {
 39                HtmlParagraphChunk::Text(text) => text.source_range.clone(),
 40                HtmlParagraphChunk::Image(image) => image.source_range.clone(),
 41            },
 42            Self::Image(image) => image.source_range.clone(),
 43        })
 44    }
 45}
 46
/// Inline content: an ordered sequence of text runs and images.
pub(crate) type HtmlParagraph = Vec<HtmlParagraphChunk>;
 48
/// One piece of inline content inside an [`HtmlParagraph`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum HtmlParagraphChunk {
    /// A run of text together with its highlights and links.
    Text(ParsedHtmlText),
    /// An inline `<img>`.
    Image(HtmlImage),
}
 55
/// A parsed `<ul>` or `<ol>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlList {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Nesting depth of the list: 1 under the default parse context, and one more than
    /// the enclosing list for a list found inside a list item.
    pub depth: u16,
    /// `true` for `<ol>`, `false` for `<ul>`.
    pub ordered: bool,
    /// The list's items, in document order.
    pub items: Vec<ParsedHtmlListItem>,
}
 64
/// A single `<li>` of a [`ParsedHtmlList`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlListItem {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Whether the item is numbered, and with which number.
    pub item_type: ParsedHtmlListItemType,
    /// Block-level elements parsed from the item's children.
    pub content: Vec<ParsedHtmlElement>,
}
 72
/// The kind of marker a list item carries.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlListItemType {
    /// An item of an ordered list; the payload is its number, counted from 1.
    Ordered(u64),
    /// An item of an unordered list.
    Unordered,
}
 79
/// A parsed `<h1>`–`<h6>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlHeading {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Heading level derived from the tag name (`<h1>` is `H1`, …, `<h6>` is `H6`).
    pub level: HeadingLevel,
    /// The heading's inline content.
    pub contents: HtmlParagraph,
}
 87
/// A parsed `<table>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTable {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Rows that make up the table header.
    pub header: Vec<ParsedHtmlTableRow>,
    /// Rows that make up the table body.
    pub body: Vec<ParsedHtmlTableRow>,
    /// Inline content of the `<caption>` element, when the table has one.
    pub caption: Option<HtmlParagraph>,
}
 96
/// A single table cell (`<td>` or `<th>`).
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableColumn {
    /// Number of columns the cell spans (`colspan`); never less than 1.
    pub col_span: usize,
    /// Number of rows the cell spans (`rowspan`); never less than 1.
    pub row_span: usize,
    /// `true` for `<th>`, `false` for `<td>`.
    pub is_header: bool,
    /// The cell's inline content.
    pub children: HtmlParagraph,
    /// Alignment taken from the `align` attribute; without a recognised value, header
    /// cells are centered and other cells use `Alignment::None`.
    pub alignment: Alignment,
}
106
/// A parsed `<tr>` element.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableRow {
    /// The row's cells, in document order.
    pub columns: Vec<ParsedHtmlTableColumn>,
}
112
/// A parsed `<blockquote>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlockQuote {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Block-level elements parsed from the quote's children.
    pub children: Vec<ParsedHtmlElement>,
}
119
/// A run of text with the styling and links that apply to parts of it.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlText {
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// The text itself; adjacent text nodes of one paragraph are concatenated here.
    pub contents: SharedString,
    /// Styles paired with the byte range of `contents` they apply to.
    pub highlights: Vec<(Range<usize>, HtmlHighlightStyle)>,
    /// Link targets paired with the byte range of `contents` they apply to.
    pub links: Vec<(Range<usize>, SharedString)>,
}
128
/// Inline text styling derived from HTML tags and inline CSS.
///
/// The default value carries no styling; the parser treats a style equal to the default
/// as "nothing to apply".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct HtmlHighlightStyle {
    /// Set by `<i>` and by `font-style: italic`.
    pub italic: bool,
    /// Set by `<ins>` and by `text-decoration: underline`.
    pub underline: bool,
    /// Set by `<del>` and by `text-decoration: line-through`.
    pub strikethrough: bool,
    /// Font weight; set by `<b>`/`<strong>` and by `font-weight`.
    pub weight: FontWeight,
    /// Set for text inside an `<a>` element that has an `href` attribute.
    pub link: bool,
    /// Set by `<em>` and by `font-style: oblique`.
    pub oblique: bool,
}
138
/// A parsed `<img>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct HtmlImage {
    /// Value of the `src` attribute.
    pub dest_url: SharedString,
    /// The range passed down from [`parse_html_block`].
    pub source_range: Range<usize>,
    /// Value of the `alt` attribute, when present.
    pub alt_text: Option<SharedString>,
    /// Width taken from the `width` attribute or the `width` style property.
    pub width: Option<DefiniteLength>,
    /// Height taken from the `height` attribute or the `height` style property.
    pub height: Option<DefiniteLength>,
}
148
149impl HtmlImage {
150    fn new(dest_url: String, source_range: Range<usize>) -> Self {
151        Self {
152            dest_url: dest_url.into(),
153            source_range,
154            alt_text: None,
155            width: None,
156            height: None,
157        }
158    }
159
160    fn set_alt_text(&mut self, alt_text: SharedString) {
161        self.alt_text = Some(alt_text);
162    }
163
164    fn set_width(&mut self, width: DefiniteLength) {
165        self.width = Some(width);
166    }
167
168    fn set_height(&mut self, height: DefiniteLength) {
169        self.height = Some(height);
170    }
171}
172
/// State threaded through the recursive block-level parse.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth assigned to the next `<ul>`/`<ol>` encountered while parsing.
    list_item_depth: u16,
}
177
178impl Default for ParseHtmlNodeContext {
179    fn default() -> Self {
180        Self { list_item_depth: 1 }
181    }
182}
183
184pub(crate) fn parse_html_block(
185    source: &str,
186    source_range: Range<usize>,
187) -> Option<ParsedHtmlBlock> {
188    let bytes = cleanup_html(source);
189    let mut cursor = std::io::Cursor::new(bytes);
190    let dom = parse_document(RcDom::default(), ParseOpts::default())
191        .from_utf8()
192        .read_from(&mut cursor)
193        .ok()?;
194
195    let mut children = Vec::new();
196    parse_html_node(
197        source_range.clone(),
198        &dom.document,
199        &mut children,
200        &ParseHtmlNodeContext::default(),
201    );
202
203    Some(ParsedHtmlBlock {
204        source_range,
205        children,
206    })
207}
208
209fn cleanup_html(source: &str) -> Vec<u8> {
210    let mut writer = std::io::Cursor::new(Vec::new());
211    let mut reader = std::io::Cursor::new(source);
212    let mut minify = Minifier::new(
213        &mut writer,
214        MinifierOptions {
215            omit_doctype: true,
216            collapse_whitespace: true,
217            ..Default::default()
218        },
219    );
220    if let Ok(()) = minify.minify(&mut reader) {
221        writer.into_inner()
222    } else {
223        source.bytes().collect()
224    }
225}
226
227#[stacksafe]
228fn parse_html_node(
229    source_range: Range<usize>,
230    node: &Node,
231    elements: &mut Vec<ParsedHtmlElement>,
232    context: &ParseHtmlNodeContext,
233) {
234    match &node.data {
235        NodeData::Document => {
236            consume_children(source_range, node, elements, context);
237        }
238        NodeData::Text { contents } => {
239            elements.push(ParsedHtmlElement::Paragraph(vec![
240                HtmlParagraphChunk::Text(ParsedHtmlText {
241                    source_range,
242                    highlights: Vec::default(),
243                    links: Vec::default(),
244                    contents: contents.borrow().to_string().into(),
245                }),
246            ]));
247        }
248        NodeData::Comment { .. } => {}
249        NodeData::Element { name, attrs, .. } => {
250            let mut styles = if let Some(styles) =
251                html_style_from_html_styles(extract_styles_from_attributes(attrs))
252            {
253                vec![styles]
254            } else {
255                Vec::default()
256            };
257
258            if name.local == local_name!("img") {
259                if let Some(image) = extract_image(source_range, attrs) {
260                    elements.push(ParsedHtmlElement::Image(image));
261                }
262            } else if name.local == local_name!("p") {
263                let mut paragraph = HtmlParagraph::new();
264                parse_paragraph(
265                    source_range,
266                    node,
267                    &mut paragraph,
268                    &mut styles,
269                    &mut Vec::new(),
270                );
271
272                if !paragraph.is_empty() {
273                    elements.push(ParsedHtmlElement::Paragraph(paragraph));
274                }
275            } else if matches!(
276                name.local,
277                local_name!("h1")
278                    | local_name!("h2")
279                    | local_name!("h3")
280                    | local_name!("h4")
281                    | local_name!("h5")
282                    | local_name!("h6")
283            ) {
284                let mut paragraph = HtmlParagraph::new();
285                consume_paragraph(
286                    source_range.clone(),
287                    node,
288                    &mut paragraph,
289                    &mut styles,
290                    &mut Vec::new(),
291                );
292
293                if !paragraph.is_empty() {
294                    elements.push(ParsedHtmlElement::Heading(ParsedHtmlHeading {
295                        source_range,
296                        level: match name.local {
297                            local_name!("h1") => HeadingLevel::H1,
298                            local_name!("h2") => HeadingLevel::H2,
299                            local_name!("h3") => HeadingLevel::H3,
300                            local_name!("h4") => HeadingLevel::H4,
301                            local_name!("h5") => HeadingLevel::H5,
302                            local_name!("h6") => HeadingLevel::H6,
303                            _ => unreachable!(),
304                        },
305                        contents: paragraph,
306                    }));
307                }
308            } else if name.local == local_name!("ul") || name.local == local_name!("ol") {
309                if let Some(list) = extract_html_list(
310                    node,
311                    name.local == local_name!("ol"),
312                    context.list_item_depth,
313                    source_range,
314                ) {
315                    elements.push(ParsedHtmlElement::List(list));
316                }
317            } else if name.local == local_name!("blockquote") {
318                if let Some(blockquote) = extract_html_blockquote(node, source_range) {
319                    elements.push(ParsedHtmlElement::BlockQuote(blockquote));
320                }
321            } else if name.local == local_name!("table") {
322                if let Some(table) = extract_html_table(node, source_range) {
323                    elements.push(ParsedHtmlElement::Table(table));
324                }
325            } else {
326                consume_children(source_range, node, elements, context);
327            }
328        }
329        _ => {}
330    }
331}
332
333#[stacksafe]
334fn parse_paragraph(
335    source_range: Range<usize>,
336    node: &Node,
337    paragraph: &mut HtmlParagraph,
338    highlights: &mut Vec<HtmlHighlightStyle>,
339    links: &mut Vec<SharedString>,
340) {
341    fn items_with_range<T>(
342        range: Range<usize>,
343        items: impl IntoIterator<Item = T>,
344    ) -> Vec<(Range<usize>, T)> {
345        items
346            .into_iter()
347            .map(|item| (range.clone(), item))
348            .collect()
349    }
350
351    match &node.data {
352        NodeData::Text { contents } => {
353            if let Some(text) =
354                paragraph
355                    .iter_mut()
356                    .last()
357                    .and_then(|paragraph_chunk| match paragraph_chunk {
358                        HtmlParagraphChunk::Text(text) => Some(text),
359                        _ => None,
360                    })
361            {
362                let mut new_text = text.contents.to_string();
363                new_text.push_str(&contents.borrow());
364
365                text.highlights.extend(items_with_range(
366                    text.contents.len()..new_text.len(),
367                    mem::take(highlights),
368                ));
369                text.links.extend(items_with_range(
370                    text.contents.len()..new_text.len(),
371                    mem::take(links),
372                ));
373                text.contents = SharedString::from(new_text);
374            } else {
375                let contents = contents.borrow().to_string();
376                paragraph.push(HtmlParagraphChunk::Text(ParsedHtmlText {
377                    source_range,
378                    highlights: items_with_range(0..contents.len(), mem::take(highlights)),
379                    links: items_with_range(0..contents.len(), mem::take(links)),
380                    contents: contents.into(),
381                }));
382            }
383        }
384        NodeData::Element { name, attrs, .. } => {
385            if name.local == local_name!("img") {
386                if let Some(image) = extract_image(source_range, attrs) {
387                    paragraph.push(HtmlParagraphChunk::Image(image));
388                }
389            } else if name.local == local_name!("b") || name.local == local_name!("strong") {
390                highlights.push(HtmlHighlightStyle {
391                    weight: FontWeight::BOLD,
392                    ..Default::default()
393                });
394                consume_paragraph(source_range, node, paragraph, highlights, links);
395            } else if name.local == local_name!("i") {
396                highlights.push(HtmlHighlightStyle {
397                    italic: true,
398                    ..Default::default()
399                });
400                consume_paragraph(source_range, node, paragraph, highlights, links);
401            } else if name.local == local_name!("em") {
402                highlights.push(HtmlHighlightStyle {
403                    oblique: true,
404                    ..Default::default()
405                });
406                consume_paragraph(source_range, node, paragraph, highlights, links);
407            } else if name.local == local_name!("del") {
408                highlights.push(HtmlHighlightStyle {
409                    strikethrough: true,
410                    ..Default::default()
411                });
412                consume_paragraph(source_range, node, paragraph, highlights, links);
413            } else if name.local == local_name!("ins") {
414                highlights.push(HtmlHighlightStyle {
415                    underline: true,
416                    ..Default::default()
417                });
418                consume_paragraph(source_range, node, paragraph, highlights, links);
419            } else if name.local == local_name!("a") {
420                if let Some(url) = attr_value(attrs, local_name!("href")) {
421                    highlights.push(HtmlHighlightStyle {
422                        link: true,
423                        ..Default::default()
424                    });
425                    links.push(url.into());
426                }
427                consume_paragraph(source_range, node, paragraph, highlights, links);
428            } else {
429                consume_paragraph(source_range, node, paragraph, highlights, links);
430            }
431        }
432        _ => {}
433    }
434}
435
436fn consume_paragraph(
437    source_range: Range<usize>,
438    node: &Node,
439    paragraph: &mut HtmlParagraph,
440    highlights: &mut Vec<HtmlHighlightStyle>,
441    links: &mut Vec<SharedString>,
442) {
443    for child in node.children.borrow().iter() {
444        parse_paragraph(source_range.clone(), child, paragraph, highlights, links);
445    }
446}
447
448fn parse_table_row(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableRow> {
449    let mut columns = Vec::new();
450
451    if let NodeData::Element { name, .. } = &node.data {
452        if name.local != local_name!("tr") {
453            return None;
454        }
455
456        for child in node.children.borrow().iter() {
457            if let Some(column) = parse_table_column(source_range.clone(), child) {
458                columns.push(column);
459            }
460        }
461    }
462
463    if columns.is_empty() {
464        None
465    } else {
466        Some(ParsedHtmlTableRow { columns })
467    }
468}
469
470fn parse_table_column(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableColumn> {
471    match &node.data {
472        NodeData::Element { name, attrs, .. } => {
473            if !matches!(name.local, local_name!("th") | local_name!("td")) {
474                return None;
475            }
476
477            let mut children = HtmlParagraph::new();
478            consume_paragraph(
479                source_range,
480                node,
481                &mut children,
482                &mut Vec::new(),
483                &mut Vec::new(),
484            );
485
486            let is_header = name.local == local_name!("th");
487
488            Some(ParsedHtmlTableColumn {
489                col_span: std::cmp::max(
490                    attr_value(attrs, local_name!("colspan"))
491                        .and_then(|span| span.parse().ok())
492                        .unwrap_or(1),
493                    1,
494                ),
495                row_span: std::cmp::max(
496                    attr_value(attrs, local_name!("rowspan"))
497                        .and_then(|span| span.parse().ok())
498                        .unwrap_or(1),
499                    1,
500                ),
501                is_header,
502                children,
503                alignment: attr_value(attrs, local_name!("align"))
504                    .and_then(|align| match align.as_str() {
505                        "left" => Some(Alignment::Left),
506                        "center" => Some(Alignment::Center),
507                        "right" => Some(Alignment::Right),
508                        _ => None,
509                    })
510                    .unwrap_or(if is_header {
511                        Alignment::Center
512                    } else {
513                        Alignment::None
514                    }),
515            })
516        }
517        _ => None,
518    }
519}
520
521fn consume_children(
522    source_range: Range<usize>,
523    node: &Node,
524    elements: &mut Vec<ParsedHtmlElement>,
525    context: &ParseHtmlNodeContext,
526) {
527    for child in node.children.borrow().iter() {
528        parse_html_node(source_range.clone(), child, elements, context);
529    }
530}
531
532fn attr_value(attrs: &RefCell<Vec<Attribute>>, name: LocalName) -> Option<String> {
533    attrs.borrow().iter().find_map(|attr| {
534        if attr.name.local == name {
535            Some(attr.value.to_string())
536        } else {
537            None
538        }
539    })
540}
541
542fn html_style_from_html_styles(styles: HashMap<String, String>) -> Option<HtmlHighlightStyle> {
543    let mut html_style = HtmlHighlightStyle::default();
544
545    if let Some(text_decoration) = styles.get("text-decoration") {
546        match text_decoration.to_lowercase().as_str() {
547            "underline" => {
548                html_style.underline = true;
549            }
550            "line-through" => {
551                html_style.strikethrough = true;
552            }
553            _ => {}
554        }
555    }
556
557    if let Some(font_style) = styles.get("font-style") {
558        match font_style.to_lowercase().as_str() {
559            "italic" => {
560                html_style.italic = true;
561            }
562            "oblique" => {
563                html_style.oblique = true;
564            }
565            _ => {}
566        }
567    }
568
569    if let Some(font_weight) = styles.get("font-weight") {
570        match font_weight.to_lowercase().as_str() {
571            "bold" => {
572                html_style.weight = FontWeight::BOLD;
573            }
574            "lighter" => {
575                html_style.weight = FontWeight::THIN;
576            }
577            _ => {
578                if let Ok(weight) = font_weight.parse::<f32>() {
579                    html_style.weight = FontWeight(weight);
580                }
581            }
582        }
583    }
584
585    if html_style != HtmlHighlightStyle::default() {
586        Some(html_style)
587    } else {
588        None
589    }
590}
591
592fn extract_styles_from_attributes(attrs: &RefCell<Vec<Attribute>>) -> HashMap<String, String> {
593    let mut styles = HashMap::new();
594
595    if let Some(style) = attr_value(attrs, local_name!("style")) {
596        for declaration in style.split(';') {
597            let mut parts = declaration.splitn(2, ':');
598            if let Some((key, value)) = parts.next().zip(parts.next()) {
599                styles.insert(key.trim().to_lowercase(), value.trim().to_string());
600            }
601        }
602    }
603
604    styles
605}
606
607fn extract_image(source_range: Range<usize>, attrs: &RefCell<Vec<Attribute>>) -> Option<HtmlImage> {
608    let src = attr_value(attrs, local_name!("src"))?;
609
610    let mut image = HtmlImage::new(src, source_range);
611
612    if let Some(alt) = attr_value(attrs, local_name!("alt")) {
613        image.set_alt_text(alt.into());
614    }
615
616    let styles = extract_styles_from_attributes(attrs);
617
618    if let Some(width) = attr_value(attrs, local_name!("width"))
619        .or_else(|| styles.get("width").cloned())
620        .and_then(|width| parse_html_element_dimension(&width))
621    {
622        image.set_width(width);
623    }
624
625    if let Some(height) = attr_value(attrs, local_name!("height"))
626        .or_else(|| styles.get("height").cloned())
627        .and_then(|height| parse_html_element_dimension(&height))
628    {
629        image.set_height(height);
630    }
631
632    Some(image)
633}
634
635fn extract_html_list(
636    node: &Node,
637    ordered: bool,
638    depth: u16,
639    source_range: Range<usize>,
640) -> Option<ParsedHtmlList> {
641    let mut items = Vec::with_capacity(node.children.borrow().len());
642
643    for (index, child) in node.children.borrow().iter().enumerate() {
644        if let NodeData::Element { name, .. } = &child.data {
645            if name.local != local_name!("li") {
646                continue;
647            }
648
649            let mut content = Vec::new();
650            consume_children(
651                source_range.clone(),
652                child,
653                &mut content,
654                &ParseHtmlNodeContext {
655                    list_item_depth: depth + 1,
656                },
657            );
658
659            if !content.is_empty() {
660                items.push(ParsedHtmlListItem {
661                    source_range: source_range.clone(),
662                    item_type: if ordered {
663                        ParsedHtmlListItemType::Ordered(index as u64 + 1)
664                    } else {
665                        ParsedHtmlListItemType::Unordered
666                    },
667                    content,
668                });
669            }
670        }
671    }
672
673    if items.is_empty() {
674        None
675    } else {
676        Some(ParsedHtmlList {
677            source_range,
678            depth,
679            ordered,
680            items,
681        })
682    }
683}
684
685fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
686    if value.ends_with('%') {
687        value
688            .trim_end_matches('%')
689            .parse::<f32>()
690            .ok()
691            .map(|value| relative(value / 100.))
692    } else {
693        value
694            .trim_end_matches("px")
695            .parse()
696            .ok()
697            .map(|value| px(value).into())
698    }
699}
700
701fn extract_html_blockquote(
702    node: &Node,
703    source_range: Range<usize>,
704) -> Option<ParsedHtmlBlockQuote> {
705    let mut children = Vec::new();
706    consume_children(
707        source_range.clone(),
708        node,
709        &mut children,
710        &ParseHtmlNodeContext::default(),
711    );
712
713    if children.is_empty() {
714        None
715    } else {
716        Some(ParsedHtmlBlockQuote {
717            children,
718            source_range,
719        })
720    }
721}
722
723fn extract_html_table(node: &Node, source_range: Range<usize>) -> Option<ParsedHtmlTable> {
724    let mut header_rows = Vec::new();
725    let mut body_rows = Vec::new();
726    let mut caption = None;
727
728    for child in node.children.borrow().iter() {
729        if let NodeData::Element { name, .. } = &child.data {
730            if name.local == local_name!("caption") {
731                let mut paragraph = HtmlParagraph::new();
732                parse_paragraph(
733                    source_range.clone(),
734                    child,
735                    &mut paragraph,
736                    &mut Vec::new(),
737                    &mut Vec::new(),
738                );
739                caption = Some(paragraph);
740            }
741
742            if name.local == local_name!("thead") {
743                for row in child.children.borrow().iter() {
744                    if let Some(row) = parse_table_row(source_range.clone(), row) {
745                        header_rows.push(row);
746                    }
747                }
748            } else if name.local == local_name!("tbody") {
749                for row in child.children.borrow().iter() {
750                    if let Some(row) = parse_table_row(source_range.clone(), row) {
751                        body_rows.push(row);
752                    }
753                }
754            }
755        }
756    }
757
758    if !header_rows.is_empty() || !body_rows.is_empty() {
759        Some(ParsedHtmlTable {
760            source_range,
761            body: body_rows,
762            header: header_rows,
763            caption,
764        })
765    } else {
766        None
767    }
768}
769
// Unit tests that run `parse_html_block` on small HTML snippets and inspect the parsed
// structure.
#[cfg(test)]
mod tests {
    use super::*;

    // Inline styling: adjacent text nodes are merged into one text chunk, and the
    // highlight/link ranges are byte offsets into the merged text.
    #[test]
    fn parses_html_styled_text() {
        let parsed = parse_html_block(
            "<p>Some text <strong>strong</strong> <a href=\"https://example.com\">link</a></p>",
            0..79,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph[0] else {
            panic!("expected text chunk");
        };

        assert_eq!(text.contents.as_ref(), "Some text strong link");
        assert_eq!(
            text.highlights,
            vec![
                (
                    10..16,
                    HtmlHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }
                ),
                (
                    17..21,
                    HtmlHighlightStyle {
                        link: true,
                        ..Default::default()
                    }
                )
            ]
        );
        assert_eq!(
            text.links,
            vec![(17..21, SharedString::from("https://example.com"))]
        );
    }

    // Tables: `colspan` is read from the cell, and each row keeps its own cells.
    #[test]
    fn parses_html_table_spans() {
        let parsed = parse_html_block(
            "<table><tbody><tr><td colspan=\"2\">a</td></tr><tr><td>b</td><td>c</td></tr></tbody></table>",
            0..91,
        )
        .unwrap();

        let ParsedHtmlElement::Table(table) = &parsed.children[0] else {
            panic!("expected table");
        };
        assert_eq!(table.body.len(), 2);
        assert_eq!(table.body[0].columns[0].col_span, 2);
        assert_eq!(table.body[1].columns.len(), 2);
    }

    // Lists: a nested list is kept as a `List` element inside its parent item, one
    // level deeper than the outer list.
    #[test]
    fn parses_html_list_as_explicit_list_node() {
        let parsed = parse_html_block(
            "<ul><li>parent<ul><li>child</li></ul></li><li>sibling</li></ul>",
            0..64,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);

        let ParsedHtmlElement::List(list) = &parsed.children[0] else {
            panic!("expected list");
        };

        assert!(!list.ordered);
        assert_eq!(list.depth, 1);
        assert_eq!(list.items.len(), 2);

        // First item: its own text followed by the nested list.
        let first_item = &list.items[0];
        let ParsedHtmlElement::Paragraph(paragraph) = &first_item.content[0] else {
            panic!("expected first item paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph[0] else {
            panic!("expected first item text");
        };
        assert_eq!(text.contents.as_ref(), "parent");

        let ParsedHtmlElement::List(nested_list) = &first_item.content[1] else {
            panic!("expected nested list");
        };
        assert_eq!(nested_list.depth, 2);
        assert_eq!(nested_list.items.len(), 1);

        let ParsedHtmlElement::Paragraph(nested_paragraph) = &nested_list.items[0].content[0]
        else {
            panic!("expected nested item paragraph");
        };
        let HtmlParagraphChunk::Text(nested_text) = &nested_paragraph[0] else {
            panic!("expected nested item text");
        };
        assert_eq!(nested_text.contents.as_ref(), "child");

        // Second item: plain text only.
        let second_item = &list.items[1];
        let ParsedHtmlElement::Paragraph(second_paragraph) = &second_item.content[0] else {
            panic!("expected second item paragraph");
        };
        let HtmlParagraphChunk::Text(second_text) = &second_paragraph[0] else {
            panic!("expected second item text");
        };
        assert_eq!(second_text.contents.as_ref(), "sibling");
    }
}