html_parser.rs

  1use std::{cell::RefCell, collections::HashMap, mem, ops::Range};
  2
  3use gpui::{DefiniteLength, FontWeight, SharedString, TextAlign, px, relative};
  4use html5ever::{
  5    Attribute, LocalName, ParseOpts, local_name, parse_document, tendril::TendrilSink,
  6};
  7use markup5ever_rcdom::{Node, NodeData, RcDom};
  8use pulldown_cmark::{Alignment, HeadingLevel};
  9use stacksafe::stacksafe;
 10
 11use crate::html::html_minifier::{Minifier, MinifierOptions};
 12
/// Result of parsing one block of HTML source.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlock {
    /// Range supplied by the caller of `parse_html_block`; stored unchanged.
    pub source_range: Range<usize>,
    /// Block-level elements produced from the document, in document order.
    pub children: Vec<ParsedHtmlElement>,
}
 19
/// A block-level element recognized by the HTML parser.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlElement {
    /// An `<h1>`..`<h6>` element.
    Heading(ParsedHtmlHeading),
    /// A `<ul>` or `<ol>` element.
    List(ParsedHtmlList),
    /// A `<table>` element.
    Table(ParsedHtmlTable),
    /// A `<blockquote>` element.
    BlockQuote(ParsedHtmlBlockQuote),
    /// A `<p>` element, or a text node found outside of any paragraph.
    Paragraph(ParsedHtmlParagraph),
    /// An `<img>` element found outside of any paragraph.
    Image(HtmlImage),
}
 30
/// A paragraph of inline content together with its optional alignment.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlParagraph {
    /// Alignment taken from the element's `text-align` style or `align`
    /// attribute; `None` when neither is present or recognized.
    pub text_align: Option<TextAlign>,
    /// Text and image chunks making up the paragraph.
    pub contents: HtmlParagraph,
}
 37
 38impl ParsedHtmlElement {
 39    pub fn source_range(&self) -> Option<Range<usize>> {
 40        Some(match self {
 41            Self::Heading(heading) => heading.source_range.clone(),
 42            Self::List(list) => list.source_range.clone(),
 43            Self::Table(table) => table.source_range.clone(),
 44            Self::BlockQuote(block_quote) => block_quote.source_range.clone(),
 45            Self::Paragraph(paragraph) => match paragraph.contents.first()? {
 46                HtmlParagraphChunk::Text(text) => text.source_range.clone(),
 47                HtmlParagraphChunk::Image(image) => image.source_range.clone(),
 48            },
 49            Self::Image(image) => image.source_range.clone(),
 50        })
 51    }
 52}
 53
/// Inline content of a paragraph-like element: an ordered list of text and image chunks.
pub(crate) type HtmlParagraph = Vec<HtmlParagraphChunk>;
 55
/// One piece of inline content inside a paragraph, heading, caption or table cell.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum HtmlParagraphChunk {
    /// A run of text with its highlights and links.
    Text(ParsedHtmlText),
    /// An inline `<img>` element.
    Image(HtmlImage),
}
 62
/// A parsed `<ul>` or `<ol>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlList {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Nesting depth tracked by the parser; a list outside any list item has depth 1.
    pub depth: u16,
    /// `true` for `<ol>`, `false` for `<ul>`.
    pub ordered: bool,
    /// The list's non-empty `<li>` items, in document order.
    pub items: Vec<ParsedHtmlListItem>,
}
 71
/// A single `<li>` item of a parsed list.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlListItem {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Whether the item belongs to an ordered or unordered list.
    pub item_type: ParsedHtmlListItemType,
    /// Block-level elements parsed from the item's children (never empty).
    pub content: Vec<ParsedHtmlElement>,
}
 79
/// Marker kind of a list item.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlListItemType {
    /// Item of an ordered list, carrying the number the parser assigned to it.
    Ordered(u64),
    /// Item of an unordered list.
    Unordered,
}
 86
/// A parsed `<h1>`..`<h6>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlHeading {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Heading level corresponding to the tag name (`h1` => `H1`, ...).
    pub level: HeadingLevel,
    /// Inline content of the heading.
    pub contents: HtmlParagraph,
    /// Alignment taken from the element's `text-align` style or `align` attribute.
    pub text_align: Option<TextAlign>,
}
 95
/// A parsed `<table>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTable {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Rows found inside `<thead>`.
    pub header: Vec<ParsedHtmlTableRow>,
    /// Rows making up the table body.
    pub body: Vec<ParsedHtmlTableRow>,
    /// Inline content of the `<caption>` element, when one is present.
    pub caption: Option<HtmlParagraph>,
}
104
/// A single `<th>` or `<td>` cell of a table row.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableColumn {
    /// Value of the `colspan` attribute; at least 1, and 1 when missing or unparsable.
    pub col_span: usize,
    /// Value of the `rowspan` attribute; at least 1, and 1 when missing or unparsable.
    pub row_span: usize,
    /// `true` for `<th>` cells, `false` for `<td>` cells.
    pub is_header: bool,
    /// Inline content of the cell.
    pub children: HtmlParagraph,
    /// Alignment from the `align` attribute; when absent or unrecognized,
    /// header cells are centered and data cells use `Alignment::None`.
    pub alignment: Alignment,
}
114
/// A parsed `<tr>` element.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableRow {
    /// Cells of the row, in document order.
    pub columns: Vec<ParsedHtmlTableColumn>,
}
120
/// A parsed `<blockquote>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlockQuote {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Block-level elements parsed from the quote's children (never empty).
    pub children: Vec<ParsedHtmlElement>,
}
127
/// A run of text accumulated from one or more adjacent text nodes.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlText {
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// The concatenated text.
    pub contents: SharedString,
    /// Styles to apply, each paired with the byte range of `contents` it covers.
    pub highlights: Vec<(Range<usize>, HtmlHighlightStyle)>,
    /// Link targets (`href` values), each paired with the byte range of `contents` it covers.
    pub links: Vec<(Range<usize>, SharedString)>,
}
136
/// Inline text styling derived from HTML tags or an inline `style` attribute.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct HtmlHighlightStyle {
    /// Set by `<i>` or `font-style: italic`.
    pub italic: bool,
    /// Set by `<ins>` or `text-decoration: underline`.
    pub underline: bool,
    /// Set by `<del>` or `text-decoration: line-through`.
    pub strikethrough: bool,
    /// Set by `<b>`/`<strong>` or the `font-weight` style.
    pub weight: FontWeight,
    /// Set for text inside an `<a>` element that has an `href` attribute.
    pub link: bool,
    /// Set by `<em>` or `font-style: oblique`.
    pub oblique: bool,
}
146
/// A parsed `<img>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct HtmlImage {
    /// Value of the `src` attribute.
    pub dest_url: SharedString,
    /// Source range propagated from the enclosing HTML block.
    pub source_range: Range<usize>,
    /// Value of the `alt` attribute, when present.
    pub alt_text: Option<SharedString>,
    /// Width taken from the `width` attribute or the inline `width` style.
    pub width: Option<DefiniteLength>,
    /// Height taken from the `height` attribute or the inline `height` style.
    pub height: Option<DefiniteLength>,
}
156
157impl HtmlImage {
158    fn new(dest_url: String, source_range: Range<usize>) -> Self {
159        Self {
160            dest_url: dest_url.into(),
161            source_range,
162            alt_text: None,
163            width: None,
164            height: None,
165        }
166    }
167
168    fn set_alt_text(&mut self, alt_text: SharedString) {
169        self.alt_text = Some(alt_text);
170    }
171
172    fn set_width(&mut self, width: DefiniteLength) {
173        self.width = Some(width);
174    }
175
176    fn set_height(&mut self, height: DefiniteLength) {
177        self.height = Some(height);
178    }
179}
180
/// State threaded through the recursive block-level parse.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth assigned to the next list encountered; see the `Default` impl
    /// for the starting value.
    list_item_depth: u16,
}
185
186impl Default for ParseHtmlNodeContext {
187    fn default() -> Self {
188        Self { list_item_depth: 1 }
189    }
190}
191
192pub(crate) fn parse_html_block(
193    source: &str,
194    source_range: Range<usize>,
195) -> Option<ParsedHtmlBlock> {
196    let bytes = cleanup_html(source);
197    let mut cursor = std::io::Cursor::new(bytes);
198    let dom = parse_document(RcDom::default(), ParseOpts::default())
199        .from_utf8()
200        .read_from(&mut cursor)
201        .ok()?;
202
203    let mut children = Vec::new();
204    parse_html_node(
205        source_range.clone(),
206        &dom.document,
207        &mut children,
208        &ParseHtmlNodeContext::default(),
209    );
210
211    Some(ParsedHtmlBlock {
212        source_range,
213        children,
214    })
215}
216
217fn cleanup_html(source: &str) -> Vec<u8> {
218    let mut writer = std::io::Cursor::new(Vec::new());
219    let mut reader = std::io::Cursor::new(source);
220    let mut minify = Minifier::new(
221        &mut writer,
222        MinifierOptions {
223            omit_doctype: true,
224            collapse_whitespace: true,
225            ..Default::default()
226        },
227    );
228    if let Ok(()) = minify.minify(&mut reader) {
229        writer.into_inner()
230    } else {
231        source.bytes().collect()
232    }
233}
234
235#[stacksafe]
236fn parse_html_node(
237    source_range: Range<usize>,
238    node: &Node,
239    elements: &mut Vec<ParsedHtmlElement>,
240    context: &ParseHtmlNodeContext,
241) {
242    match &node.data {
243        NodeData::Document => {
244            consume_children(source_range, node, elements, context);
245        }
246        NodeData::Text { contents } => {
247            elements.push(ParsedHtmlElement::Paragraph(ParsedHtmlParagraph {
248                text_align: None,
249                contents: vec![HtmlParagraphChunk::Text(ParsedHtmlText {
250                    source_range,
251                    highlights: Vec::default(),
252                    links: Vec::default(),
253                    contents: contents.borrow().to_string().into(),
254                })],
255            }));
256        }
257        NodeData::Comment { .. } => {}
258        NodeData::Element { name, attrs, .. } => {
259            let styles_map = extract_styles_from_attributes(attrs);
260            let text_align = text_align_from_attributes(attrs, &styles_map);
261            let mut styles = if let Some(styles) = html_style_from_html_styles(styles_map) {
262                vec![styles]
263            } else {
264                Vec::default()
265            };
266
267            if name.local == local_name!("img") {
268                if let Some(image) = extract_image(source_range, attrs) {
269                    elements.push(ParsedHtmlElement::Image(image));
270                }
271            } else if name.local == local_name!("p") {
272                let mut paragraph = HtmlParagraph::new();
273                parse_paragraph(
274                    source_range,
275                    node,
276                    &mut paragraph,
277                    &mut styles,
278                    &mut Vec::new(),
279                );
280
281                if !paragraph.is_empty() {
282                    elements.push(ParsedHtmlElement::Paragraph(ParsedHtmlParagraph {
283                        text_align,
284                        contents: paragraph,
285                    }));
286                }
287            } else if matches!(
288                name.local,
289                local_name!("h1")
290                    | local_name!("h2")
291                    | local_name!("h3")
292                    | local_name!("h4")
293                    | local_name!("h5")
294                    | local_name!("h6")
295            ) {
296                let mut paragraph = HtmlParagraph::new();
297                consume_paragraph(
298                    source_range.clone(),
299                    node,
300                    &mut paragraph,
301                    &mut styles,
302                    &mut Vec::new(),
303                );
304
305                if !paragraph.is_empty() {
306                    elements.push(ParsedHtmlElement::Heading(ParsedHtmlHeading {
307                        source_range,
308                        level: match name.local {
309                            local_name!("h1") => HeadingLevel::H1,
310                            local_name!("h2") => HeadingLevel::H2,
311                            local_name!("h3") => HeadingLevel::H3,
312                            local_name!("h4") => HeadingLevel::H4,
313                            local_name!("h5") => HeadingLevel::H5,
314                            local_name!("h6") => HeadingLevel::H6,
315                            _ => unreachable!(),
316                        },
317                        contents: paragraph,
318                        text_align,
319                    }));
320                }
321            } else if name.local == local_name!("ul") || name.local == local_name!("ol") {
322                if let Some(list) = extract_html_list(
323                    node,
324                    name.local == local_name!("ol"),
325                    context.list_item_depth,
326                    source_range,
327                ) {
328                    elements.push(ParsedHtmlElement::List(list));
329                }
330            } else if name.local == local_name!("blockquote") {
331                if let Some(blockquote) = extract_html_blockquote(node, source_range) {
332                    elements.push(ParsedHtmlElement::BlockQuote(blockquote));
333                }
334            } else if name.local == local_name!("table") {
335                if let Some(table) = extract_html_table(node, source_range) {
336                    elements.push(ParsedHtmlElement::Table(table));
337                }
338            } else {
339                consume_children(source_range, node, elements, context);
340            }
341        }
342        _ => {}
343    }
344}
345
346#[stacksafe]
347fn parse_paragraph(
348    source_range: Range<usize>,
349    node: &Node,
350    paragraph: &mut HtmlParagraph,
351    highlights: &mut Vec<HtmlHighlightStyle>,
352    links: &mut Vec<SharedString>,
353) {
354    fn items_with_range<T>(
355        range: Range<usize>,
356        items: impl IntoIterator<Item = T>,
357    ) -> Vec<(Range<usize>, T)> {
358        items
359            .into_iter()
360            .map(|item| (range.clone(), item))
361            .collect()
362    }
363
364    match &node.data {
365        NodeData::Text { contents } => {
366            if let Some(text) =
367                paragraph
368                    .iter_mut()
369                    .last()
370                    .and_then(|paragraph_chunk| match paragraph_chunk {
371                        HtmlParagraphChunk::Text(text) => Some(text),
372                        _ => None,
373                    })
374            {
375                let mut new_text = text.contents.to_string();
376                new_text.push_str(&contents.borrow());
377
378                text.highlights.extend(items_with_range(
379                    text.contents.len()..new_text.len(),
380                    mem::take(highlights),
381                ));
382                text.links.extend(items_with_range(
383                    text.contents.len()..new_text.len(),
384                    mem::take(links),
385                ));
386                text.contents = SharedString::from(new_text);
387            } else {
388                let contents = contents.borrow().to_string();
389                paragraph.push(HtmlParagraphChunk::Text(ParsedHtmlText {
390                    source_range,
391                    highlights: items_with_range(0..contents.len(), mem::take(highlights)),
392                    links: items_with_range(0..contents.len(), mem::take(links)),
393                    contents: contents.into(),
394                }));
395            }
396        }
397        NodeData::Element { name, attrs, .. } => {
398            if name.local == local_name!("img") {
399                if let Some(image) = extract_image(source_range, attrs) {
400                    paragraph.push(HtmlParagraphChunk::Image(image));
401                }
402            } else if name.local == local_name!("b") || name.local == local_name!("strong") {
403                highlights.push(HtmlHighlightStyle {
404                    weight: FontWeight::BOLD,
405                    ..Default::default()
406                });
407                consume_paragraph(source_range, node, paragraph, highlights, links);
408            } else if name.local == local_name!("i") {
409                highlights.push(HtmlHighlightStyle {
410                    italic: true,
411                    ..Default::default()
412                });
413                consume_paragraph(source_range, node, paragraph, highlights, links);
414            } else if name.local == local_name!("em") {
415                highlights.push(HtmlHighlightStyle {
416                    oblique: true,
417                    ..Default::default()
418                });
419                consume_paragraph(source_range, node, paragraph, highlights, links);
420            } else if name.local == local_name!("del") {
421                highlights.push(HtmlHighlightStyle {
422                    strikethrough: true,
423                    ..Default::default()
424                });
425                consume_paragraph(source_range, node, paragraph, highlights, links);
426            } else if name.local == local_name!("ins") {
427                highlights.push(HtmlHighlightStyle {
428                    underline: true,
429                    ..Default::default()
430                });
431                consume_paragraph(source_range, node, paragraph, highlights, links);
432            } else if name.local == local_name!("a") {
433                if let Some(url) = attr_value(attrs, local_name!("href")) {
434                    highlights.push(HtmlHighlightStyle {
435                        link: true,
436                        ..Default::default()
437                    });
438                    links.push(url.into());
439                }
440                consume_paragraph(source_range, node, paragraph, highlights, links);
441            } else {
442                consume_paragraph(source_range, node, paragraph, highlights, links);
443            }
444        }
445        _ => {}
446    }
447}
448
449fn consume_paragraph(
450    source_range: Range<usize>,
451    node: &Node,
452    paragraph: &mut HtmlParagraph,
453    highlights: &mut Vec<HtmlHighlightStyle>,
454    links: &mut Vec<SharedString>,
455) {
456    for child in node.children.borrow().iter() {
457        parse_paragraph(source_range.clone(), child, paragraph, highlights, links);
458    }
459}
460
461fn parse_table_row(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableRow> {
462    let mut columns = Vec::new();
463
464    if let NodeData::Element { name, .. } = &node.data {
465        if name.local != local_name!("tr") {
466            return None;
467        }
468
469        for child in node.children.borrow().iter() {
470            if let Some(column) = parse_table_column(source_range.clone(), child) {
471                columns.push(column);
472            }
473        }
474    }
475
476    if columns.is_empty() {
477        None
478    } else {
479        Some(ParsedHtmlTableRow { columns })
480    }
481}
482
483fn parse_table_column(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableColumn> {
484    match &node.data {
485        NodeData::Element { name, attrs, .. } => {
486            if !matches!(name.local, local_name!("th") | local_name!("td")) {
487                return None;
488            }
489
490            let mut children = HtmlParagraph::new();
491            consume_paragraph(
492                source_range,
493                node,
494                &mut children,
495                &mut Vec::new(),
496                &mut Vec::new(),
497            );
498
499            let is_header = name.local == local_name!("th");
500
501            Some(ParsedHtmlTableColumn {
502                col_span: std::cmp::max(
503                    attr_value(attrs, local_name!("colspan"))
504                        .and_then(|span| span.parse().ok())
505                        .unwrap_or(1),
506                    1,
507                ),
508                row_span: std::cmp::max(
509                    attr_value(attrs, local_name!("rowspan"))
510                        .and_then(|span| span.parse().ok())
511                        .unwrap_or(1),
512                    1,
513                ),
514                is_header,
515                children,
516                alignment: attr_value(attrs, local_name!("align"))
517                    .and_then(|align| match align.as_str() {
518                        "left" => Some(Alignment::Left),
519                        "center" => Some(Alignment::Center),
520                        "right" => Some(Alignment::Right),
521                        _ => None,
522                    })
523                    .unwrap_or(if is_header {
524                        Alignment::Center
525                    } else {
526                        Alignment::None
527                    }),
528            })
529        }
530        _ => None,
531    }
532}
533
534fn consume_children(
535    source_range: Range<usize>,
536    node: &Node,
537    elements: &mut Vec<ParsedHtmlElement>,
538    context: &ParseHtmlNodeContext,
539) {
540    for child in node.children.borrow().iter() {
541        parse_html_node(source_range.clone(), child, elements, context);
542    }
543}
544
545fn attr_value(attrs: &RefCell<Vec<Attribute>>, name: LocalName) -> Option<String> {
546    attrs.borrow().iter().find_map(|attr| {
547        if attr.name.local == name {
548            Some(attr.value.to_string())
549        } else {
550            None
551        }
552    })
553}
554
555fn html_style_from_html_styles(styles: HashMap<String, String>) -> Option<HtmlHighlightStyle> {
556    let mut html_style = HtmlHighlightStyle::default();
557
558    if let Some(text_decoration) = styles.get("text-decoration") {
559        match text_decoration.to_lowercase().as_str() {
560            "underline" => {
561                html_style.underline = true;
562            }
563            "line-through" => {
564                html_style.strikethrough = true;
565            }
566            _ => {}
567        }
568    }
569
570    if let Some(font_style) = styles.get("font-style") {
571        match font_style.to_lowercase().as_str() {
572            "italic" => {
573                html_style.italic = true;
574            }
575            "oblique" => {
576                html_style.oblique = true;
577            }
578            _ => {}
579        }
580    }
581
582    if let Some(font_weight) = styles.get("font-weight") {
583        match font_weight.to_lowercase().as_str() {
584            "bold" => {
585                html_style.weight = FontWeight::BOLD;
586            }
587            "lighter" => {
588                html_style.weight = FontWeight::THIN;
589            }
590            _ => {
591                if let Ok(weight) = font_weight.parse::<f32>() {
592                    html_style.weight = FontWeight(weight);
593                }
594            }
595        }
596    }
597
598    if html_style != HtmlHighlightStyle::default() {
599        Some(html_style)
600    } else {
601        None
602    }
603}
604
605fn parse_text_align(value: &str) -> Option<TextAlign> {
606    match value.trim().to_ascii_lowercase().as_str() {
607        "left" => Some(TextAlign::Left),
608        "center" => Some(TextAlign::Center),
609        "right" => Some(TextAlign::Right),
610        _ => None,
611    }
612}
613
614fn text_align_from_styles(styles: &HashMap<String, String>) -> Option<TextAlign> {
615    styles
616        .get("text-align")
617        .and_then(|value| parse_text_align(value))
618}
619
620fn text_align_from_attributes(
621    attrs: &RefCell<Vec<Attribute>>,
622    styles: &HashMap<String, String>,
623) -> Option<TextAlign> {
624    text_align_from_styles(styles).or_else(|| {
625        attr_value(attrs, local_name!("align")).and_then(|value| parse_text_align(&value))
626    })
627}
628
629fn extract_styles_from_attributes(attrs: &RefCell<Vec<Attribute>>) -> HashMap<String, String> {
630    let mut styles = HashMap::new();
631
632    if let Some(style) = attr_value(attrs, local_name!("style")) {
633        for declaration in style.split(';') {
634            let mut parts = declaration.splitn(2, ':');
635            if let Some((key, value)) = parts.next().zip(parts.next()) {
636                styles.insert(key.trim().to_lowercase(), value.trim().to_string());
637            }
638        }
639    }
640
641    styles
642}
643
644fn extract_image(source_range: Range<usize>, attrs: &RefCell<Vec<Attribute>>) -> Option<HtmlImage> {
645    let src = attr_value(attrs, local_name!("src"))?;
646
647    let mut image = HtmlImage::new(src, source_range);
648
649    if let Some(alt) = attr_value(attrs, local_name!("alt")) {
650        image.set_alt_text(alt.into());
651    }
652
653    let styles = extract_styles_from_attributes(attrs);
654
655    if let Some(width) = attr_value(attrs, local_name!("width"))
656        .or_else(|| styles.get("width").cloned())
657        .and_then(|width| parse_html_element_dimension(&width))
658    {
659        image.set_width(width);
660    }
661
662    if let Some(height) = attr_value(attrs, local_name!("height"))
663        .or_else(|| styles.get("height").cloned())
664        .and_then(|height| parse_html_element_dimension(&height))
665    {
666        image.set_height(height);
667    }
668
669    Some(image)
670}
671
672fn extract_html_list(
673    node: &Node,
674    ordered: bool,
675    depth: u16,
676    source_range: Range<usize>,
677) -> Option<ParsedHtmlList> {
678    let mut items = Vec::with_capacity(node.children.borrow().len());
679
680    for (index, child) in node.children.borrow().iter().enumerate() {
681        if let NodeData::Element { name, .. } = &child.data {
682            if name.local != local_name!("li") {
683                continue;
684            }
685
686            let mut content = Vec::new();
687            consume_children(
688                source_range.clone(),
689                child,
690                &mut content,
691                &ParseHtmlNodeContext {
692                    list_item_depth: depth + 1,
693                },
694            );
695
696            if !content.is_empty() {
697                items.push(ParsedHtmlListItem {
698                    source_range: source_range.clone(),
699                    item_type: if ordered {
700                        ParsedHtmlListItemType::Ordered(index as u64 + 1)
701                    } else {
702                        ParsedHtmlListItemType::Unordered
703                    },
704                    content,
705                });
706            }
707        }
708    }
709
710    if items.is_empty() {
711        None
712    } else {
713        Some(ParsedHtmlList {
714            source_range,
715            depth,
716            ordered,
717            items,
718        })
719    }
720}
721
722fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
723    if value.ends_with('%') {
724        value
725            .trim_end_matches('%')
726            .parse::<f32>()
727            .ok()
728            .map(|value| relative(value / 100.))
729    } else {
730        value
731            .trim_end_matches("px")
732            .parse()
733            .ok()
734            .map(|value| px(value).into())
735    }
736}
737
738fn extract_html_blockquote(
739    node: &Node,
740    source_range: Range<usize>,
741) -> Option<ParsedHtmlBlockQuote> {
742    let mut children = Vec::new();
743    consume_children(
744        source_range.clone(),
745        node,
746        &mut children,
747        &ParseHtmlNodeContext::default(),
748    );
749
750    if children.is_empty() {
751        None
752    } else {
753        Some(ParsedHtmlBlockQuote {
754            children,
755            source_range,
756        })
757    }
758}
759
760fn extract_html_table(node: &Node, source_range: Range<usize>) -> Option<ParsedHtmlTable> {
761    let mut header_rows = Vec::new();
762    let mut body_rows = Vec::new();
763    let mut caption = None;
764
765    for child in node.children.borrow().iter() {
766        if let NodeData::Element { name, .. } = &child.data {
767            if name.local == local_name!("caption") {
768                let mut paragraph = HtmlParagraph::new();
769                parse_paragraph(
770                    source_range.clone(),
771                    child,
772                    &mut paragraph,
773                    &mut Vec::new(),
774                    &mut Vec::new(),
775                );
776                caption = Some(paragraph);
777            }
778
779            if name.local == local_name!("thead") {
780                for row in child.children.borrow().iter() {
781                    if let Some(row) = parse_table_row(source_range.clone(), row) {
782                        header_rows.push(row);
783                    }
784                }
785            } else if name.local == local_name!("tbody") {
786                for row in child.children.borrow().iter() {
787                    if let Some(row) = parse_table_row(source_range.clone(), row) {
788                        body_rows.push(row);
789                    }
790                }
791            }
792        }
793    }
794
795    if !header_rows.is_empty() || !body_rows.is_empty() {
796        Some(ParsedHtmlTable {
797            source_range,
798            body: body_rows,
799            header: header_rows,
800            caption,
801        })
802    } else {
803        None
804    }
805}
806
// Unit tests for the HTML block parser. Each test feeds a small HTML snippet
// to `parse_html_block` and inspects the resulting element tree.
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::TextAlign;

    // Inline `<strong>` and `<a href>` inside one paragraph produce a single
    // text chunk whose highlights and links cover the expected byte ranges.
    #[test]
    fn parses_html_styled_text() {
        let parsed = parse_html_block(
            "<p>Some text <strong>strong</strong> <a href=\"https://example.com\">link</a></p>",
            0..79,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph.contents[0] else {
            panic!("expected text chunk");
        };

        assert_eq!(text.contents.as_ref(), "Some text strong link");
        assert_eq!(
            text.highlights,
            vec![
                (
                    10..16,
                    HtmlHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }
                ),
                (
                    17..21,
                    HtmlHighlightStyle {
                        link: true,
                        ..Default::default()
                    }
                )
            ]
        );
        assert_eq!(
            text.links,
            vec![(17..21, SharedString::from("https://example.com"))]
        );
    }

    // `colspan` is read from table cells and rows keep their own cell counts.
    #[test]
    fn parses_html_table_spans() {
        let parsed = parse_html_block(
            "<table><tbody><tr><td colspan=\"2\">a</td></tr><tr><td>b</td><td>c</td></tr></tbody></table>",
            0..91,
        )
        .unwrap();

        let ParsedHtmlElement::Table(table) = &parsed.children[0] else {
            panic!("expected table");
        };
        assert_eq!(table.body.len(), 2);
        assert_eq!(table.body[0].columns[0].col_span, 2);
        assert_eq!(table.body[1].columns.len(), 2);
    }

    // A nested `<ul>` is represented as a `List` element inside its parent
    // item, one depth level below the outer list.
    #[test]
    fn parses_html_list_as_explicit_list_node() {
        let parsed = parse_html_block(
            "<ul><li>parent<ul><li>child</li></ul></li><li>sibling</li></ul>",
            0..64,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);

        let ParsedHtmlElement::List(list) = &parsed.children[0] else {
            panic!("expected list");
        };

        assert!(!list.ordered);
        assert_eq!(list.depth, 1);
        assert_eq!(list.items.len(), 2);

        let first_item = &list.items[0];
        let ParsedHtmlElement::Paragraph(paragraph) = &first_item.content[0] else {
            panic!("expected first item paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph.contents[0] else {
            panic!("expected first item text");
        };
        assert_eq!(text.contents.as_ref(), "parent");

        let ParsedHtmlElement::List(nested_list) = &first_item.content[1] else {
            panic!("expected nested list");
        };
        assert_eq!(nested_list.depth, 2);
        assert_eq!(nested_list.items.len(), 1);

        let ParsedHtmlElement::Paragraph(nested_paragraph) = &nested_list.items[0].content[0]
        else {
            panic!("expected nested item paragraph");
        };
        let HtmlParagraphChunk::Text(nested_text) = &nested_paragraph.contents[0] else {
            panic!("expected nested item text");
        };
        assert_eq!(nested_text.contents.as_ref(), "child");

        let second_item = &list.items[1];
        let ParsedHtmlElement::Paragraph(second_paragraph) = &second_item.content[0] else {
            panic!("expected second item paragraph");
        };
        let HtmlParagraphChunk::Text(second_text) = &second_paragraph.contents[0] else {
            panic!("expected second item text");
        };
        assert_eq!(second_text.contents.as_ref(), "sibling");
    }

    // `text-align` in the inline style sets a paragraph's alignment.
    #[test]
    fn parses_paragraph_text_align_from_style() {
        let parsed = parse_html_block("<p style=\"text-align: center\">x</p>", 0..40).unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }

    // `text-align` in the inline style sets a heading's alignment.
    #[test]
    fn parses_heading_text_align_from_style() {
        let parsed = parse_html_block("<h2 style=\"text-align: right\">Title</h2>", 0..45).unwrap();
        let ParsedHtmlElement::Heading(heading) = &parsed.children[0] else {
            panic!("expected heading");
        };
        assert_eq!(heading.text_align, Some(TextAlign::Right));
    }

    // The `align` attribute sets a paragraph's alignment.
    #[test]
    fn parses_paragraph_text_align_from_align_attribute() {
        let parsed = parse_html_block("<p align=\"center\">x</p>", 0..24).unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }

    // The `align` attribute sets a heading's alignment.
    #[test]
    fn parses_heading_text_align_from_align_attribute() {
        let parsed = parse_html_block("<h2 align=\"right\">Title</h2>", 0..30).unwrap();
        let ParsedHtmlElement::Heading(heading) = &parsed.children[0] else {
            panic!("expected heading");
        };
        assert_eq!(heading.text_align, Some(TextAlign::Right));
    }

    // When both are present, the inline style wins over the `align` attribute.
    #[test]
    fn prefers_style_text_align_over_align_attribute() {
        let parsed = parse_html_block(
            "<p align=\"left\" style=\"text-align: center\">x</p>",
            0..50,
        )
        .unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }
}