1use std::{cell::RefCell, collections::HashMap, mem, ops::Range};
2
3use gpui::{DefiniteLength, FontWeight, SharedString, TextAlign, px, relative};
4use html5ever::{
5 Attribute, LocalName, ParseOpts, local_name, parse_document, tendril::TendrilSink,
6};
7use markup5ever_rcdom::{Node, NodeData, RcDom};
8use pulldown_cmark::{Alignment, HeadingLevel};
9use stacksafe::stacksafe;
10
11use crate::html::html_minifier::{Minifier, MinifierOptions};
12
/// The result of parsing one HTML fragment with [`parse_html_block`].
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlock {
    /// The range that was handed to [`parse_html_block`] for this fragment.
    pub source_range: Range<usize>,
    /// Top-level elements recognised in the fragment, in document order.
    pub children: Vec<ParsedHtmlElement>,
}
19
/// A block-level element produced by the HTML parser.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlElement {
    /// An `<h1>`..`<h6>` element.
    Heading(ParsedHtmlHeading),
    /// A `<ul>` or `<ol>` element.
    List(ParsedHtmlList),
    /// A `<table>` element.
    Table(ParsedHtmlTable),
    /// A `<blockquote>` element.
    BlockQuote(ParsedHtmlBlockQuote),
    /// A `<p>` element, or a bare text node found outside of a paragraph.
    Paragraph(ParsedHtmlParagraph),
    /// An `<img>` element found outside of a paragraph.
    Image(HtmlImage),
}
30
/// A paragraph of inline content together with its optional alignment.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlParagraph {
    /// Alignment taken from the `text-align` style or the `align` attribute;
    /// `None` when neither is present/recognised or for bare text nodes.
    pub text_align: Option<TextAlign>,
    /// The inline chunks (text and images) making up the paragraph.
    pub contents: HtmlParagraph,
}
37
38impl ParsedHtmlElement {
39 pub fn source_range(&self) -> Option<Range<usize>> {
40 Some(match self {
41 Self::Heading(heading) => heading.source_range.clone(),
42 Self::List(list) => list.source_range.clone(),
43 Self::Table(table) => table.source_range.clone(),
44 Self::BlockQuote(block_quote) => block_quote.source_range.clone(),
45 Self::Paragraph(paragraph) => match paragraph.contents.first()? {
46 HtmlParagraphChunk::Text(text) => text.source_range.clone(),
47 HtmlParagraphChunk::Image(image) => image.source_range.clone(),
48 },
49 Self::Image(image) => image.source_range.clone(),
50 })
51 }
52}
53
/// Inline content of a paragraph-like element: a sequence of text and image chunks.
pub(crate) type HtmlParagraph = Vec<HtmlParagraphChunk>;
55
/// One piece of inline content inside an [`HtmlParagraph`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum HtmlParagraphChunk {
    /// A run of text with its highlights and links.
    Text(ParsedHtmlText),
    /// An inline `<img>`.
    Image(HtmlImage),
}
62
/// A parsed `<ul>` or `<ol>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlList {
    /// The source range the list was parsed from.
    pub source_range: Range<usize>,
    /// Nesting depth of the list; the parser starts at 1 and lists found
    /// inside a list item get the parent's depth plus one.
    pub depth: u16,
    /// `true` for `<ol>`, `false` for `<ul>`.
    pub ordered: bool,
    /// The non-empty `<li>` items of the list.
    pub items: Vec<ParsedHtmlListItem>,
}
71
/// A single `<li>` of a [`ParsedHtmlList`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlListItem {
    /// The source range of the item (the parser reuses the enclosing list's range).
    pub source_range: Range<usize>,
    /// Whether the item is numbered, and with which number.
    pub item_type: ParsedHtmlListItemType,
    /// Block elements parsed from the item's children (may include nested lists).
    pub content: Vec<ParsedHtmlElement>,
}
79
/// The marker style of a list item.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) enum ParsedHtmlListItemType {
    /// Item of an `<ol>`; carries the 1-based number assigned by the parser.
    Ordered(u64),
    /// Item of a `<ul>`.
    Unordered,
}
86
/// A parsed `<h1>`..`<h6>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlHeading {
    /// The source range the heading was parsed from.
    pub source_range: Range<usize>,
    /// Heading level derived from the tag name (`h1` => `H1`, ...).
    pub level: HeadingLevel,
    /// Inline content of the heading.
    pub contents: HtmlParagraph,
    /// Alignment taken from the `text-align` style or the `align` attribute.
    pub text_align: Option<TextAlign>,
}
95
/// A parsed `<table>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTable {
    /// The source range the table was parsed from.
    pub source_range: Range<usize>,
    /// Rows collected from `<thead>` children.
    pub header: Vec<ParsedHtmlTableRow>,
    /// Rows collected from `<tbody>` children.
    pub body: Vec<ParsedHtmlTableRow>,
    /// Inline content of the `<caption>` child, if one is present.
    pub caption: Option<HtmlParagraph>,
}
104
/// A single `<th>` or `<td>` cell of a table row.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableColumn {
    /// Value of the `colspan` attribute; at least 1, and 1 when missing or unparsable.
    pub col_span: usize,
    /// Value of the `rowspan` attribute; at least 1, and 1 when missing or unparsable.
    pub row_span: usize,
    /// `true` when the cell is a `<th>`.
    pub is_header: bool,
    /// Inline content of the cell.
    pub children: HtmlParagraph,
    /// Alignment from the `align` attribute; when absent or unrecognised,
    /// header cells are centered and other cells use `Alignment::None`.
    pub alignment: Alignment,
}
114
/// A parsed `<tr>` element.
#[derive(Debug, Clone, Default)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlTableRow {
    /// The `<th>`/`<td>` cells of the row, in document order.
    pub columns: Vec<ParsedHtmlTableColumn>,
}
120
/// A parsed `<blockquote>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlBlockQuote {
    /// The source range the block quote was parsed from.
    pub source_range: Range<usize>,
    /// Block elements parsed from the quote's children.
    pub children: Vec<ParsedHtmlElement>,
}
127
/// A run of text, possibly merged from several adjacent text nodes.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct ParsedHtmlText {
    /// The source range of the node that started this text run.
    pub source_range: Range<usize>,
    /// The concatenated text.
    pub contents: SharedString,
    /// Styles to apply; each range is a byte range into `contents`.
    pub highlights: Vec<(Range<usize>, HtmlHighlightStyle)>,
    /// Link targets (`href` values); each range is a byte range into `contents`.
    pub links: Vec<(Range<usize>, SharedString)>,
}
136
/// Text styling derived from inline tags and `style` attributes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct HtmlHighlightStyle {
    /// Set by `<i>` or `font-style: italic`.
    pub italic: bool,
    /// Set by `<ins>` or `text-decoration: underline`.
    pub underline: bool,
    /// Set by `<del>` or `text-decoration: line-through`.
    pub strikethrough: bool,
    /// Font weight; bold for `<b>`/`<strong>`, or taken from `font-weight`.
    pub weight: FontWeight,
    /// Set for text inside an `<a>` that has an `href`.
    pub link: bool,
    /// Set by `<em>` or `font-style: oblique`.
    pub oblique: bool,
}
146
/// A parsed `<img>` element.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct HtmlImage {
    /// Value of the `src` attribute.
    pub dest_url: SharedString,
    /// The source range the image was parsed from.
    pub source_range: Range<usize>,
    /// Value of the `alt` attribute, if present.
    pub alt_text: Option<SharedString>,
    /// Width from the `width` attribute or, failing that, the `width` style.
    pub width: Option<DefiniteLength>,
    /// Height from the `height` attribute or, failing that, the `height` style.
    pub height: Option<DefiniteLength>,
}
156
157impl HtmlImage {
158 fn new(dest_url: String, source_range: Range<usize>) -> Self {
159 Self {
160 dest_url: dest_url.into(),
161 source_range,
162 alt_text: None,
163 width: None,
164 height: None,
165 }
166 }
167
168 fn set_alt_text(&mut self, alt_text: SharedString) {
169 self.alt_text = Some(alt_text);
170 }
171
172 fn set_width(&mut self, width: DefiniteLength) {
173 self.width = Some(width);
174 }
175
176 fn set_height(&mut self, height: DefiniteLength) {
177 self.height = Some(height);
178 }
179}
180
/// State threaded through the recursive block-level parse.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth assigned to the next list encountered while parsing.
    list_item_depth: u16,
}
185
186impl Default for ParseHtmlNodeContext {
187 fn default() -> Self {
188 Self { list_item_depth: 1 }
189 }
190}
191
192pub(crate) fn parse_html_block(
193 source: &str,
194 source_range: Range<usize>,
195) -> Option<ParsedHtmlBlock> {
196 let bytes = cleanup_html(source);
197 let mut cursor = std::io::Cursor::new(bytes);
198 let dom = parse_document(RcDom::default(), ParseOpts::default())
199 .from_utf8()
200 .read_from(&mut cursor)
201 .ok()?;
202
203 let mut children = Vec::new();
204 parse_html_node(
205 source_range.clone(),
206 &dom.document,
207 &mut children,
208 &ParseHtmlNodeContext::default(),
209 );
210
211 Some(ParsedHtmlBlock {
212 source_range,
213 children,
214 })
215}
216
217fn cleanup_html(source: &str) -> Vec<u8> {
218 let mut writer = std::io::Cursor::new(Vec::new());
219 let mut reader = std::io::Cursor::new(source);
220 let mut minify = Minifier::new(
221 &mut writer,
222 MinifierOptions {
223 omit_doctype: true,
224 collapse_whitespace: true,
225 ..Default::default()
226 },
227 );
228 if let Ok(()) = minify.minify(&mut reader) {
229 writer.into_inner()
230 } else {
231 source.bytes().collect()
232 }
233}
234
235#[stacksafe]
236fn parse_html_node(
237 source_range: Range<usize>,
238 node: &Node,
239 elements: &mut Vec<ParsedHtmlElement>,
240 context: &ParseHtmlNodeContext,
241) {
242 match &node.data {
243 NodeData::Document => {
244 consume_children(source_range, node, elements, context);
245 }
246 NodeData::Text { contents } => {
247 elements.push(ParsedHtmlElement::Paragraph(ParsedHtmlParagraph {
248 text_align: None,
249 contents: vec![HtmlParagraphChunk::Text(ParsedHtmlText {
250 source_range,
251 highlights: Vec::default(),
252 links: Vec::default(),
253 contents: contents.borrow().to_string().into(),
254 })],
255 }));
256 }
257 NodeData::Comment { .. } => {}
258 NodeData::Element { name, attrs, .. } => {
259 let styles_map = extract_styles_from_attributes(attrs);
260 let text_align = text_align_from_attributes(attrs, &styles_map);
261 let mut styles = if let Some(styles) = html_style_from_html_styles(styles_map) {
262 vec![styles]
263 } else {
264 Vec::default()
265 };
266
267 if name.local == local_name!("img") {
268 if let Some(image) = extract_image(source_range, attrs) {
269 elements.push(ParsedHtmlElement::Image(image));
270 }
271 } else if name.local == local_name!("p") {
272 let mut paragraph = HtmlParagraph::new();
273 parse_paragraph(
274 source_range,
275 node,
276 &mut paragraph,
277 &mut styles,
278 &mut Vec::new(),
279 );
280
281 if !paragraph.is_empty() {
282 elements.push(ParsedHtmlElement::Paragraph(ParsedHtmlParagraph {
283 text_align,
284 contents: paragraph,
285 }));
286 }
287 } else if matches!(
288 name.local,
289 local_name!("h1")
290 | local_name!("h2")
291 | local_name!("h3")
292 | local_name!("h4")
293 | local_name!("h5")
294 | local_name!("h6")
295 ) {
296 let mut paragraph = HtmlParagraph::new();
297 consume_paragraph(
298 source_range.clone(),
299 node,
300 &mut paragraph,
301 &mut styles,
302 &mut Vec::new(),
303 );
304
305 if !paragraph.is_empty() {
306 elements.push(ParsedHtmlElement::Heading(ParsedHtmlHeading {
307 source_range,
308 level: match name.local {
309 local_name!("h1") => HeadingLevel::H1,
310 local_name!("h2") => HeadingLevel::H2,
311 local_name!("h3") => HeadingLevel::H3,
312 local_name!("h4") => HeadingLevel::H4,
313 local_name!("h5") => HeadingLevel::H5,
314 local_name!("h6") => HeadingLevel::H6,
315 _ => unreachable!(),
316 },
317 contents: paragraph,
318 text_align,
319 }));
320 }
321 } else if name.local == local_name!("ul") || name.local == local_name!("ol") {
322 if let Some(list) = extract_html_list(
323 node,
324 name.local == local_name!("ol"),
325 context.list_item_depth,
326 source_range,
327 ) {
328 elements.push(ParsedHtmlElement::List(list));
329 }
330 } else if name.local == local_name!("blockquote") {
331 if let Some(blockquote) = extract_html_blockquote(node, source_range) {
332 elements.push(ParsedHtmlElement::BlockQuote(blockquote));
333 }
334 } else if name.local == local_name!("table") {
335 if let Some(table) = extract_html_table(node, source_range) {
336 elements.push(ParsedHtmlElement::Table(table));
337 }
338 } else {
339 consume_children(source_range, node, elements, context);
340 }
341 }
342 _ => {}
343 }
344}
345
346#[stacksafe]
347fn parse_paragraph(
348 source_range: Range<usize>,
349 node: &Node,
350 paragraph: &mut HtmlParagraph,
351 highlights: &mut Vec<HtmlHighlightStyle>,
352 links: &mut Vec<SharedString>,
353) {
354 fn items_with_range<T>(
355 range: Range<usize>,
356 items: impl IntoIterator<Item = T>,
357 ) -> Vec<(Range<usize>, T)> {
358 items
359 .into_iter()
360 .map(|item| (range.clone(), item))
361 .collect()
362 }
363
364 match &node.data {
365 NodeData::Text { contents } => {
366 if let Some(text) =
367 paragraph
368 .iter_mut()
369 .last()
370 .and_then(|paragraph_chunk| match paragraph_chunk {
371 HtmlParagraphChunk::Text(text) => Some(text),
372 _ => None,
373 })
374 {
375 let mut new_text = text.contents.to_string();
376 new_text.push_str(&contents.borrow());
377
378 text.highlights.extend(items_with_range(
379 text.contents.len()..new_text.len(),
380 mem::take(highlights),
381 ));
382 text.links.extend(items_with_range(
383 text.contents.len()..new_text.len(),
384 mem::take(links),
385 ));
386 text.contents = SharedString::from(new_text);
387 } else {
388 let contents = contents.borrow().to_string();
389 paragraph.push(HtmlParagraphChunk::Text(ParsedHtmlText {
390 source_range,
391 highlights: items_with_range(0..contents.len(), mem::take(highlights)),
392 links: items_with_range(0..contents.len(), mem::take(links)),
393 contents: contents.into(),
394 }));
395 }
396 }
397 NodeData::Element { name, attrs, .. } => {
398 if name.local == local_name!("img") {
399 if let Some(image) = extract_image(source_range, attrs) {
400 paragraph.push(HtmlParagraphChunk::Image(image));
401 }
402 } else if name.local == local_name!("b") || name.local == local_name!("strong") {
403 highlights.push(HtmlHighlightStyle {
404 weight: FontWeight::BOLD,
405 ..Default::default()
406 });
407 consume_paragraph(source_range, node, paragraph, highlights, links);
408 } else if name.local == local_name!("i") {
409 highlights.push(HtmlHighlightStyle {
410 italic: true,
411 ..Default::default()
412 });
413 consume_paragraph(source_range, node, paragraph, highlights, links);
414 } else if name.local == local_name!("em") {
415 highlights.push(HtmlHighlightStyle {
416 oblique: true,
417 ..Default::default()
418 });
419 consume_paragraph(source_range, node, paragraph, highlights, links);
420 } else if name.local == local_name!("del") {
421 highlights.push(HtmlHighlightStyle {
422 strikethrough: true,
423 ..Default::default()
424 });
425 consume_paragraph(source_range, node, paragraph, highlights, links);
426 } else if name.local == local_name!("ins") {
427 highlights.push(HtmlHighlightStyle {
428 underline: true,
429 ..Default::default()
430 });
431 consume_paragraph(source_range, node, paragraph, highlights, links);
432 } else if name.local == local_name!("a") {
433 if let Some(url) = attr_value(attrs, local_name!("href")) {
434 highlights.push(HtmlHighlightStyle {
435 link: true,
436 ..Default::default()
437 });
438 links.push(url.into());
439 }
440 consume_paragraph(source_range, node, paragraph, highlights, links);
441 } else {
442 consume_paragraph(source_range, node, paragraph, highlights, links);
443 }
444 }
445 _ => {}
446 }
447}
448
449fn consume_paragraph(
450 source_range: Range<usize>,
451 node: &Node,
452 paragraph: &mut HtmlParagraph,
453 highlights: &mut Vec<HtmlHighlightStyle>,
454 links: &mut Vec<SharedString>,
455) {
456 for child in node.children.borrow().iter() {
457 parse_paragraph(source_range.clone(), child, paragraph, highlights, links);
458 }
459}
460
461fn parse_table_row(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableRow> {
462 let mut columns = Vec::new();
463
464 if let NodeData::Element { name, .. } = &node.data {
465 if name.local != local_name!("tr") {
466 return None;
467 }
468
469 for child in node.children.borrow().iter() {
470 if let Some(column) = parse_table_column(source_range.clone(), child) {
471 columns.push(column);
472 }
473 }
474 }
475
476 if columns.is_empty() {
477 None
478 } else {
479 Some(ParsedHtmlTableRow { columns })
480 }
481}
482
483fn parse_table_column(source_range: Range<usize>, node: &Node) -> Option<ParsedHtmlTableColumn> {
484 match &node.data {
485 NodeData::Element { name, attrs, .. } => {
486 if !matches!(name.local, local_name!("th") | local_name!("td")) {
487 return None;
488 }
489
490 let mut children = HtmlParagraph::new();
491 consume_paragraph(
492 source_range,
493 node,
494 &mut children,
495 &mut Vec::new(),
496 &mut Vec::new(),
497 );
498
499 let is_header = name.local == local_name!("th");
500
501 Some(ParsedHtmlTableColumn {
502 col_span: std::cmp::max(
503 attr_value(attrs, local_name!("colspan"))
504 .and_then(|span| span.parse().ok())
505 .unwrap_or(1),
506 1,
507 ),
508 row_span: std::cmp::max(
509 attr_value(attrs, local_name!("rowspan"))
510 .and_then(|span| span.parse().ok())
511 .unwrap_or(1),
512 1,
513 ),
514 is_header,
515 children,
516 alignment: attr_value(attrs, local_name!("align"))
517 .and_then(|align| match align.as_str() {
518 "left" => Some(Alignment::Left),
519 "center" => Some(Alignment::Center),
520 "right" => Some(Alignment::Right),
521 _ => None,
522 })
523 .unwrap_or(if is_header {
524 Alignment::Center
525 } else {
526 Alignment::None
527 }),
528 })
529 }
530 _ => None,
531 }
532}
533
534fn consume_children(
535 source_range: Range<usize>,
536 node: &Node,
537 elements: &mut Vec<ParsedHtmlElement>,
538 context: &ParseHtmlNodeContext,
539) {
540 for child in node.children.borrow().iter() {
541 parse_html_node(source_range.clone(), child, elements, context);
542 }
543}
544
545fn attr_value(attrs: &RefCell<Vec<Attribute>>, name: LocalName) -> Option<String> {
546 attrs.borrow().iter().find_map(|attr| {
547 if attr.name.local == name {
548 Some(attr.value.to_string())
549 } else {
550 None
551 }
552 })
553}
554
555fn html_style_from_html_styles(styles: HashMap<String, String>) -> Option<HtmlHighlightStyle> {
556 let mut html_style = HtmlHighlightStyle::default();
557
558 if let Some(text_decoration) = styles.get("text-decoration") {
559 match text_decoration.to_lowercase().as_str() {
560 "underline" => {
561 html_style.underline = true;
562 }
563 "line-through" => {
564 html_style.strikethrough = true;
565 }
566 _ => {}
567 }
568 }
569
570 if let Some(font_style) = styles.get("font-style") {
571 match font_style.to_lowercase().as_str() {
572 "italic" => {
573 html_style.italic = true;
574 }
575 "oblique" => {
576 html_style.oblique = true;
577 }
578 _ => {}
579 }
580 }
581
582 if let Some(font_weight) = styles.get("font-weight") {
583 match font_weight.to_lowercase().as_str() {
584 "bold" => {
585 html_style.weight = FontWeight::BOLD;
586 }
587 "lighter" => {
588 html_style.weight = FontWeight::THIN;
589 }
590 _ => {
591 if let Ok(weight) = font_weight.parse::<f32>() {
592 html_style.weight = FontWeight(weight);
593 }
594 }
595 }
596 }
597
598 if html_style != HtmlHighlightStyle::default() {
599 Some(html_style)
600 } else {
601 None
602 }
603}
604
605fn parse_text_align(value: &str) -> Option<TextAlign> {
606 match value.trim().to_ascii_lowercase().as_str() {
607 "left" => Some(TextAlign::Left),
608 "center" => Some(TextAlign::Center),
609 "right" => Some(TextAlign::Right),
610 _ => None,
611 }
612}
613
614fn text_align_from_styles(styles: &HashMap<String, String>) -> Option<TextAlign> {
615 styles
616 .get("text-align")
617 .and_then(|value| parse_text_align(value))
618}
619
620fn text_align_from_attributes(
621 attrs: &RefCell<Vec<Attribute>>,
622 styles: &HashMap<String, String>,
623) -> Option<TextAlign> {
624 text_align_from_styles(styles).or_else(|| {
625 attr_value(attrs, local_name!("align")).and_then(|value| parse_text_align(&value))
626 })
627}
628
629fn extract_styles_from_attributes(attrs: &RefCell<Vec<Attribute>>) -> HashMap<String, String> {
630 let mut styles = HashMap::new();
631
632 if let Some(style) = attr_value(attrs, local_name!("style")) {
633 for declaration in style.split(';') {
634 let mut parts = declaration.splitn(2, ':');
635 if let Some((key, value)) = parts.next().zip(parts.next()) {
636 styles.insert(key.trim().to_lowercase(), value.trim().to_string());
637 }
638 }
639 }
640
641 styles
642}
643
644fn extract_image(source_range: Range<usize>, attrs: &RefCell<Vec<Attribute>>) -> Option<HtmlImage> {
645 let src = attr_value(attrs, local_name!("src"))?;
646
647 let mut image = HtmlImage::new(src, source_range);
648
649 if let Some(alt) = attr_value(attrs, local_name!("alt")) {
650 image.set_alt_text(alt.into());
651 }
652
653 let styles = extract_styles_from_attributes(attrs);
654
655 if let Some(width) = attr_value(attrs, local_name!("width"))
656 .or_else(|| styles.get("width").cloned())
657 .and_then(|width| parse_html_element_dimension(&width))
658 {
659 image.set_width(width);
660 }
661
662 if let Some(height) = attr_value(attrs, local_name!("height"))
663 .or_else(|| styles.get("height").cloned())
664 .and_then(|height| parse_html_element_dimension(&height))
665 {
666 image.set_height(height);
667 }
668
669 Some(image)
670}
671
672fn extract_html_list(
673 node: &Node,
674 ordered: bool,
675 depth: u16,
676 source_range: Range<usize>,
677) -> Option<ParsedHtmlList> {
678 let mut items = Vec::with_capacity(node.children.borrow().len());
679
680 for (index, child) in node.children.borrow().iter().enumerate() {
681 if let NodeData::Element { name, .. } = &child.data {
682 if name.local != local_name!("li") {
683 continue;
684 }
685
686 let mut content = Vec::new();
687 consume_children(
688 source_range.clone(),
689 child,
690 &mut content,
691 &ParseHtmlNodeContext {
692 list_item_depth: depth + 1,
693 },
694 );
695
696 if !content.is_empty() {
697 items.push(ParsedHtmlListItem {
698 source_range: source_range.clone(),
699 item_type: if ordered {
700 ParsedHtmlListItemType::Ordered(index as u64 + 1)
701 } else {
702 ParsedHtmlListItemType::Unordered
703 },
704 content,
705 });
706 }
707 }
708 }
709
710 if items.is_empty() {
711 None
712 } else {
713 Some(ParsedHtmlList {
714 source_range,
715 depth,
716 ordered,
717 items,
718 })
719 }
720}
721
722fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
723 if value.ends_with('%') {
724 value
725 .trim_end_matches('%')
726 .parse::<f32>()
727 .ok()
728 .map(|value| relative(value / 100.))
729 } else {
730 value
731 .trim_end_matches("px")
732 .parse()
733 .ok()
734 .map(|value| px(value).into())
735 }
736}
737
738fn extract_html_blockquote(
739 node: &Node,
740 source_range: Range<usize>,
741) -> Option<ParsedHtmlBlockQuote> {
742 let mut children = Vec::new();
743 consume_children(
744 source_range.clone(),
745 node,
746 &mut children,
747 &ParseHtmlNodeContext::default(),
748 );
749
750 if children.is_empty() {
751 None
752 } else {
753 Some(ParsedHtmlBlockQuote {
754 children,
755 source_range,
756 })
757 }
758}
759
760fn extract_html_table(node: &Node, source_range: Range<usize>) -> Option<ParsedHtmlTable> {
761 let mut header_rows = Vec::new();
762 let mut body_rows = Vec::new();
763 let mut caption = None;
764
765 for child in node.children.borrow().iter() {
766 if let NodeData::Element { name, .. } = &child.data {
767 if name.local == local_name!("caption") {
768 let mut paragraph = HtmlParagraph::new();
769 parse_paragraph(
770 source_range.clone(),
771 child,
772 &mut paragraph,
773 &mut Vec::new(),
774 &mut Vec::new(),
775 );
776 caption = Some(paragraph);
777 }
778
779 if name.local == local_name!("thead") {
780 for row in child.children.borrow().iter() {
781 if let Some(row) = parse_table_row(source_range.clone(), row) {
782 header_rows.push(row);
783 }
784 }
785 } else if name.local == local_name!("tbody") {
786 for row in child.children.borrow().iter() {
787 if let Some(row) = parse_table_row(source_range.clone(), row) {
788 body_rows.push(row);
789 }
790 }
791 }
792 }
793 }
794
795 if !header_rows.is_empty() || !body_rows.is_empty() {
796 Some(ParsedHtmlTable {
797 source_range,
798 body: body_rows,
799 header: header_rows,
800 caption,
801 })
802 } else {
803 None
804 }
805}
806
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::TextAlign;

    /// Inline tags inside a paragraph are merged into one text chunk whose
    /// highlights and links cover exactly the styled byte ranges.
    #[test]
    fn parses_html_styled_text() {
        let parsed = parse_html_block(
            "<p>Some text <strong>strong</strong> <a href=\"https://example.com\">link</a></p>",
            0..79,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph.contents[0] else {
            panic!("expected text chunk");
        };

        assert_eq!(text.contents.as_ref(), "Some text strong link");
        assert_eq!(
            text.highlights,
            vec![
                (
                    10..16,
                    HtmlHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }
                ),
                (
                    17..21,
                    HtmlHighlightStyle {
                        link: true,
                        ..Default::default()
                    }
                )
            ]
        );
        assert_eq!(
            text.links,
            vec![(17..21, SharedString::from("https://example.com"))]
        );
    }

    /// `colspan` is read from table cells and rows keep their cell counts.
    #[test]
    fn parses_html_table_spans() {
        let parsed = parse_html_block(
            "<table><tbody><tr><td colspan=\"2\">a</td></tr><tr><td>b</td><td>c</td></tr></tbody></table>",
            0..91,
        )
        .unwrap();

        let ParsedHtmlElement::Table(table) = &parsed.children[0] else {
            panic!("expected table");
        };
        assert_eq!(table.body.len(), 2);
        assert_eq!(table.body[0].columns[0].col_span, 2);
        assert_eq!(table.body[1].columns.len(), 2);
    }

    /// Nested lists are represented as list elements inside the parent item,
    /// one depth level deeper than the enclosing list.
    #[test]
    fn parses_html_list_as_explicit_list_node() {
        let parsed = parse_html_block(
            "<ul><li>parent<ul><li>child</li></ul></li><li>sibling</li></ul>",
            0..64,
        )
        .unwrap();

        assert_eq!(parsed.children.len(), 1);

        let ParsedHtmlElement::List(list) = &parsed.children[0] else {
            panic!("expected list");
        };

        assert!(!list.ordered);
        assert_eq!(list.depth, 1);
        assert_eq!(list.items.len(), 2);

        let first_item = &list.items[0];
        let ParsedHtmlElement::Paragraph(paragraph) = &first_item.content[0] else {
            panic!("expected first item paragraph");
        };
        let HtmlParagraphChunk::Text(text) = &paragraph.contents[0] else {
            panic!("expected first item text");
        };
        assert_eq!(text.contents.as_ref(), "parent");

        let ParsedHtmlElement::List(nested_list) = &first_item.content[1] else {
            panic!("expected nested list");
        };
        assert_eq!(nested_list.depth, 2);
        assert_eq!(nested_list.items.len(), 1);

        let ParsedHtmlElement::Paragraph(nested_paragraph) = &nested_list.items[0].content[0]
        else {
            panic!("expected nested item paragraph");
        };
        let HtmlParagraphChunk::Text(nested_text) = &nested_paragraph.contents[0] else {
            panic!("expected nested item text");
        };
        assert_eq!(nested_text.contents.as_ref(), "child");

        let second_item = &list.items[1];
        let ParsedHtmlElement::Paragraph(second_paragraph) = &second_item.content[0] else {
            panic!("expected second item paragraph");
        };
        let HtmlParagraphChunk::Text(second_text) = &second_paragraph.contents[0] else {
            panic!("expected second item text");
        };
        assert_eq!(second_text.contents.as_ref(), "sibling");
    }

    /// `text-align` in a paragraph's `style` attribute sets its alignment.
    #[test]
    fn parses_paragraph_text_align_from_style() {
        let parsed = parse_html_block("<p style=\"text-align: center\">x</p>", 0..40).unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }

    /// `text-align` in a heading's `style` attribute sets its alignment.
    #[test]
    fn parses_heading_text_align_from_style() {
        let parsed = parse_html_block("<h2 style=\"text-align: right\">Title</h2>", 0..45).unwrap();
        let ParsedHtmlElement::Heading(heading) = &parsed.children[0] else {
            panic!("expected heading");
        };
        assert_eq!(heading.text_align, Some(TextAlign::Right));
    }

    /// The legacy `align` attribute sets a paragraph's alignment.
    #[test]
    fn parses_paragraph_text_align_from_align_attribute() {
        let parsed = parse_html_block("<p align=\"center\">x</p>", 0..24).unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }

    /// The legacy `align` attribute sets a heading's alignment.
    #[test]
    fn parses_heading_text_align_from_align_attribute() {
        let parsed = parse_html_block("<h2 align=\"right\">Title</h2>", 0..30).unwrap();
        let ParsedHtmlElement::Heading(heading) = &parsed.children[0] else {
            panic!("expected heading");
        };
        assert_eq!(heading.text_align, Some(TextAlign::Right));
    }

    /// When both are present, the `text-align` style beats the `align` attribute.
    #[test]
    fn prefers_style_text_align_over_align_attribute() {
        let parsed = parse_html_block(
            "<p align=\"left\" style=\"text-align: center\">x</p>",
            0..50,
        )
        .unwrap();
        let ParsedHtmlElement::Paragraph(paragraph) = &parsed.children[0] else {
            panic!("expected paragraph");
        };
        assert_eq!(paragraph.text_align, Some(TextAlign::Center));
    }
}