markdown: Add support for `HTML` styling attributes (#42143)

Remco Smits and Bennet Bo Fenner created

Second take on https://github.com/zed-industries/zed/pull/37765.

This PR adds support for the HTML styling elements (**b**, **strong**, **em**,
**i**, **ins**, **del**) and also renders the styled text inline with the
surrounding text.
This is done by appending each subsequent piece of text to a single text chunk
and merging its highlights into that already existing chunk. If no text chunk
exists yet, we create one, and the following iterations use that chunk to
store the appended text and its highlights.

**Before**
<img width="483" height="692" alt="Screenshot 2025-11-06 at 22 08 09"
src="https://github.com/user-attachments/assets/6158fd3b-066c-4abe-9f8e-bcafae85392e"
/>

**After**
<img width="868" height="300" alt="Screenshot 2025-11-06 at 22 08 21"
src="https://github.com/user-attachments/assets/4d5a7a33-d31c-4514-91c8-2b2a2ff43e0e"
/>

**Code example**
```html
<p>some text <b>bold text</b></p>
<p>some text <strong>strong text</strong></p>
<p>some text <i>italic text</i></p>
<p>some text <em>emphasized text</em></p>
<p>some text <del>delete text</del></p>
<p>some text <ins>insert text</ins></p>

<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>

<p><a href="https://example.com">Link Text</a></p>

<p style="text-decoration: underline;">text styled from style attribute</p>
```

cc @bennetbo 

**TODO**
- [x] add tests for styling nested text that should result in one merge

Release Notes:

- Markdown Preview: Added support for `HTML` styling elements

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>

Change summary

crates/markdown_preview/src/markdown_elements.rs |   6 
crates/markdown_preview/src/markdown_parser.rs   | 236 +++++++++++++++++
2 files changed, 229 insertions(+), 13 deletions(-)

Detailed changes

crates/markdown_preview/src/markdown_elements.rs 🔗

@@ -222,6 +222,10 @@ impl MarkdownHighlight {
                     });
                 }
 
+                if style.oblique {
+                    highlight.font_style = Some(FontStyle::Oblique)
+                }
+
                 Some(highlight)
             }
 
@@ -243,6 +247,8 @@ pub struct MarkdownHighlightStyle {
     pub weight: FontWeight,
     /// Whether the text should be stylized as link.
     pub link: bool,
+    /// Whether the text should be rendered in an oblique font style.
+    pub oblique: bool,
 }
 
 /// A parsed region in a Markdown document.

crates/markdown_preview/src/markdown_parser.rs 🔗

@@ -12,6 +12,7 @@ use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
 use std::{
     cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
 };
+use ui::SharedString;
 
 pub async fn parse_markdown(
     markdown_input: &str,
@@ -876,13 +877,21 @@ impl<'a> MarkdownParser<'a> {
             }
             markup5ever_rcdom::NodeData::Comment { .. } => {}
             markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
+                let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles(
+                    Self::extract_styles_from_attributes(attrs),
+                ) {
+                    vec![MarkdownHighlight::Style(styles)]
+                } else {
+                    Vec::default()
+                };
+
                 if local_name!("img") == name.local {
                     if let Some(image) = self.extract_image(source_range, attrs) {
                         elements.push(ParsedMarkdownElement::Image(image));
                     }
                 } else if local_name!("p") == name.local {
                     let mut paragraph = MarkdownParagraph::new();
-                    self.parse_paragraph(source_range, node, &mut paragraph);
+                    self.parse_paragraph(source_range, node, &mut paragraph, &mut styles);
 
                     if !paragraph.is_empty() {
                         elements.push(ParsedMarkdownElement::Paragraph(paragraph));
@@ -897,7 +906,7 @@ impl<'a> MarkdownParser<'a> {
                         | local_name!("h6")
                 ) {
                     let mut paragraph = MarkdownParagraph::new();
-                    self.consume_paragraph(source_range.clone(), node, &mut paragraph);
+                    self.consume_paragraph(source_range.clone(), node, &mut paragraph, &mut styles);
 
                     if !paragraph.is_empty() {
                         elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
@@ -944,24 +953,90 @@ impl<'a> MarkdownParser<'a> {
         source_range: Range<usize>,
         node: &Rc<markup5ever_rcdom::Node>,
         paragraph: &mut MarkdownParagraph,
+        highlights: &mut Vec<MarkdownHighlight>,
     ) {
+        fn add_highlight_range(
+            text: &String,
+            start: usize,
+            highlights: Vec<MarkdownHighlight>,
+        ) -> Vec<(Range<usize>, MarkdownHighlight)> {
+            highlights
+                .into_iter()
+                .map(|style| (start..text.len(), style))
+                .collect()
+        }
+
         match &node.data {
             markup5ever_rcdom::NodeData::Text { contents } => {
-                paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
-                    source_range,
-                    regions: Vec::default(),
-                    region_ranges: Vec::default(),
-                    highlights: Vec::default(),
-                    contents: contents.borrow().to_string().into(),
-                }));
+                // Append the text to the last chunk when that chunk is text, so that
+                // inline styled text stays in one chunk; pending highlights are applied
+                // to the range covered by the appended text.
+                if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p {
+                    MarkdownParagraphChunk::Text(text) => Some(text),
+                    _ => None,
+                }) {
+                    let mut new_text = text.contents.to_string();
+                    new_text.push_str(&contents.borrow());
+                    let highlights = add_highlight_range(
+                        &new_text,
+                        text.contents.len(),
+                        std::mem::take(highlights),
+                    );
+
+                    text.contents = SharedString::from(new_text);
+                    text.highlights.extend(highlights);
+                } else {
+                    let contents = contents.borrow().to_string();
+                    paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
+                        source_range,
+                        highlights: add_highlight_range(&contents, 0, std::mem::take(highlights)),
+                        regions: Vec::default(),
+                        contents: contents.into(),
+                        region_ranges: Vec::default(),
+                    }));
+                }
             }
             markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
                 if local_name!("img") == name.local {
                     if let Some(image) = self.extract_image(source_range, attrs) {
                         paragraph.push(MarkdownParagraphChunk::Image(image));
                     }
+                } else if local_name!("b") == name.local || local_name!("strong") == name.local {
+                    highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        weight: FontWeight::BOLD,
+                        ..Default::default()
+                    }));
+
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
+                } else if local_name!("i") == name.local {
+                    highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        italic: true,
+                        ..Default::default()
+                    }));
+
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
+                } else if local_name!("em") == name.local {
+                    highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        oblique: true,
+                        ..Default::default()
+                    }));
+
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
+                } else if local_name!("del") == name.local {
+                    highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        strikethrough: true,
+                        ..Default::default()
+                    }));
+
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
+                } else if local_name!("ins") == name.local {
+                    highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        underline: true,
+                        ..Default::default()
+                    }));
+
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
                 } else {
-                    self.consume_paragraph(source_range, node, paragraph);
+                    self.consume_paragraph(source_range, node, paragraph, highlights);
                 }
             }
             _ => {}
@@ -973,9 +1048,10 @@ impl<'a> MarkdownParser<'a> {
         source_range: Range<usize>,
         node: &Rc<markup5ever_rcdom::Node>,
         paragraph: &mut MarkdownParagraph,
+        highlights: &mut Vec<MarkdownHighlight>,
     ) {
         for node in node.children.borrow().iter() {
-            self.parse_paragraph(source_range.clone(), node, paragraph);
+            self.parse_paragraph(source_range.clone(), node, paragraph, highlights);
         }
     }
 
@@ -1020,7 +1096,7 @@ impl<'a> MarkdownParser<'a> {
                 }
 
                 let mut children = MarkdownParagraph::new();
-                self.consume_paragraph(source_range, node, &mut children);
+                self.consume_paragraph(source_range, node, &mut children, &mut Vec::new());
 
                 let is_header = matches!(name.local, local_name!("th"));
 
@@ -1084,6 +1160,58 @@ impl<'a> MarkdownParser<'a> {
         })
     }
 
+    fn markdown_style_from_html_styles(
+        styles: HashMap<String, String>,
+    ) -> Option<MarkdownHighlightStyle> {
+        let mut markdown_style = MarkdownHighlightStyle::default();
+
+        if let Some(text_decoration) = styles.get("text-decoration") {
+            match text_decoration.to_lowercase().as_str() {
+                "underline" => {
+                    markdown_style.underline = true;
+                }
+                "line-through" => {
+                    markdown_style.strikethrough = true;
+                }
+                _ => {}
+            }
+        }
+
+        if let Some(font_style) = styles.get("font-style") {
+            match font_style.to_lowercase().as_str() {
+                "italic" => {
+                    markdown_style.italic = true;
+                }
+                "oblique" => {
+                    markdown_style.oblique = true;
+                }
+                _ => {}
+            }
+        }
+
+        if let Some(font_weight) = styles.get("font-weight") {
+            match font_weight.to_lowercase().as_str() {
+                "bold" => {
+                    markdown_style.weight = FontWeight::BOLD;
+                }
+                "lighter" => {
+                    markdown_style.weight = FontWeight::THIN;
+                }
+                _ => {
+                    if let Some(weight) = font_weight.parse::<f32>().ok() {
+                        markdown_style.weight = FontWeight(weight);
+                    }
+                }
+            }
+        }
+
+        if markdown_style != MarkdownHighlightStyle::default() {
+            Some(markdown_style)
+        } else {
+            None
+        }
+    }
+
     fn extract_styles_from_attributes(
         attrs: &RefCell<Vec<html5ever::Attribute>>,
     ) -> HashMap<String, String> {
@@ -1241,7 +1369,12 @@ impl<'a> MarkdownParser<'a> {
                 markup5ever_rcdom::NodeData::Element { name, .. } => {
                     if local_name!("caption") == name.local {
                         let mut paragraph = MarkdownParagraph::new();
-                        self.parse_paragraph(source_range.clone(), node, &mut paragraph);
+                        self.parse_paragraph(
+                            source_range.clone(),
+                            node,
+                            &mut paragraph,
+                            &mut Vec::new(),
+                        );
                         caption = Some(paragraph);
                     }
                     if local_name!("thead") == name.local {
@@ -1408,6 +1541,83 @@ mod tests {
         );
     }
 
+    #[gpui::test]
+    async fn test_html_inline_style_elements() {
+        let parsed =
+                parse("<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>").await;
+
+        assert_eq!(1, parsed.children.len());
+        let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
+            chunks
+        } else {
+            panic!("Expected a paragraph");
+        };
+
+        assert_eq!(1, chunks.len());
+        let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
+            text
+        } else {
+            panic!("Expected a paragraph");
+        };
+
+        assert_eq!(0..205, text.source_range);
+        assert_eq!(
+            "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text",
+            text.contents.as_str(),
+        );
+        assert_eq!(
+            vec![
+                (
+                    10..21,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        weight: FontWeight(700.0),
+                        ..Default::default()
+                    },),
+                ),
+                (
+                    32..41,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        weight: FontWeight(700.0),
+                        ..Default::default()
+                    },),
+                ),
+                (
+                    52..63,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        italic: true,
+                        weight: FontWeight(400.0),
+                        ..Default::default()
+                    },),
+                ),
+                (
+                    74..89,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        weight: FontWeight(400.0),
+                        oblique: true,
+                        ..Default::default()
+                    },),
+                ),
+                (
+                    100..112,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        strikethrough: true,
+                        weight: FontWeight(400.0),
+                        ..Default::default()
+                    },),
+                ),
+                (
+                    123..136,
+                    MarkdownHighlight::Style(MarkdownHighlightStyle {
+                        underline: true,
+                        weight: FontWeight(400.0,),
+                        ..Default::default()
+                    },),
+                ),
+            ],
+            text.highlights
+        );
+    }
+
     #[gpui::test]
     async fn test_text_with_inline_html() {
         let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;