From 9591790d8df9d97f0fa2aa9901113bc9bdc560be Mon Sep 17 00:00:00 2001 From: Remco Smits Date: Fri, 7 Nov 2025 17:12:27 +0100 Subject: [PATCH] markdown: Add support for `HTML` styling attributes (#42143) Second take on https://github.com/zed-industries/zed/pull/37765. This PR adds support for styling elements (**b**, **strong**, **em**, **i**, **ins**, **del**), and also allows the styled text to be shown inline with the surrounding text. This is done by appending all following text to a single text chunk and merging the highlights into that already existing chunk. If no text chunk exists yet, we create one and use it in the next iteration to store all the information. **Before** Screenshot 2025-11-06 at 22 08 09 **After** Screenshot 2025-11-06 at 22 08 21 **Code example** ```html

some text bold text

some text strong text

some text italic text

some text emphasized text

some text delete text

some text insert text

Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text

Link Text

text styled from style attribute

``` cc @bennetbo **TODO** - [x] add tests for styling nested text that should result in one merge Release Notes: - Markdown Preview: Added support for `HTML` styling elements --------- Co-authored-by: Bennet Bo Fenner --- .../markdown_preview/src/markdown_elements.rs | 6 + .../markdown_preview/src/markdown_parser.rs | 236 +++++++++++++++++- 2 files changed, 229 insertions(+), 13 deletions(-) diff --git a/crates/markdown_preview/src/markdown_elements.rs b/crates/markdown_preview/src/markdown_elements.rs index 9ed885f8765dd58a929b6125d6e7064278dab3bc..0a5e138e432cc66ddb0cb2a7231cffd2fd54a074 100644 --- a/crates/markdown_preview/src/markdown_elements.rs +++ b/crates/markdown_preview/src/markdown_elements.rs @@ -222,6 +222,10 @@ impl MarkdownHighlight { }); } + if style.oblique { + highlight.font_style = Some(FontStyle::Oblique) + } + Some(highlight) } @@ -243,6 +247,8 @@ pub struct MarkdownHighlightStyle { pub weight: FontWeight, /// Whether the text should be stylized as link. pub link: bool, + // Whether the text should be obliqued. + pub oblique: bool, } /// A parsed region in a Markdown document. diff --git a/crates/markdown_preview/src/markdown_parser.rs b/crates/markdown_preview/src/markdown_parser.rs index b2e68b4e1495515d2b37ca310ff3ec9b3502c2cc..e76f5182b047c9079750aa2eab53d83a48e139e6 100644 --- a/crates/markdown_preview/src/markdown_parser.rs +++ b/crates/markdown_preview/src/markdown_parser.rs @@ -12,6 +12,7 @@ use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd}; use std::{ cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec, }; +use ui::SharedString; pub async fn parse_markdown( markdown_input: &str, @@ -876,13 +877,21 @@ impl<'a> MarkdownParser<'a> { } markup5ever_rcdom::NodeData::Comment { .. } => {} markup5ever_rcdom::NodeData::Element { name, attrs, .. 
} => { + let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles( + Self::extract_styles_from_attributes(attrs), + ) { + vec![MarkdownHighlight::Style(styles)] + } else { + Vec::default() + }; + if local_name!("img") == name.local { if let Some(image) = self.extract_image(source_range, attrs) { elements.push(ParsedMarkdownElement::Image(image)); } } else if local_name!("p") == name.local { let mut paragraph = MarkdownParagraph::new(); - self.parse_paragraph(source_range, node, &mut paragraph); + self.parse_paragraph(source_range, node, &mut paragraph, &mut styles); if !paragraph.is_empty() { elements.push(ParsedMarkdownElement::Paragraph(paragraph)); @@ -897,7 +906,7 @@ impl<'a> MarkdownParser<'a> { | local_name!("h6") ) { let mut paragraph = MarkdownParagraph::new(); - self.consume_paragraph(source_range.clone(), node, &mut paragraph); + self.consume_paragraph(source_range.clone(), node, &mut paragraph, &mut styles); if !paragraph.is_empty() { elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading { @@ -944,24 +953,90 @@ impl<'a> MarkdownParser<'a> { source_range: Range, node: &Rc, paragraph: &mut MarkdownParagraph, + highlights: &mut Vec, ) { + fn add_highlight_range( + text: &String, + start: usize, + highlights: Vec, + ) -> Vec<(Range, MarkdownHighlight)> { + highlights + .into_iter() + .map(|style| (start..text.len(), style)) + .collect() + } + match &node.data { markup5ever_rcdom::NodeData::Text { contents } => { - paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText { - source_range, - regions: Vec::default(), - region_ranges: Vec::default(), - highlights: Vec::default(), - contents: contents.borrow().to_string().into(), - })); + // append the text to the last chunk, so we can have a hacky version + // of inline text with highlighting + if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p { + MarkdownParagraphChunk::Text(text) => Some(text), + _ => None, + }) { + let mut new_text = 
text.contents.to_string(); + new_text.push_str(&contents.borrow()); + let highlights = add_highlight_range( + &new_text, + text.contents.len(), + std::mem::take(highlights), + ); + + text.contents = SharedString::from(new_text); + text.highlights.extend(highlights); + } else { + let contents = contents.borrow().to_string(); + paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText { + source_range, + highlights: add_highlight_range(&contents, 0, std::mem::take(highlights)), + regions: Vec::default(), + contents: contents.into(), + region_ranges: Vec::default(), + })); + } } markup5ever_rcdom::NodeData::Element { name, attrs, .. } => { if local_name!("img") == name.local { if let Some(image) = self.extract_image(source_range, attrs) { paragraph.push(MarkdownParagraphChunk::Image(image)); } + } else if local_name!("b") == name.local || local_name!("strong") == name.local { + highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle { + weight: FontWeight::BOLD, + ..Default::default() + })); + + self.consume_paragraph(source_range, node, paragraph, highlights); + } else if local_name!("i") == name.local { + highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle { + italic: true, + ..Default::default() + })); + + self.consume_paragraph(source_range, node, paragraph, highlights); + } else if local_name!("em") == name.local { + highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle { + oblique: true, + ..Default::default() + })); + + self.consume_paragraph(source_range, node, paragraph, highlights); + } else if local_name!("del") == name.local { + highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle { + strikethrough: true, + ..Default::default() + })); + + self.consume_paragraph(source_range, node, paragraph, highlights); + } else if local_name!("ins") == name.local { + highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle { + underline: true, + ..Default::default() + })); + + self.consume_paragraph(source_range, node, 
paragraph, highlights); } else { - self.consume_paragraph(source_range, node, paragraph); + self.consume_paragraph(source_range, node, paragraph, highlights); } } _ => {} @@ -973,9 +1048,10 @@ impl<'a> MarkdownParser<'a> { source_range: Range, node: &Rc, paragraph: &mut MarkdownParagraph, + highlights: &mut Vec, ) { for node in node.children.borrow().iter() { - self.parse_paragraph(source_range.clone(), node, paragraph); + self.parse_paragraph(source_range.clone(), node, paragraph, highlights); } } @@ -1020,7 +1096,7 @@ impl<'a> MarkdownParser<'a> { } let mut children = MarkdownParagraph::new(); - self.consume_paragraph(source_range, node, &mut children); + self.consume_paragraph(source_range, node, &mut children, &mut Vec::new()); let is_header = matches!(name.local, local_name!("th")); @@ -1084,6 +1160,58 @@ impl<'a> MarkdownParser<'a> { }) } + fn markdown_style_from_html_styles( + styles: HashMap, + ) -> Option { + let mut markdown_style = MarkdownHighlightStyle::default(); + + if let Some(text_decoration) = styles.get("text-decoration") { + match text_decoration.to_lowercase().as_str() { + "underline" => { + markdown_style.underline = true; + } + "line-through" => { + markdown_style.strikethrough = true; + } + _ => {} + } + } + + if let Some(font_style) = styles.get("font-style") { + match font_style.to_lowercase().as_str() { + "italic" => { + markdown_style.italic = true; + } + "oblique" => { + markdown_style.oblique = true; + } + _ => {} + } + } + + if let Some(font_weight) = styles.get("font-weight") { + match font_weight.to_lowercase().as_str() { + "bold" => { + markdown_style.weight = FontWeight::BOLD; + } + "lighter" => { + markdown_style.weight = FontWeight::THIN; + } + _ => { + if let Some(weight) = font_weight.parse::().ok() { + markdown_style.weight = FontWeight(weight); + } + } + } + } + + if markdown_style != MarkdownHighlightStyle::default() { + Some(markdown_style) + } else { + None + } + } + fn extract_styles_from_attributes( attrs: &RefCell>, ) 
-> HashMap { @@ -1241,7 +1369,12 @@ impl<'a> MarkdownParser<'a> { markup5ever_rcdom::NodeData::Element { name, .. } => { if local_name!("caption") == name.local { let mut paragraph = MarkdownParagraph::new(); - self.parse_paragraph(source_range.clone(), node, &mut paragraph); + self.parse_paragraph( + source_range.clone(), + node, + &mut paragraph, + &mut Vec::new(), + ); caption = Some(paragraph); } if local_name!("thead") == name.local { @@ -1408,6 +1541,83 @@ mod tests { ); } + #[gpui::test] + async fn test_html_inline_style_elements() { + let parsed = + parse("

Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text

").await; + + assert_eq!(1, parsed.children.len()); + let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] { + chunks + } else { + panic!("Expected a paragraph"); + }; + + assert_eq!(1, chunks.len()); + let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] { + text + } else { + panic!("Expected a paragraph"); + }; + + assert_eq!(0..205, text.source_range); + assert_eq!( + "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text", + text.contents.as_str(), + ); + assert_eq!( + vec![ + ( + 10..21, + MarkdownHighlight::Style(MarkdownHighlightStyle { + weight: FontWeight(700.0), + ..Default::default() + },), + ), + ( + 32..41, + MarkdownHighlight::Style(MarkdownHighlightStyle { + weight: FontWeight(700.0), + ..Default::default() + },), + ), + ( + 52..63, + MarkdownHighlight::Style(MarkdownHighlightStyle { + italic: true, + weight: FontWeight(400.0), + ..Default::default() + },), + ), + ( + 74..89, + MarkdownHighlight::Style(MarkdownHighlightStyle { + weight: FontWeight(400.0), + oblique: true, + ..Default::default() + },), + ), + ( + 100..112, + MarkdownHighlight::Style(MarkdownHighlightStyle { + strikethrough: true, + weight: FontWeight(400.0), + ..Default::default() + },), + ), + ( + 123..136, + MarkdownHighlight::Style(MarkdownHighlightStyle { + underline: true, + weight: FontWeight(400.0,), + ..Default::default() + },), + ), + ], + text.highlights + ); + } + #[gpui::test] async fn test_text_with_inline_html() { let parsed = parse("This is a paragraph with an inline HTML tag.").await;