diff --git a/crates/markdown_preview/src/markdown_minifier.rs b/crates/markdown_preview/src/markdown_minifier.rs new file mode 100644 index 0000000000000000000000000000000000000000..a7d5ad0be0d9e65617bb45c66eb9748123d43067 --- /dev/null +++ b/crates/markdown_preview/src/markdown_minifier.rs @@ -0,0 +1,829 @@ +use html5ever::{ + Attribute, ParseOpts, QualName, parse_document, + tendril::{Tendril, TendrilSink, fmt::UTF8}, +}; +use markup5ever_rcdom::{Node, NodeData, RcDom}; +use std::{cell::RefCell, io, rc::Rc, str}; + +#[derive(Default)] +pub(crate) struct MinifierOptions { + pub omit_doctype: bool, + pub preserve_comments: bool, + pub collapse_whitespace: bool, +} + +pub(crate) struct Minifier<'a, W: io::Write> { + w: &'a mut W, + options: MinifierOptions, + preceding_whitespace: bool, +} + +impl<'a, W> Minifier<'a, W> +where + W: io::Write, +{ + /// Creates a new `Minifier` instance. + #[inline] + pub fn new(w: &'a mut W, options: MinifierOptions) -> Self { + Self { + w, + options, + preceding_whitespace: false, + } + } + + /// Minifies the given reader input. + /// + /// # Errors + /// + /// Will return `Err` if unable to write to the output writer. + #[inline] + pub fn minify(&mut self, mut r: &mut R) -> io::Result<()> { + let dom = parse_document(RcDom::default(), ParseOpts::default()) + .from_utf8() + .read_from(&mut r)?; + + if !self.options.omit_doctype { + self.w.write_all(b"")?; + } + + self.minify_node(&None, &dom.document) + } + + fn minify_node<'b>(&mut self, ctx: &'b Option, node: &'b Node) -> io::Result<()> { + match &node.data { + NodeData::Text { contents } => { + // Check if whitespace collapsing disabled + let contents = contents.borrow(); + let contents = contents.as_ref(); + + if !self.options.collapse_whitespace { + return self.w.write_all(contents.as_bytes()); + } + + // Check if parent is whitespace preserving element or contains code ( ", + "", + true, + false, + ), + ( + " ", + "", + true, + false, + ), + ("

A", "

A", true, false), + ("

A", "

A", true, false), + // Retain whitespace, whitespace before

+ ( + "

A ", + "

A ", + false, + false, + ), + // Retain whitespace, touching

+ ("

A", "

A", false, false), + // Comments ignored + ("

A", "

A", false, false), + // Comments preserved + ( + "

A", + "

A", + false, + true, + ), + // Retain end tag if touching inline element + ( + "

Some text

", + "

Some text

", + false, + false, + ), + ] { + let mut w = Vec::new(); + let mut minifier = Minifier::new( + &mut w, + MinifierOptions { + omit_doctype: true, + preserve_comments, + collapse_whitespace, + }, + ); + minifier.minify(&mut input.as_bytes()).unwrap(); + + let s = str::from_utf8(&w).unwrap(); + + assert_eq!(expected, s); + } + } +} diff --git a/crates/markdown_preview/src/markdown_parser.rs b/crates/markdown_preview/src/markdown_parser.rs index d995f07f3d38e18938c854ad5a73b3afe52e5977..86bf7e94b4f8be6b7724b9e95b2583127861fff3 100644 --- a/crates/markdown_preview/src/markdown_parser.rs +++ b/crates/markdown_preview/src/markdown_parser.rs @@ -1,4 +1,7 @@ -use crate::markdown_elements::*; +use crate::{ + markdown_elements::*, + markdown_minifier::{Minifier, MinifierOptions}, +}; use async_recursion::async_recursion; use collections::FxHashMap; use gpui::{DefiniteLength, FontWeight, px, relative}; @@ -28,6 +31,24 @@ pub async fn parse_markdown( } } +fn cleanup_html(source: &str) -> Vec { + let mut writer = std::io::Cursor::new(Vec::new()); + let mut reader = std::io::Cursor::new(source); + let mut minify = Minifier::new( + &mut writer, + MinifierOptions { + omit_doctype: true, + collapse_whitespace: true, + ..Default::default() + }, + ); + if let Ok(()) = minify.minify(&mut reader) { + writer.into_inner() + } else { + source.bytes().collect() + } +} + struct MarkdownParser<'a> { tokens: Vec<(Event<'a>, Range)>, /// The current index in the tokens array @@ -764,6 +785,10 @@ impl<'a> MarkdownParser<'a> { return elements; }; + let mut html_source_range_start = None; + let mut html_source_range_end = None; + let mut html_buffer = String::new(); + while !self.eof() { let Some((current, source_range)) = self.current() else { break; @@ -771,19 +796,10 @@ impl<'a> MarkdownParser<'a> { let source_range = source_range.clone(); match current { Event::Html(html) => { - let mut cursor = std::io::Cursor::new(html.as_bytes()); - let Some(dom) = parse_document(RcDom::default(), ParseOpts::default()) - .from_utf8() - .read_from(&mut cursor) - .ok() - else { - self.cursor += 1; - continue; - }; - + html_source_range_start.get_or_insert(source_range.start); + html_source_range_end = Some(source_range.end); + html_buffer.push_str(html); self.cursor += 1; - - self.parse_html_node(source_range, &dom.document, &mut elements); } Event::End(TagEnd::CodeBlock) => { self.cursor += 1; @@ -795,6 +811,17 @@ impl<'a> MarkdownParser<'a> { } } + let bytes = cleanup_html(&html_buffer); + + let mut cursor = std::io::Cursor::new(bytes); + if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default()) + .from_utf8() + .read_from(&mut cursor) + && let Some((start, end)) = html_source_range_start.zip(html_source_range_end) + { + self.parse_html_node(start..end, &dom.document, &mut elements); + } + elements } @@ -853,6 +880,10 @@ impl<'a> MarkdownParser<'a> { contents: paragraph, })); } + } else if local_name!("table") == name.local { + if let Some(table) = self.extract_html_table(node, source_range) { + elements.push(ParsedMarkdownElement::Table(table)); + } } else { self.consume_children(source_range, node, elements); } @@ -971,6 +1002,55 @@ impl<'a> MarkdownParser<'a> { Some(image) } + fn extract_html_table( + &self, + node: &Rc, + source_range: Range, + ) -> Option { + let mut header_columns = Vec::new(); + let mut body_rows = Vec::new(); + + // node should be a thead or tbody element + for node in node.children.borrow().iter() { + match &node.data { + markup5ever_rcdom::NodeData::Element { name, .. } => { + if local_name!("thead") == name.local { + // node should be a tr element + for node in node.children.borrow().iter() { + let mut paragraph = MarkdownParagraph::new(); + self.consume_paragraph(source_range.clone(), node, &mut paragraph); + + for paragraph in paragraph.into_iter() { + header_columns.push(vec![paragraph]); + } + } + } else if local_name!("tbody") == name.local { + // node should be a tr element + for node in node.children.borrow().iter() { + let mut row = MarkdownParagraph::new(); + self.consume_paragraph(source_range.clone(), node, &mut row); + body_rows.push(ParsedMarkdownTableRow::with_children( + row.into_iter().map(|column| vec![column]).collect(), + )); + } + } + } + _ => {} + } + } + + if !header_columns.is_empty() || !body_rows.is_empty() { + Some(ParsedMarkdownTable { + source_range, + body: body_rows, + column_alignments: Vec::default(), + header: ParsedMarkdownTableRow::with_children(header_columns), + }) + } else { + None + } + } + /// Parses the width/height attribute value of an html element (e.g. img element) fn parse_length(value: &str) -> Option { if value.ends_with("%") { @@ -1330,6 +1410,104 @@ mod tests { ); } + #[gpui::test] + async fn test_html_table() { + let parsed = parse( + " + + + + + + + + + + + + + + + + +
IdName
1Chris
2Dennis
", + ) + .await; + + assert_eq!( + ParsedMarkdown { + children: vec![ParsedMarkdownElement::Table(table( + 0..366, + row(vec![text("Id", 0..366), text("Name ", 0..366)]), + vec![ + row(vec![text("1", 0..366), text("Chris", 0..366)]), + row(vec![text("2", 0..366), text("Dennis", 0..366)]), + ], + ))], + }, + parsed + ); + } + + #[gpui::test] + async fn test_html_table_without_headings() { + let parsed = parse( + " + + + + + + + + + + +
1Chris
2Dennis
", + ) + .await; + + assert_eq!( + ParsedMarkdown { + children: vec![ParsedMarkdownElement::Table(table( + 0..240, + row(vec![]), + vec![ + row(vec![text("1", 0..240), text("Chris", 0..240)]), + row(vec![text("2", 0..240), text("Dennis", 0..240)]), + ], + ))], + }, + parsed + ); + } + + #[gpui::test] + async fn test_html_table_without_body() { + let parsed = parse( + " + + + + + + +
IdName
", + ) + .await; + + assert_eq!( + ParsedMarkdown { + children: vec![ParsedMarkdownElement::Table(table( + 0..150, + row(vec![text("Id", 0..150), text("Name", 0..150)]), + vec![], + ))], + }, + parsed + ); + } + #[gpui::test] async fn test_html_heading_tags() { let parsed = parse("

Heading

Heading

Heading

Heading

Heading
Heading
").await; diff --git a/crates/markdown_preview/src/markdown_preview.rs b/crates/markdown_preview/src/markdown_preview.rs index 91c0005097d778d4b60f7a8a721ed898f0059ed1..77bad89a629cbb1f660e1cd16158d4dbca03361e 100644 --- a/crates/markdown_preview/src/markdown_preview.rs +++ b/crates/markdown_preview/src/markdown_preview.rs @@ -2,6 +2,7 @@ use gpui::{App, actions}; use workspace::Workspace; pub mod markdown_elements; +mod markdown_minifier; pub mod markdown_parser; pub mod markdown_preview_view; pub mod markdown_renderer; diff --git a/crates/markdown_preview/src/markdown_renderer.rs b/crates/markdown_preview/src/markdown_renderer.rs index 4ac08df0930e9cc523d1e277abf4517946be5368..a873771e001f594149acc83ea46ce45608a9ed87 100644 --- a/crates/markdown_preview/src/markdown_renderer.rs +++ b/crates/markdown_preview/src/markdown_renderer.rs @@ -475,6 +475,10 @@ fn render_markdown_table(parsed: &ParsedMarkdownTable, cx: &mut RenderContext) - for (index, cell) in row.children.iter().enumerate() { let length = paragraph_len(cell); + if index >= max_lengths.len() { + max_lengths.resize(index + 1, length); + } + if length > max_lengths[index] { max_lengths[index] = length; } @@ -523,7 +527,7 @@ fn render_markdown_table_row( is_header: bool, cx: &mut RenderContext, ) -> AnyElement { - let mut items = vec![]; + let mut items = Vec::with_capacity(parsed.children.len()); let count = parsed.children.len(); for (index, cell) in parsed.children.iter().enumerate() { @@ -652,7 +656,7 @@ fn render_markdown_paragraph(parsed: &MarkdownParagraph, cx: &mut RenderContext) } fn render_markdown_text(parsed_new: &MarkdownParagraph, cx: &mut RenderContext) -> Vec { - let mut any_element = vec![]; + let mut any_element = Vec::with_capacity(parsed_new.len()); // these values are cloned in-order satisfy borrow checker let syntax_theme = cx.syntax_theme.clone(); let workspace_clone = cx.workspace.clone();