markdown: Add HTML `table` element support (#38605)

Remco Smits created

Follow-up: https://github.com/zed-industries/zed/pull/38590

**Note**: this PR contains changes from the [previous
PR](https://github.com/zed-industries/zed/pull/38590), when that PR gets
merged we should see the real changes.
This PR fixes 4 things in order to make:

1. Add html/markdown minifier to remove all the **\t** and **\n**
characters. This is needed as you cannot create new lines with markdown
by just adding an enter to the source file.
2. The event Event::HTML only contained a chunk of the real html for
multiline HTML code. I fixed this by storing the currently watched HTML
inside a buffer and at the end we parse it into the right elements.
Instead of trying to parse a chunck into multiple elements which would
always fail before.
3. Add support for html tables.
4. Fixed panic that occured when table does not have an header.

I also decided to keep the html minifier inside Zed, because making it a
dependency for just a few 100 lines seems to be an overkill. The
original crate had a few cve in their dependencies, so figured this
would be the best.

**Html table support**
<img width="1439" height="801" alt="Screenshot 2025-09-27 at 12 19 07"
src="https://github.com/user-attachments/assets/a884cc6f-cf47-45a2-81fa-91300c7bbf3f"
/>

**Before & after Zed's README (no changes)**
<img width="3440" height="1378" alt="Screenshot 2025-09-27 at 12 34 47"
src="https://github.com/user-attachments/assets/1273b094-fb24-4abd-bffa-56ef3b44670c"
/>

Release Notes:

- Markdown: Added support for html tables

Change summary

crates/markdown_preview/src/markdown_minifier.rs | 829 ++++++++++++++++++
crates/markdown_preview/src/markdown_parser.rs   | 204 ++++
crates/markdown_preview/src/markdown_preview.rs  |   1 
crates/markdown_preview/src/markdown_renderer.rs |   8 
4 files changed, 1,027 insertions(+), 15 deletions(-)

Detailed changes

crates/markdown_preview/src/markdown_minifier.rs 🔗

@@ -0,0 +1,829 @@
+use html5ever::{
+    Attribute, ParseOpts, QualName, parse_document,
+    tendril::{Tendril, TendrilSink, fmt::UTF8},
+};
+use markup5ever_rcdom::{Node, NodeData, RcDom};
+use std::{cell::RefCell, io, rc::Rc, str};
+
+#[derive(Default)]
+pub(crate) struct MinifierOptions {
+    pub omit_doctype: bool,
+    pub preserve_comments: bool,
+    pub collapse_whitespace: bool,
+}
+
+pub(crate) struct Minifier<'a, W: io::Write> {
+    w: &'a mut W,
+    options: MinifierOptions,
+    preceding_whitespace: bool,
+}
+
+impl<'a, W> Minifier<'a, W>
+where
+    W: io::Write,
+{
+    /// Creates a new `Minifier` instance.
+    #[inline]
+    pub fn new(w: &'a mut W, options: MinifierOptions) -> Self {
+        Self {
+            w,
+            options,
+            preceding_whitespace: false,
+        }
+    }
+
+    /// Minifies the given reader input.
+    ///
+    /// # Errors
+    ///
+    /// Will return `Err` if unable to write to the output writer.
+    #[inline]
+    pub fn minify<R: io::Read>(&mut self, mut r: &mut R) -> io::Result<()> {
+        let dom = parse_document(RcDom::default(), ParseOpts::default())
+            .from_utf8()
+            .read_from(&mut r)?;
+
+        if !self.options.omit_doctype {
+            self.w.write_all(b"<!doctype html>")?;
+        }
+
+        self.minify_node(&None, &dom.document)
+    }
+
+    fn minify_node<'b>(&mut self, ctx: &'b Option<Context>, node: &'b Node) -> io::Result<()> {
+        match &node.data {
+            NodeData::Text { contents } => {
+                // Check if whitespace collapsing disabled
+                let contents = contents.borrow();
+                let contents = contents.as_ref();
+
+                if !self.options.collapse_whitespace {
+                    return self.w.write_all(contents.as_bytes());
+                }
+
+                // Check if parent is whitespace preserving element or contains code (<script>, <style>)
+                let (skip_collapse_whitespace, contains_code) =
+                    ctx.as_ref().map_or((false, false), |ctx| {
+                        if let NodeData::Element { name, .. } = &ctx.parent.data {
+                            let name = name.local.as_ref();
+
+                            (preserve_whitespace(name), contains_code(name))
+                        } else {
+                            (false, false)
+                        }
+                    });
+
+                if skip_collapse_whitespace {
+                    return self.w.write_all(contents.as_bytes());
+                }
+
+                if contains_code {
+                    return self
+                        .w
+                        .write_all(contents.trim_matches(is_ascii_whitespace).as_bytes());
+                }
+
+                // Early exit if empty to forego expensive trim logic
+                if contents.is_empty() {
+                    return io::Result::Ok(());
+                }
+
+                let (trim_left, trim_right) = ctx
+                    .as_ref()
+                    .map_or((true, true), |ctx| ctx.trim(self.preceding_whitespace));
+                let contents = match (trim_left, trim_right) {
+                    (true, true) => contents.trim_matches(is_ascii_whitespace),
+                    (true, false) => contents.trim_start_matches(is_ascii_whitespace),
+                    (false, true) => contents.trim_end_matches(is_ascii_whitespace),
+                    _ => contents,
+                };
+
+                // Second empty check after trimming whitespace
+                if !contents.is_empty() {
+                    // replace \n, \r to ' '
+                    let contents = contents
+                        .bytes()
+                        .map(|c| if matches!(c, b'\n' | b'\r') { b' ' } else { c })
+                        .collect::<Vec<u8>>();
+
+                    self.write_collapse_whitespace(&contents, reserved_entity, None)?;
+
+                    self.preceding_whitespace = !trim_right
+                        && contents
+                            .iter()
+                            .last()
+                            .map_or(false, u8::is_ascii_whitespace);
+                }
+
+                Ok(())
+            }
+
+            NodeData::Comment { contents } if self.options.preserve_comments => {
+                self.w.write_all(b"<!--")?;
+                self.w.write_all(contents.as_bytes())?;
+                self.w.write_all(b"-->")
+            }
+
+            NodeData::Document => self.minify_children(ctx, node),
+
+            NodeData::Element { name, attrs, .. } => {
+                let attrs = attrs.borrow();
+                let tag = name.local.as_ref();
+
+                if is_self_closing(tag) {
+                    return self.write_start_tag(name, &attrs);
+                }
+
+                let (omit_start_tag, omit_end_tag) =
+                    self.omit_tags(ctx, node, tag, attrs.is_empty());
+
+                if !omit_start_tag {
+                    self.write_start_tag(name, &attrs)?;
+                }
+
+                self.minify_children(ctx, node)?;
+
+                if !omit_end_tag {
+                    self.write_end_tag(name)?;
+                }
+
+                Ok(())
+            }
+
+            _ => Ok(()),
+        }
+    }
+
+    fn next_is_comment<'b, I>(&self, v: I) -> bool
+    where
+        I: IntoIterator<Item = &'b Rc<Node>>,
+    {
+        v.into_iter()
+            .find_map(|node| match &node.data {
+                NodeData::Text { contents } => {
+                    if self.options.collapse_whitespace && is_whitespace(contents) {
+                        // Blocks of whitespace are skipped
+                        None
+                    } else {
+                        Some(false)
+                    }
+                }
+                NodeData::Comment { .. } => Some(self.options.preserve_comments),
+                _ => Some(false),
+            })
+            .unwrap_or(false)
+    }
+
+    fn is_whitespace(&self, s: &RefCell<Tendril<UTF8>>) -> Option<bool> {
+        if self.options.collapse_whitespace && is_whitespace(s) {
+            None
+        } else {
+            Some(
+                !s.borrow()
+                    .as_bytes()
+                    .iter()
+                    .next()
+                    .map_or(false, u8::is_ascii_whitespace),
+            )
+        }
+    }
+
+    /// Determines if start and end tags can be omitted.
+    /// Whitespace rules are ignored if `collapse_whitespace` is enabled.
+    #[allow(clippy::too_many_lines)]
+    fn omit_tags(
+        &self,
+        ctx: &Option<Context>,
+        node: &Node,
+        name: &str,
+        empty_attributes: bool,
+    ) -> (bool, bool) {
+        ctx.as_ref().map_or((false, false), |ctx| match name {
+            "html" => {
+                // The end tag may be omitted if the <html> element is not immediately followed by a comment.
+                let omit_end = ctx.right.map_or(true, |right| !self.next_is_comment(right));
+                // The start tag may be omitted if the first thing inside the <html> element is not a comment.
+                let omit_start =
+                    empty_attributes && omit_end && !self.next_is_comment(&*node.children.borrow());
+
+                (omit_start, omit_end)
+            }
+            "head" => {
+                // The end tag may be omitted if the first thing following the <head> element is not a space character or a comment.
+                let omit_end = ctx.right.map_or(true, |right| {
+                    right
+                        .iter()
+                        .find_map(|node| match &node.data {
+                            NodeData::Text { contents } => self.is_whitespace(contents),
+                            NodeData::Comment { .. } => {
+                                if self.options.preserve_comments {
+                                    Some(false)
+                                } else {
+                                    None
+                                }
+                            }
+                            _ => Some(true),
+                        })
+                        .unwrap_or(true)
+                });
+                // The start tag may be omitted if the first thing inside the <head> element is an element.
+                let omit_start = empty_attributes
+                    && omit_end
+                    && node
+                        .children
+                        .borrow()
+                        .iter()
+                        .find_map(|node| match &node.data {
+                            NodeData::Text { contents } => self.is_whitespace(contents),
+                            NodeData::Element { .. } => Some(true),
+                            NodeData::Comment { .. } => {
+                                if self.options.preserve_comments {
+                                    Some(false)
+                                } else {
+                                    None
+                                }
+                            }
+                            _ => Some(false),
+                        })
+                        .unwrap_or(true);
+
+                (omit_start, omit_end)
+            }
+            "body" => {
+                // The start tag may be omitted if the first thing inside it is not a space character, comment, <script> element or <style> element.
+                let omit_start = empty_attributes
+                    && node
+                        .children
+                        .borrow()
+                        .iter()
+                        .find_map(|node| match &node.data {
+                            NodeData::Text { contents } => self.is_whitespace(contents),
+                            NodeData::Element { name, .. } => {
+                                Some(!matches!(name.local.as_ref(), "script" | "style"))
+                            }
+                            NodeData::Comment { .. } => {
+                                if self.options.preserve_comments {
+                                    Some(false)
+                                } else {
+                                    None
+                                }
+                            }
+                            _ => Some(true),
+                        })
+                        .unwrap_or(true);
+                // The end tag may be omitted if the <body> element has contents or has a start tag, and is not immediately followed by a comment.
+                let omit_end = ctx.right.map_or(true, |right| !self.next_is_comment(right));
+
+                (omit_start && omit_end, omit_end)
+            }
+            "p" => {
+                let omit_end = ctx.next_element().map_or(true, |node| {
+                    if let NodeData::Element { name, .. } = &node.data {
+                        matches!(
+                            name.local.as_ref().to_ascii_lowercase().as_str(),
+                            "address"
+                                | "article"
+                                | "aside"
+                                | "blockquote"
+                                | "div"
+                                | "dl"
+                                | "fieldset"
+                                | "footer"
+                                | "form"
+                                | "h1"
+                                | "h2"
+                                | "h3"
+                                | "h4"
+                                | "h5"
+                                | "h6"
+                                | "header"
+                                | "hr"
+                                | "menu"
+                                | "nav"
+                                | "ol"
+                                | "p"
+                                | "pre"
+                                | "section"
+                                | "table"
+                                | "ul"
+                        )
+                    } else {
+                        false
+                    }
+                });
+
+                (false, omit_end)
+            }
+            // TODO: comprehensive handling of optional end element rules
+            _ => (false, optional_end_tag(name)),
+        })
+    }
+
+    #[allow(clippy::needless_pass_by_value)]
+    fn minify_children(&mut self, ctx: &Option<Context>, node: &Node) -> io::Result<()> {
+        let children = node.children.borrow();
+        let l = children.len();
+
+        children.iter().enumerate().try_for_each(|(i, child)| {
+            if self.preceding_whitespace && is_block_element(child) {
+                self.preceding_whitespace = false;
+            }
+
+            self.minify_node(
+                &Some(Context {
+                    parent: node,
+                    parent_context: ctx.as_ref(),
+                    left: if i > 0 { Some(&children[..i]) } else { None },
+                    right: if i + 1 < l {
+                        Some(&children[i + 1..])
+                    } else {
+                        None
+                    },
+                }),
+                child,
+            )
+        })
+    }
+
+    fn write_qualified_name(&mut self, name: &QualName) -> io::Result<()> {
+        if let Some(prefix) = &name.prefix {
+            self.w
+                .write_all(prefix.as_ref().to_ascii_lowercase().as_bytes())?;
+            self.w.write_all(b":")?;
+        }
+
+        self.w
+            .write_all(name.local.as_ref().to_ascii_lowercase().as_bytes())
+    }
+
+    fn write_start_tag(&mut self, name: &QualName, attrs: &[Attribute]) -> io::Result<()> {
+        self.w.write_all(b"<")?;
+        self.write_qualified_name(name)?;
+
+        attrs
+            .iter()
+            .try_for_each(|attr| self.write_attribute(attr))?;
+
+        self.w.write_all(b">")
+    }
+
+    fn write_end_tag(&mut self, name: &QualName) -> io::Result<()> {
+        self.w.write_all(b"</")?;
+        self.write_qualified_name(name)?;
+        self.w.write_all(b">")
+    }
+
+    fn write_attribute(&mut self, attr: &Attribute) -> io::Result<()> {
+        self.w.write_all(b" ")?;
+        self.write_qualified_name(&attr.name)?;
+
+        let value = attr.value.as_ref();
+        let value = if self.options.collapse_whitespace {
+            value.trim_matches(is_ascii_whitespace)
+        } else {
+            value
+        };
+
+        if value.is_empty() {
+            return io::Result::Ok(());
+        }
+
+        self.w.write_all(b"=")?;
+
+        let b = value.as_bytes();
+        let (unquoted, double, _) =
+            b.iter()
+                .fold((true, false, false), |(unquoted, double, single), &c| {
+                    let (double, single) = (double || c == b'"', single || c == b'\'');
+                    let unquoted =
+                        unquoted && !double && !single && c != b'=' && !c.is_ascii_whitespace();
+
+                    (unquoted, double, single)
+                });
+
+        if unquoted {
+            self.w.write_all(b)
+        } else if double {
+            self.write_attribute_value(b, b"'", reserved_entity_with_apos)
+        } else {
+            self.write_attribute_value(b, b"\"", reserved_entity)
+        }
+    }
+
+    fn write_attribute_value<T: AsRef<[u8]>>(
+        &mut self,
+        v: T,
+        quote: &[u8],
+        f: EntityFn,
+    ) -> io::Result<()> {
+        self.w.write_all(quote)?;
+
+        let b = v.as_ref();
+
+        if self.options.collapse_whitespace {
+            self.write_collapse_whitespace(b, f, Some(false))
+        } else {
+            self.w.write_all(b)
+        }?;
+
+        self.w.write_all(quote)
+    }
+
+    /// Efficiently writes blocks of content, e.g. a string with no collapsed
+    /// whitespace would result in a single write.
+    fn write_collapse_whitespace(
+        &mut self,
+        b: &[u8],
+        f: EntityFn,
+        preceding_whitespace: Option<bool>,
+    ) -> io::Result<()> {
+        b.iter()
+            .enumerate()
+            .try_fold(
+                (0, preceding_whitespace.unwrap_or(self.preceding_whitespace)),
+                |(pos, preceding_whitespace), (i, &c)| {
+                    let is_whitespace = c.is_ascii_whitespace();
+
+                    Ok(if is_whitespace && preceding_whitespace {
+                        if i != pos {
+                            self.write(&b[pos..i], f)?;
+                        }
+
+                        // ASCII whitespace = 1 byte
+                        (i + 1, true)
+                    } else {
+                        (pos, is_whitespace)
+                    })
+                },
+            )
+            .and_then(|(pos, _)| {
+                if pos < b.len() {
+                    self.write(&b[pos..], f)?;
+                }
+
+                Ok(())
+            })
+    }
+
+    fn write(&mut self, b: &[u8], f: EntityFn) -> io::Result<()> {
+        b.iter()
+            .enumerate()
+            .try_fold(0, |pos, (i, &c)| {
+                Ok(if let Some(entity) = f(c) {
+                    self.w.write_all(&b[pos..i])?;
+                    self.w.write_all(entity)?;
+
+                    // Reserved characters are 1 byte
+                    i + 1
+                } else {
+                    pos
+                })
+            })
+            .and_then(|pos| {
+                if pos < b.len() {
+                    self.w.write_all(&b[pos..])?;
+                }
+
+                Ok(())
+            })
+    }
+}
+
+struct Context<'a> {
+    parent: &'a Node,
+    parent_context: Option<&'a Context<'a>>,
+    left: Option<&'a [Rc<Node>]>,
+    right: Option<&'a [Rc<Node>]>,
+}
+
+impl<'a> Context<'a> {
+    /// Determine whether to trim whitespace.
+    /// Uses naive HTML5 whitespace collapsing rules.
+    fn trim(&self, preceding_whitespace: bool) -> (bool, bool) {
+        (preceding_whitespace || self.trim_left(), self.trim_right())
+    }
+
+    fn trim_left(&self) -> bool {
+        self.left.map_or_else(
+            || is_block_element(self.parent) || self.parent_trim_left(),
+            |siblings| {
+                siblings
+                    .iter()
+                    .rev()
+                    .find_map(Self::is_block_element)
+                    .unwrap_or_else(|| self.parent_trim_left())
+            },
+        )
+    }
+
+    fn parent_trim_left(&self) -> bool {
+        self.parent_context.map_or(true, Context::trim_left)
+    }
+
+    fn trim_right(&self) -> bool {
+        self.right.map_or(true, |siblings| {
+            siblings
+                .iter()
+                .find_map(Self::is_block_element)
+                .unwrap_or(true)
+        })
+    }
+
+    fn next_element(&self) -> Option<&Rc<Node>> {
+        self.right.and_then(|siblings| {
+            siblings
+                .iter()
+                .find(|node| matches!(node.data, NodeData::Element { .. }))
+        })
+    }
+
+    fn is_block_element(node: &Rc<Node>) -> Option<bool> {
+        if let NodeData::Element { name, .. } = &node.data {
+            Some(is_block_element_name(name.local.as_ref()))
+        } else {
+            None
+        }
+    }
+}
+
+type EntityFn = fn(u8) -> Option<&'static [u8]>;
+
+const fn reserved_entity(v: u8) -> Option<&'static [u8]> {
+    match v {
+        b'<' => Some(b"&lt;"),
+        b'>' => Some(b"&gt;"),
+        b'&' => Some(b"&#38;"),
+        _ => None,
+    }
+}
+
+const fn reserved_entity_with_apos(v: u8) -> Option<&'static [u8]> {
+    if v == b'\'' {
+        Some(b"&#39;")
+    } else {
+        reserved_entity(v)
+    }
+}
+
+fn is_whitespace(s: &RefCell<Tendril<UTF8>>) -> bool {
+    s.borrow().as_bytes().iter().all(u8::is_ascii_whitespace)
+}
+
+fn is_block_element_name(name: &str) -> bool {
+    matches!(
+        name,
+        "address"
+            | "article"
+            | "aside"
+            | "blockquote"
+            | "body"
+            | "br"
+            | "details"
+            | "dialog"
+            | "dd"
+            | "div"
+            | "dl"
+            | "dt"
+            | "fieldset"
+            | "figcaption"
+            | "figure"
+            | "footer"
+            | "form"
+            | "h1"
+            | "h2"
+            | "h3"
+            | "h4"
+            | "h5"
+            | "h6"
+            | "head"
+            | "header"
+            | "hgroup"
+            | "hr"
+            | "html"
+            | "li"
+            | "link"
+            | "main"
+            | "meta"
+            | "nav"
+            | "ol"
+            | "option"
+            | "p"
+            | "pre"
+            | "script"
+            | "section"
+            | "source"
+            | "table"
+            | "td"
+            | "th"
+            | "title"
+            | "tr"
+            | "ul"
+    )
+}
+
+fn is_block_element(node: &Node) -> bool {
+    match &node.data {
+        NodeData::Element { name, .. } => is_block_element_name(name.local.as_ref()),
+        NodeData::Document => true,
+        _ => false,
+    }
+}
+
+#[allow(clippy::missing_const_for_fn)]
+fn is_ascii_whitespace(c: char) -> bool {
+    c.is_ascii_whitespace()
+}
+
+fn preserve_whitespace(name: &str) -> bool {
+    matches!(name, "pre" | "textarea")
+}
+
+fn contains_code(name: &str) -> bool {
+    matches!(name, "script" | "style")
+}
+
+fn is_self_closing(name: &str) -> bool {
+    matches!(
+        name,
+        "area"
+            | "base"
+            | "br"
+            | "col"
+            | "embed"
+            | "hr"
+            | "img"
+            | "input"
+            | "link"
+            | "meta"
+            | "param"
+            | "source"
+            | "track"
+            | "wbr"
+            | "command"
+            | "keygen"
+            | "menuitem"
+    )
+}
+
+fn optional_end_tag(name: &str) -> bool {
+    matches!(
+        name,
+        "basefont"
+            | "colgroup"
+            | "dd"
+            | "dt"
+            | "frame"
+            | "isindex"
+            | "li"
+            | "option"
+            | "p"
+            | "tbody"
+            | "td"
+            | "tfoot"
+            | "th"
+            | "thead"
+            | "tr"
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::str;
+
+    #[test]
+    fn test_write_collapse_whitespace() {
+        for &(input, expected, preceding_whitespace) in &[
+            ("", "", false),
+            ("  ", " ", false),
+            ("   ", " ", false),
+            ("   ", "", true),
+            (" x      y  ", " x y ", false),
+            (" x      y  ", "x y ", true),
+            (" x   \n  \t \n   y  ", " x y ", false),
+            (" x   \n  \t \n   y  ", "x y ", true),
+        ] {
+            let mut w = Vec::new();
+            let mut minifier = Minifier::new(&mut w, MinifierOptions::default());
+            minifier.preceding_whitespace = preceding_whitespace;
+            minifier
+                .write_collapse_whitespace(
+                    input.as_bytes(),
+                    reserved_entity,
+                    Some(preceding_whitespace),
+                )
+                .unwrap();
+
+            let s = str::from_utf8(&w).unwrap();
+
+            assert_eq!(expected, s);
+        }
+    }
+
+    #[test]
+    fn test_omit_tags() {
+        for &(input, expected, collapse_whitespace, preserve_comments) in &[
+            // <html>
+            ("<html>", "", true, false),
+            // Comments ignored
+            ("<html><!-- -->", "", true, false),
+            // Comments preserved
+            ("<html>     <!-- -->    ", "<html><!-- -->", true, true),
+            ("<html><!-- --></html>", "<html><!-- -->", true, true),
+            (
+                "<html><!-- --></html><!-- -->",
+                "<html><!-- --></html><!-- -->",
+                true,
+                true,
+            ),
+            (
+                "<html>    <!-- -->    </html>    <!-- -->    ",
+                "<html><!-- --></html><!-- -->",
+                true,
+                true,
+            ),
+            (
+                "<html>    <!-- -->    </html>    <!-- -->    ",
+                // <body> is implicitly added to the DOM
+                "<html><!-- --><body>        </html><!-- -->",
+                false,
+                true,
+            ),
+            // <head>
+            (
+                "<html>   <head>   <title>A</title>     </head>   <body><p>     B  </p> </body>",
+                "<title>A</title><p>B",
+                true,
+                false,
+            ),
+            (
+                "<html>   <head>   <title>A</title>     </head>   <body><p>     B  </p> </body>",
+                "<head>   <title>A</title>     </head>   <p>     B   ",
+                false,
+                false,
+            ),
+            (
+                "<html>   <head><!-- -->   <title>A</title>     </head>   <body><p>     B  </p> </body>",
+                "<head><!-- --><title>A</title><p>B",
+                true,
+                true,
+            ),
+            // <body>
+            ("<body>", "", true, false),
+            (
+                "<body>    <script>let x = 1;</script>   ",
+                "<body><script>let x = 1;</script>",
+                true,
+                false,
+            ),
+            (
+                "<body>        <style>body{margin:1em}</style>",
+                "<body><style>body{margin:1em}</style>",
+                true,
+                false,
+            ),
+            ("<body>    <p>A", "<p>A", true, false),
+            ("<body id=main>    <p>A", "<body id=main><p>A", true, false),
+            // Retain whitespace, whitespace before <p>
+            (
+                "    <body>    <p>A      ",
+                "<body>    <p>A      ",
+                false,
+                false,
+            ),
+            // Retain whitespace, touching <p>
+            ("<body><p>A</body>", "<p>A", false, false),
+            // Comments ignored
+            ("<body><p>A</body><!-- -->", "<p>A", false, false),
+            // Comments preserved
+            (
+                "<body><p>A</body><!-- -->",
+                "<body><p>A</body><!-- -->",
+                false,
+                true,
+            ),
+            // Retain end tag if touching inline element
+            (
+                "<p>Some text</p><button></button>",
+                "<p>Some text</p><button></button>",
+                false,
+                false,
+            ),
+        ] {
+            let mut w = Vec::new();
+            let mut minifier = Minifier::new(
+                &mut w,
+                MinifierOptions {
+                    omit_doctype: true,
+                    preserve_comments,
+                    collapse_whitespace,
+                },
+            );
+            minifier.minify(&mut input.as_bytes()).unwrap();
+
+            let s = str::from_utf8(&w).unwrap();
+
+            assert_eq!(expected, s);
+        }
+    }
+}

crates/markdown_preview/src/markdown_parser.rs 🔗

@@ -1,4 +1,7 @@
-use crate::markdown_elements::*;
+use crate::{
+    markdown_elements::*,
+    markdown_minifier::{Minifier, MinifierOptions},
+};
 use async_recursion::async_recursion;
 use collections::FxHashMap;
 use gpui::{DefiniteLength, FontWeight, px, relative};
@@ -28,6 +31,24 @@ pub async fn parse_markdown(
     }
 }
 
+fn cleanup_html(source: &str) -> Vec<u8> {
+    let mut writer = std::io::Cursor::new(Vec::new());
+    let mut reader = std::io::Cursor::new(source);
+    let mut minify = Minifier::new(
+        &mut writer,
+        MinifierOptions {
+            omit_doctype: true,
+            collapse_whitespace: true,
+            ..Default::default()
+        },
+    );
+    if let Ok(()) = minify.minify(&mut reader) {
+        writer.into_inner()
+    } else {
+        source.bytes().collect()
+    }
+}
+
 struct MarkdownParser<'a> {
     tokens: Vec<(Event<'a>, Range<usize>)>,
     /// The current index in the tokens array
@@ -764,6 +785,10 @@ impl<'a> MarkdownParser<'a> {
             return elements;
         };
 
+        let mut html_source_range_start = None;
+        let mut html_source_range_end = None;
+        let mut html_buffer = String::new();
+
         while !self.eof() {
             let Some((current, source_range)) = self.current() else {
                 break;
@@ -771,19 +796,10 @@ impl<'a> MarkdownParser<'a> {
             let source_range = source_range.clone();
             match current {
                 Event::Html(html) => {
-                    let mut cursor = std::io::Cursor::new(html.as_bytes());
-                    let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
-                        .from_utf8()
-                        .read_from(&mut cursor)
-                        .ok()
-                    else {
-                        self.cursor += 1;
-                        continue;
-                    };
-
+                    html_source_range_start.get_or_insert(source_range.start);
+                    html_source_range_end = Some(source_range.end);
+                    html_buffer.push_str(html);
                     self.cursor += 1;
-
-                    self.parse_html_node(source_range, &dom.document, &mut elements);
                 }
                 Event::End(TagEnd::CodeBlock) => {
                     self.cursor += 1;
@@ -795,6 +811,17 @@ impl<'a> MarkdownParser<'a> {
             }
         }
 
+        let bytes = cleanup_html(&html_buffer);
+
+        let mut cursor = std::io::Cursor::new(bytes);
+        if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
+            .from_utf8()
+            .read_from(&mut cursor)
+            && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
+        {
+            self.parse_html_node(start..end, &dom.document, &mut elements);
+        }
+
         elements
     }
 
@@ -853,6 +880,10 @@ impl<'a> MarkdownParser<'a> {
                             contents: paragraph,
                         }));
                     }
+                } else if local_name!("table") == name.local {
+                    if let Some(table) = self.extract_html_table(node, source_range) {
+                        elements.push(ParsedMarkdownElement::Table(table));
+                    }
                 } else {
                     self.consume_children(source_range, node, elements);
                 }
@@ -971,6 +1002,55 @@ impl<'a> MarkdownParser<'a> {
         Some(image)
     }
 
+    fn extract_html_table(
+        &self,
+        node: &Rc<markup5ever_rcdom::Node>,
+        source_range: Range<usize>,
+    ) -> Option<ParsedMarkdownTable> {
+        let mut header_columns = Vec::new();
+        let mut body_rows = Vec::new();
+
+        // node should be a thead or tbody element
+        for node in node.children.borrow().iter() {
+            match &node.data {
+                markup5ever_rcdom::NodeData::Element { name, .. } => {
+                    if local_name!("thead") == name.local {
+                        // node should be a tr element
+                        for node in node.children.borrow().iter() {
+                            let mut paragraph = MarkdownParagraph::new();
+                            self.consume_paragraph(source_range.clone(), node, &mut paragraph);
+
+                            for paragraph in paragraph.into_iter() {
+                                header_columns.push(vec![paragraph]);
+                            }
+                        }
+                    } else if local_name!("tbody") == name.local {
+                        // node should be a tr element
+                        for node in node.children.borrow().iter() {
+                            let mut row = MarkdownParagraph::new();
+                            self.consume_paragraph(source_range.clone(), node, &mut row);
+                            body_rows.push(ParsedMarkdownTableRow::with_children(
+                                row.into_iter().map(|column| vec![column]).collect(),
+                            ));
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        if !header_columns.is_empty() || !body_rows.is_empty() {
+            Some(ParsedMarkdownTable {
+                source_range,
+                body: body_rows,
+                column_alignments: Vec::default(),
+                header: ParsedMarkdownTableRow::with_children(header_columns),
+            })
+        } else {
+            None
+        }
+    }
+
     /// Parses the width/height attribute value of an html element (e.g. img element)
     fn parse_length(value: &str) -> Option<DefiniteLength> {
         if value.ends_with("%") {
@@ -1330,6 +1410,104 @@ mod tests {
         );
     }
 
+    #[gpui::test]
+    async fn test_html_table() {
+        let parsed = parse(
+            "<table>
+          <thead>
+            <tr>
+              <th>Id</th>
+              <th>Name</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>1</td>
+              <td>Chris</td>
+            </tr>
+            <tr>
+              <td>2</td>
+              <td>Dennis</td>
+            </tr>
+          </tbody>
+        </table>",
+        )
+        .await;
+
+        assert_eq!(
+            ParsedMarkdown {
+                children: vec![ParsedMarkdownElement::Table(table(
+                    0..366,
+                    row(vec![text("Id", 0..366), text("Name ", 0..366)]),
+                    vec![
+                        row(vec![text("1", 0..366), text("Chris", 0..366)]),
+                        row(vec![text("2", 0..366), text("Dennis", 0..366)]),
+                    ],
+                ))],
+            },
+            parsed
+        );
+    }
+
+    #[gpui::test]
+    async fn test_html_table_without_headings() {
+        let parsed = parse(
+            "<table>
+          <tbody>
+            <tr>
+              <td>1</td>
+              <td>Chris</td>
+            </tr>
+            <tr>
+              <td>2</td>
+              <td>Dennis</td>
+            </tr>
+          </tbody>
+        </table>",
+        )
+        .await;
+
+        assert_eq!(
+            ParsedMarkdown {
+                children: vec![ParsedMarkdownElement::Table(table(
+                    0..240,
+                    row(vec![]),
+                    vec![
+                        row(vec![text("1", 0..240), text("Chris", 0..240)]),
+                        row(vec![text("2", 0..240), text("Dennis", 0..240)]),
+                    ],
+                ))],
+            },
+            parsed
+        );
+    }
+
+    #[gpui::test]
+    async fn test_html_table_without_body() {
+        let parsed = parse(
+            "<table>
+          <thead>
+            <tr>
+              <th>Id</th>
+              <th>Name</th>
+            </tr>
+          </thead>
+        </table>",
+        )
+        .await;
+
+        assert_eq!(
+            ParsedMarkdown {
+                children: vec![ParsedMarkdownElement::Table(table(
+                    0..150,
+                    row(vec![text("Id", 0..150), text("Name", 0..150)]),
+                    vec![],
+                ))],
+            },
+            parsed
+        );
+    }
+
     #[gpui::test]
     async fn test_html_heading_tags() {
         let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;

crates/markdown_preview/src/markdown_preview.rs 🔗

@@ -2,6 +2,7 @@ use gpui::{App, actions};
 use workspace::Workspace;
 
 pub mod markdown_elements;
+mod markdown_minifier;
 pub mod markdown_parser;
 pub mod markdown_preview_view;
 pub mod markdown_renderer;

crates/markdown_preview/src/markdown_renderer.rs 🔗

@@ -475,6 +475,10 @@ fn render_markdown_table(parsed: &ParsedMarkdownTable, cx: &mut RenderContext) -
         for (index, cell) in row.children.iter().enumerate() {
             let length = paragraph_len(cell);
 
+            if index >= max_lengths.len() {
+                max_lengths.resize(index + 1, length);
+            }
+
             if length > max_lengths[index] {
                 max_lengths[index] = length;
             }
@@ -523,7 +527,7 @@ fn render_markdown_table_row(
     is_header: bool,
     cx: &mut RenderContext,
 ) -> AnyElement {
-    let mut items = vec![];
+    let mut items = Vec::with_capacity(parsed.children.len());
     let count = parsed.children.len();
 
     for (index, cell) in parsed.children.iter().enumerate() {
@@ -652,7 +656,7 @@ fn render_markdown_paragraph(parsed: &MarkdownParagraph, cx: &mut RenderContext)
 }
 
 fn render_markdown_text(parsed_new: &MarkdownParagraph, cx: &mut RenderContext) -> Vec<AnyElement> {
-    let mut any_element = vec![];
+    let mut any_element = Vec::with_capacity(parsed_new.len());
     // these values are cloned in-order satisfy borrow checker
     let syntax_theme = cx.syntax_theme.clone();
     let workspace_clone = cx.workspace.clone();