Cargo.lock 🔗
@@ -13911,6 +13911,7 @@ dependencies = [
"file_icons",
"futures 0.3.31",
"gpui",
+ "html_to_markdown",
"http_client",
"image",
"indoc",
Kyle Kelley created
Closes #15555
Adds a very basic renderer for HTML output from Jupyter kernels.
<img width="1061" height="1207" alt="image"
src="https://github.com/user-attachments/assets/1bfb8c71-0e38-4bff-9f0c-bec12721232a"
/>
This is not as full-featured as #48157.
Release Notes:
- Added basic handling of HTML in REPL outputs
Cargo.lock | 1
crates/repl/Cargo.toml | 1
crates/repl/src/outputs.rs | 27 +++
crates/repl/src/outputs/html.rs | 198 +++++++++++++++++++++++++++++++++++
4 files changed, 222 insertions(+), 5 deletions(-)
@@ -13911,6 +13911,7 @@ dependencies = [
"file_icons",
"futures 0.3.31",
"gpui",
+ "html_to_markdown",
"http_client",
"image",
"indoc",
@@ -30,6 +30,7 @@ feature_flags.workspace = true
file_icons.workspace = true
futures.workspace = true
gpui.workspace = true
+html_to_markdown.workspace = true
http_client.workspace = true
image.workspace = true
jupyter-websocket-client.workspace = true
@@ -52,6 +52,8 @@ use table::TableView;
mod json;
use json::JsonView;
+mod html;
+
pub mod plain;
use plain::TerminalOutput;
@@ -65,7 +67,8 @@ use settings::Settings;
/// When deciding what to render from a collection of mediatypes, we need to rank them in order of importance
fn rank_mime_type(mimetype: &MimeType) -> usize {
match mimetype {
- MimeType::DataTable(_) => 6,
+ MimeType::DataTable(_) => 7,
+ MimeType::Html(_) => 6,
MimeType::Json(_) => 5,
MimeType::Png(_) => 4,
MimeType::Jpeg(_) => 3,
@@ -419,6 +422,19 @@ impl Output {
content: cx.new(|cx| TableView::new(data, window, cx)),
display_id,
},
+ Some(MimeType::Html(html_content)) => match html::html_to_markdown(html_content) {
+ Ok(markdown_text) => {
+ let content = cx.new(|cx| MarkdownView::from(markdown_text, cx));
+ Output::Markdown {
+ content,
+ display_id,
+ }
+ }
+ Err(_) => Output::Plain {
+ content: cx.new(|cx| TerminalOutput::from(html_content, window, cx)),
+ display_id,
+ },
+ },
// Any other media types are not supported
_ => Output::Message("Unsupported media type".to_string()),
}
@@ -836,20 +852,23 @@ mod tests {
#[test]
fn test_rank_mime_type_ordering() {
let data_table = MimeType::DataTable(Box::default());
+ let html = MimeType::Html(String::new());
let json = MimeType::Json(serde_json::json!({}));
let png = MimeType::Png(String::new());
let jpeg = MimeType::Jpeg(String::new());
let markdown = MimeType::Markdown(String::new());
let plain = MimeType::Plain(String::new());
- assert_eq!(rank_mime_type(&data_table), 6);
+ assert_eq!(rank_mime_type(&data_table), 7);
+ assert_eq!(rank_mime_type(&html), 6);
assert_eq!(rank_mime_type(&json), 5);
assert_eq!(rank_mime_type(&png), 4);
assert_eq!(rank_mime_type(&jpeg), 3);
assert_eq!(rank_mime_type(&markdown), 2);
assert_eq!(rank_mime_type(&plain), 1);
- assert!(rank_mime_type(&data_table) > rank_mime_type(&json));
+ assert!(rank_mime_type(&data_table) > rank_mime_type(&html));
+ assert!(rank_mime_type(&html) > rank_mime_type(&json));
assert!(rank_mime_type(&json) > rank_mime_type(&png));
assert!(rank_mime_type(&png) > rank_mime_type(&jpeg));
assert!(rank_mime_type(&jpeg) > rank_mime_type(&markdown));
@@ -858,11 +877,9 @@ mod tests {
#[test]
fn test_rank_mime_type_unsupported_returns_zero() {
- let html = MimeType::Html(String::new());
let svg = MimeType::Svg(String::new());
let latex = MimeType::Latex(String::new());
- assert_eq!(rank_mime_type(&html), 0);
assert_eq!(rank_mime_type(&svg), 0);
assert_eq!(rank_mime_type(&latex), 0);
}
@@ -0,0 +1,198 @@
+use anyhow::Result;
+use html_to_markdown::markdown::{
+ CodeHandler, HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
+ WebpageChromeRemover,
+};
+use html_to_markdown::{TagHandler, convert_html_to_markdown};
+use std::cell::RefCell;
+use std::rc::Rc;
+
+/// Converts an HTML document or fragment into Markdown for display in the REPL.
+///
+/// The input is run through `convert_html_to_markdown` with the tag handlers
+/// listed below, and the resulting Markdown is then post-processed by
+/// `clean_markdown_tables`.
+///
+/// # Errors
+///
+/// Propagates any error returned by `convert_html_to_markdown`.
+pub fn html_to_markdown(html: &str) -> Result<String> {
+    let mut tag_handlers: Vec<TagHandler> = vec![
+        // WebpageChromeRemover must come first to skip style, script, head, nav tags
+        Rc::new(RefCell::new(WebpageChromeRemover)),
+        Rc::new(RefCell::new(ParagraphHandler)),
+        Rc::new(RefCell::new(HeadingHandler)),
+        Rc::new(RefCell::new(ListHandler)),
+        Rc::new(RefCell::new(TableHandler::new())),
+        Rc::new(RefCell::new(StyledTextHandler)),
+        Rc::new(RefCell::new(CodeHandler)),
+    ];
+
+    let converted = convert_html_to_markdown(html.as_bytes(), &mut tag_handlers)?;
+    let cleaned = clean_markdown_tables(&converted);
+    Ok(cleaned)
+}
+
+/// Clean up markdown table formatting and ensure tables have separator rows.
+///
+/// The input is processed line by line:
+///
+/// - A line whose trimmed form starts with `|` is a table row. It is rewritten
+///   by [`normalize_table_row`]. If the first row of a table is directly
+///   followed by another row that is not a separator, a `| --- | ... |`
+///   separator with one column per header cell is inserted after it.
+/// - Any other non-blank line is emitted trimmed.
+/// - A run of blank lines is collapsed into a single blank line; leading and
+///   trailing blank lines are dropped. Keeping one blank line preserves
+///   paragraph breaks and keeps text that follows a table from being glued
+///   onto the table.
+/// - Lines inside a fenced code block (``` or ~~~) are copied verbatim, so code
+///   indentation survives and pipe-leading code lines are not mistaken for
+///   table rows. An unterminated fence extends to the end of the input.
+///
+/// The result is joined with `\n` and has no trailing newline.
+fn clean_markdown_tables(markdown: &str) -> String {
+    let lines: Vec<&str> = markdown.lines().collect();
+    let mut result: Vec<String> = Vec::with_capacity(lines.len());
+    // Marker of the fenced code block we are currently inside, if any.
+    let mut open_fence: Option<&'static str> = None;
+    // Whether the table currently being read already has its separator row.
+    let mut has_separator = false;
+    // Set when blank lines were skipped and a single one is owed before the
+    // next emitted line.
+    let mut pending_blank = false;
+
+    for (i, line) in lines.iter().enumerate() {
+        let trimmed = line.trim();
+
+        if let Some(marker) = open_fence {
+            if trimmed.starts_with(marker) {
+                open_fence = None;
+                result.push(trimmed.to_string());
+            } else {
+                // Code content: keep indentation and blank lines untouched.
+                result.push((*line).to_string());
+            }
+            continue;
+        }
+
+        if trimmed.is_empty() {
+            // A blank line terminates any table in progress.
+            has_separator = false;
+            pending_blank = !result.is_empty();
+            continue;
+        }
+
+        if pending_blank {
+            result.push(String::new());
+            pending_blank = false;
+        }
+
+        if !trimmed.starts_with('|') {
+            has_separator = false;
+            open_fence = fence_marker(trimmed);
+            result.push(trimmed.to_string());
+            continue;
+        }
+
+        if is_separator_row(trimmed) {
+            has_separator = true;
+        }
+        result.push(normalize_table_row(trimmed));
+
+        if has_separator {
+            continue;
+        }
+
+        // First row of a table that has no separator yet: synthesize one when
+        // the next line is an ordinary table row.
+        let next_is_data_row = lines.get(i + 1).is_some_and(|next| {
+            let next = next.trim();
+            next.starts_with('|') && !is_separator_row(next)
+        });
+        if next_is_data_row {
+            let separator = vec!["---"; table_cells(trimmed).count()].join(" | ");
+            result.push(format!("| {separator} |"));
+            has_separator = true;
+        }
+    }
+
+    result.join("\n")
+}
+
+/// Returns the fence marker if `line` (already trimmed) opens a fenced code block.
+///
+/// A backtick fence is three or more backticks whose remainder contains no
+/// further backtick (otherwise the line is an inline code span); a tilde fence
+/// is any line starting with `~~~`.
+fn fence_marker(line: &str) -> Option<&'static str> {
+    let ticks = line.len() - line.trim_start_matches('`').len();
+    if ticks >= 3 {
+        return (!line[ticks..].contains('`')).then_some("```");
+    }
+    line.starts_with("~~~").then_some("~~~")
+}
+
+/// Splits a table row into its trimmed cells, ignoring one leading and one
+/// trailing pipe. Escaped pipes (`\|`) are not treated specially.
+fn table_cells(row: &str) -> impl Iterator<Item = &str> {
+    let row = row.trim();
+    let row = row.strip_prefix('|').unwrap_or(row);
+    let row = row.strip_suffix('|').unwrap_or(row);
+    row.split('|').map(str::trim)
+}
+
+/// Returns `true` when every cell of `row` is a separator cell: at least three
+/// dashes (the `---` form generated by `clean_markdown_tables`), optionally
+/// wrapped in alignment colons. Cell text that merely contains `---` does not
+/// count.
+fn is_separator_row(row: &str) -> bool {
+    table_cells(row).all(|cell| {
+        let dashes = cell.strip_prefix(':').unwrap_or(cell);
+        let dashes = dashes.strip_suffix(':').unwrap_or(dashes);
+        dashes.len() >= 3 && dashes.bytes().all(|byte| byte == b'-')
+    })
+}
+
+/// Normalize a table row by trimming cells and ensuring consistent spacing.
+///
+/// The row is split on `|`, every piece is trimmed, and the pieces are joined
+/// with `" | "`; e.g. `|a|  b |` becomes `| a | b |`.
+fn normalize_table_row(row: &str) -> String {
+    row.split('|')
+        .map(str::trim)
+        .collect::<Vec<_>>()
+        .join(" | ")
+        .trim()
+        .to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A table with `<thead>` and `<tbody>` yields pipe rows and a separator.
+    #[test]
+    fn test_html_table_to_markdown() {
+        let html = r#"<table>
+ <thead><tr><th>A</th><th>B</th></tr></thead>
+ <tbody><tr><td>1</td><td>x</td></tr></tbody>
+ </table>"#;
+
+        let markdown = html_to_markdown(html).unwrap();
+        for needle in ["|", "---"] {
+            assert!(markdown.contains(needle));
+        }
+    }
+
+    /// An `<h1>` becomes a level-one Markdown heading.
+    #[test]
+    fn test_html_with_headings() {
+        let markdown = html_to_markdown("<h1>Title</h1><p>Content</p>").unwrap();
+        assert!(markdown.contains("# Title"));
+    }
+
+    /// Every output line of a pandas-style table that contains a pipe must
+    /// begin with one.
+    #[test]
+    fn test_pandas_dataframe_html() {
+        let html = r#"<table border="1" class="dataframe">
+ <thead><tr><th></th><th>A</th><th>B</th></tr></thead>
+ <tbody>
+ <tr><th>0</th><td>1</td><td>x</td></tr>
+ <tr><th>1</th><td>2</td><td>y</td></tr>
+ </tbody>
+ </table>"#;
+
+        let markdown = html_to_markdown(html).unwrap();
+        assert!(markdown.contains("|"));
+        // Verify table rows are properly formatted (start with |)
+        let pipe_lines = markdown.lines().filter(|line| line.contains("|"));
+        for line in pipe_lines {
+            assert!(
+                line.starts_with("|"),
+                "Table line should start with |: {:?}",
+                line
+            );
+        }
+    }
+
+    /// Header, separator and data rows come out with single-space padding.
+    #[test]
+    fn test_table_format_normalized() {
+        let html = r#"<table>
+ <thead>
+ <tr><th>Name</th><th>Age</th></tr>
+ </thead>
+ <tbody>
+ <tr><td>Alice</td><td>25</td></tr>
+ </tbody>
+</table>"#;
+
+        let markdown = html_to_markdown(html).unwrap();
+
+        // Should have clean table format
+        for expected_row in ["| Name | Age |", "| --- | --- |", "| Alice | 25 |"] {
+            assert!(markdown.contains(expected_row));
+        }
+    }
+
+    /// `<style>` content is dropped while the table that follows is kept.
+    #[test]
+    fn test_style_tags_are_filtered() {
+        let html = r#"<style>
+ .dataframe { border: 1px solid; }
+ </style>
+ <table>
+ <thead><tr><th>A</th></tr></thead>
+ <tbody><tr><td>1</td></tr></tbody>
+ </table>"#;
+
+        let markdown = html_to_markdown(html).unwrap();
+
+        // Style content should not appear in output
+        for css_fragment in ["dataframe", "border"] {
+            assert!(!markdown.contains(css_fragment));
+        }
+        // Table should still be present
+        assert!(markdown.contains("| A |"));
+    }
+
+    /// Tables without `<thead>` should still get a separator row.
+    #[test]
+    fn test_table_without_thead() {
+        let html = r#"<table>
+ <tr><th>Feature</th><th>Supported</th></tr>
+ <tr><td>Tables</td><td>✓</td></tr>
+ <tr><td>Lists</td><td>✓</td></tr>
+ </table>"#;
+
+        let markdown = html_to_markdown(html).unwrap();
+
+        // Should have separator row inserted after first row
+        assert!(
+            markdown.contains("| --- | --- |"),
+            "Missing separator row: {}",
+            markdown
+        );
+        for expected_row in ["| Feature | Supported |", "| Tables | ✓ |"] {
+            assert!(markdown.contains(expected_row));
+        }
+    }
+}