diff --git a/Cargo.lock b/Cargo.lock
index f91ce427606ee07dca7d1b68e59945a45d450a11..d5f282c8653ae907336f6b85a12c39241062c14f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8639,6 +8639,7 @@ dependencies = [
  "html5ever",
  "indoc",
  "markup5ever_rcdom",
+ "regex",
 ]
 
 [[package]]
diff --git a/crates/rustdoc_to_markdown/Cargo.toml b/crates/rustdoc_to_markdown/Cargo.toml
index 001e476be70dbde23f0e4032ed3d35536a384d71..18bb21f9d9b94e0efe7a9ec959cefd7adca127c6 100644
--- a/crates/rustdoc_to_markdown/Cargo.toml
+++ b/crates/rustdoc_to_markdown/Cargo.toml
@@ -15,6 +15,7 @@ path = "src/rustdoc_to_markdown.rs"
 anyhow.workspace = true
 html5ever.workspace = true
 markup5ever_rcdom.workspace = true
+regex.workspace = true
 
 [dev-dependencies]
 indoc.workspace = true
diff --git a/crates/rustdoc_to_markdown/src/markdown_writer.rs b/crates/rustdoc_to_markdown/src/markdown_writer.rs
index 4b69594bc8e237cfb4609fd35a42fff660d5aebf..0191ed8549e38792886d03284c722a5f509b56c6 100644
--- a/crates/rustdoc_to_markdown/src/markdown_writer.rs
+++ b/crates/rustdoc_to_markdown/src/markdown_writer.rs
@@ -1,9 +1,26 @@
 use std::cell::RefCell;
 use std::collections::VecDeque;
+use std::sync::OnceLock;
 
 use anyhow::Result;
 use html5ever::Attribute;
 use markup5ever_rcdom::{Handle, NodeData};
+use regex::Regex;
+
+/// Returns the lazily-compiled regex matching lines that contain only spaces or tabs.
+///
+/// The `(?m)` flag makes `^` and `$` anchor at every line boundary; without it the
+/// pattern could only match when the entire document was whitespace.
+fn empty_line_regex() -> &'static Regex {
+    static REGEX: OnceLock<Regex> = OnceLock::new();
+    REGEX.get_or_init(|| Regex::new(r"(?m)^[ \t]+$").unwrap())
+}
+
+/// Returns the lazily-compiled regex matching a run of three or more consecutive newlines.
+fn more_than_three_newlines_regex() -> &'static Regex {
+    static REGEX: OnceLock<Regex> = OnceLock::new();
+    REGEX.get_or_init(|| Regex::new(r"\n{3,}").unwrap())
+}
 
 #[derive(Debug, Clone)]
 struct HtmlElement {
@@ -48,7 +65,16 @@ impl MarkdownWriter {
 
     pub fn run(mut self, root_node: &Handle) -> Result<String> {
         self.visit_node(&root_node)?;
-        Ok(self.markdown.trim().to_string())
+        Ok(Self::prettify_markdown(self.markdown))
+    }
+
+    /// Tidies the generated Markdown: clears whitespace-only lines, collapses runs of
+    /// three or more newlines into a single blank line, and trims both ends.
+    fn prettify_markdown(markdown: String) -> String {
+        let markdown = empty_line_regex().replace_all(&markdown, "");
+        let markdown = more_than_three_newlines_regex().replace_all(&markdown, "\n\n");
+
+        markdown.trim().to_string()
     }
 
     fn visit_node(&mut self, node: &Handle) -> Result<()> {
@@ -107,12 +133,12 @@ impl MarkdownWriter {
     fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
         match tag.tag.as_str() {
             "head" | "script" | "nav" => return StartTagOutcome::Skip,
-            "h1" => self.push_str("\n# "),
-            "h2" => self.push_str("\n## "),
-            "h3" => self.push_str("\n### "),
-            "h4" => self.push_str("\n#### "),
-            "h5" => self.push_str("\n##### "),
-            "h6" => self.push_str("\n###### "),
+            "h1" => self.push_str("\n\n# "),
+            "h2" => self.push_str("\n\n## "),
+            "h3" => self.push_str("\n\n### "),
+            "h4" => self.push_str("\n\n#### "),
+            "h5" => self.push_str("\n\n##### "),
+            "h6" => self.push_str("\n\n###### "),
             "code" => {
                 if !self.is_inside("pre") {
                     self.push_str("`")