@@ -5060,6 +5060,20 @@ version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d13cdbd5dbb29f9c88095bbdc2590c9cba0d0a1269b983fef6b2cdd7e9f4db1"
+[[package]]
+name = "html5ever"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.59",
+]
+
[[package]]
name = "http"
version = "0.1.0"
@@ -5719,7 +5733,7 @@ dependencies = [
"tree-sitter-embedded-template",
"tree-sitter-heex",
"tree-sitter-html",
- "tree-sitter-json 0.20.2",
+ "tree-sitter-json",
"tree-sitter-markdown",
"tree-sitter-ruby",
"tree-sitter-rust",
@@ -5809,7 +5823,7 @@ dependencies = [
"tree-sitter-gomod",
"tree-sitter-gowork",
"tree-sitter-jsdoc",
- "tree-sitter-json 0.20.2",
+ "tree-sitter-json",
"tree-sitter-markdown",
"tree-sitter-proto",
"tree-sitter-python",
@@ -6181,6 +6195,32 @@ dependencies = [
"workspace",
]
+[[package]]
+name = "markup5ever"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
+dependencies = [
+ "log",
+ "phf",
+ "phf_codegen",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]
+
+[[package]]
+name = "markup5ever_rcdom"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
+dependencies = [
+ "html5ever",
+ "markup5ever",
+ "tendril",
+ "xml5ever",
+]
+
[[package]]
name = "matchers"
version = "0.1.0"
@@ -7286,7 +7326,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_macros",
- "phf_shared",
+ "phf_shared 0.11.2",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
+dependencies = [
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
+dependencies = [
+ "phf_shared 0.10.0",
+ "rand 0.8.5",
]
[[package]]
@@ -7295,7 +7355,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
- "phf_shared",
+ "phf_shared 0.11.2",
"rand 0.8.5",
]
@@ -7305,13 +7365,22 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
- "phf_generator",
- "phf_shared",
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.59",
]
+[[package]]
+name = "phf_shared"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+dependencies = [
+ "siphasher 0.3.11",
+]
+
[[package]]
name = "phf_shared"
version = "0.11.2"
@@ -7555,6 +7624,12 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+[[package]]
+name = "precomputed-hash"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
+
[[package]]
name = "prettier"
version = "0.1.0"
@@ -8554,6 +8629,16 @@ dependencies = [
"semver",
]
+[[package]]
+name = "rustdoc_to_markdown"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "html5ever",
+ "indoc",
+ "markup5ever_rcdom",
+]
+
[[package]]
name = "rustix"
version = "0.37.23"
@@ -9118,7 +9203,7 @@ dependencies = [
"serde_json_lenient",
"smallvec",
"tree-sitter",
- "tree-sitter-json 0.19.0",
+ "tree-sitter-json",
"unindent",
"util",
]
@@ -9802,6 +9887,32 @@ dependencies = [
"float-cmp",
]
+[[package]]
+name = "string_cache"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
+dependencies = [
+ "new_debug_unreachable",
+ "once_cell",
+ "parking_lot",
+ "phf_shared 0.10.0",
+ "precomputed-hash",
+ "serde",
+]
+
+[[package]]
+name = "string_cache_codegen"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
+dependencies = [
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
+ "proc-macro2",
+ "quote",
+]
+
[[package]]
name = "stringprep"
version = "0.1.4"
@@ -10991,16 +11102,6 @@ dependencies = [
"tree-sitter",
]
-[[package]]
-name = "tree-sitter-json"
-version = "0.19.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90b04c4e1a92139535eb9fca4ec8fa9666cc96b618005d3ae35f3c957fa92f92"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
[[package]]
name = "tree-sitter-json"
version = "0.20.2"
@@ -12937,6 +13038,17 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "054a8e68b76250b253f671d1268cb7f1ae089ec35e195b2efb2a4e9a836d0621"
+[[package]]
+name = "xml5ever"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c376f76ed09df711203e20c3ef5ce556f0166fa03d39590016c0fd625437fad"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever",
+]
+
[[package]]
name = "xmlparser"
version = "0.13.5"
@@ -76,6 +76,7 @@ members = [
"crates/rich_text",
"crates/rope",
"crates/rpc",
+ "crates/rustdoc_to_markdown",
"crates/task",
"crates/tasks_ui",
"crates/search",
@@ -220,6 +221,7 @@ dev_server_projects = { path = "crates/dev_server_projects" }
rich_text = { path = "crates/rich_text" }
rope = { path = "crates/rope" }
rpc = { path = "crates/rpc" }
+rustdoc_to_markdown = { path = "crates/rustdoc_to_markdown" }
task = { path = "crates/task" }
tasks_ui = { path = "crates/tasks_ui" }
search = { path = "crates/search" }
@@ -288,6 +290,7 @@ heed = { version = "0.20.1", features = [
"read-txn-no-tls",
] }
hex = "0.4.3"
+html5ever = "0.27.0"
ignore = "0.4.22"
indoc = "1"
# We explicitly disable http2 support in isahc.
@@ -300,6 +303,7 @@ lazy_static = "1.4.0"
libc = "0.2"
linkify = "0.10.0"
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
+markup5ever_rcdom = "0.3.0"
nanoid = "0.4"
nix = "0.28"
once_cell = "1.19.0"
@@ -0,0 +1,201 @@
+use std::cell::RefCell;
+use std::collections::VecDeque;
+
+use anyhow::Result;
+use html5ever::Attribute;
+use markup5ever_rcdom::{Handle, NodeData};
+
+#[derive(Debug, Clone)]
+struct HtmlElement { // An element met during traversal, reduced to its tag name and attributes.
+    tag: String, // Local tag name, taken from the node's `name.local`.
+    attrs: RefCell<Vec<Attribute>>, // Attribute list cloned from the DOM node.
+}
+
+enum StartTagOutcome { // Tells `visit_node` what to do once `start_tag` has run for an element.
+    Continue, // Push the element onto the stack and visit its children.
+    Skip, // Return at once: the element's children and its end tag are not processed.
+}
+
+pub struct MarkdownWriter { // Accumulates Markdown while walking a parsed HTML DOM.
+    current_element_stack: VecDeque<HtmlElement>, // Elements pushed by `visit_node`, newest at the back.
+    /// The Markdown output.
+    markdown: String,
+}
+
+impl MarkdownWriter {
+ pub fn new() -> Self {
+ Self {
+ current_element_stack: VecDeque::new(),
+ markdown: String::new(),
+ }
+ }
+
+ fn is_inside(&self, tag: &str) -> bool {
+ self.current_element_stack
+ .iter()
+ .any(|parent_element| parent_element.tag == tag)
+ }
+
+ fn is_inside_heading(&self) -> bool {
+ ["h1", "h2", "h3", "h4", "h5", "h6"]
+ .into_iter()
+ .any(|heading| self.is_inside(heading))
+ }
+
+ /// Appends the given string slice onto the end of the Markdown output.
+ fn push_str(&mut self, str: &str) {
+ self.markdown.push_str(str);
+ }
+
+ /// Appends a newline to the end of the Markdown output.
+ fn push_newline(&mut self) {
+ self.push_str("\n");
+ }
+
+ pub fn run(mut self, root_node: &Handle) -> Result<String> {
+ self.visit_node(&root_node)?;
+ Ok(self.markdown.trim().to_string())
+ }
+
+ fn visit_node(&mut self, node: &Handle) -> Result<()> {
+ let mut current_element = None;
+
+ match node.data {
+ NodeData::Document
+ | NodeData::Doctype { .. }
+ | NodeData::ProcessingInstruction { .. }
+ | NodeData::Comment { .. } => {
+ // Currently left unimplemented, as we're not interested in this data
+ // at this time.
+ }
+ NodeData::Element {
+ ref name,
+ ref attrs,
+ ..
+ } => {
+ let tag_name = name.local.to_string();
+ if !tag_name.is_empty() {
+ current_element = Some(HtmlElement {
+ tag: tag_name,
+ attrs: attrs.clone(),
+ });
+ }
+ }
+ NodeData::Text { ref contents } => {
+ let text = contents.borrow().to_string();
+ self.visit_text(text)?;
+ }
+ }
+
+ if let Some(current_element) = current_element.as_ref() {
+ match self.start_tag(¤t_element) {
+ StartTagOutcome::Continue => {}
+ StartTagOutcome::Skip => return Ok(()),
+ }
+
+ self.current_element_stack
+ .push_back(current_element.clone());
+ }
+
+ for child in node.children.borrow().iter() {
+ self.visit_node(child)?;
+ }
+
+ self.current_element_stack.pop_back();
+
+ if let Some(current_element) = current_element {
+ self.end_tag(¤t_element);
+ }
+
+ Ok(())
+ }
+
+ fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
+ match tag.tag.as_str() {
+ "head" | "script" | "nav" => return StartTagOutcome::Skip,
+ "h1" => self.push_str("\n# "),
+ "h2" => self.push_str("\n## "),
+ "h3" => self.push_str("\n### "),
+ "h4" => self.push_str("\n#### "),
+ "h5" => self.push_str("\n##### "),
+ "h6" => self.push_str("\n###### "),
+ "code" => {
+ if !self.is_inside("pre") {
+ self.push_str("`")
+ }
+ }
+ "pre" => self.push_str("\n```\n"),
+ "ul" | "ol" => self.push_newline(),
+ "li" => self.push_str("- "),
+ "summary" => {
+ if tag.attrs.borrow().iter().any(|attr| {
+ attr.name.local.to_string() == "class" && attr.value.to_string() == "hideme"
+ }) {
+ return StartTagOutcome::Skip;
+ }
+ }
+ "div" | "span" => {
+ if tag.attrs.borrow().iter().any(|attr| {
+ attr.name.local.to_string() == "class"
+ && attr.value.to_string() == "sidebar-elems"
+ }) {
+ return StartTagOutcome::Skip;
+ }
+
+ if tag.attrs.borrow().iter().any(|attr| {
+ attr.name.local.to_string() == "class"
+ && attr.value.to_string() == "out-of-band"
+ }) {
+ return StartTagOutcome::Skip;
+ }
+
+ if tag.attrs.borrow().iter().any(|attr| {
+ attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
+ }) {
+ self.push_str("`");
+ }
+ }
+ _ => {}
+ }
+
+ StartTagOutcome::Continue
+ }
+
+ fn end_tag(&mut self, tag: &HtmlElement) {
+ match tag.tag.as_str() {
+ "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => self.push_str("\n\n"),
+ "code" => {
+ if !self.is_inside("pre") {
+ self.push_str("`")
+ }
+ }
+ "pre" => self.push_str("\n```\n"),
+ "ul" | "ol" => self.push_newline(),
+ "li" => self.push_newline(),
+ "div" => {
+ if tag.attrs.borrow().iter().any(|attr| {
+ attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
+ }) {
+ self.push_str("`: ");
+ }
+ }
+ _ => {}
+ }
+ }
+
+ fn visit_text(&mut self, text: String) -> Result<()> {
+ if self.is_inside("pre") {
+ self.push_str(&text);
+ return Ok(());
+ }
+
+ if self.is_inside_heading() && self.is_inside("a") {
+ return Ok(());
+ }
+
+ let trimmed_text = text.trim_matches(|char| char == '\n' || char == '\r' || char == 'ยง');
+ self.push_str(trimmed_text);
+
+ Ok(())
+ }
+}