diff --git a/Cargo.lock b/Cargo.lock index fb0f84637e91de268bdda1a45c604bc6dcafa0ef..4c01ef2462c311c3c3c1b8e649f10ffc68ac41c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -368,6 +368,7 @@ dependencies = [ "rand 0.8.5", "regex", "rope", + "rustdoc", "schemars", "search", "semantic_index", @@ -5072,12 +5073,10 @@ version = "0.1.0" dependencies = [ "anyhow", "html5ever", - "indexmap 1.9.3", "indoc", "markup5ever_rcdom", "pretty_assertions", "regex", - "strum", ] [[package]] @@ -8675,6 +8674,10 @@ dependencies = [ "futures 0.3.28", "html_to_markdown", "http 0.1.0", + "indexmap 1.9.3", + "indoc", + "pretty_assertions", + "strum", ] [[package]] diff --git a/crates/assistant/Cargo.toml b/crates/assistant/Cargo.toml index 77f0bc4ae0ad3aa19d243ace4b7ffff2e57adbb1..3dbd08bdbc74942bd1735746cfaebf27eec0bd14 100644 --- a/crates/assistant/Cargo.toml +++ b/crates/assistant/Cargo.toml @@ -42,6 +42,7 @@ parking_lot.workspace = true project.workspace = true regex.workspace = true rope.workspace = true +rustdoc.workspace = true schemars.workspace = true search.workspace = true semantic_index.workspace = true diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index adeca134d016da74e6f7fe4f6d8d2df16f619958..853665ddc9a2f0389192913b9641b71dd4b8ab90 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -7,10 +7,10 @@ use assistant_slash_command::{SlashCommand, SlashCommandOutput, SlashCommandOutp use fs::Fs; use futures::AsyncReadExt; use gpui::{AppContext, Model, Task, WeakView}; -use html_to_markdown::convert_rustdoc_to_markdown; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; +use rustdoc::convert_rustdoc_to_markdown; use ui::{prelude::*, ButtonLike, ElevationIndex}; use workspace::Workspace; diff --git a/crates/html_to_markdown/Cargo.toml b/crates/html_to_markdown/Cargo.toml index e7c5f29b1eb705b1edcf2f38ae9ce51069e0c196..bac60ef9a638575479903b78fe3f54e889bdb31e 100644 --- a/crates/html_to_markdown/Cargo.toml +++ b/crates/html_to_markdown/Cargo.toml @@ -14,10 +14,8 @@ path = "src/html_to_markdown.rs" [dependencies] anyhow.workspace = true html5ever.workspace = true -indexmap.workspace = true markup5ever_rcdom.workspace = true regex.workspace = true -strum.workspace = true [dev-dependencies] indoc.workspace = true diff --git a/crates/html_to_markdown/src/html_element.rs b/crates/html_to_markdown/src/html_element.rs index 8ba14362a9053df2d8d9a54c89c7028343e72155..e83818524e9d244ab6bc5c3204c17bdb7616a75b 100644 --- a/crates/html_to_markdown/src/html_element.rs +++ b/crates/html_to_markdown/src/html_element.rs @@ -23,11 +23,19 @@ fn inline_elements() -> &'static HashSet<&'static str> { #[derive(Debug, Clone)] pub struct HtmlElement { - pub(crate) tag: String, + tag: String, pub(crate) attrs: RefCell>, } impl HtmlElement { + pub fn new(tag: String, attrs: RefCell>) -> Self { + Self { tag, attrs } + } + + pub fn tag(&self) -> &str { + &self.tag + } + /// Returns whether this [`HtmlElement`] is an inline element. pub fn is_inline(&self) -> bool { inline_elements().contains(self.tag.as_str()) diff --git a/crates/html_to_markdown/src/html_to_markdown.rs b/crates/html_to_markdown/src/html_to_markdown.rs index be890500544eac2fefffe26af6e937f31638937e..2183054efa612221bf480649c804cffaf1269eb1 100644 --- a/crates/html_to_markdown/src/html_to_markdown.rs +++ b/crates/html_to_markdown/src/html_to_markdown.rs @@ -5,9 +5,7 @@ pub mod markdown; mod markdown_writer; pub mod structure; -use std::cell::RefCell; use std::io::Read; -use std::rc::Rc; use anyhow::{Context, Result}; use html5ever::driver::ParseOpts; @@ -16,13 +14,8 @@ use html5ever::tendril::TendrilSink; use html5ever::tree_builder::TreeBuilderOpts; use markup5ever_rcdom::RcDom; -use crate::markdown::{ - HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler, -}; -use crate::markdown_writer::MarkdownWriter; - -pub use crate::markdown_writer::{HandleTag, TagHandler}; -use crate::structure::rustdoc::RustdocItem; +pub use crate::html_element::*; +pub use crate::markdown_writer::*; /// Converts the provided HTML to Markdown. pub fn convert_html_to_markdown(html: impl Read, handlers: &mut Vec) -> Result { @@ -36,35 +29,6 @@ pub fn convert_html_to_markdown(html: impl Read, handlers: &mut Vec) Ok(markdown) } -/// Converts the provided rustdoc HTML to Markdown. -pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec)> { - let item_collector = Rc::new(RefCell::new(structure::rustdoc::RustdocItemCollector::new())); - - let mut handlers: Vec = vec![ - Rc::new(RefCell::new(ParagraphHandler)), - Rc::new(RefCell::new(HeadingHandler)), - Rc::new(RefCell::new(ListHandler)), - Rc::new(RefCell::new(TableHandler::new())), - Rc::new(RefCell::new(StyledTextHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)), - Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)), - item_collector.clone(), - ]; - - let markdown = convert_html_to_markdown(html, &mut handlers)?; - - let items = item_collector - .borrow() - .items - .iter() - .cloned() - .collect::>(); - - Ok((markdown, items)) -} - fn parse_html(mut html: impl Read) -> Result { let parse_options = ParseOpts { tree_builder: TreeBuilderOpts { @@ -80,295 +44,3 @@ fn parse_html(mut html: impl Read) -> Result { Ok(dom) } - -#[cfg(test)] -mod tests { - use indoc::indoc; - use pretty_assertions::assert_eq; - - use super::*; - - fn rustdoc_handlers() -> Vec { - vec![ - Rc::new(RefCell::new(ParagraphHandler)), - Rc::new(RefCell::new(HeadingHandler)), - Rc::new(RefCell::new(ListHandler)), - Rc::new(RefCell::new(TableHandler::new())), - Rc::new(RefCell::new(StyledTextHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)), - Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)), - Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)), - ] - } - - #[test] - fn test_main_heading_buttons_get_removed() { - let html = indoc! {r##" -
-

Crate serde

- - source · - -
- "##}; - let expected = indoc! {" - # Crate serde - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_single_paragraph() { - let html = indoc! {r#" -

In particular, the last point is what sets axum apart from other frameworks. - axum doesn’t have its own middleware system but instead uses - tower::Service. This means axum gets timeouts, tracing, compression, - authorization, and more, for free. It also enables you to share middleware with - applications written using hyper or tonic.

- "#}; - let expected = indoc! {" - In particular, the last point is what sets `axum` apart from other frameworks. `axum` doesn’t have its own middleware system but instead uses `tower::Service`. This means `axum` gets timeouts, tracing, compression, authorization, and more, for free. It also enables you to share middleware with applications written using `hyper` or `tonic`. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_multiple_paragraphs() { - let html = indoc! {r##" -

§Serde

-

Serde is a framework for serializing and deserializing Rust data - structures efficiently and generically.

-

The Serde ecosystem consists of data structures that know how to serialize - and deserialize themselves along with data formats that know how to - serialize and deserialize other things. Serde provides the layer by which - these two groups interact with each other, allowing any supported data - structure to be serialized and deserialized using any supported data format.

-

See the Serde website https://serde.rs/ for additional documentation and - usage examples.

-

§Design

-

Where many other languages rely on runtime reflection for serializing data, - Serde is instead built on Rust’s powerful trait system. A data structure - that knows how to serialize and deserialize itself is one that implements - Serde’s Serialize and Deserialize traits (or uses Serde’s derive - attribute to automatically generate implementations at compile time). This - avoids any overhead of reflection or runtime type information. In fact in - many situations the interaction between data structure and data format can - be completely optimized away by the Rust compiler, leaving Serde - serialization to perform the same speed as a handwritten serializer for the - specific selection of data structure and data format.

- "##}; - let expected = indoc! {" - ## Serde - - Serde is a framework for _**ser**_ializing and _**de**_serializing Rust data structures efficiently and generically. - - The Serde ecosystem consists of data structures that know how to serialize and deserialize themselves along with data formats that know how to serialize and deserialize other things. Serde provides the layer by which these two groups interact with each other, allowing any supported data structure to be serialized and deserialized using any supported data format. - - See the Serde website https://serde.rs/ for additional documentation and usage examples. - - ### Design - - Where many other languages rely on runtime reflection for serializing data, Serde is instead built on Rust’s powerful trait system. A data structure that knows how to serialize and deserialize itself is one that implements Serde’s `Serialize` and `Deserialize` traits (or uses Serde’s derive attribute to automatically generate implementations at compile time). This avoids any overhead of reflection or runtime type information. In fact in many situations the interaction between data structure and data format can be completely optimized away by the Rust compiler, leaving Serde serialization to perform the same speed as a handwritten serializer for the specific selection of data structure and data format. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_styled_text() { - let html = indoc! {r#" -

This text is bolded.

-

This text is italicized.

- "#}; - let expected = indoc! {" - This text is **bolded**. - - This text is _italicized_. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_rust_code_block() { - let html = indoc! {r#" -
use axum::extract::{Path, Query, Json};
-            use std::collections::HashMap;
-
-            // `Path` gives you the path parameters and deserializes them.
-            async fn path(Path(user_id): Path<u32>) {}
-
-            // `Query` gives you the query parameters and deserializes them.
-            async fn query(Query(params): Query<HashMap<String, String>>) {}
-
-            // Buffer the request body and deserialize it as JSON into a
-            // `serde_json::Value`. `Json` supports any type that implements
-            // `serde::Deserialize`.
-            async fn json(Json(payload): Json<serde_json::Value>) {}
- "#}; - let expected = indoc! {" - ```rs - use axum::extract::{Path, Query, Json}; - use std::collections::HashMap; - - // `Path` gives you the path parameters and deserializes them. - async fn path(Path(user_id): Path) {} - - // `Query` gives you the query parameters and deserializes them. - async fn query(Query(params): Query>) {} - - // Buffer the request body and deserialize it as JSON into a - // `serde_json::Value`. `Json` supports any type that implements - // `serde::Deserialize`. - async fn json(Json(payload): Json) {} - ``` - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_toml_code_block() { - let html = indoc! {r##" -

§Required dependencies

-

To use axum there are a few dependencies you have to pull in as well:

-
[dependencies]
-            axum = "<latest-version>"
-            tokio = { version = "<latest-version>", features = ["full"] }
-            tower = "<latest-version>"
-            
- "##}; - let expected = indoc! {r#" - ## Required dependencies - - To use axum there are a few dependencies you have to pull in as well: - - ```toml - [dependencies] - axum = "" - tokio = { version = "", features = ["full"] } - tower = "" - - ``` - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_item_table() { - let html = indoc! {r##" -

Structs§

-
    -
  • Errors that can happen when using axum.
  • -
  • Extractor and response for extensions.
  • -
  • Formform
    URL encoded extractor and response.
  • -
  • Jsonjson
    JSON Extractor / Response.
  • -
  • The router type for composing handlers and services.
-

Functions§

-
    -
  • servetokio and (http1 or http2)
    Serve the service with the supplied listener.
  • -
- "##}; - let expected = indoc! {r#" - ## Structs - - - `Error`: Errors that can happen when using axum. - - `Extension`: Extractor and response for extensions. - - `Form` [`form`]: URL encoded extractor and response. - - `Json` [`json`]: JSON Extractor / Response. - - `Router`: The router type for composing handlers and services. - - ## Functions - - - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener. - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_table() { - let html = indoc! {r##" -

§Feature flags

-

axum uses a set of feature flags to reduce the amount of compiled and - optional dependencies.

-

The following optional features are available:

-
- - - - - - - - - - - - - -
NameDescriptionDefault?
http1Enables hyper’s http1 featureYes
http2Enables hyper’s http2 featureNo
jsonEnables the Json type and some similar convenience functionalityYes
macrosEnables optional utility macrosNo
matched-pathEnables capturing of every request’s router path and the MatchedPath extractorYes
multipartEnables parsing multipart/form-data requests with MultipartNo
original-uriEnables capturing of every request’s original URI and the OriginalUri extractorYes
tokioEnables tokio as a dependency and axum::serve, SSE and extract::connect_info types.Yes
tower-logEnables tower’s log featureYes
tracingLog rejections from built-in extractorsYes
wsEnables WebSockets support via extract::wsNo
formEnables the Form extractorYes
queryEnables the Query extractorYes
- "##}; - let expected = indoc! {r#" - ## Feature flags - - axum uses a set of feature flags to reduce the amount of compiled and optional dependencies. - - The following optional features are available: - - | Name | Description | Default? | - | --- | --- | --- | - | `http1` | Enables hyper’s `http1` feature | Yes | - | `http2` | Enables hyper’s `http2` feature | No | - | `json` | Enables the `Json` type and some similar convenience functionality | Yes | - | `macros` | Enables optional utility macros | No | - | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes | - | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No | - | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes | - | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes | - | `tower-log` | Enables `tower`’s `log` feature | Yes | - | `tracing` | Log rejections from built-in extractors | Yes | - | `ws` | Enables WebSockets support via `extract::ws` | No | - | `form` | Enables the `Form` extractor | Yes | - | `query` | Enables the `Query` extractor | Yes | - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } -} diff --git a/crates/html_to_markdown/src/markdown.rs b/crates/html_to_markdown/src/markdown.rs index 3a12cffcee648502b269df52d817230b541d8db4..58d4e73f0f318869b22759c35ba8fbdb20f4c9be 100644 --- a/crates/html_to_markdown/src/markdown.rs +++ b/crates/html_to_markdown/src/markdown.rs @@ -23,7 +23,7 @@ impl HandleTag for ParagraphHandler { } } - match tag.tag.as_str() { + match tag.tag() { "p" => writer.push_blank_line(), _ => {} } @@ -47,7 +47,7 @@ impl HandleTag for HeadingHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "h1" => writer.push_str("\n\n# "), "h2" => writer.push_str("\n\n## "), "h3" => writer.push_str("\n\n### "), @@ -61,7 +61,7 @@ impl HandleTag for HeadingHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(), _ => {} } @@ -83,7 +83,7 @@ impl HandleTag for ListHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "ul" | "ol" => writer.push_newline(), "li" => writer.push_str("- "), _ => {} @@ -93,7 +93,7 @@ impl HandleTag for ListHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "ul" | "ol" => writer.push_newline(), "li" => writer.push_newline(), _ => {} @@ -131,7 +131,7 @@ impl HandleTag for TableHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "thead" => writer.push_blank_line(), "tr" => writer.push_newline(), "th" => { @@ -158,7 +158,7 @@ impl HandleTag for TableHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "thead" => { writer.push_newline(); for ix in 0..self.current_table_columns { @@ -197,7 +197,7 @@ impl HandleTag for StyledTextHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "strong" => writer.push_str("**"), "em" => writer.push_str("_"), _ => {} @@ -207,7 +207,7 @@ impl HandleTag for StyledTextHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "strong" => writer.push_str("**"), "em" => writer.push_str("_"), _ => {} @@ -230,7 +230,7 @@ impl HandleTag for CodeHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "code" => { if !writer.is_inside("pre") { writer.push_str("`"); @@ -244,7 +244,7 @@ impl HandleTag for CodeHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "code" => { if !writer.is_inside("pre") { writer.push_str("`"); diff --git a/crates/html_to_markdown/src/markdown_writer.rs b/crates/html_to_markdown/src/markdown_writer.rs index 7dc6308ffef613cd33c700471a3e217ecd6ff07e..0bf75962e966606273a575aa6f2f86b2b58a8c96 100644 --- a/crates/html_to_markdown/src/markdown_writer.rs +++ b/crates/html_to_markdown/src/markdown_writer.rs @@ -46,7 +46,7 @@ impl MarkdownWriter { pub fn is_inside(&self, tag: &str) -> bool { self.current_element_stack .iter() - .any(|parent_element| parent_element.tag == tag) + .any(|parent_element| parent_element.tag() == tag) } /// Appends the given string slice onto the end of the Markdown output. @@ -94,10 +94,7 @@ impl MarkdownWriter { } => { let tag_name = name.local.to_string(); if !tag_name.is_empty() { - current_element = Some(HtmlElement { - tag: tag_name, - attrs: attrs.clone(), - }); + current_element = Some(HtmlElement::new(tag_name, attrs.clone())); } } NodeData::Text { ref contents } => { @@ -130,7 +127,7 @@ impl MarkdownWriter { fn start_tag(&mut self, tag: &HtmlElement, handlers: &mut [TagHandler]) -> StartTagOutcome { for handler in handlers { - if handler.borrow().should_handle(tag.tag.as_str()) { + if handler.borrow().should_handle(tag.tag()) { match handler.borrow_mut().handle_tag_start(tag, self) { StartTagOutcome::Continue => {} StartTagOutcome::Skip => return StartTagOutcome::Skip, @@ -143,7 +140,7 @@ impl MarkdownWriter { fn end_tag(&mut self, tag: &HtmlElement, handlers: &mut [TagHandler]) { for handler in handlers { - if handler.borrow().should_handle(tag.tag.as_str()) { + if handler.borrow().should_handle(tag.tag()) { handler.borrow_mut().handle_tag_end(tag, self); } } diff --git a/crates/html_to_markdown/src/structure.rs b/crates/html_to_markdown/src/structure.rs index 2ff6b4d4b53c619d4e28deaef38a5005ad2b5e4f..622d63c774c12008915e6379b8ea852c20fd6a5d 100644 --- a/crates/html_to_markdown/src/structure.rs +++ b/crates/html_to_markdown/src/structure.rs @@ -1,2 +1 @@ -pub mod rustdoc; pub mod wikipedia; diff --git a/crates/html_to_markdown/src/structure/rustdoc.rs b/crates/html_to_markdown/src/structure/rustdoc.rs deleted file mode 100644 index 16eae9cb2f4b50e520ebbd9e69b9a16bbbe5f1d6..0000000000000000000000000000000000000000 --- a/crates/html_to_markdown/src/structure/rustdoc.rs +++ /dev/null @@ -1,359 +0,0 @@ -use std::sync::Arc; - -use indexmap::IndexSet; -use strum::{EnumIter, IntoEnumIterator}; - -use crate::html_element::HtmlElement; -use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome}; - -pub struct RustdocHeadingHandler; - -impl HandleTag for RustdocHeadingHandler { - fn should_handle(&self, _tag: &str) -> bool { - // We're only handling text, so we don't need to visit any tags. - false - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if writer.is_inside("h1") - || writer.is_inside("h2") - || writer.is_inside("h3") - || writer.is_inside("h4") - || writer.is_inside("h5") - || writer.is_inside("h6") - { - let text = text - .trim_matches(|char| char == '\n' || char == '\r' || char == '§') - .replace('\n', " "); - writer.push_str(&text); - - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -pub struct RustdocCodeHandler; - -impl HandleTag for RustdocCodeHandler { - fn should_handle(&self, tag: &str) -> bool { - match tag { - "pre" | "code" => true, - _ => false, - } - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag.as_str() { - "code" => { - if !writer.is_inside("pre") { - writer.push_str("`"); - } - } - "pre" => { - let classes = tag.classes(); - let is_rust = classes.iter().any(|class| class == "rust"); - let language = is_rust - .then(|| "rs") - .or_else(|| { - classes.iter().find_map(|class| { - if let Some((_, language)) = class.split_once("language-") { - Some(language.trim()) - } else { - None - } - }) - }) - .unwrap_or(""); - - writer.push_str(&format!("\n\n```{language}\n")); - } - _ => {} - } - - StartTagOutcome::Continue - } - - fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { - "code" => { - if !writer.is_inside("pre") { - writer.push_str("`"); - } - } - "pre" => writer.push_str("\n```\n"), - _ => {} - } - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if writer.is_inside("pre") { - writer.push_str(&text); - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name"; - -pub struct RustdocItemHandler; - -impl RustdocItemHandler { - /// Returns whether we're currently inside of an `.item-name` element, which - /// rustdoc uses to display Rust items in a list. - fn is_inside_item_name(writer: &MarkdownWriter) -> bool { - writer - .current_element_stack() - .iter() - .any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS)) - } -} - -impl HandleTag for RustdocItemHandler { - fn should_handle(&self, tag: &str) -> bool { - match tag { - "div" | "span" => true, - _ => false, - } - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag.as_str() { - "div" | "span" => { - if Self::is_inside_item_name(writer) && tag.has_class("stab") { - writer.push_str(" ["); - } - } - _ => {} - } - - StartTagOutcome::Continue - } - - fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { - "div" | "span" => { - if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) { - writer.push_str(": "); - } - - if Self::is_inside_item_name(writer) && tag.has_class("stab") { - writer.push_str("]"); - } - } - _ => {} - } - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if Self::is_inside_item_name(writer) - && !writer.is_inside("span") - && !writer.is_inside("code") - { - writer.push_str(&format!("`{text}`")); - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -pub struct RustdocChromeRemover; - -impl HandleTag for RustdocChromeRemover { - fn should_handle(&self, tag: &str) -> bool { - match tag { - "head" | "script" | "nav" | "summary" | "button" | "div" | "span" => true, - _ => false, - } - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - _writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag.as_str() { - "head" | "script" | "nav" => return StartTagOutcome::Skip, - "summary" => { - if tag.has_class("hideme") { - return StartTagOutcome::Skip; - } - } - "button" => { - if tag.attr("id").as_deref() == Some("copy-path") { - return StartTagOutcome::Skip; - } - } - "div" | "span" => { - let classes_to_skip = ["nav-container", "sidebar-elems", "out-of-band"]; - if tag.has_any_classes(&classes_to_skip) { - return StartTagOutcome::Skip; - } - } - _ => {} - } - - StartTagOutcome::Continue - } -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)] -pub enum RustdocItemKind { - Mod, - Macro, - Struct, - Enum, - Constant, - Trait, - Function, - TypeAlias, - AttributeMacro, - DeriveMacro, -} - -impl RustdocItemKind { - const fn class(&self) -> &'static str { - match self { - Self::Mod => "mod", - Self::Macro => "macro", - Self::Struct => "struct", - Self::Enum => "enum", - Self::Constant => "constant", - Self::Trait => "trait", - Self::Function => "fn", - Self::TypeAlias => "type", - Self::AttributeMacro => "attr", - Self::DeriveMacro => "derive", - } - } -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] -pub struct RustdocItem { - pub kind: RustdocItemKind, - /// The item path, up until the name of the item. - pub path: Vec>, - /// The name of the item. - pub name: Arc, -} - -impl RustdocItem { - pub fn url_path(&self) -> String { - let name = &self.name; - let mut path_components = self.path.clone(); - - match self.kind { - RustdocItemKind::Mod => { - path_components.push(name.clone()); - path_components.push("index.html".into()); - } - RustdocItemKind::Macro - | RustdocItemKind::Struct - | RustdocItemKind::Enum - | RustdocItemKind::Constant - | RustdocItemKind::Trait - | RustdocItemKind::Function - | RustdocItemKind::TypeAlias - | RustdocItemKind::AttributeMacro - | RustdocItemKind::DeriveMacro => { - path_components - .push(format!("{kind}.{name}.html", kind = self.kind.class()).into()); - } - } - - path_components.join("/") - } -} - -pub struct RustdocItemCollector { - pub items: IndexSet, -} - -impl RustdocItemCollector { - pub fn new() -> Self { - Self { - items: IndexSet::new(), - } - } - - fn parse_item(tag: &HtmlElement) -> Option { - if tag.tag.as_str() != "a" { - return None; - } - - let href = tag.attr("href")?; - if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") { - return None; - } - - for kind in RustdocItemKind::iter() { - if tag.has_class(kind.class()) { - let mut parts = href.trim_end_matches("/index.html").split('/'); - - if let Some(last_component) = parts.next_back() { - let last_component = match last_component.split_once('#') { - Some((component, _fragment)) => component, - None => last_component, - }; - - let name = last_component - .trim_start_matches(&format!("{}.", kind.class())) - .trim_end_matches(".html"); - - return Some(RustdocItem { - kind, - name: name.into(), - path: parts.map(Into::into).collect(), - }); - } - } - } - - None - } -} - -impl HandleTag for RustdocItemCollector { - fn should_handle(&self, tag: &str) -> bool { - tag == "a" - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag.as_str() { - "a" => { - let is_reexport = writer.current_element_stack().iter().any(|element| { - if let Some(id) = element.attr("id") { - id.starts_with("reexport.") || id.starts_with("method.") - } else { - false - } - }); - - if !is_reexport { - if let Some(item) = Self::parse_item(tag) { - self.items.insert(item); - } - } - } - _ => {} - } - - StartTagOutcome::Continue - } -} diff --git a/crates/html_to_markdown/src/structure/wikipedia.rs b/crates/html_to_markdown/src/structure/wikipedia.rs index 9c9a470685818e6d2becc50426ae62b2218cd955..9a3161f01006da856e9688710a0d567d9075cefe 100644 --- a/crates/html_to_markdown/src/structure/wikipedia.rs +++ b/crates/html_to_markdown/src/structure/wikipedia.rs @@ -14,7 +14,7 @@ impl HandleTag for WikipediaChromeRemover { tag: &HtmlElement, _writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "head" | "script" | "style" | "nav" => return StartTagOutcome::Skip, "sup" => { if tag.has_class("reference") { @@ -54,7 +54,7 @@ impl HandleTag for WikipediaInfoboxHandler { tag: &HtmlElement, _writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "table" => { if tag.has_class("infobox") { return StartTagOutcome::Skip; @@ -90,7 +90,7 @@ impl HandleTag for WikipediaCodeHandler { tag: &HtmlElement, writer: &mut MarkdownWriter, ) -> StartTagOutcome { - match tag.tag.as_str() { + match tag.tag() { "code" => { if !writer.is_inside("pre") { writer.push_str("`"); @@ -121,7 +121,7 @@ impl HandleTag for WikipediaCodeHandler { } fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { + match tag.tag() { "code" => { if !writer.is_inside("pre") { writer.push_str("`"); diff --git a/crates/rustdoc/Cargo.toml b/crates/rustdoc/Cargo.toml index 2a3f9a476fcd36aa48726277327d3e431f4eafcb..15f0013233733d98be4980cf31990db37a7d560f 100644 --- a/crates/rustdoc/Cargo.toml +++ b/crates/rustdoc/Cargo.toml @@ -19,5 +19,9 @@ fs.workspace = true futures.workspace = true html_to_markdown.workspace = true http.workspace = true +indexmap.workspace = true +strum.workspace = true [dev-dependencies] +indoc.workspace = true +pretty_assertions.workspace = true diff --git a/crates/rustdoc/src/crawler.rs b/crates/rustdoc/src/crawler.rs index bc829c886003838f2b877761d9792205103f44a3..8e536a3075aa60996b75a9ace11b0c7caf851f50 100644 --- a/crates/rustdoc/src/crawler.rs +++ b/crates/rustdoc/src/crawler.rs @@ -6,10 +6,10 @@ use async_trait::async_trait; use collections::{HashSet, VecDeque}; use fs::Fs; use futures::AsyncReadExt; -use html_to_markdown::convert_rustdoc_to_markdown; -use html_to_markdown::structure::rustdoc::{RustdocItem, RustdocItemKind}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; +use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind}; + #[derive(Debug, Clone, Copy)] pub enum RustdocSource { /// The docs were sourced from local `cargo doc` output. diff --git a/crates/rustdoc/src/item.rs b/crates/rustdoc/src/item.rs new file mode 100644 index 0000000000000000000000000000000000000000..69f10794c3e0b12390b1e7827fa4c48d43d3b06b --- /dev/null +++ b/crates/rustdoc/src/item.rs @@ -0,0 +1,71 @@ +use std::sync::Arc; + +use strum::EnumIter; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)] +pub enum RustdocItemKind { + Mod, + Macro, + Struct, + Enum, + Constant, + Trait, + Function, + TypeAlias, + AttributeMacro, + DeriveMacro, +} + +impl RustdocItemKind { + pub(crate) const fn class(&self) -> &'static str { + match self { + Self::Mod => "mod", + Self::Macro => "macro", + Self::Struct => "struct", + Self::Enum => "enum", + Self::Constant => "constant", + Self::Trait => "trait", + Self::Function => "fn", + Self::TypeAlias => "type", + Self::AttributeMacro => "attr", + Self::DeriveMacro => "derive", + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct RustdocItem { + pub kind: RustdocItemKind, + /// The item path, up until the name of the item. + pub path: Vec>, + /// The name of the item. + pub name: Arc, +} + +impl RustdocItem { + pub fn url_path(&self) -> String { + let name = &self.name; + let mut path_components = self.path.clone(); + + match self.kind { + RustdocItemKind::Mod => { + path_components.push(name.clone()); + path_components.push("index.html".into()); + } + RustdocItemKind::Macro + | RustdocItemKind::Struct + | RustdocItemKind::Enum + | RustdocItemKind::Constant + | RustdocItemKind::Trait + | RustdocItemKind::Function + | RustdocItemKind::TypeAlias + | RustdocItemKind::AttributeMacro + | RustdocItemKind::DeriveMacro => { + path_components + .push(format!("{kind}.{name}.html", kind = self.kind.class()).into()); + } + } + + path_components.join("/") + } +} diff --git a/crates/rustdoc/src/rustdoc.rs b/crates/rustdoc/src/rustdoc.rs index 145a637bc370771b36a0af82869536cad22fa729..a1ca949334f3fed7630c33666fcb9a6ad2f24fe4 100644 --- a/crates/rustdoc/src/rustdoc.rs +++ b/crates/rustdoc/src/rustdoc.rs @@ -1 +1,6 @@ pub mod crawler; +mod item; +mod to_markdown; + +pub use crate::item::*; +pub use crate::to_markdown::convert_rustdoc_to_markdown; diff --git a/crates/rustdoc/src/to_markdown.rs b/crates/rustdoc/src/to_markdown.rs new file mode 100644 index 0000000000000000000000000000000000000000..750788db19b9c03a435da6edcc04fbc42fa87d6b --- /dev/null +++ b/crates/rustdoc/src/to_markdown.rs @@ -0,0 +1,622 @@ +use std::cell::RefCell; +use std::io::Read; +use std::rc::Rc; + +use anyhow::Result; +use html_to_markdown::markdown::{ + HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler, +}; +use html_to_markdown::{ + convert_html_to_markdown, HandleTag, HandlerOutcome, HtmlElement, MarkdownWriter, + StartTagOutcome, TagHandler, +}; +use indexmap::IndexSet; +use strum::IntoEnumIterator; + +use crate::{RustdocItem, RustdocItemKind}; + +/// Converts the provided rustdoc HTML to Markdown. +pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec)> { + let item_collector = Rc::new(RefCell::new(RustdocItemCollector::new())); + + let mut handlers: Vec = vec![ + Rc::new(RefCell::new(ParagraphHandler)), + Rc::new(RefCell::new(HeadingHandler)), + Rc::new(RefCell::new(ListHandler)), + Rc::new(RefCell::new(TableHandler::new())), + Rc::new(RefCell::new(StyledTextHandler)), + Rc::new(RefCell::new(RustdocChromeRemover)), + Rc::new(RefCell::new(RustdocHeadingHandler)), + Rc::new(RefCell::new(RustdocCodeHandler)), + Rc::new(RefCell::new(RustdocItemHandler)), + item_collector.clone(), + ]; + + let markdown = convert_html_to_markdown(html, &mut handlers)?; + + let items = item_collector + .borrow() + .items + .iter() + .cloned() + .collect::>(); + + Ok((markdown, items)) +} + +pub struct RustdocHeadingHandler; + +impl HandleTag for RustdocHeadingHandler { + fn should_handle(&self, _tag: &str) -> bool { + // We're only handling text, so we don't need to visit any tags. + false + } + + fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { + if writer.is_inside("h1") + || writer.is_inside("h2") + || writer.is_inside("h3") + || writer.is_inside("h4") + || writer.is_inside("h5") + || writer.is_inside("h6") + { + let text = text + .trim_matches(|char| char == '\n' || char == '\r' || char == '§') + .replace('\n', " "); + writer.push_str(&text); + + return HandlerOutcome::Handled; + } + + HandlerOutcome::NoOp + } +} + +pub struct RustdocCodeHandler; + +impl HandleTag for RustdocCodeHandler { + fn should_handle(&self, tag: &str) -> bool { + match tag { + "pre" | "code" => true, + _ => false, + } + } + + fn handle_tag_start( + &mut self, + tag: &HtmlElement, + writer: &mut MarkdownWriter, + ) -> StartTagOutcome { + match tag.tag() { + "code" => { + if !writer.is_inside("pre") { + writer.push_str("`"); + } + } + "pre" => { + let classes = tag.classes(); + let is_rust = classes.iter().any(|class| class == "rust"); + let language = is_rust + .then(|| "rs") + .or_else(|| { + classes.iter().find_map(|class| { + if let Some((_, language)) = class.split_once("language-") { + Some(language.trim()) + } else { + None + } + }) + }) + .unwrap_or(""); + + writer.push_str(&format!("\n\n```{language}\n")); + } + _ => {} + } + + StartTagOutcome::Continue + } + + fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { + match tag.tag() { + "code" => { + if !writer.is_inside("pre") { + writer.push_str("`"); + } + } + "pre" => writer.push_str("\n```\n"), + _ => {} + } + } + + fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { + if writer.is_inside("pre") { + writer.push_str(&text); + return HandlerOutcome::Handled; + } + + HandlerOutcome::NoOp + } +} + +const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name"; + +pub struct RustdocItemHandler; + +impl RustdocItemHandler { + /// Returns whether we're currently inside of an `.item-name` element, which + /// rustdoc uses to display Rust items in a list. + fn is_inside_item_name(writer: &MarkdownWriter) -> bool { + writer + .current_element_stack() + .iter() + .any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS)) + } +} + +impl HandleTag for RustdocItemHandler { + fn should_handle(&self, tag: &str) -> bool { + match tag { + "div" | "span" => true, + _ => false, + } + } + + fn handle_tag_start( + &mut self, + tag: &HtmlElement, + writer: &mut MarkdownWriter, + ) -> StartTagOutcome { + match tag.tag() { + "div" | "span" => { + if Self::is_inside_item_name(writer) && tag.has_class("stab") { + writer.push_str(" ["); + } + } + _ => {} + } + + StartTagOutcome::Continue + } + + fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { + match tag.tag() { + "div" | "span" => { + if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) { + writer.push_str(": "); + } + + if Self::is_inside_item_name(writer) && tag.has_class("stab") { + writer.push_str("]"); + } + } + _ => {} + } + } + + fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { + if Self::is_inside_item_name(writer) + && !writer.is_inside("span") + && !writer.is_inside("code") + { + writer.push_str(&format!("`{text}`")); + return HandlerOutcome::Handled; + } + + HandlerOutcome::NoOp + } +} + +pub struct RustdocChromeRemover; + +impl HandleTag for RustdocChromeRemover { + fn should_handle(&self, tag: &str) -> bool { + match tag { + "head" | "script" | "nav" | "summary" | "button" | "div" | "span" => true, + _ => false, + } + } + + fn handle_tag_start( + &mut self, + tag: &HtmlElement, + _writer: &mut MarkdownWriter, + ) -> StartTagOutcome { + match tag.tag() { + "head" | "script" | "nav" => return StartTagOutcome::Skip, + "summary" => { + if tag.has_class("hideme") { + return StartTagOutcome::Skip; + } + } + "button" => { + if tag.attr("id").as_deref() == Some("copy-path") { + return StartTagOutcome::Skip; + } + } + "div" | "span" => { + let classes_to_skip = ["nav-container", "sidebar-elems", "out-of-band"]; + if tag.has_any_classes(&classes_to_skip) { + return StartTagOutcome::Skip; + } + } + _ => {} + } + + StartTagOutcome::Continue + } +} + +pub struct RustdocItemCollector { + pub items: IndexSet, +} + +impl RustdocItemCollector { + pub fn new() -> Self { + Self { + items: IndexSet::new(), + } + } + + fn parse_item(tag: &HtmlElement) -> Option { + if tag.tag() != "a" { + return None; + } + + let href = tag.attr("href")?; + if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") { + return None; + } + + for kind in RustdocItemKind::iter() { + if tag.has_class(kind.class()) { + let mut parts = href.trim_end_matches("/index.html").split('/'); + + if let Some(last_component) = parts.next_back() { + let last_component = match last_component.split_once('#') { + Some((component, _fragment)) => component, + None => last_component, + }; + + let name = last_component + .trim_start_matches(&format!("{}.", kind.class())) + .trim_end_matches(".html"); + + return Some(RustdocItem { + kind, + name: name.into(), + path: parts.map(Into::into).collect(), + }); + } + } + } + + None + } +} + +impl HandleTag for RustdocItemCollector { + fn should_handle(&self, tag: &str) -> bool { + tag == "a" + } + + fn handle_tag_start( + &mut self, + tag: &HtmlElement, + writer: &mut MarkdownWriter, + ) -> StartTagOutcome { + match tag.tag() { + "a" => { + let is_reexport = writer.current_element_stack().iter().any(|element| { + if let Some(id) = element.attr("id") { + id.starts_with("reexport.") || id.starts_with("method.") + } else { + false + } + }); + + if !is_reexport { + if let Some(item) = Self::parse_item(tag) { + self.items.insert(item); + } + } + } + _ => {} + } + + StartTagOutcome::Continue + } +} + +#[cfg(test)] +mod tests { + use html_to_markdown::{convert_html_to_markdown, TagHandler}; + use indoc::indoc; + use pretty_assertions::assert_eq; + + use super::*; + + fn rustdoc_handlers() -> Vec { + vec![ + Rc::new(RefCell::new(ParagraphHandler)), + Rc::new(RefCell::new(HeadingHandler)), + Rc::new(RefCell::new(ListHandler)), + Rc::new(RefCell::new(TableHandler::new())), + Rc::new(RefCell::new(StyledTextHandler)), + Rc::new(RefCell::new(RustdocChromeRemover)), + Rc::new(RefCell::new(RustdocHeadingHandler)), + Rc::new(RefCell::new(RustdocCodeHandler)), + Rc::new(RefCell::new(RustdocItemHandler)), + ] + } + + #[test] + fn test_main_heading_buttons_get_removed() { + let html = indoc! {r##" +
+

Crate serde

+ + source · + +
+ "##}; + let expected = indoc! {" + # Crate serde + "} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_single_paragraph() { + let html = indoc! {r#" +

In particular, the last point is what sets axum apart from other frameworks. + axum doesn’t have its own middleware system but instead uses + tower::Service. This means axum gets timeouts, tracing, compression, + authorization, and more, for free. It also enables you to share middleware with + applications written using hyper or tonic.

+ "#}; + let expected = indoc! {" + In particular, the last point is what sets `axum` apart from other frameworks. `axum` doesn’t have its own middleware system but instead uses `tower::Service`. This means `axum` gets timeouts, tracing, compression, authorization, and more, for free. It also enables you to share middleware with applications written using `hyper` or `tonic`. + "} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_multiple_paragraphs() { + let html = indoc! {r##" +

§Serde

+

Serde is a framework for serializing and deserializing Rust data + structures efficiently and generically.

+

The Serde ecosystem consists of data structures that know how to serialize + and deserialize themselves along with data formats that know how to + serialize and deserialize other things. Serde provides the layer by which + these two groups interact with each other, allowing any supported data + structure to be serialized and deserialized using any supported data format.

+

See the Serde website https://serde.rs/ for additional documentation and + usage examples.

+

§Design

+

Where many other languages rely on runtime reflection for serializing data, + Serde is instead built on Rust’s powerful trait system. A data structure + that knows how to serialize and deserialize itself is one that implements + Serde’s Serialize and Deserialize traits (or uses Serde’s derive + attribute to automatically generate implementations at compile time). This + avoids any overhead of reflection or runtime type information. In fact in + many situations the interaction between data structure and data format can + be completely optimized away by the Rust compiler, leaving Serde + serialization to perform the same speed as a handwritten serializer for the + specific selection of data structure and data format.

+ "##}; + let expected = indoc! {" + ## Serde + + Serde is a framework for _**ser**_ializing and _**de**_serializing Rust data structures efficiently and generically. + + The Serde ecosystem consists of data structures that know how to serialize and deserialize themselves along with data formats that know how to serialize and deserialize other things. Serde provides the layer by which these two groups interact with each other, allowing any supported data structure to be serialized and deserialized using any supported data format. + + See the Serde website https://serde.rs/ for additional documentation and usage examples. + + ### Design + + Where many other languages rely on runtime reflection for serializing data, Serde is instead built on Rust’s powerful trait system. A data structure that knows how to serialize and deserialize itself is one that implements Serde’s `Serialize` and `Deserialize` traits (or uses Serde’s derive attribute to automatically generate implementations at compile time). This avoids any overhead of reflection or runtime type information. In fact in many situations the interaction between data structure and data format can be completely optimized away by the Rust compiler, leaving Serde serialization to perform the same speed as a handwritten serializer for the specific selection of data structure and data format. + "} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_styled_text() { + let html = indoc! {r#" +

This text is bolded.

+

This text is italicized.

+ "#}; + let expected = indoc! {" + This text is **bolded**. + + This text is _italicized_. + "} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_rust_code_block() { + let html = indoc! {r#" +
use axum::extract::{Path, Query, Json};
+            use std::collections::HashMap;
+
+            // `Path` gives you the path parameters and deserializes them.
+            async fn path(Path(user_id): Path<u32>) {}
+
+            // `Query` gives you the query parameters and deserializes them.
+            async fn query(Query(params): Query<HashMap<String, String>>) {}
+
+            // Buffer the request body and deserialize it as JSON into a
+            // `serde_json::Value`. `Json` supports any type that implements
+            // `serde::Deserialize`.
+            async fn json(Json(payload): Json<serde_json::Value>) {}
+ "#}; + let expected = indoc! {" + ```rs + use axum::extract::{Path, Query, Json}; + use std::collections::HashMap; + + // `Path` gives you the path parameters and deserializes them. + async fn path(Path(user_id): Path) {} + + // `Query` gives you the query parameters and deserializes them. + async fn query(Query(params): Query>) {} + + // Buffer the request body and deserialize it as JSON into a + // `serde_json::Value`. `Json` supports any type that implements + // `serde::Deserialize`. + async fn json(Json(payload): Json) {} + ``` + "} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_toml_code_block() { + let html = indoc! {r##" +

§Required dependencies

+

To use axum there are a few dependencies you have to pull in as well:

+
[dependencies]
+            axum = "<latest-version>"
+            tokio = { version = "<latest-version>", features = ["full"] }
+            tower = "<latest-version>"
+            
+ "##}; + let expected = indoc! {r#" + ## Required dependencies + + To use axum there are a few dependencies you have to pull in as well: + + ```toml + [dependencies] + axum = "" + tokio = { version = "", features = ["full"] } + tower = "" + + ``` + "#} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_item_table() { + let html = indoc! {r##" +

Structs§

+
    +
  • Errors that can happen when using axum.
  • +
  • Extractor and response for extensions.
  • +
  • Formform
    URL encoded extractor and response.
  • +
  • Jsonjson
    JSON Extractor / Response.
  • +
  • The router type for composing handlers and services.
+

Functions§

+
    +
  • servetokio and (http1 or http2)
    Serve the service with the supplied listener.
  • +
+ "##}; + let expected = indoc! {r#" + ## Structs + + - `Error`: Errors that can happen when using axum. + - `Extension`: Extractor and response for extensions. + - `Form` [`form`]: URL encoded extractor and response. + - `Json` [`json`]: JSON Extractor / Response. + - `Router`: The router type for composing handlers and services. + + ## Functions + + - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener. + "#} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } + + #[test] + fn test_table() { + let html = indoc! {r##" +

§Feature flags

+

axum uses a set of feature flags to reduce the amount of compiled and + optional dependencies.

+

The following optional features are available:

+
+ + + + + + + + + + + + + +
NameDescriptionDefault?
http1Enables hyper’s http1 featureYes
http2Enables hyper’s http2 featureNo
jsonEnables the Json type and some similar convenience functionalityYes
macrosEnables optional utility macrosNo
matched-pathEnables capturing of every request’s router path and the MatchedPath extractorYes
multipartEnables parsing multipart/form-data requests with MultipartNo
original-uriEnables capturing of every request’s original URI and the OriginalUri extractorYes
tokioEnables tokio as a dependency and axum::serve, SSE and extract::connect_info types.Yes
tower-logEnables tower’s log featureYes
tracingLog rejections from built-in extractorsYes
wsEnables WebSockets support via extract::wsNo
formEnables the Form extractorYes
queryEnables the Query extractorYes
+ "##}; + let expected = indoc! {r#" + ## Feature flags + + axum uses a set of feature flags to reduce the amount of compiled and optional dependencies. + + The following optional features are available: + + | Name | Description | Default? | + | --- | --- | --- | + | `http1` | Enables hyper’s `http1` feature | Yes | + | `http2` | Enables hyper’s `http2` feature | No | + | `json` | Enables the `Json` type and some similar convenience functionality | Yes | + | `macros` | Enables optional utility macros | No | + | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes | + | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No | + | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes | + | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes | + | `tower-log` | Enables `tower`’s `log` feature | Yes | + | `tracing` | Log rejections from built-in extractors | Yes | + | `ws` | Enables WebSockets support via `extract::ws` | No | + | `form` | Enables the `Form` extractor | Yes | + | `query` | Enables the `Query` extractor | Yes | + "#} + .trim(); + + assert_eq!( + convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), + expected + ) + } +}