1use std::fmt::{Display, Formatter};
  2
  3/// Indicates that the wrapped `String` is markdown text.
  4#[derive(Debug, Clone)]
  5pub struct MarkdownString(pub String);
  6
  7impl Display for MarkdownString {
  8    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
  9        write!(f, "{}", self.0)
 10    }
 11}
 12
 13/// Escapes markdown special characters in markdown text blocks. Markdown code blocks follow
 14/// different rules and `MarkdownInlineCode` or `MarkdownCodeBlock` should be used in that case.
 15///
 16/// Also escapes the following markdown extensions:
 17///
 18/// * `^` for superscripts
 19/// * `$` for inline math
 20/// * `~` for strikethrough
 21///
 22/// Escape of some characters is unnecessary, because while they are involved in markdown syntax,
 23/// the other characters involved are escaped:
 24///
 25/// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
 26///   plaintext.
 27///
 28/// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
 29///
 30/// TODO: There is one escape this doesn't do currently. Period after numbers at the start of the
 31/// line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
 32pub struct MarkdownEscaped<'a>(pub &'a str);
 33
 34/// Implements `Display` to format markdown inline code (wrapped in backticks), handling code that
 35/// contains backticks and spaces. All whitespace is treated as a single space character. For text
 36/// that does not contain whitespace other than ' ', this escaping roundtrips through
 37/// pulldown-cmark.
 38///
 39/// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
 40pub struct MarkdownInlineCode<'a>(pub &'a str);
 41
 42/// Implements `Display` to format markdown code blocks, wrapped in 3 or more backticks as needed.
 43pub struct MarkdownCodeBlock<'a> {
 44    pub tag: &'a str,
 45    pub text: &'a str,
 46}
 47
 48impl Display for MarkdownEscaped<'_> {
 49    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
 50        let mut start_of_unescaped = None;
 51        for (ix, c) in self.0.char_indices() {
 52            match c {
 53                // Always escaped.
 54                '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&' |
 55                // TODO: these only need to be escaped when they are the first non-whitespace
 56                // character of the line of a block. There should probably be both an `escape_block`
 57                // which does this and an `escape_inline` method which does not escape these.
 58                '#' | '+' | '=' | '-' => {
 59                    match start_of_unescaped {
 60                        None => {}
 61                        Some(start_of_unescaped) => {
 62                            write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
 63                        }
 64                    }
 65                    write!(formatter, "\\")?;
 66                    // Can include this char in the "unescaped" text since a
 67                    // backslash was just emitted.
 68                    start_of_unescaped = Some(ix);
 69                }
 70                // Escaped since `<` is used in opening HTML tags. `<` is used since Markdown
 71                // supports HTML entities, and this allows the text to be used directly in HTML.
 72                '<' => {
 73                    match start_of_unescaped {
 74                        None => {}
 75                        Some(start_of_unescaped) => {
 76                            write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
 77                        }
 78                    }
 79                    write!(formatter, "<")?;
 80                    start_of_unescaped = None;
 81                }
 82                // Escaped since `>` is used for blockquotes. `>` is used since Markdown supports
 83                // HTML entities, and this allows the text to be used directly in HTML.
 84                '>' => {
 85                    match start_of_unescaped {
 86                        None => {}
 87                        Some(start_of_unescaped) => {
 88                            write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
 89                        }
 90                    }
 91                    write!(formatter, ">")?;
 92                    start_of_unescaped = None;
 93                }
 94                _ => {
 95                    if start_of_unescaped.is_none() {
 96                        start_of_unescaped = Some(ix);
 97                    }
 98                }
 99            }
100        }
101        if let Some(start_of_unescaped) = start_of_unescaped {
102            write!(formatter, "{}", &self.0[start_of_unescaped..])?;
103        }
104        Ok(())
105    }
106}
107
108impl Display for MarkdownInlineCode<'_> {
109    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
110        // Apache License 2.0, same as this crate.
111        //
112        // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
113        //
114        // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
115        // `Code` events parsed by pulldown-cmark.
116        //
117        // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290
118
119        let mut all_whitespace = true;
120        let text = self
121            .0
122            .chars()
123            .map(|c| {
124                if c.is_whitespace() {
125                    ' '
126                } else {
127                    all_whitespace = false;
128                    c
129                }
130            })
131            .collect::<String>();
132
133        // When inline code has leading and trailing ' ' characters, additional space is needed
134        // to escape it, unless all characters are space.
135        if all_whitespace {
136            write!(formatter, "`{text}`")
137        } else {
138            // More backticks are needed to delimit the inline code than the maximum number of
139            // backticks in a consecutive run.
140            let backticks = "`".repeat(count_max_consecutive_chars(&text, '`') + 1);
141            let space = match text.as_bytes() {
142                &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
143                &[b' ', .., b' '] => " ",         // Space needed to escape inner space.
144                _ => "",                          // No space needed.
145            };
146            write!(formatter, "{backticks}{space}{text}{space}{backticks}")
147        }
148    }
149}
150
151impl Display for MarkdownCodeBlock<'_> {
152    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
153        let tag = self.tag;
154        let text = self.text;
155        let backticks = "`".repeat(3.max(count_max_consecutive_chars(text, '`') + 1));
156        write!(formatter, "{backticks}{tag}\n{text}\n{backticks}\n")
157    }
158}
159
// Derived from `pulldown-cmark-to-cmark-20.0.0` with changed names.
// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
// Apache License 2.0, same as this code.
/// Returns the length, in characters, of the longest run of consecutive `search` characters in
/// `text`, or 0 when `search` does not occur at all.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    // Splitting on everything that is *not* `search` leaves exactly the (possibly empty) runs of
    // `search` characters.
    text.split(|ch: char| ch != search)
        .map(|run| run.chars().count())
        .max()
        .unwrap_or(0)
}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Every markdown construct in `input` must come out neutralised by a backslash escape.
    #[test]
    fn test_markdown_escaped() {
        // The HTML entity on the last line is spelled out literally: escaping its `&` is what
        // stops it from being interpreted as an entity.
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
          - Item 1
          * Item 2
          + Item 3

        Some math:  $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
          \- Item 1
          \* Item 2
          \+ Item 3

        Some math:  \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownEscaped(input).to_string(), expected);
    }

    /// Covers delimiter length and padding for whitespace and backticks in inline code.
    #[test]
    fn test_markdown_inline_code() {
        assert_eq!(MarkdownInlineCode(" ").to_string(), "` `");
        assert_eq!(MarkdownInlineCode("text").to_string(), "`text`");
        assert_eq!(MarkdownInlineCode("text ").to_string(), "`text `");
        assert_eq!(MarkdownInlineCode(" text ").to_string(), "`  text  `");
        assert_eq!(MarkdownInlineCode("`").to_string(), "`` ` ``");
        assert_eq!(MarkdownInlineCode("``").to_string(), "``` `` ```");
        assert_eq!(MarkdownInlineCode("`text`").to_string(), "`` `text` ``");
        assert_eq!(
            MarkdownInlineCode("some `text` no leading or trailing backticks").to_string(),
            "``some `text` no leading or trailing backticks``"
        );
    }

    /// The longest run wins regardless of where in the text it appears.
    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
    }
}