1use std::fmt::{Display, Formatter};
2
/// A string of Markdown source text.
///
/// This is a thin newtype around an owned `String`; the wrapped text is available through the
/// public field. Use [`MarkdownString::escape`] to turn plain text into Markdown that renders
/// literally, or [`MarkdownString::inline_code`] to wrap text in an inline code span.
///
/// Equality, hashing, and the default value (the empty string) all follow the wrapped `String`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct MarkdownString(pub String);
6
7impl Display for MarkdownString {
8 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
9 write!(f, "{}", self.0)
10 }
11}
12
13impl MarkdownString {
14 /// Escapes markdown special characters.
15 ///
16 /// Also escapes the following markdown extensions:
17 ///
18 /// * `^` for superscripts
19 /// * `$` for inline math
20 /// * `~` for strikethrough
21 ///
22 /// Escape of some characters is unnecessary, because while they are involved in markdown syntax,
23 /// the other characters involved are escaped:
24 ///
25 /// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
26 /// plaintext.
27 ///
28 /// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
29 ///
30 /// TODO: There is one escape this doesn't do currently. Period after numbers at the start of the
31 /// line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
32 pub fn escape(text: &str) -> Self {
33 let mut chunks = Vec::new();
34 let mut start_of_unescaped = None;
35 for (ix, c) in text.char_indices() {
36 match c {
37 // Always escaped.
38 '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&' |
39 // TODO: these only need to be escaped when they are the first non-whitespace
40 // character of the line of a block. There should probably be both an `escape_block`
41 // which does this and an `escape_inline` method which does not escape these.
42 '#' | '+' | '=' | '-' => {
43 match start_of_unescaped {
44 None => {}
45 Some(start_of_unescaped) => {
46 chunks.push(&text[start_of_unescaped..ix]);
47 }
48 }
49 chunks.push("\\");
50 // Can include this char in the "unescaped" text since a
51 // backslash was just emitted.
52 start_of_unescaped = Some(ix);
53 }
54 // Escaped since `<` is used in opening HTML tags. `<` is used since Markdown
55 // supports HTML entities, and this allows the text to be used directly in HTML.
56 '<' => {
57 match start_of_unescaped {
58 None => {}
59 Some(start_of_unescaped) => {
60 chunks.push(&text[start_of_unescaped..ix]);
61 }
62 }
63 chunks.push("<");
64 start_of_unescaped = None;
65 }
66 // Escaped since `>` is used for blockquotes. `>` is used since Markdown supports
67 // HTML entities, and this allows the text to be used directly in HTML.
68 '>' => {
69 match start_of_unescaped {
70 None => {}
71 Some(start_of_unescaped) => {
72 chunks.push(&text[start_of_unescaped..ix]);
73 }
74 }
75 chunks.push("gt;");
76 start_of_unescaped = None;
77 }
78 _ => {
79 if start_of_unescaped.is_none() {
80 start_of_unescaped = Some(ix);
81 }
82 }
83 }
84 }
85 if let Some(start_of_unescaped) = start_of_unescaped {
86 chunks.push(&text[start_of_unescaped..])
87 }
88 Self(chunks.concat())
89 }
90
91 /// Returns markdown for inline code (wrapped in backticks), handling code that contains backticks
92 /// and spaces. All whitespace is treated as a single space character. For text that does not
93 /// contain whitespace other than ' ', this escaping roundtrips through pulldown-cmark.
94 ///
95 /// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
96 pub fn inline_code(text: &str) -> Self {
97 // Apache License 2.0, same as this crate.
98 //
99 // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
100 //
101 // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
102 // `Code` events parsed by pulldown-cmark.
103 //
104 // * Direct return of string.
105 //
106 // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290
107
108 let mut all_whitespace = true;
109 let text = text
110 .chars()
111 .map(|c| {
112 if c.is_whitespace() {
113 ' '
114 } else {
115 all_whitespace = false;
116 c
117 }
118 })
119 .collect::<String>();
120
121 // When inline code has leading and trailing ' ' characters, additional space is needed
122 // to escape it, unless all characters are space.
123 if all_whitespace {
124 Self(format!("`{text}`"))
125 } else {
126 // More backticks are needed to delimit the inline code than the maximum number of
127 // backticks in a consecutive run.
128 let backticks = "`".repeat(count_max_consecutive_chars(&text, '`') + 1);
129 let space = match text.as_bytes() {
130 &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
131 &[b' ', .., b' '] => " ", // Space needed to escape inner space.
132 _ => "", // No space needed.
133 };
134 Self(format!("{backticks}{space}{text}{space}{backticks}"))
135 }
136 }
137}
138
// Adapted from `pulldown-cmark-to-cmark-20.0.0` (names changed, expressed with iterator adaptors).
// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
// Apache License 2.0, same as this code.
/// Returns the length, in characters, of the longest run of consecutive `search` characters in
/// `text`, or 0 when `search` does not occur at all.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    // Splitting on every character that is NOT `search` leaves exactly the runs of `search`
    // (plus empty pieces between adjacent non-matching characters, which count as 0).
    text.split(|ch: char| ch != search)
        .map(|run| run.chars().count())
        .max()
        .unwrap_or(0)
}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Escaping a document that exercises headings, links, code, emphasis, lists, math, and an
    /// HTML entity backslash-escapes every special character and leaves all other text alone.
    #[test]
    fn test_markdown_string_escape() {
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
          - Item 1
          * Item 2
          + Item 3

        Some math: $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
          \- Item 1
          \* Item 2
          \+ Item 3

        Some math: \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownString::escape(input).0, expected);
    }

    /// Inline code picks a delimiter longer than any backtick run in the text and pads with a
    /// space when the text starts/ends with a backtick or is surrounded by spaces.
    #[test]
    fn test_markdown_string_inline_code() {
        assert_eq!(MarkdownString::inline_code(" ").0, "` `");
        assert_eq!(MarkdownString::inline_code("text").0, "`text`");
        assert_eq!(MarkdownString::inline_code("text ").0, "`text `");
        // Leading AND trailing space: one extra space on each side is added as padding.
        assert_eq!(MarkdownString::inline_code(" text ").0, "`  text  `");
        assert_eq!(MarkdownString::inline_code("`").0, "`` ` ``");
        assert_eq!(MarkdownString::inline_code("``").0, "``` `` ```");
        assert_eq!(MarkdownString::inline_code("`text`").0, "`` `text` ``");
        // Any whitespace character is normalized to a single ' '.
        assert_eq!(MarkdownString::inline_code("a\tb\nc").0, "`a b c`");
        assert_eq!(
            MarkdownString::inline_code("some `text` no leading or trailing backticks").0,
            "``some `text` no leading or trailing backticks``"
        );
    }

    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
        assert_eq!(
            count_max_consecutive_chars("", '`'),
            0,
            "empty text has no runs"
        );
        assert_eq!(
            count_max_consecutive_chars("abc", '`'),
            0,
            "text without the searched character has no runs"
        );
    }
}