1use std::fmt::{Display, Formatter};
2
/// Markdown text.
///
/// A thin newtype around an owned `String` holding Markdown source. The associated functions
/// `MarkdownString::escape`, `MarkdownString::inline_code` and `MarkdownString::code_block`
/// build values from plain text, and the `Display` impl writes the wrapped string out verbatim.
///
/// The inner string is public and is not validated or escaped by the wrapper itself.
#[derive(Debug, Clone)]
pub struct MarkdownString(pub String);
6
7impl Display for MarkdownString {
8 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
9 write!(f, "{}", self.0)
10 }
11}
12
13impl MarkdownString {
14 /// Escapes markdown special characters in markdown text blocks. Markdown code blocks follow
15 /// different rules and `MarkdownString::inline_code` or `MarkdownString::code_block` should be
16 /// used in that case.
17 ///
18 /// Also escapes the following markdown extensions:
19 ///
20 /// * `^` for superscripts
21 /// * `$` for inline math
22 /// * `~` for strikethrough
23 ///
24 /// Escape of some characters is unnecessary, because while they are involved in markdown syntax,
25 /// the other characters involved are escaped:
26 ///
27 /// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
28 /// plaintext.
29 ///
30 /// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
31 ///
32 /// TODO: There is one escape this doesn't do currently. Period after numbers at the start of the
33 /// line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
34 pub fn escape(text: &str) -> Self {
35 let mut chunks = Vec::new();
36 let mut start_of_unescaped = None;
37 for (ix, c) in text.char_indices() {
38 match c {
39 // Always escaped.
40 '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&' |
41 // TODO: these only need to be escaped when they are the first non-whitespace
42 // character of the line of a block. There should probably be both an `escape_block`
43 // which does this and an `escape_inline` method which does not escape these.
44 '#' | '+' | '=' | '-' => {
45 match start_of_unescaped {
46 None => {}
47 Some(start_of_unescaped) => {
48 chunks.push(&text[start_of_unescaped..ix]);
49 }
50 }
51 chunks.push("\\");
52 // Can include this char in the "unescaped" text since a
53 // backslash was just emitted.
54 start_of_unescaped = Some(ix);
55 }
56 // Escaped since `<` is used in opening HTML tags. `<` is used since Markdown
57 // supports HTML entities, and this allows the text to be used directly in HTML.
58 '<' => {
59 match start_of_unescaped {
60 None => {}
61 Some(start_of_unescaped) => {
62 chunks.push(&text[start_of_unescaped..ix]);
63 }
64 }
65 chunks.push("<");
66 start_of_unescaped = None;
67 }
68 // Escaped since `>` is used for blockquotes. `>` is used since Markdown supports
69 // HTML entities, and this allows the text to be used directly in HTML.
70 '>' => {
71 match start_of_unescaped {
72 None => {}
73 Some(start_of_unescaped) => {
74 chunks.push(&text[start_of_unescaped..ix]);
75 }
76 }
77 chunks.push("gt;");
78 start_of_unescaped = None;
79 }
80 _ => {
81 if start_of_unescaped.is_none() {
82 start_of_unescaped = Some(ix);
83 }
84 }
85 }
86 }
87 if let Some(start_of_unescaped) = start_of_unescaped {
88 chunks.push(&text[start_of_unescaped..])
89 }
90 Self(chunks.concat())
91 }
92
93 /// Returns markdown for inline code (wrapped in backticks), handling code that contains backticks
94 /// and spaces. All whitespace is treated as a single space character. For text that does not
95 /// contain whitespace other than ' ', this escaping roundtrips through pulldown-cmark.
96 ///
97 /// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
98 pub fn inline_code(text: &str) -> Self {
99 // Apache License 2.0, same as this crate.
100 //
101 // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
102 //
103 // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
104 // `Code` events parsed by pulldown-cmark.
105 //
106 // * Direct return of string.
107 //
108 // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290
109
110 let mut all_whitespace = true;
111 let text = text
112 .chars()
113 .map(|c| {
114 if c.is_whitespace() {
115 ' '
116 } else {
117 all_whitespace = false;
118 c
119 }
120 })
121 .collect::<String>();
122
123 // When inline code has leading and trailing ' ' characters, additional space is needed
124 // to escape it, unless all characters are space.
125 if all_whitespace {
126 Self(format!("`{text}`"))
127 } else {
128 // More backticks are needed to delimit the inline code than the maximum number of
129 // backticks in a consecutive run.
130 let backticks = "`".repeat(count_max_consecutive_chars(&text, '`') + 1);
131 let space = match text.as_bytes() {
132 &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
133 &[b' ', .., b' '] => " ", // Space needed to escape inner space.
134 _ => "", // No space needed.
135 };
136 Self(format!("{backticks}{space}{text}{space}{backticks}"))
137 }
138 }
139
140 /// Returns markdown for code blocks, wrapped in 3 or more backticks as needed.
141 pub fn code_block(tag: &str, text: &str) -> Self {
142 let backticks = "`".repeat(3.max(count_max_consecutive_chars(text, '`') + 1));
143 Self(format!("{backticks}{tag}\n{text}\n{backticks}\n"))
144 }
145}
146
// Copied from `pulldown-cmark-to-cmark-20.0.0` with changed names.
// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
// Apache License 2.0, same as this code.
//
// Returns the length, in characters, of the longest uninterrupted run of `search` within `text`,
// or 0 when `search` does not occur at all.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    let mut longest_run = 0;
    let mut current_run = 0;

    for ch in text.chars() {
        // Any other character ends the current run; a match extends it.
        current_run = if ch == search { current_run + 1 } else { 0 };
        longest_run = longest_run.max(current_run);
    }
    longest_run
}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Escaping a document that exercises headings, emphasis, links, code fences, list leaders,
    /// math and an HTML entity must neutralize every construct.
    #[test]
    fn test_markdown_string_escape() {
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
          - Item 1
          * Item 2
          + Item 3

        Some math:  $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
          \- Item 1
          \* Item 2
          \+ Item 3

        Some math:  \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownString::escape(input).0, expected);
    }

    /// Inline code picks a delimiter longer than any backtick run and pads with a space when the
    /// content starts/ends with a backtick or is surrounded by spaces.
    #[test]
    fn test_markdown_string_inline_code() {
        assert_eq!(MarkdownString::inline_code(" ").0, "` `");
        assert_eq!(MarkdownString::inline_code("text").0, "`text`");
        assert_eq!(MarkdownString::inline_code("text ").0, "`text `");
        // Leading *and* trailing space: one extra space on each side keeps the inner ones.
        assert_eq!(MarkdownString::inline_code(" text ").0, "`  text  `");
        assert_eq!(MarkdownString::inline_code("`").0, "`` ` ``");
        assert_eq!(MarkdownString::inline_code("``").0, "``` `` ```");
        assert_eq!(MarkdownString::inline_code("`text`").0, "`` `text` ``");
        assert_eq!(
            MarkdownString::inline_code("some `text` no leading or trailing backticks").0,
            "``some `text` no leading or trailing backticks``"
        );
        // Non-space whitespace is normalized to ' '.
        assert_eq!(MarkdownString::inline_code("a\tb\nc").0, "`a b c`");
    }

    /// Code blocks use a fence of at least three backticks, growing past any run in the body.
    #[test]
    fn test_markdown_string_code_block() {
        assert_eq!(
            MarkdownString::code_block("rust", "let x = 1;").0,
            "```rust\nlet x = 1;\n```\n"
        );
        assert_eq!(
            MarkdownString::code_block("", "```\nnested\n```").0,
            "````\n```\nnested\n```\n````\n"
        );
    }

    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
        assert_eq!(
            count_max_consecutive_chars("", '`'),
            0,
            "empty text has no runs"
        );
        assert_eq!(
            count_max_consecutive_chars("no backticks", '`'),
            0,
            "text without the search char has no runs"
        );
    }
}