1use std::fmt::{Display, Formatter};
2
/// Builds a URL-friendly anchor slug from heading text (e.g. "Hello World" → "hello-world").
///
/// Leading and trailing whitespace is trimmed first. After that, alphanumeric characters,
/// `-` and `_` are kept (lowercased), every space becomes a `-`, and all other characters
/// are dropped. Consecutive spaces therefore produce consecutive hyphens.
pub fn generate_heading_slug(text: &str) -> String {
    let mut slug = String::new();
    for ch in text.trim().chars() {
        match ch {
            ' ' => slug.push('-'),
            '-' | '_' => slug.push(ch),
            _ if ch.is_alphanumeric() => {
                // Only the first character of the lowercase mapping is kept, so one input
                // character never expands into more than one output character.
                slug.push(ch.to_lowercase().next().unwrap_or(ch));
            }
            // Punctuation, symbols and non-space whitespace are removed entirely.
            _ => {}
        }
    }
    slug
}
18
/// Reports whether `url` begins with a URI scheme followed by `:` (RFC 3986 §3.1).
///
/// A scheme is one ASCII letter followed by any number of ASCII letters, digits, `+`, `-`
/// or `.` characters.
fn has_uri_scheme(url: &str) -> bool {
    // Everything before the first `:` is the scheme candidate; without a colon there is none.
    let candidate = match url.split_once(':') {
        Some((candidate, _rest)) => candidate,
        None => return false,
    };

    let mut remaining = candidate.chars();
    // An empty candidate (URL starting with `:`) fails here because there is no first char.
    let starts_with_letter =
        matches!(remaining.next(), Some(first) if first.is_ascii_alphabetic());

    starts_with_letter
        && remaining.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}
36
37/// Splits a relative URL into its path and `#fragment` parts.
38/// Absolute URLs are returned as-is with no fragment.
39pub fn split_local_url_fragment(url: &str) -> (&str, Option<&str>) {
40 if has_uri_scheme(url) {
41 return (url, None);
42 }
43 match url.find('#') {
44 Some(pos) => {
45 let path = &url[..pos];
46 let fragment = &url[pos + 1..];
47 (
48 path,
49 if fragment.is_empty() {
50 None
51 } else {
52 Some(fragment)
53 },
54 )
55 }
56 None => (url, None),
57 }
58}
59
/// Newtype marking the wrapped `String` as markdown source text.
#[derive(Debug, Clone)]
pub struct MarkdownString(pub String);

impl Display for MarkdownString {
    /// Writes the wrapped markdown verbatim: no escaping is performed and formatter options
    /// such as width or alignment are not applied.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
69
/// Escapes markdown special characters in markdown text blocks. Markdown code blocks follow
/// different rules and `MarkdownInlineCode` or `MarkdownCodeBlock` should be used in that case.
///
/// The escaping is performed by the `Display` implementation, so the escaped text is obtained
/// through `to_string()`, `format!`, `write!` and similar.
///
/// Also escapes the following markdown extensions:
///
/// * `^` for superscripts
/// * `$` for inline math
/// * `~` for strikethrough
///
/// Escape of some characters is unnecessary, because while they are involved in markdown syntax,
/// the other characters involved are escaped:
///
/// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
///   plaintext.
///
/// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
///
/// TODO: There is one escape this doesn't do currently. Period after numbers at the start of the
/// line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
// Cheap borrowed wrapper: derive the common traits so it can be logged and passed by value.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownEscaped<'a>(pub &'a str);
90
/// Implements `Display` to format markdown inline code (wrapped in backticks), handling code that
/// contains backticks and spaces. Every whitespace character is written as a single space
/// character. For text that does not contain whitespace other than ' ', this escaping roundtrips
/// through pulldown-cmark.
///
/// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
// Cheap borrowed wrapper: derive the common traits so it can be logged and passed by value.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownInlineCode<'a>(pub &'a str);
98
/// Implements `Display` to format markdown code blocks, wrapped in 3 or more backticks as needed.
// Cheap borrowed wrapper: derive the common traits so it can be logged and passed by value.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownCodeBlock<'a> {
    /// Text written directly after the opening fence (the info string, e.g. a language name).
    pub tag: &'a str,
    /// The code placed between the fences.
    pub text: &'a str,
}
104
105impl Display for MarkdownEscaped<'_> {
106 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
107 let mut start_of_unescaped = None;
108 for (ix, c) in self.0.char_indices() {
109 match c {
110 // Always escaped.
111 '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&' |
112 // TODO: these only need to be escaped when they are the first non-whitespace
113 // character of the line of a block. There should probably be both an `escape_block`
114 // which does this and an `escape_inline` method which does not escape these.
115 '#' | '+' | '=' | '-' => {
116 match start_of_unescaped {
117 None => {}
118 Some(start_of_unescaped) => {
119 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
120 }
121 }
122 write!(formatter, "\\")?;
123 // Can include this char in the "unescaped" text since a
124 // backslash was just emitted.
125 start_of_unescaped = Some(ix);
126 }
127 // Escaped since `<` is used in opening HTML tags. `<` is used since Markdown
128 // supports HTML entities, and this allows the text to be used directly in HTML.
129 '<' => {
130 match start_of_unescaped {
131 None => {}
132 Some(start_of_unescaped) => {
133 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
134 }
135 }
136 write!(formatter, "<")?;
137 start_of_unescaped = None;
138 }
139 // Escaped since `>` is used for blockquotes. `>` is used since Markdown supports
140 // HTML entities, and this allows the text to be used directly in HTML.
141 '>' => {
142 match start_of_unescaped {
143 None => {}
144 Some(start_of_unescaped) => {
145 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
146 }
147 }
148 write!(formatter, ">")?;
149 start_of_unescaped = None;
150 }
151 _ => {
152 if start_of_unescaped.is_none() {
153 start_of_unescaped = Some(ix);
154 }
155 }
156 }
157 }
158 if let Some(start_of_unescaped) = start_of_unescaped {
159 write!(formatter, "{}", &self.0[start_of_unescaped..])?;
160 }
161 Ok(())
162 }
163}
164
165impl Display for MarkdownInlineCode<'_> {
166 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
167 // Apache License 2.0, same as this crate.
168 //
169 // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
170 //
171 // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
172 // `Code` events parsed by pulldown-cmark.
173 //
174 // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290
175
176 let mut all_whitespace = true;
177 let text = self
178 .0
179 .chars()
180 .map(|c| {
181 if c.is_whitespace() {
182 ' '
183 } else {
184 all_whitespace = false;
185 c
186 }
187 })
188 .collect::<String>();
189
190 // When inline code has leading and trailing ' ' characters, additional space is needed
191 // to escape it, unless all characters are space.
192 if all_whitespace {
193 write!(formatter, "`{text}`")
194 } else {
195 // More backticks are needed to delimit the inline code than the maximum number of
196 // backticks in a consecutive run.
197 let backticks = "`".repeat(count_max_consecutive_chars(&text, '`') + 1);
198 let space = match text.as_bytes() {
199 &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
200 &[b' ', .., b' '] => " ", // Space needed to escape inner space.
201 _ => "", // No space needed.
202 };
203 write!(formatter, "{backticks}{space}{text}{space}{backticks}")
204 }
205 }
206}
207
208impl Display for MarkdownCodeBlock<'_> {
209 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
210 let tag = self.tag;
211 let text = self.text;
212 let backticks = "`".repeat(3.max(count_max_consecutive_chars(text, '`') + 1));
213 write!(formatter, "{backticks}{tag}\n{text}\n{backticks}\n")
214 }
215}
216
217// Copied from `pulldown-cmark-to-cmark-20.0.0` with changed names.
218// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
219// Apache License 2.0, same as this code.
/// Returns the length of the longest run of consecutive `search` characters in `text`, or 0
/// when `search` does not occur at all.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    let mut longest_run = 0usize;
    let mut current_run = 0usize;

    for ch in text.chars() {
        if ch == search {
            current_run += 1;
            longest_run = longest_run.max(current_run);
        } else {
            // Any other character ends the current run.
            current_run = 0;
        }
    }
    longest_run
}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Escaping a document that exercises headings, links, emphasis, fences, list markers,
    /// inline math and an HTML entity. Input and expectation use the same layout, so only the
    /// inserted backslashes differ.
    #[test]
    fn test_markdown_escaped() {
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
        - Item 1
        * Item 2
        + Item 3

        Some math: $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
        \- Item 1
        \* Item 2
        \+ Item 3

        Some math: \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownEscaped(input).to_string(), expected);
    }

    /// Inline code: delimiter length and padding for spaces and backticks.
    #[test]
    fn test_markdown_inline_code() {
        assert_eq!(MarkdownInlineCode(" ").to_string(), "` `");
        assert_eq!(MarkdownInlineCode("text").to_string(), "`text`");
        assert_eq!(MarkdownInlineCode("text ").to_string(), "`text `");
        // Leading and trailing spaces both present: one extra padding space on each side.
        assert_eq!(MarkdownInlineCode(" text ").to_string(), "`  text  `");
        assert_eq!(MarkdownInlineCode("`").to_string(), "`` ` ``");
        assert_eq!(MarkdownInlineCode("``").to_string(), "``` `` ```");
        assert_eq!(MarkdownInlineCode("`text`").to_string(), "`` `text` ``");
        assert_eq!(
            MarkdownInlineCode("some `text` no leading or trailing backticks").to_string(),
            "``some `text` no leading or trailing backticks``"
        );
    }

    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
    }

    #[test]
    fn test_split_local_url_fragment() {
        assert_eq!(split_local_url_fragment("#heading"), ("", Some("heading")));
        assert_eq!(
            split_local_url_fragment("./file.md#heading"),
            ("./file.md", Some("heading"))
        );
        assert_eq!(split_local_url_fragment("./file.md"), ("./file.md", None));
        // Absolute URLs are never split.
        assert_eq!(
            split_local_url_fragment("https://example.com#frag"),
            ("https://example.com#frag", None)
        );
        assert_eq!(
            split_local_url_fragment("mailto:user@example.com"),
            ("mailto:user@example.com", None)
        );
        // An empty fragment is reported as `None`.
        assert_eq!(split_local_url_fragment("#"), ("", None));
        assert_eq!(
            split_local_url_fragment("../other.md#section"),
            ("../other.md", Some("section"))
        );
        // A scheme must start with a letter, so this is treated as a relative URL.
        assert_eq!(
            split_local_url_fragment("123:not-a-scheme#frag"),
            ("123:not-a-scheme", Some("frag"))
        );
    }

    #[test]
    fn test_generate_heading_slug() {
        assert_eq!(generate_heading_slug("Hello World"), "hello-world");
        // Every space becomes its own hyphen; runs are not collapsed.
        assert_eq!(generate_heading_slug("Hello  World"), "hello--world");
        assert_eq!(generate_heading_slug("Hello-World"), "hello-world");
        assert_eq!(
            generate_heading_slug("Some **bold** text"),
            "some-bold-text"
        );
        assert_eq!(generate_heading_slug("Let's try with Ü"), "lets-try-with-ü");
        assert_eq!(
            generate_heading_slug("heading with 123 numbers"),
            "heading-with-123-numbers"
        );
        assert_eq!(
            generate_heading_slug("What about (parens)?"),
            "what-about-parens"
        );
        // Surrounding whitespace is trimmed before the slug is built.
        assert_eq!(
            generate_heading_slug("  leading spaces  "),
            "leading-spaces"
        );
    }
}