parser.rs

  1use gpui::SharedString;
  2use linkify::LinkFinder;
  3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
  4use pulldown_cmark::{
  5    Alignment, CowStr, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser,
  6};
  7use std::{collections::HashSet, ops::Range, path::Path, sync::Arc};
  8
  9use crate::path_range::PathWithRange;
 10
/// The pulldown_cmark feature flags that `parse_markdown` passes to `Parser::new_ext`.
///
/// The unit tests in this file assert that this set and their `UNWANTED_OPTIONS`
/// list are disjoint and together equal `Options::all()`, so a newly added
/// pulldown_cmark option has to be consciously placed in one of the two sets.
const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
    .union(Options::ENABLE_FOOTNOTES)
    .union(Options::ENABLE_STRIKETHROUGH)
    .union(Options::ENABLE_TASKLISTS)
    .union(Options::ENABLE_SMART_PUNCTUATION)
    .union(Options::ENABLE_HEADING_ATTRIBUTES)
    .union(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS)
    .union(Options::ENABLE_OLD_FOOTNOTES)
    .union(Options::ENABLE_GFM);
 20
 21pub fn parse_markdown(
 22    text: &str,
 23) -> (
 24    Vec<(Range<usize>, MarkdownEvent)>,
 25    HashSet<SharedString>,
 26    HashSet<Arc<Path>>,
 27) {
 28    let mut events = Vec::new();
 29    let mut language_names = HashSet::new();
 30    let mut language_paths = HashSet::new();
 31    let mut within_link = false;
 32    let mut within_metadata = false;
 33    let mut parser = Parser::new_ext(text, PARSE_OPTIONS)
 34        .into_offset_iter()
 35        .peekable();
 36    while let Some((pulldown_event, mut range)) = parser.next() {
 37        if within_metadata {
 38            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
 39                pulldown_event
 40            {
 41                within_metadata = false;
 42            }
 43            continue;
 44        }
 45        match pulldown_event {
 46            pulldown_cmark::Event::Start(tag) => {
 47                let tag = match tag {
 48                    pulldown_cmark::Tag::Link {
 49                        link_type,
 50                        dest_url,
 51                        title,
 52                        id,
 53                    } => {
 54                        within_link = true;
 55                        MarkdownTag::Link {
 56                            link_type,
 57                            dest_url: SharedString::from(dest_url.into_string()),
 58                            title: SharedString::from(title.into_string()),
 59                            id: SharedString::from(id.into_string()),
 60                        }
 61                    }
 62                    pulldown_cmark::Tag::MetadataBlock(kind) => {
 63                        within_metadata = true;
 64                        MarkdownTag::MetadataBlock(kind)
 65                    }
 66                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
 67                        MarkdownTag::CodeBlock {
 68                            kind: CodeBlockKind::Indented,
 69                            metadata: CodeBlockMetadata {
 70                                content_range: range.start + 1..range.end + 1,
 71                                line_count: 1,
 72                            },
 73                        }
 74                    }
 75                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
 76                        ref info,
 77                    )) => {
 78                        let content_range = extract_code_block_content_range(&text[range.clone()]);
 79                        let content_range =
 80                            content_range.start + range.start..content_range.end + range.start;
 81
 82                        let line_count = text[content_range.clone()]
 83                            .bytes()
 84                            .filter(|c| *c == b'\n')
 85                            .count();
 86                        let metadata = CodeBlockMetadata {
 87                            content_range,
 88                            line_count,
 89                        };
 90
 91                        let info = info.trim();
 92                        let kind = if info.is_empty() {
 93                            CodeBlockKind::Fenced
 94                            // Languages should never contain a slash, and PathRanges always should.
 95                            // (Models are told to specify them relative to a workspace root.)
 96                        } else if info.contains('/') {
 97                            let path_range = PathWithRange::new(info);
 98                            language_paths.insert(path_range.path.clone());
 99                            CodeBlockKind::FencedSrc(path_range)
100                        } else {
101                            let language = SharedString::from(info.to_string());
102                            language_names.insert(language.clone());
103                            CodeBlockKind::FencedLang(language)
104                        };
105
106                        MarkdownTag::CodeBlock { kind, metadata }
107                    }
108                    pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
109                    pulldown_cmark::Tag::Heading {
110                        level,
111                        id,
112                        classes,
113                        attrs,
114                    } => {
115                        let id = id.map(|id| SharedString::from(id.into_string()));
116                        let classes = classes
117                            .into_iter()
118                            .map(|c| SharedString::from(c.into_string()))
119                            .collect();
120                        let attrs = attrs
121                            .into_iter()
122                            .map(|(key, value)| {
123                                (
124                                    SharedString::from(key.into_string()),
125                                    value.map(|v| SharedString::from(v.into_string())),
126                                )
127                            })
128                            .collect();
129                        MarkdownTag::Heading {
130                            level,
131                            id,
132                            classes,
133                            attrs,
134                        }
135                    }
136                    pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
137                    pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
138                    pulldown_cmark::Tag::Item => MarkdownTag::Item,
139                    pulldown_cmark::Tag::FootnoteDefinition(label) => {
140                        MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
141                    }
142                    pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
143                    pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
144                    pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
145                    pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
146                    pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
147                    pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
148                    pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
149                    pulldown_cmark::Tag::Image {
150                        link_type,
151                        dest_url,
152                        title,
153                        id,
154                    } => MarkdownTag::Image {
155                        link_type,
156                        dest_url: SharedString::from(dest_url.into_string()),
157                        title: SharedString::from(title.into_string()),
158                        id: SharedString::from(id.into_string()),
159                    },
160                    pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
161                    pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
162                    pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
163                    pulldown_cmark::Tag::DefinitionListDefinition => {
164                        MarkdownTag::DefinitionListDefinition
165                    }
166                };
167                events.push((range, MarkdownEvent::Start(tag)))
168            }
169            pulldown_cmark::Event::End(tag) => {
170                if let pulldown_cmark::TagEnd::Link = tag {
171                    within_link = false;
172                }
173                events.push((range, MarkdownEvent::End(tag)));
174            }
175            pulldown_cmark::Event::Text(parsed) => {
176                fn event_for(
177                    text: &str,
178                    range: Range<usize>,
179                    str: &str,
180                ) -> (Range<usize>, MarkdownEvent) {
181                    if str == &text[range.clone()] {
182                        (range, MarkdownEvent::Text)
183                    } else {
184                        (range, MarkdownEvent::SubstitutedText(str.to_owned()))
185                    }
186                }
187                #[derive(Debug)]
188                struct TextRange<'a> {
189                    source_range: Range<usize>,
190                    merged_range: Range<usize>,
191                    parsed: CowStr<'a>,
192                }
193
194                let mut last_len = parsed.len();
195                let mut ranges = vec![TextRange {
196                    source_range: range.clone(),
197                    merged_range: 0..last_len,
198                    parsed,
199                }];
200
201                while matches!(parser.peek(), Some((pulldown_cmark::Event::Text(_), _))) {
202                    let Some((pulldown_cmark::Event::Text(next_event), next_range)) = parser.next()
203                    else {
204                        unreachable!()
205                    };
206                    let next_len = last_len + next_event.len();
207                    ranges.push(TextRange {
208                        source_range: next_range.clone(),
209                        merged_range: last_len..next_len,
210                        parsed: next_event,
211                    });
212                    last_len = next_len;
213                }
214
215                let mut merged_text =
216                    String::with_capacity(ranges.last().unwrap().merged_range.end);
217                for range in &ranges {
218                    merged_text.push_str(&range.parsed);
219                }
220
221                let mut ranges = ranges.into_iter().peekable();
222
223                if !within_link {
224                    let mut finder = LinkFinder::new();
225                    finder.kinds(&[linkify::LinkKind::Url]);
226
227                    // Find links in the merged text
228                    for link in finder.links(&merged_text) {
229                        let link_start_in_merged = link.start();
230                        let link_end_in_merged = link.end();
231
232                        while ranges
233                            .peek()
234                            .is_some_and(|range| range.merged_range.end <= link_start_in_merged)
235                        {
236                            let range = ranges.next().unwrap();
237                            events.push(event_for(text, range.source_range, &range.parsed));
238                        }
239
240                        let Some(range) = ranges.peek_mut() else {
241                            continue;
242                        };
243                        let prefix_len = link_start_in_merged - range.merged_range.start;
244                        if prefix_len > 0 {
245                            let (head, tail) = range.parsed.split_at(prefix_len);
246                            events.push(event_for(
247                                text,
248                                range.source_range.start..range.source_range.start + prefix_len,
249                                &head,
250                            ));
251                            range.parsed = CowStr::Boxed(tail.into());
252                            range.merged_range.start += prefix_len;
253                            range.source_range.start += prefix_len;
254                        }
255
256                        let link_start_in_source = range.source_range.start;
257                        let mut link_end_in_source = range.source_range.end;
258                        let mut link_events = Vec::new();
259
260                        while ranges
261                            .peek()
262                            .is_some_and(|range| range.merged_range.end <= link_end_in_merged)
263                        {
264                            let range = ranges.next().unwrap();
265                            link_end_in_source = range.source_range.end;
266                            link_events.push(event_for(text, range.source_range, &range.parsed));
267                        }
268
269                        if let Some(range) = ranges.peek_mut() {
270                            let prefix_len = link_end_in_merged - range.merged_range.start;
271                            if prefix_len > 0 {
272                                let (head, tail) = range.parsed.split_at(prefix_len);
273                                link_events.push(event_for(
274                                    text,
275                                    range.source_range.start..range.source_range.start + prefix_len,
276                                    head,
277                                ));
278                                range.parsed = CowStr::Boxed(tail.into());
279                                range.merged_range.start += prefix_len;
280                                range.source_range.start += prefix_len;
281                                link_end_in_source = range.source_range.start;
282                            }
283                        }
284                        let link_range = link_start_in_source..link_end_in_source;
285
286                        events.push((
287                            link_range.clone(),
288                            MarkdownEvent::Start(MarkdownTag::Link {
289                                link_type: LinkType::Autolink,
290                                dest_url: SharedString::from(link.as_str().to_string()),
291                                title: SharedString::default(),
292                                id: SharedString::default(),
293                            }),
294                        ));
295                        events.extend(link_events);
296                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
297                    }
298                }
299
300                for range in ranges {
301                    events.push(event_for(text, range.source_range, &range.parsed));
302                }
303            }
304            pulldown_cmark::Event::Code(_) => {
305                range.start += 1;
306                range.end -= 1;
307                events.push((range, MarkdownEvent::Code))
308            }
309            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
310            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
311            pulldown_cmark::Event::FootnoteReference(_) => {
312                events.push((range, MarkdownEvent::FootnoteReference))
313            }
314            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
315            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
316            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
317            pulldown_cmark::Event::TaskListMarker(checked) => {
318                events.push((range, MarkdownEvent::TaskListMarker(checked)))
319            }
320            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
321        }
322    }
323    (events, language_names, language_paths)
324}
325
326pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
327    let mut events = Vec::new();
328    let mut finder = LinkFinder::new();
329    finder.kinds(&[linkify::LinkKind::Url]);
330    let mut text_range = Range {
331        start: 0,
332        end: text.len(),
333    };
334    for link in finder.links(text) {
335        let link_range = link.start()..link.end();
336
337        if link_range.start > text_range.start {
338            events.push((text_range.start..link_range.start, MarkdownEvent::Text));
339        }
340
341        events.push((
342            link_range.clone(),
343            MarkdownEvent::Start(MarkdownTag::Link {
344                link_type: LinkType::Autolink,
345                dest_url: SharedString::from(link.as_str().to_string()),
346                title: SharedString::default(),
347                id: SharedString::default(),
348            }),
349        ));
350        events.push((link_range.clone(), MarkdownEvent::Text));
351        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
352
353        text_range.start = link_range.end;
354    }
355
356    if text_range.end > text_range.start {
357        events.push((text_range, MarkdownEvent::Text));
358    }
359
360    events
361}
362
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Most variants carry no payload: the parser returns each event together with a
/// byte range into the Markdown source, and the content is read from that range.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// Text that uses the associated range from the markdown source.
    Text,
    /// Text that differs from the markdown source - typically due to substitution of HTML entities
    /// and smart punctuation. The contained string is the text as produced by the parser.
    SubstitutedText(String),
    /// An inline code node. The associated range is narrowed by `parse_markdown`
    /// to skip the delimiting backticks.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined
    /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
    /// occur in any order. The label is not stored in this variant.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    TaskListMarker(bool),
}
397
/// Tags for elements that can contain other elements.
///
/// This mirrors `pulldown_cmark::Tag`, with borrowed strings converted to
/// `SharedString` so that values do not borrow from the parsed source.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by pulldown_cmark is discarded.
    BlockQuote,

    /// A code block.
    CodeBlock {
        kind: CodeBlockKind,
        metadata: CodeBlockMetadata,
    },

    /// A HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or of the table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. The first field is the link type, the second the destination URL and the third is a title,
    /// the fourth is the link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
480
/// How a code block was written, and what its info string (if any) referred to.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block that pulldown_cmark reported as indented (no fence, no info string).
    Indented,
    /// "Fenced" means "surrounded by triple backticks."
    /// There can optionally be either a language after the backticks (like in traditional Markdown)
    /// or, if an agent is specifying a path for a source location in the project, it can be a PathRange,
    /// e.g. ```path/to/foo.rs#L123-456 instead of ```rust
    ///
    /// This variant is used when the trimmed info string is empty.
    Fenced,
    /// A fenced block whose trimmed info string is non-empty and contains no `/`;
    /// the info string is stored as the language name.
    FencedLang(SharedString),
    /// A fenced block whose info string contains a `/` and was therefore parsed
    /// as a path with an optional range.
    FencedSrc(PathWithRange),
}
492
/// Location and size information about the contents of a code block.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CodeBlockMetadata {
    /// Byte range of the block's content within the Markdown source. For fenced
    /// blocks this is computed by `extract_code_block_content_range` and excludes
    /// the fence lines.
    pub content_range: Range<usize>,
    /// For fenced blocks, the number of newline bytes inside `content_range`;
    /// `parse_markdown` sets it to 1 for indented blocks.
    pub line_count: usize,
}
498
/// Returns the byte range of the content of a fenced code block, relative to `text`.
///
/// `text` is expected to be the source of a single code block, starting at its
/// opening fence. The returned range:
/// - starts after the first line (the opening fence plus info string) when `text`
///   begins with a fence of at least three backticks or three tildes;
/// - ends before a trailing run of the same fence character, provided that run is
///   at least as long as the opening fence (a shorter run is content, not a closing
///   fence);
/// - keeps the newline that precedes the closing fence.
///
/// Text that does not start with a fence is returned whole, except that a trailing
/// "```" is still dropped. The result always satisfies `start <= end`, including
/// for an unterminated block that consists only of fence characters such as "````".
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
    /// Number of consecutive `byte`s at the start of `bytes`.
    fn leading_run(bytes: &[u8], byte: u8) -> usize {
        bytes.iter().take_while(|b| **b == byte).count()
    }

    /// Number of consecutive `byte`s at the end of `bytes`.
    fn trailing_run(bytes: &[u8], byte: u8) -> usize {
        bytes.iter().rev().take_while(|b| **b == byte).count()
    }

    const MIN_FENCE_LEN: usize = 3;

    let bytes = text.as_bytes();
    let mut range = 0..text.len();

    let opening_fence = [b'`', b'~']
        .iter()
        .map(|&fence_byte| (fence_byte, leading_run(bytes, fence_byte)))
        .find(|(_, run_len)| *run_len >= MIN_FENCE_LEN);

    let Some((fence_byte, opening_len)) = opening_fence else {
        // No opening fence: only drop a trailing triple-backtick fence, if present.
        if !range.is_empty() && text.ends_with("```") {
            range.end -= 3;
        }
        return range;
    };

    // Skip the fence itself, then the rest of its line (the info string).
    // Fence characters are ASCII, so `opening_len` is a valid char boundary.
    range.start = opening_len;
    if let Some(newline_ix) = text[range.start..].find('\n') {
        range.start += newline_ix + 1;
    }

    // Only look for the closing fence in what remains after the opening line, so
    // the opening fence can never be mistaken for the closing one.
    let closing_len = trailing_run(&bytes[range.start..], fence_byte);
    if closing_len >= opening_len {
        range.end -= closing_len;
    }
    range
}
514
#[cfg(test)]
mod tests {
    use super::MarkdownEvent::*;
    use super::MarkdownTag::*;
    use super::*;

    /// pulldown_cmark options that are deliberately left out of `PARSE_OPTIONS`.
    const UNWANTED_OPTIONS: Options = Options::ENABLE_YAML_STYLE_METADATA_BLOCKS
        .union(Options::ENABLE_MATH)
        .union(Options::ENABLE_DEFINITION_LIST);

    #[test]
    fn all_options_considered() {
        // The purpose of this is to fail when new options are added to pulldown_cmark, so that they
        // can be evaluated for inclusion.
        assert_eq!(PARSE_OPTIONS.union(UNWANTED_OPTIONS), Options::all());
    }

    #[test]
    fn wanted_and_unwanted_options_disjoint() {
        assert_eq!(
            PARSE_OPTIONS.intersection(UNWANTED_OPTIONS),
            Options::empty()
        );
    }

    #[test]
    fn test_plain_urls_and_escaped_text() {
        assert_eq!(
            parse_markdown("&nbsp;&nbsp; https://some.url some \\`&#9658;\\` text"),
            (
                vec![
                    (0..51, Start(Paragraph)),
                    (0..6, SubstitutedText("\u{a0}".into())),
                    (6..12, SubstitutedText("\u{a0}".into())),
                    (12..13, Text),
                    (
                        13..29,
                        Start(Link {
                            link_type: LinkType::Autolink,
                            dest_url: "https://some.url".into(),
                            title: "".into(),
                            id: "".into(),
                        })
                    ),
                    (13..29, Text),
                    (13..29, End(MarkdownTagEnd::Link)),
                    (29..35, Text),
                    (36..37, Text), // Escaped backtick
                    // `&#9658;` decodes to U+25BA (black right-pointing pointer).
                    (37..44, SubstitutedText("\u{25ba}".into())),
                    (45..46, Text), // Escaped backtick
                    (46..51, Text),
                    (0..51, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::new(),
                HashSet::new()
            )
        );
    }

    #[test]
    fn test_incomplete_link() {
        assert_eq!(
            parse_markdown("You can use the [GitHub Search API](https://docs.github.com/en").0,
            vec![
                (0..62, Start(Paragraph)),
                (0..16, Text),
                (16..17, Text),
                (17..34, Text),
                (34..35, Text),
                (35..36, Text),
                (
                    36..62,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://docs.github.com/en".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (36..62, Text),
                (36..62, End(MarkdownTagEnd::Link)),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );
    }

    #[test]
    fn test_smart_punctuation() {
        // Expected substitutions are written as escapes so they survive any
        // re-encoding of this file:
        //   --  -> U+2013 (en dash)        ---  -> U+2014 (em dash)
        //   ... -> U+2026 (ellipsis)
        //   "   -> U+201C / U+201D         '    -> U+2018 / U+2019
        assert_eq!(
            parse_markdown("-- --- ... \"double quoted\" 'single quoted' ----------"),
            (
                vec![
                    (0..53, Start(Paragraph)),
                    (0..2, SubstitutedText("\u{2013}".into())),
                    (2..3, Text),
                    (3..6, SubstitutedText("\u{2014}".into())),
                    (6..7, Text),
                    (7..10, SubstitutedText("\u{2026}".into())),
                    (10..11, Text),
                    (11..12, SubstitutedText("\u{201c}".into())),
                    (12..25, Text),
                    (25..26, SubstitutedText("\u{201d}".into())),
                    (26..27, Text),
                    (27..28, SubstitutedText("\u{2018}".into())),
                    (28..41, Text),
                    (41..42, SubstitutedText("\u{2019}".into())),
                    (42..43, Text),
                    // Ten hyphens collapse into five en dashes.
                    (
                        43..53,
                        SubstitutedText("\u{2013}\u{2013}\u{2013}\u{2013}\u{2013}".into())
                    ),
                    (0..53, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::new(),
                HashSet::new()
            )
        )
    }

    #[test]
    fn test_code_block_metadata() {
        assert_eq!(
            parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
            (
                vec![
                    (
                        0..37,
                        Start(CodeBlock {
                            kind: CodeBlockKind::FencedLang("rust".into()),
                            metadata: CodeBlockMetadata {
                                content_range: 8..34,
                                line_count: 3
                            }
                        })
                    ),
                    (8..34, Text),
                    (0..37, End(MarkdownTagEnd::CodeBlock)),
                ],
                HashSet::from(["rust".into()]),
                HashSet::new()
            )
        )
    }

    #[test]
    fn test_extract_code_block_content_range() {
        let input = "```rust\nlet x = 5;\n```";
        assert_eq!(extract_code_block_content_range(input), 8..19);

        let input = "plain text";
        assert_eq!(extract_code_block_content_range(input), 0..10);

        let input = "```python\nprint('hello')\nprint('world')\n```";
        assert_eq!(extract_code_block_content_range(input), 10..40);
    }

    #[test]
    fn test_links_split_across_fragments() {
        // This test verifies that links split across multiple text fragments due to escaping or other issues
        // are correctly detected and processed
        // Note: In real usage, pulldown_cmark creates separate text events for the escaped character
        // We're verifying our parser can handle this correctly
        assert_eq!(
            parse_markdown("https:/\\/example.com is equivalent to https://example&#46;com!").0,
            vec![
                (0..62, Start(Paragraph)),
                (
                    0..20,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (0..7, Text),
                (8..20, Text),
                (0..20, End(MarkdownTagEnd::Link)),
                (20..38, Text),
                (
                    38..61,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (38..53, Text),
                (53..58, SubstitutedText(".".into())),
                (58..61, Text),
                (38..61, End(MarkdownTagEnd::Link)),
                (61..62, Text),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );

        assert_eq!(
            parse_markdown("Visit https://example.com/cat\\/é&#8205;☕ for coffee!").0,
            [
                (0..55, Start(Paragraph)),
                (0..6, Text),
                (
                    6..43,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        // The link range ends at 43, i.e. it includes the trailing
                        // U+2615 (hot beverage) after the zero-width joiner.
                        dest_url: "https://example.com/cat/é\u{200d}\u{2615}".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (6..29, Text),
                (30..33, Text),
                (33..40, SubstitutedText("\u{200d}".into())),
                (40..43, Text),
                (6..43, End(MarkdownTagEnd::Link)),
                (43..55, Text),
                (0..55, End(MarkdownTagEnd::Paragraph))
            ]
        );
    }
}