1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{
5 Alignment, CowStr, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser,
6};
7use std::{ops::Range, sync::Arc};
8
9use collections::HashSet;
10
11use crate::path_range::PathWithRange;
12
13const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
14 .union(Options::ENABLE_FOOTNOTES)
15 .union(Options::ENABLE_STRIKETHROUGH)
16 .union(Options::ENABLE_TASKLISTS)
17 .union(Options::ENABLE_SMART_PUNCTUATION)
18 .union(Options::ENABLE_HEADING_ATTRIBUTES)
19 .union(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS)
20 .union(Options::ENABLE_OLD_FOOTNOTES)
21 .union(Options::ENABLE_GFM);
22
23pub fn parse_markdown(
24 text: &str,
25) -> (
26 Vec<(Range<usize>, MarkdownEvent)>,
27 HashSet<SharedString>,
28 HashSet<Arc<str>>,
29) {
30 let mut events = Vec::new();
31 let mut language_names = HashSet::default();
32 let mut language_paths = HashSet::default();
33 let mut within_link = false;
34 let mut within_metadata = false;
35 let mut parser = Parser::new_ext(text, PARSE_OPTIONS)
36 .into_offset_iter()
37 .peekable();
38 while let Some((pulldown_event, range)) = parser.next() {
39 if within_metadata {
40 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
41 pulldown_event
42 {
43 within_metadata = false;
44 }
45 continue;
46 }
47 match pulldown_event {
48 pulldown_cmark::Event::Start(tag) => {
49 let tag = match tag {
50 pulldown_cmark::Tag::Link {
51 link_type,
52 dest_url,
53 title,
54 id,
55 } => {
56 within_link = true;
57 MarkdownTag::Link {
58 link_type,
59 dest_url: SharedString::from(dest_url.into_string()),
60 title: SharedString::from(title.into_string()),
61 id: SharedString::from(id.into_string()),
62 }
63 }
64 pulldown_cmark::Tag::MetadataBlock(kind) => {
65 within_metadata = true;
66 MarkdownTag::MetadataBlock(kind)
67 }
68 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
69 MarkdownTag::CodeBlock {
70 kind: CodeBlockKind::Indented,
71 metadata: CodeBlockMetadata {
72 content_range: range.clone(),
73 line_count: 1,
74 },
75 }
76 }
77 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
78 ref info,
79 )) => {
80 let content_range = extract_code_block_content_range(&text[range.clone()]);
81 let content_range =
82 content_range.start + range.start..content_range.end + range.start;
83
84 // Valid to use bytes since multi-byte UTF-8 doesn't use ASCII chars.
85 let line_count = text[content_range.clone()]
86 .bytes()
87 .filter(|c| *c == b'\n')
88 .count();
89 let metadata = CodeBlockMetadata {
90 content_range,
91 line_count,
92 };
93
94 let info = info.trim();
95 let kind = if info.is_empty() {
96 CodeBlockKind::Fenced
97 // Languages should never contain a slash, and PathRanges always should.
98 // (Models are told to specify them relative to a workspace root.)
99 } else if info.contains('/') {
100 let path_range = PathWithRange::new(info);
101 language_paths.insert(path_range.path.clone());
102 CodeBlockKind::FencedSrc(path_range)
103 } else {
104 let language = SharedString::from(info.to_string());
105 language_names.insert(language.clone());
106 CodeBlockKind::FencedLang(language)
107 };
108
109 MarkdownTag::CodeBlock { kind, metadata }
110 }
111 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
112 pulldown_cmark::Tag::Heading {
113 level,
114 id,
115 classes,
116 attrs,
117 } => {
118 let id = id.map(|id| SharedString::from(id.into_string()));
119 let classes = classes
120 .into_iter()
121 .map(|c| SharedString::from(c.into_string()))
122 .collect();
123 let attrs = attrs
124 .into_iter()
125 .map(|(key, value)| {
126 (
127 SharedString::from(key.into_string()),
128 value.map(|v| SharedString::from(v.into_string())),
129 )
130 })
131 .collect();
132 MarkdownTag::Heading {
133 level,
134 id,
135 classes,
136 attrs,
137 }
138 }
139 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
140 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
141 pulldown_cmark::Tag::Item => MarkdownTag::Item,
142 pulldown_cmark::Tag::FootnoteDefinition(label) => {
143 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
144 }
145 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
146 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
147 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
148 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
149 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
150 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
151 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
152 pulldown_cmark::Tag::Image {
153 link_type,
154 dest_url,
155 title,
156 id,
157 } => MarkdownTag::Image {
158 link_type,
159 dest_url: SharedString::from(dest_url.into_string()),
160 title: SharedString::from(title.into_string()),
161 id: SharedString::from(id.into_string()),
162 },
163 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
164 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
165 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
166 pulldown_cmark::Tag::DefinitionListDefinition => {
167 MarkdownTag::DefinitionListDefinition
168 }
169 };
170 events.push((range, MarkdownEvent::Start(tag)))
171 }
172 pulldown_cmark::Event::End(tag) => {
173 if let pulldown_cmark::TagEnd::Link = tag {
174 within_link = false;
175 }
176 events.push((range, MarkdownEvent::End(tag)));
177 }
178 pulldown_cmark::Event::Text(parsed) => {
179 fn event_for(
180 text: &str,
181 range: Range<usize>,
182 str: &str,
183 ) -> (Range<usize>, MarkdownEvent) {
184 if str == &text[range.clone()] {
185 (range, MarkdownEvent::Text)
186 } else {
187 (range, MarkdownEvent::SubstitutedText(str.to_owned()))
188 }
189 }
190 #[derive(Debug)]
191 struct TextRange<'a> {
192 source_range: Range<usize>,
193 merged_range: Range<usize>,
194 parsed: CowStr<'a>,
195 }
196
197 let mut last_len = parsed.len();
198 let mut ranges = vec![TextRange {
199 source_range: range.clone(),
200 merged_range: 0..last_len,
201 parsed,
202 }];
203
204 while matches!(parser.peek(), Some((pulldown_cmark::Event::Text(_), _))) {
205 let Some((pulldown_cmark::Event::Text(next_event), next_range)) = parser.next()
206 else {
207 unreachable!()
208 };
209 let next_len = last_len + next_event.len();
210 ranges.push(TextRange {
211 source_range: next_range.clone(),
212 merged_range: last_len..next_len,
213 parsed: next_event,
214 });
215 last_len = next_len;
216 }
217
218 let mut merged_text =
219 String::with_capacity(ranges.last().unwrap().merged_range.end);
220 for range in &ranges {
221 merged_text.push_str(&range.parsed);
222 }
223
224 let mut ranges = ranges.into_iter().peekable();
225
226 if !within_link {
227 let mut finder = LinkFinder::new();
228 finder.kinds(&[linkify::LinkKind::Url]);
229
230 // Find links in the merged text
231 for link in finder.links(&merged_text) {
232 let link_start_in_merged = link.start();
233 let link_end_in_merged = link.end();
234
235 while ranges
236 .peek()
237 .is_some_and(|range| range.merged_range.end <= link_start_in_merged)
238 {
239 let range = ranges.next().unwrap();
240 events.push(event_for(text, range.source_range, &range.parsed));
241 }
242
243 let Some(range) = ranges.peek_mut() else {
244 continue;
245 };
246 let prefix_len = link_start_in_merged - range.merged_range.start;
247 if prefix_len > 0 {
248 let (head, tail) = range.parsed.split_at(prefix_len);
249 events.push(event_for(
250 text,
251 range.source_range.start..range.source_range.start + prefix_len,
252 head,
253 ));
254 range.parsed = CowStr::Boxed(tail.into());
255 range.merged_range.start += prefix_len;
256 range.source_range.start += prefix_len;
257 }
258
259 let link_start_in_source = range.source_range.start;
260 let mut link_end_in_source = range.source_range.end;
261 let mut link_events = Vec::new();
262
263 while ranges
264 .peek()
265 .is_some_and(|range| range.merged_range.end <= link_end_in_merged)
266 {
267 let range = ranges.next().unwrap();
268 link_end_in_source = range.source_range.end;
269 link_events.push(event_for(text, range.source_range, &range.parsed));
270 }
271
272 if let Some(range) = ranges.peek_mut() {
273 let prefix_len = link_end_in_merged - range.merged_range.start;
274 if prefix_len > 0 {
275 let (head, tail) = range.parsed.split_at(prefix_len);
276 link_events.push(event_for(
277 text,
278 range.source_range.start..range.source_range.start + prefix_len,
279 head,
280 ));
281 range.parsed = CowStr::Boxed(tail.into());
282 range.merged_range.start += prefix_len;
283 range.source_range.start += prefix_len;
284 link_end_in_source = range.source_range.start;
285 }
286 }
287 let link_range = link_start_in_source..link_end_in_source;
288
289 events.push((
290 link_range.clone(),
291 MarkdownEvent::Start(MarkdownTag::Link {
292 link_type: LinkType::Autolink,
293 dest_url: SharedString::from(link.as_str().to_string()),
294 title: SharedString::default(),
295 id: SharedString::default(),
296 }),
297 ));
298 events.extend(link_events);
299 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
300 }
301 }
302
303 for range in ranges {
304 events.push(event_for(text, range.source_range, &range.parsed));
305 }
306 }
307 pulldown_cmark::Event::Code(_) => {
308 let content_range = extract_code_content_range(&text[range.clone()]);
309 let content_range =
310 content_range.start + range.start..content_range.end + range.start;
311 events.push((content_range, MarkdownEvent::Code))
312 }
313 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
314 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
315 pulldown_cmark::Event::FootnoteReference(_) => {
316 events.push((range, MarkdownEvent::FootnoteReference))
317 }
318 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
319 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
320 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
321 pulldown_cmark::Event::TaskListMarker(checked) => {
322 events.push((range, MarkdownEvent::TaskListMarker(checked)))
323 }
324 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
325 }
326 }
327 (events, language_names, language_paths)
328}
329
330pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
331 let mut events = Vec::new();
332 let mut finder = LinkFinder::new();
333 finder.kinds(&[linkify::LinkKind::Url]);
334 let mut text_range = Range {
335 start: 0,
336 end: text.len(),
337 };
338 for link in finder.links(text) {
339 let link_range = link.start()..link.end();
340
341 if link_range.start > text_range.start {
342 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
343 }
344
345 events.push((
346 link_range.clone(),
347 MarkdownEvent::Start(MarkdownTag::Link {
348 link_type: LinkType::Autolink,
349 dest_url: SharedString::from(link.as_str().to_string()),
350 title: SharedString::default(),
351 id: SharedString::default(),
352 }),
353 ));
354 events.push((link_range.clone(), MarkdownEvent::Text));
355 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
356
357 text_range.start = link_range.end;
358 }
359
360 if text_range.end > text_range.start {
361 events.push((text_range, MarkdownEvent::Text));
362 }
363
364 events
365}
366
367/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
368/// parse result for rendering without resorting to unsafe lifetime coercion.
369#[derive(Clone, Debug, PartialEq)]
370pub enum MarkdownEvent {
371 /// Start of a tagged element. Events that are yielded after this event
372 /// and before its corresponding `End` event are inside this element.
373 /// Start and end events are guaranteed to be balanced.
374 Start(MarkdownTag),
375 /// End of a tagged element.
376 End(MarkdownTagEnd),
377 /// Text that uses the associated range from the markdown source.
378 Text,
379 /// Text that differs from the markdown source - typically due to substitution of HTML entities
380 /// and smart punctuation.
381 SubstitutedText(String),
382 /// An inline code node.
383 Code,
384 /// An HTML node.
385 Html,
386 /// An inline HTML node.
387 InlineHtml,
388 /// A reference to a footnote with given label, which may or may not be defined
389 /// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
390 /// occur in any order.
391 FootnoteReference,
392 /// A soft line break.
393 SoftBreak,
394 /// A hard line break.
395 HardBreak,
396 /// A horizontal ruler.
397 Rule,
398 /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
399 TaskListMarker(bool),
400}
401
402/// Tags for elements that can contain other elements.
403#[derive(Clone, Debug, PartialEq)]
404pub enum MarkdownTag {
405 /// A paragraph of text and other inline elements.
406 Paragraph,
407
408 /// A heading, with optional identifier, classes and custom attributes.
409 /// The identifier is prefixed with `#` and the last one in the attributes
410 /// list is chosen, classes are prefixed with `.` and custom attributes
411 /// have no prefix and can optionally have a value (`myattr` o `myattr=myvalue`).
412 Heading {
413 level: HeadingLevel,
414 id: Option<SharedString>,
415 classes: Vec<SharedString>,
416 /// The first item of the tuple is the attr and second one the value.
417 attrs: Vec<(SharedString, Option<SharedString>)>,
418 },
419
420 BlockQuote,
421
422 /// A code block.
423 CodeBlock {
424 kind: CodeBlockKind,
425 metadata: CodeBlockMetadata,
426 },
427
428 /// A HTML block.
429 HtmlBlock,
430
431 /// A list. If the list is ordered the field indicates the number of the first item.
432 /// Contains only list items.
433 List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
434
435 /// A list item.
436 Item,
437
438 /// A footnote definition. The value contained is the footnote's label by which it can
439 /// be referred to.
440 FootnoteDefinition(SharedString),
441
442 /// A table. Contains a vector describing the text-alignment for each of its columns.
443 Table(Vec<Alignment>),
444
445 /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
446 /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
447 TableHead,
448
449 /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
450 TableRow,
451 TableCell,
452
453 // span-level tags
454 Emphasis,
455 Strong,
456 Strikethrough,
457
458 /// A link.
459 Link {
460 link_type: LinkType,
461 dest_url: SharedString,
462 title: SharedString,
463 /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
464 id: SharedString,
465 },
466
467 /// An image. The first field is the link type, the second the destination URL and the third is a title,
468 /// the fourth is the link identifier.
469 Image {
470 link_type: LinkType,
471 dest_url: SharedString,
472 title: SharedString,
473 /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
474 id: SharedString,
475 },
476
477 /// A metadata block.
478 MetadataBlock(MetadataBlockKind),
479
480 DefinitionList,
481 DefinitionListTitle,
482 DefinitionListDefinition,
483}
484
485#[derive(Clone, Debug, PartialEq)]
486pub enum CodeBlockKind {
487 Indented,
488 /// "Fenced" means "surrounded by triple backticks."
489 /// There can optionally be either a language after the backticks (like in traditional Markdown)
490 /// or, if an agent is specifying a path for a source location in the project, it can be a PathRange,
491 /// e.g. ```path/to/foo.rs#L123-456 instead of ```rust
492 Fenced,
493 FencedLang(SharedString),
494 FencedSrc(PathWithRange),
495}
496
/// Details about the content of a code block, computed while parsing.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CodeBlockMetadata {
    /// Byte range of the code within the parsed text. For a fenced block this
    /// excludes the fence lines; for an indented block it is the range of the
    /// whole block.
    pub content_range: Range<usize>,
    /// For a fenced block, the number of newline bytes in the content; always 1
    /// for an indented block.
    pub line_count: usize,
}
502
/// Returns the byte range, relative to `text`, of the content of an inline code
/// span.
///
/// `text` is the whole span including its delimiters: a run of backticks, the
/// content, and a closing run of the same length. The returned range excludes
/// the delimiters. As in CommonMark, when the content both starts and ends with
/// a space (or line ending) and does not consist solely of such characters, one
/// of them is dropped from each side, so `` ` `` written as "`` ` ``" yields
/// just the backtick.
///
/// When `text` does not look like a delimited span (no leading backticks,
/// differing run lengths, or nothing but backticks) the full range `0..text.len()`
/// is returned.
fn extract_code_content_range(text: &str) -> Range<usize> {
    // Backticks, spaces and line endings are all ASCII, so counting bytes is
    // equivalent to counting characters and every offset stays on a char boundary.
    let bytes = text.as_bytes();
    let text_len = bytes.len();

    let start_ticks = bytes.iter().take_while(|&&byte| byte == b'`').count();
    let end_ticks = bytes.iter().rev().take_while(|&&byte| byte == b'`').count();

    if start_ticks == 0 || end_ticks != start_ticks || text_len < start_ticks + end_ticks {
        return 0..text_len;
    }

    let mut content = start_ticks..text_len - end_ticks;

    let is_space = |byte: u8| matches!(byte, b' ' | b'\n' | b'\r');
    let inner = &bytes[content.clone()];
    if let [first, .., last] = inner {
        let only_spaces = inner.iter().all(|&byte| is_space(byte));
        if is_space(*first) && is_space(*last) && !only_spaces {
            content.start += 1;
            content.end -= 1;
        }
    }

    content
}
523
/// Returns the byte range, relative to `text`, of the content of a fenced code
/// block, i.e. `text` without its opening fence line and its closing fence.
///
/// An opening fence is a run of at least three backticks or at least three
/// tildes at the very start of `text`; everything up to and including the first
/// newline is skipped. A closing fence is a run of the same character at the very
/// end of `text` that is at least as long as the opening fence. When `text` has
/// no opening fence, a trailing run of three or more backticks is still removed.
///
/// When the opening fence line has no terminating newline, exactly three bytes
/// are skipped, so `` ````` `` yields `3..3`. The result never has
/// `start > end`.
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
    const MIN_FENCE_LEN: usize = 3;

    let bytes = text.as_bytes();

    // The fence character and the length of the opening run, if there is one.
    let opening = bytes
        .first()
        .copied()
        .filter(|&byte| matches!(byte, b'`' | b'~'))
        .map(|fence| {
            let len = bytes.iter().take_while(|&&byte| byte == fence).count();
            (fence, len)
        })
        .filter(|&(_, len)| len >= MIN_FENCE_LEN);

    let mut range = 0..text.len();
    if opening.is_some() {
        // Skip the rest of the opening line (the info string).
        range.start = match text.find('\n') {
            Some(newline_ix) => newline_ix + 1,
            None => MIN_FENCE_LEN,
        };
    }

    let (closing_fence, closing_min_len) = opening.unwrap_or((b'`', MIN_FENCE_LEN));
    let trailing_len = bytes
        .iter()
        .rev()
        .take_while(|&&byte| byte == closing_fence)
        .count();
    if !range.is_empty() && trailing_len >= closing_min_len {
        range.end -= trailing_len;
    }

    // The closing run may overlap the opening fence when there is no content.
    if range.start > range.end {
        range.end = range.start;
    }
    range
}
542
#[cfg(test)]
mod tests {
    use super::MarkdownEvent::*;
    use super::MarkdownTag::*;
    use super::*;

    /// Parser options that are deliberately left out of `PARSE_OPTIONS`.
    const UNWANTED_OPTIONS: Options = Options::ENABLE_YAML_STYLE_METADATA_BLOCKS
        .union(Options::ENABLE_MATH)
        .union(Options::ENABLE_DEFINITION_LIST);

    #[test]
    fn all_options_considered() {
        // The purpose of this is to fail when new options are added to pulldown_cmark, so that they
        // can be evaluated for inclusion.
        assert_eq!(PARSE_OPTIONS.union(UNWANTED_OPTIONS), Options::all());
    }

    #[test]
    fn wanted_and_unwanted_options_disjoint() {
        assert_eq!(
            PARSE_OPTIONS.intersection(UNWANTED_OPTIONS),
            Options::empty()
        );
    }

    #[test]
    fn test_html_comments() {
        // The input starts with two spaces of indentation; the parser reports them
        // as synthesized text with an empty source range.
        assert_eq!(
            parse_markdown("  <!--\nrdoc-file=string.c\n-->\nReturns"),
            (
                vec![
                    (2..30, Start(HtmlBlock)),
                    (2..2, SubstitutedText("  ".into())),
                    (2..7, Html),
                    (7..26, Html),
                    (26..30, Html),
                    (2..30, End(MarkdownTagEnd::HtmlBlock)),
                    (30..37, Start(Paragraph)),
                    (30..37, Text),
                    (30..37, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        )
    }

    #[test]
    fn test_plain_urls_and_escaped_text() {
        // The entities are written out in the source so that the asserted byte
        // ranges (6 bytes per `&nbsp;`, 7 for `&#9658;`) refer to the input as given.
        assert_eq!(
            parse_markdown("&nbsp;&nbsp; https://some.url some \\`&#9658;\\` text"),
            (
                vec![
                    (0..51, Start(Paragraph)),
                    (0..6, SubstitutedText("\u{a0}".into())),
                    (6..12, SubstitutedText("\u{a0}".into())),
                    (12..13, Text),
                    (
                        13..29,
                        Start(Link {
                            link_type: LinkType::Autolink,
                            dest_url: "https://some.url".into(),
                            title: "".into(),
                            id: "".into(),
                        })
                    ),
                    (13..29, Text),
                    (13..29, End(MarkdownTagEnd::Link)),
                    (29..35, Text),
                    (36..37, Text), // Escaped backtick
                    (37..44, SubstitutedText("►".into())),
                    (45..46, Text), // Escaped backtick
                    (46..51, Text),
                    (0..51, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        );
    }

    #[test]
    fn test_incomplete_link() {
        assert_eq!(
            parse_markdown("You can use the [GitHub Search API](https://docs.github.com/en").0,
            vec![
                (0..62, Start(Paragraph)),
                (0..16, Text),
                (16..17, Text),
                (17..34, Text),
                (34..35, Text),
                (35..36, Text),
                (
                    36..62,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://docs.github.com/en".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (36..62, Text),
                (36..62, End(MarkdownTagEnd::Link)),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );
    }

    #[test]
    fn test_smart_punctuation() {
        assert_eq!(
            parse_markdown("-- --- ... \"double quoted\" 'single quoted' ----------"),
            (
                vec![
                    (0..53, Start(Paragraph)),
                    (0..2, SubstitutedText("–".into())),
                    (2..3, Text),
                    (3..6, SubstitutedText("—".into())),
                    (6..7, Text),
                    (7..10, SubstitutedText("…".into())),
                    (10..11, Text),
                    (11..12, SubstitutedText("“".into())),
                    (12..25, Text),
                    (25..26, SubstitutedText("”".into())),
                    (26..27, Text),
                    (27..28, SubstitutedText("‘".into())),
                    (28..41, Text),
                    (41..42, SubstitutedText("’".into())),
                    (42..43, Text),
                    (43..53, SubstitutedText("–––––".into())),
                    (0..53, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        )
    }

    #[test]
    fn test_code_block_metadata() {
        assert_eq!(
            parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
            (
                vec![
                    (
                        0..37,
                        Start(CodeBlock {
                            kind: CodeBlockKind::FencedLang("rust".into()),
                            metadata: CodeBlockMetadata {
                                content_range: 8..34,
                                line_count: 3
                            }
                        })
                    ),
                    (8..34, Text),
                    (0..37, End(MarkdownTagEnd::CodeBlock)),
                ],
                {
                    let mut h = HashSet::default();
                    h.insert("rust".into());
                    h
                },
                HashSet::default()
            )
        );
        // Four spaces of indentation make this an indented code block starting at
        // byte 4.
        assert_eq!(
            parse_markdown("    fn main() {}"),
            (
                vec![
                    (
                        4..16,
                        Start(CodeBlock {
                            kind: CodeBlockKind::Indented,
                            metadata: CodeBlockMetadata {
                                content_range: 4..16,
                                line_count: 1
                            }
                        })
                    ),
                    (4..16, Text),
                    (4..16, End(MarkdownTagEnd::CodeBlock))
                ],
                HashSet::default(),
                HashSet::default()
            )
        );
    }

    #[test]
    fn test_extract_code_content_range() {
        let input = "```let x = 5;```";
        assert_eq!(extract_code_content_range(input), 3..13);

        let input = "``let x = 5;``";
        assert_eq!(extract_code_content_range(input), 2..12);

        let input = "`let x = 5;`";
        assert_eq!(extract_code_content_range(input), 1..11);

        let input = "plain text";
        assert_eq!(extract_code_content_range(input), 0..10);

        let input = "``let x = 5;`";
        assert_eq!(extract_code_content_range(input), 0..13);
    }

    #[test]
    fn test_extract_code_block_content_range() {
        let input = "```rust\nlet x = 5;\n```";
        assert_eq!(extract_code_block_content_range(input), 8..19);

        let input = "plain text";
        assert_eq!(extract_code_block_content_range(input), 0..10);

        let input = "```python\nprint('hello')\nprint('world')\n```";
        assert_eq!(extract_code_block_content_range(input), 10..40);

        // Malformed input
        let input = "`````";
        assert_eq!(extract_code_block_content_range(input), 3..3);
    }

    #[test]
    fn test_links_split_across_fragments() {
        // This test verifies that links split across multiple text fragments due to escaping or other issues
        // are correctly detected and processed
        // Note: In real usage, pulldown_cmark creates separate text events for the escaped character
        // We're verifying our parser can handle this correctly
        assert_eq!(
            parse_markdown("https:/\\/example.com is equivalent to https://example&#46;com!").0,
            vec![
                (0..62, Start(Paragraph)),
                (
                    0..20,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (0..7, Text),
                (8..20, Text),
                (0..20, End(MarkdownTagEnd::Link)),
                (20..38, Text),
                (
                    38..61,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (38..53, Text),
                (53..58, SubstitutedText(".".into())),
                (58..61, Text),
                (38..61, End(MarkdownTagEnd::Link)),
                (61..62, Text),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );

        assert_eq!(
            parse_markdown("Visit https://example.com/cat\\/é&#8205;☕ for coffee!").0,
            [
                (0..55, Start(Paragraph)),
                (0..6, Text),
                (
                    6..43,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com/cat/é\u{200d}☕".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (6..29, Text),
                (30..33, Text),
                (33..40, SubstitutedText("\u{200d}".into())),
                (40..43, Text),
                (6..43, End(MarkdownTagEnd::Link)),
                (43..55, Text),
                (0..55, End(MarkdownTagEnd::Paragraph))
            ]
        );
    }
}