1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{
5 Alignment, CowStr, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser,
6};
7use std::{collections::HashSet, ops::Range, path::Path, sync::Arc};
8
9use crate::path_range::PathWithRange;
10
11const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
12 .union(Options::ENABLE_FOOTNOTES)
13 .union(Options::ENABLE_STRIKETHROUGH)
14 .union(Options::ENABLE_TASKLISTS)
15 .union(Options::ENABLE_SMART_PUNCTUATION)
16 .union(Options::ENABLE_HEADING_ATTRIBUTES)
17 .union(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS)
18 .union(Options::ENABLE_OLD_FOOTNOTES)
19 .union(Options::ENABLE_GFM);
20
/// Parses `text` as Markdown (using `PARSE_OPTIONS`) and converts
/// pulldown-cmark's events into [`MarkdownEvent`]s, each paired with a byte
/// range into `text`.
///
/// Returns a tuple of:
/// - the events, in the order the parser produced them;
/// - the info string of every fenced code block whose info string is
///   non-empty and contains no `/` (treated as a language name);
/// - the path of every fenced code block whose info string contains a `/`
///   (treated as a `PathWithRange`).
///
/// Besides translating events one-to-one, this function:
/// - skips every event that follows the start of a metadata block, up to and
///   including the block's end event;
/// - scans runs of consecutive text events for bare URLs (via `linkify`) and
///   wraps them in synthetic `Autolink` link events, unless the text is
///   already inside a link;
/// - reports inline code with the range returned by
///   `extract_code_content_range` instead of the parser's range;
/// - discards inline and display math events.
pub fn parse_markdown(
    text: &str,
) -> (
    Vec<(Range<usize>, MarkdownEvent)>,
    HashSet<SharedString>,
    HashSet<Arc<Path>>,
) {
    let mut events = Vec::new();
    let mut language_names = HashSet::new();
    let mut language_paths = HashSet::new();
    // True between a link's start and end events; suppresses URL detection
    // in text that is already linked.
    let mut within_link = false;
    // True after a metadata block's start event until its end event.
    let mut within_metadata = false;
    // Peekable so that runs of adjacent text events can be consumed together.
    let mut parser = Parser::new_ext(text, PARSE_OPTIONS)
        .into_offset_iter()
        .peekable();
    while let Some((pulldown_event, range)) = parser.next() {
        if within_metadata {
            // Everything inside the metadata block is dropped. The `continue`
            // below also applies to the end event itself, so the
            // `Start(MetadataBlock)` pushed earlier has no matching `End`.
            // NOTE(review): this looks intentional for rendering, but it means
            // metadata blocks are an exception to balanced start/end events —
            // confirm consumers tolerate it.
            if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
                pulldown_event
            {
                within_metadata = false;
            }
            continue;
        }
        match pulldown_event {
            pulldown_cmark::Event::Start(tag) => {
                // Convert the borrowed pulldown tag into an owned `MarkdownTag`.
                let tag = match tag {
                    pulldown_cmark::Tag::Link {
                        link_type,
                        dest_url,
                        title,
                        id,
                    } => {
                        within_link = true;
                        MarkdownTag::Link {
                            link_type,
                            dest_url: SharedString::from(dest_url.into_string()),
                            title: SharedString::from(title.into_string()),
                            id: SharedString::from(id.into_string()),
                        }
                    }
                    pulldown_cmark::Tag::MetadataBlock(kind) => {
                        within_metadata = true;
                        MarkdownTag::MetadataBlock(kind)
                    }
                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
                        // Indented blocks reuse the parser's range as the
                        // content range and always report a line count of 1.
                        MarkdownTag::CodeBlock {
                            kind: CodeBlockKind::Indented,
                            metadata: CodeBlockMetadata {
                                content_range: range.clone(),
                                line_count: 1,
                            },
                        }
                    }
                    pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
                        ref info,
                    )) => {
                        // The helper returns a range relative to the block's
                        // own text; shift it so it indexes into `text`.
                        let content_range = extract_code_block_content_range(&text[range.clone()]);
                        let content_range =
                            content_range.start + range.start..content_range.end + range.start;

                        // Valid to use bytes since multi-byte UTF-8 doesn't use ASCII chars.
                        let line_count = text[content_range.clone()]
                            .bytes()
                            .filter(|c| *c == b'\n')
                            .count();
                        let metadata = CodeBlockMetadata {
                            content_range,
                            line_count,
                        };

                        let info = info.trim();
                        let kind = if info.is_empty() {
                            CodeBlockKind::Fenced
                        // Languages should never contain a slash, and PathRanges always should.
                        // (Models are told to specify them relative to a workspace root.)
                        } else if info.contains('/') {
                            let path_range = PathWithRange::new(info);
                            language_paths.insert(path_range.path.clone());
                            CodeBlockKind::FencedSrc(path_range)
                        } else {
                            let language = SharedString::from(info.to_string());
                            language_names.insert(language.clone());
                            CodeBlockKind::FencedLang(language)
                        };

                        MarkdownTag::CodeBlock { kind, metadata }
                    }
                    pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
                    pulldown_cmark::Tag::Heading {
                        level,
                        id,
                        classes,
                        attrs,
                    } => {
                        let id = id.map(|id| SharedString::from(id.into_string()));
                        let classes = classes
                            .into_iter()
                            .map(|c| SharedString::from(c.into_string()))
                            .collect();
                        let attrs = attrs
                            .into_iter()
                            .map(|(key, value)| {
                                (
                                    SharedString::from(key.into_string()),
                                    value.map(|v| SharedString::from(v.into_string())),
                                )
                            })
                            .collect();
                        MarkdownTag::Heading {
                            level,
                            id,
                            classes,
                            attrs,
                        }
                    }
                    // The block quote kind is not carried over.
                    pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
                    pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
                    pulldown_cmark::Tag::Item => MarkdownTag::Item,
                    pulldown_cmark::Tag::FootnoteDefinition(label) => {
                        MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
                    }
                    pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
                    pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
                    pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
                    pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
                    pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
                    pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
                    pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
                    pulldown_cmark::Tag::Image {
                        link_type,
                        dest_url,
                        title,
                        id,
                    } => MarkdownTag::Image {
                        link_type,
                        dest_url: SharedString::from(dest_url.into_string()),
                        title: SharedString::from(title.into_string()),
                        id: SharedString::from(id.into_string()),
                    },
                    pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
                    pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
                    pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
                    pulldown_cmark::Tag::DefinitionListDefinition => {
                        MarkdownTag::DefinitionListDefinition
                    }
                };
                events.push((range, MarkdownEvent::Start(tag)))
            }
            pulldown_cmark::Event::End(tag) => {
                if let pulldown_cmark::TagEnd::Link = tag {
                    within_link = false;
                }
                events.push((range, MarkdownEvent::End(tag)));
            }
            pulldown_cmark::Event::Text(parsed) => {
                /// Builds the event for one text fragment: `Text` when the
                /// parsed string equals the source slice at `range`, otherwise
                /// `SubstitutedText` carrying the parsed string.
                fn event_for(
                    text: &str,
                    range: Range<usize>,
                    str: &str,
                ) -> (Range<usize>, MarkdownEvent) {
                    if str == &text[range.clone()] {
                        (range, MarkdownEvent::Text)
                    } else {
                        (range, MarkdownEvent::SubstitutedText(str.to_owned()))
                    }
                }
                /// One text event from the parser, together with where its
                /// parsed text sits inside the concatenated `merged_text`.
                #[derive(Debug)]
                struct TextRange<'a> {
                    /// Byte range of the fragment in the Markdown source.
                    source_range: Range<usize>,
                    /// Byte range of the fragment's parsed text in `merged_text`.
                    merged_range: Range<usize>,
                    /// The (possibly substituted) text the parser produced.
                    parsed: CowStr<'a>,
                }

                let mut last_len = parsed.len();
                let mut ranges = vec![TextRange {
                    source_range: range.clone(),
                    merged_range: 0..last_len,
                    parsed,
                }];

                // Consume every immediately following text event so that a URL
                // split across several fragments can still be detected.
                while matches!(parser.peek(), Some((pulldown_cmark::Event::Text(_), _))) {
                    let Some((pulldown_cmark::Event::Text(next_event), next_range)) = parser.next()
                    else {
                        unreachable!()
                    };
                    let next_len = last_len + next_event.len();
                    ranges.push(TextRange {
                        source_range: next_range.clone(),
                        merged_range: last_len..next_len,
                        parsed: next_event,
                    });
                    last_len = next_len;
                }

                // Concatenate the parsed text of all fragments; link offsets
                // reported by the finder are relative to this string.
                let mut merged_text =
                    String::with_capacity(ranges.last().unwrap().merged_range.end);
                for range in &ranges {
                    merged_text.push_str(&range.parsed);
                }

                let mut ranges = ranges.into_iter().peekable();

                if !within_link {
                    let mut finder = LinkFinder::new();
                    finder.kinds(&[linkify::LinkKind::Url]);

                    // Find links in the merged text
                    for link in finder.links(&merged_text) {
                        let link_start_in_merged = link.start();
                        let link_end_in_merged = link.end();

                        // Emit, unchanged, every fragment that ends at or
                        // before the link's start.
                        while ranges
                            .peek()
                            .is_some_and(|range| range.merged_range.end <= link_start_in_merged)
                        {
                            let range = ranges.next().unwrap();
                            events.push(event_for(text, range.source_range, &range.parsed));
                        }

                        let Some(range) = ranges.peek_mut() else {
                            continue;
                        };
                        // If the link starts partway through this fragment,
                        // emit the part before the link and keep the rest.
                        // NOTE(review): the source range is advanced by the
                        // same byte count as the parsed text, which presumably
                        // relies on the split fragment not being substituted
                        // text of a different length — confirm.
                        let prefix_len = link_start_in_merged - range.merged_range.start;
                        if prefix_len > 0 {
                            let (head, tail) = range.parsed.split_at(prefix_len);
                            events.push(event_for(
                                text,
                                range.source_range.start..range.source_range.start + prefix_len,
                                head,
                            ));
                            range.parsed = CowStr::Boxed(tail.into());
                            range.merged_range.start += prefix_len;
                            range.source_range.start += prefix_len;
                        }

                        let link_start_in_source = range.source_range.start;
                        let mut link_end_in_source = range.source_range.end;
                        // Text events belonging to the link; buffered so the
                        // link's start event can be pushed before them.
                        let mut link_events = Vec::new();

                        // Collect every fragment that lies entirely within the link.
                        while ranges
                            .peek()
                            .is_some_and(|range| range.merged_range.end <= link_end_in_merged)
                        {
                            let range = ranges.next().unwrap();
                            link_end_in_source = range.source_range.end;
                            link_events.push(event_for(text, range.source_range, &range.parsed));
                        }

                        // If the link ends partway through the next fragment,
                        // take the leading part into the link and leave the
                        // remainder for later processing.
                        if let Some(range) = ranges.peek_mut() {
                            let prefix_len = link_end_in_merged - range.merged_range.start;
                            if prefix_len > 0 {
                                let (head, tail) = range.parsed.split_at(prefix_len);
                                link_events.push(event_for(
                                    text,
                                    range.source_range.start..range.source_range.start + prefix_len,
                                    head,
                                ));
                                range.parsed = CowStr::Boxed(tail.into());
                                range.merged_range.start += prefix_len;
                                range.source_range.start += prefix_len;
                                link_end_in_source = range.source_range.start;
                            }
                        }
                        let link_range = link_start_in_source..link_end_in_source;

                        // Wrap the collected text in a synthetic autolink whose
                        // destination is the URL as found in the merged text.
                        events.push((
                            link_range.clone(),
                            MarkdownEvent::Start(MarkdownTag::Link {
                                link_type: LinkType::Autolink,
                                dest_url: SharedString::from(link.as_str().to_string()),
                                title: SharedString::default(),
                                id: SharedString::default(),
                            }),
                        ));
                        events.extend(link_events);
                        events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
                    }
                }

                // Emit whatever fragments remain after the last link (or all
                // of them when no link detection took place).
                for range in ranges {
                    events.push(event_for(text, range.source_range, &range.parsed));
                }
            }
            pulldown_cmark::Event::Code(_) => {
                // Replace the parser's range with the one computed by
                // `extract_code_content_range`, shifted to index into `text`.
                let content_range = extract_code_content_range(&text[range.clone()]);
                let content_range =
                    content_range.start + range.start..content_range.end + range.start;
                events.push((content_range, MarkdownEvent::Code))
            }
            pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
            pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
            pulldown_cmark::Event::FootnoteReference(_) => {
                events.push((range, MarkdownEvent::FootnoteReference))
            }
            pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
            pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
            pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
            pulldown_cmark::Event::TaskListMarker(checked) => {
                events.push((range, MarkdownEvent::TaskListMarker(checked)))
            }
            // Math events produce no output.
            pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
        }
    }
    (events, language_names, language_paths)
}
327
328pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
329 let mut events = Vec::new();
330 let mut finder = LinkFinder::new();
331 finder.kinds(&[linkify::LinkKind::Url]);
332 let mut text_range = Range {
333 start: 0,
334 end: text.len(),
335 };
336 for link in finder.links(text) {
337 let link_range = link.start()..link.end();
338
339 if link_range.start > text_range.start {
340 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
341 }
342
343 events.push((
344 link_range.clone(),
345 MarkdownEvent::Start(MarkdownTag::Link {
346 link_type: LinkType::Autolink,
347 dest_url: SharedString::from(link.as_str().to_string()),
348 title: SharedString::default(),
349 id: SharedString::default(),
350 }),
351 ));
352 events.push((link_range.clone(), MarkdownEvent::Text));
353 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
354
355 text_range.start = link_range.end;
356 }
357
358 if text_range.end > text_range.start {
359 events.push((text_range, MarkdownEvent::Text));
360 }
361
362 events
363}
364
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Most variants carry no payload of their own: the content they describe is
/// identified by the byte range that accompanies each event in the parse
/// result.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are generally balanced.
    /// NOTE(review): `parse_markdown` pushes `Start(MetadataBlock)` but skips
    /// the matching end event — confirm whether that exception is intended.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// Text that uses the associated range from the markdown source.
    Text,
    /// Text that differs from the markdown source - typically due to substitution of HTML entities
    /// and smart punctuation. Holds the text to use instead of the source range.
    SubstitutedText(String),
    /// An inline code node.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined by an event
    /// with a `MarkdownTag::FootnoteDefinition` tag. Definitions and
    /// references to them may occur in any order. The label is not stored in
    /// this variant.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
399
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: borrowed strings
/// are replaced by `SharedString`s.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The quote kind reported by the parser is not retained.
    BlockQuote,

    /// A code block.
    CodeBlock {
        kind: CodeBlockKind,
        metadata: CodeBlockMetadata,
    },

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table header or table row.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image, described by its link type, destination URL, title and link
    /// identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
482
/// How a code block was written in the Markdown source, and what its info
/// string (if any) was interpreted as.
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block created by indentation rather than by a fence.
    Indented,
    /// "Fenced" means "surrounded by triple backticks."
    /// There can optionally be either a language after the backticks (like in traditional Markdown)
    /// or, if an agent is specifying a path for a source location in the project, it can be a PathRange,
    /// e.g. ```path/to/foo.rs#L123-456 instead of ```rust
    ///
    /// This variant is the fenced block with an empty info string.
    Fenced,
    /// A fenced block whose info string contains no `/`; holds the trimmed
    /// info string, which is treated as a language name.
    FencedLang(SharedString),
    /// A fenced block whose info string contains a `/`; holds the info string
    /// parsed as a `PathWithRange`.
    FencedSrc(PathWithRange),
}
494
/// Extra information about a code block, computed once while parsing.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CodeBlockMetadata {
    /// Byte range of the block's content within the Markdown source.
    ///
    /// As filled in by `parse_markdown`: for fenced blocks this is the block's
    /// range narrowed by `extract_code_block_content_range`; for indented
    /// blocks it is the parser's range for the block, unchanged.
    pub content_range: Range<usize>,
    /// Line count of the content.
    ///
    /// As filled in by `parse_markdown`: for fenced blocks this is the number
    /// of `\n` bytes inside `content_range`; for indented blocks it is always 1.
    pub line_count: usize,
}
500
/// Returns the byte range, relative to `text`, of an inline code span's
/// content with the surrounding backtick delimiters removed.
///
/// `text` is expected to be the full source of the span, delimiters included.
/// The whole of `text` is returned unchanged when it does not start with a
/// backtick, when the leading and trailing runs of backticks differ in
/// length, or when the two runs would overlap (for example when `text`
/// consists of backticks only).
fn extract_code_content_range(text: &str) -> Range<usize> {
    let whole = 0..text.len();

    // A backtick is a single ASCII byte and never occurs inside a multi-byte
    // UTF-8 sequence, so counting bytes equals counting characters here.
    let opening = text.bytes().take_while(|&byte| byte == b'`').count();
    if opening == 0 {
        return whole;
    }

    let closing = text.bytes().rev().take_while(|&byte| byte == b'`').count();
    let delimiters_match = closing == opening && opening + closing <= text.len();

    if delimiters_match {
        opening..text.len() - closing
    } else {
        whole
    }
}
521
/// Returns the byte range, relative to `text`, of a fenced code block's
/// content, with the opening fence line and the closing fence removed.
///
/// `text` is expected to be the full source of the block. When it starts
/// with three backticks, the content begins after the first newline that
/// follows them, or directly after the three backticks if there is no
/// newline. When the remaining range is non-empty and `text` ends with three
/// backticks, those are excluded as well. Only three-backtick fences are
/// recognized. The returned range is never inverted: if stripping the closing
/// fence would move the end before the start, an empty range at the start is
/// returned.
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
    const FENCE: &str = "```";

    let start = match text.strip_prefix(FENCE) {
        // Skip the rest of the opening line (the info string), if it is terminated.
        Some(after_fence) => FENCE.len() + after_fence.find('\n').map_or(0, |ix| ix + 1),
        None => 0,
    };

    let mut end = text.len();
    if start < end && text.ends_with(FENCE) {
        end -= FENCE.len();
    }

    start..end.max(start)
}
540
#[cfg(test)]
mod tests {
    use super::MarkdownEvent::*;
    use super::MarkdownTag::*;
    use super::*;

    /// Parser options that are deliberately left disabled.
    const UNWANTED_OPTIONS: Options = Options::ENABLE_YAML_STYLE_METADATA_BLOCKS
        .union(Options::ENABLE_MATH)
        .union(Options::ENABLE_DEFINITION_LIST);

    #[test]
    fn all_options_considered() {
        // The purpose of this is to fail when new options are added to pulldown_cmark, so that they
        // can be evaluated for inclusion.
        assert_eq!(PARSE_OPTIONS.union(UNWANTED_OPTIONS), Options::all());
    }

    #[test]
    fn wanted_and_unwanted_options_disjoint() {
        assert_eq!(
            PARSE_OPTIONS.intersection(UNWANTED_OPTIONS),
            Options::empty()
        );
    }

    #[test]
    fn test_html_comments() {
        // The input starts with two spaces of indentation: the HTML block
        // therefore begins at byte 2, and the indentation is reported as a
        // synthesized two-space text event with an empty source range.
        assert_eq!(
            parse_markdown("  <!--\nrdoc-file=string.c\n-->\nReturns"),
            (
                vec![
                    (2..30, Start(HtmlBlock)),
                    (2..2, SubstitutedText("  ".into())),
                    (2..7, Html),
                    (7..26, Html),
                    (26..30, Html),
                    (2..30, End(MarkdownTagEnd::HtmlBlock)),
                    (30..37, Start(Paragraph)),
                    (30..37, Text),
                    (30..37, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::new(),
                HashSet::new()
            )
        )
    }

    #[test]
    fn test_plain_urls_and_escaped_text() {
        // The HTML entities must appear literally in the input: each `&nbsp;`
        // occupies 6 source bytes and `&#9658;` occupies 7, which is what the
        // asserted ranges (and the `SubstitutedText` events) rely on.
        assert_eq!(
            parse_markdown("&nbsp;&nbsp; https://some.url some \\`&#9658;\\` text"),
            (
                vec![
                    (0..51, Start(Paragraph)),
                    (0..6, SubstitutedText("\u{a0}".into())),
                    (6..12, SubstitutedText("\u{a0}".into())),
                    (12..13, Text),
                    (
                        13..29,
                        Start(Link {
                            link_type: LinkType::Autolink,
                            dest_url: "https://some.url".into(),
                            title: "".into(),
                            id: "".into(),
                        })
                    ),
                    (13..29, Text),
                    (13..29, End(MarkdownTagEnd::Link)),
                    (29..35, Text),
                    (36..37, Text), // Escaped backtick
                    (37..44, SubstitutedText("►".into())),
                    (45..46, Text), // Escaped backtick
                    (46..51, Text),
                    (0..51, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::new(),
                HashSet::new()
            )
        );
    }

    #[test]
    fn test_incomplete_link() {
        // An unterminated inline link is not a Markdown link, so its URL is
        // picked up by plain URL detection instead.
        assert_eq!(
            parse_markdown("You can use the [GitHub Search API](https://docs.github.com/en").0,
            vec![
                (0..62, Start(Paragraph)),
                (0..16, Text),
                (16..17, Text),
                (17..34, Text),
                (34..35, Text),
                (35..36, Text),
                (
                    36..62,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://docs.github.com/en".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (36..62, Text),
                (36..62, End(MarkdownTagEnd::Link)),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );
    }

    #[test]
    fn test_smart_punctuation() {
        assert_eq!(
            parse_markdown("-- --- ... \"double quoted\" 'single quoted' ----------"),
            (
                vec![
                    (0..53, Start(Paragraph)),
                    (0..2, SubstitutedText("–".into())),
                    (2..3, Text),
                    (3..6, SubstitutedText("—".into())),
                    (6..7, Text),
                    (7..10, SubstitutedText("…".into())),
                    (10..11, Text),
                    (11..12, SubstitutedText("“".into())),
                    (12..25, Text),
                    (25..26, SubstitutedText("”".into())),
                    (26..27, Text),
                    (27..28, SubstitutedText("‘".into())),
                    (28..41, Text),
                    (41..42, SubstitutedText("’".into())),
                    (42..43, Text),
                    (43..53, SubstitutedText("–––––".into())),
                    (0..53, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::new(),
                HashSet::new()
            )
        )
    }

    #[test]
    fn test_code_block_metadata() {
        assert_eq!(
            parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
            (
                vec![
                    (
                        0..37,
                        Start(CodeBlock {
                            kind: CodeBlockKind::FencedLang("rust".into()),
                            metadata: CodeBlockMetadata {
                                content_range: 8..34,
                                line_count: 3
                            }
                        })
                    ),
                    (8..34, Text),
                    (0..37, End(MarkdownTagEnd::CodeBlock)),
                ],
                HashSet::from(["rust".into()]),
                HashSet::new()
            )
        );
        // Four spaces of indentation are required for an indented code block;
        // the block's range starts after them, at byte 4.
        assert_eq!(
            parse_markdown("    fn main() {}"),
            (
                vec![
                    (
                        4..16,
                        Start(CodeBlock {
                            kind: CodeBlockKind::Indented,
                            metadata: CodeBlockMetadata {
                                content_range: 4..16,
                                line_count: 1
                            }
                        })
                    ),
                    (4..16, Text),
                    (4..16, End(MarkdownTagEnd::CodeBlock))
                ],
                HashSet::new(),
                HashSet::new()
            )
        );
    }

    #[test]
    fn test_extract_code_content_range() {
        let input = "```let x = 5;```";
        assert_eq!(extract_code_content_range(input), 3..13);

        let input = "``let x = 5;``";
        assert_eq!(extract_code_content_range(input), 2..12);

        let input = "`let x = 5;`";
        assert_eq!(extract_code_content_range(input), 1..11);

        let input = "plain text";
        assert_eq!(extract_code_content_range(input), 0..10);

        // Mismatched delimiters leave the range untouched.
        let input = "``let x = 5;`";
        assert_eq!(extract_code_content_range(input), 0..13);
    }

    #[test]
    fn test_extract_code_block_content_range() {
        let input = "```rust\nlet x = 5;\n```";
        assert_eq!(extract_code_block_content_range(input), 8..19);

        let input = "plain text";
        assert_eq!(extract_code_block_content_range(input), 0..10);

        let input = "```python\nprint('hello')\nprint('world')\n```";
        assert_eq!(extract_code_block_content_range(input), 10..40);

        // Malformed input
        let input = "`````";
        assert_eq!(extract_code_block_content_range(input), 3..3);
    }

    #[test]
    fn test_links_split_across_fragments() {
        // This test verifies that links split across multiple text fragments due to escaping or other issues
        // are correctly detected and processed
        // Note: In real usage, pulldown_cmark creates separate text events for the escaped character
        // We're verifying our parser can handle this correctly
        //
        // The `&#46;` entity must appear literally in the input: it occupies
        // source bytes 53..58 and is substituted by ".".
        assert_eq!(
            parse_markdown("https:/\\/example.com is equivalent to https://example&#46;com!").0,
            vec![
                (0..62, Start(Paragraph)),
                (
                    0..20,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (0..7, Text),
                (8..20, Text),
                (0..20, End(MarkdownTagEnd::Link)),
                (20..38, Text),
                (
                    38..61,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (38..53, Text),
                (53..58, SubstitutedText(".".into())),
                (58..61, Text),
                (38..61, End(MarkdownTagEnd::Link)),
                (61..62, Text),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );

        // The `&#8205;` entity (zero-width joiner) must appear literally in
        // the input: it occupies source bytes 33..40.
        assert_eq!(
            parse_markdown("Visit https://example.com/cat\\/é&#8205;☕ for coffee!").0,
            [
                (0..55, Start(Paragraph)),
                (0..6, Text),
                (
                    6..43,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com/cat/é\u{200d}☕".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (6..29, Text),
                (30..33, Text),
                (33..40, SubstitutedText("\u{200d}".into())),
                (40..43, Text),
                (6..43, End(MarkdownTagEnd::Link)),
                (43..55, Text),
                (0..55, End(MarkdownTagEnd::Paragraph))
            ]
        );
    }
}