1use gpui::SharedString;
2use linkify::LinkFinder;
3pub use pulldown_cmark::TagEnd as MarkdownTagEnd;
4use pulldown_cmark::{
5 Alignment, CowStr, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser,
6};
7use std::{ops::Range, sync::Arc};
8
9use collections::HashSet;
10
11use crate::path_range::PathWithRange;
12
/// The pulldown-cmark extensions enabled when `parse_markdown` constructs its parser.
///
/// Built with `Options::union` so the whole expression can be evaluated in a `const`
/// context.
const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
    .union(Options::ENABLE_FOOTNOTES)
    .union(Options::ENABLE_STRIKETHROUGH)
    .union(Options::ENABLE_TASKLISTS)
    .union(Options::ENABLE_SMART_PUNCTUATION)
    .union(Options::ENABLE_HEADING_ATTRIBUTES)
    .union(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS)
    .union(Options::ENABLE_OLD_FOOTNOTES)
    .union(Options::ENABLE_GFM)
    .union(Options::ENABLE_SUPERSCRIPT)
    .union(Options::ENABLE_SUBSCRIPT);
24
25pub fn parse_markdown(
26 text: &str,
27) -> (
28 Vec<(Range<usize>, MarkdownEvent)>,
29 HashSet<SharedString>,
30 HashSet<Arc<str>>,
31) {
32 let mut events = Vec::new();
33 let mut language_names = HashSet::default();
34 let mut language_paths = HashSet::default();
35 let mut within_link = false;
36 let mut within_metadata = false;
37 let mut parser = Parser::new_ext(text, PARSE_OPTIONS)
38 .into_offset_iter()
39 .peekable();
40 while let Some((pulldown_event, range)) = parser.next() {
41 if within_metadata {
42 if let pulldown_cmark::Event::End(pulldown_cmark::TagEnd::MetadataBlock { .. }) =
43 pulldown_event
44 {
45 within_metadata = false;
46 }
47 continue;
48 }
49 match pulldown_event {
50 pulldown_cmark::Event::Start(tag) => {
51 let tag = match tag {
52 pulldown_cmark::Tag::Link {
53 link_type,
54 dest_url,
55 title,
56 id,
57 } => {
58 within_link = true;
59 MarkdownTag::Link {
60 link_type,
61 dest_url: SharedString::from(dest_url.into_string()),
62 title: SharedString::from(title.into_string()),
63 id: SharedString::from(id.into_string()),
64 }
65 }
66 pulldown_cmark::Tag::MetadataBlock(kind) => {
67 within_metadata = true;
68 MarkdownTag::MetadataBlock(kind)
69 }
70 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
71 MarkdownTag::CodeBlock {
72 kind: CodeBlockKind::Indented,
73 metadata: CodeBlockMetadata {
74 content_range: range.clone(),
75 line_count: 1,
76 },
77 }
78 }
79 pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
80 ref info,
81 )) => {
82 let content_range = extract_code_block_content_range(&text[range.clone()]);
83 let content_range =
84 content_range.start + range.start..content_range.end + range.start;
85
86 // Valid to use bytes since multi-byte UTF-8 doesn't use ASCII chars.
87 let line_count = text[content_range.clone()]
88 .bytes()
89 .filter(|c| *c == b'\n')
90 .count();
91 let metadata = CodeBlockMetadata {
92 content_range,
93 line_count,
94 };
95
96 let info = info.trim();
97 let kind = if info.is_empty() {
98 CodeBlockKind::Fenced
99 // Languages should never contain a slash, and PathRanges always should.
100 // (Models are told to specify them relative to a workspace root.)
101 } else if info.contains('/') {
102 let path_range = PathWithRange::new(info);
103 language_paths.insert(path_range.path.clone());
104 CodeBlockKind::FencedSrc(path_range)
105 } else {
106 let language = SharedString::from(info.to_string());
107 language_names.insert(language.clone());
108 CodeBlockKind::FencedLang(language)
109 };
110
111 MarkdownTag::CodeBlock { kind, metadata }
112 }
113 pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
114 pulldown_cmark::Tag::Heading {
115 level,
116 id,
117 classes,
118 attrs,
119 } => {
120 let id = id.map(|id| SharedString::from(id.into_string()));
121 let classes = classes
122 .into_iter()
123 .map(|c| SharedString::from(c.into_string()))
124 .collect();
125 let attrs = attrs
126 .into_iter()
127 .map(|(key, value)| {
128 (
129 SharedString::from(key.into_string()),
130 value.map(|v| SharedString::from(v.into_string())),
131 )
132 })
133 .collect();
134 MarkdownTag::Heading {
135 level,
136 id,
137 classes,
138 attrs,
139 }
140 }
141 pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
142 pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
143 pulldown_cmark::Tag::Item => MarkdownTag::Item,
144 pulldown_cmark::Tag::FootnoteDefinition(label) => {
145 MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
146 }
147 pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
148 pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
149 pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
150 pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
151 pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
152 pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
153 pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
154 pulldown_cmark::Tag::Superscript => MarkdownTag::Superscript,
155 pulldown_cmark::Tag::Subscript => MarkdownTag::Subscript,
156 pulldown_cmark::Tag::Image {
157 link_type,
158 dest_url,
159 title,
160 id,
161 } => MarkdownTag::Image {
162 link_type,
163 dest_url: SharedString::from(dest_url.into_string()),
164 title: SharedString::from(title.into_string()),
165 id: SharedString::from(id.into_string()),
166 },
167 pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
168 pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
169 pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
170 pulldown_cmark::Tag::DefinitionListDefinition => {
171 MarkdownTag::DefinitionListDefinition
172 }
173 };
174 events.push((range, MarkdownEvent::Start(tag)))
175 }
176 pulldown_cmark::Event::End(tag) => {
177 if let pulldown_cmark::TagEnd::Link = tag {
178 within_link = false;
179 }
180 events.push((range, MarkdownEvent::End(tag)));
181 }
182 pulldown_cmark::Event::Text(parsed) => {
183 fn event_for(
184 text: &str,
185 range: Range<usize>,
186 str: &str,
187 ) -> (Range<usize>, MarkdownEvent) {
188 if str == &text[range.clone()] {
189 (range, MarkdownEvent::Text)
190 } else {
191 (range, MarkdownEvent::SubstitutedText(str.to_owned()))
192 }
193 }
194 #[derive(Debug)]
195 struct TextRange<'a> {
196 source_range: Range<usize>,
197 merged_range: Range<usize>,
198 parsed: CowStr<'a>,
199 }
200
201 let mut last_len = parsed.len();
202 let mut ranges = vec![TextRange {
203 source_range: range.clone(),
204 merged_range: 0..last_len,
205 parsed,
206 }];
207
208 while matches!(parser.peek(), Some((pulldown_cmark::Event::Text(_), _))) {
209 let Some((pulldown_cmark::Event::Text(next_event), next_range)) = parser.next()
210 else {
211 unreachable!()
212 };
213 let next_len = last_len + next_event.len();
214 ranges.push(TextRange {
215 source_range: next_range.clone(),
216 merged_range: last_len..next_len,
217 parsed: next_event,
218 });
219 last_len = next_len;
220 }
221
222 let mut merged_text =
223 String::with_capacity(ranges.last().unwrap().merged_range.end);
224 for range in &ranges {
225 merged_text.push_str(&range.parsed);
226 }
227
228 let mut ranges = ranges.into_iter().peekable();
229
230 if !within_link {
231 let mut finder = LinkFinder::new();
232 finder.kinds(&[linkify::LinkKind::Url]);
233
234 // Find links in the merged text
235 for link in finder.links(&merged_text) {
236 let link_start_in_merged = link.start();
237 let link_end_in_merged = link.end();
238
239 while ranges
240 .peek()
241 .is_some_and(|range| range.merged_range.end <= link_start_in_merged)
242 {
243 let range = ranges.next().unwrap();
244 events.push(event_for(text, range.source_range, &range.parsed));
245 }
246
247 let Some(range) = ranges.peek_mut() else {
248 continue;
249 };
250 let prefix_len = link_start_in_merged - range.merged_range.start;
251 if prefix_len > 0 {
252 let (head, tail) = range.parsed.split_at(prefix_len);
253 events.push(event_for(
254 text,
255 range.source_range.start..range.source_range.start + prefix_len,
256 head,
257 ));
258 range.parsed = CowStr::Boxed(tail.into());
259 range.merged_range.start += prefix_len;
260 range.source_range.start += prefix_len;
261 }
262
263 let link_start_in_source = range.source_range.start;
264 let mut link_end_in_source = range.source_range.end;
265 let mut link_events = Vec::new();
266
267 while ranges
268 .peek()
269 .is_some_and(|range| range.merged_range.end <= link_end_in_merged)
270 {
271 let range = ranges.next().unwrap();
272 link_end_in_source = range.source_range.end;
273 link_events.push(event_for(text, range.source_range, &range.parsed));
274 }
275
276 if let Some(range) = ranges.peek_mut() {
277 let prefix_len = link_end_in_merged - range.merged_range.start;
278 if prefix_len > 0 {
279 let (head, tail) = range.parsed.split_at(prefix_len);
280 link_events.push(event_for(
281 text,
282 range.source_range.start..range.source_range.start + prefix_len,
283 head,
284 ));
285 range.parsed = CowStr::Boxed(tail.into());
286 range.merged_range.start += prefix_len;
287 range.source_range.start += prefix_len;
288 link_end_in_source = range.source_range.start;
289 }
290 }
291 let link_range = link_start_in_source..link_end_in_source;
292
293 events.push((
294 link_range.clone(),
295 MarkdownEvent::Start(MarkdownTag::Link {
296 link_type: LinkType::Autolink,
297 dest_url: SharedString::from(link.as_str().to_string()),
298 title: SharedString::default(),
299 id: SharedString::default(),
300 }),
301 ));
302 events.extend(link_events);
303 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
304 }
305 }
306
307 for range in ranges {
308 events.push(event_for(text, range.source_range, &range.parsed));
309 }
310 }
311 pulldown_cmark::Event::Code(_) => {
312 let content_range = extract_code_content_range(&text[range.clone()]);
313 let content_range =
314 content_range.start + range.start..content_range.end + range.start;
315 events.push((content_range, MarkdownEvent::Code))
316 }
317 pulldown_cmark::Event::Html(_) => events.push((range, MarkdownEvent::Html)),
318 pulldown_cmark::Event::InlineHtml(_) => events.push((range, MarkdownEvent::InlineHtml)),
319 pulldown_cmark::Event::FootnoteReference(_) => {
320 events.push((range, MarkdownEvent::FootnoteReference))
321 }
322 pulldown_cmark::Event::SoftBreak => events.push((range, MarkdownEvent::SoftBreak)),
323 pulldown_cmark::Event::HardBreak => events.push((range, MarkdownEvent::HardBreak)),
324 pulldown_cmark::Event::Rule => events.push((range, MarkdownEvent::Rule)),
325 pulldown_cmark::Event::TaskListMarker(checked) => {
326 events.push((range, MarkdownEvent::TaskListMarker(checked)))
327 }
328 pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
329 }
330 }
331 (events, language_names, language_paths)
332}
333
334pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
335 let mut events = Vec::new();
336 let mut finder = LinkFinder::new();
337 finder.kinds(&[linkify::LinkKind::Url]);
338 let mut text_range = Range {
339 start: 0,
340 end: text.len(),
341 };
342 for link in finder.links(text) {
343 let link_range = link.start()..link.end();
344
345 if link_range.start > text_range.start {
346 events.push((text_range.start..link_range.start, MarkdownEvent::Text));
347 }
348
349 events.push((
350 link_range.clone(),
351 MarkdownEvent::Start(MarkdownTag::Link {
352 link_type: LinkType::Autolink,
353 dest_url: SharedString::from(link.as_str().to_string()),
354 title: SharedString::default(),
355 id: SharedString::default(),
356 }),
357 ));
358 events.push((link_range.clone(), MarkdownEvent::Text));
359 events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link)));
360
361 text_range.start = link_range.end;
362 }
363
364 if text_range.end > text_range.start {
365 events.push((text_range, MarkdownEvent::Text));
366 }
367
368 events
369}
370
/// A static-lifetime equivalent of pulldown_cmark::Event so we can cache the
/// parse result for rendering without resorting to unsafe lifetime coercion.
///
/// Most variants carry no payload of their own: the parser functions in this module
/// pair every event with a byte range, and consumers are expected to read the
/// corresponding text from the markdown source through that range.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownEvent {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    Start(MarkdownTag),
    /// End of a tagged element.
    End(MarkdownTagEnd),
    /// Text that is identical to the markdown source in the associated range.
    Text,
    /// Text that differs from the markdown source in the associated range - typically due to
    /// substitution of HTML entities and smart punctuation. Carries the text to display.
    SubstitutedText(String),
    /// An inline code node. `parse_markdown` reports it with the range of the code
    /// content, without the surrounding backticks when they are balanced.
    Code,
    /// An HTML node.
    Html,
    /// An inline HTML node.
    InlineHtml,
    /// A reference to a footnote, which may or may not be defined
    /// by an event with a `MarkdownTag::FootnoteDefinition` tag. Definitions and references
    /// to them may occur in any order. The label is not stored in the event itself.
    FootnoteReference,
    /// A soft line break.
    SoftBreak,
    /// A hard line break.
    HardBreak,
    /// A horizontal ruler.
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    TaskListMarker(bool),
}
405
/// Tags for elements that can contain other elements.
///
/// This is the owned counterpart of `pulldown_cmark::Tag`: string payloads are
/// stored as `SharedString` instead of borrowing from the parsed input.
#[derive(Clone, Debug, PartialEq)]
pub enum MarkdownTag {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    Heading {
        level: HeadingLevel,
        id: Option<SharedString>,
        classes: Vec<SharedString>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(SharedString, Option<SharedString>)>,
    },

    /// A block quote. The block quote kind reported by the parser is discarded.
    BlockQuote,

    /// A code block.
    CodeBlock {
        kind: CodeBlockKind,
        metadata: CodeBlockMetadata,
    },

    /// An HTML block.
    HtmlBlock,

    /// A list. If the list is ordered the field indicates the number of the first item.
    /// Contains only list items.
    List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)

    /// A list item.
    Item,

    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    FootnoteDefinition(SharedString),

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    Table(Vec<Alignment>),

    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    TableHead,

    /// A table row. Is used both for header rows and body rows. Contains only `TableCell`s.
    TableRow,
    /// A single cell of a table row or table header.
    TableCell,

    // span-level tags
    /// Emphasized text.
    Emphasis,
    /// Strongly emphasized text.
    Strong,
    /// Struck-through text.
    Strikethrough,
    /// Superscript text.
    Superscript,
    /// Subscript text.
    Subscript,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// An image. Carries the link type, the destination URL, a title,
    /// and the link identifier.
    Image {
        link_type: LinkType,
        dest_url: SharedString,
        title: SharedString,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: SharedString,
    },

    /// A metadata block.
    MetadataBlock(MetadataBlockKind),

    /// A definition list.
    DefinitionList,
    /// The title (term) of a definition list entry.
    DefinitionListTitle,
    /// The definition belonging to a definition list title.
    DefinitionListDefinition,
}
490
/// How a code block was written in the markdown source.
///
/// "Fenced" means "surrounded by triple backticks."
/// There can optionally be either a language after the backticks (like in traditional Markdown)
/// or, if an agent is specifying a path for a source location in the project, it can be a PathRange,
/// e.g. ```path/to/foo.rs#L123-456 instead of ```rust
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
    /// A code block created by indentation rather than by a fence.
    Indented,
    /// A fenced code block whose info string is empty after trimming.
    Fenced,
    /// A fenced code block whose info string contains no `/`; the trimmed info string
    /// is stored as the language name.
    FencedLang(SharedString),
    /// A fenced code block whose info string contains a `/` and was therefore parsed
    /// as a path with an optional range.
    FencedSrc(PathWithRange),
}
502
/// Extra information that `parse_markdown` records for every code block.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CodeBlockMetadata {
    /// Byte range of the block's content in the markdown source. For fenced blocks this
    /// is computed by `extract_code_block_content_range`; for indented blocks it is the
    /// range of the block's start event.
    pub content_range: Range<usize>,
    /// For fenced blocks, the number of `\n` bytes inside `content_range`; indented
    /// blocks are always recorded with `1`.
    pub line_count: usize,
}
508
/// Returns the byte range of the code inside an inline code span such as `` `x` ``.
///
/// `text` is the whole span including its backtick delimiters. When the span opens and
/// closes with the same, non-zero number of backticks and those two runs do not
/// overlap, the range between them is returned. In every other case (no leading
/// backticks, mismatched runs, a string made only of backticks, empty input) the
/// full range `0..text.len()` is returned.
fn extract_code_content_range(text: &str) -> Range<usize> {
    // A backtick is a single ASCII byte and never occurs inside a multi-byte UTF-8
    // sequence, so counting bytes is equivalent to counting characters here.
    let is_tick = |byte: &u8| *byte == b'`';
    let opening = text.as_bytes().iter().take_while(|b| is_tick(b)).count();
    let closing = text.as_bytes().iter().rev().take_while(|b| is_tick(b)).count();

    let whole = text.len();
    let delimiters_balanced = opening != 0 && opening == closing && opening + closing <= whole;

    if delimiters_balanced {
        opening..whole - closing
    } else {
        0..whole
    }
}
529
/// Returns the byte range of the content of a triple-backtick fenced code block.
///
/// `text` is the whole block. If it begins with "```", the content starts after the
/// first newline that follows the opening fence (or directly after the fence when there
/// is no newline). If content remains and `text` ends with "```", that closing fence
/// is excluded as well. Text that does not start with a fence starts at offset 0.
/// The returned range is never inverted: when the fences overlap, an empty range at
/// the content start is returned.
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
    const FENCE: &str = "```";

    let content_start = match text.strip_prefix(FENCE) {
        // Skip the rest of the opening line (the info string) including its newline.
        Some(after_fence) => match after_fence.find('\n') {
            Some(newline_ix) => FENCE.len() + newline_ix + 1,
            None => FENCE.len(),
        },
        None => 0,
    };

    let mut content_end = text.len();
    if content_start < content_end && text.ends_with(FENCE) {
        content_end -= FENCE.len();
    }

    // A closing fence that overlaps the opening one would invert the range; clamp it.
    content_start..content_end.max(content_start)
}
548
/// Unit tests for the parser entry point and the two range-extraction helpers.
#[cfg(test)]
mod tests {
    use super::MarkdownEvent::*;
    use super::MarkdownTag::*;
    use super::*;

    /// Every pulldown-cmark option that is deliberately left out of `PARSE_OPTIONS`.
    const UNWANTED_OPTIONS: Options = Options::ENABLE_YAML_STYLE_METADATA_BLOCKS
        .union(Options::ENABLE_MATH)
        .union(Options::ENABLE_DEFINITION_LIST)
        .union(Options::ENABLE_WIKILINKS);

    #[test]
    fn all_options_considered() {
        // The purpose of this is to fail when new options are added to pulldown_cmark, so that they
        // can be evaluated for inclusion.
        assert_eq!(PARSE_OPTIONS.union(UNWANTED_OPTIONS), Options::all());
    }

    // An option must be either wanted or unwanted, never both.
    #[test]
    fn wanted_and_unwanted_options_disjoint() {
        assert_eq!(
            PARSE_OPTIONS.intersection(UNWANTED_OPTIONS),
            Options::empty()
        );
    }

    // An indented HTML comment is reported as an HTML block followed by a paragraph.
    // The expected offsets place the start of the HTML block at byte 2 of the input.
    #[test]
    fn test_html_comments() {
        assert_eq!(
            parse_markdown(" <!--\nrdoc-file=string.c\n-->\nReturns"),
            (
                vec![
                    (2..30, Start(HtmlBlock)),
                    (2..2, SubstitutedText(" ".into())),
                    (2..7, Html),
                    (7..26, Html),
                    (26..30, Html),
                    (2..30, End(MarkdownTagEnd::HtmlBlock)),
                    (30..37, Start(Paragraph)),
                    (30..37, Text),
                    (30..37, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        )
    }

    // A bare URL is wrapped in autolink events, while substituted and escaped
    // characters around it are reported as separate fragments. The two leading
    // `SubstitutedText` events each span six source bytes but yield one character.
    #[test]
    fn test_plain_urls_and_escaped_text() {
        assert_eq!(
            parse_markdown(" https://some.url some \\`►\\` text"),
            (
                vec![
                    (0..51, Start(Paragraph)),
                    (0..6, SubstitutedText("\u{a0}".into())),
                    (6..12, SubstitutedText("\u{a0}".into())),
                    (12..13, Text),
                    (
                        13..29,
                        Start(Link {
                            link_type: LinkType::Autolink,
                            dest_url: "https://some.url".into(),
                            title: "".into(),
                            id: "".into(),
                        })
                    ),
                    (13..29, Text),
                    (13..29, End(MarkdownTagEnd::Link)),
                    (29..35, Text),
                    (36..37, Text), // Escaped backtick
                    (37..44, SubstitutedText("►".into())),
                    (45..46, Text), // Escaped backtick
                    (46..51, Text),
                    (0..51, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        );
    }

    // A markdown link whose destination is not yet closed is not a link to the parser;
    // its pieces arrive as plain text and the URL is picked up by bare-URL detection.
    #[test]
    fn test_incomplete_link() {
        assert_eq!(
            parse_markdown("You can use the [GitHub Search API](https://docs.github.com/en").0,
            vec![
                (0..62, Start(Paragraph)),
                (0..16, Text),
                (16..17, Text),
                (17..34, Text),
                (34..35, Text),
                (35..36, Text),
                (
                    36..62,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://docs.github.com/en".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (36..62, Text),
                (36..62, End(MarkdownTagEnd::Link)),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );
    }

    // Smart punctuation replaces dashes, ellipses and quotes; each replacement shows
    // up as `SubstitutedText` over the range of the characters it replaced.
    #[test]
    fn test_smart_punctuation() {
        assert_eq!(
            parse_markdown("-- --- ... \"double quoted\" 'single quoted' ----------"),
            (
                vec![
                    (0..53, Start(Paragraph)),
                    (0..2, SubstitutedText("–".into())),
                    (2..3, Text),
                    (3..6, SubstitutedText("—".into())),
                    (6..7, Text),
                    (7..10, SubstitutedText("…".into())),
                    (10..11, Text),
                    (11..12, SubstitutedText("“".into())),
                    (12..25, Text),
                    (25..26, SubstitutedText("”".into())),
                    (26..27, Text),
                    (27..28, SubstitutedText("‘".into())),
                    (28..41, Text),
                    (41..42, SubstitutedText("’".into())),
                    (42..43, Text),
                    (43..53, SubstitutedText("–––––".into())),
                    (0..53, End(MarkdownTagEnd::Paragraph))
                ],
                HashSet::default(),
                HashSet::default()
            )
        )
    }

    // Covers both code block flavours: a fenced block records its language, content
    // range and newline count, while an indented block is recorded with a line count of 1.
    #[test]
    fn test_code_block_metadata() {
        assert_eq!(
            parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
            (
                vec![
                    (
                        0..37,
                        Start(CodeBlock {
                            kind: CodeBlockKind::FencedLang("rust".into()),
                            metadata: CodeBlockMetadata {
                                content_range: 8..34,
                                line_count: 3
                            }
                        })
                    ),
                    (8..34, Text),
                    (0..37, End(MarkdownTagEnd::CodeBlock)),
                ],
                {
                    let mut h = HashSet::default();
                    h.insert("rust".into());
                    h
                },
                HashSet::default()
            )
        );
        // The expected offsets place the code at byte 4, i.e. after the indentation.
        assert_eq!(
            parse_markdown(" fn main() {}"),
            (
                vec![
                    (
                        4..16,
                        Start(CodeBlock {
                            kind: CodeBlockKind::Indented,
                            metadata: CodeBlockMetadata {
                                content_range: 4..16,
                                line_count: 1
                            }
                        })
                    ),
                    (4..16, Text),
                    (4..16, End(MarkdownTagEnd::CodeBlock))
                ],
                HashSet::default(),
                HashSet::default()
            )
        );
    }

    #[test]
    fn test_extract_code_content_range() {
        // Balanced delimiters of one, two and three backticks are stripped.
        let input = "```let x = 5;```";
        assert_eq!(extract_code_content_range(input), 3..13);

        let input = "``let x = 5;``";
        assert_eq!(extract_code_content_range(input), 2..12);

        let input = "`let x = 5;`";
        assert_eq!(extract_code_content_range(input), 1..11);

        // No delimiters at all: the whole input is the content.
        let input = "plain text";
        assert_eq!(extract_code_content_range(input), 0..10);

        // Mismatched delimiters: the whole input is returned unchanged.
        let input = "``let x = 5;`";
        assert_eq!(extract_code_content_range(input), 0..13);
    }

    #[test]
    fn test_extract_code_block_content_range() {
        // The opening fence line and the closing fence are excluded.
        let input = "```rust\nlet x = 5;\n```";
        assert_eq!(extract_code_block_content_range(input), 8..19);

        // Input without fences is returned whole.
        let input = "plain text";
        assert_eq!(extract_code_block_content_range(input), 0..10);

        let input = "```python\nprint('hello')\nprint('world')\n```";
        assert_eq!(extract_code_block_content_range(input), 10..40);

        // Malformed input
        let input = "`````";
        assert_eq!(extract_code_block_content_range(input), 3..3);
    }

    #[test]
    fn test_links_split_across_fragments() {
        // This test verifies that links split across multiple text fragments due to escaping or other issues
        // are correctly detected and processed
        // Note: In real usage, pulldown_cmark creates separate text events for the escaped character
        // We're verifying our parser can handle this correctly
        assert_eq!(
            parse_markdown("https:/\\/example.com is equivalent to https://example.com!").0,
            vec![
                (0..62, Start(Paragraph)),
                (
                    0..20,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (0..7, Text),
                (8..20, Text),
                (0..20, End(MarkdownTagEnd::Link)),
                (20..38, Text),
                (
                    38..61,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (38..53, Text),
                (53..58, SubstitutedText(".".into())),
                (58..61, Text),
                (38..61, End(MarkdownTagEnd::Link)),
                (61..62, Text),
                (0..62, End(MarkdownTagEnd::Paragraph))
            ],
        );

        // Same idea with multi-byte characters and a substituted fragment inside the URL.
        assert_eq!(
            parse_markdown("Visit https://example.com/cat\\/é‍☕ for coffee!").0,
            [
                (0..55, Start(Paragraph)),
                (0..6, Text),
                (
                    6..43,
                    Start(Link {
                        link_type: LinkType::Autolink,
                        dest_url: "https://example.com/cat/é\u{200d}☕".into(),
                        title: "".into(),
                        id: "".into()
                    })
                ),
                (6..29, Text),
                (30..33, Text),
                (33..40, SubstitutedText("\u{200d}".into())),
                (40..43, Text),
                (6..43, End(MarkdownTagEnd::Link)),
                (43..55, Text),
                (0..55, End(MarkdownTagEnd::Paragraph))
            ]
        );
    }
}