1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
29struct MarkdownParser<'a> {
30 tokens: Vec<(Event<'a>, Range<usize>)>,
31 /// The current index in the tokens array
32 cursor: usize,
33 /// The blocks that we have successfully parsed so far
34 parsed: Vec<ParsedMarkdownElement>,
35 file_location_directory: Option<PathBuf>,
36 language_registry: Option<Arc<LanguageRegistry>>,
37}
38
39impl<'a> MarkdownParser<'a> {
40 fn new(
41 tokens: Vec<(Event<'a>, Range<usize>)>,
42 file_location_directory: Option<PathBuf>,
43 language_registry: Option<Arc<LanguageRegistry>>,
44 ) -> Self {
45 Self {
46 tokens,
47 file_location_directory,
48 language_registry,
49 cursor: 0,
50 parsed: vec![],
51 }
52 }
53
54 fn eof(&self) -> bool {
55 if self.tokens.is_empty() {
56 return true;
57 }
58 self.cursor >= self.tokens.len() - 1
59 }
60
61 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
62 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
63 return self.tokens.last();
64 }
65 return self.tokens.get(self.cursor + steps);
66 }
67
68 fn previous(&self) -> Option<&(Event, Range<usize>)> {
69 if self.cursor == 0 || self.cursor > self.tokens.len() {
70 return None;
71 }
72 return self.tokens.get(self.cursor - 1);
73 }
74
75 fn current(&self) -> Option<&(Event, Range<usize>)> {
76 return self.peek(0);
77 }
78
79 fn current_event(&self) -> Option<&Event> {
80 return self.current().map(|(event, _)| event);
81 }
82
83 fn is_text_like(event: &Event) -> bool {
84 match event {
85 Event::Text(_)
86 // Represent an inline code block
87 | Event::Code(_)
88 | Event::Html(_)
89 | Event::FootnoteReference(_)
90 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
91 | Event::Start(Tag::Emphasis)
92 | Event::Start(Tag::Strong)
93 | Event::Start(Tag::Strikethrough)
94 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
95 true
96 }
97 _ => false,
98 }
99 }
100
101 async fn parse_document(mut self) -> Self {
102 while !self.eof() {
103 if let Some(block) = self.parse_block().await {
104 self.parsed.extend(block);
105 } else {
106 self.cursor += 1;
107 }
108 }
109 self
110 }
111
112 #[async_recursion]
113 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
114 let (current, source_range) = self.current().unwrap();
115 let source_range = source_range.clone();
116 match current {
117 Event::Start(tag) => match tag {
118 Tag::Paragraph => {
119 self.cursor += 1;
120 let text = self.parse_text(false, Some(source_range));
121 Some(vec![ParsedMarkdownElement::Paragraph(text)])
122 }
123 Tag::Heading {
124 level,
125 id: _,
126 classes: _,
127 attrs: _,
128 } => {
129 let level = *level;
130 self.cursor += 1;
131 let heading = self.parse_heading(level);
132 Some(vec![ParsedMarkdownElement::Heading(heading)])
133 }
134 Tag::Table(alignment) => {
135 let alignment = alignment.clone();
136 self.cursor += 1;
137 let table = self.parse_table(alignment);
138 Some(vec![ParsedMarkdownElement::Table(table)])
139 }
140 Tag::List(order) => {
141 let order = *order;
142 self.cursor += 1;
143 let list = self.parse_list(order).await;
144 Some(list)
145 }
146 Tag::BlockQuote(_kind) => {
147 self.cursor += 1;
148 let block_quote = self.parse_block_quote().await;
149 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
150 }
151 Tag::CodeBlock(kind) => {
152 let language = match kind {
153 pulldown_cmark::CodeBlockKind::Indented => None,
154 pulldown_cmark::CodeBlockKind::Fenced(language) => {
155 if language.is_empty() {
156 None
157 } else {
158 Some(language.to_string())
159 }
160 }
161 };
162
163 self.cursor += 1;
164
165 let code_block = self.parse_code_block(language).await;
166 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
167 }
168 _ => None,
169 },
170 Event::Rule => {
171 let source_range = source_range.clone();
172 self.cursor += 1;
173 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
174 }
175 _ => None,
176 }
177 }
178
179 fn parse_text(
180 &mut self,
181 should_complete_on_soft_break: bool,
182 source_range: Option<Range<usize>>,
183 ) -> ParsedMarkdownText {
184 let source_range = source_range.unwrap_or_else(|| {
185 self.current()
186 .map(|(_, range)| range.clone())
187 .unwrap_or_default()
188 });
189
190 let mut text = String::new();
191 let mut bold_depth = 0;
192 let mut italic_depth = 0;
193 let mut strikethrough_depth = 0;
194 let mut link: Option<Link> = None;
195 let mut region_ranges: Vec<Range<usize>> = vec![];
196 let mut regions: Vec<ParsedRegion> = vec![];
197 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
198
199 let mut link_urls: Vec<String> = vec![];
200 let mut link_ranges: Vec<Range<usize>> = vec![];
201
202 loop {
203 if self.eof() {
204 break;
205 }
206
207 let (current, _source_range) = self.current().unwrap();
208 let prev_len = text.len();
209 match current {
210 Event::SoftBreak => {
211 if should_complete_on_soft_break {
212 break;
213 }
214
215 // `Some text\nSome more text` should be treated as a single line.
216 text.push(' ');
217 }
218
219 Event::HardBreak => {
220 text.push('\n');
221 }
222
223 Event::Text(t) => {
224 text.push_str(t.as_ref());
225
226 let mut style = MarkdownHighlightStyle::default();
227
228 if bold_depth > 0 {
229 style.weight = FontWeight::BOLD;
230 }
231
232 if italic_depth > 0 {
233 style.italic = true;
234 }
235
236 if strikethrough_depth > 0 {
237 style.strikethrough = true;
238 }
239
240 let last_run_len = if let Some(link) = link.clone() {
241 region_ranges.push(prev_len..text.len());
242 regions.push(ParsedRegion {
243 code: false,
244 link: Some(link),
245 });
246 style.underline = true;
247 prev_len
248 } else {
249 // Manually scan for links
250 let mut finder = linkify::LinkFinder::new();
251 finder.kinds(&[linkify::LinkKind::Url]);
252 let mut last_link_len = prev_len;
253 for link in finder.links(t) {
254 let start = link.start();
255 let end = link.end();
256 let range = (prev_len + start)..(prev_len + end);
257 link_ranges.push(range.clone());
258 link_urls.push(link.as_str().to_string());
259
260 // If there is a style before we match a link, we have to add this to the highlighted ranges
261 if style != MarkdownHighlightStyle::default()
262 && last_link_len < link.start()
263 {
264 highlights.push((
265 last_link_len..link.start(),
266 MarkdownHighlight::Style(style.clone()),
267 ));
268 }
269
270 highlights.push((
271 range.clone(),
272 MarkdownHighlight::Style(MarkdownHighlightStyle {
273 underline: true,
274 ..style
275 }),
276 ));
277 region_ranges.push(range.clone());
278 regions.push(ParsedRegion {
279 code: false,
280 link: Some(Link::Web {
281 url: link.as_str().to_string(),
282 }),
283 });
284
285 last_link_len = end;
286 }
287 last_link_len
288 };
289
290 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
291 let mut new_highlight = true;
292 if let Some((last_range, last_style)) = highlights.last_mut() {
293 if last_range.end == last_run_len
294 && last_style == &MarkdownHighlight::Style(style.clone())
295 {
296 last_range.end = text.len();
297 new_highlight = false;
298 }
299 }
300 if new_highlight {
301 highlights
302 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
303 }
304 }
305 }
306
307 // Note: This event means "inline code" and not "code block"
308 Event::Code(t) => {
309 text.push_str(t.as_ref());
310 region_ranges.push(prev_len..text.len());
311
312 if link.is_some() {
313 highlights.push((
314 prev_len..text.len(),
315 MarkdownHighlight::Style(MarkdownHighlightStyle {
316 underline: true,
317 ..Default::default()
318 }),
319 ));
320 }
321
322 regions.push(ParsedRegion {
323 code: true,
324 link: link.clone(),
325 });
326 }
327
328 Event::Start(tag) => match tag {
329 Tag::Emphasis => italic_depth += 1,
330 Tag::Strong => bold_depth += 1,
331 Tag::Strikethrough => strikethrough_depth += 1,
332 Tag::Link {
333 link_type: _,
334 dest_url,
335 title: _,
336 id: _,
337 } => {
338 link = Link::identify(
339 self.file_location_directory.clone(),
340 dest_url.to_string(),
341 );
342 }
343 _ => {
344 break;
345 }
346 },
347
348 Event::End(tag) => match tag {
349 TagEnd::Emphasis => {
350 italic_depth -= 1;
351 }
352 TagEnd::Strong => {
353 bold_depth -= 1;
354 }
355 TagEnd::Strikethrough => {
356 strikethrough_depth -= 1;
357 }
358 TagEnd::Link => {
359 link = None;
360 }
361 TagEnd::Paragraph => {
362 self.cursor += 1;
363 break;
364 }
365 _ => {
366 break;
367 }
368 },
369
370 _ => {
371 break;
372 }
373 }
374
375 self.cursor += 1;
376 }
377
378 ParsedMarkdownText {
379 source_range,
380 contents: text,
381 highlights,
382 regions,
383 region_ranges,
384 }
385 }
386
387 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
388 let (_event, source_range) = self.previous().unwrap();
389 let source_range = source_range.clone();
390 let text = self.parse_text(true, None);
391
392 // Advance past the heading end tag
393 self.cursor += 1;
394
395 ParsedMarkdownHeading {
396 source_range: source_range.clone(),
397 level: match level {
398 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
399 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
400 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
401 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
402 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
403 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
404 },
405 contents: text,
406 }
407 }
408
409 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
410 let (_event, source_range) = self.previous().unwrap();
411 let source_range = source_range.clone();
412 let mut header = ParsedMarkdownTableRow::new();
413 let mut body = vec![];
414 let mut current_row = vec![];
415 let mut in_header = true;
416 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
417
418 loop {
419 if self.eof() {
420 break;
421 }
422
423 let (current, source_range) = self.current().unwrap();
424 let source_range = source_range.clone();
425 match current {
426 Event::Start(Tag::TableHead)
427 | Event::Start(Tag::TableRow)
428 | Event::End(TagEnd::TableCell) => {
429 self.cursor += 1;
430 }
431 Event::Start(Tag::TableCell) => {
432 self.cursor += 1;
433 let cell_contents = self.parse_text(false, Some(source_range));
434 current_row.push(cell_contents);
435 }
436 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
437 self.cursor += 1;
438 let new_row = std::mem::take(&mut current_row);
439 if in_header {
440 header.children = new_row;
441 in_header = false;
442 } else {
443 let row = ParsedMarkdownTableRow::with_children(new_row);
444 body.push(row);
445 }
446 }
447 Event::End(TagEnd::Table) => {
448 self.cursor += 1;
449 break;
450 }
451 _ => {
452 break;
453 }
454 }
455 }
456
457 ParsedMarkdownTable {
458 source_range,
459 header,
460 body,
461 column_alignments,
462 }
463 }
464
465 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
466 match alignment {
467 Alignment::None => ParsedMarkdownTableAlignment::None,
468 Alignment::Left => ParsedMarkdownTableAlignment::Left,
469 Alignment::Center => ParsedMarkdownTableAlignment::Center,
470 Alignment::Right => ParsedMarkdownTableAlignment::Right,
471 }
472 }
473
474 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
475 let (_, list_source_range) = self.previous().unwrap();
476
477 let mut items = Vec::new();
478 let mut items_stack = vec![Vec::new()];
479 let mut depth = 1;
480 let mut task_item = None;
481 let mut order = order;
482 let mut order_stack = Vec::new();
483
484 let mut insertion_indices = FxHashMap::default();
485 let mut source_ranges = FxHashMap::default();
486 let mut start_item_range = list_source_range.clone();
487
488 while !self.eof() {
489 let (current, source_range) = self.current().unwrap();
490 match current {
491 Event::Start(Tag::List(new_order)) => {
492 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
493 insertion_indices.insert(depth, items.len());
494 }
495
496 // We will use the start of the nested list as the end for the current item's range,
497 // because we don't care about the hierarchy of list items
498 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
499 e.insert(start_item_range.start..source_range.start);
500 }
501
502 order_stack.push(order);
503 order = *new_order;
504 self.cursor += 1;
505 depth += 1;
506 }
507 Event::End(TagEnd::List(_)) => {
508 order = order_stack.pop().flatten();
509 self.cursor += 1;
510 depth -= 1;
511
512 if depth == 0 {
513 break;
514 }
515 }
516 Event::Start(Tag::Item) => {
517 start_item_range = source_range.clone();
518
519 self.cursor += 1;
520 items_stack.push(Vec::new());
521
522 // Check for task list marker (`- [ ]` or `- [x]`)
523 if let Some(event) = self.current_event() {
524 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
525 if event == &Event::Start(Tag::Paragraph) {
526 self.cursor += 1;
527 }
528
529 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
530 task_item = Some((*checked, range.clone()));
531 self.cursor += 1;
532 }
533 }
534
535 if let Some((event, range)) = self.current() {
536 // This is a plain list item.
537 // For example `- some text` or `1. [Docs](./docs.md)`
538 if MarkdownParser::is_text_like(event) {
539 let text = self.parse_text(false, Some(range.clone()));
540 let block = ParsedMarkdownElement::Paragraph(text);
541 if let Some(content) = items_stack.last_mut() {
542 content.push(block);
543 }
544 } else {
545 let block = self.parse_block().await;
546 if let Some(block) = block {
547 if let Some(content) = items_stack.last_mut() {
548 content.extend(block);
549 }
550 }
551 }
552 }
553
554 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
555 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
556 self.cursor += 1;
557 }
558 }
559 Event::End(TagEnd::Item) => {
560 self.cursor += 1;
561
562 let item_type = if let Some((checked, range)) = task_item {
563 ParsedMarkdownListItemType::Task(checked, range)
564 } else if let Some(order) = order {
565 ParsedMarkdownListItemType::Ordered(order)
566 } else {
567 ParsedMarkdownListItemType::Unordered
568 };
569
570 if let Some(current) = order {
571 order = Some(current + 1);
572 }
573
574 if let Some(content) = items_stack.pop() {
575 let source_range = source_ranges
576 .remove(&depth)
577 .unwrap_or(start_item_range.clone());
578
579 // We need to remove the last character of the source range, because it includes the newline character
580 let source_range = source_range.start..source_range.end - 1;
581 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
582 source_range,
583 content,
584 depth,
585 item_type,
586 });
587
588 if let Some(index) = insertion_indices.get(&depth) {
589 items.insert(*index, item);
590 insertion_indices.remove(&depth);
591 } else {
592 items.push(item);
593 }
594 }
595
596 task_item = None;
597 }
598 _ => {
599 if depth == 0 {
600 break;
601 }
602 // This can only happen if a list item starts with more then one paragraph,
603 // or the list item contains blocks that should be rendered after the nested list items
604 let block = self.parse_block().await;
605 if let Some(block) = block {
606 if let Some(items_stack) = items_stack.last_mut() {
607 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
608 if !insertion_indices.contains_key(&depth) {
609 items_stack.extend(block);
610 continue;
611 }
612 }
613
614 // Otherwise we need to insert the block after all the nested items
615 // that have been parsed so far
616 items.extend(block);
617 }
618 }
619 }
620 }
621
622 items
623 }
624
625 #[async_recursion]
626 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
627 let (_event, source_range) = self.previous().unwrap();
628 let source_range = source_range.clone();
629 let mut nested_depth = 1;
630
631 let mut children: Vec<ParsedMarkdownElement> = vec![];
632
633 while !self.eof() {
634 let block = self.parse_block().await;
635
636 if let Some(block) = block {
637 children.extend(block);
638 } else {
639 break;
640 }
641
642 if self.eof() {
643 break;
644 }
645
646 let (current, _source_range) = self.current().unwrap();
647 match current {
648 // This is a nested block quote.
649 // Record that we're in a nested block quote and continue parsing.
650 // We don't need to advance the cursor since the next
651 // call to `parse_block` will handle it.
652 Event::Start(Tag::BlockQuote(_kind)) => {
653 nested_depth += 1;
654 }
655 Event::End(TagEnd::BlockQuote(_kind)) => {
656 nested_depth -= 1;
657 if nested_depth == 0 {
658 self.cursor += 1;
659 break;
660 }
661 }
662 _ => {}
663 };
664 }
665
666 ParsedMarkdownBlockQuote {
667 source_range,
668 children,
669 }
670 }
671
672 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
673 let (_event, source_range) = self.previous().unwrap();
674 let source_range = source_range.clone();
675 let mut code = String::new();
676
677 while !self.eof() {
678 let (current, _source_range) = self.current().unwrap();
679 match current {
680 Event::Text(text) => {
681 code.push_str(text);
682 self.cursor += 1;
683 }
684 Event::End(TagEnd::CodeBlock) => {
685 self.cursor += 1;
686 break;
687 }
688 _ => {
689 break;
690 }
691 }
692 }
693
694 let highlights = if let Some(language) = &language {
695 if let Some(registry) = &self.language_registry {
696 let rope: language::Rope = code.as_str().into();
697 registry
698 .language_for_name_or_extension(language)
699 .await
700 .map(|l| l.highlight_text(&rope, 0..code.len()))
701 .ok()
702 } else {
703 None
704 }
705 } else {
706 None
707 };
708
709 ParsedMarkdownCodeBlock {
710 source_range,
711 contents: code.trim().to_string().into(),
712 language,
713 highlights,
714 }
715 }
716}
717
718#[cfg(test)]
719mod tests {
720 use super::*;
721
722 use gpui::BackgroundExecutor;
723 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
724 use pretty_assertions::assert_eq;
725
726 use ParsedMarkdownListItemType::*;
727
728 async fn parse(input: &str) -> ParsedMarkdown {
729 parse_markdown(input, None, None).await
730 }
731
732 #[gpui::test]
733 async fn test_headings() {
734 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
735
736 assert_eq!(
737 parsed.children,
738 vec![
739 h1(text("Heading one", 2..13), 0..14),
740 h2(text("Heading two", 17..28), 14..29),
741 h3(text("Heading three", 33..46), 29..46),
742 ]
743 );
744 }
745
746 #[gpui::test]
747 async fn test_newlines_dont_new_paragraphs() {
748 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
749
750 assert_eq!(
751 parsed.children,
752 vec![p("Some text that is bolded and italicized", 0..46)]
753 );
754 }
755
756 #[gpui::test]
757 async fn test_heading_with_paragraph() {
758 let parsed = parse("# Zed\nThe editor").await;
759
760 assert_eq!(
761 parsed.children,
762 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
763 );
764 }
765
766 #[gpui::test]
767 async fn test_double_newlines_do_new_paragraphs() {
768 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
769
770 assert_eq!(
771 parsed.children,
772 vec![
773 p("Some text that is bolded", 0..29),
774 p("and italicized", 31..47),
775 ]
776 );
777 }
778
779 #[gpui::test]
780 async fn test_bold_italic_text() {
781 let parsed = parse("Some text **that is bolded** and *italicized*").await;
782
783 assert_eq!(
784 parsed.children,
785 vec![p("Some text that is bolded and italicized", 0..45)]
786 );
787 }
788
789 #[gpui::test]
790 async fn test_nested_bold_strikethrough_text() {
791 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
792
793 assert_eq!(parsed.children.len(), 1);
794 assert_eq!(
795 parsed.children[0],
796 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
797 source_range: 0..35,
798 contents: "Some bostrikethroughld text".to_string(),
799 highlights: Vec::new(),
800 region_ranges: Vec::new(),
801 regions: Vec::new(),
802 })
803 );
804
805 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
806 text
807 } else {
808 panic!("Expected a paragraph");
809 };
810 assert_eq!(
811 paragraph.highlights,
812 vec![
813 (
814 5..7,
815 MarkdownHighlight::Style(MarkdownHighlightStyle {
816 weight: FontWeight::BOLD,
817 ..Default::default()
818 }),
819 ),
820 (
821 7..20,
822 MarkdownHighlight::Style(MarkdownHighlightStyle {
823 weight: FontWeight::BOLD,
824 strikethrough: true,
825 ..Default::default()
826 }),
827 ),
828 (
829 20..22,
830 MarkdownHighlight::Style(MarkdownHighlightStyle {
831 weight: FontWeight::BOLD,
832 ..Default::default()
833 }),
834 ),
835 ]
836 );
837 }
838
839 #[gpui::test]
840 async fn test_raw_links_detection() {
841 let parsed = parse("Checkout this https://zed.dev link").await;
842
843 assert_eq!(
844 parsed.children,
845 vec![p("Checkout this https://zed.dev link", 0..34)]
846 );
847
848 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
849 text
850 } else {
851 panic!("Expected a paragraph");
852 };
853 assert_eq!(
854 paragraph.highlights,
855 vec![(
856 14..29,
857 MarkdownHighlight::Style(MarkdownHighlightStyle {
858 underline: true,
859 ..Default::default()
860 }),
861 )]
862 );
863 assert_eq!(
864 paragraph.regions,
865 vec![ParsedRegion {
866 code: false,
867 link: Some(Link::Web {
868 url: "https://zed.dev".to_string()
869 }),
870 }]
871 );
872 assert_eq!(paragraph.region_ranges, vec![14..29]);
873 }
874
875 #[gpui::test]
876 async fn test_header_only_table() {
877 let markdown = "\
878| Header 1 | Header 2 |
879|----------|----------|
880
881Some other content
882";
883
884 let expected_table = table(
885 0..48,
886 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
887 vec![],
888 );
889
890 assert_eq!(
891 parse(markdown).await.children[0],
892 ParsedMarkdownElement::Table(expected_table)
893 );
894 }
895
896 #[gpui::test]
897 async fn test_basic_table() {
898 let markdown = "\
899| Header 1 | Header 2 |
900|----------|----------|
901| Cell 1 | Cell 2 |
902| Cell 3 | Cell 4 |";
903
904 let expected_table = table(
905 0..95,
906 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
907 vec![
908 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
909 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
910 ],
911 );
912
913 assert_eq!(
914 parse(markdown).await.children[0],
915 ParsedMarkdownElement::Table(expected_table)
916 );
917 }
918
919 #[gpui::test]
920 async fn test_list_basic() {
921 let parsed = parse(
922 "\
923* Item 1
924* Item 2
925* Item 3
926",
927 )
928 .await;
929
930 assert_eq!(
931 parsed.children,
932 vec![
933 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
934 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
935 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
936 ],
937 );
938 }
939
940 #[gpui::test]
941 async fn test_list_with_tasks() {
942 let parsed = parse(
943 "\
944- [ ] TODO
945- [x] Checked
946",
947 )
948 .await;
949
950 assert_eq!(
951 parsed.children,
952 vec![
953 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
954 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
955 ],
956 );
957 }
958
959 #[gpui::test]
960 async fn test_list_with_linebreak_is_handled_correctly() {
961 let parsed = parse(
962 "\
963- [ ] Task 1
964
965- [x] Task 2
966",
967 )
968 .await;
969
970 assert_eq!(
971 parsed.children,
972 vec![
973 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
974 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
975 ],
976 );
977 }
978
979 #[gpui::test]
980 async fn test_list_nested() {
981 let parsed = parse(
982 "\
983* Item 1
984* Item 2
985* Item 3
986
9871. Hello
9881. Two
989 1. Three
9902. Four
9913. Five
992
993* First
994 1. Hello
995 1. Goodbyte
996 - Inner
997 - Inner
998 2. Goodbyte
999 - Next item empty
1000 -
1001* Last
1002",
1003 )
1004 .await;
1005
1006 assert_eq!(
1007 parsed.children,
1008 vec![
1009 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1010 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1011 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1012 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1013 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1014 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1015 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1016 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1017 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1018 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1019 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1020 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1021 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1022 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1023 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1024 list_item(186..190, 3, Unordered, vec![]),
1025 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1026 ]
1027 );
1028 }
1029
1030 #[gpui::test]
1031 async fn test_list_with_nested_content() {
1032 let parsed = parse(
1033 "\
1034* This is a list item with two paragraphs.
1035
1036 This is the second paragraph in the list item.
1037",
1038 )
1039 .await;
1040
1041 assert_eq!(
1042 parsed.children,
1043 vec![list_item(
1044 0..96,
1045 1,
1046 Unordered,
1047 vec![
1048 p("This is a list item with two paragraphs.", 4..44),
1049 p("This is the second paragraph in the list item.", 50..97)
1050 ],
1051 ),],
1052 );
1053 }
1054
1055 #[gpui::test]
1056 async fn test_nested_list_with_paragraph_inside() {
1057 let parsed = parse(
1058 "\
10591. a
1060 1. b
1061 1. c
1062
1063 text
1064
1065 1. d
1066",
1067 )
1068 .await;
1069
1070 assert_eq!(
1071 parsed.children,
1072 vec![
1073 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1074 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1075 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1076 p("text", 32..37),
1077 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1078 ],
1079 );
1080 }
1081
1082 #[gpui::test]
1083 async fn test_list_with_leading_text() {
1084 let parsed = parse(
1085 "\
1086* `code`
1087* **bold**
1088* [link](https://example.com)
1089",
1090 )
1091 .await;
1092
1093 assert_eq!(
1094 parsed.children,
1095 vec![
1096 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1097 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1098 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1099 ],
1100 );
1101 }
1102
1103 #[gpui::test]
1104 async fn test_simple_block_quote() {
1105 let parsed = parse("> Simple block quote with **styled text**").await;
1106
1107 assert_eq!(
1108 parsed.children,
1109 vec![block_quote(
1110 vec![p("Simple block quote with styled text", 2..41)],
1111 0..41
1112 )]
1113 );
1114 }
1115
1116 #[gpui::test]
1117 async fn test_simple_block_quote_with_multiple_lines() {
1118 let parsed = parse(
1119 "\
1120> # Heading
1121> More
1122> text
1123>
1124> More text
1125",
1126 )
1127 .await;
1128
1129 assert_eq!(
1130 parsed.children,
1131 vec![block_quote(
1132 vec![
1133 h1(text("Heading", 4..11), 2..12),
1134 p("More text", 14..26),
1135 p("More text", 30..40)
1136 ],
1137 0..40
1138 )]
1139 );
1140 }
1141
1142 #[gpui::test]
1143 async fn test_nested_block_quote() {
1144 let parsed = parse(
1145 "\
1146> A
1147>
1148> > # B
1149>
1150> C
1151
1152More text
1153",
1154 )
1155 .await;
1156
1157 assert_eq!(
1158 parsed.children,
1159 vec![
1160 block_quote(
1161 vec![
1162 p("A", 2..4),
1163 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1164 p("C", 18..20)
1165 ],
1166 0..20
1167 ),
1168 p("More text", 21..31)
1169 ]
1170 );
1171 }
1172
1173 #[gpui::test]
1174 async fn test_code_block() {
1175 let parsed = parse(
1176 "\
1177```
1178fn main() {
1179 return 0;
1180}
1181```
1182",
1183 )
1184 .await;
1185
1186 assert_eq!(
1187 parsed.children,
1188 vec![code_block(
1189 None,
1190 "fn main() {\n return 0;\n}",
1191 0..35,
1192 None
1193 )]
1194 );
1195 }
1196
1197 #[gpui::test]
1198 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1199 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1200 language_registry.add(rust_lang());
1201
1202 let parsed = parse_markdown(
1203 "\
1204```rust
1205fn main() {
1206 return 0;
1207}
1208```
1209",
1210 None,
1211 Some(language_registry),
1212 )
1213 .await;
1214
1215 assert_eq!(
1216 parsed.children,
1217 vec![code_block(
1218 Some("rust".to_string()),
1219 "fn main() {\n return 0;\n}",
1220 0..39,
1221 Some(vec![])
1222 )]
1223 );
1224 }
1225
1226 fn rust_lang() -> Arc<Language> {
1227 Arc::new(Language::new(
1228 LanguageConfig {
1229 name: "Rust".into(),
1230 matcher: LanguageMatcher {
1231 path_suffixes: vec!["rs".into()],
1232 ..Default::default()
1233 },
1234 collapsed_placeholder: " /* ... */ ".to_string(),
1235 ..Default::default()
1236 },
1237 Some(tree_sitter_rust::LANGUAGE.into()),
1238 ))
1239 }
1240
1241 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1242 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1243 source_range,
1244 level: HeadingLevel::H1,
1245 contents,
1246 })
1247 }
1248
1249 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1250 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1251 source_range,
1252 level: HeadingLevel::H2,
1253 contents,
1254 })
1255 }
1256
1257 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1258 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1259 source_range,
1260 level: HeadingLevel::H3,
1261 contents,
1262 })
1263 }
1264
1265 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1266 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1267 }
1268
1269 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1270 ParsedMarkdownText {
1271 highlights: Vec::new(),
1272 region_ranges: Vec::new(),
1273 regions: Vec::new(),
1274 source_range,
1275 contents: contents.to_string(),
1276 }
1277 }
1278
1279 fn block_quote(
1280 children: Vec<ParsedMarkdownElement>,
1281 source_range: Range<usize>,
1282 ) -> ParsedMarkdownElement {
1283 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1284 source_range,
1285 children,
1286 })
1287 }
1288
1289 fn code_block(
1290 language: Option<String>,
1291 code: &str,
1292 source_range: Range<usize>,
1293 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1294 ) -> ParsedMarkdownElement {
1295 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1296 source_range,
1297 language,
1298 contents: code.to_string().into(),
1299 highlights,
1300 })
1301 }
1302
1303 fn list_item(
1304 source_range: Range<usize>,
1305 depth: u16,
1306 item_type: ParsedMarkdownListItemType,
1307 content: Vec<ParsedMarkdownElement>,
1308 ) -> ParsedMarkdownElement {
1309 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1310 source_range,
1311 item_type,
1312 depth,
1313 content,
1314 })
1315 }
1316
1317 fn table(
1318 source_range: Range<usize>,
1319 header: ParsedMarkdownTableRow,
1320 body: Vec<ParsedMarkdownTableRow>,
1321 ) -> ParsedMarkdownTable {
1322 ParsedMarkdownTable {
1323 column_alignments: Vec::new(),
1324 source_range,
1325 header,
1326 body,
1327 }
1328 }
1329
1330 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1331 ParsedMarkdownTableRow { children }
1332 }
1333
1334 impl PartialEq for ParsedMarkdownTable {
1335 fn eq(&self, other: &Self) -> bool {
1336 self.source_range == other.source_range
1337 && self.header == other.header
1338 && self.body == other.body
1339 }
1340 }
1341
1342 impl PartialEq for ParsedMarkdownText {
1343 fn eq(&self, other: &Self) -> bool {
1344 self.source_range == other.source_range && self.contents == other.contents
1345 }
1346 }
1347}