1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let options = Options::all();
15 let parser = Parser::new_ext(markdown_input, options);
16 let parser = MarkdownParser::new(
17 parser.into_offset_iter().collect(),
18 file_location_directory,
19 language_registry,
20 );
21 let renderer = parser.parse_document().await;
22 ParsedMarkdown {
23 children: renderer.parsed,
24 }
25}
26
27struct MarkdownParser<'a> {
28 tokens: Vec<(Event<'a>, Range<usize>)>,
29 /// The current index in the tokens array
30 cursor: usize,
31 /// The blocks that we have successfully parsed so far
32 parsed: Vec<ParsedMarkdownElement>,
33 file_location_directory: Option<PathBuf>,
34 language_registry: Option<Arc<LanguageRegistry>>,
35}
36
37impl<'a> MarkdownParser<'a> {
38 fn new(
39 tokens: Vec<(Event<'a>, Range<usize>)>,
40 file_location_directory: Option<PathBuf>,
41 language_registry: Option<Arc<LanguageRegistry>>,
42 ) -> Self {
43 Self {
44 tokens,
45 file_location_directory,
46 language_registry,
47 cursor: 0,
48 parsed: vec![],
49 }
50 }
51
52 fn eof(&self) -> bool {
53 if self.tokens.is_empty() {
54 return true;
55 }
56 self.cursor >= self.tokens.len() - 1
57 }
58
59 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
60 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
61 return self.tokens.last();
62 }
63 return self.tokens.get(self.cursor + steps);
64 }
65
66 fn previous(&self) -> Option<&(Event, Range<usize>)> {
67 if self.cursor == 0 || self.cursor > self.tokens.len() {
68 return None;
69 }
70 return self.tokens.get(self.cursor - 1);
71 }
72
73 fn current(&self) -> Option<&(Event, Range<usize>)> {
74 return self.peek(0);
75 }
76
77 fn current_event(&self) -> Option<&Event> {
78 return self.current().map(|(event, _)| event);
79 }
80
81 fn is_text_like(event: &Event) -> bool {
82 match event {
83 Event::Text(_)
84 // Represent an inline code block
85 | Event::Code(_)
86 | Event::Html(_)
87 | Event::FootnoteReference(_)
88 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
89 | Event::Start(Tag::Emphasis)
90 | Event::Start(Tag::Strong)
91 | Event::Start(Tag::Strikethrough)
92 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
93 true
94 }
95 _ => false,
96 }
97 }
98
99 async fn parse_document(mut self) -> Self {
100 while !self.eof() {
101 if let Some(block) = self.parse_block().await {
102 self.parsed.extend(block);
103 }
104 }
105 self
106 }
107
108 #[async_recursion]
109 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
110 let (current, source_range) = self.current().unwrap();
111 let source_range = source_range.clone();
112 match current {
113 Event::Start(tag) => match tag {
114 Tag::Paragraph => {
115 self.cursor += 1;
116 let text = self.parse_text(false, Some(source_range));
117 Some(vec![ParsedMarkdownElement::Paragraph(text)])
118 }
119 Tag::Heading {
120 level,
121 id: _,
122 classes: _,
123 attrs: _,
124 } => {
125 let level = *level;
126 self.cursor += 1;
127 let heading = self.parse_heading(level);
128 Some(vec![ParsedMarkdownElement::Heading(heading)])
129 }
130 Tag::Table(alignment) => {
131 let alignment = alignment.clone();
132 self.cursor += 1;
133 let table = self.parse_table(alignment);
134 Some(vec![ParsedMarkdownElement::Table(table)])
135 }
136 Tag::List(order) => {
137 let order = *order;
138 self.cursor += 1;
139 let list = self.parse_list(order).await;
140 Some(list)
141 }
142 Tag::BlockQuote(_kind) => {
143 self.cursor += 1;
144 let block_quote = self.parse_block_quote().await;
145 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
146 }
147 Tag::CodeBlock(kind) => {
148 let language = match kind {
149 pulldown_cmark::CodeBlockKind::Indented => None,
150 pulldown_cmark::CodeBlockKind::Fenced(language) => {
151 if language.is_empty() {
152 None
153 } else {
154 Some(language.to_string())
155 }
156 }
157 };
158
159 self.cursor += 1;
160
161 let code_block = self.parse_code_block(language).await;
162 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
163 }
164 _ => {
165 self.cursor += 1;
166 None
167 }
168 },
169 Event::Rule => {
170 let source_range = source_range.clone();
171 self.cursor += 1;
172 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
173 }
174 _ => {
175 self.cursor += 1;
176 None
177 }
178 }
179 }
180
181 fn parse_text(
182 &mut self,
183 should_complete_on_soft_break: bool,
184 source_range: Option<Range<usize>>,
185 ) -> ParsedMarkdownText {
186 let source_range = source_range.unwrap_or_else(|| {
187 self.current()
188 .map(|(_, range)| range.clone())
189 .unwrap_or_default()
190 });
191
192 let mut text = String::new();
193 let mut bold_depth = 0;
194 let mut italic_depth = 0;
195 let mut strikethrough_depth = 0;
196 let mut link: Option<Link> = None;
197 let mut region_ranges: Vec<Range<usize>> = vec![];
198 let mut regions: Vec<ParsedRegion> = vec![];
199 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
200
201 let mut link_urls: Vec<String> = vec![];
202 let mut link_ranges: Vec<Range<usize>> = vec![];
203
204 loop {
205 if self.eof() {
206 break;
207 }
208
209 let (current, _source_range) = self.current().unwrap();
210 let prev_len = text.len();
211 match current {
212 Event::SoftBreak => {
213 if should_complete_on_soft_break {
214 break;
215 }
216
217 // `Some text\nSome more text` should be treated as a single line.
218 text.push(' ');
219 }
220
221 Event::HardBreak => {
222 text.push('\n');
223 }
224
225 Event::Text(t) => {
226 text.push_str(t.as_ref());
227
228 let mut style = MarkdownHighlightStyle::default();
229
230 if bold_depth > 0 {
231 style.weight = FontWeight::BOLD;
232 }
233
234 if italic_depth > 0 {
235 style.italic = true;
236 }
237
238 if strikethrough_depth > 0 {
239 style.strikethrough = true;
240 }
241
242 let last_run_len = if let Some(link) = link.clone() {
243 region_ranges.push(prev_len..text.len());
244 regions.push(ParsedRegion {
245 code: false,
246 link: Some(link),
247 });
248 style.underline = true;
249 prev_len
250 } else {
251 // Manually scan for links
252 let mut finder = linkify::LinkFinder::new();
253 finder.kinds(&[linkify::LinkKind::Url]);
254 let mut last_link_len = prev_len;
255 for link in finder.links(t) {
256 let start = link.start();
257 let end = link.end();
258 let range = (prev_len + start)..(prev_len + end);
259 link_ranges.push(range.clone());
260 link_urls.push(link.as_str().to_string());
261
262 // If there is a style before we match a link, we have to add this to the highlighted ranges
263 if style != MarkdownHighlightStyle::default()
264 && last_link_len < link.start()
265 {
266 highlights.push((
267 last_link_len..link.start(),
268 MarkdownHighlight::Style(style.clone()),
269 ));
270 }
271
272 highlights.push((
273 range.clone(),
274 MarkdownHighlight::Style(MarkdownHighlightStyle {
275 underline: true,
276 ..style
277 }),
278 ));
279 region_ranges.push(range.clone());
280 regions.push(ParsedRegion {
281 code: false,
282 link: Some(Link::Web {
283 url: link.as_str().to_string(),
284 }),
285 });
286
287 last_link_len = end;
288 }
289 last_link_len
290 };
291
292 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
293 let mut new_highlight = true;
294 if let Some((last_range, last_style)) = highlights.last_mut() {
295 if last_range.end == last_run_len
296 && last_style == &MarkdownHighlight::Style(style.clone())
297 {
298 last_range.end = text.len();
299 new_highlight = false;
300 }
301 }
302 if new_highlight {
303 highlights
304 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
305 }
306 }
307 }
308
309 // Note: This event means "inline code" and not "code block"
310 Event::Code(t) => {
311 text.push_str(t.as_ref());
312 region_ranges.push(prev_len..text.len());
313
314 if link.is_some() {
315 highlights.push((
316 prev_len..text.len(),
317 MarkdownHighlight::Style(MarkdownHighlightStyle {
318 underline: true,
319 ..Default::default()
320 }),
321 ));
322 }
323
324 regions.push(ParsedRegion {
325 code: true,
326 link: link.clone(),
327 });
328 }
329
330 Event::Start(tag) => match tag {
331 Tag::Emphasis => italic_depth += 1,
332 Tag::Strong => bold_depth += 1,
333 Tag::Strikethrough => strikethrough_depth += 1,
334 Tag::Link {
335 link_type: _,
336 dest_url,
337 title: _,
338 id: _,
339 } => {
340 link = Link::identify(
341 self.file_location_directory.clone(),
342 dest_url.to_string(),
343 );
344 }
345 _ => {
346 break;
347 }
348 },
349
350 Event::End(tag) => match tag {
351 TagEnd::Emphasis => {
352 italic_depth -= 1;
353 }
354 TagEnd::Strong => {
355 bold_depth -= 1;
356 }
357 TagEnd::Strikethrough => {
358 strikethrough_depth -= 1;
359 }
360 TagEnd::Link => {
361 link = None;
362 }
363 TagEnd::Paragraph => {
364 self.cursor += 1;
365 break;
366 }
367 _ => {
368 break;
369 }
370 },
371
372 _ => {
373 break;
374 }
375 }
376
377 self.cursor += 1;
378 }
379
380 ParsedMarkdownText {
381 source_range,
382 contents: text,
383 highlights,
384 regions,
385 region_ranges,
386 }
387 }
388
389 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
390 let (_event, source_range) = self.previous().unwrap();
391 let source_range = source_range.clone();
392 let text = self.parse_text(true, None);
393
394 // Advance past the heading end tag
395 self.cursor += 1;
396
397 ParsedMarkdownHeading {
398 source_range: source_range.clone(),
399 level: match level {
400 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
401 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
402 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
403 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
404 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
405 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
406 },
407 contents: text,
408 }
409 }
410
411 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
412 let (_event, source_range) = self.previous().unwrap();
413 let source_range = source_range.clone();
414 let mut header = ParsedMarkdownTableRow::new();
415 let mut body = vec![];
416 let mut current_row = vec![];
417 let mut in_header = true;
418 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
419
420 loop {
421 if self.eof() {
422 break;
423 }
424
425 let (current, source_range) = self.current().unwrap();
426 let source_range = source_range.clone();
427 match current {
428 Event::Start(Tag::TableHead)
429 | Event::Start(Tag::TableRow)
430 | Event::End(TagEnd::TableCell) => {
431 self.cursor += 1;
432 }
433 Event::Start(Tag::TableCell) => {
434 self.cursor += 1;
435 let cell_contents = self.parse_text(false, Some(source_range));
436 current_row.push(cell_contents);
437 }
438 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
439 self.cursor += 1;
440 let new_row = std::mem::take(&mut current_row);
441 if in_header {
442 header.children = new_row;
443 in_header = false;
444 } else {
445 let row = ParsedMarkdownTableRow::with_children(new_row);
446 body.push(row);
447 }
448 }
449 Event::End(TagEnd::Table) => {
450 self.cursor += 1;
451 break;
452 }
453 _ => {
454 break;
455 }
456 }
457 }
458
459 ParsedMarkdownTable {
460 source_range,
461 header,
462 body,
463 column_alignments,
464 }
465 }
466
467 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
468 match alignment {
469 Alignment::None => ParsedMarkdownTableAlignment::None,
470 Alignment::Left => ParsedMarkdownTableAlignment::Left,
471 Alignment::Center => ParsedMarkdownTableAlignment::Center,
472 Alignment::Right => ParsedMarkdownTableAlignment::Right,
473 }
474 }
475
476 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
477 let (_, list_source_range) = self.previous().unwrap();
478
479 let mut items = Vec::new();
480 let mut items_stack = vec![Vec::new()];
481 let mut depth = 1;
482 let mut task_item = None;
483 let mut order = order;
484 let mut order_stack = Vec::new();
485
486 let mut insertion_indices = FxHashMap::default();
487 let mut source_ranges = FxHashMap::default();
488 let mut start_item_range = list_source_range.clone();
489
490 while !self.eof() {
491 let (current, source_range) = self.current().unwrap();
492 match current {
493 Event::Start(Tag::List(new_order)) => {
494 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
495 insertion_indices.insert(depth, items.len());
496 }
497
498 // We will use the start of the nested list as the end for the current item's range,
499 // because we don't care about the hierarchy of list items
500 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
501 e.insert(start_item_range.start..source_range.start);
502 }
503
504 order_stack.push(order);
505 order = *new_order;
506 self.cursor += 1;
507 depth += 1;
508 }
509 Event::End(TagEnd::List(_)) => {
510 order = order_stack.pop().flatten();
511 self.cursor += 1;
512 depth -= 1;
513
514 if depth == 0 {
515 break;
516 }
517 }
518 Event::Start(Tag::Item) => {
519 start_item_range = source_range.clone();
520
521 self.cursor += 1;
522 items_stack.push(Vec::new());
523
524 // Check for task list marker (`- [ ]` or `- [x]`)
525 if let Some(event) = self.current_event() {
526 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
527 if event == &Event::Start(Tag::Paragraph) {
528 self.cursor += 1;
529 }
530
531 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
532 task_item = Some((*checked, range.clone()));
533 self.cursor += 1;
534 }
535 }
536
537 if let Some((event, range)) = self.current() {
538 // This is a plain list item.
539 // For example `- some text` or `1. [Docs](./docs.md)`
540 if MarkdownParser::is_text_like(event) {
541 let text = self.parse_text(false, Some(range.clone()));
542 let block = ParsedMarkdownElement::Paragraph(text);
543 if let Some(content) = items_stack.last_mut() {
544 content.push(block);
545 }
546 } else {
547 let block = self.parse_block().await;
548 if let Some(block) = block {
549 if let Some(content) = items_stack.last_mut() {
550 content.extend(block);
551 }
552 }
553 }
554 }
555
556 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
557 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
558 self.cursor += 1;
559 }
560 }
561 Event::End(TagEnd::Item) => {
562 self.cursor += 1;
563
564 let item_type = if let Some((checked, range)) = task_item {
565 ParsedMarkdownListItemType::Task(checked, range)
566 } else if let Some(order) = order {
567 ParsedMarkdownListItemType::Ordered(order)
568 } else {
569 ParsedMarkdownListItemType::Unordered
570 };
571
572 if let Some(current) = order {
573 order = Some(current + 1);
574 }
575
576 if let Some(content) = items_stack.pop() {
577 let source_range = source_ranges
578 .remove(&depth)
579 .unwrap_or(start_item_range.clone());
580
581 // We need to remove the last character of the source range, because it includes the newline character
582 let source_range = source_range.start..source_range.end - 1;
583 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
584 source_range,
585 content,
586 depth,
587 item_type,
588 });
589
590 if let Some(index) = insertion_indices.get(&depth) {
591 items.insert(*index, item);
592 insertion_indices.remove(&depth);
593 } else {
594 items.push(item);
595 }
596 }
597
598 task_item = None;
599 }
600 _ => {
601 if depth == 0 {
602 break;
603 }
604 // This can only happen if a list item starts with more then one paragraph,
605 // or the list item contains blocks that should be rendered after the nested list items
606 let block = self.parse_block().await;
607 if let Some(block) = block {
608 if let Some(items_stack) = items_stack.last_mut() {
609 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
610 if !insertion_indices.contains_key(&depth) {
611 items_stack.extend(block);
612 continue;
613 }
614 }
615
616 // Otherwise we need to insert the block after all the nested items
617 // that have been parsed so far
618 items.extend(block);
619 }
620 }
621 }
622 }
623
624 items
625 }
626
627 #[async_recursion]
628 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
629 let (_event, source_range) = self.previous().unwrap();
630 let source_range = source_range.clone();
631 let mut nested_depth = 1;
632
633 let mut children: Vec<ParsedMarkdownElement> = vec![];
634
635 while !self.eof() {
636 let block = self.parse_block().await;
637
638 if let Some(block) = block {
639 children.extend(block);
640 } else {
641 break;
642 }
643
644 if self.eof() {
645 break;
646 }
647
648 let (current, _source_range) = self.current().unwrap();
649 match current {
650 // This is a nested block quote.
651 // Record that we're in a nested block quote and continue parsing.
652 // We don't need to advance the cursor since the next
653 // call to `parse_block` will handle it.
654 Event::Start(Tag::BlockQuote(_kind)) => {
655 nested_depth += 1;
656 }
657 Event::End(TagEnd::BlockQuote(_kind)) => {
658 nested_depth -= 1;
659 if nested_depth == 0 {
660 self.cursor += 1;
661 break;
662 }
663 }
664 _ => {}
665 };
666 }
667
668 ParsedMarkdownBlockQuote {
669 source_range,
670 children,
671 }
672 }
673
674 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
675 let (_event, source_range) = self.previous().unwrap();
676 let source_range = source_range.clone();
677 let mut code = String::new();
678
679 while !self.eof() {
680 let (current, _source_range) = self.current().unwrap();
681 match current {
682 Event::Text(text) => {
683 code.push_str(text);
684 self.cursor += 1;
685 }
686 Event::End(TagEnd::CodeBlock) => {
687 self.cursor += 1;
688 break;
689 }
690 _ => {
691 break;
692 }
693 }
694 }
695
696 let highlights = if let Some(language) = &language {
697 if let Some(registry) = &self.language_registry {
698 let rope: language::Rope = code.as_str().into();
699 registry
700 .language_for_name_or_extension(language)
701 .await
702 .map(|l| l.highlight_text(&rope, 0..code.len()))
703 .ok()
704 } else {
705 None
706 }
707 } else {
708 None
709 };
710
711 ParsedMarkdownCodeBlock {
712 source_range,
713 contents: code.trim().to_string().into(),
714 language,
715 highlights,
716 }
717 }
718}
719
720#[cfg(test)]
721mod tests {
722 use super::*;
723
724 use gpui::BackgroundExecutor;
725 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
726 use pretty_assertions::assert_eq;
727
728 use ParsedMarkdownListItemType::*;
729
730 async fn parse(input: &str) -> ParsedMarkdown {
731 parse_markdown(input, None, None).await
732 }
733
734 #[gpui::test]
735 async fn test_headings() {
736 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
737
738 assert_eq!(
739 parsed.children,
740 vec![
741 h1(text("Heading one", 2..13), 0..14),
742 h2(text("Heading two", 17..28), 14..29),
743 h3(text("Heading three", 33..46), 29..46),
744 ]
745 );
746 }
747
748 #[gpui::test]
749 async fn test_newlines_dont_new_paragraphs() {
750 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
751
752 assert_eq!(
753 parsed.children,
754 vec![p("Some text that is bolded and italicized", 0..46)]
755 );
756 }
757
758 #[gpui::test]
759 async fn test_heading_with_paragraph() {
760 let parsed = parse("# Zed\nThe editor").await;
761
762 assert_eq!(
763 parsed.children,
764 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
765 );
766 }
767
768 #[gpui::test]
769 async fn test_double_newlines_do_new_paragraphs() {
770 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
771
772 assert_eq!(
773 parsed.children,
774 vec![
775 p("Some text that is bolded", 0..29),
776 p("and italicized", 31..47),
777 ]
778 );
779 }
780
781 #[gpui::test]
782 async fn test_bold_italic_text() {
783 let parsed = parse("Some text **that is bolded** and *italicized*").await;
784
785 assert_eq!(
786 parsed.children,
787 vec![p("Some text that is bolded and italicized", 0..45)]
788 );
789 }
790
791 #[gpui::test]
792 async fn test_nested_bold_strikethrough_text() {
793 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
794
795 assert_eq!(parsed.children.len(), 1);
796 assert_eq!(
797 parsed.children[0],
798 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
799 source_range: 0..35,
800 contents: "Some bostrikethroughld text".to_string(),
801 highlights: Vec::new(),
802 region_ranges: Vec::new(),
803 regions: Vec::new(),
804 })
805 );
806
807 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
808 text
809 } else {
810 panic!("Expected a paragraph");
811 };
812 assert_eq!(
813 paragraph.highlights,
814 vec![
815 (
816 5..7,
817 MarkdownHighlight::Style(MarkdownHighlightStyle {
818 weight: FontWeight::BOLD,
819 ..Default::default()
820 }),
821 ),
822 (
823 7..20,
824 MarkdownHighlight::Style(MarkdownHighlightStyle {
825 weight: FontWeight::BOLD,
826 strikethrough: true,
827 ..Default::default()
828 }),
829 ),
830 (
831 20..22,
832 MarkdownHighlight::Style(MarkdownHighlightStyle {
833 weight: FontWeight::BOLD,
834 ..Default::default()
835 }),
836 ),
837 ]
838 );
839 }
840
841 #[gpui::test]
842 async fn test_raw_links_detection() {
843 let parsed = parse("Checkout this https://zed.dev link").await;
844
845 assert_eq!(
846 parsed.children,
847 vec![p("Checkout this https://zed.dev link", 0..34)]
848 );
849
850 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
851 text
852 } else {
853 panic!("Expected a paragraph");
854 };
855 assert_eq!(
856 paragraph.highlights,
857 vec![(
858 14..29,
859 MarkdownHighlight::Style(MarkdownHighlightStyle {
860 underline: true,
861 ..Default::default()
862 }),
863 )]
864 );
865 assert_eq!(
866 paragraph.regions,
867 vec![ParsedRegion {
868 code: false,
869 link: Some(Link::Web {
870 url: "https://zed.dev".to_string()
871 }),
872 }]
873 );
874 assert_eq!(paragraph.region_ranges, vec![14..29]);
875 }
876
877 #[gpui::test]
878 async fn test_header_only_table() {
879 let markdown = "\
880| Header 1 | Header 2 |
881|----------|----------|
882
883Some other content
884";
885
886 let expected_table = table(
887 0..48,
888 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
889 vec![],
890 );
891
892 assert_eq!(
893 parse(markdown).await.children[0],
894 ParsedMarkdownElement::Table(expected_table)
895 );
896 }
897
898 #[gpui::test]
899 async fn test_basic_table() {
900 let markdown = "\
901| Header 1 | Header 2 |
902|----------|----------|
903| Cell 1 | Cell 2 |
904| Cell 3 | Cell 4 |";
905
906 let expected_table = table(
907 0..95,
908 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
909 vec![
910 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
911 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
912 ],
913 );
914
915 assert_eq!(
916 parse(markdown).await.children[0],
917 ParsedMarkdownElement::Table(expected_table)
918 );
919 }
920
921 #[gpui::test]
922 async fn test_list_basic() {
923 let parsed = parse(
924 "\
925* Item 1
926* Item 2
927* Item 3
928",
929 )
930 .await;
931
932 assert_eq!(
933 parsed.children,
934 vec![
935 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
936 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
937 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
938 ],
939 );
940 }
941
942 #[gpui::test]
943 async fn test_list_with_tasks() {
944 let parsed = parse(
945 "\
946- [ ] TODO
947- [x] Checked
948",
949 )
950 .await;
951
952 assert_eq!(
953 parsed.children,
954 vec![
955 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
956 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
957 ],
958 );
959 }
960
961 #[gpui::test]
962 async fn test_list_with_linebreak_is_handled_correctly() {
963 let parsed = parse(
964 "\
965- [ ] Task 1
966
967- [x] Task 2
968",
969 )
970 .await;
971
972 assert_eq!(
973 parsed.children,
974 vec![
975 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
976 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
977 ],
978 );
979 }
980
981 #[gpui::test]
982 async fn test_list_nested() {
983 let parsed = parse(
984 "\
985* Item 1
986* Item 2
987* Item 3
988
9891. Hello
9901. Two
991 1. Three
9922. Four
9933. Five
994
995* First
996 1. Hello
997 1. Goodbyte
998 - Inner
999 - Inner
1000 2. Goodbyte
1001* Last
1002",
1003 )
1004 .await;
1005
1006 assert_eq!(
1007 parsed.children,
1008 vec![
1009 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1010 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1011 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1012 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1013 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1014 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1015 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1016 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1017 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1018 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1019 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1020 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1021 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1022 list_item(143..154, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1023 list_item(155..161, 1, Unordered, vec![p("Last", 157..161)]),
1024 ]
1025 );
1026 }
1027
1028 #[gpui::test]
1029 async fn test_list_with_nested_content() {
1030 let parsed = parse(
1031 "\
1032* This is a list item with two paragraphs.
1033
1034 This is the second paragraph in the list item.
1035",
1036 )
1037 .await;
1038
1039 assert_eq!(
1040 parsed.children,
1041 vec![list_item(
1042 0..96,
1043 1,
1044 Unordered,
1045 vec![
1046 p("This is a list item with two paragraphs.", 4..44),
1047 p("This is the second paragraph in the list item.", 50..97)
1048 ],
1049 ),],
1050 );
1051 }
1052
1053 #[gpui::test]
1054 async fn test_nested_list_with_paragraph_inside() {
1055 let parsed = parse(
1056 "\
10571. a
1058 1. b
1059 1. c
1060
1061 text
1062
1063 1. d
1064",
1065 )
1066 .await;
1067
1068 assert_eq!(
1069 parsed.children,
1070 vec![
1071 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1072 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1073 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1074 p("text", 32..37),
1075 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1076 ],
1077 );
1078 }
1079
1080 #[gpui::test]
1081 async fn test_list_with_leading_text() {
1082 let parsed = parse(
1083 "\
1084* `code`
1085* **bold**
1086* [link](https://example.com)
1087",
1088 )
1089 .await;
1090
1091 assert_eq!(
1092 parsed.children,
1093 vec![
1094 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1095 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1096 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1097 ],
1098 );
1099 }
1100
1101 #[gpui::test]
1102 async fn test_simple_block_quote() {
1103 let parsed = parse("> Simple block quote with **styled text**").await;
1104
1105 assert_eq!(
1106 parsed.children,
1107 vec![block_quote(
1108 vec![p("Simple block quote with styled text", 2..41)],
1109 0..41
1110 )]
1111 );
1112 }
1113
1114 #[gpui::test]
1115 async fn test_simple_block_quote_with_multiple_lines() {
1116 let parsed = parse(
1117 "\
1118> # Heading
1119> More
1120> text
1121>
1122> More text
1123",
1124 )
1125 .await;
1126
1127 assert_eq!(
1128 parsed.children,
1129 vec![block_quote(
1130 vec![
1131 h1(text("Heading", 4..11), 2..12),
1132 p("More text", 14..26),
1133 p("More text", 30..40)
1134 ],
1135 0..40
1136 )]
1137 );
1138 }
1139
1140 #[gpui::test]
1141 async fn test_nested_block_quote() {
1142 let parsed = parse(
1143 "\
1144> A
1145>
1146> > # B
1147>
1148> C
1149
1150More text
1151",
1152 )
1153 .await;
1154
1155 assert_eq!(
1156 parsed.children,
1157 vec![
1158 block_quote(
1159 vec![
1160 p("A", 2..4),
1161 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1162 p("C", 18..20)
1163 ],
1164 0..20
1165 ),
1166 p("More text", 21..31)
1167 ]
1168 );
1169 }
1170
1171 #[gpui::test]
1172 async fn test_code_block() {
1173 let parsed = parse(
1174 "\
1175```
1176fn main() {
1177 return 0;
1178}
1179```
1180",
1181 )
1182 .await;
1183
1184 assert_eq!(
1185 parsed.children,
1186 vec![code_block(
1187 None,
1188 "fn main() {\n return 0;\n}",
1189 0..35,
1190 None
1191 )]
1192 );
1193 }
1194
1195 #[gpui::test]
1196 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1197 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1198 language_registry.add(rust_lang());
1199
1200 let parsed = parse_markdown(
1201 "\
1202```rust
1203fn main() {
1204 return 0;
1205}
1206```
1207",
1208 None,
1209 Some(language_registry),
1210 )
1211 .await;
1212
1213 assert_eq!(
1214 parsed.children,
1215 vec![code_block(
1216 Some("rust".to_string()),
1217 "fn main() {\n return 0;\n}",
1218 0..39,
1219 Some(vec![])
1220 )]
1221 );
1222 }
1223
1224 fn rust_lang() -> Arc<Language> {
1225 Arc::new(Language::new(
1226 LanguageConfig {
1227 name: "Rust".into(),
1228 matcher: LanguageMatcher {
1229 path_suffixes: vec!["rs".into()],
1230 ..Default::default()
1231 },
1232 collapsed_placeholder: " /* ... */ ".to_string(),
1233 ..Default::default()
1234 },
1235 Some(tree_sitter_rust::language()),
1236 ))
1237 }
1238
1239 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1240 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1241 source_range,
1242 level: HeadingLevel::H1,
1243 contents,
1244 })
1245 }
1246
1247 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1248 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1249 source_range,
1250 level: HeadingLevel::H2,
1251 contents,
1252 })
1253 }
1254
1255 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1256 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1257 source_range,
1258 level: HeadingLevel::H3,
1259 contents,
1260 })
1261 }
1262
1263 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1264 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1265 }
1266
1267 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1268 ParsedMarkdownText {
1269 highlights: Vec::new(),
1270 region_ranges: Vec::new(),
1271 regions: Vec::new(),
1272 source_range,
1273 contents: contents.to_string(),
1274 }
1275 }
1276
1277 fn block_quote(
1278 children: Vec<ParsedMarkdownElement>,
1279 source_range: Range<usize>,
1280 ) -> ParsedMarkdownElement {
1281 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1282 source_range,
1283 children,
1284 })
1285 }
1286
1287 fn code_block(
1288 language: Option<String>,
1289 code: &str,
1290 source_range: Range<usize>,
1291 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1292 ) -> ParsedMarkdownElement {
1293 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1294 source_range,
1295 language,
1296 contents: code.to_string().into(),
1297 highlights,
1298 })
1299 }
1300
1301 fn list_item(
1302 source_range: Range<usize>,
1303 depth: u16,
1304 item_type: ParsedMarkdownListItemType,
1305 content: Vec<ParsedMarkdownElement>,
1306 ) -> ParsedMarkdownElement {
1307 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1308 source_range,
1309 item_type,
1310 depth,
1311 content,
1312 })
1313 }
1314
1315 fn table(
1316 source_range: Range<usize>,
1317 header: ParsedMarkdownTableRow,
1318 body: Vec<ParsedMarkdownTableRow>,
1319 ) -> ParsedMarkdownTable {
1320 ParsedMarkdownTable {
1321 column_alignments: Vec::new(),
1322 source_range,
1323 header,
1324 body,
1325 }
1326 }
1327
1328 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1329 ParsedMarkdownTableRow { children }
1330 }
1331
1332 impl PartialEq for ParsedMarkdownTable {
1333 fn eq(&self, other: &Self) -> bool {
1334 self.source_range == other.source_range
1335 && self.header == other.header
1336 && self.body == other.body
1337 }
1338 }
1339
1340 impl PartialEq for ParsedMarkdownText {
1341 fn eq(&self, other: &Self) -> bool {
1342 self.source_range == other.source_range && self.contents == other.contents
1343 }
1344 }
1345}