1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let options = Options::all();
15 let parser = Parser::new_ext(markdown_input, options);
16 let parser = MarkdownParser::new(
17 parser.into_offset_iter().collect(),
18 file_location_directory,
19 language_registry,
20 );
21 let renderer = parser.parse_document().await;
22 ParsedMarkdown {
23 children: renderer.parsed,
24 }
25}
26
/// Cursor-based parser that turns a flat list of pulldown-cmark events into
/// `ParsedMarkdownElement` blocks.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the range the offset iterator reported for it.
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` whenever a link destination is encountered.
    file_location_directory: Option<PathBuf>,
    /// When present, used to look up the language of a fenced code block for highlighting.
    language_registry: Option<Arc<LanguageRegistry>>,
}
36
37impl<'a> MarkdownParser<'a> {
38 fn new(
39 tokens: Vec<(Event<'a>, Range<usize>)>,
40 file_location_directory: Option<PathBuf>,
41 language_registry: Option<Arc<LanguageRegistry>>,
42 ) -> Self {
43 Self {
44 tokens,
45 file_location_directory,
46 language_registry,
47 cursor: 0,
48 parsed: vec![],
49 }
50 }
51
52 fn eof(&self) -> bool {
53 if self.tokens.is_empty() {
54 return true;
55 }
56 self.cursor >= self.tokens.len() - 1
57 }
58
59 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
60 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
61 return self.tokens.last();
62 }
63 return self.tokens.get(self.cursor + steps);
64 }
65
66 fn previous(&self) -> Option<&(Event, Range<usize>)> {
67 if self.cursor == 0 || self.cursor > self.tokens.len() {
68 return None;
69 }
70 return self.tokens.get(self.cursor - 1);
71 }
72
73 fn current(&self) -> Option<&(Event, Range<usize>)> {
74 return self.peek(0);
75 }
76
77 fn current_event(&self) -> Option<&Event> {
78 return self.current().map(|(event, _)| event);
79 }
80
81 fn is_text_like(event: &Event) -> bool {
82 match event {
83 Event::Text(_)
84 // Represent an inline code block
85 | Event::Code(_)
86 | Event::Html(_)
87 | Event::FootnoteReference(_)
88 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
89 | Event::Start(Tag::Emphasis)
90 | Event::Start(Tag::Strong)
91 | Event::Start(Tag::Strikethrough)
92 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
93 return true;
94 }
95 _ => return false,
96 }
97 }
98
99 async fn parse_document(mut self) -> Self {
100 while !self.eof() {
101 if let Some(block) = self.parse_block().await {
102 self.parsed.extend(block);
103 }
104 }
105 self
106 }
107
108 #[async_recursion]
109 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
110 let (current, source_range) = self.current().unwrap();
111 let source_range = source_range.clone();
112 match current {
113 Event::Start(tag) => match tag {
114 Tag::Paragraph => {
115 self.cursor += 1;
116 let text = self.parse_text(false, Some(source_range));
117 Some(vec![ParsedMarkdownElement::Paragraph(text)])
118 }
119 Tag::Heading {
120 level,
121 id: _,
122 classes: _,
123 attrs: _,
124 } => {
125 let level = *level;
126 self.cursor += 1;
127 let heading = self.parse_heading(level);
128 Some(vec![ParsedMarkdownElement::Heading(heading)])
129 }
130 Tag::Table(alignment) => {
131 let alignment = alignment.clone();
132 self.cursor += 1;
133 let table = self.parse_table(alignment);
134 Some(vec![ParsedMarkdownElement::Table(table)])
135 }
136 Tag::List(order) => {
137 let order = *order;
138 self.cursor += 1;
139 let list = self.parse_list(order).await;
140 Some(list)
141 }
142 Tag::BlockQuote => {
143 self.cursor += 1;
144 let block_quote = self.parse_block_quote().await;
145 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
146 }
147 Tag::CodeBlock(kind) => {
148 let language = match kind {
149 pulldown_cmark::CodeBlockKind::Indented => None,
150 pulldown_cmark::CodeBlockKind::Fenced(language) => {
151 if language.is_empty() {
152 None
153 } else {
154 Some(language.to_string())
155 }
156 }
157 };
158
159 self.cursor += 1;
160
161 let code_block = self.parse_code_block(language).await;
162 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
163 }
164 _ => {
165 self.cursor += 1;
166 None
167 }
168 },
169 Event::Rule => {
170 let source_range = source_range.clone();
171 self.cursor += 1;
172 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
173 }
174 _ => {
175 self.cursor += 1;
176 None
177 }
178 }
179 }
180
181 fn parse_text(
182 &mut self,
183 should_complete_on_soft_break: bool,
184 source_range: Option<Range<usize>>,
185 ) -> ParsedMarkdownText {
186 let source_range = source_range.unwrap_or_else(|| {
187 self.current()
188 .map(|(_, range)| range.clone())
189 .unwrap_or_default()
190 });
191
192 let mut text = String::new();
193 let mut bold_depth = 0;
194 let mut italic_depth = 0;
195 let mut strikethrough_depth = 0;
196 let mut link: Option<Link> = None;
197 let mut region_ranges: Vec<Range<usize>> = vec![];
198 let mut regions: Vec<ParsedRegion> = vec![];
199 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
200
201 let mut link_urls: Vec<String> = vec![];
202 let mut link_ranges: Vec<Range<usize>> = vec![];
203
204 loop {
205 if self.eof() {
206 break;
207 }
208
209 let (current, _source_range) = self.current().unwrap();
210 let prev_len = text.len();
211 match current {
212 Event::SoftBreak => {
213 if should_complete_on_soft_break {
214 break;
215 }
216
217 // `Some text\nSome more text` should be treated as a single line.
218 text.push(' ');
219 }
220
221 Event::HardBreak => {
222 text.push('\n');
223 }
224
225 Event::Text(t) => {
226 text.push_str(t.as_ref());
227
228 let mut style = MarkdownHighlightStyle::default();
229
230 if bold_depth > 0 {
231 style.weight = FontWeight::BOLD;
232 }
233
234 if italic_depth > 0 {
235 style.italic = true;
236 }
237
238 if strikethrough_depth > 0 {
239 style.strikethrough = true;
240 }
241
242 let last_run_len = if let Some(link) = link.clone() {
243 region_ranges.push(prev_len..text.len());
244 regions.push(ParsedRegion {
245 code: false,
246 link: Some(link),
247 });
248 style.underline = true;
249 prev_len
250 } else {
251 // Manually scan for links
252 let mut finder = linkify::LinkFinder::new();
253 finder.kinds(&[linkify::LinkKind::Url]);
254 let mut last_link_len = prev_len;
255 for link in finder.links(&t) {
256 let start = link.start();
257 let end = link.end();
258 let range = (prev_len + start)..(prev_len + end);
259 link_ranges.push(range.clone());
260 link_urls.push(link.as_str().to_string());
261
262 // If there is a style before we match a link, we have to add this to the highlighted ranges
263 if style != MarkdownHighlightStyle::default()
264 && last_link_len < link.start()
265 {
266 highlights.push((
267 last_link_len..link.start(),
268 MarkdownHighlight::Style(style.clone()),
269 ));
270 }
271
272 highlights.push((
273 range.clone(),
274 MarkdownHighlight::Style(MarkdownHighlightStyle {
275 underline: true,
276 ..style
277 }),
278 ));
279 region_ranges.push(range.clone());
280 regions.push(ParsedRegion {
281 code: false,
282 link: Some(Link::Web {
283 url: link.as_str().to_string(),
284 }),
285 });
286
287 last_link_len = end;
288 }
289 last_link_len
290 };
291
292 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
293 let mut new_highlight = true;
294 if let Some((last_range, last_style)) = highlights.last_mut() {
295 if last_range.end == last_run_len
296 && last_style == &MarkdownHighlight::Style(style.clone())
297 {
298 last_range.end = text.len();
299 new_highlight = false;
300 }
301 }
302 if new_highlight {
303 highlights
304 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
305 }
306 }
307 }
308
309 // Note: This event means "inline code" and not "code block"
310 Event::Code(t) => {
311 text.push_str(t.as_ref());
312 region_ranges.push(prev_len..text.len());
313
314 if link.is_some() {
315 highlights.push((
316 prev_len..text.len(),
317 MarkdownHighlight::Style(MarkdownHighlightStyle {
318 underline: true,
319 ..Default::default()
320 }),
321 ));
322 }
323
324 regions.push(ParsedRegion {
325 code: true,
326 link: link.clone(),
327 });
328 }
329
330 Event::Start(tag) => match tag {
331 Tag::Emphasis => italic_depth += 1,
332 Tag::Strong => bold_depth += 1,
333 Tag::Strikethrough => strikethrough_depth += 1,
334 Tag::Link {
335 link_type: _,
336 dest_url,
337 title: _,
338 id: _,
339 } => {
340 link = Link::identify(
341 self.file_location_directory.clone(),
342 dest_url.to_string(),
343 );
344 }
345 _ => {
346 break;
347 }
348 },
349
350 Event::End(tag) => match tag {
351 TagEnd::Emphasis => {
352 italic_depth -= 1;
353 }
354 TagEnd::Strong => {
355 bold_depth -= 1;
356 }
357 TagEnd::Strikethrough => {
358 strikethrough_depth -= 1;
359 }
360 TagEnd::Link => {
361 link = None;
362 }
363 TagEnd::Paragraph => {
364 self.cursor += 1;
365 break;
366 }
367 _ => {
368 break;
369 }
370 },
371
372 _ => {
373 break;
374 }
375 }
376
377 self.cursor += 1;
378 }
379
380 ParsedMarkdownText {
381 source_range,
382 contents: text,
383 highlights,
384 regions,
385 region_ranges,
386 }
387 }
388
389 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
390 let (_event, source_range) = self.previous().unwrap();
391 let source_range = source_range.clone();
392 let text = self.parse_text(true, None);
393
394 // Advance past the heading end tag
395 self.cursor += 1;
396
397 ParsedMarkdownHeading {
398 source_range: source_range.clone(),
399 level: match level {
400 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
401 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
402 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
403 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
404 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
405 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
406 },
407 contents: text,
408 }
409 }
410
411 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
412 let (_event, source_range) = self.previous().unwrap();
413 let source_range = source_range.clone();
414 let mut header = ParsedMarkdownTableRow::new();
415 let mut body = vec![];
416 let mut current_row = vec![];
417 let mut in_header = true;
418 let column_alignments = alignment
419 .iter()
420 .map(|a| Self::convert_alignment(a))
421 .collect();
422
423 loop {
424 if self.eof() {
425 break;
426 }
427
428 let (current, source_range) = self.current().unwrap();
429 let source_range = source_range.clone();
430 match current {
431 Event::Start(Tag::TableHead)
432 | Event::Start(Tag::TableRow)
433 | Event::End(TagEnd::TableCell) => {
434 self.cursor += 1;
435 }
436 Event::Start(Tag::TableCell) => {
437 self.cursor += 1;
438 let cell_contents = self.parse_text(false, Some(source_range));
439 current_row.push(cell_contents);
440 }
441 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
442 self.cursor += 1;
443 let new_row = std::mem::replace(&mut current_row, vec![]);
444 if in_header {
445 header.children = new_row;
446 in_header = false;
447 } else {
448 let row = ParsedMarkdownTableRow::with_children(new_row);
449 body.push(row);
450 }
451 }
452 Event::End(TagEnd::Table) => {
453 self.cursor += 1;
454 break;
455 }
456 _ => {
457 break;
458 }
459 }
460 }
461
462 ParsedMarkdownTable {
463 source_range,
464 header,
465 body,
466 column_alignments,
467 }
468 }
469
470 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
471 match alignment {
472 Alignment::None => ParsedMarkdownTableAlignment::None,
473 Alignment::Left => ParsedMarkdownTableAlignment::Left,
474 Alignment::Center => ParsedMarkdownTableAlignment::Center,
475 Alignment::Right => ParsedMarkdownTableAlignment::Right,
476 }
477 }
478
    /// Parses a list whose start tag has just been consumed and returns its items as a
    /// flat vector.
    ///
    /// Nested lists are not returned as a tree: every item becomes its own
    /// `ParsedMarkdownElement::ListItem` that records its nesting `depth` (the outermost
    /// list is depth 1). `order` is the start number of an ordered list, or `None` for an
    /// unordered one; it is incremented after every finished item.
    ///
    /// Blocks that follow a nested list inside an item are appended to the returned
    /// vector directly, i.e. they are not wrapped in a list item.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items, in output order.
        let mut items = Vec::new();
        // Content collected for the items that are currently open; one entry is pushed per
        // `Start(Item)` and popped on the matching `End(Item)`.
        let mut items_stack = vec![Vec::new()];
        let mut depth = 1;
        // Set when the current item starts with a task list marker.
        let mut task_item = None;
        let mut order = order;
        // Numbering of the enclosing lists while a nested list is being parsed.
        let mut order_stack = Vec::new();

        // Per depth: the position in `items` at which the still-open parent item has to be
        // inserted, so that it ends up in front of its nested items.
        let mut insertion_indices = FxHashMap::default();
        // Per depth: the source range of the open parent item, cut off where its first
        // nested list starts.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Restore the numbering of the enclosing list, if any.
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list this function was called for has ended.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(Vec::new());

                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_item = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                content.push(block);
                            }
                        } else {
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                if let Some(content) = items_stack.last_mut() {
                                    content.extend(block);
                                }
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // A task marker takes precedence over the ordered/unordered distinction.
                    let item_type = if let Some((checked, range)) = task_item {
                        ParsedMarkdownListItemType::Task(checked, range)
                    } else if let Some(order) = order {
                        ParsedMarkdownListItemType::Ordered(order)
                    } else {
                        ParsedMarkdownListItemType::Unordered
                    };

                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(content) = items_stack.pop() {
                        // Prefer the range that was cut off at the start of a nested list.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content,
                            depth,
                            item_type,
                        });

                        // An item that contained a nested list goes in front of the nested
                        // items that were already emitted.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }

                    task_item = None;
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(items_stack) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                items_stack.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    }
                }
            }
        }

        items
    }
629
630 #[async_recursion]
631 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
632 let (_event, source_range) = self.previous().unwrap();
633 let source_range = source_range.clone();
634 let mut nested_depth = 1;
635
636 let mut children: Vec<ParsedMarkdownElement> = vec![];
637
638 while !self.eof() {
639 let block = self.parse_block().await;
640
641 if let Some(block) = block {
642 children.extend(block);
643 } else {
644 break;
645 }
646
647 if self.eof() {
648 break;
649 }
650
651 let (current, _source_range) = self.current().unwrap();
652 match current {
653 // This is a nested block quote.
654 // Record that we're in a nested block quote and continue parsing.
655 // We don't need to advance the cursor since the next
656 // call to `parse_block` will handle it.
657 Event::Start(Tag::BlockQuote) => {
658 nested_depth += 1;
659 }
660 Event::End(TagEnd::BlockQuote) => {
661 nested_depth -= 1;
662 if nested_depth == 0 {
663 self.cursor += 1;
664 break;
665 }
666 }
667 _ => {}
668 };
669 }
670
671 ParsedMarkdownBlockQuote {
672 source_range,
673 children,
674 }
675 }
676
677 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
678 let (_event, source_range) = self.previous().unwrap();
679 let source_range = source_range.clone();
680 let mut code = String::new();
681
682 while !self.eof() {
683 let (current, _source_range) = self.current().unwrap();
684 match current {
685 Event::Text(text) => {
686 code.push_str(&text);
687 self.cursor += 1;
688 }
689 Event::End(TagEnd::CodeBlock) => {
690 self.cursor += 1;
691 break;
692 }
693 _ => {
694 break;
695 }
696 }
697 }
698
699 let highlights = if let Some(language) = &language {
700 if let Some(registry) = &self.language_registry {
701 let rope: language::Rope = code.as_str().into();
702 registry
703 .language_for_name_or_extension(language)
704 .await
705 .map(|l| l.highlight_text(&rope, 0..code.len()))
706 .ok()
707 } else {
708 None
709 }
710 } else {
711 None
712 };
713
714 ParsedMarkdownCodeBlock {
715 source_range,
716 contents: code.trim().to_string().into(),
717 language,
718 highlights,
719 }
720 }
721}
722
723#[cfg(test)]
724mod tests {
725 use super::*;
726
727 use gpui::BackgroundExecutor;
728 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
729 use pretty_assertions::assert_eq;
730
731 use ParsedMarkdownListItemType::*;
732
733 async fn parse(input: &str) -> ParsedMarkdown {
734 parse_markdown(input, None, None).await
735 }
736
737 #[gpui::test]
738 async fn test_headings() {
739 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
740
741 assert_eq!(
742 parsed.children,
743 vec![
744 h1(text("Heading one", 2..13), 0..14),
745 h2(text("Heading two", 17..28), 14..29),
746 h3(text("Heading three", 33..46), 29..46),
747 ]
748 );
749 }
750
751 #[gpui::test]
752 async fn test_newlines_dont_new_paragraphs() {
753 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
754
755 assert_eq!(
756 parsed.children,
757 vec![p("Some text that is bolded and italicized", 0..46)]
758 );
759 }
760
761 #[gpui::test]
762 async fn test_heading_with_paragraph() {
763 let parsed = parse("# Zed\nThe editor").await;
764
765 assert_eq!(
766 parsed.children,
767 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
768 );
769 }
770
771 #[gpui::test]
772 async fn test_double_newlines_do_new_paragraphs() {
773 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
774
775 assert_eq!(
776 parsed.children,
777 vec![
778 p("Some text that is bolded", 0..29),
779 p("and italicized", 31..47),
780 ]
781 );
782 }
783
784 #[gpui::test]
785 async fn test_bold_italic_text() {
786 let parsed = parse("Some text **that is bolded** and *italicized*").await;
787
788 assert_eq!(
789 parsed.children,
790 vec![p("Some text that is bolded and italicized", 0..45)]
791 );
792 }
793
794 #[gpui::test]
795 async fn test_nested_bold_strikethrough_text() {
796 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
797
798 assert_eq!(parsed.children.len(), 1);
799 assert_eq!(
800 parsed.children[0],
801 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
802 source_range: 0..35,
803 contents: "Some bostrikethroughld text".to_string(),
804 highlights: Vec::new(),
805 region_ranges: Vec::new(),
806 regions: Vec::new(),
807 })
808 );
809
810 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
811 text
812 } else {
813 panic!("Expected a paragraph");
814 };
815 assert_eq!(
816 paragraph.highlights,
817 vec![
818 (
819 5..7,
820 MarkdownHighlight::Style(MarkdownHighlightStyle {
821 weight: FontWeight::BOLD,
822 ..Default::default()
823 }),
824 ),
825 (
826 7..20,
827 MarkdownHighlight::Style(MarkdownHighlightStyle {
828 weight: FontWeight::BOLD,
829 strikethrough: true,
830 ..Default::default()
831 }),
832 ),
833 (
834 20..22,
835 MarkdownHighlight::Style(MarkdownHighlightStyle {
836 weight: FontWeight::BOLD,
837 ..Default::default()
838 }),
839 ),
840 ]
841 );
842 }
843
844 #[gpui::test]
845 async fn test_raw_links_detection() {
846 let parsed = parse("Checkout this https://zed.dev link").await;
847
848 assert_eq!(
849 parsed.children,
850 vec![p("Checkout this https://zed.dev link", 0..34)]
851 );
852
853 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
854 text
855 } else {
856 panic!("Expected a paragraph");
857 };
858 assert_eq!(
859 paragraph.highlights,
860 vec![(
861 14..29,
862 MarkdownHighlight::Style(MarkdownHighlightStyle {
863 underline: true,
864 ..Default::default()
865 }),
866 )]
867 );
868 assert_eq!(
869 paragraph.regions,
870 vec![ParsedRegion {
871 code: false,
872 link: Some(Link::Web {
873 url: "https://zed.dev".to_string()
874 }),
875 }]
876 );
877 assert_eq!(paragraph.region_ranges, vec![14..29]);
878 }
879
880 #[gpui::test]
881 async fn test_header_only_table() {
882 let markdown = "\
883| Header 1 | Header 2 |
884|----------|----------|
885
886Some other content
887";
888
889 let expected_table = table(
890 0..48,
891 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
892 vec![],
893 );
894
895 assert_eq!(
896 parse(markdown).await.children[0],
897 ParsedMarkdownElement::Table(expected_table)
898 );
899 }
900
901 #[gpui::test]
902 async fn test_basic_table() {
903 let markdown = "\
904| Header 1 | Header 2 |
905|----------|----------|
906| Cell 1 | Cell 2 |
907| Cell 3 | Cell 4 |";
908
909 let expected_table = table(
910 0..95,
911 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
912 vec![
913 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
914 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
915 ],
916 );
917
918 assert_eq!(
919 parse(markdown).await.children[0],
920 ParsedMarkdownElement::Table(expected_table)
921 );
922 }
923
924 #[gpui::test]
925 async fn test_list_basic() {
926 let parsed = parse(
927 "\
928* Item 1
929* Item 2
930* Item 3
931",
932 )
933 .await;
934
935 assert_eq!(
936 parsed.children,
937 vec![
938 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
939 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
940 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
941 ],
942 );
943 }
944
945 #[gpui::test]
946 async fn test_list_with_tasks() {
947 let parsed = parse(
948 "\
949- [ ] TODO
950- [x] Checked
951",
952 )
953 .await;
954
955 assert_eq!(
956 parsed.children,
957 vec![
958 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
959 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
960 ],
961 );
962 }
963
964 #[gpui::test]
965 async fn test_list_with_linebreak_is_handled_correctly() {
966 let parsed = parse(
967 "\
968- [ ] Task 1
969
970- [x] Task 2
971",
972 )
973 .await;
974
975 assert_eq!(
976 parsed.children,
977 vec![
978 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
979 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
980 ],
981 );
982 }
983
984 #[gpui::test]
985 async fn test_list_nested() {
986 let parsed = parse(
987 "\
988* Item 1
989* Item 2
990* Item 3
991
9921. Hello
9931. Two
994 1. Three
9952. Four
9963. Five
997
998* First
999 1. Hello
1000 1. Goodbyte
1001 - Inner
1002 - Inner
1003 2. Goodbyte
1004* Last
1005",
1006 )
1007 .await;
1008
1009 assert_eq!(
1010 parsed.children,
1011 vec![
1012 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1013 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1014 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1015 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1016 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1017 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1018 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1019 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1020 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1021 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1022 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1023 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1024 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1025 list_item(143..154, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1026 list_item(155..161, 1, Unordered, vec![p("Last", 157..161)]),
1027 ]
1028 );
1029 }
1030
1031 #[gpui::test]
1032 async fn test_list_with_nested_content() {
1033 let parsed = parse(
1034 "\
1035* This is a list item with two paragraphs.
1036
1037 This is the second paragraph in the list item.
1038",
1039 )
1040 .await;
1041
1042 assert_eq!(
1043 parsed.children,
1044 vec![list_item(
1045 0..96,
1046 1,
1047 Unordered,
1048 vec![
1049 p("This is a list item with two paragraphs.", 4..44),
1050 p("This is the second paragraph in the list item.", 50..97)
1051 ],
1052 ),],
1053 );
1054 }
1055
1056 #[gpui::test]
1057 async fn test_nested_list_with_paragraph_inside() {
1058 let parsed = parse(
1059 "\
10601. a
1061 1. b
1062 1. c
1063
1064 text
1065
1066 1. d
1067",
1068 )
1069 .await;
1070
1071 assert_eq!(
1072 parsed.children,
1073 vec![
1074 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1075 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1076 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1077 p("text", 32..37),
1078 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1079 ],
1080 );
1081 }
1082
1083 #[gpui::test]
1084 async fn test_list_with_leading_text() {
1085 let parsed = parse(
1086 "\
1087* `code`
1088* **bold**
1089* [link](https://example.com)
1090",
1091 )
1092 .await;
1093
1094 assert_eq!(
1095 parsed.children,
1096 vec![
1097 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1098 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1099 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1100 ],
1101 );
1102 }
1103
1104 #[gpui::test]
1105 async fn test_simple_block_quote() {
1106 let parsed = parse("> Simple block quote with **styled text**").await;
1107
1108 assert_eq!(
1109 parsed.children,
1110 vec![block_quote(
1111 vec![p("Simple block quote with styled text", 2..41)],
1112 0..41
1113 )]
1114 );
1115 }
1116
1117 #[gpui::test]
1118 async fn test_simple_block_quote_with_multiple_lines() {
1119 let parsed = parse(
1120 "\
1121> # Heading
1122> More
1123> text
1124>
1125> More text
1126",
1127 )
1128 .await;
1129
1130 assert_eq!(
1131 parsed.children,
1132 vec![block_quote(
1133 vec![
1134 h1(text("Heading", 4..11), 2..12),
1135 p("More text", 14..26),
1136 p("More text", 30..40)
1137 ],
1138 0..40
1139 )]
1140 );
1141 }
1142
1143 #[gpui::test]
1144 async fn test_nested_block_quote() {
1145 let parsed = parse(
1146 "\
1147> A
1148>
1149> > # B
1150>
1151> C
1152
1153More text
1154",
1155 )
1156 .await;
1157
1158 assert_eq!(
1159 parsed.children,
1160 vec![
1161 block_quote(
1162 vec![
1163 p("A", 2..4),
1164 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1165 p("C", 18..20)
1166 ],
1167 0..20
1168 ),
1169 p("More text", 21..31)
1170 ]
1171 );
1172 }
1173
1174 #[gpui::test]
1175 async fn test_code_block() {
1176 let parsed = parse(
1177 "\
1178```
1179fn main() {
1180 return 0;
1181}
1182```
1183",
1184 )
1185 .await;
1186
1187 assert_eq!(
1188 parsed.children,
1189 vec![code_block(
1190 None,
1191 "fn main() {\n return 0;\n}",
1192 0..35,
1193 None
1194 )]
1195 );
1196 }
1197
1198 #[gpui::test]
1199 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1200 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1201 language_registry.add(rust_lang());
1202
1203 let parsed = parse_markdown(
1204 "\
1205```rust
1206fn main() {
1207 return 0;
1208}
1209```
1210",
1211 None,
1212 Some(language_registry),
1213 )
1214 .await;
1215
1216 assert_eq!(
1217 parsed.children,
1218 vec![code_block(
1219 Some("rust".to_string()),
1220 "fn main() {\n return 0;\n}",
1221 0..39,
1222 Some(vec![])
1223 )]
1224 );
1225 }
1226
1227 fn rust_lang() -> Arc<Language> {
1228 Arc::new(Language::new(
1229 LanguageConfig {
1230 name: "Rust".into(),
1231 matcher: LanguageMatcher {
1232 path_suffixes: vec!["rs".into()],
1233 ..Default::default()
1234 },
1235 collapsed_placeholder: " /* ... */ ".to_string(),
1236 ..Default::default()
1237 },
1238 Some(tree_sitter_rust::language()),
1239 ))
1240 }
1241
1242 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1243 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1244 source_range,
1245 level: HeadingLevel::H1,
1246 contents,
1247 })
1248 }
1249
1250 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1251 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1252 source_range,
1253 level: HeadingLevel::H2,
1254 contents,
1255 })
1256 }
1257
1258 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1259 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1260 source_range,
1261 level: HeadingLevel::H3,
1262 contents,
1263 })
1264 }
1265
1266 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1267 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1268 }
1269
1270 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1271 ParsedMarkdownText {
1272 highlights: Vec::new(),
1273 region_ranges: Vec::new(),
1274 regions: Vec::new(),
1275 source_range,
1276 contents: contents.to_string(),
1277 }
1278 }
1279
1280 fn block_quote(
1281 children: Vec<ParsedMarkdownElement>,
1282 source_range: Range<usize>,
1283 ) -> ParsedMarkdownElement {
1284 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1285 source_range,
1286 children,
1287 })
1288 }
1289
1290 fn code_block(
1291 language: Option<String>,
1292 code: &str,
1293 source_range: Range<usize>,
1294 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1295 ) -> ParsedMarkdownElement {
1296 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1297 source_range,
1298 language,
1299 contents: code.to_string().into(),
1300 highlights,
1301 })
1302 }
1303
1304 fn list_item(
1305 source_range: Range<usize>,
1306 depth: u16,
1307 item_type: ParsedMarkdownListItemType,
1308 content: Vec<ParsedMarkdownElement>,
1309 ) -> ParsedMarkdownElement {
1310 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1311 source_range,
1312 item_type,
1313 depth,
1314 content,
1315 })
1316 }
1317
1318 fn table(
1319 source_range: Range<usize>,
1320 header: ParsedMarkdownTableRow,
1321 body: Vec<ParsedMarkdownTableRow>,
1322 ) -> ParsedMarkdownTable {
1323 ParsedMarkdownTable {
1324 column_alignments: Vec::new(),
1325 source_range,
1326 header,
1327 body,
1328 }
1329 }
1330
1331 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1332 ParsedMarkdownTableRow { children }
1333 }
1334
1335 impl PartialEq for ParsedMarkdownTable {
1336 fn eq(&self, other: &Self) -> bool {
1337 self.source_range == other.source_range
1338 && self.header == other.header
1339 && self.body == other.body
1340 }
1341 }
1342
1343 impl PartialEq for ParsedMarkdownText {
1344 fn eq(&self, other: &Self) -> bool {
1345 self.source_range == other.source_range && self.contents == other.contents
1346 }
1347 }
1348}