1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
29struct MarkdownParser<'a> {
30 tokens: Vec<(Event<'a>, Range<usize>)>,
31 /// The current index in the tokens array
32 cursor: usize,
33 /// The blocks that we have successfully parsed so far
34 parsed: Vec<ParsedMarkdownElement>,
35 file_location_directory: Option<PathBuf>,
36 language_registry: Option<Arc<LanguageRegistry>>,
37}
38
39struct MarkdownListItem {
40 content: Vec<ParsedMarkdownElement>,
41 item_type: ParsedMarkdownListItemType,
42}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 return self.tokens.get(self.cursor + steps);
80 }
81
82 fn previous(&self) -> Option<&(Event, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 return self.tokens.get(self.cursor - 1);
87 }
88
89 fn current(&self) -> Option<&(Event, Range<usize>)> {
90 return self.peek(0);
91 }
92
93 fn current_event(&self) -> Option<&Event> {
94 return self.current().map(|(event, _)| event);
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::FootnoteReference(_)
104 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
105 | Event::Start(Tag::Emphasis)
106 | Event::Start(Tag::Strong)
107 | Event::Start(Tag::Strikethrough)
108 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
109 true
110 }
111 _ => false,
112 }
113 }
114
115 async fn parse_document(mut self) -> Self {
116 while !self.eof() {
117 if let Some(block) = self.parse_block().await {
118 self.parsed.extend(block);
119 } else {
120 self.cursor += 1;
121 }
122 }
123 self
124 }
125
126 #[async_recursion]
127 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
128 let (current, source_range) = self.current().unwrap();
129 let source_range = source_range.clone();
130 match current {
131 Event::Start(tag) => match tag {
132 Tag::Paragraph => {
133 self.cursor += 1;
134 let text = self.parse_text(false, Some(source_range));
135 Some(vec![ParsedMarkdownElement::Paragraph(text)])
136 }
137 Tag::Heading {
138 level,
139 id: _,
140 classes: _,
141 attrs: _,
142 } => {
143 let level = *level;
144 self.cursor += 1;
145 let heading = self.parse_heading(level);
146 Some(vec![ParsedMarkdownElement::Heading(heading)])
147 }
148 Tag::Table(alignment) => {
149 let alignment = alignment.clone();
150 self.cursor += 1;
151 let table = self.parse_table(alignment);
152 Some(vec![ParsedMarkdownElement::Table(table)])
153 }
154 Tag::List(order) => {
155 let order = *order;
156 self.cursor += 1;
157 let list = self.parse_list(order).await;
158 Some(list)
159 }
160 Tag::BlockQuote(_kind) => {
161 self.cursor += 1;
162 let block_quote = self.parse_block_quote().await;
163 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
164 }
165 Tag::CodeBlock(kind) => {
166 let language = match kind {
167 pulldown_cmark::CodeBlockKind::Indented => None,
168 pulldown_cmark::CodeBlockKind::Fenced(language) => {
169 if language.is_empty() {
170 None
171 } else {
172 Some(language.to_string())
173 }
174 }
175 };
176
177 self.cursor += 1;
178
179 let code_block = self.parse_code_block(language).await;
180 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
181 }
182 _ => None,
183 },
184 Event::Rule => {
185 let source_range = source_range.clone();
186 self.cursor += 1;
187 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
188 }
189 _ => None,
190 }
191 }
192
193 fn parse_text(
194 &mut self,
195 should_complete_on_soft_break: bool,
196 source_range: Option<Range<usize>>,
197 ) -> ParsedMarkdownText {
198 let source_range = source_range.unwrap_or_else(|| {
199 self.current()
200 .map(|(_, range)| range.clone())
201 .unwrap_or_default()
202 });
203
204 let mut text = String::new();
205 let mut bold_depth = 0;
206 let mut italic_depth = 0;
207 let mut strikethrough_depth = 0;
208 let mut link: Option<Link> = None;
209 let mut region_ranges: Vec<Range<usize>> = vec![];
210 let mut regions: Vec<ParsedRegion> = vec![];
211 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
212
213 let mut link_urls: Vec<String> = vec![];
214 let mut link_ranges: Vec<Range<usize>> = vec![];
215
216 loop {
217 if self.eof() {
218 break;
219 }
220
221 let (current, _source_range) = self.current().unwrap();
222 let prev_len = text.len();
223 match current {
224 Event::SoftBreak => {
225 if should_complete_on_soft_break {
226 break;
227 }
228
229 // `Some text\nSome more text` should be treated as a single line.
230 text.push(' ');
231 }
232
233 Event::HardBreak => {
234 text.push('\n');
235 }
236
237 Event::Text(t) => {
238 text.push_str(t.as_ref());
239
240 let mut style = MarkdownHighlightStyle::default();
241
242 if bold_depth > 0 {
243 style.weight = FontWeight::BOLD;
244 }
245
246 if italic_depth > 0 {
247 style.italic = true;
248 }
249
250 if strikethrough_depth > 0 {
251 style.strikethrough = true;
252 }
253
254 let last_run_len = if let Some(link) = link.clone() {
255 region_ranges.push(prev_len..text.len());
256 regions.push(ParsedRegion {
257 code: false,
258 link: Some(link),
259 });
260 style.underline = true;
261 prev_len
262 } else {
263 // Manually scan for links
264 let mut finder = linkify::LinkFinder::new();
265 finder.kinds(&[linkify::LinkKind::Url]);
266 let mut last_link_len = prev_len;
267 for link in finder.links(t) {
268 let start = link.start();
269 let end = link.end();
270 let range = (prev_len + start)..(prev_len + end);
271 link_ranges.push(range.clone());
272 link_urls.push(link.as_str().to_string());
273
274 // If there is a style before we match a link, we have to add this to the highlighted ranges
275 if style != MarkdownHighlightStyle::default()
276 && last_link_len < link.start()
277 {
278 highlights.push((
279 last_link_len..link.start(),
280 MarkdownHighlight::Style(style.clone()),
281 ));
282 }
283
284 highlights.push((
285 range.clone(),
286 MarkdownHighlight::Style(MarkdownHighlightStyle {
287 underline: true,
288 ..style
289 }),
290 ));
291 region_ranges.push(range.clone());
292 regions.push(ParsedRegion {
293 code: false,
294 link: Some(Link::Web {
295 url: link.as_str().to_string(),
296 }),
297 });
298
299 last_link_len = end;
300 }
301 last_link_len
302 };
303
304 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
305 let mut new_highlight = true;
306 if let Some((last_range, last_style)) = highlights.last_mut() {
307 if last_range.end == last_run_len
308 && last_style == &MarkdownHighlight::Style(style.clone())
309 {
310 last_range.end = text.len();
311 new_highlight = false;
312 }
313 }
314 if new_highlight {
315 highlights
316 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
317 }
318 }
319 }
320
321 // Note: This event means "inline code" and not "code block"
322 Event::Code(t) => {
323 text.push_str(t.as_ref());
324 region_ranges.push(prev_len..text.len());
325
326 if link.is_some() {
327 highlights.push((
328 prev_len..text.len(),
329 MarkdownHighlight::Style(MarkdownHighlightStyle {
330 underline: true,
331 ..Default::default()
332 }),
333 ));
334 }
335
336 regions.push(ParsedRegion {
337 code: true,
338 link: link.clone(),
339 });
340 }
341
342 Event::Start(tag) => match tag {
343 Tag::Emphasis => italic_depth += 1,
344 Tag::Strong => bold_depth += 1,
345 Tag::Strikethrough => strikethrough_depth += 1,
346 Tag::Link {
347 link_type: _,
348 dest_url,
349 title: _,
350 id: _,
351 } => {
352 link = Link::identify(
353 self.file_location_directory.clone(),
354 dest_url.to_string(),
355 );
356 }
357 _ => {
358 break;
359 }
360 },
361
362 Event::End(tag) => match tag {
363 TagEnd::Emphasis => {
364 italic_depth -= 1;
365 }
366 TagEnd::Strong => {
367 bold_depth -= 1;
368 }
369 TagEnd::Strikethrough => {
370 strikethrough_depth -= 1;
371 }
372 TagEnd::Link => {
373 link = None;
374 }
375 TagEnd::Paragraph => {
376 self.cursor += 1;
377 break;
378 }
379 _ => {
380 break;
381 }
382 },
383
384 _ => {
385 break;
386 }
387 }
388
389 self.cursor += 1;
390 }
391
392 ParsedMarkdownText {
393 source_range,
394 contents: text,
395 highlights,
396 regions,
397 region_ranges,
398 }
399 }
400
401 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
402 let (_event, source_range) = self.previous().unwrap();
403 let source_range = source_range.clone();
404 let text = self.parse_text(true, None);
405
406 // Advance past the heading end tag
407 self.cursor += 1;
408
409 ParsedMarkdownHeading {
410 source_range: source_range.clone(),
411 level: match level {
412 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
413 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
414 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
415 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
416 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
417 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
418 },
419 contents: text,
420 }
421 }
422
423 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
424 let (_event, source_range) = self.previous().unwrap();
425 let source_range = source_range.clone();
426 let mut header = ParsedMarkdownTableRow::new();
427 let mut body = vec![];
428 let mut current_row = vec![];
429 let mut in_header = true;
430 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
431
432 loop {
433 if self.eof() {
434 break;
435 }
436
437 let (current, source_range) = self.current().unwrap();
438 let source_range = source_range.clone();
439 match current {
440 Event::Start(Tag::TableHead)
441 | Event::Start(Tag::TableRow)
442 | Event::End(TagEnd::TableCell) => {
443 self.cursor += 1;
444 }
445 Event::Start(Tag::TableCell) => {
446 self.cursor += 1;
447 let cell_contents = self.parse_text(false, Some(source_range));
448 current_row.push(cell_contents);
449 }
450 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
451 self.cursor += 1;
452 let new_row = std::mem::take(&mut current_row);
453 if in_header {
454 header.children = new_row;
455 in_header = false;
456 } else {
457 let row = ParsedMarkdownTableRow::with_children(new_row);
458 body.push(row);
459 }
460 }
461 Event::End(TagEnd::Table) => {
462 self.cursor += 1;
463 break;
464 }
465 _ => {
466 break;
467 }
468 }
469 }
470
471 ParsedMarkdownTable {
472 source_range,
473 header,
474 body,
475 column_alignments,
476 }
477 }
478
479 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
480 match alignment {
481 Alignment::None => ParsedMarkdownTableAlignment::None,
482 Alignment::Left => ParsedMarkdownTableAlignment::Left,
483 Alignment::Center => ParsedMarkdownTableAlignment::Center,
484 Alignment::Right => ParsedMarkdownTableAlignment::Right,
485 }
486 }
487
488 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
489 let (_, list_source_range) = self.previous().unwrap();
490
491 let mut items = Vec::new();
492 let mut items_stack = vec![MarkdownListItem::default()];
493 let mut depth = 1;
494 let mut order = order;
495 let mut order_stack = Vec::new();
496
497 let mut insertion_indices = FxHashMap::default();
498 let mut source_ranges = FxHashMap::default();
499 let mut start_item_range = list_source_range.clone();
500
501 while !self.eof() {
502 let (current, source_range) = self.current().unwrap();
503 match current {
504 Event::Start(Tag::List(new_order)) => {
505 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
506 insertion_indices.insert(depth, items.len());
507 }
508
509 // We will use the start of the nested list as the end for the current item's range,
510 // because we don't care about the hierarchy of list items
511 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
512 e.insert(start_item_range.start..source_range.start);
513 }
514
515 order_stack.push(order);
516 order = *new_order;
517 self.cursor += 1;
518 depth += 1;
519 }
520 Event::End(TagEnd::List(_)) => {
521 order = order_stack.pop().flatten();
522 self.cursor += 1;
523 depth -= 1;
524
525 if depth == 0 {
526 break;
527 }
528 }
529 Event::Start(Tag::Item) => {
530 start_item_range = source_range.clone();
531
532 self.cursor += 1;
533 items_stack.push(MarkdownListItem::default());
534
535 let mut task_list = None;
536 // Check for task list marker (`- [ ]` or `- [x]`)
537 if let Some(event) = self.current_event() {
538 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
539 if event == &Event::Start(Tag::Paragraph) {
540 self.cursor += 1;
541 }
542
543 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
544 task_list = Some((*checked, range.clone()));
545 self.cursor += 1;
546 }
547 }
548
549 if let Some((event, range)) = self.current() {
550 // This is a plain list item.
551 // For example `- some text` or `1. [Docs](./docs.md)`
552 if MarkdownParser::is_text_like(event) {
553 let text = self.parse_text(false, Some(range.clone()));
554 let block = ParsedMarkdownElement::Paragraph(text);
555 if let Some(content) = items_stack.last_mut() {
556 let item_type = if let Some((checked, range)) = task_list {
557 ParsedMarkdownListItemType::Task(checked, range)
558 } else if let Some(order) = order {
559 ParsedMarkdownListItemType::Ordered(order)
560 } else {
561 ParsedMarkdownListItemType::Unordered
562 };
563 content.item_type = item_type;
564 content.content.push(block);
565 }
566 } else {
567 let block = self.parse_block().await;
568 if let Some(block) = block {
569 if let Some(list_item) = items_stack.last_mut() {
570 list_item.content.extend(block);
571 }
572 }
573 }
574 }
575
576 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
577 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
578 self.cursor += 1;
579 }
580 }
581 Event::End(TagEnd::Item) => {
582 self.cursor += 1;
583
584 if let Some(current) = order {
585 order = Some(current + 1);
586 }
587
588 if let Some(list_item) = items_stack.pop() {
589 let source_range = source_ranges
590 .remove(&depth)
591 .unwrap_or(start_item_range.clone());
592
593 // We need to remove the last character of the source range, because it includes the newline character
594 let source_range = source_range.start..source_range.end - 1;
595 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
596 source_range,
597 content: list_item.content,
598 depth,
599 item_type: list_item.item_type,
600 });
601
602 if let Some(index) = insertion_indices.get(&depth) {
603 items.insert(*index, item);
604 insertion_indices.remove(&depth);
605 } else {
606 items.push(item);
607 }
608 }
609 }
610 _ => {
611 if depth == 0 {
612 break;
613 }
614 // This can only happen if a list item starts with more then one paragraph,
615 // or the list item contains blocks that should be rendered after the nested list items
616 let block = self.parse_block().await;
617 if let Some(block) = block {
618 if let Some(list_item) = items_stack.last_mut() {
619 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
620 if !insertion_indices.contains_key(&depth) {
621 list_item.content.extend(block);
622 continue;
623 }
624 }
625
626 // Otherwise we need to insert the block after all the nested items
627 // that have been parsed so far
628 items.extend(block);
629 } else {
630 self.cursor += 1;
631 }
632 }
633 }
634 }
635
636 items
637 }
638
639 #[async_recursion]
640 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
641 let (_event, source_range) = self.previous().unwrap();
642 let source_range = source_range.clone();
643 let mut nested_depth = 1;
644
645 let mut children: Vec<ParsedMarkdownElement> = vec![];
646
647 while !self.eof() {
648 let block = self.parse_block().await;
649
650 if let Some(block) = block {
651 children.extend(block);
652 } else {
653 break;
654 }
655
656 if self.eof() {
657 break;
658 }
659
660 let (current, _source_range) = self.current().unwrap();
661 match current {
662 // This is a nested block quote.
663 // Record that we're in a nested block quote and continue parsing.
664 // We don't need to advance the cursor since the next
665 // call to `parse_block` will handle it.
666 Event::Start(Tag::BlockQuote(_kind)) => {
667 nested_depth += 1;
668 }
669 Event::End(TagEnd::BlockQuote(_kind)) => {
670 nested_depth -= 1;
671 if nested_depth == 0 {
672 self.cursor += 1;
673 break;
674 }
675 }
676 _ => {}
677 };
678 }
679
680 ParsedMarkdownBlockQuote {
681 source_range,
682 children,
683 }
684 }
685
686 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
687 let (_event, source_range) = self.previous().unwrap();
688 let source_range = source_range.clone();
689 let mut code = String::new();
690
691 while !self.eof() {
692 let (current, _source_range) = self.current().unwrap();
693 match current {
694 Event::Text(text) => {
695 code.push_str(text);
696 self.cursor += 1;
697 }
698 Event::End(TagEnd::CodeBlock) => {
699 self.cursor += 1;
700 break;
701 }
702 _ => {
703 break;
704 }
705 }
706 }
707
708 let highlights = if let Some(language) = &language {
709 if let Some(registry) = &self.language_registry {
710 let rope: language::Rope = code.as_str().into();
711 registry
712 .language_for_name_or_extension(language)
713 .await
714 .map(|l| l.highlight_text(&rope, 0..code.len()))
715 .ok()
716 } else {
717 None
718 }
719 } else {
720 None
721 };
722
723 ParsedMarkdownCodeBlock {
724 source_range,
725 contents: code.trim().to_string().into(),
726 language,
727 highlights,
728 }
729 }
730}
731
732#[cfg(test)]
733mod tests {
734 use super::*;
735
736 use gpui::BackgroundExecutor;
737 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
738 use pretty_assertions::assert_eq;
739 use ParsedMarkdownListItemType::*;
740
741 async fn parse(input: &str) -> ParsedMarkdown {
742 parse_markdown(input, None, None).await
743 }
744
/// Three consecutive headings (levels 1 to 3) are parsed into `h1`, `h2` and
/// `h3` elements with the expected text and source ranges.
745 #[gpui::test]
746 async fn test_headings() {
747 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
748
749 assert_eq!(
750 parsed.children,
751 vec![
752 h1(text("Heading one", 2..13), 0..14),
753 h2(text("Heading two", 17..28), 14..29),
754 h3(text("Heading three", 33..46), 29..46),
755 ]
756 );
757 }
758
/// A single newline inside a paragraph does not start a new paragraph; the
/// two lines are joined into one paragraph.
759 #[gpui::test]
760 async fn test_newlines_dont_new_paragraphs() {
761 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
762
763 assert_eq!(
764 parsed.children,
765 vec![p("Some text that is bolded and italicized", 0..46)]
766 );
767 }
768
/// A heading directly followed by a line of text yields a heading element and
/// a separate paragraph element.
769 #[gpui::test]
770 async fn test_heading_with_paragraph() {
771 let parsed = parse("# Zed\nThe editor").await;
772
773 assert_eq!(
774 parsed.children,
775 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
776 );
777 }
778
/// A blank line between two runs of text produces two separate paragraphs.
779 #[gpui::test]
780 async fn test_double_newlines_do_new_paragraphs() {
781 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
782
783 assert_eq!(
784 parsed.children,
785 vec![
786 p("Some text that is bolded", 0..29),
787 p("and italicized", 31..47),
788 ]
789 );
790 }
791
/// Bold and italic markers are removed from the paragraph contents.
/// Highlights are not checked here: the test-only `PartialEq` for
/// `ParsedMarkdownText` compares just the source range and contents.
792 #[gpui::test]
793 async fn test_bold_italic_text() {
794 let parsed = parse("Some text **that is bolded** and *italicized*").await;
795
796 assert_eq!(
797 parsed.children,
798 vec![p("Some text that is bolded and italicized", 0..45)]
799 );
800 }
801
/// Strikethrough nested inside bold produces three adjacent highlights: bold,
/// bold plus strikethrough, then bold again.
802 #[gpui::test]
803 async fn test_nested_bold_strikethrough_text() {
804 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
805
806 assert_eq!(parsed.children.len(), 1);
// This comparison only checks range and contents (see the test-only
// `PartialEq` impl); the highlights are asserted separately below.
807 assert_eq!(
808 parsed.children[0],
809 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
810 source_range: 0..35,
811 contents: "Some bostrikethroughld text".to_string(),
812 highlights: Vec::new(),
813 region_ranges: Vec::new(),
814 regions: Vec::new(),
815 })
816 );
817
818 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
819 text
820 } else {
821 panic!("Expected a paragraph");
822 };
823 assert_eq!(
824 paragraph.highlights,
825 vec![
826 (
827 5..7,
828 MarkdownHighlight::Style(MarkdownHighlightStyle {
829 weight: FontWeight::BOLD,
830 ..Default::default()
831 }),
832 ),
833 (
834 7..20,
835 MarkdownHighlight::Style(MarkdownHighlightStyle {
836 weight: FontWeight::BOLD,
837 strikethrough: true,
838 ..Default::default()
839 }),
840 ),
841 (
842 20..22,
843 MarkdownHighlight::Style(MarkdownHighlightStyle {
844 weight: FontWeight::BOLD,
845 ..Default::default()
846 }),
847 ),
848 ]
849 );
850 }
851
/// A bare URL in plain text is detected as a web link: it gets an underline
/// highlight and a link region covering exactly the URL.
852 #[gpui::test]
853 async fn test_raw_links_detection() {
854 let parsed = parse("Checkout this https://zed.dev link").await;
855
856 assert_eq!(
857 parsed.children,
858 vec![p("Checkout this https://zed.dev link", 0..34)]
859 );
860
861 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
862 text
863 } else {
864 panic!("Expected a paragraph");
865 };
866 assert_eq!(
867 paragraph.highlights,
868 vec![(
869 14..29,
870 MarkdownHighlight::Style(MarkdownHighlightStyle {
871 underline: true,
872 ..Default::default()
873 }),
874 )]
875 );
876 assert_eq!(
877 paragraph.regions,
878 vec![ParsedRegion {
879 code: false,
880 link: Some(Link::Web {
881 url: "https://zed.dev".to_string()
882 }),
883 }]
884 );
885 assert_eq!(paragraph.region_ranges, vec![14..29]);
886 }
887
/// A table with a header row and no body rows parses into a table element
/// with an empty body; only the first parsed element is checked.
888 #[gpui::test]
889 async fn test_header_only_table() {
890 let markdown = "\
891| Header 1 | Header 2 |
892|----------|----------|
893
894Some other content
895";
896
897 let expected_table = table(
898 0..48,
899 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
900 vec![],
901 );
902
903 assert_eq!(
904 parse(markdown).await.children[0],
905 ParsedMarkdownElement::Table(expected_table)
906 );
907 }
908
/// A table with a header and two body rows parses into the expected header
/// and body cells. Column alignments are not compared by the test-only
/// `PartialEq` for `ParsedMarkdownTable`.
909 #[gpui::test]
910 async fn test_basic_table() {
911 let markdown = "\
912| Header 1 | Header 2 |
913|----------|----------|
914| Cell 1 | Cell 2 |
915| Cell 3 | Cell 4 |";
916
917 let expected_table = table(
918 0..95,
919 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
920 vec![
921 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
922 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
923 ],
924 );
925
926 assert_eq!(
927 parse(markdown).await.children[0],
928 ParsedMarkdownElement::Table(expected_table)
929 );
930 }
931
/// A flat unordered list yields one depth-1 unordered list item per line.
932 #[gpui::test]
933 async fn test_list_basic() {
934 let parsed = parse(
935 "\
936* Item 1
937* Item 2
938* Item 3
939",
940 )
941 .await;
942
943 assert_eq!(
944 parsed.children,
945 vec![
946 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
947 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
948 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
949 ],
950 );
951 }
952
/// Task list markers are parsed into `Task` items that carry the checked
/// state and the source range of the marker.
953 #[gpui::test]
954 async fn test_list_with_tasks() {
955 let parsed = parse(
956 "\
957- [ ] TODO
958- [x] Checked
959",
960 )
961 .await;
962
963 assert_eq!(
964 parsed.children,
965 vec![
966 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
967 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
968 ],
969 );
970 }
971
/// Task, unordered and ordered items nested below a task item are returned as
/// a flat sequence with depth 2, and ordered numbering is tracked per list.
972 #[gpui::test]
973 async fn test_list_with_indented_task() {
974 let parsed = parse(
975 "\
976- [ ] TODO
977 - [x] Checked
978 - Unordered
979 1. Number 1
980 1. Number 2
9811. Number A
982",
983 )
984 .await;
985
986 assert_eq!(
987 parsed.children,
988 vec![
989 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
990 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
991 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
992 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
993 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
994 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
995 ],
996 );
997 }
998
/// Task items separated by a blank line are still recognized as tasks, with
/// the marker found at the start of the wrapping paragraph.
999 #[gpui::test]
1000 async fn test_list_with_linebreak_is_handled_correctly() {
1001 let parsed = parse(
1002 "\
1003- [ ] Task 1
1004
1005- [x] Task 2
1006",
1007 )
1008 .await;
1009
1010 assert_eq!(
1011 parsed.children,
1012 vec![
1013 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1014 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1015 ],
1016 );
1017 }
1018
/// Several lists with up to four nesting levels, mixed ordered and unordered
/// items, and an empty item are flattened into a single sequence of list
/// items with the expected depth, numbering and source ranges.
1019 #[gpui::test]
1020 async fn test_list_nested() {
1021 let parsed = parse(
1022 "\
1023* Item 1
1024* Item 2
1025* Item 3
1026
10271. Hello
10281. Two
1029 1. Three
10302. Four
10313. Five
1032
1033* First
1034 1. Hello
1035 1. Goodbyte
1036 - Inner
1037 - Inner
1038 2. Goodbyte
1039 - Next item empty
1040 -
1041* Last
1042",
1043 )
1044 .await;
1045
1046 assert_eq!(
1047 parsed.children,
1048 vec![
1049 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1050 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1051 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1052 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1053 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1054 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1055 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1056 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1057 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1058 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1059 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1060 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1061 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1062 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1063 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1064 list_item(186..190, 3, Unordered, vec![]),
1065 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1066 ]
1067 );
1068 }
1069
/// A list item containing two paragraphs keeps both paragraphs as content of
/// the same list item.
1070 #[gpui::test]
1071 async fn test_list_with_nested_content() {
1072 let parsed = parse(
1073 "\
1074* This is a list item with two paragraphs.
1075
1076 This is the second paragraph in the list item.
1077",
1078 )
1079 .await;
1080
1081 assert_eq!(
1082 parsed.children,
1083 vec![list_item(
1084 0..96,
1085 1,
1086 Unordered,
1087 vec![
1088 p("This is a list item with two paragraphs.", 4..44),
1089 p("This is the second paragraph in the list item.", 50..97)
1090 ],
1091 ),],
1092 );
1093 }
1094
/// A paragraph that follows a nested list inside an item is emitted as a
/// standalone paragraph element after the nested items.
1095 #[gpui::test]
1096 async fn test_nested_list_with_paragraph_inside() {
1097 let parsed = parse(
1098 "\
10991. a
1100 1. b
1101 1. c
1102
1103 text
1104
1105 1. d
1106",
1107 )
1108 .await;
1109
1110 assert_eq!(
1111 parsed.children,
1112 vec![
1113 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1114 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1115 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1116 p("text", 32..37),
1117 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1118 ],
1119 );
1120 }
1121
/// List items that begin with inline code, bold text or a link are parsed as
/// plain text items containing a single paragraph.
1122 #[gpui::test]
1123 async fn test_list_with_leading_text() {
1124 let parsed = parse(
1125 "\
1126* `code`
1127* **bold**
1128* [link](https://example.com)
1129",
1130 )
1131 .await;
1132
1133 assert_eq!(
1134 parsed.children,
1135 vec![
1136 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1137 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1138 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1139 ],
1140 );
1141 }
1142
/// A one-line block quote yields a block quote element wrapping a single
/// paragraph.
1143 #[gpui::test]
1144 async fn test_simple_block_quote() {
1145 let parsed = parse("> Simple block quote with **styled text**").await;
1146
1147 assert_eq!(
1148 parsed.children,
1149 vec![block_quote(
1150 vec![p("Simple block quote with styled text", 2..41)],
1151 0..41
1152 )]
1153 );
1154 }
1155
/// A multi-line block quote containing a heading and two paragraphs yields
/// one block quote element with those three children.
1156 #[gpui::test]
1157 async fn test_simple_block_quote_with_multiple_lines() {
1158 let parsed = parse(
1159 "\
1160> # Heading
1161> More
1162> text
1163>
1164> More text
1165",
1166 )
1167 .await;
1168
1169 assert_eq!(
1170 parsed.children,
1171 vec![block_quote(
1172 vec![
1173 h1(text("Heading", 4..11), 2..12),
1174 p("More text", 14..26),
1175 p("More text", 30..40)
1176 ],
1177 0..40
1178 )]
1179 );
1180 }
1181
/// A block quote nested inside another becomes a child of the outer quote,
/// and text after the outer quote is parsed as a separate paragraph.
1182 #[gpui::test]
1183 async fn test_nested_block_quote() {
1184 let parsed = parse(
1185 "\
1186> A
1187>
1188> > # B
1189>
1190> C
1191
1192More text
1193",
1194 )
1195 .await;
1196
1197 assert_eq!(
1198 parsed.children,
1199 vec![
1200 block_quote(
1201 vec![
1202 p("A", 2..4),
1203 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1204 p("C", 18..20)
1205 ],
1206 0..20
1207 ),
1208 p("More text", 21..31)
1209 ]
1210 );
1211 }
1212
/// A fenced code block without a language yields a code block element with
/// trimmed contents, no language and no highlights.
1213 #[gpui::test]
1214 async fn test_code_block() {
1215 let parsed = parse(
1216 "\
1217```
1218fn main() {
1219 return 0;
1220}
1221```
1222",
1223 )
1224 .await;
1225
1226 assert_eq!(
1227 parsed.children,
1228 vec![code_block(
1229 None,
1230 "fn main() {\n return 0;\n}",
1231 0..35,
1232 None
1233 )]
1234 );
1235 }
1236
/// A fenced code block tagged `rust`, parsed with a registry that contains
/// the test Rust language, records the language name and an empty (but
/// present) highlight list.
1237 #[gpui::test]
1238 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1239 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1240 language_registry.add(rust_lang());
1241
1242 let parsed = parse_markdown(
1243 "\
1244```rust
1245fn main() {
1246 return 0;
1247}
1248```
1249",
1250 None,
1251 Some(language_registry),
1252 )
1253 .await;
1254
1255 assert_eq!(
1256 parsed.children,
1257 vec![code_block(
1258 Some("rust".to_string()),
1259 "fn main() {\n return 0;\n}",
1260 0..39,
1261 Some(vec![])
1262 )]
1263 );
1264 }
1265
1266 fn rust_lang() -> Arc<Language> {
1267 Arc::new(Language::new(
1268 LanguageConfig {
1269 name: "Rust".into(),
1270 matcher: LanguageMatcher {
1271 path_suffixes: vec!["rs".into()],
1272 ..Default::default()
1273 },
1274 collapsed_placeholder: " /* ... */ ".to_string(),
1275 ..Default::default()
1276 },
1277 Some(tree_sitter_rust::LANGUAGE.into()),
1278 ))
1279 }
1280
1281 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1282 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1283 source_range,
1284 level: HeadingLevel::H1,
1285 contents,
1286 })
1287 }
1288
1289 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1290 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1291 source_range,
1292 level: HeadingLevel::H2,
1293 contents,
1294 })
1295 }
1296
1297 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1298 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1299 source_range,
1300 level: HeadingLevel::H3,
1301 contents,
1302 })
1303 }
1304
1305 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1306 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1307 }
1308
1309 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1310 ParsedMarkdownText {
1311 highlights: Vec::new(),
1312 region_ranges: Vec::new(),
1313 regions: Vec::new(),
1314 source_range,
1315 contents: contents.to_string(),
1316 }
1317 }
1318
1319 fn block_quote(
1320 children: Vec<ParsedMarkdownElement>,
1321 source_range: Range<usize>,
1322 ) -> ParsedMarkdownElement {
1323 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1324 source_range,
1325 children,
1326 })
1327 }
1328
1329 fn code_block(
1330 language: Option<String>,
1331 code: &str,
1332 source_range: Range<usize>,
1333 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1334 ) -> ParsedMarkdownElement {
1335 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1336 source_range,
1337 language,
1338 contents: code.to_string().into(),
1339 highlights,
1340 })
1341 }
1342
1343 fn list_item(
1344 source_range: Range<usize>,
1345 depth: u16,
1346 item_type: ParsedMarkdownListItemType,
1347 content: Vec<ParsedMarkdownElement>,
1348 ) -> ParsedMarkdownElement {
1349 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1350 source_range,
1351 item_type,
1352 depth,
1353 content,
1354 })
1355 }
1356
1357 fn table(
1358 source_range: Range<usize>,
1359 header: ParsedMarkdownTableRow,
1360 body: Vec<ParsedMarkdownTableRow>,
1361 ) -> ParsedMarkdownTable {
1362 ParsedMarkdownTable {
1363 column_alignments: Vec::new(),
1364 source_range,
1365 header,
1366 body,
1367 }
1368 }
1369
1370 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1371 ParsedMarkdownTableRow { children }
1372 }
1373
1374 impl PartialEq for ParsedMarkdownTable {
1375 fn eq(&self, other: &Self) -> bool {
1376 self.source_range == other.source_range
1377 && self.header == other.header
1378 && self.body == other.body
1379 }
1380 }
1381
1382 impl PartialEq for ParsedMarkdownText {
1383 fn eq(&self, other: &Self) -> bool {
1384 self.source_range == other.source_range && self.contents == other.contents
1385 }
1386 }
1387}