1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser over a fully collected pulldown-cmark event stream.
///
/// The parser walks `tokens` with `cursor` and accumulates the top-level
/// elements it produces in `parsed`.
struct MarkdownParser<'a> {
    /// Every event of the document paired with the source range it came from.
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` when a link destination is resolved.
    file_location_directory: Option<PathBuf>,
    /// When present, used to look up the language of fenced code blocks for highlighting.
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
39impl<'a> MarkdownParser<'a> {
40 fn new(
41 tokens: Vec<(Event<'a>, Range<usize>)>,
42 file_location_directory: Option<PathBuf>,
43 language_registry: Option<Arc<LanguageRegistry>>,
44 ) -> Self {
45 Self {
46 tokens,
47 file_location_directory,
48 language_registry,
49 cursor: 0,
50 parsed: vec![],
51 }
52 }
53
54 fn eof(&self) -> bool {
55 if self.tokens.is_empty() {
56 return true;
57 }
58 self.cursor >= self.tokens.len() - 1
59 }
60
61 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
62 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
63 return self.tokens.last();
64 }
65 return self.tokens.get(self.cursor + steps);
66 }
67
68 fn previous(&self) -> Option<&(Event, Range<usize>)> {
69 if self.cursor == 0 || self.cursor > self.tokens.len() {
70 return None;
71 }
72 return self.tokens.get(self.cursor - 1);
73 }
74
75 fn current(&self) -> Option<&(Event, Range<usize>)> {
76 return self.peek(0);
77 }
78
79 fn current_event(&self) -> Option<&Event> {
80 return self.current().map(|(event, _)| event);
81 }
82
83 fn is_text_like(event: &Event) -> bool {
84 match event {
85 Event::Text(_)
86 // Represent an inline code block
87 | Event::Code(_)
88 | Event::Html(_)
89 | Event::FootnoteReference(_)
90 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
91 | Event::Start(Tag::Emphasis)
92 | Event::Start(Tag::Strong)
93 | Event::Start(Tag::Strikethrough)
94 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
95 true
96 }
97 _ => false,
98 }
99 }
100
101 async fn parse_document(mut self) -> Self {
102 while !self.eof() {
103 if let Some(block) = self.parse_block().await {
104 self.parsed.extend(block);
105 }
106 }
107 self
108 }
109
110 #[async_recursion]
111 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
112 let (current, source_range) = self.current().unwrap();
113 let source_range = source_range.clone();
114 match current {
115 Event::Start(tag) => match tag {
116 Tag::Paragraph => {
117 self.cursor += 1;
118 let text = self.parse_text(false, Some(source_range));
119 Some(vec![ParsedMarkdownElement::Paragraph(text)])
120 }
121 Tag::Heading {
122 level,
123 id: _,
124 classes: _,
125 attrs: _,
126 } => {
127 let level = *level;
128 self.cursor += 1;
129 let heading = self.parse_heading(level);
130 Some(vec![ParsedMarkdownElement::Heading(heading)])
131 }
132 Tag::Table(alignment) => {
133 let alignment = alignment.clone();
134 self.cursor += 1;
135 let table = self.parse_table(alignment);
136 Some(vec![ParsedMarkdownElement::Table(table)])
137 }
138 Tag::List(order) => {
139 let order = *order;
140 self.cursor += 1;
141 let list = self.parse_list(order).await;
142 Some(list)
143 }
144 Tag::BlockQuote(_kind) => {
145 self.cursor += 1;
146 let block_quote = self.parse_block_quote().await;
147 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
148 }
149 Tag::CodeBlock(kind) => {
150 let language = match kind {
151 pulldown_cmark::CodeBlockKind::Indented => None,
152 pulldown_cmark::CodeBlockKind::Fenced(language) => {
153 if language.is_empty() {
154 None
155 } else {
156 Some(language.to_string())
157 }
158 }
159 };
160
161 self.cursor += 1;
162
163 let code_block = self.parse_code_block(language).await;
164 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
165 }
166 _ => {
167 self.cursor += 1;
168 None
169 }
170 },
171 Event::Rule => {
172 let source_range = source_range.clone();
173 self.cursor += 1;
174 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
175 }
176 _ => {
177 self.cursor += 1;
178 None
179 }
180 }
181 }
182
183 fn parse_text(
184 &mut self,
185 should_complete_on_soft_break: bool,
186 source_range: Option<Range<usize>>,
187 ) -> ParsedMarkdownText {
188 let source_range = source_range.unwrap_or_else(|| {
189 self.current()
190 .map(|(_, range)| range.clone())
191 .unwrap_or_default()
192 });
193
194 let mut text = String::new();
195 let mut bold_depth = 0;
196 let mut italic_depth = 0;
197 let mut strikethrough_depth = 0;
198 let mut link: Option<Link> = None;
199 let mut region_ranges: Vec<Range<usize>> = vec![];
200 let mut regions: Vec<ParsedRegion> = vec![];
201 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
202
203 let mut link_urls: Vec<String> = vec![];
204 let mut link_ranges: Vec<Range<usize>> = vec![];
205
206 loop {
207 if self.eof() {
208 break;
209 }
210
211 let (current, _source_range) = self.current().unwrap();
212 let prev_len = text.len();
213 match current {
214 Event::SoftBreak => {
215 if should_complete_on_soft_break {
216 break;
217 }
218
219 // `Some text\nSome more text` should be treated as a single line.
220 text.push(' ');
221 }
222
223 Event::HardBreak => {
224 text.push('\n');
225 }
226
227 Event::Text(t) => {
228 text.push_str(t.as_ref());
229
230 let mut style = MarkdownHighlightStyle::default();
231
232 if bold_depth > 0 {
233 style.weight = FontWeight::BOLD;
234 }
235
236 if italic_depth > 0 {
237 style.italic = true;
238 }
239
240 if strikethrough_depth > 0 {
241 style.strikethrough = true;
242 }
243
244 let last_run_len = if let Some(link) = link.clone() {
245 region_ranges.push(prev_len..text.len());
246 regions.push(ParsedRegion {
247 code: false,
248 link: Some(link),
249 });
250 style.underline = true;
251 prev_len
252 } else {
253 // Manually scan for links
254 let mut finder = linkify::LinkFinder::new();
255 finder.kinds(&[linkify::LinkKind::Url]);
256 let mut last_link_len = prev_len;
257 for link in finder.links(t) {
258 let start = link.start();
259 let end = link.end();
260 let range = (prev_len + start)..(prev_len + end);
261 link_ranges.push(range.clone());
262 link_urls.push(link.as_str().to_string());
263
264 // If there is a style before we match a link, we have to add this to the highlighted ranges
265 if style != MarkdownHighlightStyle::default()
266 && last_link_len < link.start()
267 {
268 highlights.push((
269 last_link_len..link.start(),
270 MarkdownHighlight::Style(style.clone()),
271 ));
272 }
273
274 highlights.push((
275 range.clone(),
276 MarkdownHighlight::Style(MarkdownHighlightStyle {
277 underline: true,
278 ..style
279 }),
280 ));
281 region_ranges.push(range.clone());
282 regions.push(ParsedRegion {
283 code: false,
284 link: Some(Link::Web {
285 url: link.as_str().to_string(),
286 }),
287 });
288
289 last_link_len = end;
290 }
291 last_link_len
292 };
293
294 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
295 let mut new_highlight = true;
296 if let Some((last_range, last_style)) = highlights.last_mut() {
297 if last_range.end == last_run_len
298 && last_style == &MarkdownHighlight::Style(style.clone())
299 {
300 last_range.end = text.len();
301 new_highlight = false;
302 }
303 }
304 if new_highlight {
305 highlights
306 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
307 }
308 }
309 }
310
311 // Note: This event means "inline code" and not "code block"
312 Event::Code(t) => {
313 text.push_str(t.as_ref());
314 region_ranges.push(prev_len..text.len());
315
316 if link.is_some() {
317 highlights.push((
318 prev_len..text.len(),
319 MarkdownHighlight::Style(MarkdownHighlightStyle {
320 underline: true,
321 ..Default::default()
322 }),
323 ));
324 }
325
326 regions.push(ParsedRegion {
327 code: true,
328 link: link.clone(),
329 });
330 }
331
332 Event::Start(tag) => match tag {
333 Tag::Emphasis => italic_depth += 1,
334 Tag::Strong => bold_depth += 1,
335 Tag::Strikethrough => strikethrough_depth += 1,
336 Tag::Link {
337 link_type: _,
338 dest_url,
339 title: _,
340 id: _,
341 } => {
342 link = Link::identify(
343 self.file_location_directory.clone(),
344 dest_url.to_string(),
345 );
346 }
347 _ => {
348 break;
349 }
350 },
351
352 Event::End(tag) => match tag {
353 TagEnd::Emphasis => {
354 italic_depth -= 1;
355 }
356 TagEnd::Strong => {
357 bold_depth -= 1;
358 }
359 TagEnd::Strikethrough => {
360 strikethrough_depth -= 1;
361 }
362 TagEnd::Link => {
363 link = None;
364 }
365 TagEnd::Paragraph => {
366 self.cursor += 1;
367 break;
368 }
369 _ => {
370 break;
371 }
372 },
373
374 _ => {
375 break;
376 }
377 }
378
379 self.cursor += 1;
380 }
381
382 ParsedMarkdownText {
383 source_range,
384 contents: text,
385 highlights,
386 regions,
387 region_ranges,
388 }
389 }
390
391 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
392 let (_event, source_range) = self.previous().unwrap();
393 let source_range = source_range.clone();
394 let text = self.parse_text(true, None);
395
396 // Advance past the heading end tag
397 self.cursor += 1;
398
399 ParsedMarkdownHeading {
400 source_range: source_range.clone(),
401 level: match level {
402 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
403 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
404 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
405 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
406 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
407 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
408 },
409 contents: text,
410 }
411 }
412
413 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
414 let (_event, source_range) = self.previous().unwrap();
415 let source_range = source_range.clone();
416 let mut header = ParsedMarkdownTableRow::new();
417 let mut body = vec![];
418 let mut current_row = vec![];
419 let mut in_header = true;
420 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
421
422 loop {
423 if self.eof() {
424 break;
425 }
426
427 let (current, source_range) = self.current().unwrap();
428 let source_range = source_range.clone();
429 match current {
430 Event::Start(Tag::TableHead)
431 | Event::Start(Tag::TableRow)
432 | Event::End(TagEnd::TableCell) => {
433 self.cursor += 1;
434 }
435 Event::Start(Tag::TableCell) => {
436 self.cursor += 1;
437 let cell_contents = self.parse_text(false, Some(source_range));
438 current_row.push(cell_contents);
439 }
440 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
441 self.cursor += 1;
442 let new_row = std::mem::take(&mut current_row);
443 if in_header {
444 header.children = new_row;
445 in_header = false;
446 } else {
447 let row = ParsedMarkdownTableRow::with_children(new_row);
448 body.push(row);
449 }
450 }
451 Event::End(TagEnd::Table) => {
452 self.cursor += 1;
453 break;
454 }
455 _ => {
456 break;
457 }
458 }
459 }
460
461 ParsedMarkdownTable {
462 source_range,
463 header,
464 body,
465 column_alignments,
466 }
467 }
468
469 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
470 match alignment {
471 Alignment::None => ParsedMarkdownTableAlignment::None,
472 Alignment::Left => ParsedMarkdownTableAlignment::Left,
473 Alignment::Center => ParsedMarkdownTableAlignment::Center,
474 Alignment::Right => ParsedMarkdownTableAlignment::Right,
475 }
476 }
477
478 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
479 let (_, list_source_range) = self.previous().unwrap();
480
481 let mut items = Vec::new();
482 let mut items_stack = vec![Vec::new()];
483 let mut depth = 1;
484 let mut task_item = None;
485 let mut order = order;
486 let mut order_stack = Vec::new();
487
488 let mut insertion_indices = FxHashMap::default();
489 let mut source_ranges = FxHashMap::default();
490 let mut start_item_range = list_source_range.clone();
491
492 while !self.eof() {
493 let (current, source_range) = self.current().unwrap();
494 match current {
495 Event::Start(Tag::List(new_order)) => {
496 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
497 insertion_indices.insert(depth, items.len());
498 }
499
500 // We will use the start of the nested list as the end for the current item's range,
501 // because we don't care about the hierarchy of list items
502 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
503 e.insert(start_item_range.start..source_range.start);
504 }
505
506 order_stack.push(order);
507 order = *new_order;
508 self.cursor += 1;
509 depth += 1;
510 }
511 Event::End(TagEnd::List(_)) => {
512 order = order_stack.pop().flatten();
513 self.cursor += 1;
514 depth -= 1;
515
516 if depth == 0 {
517 break;
518 }
519 }
520 Event::Start(Tag::Item) => {
521 start_item_range = source_range.clone();
522
523 self.cursor += 1;
524 items_stack.push(Vec::new());
525
526 // Check for task list marker (`- [ ]` or `- [x]`)
527 if let Some(event) = self.current_event() {
528 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
529 if event == &Event::Start(Tag::Paragraph) {
530 self.cursor += 1;
531 }
532
533 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
534 task_item = Some((*checked, range.clone()));
535 self.cursor += 1;
536 }
537 }
538
539 if let Some((event, range)) = self.current() {
540 // This is a plain list item.
541 // For example `- some text` or `1. [Docs](./docs.md)`
542 if MarkdownParser::is_text_like(event) {
543 let text = self.parse_text(false, Some(range.clone()));
544 let block = ParsedMarkdownElement::Paragraph(text);
545 if let Some(content) = items_stack.last_mut() {
546 content.push(block);
547 }
548 } else {
549 let block = self.parse_block().await;
550 if let Some(block) = block {
551 if let Some(content) = items_stack.last_mut() {
552 content.extend(block);
553 }
554 }
555 }
556 }
557
558 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
559 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
560 self.cursor += 1;
561 }
562 }
563 Event::End(TagEnd::Item) => {
564 self.cursor += 1;
565
566 let item_type = if let Some((checked, range)) = task_item {
567 ParsedMarkdownListItemType::Task(checked, range)
568 } else if let Some(order) = order {
569 ParsedMarkdownListItemType::Ordered(order)
570 } else {
571 ParsedMarkdownListItemType::Unordered
572 };
573
574 if let Some(current) = order {
575 order = Some(current + 1);
576 }
577
578 if let Some(content) = items_stack.pop() {
579 let source_range = source_ranges
580 .remove(&depth)
581 .unwrap_or(start_item_range.clone());
582
583 // We need to remove the last character of the source range, because it includes the newline character
584 let source_range = source_range.start..source_range.end - 1;
585 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
586 source_range,
587 content,
588 depth,
589 item_type,
590 });
591
592 if let Some(index) = insertion_indices.get(&depth) {
593 items.insert(*index, item);
594 insertion_indices.remove(&depth);
595 } else {
596 items.push(item);
597 }
598 }
599
600 task_item = None;
601 }
602 _ => {
603 if depth == 0 {
604 break;
605 }
606 // This can only happen if a list item starts with more then one paragraph,
607 // or the list item contains blocks that should be rendered after the nested list items
608 let block = self.parse_block().await;
609 if let Some(block) = block {
610 if let Some(items_stack) = items_stack.last_mut() {
611 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
612 if !insertion_indices.contains_key(&depth) {
613 items_stack.extend(block);
614 continue;
615 }
616 }
617
618 // Otherwise we need to insert the block after all the nested items
619 // that have been parsed so far
620 items.extend(block);
621 }
622 }
623 }
624 }
625
626 items
627 }
628
629 #[async_recursion]
630 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
631 let (_event, source_range) = self.previous().unwrap();
632 let source_range = source_range.clone();
633 let mut nested_depth = 1;
634
635 let mut children: Vec<ParsedMarkdownElement> = vec![];
636
637 while !self.eof() {
638 let block = self.parse_block().await;
639
640 if let Some(block) = block {
641 children.extend(block);
642 } else {
643 break;
644 }
645
646 if self.eof() {
647 break;
648 }
649
650 let (current, _source_range) = self.current().unwrap();
651 match current {
652 // This is a nested block quote.
653 // Record that we're in a nested block quote and continue parsing.
654 // We don't need to advance the cursor since the next
655 // call to `parse_block` will handle it.
656 Event::Start(Tag::BlockQuote(_kind)) => {
657 nested_depth += 1;
658 }
659 Event::End(TagEnd::BlockQuote(_kind)) => {
660 nested_depth -= 1;
661 if nested_depth == 0 {
662 self.cursor += 1;
663 break;
664 }
665 }
666 _ => {}
667 };
668 }
669
670 ParsedMarkdownBlockQuote {
671 source_range,
672 children,
673 }
674 }
675
676 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
677 let (_event, source_range) = self.previous().unwrap();
678 let source_range = source_range.clone();
679 let mut code = String::new();
680
681 while !self.eof() {
682 let (current, _source_range) = self.current().unwrap();
683 match current {
684 Event::Text(text) => {
685 code.push_str(text);
686 self.cursor += 1;
687 }
688 Event::End(TagEnd::CodeBlock) => {
689 self.cursor += 1;
690 break;
691 }
692 _ => {
693 break;
694 }
695 }
696 }
697
698 let highlights = if let Some(language) = &language {
699 if let Some(registry) = &self.language_registry {
700 let rope: language::Rope = code.as_str().into();
701 registry
702 .language_for_name_or_extension(language)
703 .await
704 .map(|l| l.highlight_text(&rope, 0..code.len()))
705 .ok()
706 } else {
707 None
708 }
709 } else {
710 None
711 };
712
713 ParsedMarkdownCodeBlock {
714 source_range,
715 contents: code.trim().to_string().into(),
716 language,
717 highlights,
718 }
719 }
720}
721
722#[cfg(test)]
723mod tests {
724 use super::*;
725
726 use gpui::BackgroundExecutor;
727 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
728 use pretty_assertions::assert_eq;
729
730 use ParsedMarkdownListItemType::*;
731
732 async fn parse(input: &str) -> ParsedMarkdown {
733 parse_markdown(input, None, None).await
734 }
735
736 #[gpui::test]
737 async fn test_headings() {
738 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
739
740 assert_eq!(
741 parsed.children,
742 vec![
743 h1(text("Heading one", 2..13), 0..14),
744 h2(text("Heading two", 17..28), 14..29),
745 h3(text("Heading three", 33..46), 29..46),
746 ]
747 );
748 }
749
750 #[gpui::test]
751 async fn test_newlines_dont_new_paragraphs() {
752 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
753
754 assert_eq!(
755 parsed.children,
756 vec![p("Some text that is bolded and italicized", 0..46)]
757 );
758 }
759
760 #[gpui::test]
761 async fn test_heading_with_paragraph() {
762 let parsed = parse("# Zed\nThe editor").await;
763
764 assert_eq!(
765 parsed.children,
766 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
767 );
768 }
769
770 #[gpui::test]
771 async fn test_double_newlines_do_new_paragraphs() {
772 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
773
774 assert_eq!(
775 parsed.children,
776 vec![
777 p("Some text that is bolded", 0..29),
778 p("and italicized", 31..47),
779 ]
780 );
781 }
782
783 #[gpui::test]
784 async fn test_bold_italic_text() {
785 let parsed = parse("Some text **that is bolded** and *italicized*").await;
786
787 assert_eq!(
788 parsed.children,
789 vec![p("Some text that is bolded and italicized", 0..45)]
790 );
791 }
792
793 #[gpui::test]
794 async fn test_nested_bold_strikethrough_text() {
795 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
796
797 assert_eq!(parsed.children.len(), 1);
798 assert_eq!(
799 parsed.children[0],
800 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
801 source_range: 0..35,
802 contents: "Some bostrikethroughld text".to_string(),
803 highlights: Vec::new(),
804 region_ranges: Vec::new(),
805 regions: Vec::new(),
806 })
807 );
808
809 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
810 text
811 } else {
812 panic!("Expected a paragraph");
813 };
814 assert_eq!(
815 paragraph.highlights,
816 vec![
817 (
818 5..7,
819 MarkdownHighlight::Style(MarkdownHighlightStyle {
820 weight: FontWeight::BOLD,
821 ..Default::default()
822 }),
823 ),
824 (
825 7..20,
826 MarkdownHighlight::Style(MarkdownHighlightStyle {
827 weight: FontWeight::BOLD,
828 strikethrough: true,
829 ..Default::default()
830 }),
831 ),
832 (
833 20..22,
834 MarkdownHighlight::Style(MarkdownHighlightStyle {
835 weight: FontWeight::BOLD,
836 ..Default::default()
837 }),
838 ),
839 ]
840 );
841 }
842
843 #[gpui::test]
844 async fn test_raw_links_detection() {
845 let parsed = parse("Checkout this https://zed.dev link").await;
846
847 assert_eq!(
848 parsed.children,
849 vec![p("Checkout this https://zed.dev link", 0..34)]
850 );
851
852 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
853 text
854 } else {
855 panic!("Expected a paragraph");
856 };
857 assert_eq!(
858 paragraph.highlights,
859 vec![(
860 14..29,
861 MarkdownHighlight::Style(MarkdownHighlightStyle {
862 underline: true,
863 ..Default::default()
864 }),
865 )]
866 );
867 assert_eq!(
868 paragraph.regions,
869 vec![ParsedRegion {
870 code: false,
871 link: Some(Link::Web {
872 url: "https://zed.dev".to_string()
873 }),
874 }]
875 );
876 assert_eq!(paragraph.region_ranges, vec![14..29]);
877 }
878
879 #[gpui::test]
880 async fn test_header_only_table() {
881 let markdown = "\
882| Header 1 | Header 2 |
883|----------|----------|
884
885Some other content
886";
887
888 let expected_table = table(
889 0..48,
890 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
891 vec![],
892 );
893
894 assert_eq!(
895 parse(markdown).await.children[0],
896 ParsedMarkdownElement::Table(expected_table)
897 );
898 }
899
900 #[gpui::test]
901 async fn test_basic_table() {
902 let markdown = "\
903| Header 1 | Header 2 |
904|----------|----------|
905| Cell 1 | Cell 2 |
906| Cell 3 | Cell 4 |";
907
908 let expected_table = table(
909 0..95,
910 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
911 vec![
912 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
913 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
914 ],
915 );
916
917 assert_eq!(
918 parse(markdown).await.children[0],
919 ParsedMarkdownElement::Table(expected_table)
920 );
921 }
922
923 #[gpui::test]
924 async fn test_list_basic() {
925 let parsed = parse(
926 "\
927* Item 1
928* Item 2
929* Item 3
930",
931 )
932 .await;
933
934 assert_eq!(
935 parsed.children,
936 vec![
937 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
938 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
939 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
940 ],
941 );
942 }
943
944 #[gpui::test]
945 async fn test_list_with_tasks() {
946 let parsed = parse(
947 "\
948- [ ] TODO
949- [x] Checked
950",
951 )
952 .await;
953
954 assert_eq!(
955 parsed.children,
956 vec![
957 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
958 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
959 ],
960 );
961 }
962
963 #[gpui::test]
964 async fn test_list_with_linebreak_is_handled_correctly() {
965 let parsed = parse(
966 "\
967- [ ] Task 1
968
969- [x] Task 2
970",
971 )
972 .await;
973
974 assert_eq!(
975 parsed.children,
976 vec![
977 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
978 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
979 ],
980 );
981 }
982
983 #[gpui::test]
984 async fn test_list_nested() {
985 let parsed = parse(
986 "\
987* Item 1
988* Item 2
989* Item 3
990
9911. Hello
9921. Two
993 1. Three
9942. Four
9953. Five
996
997* First
998 1. Hello
999 1. Goodbyte
1000 - Inner
1001 - Inner
1002 2. Goodbyte
1003* Last
1004",
1005 )
1006 .await;
1007
1008 assert_eq!(
1009 parsed.children,
1010 vec![
1011 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1012 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1013 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1014 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1015 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1016 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1017 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1018 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1019 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1020 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1021 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1022 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1023 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1024 list_item(143..154, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1025 list_item(155..161, 1, Unordered, vec![p("Last", 157..161)]),
1026 ]
1027 );
1028 }
1029
1030 #[gpui::test]
1031 async fn test_list_with_nested_content() {
1032 let parsed = parse(
1033 "\
1034* This is a list item with two paragraphs.
1035
1036 This is the second paragraph in the list item.
1037",
1038 )
1039 .await;
1040
1041 assert_eq!(
1042 parsed.children,
1043 vec![list_item(
1044 0..96,
1045 1,
1046 Unordered,
1047 vec![
1048 p("This is a list item with two paragraphs.", 4..44),
1049 p("This is the second paragraph in the list item.", 50..97)
1050 ],
1051 ),],
1052 );
1053 }
1054
1055 #[gpui::test]
1056 async fn test_nested_list_with_paragraph_inside() {
1057 let parsed = parse(
1058 "\
10591. a
1060 1. b
1061 1. c
1062
1063 text
1064
1065 1. d
1066",
1067 )
1068 .await;
1069
1070 assert_eq!(
1071 parsed.children,
1072 vec![
1073 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1074 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1075 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1076 p("text", 32..37),
1077 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1078 ],
1079 );
1080 }
1081
1082 #[gpui::test]
1083 async fn test_list_with_leading_text() {
1084 let parsed = parse(
1085 "\
1086* `code`
1087* **bold**
1088* [link](https://example.com)
1089",
1090 )
1091 .await;
1092
1093 assert_eq!(
1094 parsed.children,
1095 vec![
1096 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1097 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1098 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
1099 ],
1100 );
1101 }
1102
1103 #[gpui::test]
1104 async fn test_simple_block_quote() {
1105 let parsed = parse("> Simple block quote with **styled text**").await;
1106
1107 assert_eq!(
1108 parsed.children,
1109 vec![block_quote(
1110 vec![p("Simple block quote with styled text", 2..41)],
1111 0..41
1112 )]
1113 );
1114 }
1115
1116 #[gpui::test]
1117 async fn test_simple_block_quote_with_multiple_lines() {
1118 let parsed = parse(
1119 "\
1120> # Heading
1121> More
1122> text
1123>
1124> More text
1125",
1126 )
1127 .await;
1128
1129 assert_eq!(
1130 parsed.children,
1131 vec![block_quote(
1132 vec![
1133 h1(text("Heading", 4..11), 2..12),
1134 p("More text", 14..26),
1135 p("More text", 30..40)
1136 ],
1137 0..40
1138 )]
1139 );
1140 }
1141
1142 #[gpui::test]
1143 async fn test_nested_block_quote() {
1144 let parsed = parse(
1145 "\
1146> A
1147>
1148> > # B
1149>
1150> C
1151
1152More text
1153",
1154 )
1155 .await;
1156
1157 assert_eq!(
1158 parsed.children,
1159 vec![
1160 block_quote(
1161 vec![
1162 p("A", 2..4),
1163 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1164 p("C", 18..20)
1165 ],
1166 0..20
1167 ),
1168 p("More text", 21..31)
1169 ]
1170 );
1171 }
1172
1173 #[gpui::test]
1174 async fn test_code_block() {
1175 let parsed = parse(
1176 "\
1177```
1178fn main() {
1179 return 0;
1180}
1181```
1182",
1183 )
1184 .await;
1185
1186 assert_eq!(
1187 parsed.children,
1188 vec![code_block(
1189 None,
1190 "fn main() {\n return 0;\n}",
1191 0..35,
1192 None
1193 )]
1194 );
1195 }
1196
1197 #[gpui::test]
1198 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1199 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1200 language_registry.add(rust_lang());
1201
1202 let parsed = parse_markdown(
1203 "\
1204```rust
1205fn main() {
1206 return 0;
1207}
1208```
1209",
1210 None,
1211 Some(language_registry),
1212 )
1213 .await;
1214
1215 assert_eq!(
1216 parsed.children,
1217 vec![code_block(
1218 Some("rust".to_string()),
1219 "fn main() {\n return 0;\n}",
1220 0..39,
1221 Some(vec![])
1222 )]
1223 );
1224 }
1225
1226 fn rust_lang() -> Arc<Language> {
1227 Arc::new(Language::new(
1228 LanguageConfig {
1229 name: "Rust".into(),
1230 matcher: LanguageMatcher {
1231 path_suffixes: vec!["rs".into()],
1232 ..Default::default()
1233 },
1234 collapsed_placeholder: " /* ... */ ".to_string(),
1235 ..Default::default()
1236 },
1237 Some(tree_sitter_rust::LANGUAGE.into()),
1238 ))
1239 }
1240
1241 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1242 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1243 source_range,
1244 level: HeadingLevel::H1,
1245 contents,
1246 })
1247 }
1248
1249 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1250 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1251 source_range,
1252 level: HeadingLevel::H2,
1253 contents,
1254 })
1255 }
1256
1257 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1258 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1259 source_range,
1260 level: HeadingLevel::H3,
1261 contents,
1262 })
1263 }
1264
1265 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1266 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1267 }
1268
1269 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1270 ParsedMarkdownText {
1271 highlights: Vec::new(),
1272 region_ranges: Vec::new(),
1273 regions: Vec::new(),
1274 source_range,
1275 contents: contents.to_string(),
1276 }
1277 }
1278
1279 fn block_quote(
1280 children: Vec<ParsedMarkdownElement>,
1281 source_range: Range<usize>,
1282 ) -> ParsedMarkdownElement {
1283 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1284 source_range,
1285 children,
1286 })
1287 }
1288
1289 fn code_block(
1290 language: Option<String>,
1291 code: &str,
1292 source_range: Range<usize>,
1293 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1294 ) -> ParsedMarkdownElement {
1295 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1296 source_range,
1297 language,
1298 contents: code.to_string().into(),
1299 highlights,
1300 })
1301 }
1302
1303 fn list_item(
1304 source_range: Range<usize>,
1305 depth: u16,
1306 item_type: ParsedMarkdownListItemType,
1307 content: Vec<ParsedMarkdownElement>,
1308 ) -> ParsedMarkdownElement {
1309 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1310 source_range,
1311 item_type,
1312 depth,
1313 content,
1314 })
1315 }
1316
1317 fn table(
1318 source_range: Range<usize>,
1319 header: ParsedMarkdownTableRow,
1320 body: Vec<ParsedMarkdownTableRow>,
1321 ) -> ParsedMarkdownTable {
1322 ParsedMarkdownTable {
1323 column_alignments: Vec::new(),
1324 source_range,
1325 header,
1326 body,
1327 }
1328 }
1329
1330 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1331 ParsedMarkdownTableRow { children }
1332 }
1333
1334 impl PartialEq for ParsedMarkdownTable {
1335 fn eq(&self, other: &Self) -> bool {
1336 self.source_range == other.source_range
1337 && self.header == other.header
1338 && self.body == other.body
1339 }
1340 }
1341
1342 impl PartialEq for ParsedMarkdownText {
1343 fn eq(&self, other: &Self) -> bool {
1344 self.source_range == other.source_range && self.contents == other.contents
1345 }
1346 }
1347}