1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser that turns a flat list of pulldown-cmark events into
/// [`ParsedMarkdownElement`]s.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the source range reported by
    /// pulldown-cmark's offset iterator
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` when a link destination is resolved
    file_location_directory: Option<PathBuf>,
    /// When present, used to look up a language for highlighting fenced code blocks
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
/// A list item that is still being assembled while `parse_list` walks its events.
struct MarkdownListItem {
    /// The blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task
    item_type: ParsedMarkdownListItemType,
}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 return self.tokens.get(self.cursor + steps);
80 }
81
82 fn previous(&self) -> Option<&(Event, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 return self.tokens.get(self.cursor - 1);
87 }
88
89 fn current(&self) -> Option<&(Event, Range<usize>)> {
90 return self.peek(0);
91 }
92
93 fn current_event(&self) -> Option<&Event> {
94 return self.current().map(|(event, _)| event);
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::FootnoteReference(_)
104 | Event::Start(Tag::Link { link_type: _, dest_url: _, title: _, id: _ })
105 | Event::Start(Tag::Emphasis)
106 | Event::Start(Tag::Strong)
107 | Event::Start(Tag::Strikethrough)
108 | Event::Start(Tag::Image { link_type: _, dest_url: _, title: _, id: _ }) => {
109 true
110 }
111 _ => false,
112 }
113 }
114
115 async fn parse_document(mut self) -> Self {
116 while !self.eof() {
117 if let Some(block) = self.parse_block().await {
118 self.parsed.extend(block);
119 } else {
120 self.cursor += 1;
121 }
122 }
123 self
124 }
125
126 #[async_recursion]
127 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
128 let (current, source_range) = self.current().unwrap();
129 let source_range = source_range.clone();
130 match current {
131 Event::Start(tag) => match tag {
132 Tag::Paragraph => {
133 self.cursor += 1;
134 let text = self.parse_text(false, Some(source_range));
135 Some(vec![ParsedMarkdownElement::Paragraph(text)])
136 }
137 Tag::Heading {
138 level,
139 id: _,
140 classes: _,
141 attrs: _,
142 } => {
143 let level = *level;
144 self.cursor += 1;
145 let heading = self.parse_heading(level);
146 Some(vec![ParsedMarkdownElement::Heading(heading)])
147 }
148 Tag::Table(alignment) => {
149 let alignment = alignment.clone();
150 self.cursor += 1;
151 let table = self.parse_table(alignment);
152 Some(vec![ParsedMarkdownElement::Table(table)])
153 }
154 Tag::List(order) => {
155 let order = *order;
156 self.cursor += 1;
157 let list = self.parse_list(order).await;
158 Some(list)
159 }
160 Tag::BlockQuote(_kind) => {
161 self.cursor += 1;
162 let block_quote = self.parse_block_quote().await;
163 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
164 }
165 Tag::CodeBlock(kind) => {
166 let language = match kind {
167 pulldown_cmark::CodeBlockKind::Indented => None,
168 pulldown_cmark::CodeBlockKind::Fenced(language) => {
169 if language.is_empty() {
170 None
171 } else {
172 Some(language.to_string())
173 }
174 }
175 };
176
177 self.cursor += 1;
178
179 let code_block = self.parse_code_block(language).await;
180 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
181 }
182 _ => None,
183 },
184 Event::Rule => {
185 let source_range = source_range.clone();
186 self.cursor += 1;
187 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
188 }
189 _ => None,
190 }
191 }
192
193 fn parse_text(
194 &mut self,
195 should_complete_on_soft_break: bool,
196 source_range: Option<Range<usize>>,
197 ) -> ParsedMarkdownText {
198 let source_range = source_range.unwrap_or_else(|| {
199 self.current()
200 .map(|(_, range)| range.clone())
201 .unwrap_or_default()
202 });
203
204 let mut text = String::new();
205 let mut bold_depth = 0;
206 let mut italic_depth = 0;
207 let mut strikethrough_depth = 0;
208 let mut link: Option<Link> = None;
209 let mut region_ranges: Vec<Range<usize>> = vec![];
210 let mut regions: Vec<ParsedRegion> = vec![];
211 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
212
213 let mut link_urls: Vec<String> = vec![];
214 let mut link_ranges: Vec<Range<usize>> = vec![];
215
216 loop {
217 if self.eof() {
218 break;
219 }
220
221 let (current, _source_range) = self.current().unwrap();
222 let prev_len = text.len();
223 match current {
224 Event::SoftBreak => {
225 if should_complete_on_soft_break {
226 break;
227 }
228
229 // `Some text\nSome more text` should be treated as a single line.
230 text.push(' ');
231 }
232
233 Event::HardBreak => {
234 text.push('\n');
235 }
236
237 // We want to ignore any inline HTML tags in the text but keep
238 // the text between them
239 Event::InlineHtml(_) => {}
240
241 Event::Text(t) => {
242 text.push_str(t.as_ref());
243
244 let mut style = MarkdownHighlightStyle::default();
245
246 if bold_depth > 0 {
247 style.weight = FontWeight::BOLD;
248 }
249
250 if italic_depth > 0 {
251 style.italic = true;
252 }
253
254 if strikethrough_depth > 0 {
255 style.strikethrough = true;
256 }
257
258 let last_run_len = if let Some(link) = link.clone() {
259 region_ranges.push(prev_len..text.len());
260 regions.push(ParsedRegion {
261 code: false,
262 link: Some(link),
263 });
264 style.underline = true;
265 prev_len
266 } else {
267 // Manually scan for links
268 let mut finder = linkify::LinkFinder::new();
269 finder.kinds(&[linkify::LinkKind::Url]);
270 let mut last_link_len = prev_len;
271 for link in finder.links(t) {
272 let start = link.start();
273 let end = link.end();
274 let range = (prev_len + start)..(prev_len + end);
275 link_ranges.push(range.clone());
276 link_urls.push(link.as_str().to_string());
277
278 // If there is a style before we match a link, we have to add this to the highlighted ranges
279 if style != MarkdownHighlightStyle::default()
280 && last_link_len < link.start()
281 {
282 highlights.push((
283 last_link_len..link.start(),
284 MarkdownHighlight::Style(style.clone()),
285 ));
286 }
287
288 highlights.push((
289 range.clone(),
290 MarkdownHighlight::Style(MarkdownHighlightStyle {
291 underline: true,
292 ..style
293 }),
294 ));
295 region_ranges.push(range.clone());
296 regions.push(ParsedRegion {
297 code: false,
298 link: Some(Link::Web {
299 url: link.as_str().to_string(),
300 }),
301 });
302
303 last_link_len = end;
304 }
305 last_link_len
306 };
307
308 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
309 let mut new_highlight = true;
310 if let Some((last_range, last_style)) = highlights.last_mut() {
311 if last_range.end == last_run_len
312 && last_style == &MarkdownHighlight::Style(style.clone())
313 {
314 last_range.end = text.len();
315 new_highlight = false;
316 }
317 }
318 if new_highlight {
319 highlights
320 .push((last_run_len..text.len(), MarkdownHighlight::Style(style)));
321 }
322 }
323 }
324
325 // Note: This event means "inline code" and not "code block"
326 Event::Code(t) => {
327 text.push_str(t.as_ref());
328 region_ranges.push(prev_len..text.len());
329
330 if link.is_some() {
331 highlights.push((
332 prev_len..text.len(),
333 MarkdownHighlight::Style(MarkdownHighlightStyle {
334 underline: true,
335 ..Default::default()
336 }),
337 ));
338 }
339
340 regions.push(ParsedRegion {
341 code: true,
342 link: link.clone(),
343 });
344 }
345
346 Event::Start(tag) => match tag {
347 Tag::Emphasis => italic_depth += 1,
348 Tag::Strong => bold_depth += 1,
349 Tag::Strikethrough => strikethrough_depth += 1,
350 Tag::Link {
351 link_type: _,
352 dest_url,
353 title: _,
354 id: _,
355 } => {
356 link = Link::identify(
357 self.file_location_directory.clone(),
358 dest_url.to_string(),
359 );
360 }
361 _ => {
362 break;
363 }
364 },
365
366 Event::End(tag) => match tag {
367 TagEnd::Emphasis => {
368 italic_depth -= 1;
369 }
370 TagEnd::Strong => {
371 bold_depth -= 1;
372 }
373 TagEnd::Strikethrough => {
374 strikethrough_depth -= 1;
375 }
376 TagEnd::Link => {
377 link = None;
378 }
379 TagEnd::Paragraph => {
380 self.cursor += 1;
381 break;
382 }
383 _ => {
384 break;
385 }
386 },
387
388 _ => {
389 break;
390 }
391 }
392
393 self.cursor += 1;
394 }
395
396 ParsedMarkdownText {
397 source_range,
398 contents: text,
399 highlights,
400 regions,
401 region_ranges,
402 }
403 }
404
405 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
406 let (_event, source_range) = self.previous().unwrap();
407 let source_range = source_range.clone();
408 let text = self.parse_text(true, None);
409
410 // Advance past the heading end tag
411 self.cursor += 1;
412
413 ParsedMarkdownHeading {
414 source_range: source_range.clone(),
415 level: match level {
416 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
417 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
418 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
419 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
420 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
421 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
422 },
423 contents: text,
424 }
425 }
426
427 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
428 let (_event, source_range) = self.previous().unwrap();
429 let source_range = source_range.clone();
430 let mut header = ParsedMarkdownTableRow::new();
431 let mut body = vec![];
432 let mut current_row = vec![];
433 let mut in_header = true;
434 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
435
436 loop {
437 if self.eof() {
438 break;
439 }
440
441 let (current, source_range) = self.current().unwrap();
442 let source_range = source_range.clone();
443 match current {
444 Event::Start(Tag::TableHead)
445 | Event::Start(Tag::TableRow)
446 | Event::End(TagEnd::TableCell) => {
447 self.cursor += 1;
448 }
449 Event::Start(Tag::TableCell) => {
450 self.cursor += 1;
451 let cell_contents = self.parse_text(false, Some(source_range));
452 current_row.push(cell_contents);
453 }
454 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
455 self.cursor += 1;
456 let new_row = std::mem::take(&mut current_row);
457 if in_header {
458 header.children = new_row;
459 in_header = false;
460 } else {
461 let row = ParsedMarkdownTableRow::with_children(new_row);
462 body.push(row);
463 }
464 }
465 Event::End(TagEnd::Table) => {
466 self.cursor += 1;
467 break;
468 }
469 _ => {
470 break;
471 }
472 }
473 }
474
475 ParsedMarkdownTable {
476 source_range,
477 header,
478 body,
479 column_alignments,
480 }
481 }
482
483 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
484 match alignment {
485 Alignment::None => ParsedMarkdownTableAlignment::None,
486 Alignment::Left => ParsedMarkdownTableAlignment::Left,
487 Alignment::Center => ParsedMarkdownTableAlignment::Center,
488 Alignment::Right => ParsedMarkdownTableAlignment::Right,
489 }
490 }
491
/// Parses a list whose start tag was just consumed, including any lists
/// nested inside it.
///
/// The result is flat: nested items are not stored inside their parent but
/// returned in the same vector, each carrying its own `depth` (starting at 1).
/// Blocks that follow nested items are emitted as stand-alone elements
/// after those items.
///
/// `order` is the starting number for an ordered list, or `None` for an
/// unordered one.
async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
    let (_, list_source_range) = self.previous().unwrap();

    // Finished items, in output order
    let mut items = Vec::new();
    // Items that have been opened but not closed yet
    let mut items_stack = vec![MarkdownListItem::default()];
    let mut depth = 1;
    let mut order = order;
    // Numbering of the enclosing lists, restored when a nested list ends
    let mut order_stack = Vec::new();

    // Per depth: where in `items` the parent item must be inserted so that it
    // ends up before the nested items that were finished earlier
    let mut insertion_indices = FxHashMap::default();
    // Per depth: the source range of a parent item, cut off at its first nested list
    let mut source_ranges = FxHashMap::default();
    let mut start_item_range = list_source_range.clone();

    while !self.eof() {
        let (current, source_range) = self.current().unwrap();
        match current {
            Event::Start(Tag::List(new_order)) => {
                if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                    insertion_indices.insert(depth, items.len());
                }

                // We will use the start of the nested list as the end for the current item's range,
                // because we don't care about the hierarchy of list items
                if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                    e.insert(start_item_range.start..source_range.start);
                }

                order_stack.push(order);
                order = *new_order;
                self.cursor += 1;
                depth += 1;
            }
            Event::End(TagEnd::List(_)) => {
                order = order_stack.pop().flatten();
                self.cursor += 1;
                depth -= 1;

                // The outermost list has ended
                if depth == 0 {
                    break;
                }
            }
            Event::Start(Tag::Item) => {
                start_item_range = source_range.clone();

                self.cursor += 1;
                items_stack.push(MarkdownListItem::default());

                let mut task_list = None;
                // Check for task list marker (`- [ ]` or `- [x]`)
                if let Some(event) = self.current_event() {
                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if event == &Event::Start(Tag::Paragraph) {
                        self.cursor += 1;
                    }

                    if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                        task_list = Some((*checked, range.clone()));
                        self.cursor += 1;
                    }
                }

                if let Some((event, range)) = self.current() {
                    // This is a plain list item.
                    // For example `- some text` or `1. [Docs](./docs.md)`
                    if MarkdownParser::is_text_like(event) {
                        let text = self.parse_text(false, Some(range.clone()));
                        let block = ParsedMarkdownElement::Paragraph(text);
                        if let Some(content) = items_stack.last_mut() {
                            // A task marker takes precedence over list numbering
                            let item_type = if let Some((checked, range)) = task_list {
                                ParsedMarkdownListItemType::Task(checked, range)
                            } else if let Some(order) = order {
                                ParsedMarkdownListItemType::Ordered(order)
                            } else {
                                ParsedMarkdownListItemType::Unordered
                            };
                            content.item_type = item_type;
                            content.content.push(block);
                        }
                    } else {
                        // The item starts with something other than inline text; try it as a block
                        let block = self.parse_block().await;
                        if let Some(block) = block {
                            if let Some(list_item) = items_stack.last_mut() {
                                list_item.content.extend(block);
                            }
                        }
                    }
                }

                // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                    self.cursor += 1;
                }
            }
            Event::End(TagEnd::Item) => {
                self.cursor += 1;

                // The next item of an ordered list gets the next number
                if let Some(current) = order {
                    order = Some(current + 1);
                }

                if let Some(list_item) = items_stack.pop() {
                    // Prefer the range that was cut off at a nested list, if one was recorded
                    let source_range = source_ranges
                        .remove(&depth)
                        .unwrap_or(start_item_range.clone());

                    // We need to remove the last character of the source range, because it includes the newline character
                    let source_range = source_range.start..source_range.end - 1;
                    let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                        source_range,
                        content: list_item.content,
                        depth,
                        item_type: list_item.item_type,
                    });

                    // Nested items were finished first, so a parent is inserted at the
                    // index recorded when its first nested list started
                    if let Some(index) = insertion_indices.get(&depth) {
                        items.insert(*index, item);
                        insertion_indices.remove(&depth);
                    } else {
                        items.push(item);
                    }
                }
            }
            _ => {
                if depth == 0 {
                    break;
                }
                // This can only happen if a list item starts with more than one paragraph,
                // or the list item contains blocks that should be rendered after the nested list items
                let block = self.parse_block().await;
                if let Some(block) = block {
                    if let Some(list_item) = items_stack.last_mut() {
                        // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                        if !insertion_indices.contains_key(&depth) {
                            list_item.content.extend(block);
                            continue;
                        }
                    }

                    // Otherwise we need to insert the block after all the nested items
                    // that have been parsed so far
                    items.extend(block);
                } else {
                    // Not a block we understand: skip the token
                    self.cursor += 1;
                }
            }
        }
    }

    items
}
642
643 #[async_recursion]
644 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
645 let (_event, source_range) = self.previous().unwrap();
646 let source_range = source_range.clone();
647 let mut nested_depth = 1;
648
649 let mut children: Vec<ParsedMarkdownElement> = vec![];
650
651 while !self.eof() {
652 let block = self.parse_block().await;
653
654 if let Some(block) = block {
655 children.extend(block);
656 } else {
657 break;
658 }
659
660 if self.eof() {
661 break;
662 }
663
664 let (current, _source_range) = self.current().unwrap();
665 match current {
666 // This is a nested block quote.
667 // Record that we're in a nested block quote and continue parsing.
668 // We don't need to advance the cursor since the next
669 // call to `parse_block` will handle it.
670 Event::Start(Tag::BlockQuote(_kind)) => {
671 nested_depth += 1;
672 }
673 Event::End(TagEnd::BlockQuote(_kind)) => {
674 nested_depth -= 1;
675 if nested_depth == 0 {
676 self.cursor += 1;
677 break;
678 }
679 }
680 _ => {}
681 };
682 }
683
684 ParsedMarkdownBlockQuote {
685 source_range,
686 children,
687 }
688 }
689
690 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
691 let (_event, source_range) = self.previous().unwrap();
692 let source_range = source_range.clone();
693 let mut code = String::new();
694
695 while !self.eof() {
696 let (current, _source_range) = self.current().unwrap();
697 match current {
698 Event::Text(text) => {
699 code.push_str(text);
700 self.cursor += 1;
701 }
702 Event::End(TagEnd::CodeBlock) => {
703 self.cursor += 1;
704 break;
705 }
706 _ => {
707 break;
708 }
709 }
710 }
711
712 let highlights = if let Some(language) = &language {
713 if let Some(registry) = &self.language_registry {
714 let rope: language::Rope = code.as_str().into();
715 registry
716 .language_for_name_or_extension(language)
717 .await
718 .map(|l| l.highlight_text(&rope, 0..code.len()))
719 .ok()
720 } else {
721 None
722 }
723 } else {
724 None
725 };
726
727 ParsedMarkdownCodeBlock {
728 source_range,
729 contents: code.trim().to_string().into(),
730 language,
731 highlights,
732 }
733 }
734}
735
736#[cfg(test)]
737mod tests {
738 use super::*;
739
740 use gpui::BackgroundExecutor;
741 use language::{tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher};
742 use pretty_assertions::assert_eq;
743 use ParsedMarkdownListItemType::*;
744
745 async fn parse(input: &str) -> ParsedMarkdown {
746 parse_markdown(input, None, None).await
747 }
748
749 #[gpui::test]
750 async fn test_headings() {
751 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
752
753 assert_eq!(
754 parsed.children,
755 vec![
756 h1(text("Heading one", 2..13), 0..14),
757 h2(text("Heading two", 17..28), 14..29),
758 h3(text("Heading three", 33..46), 29..46),
759 ]
760 );
761 }
762
763 #[gpui::test]
764 async fn test_newlines_dont_new_paragraphs() {
765 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
766
767 assert_eq!(
768 parsed.children,
769 vec![p("Some text that is bolded and italicized", 0..46)]
770 );
771 }
772
773 #[gpui::test]
774 async fn test_heading_with_paragraph() {
775 let parsed = parse("# Zed\nThe editor").await;
776
777 assert_eq!(
778 parsed.children,
779 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
780 );
781 }
782
783 #[gpui::test]
784 async fn test_double_newlines_do_new_paragraphs() {
785 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
786
787 assert_eq!(
788 parsed.children,
789 vec![
790 p("Some text that is bolded", 0..29),
791 p("and italicized", 31..47),
792 ]
793 );
794 }
795
796 #[gpui::test]
797 async fn test_bold_italic_text() {
798 let parsed = parse("Some text **that is bolded** and *italicized*").await;
799
800 assert_eq!(
801 parsed.children,
802 vec![p("Some text that is bolded and italicized", 0..45)]
803 );
804 }
805
806 #[gpui::test]
807 async fn test_nested_bold_strikethrough_text() {
808 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
809
810 assert_eq!(parsed.children.len(), 1);
811 assert_eq!(
812 parsed.children[0],
813 ParsedMarkdownElement::Paragraph(ParsedMarkdownText {
814 source_range: 0..35,
815 contents: "Some bostrikethroughld text".to_string(),
816 highlights: Vec::new(),
817 region_ranges: Vec::new(),
818 regions: Vec::new(),
819 })
820 );
821
822 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
823 text
824 } else {
825 panic!("Expected a paragraph");
826 };
827 assert_eq!(
828 paragraph.highlights,
829 vec![
830 (
831 5..7,
832 MarkdownHighlight::Style(MarkdownHighlightStyle {
833 weight: FontWeight::BOLD,
834 ..Default::default()
835 }),
836 ),
837 (
838 7..20,
839 MarkdownHighlight::Style(MarkdownHighlightStyle {
840 weight: FontWeight::BOLD,
841 strikethrough: true,
842 ..Default::default()
843 }),
844 ),
845 (
846 20..22,
847 MarkdownHighlight::Style(MarkdownHighlightStyle {
848 weight: FontWeight::BOLD,
849 ..Default::default()
850 }),
851 ),
852 ]
853 );
854 }
855
856 #[gpui::test]
857 async fn test_text_with_inline_html() {
858 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
859
860 assert_eq!(
861 parsed.children,
862 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
863 );
864 }
865
866 #[gpui::test]
867 async fn test_raw_links_detection() {
868 let parsed = parse("Checkout this https://zed.dev link").await;
869
870 assert_eq!(
871 parsed.children,
872 vec![p("Checkout this https://zed.dev link", 0..34)]
873 );
874
875 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
876 text
877 } else {
878 panic!("Expected a paragraph");
879 };
880 assert_eq!(
881 paragraph.highlights,
882 vec![(
883 14..29,
884 MarkdownHighlight::Style(MarkdownHighlightStyle {
885 underline: true,
886 ..Default::default()
887 }),
888 )]
889 );
890 assert_eq!(
891 paragraph.regions,
892 vec![ParsedRegion {
893 code: false,
894 link: Some(Link::Web {
895 url: "https://zed.dev".to_string()
896 }),
897 }]
898 );
899 assert_eq!(paragraph.region_ranges, vec![14..29]);
900 }
901
/// A table that has only a header row parses with an empty body; only the
/// first parsed element (the table) is checked.
#[gpui::test]
async fn test_header_only_table() {
    let markdown = "\
| Header 1 | Header 2 |
|----------|----------|

Some other content
";

    let expected_table = table(
        0..48,
        row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
        vec![],
    );

    assert_eq!(
        parse(markdown).await.children[0],
        ParsedMarkdownElement::Table(expected_table)
    );
}
922
/// A table with a header and two body rows parses into a header row plus
/// two body rows of text cells.
#[gpui::test]
async fn test_basic_table() {
    let markdown = "\
| Header 1 | Header 2 |
|----------|----------|
| Cell 1 | Cell 2 |
| Cell 3 | Cell 4 |";

    let expected_table = table(
        0..95,
        row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
        vec![
            row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
            row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
        ],
    );

    assert_eq!(
        parse(markdown).await.children[0],
        ParsedMarkdownElement::Table(expected_table)
    );
}
945
/// A flat unordered list yields one depth-1 list item per line.
#[gpui::test]
async fn test_list_basic() {
    let parsed = parse(
        "\
* Item 1
* Item 2
* Item 3
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
            list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
            list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
        ],
    );
}
966
/// Task list markers (`[ ]` / `[x]`) turn items into `Task` items that
/// record the checked state and the marker's range.
#[gpui::test]
async fn test_list_with_tasks() {
    let parsed = parse(
        "\
- [ ] TODO
- [x] Checked
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
            list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
        ],
    );
}
985
/// Mixed task, unordered and ordered items at two depths are returned as a
/// flat sequence, each item carrying its own depth and type.
#[gpui::test]
async fn test_list_with_indented_task() {
    let parsed = parse(
        "\
- [ ] TODO
 - [x] Checked
 - Unordered
 1. Number 1
 1. Number 2
1. Number A
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
            list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
            list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
            list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
            list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
            list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
        ],
    );
}
1012
/// Task items separated by a blank line are still recognized as tasks
/// (the marker then sits inside a paragraph).
#[gpui::test]
async fn test_list_with_linebreak_is_handled_correctly() {
    let parsed = parse(
        "\
- [ ] Task 1

- [x] Task 2
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
            list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
        ],
    );
}
1032
/// Several lists, nested up to four levels deep and including an empty
/// item, are flattened into document order with correct depth and numbering.
#[gpui::test]
async fn test_list_nested() {
    let parsed = parse(
        "\
* Item 1
* Item 2
* Item 3

1. Hello
1. Two
 1. Three
2. Four
3. Five

* First
 1. Hello
 1. Goodbyte
 - Inner
 - Inner
 2. Goodbyte
 - Next item empty
 -
* Last
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
            list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
            list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
            list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
            list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
            list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
            list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
            list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
            list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
            list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
            list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
            list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
            list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
            list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
            list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
            list_item(186..190, 3, Unordered, vec![]),
            list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
        ]
    );
}
1083
/// A list item containing two paragraphs keeps both paragraphs as its content.
#[gpui::test]
async fn test_list_with_nested_content() {
    let parsed = parse(
        "\
* This is a list item with two paragraphs.

 This is the second paragraph in the list item.
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![list_item(
            0..96,
            1,
            Unordered,
            vec![
                p("This is a list item with two paragraphs.", 4..44),
                p("This is the second paragraph in the list item.", 50..97)
            ],
        ),],
    );
}
1108
/// Inline HTML inside a list item is dropped while its inner text is kept.
#[gpui::test]
async fn test_list_item_with_inline_html() {
    let parsed = parse(
        "\
* This is a list item with an inline HTML <sometag>tag</sometag>.
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![list_item(
            0..67,
            1,
            Unordered,
            vec![p("This is a list item with an inline HTML tag.", 4..44),],
        ),],
    );
}
1128
/// A paragraph that follows nested items is emitted as a stand-alone
/// element after those items rather than being attached to a list item.
#[gpui::test]
async fn test_nested_list_with_paragraph_inside() {
    let parsed = parse(
        "\
1. a
 1. b
 1. c

 text

 1. d
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
            list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
            list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
            p("text", 32..37),
            list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
        ],
    );
}
1155
/// List items that begin with inline code, bold text or a link are parsed
/// as plain text items.
#[gpui::test]
async fn test_list_with_leading_text() {
    let parsed = parse(
        "\
* `code`
* **bold**
* [link](https://example.com)
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
            list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
            list_item(20..49, 1, Unordered, vec![p("link", 22..49)],)
        ],
    );
}
1176
1177 #[gpui::test]
1178 async fn test_simple_block_quote() {
1179 let parsed = parse("> Simple block quote with **styled text**").await;
1180
1181 assert_eq!(
1182 parsed.children,
1183 vec![block_quote(
1184 vec![p("Simple block quote with styled text", 2..41)],
1185 0..41
1186 )]
1187 );
1188 }
1189
/// A block quote spanning several lines holds a heading and two paragraphs;
/// the soft-wrapped lines are joined into one paragraph.
#[gpui::test]
async fn test_simple_block_quote_with_multiple_lines() {
    let parsed = parse(
        "\
> # Heading
> More
> text
>
> More text
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![block_quote(
            vec![
                h1(text("Heading", 4..11), 2..12),
                p("More text", 14..26),
                p("More text", 30..40)
            ],
            0..40
        )]
    );
}
1215
/// A block quote nested inside another becomes a child of the outer quote,
/// and text after the outer quote is parsed as a separate paragraph.
#[gpui::test]
async fn test_nested_block_quote() {
    let parsed = parse(
        "\
> A
>
> > # B
>
> C

More text
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![
            block_quote(
                vec![
                    p("A", 2..4),
                    block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
                    p("C", 18..20)
                ],
                0..20
            ),
            p("More text", 21..31)
        ]
    );
}
1246
/// A fenced code block without a language has no language and no
/// highlights, and its contents are trimmed.
#[gpui::test]
async fn test_code_block() {
    let parsed = parse(
        "\
```
fn main() {
 return 0;
}
```
",
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![code_block(
            None,
            "fn main() {\n return 0;\n}",
            0..35,
            None
        )]
    );
}
1270
/// A fenced code block tagged `rust` is resolved through the language
/// registry; the test language yields an empty (but present) highlight list.
#[gpui::test]
async fn test_code_block_with_language(executor: BackgroundExecutor) {
    let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
    language_registry.add(rust_lang());

    let parsed = parse_markdown(
        "\
```rust
fn main() {
 return 0;
}
```
",
        None,
        Some(language_registry),
    )
    .await;

    assert_eq!(
        parsed.children,
        vec![code_block(
            Some("rust".to_string()),
            "fn main() {\n return 0;\n}",
            0..39,
            Some(vec![])
        )]
    );
}
1299
1300 fn rust_lang() -> Arc<Language> {
1301 Arc::new(Language::new(
1302 LanguageConfig {
1303 name: "Rust".into(),
1304 matcher: LanguageMatcher {
1305 path_suffixes: vec!["rs".into()],
1306 ..Default::default()
1307 },
1308 collapsed_placeholder: " /* ... */ ".to_string(),
1309 ..Default::default()
1310 },
1311 Some(tree_sitter_rust::LANGUAGE.into()),
1312 ))
1313 }
1314
1315 fn h1(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1316 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1317 source_range,
1318 level: HeadingLevel::H1,
1319 contents,
1320 })
1321 }
1322
1323 fn h2(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1324 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1325 source_range,
1326 level: HeadingLevel::H2,
1327 contents,
1328 })
1329 }
1330
1331 fn h3(contents: ParsedMarkdownText, source_range: Range<usize>) -> ParsedMarkdownElement {
1332 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1333 source_range,
1334 level: HeadingLevel::H3,
1335 contents,
1336 })
1337 }
1338
1339 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1340 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1341 }
1342
1343 fn text(contents: &str, source_range: Range<usize>) -> ParsedMarkdownText {
1344 ParsedMarkdownText {
1345 highlights: Vec::new(),
1346 region_ranges: Vec::new(),
1347 regions: Vec::new(),
1348 source_range,
1349 contents: contents.to_string(),
1350 }
1351 }
1352
1353 fn block_quote(
1354 children: Vec<ParsedMarkdownElement>,
1355 source_range: Range<usize>,
1356 ) -> ParsedMarkdownElement {
1357 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1358 source_range,
1359 children,
1360 })
1361 }
1362
1363 fn code_block(
1364 language: Option<String>,
1365 code: &str,
1366 source_range: Range<usize>,
1367 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1368 ) -> ParsedMarkdownElement {
1369 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1370 source_range,
1371 language,
1372 contents: code.to_string().into(),
1373 highlights,
1374 })
1375 }
1376
1377 fn list_item(
1378 source_range: Range<usize>,
1379 depth: u16,
1380 item_type: ParsedMarkdownListItemType,
1381 content: Vec<ParsedMarkdownElement>,
1382 ) -> ParsedMarkdownElement {
1383 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1384 source_range,
1385 item_type,
1386 depth,
1387 content,
1388 })
1389 }
1390
1391 fn table(
1392 source_range: Range<usize>,
1393 header: ParsedMarkdownTableRow,
1394 body: Vec<ParsedMarkdownTableRow>,
1395 ) -> ParsedMarkdownTable {
1396 ParsedMarkdownTable {
1397 column_alignments: Vec::new(),
1398 source_range,
1399 header,
1400 body,
1401 }
1402 }
1403
1404 fn row(children: Vec<ParsedMarkdownText>) -> ParsedMarkdownTableRow {
1405 ParsedMarkdownTableRow { children }
1406 }
1407
1408 impl PartialEq for ParsedMarkdownTable {
1409 fn eq(&self, other: &Self) -> bool {
1410 self.source_range == other.source_range
1411 && self.header == other.header
1412 && self.body == other.body
1413 }
1414 }
1415
1416 impl PartialEq for ParsedMarkdownText {
1417 fn eq(&self, other: &Self) -> bool {
1418 self.source_range == other.source_range && self.contents == other.contents
1419 }
1420 }
1421}