1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser that walks a pre-collected pulldown-cmark event stream
/// and accumulates the parsed top-level elements.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the range it covers in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed along when identifying link and image destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up the language of fenced code blocks for highlighting, when present
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
/// A list item that is still being assembled while its events are consumed.
struct MarkdownListItem {
    /// The elements collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered or a task
    item_type: ParsedMarkdownListItemType,
}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 return self.tokens.get(self.cursor + steps);
80 }
81
82 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 return self.tokens.get(self.cursor - 1);
87 }
88
89 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
90 return self.peek(0);
91 }
92
93 fn current_event(&self) -> Option<&Event<'_>> {
94 return self.current().map(|(event, _)| event);
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::InlineHtml(_)
104 | Event::FootnoteReference(_)
105 | Event::Start(Tag::Link { .. })
106 | Event::Start(Tag::Emphasis)
107 | Event::Start(Tag::Strong)
108 | Event::Start(Tag::Strikethrough)
109 | Event::Start(Tag::Image { .. }) => {
110 true
111 }
112 _ => false,
113 }
114 }
115
116 async fn parse_document(mut self) -> Self {
117 while !self.eof() {
118 if let Some(block) = self.parse_block().await {
119 self.parsed.extend(block);
120 } else {
121 self.cursor += 1;
122 }
123 }
124 self
125 }
126
127 #[async_recursion]
128 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
129 let (current, source_range) = self.current().unwrap();
130 let source_range = source_range.clone();
131 match current {
132 Event::Start(tag) => match tag {
133 Tag::Paragraph => {
134 self.cursor += 1;
135 let text = self.parse_text(false, Some(source_range));
136 Some(vec![ParsedMarkdownElement::Paragraph(text)])
137 }
138 Tag::Heading { level, .. } => {
139 let level = *level;
140 self.cursor += 1;
141 let heading = self.parse_heading(level);
142 Some(vec![ParsedMarkdownElement::Heading(heading)])
143 }
144 Tag::Table(alignment) => {
145 let alignment = alignment.clone();
146 self.cursor += 1;
147 let table = self.parse_table(alignment);
148 Some(vec![ParsedMarkdownElement::Table(table)])
149 }
150 Tag::List(order) => {
151 let order = *order;
152 self.cursor += 1;
153 let list = self.parse_list(order).await;
154 Some(list)
155 }
156 Tag::BlockQuote(_kind) => {
157 self.cursor += 1;
158 let block_quote = self.parse_block_quote().await;
159 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
160 }
161 Tag::CodeBlock(kind) => {
162 let language = match kind {
163 pulldown_cmark::CodeBlockKind::Indented => None,
164 pulldown_cmark::CodeBlockKind::Fenced(language) => {
165 if language.is_empty() {
166 None
167 } else {
168 Some(language.to_string())
169 }
170 }
171 };
172
173 self.cursor += 1;
174
175 let code_block = self.parse_code_block(language).await;
176 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
177 }
178 _ => None,
179 },
180 Event::Rule => {
181 let source_range = source_range.clone();
182 self.cursor += 1;
183 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
184 }
185 _ => None,
186 }
187 }
188
189 fn parse_text(
190 &mut self,
191 should_complete_on_soft_break: bool,
192 source_range: Option<Range<usize>>,
193 ) -> MarkdownParagraph {
194 let source_range = source_range.unwrap_or_else(|| {
195 self.current()
196 .map(|(_, range)| range.clone())
197 .unwrap_or_default()
198 });
199
200 let mut markdown_text_like = Vec::new();
201 let mut text = String::new();
202 let mut bold_depth = 0;
203 let mut italic_depth = 0;
204 let mut strikethrough_depth = 0;
205 let mut link: Option<Link> = None;
206 let mut image: Option<Image> = None;
207 let mut region_ranges: Vec<Range<usize>> = vec![];
208 let mut regions: Vec<ParsedRegion> = vec![];
209 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
210 let mut link_urls: Vec<String> = vec![];
211 let mut link_ranges: Vec<Range<usize>> = vec![];
212
213 loop {
214 if self.eof() {
215 break;
216 }
217
218 let (current, _) = self.current().unwrap();
219 let prev_len = text.len();
220 match current {
221 Event::SoftBreak => {
222 if should_complete_on_soft_break {
223 break;
224 }
225 text.push(' ');
226 }
227
228 Event::HardBreak => {
229 text.push('\n');
230 }
231
232 // We want to ignore any inline HTML tags in the text but keep
233 // the text between them
234 Event::InlineHtml(_) => {}
235
236 Event::Text(t) => {
237 text.push_str(t.as_ref());
238 let mut style = MarkdownHighlightStyle::default();
239
240 if bold_depth > 0 {
241 style.weight = FontWeight::BOLD;
242 }
243
244 if italic_depth > 0 {
245 style.italic = true;
246 }
247
248 if strikethrough_depth > 0 {
249 style.strikethrough = true;
250 }
251
252 let last_run_len = if let Some(link) = link.clone() {
253 region_ranges.push(prev_len..text.len());
254 regions.push(ParsedRegion {
255 code: false,
256 link: Some(link),
257 });
258 style.underline = true;
259 prev_len
260 } else {
261 // Manually scan for links
262 let mut finder = linkify::LinkFinder::new();
263 finder.kinds(&[linkify::LinkKind::Url]);
264 let mut last_link_len = prev_len;
265 for link in finder.links(t) {
266 let start = link.start();
267 let end = link.end();
268 let range = (prev_len + start)..(prev_len + end);
269 link_ranges.push(range.clone());
270 link_urls.push(link.as_str().to_string());
271
272 // If there is a style before we match a link, we have to add this to the highlighted ranges
273 if style != MarkdownHighlightStyle::default()
274 && last_link_len < link.start()
275 {
276 highlights.push((
277 last_link_len..link.start(),
278 MarkdownHighlight::Style(style.clone()),
279 ));
280 }
281
282 highlights.push((
283 range.clone(),
284 MarkdownHighlight::Style(MarkdownHighlightStyle {
285 underline: true,
286 ..style
287 }),
288 ));
289 region_ranges.push(range.clone());
290 regions.push(ParsedRegion {
291 code: false,
292 link: Some(Link::Web {
293 url: link.as_str().to_string(),
294 }),
295 });
296 last_link_len = end;
297 }
298 last_link_len
299 };
300
301 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
302 let mut new_highlight = true;
303 if let Some((last_range, last_style)) = highlights.last_mut() {
304 if last_range.end == last_run_len
305 && last_style == &MarkdownHighlight::Style(style.clone())
306 {
307 last_range.end = text.len();
308 new_highlight = false;
309 }
310 }
311 if new_highlight {
312 highlights.push((
313 last_run_len..text.len(),
314 MarkdownHighlight::Style(style.clone()),
315 ));
316 }
317 }
318 }
319 Event::Code(t) => {
320 text.push_str(t.as_ref());
321 region_ranges.push(prev_len..text.len());
322
323 if link.is_some() {
324 highlights.push((
325 prev_len..text.len(),
326 MarkdownHighlight::Style(MarkdownHighlightStyle {
327 underline: true,
328 ..Default::default()
329 }),
330 ));
331 }
332 regions.push(ParsedRegion {
333 code: true,
334 link: link.clone(),
335 });
336 }
337 Event::Start(tag) => match tag {
338 Tag::Emphasis => italic_depth += 1,
339 Tag::Strong => bold_depth += 1,
340 Tag::Strikethrough => strikethrough_depth += 1,
341 Tag::Link { dest_url, .. } => {
342 link = Link::identify(
343 self.file_location_directory.clone(),
344 dest_url.to_string(),
345 );
346 }
347 Tag::Image { dest_url, .. } => {
348 if !text.is_empty() {
349 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
350 source_range: source_range.clone(),
351 contents: text.clone(),
352 highlights: highlights.clone(),
353 region_ranges: region_ranges.clone(),
354 regions: regions.clone(),
355 });
356 text = String::new();
357 highlights = vec![];
358 region_ranges = vec![];
359 regions = vec![];
360 markdown_text_like.push(parsed_regions);
361 }
362 image = Image::identify(
363 dest_url.to_string(),
364 source_range.clone(),
365 self.file_location_directory.clone(),
366 );
367 }
368 _ => {
369 break;
370 }
371 },
372
373 Event::End(tag) => match tag {
374 TagEnd::Emphasis => italic_depth -= 1,
375 TagEnd::Strong => bold_depth -= 1,
376 TagEnd::Strikethrough => strikethrough_depth -= 1,
377 TagEnd::Link => {
378 link = None;
379 }
380 TagEnd::Image => {
381 if let Some(mut image) = image.take() {
382 if !text.is_empty() {
383 image.alt_text = Some(std::mem::take(&mut text).into());
384 }
385 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
386 }
387 }
388 TagEnd::Paragraph => {
389 self.cursor += 1;
390 break;
391 }
392 _ => {
393 break;
394 }
395 },
396 _ => {
397 break;
398 }
399 }
400
401 self.cursor += 1;
402 }
403 if !text.is_empty() {
404 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
405 source_range: source_range.clone(),
406 contents: text,
407 highlights,
408 regions,
409 region_ranges,
410 }));
411 }
412 markdown_text_like
413 }
414
415 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
416 let (_event, source_range) = self.previous().unwrap();
417 let source_range = source_range.clone();
418 let text = self.parse_text(true, None);
419
420 // Advance past the heading end tag
421 self.cursor += 1;
422
423 ParsedMarkdownHeading {
424 source_range: source_range.clone(),
425 level: match level {
426 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
427 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
428 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
429 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
430 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
431 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
432 },
433 contents: text,
434 }
435 }
436
437 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
438 let (_event, source_range) = self.previous().unwrap();
439 let source_range = source_range.clone();
440 let mut header = ParsedMarkdownTableRow::new();
441 let mut body = vec![];
442 let mut current_row = vec![];
443 let mut in_header = true;
444 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
445
446 loop {
447 if self.eof() {
448 break;
449 }
450
451 let (current, source_range) = self.current().unwrap();
452 let source_range = source_range.clone();
453 match current {
454 Event::Start(Tag::TableHead)
455 | Event::Start(Tag::TableRow)
456 | Event::End(TagEnd::TableCell) => {
457 self.cursor += 1;
458 }
459 Event::Start(Tag::TableCell) => {
460 self.cursor += 1;
461 let cell_contents = self.parse_text(false, Some(source_range));
462 current_row.push(cell_contents);
463 }
464 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
465 self.cursor += 1;
466 let new_row = std::mem::take(&mut current_row);
467 if in_header {
468 header.children = new_row;
469 in_header = false;
470 } else {
471 let row = ParsedMarkdownTableRow::with_children(new_row);
472 body.push(row);
473 }
474 }
475 Event::End(TagEnd::Table) => {
476 self.cursor += 1;
477 break;
478 }
479 _ => {
480 break;
481 }
482 }
483 }
484
485 ParsedMarkdownTable {
486 source_range,
487 header,
488 body,
489 column_alignments,
490 }
491 }
492
493 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
494 match alignment {
495 Alignment::None => ParsedMarkdownTableAlignment::None,
496 Alignment::Left => ParsedMarkdownTableAlignment::Left,
497 Alignment::Center => ParsedMarkdownTableAlignment::Center,
498 Alignment::Right => ParsedMarkdownTableAlignment::Right,
499 }
500 }
501
    /// Parses a list whose start tag has just been consumed and returns its
    /// contents as a flat sequence of elements.
    ///
    /// Nested lists are not represented as children: every item becomes its
    /// own `ListItem` element carrying its nesting `depth`, and an item is
    /// placed in front of the items of the lists nested inside it.
    ///
    /// `order` is the start number of the list (`None` for an unordered list);
    /// it is incremented after every finished item of the same list.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished elements, in output order
        let mut items = Vec::new();
        // Items that have been opened but not closed yet, innermost last
        let mut items_stack = vec![MarkdownListItem::default()];
        // Nesting level of the list currently being parsed (outermost list is 1)
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends
        let mut order_stack = Vec::new();

        // Per depth: the position in `items` at which the item that is currently
        // open at that depth has to be inserted once it is closed. An item is only
        // finished at its end tag, i.e. after all of its nested items were pushed.
        let mut insertion_indices = FxHashMap::default();
        // Per depth: the source range of the open item, cut off at the start of
        // its first nested list
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    // Remember where the enclosing item belongs, unless an earlier
                    // nested list of the same item already recorded it
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Continue with the numbering of the enclosing list (if any)
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list this function was called for has ended
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over the list's numbering
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block; its item type keeps the default
                            let block = self.parse_block().await;
                            if let Some(block) = block {
                                if let Some(list_item) = items_stack.last_mut() {
                                    list_item.content.extend(block);
                                }
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range that was cut off at a nested list; otherwise
                        // fall back to the range of the most recently started item
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // Place the item in front of its nested items if it had any
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Nothing could be parsed here: skip the token
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
652
653 #[async_recursion]
654 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
655 let (_event, source_range) = self.previous().unwrap();
656 let source_range = source_range.clone();
657 let mut nested_depth = 1;
658
659 let mut children: Vec<ParsedMarkdownElement> = vec![];
660
661 while !self.eof() {
662 let block = self.parse_block().await;
663
664 if let Some(block) = block {
665 children.extend(block);
666 } else {
667 break;
668 }
669
670 if self.eof() {
671 break;
672 }
673
674 let (current, _source_range) = self.current().unwrap();
675 match current {
676 // This is a nested block quote.
677 // Record that we're in a nested block quote and continue parsing.
678 // We don't need to advance the cursor since the next
679 // call to `parse_block` will handle it.
680 Event::Start(Tag::BlockQuote(_kind)) => {
681 nested_depth += 1;
682 }
683 Event::End(TagEnd::BlockQuote(_kind)) => {
684 nested_depth -= 1;
685 if nested_depth == 0 {
686 self.cursor += 1;
687 break;
688 }
689 }
690 _ => {}
691 };
692 }
693
694 ParsedMarkdownBlockQuote {
695 source_range,
696 children,
697 }
698 }
699
700 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
701 let (_event, source_range) = self.previous().unwrap();
702 let source_range = source_range.clone();
703 let mut code = String::new();
704
705 while !self.eof() {
706 let (current, _source_range) = self.current().unwrap();
707 match current {
708 Event::Text(text) => {
709 code.push_str(text);
710 self.cursor += 1;
711 }
712 Event::End(TagEnd::CodeBlock) => {
713 self.cursor += 1;
714 break;
715 }
716 _ => {
717 break;
718 }
719 }
720 }
721
722 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
723
724 let highlights = if let Some(language) = &language {
725 if let Some(registry) = &self.language_registry {
726 let rope: language::Rope = code.as_str().into();
727 registry
728 .language_for_name_or_extension(language)
729 .await
730 .map(|l| l.highlight_text(&rope, 0..code.len()))
731 .ok()
732 } else {
733 None
734 }
735 } else {
736 None
737 };
738
739 ParsedMarkdownCodeBlock {
740 source_range,
741 contents: code.into(),
742 language,
743 highlights,
744 }
745 }
746}
747
748#[cfg(test)]
749mod tests {
750 use core::panic;
751
752 use super::*;
753
754 use ParsedMarkdownListItemType::*;
755 use gpui::BackgroundExecutor;
756 use language::{
757 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
758 };
759 use pretty_assertions::assert_eq;
760
761 async fn parse(input: &str) -> ParsedMarkdown {
762 parse_markdown(input, None, None).await
763 }
764
765 #[gpui::test]
766 async fn test_headings() {
767 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
768
769 assert_eq!(
770 parsed.children,
771 vec![
772 h1(text("Heading one", 2..13), 0..14),
773 h2(text("Heading two", 17..28), 14..29),
774 h3(text("Heading three", 33..46), 29..46),
775 ]
776 );
777 }
778
779 #[gpui::test]
780 async fn test_newlines_dont_new_paragraphs() {
781 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
782
783 assert_eq!(
784 parsed.children,
785 vec![p("Some text that is bolded and italicized", 0..46)]
786 );
787 }
788
789 #[gpui::test]
790 async fn test_heading_with_paragraph() {
791 let parsed = parse("# Zed\nThe editor").await;
792
793 assert_eq!(
794 parsed.children,
795 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
796 );
797 }
798
799 #[gpui::test]
800 async fn test_double_newlines_do_new_paragraphs() {
801 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
802
803 assert_eq!(
804 parsed.children,
805 vec![
806 p("Some text that is bolded", 0..29),
807 p("and italicized", 31..47),
808 ]
809 );
810 }
811
812 #[gpui::test]
813 async fn test_bold_italic_text() {
814 let parsed = parse("Some text **that is bolded** and *italicized*").await;
815
816 assert_eq!(
817 parsed.children,
818 vec![p("Some text that is bolded and italicized", 0..45)]
819 );
820 }
821
822 #[gpui::test]
823 async fn test_nested_bold_strikethrough_text() {
824 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
825
826 assert_eq!(parsed.children.len(), 1);
827 assert_eq!(
828 parsed.children[0],
829 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
830 ParsedMarkdownText {
831 source_range: 0..35,
832 contents: "Some bostrikethroughld text".to_string(),
833 highlights: Vec::new(),
834 region_ranges: Vec::new(),
835 regions: Vec::new(),
836 }
837 )])
838 );
839
840 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
841 text
842 } else {
843 panic!("Expected a paragraph");
844 };
845
846 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
847 text
848 } else {
849 panic!("Expected a text");
850 };
851
852 assert_eq!(
853 paragraph.highlights,
854 vec![
855 (
856 5..7,
857 MarkdownHighlight::Style(MarkdownHighlightStyle {
858 weight: FontWeight::BOLD,
859 ..Default::default()
860 }),
861 ),
862 (
863 7..20,
864 MarkdownHighlight::Style(MarkdownHighlightStyle {
865 weight: FontWeight::BOLD,
866 strikethrough: true,
867 ..Default::default()
868 }),
869 ),
870 (
871 20..22,
872 MarkdownHighlight::Style(MarkdownHighlightStyle {
873 weight: FontWeight::BOLD,
874 ..Default::default()
875 }),
876 ),
877 ]
878 );
879 }
880
881 #[gpui::test]
882 async fn test_text_with_inline_html() {
883 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
884
885 assert_eq!(
886 parsed.children,
887 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
888 );
889 }
890
891 #[gpui::test]
892 async fn test_raw_links_detection() {
893 let parsed = parse("Checkout this https://zed.dev link").await;
894
895 assert_eq!(
896 parsed.children,
897 vec![p("Checkout this https://zed.dev link", 0..34)]
898 );
899 }
900
901 #[gpui::test]
902 async fn test_empty_image() {
903 let parsed = parse("![]()").await;
904
905 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
906 text
907 } else {
908 panic!("Expected a paragraph");
909 };
910 assert_eq!(paragraph.len(), 0);
911 }
912
913 #[gpui::test]
914 async fn test_image_links_detection() {
915 let parsed = parse("").await;
916
917 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
918 text
919 } else {
920 panic!("Expected a paragraph");
921 };
922 assert_eq!(
923 paragraph[0],
924 MarkdownParagraphChunk::Image(Image {
925 source_range: 0..111,
926 link: Link::Web {
927 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
928 },
929 alt_text: Some("test".into()),
930 },)
931 );
932 }
933
934 #[gpui::test]
935 async fn test_image_without_alt_text() {
936 let parsed = parse("").await;
937
938 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
939 text
940 } else {
941 panic!("Expected a paragraph");
942 };
943 assert_eq!(
944 paragraph[0],
945 MarkdownParagraphChunk::Image(Image {
946 source_range: 0..31,
947 link: Link::Web {
948 url: "http://example.com/foo.png".to_string(),
949 },
950 alt_text: None,
951 },)
952 );
953 }
954
955 #[gpui::test]
956 async fn test_image_with_alt_text_containing_formatting() {
957 let parsed = parse("").await;
958
959 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
960 panic!("Expected a paragraph");
961 };
962 assert_eq!(
963 chunks,
964 &[MarkdownParagraphChunk::Image(Image {
965 source_range: 0..44,
966 link: Link::Web {
967 url: "http://example.com/foo.png".to_string(),
968 },
969 alt_text: Some("foo bar baz".into()),
970 }),],
971 );
972 }
973
974 #[gpui::test]
975 async fn test_images_with_text_in_between() {
976 let parsed = parse(
977 "\nLorem Ipsum\n",
978 )
979 .await;
980
981 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
982 text
983 } else {
984 panic!("Expected a paragraph");
985 };
986 assert_eq!(
987 chunks,
988 &vec![
989 MarkdownParagraphChunk::Image(Image {
990 source_range: 0..81,
991 link: Link::Web {
992 url: "http://example.com/foo.png".to_string(),
993 },
994 alt_text: Some("foo".into()),
995 }),
996 MarkdownParagraphChunk::Text(ParsedMarkdownText {
997 source_range: 0..81,
998 contents: " Lorem Ipsum ".to_string(),
999 highlights: Vec::new(),
1000 region_ranges: Vec::new(),
1001 regions: Vec::new(),
1002 }),
1003 MarkdownParagraphChunk::Image(Image {
1004 source_range: 0..81,
1005 link: Link::Web {
1006 url: "http://example.com/bar.png".to_string(),
1007 },
1008 alt_text: Some("bar".into()),
1009 })
1010 ]
1011 );
1012 }
1013
1014 #[gpui::test]
1015 async fn test_header_only_table() {
1016 let markdown = "\
1017| Header 1 | Header 2 |
1018|----------|----------|
1019
1020Some other content
1021";
1022
1023 let expected_table = table(
1024 0..48,
1025 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1026 vec![],
1027 );
1028
1029 assert_eq!(
1030 parse(markdown).await.children[0],
1031 ParsedMarkdownElement::Table(expected_table)
1032 );
1033 }
1034
1035 #[gpui::test]
1036 async fn test_basic_table() {
1037 let markdown = "\
1038| Header 1 | Header 2 |
1039|----------|----------|
1040| Cell 1 | Cell 2 |
1041| Cell 3 | Cell 4 |";
1042
1043 let expected_table = table(
1044 0..95,
1045 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1046 vec![
1047 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1048 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1049 ],
1050 );
1051
1052 assert_eq!(
1053 parse(markdown).await.children[0],
1054 ParsedMarkdownElement::Table(expected_table)
1055 );
1056 }
1057
1058 #[gpui::test]
1059 async fn test_list_basic() {
1060 let parsed = parse(
1061 "\
1062* Item 1
1063* Item 2
1064* Item 3
1065",
1066 )
1067 .await;
1068
1069 assert_eq!(
1070 parsed.children,
1071 vec![
1072 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1073 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1074 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1075 ],
1076 );
1077 }
1078
1079 #[gpui::test]
1080 async fn test_list_with_tasks() {
1081 let parsed = parse(
1082 "\
1083- [ ] TODO
1084- [x] Checked
1085",
1086 )
1087 .await;
1088
1089 assert_eq!(
1090 parsed.children,
1091 vec![
1092 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1093 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1094 ],
1095 );
1096 }
1097
1098 #[gpui::test]
1099 async fn test_list_with_indented_task() {
1100 let parsed = parse(
1101 "\
1102- [ ] TODO
1103 - [x] Checked
1104 - Unordered
1105 1. Number 1
1106 1. Number 2
11071. Number A
1108",
1109 )
1110 .await;
1111
1112 assert_eq!(
1113 parsed.children,
1114 vec![
1115 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1116 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1117 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1118 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1119 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1120 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1121 ],
1122 );
1123 }
1124
1125 #[gpui::test]
1126 async fn test_list_with_linebreak_is_handled_correctly() {
1127 let parsed = parse(
1128 "\
1129- [ ] Task 1
1130
1131- [x] Task 2
1132",
1133 )
1134 .await;
1135
1136 assert_eq!(
1137 parsed.children,
1138 vec![
1139 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1140 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1141 ],
1142 );
1143 }
1144
1145 #[gpui::test]
1146 async fn test_list_nested() {
1147 let parsed = parse(
1148 "\
1149* Item 1
1150* Item 2
1151* Item 3
1152
11531. Hello
11541. Two
1155 1. Three
11562. Four
11573. Five
1158
1159* First
1160 1. Hello
1161 1. Goodbyte
1162 - Inner
1163 - Inner
1164 2. Goodbyte
1165 - Next item empty
1166 -
1167* Last
1168",
1169 )
1170 .await;
1171
1172 assert_eq!(
1173 parsed.children,
1174 vec![
1175 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1176 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1177 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1178 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1179 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1180 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1181 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1182 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1183 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1184 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1185 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1186 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1187 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1188 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1189 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1190 list_item(186..190, 3, Unordered, vec![]),
1191 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1192 ]
1193 );
1194 }
1195
1196 #[gpui::test]
1197 async fn test_list_with_nested_content() {
1198 let parsed = parse(
1199 "\
1200* This is a list item with two paragraphs.
1201
1202 This is the second paragraph in the list item.
1203",
1204 )
1205 .await;
1206
1207 assert_eq!(
1208 parsed.children,
1209 vec![list_item(
1210 0..96,
1211 1,
1212 Unordered,
1213 vec![
1214 p("This is a list item with two paragraphs.", 4..44),
1215 p("This is the second paragraph in the list item.", 50..97)
1216 ],
1217 ),],
1218 );
1219 }
1220
1221 #[gpui::test]
1222 async fn test_list_item_with_inline_html() {
1223 let parsed = parse(
1224 "\
1225* This is a list item with an inline HTML <sometag>tag</sometag>.
1226",
1227 )
1228 .await;
1229
1230 assert_eq!(
1231 parsed.children,
1232 vec![list_item(
1233 0..67,
1234 1,
1235 Unordered,
1236 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1237 ),],
1238 );
1239 }
1240
1241 #[gpui::test]
1242 async fn test_nested_list_with_paragraph_inside() {
1243 let parsed = parse(
1244 "\
12451. a
1246 1. b
1247 1. c
1248
1249 text
1250
1251 1. d
1252",
1253 )
1254 .await;
1255
1256 assert_eq!(
1257 parsed.children,
1258 vec![
1259 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1260 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1261 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1262 p("text", 32..37),
1263 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1264 ],
1265 );
1266 }
1267
1268 #[gpui::test]
1269 async fn test_list_with_leading_text() {
1270 let parsed = parse(
1271 "\
1272* `code`
1273* **bold**
1274* [link](https://example.com)
1275",
1276 )
1277 .await;
1278
1279 assert_eq!(
1280 parsed.children,
1281 vec![
1282 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1283 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1284 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1285 ],
1286 );
1287 }
1288
1289 #[gpui::test]
1290 async fn test_simple_block_quote() {
1291 let parsed = parse("> Simple block quote with **styled text**").await;
1292
1293 assert_eq!(
1294 parsed.children,
1295 vec![block_quote(
1296 vec![p("Simple block quote with styled text", 2..41)],
1297 0..41
1298 )]
1299 );
1300 }
1301
1302 #[gpui::test]
1303 async fn test_simple_block_quote_with_multiple_lines() {
1304 let parsed = parse(
1305 "\
1306> # Heading
1307> More
1308> text
1309>
1310> More text
1311",
1312 )
1313 .await;
1314
1315 assert_eq!(
1316 parsed.children,
1317 vec![block_quote(
1318 vec![
1319 h1(text("Heading", 4..11), 2..12),
1320 p("More text", 14..26),
1321 p("More text", 30..40)
1322 ],
1323 0..40
1324 )]
1325 );
1326 }
1327
1328 #[gpui::test]
1329 async fn test_nested_block_quote() {
1330 let parsed = parse(
1331 "\
1332> A
1333>
1334> > # B
1335>
1336> C
1337
1338More text
1339",
1340 )
1341 .await;
1342
1343 assert_eq!(
1344 parsed.children,
1345 vec![
1346 block_quote(
1347 vec![
1348 p("A", 2..4),
1349 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1350 p("C", 18..20)
1351 ],
1352 0..20
1353 ),
1354 p("More text", 21..31)
1355 ]
1356 );
1357 }
1358
1359 #[gpui::test]
1360 async fn test_code_block() {
1361 let parsed = parse(
1362 "\
1363```
1364fn main() {
1365 return 0;
1366}
1367```
1368",
1369 )
1370 .await;
1371
1372 assert_eq!(
1373 parsed.children,
1374 vec![code_block(
1375 None,
1376 "fn main() {\n return 0;\n}",
1377 0..35,
1378 None
1379 )]
1380 );
1381 }
1382
1383 #[gpui::test]
1384 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1385 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1386 language_registry.add(rust_lang());
1387
1388 let parsed = parse_markdown(
1389 "\
1390```rust
1391fn main() {
1392 return 0;
1393}
1394```
1395",
1396 None,
1397 Some(language_registry),
1398 )
1399 .await;
1400
1401 assert_eq!(
1402 parsed.children,
1403 vec![code_block(
1404 Some("rust".to_string()),
1405 "fn main() {\n return 0;\n}",
1406 0..39,
1407 Some(vec![])
1408 )]
1409 );
1410 }
1411
1412 fn rust_lang() -> Arc<Language> {
1413 Arc::new(Language::new(
1414 LanguageConfig {
1415 name: "Rust".into(),
1416 matcher: LanguageMatcher {
1417 path_suffixes: vec!["rs".into()],
1418 ..Default::default()
1419 },
1420 collapsed_placeholder: " /* ... */ ".to_string(),
1421 ..Default::default()
1422 },
1423 Some(tree_sitter_rust::LANGUAGE.into()),
1424 ))
1425 }
1426
1427 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1428 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1429 source_range,
1430 level: HeadingLevel::H1,
1431 contents,
1432 })
1433 }
1434
1435 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1436 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1437 source_range,
1438 level: HeadingLevel::H2,
1439 contents,
1440 })
1441 }
1442
1443 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1444 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1445 source_range,
1446 level: HeadingLevel::H3,
1447 contents,
1448 })
1449 }
1450
1451 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1452 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1453 }
1454
1455 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1456 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1457 highlights: Vec::new(),
1458 region_ranges: Vec::new(),
1459 regions: Vec::new(),
1460 source_range,
1461 contents: contents.to_string(),
1462 })]
1463 }
1464
1465 fn block_quote(
1466 children: Vec<ParsedMarkdownElement>,
1467 source_range: Range<usize>,
1468 ) -> ParsedMarkdownElement {
1469 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1470 source_range,
1471 children,
1472 })
1473 }
1474
1475 fn code_block(
1476 language: Option<String>,
1477 code: &str,
1478 source_range: Range<usize>,
1479 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1480 ) -> ParsedMarkdownElement {
1481 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1482 source_range,
1483 language,
1484 contents: code.to_string().into(),
1485 highlights,
1486 })
1487 }
1488
1489 fn list_item(
1490 source_range: Range<usize>,
1491 depth: u16,
1492 item_type: ParsedMarkdownListItemType,
1493 content: Vec<ParsedMarkdownElement>,
1494 ) -> ParsedMarkdownElement {
1495 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1496 source_range,
1497 item_type,
1498 depth,
1499 content,
1500 })
1501 }
1502
1503 fn table(
1504 source_range: Range<usize>,
1505 header: ParsedMarkdownTableRow,
1506 body: Vec<ParsedMarkdownTableRow>,
1507 ) -> ParsedMarkdownTable {
1508 ParsedMarkdownTable {
1509 column_alignments: Vec::new(),
1510 source_range,
1511 header,
1512 body,
1513 }
1514 }
1515
1516 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1517 ParsedMarkdownTableRow { children }
1518 }
1519
1520 impl PartialEq for ParsedMarkdownTable {
1521 fn eq(&self, other: &Self) -> bool {
1522 self.source_range == other.source_range
1523 && self.header == other.header
1524 && self.body == other.body
1525 }
1526 }
1527
1528 impl PartialEq for ParsedMarkdownText {
1529 fn eq(&self, other: &Self) -> bool {
1530 self.source_range == other.source_range && self.contents == other.contents
1531 }
1532 }
1533}