1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event of the document paired with the range it was reported for
    /// by `into_offset_iter()`
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` / `Image::identify` when links and images
    /// are parsed (presumably to resolve relative destinations — confirm there)
    file_location_directory: Option<PathBuf>,
    /// When present, used to look up a language for fenced code blocks so
    /// their contents can be highlighted
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
/// A list item that is still being assembled by `parse_list`.
struct MarkdownListItem {
    /// The blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// The kind of item (unordered, ordered, or task)
    item_type: ParsedMarkdownListItemType,
}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 self.tokens.get(self.cursor + steps)
80 }
81
82 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 self.tokens.get(self.cursor - 1)
87 }
88
89 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
90 self.peek(0)
91 }
92
93 fn current_event(&self) -> Option<&Event<'_>> {
94 self.current().map(|(event, _)| event)
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::InlineHtml(_)
104 | Event::FootnoteReference(_)
105 | Event::Start(Tag::Link { .. })
106 | Event::Start(Tag::Emphasis)
107 | Event::Start(Tag::Strong)
108 | Event::Start(Tag::Strikethrough)
109 | Event::Start(Tag::Image { .. }) => {
110 true
111 }
112 _ => false,
113 }
114 }
115
116 async fn parse_document(mut self) -> Self {
117 while !self.eof() {
118 if let Some(block) = self.parse_block().await {
119 self.parsed.extend(block);
120 } else {
121 self.cursor += 1;
122 }
123 }
124 self
125 }
126
127 #[async_recursion]
128 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
129 let (current, source_range) = self.current().unwrap();
130 let source_range = source_range.clone();
131 match current {
132 Event::Start(tag) => match tag {
133 Tag::Paragraph => {
134 self.cursor += 1;
135 let text = self.parse_text(false, Some(source_range));
136 Some(vec![ParsedMarkdownElement::Paragraph(text)])
137 }
138 Tag::Heading { level, .. } => {
139 let level = *level;
140 self.cursor += 1;
141 let heading = self.parse_heading(level);
142 Some(vec![ParsedMarkdownElement::Heading(heading)])
143 }
144 Tag::Table(alignment) => {
145 let alignment = alignment.clone();
146 self.cursor += 1;
147 let table = self.parse_table(alignment);
148 Some(vec![ParsedMarkdownElement::Table(table)])
149 }
150 Tag::List(order) => {
151 let order = *order;
152 self.cursor += 1;
153 let list = self.parse_list(order).await;
154 Some(list)
155 }
156 Tag::BlockQuote(_kind) => {
157 self.cursor += 1;
158 let block_quote = self.parse_block_quote().await;
159 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
160 }
161 Tag::CodeBlock(kind) => {
162 let language = match kind {
163 pulldown_cmark::CodeBlockKind::Indented => None,
164 pulldown_cmark::CodeBlockKind::Fenced(language) => {
165 if language.is_empty() {
166 None
167 } else {
168 Some(language.to_string())
169 }
170 }
171 };
172
173 self.cursor += 1;
174
175 let code_block = self.parse_code_block(language).await;
176 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
177 }
178 _ => None,
179 },
180 Event::Rule => {
181 self.cursor += 1;
182 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
183 }
184 _ => None,
185 }
186 }
187
188 fn parse_text(
189 &mut self,
190 should_complete_on_soft_break: bool,
191 source_range: Option<Range<usize>>,
192 ) -> MarkdownParagraph {
193 let source_range = source_range.unwrap_or_else(|| {
194 self.current()
195 .map(|(_, range)| range.clone())
196 .unwrap_or_default()
197 });
198
199 let mut markdown_text_like = Vec::new();
200 let mut text = String::new();
201 let mut bold_depth = 0;
202 let mut italic_depth = 0;
203 let mut strikethrough_depth = 0;
204 let mut link: Option<Link> = None;
205 let mut image: Option<Image> = None;
206 let mut region_ranges: Vec<Range<usize>> = vec![];
207 let mut regions: Vec<ParsedRegion> = vec![];
208 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
209 let mut link_urls: Vec<String> = vec![];
210 let mut link_ranges: Vec<Range<usize>> = vec![];
211
212 loop {
213 if self.eof() {
214 break;
215 }
216
217 let (current, _) = self.current().unwrap();
218 let prev_len = text.len();
219 match current {
220 Event::SoftBreak => {
221 if should_complete_on_soft_break {
222 break;
223 }
224 text.push(' ');
225 }
226
227 Event::HardBreak => {
228 text.push('\n');
229 }
230
231 // We want to ignore any inline HTML tags in the text but keep
232 // the text between them
233 Event::InlineHtml(_) => {}
234
235 Event::Text(t) => {
236 text.push_str(t.as_ref());
237 let mut style = MarkdownHighlightStyle::default();
238
239 if bold_depth > 0 {
240 style.weight = FontWeight::BOLD;
241 }
242
243 if italic_depth > 0 {
244 style.italic = true;
245 }
246
247 if strikethrough_depth > 0 {
248 style.strikethrough = true;
249 }
250
251 let last_run_len = if let Some(link) = link.clone() {
252 region_ranges.push(prev_len..text.len());
253 regions.push(ParsedRegion {
254 code: false,
255 link: Some(link),
256 });
257 style.underline = true;
258 prev_len
259 } else {
260 // Manually scan for links
261 let mut finder = linkify::LinkFinder::new();
262 finder.kinds(&[linkify::LinkKind::Url]);
263 let mut last_link_len = prev_len;
264 for link in finder.links(t) {
265 let start = link.start();
266 let end = link.end();
267 let range = (prev_len + start)..(prev_len + end);
268 link_ranges.push(range.clone());
269 link_urls.push(link.as_str().to_string());
270
271 // If there is a style before we match a link, we have to add this to the highlighted ranges
272 if style != MarkdownHighlightStyle::default()
273 && last_link_len < link.start()
274 {
275 highlights.push((
276 last_link_len..link.start(),
277 MarkdownHighlight::Style(style.clone()),
278 ));
279 }
280
281 highlights.push((
282 range.clone(),
283 MarkdownHighlight::Style(MarkdownHighlightStyle {
284 underline: true,
285 ..style
286 }),
287 ));
288 region_ranges.push(range.clone());
289 regions.push(ParsedRegion {
290 code: false,
291 link: Some(Link::Web {
292 url: link.as_str().to_string(),
293 }),
294 });
295 last_link_len = end;
296 }
297 last_link_len
298 };
299
300 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
301 let mut new_highlight = true;
302 if let Some((last_range, last_style)) = highlights.last_mut()
303 && last_range.end == last_run_len
304 && last_style == &MarkdownHighlight::Style(style.clone())
305 {
306 last_range.end = text.len();
307 new_highlight = false;
308 }
309 if new_highlight {
310 highlights.push((
311 last_run_len..text.len(),
312 MarkdownHighlight::Style(style.clone()),
313 ));
314 }
315 }
316 }
317 Event::Code(t) => {
318 text.push_str(t.as_ref());
319 region_ranges.push(prev_len..text.len());
320
321 if link.is_some() {
322 highlights.push((
323 prev_len..text.len(),
324 MarkdownHighlight::Style(MarkdownHighlightStyle {
325 underline: true,
326 ..Default::default()
327 }),
328 ));
329 }
330 regions.push(ParsedRegion {
331 code: true,
332 link: link.clone(),
333 });
334 }
335 Event::Start(tag) => match tag {
336 Tag::Emphasis => italic_depth += 1,
337 Tag::Strong => bold_depth += 1,
338 Tag::Strikethrough => strikethrough_depth += 1,
339 Tag::Link { dest_url, .. } => {
340 link = Link::identify(
341 self.file_location_directory.clone(),
342 dest_url.to_string(),
343 );
344 }
345 Tag::Image { dest_url, .. } => {
346 if !text.is_empty() {
347 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
348 source_range: source_range.clone(),
349 contents: text.clone(),
350 highlights: highlights.clone(),
351 region_ranges: region_ranges.clone(),
352 regions: regions.clone(),
353 });
354 text = String::new();
355 highlights = vec![];
356 region_ranges = vec![];
357 regions = vec![];
358 markdown_text_like.push(parsed_regions);
359 }
360 image = Image::identify(
361 dest_url.to_string(),
362 source_range.clone(),
363 self.file_location_directory.clone(),
364 );
365 }
366 _ => {
367 break;
368 }
369 },
370
371 Event::End(tag) => match tag {
372 TagEnd::Emphasis => italic_depth -= 1,
373 TagEnd::Strong => bold_depth -= 1,
374 TagEnd::Strikethrough => strikethrough_depth -= 1,
375 TagEnd::Link => {
376 link = None;
377 }
378 TagEnd::Image => {
379 if let Some(mut image) = image.take() {
380 if !text.is_empty() {
381 image.alt_text = Some(std::mem::take(&mut text).into());
382 }
383 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
384 }
385 }
386 TagEnd::Paragraph => {
387 self.cursor += 1;
388 break;
389 }
390 _ => {
391 break;
392 }
393 },
394 _ => {
395 break;
396 }
397 }
398
399 self.cursor += 1;
400 }
401 if !text.is_empty() {
402 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
403 source_range,
404 contents: text,
405 highlights,
406 regions,
407 region_ranges,
408 }));
409 }
410 markdown_text_like
411 }
412
413 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
414 let (_event, source_range) = self.previous().unwrap();
415 let source_range = source_range.clone();
416 let text = self.parse_text(true, None);
417
418 // Advance past the heading end tag
419 self.cursor += 1;
420
421 ParsedMarkdownHeading {
422 source_range,
423 level: match level {
424 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
425 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
426 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
427 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
428 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
429 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
430 },
431 contents: text,
432 }
433 }
434
435 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
436 let (_event, source_range) = self.previous().unwrap();
437 let source_range = source_range.clone();
438 let mut header = ParsedMarkdownTableRow::new();
439 let mut body = vec![];
440 let mut current_row = vec![];
441 let mut in_header = true;
442 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
443
444 loop {
445 if self.eof() {
446 break;
447 }
448
449 let (current, source_range) = self.current().unwrap();
450 let source_range = source_range.clone();
451 match current {
452 Event::Start(Tag::TableHead)
453 | Event::Start(Tag::TableRow)
454 | Event::End(TagEnd::TableCell) => {
455 self.cursor += 1;
456 }
457 Event::Start(Tag::TableCell) => {
458 self.cursor += 1;
459 let cell_contents = self.parse_text(false, Some(source_range));
460 current_row.push(cell_contents);
461 }
462 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
463 self.cursor += 1;
464 let new_row = std::mem::take(&mut current_row);
465 if in_header {
466 header.children = new_row;
467 in_header = false;
468 } else {
469 let row = ParsedMarkdownTableRow::with_children(new_row);
470 body.push(row);
471 }
472 }
473 Event::End(TagEnd::Table) => {
474 self.cursor += 1;
475 break;
476 }
477 _ => {
478 break;
479 }
480 }
481 }
482
483 ParsedMarkdownTable {
484 source_range,
485 header,
486 body,
487 column_alignments,
488 }
489 }
490
491 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
492 match alignment {
493 Alignment::None => ParsedMarkdownTableAlignment::None,
494 Alignment::Left => ParsedMarkdownTableAlignment::Left,
495 Alignment::Center => ParsedMarkdownTableAlignment::Center,
496 Alignment::Right => ParsedMarkdownTableAlignment::Right,
497 }
498 }
499
    /// Parses a list whose opening tag was just consumed and returns it as a
    /// FLAT sequence of elements.
    ///
    /// Nested lists are not represented as a tree: every item becomes a
    /// `ListItem` element carrying its `depth` (1 for the outermost list), and
    /// items are ordered so that a parent precedes its nested items. Blocks
    /// that follow nested items inside a list item are emitted as standalone
    /// elements after those nested items.
    ///
    /// `order` is the start number of an ordered list, or `None` for an
    /// unordered one. Panics if there is no token before the cursor.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items (and trailing blocks), in output order.
        let mut items = Vec::new();
        // Items that have been opened but not yet closed; seeded with one default entry.
        let mut items_stack = vec![MarkdownListItem::default()];
        // Nesting level of the list currently being read; reaching 0 ends parsing.
        let mut depth = 1;
        // Number for the next ordered item at the current level (`None` if unordered).
        let mut order = order;
        // Numbering state of the enclosing lists, restored when a nested list ends.
        let mut order_stack = Vec::new();

        // depth -> index in `items` at which the item of that depth must be inserted
        // once it closes. Nested items are pushed before their parent closes, so the
        // parent has to be placed in front of them.
        let mut insertion_indices = FxHashMap::default();
        // depth -> source range of the item of that depth, cut off at the start of
        // its first nested list.
        let mut source_ranges = FxHashMap::default();
        // Range of the most recently opened item (initially the list's own range).
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    // Remember where the enclosing item belongs, unless already recorded
                    // for this depth.
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Resume the numbering of the enclosing list (if any).
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // Precedence: task marker, then ordered numbering, then unordered.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item does not start with inline text: try to parse a block.
                            // Note that `item_type` is not assigned on this path and keeps
                            // its default value.
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the following number.
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range that was cut at the nested list; otherwise use the
                        // range of the most recently opened item.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that had nested items goes in front of them; any other
                        // item is simply appended.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block start: skip the token so the loop keeps making progress.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
650
651 #[async_recursion]
652 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
653 let (_event, source_range) = self.previous().unwrap();
654 let source_range = source_range.clone();
655 let mut nested_depth = 1;
656
657 let mut children: Vec<ParsedMarkdownElement> = vec![];
658
659 while !self.eof() {
660 let block = self.parse_block().await;
661
662 if let Some(block) = block {
663 children.extend(block);
664 } else {
665 break;
666 }
667
668 if self.eof() {
669 break;
670 }
671
672 let (current, _source_range) = self.current().unwrap();
673 match current {
674 // This is a nested block quote.
675 // Record that we're in a nested block quote and continue parsing.
676 // We don't need to advance the cursor since the next
677 // call to `parse_block` will handle it.
678 Event::Start(Tag::BlockQuote(_kind)) => {
679 nested_depth += 1;
680 }
681 Event::End(TagEnd::BlockQuote(_kind)) => {
682 nested_depth -= 1;
683 if nested_depth == 0 {
684 self.cursor += 1;
685 break;
686 }
687 }
688 _ => {}
689 };
690 }
691
692 ParsedMarkdownBlockQuote {
693 source_range,
694 children,
695 }
696 }
697
698 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
699 let (_event, source_range) = self.previous().unwrap();
700 let source_range = source_range.clone();
701 let mut code = String::new();
702
703 while !self.eof() {
704 let (current, _source_range) = self.current().unwrap();
705 match current {
706 Event::Text(text) => {
707 code.push_str(text);
708 self.cursor += 1;
709 }
710 Event::End(TagEnd::CodeBlock) => {
711 self.cursor += 1;
712 break;
713 }
714 _ => {
715 break;
716 }
717 }
718 }
719
720 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
721
722 let highlights = if let Some(language) = &language {
723 if let Some(registry) = &self.language_registry {
724 let rope: language::Rope = code.as_str().into();
725 registry
726 .language_for_name_or_extension(language)
727 .await
728 .map(|l| l.highlight_text(&rope, 0..code.len()))
729 .ok()
730 } else {
731 None
732 }
733 } else {
734 None
735 };
736
737 ParsedMarkdownCodeBlock {
738 source_range,
739 contents: code.into(),
740 language,
741 highlights,
742 }
743 }
744}
745
746#[cfg(test)]
747mod tests {
748 use core::panic;
749
750 use super::*;
751
752 use ParsedMarkdownListItemType::*;
753 use gpui::BackgroundExecutor;
754 use language::{
755 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
756 };
757 use pretty_assertions::assert_eq;
758
759 async fn parse(input: &str) -> ParsedMarkdown {
760 parse_markdown(input, None, None).await
761 }
762
763 #[gpui::test]
764 async fn test_headings() {
765 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
766
767 assert_eq!(
768 parsed.children,
769 vec![
770 h1(text("Heading one", 2..13), 0..14),
771 h2(text("Heading two", 17..28), 14..29),
772 h3(text("Heading three", 33..46), 29..46),
773 ]
774 );
775 }
776
777 #[gpui::test]
778 async fn test_newlines_dont_new_paragraphs() {
779 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
780
781 assert_eq!(
782 parsed.children,
783 vec![p("Some text that is bolded and italicized", 0..46)]
784 );
785 }
786
787 #[gpui::test]
788 async fn test_heading_with_paragraph() {
789 let parsed = parse("# Zed\nThe editor").await;
790
791 assert_eq!(
792 parsed.children,
793 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
794 );
795 }
796
797 #[gpui::test]
798 async fn test_double_newlines_do_new_paragraphs() {
799 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
800
801 assert_eq!(
802 parsed.children,
803 vec![
804 p("Some text that is bolded", 0..29),
805 p("and italicized", 31..47),
806 ]
807 );
808 }
809
810 #[gpui::test]
811 async fn test_bold_italic_text() {
812 let parsed = parse("Some text **that is bolded** and *italicized*").await;
813
814 assert_eq!(
815 parsed.children,
816 vec![p("Some text that is bolded and italicized", 0..45)]
817 );
818 }
819
820 #[gpui::test]
821 async fn test_nested_bold_strikethrough_text() {
822 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
823
824 assert_eq!(parsed.children.len(), 1);
825 assert_eq!(
826 parsed.children[0],
827 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
828 ParsedMarkdownText {
829 source_range: 0..35,
830 contents: "Some bostrikethroughld text".to_string(),
831 highlights: Vec::new(),
832 region_ranges: Vec::new(),
833 regions: Vec::new(),
834 }
835 )])
836 );
837
838 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
839 text
840 } else {
841 panic!("Expected a paragraph");
842 };
843
844 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
845 text
846 } else {
847 panic!("Expected a text");
848 };
849
850 assert_eq!(
851 paragraph.highlights,
852 vec![
853 (
854 5..7,
855 MarkdownHighlight::Style(MarkdownHighlightStyle {
856 weight: FontWeight::BOLD,
857 ..Default::default()
858 }),
859 ),
860 (
861 7..20,
862 MarkdownHighlight::Style(MarkdownHighlightStyle {
863 weight: FontWeight::BOLD,
864 strikethrough: true,
865 ..Default::default()
866 }),
867 ),
868 (
869 20..22,
870 MarkdownHighlight::Style(MarkdownHighlightStyle {
871 weight: FontWeight::BOLD,
872 ..Default::default()
873 }),
874 ),
875 ]
876 );
877 }
878
879 #[gpui::test]
880 async fn test_text_with_inline_html() {
881 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
882
883 assert_eq!(
884 parsed.children,
885 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
886 );
887 }
888
889 #[gpui::test]
890 async fn test_raw_links_detection() {
891 let parsed = parse("Checkout this https://zed.dev link").await;
892
893 assert_eq!(
894 parsed.children,
895 vec![p("Checkout this https://zed.dev link", 0..34)]
896 );
897 }
898
899 #[gpui::test]
900 async fn test_empty_image() {
901 let parsed = parse("![]()").await;
902
903 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
904 text
905 } else {
906 panic!("Expected a paragraph");
907 };
908 assert_eq!(paragraph.len(), 0);
909 }
910
911 #[gpui::test]
912 async fn test_image_links_detection() {
913 let parsed = parse("").await;
914
915 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
916 text
917 } else {
918 panic!("Expected a paragraph");
919 };
920 assert_eq!(
921 paragraph[0],
922 MarkdownParagraphChunk::Image(Image {
923 source_range: 0..111,
924 link: Link::Web {
925 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
926 },
927 alt_text: Some("test".into()),
928 },)
929 );
930 }
931
932 #[gpui::test]
933 async fn test_image_without_alt_text() {
934 let parsed = parse("").await;
935
936 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
937 text
938 } else {
939 panic!("Expected a paragraph");
940 };
941 assert_eq!(
942 paragraph[0],
943 MarkdownParagraphChunk::Image(Image {
944 source_range: 0..31,
945 link: Link::Web {
946 url: "http://example.com/foo.png".to_string(),
947 },
948 alt_text: None,
949 },)
950 );
951 }
952
953 #[gpui::test]
954 async fn test_image_with_alt_text_containing_formatting() {
955 let parsed = parse("").await;
956
957 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
958 panic!("Expected a paragraph");
959 };
960 assert_eq!(
961 chunks,
962 &[MarkdownParagraphChunk::Image(Image {
963 source_range: 0..44,
964 link: Link::Web {
965 url: "http://example.com/foo.png".to_string(),
966 },
967 alt_text: Some("foo bar baz".into()),
968 }),],
969 );
970 }
971
972 #[gpui::test]
973 async fn test_images_with_text_in_between() {
974 let parsed = parse(
975 "\nLorem Ipsum\n",
976 )
977 .await;
978
979 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
980 text
981 } else {
982 panic!("Expected a paragraph");
983 };
984 assert_eq!(
985 chunks,
986 &vec![
987 MarkdownParagraphChunk::Image(Image {
988 source_range: 0..81,
989 link: Link::Web {
990 url: "http://example.com/foo.png".to_string(),
991 },
992 alt_text: Some("foo".into()),
993 }),
994 MarkdownParagraphChunk::Text(ParsedMarkdownText {
995 source_range: 0..81,
996 contents: " Lorem Ipsum ".to_string(),
997 highlights: Vec::new(),
998 region_ranges: Vec::new(),
999 regions: Vec::new(),
1000 }),
1001 MarkdownParagraphChunk::Image(Image {
1002 source_range: 0..81,
1003 link: Link::Web {
1004 url: "http://example.com/bar.png".to_string(),
1005 },
1006 alt_text: Some("bar".into()),
1007 })
1008 ]
1009 );
1010 }
1011
1012 #[gpui::test]
1013 async fn test_header_only_table() {
1014 let markdown = "\
1015| Header 1 | Header 2 |
1016|----------|----------|
1017
1018Some other content
1019";
1020
1021 let expected_table = table(
1022 0..48,
1023 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1024 vec![],
1025 );
1026
1027 assert_eq!(
1028 parse(markdown).await.children[0],
1029 ParsedMarkdownElement::Table(expected_table)
1030 );
1031 }
1032
1033 #[gpui::test]
1034 async fn test_basic_table() {
1035 let markdown = "\
1036| Header 1 | Header 2 |
1037|----------|----------|
1038| Cell 1 | Cell 2 |
1039| Cell 3 | Cell 4 |";
1040
1041 let expected_table = table(
1042 0..95,
1043 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1044 vec![
1045 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1046 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1047 ],
1048 );
1049
1050 assert_eq!(
1051 parse(markdown).await.children[0],
1052 ParsedMarkdownElement::Table(expected_table)
1053 );
1054 }
1055
1056 #[gpui::test]
1057 async fn test_list_basic() {
1058 let parsed = parse(
1059 "\
1060* Item 1
1061* Item 2
1062* Item 3
1063",
1064 )
1065 .await;
1066
1067 assert_eq!(
1068 parsed.children,
1069 vec![
1070 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1071 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1072 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1073 ],
1074 );
1075 }
1076
1077 #[gpui::test]
1078 async fn test_list_with_tasks() {
1079 let parsed = parse(
1080 "\
1081- [ ] TODO
1082- [x] Checked
1083",
1084 )
1085 .await;
1086
1087 assert_eq!(
1088 parsed.children,
1089 vec![
1090 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1091 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1092 ],
1093 );
1094 }
1095
1096 #[gpui::test]
1097 async fn test_list_with_indented_task() {
1098 let parsed = parse(
1099 "\
1100- [ ] TODO
1101 - [x] Checked
1102 - Unordered
1103 1. Number 1
1104 1. Number 2
11051. Number A
1106",
1107 )
1108 .await;
1109
1110 assert_eq!(
1111 parsed.children,
1112 vec![
1113 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1114 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1115 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1116 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1117 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1118 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1119 ],
1120 );
1121 }
1122
1123 #[gpui::test]
1124 async fn test_list_with_linebreak_is_handled_correctly() {
1125 let parsed = parse(
1126 "\
1127- [ ] Task 1
1128
1129- [x] Task 2
1130",
1131 )
1132 .await;
1133
1134 assert_eq!(
1135 parsed.children,
1136 vec![
1137 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1138 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1139 ],
1140 );
1141 }
1142
1143 #[gpui::test]
1144 async fn test_list_nested() {
1145 let parsed = parse(
1146 "\
1147* Item 1
1148* Item 2
1149* Item 3
1150
11511. Hello
11521. Two
1153 1. Three
11542. Four
11553. Five
1156
1157* First
1158 1. Hello
1159 1. Goodbyte
1160 - Inner
1161 - Inner
1162 2. Goodbyte
1163 - Next item empty
1164 -
1165* Last
1166",
1167 )
1168 .await;
1169
1170 assert_eq!(
1171 parsed.children,
1172 vec![
1173 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1174 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1175 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1176 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1177 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1178 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1179 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1180 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1181 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1182 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1183 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1184 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1185 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1186 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1187 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1188 list_item(186..190, 3, Unordered, vec![]),
1189 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1190 ]
1191 );
1192 }
1193
1194 #[gpui::test]
1195 async fn test_list_with_nested_content() {
1196 let parsed = parse(
1197 "\
1198* This is a list item with two paragraphs.
1199
1200 This is the second paragraph in the list item.
1201",
1202 )
1203 .await;
1204
1205 assert_eq!(
1206 parsed.children,
1207 vec![list_item(
1208 0..96,
1209 1,
1210 Unordered,
1211 vec![
1212 p("This is a list item with two paragraphs.", 4..44),
1213 p("This is the second paragraph in the list item.", 50..97)
1214 ],
1215 ),],
1216 );
1217 }
1218
1219 #[gpui::test]
1220 async fn test_list_item_with_inline_html() {
1221 let parsed = parse(
1222 "\
1223* This is a list item with an inline HTML <sometag>tag</sometag>.
1224",
1225 )
1226 .await;
1227
1228 assert_eq!(
1229 parsed.children,
1230 vec![list_item(
1231 0..67,
1232 1,
1233 Unordered,
1234 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1235 ),],
1236 );
1237 }
1238
1239 #[gpui::test]
1240 async fn test_nested_list_with_paragraph_inside() {
1241 let parsed = parse(
1242 "\
12431. a
1244 1. b
1245 1. c
1246
1247 text
1248
1249 1. d
1250",
1251 )
1252 .await;
1253
1254 assert_eq!(
1255 parsed.children,
1256 vec![
1257 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1258 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1259 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1260 p("text", 32..37),
1261 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1262 ],
1263 );
1264 }
1265
1266 #[gpui::test]
1267 async fn test_list_with_leading_text() {
1268 let parsed = parse(
1269 "\
1270* `code`
1271* **bold**
1272* [link](https://example.com)
1273",
1274 )
1275 .await;
1276
1277 assert_eq!(
1278 parsed.children,
1279 vec![
1280 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1281 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1282 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1283 ],
1284 );
1285 }
1286
1287 #[gpui::test]
1288 async fn test_simple_block_quote() {
1289 let parsed = parse("> Simple block quote with **styled text**").await;
1290
1291 assert_eq!(
1292 parsed.children,
1293 vec![block_quote(
1294 vec![p("Simple block quote with styled text", 2..41)],
1295 0..41
1296 )]
1297 );
1298 }
1299
1300 #[gpui::test]
1301 async fn test_simple_block_quote_with_multiple_lines() {
1302 let parsed = parse(
1303 "\
1304> # Heading
1305> More
1306> text
1307>
1308> More text
1309",
1310 )
1311 .await;
1312
1313 assert_eq!(
1314 parsed.children,
1315 vec![block_quote(
1316 vec![
1317 h1(text("Heading", 4..11), 2..12),
1318 p("More text", 14..26),
1319 p("More text", 30..40)
1320 ],
1321 0..40
1322 )]
1323 );
1324 }
1325
1326 #[gpui::test]
1327 async fn test_nested_block_quote() {
1328 let parsed = parse(
1329 "\
1330> A
1331>
1332> > # B
1333>
1334> C
1335
1336More text
1337",
1338 )
1339 .await;
1340
1341 assert_eq!(
1342 parsed.children,
1343 vec![
1344 block_quote(
1345 vec![
1346 p("A", 2..4),
1347 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1348 p("C", 18..20)
1349 ],
1350 0..20
1351 ),
1352 p("More text", 21..31)
1353 ]
1354 );
1355 }
1356
1357 #[gpui::test]
1358 async fn test_code_block() {
1359 let parsed = parse(
1360 "\
1361```
1362fn main() {
1363 return 0;
1364}
1365```
1366",
1367 )
1368 .await;
1369
1370 assert_eq!(
1371 parsed.children,
1372 vec![code_block(
1373 None,
1374 "fn main() {\n return 0;\n}",
1375 0..35,
1376 None
1377 )]
1378 );
1379 }
1380
1381 #[gpui::test]
1382 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1383 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1384 language_registry.add(rust_lang());
1385
1386 let parsed = parse_markdown(
1387 "\
1388```rust
1389fn main() {
1390 return 0;
1391}
1392```
1393",
1394 None,
1395 Some(language_registry),
1396 )
1397 .await;
1398
1399 assert_eq!(
1400 parsed.children,
1401 vec![code_block(
1402 Some("rust".to_string()),
1403 "fn main() {\n return 0;\n}",
1404 0..39,
1405 Some(vec![])
1406 )]
1407 );
1408 }
1409
1410 fn rust_lang() -> Arc<Language> {
1411 Arc::new(Language::new(
1412 LanguageConfig {
1413 name: "Rust".into(),
1414 matcher: LanguageMatcher {
1415 path_suffixes: vec!["rs".into()],
1416 ..Default::default()
1417 },
1418 collapsed_placeholder: " /* ... */ ".to_string(),
1419 ..Default::default()
1420 },
1421 Some(tree_sitter_rust::LANGUAGE.into()),
1422 ))
1423 }
1424
1425 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1426 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1427 source_range,
1428 level: HeadingLevel::H1,
1429 contents,
1430 })
1431 }
1432
1433 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1434 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1435 source_range,
1436 level: HeadingLevel::H2,
1437 contents,
1438 })
1439 }
1440
1441 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1442 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1443 source_range,
1444 level: HeadingLevel::H3,
1445 contents,
1446 })
1447 }
1448
1449 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1450 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1451 }
1452
1453 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1454 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1455 highlights: Vec::new(),
1456 region_ranges: Vec::new(),
1457 regions: Vec::new(),
1458 source_range,
1459 contents: contents.to_string(),
1460 })]
1461 }
1462
1463 fn block_quote(
1464 children: Vec<ParsedMarkdownElement>,
1465 source_range: Range<usize>,
1466 ) -> ParsedMarkdownElement {
1467 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1468 source_range,
1469 children,
1470 })
1471 }
1472
1473 fn code_block(
1474 language: Option<String>,
1475 code: &str,
1476 source_range: Range<usize>,
1477 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1478 ) -> ParsedMarkdownElement {
1479 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1480 source_range,
1481 language,
1482 contents: code.to_string().into(),
1483 highlights,
1484 })
1485 }
1486
1487 fn list_item(
1488 source_range: Range<usize>,
1489 depth: u16,
1490 item_type: ParsedMarkdownListItemType,
1491 content: Vec<ParsedMarkdownElement>,
1492 ) -> ParsedMarkdownElement {
1493 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1494 source_range,
1495 item_type,
1496 depth,
1497 content,
1498 })
1499 }
1500
1501 fn table(
1502 source_range: Range<usize>,
1503 header: ParsedMarkdownTableRow,
1504 body: Vec<ParsedMarkdownTableRow>,
1505 ) -> ParsedMarkdownTable {
1506 ParsedMarkdownTable {
1507 column_alignments: Vec::new(),
1508 source_range,
1509 header,
1510 body,
1511 }
1512 }
1513
1514 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1515 ParsedMarkdownTableRow { children }
1516 }
1517
1518 impl PartialEq for ParsedMarkdownTable {
1519 fn eq(&self, other: &Self) -> bool {
1520 self.source_range == other.source_range
1521 && self.header == other.header
1522 && self.body == other.body
1523 }
1524 }
1525
1526 impl PartialEq for ParsedMarkdownText {
1527 fn eq(&self, other: &Self) -> bool {
1528 self.source_range == other.source_range && self.contents == other.contents
1529 }
1530 }
1531}