1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
29struct MarkdownParser<'a> {
30 tokens: Vec<(Event<'a>, Range<usize>)>,
31 /// The current index in the tokens array
32 cursor: usize,
33 /// The blocks that we have successfully parsed so far
34 parsed: Vec<ParsedMarkdownElement>,
35 file_location_directory: Option<PathBuf>,
36 language_registry: Option<Arc<LanguageRegistry>>,
37}
38
39struct MarkdownListItem {
40 content: Vec<ParsedMarkdownElement>,
41 item_type: ParsedMarkdownListItemType,
42}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 return self.tokens.get(self.cursor + steps);
80 }
81
82 fn previous(&self) -> Option<&(Event, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 return self.tokens.get(self.cursor - 1);
87 }
88
89 fn current(&self) -> Option<&(Event, Range<usize>)> {
90 return self.peek(0);
91 }
92
93 fn current_event(&self) -> Option<&Event> {
94 return self.current().map(|(event, _)| event);
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::FootnoteReference(_)
104 | Event::Start(Tag::Link { .. })
105 | Event::Start(Tag::Emphasis)
106 | Event::Start(Tag::Strong)
107 | Event::Start(Tag::Strikethrough)
108 | Event::Start(Tag::Image { .. }) => {
109 true
110 }
111 _ => false,
112 }
113 }
114
115 async fn parse_document(mut self) -> Self {
116 while !self.eof() {
117 if let Some(block) = self.parse_block().await {
118 self.parsed.extend(block);
119 } else {
120 self.cursor += 1;
121 }
122 }
123 self
124 }
125
126 #[async_recursion]
127 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
128 let (current, source_range) = self.current().unwrap();
129 let source_range = source_range.clone();
130 match current {
131 Event::Start(tag) => match tag {
132 Tag::Paragraph => {
133 self.cursor += 1;
134 let text = self.parse_text(false, Some(source_range));
135 Some(vec![ParsedMarkdownElement::Paragraph(text)])
136 }
137 Tag::Heading { level, .. } => {
138 let level = *level;
139 self.cursor += 1;
140 let heading = self.parse_heading(level);
141 Some(vec![ParsedMarkdownElement::Heading(heading)])
142 }
143 Tag::Table(alignment) => {
144 let alignment = alignment.clone();
145 self.cursor += 1;
146 let table = self.parse_table(alignment);
147 Some(vec![ParsedMarkdownElement::Table(table)])
148 }
149 Tag::List(order) => {
150 let order = *order;
151 self.cursor += 1;
152 let list = self.parse_list(order).await;
153 Some(list)
154 }
155 Tag::BlockQuote(_kind) => {
156 self.cursor += 1;
157 let block_quote = self.parse_block_quote().await;
158 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
159 }
160 Tag::CodeBlock(kind) => {
161 let language = match kind {
162 pulldown_cmark::CodeBlockKind::Indented => None,
163 pulldown_cmark::CodeBlockKind::Fenced(language) => {
164 if language.is_empty() {
165 None
166 } else {
167 Some(language.to_string())
168 }
169 }
170 };
171
172 self.cursor += 1;
173
174 let code_block = self.parse_code_block(language).await;
175 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
176 }
177 _ => None,
178 },
179 Event::Rule => {
180 let source_range = source_range.clone();
181 self.cursor += 1;
182 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
183 }
184 _ => None,
185 }
186 }
187
188 fn parse_text(
189 &mut self,
190 should_complete_on_soft_break: bool,
191 source_range: Option<Range<usize>>,
192 ) -> MarkdownParagraph {
193 let source_range = source_range.unwrap_or_else(|| {
194 self.current()
195 .map(|(_, range)| range.clone())
196 .unwrap_or_default()
197 });
198
199 let mut markdown_text_like = Vec::new();
200 let mut text = String::new();
201 let mut bold_depth = 0;
202 let mut italic_depth = 0;
203 let mut strikethrough_depth = 0;
204 let mut link: Option<Link> = None;
205 let mut image: Option<Image> = None;
206 let mut region_ranges: Vec<Range<usize>> = vec![];
207 let mut regions: Vec<ParsedRegion> = vec![];
208 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
209 let mut link_urls: Vec<String> = vec![];
210 let mut link_ranges: Vec<Range<usize>> = vec![];
211
212 loop {
213 if self.eof() {
214 break;
215 }
216
217 let (current, _) = self.current().unwrap();
218 let prev_len = text.len();
219 match current {
220 Event::SoftBreak => {
221 if should_complete_on_soft_break {
222 break;
223 }
224 text.push(' ');
225 }
226
227 Event::HardBreak => {
228 text.push('\n');
229 }
230
231 // We want to ignore any inline HTML tags in the text but keep
232 // the text between them
233 Event::InlineHtml(_) => {}
234
235 Event::Text(t) => {
236 text.push_str(t.as_ref());
237 let mut style = MarkdownHighlightStyle::default();
238
239 if bold_depth > 0 {
240 style.weight = FontWeight::BOLD;
241 }
242
243 if italic_depth > 0 {
244 style.italic = true;
245 }
246
247 if strikethrough_depth > 0 {
248 style.strikethrough = true;
249 }
250
251 let last_run_len = if let Some(link) = link.clone() {
252 region_ranges.push(prev_len..text.len());
253 regions.push(ParsedRegion {
254 code: false,
255 link: Some(link),
256 });
257 style.underline = true;
258 prev_len
259 } else {
260 // Manually scan for links
261 let mut finder = linkify::LinkFinder::new();
262 finder.kinds(&[linkify::LinkKind::Url]);
263 let mut last_link_len = prev_len;
264 for link in finder.links(t) {
265 let start = link.start();
266 let end = link.end();
267 let range = (prev_len + start)..(prev_len + end);
268 link_ranges.push(range.clone());
269 link_urls.push(link.as_str().to_string());
270
271 // If there is a style before we match a link, we have to add this to the highlighted ranges
272 if style != MarkdownHighlightStyle::default()
273 && last_link_len < link.start()
274 {
275 highlights.push((
276 last_link_len..link.start(),
277 MarkdownHighlight::Style(style.clone()),
278 ));
279 }
280
281 highlights.push((
282 range.clone(),
283 MarkdownHighlight::Style(MarkdownHighlightStyle {
284 underline: true,
285 ..style
286 }),
287 ));
288 region_ranges.push(range.clone());
289 regions.push(ParsedRegion {
290 code: false,
291 link: Some(Link::Web {
292 url: link.as_str().to_string(),
293 }),
294 });
295 last_link_len = end;
296 }
297 last_link_len
298 };
299
300 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
301 let mut new_highlight = true;
302 if let Some((last_range, last_style)) = highlights.last_mut() {
303 if last_range.end == last_run_len
304 && last_style == &MarkdownHighlight::Style(style.clone())
305 {
306 last_range.end = text.len();
307 new_highlight = false;
308 }
309 }
310 if new_highlight {
311 highlights.push((
312 last_run_len..text.len(),
313 MarkdownHighlight::Style(style.clone()),
314 ));
315 }
316 }
317 if let Some(image) = image.as_mut() {
318 text.truncate(text.len() - t.len());
319 image.set_alt_text(t.to_string().into());
320 if !text.is_empty() {
321 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
322 source_range: source_range.clone(),
323 contents: text.clone(),
324 highlights: highlights.clone(),
325 region_ranges: region_ranges.clone(),
326 regions: regions.clone(),
327 });
328 text = String::new();
329 highlights = vec![];
330 region_ranges = vec![];
331 regions = vec![];
332 markdown_text_like.push(parsed_regions);
333 }
334
335 let parsed_image = MarkdownParagraphChunk::Image(image.clone());
336 markdown_text_like.push(parsed_image);
337 style = MarkdownHighlightStyle::default();
338 style.underline = true;
339 }
340 }
341 Event::Code(t) => {
342 text.push_str(t.as_ref());
343 region_ranges.push(prev_len..text.len());
344
345 if link.is_some() {
346 highlights.push((
347 prev_len..text.len(),
348 MarkdownHighlight::Style(MarkdownHighlightStyle {
349 underline: true,
350 ..Default::default()
351 }),
352 ));
353 }
354 regions.push(ParsedRegion {
355 code: true,
356 link: link.clone(),
357 });
358 }
359 Event::Start(tag) => match tag {
360 Tag::Emphasis => italic_depth += 1,
361 Tag::Strong => bold_depth += 1,
362 Tag::Strikethrough => strikethrough_depth += 1,
363 Tag::Link { dest_url, .. } => {
364 link = Link::identify(
365 self.file_location_directory.clone(),
366 dest_url.to_string(),
367 );
368 }
369 Tag::Image { dest_url, .. } => {
370 image = Image::identify(
371 dest_url.to_string(),
372 source_range.clone(),
373 self.file_location_directory.clone(),
374 );
375 }
376 _ => {
377 break;
378 }
379 },
380
381 Event::End(tag) => match tag {
382 TagEnd::Emphasis => italic_depth -= 1,
383 TagEnd::Strong => bold_depth -= 1,
384 TagEnd::Strikethrough => strikethrough_depth -= 1,
385 TagEnd::Link => {
386 link = None;
387 }
388 TagEnd::Image => {
389 image = None;
390 }
391 TagEnd::Paragraph => {
392 self.cursor += 1;
393 break;
394 }
395 _ => {
396 break;
397 }
398 },
399 _ => {
400 break;
401 }
402 }
403
404 self.cursor += 1;
405 }
406 if !text.is_empty() {
407 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
408 source_range: source_range.clone(),
409 contents: text,
410 highlights,
411 regions,
412 region_ranges,
413 }));
414 }
415 markdown_text_like
416 }
417
418 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
419 let (_event, source_range) = self.previous().unwrap();
420 let source_range = source_range.clone();
421 let text = self.parse_text(true, None);
422
423 // Advance past the heading end tag
424 self.cursor += 1;
425
426 ParsedMarkdownHeading {
427 source_range: source_range.clone(),
428 level: match level {
429 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
430 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
431 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
432 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
433 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
434 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
435 },
436 contents: text,
437 }
438 }
439
440 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
441 let (_event, source_range) = self.previous().unwrap();
442 let source_range = source_range.clone();
443 let mut header = ParsedMarkdownTableRow::new();
444 let mut body = vec![];
445 let mut current_row = vec![];
446 let mut in_header = true;
447 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
448
449 loop {
450 if self.eof() {
451 break;
452 }
453
454 let (current, source_range) = self.current().unwrap();
455 let source_range = source_range.clone();
456 match current {
457 Event::Start(Tag::TableHead)
458 | Event::Start(Tag::TableRow)
459 | Event::End(TagEnd::TableCell) => {
460 self.cursor += 1;
461 }
462 Event::Start(Tag::TableCell) => {
463 self.cursor += 1;
464 let cell_contents = self.parse_text(false, Some(source_range));
465 current_row.push(cell_contents);
466 }
467 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
468 self.cursor += 1;
469 let new_row = std::mem::take(&mut current_row);
470 if in_header {
471 header.children = new_row;
472 in_header = false;
473 } else {
474 let row = ParsedMarkdownTableRow::with_children(new_row);
475 body.push(row);
476 }
477 }
478 Event::End(TagEnd::Table) => {
479 self.cursor += 1;
480 break;
481 }
482 _ => {
483 break;
484 }
485 }
486 }
487
488 ParsedMarkdownTable {
489 source_range,
490 header,
491 body,
492 column_alignments,
493 }
494 }
495
496 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
497 match alignment {
498 Alignment::None => ParsedMarkdownTableAlignment::None,
499 Alignment::Left => ParsedMarkdownTableAlignment::Left,
500 Alignment::Center => ParsedMarkdownTableAlignment::Center,
501 Alignment::Right => ParsedMarkdownTableAlignment::Right,
502 }
503 }
504
505 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
506 let (_, list_source_range) = self.previous().unwrap();
507
508 let mut items = Vec::new();
509 let mut items_stack = vec![MarkdownListItem::default()];
510 let mut depth = 1;
511 let mut order = order;
512 let mut order_stack = Vec::new();
513
514 let mut insertion_indices = FxHashMap::default();
515 let mut source_ranges = FxHashMap::default();
516 let mut start_item_range = list_source_range.clone();
517
518 while !self.eof() {
519 let (current, source_range) = self.current().unwrap();
520 match current {
521 Event::Start(Tag::List(new_order)) => {
522 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
523 insertion_indices.insert(depth, items.len());
524 }
525
526 // We will use the start of the nested list as the end for the current item's range,
527 // because we don't care about the hierarchy of list items
528 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
529 e.insert(start_item_range.start..source_range.start);
530 }
531
532 order_stack.push(order);
533 order = *new_order;
534 self.cursor += 1;
535 depth += 1;
536 }
537 Event::End(TagEnd::List(_)) => {
538 order = order_stack.pop().flatten();
539 self.cursor += 1;
540 depth -= 1;
541
542 if depth == 0 {
543 break;
544 }
545 }
546 Event::Start(Tag::Item) => {
547 start_item_range = source_range.clone();
548
549 self.cursor += 1;
550 items_stack.push(MarkdownListItem::default());
551
552 let mut task_list = None;
553 // Check for task list marker (`- [ ]` or `- [x]`)
554 if let Some(event) = self.current_event() {
555 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
556 if event == &Event::Start(Tag::Paragraph) {
557 self.cursor += 1;
558 }
559
560 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
561 task_list = Some((*checked, range.clone()));
562 self.cursor += 1;
563 }
564 }
565
566 if let Some((event, range)) = self.current() {
567 // This is a plain list item.
568 // For example `- some text` or `1. [Docs](./docs.md)`
569 if MarkdownParser::is_text_like(event) {
570 let text = self.parse_text(false, Some(range.clone()));
571 let block = ParsedMarkdownElement::Paragraph(text);
572 if let Some(content) = items_stack.last_mut() {
573 let item_type = if let Some((checked, range)) = task_list {
574 ParsedMarkdownListItemType::Task(checked, range)
575 } else if let Some(order) = order {
576 ParsedMarkdownListItemType::Ordered(order)
577 } else {
578 ParsedMarkdownListItemType::Unordered
579 };
580 content.item_type = item_type;
581 content.content.push(block);
582 }
583 } else {
584 let block = self.parse_block().await;
585 if let Some(block) = block {
586 if let Some(list_item) = items_stack.last_mut() {
587 list_item.content.extend(block);
588 }
589 }
590 }
591 }
592
593 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
594 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
595 self.cursor += 1;
596 }
597 }
598 Event::End(TagEnd::Item) => {
599 self.cursor += 1;
600
601 if let Some(current) = order {
602 order = Some(current + 1);
603 }
604
605 if let Some(list_item) = items_stack.pop() {
606 let source_range = source_ranges
607 .remove(&depth)
608 .unwrap_or(start_item_range.clone());
609
610 // We need to remove the last character of the source range, because it includes the newline character
611 let source_range = source_range.start..source_range.end - 1;
612 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
613 source_range,
614 content: list_item.content,
615 depth,
616 item_type: list_item.item_type,
617 });
618
619 if let Some(index) = insertion_indices.get(&depth) {
620 items.insert(*index, item);
621 insertion_indices.remove(&depth);
622 } else {
623 items.push(item);
624 }
625 }
626 }
627 _ => {
628 if depth == 0 {
629 break;
630 }
631 // This can only happen if a list item starts with more then one paragraph,
632 // or the list item contains blocks that should be rendered after the nested list items
633 let block = self.parse_block().await;
634 if let Some(block) = block {
635 if let Some(list_item) = items_stack.last_mut() {
636 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
637 if !insertion_indices.contains_key(&depth) {
638 list_item.content.extend(block);
639 continue;
640 }
641 }
642
643 // Otherwise we need to insert the block after all the nested items
644 // that have been parsed so far
645 items.extend(block);
646 } else {
647 self.cursor += 1;
648 }
649 }
650 }
651 }
652
653 items
654 }
655
656 #[async_recursion]
657 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
658 let (_event, source_range) = self.previous().unwrap();
659 let source_range = source_range.clone();
660 let mut nested_depth = 1;
661
662 let mut children: Vec<ParsedMarkdownElement> = vec![];
663
664 while !self.eof() {
665 let block = self.parse_block().await;
666
667 if let Some(block) = block {
668 children.extend(block);
669 } else {
670 break;
671 }
672
673 if self.eof() {
674 break;
675 }
676
677 let (current, _source_range) = self.current().unwrap();
678 match current {
679 // This is a nested block quote.
680 // Record that we're in a nested block quote and continue parsing.
681 // We don't need to advance the cursor since the next
682 // call to `parse_block` will handle it.
683 Event::Start(Tag::BlockQuote(_kind)) => {
684 nested_depth += 1;
685 }
686 Event::End(TagEnd::BlockQuote(_kind)) => {
687 nested_depth -= 1;
688 if nested_depth == 0 {
689 self.cursor += 1;
690 break;
691 }
692 }
693 _ => {}
694 };
695 }
696
697 ParsedMarkdownBlockQuote {
698 source_range,
699 children,
700 }
701 }
702
703 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
704 let (_event, source_range) = self.previous().unwrap();
705 let source_range = source_range.clone();
706 let mut code = String::new();
707
708 while !self.eof() {
709 let (current, _source_range) = self.current().unwrap();
710 match current {
711 Event::Text(text) => {
712 code.push_str(text);
713 self.cursor += 1;
714 }
715 Event::End(TagEnd::CodeBlock) => {
716 self.cursor += 1;
717 break;
718 }
719 _ => {
720 break;
721 }
722 }
723 }
724 let highlights = if let Some(language) = &language {
725 if let Some(registry) = &self.language_registry {
726 let rope: language::Rope = code.as_str().into();
727 registry
728 .language_for_name_or_extension(language)
729 .await
730 .map(|l| l.highlight_text(&rope, 0..code.len()))
731 .ok()
732 } else {
733 None
734 }
735 } else {
736 None
737 };
738
739 ParsedMarkdownCodeBlock {
740 source_range,
741 contents: code.trim().to_string().into(),
742 language,
743 highlights,
744 }
745 }
746}
747
748#[cfg(test)]
749mod tests {
750 use core::panic;
751
752 use super::*;
753
754 use gpui::BackgroundExecutor;
755 use language::{
756 tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
757 };
758 use pretty_assertions::assert_eq;
759 use ParsedMarkdownListItemType::*;
760
761 async fn parse(input: &str) -> ParsedMarkdown {
762 parse_markdown(input, None, None).await
763 }
764
765 #[gpui::test]
766 async fn test_headings() {
767 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
768
769 assert_eq!(
770 parsed.children,
771 vec![
772 h1(text("Heading one", 2..13), 0..14),
773 h2(text("Heading two", 17..28), 14..29),
774 h3(text("Heading three", 33..46), 29..46),
775 ]
776 );
777 }
778
779 #[gpui::test]
780 async fn test_newlines_dont_new_paragraphs() {
781 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
782
783 assert_eq!(
784 parsed.children,
785 vec![p("Some text that is bolded and italicized", 0..46)]
786 );
787 }
788
789 #[gpui::test]
790 async fn test_heading_with_paragraph() {
791 let parsed = parse("# Zed\nThe editor").await;
792
793 assert_eq!(
794 parsed.children,
795 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
796 );
797 }
798
799 #[gpui::test]
800 async fn test_double_newlines_do_new_paragraphs() {
801 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
802
803 assert_eq!(
804 parsed.children,
805 vec![
806 p("Some text that is bolded", 0..29),
807 p("and italicized", 31..47),
808 ]
809 );
810 }
811
812 #[gpui::test]
813 async fn test_bold_italic_text() {
814 let parsed = parse("Some text **that is bolded** and *italicized*").await;
815
816 assert_eq!(
817 parsed.children,
818 vec![p("Some text that is bolded and italicized", 0..45)]
819 );
820 }
821
822 #[gpui::test]
823 async fn test_nested_bold_strikethrough_text() {
824 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
825
826 assert_eq!(parsed.children.len(), 1);
827 assert_eq!(
828 parsed.children[0],
829 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
830 ParsedMarkdownText {
831 source_range: 0..35,
832 contents: "Some bostrikethroughld text".to_string(),
833 highlights: Vec::new(),
834 region_ranges: Vec::new(),
835 regions: Vec::new(),
836 }
837 )])
838 );
839
840 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
841 text
842 } else {
843 panic!("Expected a paragraph");
844 };
845
846 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
847 text
848 } else {
849 panic!("Expected a text");
850 };
851
852 assert_eq!(
853 paragraph.highlights,
854 vec![
855 (
856 5..7,
857 MarkdownHighlight::Style(MarkdownHighlightStyle {
858 weight: FontWeight::BOLD,
859 ..Default::default()
860 }),
861 ),
862 (
863 7..20,
864 MarkdownHighlight::Style(MarkdownHighlightStyle {
865 weight: FontWeight::BOLD,
866 strikethrough: true,
867 ..Default::default()
868 }),
869 ),
870 (
871 20..22,
872 MarkdownHighlight::Style(MarkdownHighlightStyle {
873 weight: FontWeight::BOLD,
874 ..Default::default()
875 }),
876 ),
877 ]
878 );
879 }
880
881 #[gpui::test]
882 async fn test_text_with_inline_html() {
883 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
884
885 assert_eq!(
886 parsed.children,
887 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
888 );
889 }
890
891 #[gpui::test]
892 async fn test_raw_links_detection() {
893 let parsed = parse("Checkout this https://zed.dev link").await;
894
895 assert_eq!(
896 parsed.children,
897 vec![p("Checkout this https://zed.dev link", 0..34)]
898 );
899 }
900
901 #[gpui::test]
902 async fn test_empty_image() {
903 let parsed = parse("![]()").await;
904
905 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
906 text
907 } else {
908 panic!("Expected a paragraph");
909 };
910 assert_eq!(paragraph.len(), 0);
911 }
912
913 #[gpui::test]
914 async fn test_image_links_detection() {
915 let parsed = parse("").await;
916
917 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
918 text
919 } else {
920 panic!("Expected a paragraph");
921 };
922 assert_eq!(
923 paragraph[0],
924 MarkdownParagraphChunk::Image(Image {
925 source_range: 0..111,
926 link: Link::Web {
927 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
928 },
929 alt_text: Some("test".into()),
930 },)
931 );
932 }
933
934 #[gpui::test]
935 async fn test_header_only_table() {
936 let markdown = "\
937| Header 1 | Header 2 |
938|----------|----------|
939
940Some other content
941";
942
943 let expected_table = table(
944 0..48,
945 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
946 vec![],
947 );
948
949 assert_eq!(
950 parse(markdown).await.children[0],
951 ParsedMarkdownElement::Table(expected_table)
952 );
953 }
954
955 #[gpui::test]
956 async fn test_basic_table() {
957 let markdown = "\
958| Header 1 | Header 2 |
959|----------|----------|
960| Cell 1 | Cell 2 |
961| Cell 3 | Cell 4 |";
962
963 let expected_table = table(
964 0..95,
965 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
966 vec![
967 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
968 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
969 ],
970 );
971
972 assert_eq!(
973 parse(markdown).await.children[0],
974 ParsedMarkdownElement::Table(expected_table)
975 );
976 }
977
978 #[gpui::test]
979 async fn test_list_basic() {
980 let parsed = parse(
981 "\
982* Item 1
983* Item 2
984* Item 3
985",
986 )
987 .await;
988
989 assert_eq!(
990 parsed.children,
991 vec![
992 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
993 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
994 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
995 ],
996 );
997 }
998
999 #[gpui::test]
1000 async fn test_list_with_tasks() {
1001 let parsed = parse(
1002 "\
1003- [ ] TODO
1004- [x] Checked
1005",
1006 )
1007 .await;
1008
1009 assert_eq!(
1010 parsed.children,
1011 vec![
1012 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1013 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1014 ],
1015 );
1016 }
1017
1018 #[gpui::test]
1019 async fn test_list_with_indented_task() {
1020 let parsed = parse(
1021 "\
1022- [ ] TODO
1023 - [x] Checked
1024 - Unordered
1025 1. Number 1
1026 1. Number 2
10271. Number A
1028",
1029 )
1030 .await;
1031
1032 assert_eq!(
1033 parsed.children,
1034 vec![
1035 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1036 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1037 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1038 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1039 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1040 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1041 ],
1042 );
1043 }
1044
1045 #[gpui::test]
1046 async fn test_list_with_linebreak_is_handled_correctly() {
1047 let parsed = parse(
1048 "\
1049- [ ] Task 1
1050
1051- [x] Task 2
1052",
1053 )
1054 .await;
1055
1056 assert_eq!(
1057 parsed.children,
1058 vec![
1059 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1060 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1061 ],
1062 );
1063 }
1064
1065 #[gpui::test]
1066 async fn test_list_nested() {
1067 let parsed = parse(
1068 "\
1069* Item 1
1070* Item 2
1071* Item 3
1072
10731. Hello
10741. Two
1075 1. Three
10762. Four
10773. Five
1078
1079* First
1080 1. Hello
1081 1. Goodbyte
1082 - Inner
1083 - Inner
1084 2. Goodbyte
1085 - Next item empty
1086 -
1087* Last
1088",
1089 )
1090 .await;
1091
1092 assert_eq!(
1093 parsed.children,
1094 vec![
1095 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1096 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1097 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1098 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1099 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1100 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1101 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1102 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1103 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1104 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1105 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1106 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1107 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1108 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1109 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1110 list_item(186..190, 3, Unordered, vec![]),
1111 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1112 ]
1113 );
1114 }
1115
1116 #[gpui::test]
1117 async fn test_list_with_nested_content() {
1118 let parsed = parse(
1119 "\
1120* This is a list item with two paragraphs.
1121
1122 This is the second paragraph in the list item.
1123",
1124 )
1125 .await;
1126
1127 assert_eq!(
1128 parsed.children,
1129 vec![list_item(
1130 0..96,
1131 1,
1132 Unordered,
1133 vec![
1134 p("This is a list item with two paragraphs.", 4..44),
1135 p("This is the second paragraph in the list item.", 50..97)
1136 ],
1137 ),],
1138 );
1139 }
1140
1141 #[gpui::test]
1142 async fn test_list_item_with_inline_html() {
1143 let parsed = parse(
1144 "\
1145* This is a list item with an inline HTML <sometag>tag</sometag>.
1146",
1147 )
1148 .await;
1149
1150 assert_eq!(
1151 parsed.children,
1152 vec![list_item(
1153 0..67,
1154 1,
1155 Unordered,
1156 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1157 ),],
1158 );
1159 }
1160
1161 #[gpui::test]
1162 async fn test_nested_list_with_paragraph_inside() {
1163 let parsed = parse(
1164 "\
11651. a
1166 1. b
1167 1. c
1168
1169 text
1170
1171 1. d
1172",
1173 )
1174 .await;
1175
1176 assert_eq!(
1177 parsed.children,
1178 vec![
1179 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1180 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1181 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1182 p("text", 32..37),
1183 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1184 ],
1185 );
1186 }
1187
1188 #[gpui::test]
1189 async fn test_list_with_leading_text() {
1190 let parsed = parse(
1191 "\
1192* `code`
1193* **bold**
1194* [link](https://example.com)
1195",
1196 )
1197 .await;
1198
1199 assert_eq!(
1200 parsed.children,
1201 vec![
1202 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1203 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1204 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1205 ],
1206 );
1207 }
1208
1209 #[gpui::test]
1210 async fn test_simple_block_quote() {
1211 let parsed = parse("> Simple block quote with **styled text**").await;
1212
1213 assert_eq!(
1214 parsed.children,
1215 vec![block_quote(
1216 vec![p("Simple block quote with styled text", 2..41)],
1217 0..41
1218 )]
1219 );
1220 }
1221
1222 #[gpui::test]
1223 async fn test_simple_block_quote_with_multiple_lines() {
1224 let parsed = parse(
1225 "\
1226> # Heading
1227> More
1228> text
1229>
1230> More text
1231",
1232 )
1233 .await;
1234
1235 assert_eq!(
1236 parsed.children,
1237 vec![block_quote(
1238 vec![
1239 h1(text("Heading", 4..11), 2..12),
1240 p("More text", 14..26),
1241 p("More text", 30..40)
1242 ],
1243 0..40
1244 )]
1245 );
1246 }
1247
1248 #[gpui::test]
1249 async fn test_nested_block_quote() {
1250 let parsed = parse(
1251 "\
1252> A
1253>
1254> > # B
1255>
1256> C
1257
1258More text
1259",
1260 )
1261 .await;
1262
1263 assert_eq!(
1264 parsed.children,
1265 vec![
1266 block_quote(
1267 vec![
1268 p("A", 2..4),
1269 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1270 p("C", 18..20)
1271 ],
1272 0..20
1273 ),
1274 p("More text", 21..31)
1275 ]
1276 );
1277 }
1278
1279 #[gpui::test]
1280 async fn test_code_block() {
1281 let parsed = parse(
1282 "\
1283```
1284fn main() {
1285 return 0;
1286}
1287```
1288",
1289 )
1290 .await;
1291
1292 assert_eq!(
1293 parsed.children,
1294 vec![code_block(
1295 None,
1296 "fn main() {\n return 0;\n}",
1297 0..35,
1298 None
1299 )]
1300 );
1301 }
1302
1303 #[gpui::test]
1304 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1305 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1306 language_registry.add(rust_lang());
1307
1308 let parsed = parse_markdown(
1309 "\
1310```rust
1311fn main() {
1312 return 0;
1313}
1314```
1315",
1316 None,
1317 Some(language_registry),
1318 )
1319 .await;
1320
1321 assert_eq!(
1322 parsed.children,
1323 vec![code_block(
1324 Some("rust".to_string()),
1325 "fn main() {\n return 0;\n}",
1326 0..39,
1327 Some(vec![])
1328 )]
1329 );
1330 }
1331
1332 fn rust_lang() -> Arc<Language> {
1333 Arc::new(Language::new(
1334 LanguageConfig {
1335 name: "Rust".into(),
1336 matcher: LanguageMatcher {
1337 path_suffixes: vec!["rs".into()],
1338 ..Default::default()
1339 },
1340 collapsed_placeholder: " /* ... */ ".to_string(),
1341 ..Default::default()
1342 },
1343 Some(tree_sitter_rust::LANGUAGE.into()),
1344 ))
1345 }
1346
1347 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1348 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1349 source_range,
1350 level: HeadingLevel::H1,
1351 contents,
1352 })
1353 }
1354
1355 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1356 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1357 source_range,
1358 level: HeadingLevel::H2,
1359 contents,
1360 })
1361 }
1362
1363 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1364 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1365 source_range,
1366 level: HeadingLevel::H3,
1367 contents,
1368 })
1369 }
1370
1371 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1372 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1373 }
1374
1375 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1376 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1377 highlights: Vec::new(),
1378 region_ranges: Vec::new(),
1379 regions: Vec::new(),
1380 source_range,
1381 contents: contents.to_string(),
1382 })]
1383 }
1384
1385 fn block_quote(
1386 children: Vec<ParsedMarkdownElement>,
1387 source_range: Range<usize>,
1388 ) -> ParsedMarkdownElement {
1389 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1390 source_range,
1391 children,
1392 })
1393 }
1394
1395 fn code_block(
1396 language: Option<String>,
1397 code: &str,
1398 source_range: Range<usize>,
1399 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1400 ) -> ParsedMarkdownElement {
1401 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1402 source_range,
1403 language,
1404 contents: code.to_string().into(),
1405 highlights,
1406 })
1407 }
1408
1409 fn list_item(
1410 source_range: Range<usize>,
1411 depth: u16,
1412 item_type: ParsedMarkdownListItemType,
1413 content: Vec<ParsedMarkdownElement>,
1414 ) -> ParsedMarkdownElement {
1415 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1416 source_range,
1417 item_type,
1418 depth,
1419 content,
1420 })
1421 }
1422
1423 fn table(
1424 source_range: Range<usize>,
1425 header: ParsedMarkdownTableRow,
1426 body: Vec<ParsedMarkdownTableRow>,
1427 ) -> ParsedMarkdownTable {
1428 ParsedMarkdownTable {
1429 column_alignments: Vec::new(),
1430 source_range,
1431 header,
1432 body,
1433 }
1434 }
1435
1436 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1437 ParsedMarkdownTableRow { children }
1438 }
1439
1440 impl PartialEq for ParsedMarkdownTable {
1441 fn eq(&self, other: &Self) -> bool {
1442 self.source_range == other.source_range
1443 && self.header == other.header
1444 && self.body == other.body
1445 }
1446 }
1447
1448 impl PartialEq for ParsedMarkdownText {
1449 fn eq(&self, other: &Self) -> bool {
1450 self.source_range == other.source_range && self.contents == other.contents
1451 }
1452 }
1453}