1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser that turns a pre-collected `pulldown_cmark` event
/// stream into `ParsedMarkdownElement`s.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with its range in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` and `Image::identify` when resolving destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up the language of fenced code blocks for highlighting
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
/// Accumulator for a list item while `parse_list` is still collecting it.
struct MarkdownListItem {
    /// The blocks gathered for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// The kind of item (task, ordered or unordered)
    item_type: ParsedMarkdownListItemType,
}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 self.tokens.get(self.cursor + steps)
80 }
81
82 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 self.tokens.get(self.cursor - 1)
87 }
88
89 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
90 self.peek(0)
91 }
92
93 fn current_event(&self) -> Option<&Event<'_>> {
94 self.current().map(|(event, _)| event)
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::InlineHtml(_)
104 | Event::FootnoteReference(_)
105 | Event::Start(Tag::Link { .. })
106 | Event::Start(Tag::Emphasis)
107 | Event::Start(Tag::Strong)
108 | Event::Start(Tag::Strikethrough)
109 | Event::Start(Tag::Image { .. }) => {
110 true
111 }
112 _ => false,
113 }
114 }
115
116 async fn parse_document(mut self) -> Self {
117 while !self.eof() {
118 if let Some(block) = self.parse_block().await {
119 self.parsed.extend(block);
120 } else {
121 self.cursor += 1;
122 }
123 }
124 self
125 }
126
127 #[async_recursion]
128 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
129 let (current, source_range) = self.current().unwrap();
130 let source_range = source_range.clone();
131 match current {
132 Event::Start(tag) => match tag {
133 Tag::Paragraph => {
134 self.cursor += 1;
135 let text = self.parse_text(false, Some(source_range));
136 Some(vec![ParsedMarkdownElement::Paragraph(text)])
137 }
138 Tag::Heading { level, .. } => {
139 let level = *level;
140 self.cursor += 1;
141 let heading = self.parse_heading(level);
142 Some(vec![ParsedMarkdownElement::Heading(heading)])
143 }
144 Tag::Table(alignment) => {
145 let alignment = alignment.clone();
146 self.cursor += 1;
147 let table = self.parse_table(alignment);
148 Some(vec![ParsedMarkdownElement::Table(table)])
149 }
150 Tag::List(order) => {
151 let order = *order;
152 self.cursor += 1;
153 let list = self.parse_list(order).await;
154 Some(list)
155 }
156 Tag::BlockQuote(_kind) => {
157 self.cursor += 1;
158 let block_quote = self.parse_block_quote().await;
159 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
160 }
161 Tag::CodeBlock(kind) => {
162 let language = match kind {
163 pulldown_cmark::CodeBlockKind::Indented => None,
164 pulldown_cmark::CodeBlockKind::Fenced(language) => {
165 if language.is_empty() {
166 None
167 } else {
168 Some(language.to_string())
169 }
170 }
171 };
172
173 self.cursor += 1;
174
175 let code_block = self.parse_code_block(language).await;
176 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
177 }
178 _ => None,
179 },
180 Event::Rule => {
181 let source_range = source_range.clone();
182 self.cursor += 1;
183 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
184 }
185 _ => None,
186 }
187 }
188
189 fn parse_text(
190 &mut self,
191 should_complete_on_soft_break: bool,
192 source_range: Option<Range<usize>>,
193 ) -> MarkdownParagraph {
194 let source_range = source_range.unwrap_or_else(|| {
195 self.current()
196 .map(|(_, range)| range.clone())
197 .unwrap_or_default()
198 });
199
200 let mut markdown_text_like = Vec::new();
201 let mut text = String::new();
202 let mut bold_depth = 0;
203 let mut italic_depth = 0;
204 let mut strikethrough_depth = 0;
205 let mut link: Option<Link> = None;
206 let mut image: Option<Image> = None;
207 let mut region_ranges: Vec<Range<usize>> = vec![];
208 let mut regions: Vec<ParsedRegion> = vec![];
209 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
210 let mut link_urls: Vec<String> = vec![];
211 let mut link_ranges: Vec<Range<usize>> = vec![];
212
213 loop {
214 if self.eof() {
215 break;
216 }
217
218 let (current, _) = self.current().unwrap();
219 let prev_len = text.len();
220 match current {
221 Event::SoftBreak => {
222 if should_complete_on_soft_break {
223 break;
224 }
225 text.push(' ');
226 }
227
228 Event::HardBreak => {
229 text.push('\n');
230 }
231
232 // We want to ignore any inline HTML tags in the text but keep
233 // the text between them
234 Event::InlineHtml(_) => {}
235
236 Event::Text(t) => {
237 text.push_str(t.as_ref());
238 let mut style = MarkdownHighlightStyle::default();
239
240 if bold_depth > 0 {
241 style.weight = FontWeight::BOLD;
242 }
243
244 if italic_depth > 0 {
245 style.italic = true;
246 }
247
248 if strikethrough_depth > 0 {
249 style.strikethrough = true;
250 }
251
252 let last_run_len = if let Some(link) = link.clone() {
253 region_ranges.push(prev_len..text.len());
254 regions.push(ParsedRegion {
255 code: false,
256 link: Some(link),
257 });
258 style.underline = true;
259 prev_len
260 } else {
261 // Manually scan for links
262 let mut finder = linkify::LinkFinder::new();
263 finder.kinds(&[linkify::LinkKind::Url]);
264 let mut last_link_len = prev_len;
265 for link in finder.links(t) {
266 let start = link.start();
267 let end = link.end();
268 let range = (prev_len + start)..(prev_len + end);
269 link_ranges.push(range.clone());
270 link_urls.push(link.as_str().to_string());
271
272 // If there is a style before we match a link, we have to add this to the highlighted ranges
273 if style != MarkdownHighlightStyle::default()
274 && last_link_len < link.start()
275 {
276 highlights.push((
277 last_link_len..link.start(),
278 MarkdownHighlight::Style(style.clone()),
279 ));
280 }
281
282 highlights.push((
283 range.clone(),
284 MarkdownHighlight::Style(MarkdownHighlightStyle {
285 underline: true,
286 ..style
287 }),
288 ));
289 region_ranges.push(range.clone());
290 regions.push(ParsedRegion {
291 code: false,
292 link: Some(Link::Web {
293 url: link.as_str().to_string(),
294 }),
295 });
296 last_link_len = end;
297 }
298 last_link_len
299 };
300
301 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
302 let mut new_highlight = true;
303 if let Some((last_range, last_style)) = highlights.last_mut()
304 && last_range.end == last_run_len
305 && last_style == &MarkdownHighlight::Style(style.clone())
306 {
307 last_range.end = text.len();
308 new_highlight = false;
309 }
310 if new_highlight {
311 highlights.push((
312 last_run_len..text.len(),
313 MarkdownHighlight::Style(style.clone()),
314 ));
315 }
316 }
317 }
318 Event::Code(t) => {
319 text.push_str(t.as_ref());
320 region_ranges.push(prev_len..text.len());
321
322 if link.is_some() {
323 highlights.push((
324 prev_len..text.len(),
325 MarkdownHighlight::Style(MarkdownHighlightStyle {
326 underline: true,
327 ..Default::default()
328 }),
329 ));
330 }
331 regions.push(ParsedRegion {
332 code: true,
333 link: link.clone(),
334 });
335 }
336 Event::Start(tag) => match tag {
337 Tag::Emphasis => italic_depth += 1,
338 Tag::Strong => bold_depth += 1,
339 Tag::Strikethrough => strikethrough_depth += 1,
340 Tag::Link { dest_url, .. } => {
341 link = Link::identify(
342 self.file_location_directory.clone(),
343 dest_url.to_string(),
344 );
345 }
346 Tag::Image { dest_url, .. } => {
347 if !text.is_empty() {
348 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
349 source_range: source_range.clone(),
350 contents: text.clone(),
351 highlights: highlights.clone(),
352 region_ranges: region_ranges.clone(),
353 regions: regions.clone(),
354 });
355 text = String::new();
356 highlights = vec![];
357 region_ranges = vec![];
358 regions = vec![];
359 markdown_text_like.push(parsed_regions);
360 }
361 image = Image::identify(
362 dest_url.to_string(),
363 source_range.clone(),
364 self.file_location_directory.clone(),
365 );
366 }
367 _ => {
368 break;
369 }
370 },
371
372 Event::End(tag) => match tag {
373 TagEnd::Emphasis => italic_depth -= 1,
374 TagEnd::Strong => bold_depth -= 1,
375 TagEnd::Strikethrough => strikethrough_depth -= 1,
376 TagEnd::Link => {
377 link = None;
378 }
379 TagEnd::Image => {
380 if let Some(mut image) = image.take() {
381 if !text.is_empty() {
382 image.alt_text = Some(std::mem::take(&mut text).into());
383 }
384 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
385 }
386 }
387 TagEnd::Paragraph => {
388 self.cursor += 1;
389 break;
390 }
391 _ => {
392 break;
393 }
394 },
395 _ => {
396 break;
397 }
398 }
399
400 self.cursor += 1;
401 }
402 if !text.is_empty() {
403 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
404 source_range: source_range.clone(),
405 contents: text,
406 highlights,
407 regions,
408 region_ranges,
409 }));
410 }
411 markdown_text_like
412 }
413
414 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
415 let (_event, source_range) = self.previous().unwrap();
416 let source_range = source_range.clone();
417 let text = self.parse_text(true, None);
418
419 // Advance past the heading end tag
420 self.cursor += 1;
421
422 ParsedMarkdownHeading {
423 source_range: source_range.clone(),
424 level: match level {
425 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
426 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
427 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
428 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
429 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
430 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
431 },
432 contents: text,
433 }
434 }
435
436 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
437 let (_event, source_range) = self.previous().unwrap();
438 let source_range = source_range.clone();
439 let mut header = ParsedMarkdownTableRow::new();
440 let mut body = vec![];
441 let mut current_row = vec![];
442 let mut in_header = true;
443 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
444
445 loop {
446 if self.eof() {
447 break;
448 }
449
450 let (current, source_range) = self.current().unwrap();
451 let source_range = source_range.clone();
452 match current {
453 Event::Start(Tag::TableHead)
454 | Event::Start(Tag::TableRow)
455 | Event::End(TagEnd::TableCell) => {
456 self.cursor += 1;
457 }
458 Event::Start(Tag::TableCell) => {
459 self.cursor += 1;
460 let cell_contents = self.parse_text(false, Some(source_range));
461 current_row.push(cell_contents);
462 }
463 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
464 self.cursor += 1;
465 let new_row = std::mem::take(&mut current_row);
466 if in_header {
467 header.children = new_row;
468 in_header = false;
469 } else {
470 let row = ParsedMarkdownTableRow::with_children(new_row);
471 body.push(row);
472 }
473 }
474 Event::End(TagEnd::Table) => {
475 self.cursor += 1;
476 break;
477 }
478 _ => {
479 break;
480 }
481 }
482 }
483
484 ParsedMarkdownTable {
485 source_range,
486 header,
487 body,
488 column_alignments,
489 }
490 }
491
492 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
493 match alignment {
494 Alignment::None => ParsedMarkdownTableAlignment::None,
495 Alignment::Left => ParsedMarkdownTableAlignment::Left,
496 Alignment::Center => ParsedMarkdownTableAlignment::Center,
497 Alignment::Right => ParsedMarkdownTableAlignment::Right,
498 }
499 }
500
    /// Parses a list whose opening tag was just consumed and returns its items
    /// as a flat sequence.
    ///
    /// Nested lists are not returned as children of their parent item. Each
    /// `ListItem` carries its own `depth`, and the parent is inserted in front
    /// of the nested items that were completed before it. Blocks that are
    /// found after nested items are appended to the flat sequence as well.
    ///
    /// `order` is the start number of an ordered list, or `None` for an
    /// unordered one.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items, in output order
        let mut items = Vec::new();
        // Items that are currently open; the innermost one is last
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        // Number of the next item of the list at the current depth (`None` if unordered)
        let mut order = order;
        // Saved `order` values of the enclosing lists
        let mut order_stack = Vec::new();

        // Per depth: the position in `items` where the still-open parent item
        // has to be inserted, so that it ends up before its nested items
        let mut insertion_indices = FxHashMap::default();
        // Per depth: the source range of the open item, cut off where its nested list starts
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    // Remember the numbering of the enclosing list and switch to the nested one
                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Restore the numbering of the enclosing list
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list this call was started for has ended
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over the list's numbering
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (or a nested list); the item
                            // keeps the default item type in this case
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // Skip the end tag of the item's first paragraph if it is still at the cursor
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the next number
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range that was cut off at a nested list, if there is one
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that had nested lists goes in front of its nested items
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case no insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we know how to parse: skip the token
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
651
652 #[async_recursion]
653 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
654 let (_event, source_range) = self.previous().unwrap();
655 let source_range = source_range.clone();
656 let mut nested_depth = 1;
657
658 let mut children: Vec<ParsedMarkdownElement> = vec![];
659
660 while !self.eof() {
661 let block = self.parse_block().await;
662
663 if let Some(block) = block {
664 children.extend(block);
665 } else {
666 break;
667 }
668
669 if self.eof() {
670 break;
671 }
672
673 let (current, _source_range) = self.current().unwrap();
674 match current {
675 // This is a nested block quote.
676 // Record that we're in a nested block quote and continue parsing.
677 // We don't need to advance the cursor since the next
678 // call to `parse_block` will handle it.
679 Event::Start(Tag::BlockQuote(_kind)) => {
680 nested_depth += 1;
681 }
682 Event::End(TagEnd::BlockQuote(_kind)) => {
683 nested_depth -= 1;
684 if nested_depth == 0 {
685 self.cursor += 1;
686 break;
687 }
688 }
689 _ => {}
690 };
691 }
692
693 ParsedMarkdownBlockQuote {
694 source_range,
695 children,
696 }
697 }
698
699 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
700 let (_event, source_range) = self.previous().unwrap();
701 let source_range = source_range.clone();
702 let mut code = String::new();
703
704 while !self.eof() {
705 let (current, _source_range) = self.current().unwrap();
706 match current {
707 Event::Text(text) => {
708 code.push_str(text);
709 self.cursor += 1;
710 }
711 Event::End(TagEnd::CodeBlock) => {
712 self.cursor += 1;
713 break;
714 }
715 _ => {
716 break;
717 }
718 }
719 }
720
721 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
722
723 let highlights = if let Some(language) = &language {
724 if let Some(registry) = &self.language_registry {
725 let rope: language::Rope = code.as_str().into();
726 registry
727 .language_for_name_or_extension(language)
728 .await
729 .map(|l| l.highlight_text(&rope, 0..code.len()))
730 .ok()
731 } else {
732 None
733 }
734 } else {
735 None
736 };
737
738 ParsedMarkdownCodeBlock {
739 source_range,
740 contents: code.into(),
741 language,
742 highlights,
743 }
744 }
745}
746
747#[cfg(test)]
748mod tests {
749 use core::panic;
750
751 use super::*;
752
753 use ParsedMarkdownListItemType::*;
754 use gpui::BackgroundExecutor;
755 use language::{
756 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
757 };
758 use pretty_assertions::assert_eq;
759
760 async fn parse(input: &str) -> ParsedMarkdown {
761 parse_markdown(input, None, None).await
762 }
763
764 #[gpui::test]
765 async fn test_headings() {
766 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
767
768 assert_eq!(
769 parsed.children,
770 vec![
771 h1(text("Heading one", 2..13), 0..14),
772 h2(text("Heading two", 17..28), 14..29),
773 h3(text("Heading three", 33..46), 29..46),
774 ]
775 );
776 }
777
778 #[gpui::test]
779 async fn test_newlines_dont_new_paragraphs() {
780 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
781
782 assert_eq!(
783 parsed.children,
784 vec![p("Some text that is bolded and italicized", 0..46)]
785 );
786 }
787
788 #[gpui::test]
789 async fn test_heading_with_paragraph() {
790 let parsed = parse("# Zed\nThe editor").await;
791
792 assert_eq!(
793 parsed.children,
794 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
795 );
796 }
797
798 #[gpui::test]
799 async fn test_double_newlines_do_new_paragraphs() {
800 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
801
802 assert_eq!(
803 parsed.children,
804 vec![
805 p("Some text that is bolded", 0..29),
806 p("and italicized", 31..47),
807 ]
808 );
809 }
810
811 #[gpui::test]
812 async fn test_bold_italic_text() {
813 let parsed = parse("Some text **that is bolded** and *italicized*").await;
814
815 assert_eq!(
816 parsed.children,
817 vec![p("Some text that is bolded and italicized", 0..45)]
818 );
819 }
820
821 #[gpui::test]
822 async fn test_nested_bold_strikethrough_text() {
823 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
824
825 assert_eq!(parsed.children.len(), 1);
826 assert_eq!(
827 parsed.children[0],
828 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
829 ParsedMarkdownText {
830 source_range: 0..35,
831 contents: "Some bostrikethroughld text".to_string(),
832 highlights: Vec::new(),
833 region_ranges: Vec::new(),
834 regions: Vec::new(),
835 }
836 )])
837 );
838
839 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
840 text
841 } else {
842 panic!("Expected a paragraph");
843 };
844
845 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
846 text
847 } else {
848 panic!("Expected a text");
849 };
850
851 assert_eq!(
852 paragraph.highlights,
853 vec![
854 (
855 5..7,
856 MarkdownHighlight::Style(MarkdownHighlightStyle {
857 weight: FontWeight::BOLD,
858 ..Default::default()
859 }),
860 ),
861 (
862 7..20,
863 MarkdownHighlight::Style(MarkdownHighlightStyle {
864 weight: FontWeight::BOLD,
865 strikethrough: true,
866 ..Default::default()
867 }),
868 ),
869 (
870 20..22,
871 MarkdownHighlight::Style(MarkdownHighlightStyle {
872 weight: FontWeight::BOLD,
873 ..Default::default()
874 }),
875 ),
876 ]
877 );
878 }
879
880 #[gpui::test]
881 async fn test_text_with_inline_html() {
882 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
883
884 assert_eq!(
885 parsed.children,
886 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
887 );
888 }
889
890 #[gpui::test]
891 async fn test_raw_links_detection() {
892 let parsed = parse("Checkout this https://zed.dev link").await;
893
894 assert_eq!(
895 parsed.children,
896 vec![p("Checkout this https://zed.dev link", 0..34)]
897 );
898 }
899
900 #[gpui::test]
901 async fn test_empty_image() {
902 let parsed = parse("![]()").await;
903
904 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
905 text
906 } else {
907 panic!("Expected a paragraph");
908 };
909 assert_eq!(paragraph.len(), 0);
910 }
911
912 #[gpui::test]
913 async fn test_image_links_detection() {
914 let parsed = parse("").await;
915
916 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
917 text
918 } else {
919 panic!("Expected a paragraph");
920 };
921 assert_eq!(
922 paragraph[0],
923 MarkdownParagraphChunk::Image(Image {
924 source_range: 0..111,
925 link: Link::Web {
926 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
927 },
928 alt_text: Some("test".into()),
929 },)
930 );
931 }
932
933 #[gpui::test]
934 async fn test_image_without_alt_text() {
935 let parsed = parse("").await;
936
937 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
938 text
939 } else {
940 panic!("Expected a paragraph");
941 };
942 assert_eq!(
943 paragraph[0],
944 MarkdownParagraphChunk::Image(Image {
945 source_range: 0..31,
946 link: Link::Web {
947 url: "http://example.com/foo.png".to_string(),
948 },
949 alt_text: None,
950 },)
951 );
952 }
953
954 #[gpui::test]
955 async fn test_image_with_alt_text_containing_formatting() {
956 let parsed = parse("").await;
957
958 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
959 panic!("Expected a paragraph");
960 };
961 assert_eq!(
962 chunks,
963 &[MarkdownParagraphChunk::Image(Image {
964 source_range: 0..44,
965 link: Link::Web {
966 url: "http://example.com/foo.png".to_string(),
967 },
968 alt_text: Some("foo bar baz".into()),
969 }),],
970 );
971 }
972
973 #[gpui::test]
974 async fn test_images_with_text_in_between() {
975 let parsed = parse(
976 "\nLorem Ipsum\n",
977 )
978 .await;
979
980 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
981 text
982 } else {
983 panic!("Expected a paragraph");
984 };
985 assert_eq!(
986 chunks,
987 &vec![
988 MarkdownParagraphChunk::Image(Image {
989 source_range: 0..81,
990 link: Link::Web {
991 url: "http://example.com/foo.png".to_string(),
992 },
993 alt_text: Some("foo".into()),
994 }),
995 MarkdownParagraphChunk::Text(ParsedMarkdownText {
996 source_range: 0..81,
997 contents: " Lorem Ipsum ".to_string(),
998 highlights: Vec::new(),
999 region_ranges: Vec::new(),
1000 regions: Vec::new(),
1001 }),
1002 MarkdownParagraphChunk::Image(Image {
1003 source_range: 0..81,
1004 link: Link::Web {
1005 url: "http://example.com/bar.png".to_string(),
1006 },
1007 alt_text: Some("bar".into()),
1008 })
1009 ]
1010 );
1011 }
1012
1013 #[gpui::test]
1014 async fn test_header_only_table() {
1015 let markdown = "\
1016| Header 1 | Header 2 |
1017|----------|----------|
1018
1019Some other content
1020";
1021
1022 let expected_table = table(
1023 0..48,
1024 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1025 vec![],
1026 );
1027
1028 assert_eq!(
1029 parse(markdown).await.children[0],
1030 ParsedMarkdownElement::Table(expected_table)
1031 );
1032 }
1033
1034 #[gpui::test]
1035 async fn test_basic_table() {
1036 let markdown = "\
1037| Header 1 | Header 2 |
1038|----------|----------|
1039| Cell 1 | Cell 2 |
1040| Cell 3 | Cell 4 |";
1041
1042 let expected_table = table(
1043 0..95,
1044 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1045 vec![
1046 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1047 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1048 ],
1049 );
1050
1051 assert_eq!(
1052 parse(markdown).await.children[0],
1053 ParsedMarkdownElement::Table(expected_table)
1054 );
1055 }
1056
1057 #[gpui::test]
1058 async fn test_list_basic() {
1059 let parsed = parse(
1060 "\
1061* Item 1
1062* Item 2
1063* Item 3
1064",
1065 )
1066 .await;
1067
1068 assert_eq!(
1069 parsed.children,
1070 vec![
1071 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1072 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1073 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1074 ],
1075 );
1076 }
1077
1078 #[gpui::test]
1079 async fn test_list_with_tasks() {
1080 let parsed = parse(
1081 "\
1082- [ ] TODO
1083- [x] Checked
1084",
1085 )
1086 .await;
1087
1088 assert_eq!(
1089 parsed.children,
1090 vec![
1091 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1092 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1093 ],
1094 );
1095 }
1096
1097 #[gpui::test]
1098 async fn test_list_with_indented_task() {
1099 let parsed = parse(
1100 "\
1101- [ ] TODO
1102 - [x] Checked
1103 - Unordered
1104 1. Number 1
1105 1. Number 2
11061. Number A
1107",
1108 )
1109 .await;
1110
1111 assert_eq!(
1112 parsed.children,
1113 vec![
1114 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1115 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1116 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1117 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1118 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1119 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1120 ],
1121 );
1122 }
1123
1124 #[gpui::test]
1125 async fn test_list_with_linebreak_is_handled_correctly() {
1126 let parsed = parse(
1127 "\
1128- [ ] Task 1
1129
1130- [x] Task 2
1131",
1132 )
1133 .await;
1134
1135 assert_eq!(
1136 parsed.children,
1137 vec![
1138 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1139 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1140 ],
1141 );
1142 }
1143
1144 #[gpui::test]
1145 async fn test_list_nested() {
1146 let parsed = parse(
1147 "\
1148* Item 1
1149* Item 2
1150* Item 3
1151
11521. Hello
11531. Two
1154 1. Three
11552. Four
11563. Five
1157
1158* First
1159 1. Hello
1160 1. Goodbyte
1161 - Inner
1162 - Inner
1163 2. Goodbyte
1164 - Next item empty
1165 -
1166* Last
1167",
1168 )
1169 .await;
1170
1171 assert_eq!(
1172 parsed.children,
1173 vec![
1174 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1175 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1176 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1177 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1178 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1179 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1180 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1181 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1182 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1183 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1184 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1185 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1186 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1187 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1188 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1189 list_item(186..190, 3, Unordered, vec![]),
1190 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1191 ]
1192 );
1193 }
1194
1195 #[gpui::test]
1196 async fn test_list_with_nested_content() {
1197 let parsed = parse(
1198 "\
1199* This is a list item with two paragraphs.
1200
1201 This is the second paragraph in the list item.
1202",
1203 )
1204 .await;
1205
1206 assert_eq!(
1207 parsed.children,
1208 vec![list_item(
1209 0..96,
1210 1,
1211 Unordered,
1212 vec![
1213 p("This is a list item with two paragraphs.", 4..44),
1214 p("This is the second paragraph in the list item.", 50..97)
1215 ],
1216 ),],
1217 );
1218 }
1219
1220 #[gpui::test]
1221 async fn test_list_item_with_inline_html() {
1222 let parsed = parse(
1223 "\
1224* This is a list item with an inline HTML <sometag>tag</sometag>.
1225",
1226 )
1227 .await;
1228
1229 assert_eq!(
1230 parsed.children,
1231 vec![list_item(
1232 0..67,
1233 1,
1234 Unordered,
1235 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1236 ),],
1237 );
1238 }
1239
1240 #[gpui::test]
1241 async fn test_nested_list_with_paragraph_inside() {
1242 let parsed = parse(
1243 "\
12441. a
1245 1. b
1246 1. c
1247
1248 text
1249
1250 1. d
1251",
1252 )
1253 .await;
1254
1255 assert_eq!(
1256 parsed.children,
1257 vec![
1258 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1259 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1260 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1261 p("text", 32..37),
1262 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1263 ],
1264 );
1265 }
1266
1267 #[gpui::test]
1268 async fn test_list_with_leading_text() {
1269 let parsed = parse(
1270 "\
1271* `code`
1272* **bold**
1273* [link](https://example.com)
1274",
1275 )
1276 .await;
1277
1278 assert_eq!(
1279 parsed.children,
1280 vec![
1281 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1282 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1283 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1284 ],
1285 );
1286 }
1287
1288 #[gpui::test]
1289 async fn test_simple_block_quote() {
1290 let parsed = parse("> Simple block quote with **styled text**").await;
1291
1292 assert_eq!(
1293 parsed.children,
1294 vec![block_quote(
1295 vec![p("Simple block quote with styled text", 2..41)],
1296 0..41
1297 )]
1298 );
1299 }
1300
1301 #[gpui::test]
1302 async fn test_simple_block_quote_with_multiple_lines() {
1303 let parsed = parse(
1304 "\
1305> # Heading
1306> More
1307> text
1308>
1309> More text
1310",
1311 )
1312 .await;
1313
1314 assert_eq!(
1315 parsed.children,
1316 vec![block_quote(
1317 vec![
1318 h1(text("Heading", 4..11), 2..12),
1319 p("More text", 14..26),
1320 p("More text", 30..40)
1321 ],
1322 0..40
1323 )]
1324 );
1325 }
1326
1327 #[gpui::test]
1328 async fn test_nested_block_quote() {
1329 let parsed = parse(
1330 "\
1331> A
1332>
1333> > # B
1334>
1335> C
1336
1337More text
1338",
1339 )
1340 .await;
1341
1342 assert_eq!(
1343 parsed.children,
1344 vec![
1345 block_quote(
1346 vec![
1347 p("A", 2..4),
1348 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1349 p("C", 18..20)
1350 ],
1351 0..20
1352 ),
1353 p("More text", 21..31)
1354 ]
1355 );
1356 }
1357
1358 #[gpui::test]
1359 async fn test_code_block() {
1360 let parsed = parse(
1361 "\
1362```
1363fn main() {
1364 return 0;
1365}
1366```
1367",
1368 )
1369 .await;
1370
1371 assert_eq!(
1372 parsed.children,
1373 vec![code_block(
1374 None,
1375 "fn main() {\n return 0;\n}",
1376 0..35,
1377 None
1378 )]
1379 );
1380 }
1381
1382 #[gpui::test]
1383 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1384 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1385 language_registry.add(rust_lang());
1386
1387 let parsed = parse_markdown(
1388 "\
1389```rust
1390fn main() {
1391 return 0;
1392}
1393```
1394",
1395 None,
1396 Some(language_registry),
1397 )
1398 .await;
1399
1400 assert_eq!(
1401 parsed.children,
1402 vec![code_block(
1403 Some("rust".to_string()),
1404 "fn main() {\n return 0;\n}",
1405 0..39,
1406 Some(vec![])
1407 )]
1408 );
1409 }
1410
1411 fn rust_lang() -> Arc<Language> {
1412 Arc::new(Language::new(
1413 LanguageConfig {
1414 name: "Rust".into(),
1415 matcher: LanguageMatcher {
1416 path_suffixes: vec!["rs".into()],
1417 ..Default::default()
1418 },
1419 collapsed_placeholder: " /* ... */ ".to_string(),
1420 ..Default::default()
1421 },
1422 Some(tree_sitter_rust::LANGUAGE.into()),
1423 ))
1424 }
1425
1426 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1427 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1428 source_range,
1429 level: HeadingLevel::H1,
1430 contents,
1431 })
1432 }
1433
1434 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1435 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1436 source_range,
1437 level: HeadingLevel::H2,
1438 contents,
1439 })
1440 }
1441
1442 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1443 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1444 source_range,
1445 level: HeadingLevel::H3,
1446 contents,
1447 })
1448 }
1449
1450 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1451 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1452 }
1453
1454 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1455 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1456 highlights: Vec::new(),
1457 region_ranges: Vec::new(),
1458 regions: Vec::new(),
1459 source_range,
1460 contents: contents.to_string(),
1461 })]
1462 }
1463
1464 fn block_quote(
1465 children: Vec<ParsedMarkdownElement>,
1466 source_range: Range<usize>,
1467 ) -> ParsedMarkdownElement {
1468 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1469 source_range,
1470 children,
1471 })
1472 }
1473
1474 fn code_block(
1475 language: Option<String>,
1476 code: &str,
1477 source_range: Range<usize>,
1478 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1479 ) -> ParsedMarkdownElement {
1480 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1481 source_range,
1482 language,
1483 contents: code.to_string().into(),
1484 highlights,
1485 })
1486 }
1487
1488 fn list_item(
1489 source_range: Range<usize>,
1490 depth: u16,
1491 item_type: ParsedMarkdownListItemType,
1492 content: Vec<ParsedMarkdownElement>,
1493 ) -> ParsedMarkdownElement {
1494 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1495 source_range,
1496 item_type,
1497 depth,
1498 content,
1499 })
1500 }
1501
1502 fn table(
1503 source_range: Range<usize>,
1504 header: ParsedMarkdownTableRow,
1505 body: Vec<ParsedMarkdownTableRow>,
1506 ) -> ParsedMarkdownTable {
1507 ParsedMarkdownTable {
1508 column_alignments: Vec::new(),
1509 source_range,
1510 header,
1511 body,
1512 }
1513 }
1514
1515 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1516 ParsedMarkdownTableRow { children }
1517 }
1518
1519 impl PartialEq for ParsedMarkdownTable {
1520 fn eq(&self, other: &Self) -> bool {
1521 self.source_range == other.source_range
1522 && self.header == other.header
1523 && self.body == other.body
1524 }
1525 }
1526
1527 impl PartialEq for ParsedMarkdownText {
1528 fn eq(&self, other: &Self) -> bool {
1529 self.source_range == other.source_range && self.contents == other.contents
1530 }
1531 }
1532}