1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::FontWeight;
5use language::LanguageRegistry;
6use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
7use std::{ops::Range, path::PathBuf, sync::Arc, vec};
8
9pub async fn parse_markdown(
10 markdown_input: &str,
11 file_location_directory: Option<PathBuf>,
12 language_registry: Option<Arc<LanguageRegistry>>,
13) -> ParsedMarkdown {
14 let mut options = Options::all();
15 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
16
17 let parser = Parser::new_ext(markdown_input, options);
18 let parser = MarkdownParser::new(
19 parser.into_offset_iter().collect(),
20 file_location_directory,
21 language_registry,
22 );
23 let renderer = parser.parse_document().await;
24 ParsedMarkdown {
25 children: renderer.parsed,
26 }
27}
28
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event paired with its source range, as collected from
    /// `Parser::into_offset_iter` in `parse_markdown`
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Handed to `Link::identify` / `Image::identify` when a link or image tag is parsed
    file_location_directory: Option<PathBuf>,
    /// Used to look up the language of fenced code blocks for highlighting
    language_registry: Option<Arc<LanguageRegistry>>,
}
38
/// Scratch state for a list item while `parse_list` is still collecting its content.
struct MarkdownListItem {
    /// Block elements gathered for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Task / ordered / unordered marker kind assigned by `parse_list`
    item_type: ParsedMarkdownListItemType,
}
43
44impl Default for MarkdownListItem {
45 fn default() -> Self {
46 Self {
47 content: Vec::new(),
48 item_type: ParsedMarkdownListItemType::Unordered,
49 }
50 }
51}
52
53impl<'a> MarkdownParser<'a> {
54 fn new(
55 tokens: Vec<(Event<'a>, Range<usize>)>,
56 file_location_directory: Option<PathBuf>,
57 language_registry: Option<Arc<LanguageRegistry>>,
58 ) -> Self {
59 Self {
60 tokens,
61 file_location_directory,
62 language_registry,
63 cursor: 0,
64 parsed: vec![],
65 }
66 }
67
68 fn eof(&self) -> bool {
69 if self.tokens.is_empty() {
70 return true;
71 }
72 self.cursor >= self.tokens.len() - 1
73 }
74
75 fn peek(&self, steps: usize) -> Option<&(Event, Range<usize>)> {
76 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
77 return self.tokens.last();
78 }
79 return self.tokens.get(self.cursor + steps);
80 }
81
82 fn previous(&self) -> Option<&(Event, Range<usize>)> {
83 if self.cursor == 0 || self.cursor > self.tokens.len() {
84 return None;
85 }
86 return self.tokens.get(self.cursor - 1);
87 }
88
89 fn current(&self) -> Option<&(Event, Range<usize>)> {
90 return self.peek(0);
91 }
92
93 fn current_event(&self) -> Option<&Event> {
94 return self.current().map(|(event, _)| event);
95 }
96
97 fn is_text_like(event: &Event) -> bool {
98 match event {
99 Event::Text(_)
100 // Represent an inline code block
101 | Event::Code(_)
102 | Event::Html(_)
103 | Event::FootnoteReference(_)
104 | Event::Start(Tag::Link { .. })
105 | Event::Start(Tag::Emphasis)
106 | Event::Start(Tag::Strong)
107 | Event::Start(Tag::Strikethrough)
108 | Event::Start(Tag::Image { .. }) => {
109 true
110 }
111 _ => false,
112 }
113 }
114
115 async fn parse_document(mut self) -> Self {
116 while !self.eof() {
117 if let Some(block) = self.parse_block().await {
118 self.parsed.extend(block);
119 } else {
120 self.cursor += 1;
121 }
122 }
123 self
124 }
125
126 #[async_recursion]
127 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
128 let (current, source_range) = self.current().unwrap();
129 let source_range = source_range.clone();
130 match current {
131 Event::Start(tag) => match tag {
132 Tag::Paragraph => {
133 self.cursor += 1;
134 let text = self.parse_text(false, Some(source_range));
135 Some(vec![ParsedMarkdownElement::Paragraph(text)])
136 }
137 Tag::Heading { level, .. } => {
138 let level = *level;
139 self.cursor += 1;
140 let heading = self.parse_heading(level);
141 Some(vec![ParsedMarkdownElement::Heading(heading)])
142 }
143 Tag::Table(alignment) => {
144 let alignment = alignment.clone();
145 self.cursor += 1;
146 let table = self.parse_table(alignment);
147 Some(vec![ParsedMarkdownElement::Table(table)])
148 }
149 Tag::List(order) => {
150 let order = *order;
151 self.cursor += 1;
152 let list = self.parse_list(order).await;
153 Some(list)
154 }
155 Tag::BlockQuote(_kind) => {
156 self.cursor += 1;
157 let block_quote = self.parse_block_quote().await;
158 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
159 }
160 Tag::CodeBlock(kind) => {
161 let language = match kind {
162 pulldown_cmark::CodeBlockKind::Indented => None,
163 pulldown_cmark::CodeBlockKind::Fenced(language) => {
164 if language.is_empty() {
165 None
166 } else {
167 Some(language.to_string())
168 }
169 }
170 };
171
172 self.cursor += 1;
173
174 let code_block = self.parse_code_block(language).await;
175 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
176 }
177 _ => None,
178 },
179 Event::Rule => {
180 let source_range = source_range.clone();
181 self.cursor += 1;
182 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
183 }
184 _ => None,
185 }
186 }
187
188 fn parse_text(
189 &mut self,
190 should_complete_on_soft_break: bool,
191 source_range: Option<Range<usize>>,
192 ) -> MarkdownParagraph {
193 let source_range = source_range.unwrap_or_else(|| {
194 self.current()
195 .map(|(_, range)| range.clone())
196 .unwrap_or_default()
197 });
198
199 let mut markdown_text_like = Vec::new();
200 let mut text = String::new();
201 let mut bold_depth = 0;
202 let mut italic_depth = 0;
203 let mut strikethrough_depth = 0;
204 let mut link: Option<Link> = None;
205 let mut image: Option<Image> = None;
206 let mut region_ranges: Vec<Range<usize>> = vec![];
207 let mut regions: Vec<ParsedRegion> = vec![];
208 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
209 let mut link_urls: Vec<String> = vec![];
210 let mut link_ranges: Vec<Range<usize>> = vec![];
211
212 loop {
213 if self.eof() {
214 break;
215 }
216
217 let (current, _source_range) = self.current().unwrap();
218 let prev_len = text.len();
219 match current {
220 Event::SoftBreak => {
221 if should_complete_on_soft_break {
222 break;
223 }
224 text.push(' ');
225 }
226
227 Event::HardBreak => {
228 text.push('\n');
229 }
230
231 // We want to ignore any inline HTML tags in the text but keep
232 // the text between them
233 Event::InlineHtml(_) => {}
234
235 Event::Text(t) => {
236 text.push_str(t.as_ref());
237 let mut style = MarkdownHighlightStyle::default();
238
239 if bold_depth > 0 {
240 style.weight = FontWeight::BOLD;
241 }
242
243 if italic_depth > 0 {
244 style.italic = true;
245 }
246
247 if strikethrough_depth > 0 {
248 style.strikethrough = true;
249 }
250
251 let last_run_len = if let Some(link) = link.clone() {
252 region_ranges.push(prev_len..text.len());
253 regions.push(ParsedRegion {
254 code: false,
255 link: Some(link),
256 });
257 style.underline = true;
258 prev_len
259 } else {
260 // Manually scan for links
261 let mut finder = linkify::LinkFinder::new();
262 finder.kinds(&[linkify::LinkKind::Url]);
263 let mut last_link_len = prev_len;
264 for link in finder.links(t) {
265 let start = link.start();
266 let end = link.end();
267 let range = (prev_len + start)..(prev_len + end);
268 link_ranges.push(range.clone());
269 link_urls.push(link.as_str().to_string());
270
271 // If there is a style before we match a link, we have to add this to the highlighted ranges
272 if style != MarkdownHighlightStyle::default()
273 && last_link_len < link.start()
274 {
275 highlights.push((
276 last_link_len..link.start(),
277 MarkdownHighlight::Style(style.clone()),
278 ));
279 }
280
281 highlights.push((
282 range.clone(),
283 MarkdownHighlight::Style(MarkdownHighlightStyle {
284 underline: true,
285 ..style
286 }),
287 ));
288 region_ranges.push(range.clone());
289 regions.push(ParsedRegion {
290 code: false,
291 link: Some(Link::Web {
292 url: link.as_str().to_string(),
293 }),
294 });
295 last_link_len = end;
296 }
297 last_link_len
298 };
299
300 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
301 let mut new_highlight = true;
302 if let Some((last_range, last_style)) = highlights.last_mut() {
303 if last_range.end == last_run_len
304 && last_style == &MarkdownHighlight::Style(style.clone())
305 {
306 last_range.end = text.len();
307 new_highlight = false;
308 }
309 }
310 if new_highlight {
311 highlights.push((
312 last_run_len..text.len(),
313 MarkdownHighlight::Style(style.clone()),
314 ));
315 }
316 }
317 if let Some(mut image) = image.clone() {
318 let is_valid_image = match image.clone() {
319 Image::Path { display_path, .. } => {
320 gpui::ImageSource::try_from(display_path).is_ok()
321 }
322 Image::Web { url, .. } => gpui::ImageSource::try_from(url).is_ok(),
323 };
324 if is_valid_image {
325 text.truncate(text.len() - t.len());
326 if !t.is_empty() {
327 let alt_text = ParsedMarkdownText {
328 source_range: source_range.clone(),
329 contents: t.to_string(),
330 highlights: highlights.clone(),
331 region_ranges: region_ranges.clone(),
332 regions: regions.clone(),
333 };
334 image = image.with_alt_text(alt_text);
335 } else {
336 let alt_text = ParsedMarkdownText {
337 source_range: source_range.clone(),
338 contents: "img".to_string(),
339 highlights: highlights.clone(),
340 region_ranges: region_ranges.clone(),
341 regions: regions.clone(),
342 };
343 image = image.with_alt_text(alt_text);
344 }
345 if !text.is_empty() {
346 let parsed_regions =
347 MarkdownParagraphChunk::Text(ParsedMarkdownText {
348 source_range: source_range.clone(),
349 contents: text.clone(),
350 highlights: highlights.clone(),
351 region_ranges: region_ranges.clone(),
352 regions: regions.clone(),
353 });
354 text = String::new();
355 highlights = vec![];
356 region_ranges = vec![];
357 regions = vec![];
358 markdown_text_like.push(parsed_regions);
359 }
360
361 let parsed_image = MarkdownParagraphChunk::Image(image.clone());
362 markdown_text_like.push(parsed_image);
363 style = MarkdownHighlightStyle::default();
364 }
365 style.underline = true;
366 };
367 }
368 Event::Code(t) => {
369 text.push_str(t.as_ref());
370 region_ranges.push(prev_len..text.len());
371
372 if link.is_some() {
373 highlights.push((
374 prev_len..text.len(),
375 MarkdownHighlight::Style(MarkdownHighlightStyle {
376 underline: true,
377 ..Default::default()
378 }),
379 ));
380 }
381 regions.push(ParsedRegion {
382 code: true,
383 link: link.clone(),
384 });
385 }
386 Event::Start(tag) => match tag {
387 Tag::Emphasis => italic_depth += 1,
388 Tag::Strong => bold_depth += 1,
389 Tag::Strikethrough => strikethrough_depth += 1,
390 Tag::Link { dest_url, .. } => {
391 link = Link::identify(
392 self.file_location_directory.clone(),
393 dest_url.to_string(),
394 );
395 }
396 Tag::Image { dest_url, .. } => {
397 image = Image::identify(
398 source_range.clone(),
399 self.file_location_directory.clone(),
400 dest_url.to_string(),
401 link.clone(),
402 );
403 }
404 _ => {
405 break;
406 }
407 },
408
409 Event::End(tag) => match tag {
410 TagEnd::Emphasis => italic_depth -= 1,
411 TagEnd::Strong => bold_depth -= 1,
412 TagEnd::Strikethrough => strikethrough_depth -= 1,
413 TagEnd::Link => {
414 link = None;
415 }
416 TagEnd::Image => {
417 image = None;
418 }
419 TagEnd::Paragraph => {
420 self.cursor += 1;
421 break;
422 }
423 _ => {
424 break;
425 }
426 },
427 _ => {
428 break;
429 }
430 }
431
432 self.cursor += 1;
433 }
434 if !text.is_empty() {
435 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
436 source_range: source_range.clone(),
437 contents: text,
438 highlights,
439 regions,
440 region_ranges,
441 }));
442 }
443 markdown_text_like
444 }
445
446 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
447 let (_event, source_range) = self.previous().unwrap();
448 let source_range = source_range.clone();
449 let text = self.parse_text(true, None);
450
451 // Advance past the heading end tag
452 self.cursor += 1;
453
454 ParsedMarkdownHeading {
455 source_range: source_range.clone(),
456 level: match level {
457 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
458 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
459 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
460 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
461 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
462 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
463 },
464 contents: text,
465 }
466 }
467
468 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
469 let (_event, source_range) = self.previous().unwrap();
470 let source_range = source_range.clone();
471 let mut header = ParsedMarkdownTableRow::new();
472 let mut body = vec![];
473 let mut current_row = vec![];
474 let mut in_header = true;
475 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
476
477 loop {
478 if self.eof() {
479 break;
480 }
481
482 let (current, source_range) = self.current().unwrap();
483 let source_range = source_range.clone();
484 match current {
485 Event::Start(Tag::TableHead)
486 | Event::Start(Tag::TableRow)
487 | Event::End(TagEnd::TableCell) => {
488 self.cursor += 1;
489 }
490 Event::Start(Tag::TableCell) => {
491 self.cursor += 1;
492 let cell_contents = self.parse_text(false, Some(source_range));
493 current_row.push(cell_contents);
494 }
495 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
496 self.cursor += 1;
497 let new_row = std::mem::take(&mut current_row);
498 if in_header {
499 header.children = new_row;
500 in_header = false;
501 } else {
502 let row = ParsedMarkdownTableRow::with_children(new_row);
503 body.push(row);
504 }
505 }
506 Event::End(TagEnd::Table) => {
507 self.cursor += 1;
508 break;
509 }
510 _ => {
511 break;
512 }
513 }
514 }
515
516 ParsedMarkdownTable {
517 source_range,
518 header,
519 body,
520 column_alignments,
521 }
522 }
523
524 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
525 match alignment {
526 Alignment::None => ParsedMarkdownTableAlignment::None,
527 Alignment::Left => ParsedMarkdownTableAlignment::Left,
528 Alignment::Center => ParsedMarkdownTableAlignment::Center,
529 Alignment::Right => ParsedMarkdownTableAlignment::Right,
530 }
531 }
532
/// Parses a list whose `Start(Tag::List)` token has just been consumed and
/// returns it as a flat sequence of elements.
///
/// Nested lists are not returned as a tree: every item becomes a
/// `ParsedMarkdownElement::ListItem` carrying its `depth` (1 for the outermost
/// list), and a parent item is placed before the items nested inside it.
/// Blocks that appear in an item after its nested items are appended to the
/// flat sequence directly.
///
/// `order` is the number of the first item for an ordered list, or `None` for
/// an unordered one; it is incremented after each item ends.
///
/// # Panics
///
/// Panics if there is no previous token.
async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
    let (_, list_source_range) = self.previous().unwrap();

    // Flat output, in document order.
    let mut items = Vec::new();
    // Items that have been opened but not yet closed by `End(Item)`.
    let mut items_stack = vec![MarkdownListItem::default()];
    let mut depth = 1;
    let mut order = order;
    // Numbering of the enclosing lists, restored when a nested list ends.
    let mut order_stack = Vec::new();

    // depth -> index in `items` where the still-open item at that depth has to be
    // inserted once it ends, so that it precedes its nested items.
    let mut insertion_indices = FxHashMap::default();
    // depth -> source range of the open item at that depth, cut off where its
    // first nested list begins.
    let mut source_ranges = FxHashMap::default();
    let mut start_item_range = list_source_range.clone();

    while !self.eof() {
        let (current, source_range) = self.current().unwrap();
        match current {
            Event::Start(Tag::List(new_order)) => {
                if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                    insertion_indices.insert(depth, items.len());
                }

                // We will use the start of the nested list as the end for the current item's range,
                // because we don't care about the hierarchy of list items
                if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                    e.insert(start_item_range.start..source_range.start);
                }

                order_stack.push(order);
                order = *new_order;
                self.cursor += 1;
                depth += 1;
            }
            Event::End(TagEnd::List(_)) => {
                order = order_stack.pop().flatten();
                self.cursor += 1;
                depth -= 1;

                // The outermost list has been closed.
                if depth == 0 {
                    break;
                }
            }
            Event::Start(Tag::Item) => {
                start_item_range = source_range.clone();

                self.cursor += 1;
                items_stack.push(MarkdownListItem::default());

                let mut task_list = None;
                // Check for task list marker (`- [ ]` or `- [x]`)
                if let Some(event) = self.current_event() {
                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if event == &Event::Start(Tag::Paragraph) {
                        self.cursor += 1;
                    }

                    if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                        task_list = Some((*checked, range.clone()));
                        self.cursor += 1;
                    }
                }

                if let Some((event, range)) = self.current() {
                    // This is a plain list item.
                    // For example `- some text` or `1. [Docs](./docs.md)`
                    if MarkdownParser::is_text_like(event) {
                        let text = self.parse_text(false, Some(range.clone()));
                        let block = ParsedMarkdownElement::Paragraph(text);
                        if let Some(content) = items_stack.last_mut() {
                            // Precedence: task marker, then ordered numbering, then unordered.
                            let item_type = if let Some((checked, range)) = task_list {
                                ParsedMarkdownListItemType::Task(checked, range)
                            } else if let Some(order) = order {
                                ParsedMarkdownListItemType::Ordered(order)
                            } else {
                                ParsedMarkdownListItemType::Unordered
                            };
                            content.item_type = item_type;
                            content.content.push(block);
                        }
                    } else {
                        // NOTE(review): on this path `item_type` is never assigned, so the
                        // item keeps the default `Unordered` type — confirm this is intended.
                        let block = self.parse_block().await;
                        if let Some(block) = block {
                            if let Some(list_item) = items_stack.last_mut() {
                                list_item.content.extend(block);
                            }
                        }
                    }
                }

                // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                    self.cursor += 1;
                }
            }
            Event::End(TagEnd::Item) => {
                self.cursor += 1;

                if let Some(current) = order {
                    order = Some(current + 1);
                }

                if let Some(list_item) = items_stack.pop() {
                    // Prefer the range that was cut at the nested list, if one was recorded.
                    let source_range = source_ranges
                        .remove(&depth)
                        .unwrap_or(start_item_range.clone());

                    // We need to remove the last character of the source range, because it includes the newline character
                    let source_range = source_range.start..source_range.end - 1;
                    let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                        source_range,
                        content: list_item.content,
                        depth,
                        item_type: list_item.item_type,
                    });

                    // An item that had nested items goes in front of them; otherwise append.
                    if let Some(index) = insertion_indices.get(&depth) {
                        items.insert(*index, item);
                        insertion_indices.remove(&depth);
                    } else {
                        items.push(item);
                    }
                }
            }
            _ => {
                if depth == 0 {
                    break;
                }
                // This can only happen if a list item starts with more than one paragraph,
                // or the list item contains blocks that should be rendered after the nested list items
                let block = self.parse_block().await;
                if let Some(block) = block {
                    if let Some(list_item) = items_stack.last_mut() {
                        // If no nested items have been emitted for the current item yet (no insertion
                        // index is recorded for this depth), we can append the block to the current list item
                        if !insertion_indices.contains_key(&depth) {
                            list_item.content.extend(block);
                            continue;
                        }
                    }

                    // Otherwise we need to insert the block after all the nested items
                    // that have been parsed so far
                    items.extend(block);
                } else {
                    // Unrecognised token: skip it so the loop keeps making progress.
                    self.cursor += 1;
                }
            }
        }
    }

    items
}
683
684 #[async_recursion]
685 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
686 let (_event, source_range) = self.previous().unwrap();
687 let source_range = source_range.clone();
688 let mut nested_depth = 1;
689
690 let mut children: Vec<ParsedMarkdownElement> = vec![];
691
692 while !self.eof() {
693 let block = self.parse_block().await;
694
695 if let Some(block) = block {
696 children.extend(block);
697 } else {
698 break;
699 }
700
701 if self.eof() {
702 break;
703 }
704
705 let (current, _source_range) = self.current().unwrap();
706 match current {
707 // This is a nested block quote.
708 // Record that we're in a nested block quote and continue parsing.
709 // We don't need to advance the cursor since the next
710 // call to `parse_block` will handle it.
711 Event::Start(Tag::BlockQuote(_kind)) => {
712 nested_depth += 1;
713 }
714 Event::End(TagEnd::BlockQuote(_kind)) => {
715 nested_depth -= 1;
716 if nested_depth == 0 {
717 self.cursor += 1;
718 break;
719 }
720 }
721 _ => {}
722 };
723 }
724
725 ParsedMarkdownBlockQuote {
726 source_range,
727 children,
728 }
729 }
730
731 async fn parse_code_block(&mut self, language: Option<String>) -> ParsedMarkdownCodeBlock {
732 let (_event, source_range) = self.previous().unwrap();
733 let source_range = source_range.clone();
734 let mut code = String::new();
735
736 while !self.eof() {
737 let (current, _source_range) = self.current().unwrap();
738 match current {
739 Event::Text(text) => {
740 code.push_str(text);
741 self.cursor += 1;
742 }
743 Event::End(TagEnd::CodeBlock) => {
744 self.cursor += 1;
745 break;
746 }
747 _ => {
748 break;
749 }
750 }
751 }
752 let highlights = if let Some(language) = &language {
753 if let Some(registry) = &self.language_registry {
754 let rope: language::Rope = code.as_str().into();
755 registry
756 .language_for_name_or_extension(language)
757 .await
758 .map(|l| l.highlight_text(&rope, 0..code.len()))
759 .ok()
760 } else {
761 None
762 }
763 } else {
764 None
765 };
766
767 ParsedMarkdownCodeBlock {
768 source_range,
769 contents: code.trim().to_string().into(),
770 language,
771 highlights,
772 }
773 }
774}
775
776#[cfg(test)]
777mod tests {
778 use core::panic;
779
780 use super::*;
781
782 use gpui::BackgroundExecutor;
783 use language::{
784 tree_sitter_rust, HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry,
785 };
786 use pretty_assertions::assert_eq;
787 use ParsedMarkdownListItemType::*;
788
789 async fn parse(input: &str) -> ParsedMarkdown {
790 parse_markdown(input, None, None).await
791 }
792
793 #[gpui::test]
794 async fn test_headings() {
795 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
796
797 assert_eq!(
798 parsed.children,
799 vec![
800 h1(text("Heading one", 2..13), 0..14),
801 h2(text("Heading two", 17..28), 14..29),
802 h3(text("Heading three", 33..46), 29..46),
803 ]
804 );
805 }
806
807 #[gpui::test]
808 async fn test_newlines_dont_new_paragraphs() {
809 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
810
811 assert_eq!(
812 parsed.children,
813 vec![p("Some text that is bolded and italicized", 0..46)]
814 );
815 }
816
817 #[gpui::test]
818 async fn test_heading_with_paragraph() {
819 let parsed = parse("# Zed\nThe editor").await;
820
821 assert_eq!(
822 parsed.children,
823 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
824 );
825 }
826
827 #[gpui::test]
828 async fn test_double_newlines_do_new_paragraphs() {
829 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
830
831 assert_eq!(
832 parsed.children,
833 vec![
834 p("Some text that is bolded", 0..29),
835 p("and italicized", 31..47),
836 ]
837 );
838 }
839
840 #[gpui::test]
841 async fn test_bold_italic_text() {
842 let parsed = parse("Some text **that is bolded** and *italicized*").await;
843
844 assert_eq!(
845 parsed.children,
846 vec![p("Some text that is bolded and italicized", 0..45)]
847 );
848 }
849
850 #[gpui::test]
851 async fn test_nested_bold_strikethrough_text() {
852 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
853
854 assert_eq!(parsed.children.len(), 1);
855 assert_eq!(
856 parsed.children[0],
857 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
858 ParsedMarkdownText {
859 source_range: 0..35,
860 contents: "Some bostrikethroughld text".to_string(),
861 highlights: Vec::new(),
862 region_ranges: Vec::new(),
863 regions: Vec::new(),
864 }
865 )])
866 );
867
868 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
869 text
870 } else {
871 panic!("Expected a paragraph");
872 };
873
874 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
875 text
876 } else {
877 panic!("Expected a text");
878 };
879
880 assert_eq!(
881 paragraph.highlights,
882 vec![
883 (
884 5..7,
885 MarkdownHighlight::Style(MarkdownHighlightStyle {
886 weight: FontWeight::BOLD,
887 ..Default::default()
888 }),
889 ),
890 (
891 7..20,
892 MarkdownHighlight::Style(MarkdownHighlightStyle {
893 weight: FontWeight::BOLD,
894 strikethrough: true,
895 ..Default::default()
896 }),
897 ),
898 (
899 20..22,
900 MarkdownHighlight::Style(MarkdownHighlightStyle {
901 weight: FontWeight::BOLD,
902 ..Default::default()
903 }),
904 ),
905 ]
906 );
907 }
908
909 #[gpui::test]
910 async fn test_text_with_inline_html() {
911 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
912
913 assert_eq!(
914 parsed.children,
915 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
916 );
917 }
918
919 #[gpui::test]
920 async fn test_raw_links_detection() {
921 let parsed = parse("Checkout this https://zed.dev link").await;
922
923 assert_eq!(
924 parsed.children,
925 vec![p("Checkout this https://zed.dev link", 0..34)]
926 );
927 }
928
929 #[gpui::test]
930 async fn test_image_links_detection() {
931 let parsed = parse("").await;
932
933 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
934 text
935 } else {
936 panic!("Expected a paragraph");
937 };
938 assert_eq!(
939 paragraph[0],
940 MarkdownParagraphChunk::Image(Image::Web {
941 source_range: 0..111,
942 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
943 link: None,
944 alt_text: Some(
945 ParsedMarkdownText {
946 source_range: 0..111,
947 contents: "test".to_string(),
948 highlights: vec![],
949 region_ranges: vec![],
950 regions: vec![],
951 },
952 ),
953 },)
954 );
955 }
956
957 #[gpui::test]
958 async fn test_header_only_table() {
959 let markdown = "\
960| Header 1 | Header 2 |
961|----------|----------|
962
963Some other content
964";
965
966 let expected_table = table(
967 0..48,
968 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
969 vec![],
970 );
971
972 assert_eq!(
973 parse(markdown).await.children[0],
974 ParsedMarkdownElement::Table(expected_table)
975 );
976 }
977
978 #[gpui::test]
979 async fn test_basic_table() {
980 let markdown = "\
981| Header 1 | Header 2 |
982|----------|----------|
983| Cell 1 | Cell 2 |
984| Cell 3 | Cell 4 |";
985
986 let expected_table = table(
987 0..95,
988 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
989 vec![
990 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
991 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
992 ],
993 );
994
995 assert_eq!(
996 parse(markdown).await.children[0],
997 ParsedMarkdownElement::Table(expected_table)
998 );
999 }
1000
1001 #[gpui::test]
1002 async fn test_list_basic() {
1003 let parsed = parse(
1004 "\
1005* Item 1
1006* Item 2
1007* Item 3
1008",
1009 )
1010 .await;
1011
1012 assert_eq!(
1013 parsed.children,
1014 vec![
1015 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1016 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1017 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1018 ],
1019 );
1020 }
1021
1022 #[gpui::test]
1023 async fn test_list_with_tasks() {
1024 let parsed = parse(
1025 "\
1026- [ ] TODO
1027- [x] Checked
1028",
1029 )
1030 .await;
1031
1032 assert_eq!(
1033 parsed.children,
1034 vec![
1035 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1036 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1037 ],
1038 );
1039 }
1040
1041 #[gpui::test]
1042 async fn test_list_with_indented_task() {
1043 let parsed = parse(
1044 "\
1045- [ ] TODO
1046 - [x] Checked
1047 - Unordered
1048 1. Number 1
1049 1. Number 2
10501. Number A
1051",
1052 )
1053 .await;
1054
1055 assert_eq!(
1056 parsed.children,
1057 vec![
1058 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1059 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1060 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1061 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1062 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1063 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1064 ],
1065 );
1066 }
1067
1068 #[gpui::test]
1069 async fn test_list_with_linebreak_is_handled_correctly() {
1070 let parsed = parse(
1071 "\
1072- [ ] Task 1
1073
1074- [x] Task 2
1075",
1076 )
1077 .await;
1078
1079 assert_eq!(
1080 parsed.children,
1081 vec![
1082 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1083 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1084 ],
1085 );
1086 }
1087
1088 #[gpui::test]
1089 async fn test_list_nested() {
1090 let parsed = parse(
1091 "\
1092* Item 1
1093* Item 2
1094* Item 3
1095
10961. Hello
10971. Two
1098 1. Three
10992. Four
11003. Five
1101
1102* First
1103 1. Hello
1104 1. Goodbyte
1105 - Inner
1106 - Inner
1107 2. Goodbyte
1108 - Next item empty
1109 -
1110* Last
1111",
1112 )
1113 .await;
1114
1115 assert_eq!(
1116 parsed.children,
1117 vec![
1118 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1119 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1120 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1121 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1122 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1123 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1124 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1125 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1126 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1127 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1128 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1129 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1130 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1131 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1132 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1133 list_item(186..190, 3, Unordered, vec![]),
1134 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1135 ]
1136 );
1137 }
1138
1139 #[gpui::test]
1140 async fn test_list_with_nested_content() {
1141 let parsed = parse(
1142 "\
1143* This is a list item with two paragraphs.
1144
1145 This is the second paragraph in the list item.
1146",
1147 )
1148 .await;
1149
1150 assert_eq!(
1151 parsed.children,
1152 vec![list_item(
1153 0..96,
1154 1,
1155 Unordered,
1156 vec![
1157 p("This is a list item with two paragraphs.", 4..44),
1158 p("This is the second paragraph in the list item.", 50..97)
1159 ],
1160 ),],
1161 );
1162 }
1163
1164 #[gpui::test]
1165 async fn test_list_item_with_inline_html() {
1166 let parsed = parse(
1167 "\
1168* This is a list item with an inline HTML <sometag>tag</sometag>.
1169",
1170 )
1171 .await;
1172
1173 assert_eq!(
1174 parsed.children,
1175 vec![list_item(
1176 0..67,
1177 1,
1178 Unordered,
1179 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1180 ),],
1181 );
1182 }
1183
1184 #[gpui::test]
1185 async fn test_nested_list_with_paragraph_inside() {
1186 let parsed = parse(
1187 "\
11881. a
1189 1. b
1190 1. c
1191
1192 text
1193
1194 1. d
1195",
1196 )
1197 .await;
1198
1199 assert_eq!(
1200 parsed.children,
1201 vec![
1202 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1203 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1204 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1205 p("text", 32..37),
1206 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1207 ],
1208 );
1209 }
1210
1211 #[gpui::test]
1212 async fn test_list_with_leading_text() {
1213 let parsed = parse(
1214 "\
1215* `code`
1216* **bold**
1217* [link](https://example.com)
1218",
1219 )
1220 .await;
1221
1222 assert_eq!(
1223 parsed.children,
1224 vec![
1225 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1226 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1227 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1228 ],
1229 );
1230 }
1231
1232 #[gpui::test]
1233 async fn test_simple_block_quote() {
1234 let parsed = parse("> Simple block quote with **styled text**").await;
1235
1236 assert_eq!(
1237 parsed.children,
1238 vec![block_quote(
1239 vec![p("Simple block quote with styled text", 2..41)],
1240 0..41
1241 )]
1242 );
1243 }
1244
1245 #[gpui::test]
1246 async fn test_simple_block_quote_with_multiple_lines() {
1247 let parsed = parse(
1248 "\
1249> # Heading
1250> More
1251> text
1252>
1253> More text
1254",
1255 )
1256 .await;
1257
1258 assert_eq!(
1259 parsed.children,
1260 vec![block_quote(
1261 vec![
1262 h1(text("Heading", 4..11), 2..12),
1263 p("More text", 14..26),
1264 p("More text", 30..40)
1265 ],
1266 0..40
1267 )]
1268 );
1269 }
1270
1271 #[gpui::test]
1272 async fn test_nested_block_quote() {
1273 let parsed = parse(
1274 "\
1275> A
1276>
1277> > # B
1278>
1279> C
1280
1281More text
1282",
1283 )
1284 .await;
1285
1286 assert_eq!(
1287 parsed.children,
1288 vec![
1289 block_quote(
1290 vec![
1291 p("A", 2..4),
1292 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1293 p("C", 18..20)
1294 ],
1295 0..20
1296 ),
1297 p("More text", 21..31)
1298 ]
1299 );
1300 }
1301
1302 #[gpui::test]
1303 async fn test_code_block() {
1304 let parsed = parse(
1305 "\
1306```
1307fn main() {
1308 return 0;
1309}
1310```
1311",
1312 )
1313 .await;
1314
1315 assert_eq!(
1316 parsed.children,
1317 vec![code_block(
1318 None,
1319 "fn main() {\n return 0;\n}",
1320 0..35,
1321 None
1322 )]
1323 );
1324 }
1325
1326 #[gpui::test]
1327 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1328 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1329 language_registry.add(rust_lang());
1330
1331 let parsed = parse_markdown(
1332 "\
1333```rust
1334fn main() {
1335 return 0;
1336}
1337```
1338",
1339 None,
1340 Some(language_registry),
1341 )
1342 .await;
1343
1344 assert_eq!(
1345 parsed.children,
1346 vec![code_block(
1347 Some("rust".to_string()),
1348 "fn main() {\n return 0;\n}",
1349 0..39,
1350 Some(vec![])
1351 )]
1352 );
1353 }
1354
1355 fn rust_lang() -> Arc<Language> {
1356 Arc::new(Language::new(
1357 LanguageConfig {
1358 name: "Rust".into(),
1359 matcher: LanguageMatcher {
1360 path_suffixes: vec!["rs".into()],
1361 ..Default::default()
1362 },
1363 collapsed_placeholder: " /* ... */ ".to_string(),
1364 ..Default::default()
1365 },
1366 Some(tree_sitter_rust::LANGUAGE.into()),
1367 ))
1368 }
1369
1370 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1371 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1372 source_range,
1373 level: HeadingLevel::H1,
1374 contents,
1375 })
1376 }
1377
1378 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1379 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1380 source_range,
1381 level: HeadingLevel::H2,
1382 contents,
1383 })
1384 }
1385
1386 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1387 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1388 source_range,
1389 level: HeadingLevel::H3,
1390 contents,
1391 })
1392 }
1393
1394 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1395 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1396 }
1397
1398 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1399 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1400 highlights: Vec::new(),
1401 region_ranges: Vec::new(),
1402 regions: Vec::new(),
1403 source_range,
1404 contents: contents.to_string(),
1405 })]
1406 }
1407
1408 fn block_quote(
1409 children: Vec<ParsedMarkdownElement>,
1410 source_range: Range<usize>,
1411 ) -> ParsedMarkdownElement {
1412 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1413 source_range,
1414 children,
1415 })
1416 }
1417
1418 fn code_block(
1419 language: Option<String>,
1420 code: &str,
1421 source_range: Range<usize>,
1422 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1423 ) -> ParsedMarkdownElement {
1424 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1425 source_range,
1426 language,
1427 contents: code.to_string().into(),
1428 highlights,
1429 })
1430 }
1431
1432 fn list_item(
1433 source_range: Range<usize>,
1434 depth: u16,
1435 item_type: ParsedMarkdownListItemType,
1436 content: Vec<ParsedMarkdownElement>,
1437 ) -> ParsedMarkdownElement {
1438 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1439 source_range,
1440 item_type,
1441 depth,
1442 content,
1443 })
1444 }
1445
1446 fn table(
1447 source_range: Range<usize>,
1448 header: ParsedMarkdownTableRow,
1449 body: Vec<ParsedMarkdownTableRow>,
1450 ) -> ParsedMarkdownTable {
1451 ParsedMarkdownTable {
1452 column_alignments: Vec::new(),
1453 source_range,
1454 header,
1455 body,
1456 }
1457 }
1458
1459 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1460 ParsedMarkdownTableRow { children }
1461 }
1462
1463 impl PartialEq for ParsedMarkdownTable {
1464 fn eq(&self, other: &Self) -> bool {
1465 self.source_range == other.source_range
1466 && self.header == other.header
1467 && self.body == other.body
1468 }
1469 }
1470
1471 impl PartialEq for ParsedMarkdownText {
1472 fn eq(&self, other: &Self) -> bool {
1473 self.source_range == other.source_range && self.contents == other.contents
1474 }
1475 }
1476}