1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::{DefiniteLength, FontWeight, px, relative};
5use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
6use language::LanguageRegistry;
7use markup5ever_rcdom::RcDom;
8use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
9use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
10
11pub async fn parse_markdown(
12 markdown_input: &str,
13 file_location_directory: Option<PathBuf>,
14 language_registry: Option<Arc<LanguageRegistry>>,
15) -> ParsedMarkdown {
16 let mut options = Options::all();
17 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
18
19 let parser = Parser::new_ext(markdown_input, options);
20 let parser = MarkdownParser::new(
21 parser.into_offset_iter().collect(),
22 file_location_directory,
23 language_registry,
24 );
25 let renderer = parser.parse_document().await;
26 ParsedMarkdown {
27 children: renderer.parsed,
28 }
29}
30
31struct MarkdownParser<'a> {
32 tokens: Vec<(Event<'a>, Range<usize>)>,
33 /// The current index in the tokens array
34 cursor: usize,
35 /// The blocks that we have successfully parsed so far
36 parsed: Vec<ParsedMarkdownElement>,
37 file_location_directory: Option<PathBuf>,
38 language_registry: Option<Arc<LanguageRegistry>>,
39}
40
41struct MarkdownListItem {
42 content: Vec<ParsedMarkdownElement>,
43 item_type: ParsedMarkdownListItemType,
44}
45
46impl Default for MarkdownListItem {
47 fn default() -> Self {
48 Self {
49 content: Vec::new(),
50 item_type: ParsedMarkdownListItemType::Unordered,
51 }
52 }
53}
54
55impl<'a> MarkdownParser<'a> {
56 fn new(
57 tokens: Vec<(Event<'a>, Range<usize>)>,
58 file_location_directory: Option<PathBuf>,
59 language_registry: Option<Arc<LanguageRegistry>>,
60 ) -> Self {
61 Self {
62 tokens,
63 file_location_directory,
64 language_registry,
65 cursor: 0,
66 parsed: vec![],
67 }
68 }
69
70 fn eof(&self) -> bool {
71 if self.tokens.is_empty() {
72 return true;
73 }
74 self.cursor >= self.tokens.len() - 1
75 }
76
77 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
78 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
79 return self.tokens.last();
80 }
81 self.tokens.get(self.cursor + steps)
82 }
83
84 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
85 if self.cursor == 0 || self.cursor > self.tokens.len() {
86 return None;
87 }
88 self.tokens.get(self.cursor - 1)
89 }
90
91 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
92 self.peek(0)
93 }
94
95 fn current_event(&self) -> Option<&Event<'_>> {
96 self.current().map(|(event, _)| event)
97 }
98
99 fn is_text_like(event: &Event) -> bool {
100 match event {
101 Event::Text(_)
102 // Represent an inline code block
103 | Event::Code(_)
104 | Event::Html(_)
105 | Event::InlineHtml(_)
106 | Event::FootnoteReference(_)
107 | Event::Start(Tag::Link { .. })
108 | Event::Start(Tag::Emphasis)
109 | Event::Start(Tag::Strong)
110 | Event::Start(Tag::Strikethrough)
111 | Event::Start(Tag::Image { .. }) => {
112 true
113 }
114 _ => false,
115 }
116 }
117
118 async fn parse_document(mut self) -> Self {
119 while !self.eof() {
120 if let Some(block) = self.parse_block().await {
121 self.parsed.extend(block);
122 } else {
123 self.cursor += 1;
124 }
125 }
126 self
127 }
128
129 #[async_recursion]
130 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
131 let (current, source_range) = self.current().unwrap();
132 let source_range = source_range.clone();
133 match current {
134 Event::Start(tag) => match tag {
135 Tag::Paragraph => {
136 self.cursor += 1;
137 let text = self.parse_text(false, Some(source_range));
138 Some(vec![ParsedMarkdownElement::Paragraph(text)])
139 }
140 Tag::Heading { level, .. } => {
141 let level = *level;
142 self.cursor += 1;
143 let heading = self.parse_heading(level);
144 Some(vec![ParsedMarkdownElement::Heading(heading)])
145 }
146 Tag::Table(alignment) => {
147 let alignment = alignment.clone();
148 self.cursor += 1;
149 let table = self.parse_table(alignment);
150 Some(vec![ParsedMarkdownElement::Table(table)])
151 }
152 Tag::List(order) => {
153 let order = *order;
154 self.cursor += 1;
155 let list = self.parse_list(order).await;
156 Some(list)
157 }
158 Tag::BlockQuote(_kind) => {
159 self.cursor += 1;
160 let block_quote = self.parse_block_quote().await;
161 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
162 }
163 Tag::CodeBlock(kind) => {
164 let language = match kind {
165 pulldown_cmark::CodeBlockKind::Indented => None,
166 pulldown_cmark::CodeBlockKind::Fenced(language) => {
167 if language.is_empty() {
168 None
169 } else {
170 Some(language.to_string())
171 }
172 }
173 };
174
175 self.cursor += 1;
176
177 let code_block = self.parse_code_block(language).await?;
178 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
179 }
180 Tag::HtmlBlock => {
181 self.cursor += 1;
182
183 Some(self.parse_html_block().await)
184 }
185 _ => None,
186 },
187 Event::Rule => {
188 self.cursor += 1;
189 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
190 }
191 _ => None,
192 }
193 }
194
195 fn parse_text(
196 &mut self,
197 should_complete_on_soft_break: bool,
198 source_range: Option<Range<usize>>,
199 ) -> MarkdownParagraph {
200 let source_range = source_range.unwrap_or_else(|| {
201 self.current()
202 .map(|(_, range)| range.clone())
203 .unwrap_or_default()
204 });
205
206 let mut markdown_text_like = Vec::new();
207 let mut text = String::new();
208 let mut bold_depth = 0;
209 let mut italic_depth = 0;
210 let mut strikethrough_depth = 0;
211 let mut link: Option<Link> = None;
212 let mut image: Option<Image> = None;
213 let mut region_ranges: Vec<Range<usize>> = vec![];
214 let mut regions: Vec<ParsedRegion> = vec![];
215 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
216 let mut link_urls: Vec<String> = vec![];
217 let mut link_ranges: Vec<Range<usize>> = vec![];
218
219 loop {
220 if self.eof() {
221 break;
222 }
223
224 let (current, _) = self.current().unwrap();
225 let prev_len = text.len();
226 match current {
227 Event::SoftBreak => {
228 if should_complete_on_soft_break {
229 break;
230 }
231 text.push(' ');
232 }
233
234 Event::HardBreak => {
235 text.push('\n');
236 }
237
238 // We want to ignore any inline HTML tags in the text but keep
239 // the text between them
240 Event::InlineHtml(_) => {}
241
242 Event::Text(t) => {
243 text.push_str(t.as_ref());
244 let mut style = MarkdownHighlightStyle::default();
245
246 if bold_depth > 0 {
247 style.weight = FontWeight::BOLD;
248 }
249
250 if italic_depth > 0 {
251 style.italic = true;
252 }
253
254 if strikethrough_depth > 0 {
255 style.strikethrough = true;
256 }
257
258 let last_run_len = if let Some(link) = link.clone() {
259 region_ranges.push(prev_len..text.len());
260 regions.push(ParsedRegion {
261 code: false,
262 link: Some(link),
263 });
264 style.underline = true;
265 prev_len
266 } else {
267 // Manually scan for links
268 let mut finder = linkify::LinkFinder::new();
269 finder.kinds(&[linkify::LinkKind::Url]);
270 let mut last_link_len = prev_len;
271 for link in finder.links(t) {
272 let start = link.start();
273 let end = link.end();
274 let range = (prev_len + start)..(prev_len + end);
275 link_ranges.push(range.clone());
276 link_urls.push(link.as_str().to_string());
277
278 // If there is a style before we match a link, we have to add this to the highlighted ranges
279 if style != MarkdownHighlightStyle::default()
280 && last_link_len < link.start()
281 {
282 highlights.push((
283 last_link_len..link.start(),
284 MarkdownHighlight::Style(style.clone()),
285 ));
286 }
287
288 highlights.push((
289 range.clone(),
290 MarkdownHighlight::Style(MarkdownHighlightStyle {
291 underline: true,
292 ..style
293 }),
294 ));
295 region_ranges.push(range.clone());
296 regions.push(ParsedRegion {
297 code: false,
298 link: Some(Link::Web {
299 url: link.as_str().to_string(),
300 }),
301 });
302 last_link_len = end;
303 }
304 last_link_len
305 };
306
307 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
308 let mut new_highlight = true;
309 if let Some((last_range, last_style)) = highlights.last_mut()
310 && last_range.end == last_run_len
311 && last_style == &MarkdownHighlight::Style(style.clone())
312 {
313 last_range.end = text.len();
314 new_highlight = false;
315 }
316 if new_highlight {
317 highlights.push((
318 last_run_len..text.len(),
319 MarkdownHighlight::Style(style.clone()),
320 ));
321 }
322 }
323 }
324 Event::Code(t) => {
325 text.push_str(t.as_ref());
326 region_ranges.push(prev_len..text.len());
327
328 if link.is_some() {
329 highlights.push((
330 prev_len..text.len(),
331 MarkdownHighlight::Style(MarkdownHighlightStyle {
332 underline: true,
333 ..Default::default()
334 }),
335 ));
336 }
337 regions.push(ParsedRegion {
338 code: true,
339 link: link.clone(),
340 });
341 }
342 Event::Start(tag) => match tag {
343 Tag::Emphasis => italic_depth += 1,
344 Tag::Strong => bold_depth += 1,
345 Tag::Strikethrough => strikethrough_depth += 1,
346 Tag::Link { dest_url, .. } => {
347 link = Link::identify(
348 self.file_location_directory.clone(),
349 dest_url.to_string(),
350 );
351 }
352 Tag::Image { dest_url, .. } => {
353 if !text.is_empty() {
354 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
355 source_range: source_range.clone(),
356 contents: text.clone(),
357 highlights: highlights.clone(),
358 region_ranges: region_ranges.clone(),
359 regions: regions.clone(),
360 });
361 text = String::new();
362 highlights = vec![];
363 region_ranges = vec![];
364 regions = vec![];
365 markdown_text_like.push(parsed_regions);
366 }
367 image = Image::identify(
368 dest_url.to_string(),
369 source_range.clone(),
370 self.file_location_directory.clone(),
371 );
372 }
373 _ => {
374 break;
375 }
376 },
377
378 Event::End(tag) => match tag {
379 TagEnd::Emphasis => italic_depth -= 1,
380 TagEnd::Strong => bold_depth -= 1,
381 TagEnd::Strikethrough => strikethrough_depth -= 1,
382 TagEnd::Link => {
383 link = None;
384 }
385 TagEnd::Image => {
386 if let Some(mut image) = image.take() {
387 if !text.is_empty() {
388 image.set_alt_text(std::mem::take(&mut text).into());
389 }
390 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
391 }
392 }
393 TagEnd::Paragraph => {
394 self.cursor += 1;
395 break;
396 }
397 _ => {
398 break;
399 }
400 },
401 _ => {
402 break;
403 }
404 }
405
406 self.cursor += 1;
407 }
408 if !text.is_empty() {
409 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
410 source_range,
411 contents: text,
412 highlights,
413 regions,
414 region_ranges,
415 }));
416 }
417 markdown_text_like
418 }
419
420 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
421 let (_event, source_range) = self.previous().unwrap();
422 let source_range = source_range.clone();
423 let text = self.parse_text(true, None);
424
425 // Advance past the heading end tag
426 self.cursor += 1;
427
428 ParsedMarkdownHeading {
429 source_range,
430 level: match level {
431 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
432 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
433 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
434 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
435 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
436 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
437 },
438 contents: text,
439 }
440 }
441
442 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
443 let (_event, source_range) = self.previous().unwrap();
444 let source_range = source_range.clone();
445 let mut header = ParsedMarkdownTableRow::new();
446 let mut body = vec![];
447 let mut current_row = vec![];
448 let mut in_header = true;
449 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
450
451 loop {
452 if self.eof() {
453 break;
454 }
455
456 let (current, source_range) = self.current().unwrap();
457 let source_range = source_range.clone();
458 match current {
459 Event::Start(Tag::TableHead)
460 | Event::Start(Tag::TableRow)
461 | Event::End(TagEnd::TableCell) => {
462 self.cursor += 1;
463 }
464 Event::Start(Tag::TableCell) => {
465 self.cursor += 1;
466 let cell_contents = self.parse_text(false, Some(source_range));
467 current_row.push(cell_contents);
468 }
469 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
470 self.cursor += 1;
471 let new_row = std::mem::take(&mut current_row);
472 if in_header {
473 header.children = new_row;
474 in_header = false;
475 } else {
476 let row = ParsedMarkdownTableRow::with_children(new_row);
477 body.push(row);
478 }
479 }
480 Event::End(TagEnd::Table) => {
481 self.cursor += 1;
482 break;
483 }
484 _ => {
485 break;
486 }
487 }
488 }
489
490 ParsedMarkdownTable {
491 source_range,
492 header,
493 body,
494 column_alignments,
495 }
496 }
497
498 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
499 match alignment {
500 Alignment::None => ParsedMarkdownTableAlignment::None,
501 Alignment::Left => ParsedMarkdownTableAlignment::Left,
502 Alignment::Center => ParsedMarkdownTableAlignment::Center,
503 Alignment::Right => ParsedMarkdownTableAlignment::Right,
504 }
505 }
506
507 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
508 let (_, list_source_range) = self.previous().unwrap();
509
510 let mut items = Vec::new();
511 let mut items_stack = vec![MarkdownListItem::default()];
512 let mut depth = 1;
513 let mut order = order;
514 let mut order_stack = Vec::new();
515
516 let mut insertion_indices = FxHashMap::default();
517 let mut source_ranges = FxHashMap::default();
518 let mut start_item_range = list_source_range.clone();
519
520 while !self.eof() {
521 let (current, source_range) = self.current().unwrap();
522 match current {
523 Event::Start(Tag::List(new_order)) => {
524 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
525 insertion_indices.insert(depth, items.len());
526 }
527
528 // We will use the start of the nested list as the end for the current item's range,
529 // because we don't care about the hierarchy of list items
530 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
531 e.insert(start_item_range.start..source_range.start);
532 }
533
534 order_stack.push(order);
535 order = *new_order;
536 self.cursor += 1;
537 depth += 1;
538 }
539 Event::End(TagEnd::List(_)) => {
540 order = order_stack.pop().flatten();
541 self.cursor += 1;
542 depth -= 1;
543
544 if depth == 0 {
545 break;
546 }
547 }
548 Event::Start(Tag::Item) => {
549 start_item_range = source_range.clone();
550
551 self.cursor += 1;
552 items_stack.push(MarkdownListItem::default());
553
554 let mut task_list = None;
555 // Check for task list marker (`- [ ]` or `- [x]`)
556 if let Some(event) = self.current_event() {
557 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
558 if event == &Event::Start(Tag::Paragraph) {
559 self.cursor += 1;
560 }
561
562 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
563 task_list = Some((*checked, range.clone()));
564 self.cursor += 1;
565 }
566 }
567
568 if let Some((event, range)) = self.current() {
569 // This is a plain list item.
570 // For example `- some text` or `1. [Docs](./docs.md)`
571 if MarkdownParser::is_text_like(event) {
572 let text = self.parse_text(false, Some(range.clone()));
573 let block = ParsedMarkdownElement::Paragraph(text);
574 if let Some(content) = items_stack.last_mut() {
575 let item_type = if let Some((checked, range)) = task_list {
576 ParsedMarkdownListItemType::Task(checked, range)
577 } else if let Some(order) = order {
578 ParsedMarkdownListItemType::Ordered(order)
579 } else {
580 ParsedMarkdownListItemType::Unordered
581 };
582 content.item_type = item_type;
583 content.content.push(block);
584 }
585 } else {
586 let block = self.parse_block().await;
587 if let Some(block) = block
588 && let Some(list_item) = items_stack.last_mut()
589 {
590 list_item.content.extend(block);
591 }
592 }
593 }
594
595 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
596 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
597 self.cursor += 1;
598 }
599 }
600 Event::End(TagEnd::Item) => {
601 self.cursor += 1;
602
603 if let Some(current) = order {
604 order = Some(current + 1);
605 }
606
607 if let Some(list_item) = items_stack.pop() {
608 let source_range = source_ranges
609 .remove(&depth)
610 .unwrap_or(start_item_range.clone());
611
612 // We need to remove the last character of the source range, because it includes the newline character
613 let source_range = source_range.start..source_range.end - 1;
614 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
615 source_range,
616 content: list_item.content,
617 depth,
618 item_type: list_item.item_type,
619 });
620
621 if let Some(index) = insertion_indices.get(&depth) {
622 items.insert(*index, item);
623 insertion_indices.remove(&depth);
624 } else {
625 items.push(item);
626 }
627 }
628 }
629 _ => {
630 if depth == 0 {
631 break;
632 }
633 // This can only happen if a list item starts with more then one paragraph,
634 // or the list item contains blocks that should be rendered after the nested list items
635 let block = self.parse_block().await;
636 if let Some(block) = block {
637 if let Some(list_item) = items_stack.last_mut() {
638 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
639 if !insertion_indices.contains_key(&depth) {
640 list_item.content.extend(block);
641 continue;
642 }
643 }
644
645 // Otherwise we need to insert the block after all the nested items
646 // that have been parsed so far
647 items.extend(block);
648 } else {
649 self.cursor += 1;
650 }
651 }
652 }
653 }
654
655 items
656 }
657
658 #[async_recursion]
659 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
660 let (_event, source_range) = self.previous().unwrap();
661 let source_range = source_range.clone();
662 let mut nested_depth = 1;
663
664 let mut children: Vec<ParsedMarkdownElement> = vec![];
665
666 while !self.eof() {
667 let block = self.parse_block().await;
668
669 if let Some(block) = block {
670 children.extend(block);
671 } else {
672 break;
673 }
674
675 if self.eof() {
676 break;
677 }
678
679 let (current, _source_range) = self.current().unwrap();
680 match current {
681 // This is a nested block quote.
682 // Record that we're in a nested block quote and continue parsing.
683 // We don't need to advance the cursor since the next
684 // call to `parse_block` will handle it.
685 Event::Start(Tag::BlockQuote(_kind)) => {
686 nested_depth += 1;
687 }
688 Event::End(TagEnd::BlockQuote(_kind)) => {
689 nested_depth -= 1;
690 if nested_depth == 0 {
691 self.cursor += 1;
692 break;
693 }
694 }
695 _ => {}
696 };
697 }
698
699 ParsedMarkdownBlockQuote {
700 source_range,
701 children,
702 }
703 }
704
705 async fn parse_code_block(
706 &mut self,
707 language: Option<String>,
708 ) -> Option<ParsedMarkdownCodeBlock> {
709 let Some((_event, source_range)) = self.previous() else {
710 return None;
711 };
712
713 let source_range = source_range.clone();
714 let mut code = String::new();
715
716 while !self.eof() {
717 let Some((current, _source_range)) = self.current() else {
718 break;
719 };
720
721 match current {
722 Event::Text(text) => {
723 code.push_str(text);
724 self.cursor += 1;
725 }
726 Event::End(TagEnd::CodeBlock) => {
727 self.cursor += 1;
728 break;
729 }
730 _ => {
731 break;
732 }
733 }
734 }
735
736 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
737
738 let highlights = if let Some(language) = &language {
739 if let Some(registry) = &self.language_registry {
740 let rope: language::Rope = code.as_str().into();
741 registry
742 .language_for_name_or_extension(language)
743 .await
744 .map(|l| l.highlight_text(&rope, 0..code.len()))
745 .ok()
746 } else {
747 None
748 }
749 } else {
750 None
751 };
752
753 Some(ParsedMarkdownCodeBlock {
754 source_range,
755 contents: code.into(),
756 language,
757 highlights,
758 })
759 }
760
761 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
762 let mut elements = Vec::new();
763 let Some((_event, _source_range)) = self.previous() else {
764 return elements;
765 };
766
767 while !self.eof() {
768 let Some((current, source_range)) = self.current() else {
769 break;
770 };
771 let source_range = source_range.clone();
772 match current {
773 Event::Html(html) => {
774 let mut cursor = std::io::Cursor::new(html.as_bytes());
775 let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
776 .from_utf8()
777 .read_from(&mut cursor)
778 .ok()
779 else {
780 self.cursor += 1;
781 continue;
782 };
783
784 self.cursor += 1;
785
786 self.parse_html_node(source_range, &dom.document, &mut elements);
787 }
788 Event::End(TagEnd::CodeBlock) => {
789 self.cursor += 1;
790 break;
791 }
792 _ => {
793 break;
794 }
795 }
796 }
797
798 elements
799 }
800
801 fn parse_html_node(
802 &self,
803 source_range: Range<usize>,
804 node: &Rc<markup5ever_rcdom::Node>,
805 elements: &mut Vec<ParsedMarkdownElement>,
806 ) {
807 match &node.data {
808 markup5ever_rcdom::NodeData::Document => {
809 self.consume_children(source_range, node, elements);
810 }
811 markup5ever_rcdom::NodeData::Doctype { .. } => {}
812 markup5ever_rcdom::NodeData::Text { contents } => {
813 elements.push(ParsedMarkdownElement::Paragraph(vec![
814 MarkdownParagraphChunk::Text(ParsedMarkdownText {
815 source_range,
816 contents: contents.borrow().to_string(),
817 highlights: Vec::default(),
818 region_ranges: Vec::default(),
819 regions: Vec::default(),
820 }),
821 ]));
822 }
823 markup5ever_rcdom::NodeData::Comment { .. } => {}
824 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
825 if local_name!("img") == name.local {
826 if let Some(image) = self.extract_image(source_range, attrs) {
827 elements.push(ParsedMarkdownElement::Image(image));
828 }
829 } else {
830 self.consume_children(source_range, node, elements);
831 }
832 }
833 markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
834 }
835 }
836
837 fn consume_children(
838 &self,
839 source_range: Range<usize>,
840 node: &Rc<markup5ever_rcdom::Node>,
841 elements: &mut Vec<ParsedMarkdownElement>,
842 ) {
843 for node in node.children.borrow().iter() {
844 self.parse_html_node(source_range.clone(), node, elements);
845 }
846 }
847
848 fn attr_value(
849 attrs: &RefCell<Vec<html5ever::Attribute>>,
850 name: html5ever::LocalName,
851 ) -> Option<String> {
852 attrs.borrow().iter().find_map(|attr| {
853 if attr.name.local == name {
854 Some(attr.value.to_string())
855 } else {
856 None
857 }
858 })
859 }
860
861 fn extract_styles_from_attributes(
862 attrs: &RefCell<Vec<html5ever::Attribute>>,
863 ) -> HashMap<String, String> {
864 let mut styles = HashMap::new();
865
866 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
867 for decl in style.split(';') {
868 let mut parts = decl.splitn(2, ':');
869 if let Some((key, value)) = parts.next().zip(parts.next()) {
870 styles.insert(
871 key.trim().to_lowercase().to_string(),
872 value.trim().to_string(),
873 );
874 }
875 }
876 }
877
878 styles
879 }
880
881 fn extract_image(
882 &self,
883 source_range: Range<usize>,
884 attrs: &RefCell<Vec<html5ever::Attribute>>,
885 ) -> Option<Image> {
886 let src = Self::attr_value(attrs, local_name!("src"))?;
887
888 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
889
890 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
891 image.set_alt_text(alt.into());
892 }
893
894 let styles = Self::extract_styles_from_attributes(attrs);
895
896 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
897 .or_else(|| styles.get("width").cloned())
898 .and_then(|width| Self::parse_length(&width))
899 {
900 image.set_width(width);
901 }
902
903 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
904 .or_else(|| styles.get("height").cloned())
905 .and_then(|height| Self::parse_length(&height))
906 {
907 image.set_height(height);
908 }
909
910 Some(image)
911 }
912
913 /// Parses the width/height attribute value of an html element (e.g. img element)
914 fn parse_length(value: &str) -> Option<DefiniteLength> {
915 if value.ends_with("%") {
916 value
917 .trim_end_matches("%")
918 .parse::<f32>()
919 .ok()
920 .map(|value| relative(value / 100.))
921 } else {
922 value
923 .trim_end_matches("px")
924 .parse()
925 .ok()
926 .map(|value| px(value).into())
927 }
928 }
929}
930
931#[cfg(test)]
932mod tests {
933 use super::*;
934 use ParsedMarkdownListItemType::*;
935 use core::panic;
936 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
937 use language::{
938 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
939 };
940 use pretty_assertions::assert_eq;
941
942 async fn parse(input: &str) -> ParsedMarkdown {
943 parse_markdown(input, None, None).await
944 }
945
946 #[gpui::test]
947 async fn test_headings() {
948 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
949
950 assert_eq!(
951 parsed.children,
952 vec![
953 h1(text("Heading one", 2..13), 0..14),
954 h2(text("Heading two", 17..28), 14..29),
955 h3(text("Heading three", 33..46), 29..46),
956 ]
957 );
958 }
959
960 #[gpui::test]
961 async fn test_newlines_dont_new_paragraphs() {
962 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
963
964 assert_eq!(
965 parsed.children,
966 vec![p("Some text that is bolded and italicized", 0..46)]
967 );
968 }
969
970 #[gpui::test]
971 async fn test_heading_with_paragraph() {
972 let parsed = parse("# Zed\nThe editor").await;
973
974 assert_eq!(
975 parsed.children,
976 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
977 );
978 }
979
980 #[gpui::test]
981 async fn test_double_newlines_do_new_paragraphs() {
982 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
983
984 assert_eq!(
985 parsed.children,
986 vec![
987 p("Some text that is bolded", 0..29),
988 p("and italicized", 31..47),
989 ]
990 );
991 }
992
993 #[gpui::test]
994 async fn test_bold_italic_text() {
995 let parsed = parse("Some text **that is bolded** and *italicized*").await;
996
997 assert_eq!(
998 parsed.children,
999 vec![p("Some text that is bolded and italicized", 0..45)]
1000 );
1001 }
1002
1003 #[gpui::test]
1004 async fn test_nested_bold_strikethrough_text() {
1005 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1006
1007 assert_eq!(parsed.children.len(), 1);
1008 assert_eq!(
1009 parsed.children[0],
1010 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1011 ParsedMarkdownText {
1012 source_range: 0..35,
1013 contents: "Some bostrikethroughld text".to_string(),
1014 highlights: Vec::new(),
1015 region_ranges: Vec::new(),
1016 regions: Vec::new(),
1017 }
1018 )])
1019 );
1020
1021 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1022 text
1023 } else {
1024 panic!("Expected a paragraph");
1025 };
1026
1027 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1028 text
1029 } else {
1030 panic!("Expected a text");
1031 };
1032
1033 assert_eq!(
1034 paragraph.highlights,
1035 vec![
1036 (
1037 5..7,
1038 MarkdownHighlight::Style(MarkdownHighlightStyle {
1039 weight: FontWeight::BOLD,
1040 ..Default::default()
1041 }),
1042 ),
1043 (
1044 7..20,
1045 MarkdownHighlight::Style(MarkdownHighlightStyle {
1046 weight: FontWeight::BOLD,
1047 strikethrough: true,
1048 ..Default::default()
1049 }),
1050 ),
1051 (
1052 20..22,
1053 MarkdownHighlight::Style(MarkdownHighlightStyle {
1054 weight: FontWeight::BOLD,
1055 ..Default::default()
1056 }),
1057 ),
1058 ]
1059 );
1060 }
1061
1062 #[gpui::test]
1063 async fn test_text_with_inline_html() {
1064 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1065
1066 assert_eq!(
1067 parsed.children,
1068 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1069 );
1070 }
1071
1072 #[gpui::test]
1073 async fn test_raw_links_detection() {
1074 let parsed = parse("Checkout this https://zed.dev link").await;
1075
1076 assert_eq!(
1077 parsed.children,
1078 vec![p("Checkout this https://zed.dev link", 0..34)]
1079 );
1080 }
1081
1082 #[gpui::test]
1083 async fn test_empty_image() {
1084 let parsed = parse("![]()").await;
1085
1086 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1087 text
1088 } else {
1089 panic!("Expected a paragraph");
1090 };
1091 assert_eq!(paragraph.len(), 0);
1092 }
1093
1094 #[gpui::test]
1095 async fn test_image_links_detection() {
1096 let parsed = parse("").await;
1097
1098 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1099 text
1100 } else {
1101 panic!("Expected a paragraph");
1102 };
1103 assert_eq!(
1104 paragraph[0],
1105 MarkdownParagraphChunk::Image(Image {
1106 source_range: 0..111,
1107 link: Link::Web {
1108 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1109 },
1110 alt_text: Some("test".into()),
1111 height: None,
1112 width: None,
1113 },)
1114 );
1115 }
1116
1117 #[gpui::test]
1118 async fn test_image_without_alt_text() {
1119 let parsed = parse("").await;
1120
1121 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1122 text
1123 } else {
1124 panic!("Expected a paragraph");
1125 };
1126 assert_eq!(
1127 paragraph[0],
1128 MarkdownParagraphChunk::Image(Image {
1129 source_range: 0..31,
1130 link: Link::Web {
1131 url: "http://example.com/foo.png".to_string(),
1132 },
1133 alt_text: None,
1134 height: None,
1135 width: None,
1136 },)
1137 );
1138 }
1139
1140 #[gpui::test]
1141 async fn test_image_with_alt_text_containing_formatting() {
1142 let parsed = parse("").await;
1143
1144 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1145 panic!("Expected a paragraph");
1146 };
1147 assert_eq!(
1148 chunks,
1149 &[MarkdownParagraphChunk::Image(Image {
1150 source_range: 0..44,
1151 link: Link::Web {
1152 url: "http://example.com/foo.png".to_string(),
1153 },
1154 alt_text: Some("foo bar baz".into()),
1155 height: None,
1156 width: None,
1157 }),],
1158 );
1159 }
1160
1161 #[gpui::test]
1162 async fn test_images_with_text_in_between() {
1163 let parsed = parse(
1164 "\nLorem Ipsum\n",
1165 )
1166 .await;
1167
1168 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1169 text
1170 } else {
1171 panic!("Expected a paragraph");
1172 };
1173 assert_eq!(
1174 chunks,
1175 &vec![
1176 MarkdownParagraphChunk::Image(Image {
1177 source_range: 0..81,
1178 link: Link::Web {
1179 url: "http://example.com/foo.png".to_string(),
1180 },
1181 alt_text: Some("foo".into()),
1182 height: None,
1183 width: None,
1184 }),
1185 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1186 source_range: 0..81,
1187 contents: " Lorem Ipsum ".to_string(),
1188 highlights: Vec::new(),
1189 region_ranges: Vec::new(),
1190 regions: Vec::new(),
1191 }),
1192 MarkdownParagraphChunk::Image(Image {
1193 source_range: 0..81,
1194 link: Link::Web {
1195 url: "http://example.com/bar.png".to_string(),
1196 },
1197 alt_text: Some("bar".into()),
1198 height: None,
1199 width: None,
1200 })
1201 ]
1202 );
1203 }
1204
#[test]
fn test_parse_length() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels =
        |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    // Test percentage values
    for (input, expected) in [("50%", 0.5), ("100%", 1.0), ("25%", 0.25), ("0%", 0.0)] {
        assert_eq!(MarkdownParser::parse_length(input), fraction(expected));
    }

    // Test pixel values
    for (input, expected) in [("100px", 100.0), ("50px", 50.0), ("0px", 0.0)] {
        assert_eq!(MarkdownParser::parse_length(input), pixels(expected));
    }

    // Test values without units (should be treated as pixels)
    for (input, expected) in [("100", 100.0), ("42", 42.0)] {
        assert_eq!(MarkdownParser::parse_length(input), pixels(expected));
    }

    // Test invalid values
    for input in ["invalid", "px", "%", "", "abc%", "abcpx"] {
        assert_eq!(MarkdownParser::parse_length(input), None);
    }

    // Test decimal values
    assert_eq!(MarkdownParser::parse_length("50.5%"), fraction(0.505));
    assert_eq!(MarkdownParser::parse_length("100.25px"), pixels(100.25));
    assert_eq!(MarkdownParser::parse_length("42.0"), pixels(42.0));
}
1271
1272 #[gpui::test]
1273 async fn test_html_image_tag() {
1274 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1275
1276 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1277 panic!("Expected a image element");
1278 };
1279 assert_eq!(
1280 image.clone(),
1281 Image {
1282 source_range: 0..40,
1283 link: Link::Web {
1284 url: "http://example.com/foo.png".to_string(),
1285 },
1286 alt_text: None,
1287 height: None,
1288 width: None,
1289 },
1290 );
1291 }
1292
1293 #[gpui::test]
1294 async fn test_html_image_tag_with_alt_text() {
1295 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1296
1297 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1298 panic!("Expected a image element");
1299 };
1300 assert_eq!(
1301 image.clone(),
1302 Image {
1303 source_range: 0..50,
1304 link: Link::Web {
1305 url: "http://example.com/foo.png".to_string(),
1306 },
1307 alt_text: Some("Foo".into()),
1308 height: None,
1309 width: None,
1310 },
1311 );
1312 }
1313
1314 #[gpui::test]
1315 async fn test_html_image_tag_with_height_and_width() {
1316 let parsed =
1317 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1318
1319 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1320 panic!("Expected a image element");
1321 };
1322 assert_eq!(
1323 image.clone(),
1324 Image {
1325 source_range: 0..65,
1326 link: Link::Web {
1327 url: "http://example.com/foo.png".to_string(),
1328 },
1329 alt_text: None,
1330 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1331 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1332 },
1333 );
1334 }
1335
1336 #[gpui::test]
1337 async fn test_html_image_style_tag_with_height_and_width() {
1338 let parsed = parse(
1339 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1340 )
1341 .await;
1342
1343 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1344 panic!("Expected a image element");
1345 };
1346 assert_eq!(
1347 image.clone(),
1348 Image {
1349 source_range: 0..75,
1350 link: Link::Web {
1351 url: "http://example.com/foo.png".to_string(),
1352 },
1353 alt_text: None,
1354 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1355 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1356 },
1357 );
1358 }
1359
1360 #[gpui::test]
1361 async fn test_header_only_table() {
1362 let markdown = "\
1363| Header 1 | Header 2 |
1364|----------|----------|
1365
1366Some other content
1367";
1368
1369 let expected_table = table(
1370 0..48,
1371 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1372 vec![],
1373 );
1374
1375 assert_eq!(
1376 parse(markdown).await.children[0],
1377 ParsedMarkdownElement::Table(expected_table)
1378 );
1379 }
1380
1381 #[gpui::test]
1382 async fn test_basic_table() {
1383 let markdown = "\
1384| Header 1 | Header 2 |
1385|----------|----------|
1386| Cell 1 | Cell 2 |
1387| Cell 3 | Cell 4 |";
1388
1389 let expected_table = table(
1390 0..95,
1391 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1392 vec![
1393 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1394 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1395 ],
1396 );
1397
1398 assert_eq!(
1399 parse(markdown).await.children[0],
1400 ParsedMarkdownElement::Table(expected_table)
1401 );
1402 }
1403
1404 #[gpui::test]
1405 async fn test_list_basic() {
1406 let parsed = parse(
1407 "\
1408* Item 1
1409* Item 2
1410* Item 3
1411",
1412 )
1413 .await;
1414
1415 assert_eq!(
1416 parsed.children,
1417 vec![
1418 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1419 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1420 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1421 ],
1422 );
1423 }
1424
1425 #[gpui::test]
1426 async fn test_list_with_tasks() {
1427 let parsed = parse(
1428 "\
1429- [ ] TODO
1430- [x] Checked
1431",
1432 )
1433 .await;
1434
1435 assert_eq!(
1436 parsed.children,
1437 vec![
1438 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1439 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1440 ],
1441 );
1442 }
1443
1444 #[gpui::test]
1445 async fn test_list_with_indented_task() {
1446 let parsed = parse(
1447 "\
1448- [ ] TODO
1449 - [x] Checked
1450 - Unordered
1451 1. Number 1
1452 1. Number 2
14531. Number A
1454",
1455 )
1456 .await;
1457
1458 assert_eq!(
1459 parsed.children,
1460 vec![
1461 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1462 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1463 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1464 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1465 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1466 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1467 ],
1468 );
1469 }
1470
1471 #[gpui::test]
1472 async fn test_list_with_linebreak_is_handled_correctly() {
1473 let parsed = parse(
1474 "\
1475- [ ] Task 1
1476
1477- [x] Task 2
1478",
1479 )
1480 .await;
1481
1482 assert_eq!(
1483 parsed.children,
1484 vec![
1485 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1486 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1487 ],
1488 );
1489 }
1490
1491 #[gpui::test]
1492 async fn test_list_nested() {
1493 let parsed = parse(
1494 "\
1495* Item 1
1496* Item 2
1497* Item 3
1498
14991. Hello
15001. Two
1501 1. Three
15022. Four
15033. Five
1504
1505* First
1506 1. Hello
1507 1. Goodbyte
1508 - Inner
1509 - Inner
1510 2. Goodbyte
1511 - Next item empty
1512 -
1513* Last
1514",
1515 )
1516 .await;
1517
1518 assert_eq!(
1519 parsed.children,
1520 vec![
1521 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1522 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1523 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1524 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1525 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1526 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1527 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1528 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1529 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1530 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1531 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1532 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1533 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1534 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1535 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1536 list_item(186..190, 3, Unordered, vec![]),
1537 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1538 ]
1539 );
1540 }
1541
1542 #[gpui::test]
1543 async fn test_list_with_nested_content() {
1544 let parsed = parse(
1545 "\
1546* This is a list item with two paragraphs.
1547
1548 This is the second paragraph in the list item.
1549",
1550 )
1551 .await;
1552
1553 assert_eq!(
1554 parsed.children,
1555 vec![list_item(
1556 0..96,
1557 1,
1558 Unordered,
1559 vec![
1560 p("This is a list item with two paragraphs.", 4..44),
1561 p("This is the second paragraph in the list item.", 50..97)
1562 ],
1563 ),],
1564 );
1565 }
1566
1567 #[gpui::test]
1568 async fn test_list_item_with_inline_html() {
1569 let parsed = parse(
1570 "\
1571* This is a list item with an inline HTML <sometag>tag</sometag>.
1572",
1573 )
1574 .await;
1575
1576 assert_eq!(
1577 parsed.children,
1578 vec![list_item(
1579 0..67,
1580 1,
1581 Unordered,
1582 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1583 ),],
1584 );
1585 }
1586
1587 #[gpui::test]
1588 async fn test_nested_list_with_paragraph_inside() {
1589 let parsed = parse(
1590 "\
15911. a
1592 1. b
1593 1. c
1594
1595 text
1596
1597 1. d
1598",
1599 )
1600 .await;
1601
1602 assert_eq!(
1603 parsed.children,
1604 vec![
1605 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1606 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1607 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1608 p("text", 32..37),
1609 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1610 ],
1611 );
1612 }
1613
1614 #[gpui::test]
1615 async fn test_list_with_leading_text() {
1616 let parsed = parse(
1617 "\
1618* `code`
1619* **bold**
1620* [link](https://example.com)
1621",
1622 )
1623 .await;
1624
1625 assert_eq!(
1626 parsed.children,
1627 vec![
1628 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1629 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1630 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1631 ],
1632 );
1633 }
1634
1635 #[gpui::test]
1636 async fn test_simple_block_quote() {
1637 let parsed = parse("> Simple block quote with **styled text**").await;
1638
1639 assert_eq!(
1640 parsed.children,
1641 vec![block_quote(
1642 vec![p("Simple block quote with styled text", 2..41)],
1643 0..41
1644 )]
1645 );
1646 }
1647
1648 #[gpui::test]
1649 async fn test_simple_block_quote_with_multiple_lines() {
1650 let parsed = parse(
1651 "\
1652> # Heading
1653> More
1654> text
1655>
1656> More text
1657",
1658 )
1659 .await;
1660
1661 assert_eq!(
1662 parsed.children,
1663 vec![block_quote(
1664 vec![
1665 h1(text("Heading", 4..11), 2..12),
1666 p("More text", 14..26),
1667 p("More text", 30..40)
1668 ],
1669 0..40
1670 )]
1671 );
1672 }
1673
1674 #[gpui::test]
1675 async fn test_nested_block_quote() {
1676 let parsed = parse(
1677 "\
1678> A
1679>
1680> > # B
1681>
1682> C
1683
1684More text
1685",
1686 )
1687 .await;
1688
1689 assert_eq!(
1690 parsed.children,
1691 vec![
1692 block_quote(
1693 vec![
1694 p("A", 2..4),
1695 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1696 p("C", 18..20)
1697 ],
1698 0..20
1699 ),
1700 p("More text", 21..31)
1701 ]
1702 );
1703 }
1704
1705 #[gpui::test]
1706 async fn test_code_block() {
1707 let parsed = parse(
1708 "\
1709```
1710fn main() {
1711 return 0;
1712}
1713```
1714",
1715 )
1716 .await;
1717
1718 assert_eq!(
1719 parsed.children,
1720 vec![code_block(
1721 None,
1722 "fn main() {\n return 0;\n}",
1723 0..35,
1724 None
1725 )]
1726 );
1727 }
1728
1729 #[gpui::test]
1730 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1731 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1732 language_registry.add(rust_lang());
1733
1734 let parsed = parse_markdown(
1735 "\
1736```rust
1737fn main() {
1738 return 0;
1739}
1740```
1741",
1742 None,
1743 Some(language_registry),
1744 )
1745 .await;
1746
1747 assert_eq!(
1748 parsed.children,
1749 vec![code_block(
1750 Some("rust".to_string()),
1751 "fn main() {\n return 0;\n}",
1752 0..39,
1753 Some(vec![])
1754 )]
1755 );
1756 }
1757
1758 fn rust_lang() -> Arc<Language> {
1759 Arc::new(Language::new(
1760 LanguageConfig {
1761 name: "Rust".into(),
1762 matcher: LanguageMatcher {
1763 path_suffixes: vec!["rs".into()],
1764 ..Default::default()
1765 },
1766 collapsed_placeholder: " /* ... */ ".to_string(),
1767 ..Default::default()
1768 },
1769 Some(tree_sitter_rust::LANGUAGE.into()),
1770 ))
1771 }
1772
1773 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1774 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1775 source_range,
1776 level: HeadingLevel::H1,
1777 contents,
1778 })
1779 }
1780
1781 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1782 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1783 source_range,
1784 level: HeadingLevel::H2,
1785 contents,
1786 })
1787 }
1788
1789 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1790 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1791 source_range,
1792 level: HeadingLevel::H3,
1793 contents,
1794 })
1795 }
1796
1797 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1798 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1799 }
1800
1801 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1802 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1803 highlights: Vec::new(),
1804 region_ranges: Vec::new(),
1805 regions: Vec::new(),
1806 source_range,
1807 contents: contents.to_string(),
1808 })]
1809 }
1810
1811 fn block_quote(
1812 children: Vec<ParsedMarkdownElement>,
1813 source_range: Range<usize>,
1814 ) -> ParsedMarkdownElement {
1815 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1816 source_range,
1817 children,
1818 })
1819 }
1820
1821 fn code_block(
1822 language: Option<String>,
1823 code: &str,
1824 source_range: Range<usize>,
1825 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1826 ) -> ParsedMarkdownElement {
1827 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1828 source_range,
1829 language,
1830 contents: code.to_string().into(),
1831 highlights,
1832 })
1833 }
1834
1835 fn list_item(
1836 source_range: Range<usize>,
1837 depth: u16,
1838 item_type: ParsedMarkdownListItemType,
1839 content: Vec<ParsedMarkdownElement>,
1840 ) -> ParsedMarkdownElement {
1841 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1842 source_range,
1843 item_type,
1844 depth,
1845 content,
1846 })
1847 }
1848
1849 fn table(
1850 source_range: Range<usize>,
1851 header: ParsedMarkdownTableRow,
1852 body: Vec<ParsedMarkdownTableRow>,
1853 ) -> ParsedMarkdownTable {
1854 ParsedMarkdownTable {
1855 column_alignments: Vec::new(),
1856 source_range,
1857 header,
1858 body,
1859 }
1860 }
1861
1862 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1863 ParsedMarkdownTableRow { children }
1864 }
1865
1866 impl PartialEq for ParsedMarkdownTable {
1867 fn eq(&self, other: &Self) -> bool {
1868 self.source_range == other.source_range
1869 && self.header == other.header
1870 && self.body == other.body
1871 }
1872 }
1873
1874 impl PartialEq for ParsedMarkdownText {
1875 fn eq(&self, other: &Self) -> bool {
1876 self.source_range == other.source_range && self.contents == other.contents
1877 }
1878 }
1879}