1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::{DefiniteLength, FontWeight, px, relative};
5use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
6use language::LanguageRegistry;
7use markup5ever_rcdom::RcDom;
8use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
9use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
10
11pub async fn parse_markdown(
12 markdown_input: &str,
13 file_location_directory: Option<PathBuf>,
14 language_registry: Option<Arc<LanguageRegistry>>,
15) -> ParsedMarkdown {
16 let mut options = Options::all();
17 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
18
19 let parser = Parser::new_ext(markdown_input, options);
20 let parser = MarkdownParser::new(
21 parser.into_offset_iter().collect(),
22 file_location_directory,
23 language_registry,
24 );
25 let renderer = parser.parse_document().await;
26 ParsedMarkdown {
27 children: renderer.parsed,
28 }
29}
30
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with the source range it came from
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Directory passed to `Link::identify` / `Image::identify` when links and
    /// images are encountered
    file_location_directory: Option<PathBuf>,
    /// Registry used to look up a language for highlighting code blocks, if any
    language_registry: Option<Arc<LanguageRegistry>>,
}
40
/// A list item that is still being assembled while its events are consumed.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered or a task
    item_type: ParsedMarkdownListItemType,
}
45
46impl Default for MarkdownListItem {
47 fn default() -> Self {
48 Self {
49 content: Vec::new(),
50 item_type: ParsedMarkdownListItemType::Unordered,
51 }
52 }
53}
54
55impl<'a> MarkdownParser<'a> {
56 fn new(
57 tokens: Vec<(Event<'a>, Range<usize>)>,
58 file_location_directory: Option<PathBuf>,
59 language_registry: Option<Arc<LanguageRegistry>>,
60 ) -> Self {
61 Self {
62 tokens,
63 file_location_directory,
64 language_registry,
65 cursor: 0,
66 parsed: vec![],
67 }
68 }
69
70 fn eof(&self) -> bool {
71 if self.tokens.is_empty() {
72 return true;
73 }
74 self.cursor >= self.tokens.len() - 1
75 }
76
77 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
78 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
79 return self.tokens.last();
80 }
81 self.tokens.get(self.cursor + steps)
82 }
83
84 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
85 if self.cursor == 0 || self.cursor > self.tokens.len() {
86 return None;
87 }
88 self.tokens.get(self.cursor - 1)
89 }
90
91 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
92 self.peek(0)
93 }
94
95 fn current_event(&self) -> Option<&Event<'_>> {
96 self.current().map(|(event, _)| event)
97 }
98
99 fn is_text_like(event: &Event) -> bool {
100 match event {
101 Event::Text(_)
102 // Represent an inline code block
103 | Event::Code(_)
104 | Event::Html(_)
105 | Event::InlineHtml(_)
106 | Event::FootnoteReference(_)
107 | Event::Start(Tag::Link { .. })
108 | Event::Start(Tag::Emphasis)
109 | Event::Start(Tag::Strong)
110 | Event::Start(Tag::Strikethrough)
111 | Event::Start(Tag::Image { .. }) => {
112 true
113 }
114 _ => false,
115 }
116 }
117
118 async fn parse_document(mut self) -> Self {
119 while !self.eof() {
120 if let Some(block) = self.parse_block().await {
121 self.parsed.extend(block);
122 } else {
123 self.cursor += 1;
124 }
125 }
126 self
127 }
128
129 #[async_recursion]
130 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
131 let (current, source_range) = self.current().unwrap();
132 let source_range = source_range.clone();
133 match current {
134 Event::Start(tag) => match tag {
135 Tag::Paragraph => {
136 self.cursor += 1;
137 let text = self.parse_text(false, Some(source_range));
138 Some(vec![ParsedMarkdownElement::Paragraph(text)])
139 }
140 Tag::Heading { level, .. } => {
141 let level = *level;
142 self.cursor += 1;
143 let heading = self.parse_heading(level);
144 Some(vec![ParsedMarkdownElement::Heading(heading)])
145 }
146 Tag::Table(alignment) => {
147 let alignment = alignment.clone();
148 self.cursor += 1;
149 let table = self.parse_table(alignment);
150 Some(vec![ParsedMarkdownElement::Table(table)])
151 }
152 Tag::List(order) => {
153 let order = *order;
154 self.cursor += 1;
155 let list = self.parse_list(order).await;
156 Some(list)
157 }
158 Tag::BlockQuote(_kind) => {
159 self.cursor += 1;
160 let block_quote = self.parse_block_quote().await;
161 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
162 }
163 Tag::CodeBlock(kind) => {
164 let language = match kind {
165 pulldown_cmark::CodeBlockKind::Indented => None,
166 pulldown_cmark::CodeBlockKind::Fenced(language) => {
167 if language.is_empty() {
168 None
169 } else {
170 Some(language.to_string())
171 }
172 }
173 };
174
175 self.cursor += 1;
176
177 let code_block = self.parse_code_block(language).await?;
178 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
179 }
180 Tag::HtmlBlock => {
181 self.cursor += 1;
182
183 Some(self.parse_html_block().await)
184 }
185 _ => None,
186 },
187 Event::Rule => {
188 self.cursor += 1;
189 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
190 }
191 _ => None,
192 }
193 }
194
195 fn parse_text(
196 &mut self,
197 should_complete_on_soft_break: bool,
198 source_range: Option<Range<usize>>,
199 ) -> MarkdownParagraph {
200 let source_range = source_range.unwrap_or_else(|| {
201 self.current()
202 .map(|(_, range)| range.clone())
203 .unwrap_or_default()
204 });
205
206 let mut markdown_text_like = Vec::new();
207 let mut text = String::new();
208 let mut bold_depth = 0;
209 let mut italic_depth = 0;
210 let mut strikethrough_depth = 0;
211 let mut link: Option<Link> = None;
212 let mut image: Option<Image> = None;
213 let mut region_ranges: Vec<Range<usize>> = vec![];
214 let mut regions: Vec<ParsedRegion> = vec![];
215 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
216 let mut link_urls: Vec<String> = vec![];
217 let mut link_ranges: Vec<Range<usize>> = vec![];
218
219 loop {
220 if self.eof() {
221 break;
222 }
223
224 let (current, _) = self.current().unwrap();
225 let prev_len = text.len();
226 match current {
227 Event::SoftBreak => {
228 if should_complete_on_soft_break {
229 break;
230 }
231 text.push(' ');
232 }
233
234 Event::HardBreak => {
235 text.push('\n');
236 }
237
238 // We want to ignore any inline HTML tags in the text but keep
239 // the text between them
240 Event::InlineHtml(_) => {}
241
242 Event::Text(t) => {
243 text.push_str(t.as_ref());
244 let mut style = MarkdownHighlightStyle::default();
245
246 if bold_depth > 0 {
247 style.weight = FontWeight::BOLD;
248 }
249
250 if italic_depth > 0 {
251 style.italic = true;
252 }
253
254 if strikethrough_depth > 0 {
255 style.strikethrough = true;
256 }
257
258 let last_run_len = if let Some(link) = link.clone() {
259 region_ranges.push(prev_len..text.len());
260 regions.push(ParsedRegion {
261 code: false,
262 link: Some(link),
263 });
264 style.underline = true;
265 prev_len
266 } else {
267 // Manually scan for links
268 let mut finder = linkify::LinkFinder::new();
269 finder.kinds(&[linkify::LinkKind::Url]);
270 let mut last_link_len = prev_len;
271 for link in finder.links(t) {
272 let start = link.start();
273 let end = link.end();
274 let range = (prev_len + start)..(prev_len + end);
275 link_ranges.push(range.clone());
276 link_urls.push(link.as_str().to_string());
277
278 // If there is a style before we match a link, we have to add this to the highlighted ranges
279 if style != MarkdownHighlightStyle::default()
280 && last_link_len < link.start()
281 {
282 highlights.push((
283 last_link_len..link.start(),
284 MarkdownHighlight::Style(style.clone()),
285 ));
286 }
287
288 highlights.push((
289 range.clone(),
290 MarkdownHighlight::Style(MarkdownHighlightStyle {
291 underline: true,
292 ..style
293 }),
294 ));
295 region_ranges.push(range.clone());
296 regions.push(ParsedRegion {
297 code: false,
298 link: Some(Link::Web {
299 url: link.as_str().to_string(),
300 }),
301 });
302 last_link_len = end;
303 }
304 last_link_len
305 };
306
307 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
308 let mut new_highlight = true;
309 if let Some((last_range, last_style)) = highlights.last_mut()
310 && last_range.end == last_run_len
311 && last_style == &MarkdownHighlight::Style(style.clone())
312 {
313 last_range.end = text.len();
314 new_highlight = false;
315 }
316 if new_highlight {
317 highlights.push((
318 last_run_len..text.len(),
319 MarkdownHighlight::Style(style.clone()),
320 ));
321 }
322 }
323 }
324 Event::Code(t) => {
325 text.push_str(t.as_ref());
326 region_ranges.push(prev_len..text.len());
327
328 if link.is_some() {
329 highlights.push((
330 prev_len..text.len(),
331 MarkdownHighlight::Style(MarkdownHighlightStyle {
332 underline: true,
333 ..Default::default()
334 }),
335 ));
336 }
337 regions.push(ParsedRegion {
338 code: true,
339 link: link.clone(),
340 });
341 }
342 Event::Start(tag) => match tag {
343 Tag::Emphasis => italic_depth += 1,
344 Tag::Strong => bold_depth += 1,
345 Tag::Strikethrough => strikethrough_depth += 1,
346 Tag::Link { dest_url, .. } => {
347 link = Link::identify(
348 self.file_location_directory.clone(),
349 dest_url.to_string(),
350 );
351 }
352 Tag::Image { dest_url, .. } => {
353 if !text.is_empty() {
354 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
355 source_range: source_range.clone(),
356 contents: text.clone(),
357 highlights: highlights.clone(),
358 region_ranges: region_ranges.clone(),
359 regions: regions.clone(),
360 });
361 text = String::new();
362 highlights = vec![];
363 region_ranges = vec![];
364 regions = vec![];
365 markdown_text_like.push(parsed_regions);
366 }
367 image = Image::identify(
368 dest_url.to_string(),
369 source_range.clone(),
370 self.file_location_directory.clone(),
371 );
372 }
373 _ => {
374 break;
375 }
376 },
377
378 Event::End(tag) => match tag {
379 TagEnd::Emphasis => italic_depth -= 1,
380 TagEnd::Strong => bold_depth -= 1,
381 TagEnd::Strikethrough => strikethrough_depth -= 1,
382 TagEnd::Link => {
383 link = None;
384 }
385 TagEnd::Image => {
386 if let Some(mut image) = image.take() {
387 if !text.is_empty() {
388 image.set_alt_text(std::mem::take(&mut text).into());
389 }
390 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
391 }
392 }
393 TagEnd::Paragraph => {
394 self.cursor += 1;
395 break;
396 }
397 _ => {
398 break;
399 }
400 },
401 _ => {
402 break;
403 }
404 }
405
406 self.cursor += 1;
407 }
408 if !text.is_empty() {
409 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
410 source_range,
411 contents: text,
412 highlights,
413 regions,
414 region_ranges,
415 }));
416 }
417 markdown_text_like
418 }
419
420 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
421 let (_event, source_range) = self.previous().unwrap();
422 let source_range = source_range.clone();
423 let text = self.parse_text(true, None);
424
425 // Advance past the heading end tag
426 self.cursor += 1;
427
428 ParsedMarkdownHeading {
429 source_range,
430 level: match level {
431 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
432 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
433 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
434 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
435 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
436 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
437 },
438 contents: text,
439 }
440 }
441
442 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
443 let (_event, source_range) = self.previous().unwrap();
444 let source_range = source_range.clone();
445 let mut header = ParsedMarkdownTableRow::new();
446 let mut body = vec![];
447 let mut current_row = vec![];
448 let mut in_header = true;
449 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
450
451 loop {
452 if self.eof() {
453 break;
454 }
455
456 let (current, source_range) = self.current().unwrap();
457 let source_range = source_range.clone();
458 match current {
459 Event::Start(Tag::TableHead)
460 | Event::Start(Tag::TableRow)
461 | Event::End(TagEnd::TableCell) => {
462 self.cursor += 1;
463 }
464 Event::Start(Tag::TableCell) => {
465 self.cursor += 1;
466 let cell_contents = self.parse_text(false, Some(source_range));
467 current_row.push(cell_contents);
468 }
469 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
470 self.cursor += 1;
471 let new_row = std::mem::take(&mut current_row);
472 if in_header {
473 header.children = new_row;
474 in_header = false;
475 } else {
476 let row = ParsedMarkdownTableRow::with_children(new_row);
477 body.push(row);
478 }
479 }
480 Event::End(TagEnd::Table) => {
481 self.cursor += 1;
482 break;
483 }
484 _ => {
485 break;
486 }
487 }
488 }
489
490 ParsedMarkdownTable {
491 source_range,
492 header,
493 body,
494 column_alignments,
495 }
496 }
497
498 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
499 match alignment {
500 Alignment::None => ParsedMarkdownTableAlignment::None,
501 Alignment::Left => ParsedMarkdownTableAlignment::Left,
502 Alignment::Center => ParsedMarkdownTableAlignment::Center,
503 Alignment::Right => ParsedMarkdownTableAlignment::Right,
504 }
505 }
506
    /// Parses a (possibly nested) list into a flat vector of elements.
    ///
    /// Nested lists are not represented hierarchically: every item becomes a
    /// `ListItem` element carrying its own `depth`, and the items are arranged
    /// so that a parent item precedes the items of its nested lists. Blocks
    /// that follow a nested list inside an item are appended to the flat
    /// output after the nested items.
    ///
    /// `order` is the starting number for an ordered list, or `None` for an
    /// unordered one.
    ///
    /// # Panics
    ///
    /// Panics if there is no token before the cursor.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items, in output order.
        let mut items = Vec::new();
        // Items currently being built; the innermost open item is last.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        // Number for the next ordered item at the current depth.
        let mut order = order;
        // Saved `order` values of the enclosing lists.
        let mut order_stack = Vec::new();

        // Per depth: position in `items` where the open item at that depth must
        // be inserted once it ends, so that it lands before its nested items.
        let mut insertion_indices = FxHashMap::default();
        // Per depth: source range recorded for the open item when a nested
        // list starts inside it.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Restore the numbering of the enclosing list.
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The outermost list has ended.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over the list's numbering.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block; its item type keeps
                            // the default value.
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // Advance the numbering for the next item of an ordered list.
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range recorded when a nested list started;
                        // otherwise use the range of the item's start tag.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained nested lists goes in front of
                        // the nested items that were already emitted.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we can parse: skip the token.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
657
658 #[async_recursion]
659 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
660 let (_event, source_range) = self.previous().unwrap();
661 let source_range = source_range.clone();
662 let mut nested_depth = 1;
663
664 let mut children: Vec<ParsedMarkdownElement> = vec![];
665
666 while !self.eof() {
667 let block = self.parse_block().await;
668
669 if let Some(block) = block {
670 children.extend(block);
671 } else {
672 break;
673 }
674
675 if self.eof() {
676 break;
677 }
678
679 let (current, _source_range) = self.current().unwrap();
680 match current {
681 // This is a nested block quote.
682 // Record that we're in a nested block quote and continue parsing.
683 // We don't need to advance the cursor since the next
684 // call to `parse_block` will handle it.
685 Event::Start(Tag::BlockQuote(_kind)) => {
686 nested_depth += 1;
687 }
688 Event::End(TagEnd::BlockQuote(_kind)) => {
689 nested_depth -= 1;
690 if nested_depth == 0 {
691 self.cursor += 1;
692 break;
693 }
694 }
695 _ => {}
696 };
697 }
698
699 ParsedMarkdownBlockQuote {
700 source_range,
701 children,
702 }
703 }
704
705 async fn parse_code_block(
706 &mut self,
707 language: Option<String>,
708 ) -> Option<ParsedMarkdownCodeBlock> {
709 let Some((_event, source_range)) = self.previous() else {
710 return None;
711 };
712
713 let source_range = source_range.clone();
714 let mut code = String::new();
715
716 while !self.eof() {
717 let Some((current, _source_range)) = self.current() else {
718 break;
719 };
720
721 match current {
722 Event::Text(text) => {
723 code.push_str(text);
724 self.cursor += 1;
725 }
726 Event::End(TagEnd::CodeBlock) => {
727 self.cursor += 1;
728 break;
729 }
730 _ => {
731 break;
732 }
733 }
734 }
735
736 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
737
738 let highlights = if let Some(language) = &language {
739 if let Some(registry) = &self.language_registry {
740 let rope: language::Rope = code.as_str().into();
741 registry
742 .language_for_name_or_extension(language)
743 .await
744 .map(|l| l.highlight_text(&rope, 0..code.len()))
745 .ok()
746 } else {
747 None
748 }
749 } else {
750 None
751 };
752
753 Some(ParsedMarkdownCodeBlock {
754 source_range,
755 contents: code.into(),
756 language,
757 highlights,
758 })
759 }
760
761 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
762 let mut elements = Vec::new();
763 let Some((_event, _source_range)) = self.previous() else {
764 return elements;
765 };
766
767 while !self.eof() {
768 let Some((current, source_range)) = self.current() else {
769 break;
770 };
771 let source_range = source_range.clone();
772 match current {
773 Event::Html(html) => {
774 let mut cursor = std::io::Cursor::new(html.as_bytes());
775 let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
776 .from_utf8()
777 .read_from(&mut cursor)
778 .ok()
779 else {
780 self.cursor += 1;
781 continue;
782 };
783
784 self.cursor += 1;
785
786 self.parse_html_node(source_range, &dom.document, &mut elements);
787 }
788 Event::End(TagEnd::CodeBlock) => {
789 self.cursor += 1;
790 break;
791 }
792 _ => {
793 break;
794 }
795 }
796 }
797
798 elements
799 }
800
801 fn parse_html_node(
802 &self,
803 source_range: Range<usize>,
804 node: &Rc<markup5ever_rcdom::Node>,
805 elements: &mut Vec<ParsedMarkdownElement>,
806 ) {
807 match &node.data {
808 markup5ever_rcdom::NodeData::Document => {
809 self.consume_children(source_range, node, elements);
810 }
811 markup5ever_rcdom::NodeData::Doctype { .. } => {}
812 markup5ever_rcdom::NodeData::Text { contents } => {
813 elements.push(ParsedMarkdownElement::Paragraph(vec![
814 MarkdownParagraphChunk::Text(ParsedMarkdownText {
815 source_range,
816 contents: contents.borrow().to_string(),
817 highlights: Vec::default(),
818 region_ranges: Vec::default(),
819 regions: Vec::default(),
820 }),
821 ]));
822 }
823 markup5ever_rcdom::NodeData::Comment { .. } => {}
824 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
825 if local_name!("img") == name.local {
826 if let Some(image) = self.extract_image(source_range, attrs) {
827 elements.push(ParsedMarkdownElement::Image(image));
828 }
829 } else if matches!(
830 name.local,
831 local_name!("h1")
832 | local_name!("h2")
833 | local_name!("h3")
834 | local_name!("h4")
835 | local_name!("h5")
836 | local_name!("h6")
837 ) {
838 let mut paragraph = MarkdownParagraph::new();
839 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
840
841 if !paragraph.is_empty() {
842 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
843 source_range,
844 level: match name.local {
845 local_name!("h1") => HeadingLevel::H1,
846 local_name!("h2") => HeadingLevel::H2,
847 local_name!("h3") => HeadingLevel::H3,
848 local_name!("h4") => HeadingLevel::H4,
849 local_name!("h5") => HeadingLevel::H5,
850 local_name!("h6") => HeadingLevel::H6,
851 _ => unreachable!(),
852 },
853 contents: paragraph,
854 }));
855 }
856 } else {
857 self.consume_children(source_range, node, elements);
858 }
859 }
860 markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
861 }
862 }
863
864 fn parse_paragraph(
865 &self,
866 source_range: Range<usize>,
867 node: &Rc<markup5ever_rcdom::Node>,
868 paragraph: &mut MarkdownParagraph,
869 ) {
870 match &node.data {
871 markup5ever_rcdom::NodeData::Text { contents } => {
872 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
873 source_range,
874 regions: Vec::default(),
875 contents: contents.borrow().to_string(),
876 region_ranges: Vec::default(),
877 highlights: Vec::default(),
878 }));
879 }
880 markup5ever_rcdom::NodeData::Element { .. } => {
881 self.consume_paragraph(source_range, node, paragraph);
882 }
883 _ => {}
884 }
885 }
886
887 fn consume_paragraph(
888 &self,
889 source_range: Range<usize>,
890 node: &Rc<markup5ever_rcdom::Node>,
891 paragraph: &mut MarkdownParagraph,
892 ) {
893 for node in node.children.borrow().iter() {
894 self.parse_paragraph(source_range.clone(), node, paragraph);
895 }
896 }
897
898 fn consume_children(
899 &self,
900 source_range: Range<usize>,
901 node: &Rc<markup5ever_rcdom::Node>,
902 elements: &mut Vec<ParsedMarkdownElement>,
903 ) {
904 for node in node.children.borrow().iter() {
905 self.parse_html_node(source_range.clone(), node, elements);
906 }
907 }
908
909 fn attr_value(
910 attrs: &RefCell<Vec<html5ever::Attribute>>,
911 name: html5ever::LocalName,
912 ) -> Option<String> {
913 attrs.borrow().iter().find_map(|attr| {
914 if attr.name.local == name {
915 Some(attr.value.to_string())
916 } else {
917 None
918 }
919 })
920 }
921
922 fn extract_styles_from_attributes(
923 attrs: &RefCell<Vec<html5ever::Attribute>>,
924 ) -> HashMap<String, String> {
925 let mut styles = HashMap::new();
926
927 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
928 for decl in style.split(';') {
929 let mut parts = decl.splitn(2, ':');
930 if let Some((key, value)) = parts.next().zip(parts.next()) {
931 styles.insert(
932 key.trim().to_lowercase().to_string(),
933 value.trim().to_string(),
934 );
935 }
936 }
937 }
938
939 styles
940 }
941
942 fn extract_image(
943 &self,
944 source_range: Range<usize>,
945 attrs: &RefCell<Vec<html5ever::Attribute>>,
946 ) -> Option<Image> {
947 let src = Self::attr_value(attrs, local_name!("src"))?;
948
949 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
950
951 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
952 image.set_alt_text(alt.into());
953 }
954
955 let styles = Self::extract_styles_from_attributes(attrs);
956
957 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
958 .or_else(|| styles.get("width").cloned())
959 .and_then(|width| Self::parse_length(&width))
960 {
961 image.set_width(width);
962 }
963
964 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
965 .or_else(|| styles.get("height").cloned())
966 .and_then(|height| Self::parse_length(&height))
967 {
968 image.set_height(height);
969 }
970
971 Some(image)
972 }
973
974 /// Parses the width/height attribute value of an html element (e.g. img element)
975 fn parse_length(value: &str) -> Option<DefiniteLength> {
976 if value.ends_with("%") {
977 value
978 .trim_end_matches("%")
979 .parse::<f32>()
980 .ok()
981 .map(|value| relative(value / 100.))
982 } else {
983 value
984 .trim_end_matches("px")
985 .parse()
986 .ok()
987 .map(|value| px(value).into())
988 }
989 }
990}
991
992#[cfg(test)]
993mod tests {
994 use super::*;
995 use ParsedMarkdownListItemType::*;
996 use core::panic;
997 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
998 use language::{
999 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1000 };
1001 use pretty_assertions::assert_eq;
1002
1003 async fn parse(input: &str) -> ParsedMarkdown {
1004 parse_markdown(input, None, None).await
1005 }
1006
1007 #[gpui::test]
1008 async fn test_headings() {
1009 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1010
1011 assert_eq!(
1012 parsed.children,
1013 vec![
1014 h1(text("Heading one", 2..13), 0..14),
1015 h2(text("Heading two", 17..28), 14..29),
1016 h3(text("Heading three", 33..46), 29..46),
1017 ]
1018 );
1019 }
1020
1021 #[gpui::test]
1022 async fn test_newlines_dont_new_paragraphs() {
1023 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1024
1025 assert_eq!(
1026 parsed.children,
1027 vec![p("Some text that is bolded and italicized", 0..46)]
1028 );
1029 }
1030
1031 #[gpui::test]
1032 async fn test_heading_with_paragraph() {
1033 let parsed = parse("# Zed\nThe editor").await;
1034
1035 assert_eq!(
1036 parsed.children,
1037 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1038 );
1039 }
1040
1041 #[gpui::test]
1042 async fn test_double_newlines_do_new_paragraphs() {
1043 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1044
1045 assert_eq!(
1046 parsed.children,
1047 vec![
1048 p("Some text that is bolded", 0..29),
1049 p("and italicized", 31..47),
1050 ]
1051 );
1052 }
1053
1054 #[gpui::test]
1055 async fn test_bold_italic_text() {
1056 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1057
1058 assert_eq!(
1059 parsed.children,
1060 vec![p("Some text that is bolded and italicized", 0..45)]
1061 );
1062 }
1063
1064 #[gpui::test]
1065 async fn test_nested_bold_strikethrough_text() {
1066 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1067
1068 assert_eq!(parsed.children.len(), 1);
1069 assert_eq!(
1070 parsed.children[0],
1071 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1072 ParsedMarkdownText {
1073 source_range: 0..35,
1074 contents: "Some bostrikethroughld text".to_string(),
1075 highlights: Vec::new(),
1076 region_ranges: Vec::new(),
1077 regions: Vec::new(),
1078 }
1079 )])
1080 );
1081
1082 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1083 text
1084 } else {
1085 panic!("Expected a paragraph");
1086 };
1087
1088 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1089 text
1090 } else {
1091 panic!("Expected a text");
1092 };
1093
1094 assert_eq!(
1095 paragraph.highlights,
1096 vec![
1097 (
1098 5..7,
1099 MarkdownHighlight::Style(MarkdownHighlightStyle {
1100 weight: FontWeight::BOLD,
1101 ..Default::default()
1102 }),
1103 ),
1104 (
1105 7..20,
1106 MarkdownHighlight::Style(MarkdownHighlightStyle {
1107 weight: FontWeight::BOLD,
1108 strikethrough: true,
1109 ..Default::default()
1110 }),
1111 ),
1112 (
1113 20..22,
1114 MarkdownHighlight::Style(MarkdownHighlightStyle {
1115 weight: FontWeight::BOLD,
1116 ..Default::default()
1117 }),
1118 ),
1119 ]
1120 );
1121 }
1122
1123 #[gpui::test]
1124 async fn test_text_with_inline_html() {
1125 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1126
1127 assert_eq!(
1128 parsed.children,
1129 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1130 );
1131 }
1132
1133 #[gpui::test]
1134 async fn test_raw_links_detection() {
1135 let parsed = parse("Checkout this https://zed.dev link").await;
1136
1137 assert_eq!(
1138 parsed.children,
1139 vec![p("Checkout this https://zed.dev link", 0..34)]
1140 );
1141 }
1142
1143 #[gpui::test]
1144 async fn test_empty_image() {
1145 let parsed = parse("![]()").await;
1146
1147 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1148 text
1149 } else {
1150 panic!("Expected a paragraph");
1151 };
1152 assert_eq!(paragraph.len(), 0);
1153 }
1154
1155 #[gpui::test]
1156 async fn test_image_links_detection() {
1157 let parsed = parse("").await;
1158
1159 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1160 text
1161 } else {
1162 panic!("Expected a paragraph");
1163 };
1164 assert_eq!(
1165 paragraph[0],
1166 MarkdownParagraphChunk::Image(Image {
1167 source_range: 0..111,
1168 link: Link::Web {
1169 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1170 },
1171 alt_text: Some("test".into()),
1172 height: None,
1173 width: None,
1174 },)
1175 );
1176 }
1177
1178 #[gpui::test]
1179 async fn test_image_without_alt_text() {
1180 let parsed = parse("").await;
1181
1182 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1183 text
1184 } else {
1185 panic!("Expected a paragraph");
1186 };
1187 assert_eq!(
1188 paragraph[0],
1189 MarkdownParagraphChunk::Image(Image {
1190 source_range: 0..31,
1191 link: Link::Web {
1192 url: "http://example.com/foo.png".to_string(),
1193 },
1194 alt_text: None,
1195 height: None,
1196 width: None,
1197 },)
1198 );
1199 }
1200
1201 #[gpui::test]
1202 async fn test_image_with_alt_text_containing_formatting() {
1203 let parsed = parse("").await;
1204
1205 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1206 panic!("Expected a paragraph");
1207 };
1208 assert_eq!(
1209 chunks,
1210 &[MarkdownParagraphChunk::Image(Image {
1211 source_range: 0..44,
1212 link: Link::Web {
1213 url: "http://example.com/foo.png".to_string(),
1214 },
1215 alt_text: Some("foo bar baz".into()),
1216 height: None,
1217 width: None,
1218 }),],
1219 );
1220 }
1221
1222 #[gpui::test]
1223 async fn test_images_with_text_in_between() {
1224 let parsed = parse(
1225 "\nLorem Ipsum\n",
1226 )
1227 .await;
1228
1229 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1230 text
1231 } else {
1232 panic!("Expected a paragraph");
1233 };
1234 assert_eq!(
1235 chunks,
1236 &vec![
1237 MarkdownParagraphChunk::Image(Image {
1238 source_range: 0..81,
1239 link: Link::Web {
1240 url: "http://example.com/foo.png".to_string(),
1241 },
1242 alt_text: Some("foo".into()),
1243 height: None,
1244 width: None,
1245 }),
1246 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1247 source_range: 0..81,
1248 contents: " Lorem Ipsum ".to_string(),
1249 highlights: Vec::new(),
1250 region_ranges: Vec::new(),
1251 regions: Vec::new(),
1252 }),
1253 MarkdownParagraphChunk::Image(Image {
1254 source_range: 0..81,
1255 link: Link::Web {
1256 url: "http://example.com/bar.png".to_string(),
1257 },
1258 alt_text: Some("bar".into()),
1259 height: None,
1260 width: None,
1261 })
1262 ]
1263 );
1264 }
1265
1266 #[test]
1267 fn test_parse_length() {
1268 // Test percentage values
1269 assert_eq!(
1270 MarkdownParser::parse_length("50%"),
1271 Some(DefiniteLength::Fraction(0.5))
1272 );
1273 assert_eq!(
1274 MarkdownParser::parse_length("100%"),
1275 Some(DefiniteLength::Fraction(1.0))
1276 );
1277 assert_eq!(
1278 MarkdownParser::parse_length("25%"),
1279 Some(DefiniteLength::Fraction(0.25))
1280 );
1281 assert_eq!(
1282 MarkdownParser::parse_length("0%"),
1283 Some(DefiniteLength::Fraction(0.0))
1284 );
1285
1286 // Test pixel values
1287 assert_eq!(
1288 MarkdownParser::parse_length("100px"),
1289 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1290 );
1291 assert_eq!(
1292 MarkdownParser::parse_length("50px"),
1293 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1294 );
1295 assert_eq!(
1296 MarkdownParser::parse_length("0px"),
1297 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1298 );
1299
1300 // Test values without units (should be treated as pixels)
1301 assert_eq!(
1302 MarkdownParser::parse_length("100"),
1303 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1304 );
1305 assert_eq!(
1306 MarkdownParser::parse_length("42"),
1307 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1308 );
1309
1310 // Test invalid values
1311 assert_eq!(MarkdownParser::parse_length("invalid"), None);
1312 assert_eq!(MarkdownParser::parse_length("px"), None);
1313 assert_eq!(MarkdownParser::parse_length("%"), None);
1314 assert_eq!(MarkdownParser::parse_length(""), None);
1315 assert_eq!(MarkdownParser::parse_length("abc%"), None);
1316 assert_eq!(MarkdownParser::parse_length("abcpx"), None);
1317
1318 // Test decimal values
1319 assert_eq!(
1320 MarkdownParser::parse_length("50.5%"),
1321 Some(DefiniteLength::Fraction(0.505))
1322 );
1323 assert_eq!(
1324 MarkdownParser::parse_length("100.25px"),
1325 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1326 );
1327 assert_eq!(
1328 MarkdownParser::parse_length("42.0"),
1329 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1330 );
1331 }
1332
1333 #[gpui::test]
1334 async fn test_html_heading_tags() {
1335 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1336
1337 assert_eq!(
1338 ParsedMarkdown {
1339 children: vec![
1340 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1341 level: HeadingLevel::H1,
1342 source_range: 0..96,
1343 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1344 source_range: 0..96,
1345 contents: "Heading".into(),
1346 highlights: Vec::default(),
1347 region_ranges: Vec::default(),
1348 regions: Vec::default()
1349 })],
1350 }),
1351 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1352 level: HeadingLevel::H2,
1353 source_range: 0..96,
1354 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1355 source_range: 0..96,
1356 contents: "Heading".into(),
1357 highlights: Vec::default(),
1358 region_ranges: Vec::default(),
1359 regions: Vec::default()
1360 })],
1361 }),
1362 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1363 level: HeadingLevel::H3,
1364 source_range: 0..96,
1365 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1366 source_range: 0..96,
1367 contents: "Heading".into(),
1368 highlights: Vec::default(),
1369 region_ranges: Vec::default(),
1370 regions: Vec::default()
1371 })],
1372 }),
1373 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1374 level: HeadingLevel::H4,
1375 source_range: 0..96,
1376 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1377 source_range: 0..96,
1378 contents: "Heading".into(),
1379 highlights: Vec::default(),
1380 region_ranges: Vec::default(),
1381 regions: Vec::default()
1382 })],
1383 }),
1384 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1385 level: HeadingLevel::H5,
1386 source_range: 0..96,
1387 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1388 source_range: 0..96,
1389 contents: "Heading".into(),
1390 highlights: Vec::default(),
1391 region_ranges: Vec::default(),
1392 regions: Vec::default()
1393 })],
1394 }),
1395 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1396 level: HeadingLevel::H6,
1397 source_range: 0..96,
1398 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1399 source_range: 0..96,
1400 contents: "Heading".into(),
1401 highlights: Vec::default(),
1402 region_ranges: Vec::default(),
1403 regions: Vec::default()
1404 })],
1405 }),
1406 ],
1407 },
1408 parsed
1409 );
1410 }
1411
1412 #[gpui::test]
1413 async fn test_html_image_tag() {
1414 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1415
1416 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1417 panic!("Expected a image element");
1418 };
1419 assert_eq!(
1420 image.clone(),
1421 Image {
1422 source_range: 0..40,
1423 link: Link::Web {
1424 url: "http://example.com/foo.png".to_string(),
1425 },
1426 alt_text: None,
1427 height: None,
1428 width: None,
1429 },
1430 );
1431 }
1432
1433 #[gpui::test]
1434 async fn test_html_image_tag_with_alt_text() {
1435 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1436
1437 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1438 panic!("Expected a image element");
1439 };
1440 assert_eq!(
1441 image.clone(),
1442 Image {
1443 source_range: 0..50,
1444 link: Link::Web {
1445 url: "http://example.com/foo.png".to_string(),
1446 },
1447 alt_text: Some("Foo".into()),
1448 height: None,
1449 width: None,
1450 },
1451 );
1452 }
1453
1454 #[gpui::test]
1455 async fn test_html_image_tag_with_height_and_width() {
1456 let parsed =
1457 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1458
1459 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1460 panic!("Expected a image element");
1461 };
1462 assert_eq!(
1463 image.clone(),
1464 Image {
1465 source_range: 0..65,
1466 link: Link::Web {
1467 url: "http://example.com/foo.png".to_string(),
1468 },
1469 alt_text: None,
1470 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1471 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1472 },
1473 );
1474 }
1475
1476 #[gpui::test]
1477 async fn test_html_image_style_tag_with_height_and_width() {
1478 let parsed = parse(
1479 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1480 )
1481 .await;
1482
1483 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1484 panic!("Expected a image element");
1485 };
1486 assert_eq!(
1487 image.clone(),
1488 Image {
1489 source_range: 0..75,
1490 link: Link::Web {
1491 url: "http://example.com/foo.png".to_string(),
1492 },
1493 alt_text: None,
1494 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1495 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1496 },
1497 );
1498 }
1499
1500 #[gpui::test]
1501 async fn test_header_only_table() {
1502 let markdown = "\
1503| Header 1 | Header 2 |
1504|----------|----------|
1505
1506Some other content
1507";
1508
1509 let expected_table = table(
1510 0..48,
1511 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1512 vec![],
1513 );
1514
1515 assert_eq!(
1516 parse(markdown).await.children[0],
1517 ParsedMarkdownElement::Table(expected_table)
1518 );
1519 }
1520
1521 #[gpui::test]
1522 async fn test_basic_table() {
1523 let markdown = "\
1524| Header 1 | Header 2 |
1525|----------|----------|
1526| Cell 1 | Cell 2 |
1527| Cell 3 | Cell 4 |";
1528
1529 let expected_table = table(
1530 0..95,
1531 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1532 vec![
1533 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1534 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1535 ],
1536 );
1537
1538 assert_eq!(
1539 parse(markdown).await.children[0],
1540 ParsedMarkdownElement::Table(expected_table)
1541 );
1542 }
1543
1544 #[gpui::test]
1545 async fn test_list_basic() {
1546 let parsed = parse(
1547 "\
1548* Item 1
1549* Item 2
1550* Item 3
1551",
1552 )
1553 .await;
1554
1555 assert_eq!(
1556 parsed.children,
1557 vec![
1558 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1559 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1560 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1561 ],
1562 );
1563 }
1564
1565 #[gpui::test]
1566 async fn test_list_with_tasks() {
1567 let parsed = parse(
1568 "\
1569- [ ] TODO
1570- [x] Checked
1571",
1572 )
1573 .await;
1574
1575 assert_eq!(
1576 parsed.children,
1577 vec![
1578 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1579 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1580 ],
1581 );
1582 }
1583
1584 #[gpui::test]
1585 async fn test_list_with_indented_task() {
1586 let parsed = parse(
1587 "\
1588- [ ] TODO
1589 - [x] Checked
1590 - Unordered
1591 1. Number 1
1592 1. Number 2
15931. Number A
1594",
1595 )
1596 .await;
1597
1598 assert_eq!(
1599 parsed.children,
1600 vec![
1601 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1602 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1603 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1604 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1605 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1606 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1607 ],
1608 );
1609 }
1610
1611 #[gpui::test]
1612 async fn test_list_with_linebreak_is_handled_correctly() {
1613 let parsed = parse(
1614 "\
1615- [ ] Task 1
1616
1617- [x] Task 2
1618",
1619 )
1620 .await;
1621
1622 assert_eq!(
1623 parsed.children,
1624 vec![
1625 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1626 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1627 ],
1628 );
1629 }
1630
1631 #[gpui::test]
1632 async fn test_list_nested() {
1633 let parsed = parse(
1634 "\
1635* Item 1
1636* Item 2
1637* Item 3
1638
16391. Hello
16401. Two
1641 1. Three
16422. Four
16433. Five
1644
1645* First
1646 1. Hello
1647 1. Goodbyte
1648 - Inner
1649 - Inner
1650 2. Goodbyte
1651 - Next item empty
1652 -
1653* Last
1654",
1655 )
1656 .await;
1657
1658 assert_eq!(
1659 parsed.children,
1660 vec![
1661 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1662 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1663 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1664 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1665 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1666 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1667 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1668 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1669 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1670 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1671 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1672 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1673 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1674 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1675 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1676 list_item(186..190, 3, Unordered, vec![]),
1677 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1678 ]
1679 );
1680 }
1681
1682 #[gpui::test]
1683 async fn test_list_with_nested_content() {
1684 let parsed = parse(
1685 "\
1686* This is a list item with two paragraphs.
1687
1688 This is the second paragraph in the list item.
1689",
1690 )
1691 .await;
1692
1693 assert_eq!(
1694 parsed.children,
1695 vec![list_item(
1696 0..96,
1697 1,
1698 Unordered,
1699 vec![
1700 p("This is a list item with two paragraphs.", 4..44),
1701 p("This is the second paragraph in the list item.", 50..97)
1702 ],
1703 ),],
1704 );
1705 }
1706
1707 #[gpui::test]
1708 async fn test_list_item_with_inline_html() {
1709 let parsed = parse(
1710 "\
1711* This is a list item with an inline HTML <sometag>tag</sometag>.
1712",
1713 )
1714 .await;
1715
1716 assert_eq!(
1717 parsed.children,
1718 vec![list_item(
1719 0..67,
1720 1,
1721 Unordered,
1722 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1723 ),],
1724 );
1725 }
1726
1727 #[gpui::test]
1728 async fn test_nested_list_with_paragraph_inside() {
1729 let parsed = parse(
1730 "\
17311. a
1732 1. b
1733 1. c
1734
1735 text
1736
1737 1. d
1738",
1739 )
1740 .await;
1741
1742 assert_eq!(
1743 parsed.children,
1744 vec![
1745 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1746 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1747 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1748 p("text", 32..37),
1749 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1750 ],
1751 );
1752 }
1753
1754 #[gpui::test]
1755 async fn test_list_with_leading_text() {
1756 let parsed = parse(
1757 "\
1758* `code`
1759* **bold**
1760* [link](https://example.com)
1761",
1762 )
1763 .await;
1764
1765 assert_eq!(
1766 parsed.children,
1767 vec![
1768 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1769 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1770 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1771 ],
1772 );
1773 }
1774
1775 #[gpui::test]
1776 async fn test_simple_block_quote() {
1777 let parsed = parse("> Simple block quote with **styled text**").await;
1778
1779 assert_eq!(
1780 parsed.children,
1781 vec![block_quote(
1782 vec![p("Simple block quote with styled text", 2..41)],
1783 0..41
1784 )]
1785 );
1786 }
1787
1788 #[gpui::test]
1789 async fn test_simple_block_quote_with_multiple_lines() {
1790 let parsed = parse(
1791 "\
1792> # Heading
1793> More
1794> text
1795>
1796> More text
1797",
1798 )
1799 .await;
1800
1801 assert_eq!(
1802 parsed.children,
1803 vec![block_quote(
1804 vec![
1805 h1(text("Heading", 4..11), 2..12),
1806 p("More text", 14..26),
1807 p("More text", 30..40)
1808 ],
1809 0..40
1810 )]
1811 );
1812 }
1813
1814 #[gpui::test]
1815 async fn test_nested_block_quote() {
1816 let parsed = parse(
1817 "\
1818> A
1819>
1820> > # B
1821>
1822> C
1823
1824More text
1825",
1826 )
1827 .await;
1828
1829 assert_eq!(
1830 parsed.children,
1831 vec![
1832 block_quote(
1833 vec![
1834 p("A", 2..4),
1835 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1836 p("C", 18..20)
1837 ],
1838 0..20
1839 ),
1840 p("More text", 21..31)
1841 ]
1842 );
1843 }
1844
1845 #[gpui::test]
1846 async fn test_code_block() {
1847 let parsed = parse(
1848 "\
1849```
1850fn main() {
1851 return 0;
1852}
1853```
1854",
1855 )
1856 .await;
1857
1858 assert_eq!(
1859 parsed.children,
1860 vec![code_block(
1861 None,
1862 "fn main() {\n return 0;\n}",
1863 0..35,
1864 None
1865 )]
1866 );
1867 }
1868
1869 #[gpui::test]
1870 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1871 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1872 language_registry.add(rust_lang());
1873
1874 let parsed = parse_markdown(
1875 "\
1876```rust
1877fn main() {
1878 return 0;
1879}
1880```
1881",
1882 None,
1883 Some(language_registry),
1884 )
1885 .await;
1886
1887 assert_eq!(
1888 parsed.children,
1889 vec![code_block(
1890 Some("rust".to_string()),
1891 "fn main() {\n return 0;\n}",
1892 0..39,
1893 Some(vec![])
1894 )]
1895 );
1896 }
1897
1898 fn rust_lang() -> Arc<Language> {
1899 Arc::new(Language::new(
1900 LanguageConfig {
1901 name: "Rust".into(),
1902 matcher: LanguageMatcher {
1903 path_suffixes: vec!["rs".into()],
1904 ..Default::default()
1905 },
1906 collapsed_placeholder: " /* ... */ ".to_string(),
1907 ..Default::default()
1908 },
1909 Some(tree_sitter_rust::LANGUAGE.into()),
1910 ))
1911 }
1912
1913 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1914 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1915 source_range,
1916 level: HeadingLevel::H1,
1917 contents,
1918 })
1919 }
1920
1921 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1922 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1923 source_range,
1924 level: HeadingLevel::H2,
1925 contents,
1926 })
1927 }
1928
1929 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1930 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1931 source_range,
1932 level: HeadingLevel::H3,
1933 contents,
1934 })
1935 }
1936
1937 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1938 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1939 }
1940
1941 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1942 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1943 highlights: Vec::new(),
1944 region_ranges: Vec::new(),
1945 regions: Vec::new(),
1946 source_range,
1947 contents: contents.to_string(),
1948 })]
1949 }
1950
1951 fn block_quote(
1952 children: Vec<ParsedMarkdownElement>,
1953 source_range: Range<usize>,
1954 ) -> ParsedMarkdownElement {
1955 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1956 source_range,
1957 children,
1958 })
1959 }
1960
1961 fn code_block(
1962 language: Option<String>,
1963 code: &str,
1964 source_range: Range<usize>,
1965 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1966 ) -> ParsedMarkdownElement {
1967 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1968 source_range,
1969 language,
1970 contents: code.to_string().into(),
1971 highlights,
1972 })
1973 }
1974
1975 fn list_item(
1976 source_range: Range<usize>,
1977 depth: u16,
1978 item_type: ParsedMarkdownListItemType,
1979 content: Vec<ParsedMarkdownElement>,
1980 ) -> ParsedMarkdownElement {
1981 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1982 source_range,
1983 item_type,
1984 depth,
1985 content,
1986 })
1987 }
1988
1989 fn table(
1990 source_range: Range<usize>,
1991 header: ParsedMarkdownTableRow,
1992 body: Vec<ParsedMarkdownTableRow>,
1993 ) -> ParsedMarkdownTable {
1994 ParsedMarkdownTable {
1995 column_alignments: Vec::new(),
1996 source_range,
1997 header,
1998 body,
1999 }
2000 }
2001
2002 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2003 ParsedMarkdownTableRow { children }
2004 }
2005
2006 impl PartialEq for ParsedMarkdownTable {
2007 fn eq(&self, other: &Self) -> bool {
2008 self.source_range == other.source_range
2009 && self.header == other.header
2010 && self.body == other.body
2011 }
2012 }
2013
2014 impl PartialEq for ParsedMarkdownText {
2015 fn eq(&self, other: &Self) -> bool {
2016 self.source_range == other.source_range && self.contents == other.contents
2017 }
2018 }
2019}