1use crate::markdown_elements::*;
2use async_recursion::async_recursion;
3use collections::FxHashMap;
4use gpui::{DefiniteLength, FontWeight, px, relative};
5use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
6use language::LanguageRegistry;
7use markup5ever_rcdom::RcDom;
8use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
9use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
10
11pub async fn parse_markdown(
12 markdown_input: &str,
13 file_location_directory: Option<PathBuf>,
14 language_registry: Option<Arc<LanguageRegistry>>,
15) -> ParsedMarkdown {
16 let mut options = Options::all();
17 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
18
19 let parser = Parser::new_ext(markdown_input, options);
20 let parser = MarkdownParser::new(
21 parser.into_offset_iter().collect(),
22 file_location_directory,
23 language_registry,
24 );
25 let renderer = parser.parse_document().await;
26 ParsedMarkdown {
27 children: renderer.parsed,
28 }
29}
30
31struct MarkdownParser<'a> {
32 tokens: Vec<(Event<'a>, Range<usize>)>,
33 /// The current index in the tokens array
34 cursor: usize,
35 /// The blocks that we have successfully parsed so far
36 parsed: Vec<ParsedMarkdownElement>,
37 file_location_directory: Option<PathBuf>,
38 language_registry: Option<Arc<LanguageRegistry>>,
39}
40
41struct MarkdownListItem {
42 content: Vec<ParsedMarkdownElement>,
43 item_type: ParsedMarkdownListItemType,
44}
45
46impl Default for MarkdownListItem {
47 fn default() -> Self {
48 Self {
49 content: Vec::new(),
50 item_type: ParsedMarkdownListItemType::Unordered,
51 }
52 }
53}
54
55impl<'a> MarkdownParser<'a> {
56 fn new(
57 tokens: Vec<(Event<'a>, Range<usize>)>,
58 file_location_directory: Option<PathBuf>,
59 language_registry: Option<Arc<LanguageRegistry>>,
60 ) -> Self {
61 Self {
62 tokens,
63 file_location_directory,
64 language_registry,
65 cursor: 0,
66 parsed: vec![],
67 }
68 }
69
70 fn eof(&self) -> bool {
71 if self.tokens.is_empty() {
72 return true;
73 }
74 self.cursor >= self.tokens.len() - 1
75 }
76
77 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
78 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
79 return self.tokens.last();
80 }
81 self.tokens.get(self.cursor + steps)
82 }
83
84 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
85 if self.cursor == 0 || self.cursor > self.tokens.len() {
86 return None;
87 }
88 self.tokens.get(self.cursor - 1)
89 }
90
91 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
92 self.peek(0)
93 }
94
95 fn current_event(&self) -> Option<&Event<'_>> {
96 self.current().map(|(event, _)| event)
97 }
98
99 fn is_text_like(event: &Event) -> bool {
100 match event {
101 Event::Text(_)
102 // Represent an inline code block
103 | Event::Code(_)
104 | Event::Html(_)
105 | Event::InlineHtml(_)
106 | Event::FootnoteReference(_)
107 | Event::Start(Tag::Link { .. })
108 | Event::Start(Tag::Emphasis)
109 | Event::Start(Tag::Strong)
110 | Event::Start(Tag::Strikethrough)
111 | Event::Start(Tag::Image { .. }) => {
112 true
113 }
114 _ => false,
115 }
116 }
117
118 async fn parse_document(mut self) -> Self {
119 while !self.eof() {
120 if let Some(block) = self.parse_block().await {
121 self.parsed.extend(block);
122 } else {
123 self.cursor += 1;
124 }
125 }
126 self
127 }
128
129 #[async_recursion]
130 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
131 let (current, source_range) = self.current().unwrap();
132 let source_range = source_range.clone();
133 match current {
134 Event::Start(tag) => match tag {
135 Tag::Paragraph => {
136 self.cursor += 1;
137 let text = self.parse_text(false, Some(source_range));
138 Some(vec![ParsedMarkdownElement::Paragraph(text)])
139 }
140 Tag::Heading { level, .. } => {
141 let level = *level;
142 self.cursor += 1;
143 let heading = self.parse_heading(level);
144 Some(vec![ParsedMarkdownElement::Heading(heading)])
145 }
146 Tag::Table(alignment) => {
147 let alignment = alignment.clone();
148 self.cursor += 1;
149 let table = self.parse_table(alignment);
150 Some(vec![ParsedMarkdownElement::Table(table)])
151 }
152 Tag::List(order) => {
153 let order = *order;
154 self.cursor += 1;
155 let list = self.parse_list(order).await;
156 Some(list)
157 }
158 Tag::BlockQuote(_kind) => {
159 self.cursor += 1;
160 let block_quote = self.parse_block_quote().await;
161 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
162 }
163 Tag::CodeBlock(kind) => {
164 let language = match kind {
165 pulldown_cmark::CodeBlockKind::Indented => None,
166 pulldown_cmark::CodeBlockKind::Fenced(language) => {
167 if language.is_empty() {
168 None
169 } else {
170 Some(language.to_string())
171 }
172 }
173 };
174
175 self.cursor += 1;
176
177 let code_block = self.parse_code_block(language).await?;
178 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
179 }
180 Tag::HtmlBlock => {
181 self.cursor += 1;
182
183 Some(self.parse_html_block().await)
184 }
185 _ => None,
186 },
187 Event::Rule => {
188 self.cursor += 1;
189 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
190 }
191 _ => None,
192 }
193 }
194
195 fn parse_text(
196 &mut self,
197 should_complete_on_soft_break: bool,
198 source_range: Option<Range<usize>>,
199 ) -> MarkdownParagraph {
200 let source_range = source_range.unwrap_or_else(|| {
201 self.current()
202 .map(|(_, range)| range.clone())
203 .unwrap_or_default()
204 });
205
206 let mut markdown_text_like = Vec::new();
207 let mut text = String::new();
208 let mut bold_depth = 0;
209 let mut italic_depth = 0;
210 let mut strikethrough_depth = 0;
211 let mut link: Option<Link> = None;
212 let mut image: Option<Image> = None;
213 let mut region_ranges: Vec<Range<usize>> = vec![];
214 let mut regions: Vec<ParsedRegion> = vec![];
215 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
216 let mut link_urls: Vec<String> = vec![];
217 let mut link_ranges: Vec<Range<usize>> = vec![];
218
219 loop {
220 if self.eof() {
221 break;
222 }
223
224 let (current, _) = self.current().unwrap();
225 let prev_len = text.len();
226 match current {
227 Event::SoftBreak => {
228 if should_complete_on_soft_break {
229 break;
230 }
231 text.push(' ');
232 }
233
234 Event::HardBreak => {
235 text.push('\n');
236 }
237
238 // We want to ignore any inline HTML tags in the text but keep
239 // the text between them
240 Event::InlineHtml(_) => {}
241
242 Event::Text(t) => {
243 text.push_str(t.as_ref());
244 let mut style = MarkdownHighlightStyle::default();
245
246 if bold_depth > 0 {
247 style.weight = FontWeight::BOLD;
248 }
249
250 if italic_depth > 0 {
251 style.italic = true;
252 }
253
254 if strikethrough_depth > 0 {
255 style.strikethrough = true;
256 }
257
258 let last_run_len = if let Some(link) = link.clone() {
259 region_ranges.push(prev_len..text.len());
260 regions.push(ParsedRegion {
261 code: false,
262 link: Some(link),
263 });
264 style.underline = true;
265 prev_len
266 } else {
267 // Manually scan for links
268 let mut finder = linkify::LinkFinder::new();
269 finder.kinds(&[linkify::LinkKind::Url]);
270 let mut last_link_len = prev_len;
271 for link in finder.links(t) {
272 let start = link.start();
273 let end = link.end();
274 let range = (prev_len + start)..(prev_len + end);
275 link_ranges.push(range.clone());
276 link_urls.push(link.as_str().to_string());
277
278 // If there is a style before we match a link, we have to add this to the highlighted ranges
279 if style != MarkdownHighlightStyle::default()
280 && last_link_len < link.start()
281 {
282 highlights.push((
283 last_link_len..link.start(),
284 MarkdownHighlight::Style(style.clone()),
285 ));
286 }
287
288 highlights.push((
289 range.clone(),
290 MarkdownHighlight::Style(MarkdownHighlightStyle {
291 underline: true,
292 ..style
293 }),
294 ));
295 region_ranges.push(range.clone());
296 regions.push(ParsedRegion {
297 code: false,
298 link: Some(Link::Web {
299 url: link.as_str().to_string(),
300 }),
301 });
302 last_link_len = end;
303 }
304 last_link_len
305 };
306
307 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
308 let mut new_highlight = true;
309 if let Some((last_range, last_style)) = highlights.last_mut()
310 && last_range.end == last_run_len
311 && last_style == &MarkdownHighlight::Style(style.clone())
312 {
313 last_range.end = text.len();
314 new_highlight = false;
315 }
316 if new_highlight {
317 highlights.push((
318 last_run_len..text.len(),
319 MarkdownHighlight::Style(style.clone()),
320 ));
321 }
322 }
323 }
324 Event::Code(t) => {
325 text.push_str(t.as_ref());
326 region_ranges.push(prev_len..text.len());
327
328 if link.is_some() {
329 highlights.push((
330 prev_len..text.len(),
331 MarkdownHighlight::Style(MarkdownHighlightStyle {
332 underline: true,
333 ..Default::default()
334 }),
335 ));
336 }
337 regions.push(ParsedRegion {
338 code: true,
339 link: link.clone(),
340 });
341 }
342 Event::Start(tag) => match tag {
343 Tag::Emphasis => italic_depth += 1,
344 Tag::Strong => bold_depth += 1,
345 Tag::Strikethrough => strikethrough_depth += 1,
346 Tag::Link { dest_url, .. } => {
347 link = Link::identify(
348 self.file_location_directory.clone(),
349 dest_url.to_string(),
350 );
351 }
352 Tag::Image { dest_url, .. } => {
353 if !text.is_empty() {
354 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
355 source_range: source_range.clone(),
356 contents: text.into(),
357 highlights: highlights.clone(),
358 region_ranges: region_ranges.clone(),
359 regions: regions.clone(),
360 });
361 text = String::new();
362 highlights = vec![];
363 region_ranges = vec![];
364 regions = vec![];
365 markdown_text_like.push(parsed_regions);
366 }
367 image = Image::identify(
368 dest_url.to_string(),
369 source_range.clone(),
370 self.file_location_directory.clone(),
371 );
372 }
373 _ => {
374 break;
375 }
376 },
377
378 Event::End(tag) => match tag {
379 TagEnd::Emphasis => italic_depth -= 1,
380 TagEnd::Strong => bold_depth -= 1,
381 TagEnd::Strikethrough => strikethrough_depth -= 1,
382 TagEnd::Link => {
383 link = None;
384 }
385 TagEnd::Image => {
386 if let Some(mut image) = image.take() {
387 if !text.is_empty() {
388 image.set_alt_text(std::mem::take(&mut text).into());
389 }
390 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
391 }
392 }
393 TagEnd::Paragraph => {
394 self.cursor += 1;
395 break;
396 }
397 _ => {
398 break;
399 }
400 },
401 _ => {
402 break;
403 }
404 }
405
406 self.cursor += 1;
407 }
408 if !text.is_empty() {
409 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
410 source_range,
411 contents: text.into(),
412 highlights,
413 regions,
414 region_ranges,
415 }));
416 }
417 markdown_text_like
418 }
419
420 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
421 let (_event, source_range) = self.previous().unwrap();
422 let source_range = source_range.clone();
423 let text = self.parse_text(true, None);
424
425 // Advance past the heading end tag
426 self.cursor += 1;
427
428 ParsedMarkdownHeading {
429 source_range,
430 level: match level {
431 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
432 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
433 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
434 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
435 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
436 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
437 },
438 contents: text,
439 }
440 }
441
442 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
443 let (_event, source_range) = self.previous().unwrap();
444 let source_range = source_range.clone();
445 let mut header = ParsedMarkdownTableRow::new();
446 let mut body = vec![];
447 let mut current_row = vec![];
448 let mut in_header = true;
449 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
450
451 loop {
452 if self.eof() {
453 break;
454 }
455
456 let (current, source_range) = self.current().unwrap();
457 let source_range = source_range.clone();
458 match current {
459 Event::Start(Tag::TableHead)
460 | Event::Start(Tag::TableRow)
461 | Event::End(TagEnd::TableCell) => {
462 self.cursor += 1;
463 }
464 Event::Start(Tag::TableCell) => {
465 self.cursor += 1;
466 let cell_contents = self.parse_text(false, Some(source_range));
467 current_row.push(cell_contents);
468 }
469 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
470 self.cursor += 1;
471 let new_row = std::mem::take(&mut current_row);
472 if in_header {
473 header.children = new_row;
474 in_header = false;
475 } else {
476 let row = ParsedMarkdownTableRow::with_children(new_row);
477 body.push(row);
478 }
479 }
480 Event::End(TagEnd::Table) => {
481 self.cursor += 1;
482 break;
483 }
484 _ => {
485 break;
486 }
487 }
488 }
489
490 ParsedMarkdownTable {
491 source_range,
492 header,
493 body,
494 column_alignments,
495 }
496 }
497
498 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
499 match alignment {
500 Alignment::None => ParsedMarkdownTableAlignment::None,
501 Alignment::Left => ParsedMarkdownTableAlignment::Left,
502 Alignment::Center => ParsedMarkdownTableAlignment::Center,
503 Alignment::Right => ParsedMarkdownTableAlignment::Right,
504 }
505 }
506
507 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
508 let (_, list_source_range) = self.previous().unwrap();
509
510 let mut items = Vec::new();
511 let mut items_stack = vec![MarkdownListItem::default()];
512 let mut depth = 1;
513 let mut order = order;
514 let mut order_stack = Vec::new();
515
516 let mut insertion_indices = FxHashMap::default();
517 let mut source_ranges = FxHashMap::default();
518 let mut start_item_range = list_source_range.clone();
519
520 while !self.eof() {
521 let (current, source_range) = self.current().unwrap();
522 match current {
523 Event::Start(Tag::List(new_order)) => {
524 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
525 insertion_indices.insert(depth, items.len());
526 }
527
528 // We will use the start of the nested list as the end for the current item's range,
529 // because we don't care about the hierarchy of list items
530 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
531 e.insert(start_item_range.start..source_range.start);
532 }
533
534 order_stack.push(order);
535 order = *new_order;
536 self.cursor += 1;
537 depth += 1;
538 }
539 Event::End(TagEnd::List(_)) => {
540 order = order_stack.pop().flatten();
541 self.cursor += 1;
542 depth -= 1;
543
544 if depth == 0 {
545 break;
546 }
547 }
548 Event::Start(Tag::Item) => {
549 start_item_range = source_range.clone();
550
551 self.cursor += 1;
552 items_stack.push(MarkdownListItem::default());
553
554 let mut task_list = None;
555 // Check for task list marker (`- [ ]` or `- [x]`)
556 if let Some(event) = self.current_event() {
557 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
558 if event == &Event::Start(Tag::Paragraph) {
559 self.cursor += 1;
560 }
561
562 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
563 task_list = Some((*checked, range.clone()));
564 self.cursor += 1;
565 }
566 }
567
568 if let Some((event, range)) = self.current() {
569 // This is a plain list item.
570 // For example `- some text` or `1. [Docs](./docs.md)`
571 if MarkdownParser::is_text_like(event) {
572 let text = self.parse_text(false, Some(range.clone()));
573 let block = ParsedMarkdownElement::Paragraph(text);
574 if let Some(content) = items_stack.last_mut() {
575 let item_type = if let Some((checked, range)) = task_list {
576 ParsedMarkdownListItemType::Task(checked, range)
577 } else if let Some(order) = order {
578 ParsedMarkdownListItemType::Ordered(order)
579 } else {
580 ParsedMarkdownListItemType::Unordered
581 };
582 content.item_type = item_type;
583 content.content.push(block);
584 }
585 } else {
586 let block = self.parse_block().await;
587 if let Some(block) = block
588 && let Some(list_item) = items_stack.last_mut()
589 {
590 list_item.content.extend(block);
591 }
592 }
593 }
594
595 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
596 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
597 self.cursor += 1;
598 }
599 }
600 Event::End(TagEnd::Item) => {
601 self.cursor += 1;
602
603 if let Some(current) = order {
604 order = Some(current + 1);
605 }
606
607 if let Some(list_item) = items_stack.pop() {
608 let source_range = source_ranges
609 .remove(&depth)
610 .unwrap_or(start_item_range.clone());
611
612 // We need to remove the last character of the source range, because it includes the newline character
613 let source_range = source_range.start..source_range.end - 1;
614 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
615 source_range,
616 content: list_item.content,
617 depth,
618 item_type: list_item.item_type,
619 });
620
621 if let Some(index) = insertion_indices.get(&depth) {
622 items.insert(*index, item);
623 insertion_indices.remove(&depth);
624 } else {
625 items.push(item);
626 }
627 }
628 }
629 _ => {
630 if depth == 0 {
631 break;
632 }
633 // This can only happen if a list item starts with more then one paragraph,
634 // or the list item contains blocks that should be rendered after the nested list items
635 let block = self.parse_block().await;
636 if let Some(block) = block {
637 if let Some(list_item) = items_stack.last_mut() {
638 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
639 if !insertion_indices.contains_key(&depth) {
640 list_item.content.extend(block);
641 continue;
642 }
643 }
644
645 // Otherwise we need to insert the block after all the nested items
646 // that have been parsed so far
647 items.extend(block);
648 } else {
649 self.cursor += 1;
650 }
651 }
652 }
653 }
654
655 items
656 }
657
658 #[async_recursion]
659 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
660 let (_event, source_range) = self.previous().unwrap();
661 let source_range = source_range.clone();
662 let mut nested_depth = 1;
663
664 let mut children: Vec<ParsedMarkdownElement> = vec![];
665
666 while !self.eof() {
667 let block = self.parse_block().await;
668
669 if let Some(block) = block {
670 children.extend(block);
671 } else {
672 break;
673 }
674
675 if self.eof() {
676 break;
677 }
678
679 let (current, _source_range) = self.current().unwrap();
680 match current {
681 // This is a nested block quote.
682 // Record that we're in a nested block quote and continue parsing.
683 // We don't need to advance the cursor since the next
684 // call to `parse_block` will handle it.
685 Event::Start(Tag::BlockQuote(_kind)) => {
686 nested_depth += 1;
687 }
688 Event::End(TagEnd::BlockQuote(_kind)) => {
689 nested_depth -= 1;
690 if nested_depth == 0 {
691 self.cursor += 1;
692 break;
693 }
694 }
695 _ => {}
696 };
697 }
698
699 ParsedMarkdownBlockQuote {
700 source_range,
701 children,
702 }
703 }
704
705 async fn parse_code_block(
706 &mut self,
707 language: Option<String>,
708 ) -> Option<ParsedMarkdownCodeBlock> {
709 let Some((_event, source_range)) = self.previous() else {
710 return None;
711 };
712
713 let source_range = source_range.clone();
714 let mut code = String::new();
715
716 while !self.eof() {
717 let Some((current, _source_range)) = self.current() else {
718 break;
719 };
720
721 match current {
722 Event::Text(text) => {
723 code.push_str(text);
724 self.cursor += 1;
725 }
726 Event::End(TagEnd::CodeBlock) => {
727 self.cursor += 1;
728 break;
729 }
730 _ => {
731 break;
732 }
733 }
734 }
735
736 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
737
738 let highlights = if let Some(language) = &language {
739 if let Some(registry) = &self.language_registry {
740 let rope: language::Rope = code.as_str().into();
741 registry
742 .language_for_name_or_extension(language)
743 .await
744 .map(|l| l.highlight_text(&rope, 0..code.len()))
745 .ok()
746 } else {
747 None
748 }
749 } else {
750 None
751 };
752
753 Some(ParsedMarkdownCodeBlock {
754 source_range,
755 contents: code.into(),
756 language,
757 highlights,
758 })
759 }
760
761 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
762 let mut elements = Vec::new();
763 let Some((_event, _source_range)) = self.previous() else {
764 return elements;
765 };
766
767 while !self.eof() {
768 let Some((current, source_range)) = self.current() else {
769 break;
770 };
771 let source_range = source_range.clone();
772 match current {
773 Event::Html(html) => {
774 let mut cursor = std::io::Cursor::new(html.as_bytes());
775 let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
776 .from_utf8()
777 .read_from(&mut cursor)
778 .ok()
779 else {
780 self.cursor += 1;
781 continue;
782 };
783
784 self.cursor += 1;
785
786 self.parse_html_node(source_range, &dom.document, &mut elements);
787 }
788 Event::End(TagEnd::CodeBlock) => {
789 self.cursor += 1;
790 break;
791 }
792 _ => {
793 break;
794 }
795 }
796 }
797
798 elements
799 }
800
801 fn parse_html_node(
802 &self,
803 source_range: Range<usize>,
804 node: &Rc<markup5ever_rcdom::Node>,
805 elements: &mut Vec<ParsedMarkdownElement>,
806 ) {
807 match &node.data {
808 markup5ever_rcdom::NodeData::Document => {
809 self.consume_children(source_range, node, elements);
810 }
811 markup5ever_rcdom::NodeData::Text { contents } => {
812 elements.push(ParsedMarkdownElement::Paragraph(vec![
813 MarkdownParagraphChunk::Text(ParsedMarkdownText {
814 source_range,
815 regions: Vec::default(),
816 region_ranges: Vec::default(),
817 highlights: Vec::default(),
818 contents: contents.borrow().to_string().into(),
819 }),
820 ]));
821 }
822 markup5ever_rcdom::NodeData::Comment { .. } => {}
823 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
824 if local_name!("img") == name.local {
825 if let Some(image) = self.extract_image(source_range, attrs) {
826 elements.push(ParsedMarkdownElement::Image(image));
827 }
828 } else if local_name!("p") == name.local {
829 self.parse_paragraph(
830 source_range,
831 node,
832 &mut MarkdownParagraph::new(),
833 elements,
834 );
835 } else {
836 self.consume_children(source_range, node, elements);
837 }
838 }
839 _ => {}
840 }
841 }
842
843 fn parse_paragraph(
844 &self,
845 source_range: Range<usize>,
846 node: &Rc<markup5ever_rcdom::Node>,
847 paragraph: &mut MarkdownParagraph,
848 elements: &mut Vec<ParsedMarkdownElement>,
849 ) {
850 match &node.data {
851 markup5ever_rcdom::NodeData::Text { contents } => {
852 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
853 source_range,
854 regions: Vec::default(),
855 region_ranges: Vec::default(),
856 highlights: Vec::default(),
857 contents: contents.borrow().to_string().into(),
858 }));
859 }
860 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
861 if local_name!("img") == name.local {
862 if let Some(image) = self.extract_image(source_range, attrs) {
863 paragraph.push(MarkdownParagraphChunk::Image(image));
864 }
865 } else {
866 self.consume_paragraph(source_range, node, paragraph, elements);
867
868 if !paragraph.is_empty() {
869 elements.push(ParsedMarkdownElement::Paragraph(std::mem::take(paragraph)));
870 }
871 }
872 }
873 _ => {}
874 }
875 }
876
877 fn consume_paragraph(
878 &self,
879 source_range: Range<usize>,
880 node: &Rc<markup5ever_rcdom::Node>,
881 paragraph: &mut MarkdownParagraph,
882 elements: &mut Vec<ParsedMarkdownElement>,
883 ) {
884 for node in node.children.borrow().iter() {
885 self.parse_paragraph(source_range.clone(), node, paragraph, elements);
886 }
887 }
888
889 fn consume_children(
890 &self,
891 source_range: Range<usize>,
892 node: &Rc<markup5ever_rcdom::Node>,
893 elements: &mut Vec<ParsedMarkdownElement>,
894 ) {
895 for node in node.children.borrow().iter() {
896 self.parse_html_node(source_range.clone(), node, elements);
897 }
898 }
899
900 fn attr_value(
901 attrs: &RefCell<Vec<html5ever::Attribute>>,
902 name: html5ever::LocalName,
903 ) -> Option<String> {
904 attrs.borrow().iter().find_map(|attr| {
905 if attr.name.local == name {
906 Some(attr.value.to_string())
907 } else {
908 None
909 }
910 })
911 }
912
913 fn extract_styles_from_attributes(
914 attrs: &RefCell<Vec<html5ever::Attribute>>,
915 ) -> HashMap<String, String> {
916 let mut styles = HashMap::new();
917
918 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
919 for decl in style.split(';') {
920 let mut parts = decl.splitn(2, ':');
921 if let Some((key, value)) = parts.next().zip(parts.next()) {
922 styles.insert(
923 key.trim().to_lowercase().to_string(),
924 value.trim().to_string(),
925 );
926 }
927 }
928 }
929
930 styles
931 }
932
933 fn extract_image(
934 &self,
935 source_range: Range<usize>,
936 attrs: &RefCell<Vec<html5ever::Attribute>>,
937 ) -> Option<Image> {
938 let src = Self::attr_value(attrs, local_name!("src"))?;
939
940 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
941
942 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
943 image.set_alt_text(alt.into());
944 }
945
946 let styles = Self::extract_styles_from_attributes(attrs);
947
948 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
949 .or_else(|| styles.get("width").cloned())
950 .and_then(|width| Self::parse_html_element_dimension(&width))
951 {
952 image.set_width(width);
953 }
954
955 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
956 .or_else(|| styles.get("height").cloned())
957 .and_then(|height| Self::parse_html_element_dimension(&height))
958 {
959 image.set_height(height);
960 }
961
962 Some(image)
963 }
964
965 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
966 if value.ends_with("%") {
967 value
968 .trim_end_matches("%")
969 .parse::<f32>()
970 .ok()
971 .map(|value| relative(value / 100.))
972 } else {
973 value
974 .trim_end_matches("px")
975 .parse()
976 .ok()
977 .map(|value| px(value).into())
978 }
979 }
980}
981
982#[cfg(test)]
983mod tests {
984 use super::*;
985 use ParsedMarkdownListItemType::*;
986 use core::panic;
987 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
988 use language::{
989 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
990 };
991 use pretty_assertions::assert_eq;
992
993 async fn parse(input: &str) -> ParsedMarkdown {
994 parse_markdown(input, None, None).await
995 }
996
997 #[gpui::test]
998 async fn test_headings() {
999 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1000
1001 assert_eq!(
1002 parsed.children,
1003 vec![
1004 h1(text("Heading one", 2..13), 0..14),
1005 h2(text("Heading two", 17..28), 14..29),
1006 h3(text("Heading three", 33..46), 29..46),
1007 ]
1008 );
1009 }
1010
1011 #[gpui::test]
1012 async fn test_newlines_dont_new_paragraphs() {
1013 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1014
1015 assert_eq!(
1016 parsed.children,
1017 vec![p("Some text that is bolded and italicized", 0..46)]
1018 );
1019 }
1020
1021 #[gpui::test]
1022 async fn test_heading_with_paragraph() {
1023 let parsed = parse("# Zed\nThe editor").await;
1024
1025 assert_eq!(
1026 parsed.children,
1027 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1028 );
1029 }
1030
1031 #[gpui::test]
1032 async fn test_double_newlines_do_new_paragraphs() {
1033 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1034
1035 assert_eq!(
1036 parsed.children,
1037 vec![
1038 p("Some text that is bolded", 0..29),
1039 p("and italicized", 31..47),
1040 ]
1041 );
1042 }
1043
1044 #[gpui::test]
1045 async fn test_bold_italic_text() {
1046 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1047
1048 assert_eq!(
1049 parsed.children,
1050 vec![p("Some text that is bolded and italicized", 0..45)]
1051 );
1052 }
1053
1054 #[gpui::test]
1055 async fn test_nested_bold_strikethrough_text() {
1056 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1057
1058 assert_eq!(parsed.children.len(), 1);
1059 assert_eq!(
1060 parsed.children[0],
1061 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1062 ParsedMarkdownText {
1063 source_range: 0..35,
1064 contents: "Some bostrikethroughld text".into(),
1065 highlights: Vec::new(),
1066 region_ranges: Vec::new(),
1067 regions: Vec::new(),
1068 }
1069 )])
1070 );
1071
1072 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1073 text
1074 } else {
1075 panic!("Expected a paragraph");
1076 };
1077
1078 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1079 text
1080 } else {
1081 panic!("Expected a text");
1082 };
1083
1084 assert_eq!(
1085 paragraph.highlights,
1086 vec![
1087 (
1088 5..7,
1089 MarkdownHighlight::Style(MarkdownHighlightStyle {
1090 weight: FontWeight::BOLD,
1091 ..Default::default()
1092 }),
1093 ),
1094 (
1095 7..20,
1096 MarkdownHighlight::Style(MarkdownHighlightStyle {
1097 weight: FontWeight::BOLD,
1098 strikethrough: true,
1099 ..Default::default()
1100 }),
1101 ),
1102 (
1103 20..22,
1104 MarkdownHighlight::Style(MarkdownHighlightStyle {
1105 weight: FontWeight::BOLD,
1106 ..Default::default()
1107 }),
1108 ),
1109 ]
1110 );
1111 }
1112
1113 #[gpui::test]
1114 async fn test_text_with_inline_html() {
1115 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1116
1117 assert_eq!(
1118 parsed.children,
1119 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1120 );
1121 }
1122
1123 #[gpui::test]
1124 async fn test_raw_links_detection() {
1125 let parsed = parse("Checkout this https://zed.dev link").await;
1126
1127 assert_eq!(
1128 parsed.children,
1129 vec![p("Checkout this https://zed.dev link", 0..34)]
1130 );
1131 }
1132
1133 #[gpui::test]
1134 async fn test_empty_image() {
1135 let parsed = parse("![]()").await;
1136
1137 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1138 text
1139 } else {
1140 panic!("Expected a paragraph");
1141 };
1142 assert_eq!(paragraph.len(), 0);
1143 }
1144
1145 #[gpui::test]
1146 async fn test_image_links_detection() {
1147 let parsed = parse("").await;
1148
1149 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1150 text
1151 } else {
1152 panic!("Expected a paragraph");
1153 };
1154 assert_eq!(
1155 paragraph[0],
1156 MarkdownParagraphChunk::Image(Image {
1157 source_range: 0..111,
1158 link: Link::Web {
1159 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1160 },
1161 alt_text: Some("test".into()),
1162 height: None,
1163 width: None,
1164 },)
1165 );
1166 }
1167
1168 #[gpui::test]
1169 async fn test_image_without_alt_text() {
1170 let parsed = parse("").await;
1171
1172 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1173 text
1174 } else {
1175 panic!("Expected a paragraph");
1176 };
1177 assert_eq!(
1178 paragraph[0],
1179 MarkdownParagraphChunk::Image(Image {
1180 source_range: 0..31,
1181 link: Link::Web {
1182 url: "http://example.com/foo.png".to_string(),
1183 },
1184 alt_text: None,
1185 height: None,
1186 width: None,
1187 },)
1188 );
1189 }
1190
1191 #[gpui::test]
1192 async fn test_image_with_alt_text_containing_formatting() {
1193 let parsed = parse("").await;
1194
1195 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1196 panic!("Expected a paragraph");
1197 };
1198 assert_eq!(
1199 chunks,
1200 &[MarkdownParagraphChunk::Image(Image {
1201 source_range: 0..44,
1202 link: Link::Web {
1203 url: "http://example.com/foo.png".to_string(),
1204 },
1205 alt_text: Some("foo bar baz".into()),
1206 height: None,
1207 width: None,
1208 }),],
1209 );
1210 }
1211
1212 #[gpui::test]
1213 async fn test_images_with_text_in_between() {
1214 let parsed = parse(
1215 "\nLorem Ipsum\n",
1216 )
1217 .await;
1218
1219 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1220 text
1221 } else {
1222 panic!("Expected a paragraph");
1223 };
1224 assert_eq!(
1225 chunks,
1226 &vec![
1227 MarkdownParagraphChunk::Image(Image {
1228 source_range: 0..81,
1229 link: Link::Web {
1230 url: "http://example.com/foo.png".to_string(),
1231 },
1232 alt_text: Some("foo".into()),
1233 height: None,
1234 width: None,
1235 }),
1236 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1237 source_range: 0..81,
1238 contents: " Lorem Ipsum ".into(),
1239 highlights: Vec::new(),
1240 region_ranges: Vec::new(),
1241 regions: Vec::new(),
1242 }),
1243 MarkdownParagraphChunk::Image(Image {
1244 source_range: 0..81,
1245 link: Link::Web {
1246 url: "http://example.com/bar.png".to_string(),
1247 },
1248 alt_text: Some("bar".into()),
1249 height: None,
1250 width: None,
1251 })
1252 ]
1253 );
1254 }
1255
#[test]
fn test_parse_html_element_dimension() {
    // Shorthand constructors for the two kinds of expected lengths.
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels = |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    // Each entry pairs a raw attribute value with the length it should parse to.
    let cases = [
        // Percentage values map to a fraction (percentage divided by 100).
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Explicit pixel values.
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are treated as pixels.
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values are rejected.
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values are accepted in every form.
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            MarkdownParser::parse_html_element_dimension(input),
            expected,
            "unexpected dimension for input {input:?}"
        );
    }
}
1325
1326 #[gpui::test]
1327 async fn test_inline_html_image_tag() {
1328 let parsed =
1329 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1330 .await;
1331
1332 assert_eq!(
1333 ParsedMarkdown {
1334 children: vec![ParsedMarkdownElement::Paragraph(vec![
1335 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1336 source_range: 0..71,
1337 contents: "Some text".into(),
1338 highlights: Default::default(),
1339 region_ranges: Default::default(),
1340 regions: Default::default()
1341 }),
1342 MarkdownParagraphChunk::Image(Image {
1343 source_range: 0..71,
1344 link: Link::Web {
1345 url: "http://example.com/foo.png".to_string(),
1346 },
1347 alt_text: None,
1348 height: None,
1349 width: None,
1350 }),
1351 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1352 source_range: 0..71,
1353 contents: " some more text".into(),
1354 highlights: Default::default(),
1355 region_ranges: Default::default(),
1356 regions: Default::default()
1357 }),
1358 ])]
1359 },
1360 parsed
1361 );
1362 }
1363
1364 #[gpui::test]
1365 async fn test_html_image_tag() {
1366 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1367
1368 assert_eq!(
1369 ParsedMarkdown {
1370 children: vec![ParsedMarkdownElement::Image(Image {
1371 source_range: 0..40,
1372 link: Link::Web {
1373 url: "http://example.com/foo.png".to_string(),
1374 },
1375 alt_text: None,
1376 height: None,
1377 width: None,
1378 })]
1379 },
1380 parsed
1381 );
1382 }
1383
1384 #[gpui::test]
1385 async fn test_html_image_tag_with_alt_text() {
1386 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1387
1388 assert_eq!(
1389 ParsedMarkdown {
1390 children: vec![ParsedMarkdownElement::Image(Image {
1391 source_range: 0..50,
1392 link: Link::Web {
1393 url: "http://example.com/foo.png".to_string(),
1394 },
1395 alt_text: Some("Foo".into()),
1396 height: None,
1397 width: None,
1398 })]
1399 },
1400 parsed
1401 );
1402 }
1403
1404 #[gpui::test]
1405 async fn test_html_image_tag_with_height_and_width() {
1406 let parsed =
1407 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1408
1409 assert_eq!(
1410 ParsedMarkdown {
1411 children: vec![ParsedMarkdownElement::Image(Image {
1412 source_range: 0..65,
1413 link: Link::Web {
1414 url: "http://example.com/foo.png".to_string(),
1415 },
1416 alt_text: None,
1417 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1418 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1419 })]
1420 },
1421 parsed
1422 );
1423 }
1424
1425 #[gpui::test]
1426 async fn test_html_image_style_tag_with_height_and_width() {
1427 let parsed = parse(
1428 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1429 )
1430 .await;
1431
1432 assert_eq!(
1433 ParsedMarkdown {
1434 children: vec![ParsedMarkdownElement::Image(Image {
1435 source_range: 0..75,
1436 link: Link::Web {
1437 url: "http://example.com/foo.png".to_string(),
1438 },
1439 alt_text: None,
1440 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1441 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1442 })]
1443 },
1444 parsed
1445 );
1446 }
1447
1448 #[gpui::test]
1449 async fn test_header_only_table() {
1450 let markdown = "\
1451| Header 1 | Header 2 |
1452|----------|----------|
1453
1454Some other content
1455";
1456
1457 let expected_table = table(
1458 0..48,
1459 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1460 vec![],
1461 );
1462
1463 assert_eq!(
1464 parse(markdown).await.children[0],
1465 ParsedMarkdownElement::Table(expected_table)
1466 );
1467 }
1468
1469 #[gpui::test]
1470 async fn test_basic_table() {
1471 let markdown = "\
1472| Header 1 | Header 2 |
1473|----------|----------|
1474| Cell 1 | Cell 2 |
1475| Cell 3 | Cell 4 |";
1476
1477 let expected_table = table(
1478 0..95,
1479 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1480 vec![
1481 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1482 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1483 ],
1484 );
1485
1486 assert_eq!(
1487 parse(markdown).await.children[0],
1488 ParsedMarkdownElement::Table(expected_table)
1489 );
1490 }
1491
1492 #[gpui::test]
1493 async fn test_list_basic() {
1494 let parsed = parse(
1495 "\
1496* Item 1
1497* Item 2
1498* Item 3
1499",
1500 )
1501 .await;
1502
1503 assert_eq!(
1504 parsed.children,
1505 vec![
1506 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1507 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1508 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1509 ],
1510 );
1511 }
1512
1513 #[gpui::test]
1514 async fn test_list_with_tasks() {
1515 let parsed = parse(
1516 "\
1517- [ ] TODO
1518- [x] Checked
1519",
1520 )
1521 .await;
1522
1523 assert_eq!(
1524 parsed.children,
1525 vec![
1526 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1527 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1528 ],
1529 );
1530 }
1531
1532 #[gpui::test]
1533 async fn test_list_with_indented_task() {
1534 let parsed = parse(
1535 "\
1536- [ ] TODO
1537 - [x] Checked
1538 - Unordered
1539 1. Number 1
1540 1. Number 2
15411. Number A
1542",
1543 )
1544 .await;
1545
1546 assert_eq!(
1547 parsed.children,
1548 vec![
1549 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1550 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1551 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1552 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1553 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1554 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1555 ],
1556 );
1557 }
1558
1559 #[gpui::test]
1560 async fn test_list_with_linebreak_is_handled_correctly() {
1561 let parsed = parse(
1562 "\
1563- [ ] Task 1
1564
1565- [x] Task 2
1566",
1567 )
1568 .await;
1569
1570 assert_eq!(
1571 parsed.children,
1572 vec![
1573 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1574 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1575 ],
1576 );
1577 }
1578
1579 #[gpui::test]
1580 async fn test_list_nested() {
1581 let parsed = parse(
1582 "\
1583* Item 1
1584* Item 2
1585* Item 3
1586
15871. Hello
15881. Two
1589 1. Three
15902. Four
15913. Five
1592
1593* First
1594 1. Hello
1595 1. Goodbyte
1596 - Inner
1597 - Inner
1598 2. Goodbyte
1599 - Next item empty
1600 -
1601* Last
1602",
1603 )
1604 .await;
1605
1606 assert_eq!(
1607 parsed.children,
1608 vec![
1609 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1610 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1611 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1612 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1613 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1614 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1615 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1616 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1617 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1618 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1619 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1620 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1621 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1622 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1623 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1624 list_item(186..190, 3, Unordered, vec![]),
1625 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1626 ]
1627 );
1628 }
1629
1630 #[gpui::test]
1631 async fn test_list_with_nested_content() {
1632 let parsed = parse(
1633 "\
1634* This is a list item with two paragraphs.
1635
1636 This is the second paragraph in the list item.
1637",
1638 )
1639 .await;
1640
1641 assert_eq!(
1642 parsed.children,
1643 vec![list_item(
1644 0..96,
1645 1,
1646 Unordered,
1647 vec![
1648 p("This is a list item with two paragraphs.", 4..44),
1649 p("This is the second paragraph in the list item.", 50..97)
1650 ],
1651 ),],
1652 );
1653 }
1654
1655 #[gpui::test]
1656 async fn test_list_item_with_inline_html() {
1657 let parsed = parse(
1658 "\
1659* This is a list item with an inline HTML <sometag>tag</sometag>.
1660",
1661 )
1662 .await;
1663
1664 assert_eq!(
1665 parsed.children,
1666 vec![list_item(
1667 0..67,
1668 1,
1669 Unordered,
1670 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1671 ),],
1672 );
1673 }
1674
1675 #[gpui::test]
1676 async fn test_nested_list_with_paragraph_inside() {
1677 let parsed = parse(
1678 "\
16791. a
1680 1. b
1681 1. c
1682
1683 text
1684
1685 1. d
1686",
1687 )
1688 .await;
1689
1690 assert_eq!(
1691 parsed.children,
1692 vec![
1693 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1694 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1695 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1696 p("text", 32..37),
1697 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1698 ],
1699 );
1700 }
1701
1702 #[gpui::test]
1703 async fn test_list_with_leading_text() {
1704 let parsed = parse(
1705 "\
1706* `code`
1707* **bold**
1708* [link](https://example.com)
1709",
1710 )
1711 .await;
1712
1713 assert_eq!(
1714 parsed.children,
1715 vec![
1716 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1717 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1718 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1719 ],
1720 );
1721 }
1722
1723 #[gpui::test]
1724 async fn test_simple_block_quote() {
1725 let parsed = parse("> Simple block quote with **styled text**").await;
1726
1727 assert_eq!(
1728 parsed.children,
1729 vec![block_quote(
1730 vec![p("Simple block quote with styled text", 2..41)],
1731 0..41
1732 )]
1733 );
1734 }
1735
1736 #[gpui::test]
1737 async fn test_simple_block_quote_with_multiple_lines() {
1738 let parsed = parse(
1739 "\
1740> # Heading
1741> More
1742> text
1743>
1744> More text
1745",
1746 )
1747 .await;
1748
1749 assert_eq!(
1750 parsed.children,
1751 vec![block_quote(
1752 vec![
1753 h1(text("Heading", 4..11), 2..12),
1754 p("More text", 14..26),
1755 p("More text", 30..40)
1756 ],
1757 0..40
1758 )]
1759 );
1760 }
1761
1762 #[gpui::test]
1763 async fn test_nested_block_quote() {
1764 let parsed = parse(
1765 "\
1766> A
1767>
1768> > # B
1769>
1770> C
1771
1772More text
1773",
1774 )
1775 .await;
1776
1777 assert_eq!(
1778 parsed.children,
1779 vec![
1780 block_quote(
1781 vec![
1782 p("A", 2..4),
1783 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
1784 p("C", 18..20)
1785 ],
1786 0..20
1787 ),
1788 p("More text", 21..31)
1789 ]
1790 );
1791 }
1792
1793 #[gpui::test]
1794 async fn test_code_block() {
1795 let parsed = parse(
1796 "\
1797```
1798fn main() {
1799 return 0;
1800}
1801```
1802",
1803 )
1804 .await;
1805
1806 assert_eq!(
1807 parsed.children,
1808 vec![code_block(
1809 None,
1810 "fn main() {\n return 0;\n}",
1811 0..35,
1812 None
1813 )]
1814 );
1815 }
1816
1817 #[gpui::test]
1818 async fn test_code_block_with_language(executor: BackgroundExecutor) {
1819 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
1820 language_registry.add(rust_lang());
1821
1822 let parsed = parse_markdown(
1823 "\
1824```rust
1825fn main() {
1826 return 0;
1827}
1828```
1829",
1830 None,
1831 Some(language_registry),
1832 )
1833 .await;
1834
1835 assert_eq!(
1836 parsed.children,
1837 vec![code_block(
1838 Some("rust".to_string()),
1839 "fn main() {\n return 0;\n}",
1840 0..39,
1841 Some(vec![])
1842 )]
1843 );
1844 }
1845
1846 fn rust_lang() -> Arc<Language> {
1847 Arc::new(Language::new(
1848 LanguageConfig {
1849 name: "Rust".into(),
1850 matcher: LanguageMatcher {
1851 path_suffixes: vec!["rs".into()],
1852 ..Default::default()
1853 },
1854 collapsed_placeholder: " /* ... */ ".to_string(),
1855 ..Default::default()
1856 },
1857 Some(tree_sitter_rust::LANGUAGE.into()),
1858 ))
1859 }
1860
1861 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1862 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1863 source_range,
1864 level: HeadingLevel::H1,
1865 contents,
1866 })
1867 }
1868
1869 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1870 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1871 source_range,
1872 level: HeadingLevel::H2,
1873 contents,
1874 })
1875 }
1876
1877 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
1878 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1879 source_range,
1880 level: HeadingLevel::H3,
1881 contents,
1882 })
1883 }
1884
1885 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
1886 ParsedMarkdownElement::Paragraph(text(contents, source_range))
1887 }
1888
1889 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
1890 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1891 highlights: Vec::new(),
1892 region_ranges: Vec::new(),
1893 regions: Vec::new(),
1894 source_range,
1895 contents: contents.to_string().into(),
1896 })]
1897 }
1898
1899 fn block_quote(
1900 children: Vec<ParsedMarkdownElement>,
1901 source_range: Range<usize>,
1902 ) -> ParsedMarkdownElement {
1903 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
1904 source_range,
1905 children,
1906 })
1907 }
1908
1909 fn code_block(
1910 language: Option<String>,
1911 code: &str,
1912 source_range: Range<usize>,
1913 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
1914 ) -> ParsedMarkdownElement {
1915 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
1916 source_range,
1917 language,
1918 contents: code.to_string().into(),
1919 highlights,
1920 })
1921 }
1922
1923 fn list_item(
1924 source_range: Range<usize>,
1925 depth: u16,
1926 item_type: ParsedMarkdownListItemType,
1927 content: Vec<ParsedMarkdownElement>,
1928 ) -> ParsedMarkdownElement {
1929 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1930 source_range,
1931 item_type,
1932 depth,
1933 content,
1934 })
1935 }
1936
1937 fn table(
1938 source_range: Range<usize>,
1939 header: ParsedMarkdownTableRow,
1940 body: Vec<ParsedMarkdownTableRow>,
1941 ) -> ParsedMarkdownTable {
1942 ParsedMarkdownTable {
1943 column_alignments: Vec::new(),
1944 source_range,
1945 header,
1946 body,
1947 }
1948 }
1949
1950 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
1951 ParsedMarkdownTableRow { children }
1952 }
1953
1954 impl PartialEq for ParsedMarkdownTable {
1955 fn eq(&self, other: &Self) -> bool {
1956 self.source_range == other.source_range
1957 && self.header == other.header
1958 && self.body == other.body
1959 }
1960 }
1961
1962 impl PartialEq for ParsedMarkdownText {
1963 fn eq(&self, other: &Self) -> bool {
1964 self.source_range == other.source_range && self.contents == other.contents
1965 }
1966 }
1967}