1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15
16pub async fn parse_markdown(
17 markdown_input: &str,
18 file_location_directory: Option<PathBuf>,
19 language_registry: Option<Arc<LanguageRegistry>>,
20) -> ParsedMarkdown {
21 let mut options = Options::all();
22 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
23
24 let parser = Parser::new_ext(markdown_input, options);
25 let parser = MarkdownParser::new(
26 parser.into_offset_iter().collect(),
27 file_location_directory,
28 language_registry,
29 );
30 let renderer = parser.parse_document().await;
31 ParsedMarkdown {
32 children: renderer.parsed,
33 }
34}
35
36fn cleanup_html(source: &str) -> Vec<u8> {
37 let mut writer = std::io::Cursor::new(Vec::new());
38 let mut reader = std::io::Cursor::new(source);
39 let mut minify = Minifier::new(
40 &mut writer,
41 MinifierOptions {
42 omit_doctype: true,
43 collapse_whitespace: true,
44 ..Default::default()
45 },
46 );
47 if let Ok(()) = minify.minify(&mut reader) {
48 writer.into_inner()
49 } else {
50 source.bytes().collect()
51 }
52}
53
/// Cursor-based parser that turns a flat list of pulldown-cmark events
/// (each paired with its byte range in the source) into
/// [`ParsedMarkdownElement`]s.
struct MarkdownParser<'a> {
    /// The events to parse, each with the source range it was produced from
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` / `Image::identify` when resolving link and
    /// image destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks
    language_registry: Option<Arc<LanguageRegistry>>,
}
63
/// A list item that is still being assembled while `parse_list` walks the
/// events between the item's start and end tags.
struct MarkdownListItem {
    /// The blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task
    item_type: ParsedMarkdownListItemType,
}
68
69impl Default for MarkdownListItem {
70 fn default() -> Self {
71 Self {
72 content: Vec::new(),
73 item_type: ParsedMarkdownListItemType::Unordered,
74 }
75 }
76}
77
78impl<'a> MarkdownParser<'a> {
79 fn new(
80 tokens: Vec<(Event<'a>, Range<usize>)>,
81 file_location_directory: Option<PathBuf>,
82 language_registry: Option<Arc<LanguageRegistry>>,
83 ) -> Self {
84 Self {
85 tokens,
86 file_location_directory,
87 language_registry,
88 cursor: 0,
89 parsed: vec![],
90 }
91 }
92
93 fn eof(&self) -> bool {
94 if self.tokens.is_empty() {
95 return true;
96 }
97 self.cursor >= self.tokens.len() - 1
98 }
99
100 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
101 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
102 return self.tokens.last();
103 }
104 self.tokens.get(self.cursor + steps)
105 }
106
107 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
108 if self.cursor == 0 || self.cursor > self.tokens.len() {
109 return None;
110 }
111 self.tokens.get(self.cursor - 1)
112 }
113
114 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
115 self.peek(0)
116 }
117
118 fn current_event(&self) -> Option<&Event<'_>> {
119 self.current().map(|(event, _)| event)
120 }
121
122 fn is_text_like(event: &Event) -> bool {
123 match event {
124 Event::Text(_)
125 // Represent an inline code block
126 | Event::Code(_)
127 | Event::Html(_)
128 | Event::InlineHtml(_)
129 | Event::FootnoteReference(_)
130 | Event::Start(Tag::Link { .. })
131 | Event::Start(Tag::Emphasis)
132 | Event::Start(Tag::Strong)
133 | Event::Start(Tag::Strikethrough)
134 | Event::Start(Tag::Image { .. }) => {
135 true
136 }
137 _ => false,
138 }
139 }
140
141 async fn parse_document(mut self) -> Self {
142 while !self.eof() {
143 if let Some(block) = self.parse_block().await {
144 self.parsed.extend(block);
145 } else {
146 self.cursor += 1;
147 }
148 }
149 self
150 }
151
152 #[async_recursion]
153 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
154 let (current, source_range) = self.current().unwrap();
155 let source_range = source_range.clone();
156 match current {
157 Event::Start(tag) => match tag {
158 Tag::Paragraph => {
159 self.cursor += 1;
160 let text = self.parse_text(false, Some(source_range));
161 Some(vec![ParsedMarkdownElement::Paragraph(text)])
162 }
163 Tag::Heading { level, .. } => {
164 let level = *level;
165 self.cursor += 1;
166 let heading = self.parse_heading(level);
167 Some(vec![ParsedMarkdownElement::Heading(heading)])
168 }
169 Tag::Table(alignment) => {
170 let alignment = alignment.clone();
171 self.cursor += 1;
172 let table = self.parse_table(alignment);
173 Some(vec![ParsedMarkdownElement::Table(table)])
174 }
175 Tag::List(order) => {
176 let order = *order;
177 self.cursor += 1;
178 let list = self.parse_list(order).await;
179 Some(list)
180 }
181 Tag::BlockQuote(_kind) => {
182 self.cursor += 1;
183 let block_quote = self.parse_block_quote().await;
184 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
185 }
186 Tag::CodeBlock(kind) => {
187 let language = match kind {
188 pulldown_cmark::CodeBlockKind::Indented => None,
189 pulldown_cmark::CodeBlockKind::Fenced(language) => {
190 if language.is_empty() {
191 None
192 } else {
193 Some(language.to_string())
194 }
195 }
196 };
197
198 self.cursor += 1;
199
200 let code_block = self.parse_code_block(language).await?;
201 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
202 }
203 Tag::HtmlBlock => {
204 self.cursor += 1;
205
206 Some(self.parse_html_block().await)
207 }
208 _ => None,
209 },
210 Event::Rule => {
211 self.cursor += 1;
212 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
213 }
214 _ => None,
215 }
216 }
217
218 fn parse_text(
219 &mut self,
220 should_complete_on_soft_break: bool,
221 source_range: Option<Range<usize>>,
222 ) -> MarkdownParagraph {
223 let source_range = source_range.unwrap_or_else(|| {
224 self.current()
225 .map(|(_, range)| range.clone())
226 .unwrap_or_default()
227 });
228
229 let mut markdown_text_like = Vec::new();
230 let mut text = String::new();
231 let mut bold_depth = 0;
232 let mut italic_depth = 0;
233 let mut strikethrough_depth = 0;
234 let mut link: Option<Link> = None;
235 let mut image: Option<Image> = None;
236 let mut region_ranges: Vec<Range<usize>> = vec![];
237 let mut regions: Vec<ParsedRegion> = vec![];
238 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
239 let mut link_urls: Vec<String> = vec![];
240 let mut link_ranges: Vec<Range<usize>> = vec![];
241
242 loop {
243 if self.eof() {
244 break;
245 }
246
247 let (current, _) = self.current().unwrap();
248 let prev_len = text.len();
249 match current {
250 Event::SoftBreak => {
251 if should_complete_on_soft_break {
252 break;
253 }
254 text.push(' ');
255 }
256
257 Event::HardBreak => {
258 text.push('\n');
259 }
260
261 // We want to ignore any inline HTML tags in the text but keep
262 // the text between them
263 Event::InlineHtml(_) => {}
264
265 Event::Text(t) => {
266 text.push_str(t.as_ref());
267 let mut style = MarkdownHighlightStyle::default();
268
269 if bold_depth > 0 {
270 style.weight = FontWeight::BOLD;
271 }
272
273 if italic_depth > 0 {
274 style.italic = true;
275 }
276
277 if strikethrough_depth > 0 {
278 style.strikethrough = true;
279 }
280
281 let last_run_len = if let Some(link) = link.clone() {
282 region_ranges.push(prev_len..text.len());
283 regions.push(ParsedRegion {
284 code: false,
285 link: Some(link),
286 });
287 style.link = true;
288 prev_len
289 } else {
290 // Manually scan for links
291 let mut finder = linkify::LinkFinder::new();
292 finder.kinds(&[linkify::LinkKind::Url]);
293 let mut last_link_len = prev_len;
294 for link in finder.links(t) {
295 let start = prev_len + link.start();
296 let end = prev_len + link.end();
297 let range = start..end;
298 link_ranges.push(range.clone());
299 link_urls.push(link.as_str().to_string());
300
301 // If there is a style before we match a link, we have to add this to the highlighted ranges
302 if style != MarkdownHighlightStyle::default() && last_link_len < start {
303 highlights.push((
304 last_link_len..start,
305 MarkdownHighlight::Style(style.clone()),
306 ));
307 }
308
309 highlights.push((
310 range.clone(),
311 MarkdownHighlight::Style(MarkdownHighlightStyle {
312 underline: true,
313 ..style
314 }),
315 ));
316 region_ranges.push(range.clone());
317 regions.push(ParsedRegion {
318 code: false,
319 link: Some(Link::Web {
320 url: link.as_str().to_string(),
321 }),
322 });
323 last_link_len = end;
324 }
325 last_link_len
326 };
327
328 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
329 let mut new_highlight = true;
330 if let Some((last_range, last_style)) = highlights.last_mut()
331 && last_range.end == last_run_len
332 && last_style == &MarkdownHighlight::Style(style.clone())
333 {
334 last_range.end = text.len();
335 new_highlight = false;
336 }
337 if new_highlight {
338 highlights.push((
339 last_run_len..text.len(),
340 MarkdownHighlight::Style(style.clone()),
341 ));
342 }
343 }
344 }
345 Event::Code(t) => {
346 text.push_str(t.as_ref());
347 region_ranges.push(prev_len..text.len());
348
349 if link.is_some() {
350 highlights.push((
351 prev_len..text.len(),
352 MarkdownHighlight::Style(MarkdownHighlightStyle {
353 link: true,
354 ..Default::default()
355 }),
356 ));
357 }
358 regions.push(ParsedRegion {
359 code: true,
360 link: link.clone(),
361 });
362 }
363 Event::Start(tag) => match tag {
364 Tag::Emphasis => italic_depth += 1,
365 Tag::Strong => bold_depth += 1,
366 Tag::Strikethrough => strikethrough_depth += 1,
367 Tag::Link { dest_url, .. } => {
368 link = Link::identify(
369 self.file_location_directory.clone(),
370 dest_url.to_string(),
371 );
372 }
373 Tag::Image { dest_url, .. } => {
374 if !text.is_empty() {
375 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
376 source_range: source_range.clone(),
377 contents: mem::take(&mut text).into(),
378 highlights: mem::take(&mut highlights),
379 region_ranges: mem::take(&mut region_ranges),
380 regions: mem::take(&mut regions),
381 });
382 markdown_text_like.push(parsed_regions);
383 }
384 image = Image::identify(
385 dest_url.to_string(),
386 source_range.clone(),
387 self.file_location_directory.clone(),
388 );
389 }
390 _ => {
391 break;
392 }
393 },
394
395 Event::End(tag) => match tag {
396 TagEnd::Emphasis => italic_depth -= 1,
397 TagEnd::Strong => bold_depth -= 1,
398 TagEnd::Strikethrough => strikethrough_depth -= 1,
399 TagEnd::Link => {
400 link = None;
401 }
402 TagEnd::Image => {
403 if let Some(mut image) = image.take() {
404 if !text.is_empty() {
405 image.set_alt_text(std::mem::take(&mut text).into());
406 mem::take(&mut highlights);
407 mem::take(&mut region_ranges);
408 mem::take(&mut regions);
409 }
410 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
411 }
412 }
413 TagEnd::Paragraph => {
414 self.cursor += 1;
415 break;
416 }
417 _ => {
418 break;
419 }
420 },
421 _ => {
422 break;
423 }
424 }
425
426 self.cursor += 1;
427 }
428 if !text.is_empty() {
429 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
430 source_range,
431 contents: text.into(),
432 highlights,
433 regions,
434 region_ranges,
435 }));
436 }
437 markdown_text_like
438 }
439
440 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
441 let (_event, source_range) = self.previous().unwrap();
442 let source_range = source_range.clone();
443 let text = self.parse_text(true, None);
444
445 // Advance past the heading end tag
446 self.cursor += 1;
447
448 ParsedMarkdownHeading {
449 source_range,
450 level: match level {
451 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
452 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
453 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
454 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
455 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
456 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
457 },
458 contents: text,
459 }
460 }
461
462 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
463 let (_event, source_range) = self.previous().unwrap();
464 let source_range = source_range.clone();
465 let mut header = ParsedMarkdownTableRow::new();
466 let mut body = vec![];
467 let mut current_row = vec![];
468 let mut in_header = true;
469 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
470
471 loop {
472 if self.eof() {
473 break;
474 }
475
476 let (current, source_range) = self.current().unwrap();
477 let source_range = source_range.clone();
478 match current {
479 Event::Start(Tag::TableHead)
480 | Event::Start(Tag::TableRow)
481 | Event::End(TagEnd::TableCell) => {
482 self.cursor += 1;
483 }
484 Event::Start(Tag::TableCell) => {
485 self.cursor += 1;
486 let cell_contents = self.parse_text(false, Some(source_range));
487 current_row.push(cell_contents);
488 }
489 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
490 self.cursor += 1;
491 let new_row = std::mem::take(&mut current_row);
492 if in_header {
493 header.children = new_row;
494 in_header = false;
495 } else {
496 let row = ParsedMarkdownTableRow::with_children(new_row);
497 body.push(row);
498 }
499 }
500 Event::End(TagEnd::Table) => {
501 self.cursor += 1;
502 break;
503 }
504 _ => {
505 break;
506 }
507 }
508 }
509
510 ParsedMarkdownTable {
511 source_range,
512 header,
513 body,
514 column_alignments,
515 }
516 }
517
518 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
519 match alignment {
520 Alignment::None => ParsedMarkdownTableAlignment::None,
521 Alignment::Left => ParsedMarkdownTableAlignment::Left,
522 Alignment::Center => ParsedMarkdownTableAlignment::Center,
523 Alignment::Right => ParsedMarkdownTableAlignment::Right,
524 }
525 }
526
    /// Parses a list whose start tag has just been consumed and returns its
    /// items as a flat vector of elements; nesting is recorded in each item's
    /// `depth` rather than in the structure of the result.
    ///
    /// `order` is the starting number of an ordered list, or `None` for an
    /// unordered one. Panics if there is no previous token.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        let mut items = Vec::new();
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends
        let mut order_stack = Vec::new();

        // Per depth: the position in `items` at which an item has to be inserted
        // once it is complete, so that it ends up before its nested items
        let mut insertion_indices = FxHashMap::default();
        // Per depth: the source range of an item that contains a nested list
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list we were asked to parse has ended
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block instead of inline text
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // Skip a paragraph end tag that is still pending at this point
                    // NOTE(review): presumably the counterpart of the paragraph start skipped above — confirm
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the next number
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained a nested list goes before the
                        // nested items that were already emitted
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we know how to parse; skip the event
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
677
678 #[async_recursion]
679 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
680 let (_event, source_range) = self.previous().unwrap();
681 let source_range = source_range.clone();
682 let mut nested_depth = 1;
683
684 let mut children: Vec<ParsedMarkdownElement> = vec![];
685
686 while !self.eof() {
687 let block = self.parse_block().await;
688
689 if let Some(block) = block {
690 children.extend(block);
691 } else {
692 break;
693 }
694
695 if self.eof() {
696 break;
697 }
698
699 let (current, _source_range) = self.current().unwrap();
700 match current {
701 // This is a nested block quote.
702 // Record that we're in a nested block quote and continue parsing.
703 // We don't need to advance the cursor since the next
704 // call to `parse_block` will handle it.
705 Event::Start(Tag::BlockQuote(_kind)) => {
706 nested_depth += 1;
707 }
708 Event::End(TagEnd::BlockQuote(_kind)) => {
709 nested_depth -= 1;
710 if nested_depth == 0 {
711 self.cursor += 1;
712 break;
713 }
714 }
715 _ => {}
716 };
717 }
718
719 ParsedMarkdownBlockQuote {
720 source_range,
721 children,
722 }
723 }
724
725 async fn parse_code_block(
726 &mut self,
727 language: Option<String>,
728 ) -> Option<ParsedMarkdownCodeBlock> {
729 let Some((_event, source_range)) = self.previous() else {
730 return None;
731 };
732
733 let source_range = source_range.clone();
734 let mut code = String::new();
735
736 while !self.eof() {
737 let Some((current, _source_range)) = self.current() else {
738 break;
739 };
740
741 match current {
742 Event::Text(text) => {
743 code.push_str(text);
744 self.cursor += 1;
745 }
746 Event::End(TagEnd::CodeBlock) => {
747 self.cursor += 1;
748 break;
749 }
750 _ => {
751 break;
752 }
753 }
754 }
755
756 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
757
758 let highlights = if let Some(language) = &language {
759 if let Some(registry) = &self.language_registry {
760 let rope: language::Rope = code.as_str().into();
761 registry
762 .language_for_name_or_extension(language)
763 .await
764 .map(|l| l.highlight_text(&rope, 0..code.len()))
765 .ok()
766 } else {
767 None
768 }
769 } else {
770 None
771 };
772
773 Some(ParsedMarkdownCodeBlock {
774 source_range,
775 contents: code.into(),
776 language,
777 highlights,
778 })
779 }
780
781 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
782 let mut elements = Vec::new();
783 let Some((_event, _source_range)) = self.previous() else {
784 return elements;
785 };
786
787 let mut html_source_range_start = None;
788 let mut html_source_range_end = None;
789 let mut html_buffer = String::new();
790
791 while !self.eof() {
792 let Some((current, source_range)) = self.current() else {
793 break;
794 };
795 let source_range = source_range.clone();
796 match current {
797 Event::Html(html) => {
798 html_source_range_start.get_or_insert(source_range.start);
799 html_source_range_end = Some(source_range.end);
800 html_buffer.push_str(html);
801 self.cursor += 1;
802 }
803 Event::End(TagEnd::CodeBlock) => {
804 self.cursor += 1;
805 break;
806 }
807 _ => {
808 break;
809 }
810 }
811 }
812
813 let bytes = cleanup_html(&html_buffer);
814
815 let mut cursor = std::io::Cursor::new(bytes);
816 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
817 .from_utf8()
818 .read_from(&mut cursor)
819 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
820 {
821 self.parse_html_node(start..end, &dom.document, &mut elements);
822 }
823
824 elements
825 }
826
827 fn parse_html_node(
828 &self,
829 source_range: Range<usize>,
830 node: &Rc<markup5ever_rcdom::Node>,
831 elements: &mut Vec<ParsedMarkdownElement>,
832 ) {
833 match &node.data {
834 markup5ever_rcdom::NodeData::Document => {
835 self.consume_children(source_range, node, elements);
836 }
837 markup5ever_rcdom::NodeData::Text { contents } => {
838 elements.push(ParsedMarkdownElement::Paragraph(vec![
839 MarkdownParagraphChunk::Text(ParsedMarkdownText {
840 source_range,
841 regions: Vec::default(),
842 region_ranges: Vec::default(),
843 highlights: Vec::default(),
844 contents: contents.borrow().to_string().into(),
845 }),
846 ]));
847 }
848 markup5ever_rcdom::NodeData::Comment { .. } => {}
849 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
850 if local_name!("img") == name.local {
851 if let Some(image) = self.extract_image(source_range, attrs) {
852 elements.push(ParsedMarkdownElement::Image(image));
853 }
854 } else if local_name!("p") == name.local {
855 let mut paragraph = MarkdownParagraph::new();
856 self.parse_paragraph(source_range, node, &mut paragraph);
857
858 if !paragraph.is_empty() {
859 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
860 }
861 } else if matches!(
862 name.local,
863 local_name!("h1")
864 | local_name!("h2")
865 | local_name!("h3")
866 | local_name!("h4")
867 | local_name!("h5")
868 | local_name!("h6")
869 ) {
870 let mut paragraph = MarkdownParagraph::new();
871 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
872
873 if !paragraph.is_empty() {
874 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
875 source_range,
876 level: match name.local {
877 local_name!("h1") => HeadingLevel::H1,
878 local_name!("h2") => HeadingLevel::H2,
879 local_name!("h3") => HeadingLevel::H3,
880 local_name!("h4") => HeadingLevel::H4,
881 local_name!("h5") => HeadingLevel::H5,
882 local_name!("h6") => HeadingLevel::H6,
883 _ => unreachable!(),
884 },
885 contents: paragraph,
886 }));
887 }
888 } else if local_name!("blockquote") == name.local {
889 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
890 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
891 }
892 } else if local_name!("table") == name.local {
893 if let Some(table) = self.extract_html_table(node, source_range) {
894 elements.push(ParsedMarkdownElement::Table(table));
895 }
896 } else {
897 self.consume_children(source_range, node, elements);
898 }
899 }
900 _ => {}
901 }
902 }
903
904 fn parse_paragraph(
905 &self,
906 source_range: Range<usize>,
907 node: &Rc<markup5ever_rcdom::Node>,
908 paragraph: &mut MarkdownParagraph,
909 ) {
910 match &node.data {
911 markup5ever_rcdom::NodeData::Text { contents } => {
912 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
913 source_range,
914 regions: Vec::default(),
915 region_ranges: Vec::default(),
916 highlights: Vec::default(),
917 contents: contents.borrow().to_string().into(),
918 }));
919 }
920 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
921 if local_name!("img") == name.local {
922 if let Some(image) = self.extract_image(source_range, attrs) {
923 paragraph.push(MarkdownParagraphChunk::Image(image));
924 }
925 } else {
926 self.consume_paragraph(source_range, node, paragraph);
927 }
928 }
929 _ => {}
930 }
931 }
932
933 fn consume_paragraph(
934 &self,
935 source_range: Range<usize>,
936 node: &Rc<markup5ever_rcdom::Node>,
937 paragraph: &mut MarkdownParagraph,
938 ) {
939 for node in node.children.borrow().iter() {
940 self.parse_paragraph(source_range.clone(), node, paragraph);
941 }
942 }
943
944 fn consume_children(
945 &self,
946 source_range: Range<usize>,
947 node: &Rc<markup5ever_rcdom::Node>,
948 elements: &mut Vec<ParsedMarkdownElement>,
949 ) {
950 for node in node.children.borrow().iter() {
951 self.parse_html_node(source_range.clone(), node, elements);
952 }
953 }
954
955 fn attr_value(
956 attrs: &RefCell<Vec<html5ever::Attribute>>,
957 name: html5ever::LocalName,
958 ) -> Option<String> {
959 attrs.borrow().iter().find_map(|attr| {
960 if attr.name.local == name {
961 Some(attr.value.to_string())
962 } else {
963 None
964 }
965 })
966 }
967
968 fn extract_styles_from_attributes(
969 attrs: &RefCell<Vec<html5ever::Attribute>>,
970 ) -> HashMap<String, String> {
971 let mut styles = HashMap::new();
972
973 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
974 for decl in style.split(';') {
975 let mut parts = decl.splitn(2, ':');
976 if let Some((key, value)) = parts.next().zip(parts.next()) {
977 styles.insert(
978 key.trim().to_lowercase().to_string(),
979 value.trim().to_string(),
980 );
981 }
982 }
983 }
984
985 styles
986 }
987
988 fn extract_image(
989 &self,
990 source_range: Range<usize>,
991 attrs: &RefCell<Vec<html5ever::Attribute>>,
992 ) -> Option<Image> {
993 let src = Self::attr_value(attrs, local_name!("src"))?;
994
995 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
996
997 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
998 image.set_alt_text(alt.into());
999 }
1000
1001 let styles = Self::extract_styles_from_attributes(attrs);
1002
1003 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1004 .or_else(|| styles.get("width").cloned())
1005 .and_then(|width| Self::parse_html_element_dimension(&width))
1006 {
1007 image.set_width(width);
1008 }
1009
1010 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1011 .or_else(|| styles.get("height").cloned())
1012 .and_then(|height| Self::parse_html_element_dimension(&height))
1013 {
1014 image.set_height(height);
1015 }
1016
1017 Some(image)
1018 }
1019
1020 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1021 if value.ends_with("%") {
1022 value
1023 .trim_end_matches("%")
1024 .parse::<f32>()
1025 .ok()
1026 .map(|value| relative(value / 100.))
1027 } else {
1028 value
1029 .trim_end_matches("px")
1030 .parse()
1031 .ok()
1032 .map(|value| px(value).into())
1033 }
1034 }
1035
1036 fn extract_html_blockquote(
1037 &self,
1038 node: &Rc<markup5ever_rcdom::Node>,
1039 source_range: Range<usize>,
1040 ) -> Option<ParsedMarkdownBlockQuote> {
1041 let mut children = Vec::new();
1042 self.consume_children(source_range.clone(), node, &mut children);
1043
1044 if children.is_empty() {
1045 None
1046 } else {
1047 Some(ParsedMarkdownBlockQuote {
1048 children,
1049 source_range,
1050 })
1051 }
1052 }
1053
1054 fn extract_html_table(
1055 &self,
1056 node: &Rc<markup5ever_rcdom::Node>,
1057 source_range: Range<usize>,
1058 ) -> Option<ParsedMarkdownTable> {
1059 let mut header_columns = Vec::new();
1060 let mut body_rows = Vec::new();
1061
1062 // node should be a thead or tbody element
1063 for node in node.children.borrow().iter() {
1064 match &node.data {
1065 markup5ever_rcdom::NodeData::Element { name, .. } => {
1066 if local_name!("thead") == name.local {
1067 // node should be a tr element
1068 for node in node.children.borrow().iter() {
1069 let mut paragraph = MarkdownParagraph::new();
1070 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1071
1072 for paragraph in paragraph.into_iter() {
1073 header_columns.push(vec![paragraph]);
1074 }
1075 }
1076 } else if local_name!("tbody") == name.local {
1077 // node should be a tr element
1078 for node in node.children.borrow().iter() {
1079 let mut row = MarkdownParagraph::new();
1080 self.consume_paragraph(source_range.clone(), node, &mut row);
1081 body_rows.push(ParsedMarkdownTableRow::with_children(
1082 row.into_iter().map(|column| vec![column]).collect(),
1083 ));
1084 }
1085 }
1086 }
1087 _ => {}
1088 }
1089 }
1090
1091 if !header_columns.is_empty() || !body_rows.is_empty() {
1092 Some(ParsedMarkdownTable {
1093 source_range,
1094 body: body_rows,
1095 column_alignments: Vec::default(),
1096 header: ParsedMarkdownTableRow::with_children(header_columns),
1097 })
1098 } else {
1099 None
1100 }
1101 }
1102}
1103
1104#[cfg(test)]
1105mod tests {
1106 use super::*;
1107 use ParsedMarkdownListItemType::*;
1108 use core::panic;
1109 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1110 use language::{
1111 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1112 };
1113 use pretty_assertions::assert_eq;
1114
1115 async fn parse(input: &str) -> ParsedMarkdown {
1116 parse_markdown(input, None, None).await
1117 }
1118
1119 #[gpui::test]
1120 async fn test_headings() {
1121 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1122
1123 assert_eq!(
1124 parsed.children,
1125 vec![
1126 h1(text("Heading one", 2..13), 0..14),
1127 h2(text("Heading two", 17..28), 14..29),
1128 h3(text("Heading three", 33..46), 29..46),
1129 ]
1130 );
1131 }
1132
1133 #[gpui::test]
1134 async fn test_newlines_dont_new_paragraphs() {
1135 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1136
1137 assert_eq!(
1138 parsed.children,
1139 vec![p("Some text that is bolded and italicized", 0..46)]
1140 );
1141 }
1142
1143 #[gpui::test]
1144 async fn test_heading_with_paragraph() {
1145 let parsed = parse("# Zed\nThe editor").await;
1146
1147 assert_eq!(
1148 parsed.children,
1149 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1150 );
1151 }
1152
1153 #[gpui::test]
1154 async fn test_double_newlines_do_new_paragraphs() {
1155 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1156
1157 assert_eq!(
1158 parsed.children,
1159 vec![
1160 p("Some text that is bolded", 0..29),
1161 p("and italicized", 31..47),
1162 ]
1163 );
1164 }
1165
1166 #[gpui::test]
1167 async fn test_bold_italic_text() {
1168 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1169
1170 assert_eq!(
1171 parsed.children,
1172 vec![p("Some text that is bolded and italicized", 0..45)]
1173 );
1174 }
1175
1176 #[gpui::test]
1177 async fn test_nested_bold_strikethrough_text() {
1178 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1179
1180 assert_eq!(parsed.children.len(), 1);
1181 assert_eq!(
1182 parsed.children[0],
1183 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1184 ParsedMarkdownText {
1185 source_range: 0..35,
1186 contents: "Some bostrikethroughld text".into(),
1187 highlights: Vec::new(),
1188 region_ranges: Vec::new(),
1189 regions: Vec::new(),
1190 }
1191 )])
1192 );
1193
1194 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1195 text
1196 } else {
1197 panic!("Expected a paragraph");
1198 };
1199
1200 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1201 text
1202 } else {
1203 panic!("Expected a text");
1204 };
1205
1206 assert_eq!(
1207 paragraph.highlights,
1208 vec![
1209 (
1210 5..7,
1211 MarkdownHighlight::Style(MarkdownHighlightStyle {
1212 weight: FontWeight::BOLD,
1213 ..Default::default()
1214 }),
1215 ),
1216 (
1217 7..20,
1218 MarkdownHighlight::Style(MarkdownHighlightStyle {
1219 weight: FontWeight::BOLD,
1220 strikethrough: true,
1221 ..Default::default()
1222 }),
1223 ),
1224 (
1225 20..22,
1226 MarkdownHighlight::Style(MarkdownHighlightStyle {
1227 weight: FontWeight::BOLD,
1228 ..Default::default()
1229 }),
1230 ),
1231 ]
1232 );
1233 }
1234
1235 #[gpui::test]
1236 async fn test_text_with_inline_html() {
1237 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1238
1239 assert_eq!(
1240 parsed.children,
1241 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1242 );
1243 }
1244
1245 #[gpui::test]
1246 async fn test_raw_links_detection() {
1247 let parsed = parse("Checkout this https://zed.dev link").await;
1248
1249 assert_eq!(
1250 parsed.children,
1251 vec![p("Checkout this https://zed.dev link", 0..34)]
1252 );
1253 }
1254
1255 #[gpui::test]
1256 async fn test_empty_image() {
1257 let parsed = parse("![]()").await;
1258
1259 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1260 text
1261 } else {
1262 panic!("Expected a paragraph");
1263 };
1264 assert_eq!(paragraph.len(), 0);
1265 }
1266
1267 #[gpui::test]
1268 async fn test_image_links_detection() {
1269 let parsed = parse("").await;
1270
1271 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1272 text
1273 } else {
1274 panic!("Expected a paragraph");
1275 };
1276 assert_eq!(
1277 paragraph[0],
1278 MarkdownParagraphChunk::Image(Image {
1279 source_range: 0..111,
1280 link: Link::Web {
1281 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1282 },
1283 alt_text: Some("test".into()),
1284 height: None,
1285 width: None,
1286 },)
1287 );
1288 }
1289
1290 #[gpui::test]
1291 async fn test_image_alt_text() {
1292 let parsed = parse("[](https://zed.dev)\n ").await;
1293
1294 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1295 text
1296 } else {
1297 panic!("Expected a paragraph");
1298 };
1299 assert_eq!(
1300 paragraph[0],
1301 MarkdownParagraphChunk::Image(Image {
1302 source_range: 0..142,
1303 link: Link::Web {
1304 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1305 },
1306 alt_text: Some("Zed".into()),
1307 height: None,
1308 width: None,
1309 },)
1310 );
1311 }
1312
1313 #[gpui::test]
1314 async fn test_image_without_alt_text() {
1315 let parsed = parse("").await;
1316
1317 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1318 text
1319 } else {
1320 panic!("Expected a paragraph");
1321 };
1322 assert_eq!(
1323 paragraph[0],
1324 MarkdownParagraphChunk::Image(Image {
1325 source_range: 0..31,
1326 link: Link::Web {
1327 url: "http://example.com/foo.png".to_string(),
1328 },
1329 alt_text: None,
1330 height: None,
1331 width: None,
1332 },)
1333 );
1334 }
1335
1336 #[gpui::test]
1337 async fn test_image_with_alt_text_containing_formatting() {
1338 let parsed = parse("").await;
1339
1340 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1341 panic!("Expected a paragraph");
1342 };
1343 assert_eq!(
1344 chunks,
1345 &[MarkdownParagraphChunk::Image(Image {
1346 source_range: 0..44,
1347 link: Link::Web {
1348 url: "http://example.com/foo.png".to_string(),
1349 },
1350 alt_text: Some("foo bar baz".into()),
1351 height: None,
1352 width: None,
1353 }),],
1354 );
1355 }
1356
1357 #[gpui::test]
1358 async fn test_images_with_text_in_between() {
1359 let parsed = parse(
1360 "\nLorem Ipsum\n",
1361 )
1362 .await;
1363
1364 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1365 text
1366 } else {
1367 panic!("Expected a paragraph");
1368 };
1369 assert_eq!(
1370 chunks,
1371 &vec![
1372 MarkdownParagraphChunk::Image(Image {
1373 source_range: 0..81,
1374 link: Link::Web {
1375 url: "http://example.com/foo.png".to_string(),
1376 },
1377 alt_text: Some("foo".into()),
1378 height: None,
1379 width: None,
1380 }),
1381 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1382 source_range: 0..81,
1383 contents: " Lorem Ipsum ".into(),
1384 highlights: Vec::new(),
1385 region_ranges: Vec::new(),
1386 regions: Vec::new(),
1387 }),
1388 MarkdownParagraphChunk::Image(Image {
1389 source_range: 0..81,
1390 link: Link::Web {
1391 url: "http://example.com/bar.png".to_string(),
1392 },
1393 alt_text: Some("bar".into()),
1394 height: None,
1395 width: None,
1396 })
1397 ]
1398 );
1399 }
1400
1401 #[test]
1402 fn test_parse_html_element_dimension() {
1403 // Test percentage values
1404 assert_eq!(
1405 MarkdownParser::parse_html_element_dimension("50%"),
1406 Some(DefiniteLength::Fraction(0.5))
1407 );
1408 assert_eq!(
1409 MarkdownParser::parse_html_element_dimension("100%"),
1410 Some(DefiniteLength::Fraction(1.0))
1411 );
1412 assert_eq!(
1413 MarkdownParser::parse_html_element_dimension("25%"),
1414 Some(DefiniteLength::Fraction(0.25))
1415 );
1416 assert_eq!(
1417 MarkdownParser::parse_html_element_dimension("0%"),
1418 Some(DefiniteLength::Fraction(0.0))
1419 );
1420
1421 // Test pixel values
1422 assert_eq!(
1423 MarkdownParser::parse_html_element_dimension("100px"),
1424 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1425 );
1426 assert_eq!(
1427 MarkdownParser::parse_html_element_dimension("50px"),
1428 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1429 );
1430 assert_eq!(
1431 MarkdownParser::parse_html_element_dimension("0px"),
1432 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1433 );
1434
1435 // Test values without units (should be treated as pixels)
1436 assert_eq!(
1437 MarkdownParser::parse_html_element_dimension("100"),
1438 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1439 );
1440 assert_eq!(
1441 MarkdownParser::parse_html_element_dimension("42"),
1442 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1443 );
1444
1445 // Test invalid values
1446 assert_eq!(
1447 MarkdownParser::parse_html_element_dimension("invalid"),
1448 None
1449 );
1450 assert_eq!(MarkdownParser::parse_html_element_dimension("px"), None);
1451 assert_eq!(MarkdownParser::parse_html_element_dimension("%"), None);
1452 assert_eq!(MarkdownParser::parse_html_element_dimension(""), None);
1453 assert_eq!(MarkdownParser::parse_html_element_dimension("abc%"), None);
1454 assert_eq!(MarkdownParser::parse_html_element_dimension("abcpx"), None);
1455
1456 // Test decimal values
1457 assert_eq!(
1458 MarkdownParser::parse_html_element_dimension("50.5%"),
1459 Some(DefiniteLength::Fraction(0.505))
1460 );
1461 assert_eq!(
1462 MarkdownParser::parse_html_element_dimension("100.25px"),
1463 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1464 );
1465 assert_eq!(
1466 MarkdownParser::parse_html_element_dimension("42.0"),
1467 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1468 );
1469 }
1470
1471 #[gpui::test]
1472 async fn test_inline_html_image_tag() {
1473 let parsed =
1474 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1475 .await;
1476
1477 assert_eq!(
1478 ParsedMarkdown {
1479 children: vec![ParsedMarkdownElement::Paragraph(vec![
1480 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1481 source_range: 0..71,
1482 contents: "Some text".into(),
1483 highlights: Default::default(),
1484 region_ranges: Default::default(),
1485 regions: Default::default()
1486 }),
1487 MarkdownParagraphChunk::Image(Image {
1488 source_range: 0..71,
1489 link: Link::Web {
1490 url: "http://example.com/foo.png".to_string(),
1491 },
1492 alt_text: None,
1493 height: None,
1494 width: None,
1495 }),
1496 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1497 source_range: 0..71,
1498 contents: " some more text".into(),
1499 highlights: Default::default(),
1500 region_ranges: Default::default(),
1501 regions: Default::default()
1502 }),
1503 ])]
1504 },
1505 parsed
1506 );
1507 }
1508
1509 #[gpui::test]
1510 async fn test_html_block_quote() {
1511 let parsed = parse(
1512 "<blockquote>
1513 <p>some description</p>
1514 </blockquote>",
1515 )
1516 .await;
1517
1518 assert_eq!(
1519 ParsedMarkdown {
1520 children: vec![block_quote(
1521 vec![ParsedMarkdownElement::Paragraph(text(
1522 "some description",
1523 0..76
1524 ))],
1525 0..76,
1526 )]
1527 },
1528 parsed
1529 );
1530 }
1531
1532 #[gpui::test]
1533 async fn test_html_nested_block_quote() {
1534 let parsed = parse(
1535 "<blockquote>
1536 <p>some description</p>
1537 <blockquote>
1538 <p>second description</p>
1539 </blockquote>
1540 </blockquote>",
1541 )
1542 .await;
1543
1544 assert_eq!(
1545 ParsedMarkdown {
1546 children: vec![block_quote(
1547 vec![
1548 ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1549 block_quote(
1550 vec![ParsedMarkdownElement::Paragraph(text(
1551 "second description",
1552 0..173
1553 ))],
1554 0..173,
1555 )
1556 ],
1557 0..173,
1558 )]
1559 },
1560 parsed
1561 );
1562 }
1563
1564 #[gpui::test]
1565 async fn test_html_table() {
1566 let parsed = parse(
1567 "<table>
1568 <thead>
1569 <tr>
1570 <th>Id</th>
1571 <th>Name</th>
1572 </tr>
1573 </thead>
1574 <tbody>
1575 <tr>
1576 <td>1</td>
1577 <td>Chris</td>
1578 </tr>
1579 <tr>
1580 <td>2</td>
1581 <td>Dennis</td>
1582 </tr>
1583 </tbody>
1584 </table>",
1585 )
1586 .await;
1587
1588 assert_eq!(
1589 ParsedMarkdown {
1590 children: vec![ParsedMarkdownElement::Table(table(
1591 0..366,
1592 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1593 vec![
1594 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1595 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1596 ],
1597 ))],
1598 },
1599 parsed
1600 );
1601 }
1602
1603 #[gpui::test]
1604 async fn test_html_table_without_headings() {
1605 let parsed = parse(
1606 "<table>
1607 <tbody>
1608 <tr>
1609 <td>1</td>
1610 <td>Chris</td>
1611 </tr>
1612 <tr>
1613 <td>2</td>
1614 <td>Dennis</td>
1615 </tr>
1616 </tbody>
1617 </table>",
1618 )
1619 .await;
1620
1621 assert_eq!(
1622 ParsedMarkdown {
1623 children: vec![ParsedMarkdownElement::Table(table(
1624 0..240,
1625 row(vec![]),
1626 vec![
1627 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1628 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1629 ],
1630 ))],
1631 },
1632 parsed
1633 );
1634 }
1635
1636 #[gpui::test]
1637 async fn test_html_table_without_body() {
1638 let parsed = parse(
1639 "<table>
1640 <thead>
1641 <tr>
1642 <th>Id</th>
1643 <th>Name</th>
1644 </tr>
1645 </thead>
1646 </table>",
1647 )
1648 .await;
1649
1650 assert_eq!(
1651 ParsedMarkdown {
1652 children: vec![ParsedMarkdownElement::Table(table(
1653 0..150,
1654 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1655 vec![],
1656 ))],
1657 },
1658 parsed
1659 );
1660 }
1661
1662 #[gpui::test]
1663 async fn test_html_heading_tags() {
1664 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1665
1666 assert_eq!(
1667 ParsedMarkdown {
1668 children: vec![
1669 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1670 level: HeadingLevel::H1,
1671 source_range: 0..96,
1672 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1673 source_range: 0..96,
1674 contents: "Heading".into(),
1675 highlights: Vec::default(),
1676 region_ranges: Vec::default(),
1677 regions: Vec::default()
1678 })],
1679 }),
1680 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1681 level: HeadingLevel::H2,
1682 source_range: 0..96,
1683 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1684 source_range: 0..96,
1685 contents: "Heading".into(),
1686 highlights: Vec::default(),
1687 region_ranges: Vec::default(),
1688 regions: Vec::default()
1689 })],
1690 }),
1691 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1692 level: HeadingLevel::H3,
1693 source_range: 0..96,
1694 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1695 source_range: 0..96,
1696 contents: "Heading".into(),
1697 highlights: Vec::default(),
1698 region_ranges: Vec::default(),
1699 regions: Vec::default()
1700 })],
1701 }),
1702 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1703 level: HeadingLevel::H4,
1704 source_range: 0..96,
1705 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1706 source_range: 0..96,
1707 contents: "Heading".into(),
1708 highlights: Vec::default(),
1709 region_ranges: Vec::default(),
1710 regions: Vec::default()
1711 })],
1712 }),
1713 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1714 level: HeadingLevel::H5,
1715 source_range: 0..96,
1716 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1717 source_range: 0..96,
1718 contents: "Heading".into(),
1719 highlights: Vec::default(),
1720 region_ranges: Vec::default(),
1721 regions: Vec::default()
1722 })],
1723 }),
1724 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1725 level: HeadingLevel::H6,
1726 source_range: 0..96,
1727 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1728 source_range: 0..96,
1729 contents: "Heading".into(),
1730 highlights: Vec::default(),
1731 region_ranges: Vec::default(),
1732 regions: Vec::default()
1733 })],
1734 }),
1735 ],
1736 },
1737 parsed
1738 );
1739 }
1740
1741 #[gpui::test]
1742 async fn test_html_image_tag() {
1743 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1744
1745 assert_eq!(
1746 ParsedMarkdown {
1747 children: vec![ParsedMarkdownElement::Image(Image {
1748 source_range: 0..40,
1749 link: Link::Web {
1750 url: "http://example.com/foo.png".to_string(),
1751 },
1752 alt_text: None,
1753 height: None,
1754 width: None,
1755 })]
1756 },
1757 parsed
1758 );
1759 }
1760
1761 #[gpui::test]
1762 async fn test_html_image_tag_with_alt_text() {
1763 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1764
1765 assert_eq!(
1766 ParsedMarkdown {
1767 children: vec![ParsedMarkdownElement::Image(Image {
1768 source_range: 0..50,
1769 link: Link::Web {
1770 url: "http://example.com/foo.png".to_string(),
1771 },
1772 alt_text: Some("Foo".into()),
1773 height: None,
1774 width: None,
1775 })]
1776 },
1777 parsed
1778 );
1779 }
1780
1781 #[gpui::test]
1782 async fn test_html_image_tag_with_height_and_width() {
1783 let parsed =
1784 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1785
1786 assert_eq!(
1787 ParsedMarkdown {
1788 children: vec![ParsedMarkdownElement::Image(Image {
1789 source_range: 0..65,
1790 link: Link::Web {
1791 url: "http://example.com/foo.png".to_string(),
1792 },
1793 alt_text: None,
1794 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1795 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1796 })]
1797 },
1798 parsed
1799 );
1800 }
1801
1802 #[gpui::test]
1803 async fn test_html_image_style_tag_with_height_and_width() {
1804 let parsed = parse(
1805 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1806 )
1807 .await;
1808
1809 assert_eq!(
1810 ParsedMarkdown {
1811 children: vec![ParsedMarkdownElement::Image(Image {
1812 source_range: 0..75,
1813 link: Link::Web {
1814 url: "http://example.com/foo.png".to_string(),
1815 },
1816 alt_text: None,
1817 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1818 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1819 })]
1820 },
1821 parsed
1822 );
1823 }
1824
1825 #[gpui::test]
1826 async fn test_header_only_table() {
1827 let markdown = "\
1828| Header 1 | Header 2 |
1829|----------|----------|
1830
1831Some other content
1832";
1833
1834 let expected_table = table(
1835 0..48,
1836 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1837 vec![],
1838 );
1839
1840 assert_eq!(
1841 parse(markdown).await.children[0],
1842 ParsedMarkdownElement::Table(expected_table)
1843 );
1844 }
1845
1846 #[gpui::test]
1847 async fn test_basic_table() {
1848 let markdown = "\
1849| Header 1 | Header 2 |
1850|----------|----------|
1851| Cell 1 | Cell 2 |
1852| Cell 3 | Cell 4 |";
1853
1854 let expected_table = table(
1855 0..95,
1856 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1857 vec![
1858 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1859 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1860 ],
1861 );
1862
1863 assert_eq!(
1864 parse(markdown).await.children[0],
1865 ParsedMarkdownElement::Table(expected_table)
1866 );
1867 }
1868
1869 #[gpui::test]
1870 async fn test_list_basic() {
1871 let parsed = parse(
1872 "\
1873* Item 1
1874* Item 2
1875* Item 3
1876",
1877 )
1878 .await;
1879
1880 assert_eq!(
1881 parsed.children,
1882 vec![
1883 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1884 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1885 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1886 ],
1887 );
1888 }
1889
1890 #[gpui::test]
1891 async fn test_list_with_tasks() {
1892 let parsed = parse(
1893 "\
1894- [ ] TODO
1895- [x] Checked
1896",
1897 )
1898 .await;
1899
1900 assert_eq!(
1901 parsed.children,
1902 vec![
1903 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1904 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1905 ],
1906 );
1907 }
1908
1909 #[gpui::test]
1910 async fn test_list_with_indented_task() {
1911 let parsed = parse(
1912 "\
1913- [ ] TODO
1914 - [x] Checked
1915 - Unordered
1916 1. Number 1
1917 1. Number 2
19181. Number A
1919",
1920 )
1921 .await;
1922
1923 assert_eq!(
1924 parsed.children,
1925 vec![
1926 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1927 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1928 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1929 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1930 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1931 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1932 ],
1933 );
1934 }
1935
1936 #[gpui::test]
1937 async fn test_list_with_linebreak_is_handled_correctly() {
1938 let parsed = parse(
1939 "\
1940- [ ] Task 1
1941
1942- [x] Task 2
1943",
1944 )
1945 .await;
1946
1947 assert_eq!(
1948 parsed.children,
1949 vec![
1950 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1951 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1952 ],
1953 );
1954 }
1955
1956 #[gpui::test]
1957 async fn test_list_nested() {
1958 let parsed = parse(
1959 "\
1960* Item 1
1961* Item 2
1962* Item 3
1963
19641. Hello
19651. Two
1966 1. Three
19672. Four
19683. Five
1969
1970* First
1971 1. Hello
1972 1. Goodbyte
1973 - Inner
1974 - Inner
1975 2. Goodbyte
1976 - Next item empty
1977 -
1978* Last
1979",
1980 )
1981 .await;
1982
1983 assert_eq!(
1984 parsed.children,
1985 vec![
1986 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1987 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1988 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1989 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1990 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1991 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1992 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1993 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1994 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1995 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1996 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1997 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1998 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1999 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2000 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2001 list_item(186..190, 3, Unordered, vec![]),
2002 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2003 ]
2004 );
2005 }
2006
2007 #[gpui::test]
2008 async fn test_list_with_nested_content() {
2009 let parsed = parse(
2010 "\
2011* This is a list item with two paragraphs.
2012
2013 This is the second paragraph in the list item.
2014",
2015 )
2016 .await;
2017
2018 assert_eq!(
2019 parsed.children,
2020 vec![list_item(
2021 0..96,
2022 1,
2023 Unordered,
2024 vec![
2025 p("This is a list item with two paragraphs.", 4..44),
2026 p("This is the second paragraph in the list item.", 50..97)
2027 ],
2028 ),],
2029 );
2030 }
2031
2032 #[gpui::test]
2033 async fn test_list_item_with_inline_html() {
2034 let parsed = parse(
2035 "\
2036* This is a list item with an inline HTML <sometag>tag</sometag>.
2037",
2038 )
2039 .await;
2040
2041 assert_eq!(
2042 parsed.children,
2043 vec![list_item(
2044 0..67,
2045 1,
2046 Unordered,
2047 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2048 ),],
2049 );
2050 }
2051
2052 #[gpui::test]
2053 async fn test_nested_list_with_paragraph_inside() {
2054 let parsed = parse(
2055 "\
20561. a
2057 1. b
2058 1. c
2059
2060 text
2061
2062 1. d
2063",
2064 )
2065 .await;
2066
2067 assert_eq!(
2068 parsed.children,
2069 vec![
2070 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2071 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2072 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2073 p("text", 32..37),
2074 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2075 ],
2076 );
2077 }
2078
2079 #[gpui::test]
2080 async fn test_list_with_leading_text() {
2081 let parsed = parse(
2082 "\
2083* `code`
2084* **bold**
2085* [link](https://example.com)
2086",
2087 )
2088 .await;
2089
2090 assert_eq!(
2091 parsed.children,
2092 vec![
2093 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2094 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2095 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2096 ],
2097 );
2098 }
2099
2100 #[gpui::test]
2101 async fn test_simple_block_quote() {
2102 let parsed = parse("> Simple block quote with **styled text**").await;
2103
2104 assert_eq!(
2105 parsed.children,
2106 vec![block_quote(
2107 vec![p("Simple block quote with styled text", 2..41)],
2108 0..41
2109 )]
2110 );
2111 }
2112
2113 #[gpui::test]
2114 async fn test_simple_block_quote_with_multiple_lines() {
2115 let parsed = parse(
2116 "\
2117> # Heading
2118> More
2119> text
2120>
2121> More text
2122",
2123 )
2124 .await;
2125
2126 assert_eq!(
2127 parsed.children,
2128 vec![block_quote(
2129 vec![
2130 h1(text("Heading", 4..11), 2..12),
2131 p("More text", 14..26),
2132 p("More text", 30..40)
2133 ],
2134 0..40
2135 )]
2136 );
2137 }
2138
2139 #[gpui::test]
2140 async fn test_nested_block_quote() {
2141 let parsed = parse(
2142 "\
2143> A
2144>
2145> > # B
2146>
2147> C
2148
2149More text
2150",
2151 )
2152 .await;
2153
2154 assert_eq!(
2155 parsed.children,
2156 vec![
2157 block_quote(
2158 vec![
2159 p("A", 2..4),
2160 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2161 p("C", 18..20)
2162 ],
2163 0..20
2164 ),
2165 p("More text", 21..31)
2166 ]
2167 );
2168 }
2169
2170 #[gpui::test]
2171 async fn test_code_block() {
2172 let parsed = parse(
2173 "\
2174```
2175fn main() {
2176 return 0;
2177}
2178```
2179",
2180 )
2181 .await;
2182
2183 assert_eq!(
2184 parsed.children,
2185 vec![code_block(
2186 None,
2187 "fn main() {\n return 0;\n}",
2188 0..35,
2189 None
2190 )]
2191 );
2192 }
2193
2194 #[gpui::test]
2195 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2196 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2197 language_registry.add(rust_lang());
2198
2199 let parsed = parse_markdown(
2200 "\
2201```rust
2202fn main() {
2203 return 0;
2204}
2205```
2206",
2207 None,
2208 Some(language_registry),
2209 )
2210 .await;
2211
2212 assert_eq!(
2213 parsed.children,
2214 vec![code_block(
2215 Some("rust".to_string()),
2216 "fn main() {\n return 0;\n}",
2217 0..39,
2218 Some(vec![])
2219 )]
2220 );
2221 }
2222
2223 fn rust_lang() -> Arc<Language> {
2224 Arc::new(Language::new(
2225 LanguageConfig {
2226 name: "Rust".into(),
2227 matcher: LanguageMatcher {
2228 path_suffixes: vec!["rs".into()],
2229 ..Default::default()
2230 },
2231 collapsed_placeholder: " /* ... */ ".to_string(),
2232 ..Default::default()
2233 },
2234 Some(tree_sitter_rust::LANGUAGE.into()),
2235 ))
2236 }
2237
2238 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2239 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2240 source_range,
2241 level: HeadingLevel::H1,
2242 contents,
2243 })
2244 }
2245
2246 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2247 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2248 source_range,
2249 level: HeadingLevel::H2,
2250 contents,
2251 })
2252 }
2253
2254 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2255 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2256 source_range,
2257 level: HeadingLevel::H3,
2258 contents,
2259 })
2260 }
2261
2262 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2263 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2264 }
2265
2266 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2267 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2268 highlights: Vec::new(),
2269 region_ranges: Vec::new(),
2270 regions: Vec::new(),
2271 source_range,
2272 contents: contents.to_string().into(),
2273 })]
2274 }
2275
2276 fn block_quote(
2277 children: Vec<ParsedMarkdownElement>,
2278 source_range: Range<usize>,
2279 ) -> ParsedMarkdownElement {
2280 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2281 source_range,
2282 children,
2283 })
2284 }
2285
2286 fn code_block(
2287 language: Option<String>,
2288 code: &str,
2289 source_range: Range<usize>,
2290 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2291 ) -> ParsedMarkdownElement {
2292 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2293 source_range,
2294 language,
2295 contents: code.to_string().into(),
2296 highlights,
2297 })
2298 }
2299
2300 fn list_item(
2301 source_range: Range<usize>,
2302 depth: u16,
2303 item_type: ParsedMarkdownListItemType,
2304 content: Vec<ParsedMarkdownElement>,
2305 ) -> ParsedMarkdownElement {
2306 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2307 source_range,
2308 item_type,
2309 depth,
2310 content,
2311 })
2312 }
2313
2314 fn table(
2315 source_range: Range<usize>,
2316 header: ParsedMarkdownTableRow,
2317 body: Vec<ParsedMarkdownTableRow>,
2318 ) -> ParsedMarkdownTable {
2319 ParsedMarkdownTable {
2320 column_alignments: Vec::new(),
2321 source_range,
2322 header,
2323 body,
2324 }
2325 }
2326
2327 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2328 ParsedMarkdownTableRow { children }
2329 }
2330
2331 impl PartialEq for ParsedMarkdownTable {
2332 fn eq(&self, other: &Self) -> bool {
2333 self.source_range == other.source_range
2334 && self.header == other.header
2335 && self.body == other.body
2336 }
2337 }
2338
2339 impl PartialEq for ParsedMarkdownText {
2340 fn eq(&self, other: &Self) -> bool {
2341 self.source_range == other.source_range && self.contents == other.contents
2342 }
2343 }
2344}