1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15
16pub async fn parse_markdown(
17 markdown_input: &str,
18 file_location_directory: Option<PathBuf>,
19 language_registry: Option<Arc<LanguageRegistry>>,
20) -> ParsedMarkdown {
21 let mut options = Options::all();
22 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
23
24 let parser = Parser::new_ext(markdown_input, options);
25 let parser = MarkdownParser::new(
26 parser.into_offset_iter().collect(),
27 file_location_directory,
28 language_registry,
29 );
30 let renderer = parser.parse_document().await;
31 ParsedMarkdown {
32 children: renderer.parsed,
33 }
34}
35
36fn cleanup_html(source: &str) -> Vec<u8> {
37 let mut writer = std::io::Cursor::new(Vec::new());
38 let mut reader = std::io::Cursor::new(source);
39 let mut minify = Minifier::new(
40 &mut writer,
41 MinifierOptions {
42 omit_doctype: true,
43 collapse_whitespace: true,
44 ..Default::default()
45 },
46 );
47 if let Ok(()) = minify.minify(&mut reader) {
48 writer.into_inner()
49 } else {
50 source.bytes().collect()
51 }
52}
53
/// Cursor-based parser that turns a pre-collected pulldown-cmark event stream
/// into `ParsedMarkdownElement`s.
struct MarkdownParser<'a> {
    /// Every event of the document paired with its source range, as produced
    /// by pulldown-cmark's offset iterator.
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` / `Image::identify` when links and images
    /// are encountered.
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks; no
    /// highlighting is attempted when this is `None`.
    language_registry: Option<Arc<LanguageRegistry>>,
}
63
/// Scratch state for a list item that is still being parsed by `parse_list`.
struct MarkdownListItem {
    /// Elements collected for this item so far.
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task item.
    item_type: ParsedMarkdownListItemType,
}
68
69impl Default for MarkdownListItem {
70 fn default() -> Self {
71 Self {
72 content: Vec::new(),
73 item_type: ParsedMarkdownListItemType::Unordered,
74 }
75 }
76}
77
78impl<'a> MarkdownParser<'a> {
79 fn new(
80 tokens: Vec<(Event<'a>, Range<usize>)>,
81 file_location_directory: Option<PathBuf>,
82 language_registry: Option<Arc<LanguageRegistry>>,
83 ) -> Self {
84 Self {
85 tokens,
86 file_location_directory,
87 language_registry,
88 cursor: 0,
89 parsed: vec![],
90 }
91 }
92
93 fn eof(&self) -> bool {
94 if self.tokens.is_empty() {
95 return true;
96 }
97 self.cursor >= self.tokens.len() - 1
98 }
99
100 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
101 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
102 return self.tokens.last();
103 }
104 self.tokens.get(self.cursor + steps)
105 }
106
107 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
108 if self.cursor == 0 || self.cursor > self.tokens.len() {
109 return None;
110 }
111 self.tokens.get(self.cursor - 1)
112 }
113
    /// Returns the token at the cursor; shorthand for `self.peek(0)`, so it
    /// shares `peek`'s fallback to the last token at end-of-input.
    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
        self.peek(0)
    }
117
118 fn current_event(&self) -> Option<&Event<'_>> {
119 self.current().map(|(event, _)| event)
120 }
121
122 fn is_text_like(event: &Event) -> bool {
123 match event {
124 Event::Text(_)
125 // Represent an inline code block
126 | Event::Code(_)
127 | Event::Html(_)
128 | Event::InlineHtml(_)
129 | Event::FootnoteReference(_)
130 | Event::Start(Tag::Link { .. })
131 | Event::Start(Tag::Emphasis)
132 | Event::Start(Tag::Strong)
133 | Event::Start(Tag::Strikethrough)
134 | Event::Start(Tag::Image { .. }) => {
135 true
136 }
137 _ => false,
138 }
139 }
140
141 async fn parse_document(mut self) -> Self {
142 while !self.eof() {
143 if let Some(block) = self.parse_block().await {
144 self.parsed.extend(block);
145 } else {
146 self.cursor += 1;
147 }
148 }
149 self
150 }
151
152 #[async_recursion]
153 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
154 let (current, source_range) = self.current().unwrap();
155 let source_range = source_range.clone();
156 match current {
157 Event::Start(tag) => match tag {
158 Tag::Paragraph => {
159 self.cursor += 1;
160 let text = self.parse_text(false, Some(source_range));
161 Some(vec![ParsedMarkdownElement::Paragraph(text)])
162 }
163 Tag::Heading { level, .. } => {
164 let level = *level;
165 self.cursor += 1;
166 let heading = self.parse_heading(level);
167 Some(vec![ParsedMarkdownElement::Heading(heading)])
168 }
169 Tag::Table(alignment) => {
170 let alignment = alignment.clone();
171 self.cursor += 1;
172 let table = self.parse_table(alignment);
173 Some(vec![ParsedMarkdownElement::Table(table)])
174 }
175 Tag::List(order) => {
176 let order = *order;
177 self.cursor += 1;
178 let list = self.parse_list(order).await;
179 Some(list)
180 }
181 Tag::BlockQuote(_kind) => {
182 self.cursor += 1;
183 let block_quote = self.parse_block_quote().await;
184 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
185 }
186 Tag::CodeBlock(kind) => {
187 let language = match kind {
188 pulldown_cmark::CodeBlockKind::Indented => None,
189 pulldown_cmark::CodeBlockKind::Fenced(language) => {
190 if language.is_empty() {
191 None
192 } else {
193 Some(language.to_string())
194 }
195 }
196 };
197
198 self.cursor += 1;
199
200 let code_block = self.parse_code_block(language).await?;
201 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
202 }
203 Tag::HtmlBlock => {
204 self.cursor += 1;
205
206 Some(self.parse_html_block().await)
207 }
208 _ => None,
209 },
210 Event::Rule => {
211 self.cursor += 1;
212 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
213 }
214 _ => None,
215 }
216 }
217
218 fn parse_text(
219 &mut self,
220 should_complete_on_soft_break: bool,
221 source_range: Option<Range<usize>>,
222 ) -> MarkdownParagraph {
223 let source_range = source_range.unwrap_or_else(|| {
224 self.current()
225 .map(|(_, range)| range.clone())
226 .unwrap_or_default()
227 });
228
229 let mut markdown_text_like = Vec::new();
230 let mut text = String::new();
231 let mut bold_depth = 0;
232 let mut italic_depth = 0;
233 let mut strikethrough_depth = 0;
234 let mut link: Option<Link> = None;
235 let mut image: Option<Image> = None;
236 let mut region_ranges: Vec<Range<usize>> = vec![];
237 let mut regions: Vec<ParsedRegion> = vec![];
238 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
239 let mut link_urls: Vec<String> = vec![];
240 let mut link_ranges: Vec<Range<usize>> = vec![];
241
242 loop {
243 if self.eof() {
244 break;
245 }
246
247 let (current, _) = self.current().unwrap();
248 let prev_len = text.len();
249 match current {
250 Event::SoftBreak => {
251 if should_complete_on_soft_break {
252 break;
253 }
254 text.push(' ');
255 }
256
257 Event::HardBreak => {
258 text.push('\n');
259 }
260
261 // We want to ignore any inline HTML tags in the text but keep
262 // the text between them
263 Event::InlineHtml(_) => {}
264
265 Event::Text(t) => {
266 text.push_str(t.as_ref());
267 let mut style = MarkdownHighlightStyle::default();
268
269 if bold_depth > 0 {
270 style.weight = FontWeight::BOLD;
271 }
272
273 if italic_depth > 0 {
274 style.italic = true;
275 }
276
277 if strikethrough_depth > 0 {
278 style.strikethrough = true;
279 }
280
281 let last_run_len = if let Some(link) = link.clone() {
282 region_ranges.push(prev_len..text.len());
283 regions.push(ParsedRegion {
284 code: false,
285 link: Some(link),
286 });
287 style.link = true;
288 prev_len
289 } else {
290 // Manually scan for links
291 let mut finder = linkify::LinkFinder::new();
292 finder.kinds(&[linkify::LinkKind::Url]);
293 let mut last_link_len = prev_len;
294 for link in finder.links(t) {
295 let start = link.start();
296 let end = link.end();
297 let range = (prev_len + start)..(prev_len + end);
298 link_ranges.push(range.clone());
299 link_urls.push(link.as_str().to_string());
300
301 // If there is a style before we match a link, we have to add this to the highlighted ranges
302 if style != MarkdownHighlightStyle::default()
303 && last_link_len < link.start()
304 {
305 highlights.push((
306 last_link_len..link.start(),
307 MarkdownHighlight::Style(style.clone()),
308 ));
309 }
310
311 highlights.push((
312 range.clone(),
313 MarkdownHighlight::Style(MarkdownHighlightStyle {
314 underline: true,
315 ..style
316 }),
317 ));
318 region_ranges.push(range.clone());
319 regions.push(ParsedRegion {
320 code: false,
321 link: Some(Link::Web {
322 url: link.as_str().to_string(),
323 }),
324 });
325 last_link_len = end;
326 }
327 last_link_len
328 };
329
330 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
331 let mut new_highlight = true;
332 if let Some((last_range, last_style)) = highlights.last_mut()
333 && last_range.end == last_run_len
334 && last_style == &MarkdownHighlight::Style(style.clone())
335 {
336 last_range.end = text.len();
337 new_highlight = false;
338 }
339 if new_highlight {
340 highlights.push((
341 last_run_len..text.len(),
342 MarkdownHighlight::Style(style.clone()),
343 ));
344 }
345 }
346 }
347 Event::Code(t) => {
348 text.push_str(t.as_ref());
349 region_ranges.push(prev_len..text.len());
350
351 if link.is_some() {
352 highlights.push((
353 prev_len..text.len(),
354 MarkdownHighlight::Style(MarkdownHighlightStyle {
355 link: true,
356 ..Default::default()
357 }),
358 ));
359 }
360 regions.push(ParsedRegion {
361 code: true,
362 link: link.clone(),
363 });
364 }
365 Event::Start(tag) => match tag {
366 Tag::Emphasis => italic_depth += 1,
367 Tag::Strong => bold_depth += 1,
368 Tag::Strikethrough => strikethrough_depth += 1,
369 Tag::Link { dest_url, .. } => {
370 link = Link::identify(
371 self.file_location_directory.clone(),
372 dest_url.to_string(),
373 );
374 }
375 Tag::Image { dest_url, .. } => {
376 if !text.is_empty() {
377 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
378 source_range: source_range.clone(),
379 contents: text.into(),
380 highlights: highlights.clone(),
381 region_ranges: region_ranges.clone(),
382 regions: regions.clone(),
383 });
384 text = String::new();
385 highlights = vec![];
386 region_ranges = vec![];
387 regions = vec![];
388 markdown_text_like.push(parsed_regions);
389 }
390 image = Image::identify(
391 dest_url.to_string(),
392 source_range.clone(),
393 self.file_location_directory.clone(),
394 );
395 }
396 _ => {
397 break;
398 }
399 },
400
401 Event::End(tag) => match tag {
402 TagEnd::Emphasis => italic_depth -= 1,
403 TagEnd::Strong => bold_depth -= 1,
404 TagEnd::Strikethrough => strikethrough_depth -= 1,
405 TagEnd::Link => {
406 link = None;
407 }
408 TagEnd::Image => {
409 if let Some(mut image) = image.take() {
410 if !text.is_empty() {
411 image.set_alt_text(std::mem::take(&mut text).into());
412 mem::take(&mut highlights);
413 mem::take(&mut region_ranges);
414 mem::take(&mut regions);
415 }
416 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
417 }
418 }
419 TagEnd::Paragraph => {
420 self.cursor += 1;
421 break;
422 }
423 _ => {
424 break;
425 }
426 },
427 _ => {
428 break;
429 }
430 }
431
432 self.cursor += 1;
433 }
434 if !text.is_empty() {
435 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
436 source_range,
437 contents: text.into(),
438 highlights,
439 regions,
440 region_ranges,
441 }));
442 }
443 markdown_text_like
444 }
445
446 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
447 let (_event, source_range) = self.previous().unwrap();
448 let source_range = source_range.clone();
449 let text = self.parse_text(true, None);
450
451 // Advance past the heading end tag
452 self.cursor += 1;
453
454 ParsedMarkdownHeading {
455 source_range,
456 level: match level {
457 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
458 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
459 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
460 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
461 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
462 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
463 },
464 contents: text,
465 }
466 }
467
468 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
469 let (_event, source_range) = self.previous().unwrap();
470 let source_range = source_range.clone();
471 let mut header = ParsedMarkdownTableRow::new();
472 let mut body = vec![];
473 let mut current_row = vec![];
474 let mut in_header = true;
475 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
476
477 loop {
478 if self.eof() {
479 break;
480 }
481
482 let (current, source_range) = self.current().unwrap();
483 let source_range = source_range.clone();
484 match current {
485 Event::Start(Tag::TableHead)
486 | Event::Start(Tag::TableRow)
487 | Event::End(TagEnd::TableCell) => {
488 self.cursor += 1;
489 }
490 Event::Start(Tag::TableCell) => {
491 self.cursor += 1;
492 let cell_contents = self.parse_text(false, Some(source_range));
493 current_row.push(cell_contents);
494 }
495 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
496 self.cursor += 1;
497 let new_row = std::mem::take(&mut current_row);
498 if in_header {
499 header.children = new_row;
500 in_header = false;
501 } else {
502 let row = ParsedMarkdownTableRow::with_children(new_row);
503 body.push(row);
504 }
505 }
506 Event::End(TagEnd::Table) => {
507 self.cursor += 1;
508 break;
509 }
510 _ => {
511 break;
512 }
513 }
514 }
515
516 ParsedMarkdownTable {
517 source_range,
518 header,
519 body,
520 column_alignments,
521 }
522 }
523
524 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
525 match alignment {
526 Alignment::None => ParsedMarkdownTableAlignment::None,
527 Alignment::Left => ParsedMarkdownTableAlignment::Left,
528 Alignment::Center => ParsedMarkdownTableAlignment::Center,
529 Alignment::Right => ParsedMarkdownTableAlignment::Right,
530 }
531 }
532
    /// Parses a list whose start tag was just consumed and returns its items
    /// as a flat sequence of elements.
    ///
    /// Nesting is not represented structurally: every produced `ListItem`
    /// carries its `depth`, and a parent item is placed before the nested
    /// items that were parsed while it was still open. `order` is the starting
    /// number for an ordered list, or `None` for an unordered one.
    ///
    /// Panics if there is no previous token.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Flat output, in document order.
        let mut items = Vec::new();
        // Items that have been opened but not yet closed; seeded with a
        // placeholder entry.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Saved numbering of the enclosing lists while inside a nested list.
        let mut order_stack = Vec::new();

        // depth -> index in `items` at which the still-open item of that depth
        // must be inserted once it closes (i.e. before its nested items).
        let mut insertion_indices = FxHashMap::default();
        // depth -> source range of the still-open item of that depth, cut off
        // at the start of its first nested list.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Restore the numbering of the enclosing list, if any.
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The outermost list has ended.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over ordered
                                // numbering when deciding the item type.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (or something that is
                            // not a block at all, in which case nothing is added).
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // Skip a paragraph end tag if one is sitting at the cursor
                    // (the item may have opened a paragraph above).
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the next number.
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range that was truncated at a nested list;
                        // otherwise use the item's own range.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained a nested list is inserted in
                        // front of the nested items recorded after it opened.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block: skip the token so the loop makes progress.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
683
684 #[async_recursion]
685 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
686 let (_event, source_range) = self.previous().unwrap();
687 let source_range = source_range.clone();
688 let mut nested_depth = 1;
689
690 let mut children: Vec<ParsedMarkdownElement> = vec![];
691
692 while !self.eof() {
693 let block = self.parse_block().await;
694
695 if let Some(block) = block {
696 children.extend(block);
697 } else {
698 break;
699 }
700
701 if self.eof() {
702 break;
703 }
704
705 let (current, _source_range) = self.current().unwrap();
706 match current {
707 // This is a nested block quote.
708 // Record that we're in a nested block quote and continue parsing.
709 // We don't need to advance the cursor since the next
710 // call to `parse_block` will handle it.
711 Event::Start(Tag::BlockQuote(_kind)) => {
712 nested_depth += 1;
713 }
714 Event::End(TagEnd::BlockQuote(_kind)) => {
715 nested_depth -= 1;
716 if nested_depth == 0 {
717 self.cursor += 1;
718 break;
719 }
720 }
721 _ => {}
722 };
723 }
724
725 ParsedMarkdownBlockQuote {
726 source_range,
727 children,
728 }
729 }
730
731 async fn parse_code_block(
732 &mut self,
733 language: Option<String>,
734 ) -> Option<ParsedMarkdownCodeBlock> {
735 let Some((_event, source_range)) = self.previous() else {
736 return None;
737 };
738
739 let source_range = source_range.clone();
740 let mut code = String::new();
741
742 while !self.eof() {
743 let Some((current, _source_range)) = self.current() else {
744 break;
745 };
746
747 match current {
748 Event::Text(text) => {
749 code.push_str(text);
750 self.cursor += 1;
751 }
752 Event::End(TagEnd::CodeBlock) => {
753 self.cursor += 1;
754 break;
755 }
756 _ => {
757 break;
758 }
759 }
760 }
761
762 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
763
764 let highlights = if let Some(language) = &language {
765 if let Some(registry) = &self.language_registry {
766 let rope: language::Rope = code.as_str().into();
767 registry
768 .language_for_name_or_extension(language)
769 .await
770 .map(|l| l.highlight_text(&rope, 0..code.len()))
771 .ok()
772 } else {
773 None
774 }
775 } else {
776 None
777 };
778
779 Some(ParsedMarkdownCodeBlock {
780 source_range,
781 contents: code.into(),
782 language,
783 highlights,
784 })
785 }
786
787 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
788 let mut elements = Vec::new();
789 let Some((_event, _source_range)) = self.previous() else {
790 return elements;
791 };
792
793 let mut html_source_range_start = None;
794 let mut html_source_range_end = None;
795 let mut html_buffer = String::new();
796
797 while !self.eof() {
798 let Some((current, source_range)) = self.current() else {
799 break;
800 };
801 let source_range = source_range.clone();
802 match current {
803 Event::Html(html) => {
804 html_source_range_start.get_or_insert(source_range.start);
805 html_source_range_end = Some(source_range.end);
806 html_buffer.push_str(html);
807 self.cursor += 1;
808 }
809 Event::End(TagEnd::CodeBlock) => {
810 self.cursor += 1;
811 break;
812 }
813 _ => {
814 break;
815 }
816 }
817 }
818
819 let bytes = cleanup_html(&html_buffer);
820
821 let mut cursor = std::io::Cursor::new(bytes);
822 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
823 .from_utf8()
824 .read_from(&mut cursor)
825 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
826 {
827 self.parse_html_node(start..end, &dom.document, &mut elements);
828 }
829
830 elements
831 }
832
833 fn parse_html_node(
834 &self,
835 source_range: Range<usize>,
836 node: &Rc<markup5ever_rcdom::Node>,
837 elements: &mut Vec<ParsedMarkdownElement>,
838 ) {
839 match &node.data {
840 markup5ever_rcdom::NodeData::Document => {
841 self.consume_children(source_range, node, elements);
842 }
843 markup5ever_rcdom::NodeData::Text { contents } => {
844 elements.push(ParsedMarkdownElement::Paragraph(vec![
845 MarkdownParagraphChunk::Text(ParsedMarkdownText {
846 source_range,
847 regions: Vec::default(),
848 region_ranges: Vec::default(),
849 highlights: Vec::default(),
850 contents: contents.borrow().to_string().into(),
851 }),
852 ]));
853 }
854 markup5ever_rcdom::NodeData::Comment { .. } => {}
855 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
856 if local_name!("img") == name.local {
857 if let Some(image) = self.extract_image(source_range, attrs) {
858 elements.push(ParsedMarkdownElement::Image(image));
859 }
860 } else if local_name!("p") == name.local {
861 let mut paragraph = MarkdownParagraph::new();
862 self.parse_paragraph(source_range, node, &mut paragraph);
863
864 if !paragraph.is_empty() {
865 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
866 }
867 } else if matches!(
868 name.local,
869 local_name!("h1")
870 | local_name!("h2")
871 | local_name!("h3")
872 | local_name!("h4")
873 | local_name!("h5")
874 | local_name!("h6")
875 ) {
876 let mut paragraph = MarkdownParagraph::new();
877 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
878
879 if !paragraph.is_empty() {
880 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
881 source_range,
882 level: match name.local {
883 local_name!("h1") => HeadingLevel::H1,
884 local_name!("h2") => HeadingLevel::H2,
885 local_name!("h3") => HeadingLevel::H3,
886 local_name!("h4") => HeadingLevel::H4,
887 local_name!("h5") => HeadingLevel::H5,
888 local_name!("h6") => HeadingLevel::H6,
889 _ => unreachable!(),
890 },
891 contents: paragraph,
892 }));
893 }
894 } else if local_name!("blockquote") == name.local {
895 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
896 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
897 }
898 } else if local_name!("table") == name.local {
899 if let Some(table) = self.extract_html_table(node, source_range) {
900 elements.push(ParsedMarkdownElement::Table(table));
901 }
902 } else {
903 self.consume_children(source_range, node, elements);
904 }
905 }
906 _ => {}
907 }
908 }
909
910 fn parse_paragraph(
911 &self,
912 source_range: Range<usize>,
913 node: &Rc<markup5ever_rcdom::Node>,
914 paragraph: &mut MarkdownParagraph,
915 ) {
916 match &node.data {
917 markup5ever_rcdom::NodeData::Text { contents } => {
918 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
919 source_range,
920 regions: Vec::default(),
921 region_ranges: Vec::default(),
922 highlights: Vec::default(),
923 contents: contents.borrow().to_string().into(),
924 }));
925 }
926 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
927 if local_name!("img") == name.local {
928 if let Some(image) = self.extract_image(source_range, attrs) {
929 paragraph.push(MarkdownParagraphChunk::Image(image));
930 }
931 } else {
932 self.consume_paragraph(source_range, node, paragraph);
933 }
934 }
935 _ => {}
936 }
937 }
938
939 fn consume_paragraph(
940 &self,
941 source_range: Range<usize>,
942 node: &Rc<markup5ever_rcdom::Node>,
943 paragraph: &mut MarkdownParagraph,
944 ) {
945 for node in node.children.borrow().iter() {
946 self.parse_paragraph(source_range.clone(), node, paragraph);
947 }
948 }
949
950 fn consume_children(
951 &self,
952 source_range: Range<usize>,
953 node: &Rc<markup5ever_rcdom::Node>,
954 elements: &mut Vec<ParsedMarkdownElement>,
955 ) {
956 for node in node.children.borrow().iter() {
957 self.parse_html_node(source_range.clone(), node, elements);
958 }
959 }
960
961 fn attr_value(
962 attrs: &RefCell<Vec<html5ever::Attribute>>,
963 name: html5ever::LocalName,
964 ) -> Option<String> {
965 attrs.borrow().iter().find_map(|attr| {
966 if attr.name.local == name {
967 Some(attr.value.to_string())
968 } else {
969 None
970 }
971 })
972 }
973
974 fn extract_styles_from_attributes(
975 attrs: &RefCell<Vec<html5ever::Attribute>>,
976 ) -> HashMap<String, String> {
977 let mut styles = HashMap::new();
978
979 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
980 for decl in style.split(';') {
981 let mut parts = decl.splitn(2, ':');
982 if let Some((key, value)) = parts.next().zip(parts.next()) {
983 styles.insert(
984 key.trim().to_lowercase().to_string(),
985 value.trim().to_string(),
986 );
987 }
988 }
989 }
990
991 styles
992 }
993
994 fn extract_image(
995 &self,
996 source_range: Range<usize>,
997 attrs: &RefCell<Vec<html5ever::Attribute>>,
998 ) -> Option<Image> {
999 let src = Self::attr_value(attrs, local_name!("src"))?;
1000
1001 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1002
1003 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1004 image.set_alt_text(alt.into());
1005 }
1006
1007 let styles = Self::extract_styles_from_attributes(attrs);
1008
1009 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1010 .or_else(|| styles.get("width").cloned())
1011 .and_then(|width| Self::parse_html_element_dimension(&width))
1012 {
1013 image.set_width(width);
1014 }
1015
1016 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1017 .or_else(|| styles.get("height").cloned())
1018 .and_then(|height| Self::parse_html_element_dimension(&height))
1019 {
1020 image.set_height(height);
1021 }
1022
1023 Some(image)
1024 }
1025
1026 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1027 if value.ends_with("%") {
1028 value
1029 .trim_end_matches("%")
1030 .parse::<f32>()
1031 .ok()
1032 .map(|value| relative(value / 100.))
1033 } else {
1034 value
1035 .trim_end_matches("px")
1036 .parse()
1037 .ok()
1038 .map(|value| px(value).into())
1039 }
1040 }
1041
1042 fn extract_html_blockquote(
1043 &self,
1044 node: &Rc<markup5ever_rcdom::Node>,
1045 source_range: Range<usize>,
1046 ) -> Option<ParsedMarkdownBlockQuote> {
1047 let mut children = Vec::new();
1048 self.consume_children(source_range.clone(), node, &mut children);
1049
1050 if children.is_empty() {
1051 None
1052 } else {
1053 Some(ParsedMarkdownBlockQuote {
1054 children,
1055 source_range,
1056 })
1057 }
1058 }
1059
1060 fn extract_html_table(
1061 &self,
1062 node: &Rc<markup5ever_rcdom::Node>,
1063 source_range: Range<usize>,
1064 ) -> Option<ParsedMarkdownTable> {
1065 let mut header_columns = Vec::new();
1066 let mut body_rows = Vec::new();
1067
1068 // node should be a thead or tbody element
1069 for node in node.children.borrow().iter() {
1070 match &node.data {
1071 markup5ever_rcdom::NodeData::Element { name, .. } => {
1072 if local_name!("thead") == name.local {
1073 // node should be a tr element
1074 for node in node.children.borrow().iter() {
1075 let mut paragraph = MarkdownParagraph::new();
1076 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1077
1078 for paragraph in paragraph.into_iter() {
1079 header_columns.push(vec![paragraph]);
1080 }
1081 }
1082 } else if local_name!("tbody") == name.local {
1083 // node should be a tr element
1084 for node in node.children.borrow().iter() {
1085 let mut row = MarkdownParagraph::new();
1086 self.consume_paragraph(source_range.clone(), node, &mut row);
1087 body_rows.push(ParsedMarkdownTableRow::with_children(
1088 row.into_iter().map(|column| vec![column]).collect(),
1089 ));
1090 }
1091 }
1092 }
1093 _ => {}
1094 }
1095 }
1096
1097 if !header_columns.is_empty() || !body_rows.is_empty() {
1098 Some(ParsedMarkdownTable {
1099 source_range,
1100 body: body_rows,
1101 column_alignments: Vec::default(),
1102 header: ParsedMarkdownTableRow::with_children(header_columns),
1103 })
1104 } else {
1105 None
1106 }
1107 }
1108}
1109
1110#[cfg(test)]
1111mod tests {
1112 use super::*;
1113 use ParsedMarkdownListItemType::*;
1114 use core::panic;
1115 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1116 use language::{
1117 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1118 };
1119 use pretty_assertions::assert_eq;
1120
    /// Test helper: parses `input` with no file location directory and no
    /// language registry.
    async fn parse(input: &str) -> ParsedMarkdown {
        parse_markdown(input, None, None).await
    }
1124
    /// Three consecutive headings of levels 1-3 each produce their own heading
    /// element, with separate source ranges for the heading and its text.
    #[gpui::test]
    async fn test_headings() {
        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;

        assert_eq!(
            parsed.children,
            vec![
                h1(text("Heading one", 2..13), 0..14),
                h2(text("Heading two", 17..28), 14..29),
                h3(text("Heading three", 33..46), 29..46),
            ]
        );
    }
1138
    /// A single newline inside a paragraph does not start a new paragraph; the
    /// text on both sides ends up in one paragraph separated by a space.
    #[gpui::test]
    async fn test_newlines_dont_new_paragraphs() {
        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;

        assert_eq!(
            parsed.children,
            vec![p("Some text that is bolded and italicized", 0..46)]
        );
    }
1148
    /// A heading directly followed by a line of text yields a heading element
    /// and a separate paragraph element.
    #[gpui::test]
    async fn test_heading_with_paragraph() {
        let parsed = parse("# Zed\nThe editor").await;

        assert_eq!(
            parsed.children,
            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
        );
    }
1158
    /// A blank line splits the input into two paragraphs, each with its own
    /// source range.
    #[gpui::test]
    async fn test_double_newlines_do_new_paragraphs() {
        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;

        assert_eq!(
            parsed.children,
            vec![
                p("Some text that is bolded", 0..29),
                p("and italicized", 31..47),
            ]
        );
    }
1171
1172 #[gpui::test]
1173 async fn test_bold_italic_text() {
1174 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1175
1176 assert_eq!(
1177 parsed.children,
1178 vec![p("Some text that is bolded and italicized", 0..45)]
1179 );
1180 }
1181
1182 #[gpui::test]
1183 async fn test_nested_bold_strikethrough_text() {
1184 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1185
1186 assert_eq!(parsed.children.len(), 1);
1187 assert_eq!(
1188 parsed.children[0],
1189 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1190 ParsedMarkdownText {
1191 source_range: 0..35,
1192 contents: "Some bostrikethroughld text".into(),
1193 highlights: Vec::new(),
1194 region_ranges: Vec::new(),
1195 regions: Vec::new(),
1196 }
1197 )])
1198 );
1199
1200 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1201 text
1202 } else {
1203 panic!("Expected a paragraph");
1204 };
1205
1206 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1207 text
1208 } else {
1209 panic!("Expected a text");
1210 };
1211
1212 assert_eq!(
1213 paragraph.highlights,
1214 vec![
1215 (
1216 5..7,
1217 MarkdownHighlight::Style(MarkdownHighlightStyle {
1218 weight: FontWeight::BOLD,
1219 ..Default::default()
1220 }),
1221 ),
1222 (
1223 7..20,
1224 MarkdownHighlight::Style(MarkdownHighlightStyle {
1225 weight: FontWeight::BOLD,
1226 strikethrough: true,
1227 ..Default::default()
1228 }),
1229 ),
1230 (
1231 20..22,
1232 MarkdownHighlight::Style(MarkdownHighlightStyle {
1233 weight: FontWeight::BOLD,
1234 ..Default::default()
1235 }),
1236 ),
1237 ]
1238 );
1239 }
1240
1241 #[gpui::test]
1242 async fn test_text_with_inline_html() {
1243 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1244
1245 assert_eq!(
1246 parsed.children,
1247 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1248 );
1249 }
1250
1251 #[gpui::test]
1252 async fn test_raw_links_detection() {
1253 let parsed = parse("Checkout this https://zed.dev link").await;
1254
1255 assert_eq!(
1256 parsed.children,
1257 vec![p("Checkout this https://zed.dev link", 0..34)]
1258 );
1259 }
1260
1261 #[gpui::test]
1262 async fn test_empty_image() {
1263 let parsed = parse("![]()").await;
1264
1265 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1266 text
1267 } else {
1268 panic!("Expected a paragraph");
1269 };
1270 assert_eq!(paragraph.len(), 0);
1271 }
1272
1273 #[gpui::test]
1274 async fn test_image_links_detection() {
1275 let parsed = parse("").await;
1276
1277 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1278 text
1279 } else {
1280 panic!("Expected a paragraph");
1281 };
1282 assert_eq!(
1283 paragraph[0],
1284 MarkdownParagraphChunk::Image(Image {
1285 source_range: 0..111,
1286 link: Link::Web {
1287 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1288 },
1289 alt_text: Some("test".into()),
1290 height: None,
1291 width: None,
1292 },)
1293 );
1294 }
1295
1296 #[gpui::test]
1297 async fn test_image_alt_text() {
1298 let parsed = parse("[](https://zed.dev)\n ").await;
1299
1300 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1301 text
1302 } else {
1303 panic!("Expected a paragraph");
1304 };
1305 assert_eq!(
1306 paragraph[0],
1307 MarkdownParagraphChunk::Image(Image {
1308 source_range: 0..142,
1309 link: Link::Web {
1310 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1311 },
1312 alt_text: Some("Zed".into()),
1313 height: None,
1314 width: None,
1315 },)
1316 );
1317 }
1318
1319 #[gpui::test]
1320 async fn test_image_without_alt_text() {
1321 let parsed = parse("").await;
1322
1323 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1324 text
1325 } else {
1326 panic!("Expected a paragraph");
1327 };
1328 assert_eq!(
1329 paragraph[0],
1330 MarkdownParagraphChunk::Image(Image {
1331 source_range: 0..31,
1332 link: Link::Web {
1333 url: "http://example.com/foo.png".to_string(),
1334 },
1335 alt_text: None,
1336 height: None,
1337 width: None,
1338 },)
1339 );
1340 }
1341
1342 #[gpui::test]
1343 async fn test_image_with_alt_text_containing_formatting() {
1344 let parsed = parse("").await;
1345
1346 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1347 panic!("Expected a paragraph");
1348 };
1349 assert_eq!(
1350 chunks,
1351 &[MarkdownParagraphChunk::Image(Image {
1352 source_range: 0..44,
1353 link: Link::Web {
1354 url: "http://example.com/foo.png".to_string(),
1355 },
1356 alt_text: Some("foo bar baz".into()),
1357 height: None,
1358 width: None,
1359 }),],
1360 );
1361 }
1362
1363 #[gpui::test]
1364 async fn test_images_with_text_in_between() {
1365 let parsed = parse(
1366 "\nLorem Ipsum\n",
1367 )
1368 .await;
1369
1370 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1371 text
1372 } else {
1373 panic!("Expected a paragraph");
1374 };
1375 assert_eq!(
1376 chunks,
1377 &vec![
1378 MarkdownParagraphChunk::Image(Image {
1379 source_range: 0..81,
1380 link: Link::Web {
1381 url: "http://example.com/foo.png".to_string(),
1382 },
1383 alt_text: Some("foo".into()),
1384 height: None,
1385 width: None,
1386 }),
1387 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1388 source_range: 0..81,
1389 contents: " Lorem Ipsum ".into(),
1390 highlights: Vec::new(),
1391 region_ranges: Vec::new(),
1392 regions: Vec::new(),
1393 }),
1394 MarkdownParagraphChunk::Image(Image {
1395 source_range: 0..81,
1396 link: Link::Web {
1397 url: "http://example.com/bar.png".to_string(),
1398 },
1399 alt_text: Some("bar".into()),
1400 height: None,
1401 width: None,
1402 })
1403 ]
1404 );
1405 }
1406
/// Table-driven check of `MarkdownParser::parse_html_element_dimension` over
/// percentage, pixel, unit-less, invalid and decimal inputs.
#[test]
fn test_parse_html_element_dimension() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels = |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    let cases = [
        // Percentage values
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Pixel values
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are expected to be treated as pixels
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(
            MarkdownParser::parse_html_element_dimension(input),
            expected,
            "input: {input:?}"
        );
    }
}
1476
1477 #[gpui::test]
1478 async fn test_inline_html_image_tag() {
1479 let parsed =
1480 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1481 .await;
1482
1483 assert_eq!(
1484 ParsedMarkdown {
1485 children: vec![ParsedMarkdownElement::Paragraph(vec![
1486 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1487 source_range: 0..71,
1488 contents: "Some text".into(),
1489 highlights: Default::default(),
1490 region_ranges: Default::default(),
1491 regions: Default::default()
1492 }),
1493 MarkdownParagraphChunk::Image(Image {
1494 source_range: 0..71,
1495 link: Link::Web {
1496 url: "http://example.com/foo.png".to_string(),
1497 },
1498 alt_text: None,
1499 height: None,
1500 width: None,
1501 }),
1502 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1503 source_range: 0..71,
1504 contents: " some more text".into(),
1505 highlights: Default::default(),
1506 region_ranges: Default::default(),
1507 regions: Default::default()
1508 }),
1509 ])]
1510 },
1511 parsed
1512 );
1513 }
1514
/// Parses an HTML `<blockquote>` wrapping a single `<p>` and expects one
/// block quote element containing one paragraph. Both the quote and the
/// paragraph are expected to carry the same range, 0..76.
1515 #[gpui::test]
1516 async fn test_html_block_quote() {
1517 let parsed = parse(
1518 "<blockquote>
1519 <p>some description</p>
1520 </blockquote>",
1521 )
1522 .await;
1523
1524 assert_eq!(
1525 ParsedMarkdown {
1526 children: vec![block_quote(
1527 vec![ParsedMarkdownElement::Paragraph(text(
1528 "some description",
1529 0..76
1530 ))],
1531 0..76,
1532 )]
1533 },
1534 parsed
1535 );
1536 }
1537
/// Parses an HTML `<blockquote>` that contains a paragraph followed by a
/// second, nested `<blockquote>`. The expected tree is a block quote holding
/// a paragraph and an inner block quote; every expected range is 0..173.
1538 #[gpui::test]
1539 async fn test_html_nested_block_quote() {
1540 let parsed = parse(
1541 "<blockquote>
1542 <p>some description</p>
1543 <blockquote>
1544 <p>second description</p>
1545 </blockquote>
1546 </blockquote>",
1547 )
1548 .await;
1549
1550 assert_eq!(
1551 ParsedMarkdown {
1552 children: vec![block_quote(
1553 vec![
1554 ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1555 block_quote(
1556 vec![ParsedMarkdownElement::Paragraph(text(
1557 "second description",
1558 0..173
1559 ))],
1560 0..173,
1561 )
1562 ],
1563 0..173,
1564 )]
1565 },
1566 parsed
1567 );
1568 }
1569
/// Parses an HTML `<table>` with a `<thead>` row and two `<tbody>` rows and
/// expects a table element with a two-cell header and two body rows; every
/// expected range is 0..366.
// NOTE(review): the expected second header cell is "Name " with a trailing
// space while the body cells have none — presumably a whitespace artifact of
// the HTML handling; confirm against the parser.
1570 #[gpui::test]
1571 async fn test_html_table() {
1572 let parsed = parse(
1573 "<table>
1574 <thead>
1575 <tr>
1576 <th>Id</th>
1577 <th>Name</th>
1578 </tr>
1579 </thead>
1580 <tbody>
1581 <tr>
1582 <td>1</td>
1583 <td>Chris</td>
1584 </tr>
1585 <tr>
1586 <td>2</td>
1587 <td>Dennis</td>
1588 </tr>
1589 </tbody>
1590 </table>",
1591 )
1592 .await;
1593
1594 assert_eq!(
1595 ParsedMarkdown {
1596 children: vec![ParsedMarkdownElement::Table(table(
1597 0..366,
1598 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1599 vec![
1600 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1601 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1602 ],
1603 ))],
1604 },
1605 parsed
1606 );
1607 }
1608
/// Parses an HTML `<table>` that has only a `<tbody>` and expects a table
/// with an empty header row and two body rows; every expected range is
/// 0..240.
1609 #[gpui::test]
1610 async fn test_html_table_without_headings() {
1611 let parsed = parse(
1612 "<table>
1613 <tbody>
1614 <tr>
1615 <td>1</td>
1616 <td>Chris</td>
1617 </tr>
1618 <tr>
1619 <td>2</td>
1620 <td>Dennis</td>
1621 </tr>
1622 </tbody>
1623 </table>",
1624 )
1625 .await;
1626
1627 assert_eq!(
1628 ParsedMarkdown {
1629 children: vec![ParsedMarkdownElement::Table(table(
1630 0..240,
1631 row(vec![]),
1632 vec![
1633 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1634 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1635 ],
1636 ))],
1637 },
1638 parsed
1639 );
1640 }
1641
/// Parses an HTML `<table>` that has only a `<thead>` and expects a table
/// with a two-cell header and no body rows; every expected range is 0..150.
1642 #[gpui::test]
1643 async fn test_html_table_without_body() {
1644 let parsed = parse(
1645 "<table>
1646 <thead>
1647 <tr>
1648 <th>Id</th>
1649 <th>Name</th>
1650 </tr>
1651 </thead>
1652 </table>",
1653 )
1654 .await;
1655
1656 assert_eq!(
1657 ParsedMarkdown {
1658 children: vec![ParsedMarkdownElement::Table(table(
1659 0..150,
1660 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1661 vec![],
1662 ))],
1663 },
1664 parsed
1665 );
1666 }
1667
1668 #[gpui::test]
1669 async fn test_html_heading_tags() {
1670 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1671
1672 assert_eq!(
1673 ParsedMarkdown {
1674 children: vec![
1675 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1676 level: HeadingLevel::H1,
1677 source_range: 0..96,
1678 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1679 source_range: 0..96,
1680 contents: "Heading".into(),
1681 highlights: Vec::default(),
1682 region_ranges: Vec::default(),
1683 regions: Vec::default()
1684 })],
1685 }),
1686 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1687 level: HeadingLevel::H2,
1688 source_range: 0..96,
1689 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1690 source_range: 0..96,
1691 contents: "Heading".into(),
1692 highlights: Vec::default(),
1693 region_ranges: Vec::default(),
1694 regions: Vec::default()
1695 })],
1696 }),
1697 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1698 level: HeadingLevel::H3,
1699 source_range: 0..96,
1700 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1701 source_range: 0..96,
1702 contents: "Heading".into(),
1703 highlights: Vec::default(),
1704 region_ranges: Vec::default(),
1705 regions: Vec::default()
1706 })],
1707 }),
1708 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1709 level: HeadingLevel::H4,
1710 source_range: 0..96,
1711 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1712 source_range: 0..96,
1713 contents: "Heading".into(),
1714 highlights: Vec::default(),
1715 region_ranges: Vec::default(),
1716 regions: Vec::default()
1717 })],
1718 }),
1719 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1720 level: HeadingLevel::H5,
1721 source_range: 0..96,
1722 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1723 source_range: 0..96,
1724 contents: "Heading".into(),
1725 highlights: Vec::default(),
1726 region_ranges: Vec::default(),
1727 regions: Vec::default()
1728 })],
1729 }),
1730 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1731 level: HeadingLevel::H6,
1732 source_range: 0..96,
1733 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1734 source_range: 0..96,
1735 contents: "Heading".into(),
1736 highlights: Vec::default(),
1737 region_ranges: Vec::default(),
1738 regions: Vec::default()
1739 })],
1740 }),
1741 ],
1742 },
1743 parsed
1744 );
1745 }
1746
1747 #[gpui::test]
1748 async fn test_html_image_tag() {
1749 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1750
1751 assert_eq!(
1752 ParsedMarkdown {
1753 children: vec![ParsedMarkdownElement::Image(Image {
1754 source_range: 0..40,
1755 link: Link::Web {
1756 url: "http://example.com/foo.png".to_string(),
1757 },
1758 alt_text: None,
1759 height: None,
1760 width: None,
1761 })]
1762 },
1763 parsed
1764 );
1765 }
1766
1767 #[gpui::test]
1768 async fn test_html_image_tag_with_alt_text() {
1769 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1770
1771 assert_eq!(
1772 ParsedMarkdown {
1773 children: vec![ParsedMarkdownElement::Image(Image {
1774 source_range: 0..50,
1775 link: Link::Web {
1776 url: "http://example.com/foo.png".to_string(),
1777 },
1778 alt_text: Some("Foo".into()),
1779 height: None,
1780 width: None,
1781 })]
1782 },
1783 parsed
1784 );
1785 }
1786
1787 #[gpui::test]
1788 async fn test_html_image_tag_with_height_and_width() {
1789 let parsed =
1790 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1791
1792 assert_eq!(
1793 ParsedMarkdown {
1794 children: vec![ParsedMarkdownElement::Image(Image {
1795 source_range: 0..65,
1796 link: Link::Web {
1797 url: "http://example.com/foo.png".to_string(),
1798 },
1799 alt_text: None,
1800 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1801 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1802 })]
1803 },
1804 parsed
1805 );
1806 }
1807
1808 #[gpui::test]
1809 async fn test_html_image_style_tag_with_height_and_width() {
1810 let parsed = parse(
1811 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1812 )
1813 .await;
1814
1815 assert_eq!(
1816 ParsedMarkdown {
1817 children: vec![ParsedMarkdownElement::Image(Image {
1818 source_range: 0..75,
1819 link: Link::Web {
1820 url: "http://example.com/foo.png".to_string(),
1821 },
1822 alt_text: None,
1823 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1824 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1825 })]
1826 },
1827 parsed
1828 );
1829 }
1830
/// A pipe table consisting only of a header row and a delimiter row,
/// followed by unrelated text. Only the first parsed child is checked: it
/// must be a table with two header cells and an empty body.
1831 #[gpui::test]
1832 async fn test_header_only_table() {
1833 let markdown = "\
1834| Header 1 | Header 2 |
1835|----------|----------|
1836
1837Some other content
1838";
1839
1840 let expected_table = table(
1841 0..48,
1842 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1843 vec![],
1844 );
1845
1846 assert_eq!(
1847 parse(markdown).await.children[0],
1848 ParsedMarkdownElement::Table(expected_table)
1849 );
1850 }
1851
/// A pipe table with one header row and two body rows. The first parsed
/// child must be a table whose header and body cells carry the listed text
/// and byte ranges.
1852 #[gpui::test]
1853 async fn test_basic_table() {
1854 let markdown = "\
1855| Header 1 | Header 2 |
1856|----------|----------|
1857| Cell 1 | Cell 2 |
1858| Cell 3 | Cell 4 |";
1859
1860 let expected_table = table(
1861 0..95,
1862 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1863 vec![
1864 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1865 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1866 ],
1867 );
1868
1869 assert_eq!(
1870 parse(markdown).await.children[0],
1871 ParsedMarkdownElement::Table(expected_table)
1872 );
1873 }
1874
/// A flat bullet list of three items is expected to parse into three
/// unordered list items at depth 1, each holding one paragraph.
1875 #[gpui::test]
1876 async fn test_list_basic() {
1877 let parsed = parse(
1878 "\
1879* Item 1
1880* Item 2
1881* Item 3
1882",
1883 )
1884 .await;
1885
1886 assert_eq!(
1887 parsed.children,
1888 vec![
1889 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1890 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1891 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1892 ],
1893 );
1894 }
1895
/// Task list items (`[ ]` / `[x]`) are expected to parse into `Task` list
/// items carrying the checked flag and the byte range of the checkbox
/// marker.
1896 #[gpui::test]
1897 async fn test_list_with_tasks() {
1898 let parsed = parse(
1899 "\
1900- [ ] TODO
1901- [x] Checked
1902",
1903 )
1904 .await;
1905
1906 assert_eq!(
1907 parsed.children,
1908 vec![
1909 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1910 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1911 ],
1912 );
1913 }
1914
/// A task item with nested task, bullet and ordered items, followed by a
/// top-level ordered item. The expected result is a flat sequence of list
/// items whose depth (1 or 2) and item type reflect the nesting.
1915 #[gpui::test]
1916 async fn test_list_with_indented_task() {
1917 let parsed = parse(
1918 "\
1919- [ ] TODO
1920 - [x] Checked
1921 - Unordered
1922 1. Number 1
1923 1. Number 2
19241. Number A
1925",
1926 )
1927 .await;
1928
1929 assert_eq!(
1930 parsed.children,
1931 vec![
1932 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1933 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1934 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1935 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1936 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1937 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1938 ],
1939 );
1940 }
1941
/// Two task items separated by a blank line are still expected to parse
/// into two depth-1 task list items.
1942 #[gpui::test]
1943 async fn test_list_with_linebreak_is_handled_correctly() {
1944 let parsed = parse(
1945 "\
1946- [ ] Task 1
1947
1948- [x] Task 2
1949",
1950 )
1951 .await;
1952
1953 assert_eq!(
1954 parsed.children,
1955 vec![
1956 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1957 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1958 ],
1959 );
1960 }
1961
/// Several bullet and ordered lists with nesting up to depth 4, including
/// an empty bullet. The expected result is a flat sequence of list items,
/// each with its own range, depth and item type; the empty bullet is
/// expected to have no content.
1962 #[gpui::test]
1963 async fn test_list_nested() {
1964 let parsed = parse(
1965 "\
1966* Item 1
1967* Item 2
1968* Item 3
1969
19701. Hello
19711. Two
1972 1. Three
19732. Four
19743. Five
1975
1976* First
1977 1. Hello
1978 1. Goodbyte
1979 - Inner
1980 - Inner
1981 2. Goodbyte
1982 - Next item empty
1983 -
1984* Last
1985",
1986 )
1987 .await;
1988
1989 assert_eq!(
1990 parsed.children,
1991 vec![
1992 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1993 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1994 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1995 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1996 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1997 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1998 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1999 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2000 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2001 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2002 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2003 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2004 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2005 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2006 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2007 list_item(186..190, 3, Unordered, vec![]),
2008 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2009 ]
2010 );
2011 }
2012
/// A single bullet whose content spans two paragraphs is expected to parse
/// into one list item holding both paragraphs.
2013 #[gpui::test]
2014 async fn test_list_with_nested_content() {
2015 let parsed = parse(
2016 "\
2017* This is a list item with two paragraphs.
2018
2019 This is the second paragraph in the list item.
2020",
2021 )
2022 .await;
2023
2024 assert_eq!(
2025 parsed.children,
2026 vec![list_item(
2027 0..96,
2028 1,
2029 Unordered,
2030 vec![
2031 p("This is a list item with two paragraphs.", 4..44),
2032 p("This is the second paragraph in the list item.", 50..97)
2033 ],
2034 ),],
2035 );
2036 }
2037
/// A bullet containing inline HTML tags: the expected paragraph text has the
/// tags removed while the tag's inner text is kept.
2038 #[gpui::test]
2039 async fn test_list_item_with_inline_html() {
2040 let parsed = parse(
2041 "\
2042* This is a list item with an inline HTML <sometag>tag</sometag>.
2043",
2044 )
2045 .await;
2046
2047 assert_eq!(
2048 parsed.children,
2049 vec![list_item(
2050 0..67,
2051 1,
2052 Unordered,
2053 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2054 ),],
2055 );
2056 }
2057
/// Nested ordered lists with a plain paragraph in between. The expected
/// output lists the paragraph as its own top-level element between the
/// depth-3 item and the following depth-2 item.
2058 #[gpui::test]
2059 async fn test_nested_list_with_paragraph_inside() {
2060 let parsed = parse(
2061 "\
20621. a
2063 1. b
2064 1. c
2065
2066 text
2067
2068 1. d
2069",
2070 )
2071 .await;
2072
2073 assert_eq!(
2074 parsed.children,
2075 vec![
2076 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2077 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2078 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2079 p("text", 32..37),
2080 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2081 ],
2082 );
2083 }
2084
/// Bullets whose content starts with inline code, bold text or a link. Each
/// expected paragraph contains only the inner text, with a range covering
/// the whole inline construct.
2085 #[gpui::test]
2086 async fn test_list_with_leading_text() {
2087 let parsed = parse(
2088 "\
2089* `code`
2090* **bold**
2091* [link](https://example.com)
2092",
2093 )
2094 .await;
2095
2096 assert_eq!(
2097 parsed.children,
2098 vec![
2099 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2100 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2101 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2102 ],
2103 );
2104 }
2105
2106 #[gpui::test]
2107 async fn test_simple_block_quote() {
2108 let parsed = parse("> Simple block quote with **styled text**").await;
2109
2110 assert_eq!(
2111 parsed.children,
2112 vec![block_quote(
2113 vec![p("Simple block quote with styled text", 2..41)],
2114 0..41
2115 )]
2116 );
2117 }
2118
/// A block quote spanning several lines with a heading, a two-line
/// paragraph and a second paragraph after an empty quote line. The expected
/// result is one block quote containing a heading and two paragraphs.
2119 #[gpui::test]
2120 async fn test_simple_block_quote_with_multiple_lines() {
2121 let parsed = parse(
2122 "\
2123> # Heading
2124> More
2125> text
2126>
2127> More text
2128",
2129 )
2130 .await;
2131
2132 assert_eq!(
2133 parsed.children,
2134 vec![block_quote(
2135 vec![
2136 h1(text("Heading", 4..11), 2..12),
2137 p("More text", 14..26),
2138 p("More text", 30..40)
2139 ],
2140 0..40
2141 )]
2142 );
2143 }
2144
/// A block quote containing a paragraph, a nested block quote with a
/// heading, and another paragraph, followed by a plain paragraph outside
/// the quote. The expected tree mirrors that nesting.
2145 #[gpui::test]
2146 async fn test_nested_block_quote() {
2147 let parsed = parse(
2148 "\
2149> A
2150>
2151> > # B
2152>
2153> C
2154
2155More text
2156",
2157 )
2158 .await;
2159
2160 assert_eq!(
2161 parsed.children,
2162 vec![
2163 block_quote(
2164 vec![
2165 p("A", 2..4),
2166 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2167 p("C", 18..20)
2168 ],
2169 0..20
2170 ),
2171 p("More text", 21..31)
2172 ]
2173 );
2174 }
2175
/// A fenced code block without a language tag is expected to parse into a
/// code block with `language: None`, the fence contents as text, and no
/// highlights (`None`).
2176 #[gpui::test]
2177 async fn test_code_block() {
2178 let parsed = parse(
2179 "\
2180```
2181fn main() {
2182 return 0;
2183}
2184```
2185",
2186 )
2187 .await;
2188
2189 assert_eq!(
2190 parsed.children,
2191 vec![code_block(
2192 None,
2193 "fn main() {\n return 0;\n}",
2194 0..35,
2195 None
2196 )]
2197 );
2198 }
2199
/// A fenced code block tagged `rust`, parsed with a test `LanguageRegistry`
/// that has the `rust_lang()` language added. The expected code block has
/// `language: Some("rust")` and highlights `Some(vec![])`.
2200 #[gpui::test]
2201 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2202 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2203 language_registry.add(rust_lang());
2204
2205 let parsed = parse_markdown(
2206 "\
2207```rust
2208fn main() {
2209 return 0;
2210}
2211```
2212",
2213 None,
2214 Some(language_registry),
2215 )
2216 .await;
2217
2218 assert_eq!(
2219 parsed.children,
2220 vec![code_block(
2221 Some("rust".to_string()),
2222 "fn main() {\n return 0;\n}",
2223 0..39,
2224 Some(vec![])
2225 )]
2226 );
2227 }
2228
2229 fn rust_lang() -> Arc<Language> {
2230 Arc::new(Language::new(
2231 LanguageConfig {
2232 name: "Rust".into(),
2233 matcher: LanguageMatcher {
2234 path_suffixes: vec!["rs".into()],
2235 ..Default::default()
2236 },
2237 collapsed_placeholder: " /* ... */ ".to_string(),
2238 ..Default::default()
2239 },
2240 Some(tree_sitter_rust::LANGUAGE.into()),
2241 ))
2242 }
2243
2244 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2245 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2246 source_range,
2247 level: HeadingLevel::H1,
2248 contents,
2249 })
2250 }
2251
2252 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2253 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2254 source_range,
2255 level: HeadingLevel::H2,
2256 contents,
2257 })
2258 }
2259
2260 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2261 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2262 source_range,
2263 level: HeadingLevel::H3,
2264 contents,
2265 })
2266 }
2267
2268 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2269 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2270 }
2271
2272 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2273 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2274 highlights: Vec::new(),
2275 region_ranges: Vec::new(),
2276 regions: Vec::new(),
2277 source_range,
2278 contents: contents.to_string().into(),
2279 })]
2280 }
2281
2282 fn block_quote(
2283 children: Vec<ParsedMarkdownElement>,
2284 source_range: Range<usize>,
2285 ) -> ParsedMarkdownElement {
2286 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2287 source_range,
2288 children,
2289 })
2290 }
2291
2292 fn code_block(
2293 language: Option<String>,
2294 code: &str,
2295 source_range: Range<usize>,
2296 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2297 ) -> ParsedMarkdownElement {
2298 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2299 source_range,
2300 language,
2301 contents: code.to_string().into(),
2302 highlights,
2303 })
2304 }
2305
2306 fn list_item(
2307 source_range: Range<usize>,
2308 depth: u16,
2309 item_type: ParsedMarkdownListItemType,
2310 content: Vec<ParsedMarkdownElement>,
2311 ) -> ParsedMarkdownElement {
2312 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2313 source_range,
2314 item_type,
2315 depth,
2316 content,
2317 })
2318 }
2319
2320 fn table(
2321 source_range: Range<usize>,
2322 header: ParsedMarkdownTableRow,
2323 body: Vec<ParsedMarkdownTableRow>,
2324 ) -> ParsedMarkdownTable {
2325 ParsedMarkdownTable {
2326 column_alignments: Vec::new(),
2327 source_range,
2328 header,
2329 body,
2330 }
2331 }
2332
2333 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2334 ParsedMarkdownTableRow { children }
2335 }
2336
2337 impl PartialEq for ParsedMarkdownTable {
2338 fn eq(&self, other: &Self) -> bool {
2339 self.source_range == other.source_range
2340 && self.header == other.header
2341 && self.body == other.body
2342 }
2343 }
2344
2345 impl PartialEq for ParsedMarkdownText {
2346 fn eq(&self, other: &Self) -> bool {
2347 self.source_range == other.source_range && self.contents == other.contents
2348 }
2349 }
2350}