1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15
16pub async fn parse_markdown(
17 markdown_input: &str,
18 file_location_directory: Option<PathBuf>,
19 language_registry: Option<Arc<LanguageRegistry>>,
20) -> ParsedMarkdown {
21 let mut options = Options::all();
22 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
23
24 let parser = Parser::new_ext(markdown_input, options);
25 let parser = MarkdownParser::new(
26 parser.into_offset_iter().collect(),
27 file_location_directory,
28 language_registry,
29 );
30 let renderer = parser.parse_document().await;
31 ParsedMarkdown {
32 children: renderer.parsed,
33 }
34}
35
36fn cleanup_html(source: &str) -> Vec<u8> {
37 let mut writer = std::io::Cursor::new(Vec::new());
38 let mut reader = std::io::Cursor::new(source);
39 let mut minify = Minifier::new(
40 &mut writer,
41 MinifierOptions {
42 omit_doctype: true,
43 collapse_whitespace: true,
44 ..Default::default()
45 },
46 );
47 if let Ok(()) = minify.minify(&mut reader) {
48 writer.into_inner()
49 } else {
50 source.bytes().collect()
51 }
52}
53
/// Cursor-based parser that turns a flat list of pulldown-cmark events into
/// [`ParsedMarkdownElement`]s.
struct MarkdownParser<'a> {
    /// Every event of the document paired with the source range it was produced from
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed along to `Link::identify` and `Image::identify` when links and
    /// images are parsed (presumably the base directory for relative paths)
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for fenced code blocks so they can be highlighted
    language_registry: Option<Arc<LanguageRegistry>>,
}
63
/// A list item that is still being assembled by `parse_list`.
struct MarkdownListItem {
    /// The blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered or a task
    item_type: ParsedMarkdownListItemType,
}
68
69impl Default for MarkdownListItem {
70 fn default() -> Self {
71 Self {
72 content: Vec::new(),
73 item_type: ParsedMarkdownListItemType::Unordered,
74 }
75 }
76}
77
78impl<'a> MarkdownParser<'a> {
79 fn new(
80 tokens: Vec<(Event<'a>, Range<usize>)>,
81 file_location_directory: Option<PathBuf>,
82 language_registry: Option<Arc<LanguageRegistry>>,
83 ) -> Self {
84 Self {
85 tokens,
86 file_location_directory,
87 language_registry,
88 cursor: 0,
89 parsed: vec![],
90 }
91 }
92
93 fn eof(&self) -> bool {
94 if self.tokens.is_empty() {
95 return true;
96 }
97 self.cursor >= self.tokens.len() - 1
98 }
99
100 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
101 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
102 return self.tokens.last();
103 }
104 self.tokens.get(self.cursor + steps)
105 }
106
107 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
108 if self.cursor == 0 || self.cursor > self.tokens.len() {
109 return None;
110 }
111 self.tokens.get(self.cursor - 1)
112 }
113
/// Returns the token under the cursor; shorthand for `peek(0)`, so at the end
/// of input this yields the last token.
fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
    self.peek(0)
}
117
118 fn current_event(&self) -> Option<&Event<'_>> {
119 self.current().map(|(event, _)| event)
120 }
121
122 fn is_text_like(event: &Event) -> bool {
123 match event {
124 Event::Text(_)
125 // Represent an inline code block
126 | Event::Code(_)
127 | Event::Html(_)
128 | Event::InlineHtml(_)
129 | Event::FootnoteReference(_)
130 | Event::Start(Tag::Link { .. })
131 | Event::Start(Tag::Emphasis)
132 | Event::Start(Tag::Strong)
133 | Event::Start(Tag::Strikethrough)
134 | Event::Start(Tag::Image { .. }) => {
135 true
136 }
137 _ => false,
138 }
139 }
140
141 async fn parse_document(mut self) -> Self {
142 while !self.eof() {
143 if let Some(block) = self.parse_block().await {
144 self.parsed.extend(block);
145 } else {
146 self.cursor += 1;
147 }
148 }
149 self
150 }
151
/// Parses the block that starts at the current token.
///
/// Returns the parsed elements for paragraphs, headings, tables, lists, block
/// quotes, code blocks, HTML blocks and horizontal rules. Returns `None` when
/// the current token does not start one of these blocks (the cursor is left
/// untouched in that case) or when the code block could not be parsed.
///
/// Panics if there is no current token; callers in this file only invoke it
/// while tokens are available.
#[async_recursion]
async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
    let (current, source_range) = self.current().unwrap();
    let source_range = source_range.clone();
    match current {
        Event::Start(tag) => match tag {
            Tag::Paragraph => {
                // Step over the start tag; `parse_text` consumes the matching end tag
                self.cursor += 1;
                let text = self.parse_text(false, Some(source_range));
                Some(vec![ParsedMarkdownElement::Paragraph(text)])
            }
            Tag::Heading { level, .. } => {
                // Copy the level out before moving the cursor
                let level = *level;
                self.cursor += 1;
                let heading = self.parse_heading(level);
                Some(vec![ParsedMarkdownElement::Heading(heading)])
            }
            Tag::Table(alignment) => {
                let alignment = alignment.clone();
                self.cursor += 1;
                let table = self.parse_table(alignment);
                Some(vec![ParsedMarkdownElement::Table(table)])
            }
            Tag::List(order) => {
                let order = *order;
                self.cursor += 1;
                // A list expands into a flat sequence of list item elements
                let list = self.parse_list(order).await;
                Some(list)
            }
            Tag::BlockQuote(_kind) => {
                self.cursor += 1;
                let block_quote = self.parse_block_quote().await;
                Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
            }
            Tag::CodeBlock(kind) => {
                // Only fenced blocks with a non-empty info string carry a language
                let language = match kind {
                    pulldown_cmark::CodeBlockKind::Indented => None,
                    pulldown_cmark::CodeBlockKind::Fenced(language) => {
                        if language.is_empty() {
                            None
                        } else {
                            Some(language.to_string())
                        }
                    }
                };

                self.cursor += 1;

                let code_block = self.parse_code_block(language).await?;
                Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
            }
            Tag::HtmlBlock => {
                self.cursor += 1;

                Some(self.parse_html_block().await)
            }
            _ => None,
        },
        Event::Rule => {
            self.cursor += 1;
            Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
        }
        _ => None,
    }
}
217
218 fn parse_text(
219 &mut self,
220 should_complete_on_soft_break: bool,
221 source_range: Option<Range<usize>>,
222 ) -> MarkdownParagraph {
223 let source_range = source_range.unwrap_or_else(|| {
224 self.current()
225 .map(|(_, range)| range.clone())
226 .unwrap_or_default()
227 });
228
229 let mut markdown_text_like = Vec::new();
230 let mut text = String::new();
231 let mut bold_depth = 0;
232 let mut italic_depth = 0;
233 let mut strikethrough_depth = 0;
234 let mut link: Option<Link> = None;
235 let mut image: Option<Image> = None;
236 let mut region_ranges: Vec<Range<usize>> = vec![];
237 let mut regions: Vec<ParsedRegion> = vec![];
238 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
239 let mut link_urls: Vec<String> = vec![];
240 let mut link_ranges: Vec<Range<usize>> = vec![];
241
242 loop {
243 if self.eof() {
244 break;
245 }
246
247 let (current, _) = self.current().unwrap();
248 let prev_len = text.len();
249 match current {
250 Event::SoftBreak => {
251 if should_complete_on_soft_break {
252 break;
253 }
254 text.push(' ');
255 }
256
257 Event::HardBreak => {
258 text.push('\n');
259 }
260
261 // We want to ignore any inline HTML tags in the text but keep
262 // the text between them
263 Event::InlineHtml(_) => {}
264
265 Event::Text(t) => {
266 text.push_str(t.as_ref());
267 let mut style = MarkdownHighlightStyle::default();
268
269 if bold_depth > 0 {
270 style.weight = FontWeight::BOLD;
271 }
272
273 if italic_depth > 0 {
274 style.italic = true;
275 }
276
277 if strikethrough_depth > 0 {
278 style.strikethrough = true;
279 }
280
281 let last_run_len = if let Some(link) = link.clone() {
282 region_ranges.push(prev_len..text.len());
283 regions.push(ParsedRegion {
284 code: false,
285 link: Some(link),
286 });
287 style.link = true;
288 prev_len
289 } else {
290 // Manually scan for links
291 let mut finder = linkify::LinkFinder::new();
292 finder.kinds(&[linkify::LinkKind::Url]);
293 let mut last_link_len = prev_len;
294 for link in finder.links(t) {
295 let start = prev_len + link.start();
296 let end = prev_len + link.end();
297 let range = start..end;
298 link_ranges.push(range.clone());
299 link_urls.push(link.as_str().to_string());
300
301 // If there is a style before we match a link, we have to add this to the highlighted ranges
302 if style != MarkdownHighlightStyle::default() && last_link_len < start {
303 highlights.push((
304 last_link_len..start,
305 MarkdownHighlight::Style(style.clone()),
306 ));
307 }
308
309 highlights.push((
310 range.clone(),
311 MarkdownHighlight::Style(MarkdownHighlightStyle {
312 underline: true,
313 ..style
314 }),
315 ));
316 region_ranges.push(range.clone());
317 regions.push(ParsedRegion {
318 code: false,
319 link: Some(Link::Web {
320 url: link.as_str().to_string(),
321 }),
322 });
323 last_link_len = end;
324 }
325 last_link_len
326 };
327
328 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
329 let mut new_highlight = true;
330 if let Some((last_range, last_style)) = highlights.last_mut()
331 && last_range.end == last_run_len
332 && last_style == &MarkdownHighlight::Style(style.clone())
333 {
334 last_range.end = text.len();
335 new_highlight = false;
336 }
337 if new_highlight {
338 highlights.push((
339 last_run_len..text.len(),
340 MarkdownHighlight::Style(style.clone()),
341 ));
342 }
343 }
344 }
345 Event::Code(t) => {
346 text.push_str(t.as_ref());
347 region_ranges.push(prev_len..text.len());
348
349 if link.is_some() {
350 highlights.push((
351 prev_len..text.len(),
352 MarkdownHighlight::Style(MarkdownHighlightStyle {
353 link: true,
354 ..Default::default()
355 }),
356 ));
357 }
358 regions.push(ParsedRegion {
359 code: true,
360 link: link.clone(),
361 });
362 }
363 Event::Start(tag) => match tag {
364 Tag::Emphasis => italic_depth += 1,
365 Tag::Strong => bold_depth += 1,
366 Tag::Strikethrough => strikethrough_depth += 1,
367 Tag::Link { dest_url, .. } => {
368 link = Link::identify(
369 self.file_location_directory.clone(),
370 dest_url.to_string(),
371 );
372 }
373 Tag::Image { dest_url, .. } => {
374 if !text.is_empty() {
375 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
376 source_range: source_range.clone(),
377 contents: mem::take(&mut text).into(),
378 highlights: mem::take(&mut highlights),
379 region_ranges: mem::take(&mut region_ranges),
380 regions: mem::take(&mut regions),
381 });
382 markdown_text_like.push(parsed_regions);
383 }
384 image = Image::identify(
385 dest_url.to_string(),
386 source_range.clone(),
387 self.file_location_directory.clone(),
388 );
389 }
390 _ => {
391 break;
392 }
393 },
394
395 Event::End(tag) => match tag {
396 TagEnd::Emphasis => italic_depth -= 1,
397 TagEnd::Strong => bold_depth -= 1,
398 TagEnd::Strikethrough => strikethrough_depth -= 1,
399 TagEnd::Link => {
400 link = None;
401 }
402 TagEnd::Image => {
403 if let Some(mut image) = image.take() {
404 if !text.is_empty() {
405 image.set_alt_text(std::mem::take(&mut text).into());
406 }
407 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
408 }
409 }
410 TagEnd::Paragraph => {
411 self.cursor += 1;
412 break;
413 }
414 _ => {
415 break;
416 }
417 },
418 _ => {
419 break;
420 }
421 }
422
423 self.cursor += 1;
424 }
425 if !text.is_empty() {
426 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
427 source_range,
428 contents: text.into(),
429 highlights,
430 regions,
431 region_ranges,
432 }));
433 }
434 markdown_text_like
435 }
436
437 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
438 let (_event, source_range) = self.previous().unwrap();
439 let source_range = source_range.clone();
440 let text = self.parse_text(true, None);
441
442 // Advance past the heading end tag
443 self.cursor += 1;
444
445 ParsedMarkdownHeading {
446 source_range,
447 level: match level {
448 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
449 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
450 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
451 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
452 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
453 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
454 },
455 contents: text,
456 }
457 }
458
459 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
460 let (_event, source_range) = self.previous().unwrap();
461 let source_range = source_range.clone();
462 let mut header = ParsedMarkdownTableRow::new();
463 let mut body = vec![];
464 let mut current_row = vec![];
465 let mut in_header = true;
466 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
467
468 loop {
469 if self.eof() {
470 break;
471 }
472
473 let (current, source_range) = self.current().unwrap();
474 let source_range = source_range.clone();
475 match current {
476 Event::Start(Tag::TableHead)
477 | Event::Start(Tag::TableRow)
478 | Event::End(TagEnd::TableCell) => {
479 self.cursor += 1;
480 }
481 Event::Start(Tag::TableCell) => {
482 self.cursor += 1;
483 let cell_contents = self.parse_text(false, Some(source_range));
484 current_row.push(cell_contents);
485 }
486 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
487 self.cursor += 1;
488 let new_row = std::mem::take(&mut current_row);
489 if in_header {
490 header.children = new_row;
491 in_header = false;
492 } else {
493 let row = ParsedMarkdownTableRow::with_children(new_row);
494 body.push(row);
495 }
496 }
497 Event::End(TagEnd::Table) => {
498 self.cursor += 1;
499 break;
500 }
501 _ => {
502 break;
503 }
504 }
505 }
506
507 ParsedMarkdownTable {
508 source_range,
509 header,
510 body,
511 column_alignments,
512 }
513 }
514
515 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
516 match alignment {
517 Alignment::None => ParsedMarkdownTableAlignment::None,
518 Alignment::Left => ParsedMarkdownTableAlignment::Left,
519 Alignment::Center => ParsedMarkdownTableAlignment::Center,
520 Alignment::Right => ParsedMarkdownTableAlignment::Right,
521 }
522 }
523
/// Parses a list whose start tag has just been consumed and returns its items
/// (including the items of nested lists) as a flat vector of elements, each
/// list item carrying its nesting `depth`.
///
/// `order` is the number of the first item for ordered lists and `None` for
/// unordered ones.
///
/// Panics if there is no previous token.
async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
    let (_, list_source_range) = self.previous().unwrap();

    let mut items = Vec::new();
    // Items that have been started but not finished yet, innermost last
    let mut items_stack = vec![MarkdownListItem::default()];
    let mut depth = 1;
    let mut order = order;
    // Numbering of the enclosing lists, restored when a nested list ends
    let mut order_stack = Vec::new();

    // Per depth: the position in `items` at which the item that contains a
    // nested list has to be inserted once it is finished
    let mut insertion_indices = FxHashMap::default();
    // Per depth: the source range of the item that contains a nested list
    let mut source_ranges = FxHashMap::default();
    let mut start_item_range = list_source_range.clone();

    while !self.eof() {
        let (current, source_range) = self.current().unwrap();
        match current {
            Event::Start(Tag::List(new_order)) => {
                if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                    insertion_indices.insert(depth, items.len());
                }

                // We will use the start of the nested list as the end for the current item's range,
                // because we don't care about the hierarchy of list items
                if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                    e.insert(start_item_range.start..source_range.start);
                }

                order_stack.push(order);
                order = *new_order;
                self.cursor += 1;
                depth += 1;
            }
            Event::End(TagEnd::List(_)) => {
                order = order_stack.pop().flatten();
                self.cursor += 1;
                depth -= 1;

                // The outermost list has been closed
                if depth == 0 {
                    break;
                }
            }
            Event::Start(Tag::Item) => {
                start_item_range = source_range.clone();

                self.cursor += 1;
                items_stack.push(MarkdownListItem::default());

                let mut task_list = None;
                // Check for task list marker (`- [ ]` or `- [x]`)
                if let Some(event) = self.current_event() {
                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if event == &Event::Start(Tag::Paragraph) {
                        self.cursor += 1;
                    }

                    if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                        task_list = Some((*checked, range.clone()));
                        self.cursor += 1;
                    }
                }

                if let Some((event, range)) = self.current() {
                    // This is a plain list item.
                    // For example `- some text` or `1. [Docs](./docs.md)`
                    if MarkdownParser::is_text_like(event) {
                        let text = self.parse_text(false, Some(range.clone()));
                        let block = ParsedMarkdownElement::Paragraph(text);
                        if let Some(content) = items_stack.last_mut() {
                            // A task marker takes precedence over list numbering
                            let item_type = if let Some((checked, range)) = task_list {
                                ParsedMarkdownListItemType::Task(checked, range)
                            } else if let Some(order) = order {
                                ParsedMarkdownListItemType::Ordered(order)
                            } else {
                                ParsedMarkdownListItemType::Unordered
                            };
                            content.item_type = item_type;
                            content.content.push(block);
                        }
                    } else {
                        let block = self.parse_block().await;
                        if let Some(block) = block
                            && let Some(list_item) = items_stack.last_mut()
                        {
                            list_item.content.extend(block);
                        }
                    }
                }

                // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                    self.cursor += 1;
                }
            }
            Event::End(TagEnd::Item) => {
                self.cursor += 1;

                // The next item of an ordered list gets the following number
                if let Some(current) = order {
                    order = Some(current + 1);
                }

                if let Some(list_item) = items_stack.pop() {
                    let source_range = source_ranges
                        .remove(&depth)
                        .unwrap_or(start_item_range.clone());

                    // We need to remove the last character of the source range, because it includes the newline character
                    let source_range = source_range.start..source_range.end - 1;
                    let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                        source_range,
                        content: list_item.content,
                        depth,
                        item_type: list_item.item_type,
                    });

                    // An item with nested lists is finished after its nested items,
                    // so it is inserted in front of them
                    if let Some(index) = insertion_indices.get(&depth) {
                        items.insert(*index, item);
                        insertion_indices.remove(&depth);
                    } else {
                        items.push(item);
                    }
                }
            }
            _ => {
                if depth == 0 {
                    break;
                }
                // This can only happen if a list item starts with more than one paragraph,
                // or the list item contains blocks that should be rendered after the nested list items
                let block = self.parse_block().await;
                if let Some(block) = block {
                    if let Some(list_item) = items_stack.last_mut() {
                        // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                        if !insertion_indices.contains_key(&depth) {
                            list_item.content.extend(block);
                            continue;
                        }
                    }

                    // Otherwise we need to insert the block after all the nested items
                    // that have been parsed so far
                    items.extend(block);
                } else {
                    // Skip tokens that do not start a block
                    self.cursor += 1;
                }
            }
        }
    }

    items
}
674
675 #[async_recursion]
676 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
677 let (_event, source_range) = self.previous().unwrap();
678 let source_range = source_range.clone();
679 let mut nested_depth = 1;
680
681 let mut children: Vec<ParsedMarkdownElement> = vec![];
682
683 while !self.eof() {
684 let block = self.parse_block().await;
685
686 if let Some(block) = block {
687 children.extend(block);
688 } else {
689 break;
690 }
691
692 if self.eof() {
693 break;
694 }
695
696 let (current, _source_range) = self.current().unwrap();
697 match current {
698 // This is a nested block quote.
699 // Record that we're in a nested block quote and continue parsing.
700 // We don't need to advance the cursor since the next
701 // call to `parse_block` will handle it.
702 Event::Start(Tag::BlockQuote(_kind)) => {
703 nested_depth += 1;
704 }
705 Event::End(TagEnd::BlockQuote(_kind)) => {
706 nested_depth -= 1;
707 if nested_depth == 0 {
708 self.cursor += 1;
709 break;
710 }
711 }
712 _ => {}
713 };
714 }
715
716 ParsedMarkdownBlockQuote {
717 source_range,
718 children,
719 }
720 }
721
722 async fn parse_code_block(
723 &mut self,
724 language: Option<String>,
725 ) -> Option<ParsedMarkdownCodeBlock> {
726 let Some((_event, source_range)) = self.previous() else {
727 return None;
728 };
729
730 let source_range = source_range.clone();
731 let mut code = String::new();
732
733 while !self.eof() {
734 let Some((current, _source_range)) = self.current() else {
735 break;
736 };
737
738 match current {
739 Event::Text(text) => {
740 code.push_str(text);
741 self.cursor += 1;
742 }
743 Event::End(TagEnd::CodeBlock) => {
744 self.cursor += 1;
745 break;
746 }
747 _ => {
748 break;
749 }
750 }
751 }
752
753 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
754
755 let highlights = if let Some(language) = &language {
756 if let Some(registry) = &self.language_registry {
757 let rope: language::Rope = code.as_str().into();
758 registry
759 .language_for_name_or_extension(language)
760 .await
761 .map(|l| l.highlight_text(&rope, 0..code.len()))
762 .ok()
763 } else {
764 None
765 }
766 } else {
767 None
768 };
769
770 Some(ParsedMarkdownCodeBlock {
771 source_range,
772 contents: code.into(),
773 language,
774 highlights,
775 })
776 }
777
778 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
779 let mut elements = Vec::new();
780 let Some((_event, _source_range)) = self.previous() else {
781 return elements;
782 };
783
784 let mut html_source_range_start = None;
785 let mut html_source_range_end = None;
786 let mut html_buffer = String::new();
787
788 while !self.eof() {
789 let Some((current, source_range)) = self.current() else {
790 break;
791 };
792 let source_range = source_range.clone();
793 match current {
794 Event::Html(html) => {
795 html_source_range_start.get_or_insert(source_range.start);
796 html_source_range_end = Some(source_range.end);
797 html_buffer.push_str(html);
798 self.cursor += 1;
799 }
800 Event::End(TagEnd::CodeBlock) => {
801 self.cursor += 1;
802 break;
803 }
804 _ => {
805 break;
806 }
807 }
808 }
809
810 let bytes = cleanup_html(&html_buffer);
811
812 let mut cursor = std::io::Cursor::new(bytes);
813 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
814 .from_utf8()
815 .read_from(&mut cursor)
816 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
817 {
818 self.parse_html_node(start..end, &dom.document, &mut elements);
819 }
820
821 elements
822 }
823
824 fn parse_html_node(
825 &self,
826 source_range: Range<usize>,
827 node: &Rc<markup5ever_rcdom::Node>,
828 elements: &mut Vec<ParsedMarkdownElement>,
829 ) {
830 match &node.data {
831 markup5ever_rcdom::NodeData::Document => {
832 self.consume_children(source_range, node, elements);
833 }
834 markup5ever_rcdom::NodeData::Text { contents } => {
835 elements.push(ParsedMarkdownElement::Paragraph(vec![
836 MarkdownParagraphChunk::Text(ParsedMarkdownText {
837 source_range,
838 regions: Vec::default(),
839 region_ranges: Vec::default(),
840 highlights: Vec::default(),
841 contents: contents.borrow().to_string().into(),
842 }),
843 ]));
844 }
845 markup5ever_rcdom::NodeData::Comment { .. } => {}
846 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
847 if local_name!("img") == name.local {
848 if let Some(image) = self.extract_image(source_range, attrs) {
849 elements.push(ParsedMarkdownElement::Image(image));
850 }
851 } else if local_name!("p") == name.local {
852 let mut paragraph = MarkdownParagraph::new();
853 self.parse_paragraph(source_range, node, &mut paragraph);
854
855 if !paragraph.is_empty() {
856 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
857 }
858 } else if matches!(
859 name.local,
860 local_name!("h1")
861 | local_name!("h2")
862 | local_name!("h3")
863 | local_name!("h4")
864 | local_name!("h5")
865 | local_name!("h6")
866 ) {
867 let mut paragraph = MarkdownParagraph::new();
868 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
869
870 if !paragraph.is_empty() {
871 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
872 source_range,
873 level: match name.local {
874 local_name!("h1") => HeadingLevel::H1,
875 local_name!("h2") => HeadingLevel::H2,
876 local_name!("h3") => HeadingLevel::H3,
877 local_name!("h4") => HeadingLevel::H4,
878 local_name!("h5") => HeadingLevel::H5,
879 local_name!("h6") => HeadingLevel::H6,
880 _ => unreachable!(),
881 },
882 contents: paragraph,
883 }));
884 }
885 } else if local_name!("blockquote") == name.local {
886 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
887 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
888 }
889 } else if local_name!("table") == name.local {
890 if let Some(table) = self.extract_html_table(node, source_range) {
891 elements.push(ParsedMarkdownElement::Table(table));
892 }
893 } else {
894 self.consume_children(source_range, node, elements);
895 }
896 }
897 _ => {}
898 }
899 }
900
901 fn parse_paragraph(
902 &self,
903 source_range: Range<usize>,
904 node: &Rc<markup5ever_rcdom::Node>,
905 paragraph: &mut MarkdownParagraph,
906 ) {
907 match &node.data {
908 markup5ever_rcdom::NodeData::Text { contents } => {
909 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
910 source_range,
911 regions: Vec::default(),
912 region_ranges: Vec::default(),
913 highlights: Vec::default(),
914 contents: contents.borrow().to_string().into(),
915 }));
916 }
917 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
918 if local_name!("img") == name.local {
919 if let Some(image) = self.extract_image(source_range, attrs) {
920 paragraph.push(MarkdownParagraphChunk::Image(image));
921 }
922 } else {
923 self.consume_paragraph(source_range, node, paragraph);
924 }
925 }
926 _ => {}
927 }
928 }
929
930 fn consume_paragraph(
931 &self,
932 source_range: Range<usize>,
933 node: &Rc<markup5ever_rcdom::Node>,
934 paragraph: &mut MarkdownParagraph,
935 ) {
936 for node in node.children.borrow().iter() {
937 self.parse_paragraph(source_range.clone(), node, paragraph);
938 }
939 }
940
941 fn consume_children(
942 &self,
943 source_range: Range<usize>,
944 node: &Rc<markup5ever_rcdom::Node>,
945 elements: &mut Vec<ParsedMarkdownElement>,
946 ) {
947 for node in node.children.borrow().iter() {
948 self.parse_html_node(source_range.clone(), node, elements);
949 }
950 }
951
952 fn attr_value(
953 attrs: &RefCell<Vec<html5ever::Attribute>>,
954 name: html5ever::LocalName,
955 ) -> Option<String> {
956 attrs.borrow().iter().find_map(|attr| {
957 if attr.name.local == name {
958 Some(attr.value.to_string())
959 } else {
960 None
961 }
962 })
963 }
964
965 fn extract_styles_from_attributes(
966 attrs: &RefCell<Vec<html5ever::Attribute>>,
967 ) -> HashMap<String, String> {
968 let mut styles = HashMap::new();
969
970 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
971 for decl in style.split(';') {
972 let mut parts = decl.splitn(2, ':');
973 if let Some((key, value)) = parts.next().zip(parts.next()) {
974 styles.insert(
975 key.trim().to_lowercase().to_string(),
976 value.trim().to_string(),
977 );
978 }
979 }
980 }
981
982 styles
983 }
984
985 fn extract_image(
986 &self,
987 source_range: Range<usize>,
988 attrs: &RefCell<Vec<html5ever::Attribute>>,
989 ) -> Option<Image> {
990 let src = Self::attr_value(attrs, local_name!("src"))?;
991
992 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
993
994 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
995 image.set_alt_text(alt.into());
996 }
997
998 let styles = Self::extract_styles_from_attributes(attrs);
999
1000 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1001 .or_else(|| styles.get("width").cloned())
1002 .and_then(|width| Self::parse_html_element_dimension(&width))
1003 {
1004 image.set_width(width);
1005 }
1006
1007 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1008 .or_else(|| styles.get("height").cloned())
1009 .and_then(|height| Self::parse_html_element_dimension(&height))
1010 {
1011 image.set_height(height);
1012 }
1013
1014 Some(image)
1015 }
1016
1017 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1018 if value.ends_with("%") {
1019 value
1020 .trim_end_matches("%")
1021 .parse::<f32>()
1022 .ok()
1023 .map(|value| relative(value / 100.))
1024 } else {
1025 value
1026 .trim_end_matches("px")
1027 .parse()
1028 .ok()
1029 .map(|value| px(value).into())
1030 }
1031 }
1032
1033 fn extract_html_blockquote(
1034 &self,
1035 node: &Rc<markup5ever_rcdom::Node>,
1036 source_range: Range<usize>,
1037 ) -> Option<ParsedMarkdownBlockQuote> {
1038 let mut children = Vec::new();
1039 self.consume_children(source_range.clone(), node, &mut children);
1040
1041 if children.is_empty() {
1042 None
1043 } else {
1044 Some(ParsedMarkdownBlockQuote {
1045 children,
1046 source_range,
1047 })
1048 }
1049 }
1050
1051 fn extract_html_table(
1052 &self,
1053 node: &Rc<markup5ever_rcdom::Node>,
1054 source_range: Range<usize>,
1055 ) -> Option<ParsedMarkdownTable> {
1056 let mut header_columns = Vec::new();
1057 let mut body_rows = Vec::new();
1058
1059 // node should be a thead or tbody element
1060 for node in node.children.borrow().iter() {
1061 match &node.data {
1062 markup5ever_rcdom::NodeData::Element { name, .. } => {
1063 if local_name!("thead") == name.local {
1064 // node should be a tr element
1065 for node in node.children.borrow().iter() {
1066 let mut paragraph = MarkdownParagraph::new();
1067 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1068
1069 for paragraph in paragraph.into_iter() {
1070 header_columns.push(vec![paragraph]);
1071 }
1072 }
1073 } else if local_name!("tbody") == name.local {
1074 // node should be a tr element
1075 for node in node.children.borrow().iter() {
1076 let mut row = MarkdownParagraph::new();
1077 self.consume_paragraph(source_range.clone(), node, &mut row);
1078 body_rows.push(ParsedMarkdownTableRow::with_children(
1079 row.into_iter().map(|column| vec![column]).collect(),
1080 ));
1081 }
1082 }
1083 }
1084 _ => {}
1085 }
1086 }
1087
1088 if !header_columns.is_empty() || !body_rows.is_empty() {
1089 Some(ParsedMarkdownTable {
1090 source_range,
1091 body: body_rows,
1092 column_alignments: Vec::default(),
1093 header: ParsedMarkdownTableRow::with_children(header_columns),
1094 })
1095 } else {
1096 None
1097 }
1098 }
1099}
1100
1101#[cfg(test)]
1102mod tests {
1103 use super::*;
1104 use ParsedMarkdownListItemType::*;
1105 use core::panic;
1106 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1107 use language::{
1108 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1109 };
1110 use pretty_assertions::assert_eq;
1111
/// Parses `input` without a file location directory and without a language registry.
async fn parse(input: &str) -> ParsedMarkdown {
    parse_markdown(input, None, None).await
}
1115
1116 #[gpui::test]
1117 async fn test_headings() {
1118 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1119
1120 assert_eq!(
1121 parsed.children,
1122 vec![
1123 h1(text("Heading one", 2..13), 0..14),
1124 h2(text("Heading two", 17..28), 14..29),
1125 h3(text("Heading three", 33..46), 29..46),
1126 ]
1127 );
1128 }
1129
1130 #[gpui::test]
1131 async fn test_newlines_dont_new_paragraphs() {
1132 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1133
1134 assert_eq!(
1135 parsed.children,
1136 vec![p("Some text that is bolded and italicized", 0..46)]
1137 );
1138 }
1139
1140 #[gpui::test]
1141 async fn test_heading_with_paragraph() {
1142 let parsed = parse("# Zed\nThe editor").await;
1143
1144 assert_eq!(
1145 parsed.children,
1146 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1147 );
1148 }
1149
1150 #[gpui::test]
1151 async fn test_double_newlines_do_new_paragraphs() {
1152 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1153
1154 assert_eq!(
1155 parsed.children,
1156 vec![
1157 p("Some text that is bolded", 0..29),
1158 p("and italicized", 31..47),
1159 ]
1160 );
1161 }
1162
1163 #[gpui::test]
1164 async fn test_bold_italic_text() {
1165 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1166
1167 assert_eq!(
1168 parsed.children,
1169 vec![p("Some text that is bolded and italicized", 0..45)]
1170 );
1171 }
1172
1173 #[gpui::test]
1174 async fn test_nested_bold_strikethrough_text() {
1175 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1176
1177 assert_eq!(parsed.children.len(), 1);
1178 assert_eq!(
1179 parsed.children[0],
1180 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1181 ParsedMarkdownText {
1182 source_range: 0..35,
1183 contents: "Some bostrikethroughld text".into(),
1184 highlights: Vec::new(),
1185 region_ranges: Vec::new(),
1186 regions: Vec::new(),
1187 }
1188 )])
1189 );
1190
1191 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1192 text
1193 } else {
1194 panic!("Expected a paragraph");
1195 };
1196
1197 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1198 text
1199 } else {
1200 panic!("Expected a text");
1201 };
1202
1203 assert_eq!(
1204 paragraph.highlights,
1205 vec![
1206 (
1207 5..7,
1208 MarkdownHighlight::Style(MarkdownHighlightStyle {
1209 weight: FontWeight::BOLD,
1210 ..Default::default()
1211 }),
1212 ),
1213 (
1214 7..20,
1215 MarkdownHighlight::Style(MarkdownHighlightStyle {
1216 weight: FontWeight::BOLD,
1217 strikethrough: true,
1218 ..Default::default()
1219 }),
1220 ),
1221 (
1222 20..22,
1223 MarkdownHighlight::Style(MarkdownHighlightStyle {
1224 weight: FontWeight::BOLD,
1225 ..Default::default()
1226 }),
1227 ),
1228 ]
1229 );
1230 }
1231
1232 #[gpui::test]
1233 async fn test_text_with_inline_html() {
1234 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1235
1236 assert_eq!(
1237 parsed.children,
1238 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1239 );
1240 }
1241
1242 #[gpui::test]
1243 async fn test_raw_links_detection() {
1244 let parsed = parse("Checkout this https://zed.dev link").await;
1245
1246 assert_eq!(
1247 parsed.children,
1248 vec![p("Checkout this https://zed.dev link", 0..34)]
1249 );
1250 }
1251
1252 #[gpui::test]
1253 async fn test_empty_image() {
1254 let parsed = parse("![]()").await;
1255
1256 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1257 text
1258 } else {
1259 panic!("Expected a paragraph");
1260 };
1261 assert_eq!(paragraph.len(), 0);
1262 }
1263
1264 #[gpui::test]
1265 async fn test_image_links_detection() {
1266 let parsed = parse("").await;
1267
1268 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1269 text
1270 } else {
1271 panic!("Expected a paragraph");
1272 };
1273 assert_eq!(
1274 paragraph[0],
1275 MarkdownParagraphChunk::Image(Image {
1276 source_range: 0..111,
1277 link: Link::Web {
1278 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1279 },
1280 alt_text: Some("test".into()),
1281 height: None,
1282 width: None,
1283 },)
1284 );
1285 }
1286
1287 #[gpui::test]
1288 async fn test_image_without_alt_text() {
1289 let parsed = parse("").await;
1290
1291 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1292 text
1293 } else {
1294 panic!("Expected a paragraph");
1295 };
1296 assert_eq!(
1297 paragraph[0],
1298 MarkdownParagraphChunk::Image(Image {
1299 source_range: 0..31,
1300 link: Link::Web {
1301 url: "http://example.com/foo.png".to_string(),
1302 },
1303 alt_text: None,
1304 height: None,
1305 width: None,
1306 },)
1307 );
1308 }
1309
1310 #[gpui::test]
1311 async fn test_image_with_alt_text_containing_formatting() {
1312 let parsed = parse("").await;
1313
1314 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1315 panic!("Expected a paragraph");
1316 };
1317 assert_eq!(
1318 chunks,
1319 &[MarkdownParagraphChunk::Image(Image {
1320 source_range: 0..44,
1321 link: Link::Web {
1322 url: "http://example.com/foo.png".to_string(),
1323 },
1324 alt_text: Some("foo bar baz".into()),
1325 height: None,
1326 width: None,
1327 }),],
1328 );
1329 }
1330
1331 #[gpui::test]
1332 async fn test_images_with_text_in_between() {
1333 let parsed = parse(
1334 "\nLorem Ipsum\n",
1335 )
1336 .await;
1337
1338 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1339 text
1340 } else {
1341 panic!("Expected a paragraph");
1342 };
1343 assert_eq!(
1344 chunks,
1345 &vec![
1346 MarkdownParagraphChunk::Image(Image {
1347 source_range: 0..81,
1348 link: Link::Web {
1349 url: "http://example.com/foo.png".to_string(),
1350 },
1351 alt_text: Some("foo".into()),
1352 height: None,
1353 width: None,
1354 }),
1355 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1356 source_range: 0..81,
1357 contents: " Lorem Ipsum ".into(),
1358 highlights: Vec::new(),
1359 region_ranges: Vec::new(),
1360 regions: Vec::new(),
1361 }),
1362 MarkdownParagraphChunk::Image(Image {
1363 source_range: 0..81,
1364 link: Link::Web {
1365 url: "http://example.com/bar.png".to_string(),
1366 },
1367 alt_text: Some("bar".into()),
1368 height: None,
1369 width: None,
1370 })
1371 ]
1372 );
1373 }
1374
1375 #[test]
1376 fn test_parse_html_element_dimension() {
1377 // Test percentage values
1378 assert_eq!(
1379 MarkdownParser::parse_html_element_dimension("50%"),
1380 Some(DefiniteLength::Fraction(0.5))
1381 );
1382 assert_eq!(
1383 MarkdownParser::parse_html_element_dimension("100%"),
1384 Some(DefiniteLength::Fraction(1.0))
1385 );
1386 assert_eq!(
1387 MarkdownParser::parse_html_element_dimension("25%"),
1388 Some(DefiniteLength::Fraction(0.25))
1389 );
1390 assert_eq!(
1391 MarkdownParser::parse_html_element_dimension("0%"),
1392 Some(DefiniteLength::Fraction(0.0))
1393 );
1394
1395 // Test pixel values
1396 assert_eq!(
1397 MarkdownParser::parse_html_element_dimension("100px"),
1398 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1399 );
1400 assert_eq!(
1401 MarkdownParser::parse_html_element_dimension("50px"),
1402 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1403 );
1404 assert_eq!(
1405 MarkdownParser::parse_html_element_dimension("0px"),
1406 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1407 );
1408
1409 // Test values without units (should be treated as pixels)
1410 assert_eq!(
1411 MarkdownParser::parse_html_element_dimension("100"),
1412 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1413 );
1414 assert_eq!(
1415 MarkdownParser::parse_html_element_dimension("42"),
1416 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1417 );
1418
1419 // Test invalid values
1420 assert_eq!(
1421 MarkdownParser::parse_html_element_dimension("invalid"),
1422 None
1423 );
1424 assert_eq!(MarkdownParser::parse_html_element_dimension("px"), None);
1425 assert_eq!(MarkdownParser::parse_html_element_dimension("%"), None);
1426 assert_eq!(MarkdownParser::parse_html_element_dimension(""), None);
1427 assert_eq!(MarkdownParser::parse_html_element_dimension("abc%"), None);
1428 assert_eq!(MarkdownParser::parse_html_element_dimension("abcpx"), None);
1429
1430 // Test decimal values
1431 assert_eq!(
1432 MarkdownParser::parse_html_element_dimension("50.5%"),
1433 Some(DefiniteLength::Fraction(0.505))
1434 );
1435 assert_eq!(
1436 MarkdownParser::parse_html_element_dimension("100.25px"),
1437 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1438 );
1439 assert_eq!(
1440 MarkdownParser::parse_html_element_dimension("42.0"),
1441 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1442 );
1443 }
1444
1445 #[gpui::test]
1446 async fn test_inline_html_image_tag() {
1447 let parsed =
1448 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1449 .await;
1450
1451 assert_eq!(
1452 ParsedMarkdown {
1453 children: vec![ParsedMarkdownElement::Paragraph(vec![
1454 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1455 source_range: 0..71,
1456 contents: "Some text".into(),
1457 highlights: Default::default(),
1458 region_ranges: Default::default(),
1459 regions: Default::default()
1460 }),
1461 MarkdownParagraphChunk::Image(Image {
1462 source_range: 0..71,
1463 link: Link::Web {
1464 url: "http://example.com/foo.png".to_string(),
1465 },
1466 alt_text: None,
1467 height: None,
1468 width: None,
1469 }),
1470 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1471 source_range: 0..71,
1472 contents: " some more text".into(),
1473 highlights: Default::default(),
1474 region_ranges: Default::default(),
1475 regions: Default::default()
1476 }),
1477 ])]
1478 },
1479 parsed
1480 );
1481 }
1482
1483 #[gpui::test]
1484 async fn test_html_block_quote() {
1485 let parsed = parse(
1486 "<blockquote>
1487 <p>some description</p>
1488 </blockquote>",
1489 )
1490 .await;
1491
1492 assert_eq!(
1493 ParsedMarkdown {
1494 children: vec![block_quote(
1495 vec![ParsedMarkdownElement::Paragraph(text(
1496 "some description",
1497 0..76
1498 ))],
1499 0..76,
1500 )]
1501 },
1502 parsed
1503 );
1504 }
1505
1506 #[gpui::test]
1507 async fn test_html_nested_block_quote() {
1508 let parsed = parse(
1509 "<blockquote>
1510 <p>some description</p>
1511 <blockquote>
1512 <p>second description</p>
1513 </blockquote>
1514 </blockquote>",
1515 )
1516 .await;
1517
1518 assert_eq!(
1519 ParsedMarkdown {
1520 children: vec![block_quote(
1521 vec![
1522 ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1523 block_quote(
1524 vec![ParsedMarkdownElement::Paragraph(text(
1525 "second description",
1526 0..173
1527 ))],
1528 0..173,
1529 )
1530 ],
1531 0..173,
1532 )]
1533 },
1534 parsed
1535 );
1536 }
1537
1538 #[gpui::test]
1539 async fn test_html_table() {
1540 let parsed = parse(
1541 "<table>
1542 <thead>
1543 <tr>
1544 <th>Id</th>
1545 <th>Name</th>
1546 </tr>
1547 </thead>
1548 <tbody>
1549 <tr>
1550 <td>1</td>
1551 <td>Chris</td>
1552 </tr>
1553 <tr>
1554 <td>2</td>
1555 <td>Dennis</td>
1556 </tr>
1557 </tbody>
1558 </table>",
1559 )
1560 .await;
1561
1562 assert_eq!(
1563 ParsedMarkdown {
1564 children: vec![ParsedMarkdownElement::Table(table(
1565 0..366,
1566 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1567 vec![
1568 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1569 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1570 ],
1571 ))],
1572 },
1573 parsed
1574 );
1575 }
1576
1577 #[gpui::test]
1578 async fn test_html_table_without_headings() {
1579 let parsed = parse(
1580 "<table>
1581 <tbody>
1582 <tr>
1583 <td>1</td>
1584 <td>Chris</td>
1585 </tr>
1586 <tr>
1587 <td>2</td>
1588 <td>Dennis</td>
1589 </tr>
1590 </tbody>
1591 </table>",
1592 )
1593 .await;
1594
1595 assert_eq!(
1596 ParsedMarkdown {
1597 children: vec![ParsedMarkdownElement::Table(table(
1598 0..240,
1599 row(vec![]),
1600 vec![
1601 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1602 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1603 ],
1604 ))],
1605 },
1606 parsed
1607 );
1608 }
1609
1610 #[gpui::test]
1611 async fn test_html_table_without_body() {
1612 let parsed = parse(
1613 "<table>
1614 <thead>
1615 <tr>
1616 <th>Id</th>
1617 <th>Name</th>
1618 </tr>
1619 </thead>
1620 </table>",
1621 )
1622 .await;
1623
1624 assert_eq!(
1625 ParsedMarkdown {
1626 children: vec![ParsedMarkdownElement::Table(table(
1627 0..150,
1628 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1629 vec![],
1630 ))],
1631 },
1632 parsed
1633 );
1634 }
1635
1636 #[gpui::test]
1637 async fn test_html_heading_tags() {
1638 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1639
1640 assert_eq!(
1641 ParsedMarkdown {
1642 children: vec![
1643 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1644 level: HeadingLevel::H1,
1645 source_range: 0..96,
1646 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1647 source_range: 0..96,
1648 contents: "Heading".into(),
1649 highlights: Vec::default(),
1650 region_ranges: Vec::default(),
1651 regions: Vec::default()
1652 })],
1653 }),
1654 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1655 level: HeadingLevel::H2,
1656 source_range: 0..96,
1657 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1658 source_range: 0..96,
1659 contents: "Heading".into(),
1660 highlights: Vec::default(),
1661 region_ranges: Vec::default(),
1662 regions: Vec::default()
1663 })],
1664 }),
1665 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1666 level: HeadingLevel::H3,
1667 source_range: 0..96,
1668 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1669 source_range: 0..96,
1670 contents: "Heading".into(),
1671 highlights: Vec::default(),
1672 region_ranges: Vec::default(),
1673 regions: Vec::default()
1674 })],
1675 }),
1676 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1677 level: HeadingLevel::H4,
1678 source_range: 0..96,
1679 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1680 source_range: 0..96,
1681 contents: "Heading".into(),
1682 highlights: Vec::default(),
1683 region_ranges: Vec::default(),
1684 regions: Vec::default()
1685 })],
1686 }),
1687 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1688 level: HeadingLevel::H5,
1689 source_range: 0..96,
1690 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1691 source_range: 0..96,
1692 contents: "Heading".into(),
1693 highlights: Vec::default(),
1694 region_ranges: Vec::default(),
1695 regions: Vec::default()
1696 })],
1697 }),
1698 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1699 level: HeadingLevel::H6,
1700 source_range: 0..96,
1701 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1702 source_range: 0..96,
1703 contents: "Heading".into(),
1704 highlights: Vec::default(),
1705 region_ranges: Vec::default(),
1706 regions: Vec::default()
1707 })],
1708 }),
1709 ],
1710 },
1711 parsed
1712 );
1713 }
1714
1715 #[gpui::test]
1716 async fn test_html_image_tag() {
1717 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1718
1719 assert_eq!(
1720 ParsedMarkdown {
1721 children: vec![ParsedMarkdownElement::Image(Image {
1722 source_range: 0..40,
1723 link: Link::Web {
1724 url: "http://example.com/foo.png".to_string(),
1725 },
1726 alt_text: None,
1727 height: None,
1728 width: None,
1729 })]
1730 },
1731 parsed
1732 );
1733 }
1734
1735 #[gpui::test]
1736 async fn test_html_image_tag_with_alt_text() {
1737 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1738
1739 assert_eq!(
1740 ParsedMarkdown {
1741 children: vec![ParsedMarkdownElement::Image(Image {
1742 source_range: 0..50,
1743 link: Link::Web {
1744 url: "http://example.com/foo.png".to_string(),
1745 },
1746 alt_text: Some("Foo".into()),
1747 height: None,
1748 width: None,
1749 })]
1750 },
1751 parsed
1752 );
1753 }
1754
1755 #[gpui::test]
1756 async fn test_html_image_tag_with_height_and_width() {
1757 let parsed =
1758 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1759
1760 assert_eq!(
1761 ParsedMarkdown {
1762 children: vec![ParsedMarkdownElement::Image(Image {
1763 source_range: 0..65,
1764 link: Link::Web {
1765 url: "http://example.com/foo.png".to_string(),
1766 },
1767 alt_text: None,
1768 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1769 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1770 })]
1771 },
1772 parsed
1773 );
1774 }
1775
1776 #[gpui::test]
1777 async fn test_html_image_style_tag_with_height_and_width() {
1778 let parsed = parse(
1779 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1780 )
1781 .await;
1782
1783 assert_eq!(
1784 ParsedMarkdown {
1785 children: vec![ParsedMarkdownElement::Image(Image {
1786 source_range: 0..75,
1787 link: Link::Web {
1788 url: "http://example.com/foo.png".to_string(),
1789 },
1790 alt_text: None,
1791 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1792 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1793 })]
1794 },
1795 parsed
1796 );
1797 }
1798
1799 #[gpui::test]
1800 async fn test_header_only_table() {
1801 let markdown = "\
1802| Header 1 | Header 2 |
1803|----------|----------|
1804
1805Some other content
1806";
1807
1808 let expected_table = table(
1809 0..48,
1810 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1811 vec![],
1812 );
1813
1814 assert_eq!(
1815 parse(markdown).await.children[0],
1816 ParsedMarkdownElement::Table(expected_table)
1817 );
1818 }
1819
1820 #[gpui::test]
1821 async fn test_basic_table() {
1822 let markdown = "\
1823| Header 1 | Header 2 |
1824|----------|----------|
1825| Cell 1 | Cell 2 |
1826| Cell 3 | Cell 4 |";
1827
1828 let expected_table = table(
1829 0..95,
1830 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1831 vec![
1832 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1833 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1834 ],
1835 );
1836
1837 assert_eq!(
1838 parse(markdown).await.children[0],
1839 ParsedMarkdownElement::Table(expected_table)
1840 );
1841 }
1842
1843 #[gpui::test]
1844 async fn test_list_basic() {
1845 let parsed = parse(
1846 "\
1847* Item 1
1848* Item 2
1849* Item 3
1850",
1851 )
1852 .await;
1853
1854 assert_eq!(
1855 parsed.children,
1856 vec![
1857 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1858 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1859 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1860 ],
1861 );
1862 }
1863
1864 #[gpui::test]
1865 async fn test_list_with_tasks() {
1866 let parsed = parse(
1867 "\
1868- [ ] TODO
1869- [x] Checked
1870",
1871 )
1872 .await;
1873
1874 assert_eq!(
1875 parsed.children,
1876 vec![
1877 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1878 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1879 ],
1880 );
1881 }
1882
1883 #[gpui::test]
1884 async fn test_list_with_indented_task() {
1885 let parsed = parse(
1886 "\
1887- [ ] TODO
1888 - [x] Checked
1889 - Unordered
1890 1. Number 1
1891 1. Number 2
18921. Number A
1893",
1894 )
1895 .await;
1896
1897 assert_eq!(
1898 parsed.children,
1899 vec![
1900 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1901 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1902 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1903 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1904 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1905 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1906 ],
1907 );
1908 }
1909
1910 #[gpui::test]
1911 async fn test_list_with_linebreak_is_handled_correctly() {
1912 let parsed = parse(
1913 "\
1914- [ ] Task 1
1915
1916- [x] Task 2
1917",
1918 )
1919 .await;
1920
1921 assert_eq!(
1922 parsed.children,
1923 vec![
1924 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1925 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1926 ],
1927 );
1928 }
1929
1930 #[gpui::test]
1931 async fn test_list_nested() {
1932 let parsed = parse(
1933 "\
1934* Item 1
1935* Item 2
1936* Item 3
1937
19381. Hello
19391. Two
1940 1. Three
19412. Four
19423. Five
1943
1944* First
1945 1. Hello
1946 1. Goodbyte
1947 - Inner
1948 - Inner
1949 2. Goodbyte
1950 - Next item empty
1951 -
1952* Last
1953",
1954 )
1955 .await;
1956
1957 assert_eq!(
1958 parsed.children,
1959 vec![
1960 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1961 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1962 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1963 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1964 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1965 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1966 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1967 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1968 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1969 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1970 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1971 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1972 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1973 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1974 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1975 list_item(186..190, 3, Unordered, vec![]),
1976 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1977 ]
1978 );
1979 }
1980
1981 #[gpui::test]
1982 async fn test_list_with_nested_content() {
1983 let parsed = parse(
1984 "\
1985* This is a list item with two paragraphs.
1986
1987 This is the second paragraph in the list item.
1988",
1989 )
1990 .await;
1991
1992 assert_eq!(
1993 parsed.children,
1994 vec![list_item(
1995 0..96,
1996 1,
1997 Unordered,
1998 vec![
1999 p("This is a list item with two paragraphs.", 4..44),
2000 p("This is the second paragraph in the list item.", 50..97)
2001 ],
2002 ),],
2003 );
2004 }
2005
2006 #[gpui::test]
2007 async fn test_list_item_with_inline_html() {
2008 let parsed = parse(
2009 "\
2010* This is a list item with an inline HTML <sometag>tag</sometag>.
2011",
2012 )
2013 .await;
2014
2015 assert_eq!(
2016 parsed.children,
2017 vec![list_item(
2018 0..67,
2019 1,
2020 Unordered,
2021 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2022 ),],
2023 );
2024 }
2025
2026 #[gpui::test]
2027 async fn test_nested_list_with_paragraph_inside() {
2028 let parsed = parse(
2029 "\
20301. a
2031 1. b
2032 1. c
2033
2034 text
2035
2036 1. d
2037",
2038 )
2039 .await;
2040
2041 assert_eq!(
2042 parsed.children,
2043 vec![
2044 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2045 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2046 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2047 p("text", 32..37),
2048 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2049 ],
2050 );
2051 }
2052
2053 #[gpui::test]
2054 async fn test_list_with_leading_text() {
2055 let parsed = parse(
2056 "\
2057* `code`
2058* **bold**
2059* [link](https://example.com)
2060",
2061 )
2062 .await;
2063
2064 assert_eq!(
2065 parsed.children,
2066 vec![
2067 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2068 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2069 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2070 ],
2071 );
2072 }
2073
2074 #[gpui::test]
2075 async fn test_simple_block_quote() {
2076 let parsed = parse("> Simple block quote with **styled text**").await;
2077
2078 assert_eq!(
2079 parsed.children,
2080 vec![block_quote(
2081 vec![p("Simple block quote with styled text", 2..41)],
2082 0..41
2083 )]
2084 );
2085 }
2086
2087 #[gpui::test]
2088 async fn test_simple_block_quote_with_multiple_lines() {
2089 let parsed = parse(
2090 "\
2091> # Heading
2092> More
2093> text
2094>
2095> More text
2096",
2097 )
2098 .await;
2099
2100 assert_eq!(
2101 parsed.children,
2102 vec![block_quote(
2103 vec![
2104 h1(text("Heading", 4..11), 2..12),
2105 p("More text", 14..26),
2106 p("More text", 30..40)
2107 ],
2108 0..40
2109 )]
2110 );
2111 }
2112
2113 #[gpui::test]
2114 async fn test_nested_block_quote() {
2115 let parsed = parse(
2116 "\
2117> A
2118>
2119> > # B
2120>
2121> C
2122
2123More text
2124",
2125 )
2126 .await;
2127
2128 assert_eq!(
2129 parsed.children,
2130 vec![
2131 block_quote(
2132 vec![
2133 p("A", 2..4),
2134 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2135 p("C", 18..20)
2136 ],
2137 0..20
2138 ),
2139 p("More text", 21..31)
2140 ]
2141 );
2142 }
2143
2144 #[gpui::test]
2145 async fn test_code_block() {
2146 let parsed = parse(
2147 "\
2148```
2149fn main() {
2150 return 0;
2151}
2152```
2153",
2154 )
2155 .await;
2156
2157 assert_eq!(
2158 parsed.children,
2159 vec![code_block(
2160 None,
2161 "fn main() {\n return 0;\n}",
2162 0..35,
2163 None
2164 )]
2165 );
2166 }
2167
2168 #[gpui::test]
2169 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2170 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2171 language_registry.add(rust_lang());
2172
2173 let parsed = parse_markdown(
2174 "\
2175```rust
2176fn main() {
2177 return 0;
2178}
2179```
2180",
2181 None,
2182 Some(language_registry),
2183 )
2184 .await;
2185
2186 assert_eq!(
2187 parsed.children,
2188 vec![code_block(
2189 Some("rust".to_string()),
2190 "fn main() {\n return 0;\n}",
2191 0..39,
2192 Some(vec![])
2193 )]
2194 );
2195 }
2196
2197 fn rust_lang() -> Arc<Language> {
2198 Arc::new(Language::new(
2199 LanguageConfig {
2200 name: "Rust".into(),
2201 matcher: LanguageMatcher {
2202 path_suffixes: vec!["rs".into()],
2203 ..Default::default()
2204 },
2205 collapsed_placeholder: " /* ... */ ".to_string(),
2206 ..Default::default()
2207 },
2208 Some(tree_sitter_rust::LANGUAGE.into()),
2209 ))
2210 }
2211
2212 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2213 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2214 source_range,
2215 level: HeadingLevel::H1,
2216 contents,
2217 })
2218 }
2219
2220 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2221 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2222 source_range,
2223 level: HeadingLevel::H2,
2224 contents,
2225 })
2226 }
2227
2228 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2229 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2230 source_range,
2231 level: HeadingLevel::H3,
2232 contents,
2233 })
2234 }
2235
2236 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2237 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2238 }
2239
2240 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2241 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2242 highlights: Vec::new(),
2243 region_ranges: Vec::new(),
2244 regions: Vec::new(),
2245 source_range,
2246 contents: contents.to_string().into(),
2247 })]
2248 }
2249
2250 fn block_quote(
2251 children: Vec<ParsedMarkdownElement>,
2252 source_range: Range<usize>,
2253 ) -> ParsedMarkdownElement {
2254 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2255 source_range,
2256 children,
2257 })
2258 }
2259
2260 fn code_block(
2261 language: Option<String>,
2262 code: &str,
2263 source_range: Range<usize>,
2264 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2265 ) -> ParsedMarkdownElement {
2266 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2267 source_range,
2268 language,
2269 contents: code.to_string().into(),
2270 highlights,
2271 })
2272 }
2273
2274 fn list_item(
2275 source_range: Range<usize>,
2276 depth: u16,
2277 item_type: ParsedMarkdownListItemType,
2278 content: Vec<ParsedMarkdownElement>,
2279 ) -> ParsedMarkdownElement {
2280 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2281 source_range,
2282 item_type,
2283 depth,
2284 content,
2285 })
2286 }
2287
2288 fn table(
2289 source_range: Range<usize>,
2290 header: ParsedMarkdownTableRow,
2291 body: Vec<ParsedMarkdownTableRow>,
2292 ) -> ParsedMarkdownTable {
2293 ParsedMarkdownTable {
2294 column_alignments: Vec::new(),
2295 source_range,
2296 header,
2297 body,
2298 }
2299 }
2300
2301 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2302 ParsedMarkdownTableRow { children }
2303 }
2304
2305 impl PartialEq for ParsedMarkdownTable {
2306 fn eq(&self, other: &Self) -> bool {
2307 self.source_range == other.source_range
2308 && self.header == other.header
2309 && self.body == other.body
2310 }
2311 }
2312
2313 impl PartialEq for ParsedMarkdownText {
2314 fn eq(&self, other: &Self) -> bool {
2315 self.source_range == other.source_range && self.contents == other.contents
2316 }
2317 }
2318}