1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
13
14pub async fn parse_markdown(
15 markdown_input: &str,
16 file_location_directory: Option<PathBuf>,
17 language_registry: Option<Arc<LanguageRegistry>>,
18) -> ParsedMarkdown {
19 let mut options = Options::all();
20 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
21
22 let parser = Parser::new_ext(markdown_input, options);
23 let parser = MarkdownParser::new(
24 parser.into_offset_iter().collect(),
25 file_location_directory,
26 language_registry,
27 );
28 let renderer = parser.parse_document().await;
29 ParsedMarkdown {
30 children: renderer.parsed,
31 }
32}
33
34fn cleanup_html(source: &str) -> Vec<u8> {
35 let mut writer = std::io::Cursor::new(Vec::new());
36 let mut reader = std::io::Cursor::new(source);
37 let mut minify = Minifier::new(
38 &mut writer,
39 MinifierOptions {
40 omit_doctype: true,
41 collapse_whitespace: true,
42 ..Default::default()
43 },
44 );
45 if let Ok(()) = minify.minify(&mut reader) {
46 writer.into_inner()
47 } else {
48 source.bytes().collect()
49 }
50}
51
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
struct MarkdownParser<'a> {
    /// Every event of the document paired with its byte range in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` / `Image::identify` when links and images are parsed
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks, when present
    language_registry: Option<Arc<LanguageRegistry>>,
}
61
/// A list item that is still being assembled while `parse_list` walks the events.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task
    item_type: ParsedMarkdownListItemType,
}
66
67impl Default for MarkdownListItem {
68 fn default() -> Self {
69 Self {
70 content: Vec::new(),
71 item_type: ParsedMarkdownListItemType::Unordered,
72 }
73 }
74}
75
76impl<'a> MarkdownParser<'a> {
77 fn new(
78 tokens: Vec<(Event<'a>, Range<usize>)>,
79 file_location_directory: Option<PathBuf>,
80 language_registry: Option<Arc<LanguageRegistry>>,
81 ) -> Self {
82 Self {
83 tokens,
84 file_location_directory,
85 language_registry,
86 cursor: 0,
87 parsed: vec![],
88 }
89 }
90
91 fn eof(&self) -> bool {
92 if self.tokens.is_empty() {
93 return true;
94 }
95 self.cursor >= self.tokens.len() - 1
96 }
97
98 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
99 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
100 return self.tokens.last();
101 }
102 self.tokens.get(self.cursor + steps)
103 }
104
105 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
106 if self.cursor == 0 || self.cursor > self.tokens.len() {
107 return None;
108 }
109 self.tokens.get(self.cursor - 1)
110 }
111
112 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
113 self.peek(0)
114 }
115
116 fn current_event(&self) -> Option<&Event<'_>> {
117 self.current().map(|(event, _)| event)
118 }
119
120 fn is_text_like(event: &Event) -> bool {
121 match event {
122 Event::Text(_)
123 // Represent an inline code block
124 | Event::Code(_)
125 | Event::Html(_)
126 | Event::InlineHtml(_)
127 | Event::FootnoteReference(_)
128 | Event::Start(Tag::Link { .. })
129 | Event::Start(Tag::Emphasis)
130 | Event::Start(Tag::Strong)
131 | Event::Start(Tag::Strikethrough)
132 | Event::Start(Tag::Image { .. }) => {
133 true
134 }
135 _ => false,
136 }
137 }
138
139 async fn parse_document(mut self) -> Self {
140 while !self.eof() {
141 if let Some(block) = self.parse_block().await {
142 self.parsed.extend(block);
143 } else {
144 self.cursor += 1;
145 }
146 }
147 self
148 }
149
150 #[async_recursion]
151 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
152 let (current, source_range) = self.current().unwrap();
153 let source_range = source_range.clone();
154 match current {
155 Event::Start(tag) => match tag {
156 Tag::Paragraph => {
157 self.cursor += 1;
158 let text = self.parse_text(false, Some(source_range));
159 Some(vec![ParsedMarkdownElement::Paragraph(text)])
160 }
161 Tag::Heading { level, .. } => {
162 let level = *level;
163 self.cursor += 1;
164 let heading = self.parse_heading(level);
165 Some(vec![ParsedMarkdownElement::Heading(heading)])
166 }
167 Tag::Table(alignment) => {
168 let alignment = alignment.clone();
169 self.cursor += 1;
170 let table = self.parse_table(alignment);
171 Some(vec![ParsedMarkdownElement::Table(table)])
172 }
173 Tag::List(order) => {
174 let order = *order;
175 self.cursor += 1;
176 let list = self.parse_list(order).await;
177 Some(list)
178 }
179 Tag::BlockQuote(_kind) => {
180 self.cursor += 1;
181 let block_quote = self.parse_block_quote().await;
182 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
183 }
184 Tag::CodeBlock(kind) => {
185 let language = match kind {
186 pulldown_cmark::CodeBlockKind::Indented => None,
187 pulldown_cmark::CodeBlockKind::Fenced(language) => {
188 if language.is_empty() {
189 None
190 } else {
191 Some(language.to_string())
192 }
193 }
194 };
195
196 self.cursor += 1;
197
198 let code_block = self.parse_code_block(language).await?;
199 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
200 }
201 Tag::HtmlBlock => {
202 self.cursor += 1;
203
204 Some(self.parse_html_block().await)
205 }
206 _ => None,
207 },
208 Event::Rule => {
209 self.cursor += 1;
210 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
211 }
212 _ => None,
213 }
214 }
215
216 fn parse_text(
217 &mut self,
218 should_complete_on_soft_break: bool,
219 source_range: Option<Range<usize>>,
220 ) -> MarkdownParagraph {
221 let source_range = source_range.unwrap_or_else(|| {
222 self.current()
223 .map(|(_, range)| range.clone())
224 .unwrap_or_default()
225 });
226
227 let mut markdown_text_like = Vec::new();
228 let mut text = String::new();
229 let mut bold_depth = 0;
230 let mut italic_depth = 0;
231 let mut strikethrough_depth = 0;
232 let mut link: Option<Link> = None;
233 let mut image: Option<Image> = None;
234 let mut region_ranges: Vec<Range<usize>> = vec![];
235 let mut regions: Vec<ParsedRegion> = vec![];
236 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
237 let mut link_urls: Vec<String> = vec![];
238 let mut link_ranges: Vec<Range<usize>> = vec![];
239
240 loop {
241 if self.eof() {
242 break;
243 }
244
245 let (current, _) = self.current().unwrap();
246 let prev_len = text.len();
247 match current {
248 Event::SoftBreak => {
249 if should_complete_on_soft_break {
250 break;
251 }
252 text.push(' ');
253 }
254
255 Event::HardBreak => {
256 text.push('\n');
257 }
258
259 // We want to ignore any inline HTML tags in the text but keep
260 // the text between them
261 Event::InlineHtml(_) => {}
262
263 Event::Text(t) => {
264 text.push_str(t.as_ref());
265 let mut style = MarkdownHighlightStyle::default();
266
267 if bold_depth > 0 {
268 style.weight = FontWeight::BOLD;
269 }
270
271 if italic_depth > 0 {
272 style.italic = true;
273 }
274
275 if strikethrough_depth > 0 {
276 style.strikethrough = true;
277 }
278
279 let last_run_len = if let Some(link) = link.clone() {
280 region_ranges.push(prev_len..text.len());
281 regions.push(ParsedRegion {
282 code: false,
283 link: Some(link),
284 });
285 style.link = true;
286 prev_len
287 } else {
288 // Manually scan for links
289 let mut finder = linkify::LinkFinder::new();
290 finder.kinds(&[linkify::LinkKind::Url]);
291 let mut last_link_len = prev_len;
292 for link in finder.links(t) {
293 let start = link.start();
294 let end = link.end();
295 let range = (prev_len + start)..(prev_len + end);
296 link_ranges.push(range.clone());
297 link_urls.push(link.as_str().to_string());
298
299 // If there is a style before we match a link, we have to add this to the highlighted ranges
300 if style != MarkdownHighlightStyle::default()
301 && last_link_len < link.start()
302 {
303 highlights.push((
304 last_link_len..link.start(),
305 MarkdownHighlight::Style(style.clone()),
306 ));
307 }
308
309 highlights.push((
310 range.clone(),
311 MarkdownHighlight::Style(MarkdownHighlightStyle {
312 underline: true,
313 ..style
314 }),
315 ));
316 region_ranges.push(range.clone());
317 regions.push(ParsedRegion {
318 code: false,
319 link: Some(Link::Web {
320 url: link.as_str().to_string(),
321 }),
322 });
323 last_link_len = end;
324 }
325 last_link_len
326 };
327
328 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
329 let mut new_highlight = true;
330 if let Some((last_range, last_style)) = highlights.last_mut()
331 && last_range.end == last_run_len
332 && last_style == &MarkdownHighlight::Style(style.clone())
333 {
334 last_range.end = text.len();
335 new_highlight = false;
336 }
337 if new_highlight {
338 highlights.push((
339 last_run_len..text.len(),
340 MarkdownHighlight::Style(style.clone()),
341 ));
342 }
343 }
344 }
345 Event::Code(t) => {
346 text.push_str(t.as_ref());
347 region_ranges.push(prev_len..text.len());
348
349 if link.is_some() {
350 highlights.push((
351 prev_len..text.len(),
352 MarkdownHighlight::Style(MarkdownHighlightStyle {
353 link: true,
354 ..Default::default()
355 }),
356 ));
357 }
358 regions.push(ParsedRegion {
359 code: true,
360 link: link.clone(),
361 });
362 }
363 Event::Start(tag) => match tag {
364 Tag::Emphasis => italic_depth += 1,
365 Tag::Strong => bold_depth += 1,
366 Tag::Strikethrough => strikethrough_depth += 1,
367 Tag::Link { dest_url, .. } => {
368 link = Link::identify(
369 self.file_location_directory.clone(),
370 dest_url.to_string(),
371 );
372 }
373 Tag::Image { dest_url, .. } => {
374 if !text.is_empty() {
375 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
376 source_range: source_range.clone(),
377 contents: text.clone(),
378 highlights: highlights.clone(),
379 region_ranges: region_ranges.clone(),
380 regions: regions.clone(),
381 });
382 text = String::new();
383 highlights = vec![];
384 region_ranges = vec![];
385 regions = vec![];
386 markdown_text_like.push(parsed_regions);
387 }
388 image = Image::identify(
389 dest_url.to_string(),
390 source_range.clone(),
391 self.file_location_directory.clone(),
392 );
393 }
394 _ => {
395 break;
396 }
397 },
398
399 Event::End(tag) => match tag {
400 TagEnd::Emphasis => italic_depth -= 1,
401 TagEnd::Strong => bold_depth -= 1,
402 TagEnd::Strikethrough => strikethrough_depth -= 1,
403 TagEnd::Link => {
404 link = None;
405 }
406 TagEnd::Image => {
407 if let Some(mut image) = image.take() {
408 if !text.is_empty() {
409 image.set_alt_text(std::mem::take(&mut text).into());
410 }
411 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
412 }
413 }
414 TagEnd::Paragraph => {
415 self.cursor += 1;
416 break;
417 }
418 _ => {
419 break;
420 }
421 },
422 _ => {
423 break;
424 }
425 }
426
427 self.cursor += 1;
428 }
429 if !text.is_empty() {
430 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
431 source_range,
432 contents: text,
433 highlights,
434 regions,
435 region_ranges,
436 }));
437 }
438 markdown_text_like
439 }
440
441 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
442 let (_event, source_range) = self.previous().unwrap();
443 let source_range = source_range.clone();
444 let text = self.parse_text(true, None);
445
446 // Advance past the heading end tag
447 self.cursor += 1;
448
449 ParsedMarkdownHeading {
450 source_range,
451 level: match level {
452 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
453 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
454 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
455 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
456 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
457 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
458 },
459 contents: text,
460 }
461 }
462
463 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
464 let (_event, source_range) = self.previous().unwrap();
465 let source_range = source_range.clone();
466 let mut header = ParsedMarkdownTableRow::new();
467 let mut body = vec![];
468 let mut current_row = vec![];
469 let mut in_header = true;
470 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
471
472 loop {
473 if self.eof() {
474 break;
475 }
476
477 let (current, source_range) = self.current().unwrap();
478 let source_range = source_range.clone();
479 match current {
480 Event::Start(Tag::TableHead)
481 | Event::Start(Tag::TableRow)
482 | Event::End(TagEnd::TableCell) => {
483 self.cursor += 1;
484 }
485 Event::Start(Tag::TableCell) => {
486 self.cursor += 1;
487 let cell_contents = self.parse_text(false, Some(source_range));
488 current_row.push(cell_contents);
489 }
490 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
491 self.cursor += 1;
492 let new_row = std::mem::take(&mut current_row);
493 if in_header {
494 header.children = new_row;
495 in_header = false;
496 } else {
497 let row = ParsedMarkdownTableRow::with_children(new_row);
498 body.push(row);
499 }
500 }
501 Event::End(TagEnd::Table) => {
502 self.cursor += 1;
503 break;
504 }
505 _ => {
506 break;
507 }
508 }
509 }
510
511 ParsedMarkdownTable {
512 source_range,
513 header,
514 body,
515 column_alignments,
516 }
517 }
518
519 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
520 match alignment {
521 Alignment::None => ParsedMarkdownTableAlignment::None,
522 Alignment::Left => ParsedMarkdownTableAlignment::Left,
523 Alignment::Center => ParsedMarkdownTableAlignment::Center,
524 Alignment::Right => ParsedMarkdownTableAlignment::Right,
525 }
526 }
527
    /// Parses a list whose start tag was just consumed, returning its items
    /// (and any trailing blocks) as one flat sequence of elements.
    ///
    /// Nested lists are not represented as a tree: each item records its
    /// `depth`, and an item that contains a nested list is inserted in front
    /// of the nested items that were completed before it.
    ///
    /// `order` is the starting number for an ordered list, or `None` for an
    /// unordered one.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        let mut items = Vec::new();
        // Items currently being built; the first entry is a placeholder pushed
        // before any item start tag has been seen.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends
        let mut order_stack = Vec::new();

        // Per depth: index in `items` where the item at that depth must be
        // inserted once it is complete
        let mut insertion_indices = FxHashMap::default();
        // Per depth: source range of the item, cut off at the start of its nested list
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The outermost list has been closed
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over ordered/unordered
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a non-inline block (its item
                            // type is left at the default in this branch)
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // Number the next item of an ordered list
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range that was truncated at a nested list, if any
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item with nested items goes in front of them
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Nothing parseable here: skip the token
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
678
679 #[async_recursion]
680 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
681 let (_event, source_range) = self.previous().unwrap();
682 let source_range = source_range.clone();
683 let mut nested_depth = 1;
684
685 let mut children: Vec<ParsedMarkdownElement> = vec![];
686
687 while !self.eof() {
688 let block = self.parse_block().await;
689
690 if let Some(block) = block {
691 children.extend(block);
692 } else {
693 break;
694 }
695
696 if self.eof() {
697 break;
698 }
699
700 let (current, _source_range) = self.current().unwrap();
701 match current {
702 // This is a nested block quote.
703 // Record that we're in a nested block quote and continue parsing.
704 // We don't need to advance the cursor since the next
705 // call to `parse_block` will handle it.
706 Event::Start(Tag::BlockQuote(_kind)) => {
707 nested_depth += 1;
708 }
709 Event::End(TagEnd::BlockQuote(_kind)) => {
710 nested_depth -= 1;
711 if nested_depth == 0 {
712 self.cursor += 1;
713 break;
714 }
715 }
716 _ => {}
717 };
718 }
719
720 ParsedMarkdownBlockQuote {
721 source_range,
722 children,
723 }
724 }
725
726 async fn parse_code_block(
727 &mut self,
728 language: Option<String>,
729 ) -> Option<ParsedMarkdownCodeBlock> {
730 let Some((_event, source_range)) = self.previous() else {
731 return None;
732 };
733
734 let source_range = source_range.clone();
735 let mut code = String::new();
736
737 while !self.eof() {
738 let Some((current, _source_range)) = self.current() else {
739 break;
740 };
741
742 match current {
743 Event::Text(text) => {
744 code.push_str(text);
745 self.cursor += 1;
746 }
747 Event::End(TagEnd::CodeBlock) => {
748 self.cursor += 1;
749 break;
750 }
751 _ => {
752 break;
753 }
754 }
755 }
756
757 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
758
759 let highlights = if let Some(language) = &language {
760 if let Some(registry) = &self.language_registry {
761 let rope: language::Rope = code.as_str().into();
762 registry
763 .language_for_name_or_extension(language)
764 .await
765 .map(|l| l.highlight_text(&rope, 0..code.len()))
766 .ok()
767 } else {
768 None
769 }
770 } else {
771 None
772 };
773
774 Some(ParsedMarkdownCodeBlock {
775 source_range,
776 contents: code.into(),
777 language,
778 highlights,
779 })
780 }
781
782 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
783 let mut elements = Vec::new();
784 let Some((_event, _source_range)) = self.previous() else {
785 return elements;
786 };
787
788 let mut html_source_range_start = None;
789 let mut html_source_range_end = None;
790 let mut html_buffer = String::new();
791
792 while !self.eof() {
793 let Some((current, source_range)) = self.current() else {
794 break;
795 };
796 let source_range = source_range.clone();
797 match current {
798 Event::Html(html) => {
799 html_source_range_start.get_or_insert(source_range.start);
800 html_source_range_end = Some(source_range.end);
801 html_buffer.push_str(html);
802 self.cursor += 1;
803 }
804 Event::End(TagEnd::CodeBlock) => {
805 self.cursor += 1;
806 break;
807 }
808 _ => {
809 break;
810 }
811 }
812 }
813
814 let bytes = cleanup_html(&html_buffer);
815
816 let mut cursor = std::io::Cursor::new(bytes);
817 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
818 .from_utf8()
819 .read_from(&mut cursor)
820 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
821 {
822 self.parse_html_node(start..end, &dom.document, &mut elements);
823 }
824
825 elements
826 }
827
828 fn parse_html_node(
829 &self,
830 source_range: Range<usize>,
831 node: &Rc<markup5ever_rcdom::Node>,
832 elements: &mut Vec<ParsedMarkdownElement>,
833 ) {
834 match &node.data {
835 markup5ever_rcdom::NodeData::Document => {
836 self.consume_children(source_range, node, elements);
837 }
838 markup5ever_rcdom::NodeData::Doctype { .. } => {}
839 markup5ever_rcdom::NodeData::Text { contents } => {
840 elements.push(ParsedMarkdownElement::Paragraph(vec![
841 MarkdownParagraphChunk::Text(ParsedMarkdownText {
842 source_range,
843 contents: contents.borrow().to_string(),
844 highlights: Vec::default(),
845 region_ranges: Vec::default(),
846 regions: Vec::default(),
847 }),
848 ]));
849 }
850 markup5ever_rcdom::NodeData::Comment { .. } => {}
851 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
852 if local_name!("img") == name.local {
853 if let Some(image) = self.extract_image(source_range, attrs) {
854 elements.push(ParsedMarkdownElement::Image(image));
855 }
856 } else if matches!(
857 name.local,
858 local_name!("h1")
859 | local_name!("h2")
860 | local_name!("h3")
861 | local_name!("h4")
862 | local_name!("h5")
863 | local_name!("h6")
864 ) {
865 let mut paragraph = MarkdownParagraph::new();
866 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
867
868 if !paragraph.is_empty() {
869 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
870 source_range,
871 level: match name.local {
872 local_name!("h1") => HeadingLevel::H1,
873 local_name!("h2") => HeadingLevel::H2,
874 local_name!("h3") => HeadingLevel::H3,
875 local_name!("h4") => HeadingLevel::H4,
876 local_name!("h5") => HeadingLevel::H5,
877 local_name!("h6") => HeadingLevel::H6,
878 _ => unreachable!(),
879 },
880 contents: paragraph,
881 }));
882 }
883 } else if local_name!("table") == name.local {
884 if let Some(table) = self.extract_html_table(node, source_range) {
885 elements.push(ParsedMarkdownElement::Table(table));
886 }
887 } else {
888 self.consume_children(source_range, node, elements);
889 }
890 }
891 markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
892 }
893 }
894
895 fn parse_paragraph(
896 &self,
897 source_range: Range<usize>,
898 node: &Rc<markup5ever_rcdom::Node>,
899 paragraph: &mut MarkdownParagraph,
900 ) {
901 match &node.data {
902 markup5ever_rcdom::NodeData::Text { contents } => {
903 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
904 source_range,
905 regions: Vec::default(),
906 contents: contents.borrow().to_string(),
907 region_ranges: Vec::default(),
908 highlights: Vec::default(),
909 }));
910 }
911 markup5ever_rcdom::NodeData::Element { .. } => {
912 self.consume_paragraph(source_range, node, paragraph);
913 }
914 _ => {}
915 }
916 }
917
918 fn consume_paragraph(
919 &self,
920 source_range: Range<usize>,
921 node: &Rc<markup5ever_rcdom::Node>,
922 paragraph: &mut MarkdownParagraph,
923 ) {
924 for node in node.children.borrow().iter() {
925 self.parse_paragraph(source_range.clone(), node, paragraph);
926 }
927 }
928
929 fn consume_children(
930 &self,
931 source_range: Range<usize>,
932 node: &Rc<markup5ever_rcdom::Node>,
933 elements: &mut Vec<ParsedMarkdownElement>,
934 ) {
935 for node in node.children.borrow().iter() {
936 self.parse_html_node(source_range.clone(), node, elements);
937 }
938 }
939
940 fn attr_value(
941 attrs: &RefCell<Vec<html5ever::Attribute>>,
942 name: html5ever::LocalName,
943 ) -> Option<String> {
944 attrs.borrow().iter().find_map(|attr| {
945 if attr.name.local == name {
946 Some(attr.value.to_string())
947 } else {
948 None
949 }
950 })
951 }
952
953 fn extract_styles_from_attributes(
954 attrs: &RefCell<Vec<html5ever::Attribute>>,
955 ) -> HashMap<String, String> {
956 let mut styles = HashMap::new();
957
958 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
959 for decl in style.split(';') {
960 let mut parts = decl.splitn(2, ':');
961 if let Some((key, value)) = parts.next().zip(parts.next()) {
962 styles.insert(
963 key.trim().to_lowercase().to_string(),
964 value.trim().to_string(),
965 );
966 }
967 }
968 }
969
970 styles
971 }
972
973 fn extract_image(
974 &self,
975 source_range: Range<usize>,
976 attrs: &RefCell<Vec<html5ever::Attribute>>,
977 ) -> Option<Image> {
978 let src = Self::attr_value(attrs, local_name!("src"))?;
979
980 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
981
982 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
983 image.set_alt_text(alt.into());
984 }
985
986 let styles = Self::extract_styles_from_attributes(attrs);
987
988 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
989 .or_else(|| styles.get("width").cloned())
990 .and_then(|width| Self::parse_length(&width))
991 {
992 image.set_width(width);
993 }
994
995 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
996 .or_else(|| styles.get("height").cloned())
997 .and_then(|height| Self::parse_length(&height))
998 {
999 image.set_height(height);
1000 }
1001
1002 Some(image)
1003 }
1004
1005 fn extract_html_table(
1006 &self,
1007 node: &Rc<markup5ever_rcdom::Node>,
1008 source_range: Range<usize>,
1009 ) -> Option<ParsedMarkdownTable> {
1010 let mut header_columns = Vec::new();
1011 let mut body_rows = Vec::new();
1012
1013 // node should be a thead or tbody element
1014 for node in node.children.borrow().iter() {
1015 match &node.data {
1016 markup5ever_rcdom::NodeData::Element { name, .. } => {
1017 if local_name!("thead") == name.local {
1018 // node should be a tr element
1019 for node in node.children.borrow().iter() {
1020 let mut paragraph = MarkdownParagraph::new();
1021 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1022
1023 for paragraph in paragraph.into_iter() {
1024 header_columns.push(vec![paragraph]);
1025 }
1026 }
1027 } else if local_name!("tbody") == name.local {
1028 // node should be a tr element
1029 for node in node.children.borrow().iter() {
1030 let mut row = MarkdownParagraph::new();
1031 self.consume_paragraph(source_range.clone(), node, &mut row);
1032 body_rows.push(ParsedMarkdownTableRow::with_children(
1033 row.into_iter().map(|column| vec![column]).collect(),
1034 ));
1035 }
1036 }
1037 }
1038 _ => {}
1039 }
1040 }
1041
1042 if !header_columns.is_empty() || !body_rows.is_empty() {
1043 Some(ParsedMarkdownTable {
1044 source_range,
1045 body: body_rows,
1046 column_alignments: Vec::default(),
1047 header: ParsedMarkdownTableRow::with_children(header_columns),
1048 })
1049 } else {
1050 None
1051 }
1052 }
1053
1054 /// Parses the width/height attribute value of an html element (e.g. img element)
1055 fn parse_length(value: &str) -> Option<DefiniteLength> {
1056 if value.ends_with("%") {
1057 value
1058 .trim_end_matches("%")
1059 .parse::<f32>()
1060 .ok()
1061 .map(|value| relative(value / 100.))
1062 } else {
1063 value
1064 .trim_end_matches("px")
1065 .parse()
1066 .ok()
1067 .map(|value| px(value).into())
1068 }
1069 }
1070}
1071
1072#[cfg(test)]
1073mod tests {
1074 use super::*;
1075 use ParsedMarkdownListItemType::*;
1076 use core::panic;
1077 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1078 use language::{
1079 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1080 };
1081 use pretty_assertions::assert_eq;
1082
1083 async fn parse(input: &str) -> ParsedMarkdown {
1084 parse_markdown(input, None, None).await
1085 }
1086
    // Three consecutive headings of levels 1-3 are parsed as separate heading
    // elements, each with its own text and source range.
    #[gpui::test]
    async fn test_headings() {
        let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;

        assert_eq!(
            parsed.children,
            vec![
                h1(text("Heading one", 2..13), 0..14),
                h2(text("Heading two", 17..28), 14..29),
                h3(text("Heading three", 33..46), 29..46),
            ]
        );
    }
1100
    // A single newline inside a paragraph does not start a new paragraph; the
    // expected text has one space where the line break was.
    #[gpui::test]
    async fn test_newlines_dont_new_paragraphs() {
        let parsed = parse("Some text **that is bolded**\n and *italicized*").await;

        assert_eq!(
            parsed.children,
            vec![p("Some text that is bolded and italicized", 0..46)]
        );
    }
1110
    // A heading directly followed by a line of text yields a heading element
    // and then a paragraph element.
    #[gpui::test]
    async fn test_heading_with_paragraph() {
        let parsed = parse("# Zed\nThe editor").await;

        assert_eq!(
            parsed.children,
            vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
        );
    }
1120
    // A blank line between two runs of text splits them into two paragraphs.
    #[gpui::test]
    async fn test_double_newlines_do_new_paragraphs() {
        let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;

        assert_eq!(
            parsed.children,
            vec![
                p("Some text that is bolded", 0..29),
                p("and italicized", 31..47),
            ]
        );
    }
1133
    // Bold and italic markers are removed from the paragraph's text contents.
    #[gpui::test]
    async fn test_bold_italic_text() {
        let parsed = parse("Some text **that is bolded** and *italicized*").await;

        assert_eq!(
            parsed.children,
            vec![p("Some text that is bolded and italicized", 0..45)]
        );
    }
1143
    // Strikethrough nested inside bold yields three highlight runs: bold,
    // bold + strikethrough, then bold again.
    #[gpui::test]
    async fn test_nested_bold_strikethrough_text() {
        let parsed = parse("Some **bo~~strikethrough~~ld** text").await;

        assert_eq!(parsed.children.len(), 1);
        assert_eq!(
            parsed.children[0],
            ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
                ParsedMarkdownText {
                    source_range: 0..35,
                    contents: "Some bostrikethroughld text".to_string(),
                    highlights: Vec::new(),
                    region_ranges: Vec::new(),
                    regions: Vec::new(),
                }
            )])
        );

        // Unwrap the paragraph and its first chunk so the highlights can be
        // checked explicitly.
        let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
            text
        } else {
            panic!("Expected a paragraph");
        };

        let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
            text
        } else {
            panic!("Expected a text");
        };

        assert_eq!(
            paragraph.highlights,
            vec![
                (
                    5..7,
                    MarkdownHighlight::Style(MarkdownHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }),
                ),
                (
                    7..20,
                    MarkdownHighlight::Style(MarkdownHighlightStyle {
                        weight: FontWeight::BOLD,
                        strikethrough: true,
                        ..Default::default()
                    }),
                ),
                (
                    20..22,
                    MarkdownHighlight::Style(MarkdownHighlightStyle {
                        weight: FontWeight::BOLD,
                        ..Default::default()
                    }),
                ),
            ]
        );
    }
1202
1203 #[gpui::test]
1204 async fn test_text_with_inline_html() {
1205 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1206
1207 assert_eq!(
1208 parsed.children,
1209 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1210 );
1211 }
1212
1213 #[gpui::test]
1214 async fn test_raw_links_detection() {
1215 let parsed = parse("Checkout this https://zed.dev link").await;
1216
1217 assert_eq!(
1218 parsed.children,
1219 vec![p("Checkout this https://zed.dev link", 0..34)]
1220 );
1221 }
1222
1223 #[gpui::test]
1224 async fn test_empty_image() {
1225 let parsed = parse("![]()").await;
1226
1227 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1228 text
1229 } else {
1230 panic!("Expected a paragraph");
1231 };
1232 assert_eq!(paragraph.len(), 0);
1233 }
1234
1235 #[gpui::test]
1236 async fn test_image_links_detection() {
1237 let parsed = parse("").await;
1238
1239 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1240 text
1241 } else {
1242 panic!("Expected a paragraph");
1243 };
1244 assert_eq!(
1245 paragraph[0],
1246 MarkdownParagraphChunk::Image(Image {
1247 source_range: 0..111,
1248 link: Link::Web {
1249 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1250 },
1251 alt_text: Some("test".into()),
1252 height: None,
1253 width: None,
1254 },)
1255 );
1256 }
1257
1258 #[gpui::test]
1259 async fn test_image_without_alt_text() {
1260 let parsed = parse("").await;
1261
1262 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1263 text
1264 } else {
1265 panic!("Expected a paragraph");
1266 };
1267 assert_eq!(
1268 paragraph[0],
1269 MarkdownParagraphChunk::Image(Image {
1270 source_range: 0..31,
1271 link: Link::Web {
1272 url: "http://example.com/foo.png".to_string(),
1273 },
1274 alt_text: None,
1275 height: None,
1276 width: None,
1277 },)
1278 );
1279 }
1280
1281 #[gpui::test]
1282 async fn test_image_with_alt_text_containing_formatting() {
1283 let parsed = parse("").await;
1284
1285 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1286 panic!("Expected a paragraph");
1287 };
1288 assert_eq!(
1289 chunks,
1290 &[MarkdownParagraphChunk::Image(Image {
1291 source_range: 0..44,
1292 link: Link::Web {
1293 url: "http://example.com/foo.png".to_string(),
1294 },
1295 alt_text: Some("foo bar baz".into()),
1296 height: None,
1297 width: None,
1298 }),],
1299 );
1300 }
1301
1302 #[gpui::test]
1303 async fn test_images_with_text_in_between() {
1304 let parsed = parse(
1305 "\nLorem Ipsum\n",
1306 )
1307 .await;
1308
1309 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1310 text
1311 } else {
1312 panic!("Expected a paragraph");
1313 };
1314 assert_eq!(
1315 chunks,
1316 &vec![
1317 MarkdownParagraphChunk::Image(Image {
1318 source_range: 0..81,
1319 link: Link::Web {
1320 url: "http://example.com/foo.png".to_string(),
1321 },
1322 alt_text: Some("foo".into()),
1323 height: None,
1324 width: None,
1325 }),
1326 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1327 source_range: 0..81,
1328 contents: " Lorem Ipsum ".to_string(),
1329 highlights: Vec::new(),
1330 region_ranges: Vec::new(),
1331 regions: Vec::new(),
1332 }),
1333 MarkdownParagraphChunk::Image(Image {
1334 source_range: 0..81,
1335 link: Link::Web {
1336 url: "http://example.com/bar.png".to_string(),
1337 },
1338 alt_text: Some("bar".into()),
1339 height: None,
1340 width: None,
1341 })
1342 ]
1343 );
1344 }
1345
1346 #[test]
1347 fn test_parse_length() {
1348 // Test percentage values
1349 assert_eq!(
1350 MarkdownParser::parse_length("50%"),
1351 Some(DefiniteLength::Fraction(0.5))
1352 );
1353 assert_eq!(
1354 MarkdownParser::parse_length("100%"),
1355 Some(DefiniteLength::Fraction(1.0))
1356 );
1357 assert_eq!(
1358 MarkdownParser::parse_length("25%"),
1359 Some(DefiniteLength::Fraction(0.25))
1360 );
1361 assert_eq!(
1362 MarkdownParser::parse_length("0%"),
1363 Some(DefiniteLength::Fraction(0.0))
1364 );
1365
1366 // Test pixel values
1367 assert_eq!(
1368 MarkdownParser::parse_length("100px"),
1369 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1370 );
1371 assert_eq!(
1372 MarkdownParser::parse_length("50px"),
1373 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1374 );
1375 assert_eq!(
1376 MarkdownParser::parse_length("0px"),
1377 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1378 );
1379
1380 // Test values without units (should be treated as pixels)
1381 assert_eq!(
1382 MarkdownParser::parse_length("100"),
1383 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1384 );
1385 assert_eq!(
1386 MarkdownParser::parse_length("42"),
1387 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1388 );
1389
1390 // Test invalid values
1391 assert_eq!(MarkdownParser::parse_length("invalid"), None);
1392 assert_eq!(MarkdownParser::parse_length("px"), None);
1393 assert_eq!(MarkdownParser::parse_length("%"), None);
1394 assert_eq!(MarkdownParser::parse_length(""), None);
1395 assert_eq!(MarkdownParser::parse_length("abc%"), None);
1396 assert_eq!(MarkdownParser::parse_length("abcpx"), None);
1397
1398 // Test decimal values
1399 assert_eq!(
1400 MarkdownParser::parse_length("50.5%"),
1401 Some(DefiniteLength::Fraction(0.505))
1402 );
1403 assert_eq!(
1404 MarkdownParser::parse_length("100.25px"),
1405 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1406 );
1407 assert_eq!(
1408 MarkdownParser::parse_length("42.0"),
1409 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1410 );
1411 }
1412
1413 #[gpui::test]
1414 async fn test_html_table() {
1415 let parsed = parse(
1416 "<table>
1417 <thead>
1418 <tr>
1419 <th>Id</th>
1420 <th>Name</th>
1421 </tr>
1422 </thead>
1423 <tbody>
1424 <tr>
1425 <td>1</td>
1426 <td>Chris</td>
1427 </tr>
1428 <tr>
1429 <td>2</td>
1430 <td>Dennis</td>
1431 </tr>
1432 </tbody>
1433 </table>",
1434 )
1435 .await;
1436
1437 assert_eq!(
1438 ParsedMarkdown {
1439 children: vec![ParsedMarkdownElement::Table(table(
1440 0..366,
1441 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1442 vec![
1443 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1444 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1445 ],
1446 ))],
1447 },
1448 parsed
1449 );
1450 }
1451
1452 #[gpui::test]
1453 async fn test_html_table_without_headings() {
1454 let parsed = parse(
1455 "<table>
1456 <tbody>
1457 <tr>
1458 <td>1</td>
1459 <td>Chris</td>
1460 </tr>
1461 <tr>
1462 <td>2</td>
1463 <td>Dennis</td>
1464 </tr>
1465 </tbody>
1466 </table>",
1467 )
1468 .await;
1469
1470 assert_eq!(
1471 ParsedMarkdown {
1472 children: vec![ParsedMarkdownElement::Table(table(
1473 0..240,
1474 row(vec![]),
1475 vec![
1476 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1477 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1478 ],
1479 ))],
1480 },
1481 parsed
1482 );
1483 }
1484
1485 #[gpui::test]
1486 async fn test_html_table_without_body() {
1487 let parsed = parse(
1488 "<table>
1489 <thead>
1490 <tr>
1491 <th>Id</th>
1492 <th>Name</th>
1493 </tr>
1494 </thead>
1495 </table>",
1496 )
1497 .await;
1498
1499 assert_eq!(
1500 ParsedMarkdown {
1501 children: vec![ParsedMarkdownElement::Table(table(
1502 0..150,
1503 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1504 vec![],
1505 ))],
1506 },
1507 parsed
1508 );
1509 }
1510
1511 #[gpui::test]
1512 async fn test_html_heading_tags() {
1513 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1514
1515 assert_eq!(
1516 ParsedMarkdown {
1517 children: vec![
1518 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1519 level: HeadingLevel::H1,
1520 source_range: 0..96,
1521 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1522 source_range: 0..96,
1523 contents: "Heading".into(),
1524 highlights: Vec::default(),
1525 region_ranges: Vec::default(),
1526 regions: Vec::default()
1527 })],
1528 }),
1529 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1530 level: HeadingLevel::H2,
1531 source_range: 0..96,
1532 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1533 source_range: 0..96,
1534 contents: "Heading".into(),
1535 highlights: Vec::default(),
1536 region_ranges: Vec::default(),
1537 regions: Vec::default()
1538 })],
1539 }),
1540 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1541 level: HeadingLevel::H3,
1542 source_range: 0..96,
1543 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1544 source_range: 0..96,
1545 contents: "Heading".into(),
1546 highlights: Vec::default(),
1547 region_ranges: Vec::default(),
1548 regions: Vec::default()
1549 })],
1550 }),
1551 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1552 level: HeadingLevel::H4,
1553 source_range: 0..96,
1554 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1555 source_range: 0..96,
1556 contents: "Heading".into(),
1557 highlights: Vec::default(),
1558 region_ranges: Vec::default(),
1559 regions: Vec::default()
1560 })],
1561 }),
1562 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1563 level: HeadingLevel::H5,
1564 source_range: 0..96,
1565 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1566 source_range: 0..96,
1567 contents: "Heading".into(),
1568 highlights: Vec::default(),
1569 region_ranges: Vec::default(),
1570 regions: Vec::default()
1571 })],
1572 }),
1573 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1574 level: HeadingLevel::H6,
1575 source_range: 0..96,
1576 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1577 source_range: 0..96,
1578 contents: "Heading".into(),
1579 highlights: Vec::default(),
1580 region_ranges: Vec::default(),
1581 regions: Vec::default()
1582 })],
1583 }),
1584 ],
1585 },
1586 parsed
1587 );
1588 }
1589
1590 #[gpui::test]
1591 async fn test_html_image_tag() {
1592 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1593
1594 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1595 panic!("Expected a image element");
1596 };
1597 assert_eq!(
1598 image.clone(),
1599 Image {
1600 source_range: 0..40,
1601 link: Link::Web {
1602 url: "http://example.com/foo.png".to_string(),
1603 },
1604 alt_text: None,
1605 height: None,
1606 width: None,
1607 },
1608 );
1609 }
1610
1611 #[gpui::test]
1612 async fn test_html_image_tag_with_alt_text() {
1613 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1614
1615 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1616 panic!("Expected a image element");
1617 };
1618 assert_eq!(
1619 image.clone(),
1620 Image {
1621 source_range: 0..50,
1622 link: Link::Web {
1623 url: "http://example.com/foo.png".to_string(),
1624 },
1625 alt_text: Some("Foo".into()),
1626 height: None,
1627 width: None,
1628 },
1629 );
1630 }
1631
1632 #[gpui::test]
1633 async fn test_html_image_tag_with_height_and_width() {
1634 let parsed =
1635 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1636
1637 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1638 panic!("Expected a image element");
1639 };
1640 assert_eq!(
1641 image.clone(),
1642 Image {
1643 source_range: 0..65,
1644 link: Link::Web {
1645 url: "http://example.com/foo.png".to_string(),
1646 },
1647 alt_text: None,
1648 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1649 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1650 },
1651 );
1652 }
1653
1654 #[gpui::test]
1655 async fn test_html_image_style_tag_with_height_and_width() {
1656 let parsed = parse(
1657 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1658 )
1659 .await;
1660
1661 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1662 panic!("Expected a image element");
1663 };
1664 assert_eq!(
1665 image.clone(),
1666 Image {
1667 source_range: 0..75,
1668 link: Link::Web {
1669 url: "http://example.com/foo.png".to_string(),
1670 },
1671 alt_text: None,
1672 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1673 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1674 },
1675 );
1676 }
1677
1678 #[gpui::test]
1679 async fn test_header_only_table() {
1680 let markdown = "\
1681| Header 1 | Header 2 |
1682|----------|----------|
1683
1684Some other content
1685";
1686
1687 let expected_table = table(
1688 0..48,
1689 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1690 vec![],
1691 );
1692
1693 assert_eq!(
1694 parse(markdown).await.children[0],
1695 ParsedMarkdownElement::Table(expected_table)
1696 );
1697 }
1698
1699 #[gpui::test]
1700 async fn test_basic_table() {
1701 let markdown = "\
1702| Header 1 | Header 2 |
1703|----------|----------|
1704| Cell 1 | Cell 2 |
1705| Cell 3 | Cell 4 |";
1706
1707 let expected_table = table(
1708 0..95,
1709 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1710 vec![
1711 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1712 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1713 ],
1714 );
1715
1716 assert_eq!(
1717 parse(markdown).await.children[0],
1718 ParsedMarkdownElement::Table(expected_table)
1719 );
1720 }
1721
1722 #[gpui::test]
1723 async fn test_list_basic() {
1724 let parsed = parse(
1725 "\
1726* Item 1
1727* Item 2
1728* Item 3
1729",
1730 )
1731 .await;
1732
1733 assert_eq!(
1734 parsed.children,
1735 vec![
1736 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1737 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1738 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1739 ],
1740 );
1741 }
1742
1743 #[gpui::test]
1744 async fn test_list_with_tasks() {
1745 let parsed = parse(
1746 "\
1747- [ ] TODO
1748- [x] Checked
1749",
1750 )
1751 .await;
1752
1753 assert_eq!(
1754 parsed.children,
1755 vec![
1756 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1757 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1758 ],
1759 );
1760 }
1761
1762 #[gpui::test]
1763 async fn test_list_with_indented_task() {
1764 let parsed = parse(
1765 "\
1766- [ ] TODO
1767 - [x] Checked
1768 - Unordered
1769 1. Number 1
1770 1. Number 2
17711. Number A
1772",
1773 )
1774 .await;
1775
1776 assert_eq!(
1777 parsed.children,
1778 vec![
1779 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1780 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1781 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1782 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1783 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1784 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1785 ],
1786 );
1787 }
1788
1789 #[gpui::test]
1790 async fn test_list_with_linebreak_is_handled_correctly() {
1791 let parsed = parse(
1792 "\
1793- [ ] Task 1
1794
1795- [x] Task 2
1796",
1797 )
1798 .await;
1799
1800 assert_eq!(
1801 parsed.children,
1802 vec![
1803 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1804 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1805 ],
1806 );
1807 }
1808
1809 #[gpui::test]
1810 async fn test_list_nested() {
1811 let parsed = parse(
1812 "\
1813* Item 1
1814* Item 2
1815* Item 3
1816
18171. Hello
18181. Two
1819 1. Three
18202. Four
18213. Five
1822
1823* First
1824 1. Hello
1825 1. Goodbyte
1826 - Inner
1827 - Inner
1828 2. Goodbyte
1829 - Next item empty
1830 -
1831* Last
1832",
1833 )
1834 .await;
1835
1836 assert_eq!(
1837 parsed.children,
1838 vec![
1839 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1840 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1841 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1842 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1843 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1844 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1845 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1846 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1847 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1848 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1849 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1850 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1851 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1852 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1853 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1854 list_item(186..190, 3, Unordered, vec![]),
1855 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1856 ]
1857 );
1858 }
1859
1860 #[gpui::test]
1861 async fn test_list_with_nested_content() {
1862 let parsed = parse(
1863 "\
1864* This is a list item with two paragraphs.
1865
1866 This is the second paragraph in the list item.
1867",
1868 )
1869 .await;
1870
1871 assert_eq!(
1872 parsed.children,
1873 vec![list_item(
1874 0..96,
1875 1,
1876 Unordered,
1877 vec![
1878 p("This is a list item with two paragraphs.", 4..44),
1879 p("This is the second paragraph in the list item.", 50..97)
1880 ],
1881 ),],
1882 );
1883 }
1884
1885 #[gpui::test]
1886 async fn test_list_item_with_inline_html() {
1887 let parsed = parse(
1888 "\
1889* This is a list item with an inline HTML <sometag>tag</sometag>.
1890",
1891 )
1892 .await;
1893
1894 assert_eq!(
1895 parsed.children,
1896 vec![list_item(
1897 0..67,
1898 1,
1899 Unordered,
1900 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1901 ),],
1902 );
1903 }
1904
1905 #[gpui::test]
1906 async fn test_nested_list_with_paragraph_inside() {
1907 let parsed = parse(
1908 "\
19091. a
1910 1. b
1911 1. c
1912
1913 text
1914
1915 1. d
1916",
1917 )
1918 .await;
1919
1920 assert_eq!(
1921 parsed.children,
1922 vec![
1923 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
1924 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
1925 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
1926 p("text", 32..37),
1927 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
1928 ],
1929 );
1930 }
1931
1932 #[gpui::test]
1933 async fn test_list_with_leading_text() {
1934 let parsed = parse(
1935 "\
1936* `code`
1937* **bold**
1938* [link](https://example.com)
1939",
1940 )
1941 .await;
1942
1943 assert_eq!(
1944 parsed.children,
1945 vec![
1946 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
1947 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
1948 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
1949 ],
1950 );
1951 }
1952
1953 #[gpui::test]
1954 async fn test_simple_block_quote() {
1955 let parsed = parse("> Simple block quote with **styled text**").await;
1956
1957 assert_eq!(
1958 parsed.children,
1959 vec![block_quote(
1960 vec![p("Simple block quote with styled text", 2..41)],
1961 0..41
1962 )]
1963 );
1964 }
1965
1966 #[gpui::test]
1967 async fn test_simple_block_quote_with_multiple_lines() {
1968 let parsed = parse(
1969 "\
1970> # Heading
1971> More
1972> text
1973>
1974> More text
1975",
1976 )
1977 .await;
1978
1979 assert_eq!(
1980 parsed.children,
1981 vec![block_quote(
1982 vec![
1983 h1(text("Heading", 4..11), 2..12),
1984 p("More text", 14..26),
1985 p("More text", 30..40)
1986 ],
1987 0..40
1988 )]
1989 );
1990 }
1991
1992 #[gpui::test]
1993 async fn test_nested_block_quote() {
1994 let parsed = parse(
1995 "\
1996> A
1997>
1998> > # B
1999>
2000> C
2001
2002More text
2003",
2004 )
2005 .await;
2006
2007 assert_eq!(
2008 parsed.children,
2009 vec![
2010 block_quote(
2011 vec![
2012 p("A", 2..4),
2013 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2014 p("C", 18..20)
2015 ],
2016 0..20
2017 ),
2018 p("More text", 21..31)
2019 ]
2020 );
2021 }
2022
2023 #[gpui::test]
2024 async fn test_code_block() {
2025 let parsed = parse(
2026 "\
2027```
2028fn main() {
2029 return 0;
2030}
2031```
2032",
2033 )
2034 .await;
2035
2036 assert_eq!(
2037 parsed.children,
2038 vec![code_block(
2039 None,
2040 "fn main() {\n return 0;\n}",
2041 0..35,
2042 None
2043 )]
2044 );
2045 }
2046
2047 #[gpui::test]
2048 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2049 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2050 language_registry.add(rust_lang());
2051
2052 let parsed = parse_markdown(
2053 "\
2054```rust
2055fn main() {
2056 return 0;
2057}
2058```
2059",
2060 None,
2061 Some(language_registry),
2062 )
2063 .await;
2064
2065 assert_eq!(
2066 parsed.children,
2067 vec![code_block(
2068 Some("rust".to_string()),
2069 "fn main() {\n return 0;\n}",
2070 0..39,
2071 Some(vec![])
2072 )]
2073 );
2074 }
2075
2076 fn rust_lang() -> Arc<Language> {
2077 Arc::new(Language::new(
2078 LanguageConfig {
2079 name: "Rust".into(),
2080 matcher: LanguageMatcher {
2081 path_suffixes: vec!["rs".into()],
2082 ..Default::default()
2083 },
2084 collapsed_placeholder: " /* ... */ ".to_string(),
2085 ..Default::default()
2086 },
2087 Some(tree_sitter_rust::LANGUAGE.into()),
2088 ))
2089 }
2090
2091 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2092 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2093 source_range,
2094 level: HeadingLevel::H1,
2095 contents,
2096 })
2097 }
2098
2099 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2100 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2101 source_range,
2102 level: HeadingLevel::H2,
2103 contents,
2104 })
2105 }
2106
2107 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2108 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2109 source_range,
2110 level: HeadingLevel::H3,
2111 contents,
2112 })
2113 }
2114
2115 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2116 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2117 }
2118
2119 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2120 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2121 highlights: Vec::new(),
2122 region_ranges: Vec::new(),
2123 regions: Vec::new(),
2124 source_range,
2125 contents: contents.to_string(),
2126 })]
2127 }
2128
2129 fn block_quote(
2130 children: Vec<ParsedMarkdownElement>,
2131 source_range: Range<usize>,
2132 ) -> ParsedMarkdownElement {
2133 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2134 source_range,
2135 children,
2136 })
2137 }
2138
2139 fn code_block(
2140 language: Option<String>,
2141 code: &str,
2142 source_range: Range<usize>,
2143 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2144 ) -> ParsedMarkdownElement {
2145 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2146 source_range,
2147 language,
2148 contents: code.to_string().into(),
2149 highlights,
2150 })
2151 }
2152
2153 fn list_item(
2154 source_range: Range<usize>,
2155 depth: u16,
2156 item_type: ParsedMarkdownListItemType,
2157 content: Vec<ParsedMarkdownElement>,
2158 ) -> ParsedMarkdownElement {
2159 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2160 source_range,
2161 item_type,
2162 depth,
2163 content,
2164 })
2165 }
2166
2167 fn table(
2168 source_range: Range<usize>,
2169 header: ParsedMarkdownTableRow,
2170 body: Vec<ParsedMarkdownTableRow>,
2171 ) -> ParsedMarkdownTable {
2172 ParsedMarkdownTable {
2173 column_alignments: Vec::new(),
2174 source_range,
2175 header,
2176 body,
2177 }
2178 }
2179
2180 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2181 ParsedMarkdownTableRow { children }
2182 }
2183
2184 impl PartialEq for ParsedMarkdownTable {
2185 fn eq(&self, other: &Self) -> bool {
2186 self.source_range == other.source_range
2187 && self.header == other.header
2188 && self.body == other.body
2189 }
2190 }
2191
2192 impl PartialEq for ParsedMarkdownText {
2193 fn eq(&self, other: &Self) -> bool {
2194 self.source_range == other.source_range && self.contents == other.contents
2195 }
2196 }
2197}