1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
13
14pub async fn parse_markdown(
15 markdown_input: &str,
16 file_location_directory: Option<PathBuf>,
17 language_registry: Option<Arc<LanguageRegistry>>,
18) -> ParsedMarkdown {
19 let mut options = Options::all();
20 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
21
22 let parser = Parser::new_ext(markdown_input, options);
23 let parser = MarkdownParser::new(
24 parser.into_offset_iter().collect(),
25 file_location_directory,
26 language_registry,
27 );
28 let renderer = parser.parse_document().await;
29 ParsedMarkdown {
30 children: renderer.parsed,
31 }
32}
33
34fn cleanup_html(source: &str) -> Vec<u8> {
35 let mut writer = std::io::Cursor::new(Vec::new());
36 let mut reader = std::io::Cursor::new(source);
37 let mut minify = Minifier::new(
38 &mut writer,
39 MinifierOptions {
40 omit_doctype: true,
41 collapse_whitespace: true,
42 ..Default::default()
43 },
44 );
45 if let Ok(()) = minify.minify(&mut reader) {
46 writer.into_inner()
47 } else {
48 source.bytes().collect()
49 }
50}
51
52struct MarkdownParser<'a> {
53 tokens: Vec<(Event<'a>, Range<usize>)>,
54 /// The current index in the tokens array
55 cursor: usize,
56 /// The blocks that we have successfully parsed so far
57 parsed: Vec<ParsedMarkdownElement>,
58 file_location_directory: Option<PathBuf>,
59 language_registry: Option<Arc<LanguageRegistry>>,
60}
61
62struct MarkdownListItem {
63 content: Vec<ParsedMarkdownElement>,
64 item_type: ParsedMarkdownListItemType,
65}
66
67impl Default for MarkdownListItem {
68 fn default() -> Self {
69 Self {
70 content: Vec::new(),
71 item_type: ParsedMarkdownListItemType::Unordered,
72 }
73 }
74}
75
76impl<'a> MarkdownParser<'a> {
77 fn new(
78 tokens: Vec<(Event<'a>, Range<usize>)>,
79 file_location_directory: Option<PathBuf>,
80 language_registry: Option<Arc<LanguageRegistry>>,
81 ) -> Self {
82 Self {
83 tokens,
84 file_location_directory,
85 language_registry,
86 cursor: 0,
87 parsed: vec![],
88 }
89 }
90
91 fn eof(&self) -> bool {
92 if self.tokens.is_empty() {
93 return true;
94 }
95 self.cursor >= self.tokens.len() - 1
96 }
97
98 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
99 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
100 return self.tokens.last();
101 }
102 self.tokens.get(self.cursor + steps)
103 }
104
105 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
106 if self.cursor == 0 || self.cursor > self.tokens.len() {
107 return None;
108 }
109 self.tokens.get(self.cursor - 1)
110 }
111
112 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
113 self.peek(0)
114 }
115
116 fn current_event(&self) -> Option<&Event<'_>> {
117 self.current().map(|(event, _)| event)
118 }
119
120 fn is_text_like(event: &Event) -> bool {
121 match event {
122 Event::Text(_)
123 // Represent an inline code block
124 | Event::Code(_)
125 | Event::Html(_)
126 | Event::InlineHtml(_)
127 | Event::FootnoteReference(_)
128 | Event::Start(Tag::Link { .. })
129 | Event::Start(Tag::Emphasis)
130 | Event::Start(Tag::Strong)
131 | Event::Start(Tag::Strikethrough)
132 | Event::Start(Tag::Image { .. }) => {
133 true
134 }
135 _ => false,
136 }
137 }
138
139 async fn parse_document(mut self) -> Self {
140 while !self.eof() {
141 if let Some(block) = self.parse_block().await {
142 self.parsed.extend(block);
143 } else {
144 self.cursor += 1;
145 }
146 }
147 self
148 }
149
150 #[async_recursion]
151 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
152 let (current, source_range) = self.current().unwrap();
153 let source_range = source_range.clone();
154 match current {
155 Event::Start(tag) => match tag {
156 Tag::Paragraph => {
157 self.cursor += 1;
158 let text = self.parse_text(false, Some(source_range));
159 Some(vec![ParsedMarkdownElement::Paragraph(text)])
160 }
161 Tag::Heading { level, .. } => {
162 let level = *level;
163 self.cursor += 1;
164 let heading = self.parse_heading(level);
165 Some(vec![ParsedMarkdownElement::Heading(heading)])
166 }
167 Tag::Table(alignment) => {
168 let alignment = alignment.clone();
169 self.cursor += 1;
170 let table = self.parse_table(alignment);
171 Some(vec![ParsedMarkdownElement::Table(table)])
172 }
173 Tag::List(order) => {
174 let order = *order;
175 self.cursor += 1;
176 let list = self.parse_list(order).await;
177 Some(list)
178 }
179 Tag::BlockQuote(_kind) => {
180 self.cursor += 1;
181 let block_quote = self.parse_block_quote().await;
182 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
183 }
184 Tag::CodeBlock(kind) => {
185 let language = match kind {
186 pulldown_cmark::CodeBlockKind::Indented => None,
187 pulldown_cmark::CodeBlockKind::Fenced(language) => {
188 if language.is_empty() {
189 None
190 } else {
191 Some(language.to_string())
192 }
193 }
194 };
195
196 self.cursor += 1;
197
198 let code_block = self.parse_code_block(language).await?;
199 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
200 }
201 Tag::HtmlBlock => {
202 self.cursor += 1;
203
204 Some(self.parse_html_block().await)
205 }
206 _ => None,
207 },
208 Event::Rule => {
209 self.cursor += 1;
210 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
211 }
212 _ => None,
213 }
214 }
215
216 fn parse_text(
217 &mut self,
218 should_complete_on_soft_break: bool,
219 source_range: Option<Range<usize>>,
220 ) -> MarkdownParagraph {
221 let source_range = source_range.unwrap_or_else(|| {
222 self.current()
223 .map(|(_, range)| range.clone())
224 .unwrap_or_default()
225 });
226
227 let mut markdown_text_like = Vec::new();
228 let mut text = String::new();
229 let mut bold_depth = 0;
230 let mut italic_depth = 0;
231 let mut strikethrough_depth = 0;
232 let mut link: Option<Link> = None;
233 let mut image: Option<Image> = None;
234 let mut region_ranges: Vec<Range<usize>> = vec![];
235 let mut regions: Vec<ParsedRegion> = vec![];
236 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
237 let mut link_urls: Vec<String> = vec![];
238 let mut link_ranges: Vec<Range<usize>> = vec![];
239
240 loop {
241 if self.eof() {
242 break;
243 }
244
245 let (current, _) = self.current().unwrap();
246 let prev_len = text.len();
247 match current {
248 Event::SoftBreak => {
249 if should_complete_on_soft_break {
250 break;
251 }
252 text.push(' ');
253 }
254
255 Event::HardBreak => {
256 text.push('\n');
257 }
258
259 // We want to ignore any inline HTML tags in the text but keep
260 // the text between them
261 Event::InlineHtml(_) => {}
262
263 Event::Text(t) => {
264 text.push_str(t.as_ref());
265 let mut style = MarkdownHighlightStyle::default();
266
267 if bold_depth > 0 {
268 style.weight = FontWeight::BOLD;
269 }
270
271 if italic_depth > 0 {
272 style.italic = true;
273 }
274
275 if strikethrough_depth > 0 {
276 style.strikethrough = true;
277 }
278
279 let last_run_len = if let Some(link) = link.clone() {
280 region_ranges.push(prev_len..text.len());
281 regions.push(ParsedRegion {
282 code: false,
283 link: Some(link),
284 });
285 style.link = true;
286 prev_len
287 } else {
288 // Manually scan for links
289 let mut finder = linkify::LinkFinder::new();
290 finder.kinds(&[linkify::LinkKind::Url]);
291 let mut last_link_len = prev_len;
292 for link in finder.links(t) {
293 let start = link.start();
294 let end = link.end();
295 let range = (prev_len + start)..(prev_len + end);
296 link_ranges.push(range.clone());
297 link_urls.push(link.as_str().to_string());
298
299 // If there is a style before we match a link, we have to add this to the highlighted ranges
300 if style != MarkdownHighlightStyle::default()
301 && last_link_len < link.start()
302 {
303 highlights.push((
304 last_link_len..link.start(),
305 MarkdownHighlight::Style(style.clone()),
306 ));
307 }
308
309 highlights.push((
310 range.clone(),
311 MarkdownHighlight::Style(MarkdownHighlightStyle {
312 underline: true,
313 ..style
314 }),
315 ));
316 region_ranges.push(range.clone());
317 regions.push(ParsedRegion {
318 code: false,
319 link: Some(Link::Web {
320 url: link.as_str().to_string(),
321 }),
322 });
323 last_link_len = end;
324 }
325 last_link_len
326 };
327
328 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
329 let mut new_highlight = true;
330 if let Some((last_range, last_style)) = highlights.last_mut()
331 && last_range.end == last_run_len
332 && last_style == &MarkdownHighlight::Style(style.clone())
333 {
334 last_range.end = text.len();
335 new_highlight = false;
336 }
337 if new_highlight {
338 highlights.push((
339 last_run_len..text.len(),
340 MarkdownHighlight::Style(style.clone()),
341 ));
342 }
343 }
344 }
345 Event::Code(t) => {
346 text.push_str(t.as_ref());
347 region_ranges.push(prev_len..text.len());
348
349 if link.is_some() {
350 highlights.push((
351 prev_len..text.len(),
352 MarkdownHighlight::Style(MarkdownHighlightStyle {
353 link: true,
354 ..Default::default()
355 }),
356 ));
357 }
358 regions.push(ParsedRegion {
359 code: true,
360 link: link.clone(),
361 });
362 }
363 Event::Start(tag) => match tag {
364 Tag::Emphasis => italic_depth += 1,
365 Tag::Strong => bold_depth += 1,
366 Tag::Strikethrough => strikethrough_depth += 1,
367 Tag::Link { dest_url, .. } => {
368 link = Link::identify(
369 self.file_location_directory.clone(),
370 dest_url.to_string(),
371 );
372 }
373 Tag::Image { dest_url, .. } => {
374 if !text.is_empty() {
375 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
376 source_range: source_range.clone(),
377 contents: text.clone(),
378 highlights: highlights.clone(),
379 region_ranges: region_ranges.clone(),
380 regions: regions.clone(),
381 });
382 text = String::new();
383 highlights = vec![];
384 region_ranges = vec![];
385 regions = vec![];
386 markdown_text_like.push(parsed_regions);
387 }
388 image = Image::identify(
389 dest_url.to_string(),
390 source_range.clone(),
391 self.file_location_directory.clone(),
392 );
393 }
394 _ => {
395 break;
396 }
397 },
398
399 Event::End(tag) => match tag {
400 TagEnd::Emphasis => italic_depth -= 1,
401 TagEnd::Strong => bold_depth -= 1,
402 TagEnd::Strikethrough => strikethrough_depth -= 1,
403 TagEnd::Link => {
404 link = None;
405 }
406 TagEnd::Image => {
407 if let Some(mut image) = image.take() {
408 if !text.is_empty() {
409 image.set_alt_text(std::mem::take(&mut text).into());
410 }
411 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
412 }
413 }
414 TagEnd::Paragraph => {
415 self.cursor += 1;
416 break;
417 }
418 _ => {
419 break;
420 }
421 },
422 _ => {
423 break;
424 }
425 }
426
427 self.cursor += 1;
428 }
429 if !text.is_empty() {
430 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
431 source_range,
432 contents: text,
433 highlights,
434 regions,
435 region_ranges,
436 }));
437 }
438 markdown_text_like
439 }
440
441 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
442 let (_event, source_range) = self.previous().unwrap();
443 let source_range = source_range.clone();
444 let text = self.parse_text(true, None);
445
446 // Advance past the heading end tag
447 self.cursor += 1;
448
449 ParsedMarkdownHeading {
450 source_range,
451 level: match level {
452 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
453 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
454 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
455 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
456 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
457 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
458 },
459 contents: text,
460 }
461 }
462
463 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
464 let (_event, source_range) = self.previous().unwrap();
465 let source_range = source_range.clone();
466 let mut header = ParsedMarkdownTableRow::new();
467 let mut body = vec![];
468 let mut current_row = vec![];
469 let mut in_header = true;
470 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
471
472 loop {
473 if self.eof() {
474 break;
475 }
476
477 let (current, source_range) = self.current().unwrap();
478 let source_range = source_range.clone();
479 match current {
480 Event::Start(Tag::TableHead)
481 | Event::Start(Tag::TableRow)
482 | Event::End(TagEnd::TableCell) => {
483 self.cursor += 1;
484 }
485 Event::Start(Tag::TableCell) => {
486 self.cursor += 1;
487 let cell_contents = self.parse_text(false, Some(source_range));
488 current_row.push(cell_contents);
489 }
490 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
491 self.cursor += 1;
492 let new_row = std::mem::take(&mut current_row);
493 if in_header {
494 header.children = new_row;
495 in_header = false;
496 } else {
497 let row = ParsedMarkdownTableRow::with_children(new_row);
498 body.push(row);
499 }
500 }
501 Event::End(TagEnd::Table) => {
502 self.cursor += 1;
503 break;
504 }
505 _ => {
506 break;
507 }
508 }
509 }
510
511 ParsedMarkdownTable {
512 source_range,
513 header,
514 body,
515 column_alignments,
516 }
517 }
518
519 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
520 match alignment {
521 Alignment::None => ParsedMarkdownTableAlignment::None,
522 Alignment::Left => ParsedMarkdownTableAlignment::Left,
523 Alignment::Center => ParsedMarkdownTableAlignment::Center,
524 Alignment::Right => ParsedMarkdownTableAlignment::Right,
525 }
526 }
527
528 async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
529 let (_, list_source_range) = self.previous().unwrap();
530
531 let mut items = Vec::new();
532 let mut items_stack = vec![MarkdownListItem::default()];
533 let mut depth = 1;
534 let mut order = order;
535 let mut order_stack = Vec::new();
536
537 let mut insertion_indices = FxHashMap::default();
538 let mut source_ranges = FxHashMap::default();
539 let mut start_item_range = list_source_range.clone();
540
541 while !self.eof() {
542 let (current, source_range) = self.current().unwrap();
543 match current {
544 Event::Start(Tag::List(new_order)) => {
545 if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
546 insertion_indices.insert(depth, items.len());
547 }
548
549 // We will use the start of the nested list as the end for the current item's range,
550 // because we don't care about the hierarchy of list items
551 if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
552 e.insert(start_item_range.start..source_range.start);
553 }
554
555 order_stack.push(order);
556 order = *new_order;
557 self.cursor += 1;
558 depth += 1;
559 }
560 Event::End(TagEnd::List(_)) => {
561 order = order_stack.pop().flatten();
562 self.cursor += 1;
563 depth -= 1;
564
565 if depth == 0 {
566 break;
567 }
568 }
569 Event::Start(Tag::Item) => {
570 start_item_range = source_range.clone();
571
572 self.cursor += 1;
573 items_stack.push(MarkdownListItem::default());
574
575 let mut task_list = None;
576 // Check for task list marker (`- [ ]` or `- [x]`)
577 if let Some(event) = self.current_event() {
578 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
579 if event == &Event::Start(Tag::Paragraph) {
580 self.cursor += 1;
581 }
582
583 if let Some((Event::TaskListMarker(checked), range)) = self.current() {
584 task_list = Some((*checked, range.clone()));
585 self.cursor += 1;
586 }
587 }
588
589 if let Some((event, range)) = self.current() {
590 // This is a plain list item.
591 // For example `- some text` or `1. [Docs](./docs.md)`
592 if MarkdownParser::is_text_like(event) {
593 let text = self.parse_text(false, Some(range.clone()));
594 let block = ParsedMarkdownElement::Paragraph(text);
595 if let Some(content) = items_stack.last_mut() {
596 let item_type = if let Some((checked, range)) = task_list {
597 ParsedMarkdownListItemType::Task(checked, range)
598 } else if let Some(order) = order {
599 ParsedMarkdownListItemType::Ordered(order)
600 } else {
601 ParsedMarkdownListItemType::Unordered
602 };
603 content.item_type = item_type;
604 content.content.push(block);
605 }
606 } else {
607 let block = self.parse_block().await;
608 if let Some(block) = block
609 && let Some(list_item) = items_stack.last_mut()
610 {
611 list_item.content.extend(block);
612 }
613 }
614 }
615
616 // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
617 if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
618 self.cursor += 1;
619 }
620 }
621 Event::End(TagEnd::Item) => {
622 self.cursor += 1;
623
624 if let Some(current) = order {
625 order = Some(current + 1);
626 }
627
628 if let Some(list_item) = items_stack.pop() {
629 let source_range = source_ranges
630 .remove(&depth)
631 .unwrap_or(start_item_range.clone());
632
633 // We need to remove the last character of the source range, because it includes the newline character
634 let source_range = source_range.start..source_range.end - 1;
635 let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
636 source_range,
637 content: list_item.content,
638 depth,
639 item_type: list_item.item_type,
640 });
641
642 if let Some(index) = insertion_indices.get(&depth) {
643 items.insert(*index, item);
644 insertion_indices.remove(&depth);
645 } else {
646 items.push(item);
647 }
648 }
649 }
650 _ => {
651 if depth == 0 {
652 break;
653 }
654 // This can only happen if a list item starts with more then one paragraph,
655 // or the list item contains blocks that should be rendered after the nested list items
656 let block = self.parse_block().await;
657 if let Some(block) = block {
658 if let Some(list_item) = items_stack.last_mut() {
659 // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
660 if !insertion_indices.contains_key(&depth) {
661 list_item.content.extend(block);
662 continue;
663 }
664 }
665
666 // Otherwise we need to insert the block after all the nested items
667 // that have been parsed so far
668 items.extend(block);
669 } else {
670 self.cursor += 1;
671 }
672 }
673 }
674 }
675
676 items
677 }
678
679 #[async_recursion]
680 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
681 let (_event, source_range) = self.previous().unwrap();
682 let source_range = source_range.clone();
683 let mut nested_depth = 1;
684
685 let mut children: Vec<ParsedMarkdownElement> = vec![];
686
687 while !self.eof() {
688 let block = self.parse_block().await;
689
690 if let Some(block) = block {
691 children.extend(block);
692 } else {
693 break;
694 }
695
696 if self.eof() {
697 break;
698 }
699
700 let (current, _source_range) = self.current().unwrap();
701 match current {
702 // This is a nested block quote.
703 // Record that we're in a nested block quote and continue parsing.
704 // We don't need to advance the cursor since the next
705 // call to `parse_block` will handle it.
706 Event::Start(Tag::BlockQuote(_kind)) => {
707 nested_depth += 1;
708 }
709 Event::End(TagEnd::BlockQuote(_kind)) => {
710 nested_depth -= 1;
711 if nested_depth == 0 {
712 self.cursor += 1;
713 break;
714 }
715 }
716 _ => {}
717 };
718 }
719
720 ParsedMarkdownBlockQuote {
721 source_range,
722 children,
723 }
724 }
725
726 async fn parse_code_block(
727 &mut self,
728 language: Option<String>,
729 ) -> Option<ParsedMarkdownCodeBlock> {
730 let Some((_event, source_range)) = self.previous() else {
731 return None;
732 };
733
734 let source_range = source_range.clone();
735 let mut code = String::new();
736
737 while !self.eof() {
738 let Some((current, _source_range)) = self.current() else {
739 break;
740 };
741
742 match current {
743 Event::Text(text) => {
744 code.push_str(text);
745 self.cursor += 1;
746 }
747 Event::End(TagEnd::CodeBlock) => {
748 self.cursor += 1;
749 break;
750 }
751 _ => {
752 break;
753 }
754 }
755 }
756
757 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
758
759 let highlights = if let Some(language) = &language {
760 if let Some(registry) = &self.language_registry {
761 let rope: language::Rope = code.as_str().into();
762 registry
763 .language_for_name_or_extension(language)
764 .await
765 .map(|l| l.highlight_text(&rope, 0..code.len()))
766 .ok()
767 } else {
768 None
769 }
770 } else {
771 None
772 };
773
774 Some(ParsedMarkdownCodeBlock {
775 source_range,
776 contents: code.into(),
777 language,
778 highlights,
779 })
780 }
781
782 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
783 let mut elements = Vec::new();
784 let Some((_event, _source_range)) = self.previous() else {
785 return elements;
786 };
787
788 let mut html_source_range_start = None;
789 let mut html_source_range_end = None;
790 let mut html_buffer = String::new();
791
792 while !self.eof() {
793 let Some((current, source_range)) = self.current() else {
794 break;
795 };
796 let source_range = source_range.clone();
797 match current {
798 Event::Html(html) => {
799 html_source_range_start.get_or_insert(source_range.start);
800 html_source_range_end = Some(source_range.end);
801 html_buffer.push_str(html);
802 self.cursor += 1;
803 }
804 Event::End(TagEnd::CodeBlock) => {
805 self.cursor += 1;
806 break;
807 }
808 _ => {
809 break;
810 }
811 }
812 }
813
814 let bytes = cleanup_html(&html_buffer);
815
816 let mut cursor = std::io::Cursor::new(bytes);
817 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
818 .from_utf8()
819 .read_from(&mut cursor)
820 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
821 {
822 self.parse_html_node(start..end, &dom.document, &mut elements);
823 }
824
825 elements
826 }
827
828 fn parse_html_node(
829 &self,
830 source_range: Range<usize>,
831 node: &Rc<markup5ever_rcdom::Node>,
832 elements: &mut Vec<ParsedMarkdownElement>,
833 ) {
834 match &node.data {
835 markup5ever_rcdom::NodeData::Document => {
836 self.consume_children(source_range, node, elements);
837 }
838 markup5ever_rcdom::NodeData::Doctype { .. } => {}
839 markup5ever_rcdom::NodeData::Text { contents } => {
840 elements.push(ParsedMarkdownElement::Paragraph(vec![
841 MarkdownParagraphChunk::Text(ParsedMarkdownText {
842 source_range,
843 contents: contents.borrow().to_string(),
844 highlights: Vec::default(),
845 region_ranges: Vec::default(),
846 regions: Vec::default(),
847 }),
848 ]));
849 }
850 markup5ever_rcdom::NodeData::Comment { .. } => {}
851 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
852 if local_name!("img") == name.local {
853 if let Some(image) = self.extract_image(source_range, attrs) {
854 elements.push(ParsedMarkdownElement::Image(image));
855 }
856 } else if matches!(
857 name.local,
858 local_name!("h1")
859 | local_name!("h2")
860 | local_name!("h3")
861 | local_name!("h4")
862 | local_name!("h5")
863 | local_name!("h6")
864 ) {
865 let mut paragraph = MarkdownParagraph::new();
866 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
867
868 if !paragraph.is_empty() {
869 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
870 source_range,
871 level: match name.local {
872 local_name!("h1") => HeadingLevel::H1,
873 local_name!("h2") => HeadingLevel::H2,
874 local_name!("h3") => HeadingLevel::H3,
875 local_name!("h4") => HeadingLevel::H4,
876 local_name!("h5") => HeadingLevel::H5,
877 local_name!("h6") => HeadingLevel::H6,
878 _ => unreachable!(),
879 },
880 contents: paragraph,
881 }));
882 }
883 } else if local_name!("blockquote") == name.local {
884 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
885 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
886 }
887 } else if local_name!("table") == name.local {
888 if let Some(table) = self.extract_html_table(node, source_range) {
889 elements.push(ParsedMarkdownElement::Table(table));
890 }
891 } else {
892 self.consume_children(source_range, node, elements);
893 }
894 }
895 markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
896 }
897 }
898
899 fn parse_paragraph(
900 &self,
901 source_range: Range<usize>,
902 node: &Rc<markup5ever_rcdom::Node>,
903 paragraph: &mut MarkdownParagraph,
904 ) {
905 match &node.data {
906 markup5ever_rcdom::NodeData::Text { contents } => {
907 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
908 source_range,
909 regions: Vec::default(),
910 contents: contents.borrow().to_string(),
911 region_ranges: Vec::default(),
912 highlights: Vec::default(),
913 }));
914 }
915 markup5ever_rcdom::NodeData::Element { .. } => {
916 self.consume_paragraph(source_range, node, paragraph);
917 }
918 _ => {}
919 }
920 }
921
922 fn consume_paragraph(
923 &self,
924 source_range: Range<usize>,
925 node: &Rc<markup5ever_rcdom::Node>,
926 paragraph: &mut MarkdownParagraph,
927 ) {
928 for node in node.children.borrow().iter() {
929 self.parse_paragraph(source_range.clone(), node, paragraph);
930 }
931 }
932
933 fn consume_children(
934 &self,
935 source_range: Range<usize>,
936 node: &Rc<markup5ever_rcdom::Node>,
937 elements: &mut Vec<ParsedMarkdownElement>,
938 ) {
939 for node in node.children.borrow().iter() {
940 self.parse_html_node(source_range.clone(), node, elements);
941 }
942 }
943
944 fn attr_value(
945 attrs: &RefCell<Vec<html5ever::Attribute>>,
946 name: html5ever::LocalName,
947 ) -> Option<String> {
948 attrs.borrow().iter().find_map(|attr| {
949 if attr.name.local == name {
950 Some(attr.value.to_string())
951 } else {
952 None
953 }
954 })
955 }
956
957 fn extract_styles_from_attributes(
958 attrs: &RefCell<Vec<html5ever::Attribute>>,
959 ) -> HashMap<String, String> {
960 let mut styles = HashMap::new();
961
962 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
963 for decl in style.split(';') {
964 let mut parts = decl.splitn(2, ':');
965 if let Some((key, value)) = parts.next().zip(parts.next()) {
966 styles.insert(
967 key.trim().to_lowercase().to_string(),
968 value.trim().to_string(),
969 );
970 }
971 }
972 }
973
974 styles
975 }
976
977 fn extract_image(
978 &self,
979 source_range: Range<usize>,
980 attrs: &RefCell<Vec<html5ever::Attribute>>,
981 ) -> Option<Image> {
982 let src = Self::attr_value(attrs, local_name!("src"))?;
983
984 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
985
986 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
987 image.set_alt_text(alt.into());
988 }
989
990 let styles = Self::extract_styles_from_attributes(attrs);
991
992 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
993 .or_else(|| styles.get("width").cloned())
994 .and_then(|width| Self::parse_length(&width))
995 {
996 image.set_width(width);
997 }
998
999 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1000 .or_else(|| styles.get("height").cloned())
1001 .and_then(|height| Self::parse_length(&height))
1002 {
1003 image.set_height(height);
1004 }
1005
1006 Some(image)
1007 }
1008
1009 fn extract_html_blockquote(
1010 &self,
1011 node: &Rc<markup5ever_rcdom::Node>,
1012 source_range: Range<usize>,
1013 ) -> Option<ParsedMarkdownBlockQuote> {
1014 let mut children = Vec::new();
1015 self.consume_children(source_range.clone(), node, &mut children);
1016
1017 if children.is_empty() {
1018 None
1019 } else {
1020 Some(ParsedMarkdownBlockQuote {
1021 children,
1022 source_range,
1023 })
1024 }
1025 }
1026
1027 fn extract_html_table(
1028 &self,
1029 node: &Rc<markup5ever_rcdom::Node>,
1030 source_range: Range<usize>,
1031 ) -> Option<ParsedMarkdownTable> {
1032 let mut header_columns = Vec::new();
1033 let mut body_rows = Vec::new();
1034
1035 // node should be a thead or tbody element
1036 for node in node.children.borrow().iter() {
1037 match &node.data {
1038 markup5ever_rcdom::NodeData::Element { name, .. } => {
1039 if local_name!("thead") == name.local {
1040 // node should be a tr element
1041 for node in node.children.borrow().iter() {
1042 let mut paragraph = MarkdownParagraph::new();
1043 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1044
1045 for paragraph in paragraph.into_iter() {
1046 header_columns.push(vec![paragraph]);
1047 }
1048 }
1049 } else if local_name!("tbody") == name.local {
1050 // node should be a tr element
1051 for node in node.children.borrow().iter() {
1052 let mut row = MarkdownParagraph::new();
1053 self.consume_paragraph(source_range.clone(), node, &mut row);
1054 body_rows.push(ParsedMarkdownTableRow::with_children(
1055 row.into_iter().map(|column| vec![column]).collect(),
1056 ));
1057 }
1058 }
1059 }
1060 _ => {}
1061 }
1062 }
1063
1064 if !header_columns.is_empty() || !body_rows.is_empty() {
1065 Some(ParsedMarkdownTable {
1066 source_range,
1067 body: body_rows,
1068 column_alignments: Vec::default(),
1069 header: ParsedMarkdownTableRow::with_children(header_columns),
1070 })
1071 } else {
1072 None
1073 }
1074 }
1075
1076 /// Parses the width/height attribute value of an html element (e.g. img element)
1077 fn parse_length(value: &str) -> Option<DefiniteLength> {
1078 if value.ends_with("%") {
1079 value
1080 .trim_end_matches("%")
1081 .parse::<f32>()
1082 .ok()
1083 .map(|value| relative(value / 100.))
1084 } else {
1085 value
1086 .trim_end_matches("px")
1087 .parse()
1088 .ok()
1089 .map(|value| px(value).into())
1090 }
1091 }
1092}
1093
1094#[cfg(test)]
1095mod tests {
1096 use super::*;
1097 use ParsedMarkdownListItemType::*;
1098 use core::panic;
1099 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1100 use language::{
1101 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1102 };
1103 use pretty_assertions::assert_eq;
1104
1105 async fn parse(input: &str) -> ParsedMarkdown {
1106 parse_markdown(input, None, None).await
1107 }
1108
1109 #[gpui::test]
1110 async fn test_headings() {
1111 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1112
1113 assert_eq!(
1114 parsed.children,
1115 vec![
1116 h1(text("Heading one", 2..13), 0..14),
1117 h2(text("Heading two", 17..28), 14..29),
1118 h3(text("Heading three", 33..46), 29..46),
1119 ]
1120 );
1121 }
1122
1123 #[gpui::test]
1124 async fn test_newlines_dont_new_paragraphs() {
1125 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1126
1127 assert_eq!(
1128 parsed.children,
1129 vec![p("Some text that is bolded and italicized", 0..46)]
1130 );
1131 }
1132
1133 #[gpui::test]
1134 async fn test_heading_with_paragraph() {
1135 let parsed = parse("# Zed\nThe editor").await;
1136
1137 assert_eq!(
1138 parsed.children,
1139 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1140 );
1141 }
1142
1143 #[gpui::test]
1144 async fn test_double_newlines_do_new_paragraphs() {
1145 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1146
1147 assert_eq!(
1148 parsed.children,
1149 vec![
1150 p("Some text that is bolded", 0..29),
1151 p("and italicized", 31..47),
1152 ]
1153 );
1154 }
1155
1156 #[gpui::test]
1157 async fn test_bold_italic_text() {
1158 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1159
1160 assert_eq!(
1161 parsed.children,
1162 vec![p("Some text that is bolded and italicized", 0..45)]
1163 );
1164 }
1165
1166 #[gpui::test]
1167 async fn test_nested_bold_strikethrough_text() {
1168 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1169
1170 assert_eq!(parsed.children.len(), 1);
1171 assert_eq!(
1172 parsed.children[0],
1173 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1174 ParsedMarkdownText {
1175 source_range: 0..35,
1176 contents: "Some bostrikethroughld text".to_string(),
1177 highlights: Vec::new(),
1178 region_ranges: Vec::new(),
1179 regions: Vec::new(),
1180 }
1181 )])
1182 );
1183
1184 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1185 text
1186 } else {
1187 panic!("Expected a paragraph");
1188 };
1189
1190 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1191 text
1192 } else {
1193 panic!("Expected a text");
1194 };
1195
1196 assert_eq!(
1197 paragraph.highlights,
1198 vec![
1199 (
1200 5..7,
1201 MarkdownHighlight::Style(MarkdownHighlightStyle {
1202 weight: FontWeight::BOLD,
1203 ..Default::default()
1204 }),
1205 ),
1206 (
1207 7..20,
1208 MarkdownHighlight::Style(MarkdownHighlightStyle {
1209 weight: FontWeight::BOLD,
1210 strikethrough: true,
1211 ..Default::default()
1212 }),
1213 ),
1214 (
1215 20..22,
1216 MarkdownHighlight::Style(MarkdownHighlightStyle {
1217 weight: FontWeight::BOLD,
1218 ..Default::default()
1219 }),
1220 ),
1221 ]
1222 );
1223 }
1224
1225 #[gpui::test]
1226 async fn test_text_with_inline_html() {
1227 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1228
1229 assert_eq!(
1230 parsed.children,
1231 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1232 );
1233 }
1234
1235 #[gpui::test]
1236 async fn test_raw_links_detection() {
1237 let parsed = parse("Checkout this https://zed.dev link").await;
1238
1239 assert_eq!(
1240 parsed.children,
1241 vec![p("Checkout this https://zed.dev link", 0..34)]
1242 );
1243 }
1244
1245 #[gpui::test]
1246 async fn test_empty_image() {
1247 let parsed = parse("![]()").await;
1248
1249 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1250 text
1251 } else {
1252 panic!("Expected a paragraph");
1253 };
1254 assert_eq!(paragraph.len(), 0);
1255 }
1256
1257 #[gpui::test]
1258 async fn test_image_links_detection() {
1259 let parsed = parse("").await;
1260
1261 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1262 text
1263 } else {
1264 panic!("Expected a paragraph");
1265 };
1266 assert_eq!(
1267 paragraph[0],
1268 MarkdownParagraphChunk::Image(Image {
1269 source_range: 0..111,
1270 link: Link::Web {
1271 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1272 },
1273 alt_text: Some("test".into()),
1274 height: None,
1275 width: None,
1276 },)
1277 );
1278 }
1279
1280 #[gpui::test]
1281 async fn test_image_without_alt_text() {
1282 let parsed = parse("").await;
1283
1284 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1285 text
1286 } else {
1287 panic!("Expected a paragraph");
1288 };
1289 assert_eq!(
1290 paragraph[0],
1291 MarkdownParagraphChunk::Image(Image {
1292 source_range: 0..31,
1293 link: Link::Web {
1294 url: "http://example.com/foo.png".to_string(),
1295 },
1296 alt_text: None,
1297 height: None,
1298 width: None,
1299 },)
1300 );
1301 }
1302
1303 #[gpui::test]
1304 async fn test_image_with_alt_text_containing_formatting() {
1305 let parsed = parse("").await;
1306
1307 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1308 panic!("Expected a paragraph");
1309 };
1310 assert_eq!(
1311 chunks,
1312 &[MarkdownParagraphChunk::Image(Image {
1313 source_range: 0..44,
1314 link: Link::Web {
1315 url: "http://example.com/foo.png".to_string(),
1316 },
1317 alt_text: Some("foo bar baz".into()),
1318 height: None,
1319 width: None,
1320 }),],
1321 );
1322 }
1323
1324 #[gpui::test]
1325 async fn test_images_with_text_in_between() {
1326 let parsed = parse(
1327 "\nLorem Ipsum\n",
1328 )
1329 .await;
1330
1331 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1332 text
1333 } else {
1334 panic!("Expected a paragraph");
1335 };
1336 assert_eq!(
1337 chunks,
1338 &vec![
1339 MarkdownParagraphChunk::Image(Image {
1340 source_range: 0..81,
1341 link: Link::Web {
1342 url: "http://example.com/foo.png".to_string(),
1343 },
1344 alt_text: Some("foo".into()),
1345 height: None,
1346 width: None,
1347 }),
1348 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1349 source_range: 0..81,
1350 contents: " Lorem Ipsum ".to_string(),
1351 highlights: Vec::new(),
1352 region_ranges: Vec::new(),
1353 regions: Vec::new(),
1354 }),
1355 MarkdownParagraphChunk::Image(Image {
1356 source_range: 0..81,
1357 link: Link::Web {
1358 url: "http://example.com/bar.png".to_string(),
1359 },
1360 alt_text: Some("bar".into()),
1361 height: None,
1362 width: None,
1363 })
1364 ]
1365 );
1366 }
1367
// Table-driven checks of `MarkdownParser::parse_length` for percentages,
// pixel values, unit-less numbers, invalid strings and decimal inputs.
#[test]
fn test_parse_length() {
    let fraction = |value| Some(DefiniteLength::Fraction(value));
    let pixels = |value| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    // Test percentage values
    for (input, expected) in [("50%", 0.5), ("100%", 1.0), ("25%", 0.25), ("0%", 0.0)] {
        assert_eq!(MarkdownParser::parse_length(input), fraction(expected));
    }

    // Test pixel values
    for (input, expected) in [("100px", 100.0), ("50px", 50.0), ("0px", 0.0)] {
        assert_eq!(MarkdownParser::parse_length(input), pixels(expected));
    }

    // Test values without units (should be treated as pixels)
    for (input, expected) in [("100", 100.0), ("42", 42.0)] {
        assert_eq!(MarkdownParser::parse_length(input), pixels(expected));
    }

    // Test invalid values
    for input in ["invalid", "px", "%", "", "abc%", "abcpx"] {
        assert_eq!(MarkdownParser::parse_length(input), None);
    }

    // Test decimal values
    assert_eq!(MarkdownParser::parse_length("50.5%"), fraction(0.505));
    assert_eq!(MarkdownParser::parse_length("100.25px"), pixels(100.25));
    assert_eq!(MarkdownParser::parse_length("42.0"), pixels(42.0));
}
1434
// Parses an HTML `<blockquote>` wrapping a single `<p>` and expects one block
// quote element containing one paragraph with the text "some description".
// NOTE(review): the input literal spans several lines and its exact bytes
// (including indentation) presumably determine the 0..76 range; do not reformat it.
1435 #[gpui::test]
1436 async fn test_html_block_quote() {
1437 let parsed = parse(
1438 "<blockquote>
1439 <p>some description</p>
1440 </blockquote>",
1441 )
1442 .await;
1443
// Both the block quote and its inner paragraph are expected to carry the same
// source range, 0..76.
1444 assert_eq!(
1445 ParsedMarkdown {
1446 children: vec![block_quote(
1447 vec![ParsedMarkdownElement::Paragraph(text(
1448 "some description",
1449 0..76
1450 ))],
1451 0..76,
1452 )]
1453 },
1454 parsed
1455 );
1456 }
1457
// Parses an HTML `<blockquote>` that contains a paragraph followed by a second,
// nested `<blockquote>`, and expects the same nesting in the parsed tree.
// NOTE(review): the multi-line literal's exact bytes presumably determine the
// 0..173 range; do not reformat it.
1458 #[gpui::test]
1459 async fn test_html_nested_block_quote() {
1460 let parsed = parse(
1461 "<blockquote>
1462 <p>some description</p>
1463 <blockquote>
1464 <p>second description</p>
1465 </blockquote>
1466 </blockquote>",
1467 )
1468 .await;
1469
// Every expected element, outer and inner, carries the same source range 0..173.
1470 assert_eq!(
1471 ParsedMarkdown {
1472 children: vec![block_quote(
1473 vec![
1474 ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1475 block_quote(
1476 vec![ParsedMarkdownElement::Paragraph(text(
1477 "second description",
1478 0..173
1479 ))],
1480 0..173,
1481 )
1482 ],
1483 0..173,
1484 )]
1485 },
1486 parsed
1487 );
1488 }
1489
// Parses an HTML `<table>` with a `<thead>` row and two `<tbody>` rows and
// expects a table element with one header row and two body rows.
// NOTE(review): the multi-line literal's exact bytes presumably determine the
// 0..366 range; do not reformat it.
1490 #[gpui::test]
1491 async fn test_html_table() {
1492 let parsed = parse(
1493 "<table>
1494 <thead>
1495 <tr>
1496 <th>Id</th>
1497 <th>Name</th>
1498 </tr>
1499 </thead>
1500 <tbody>
1501 <tr>
1502 <td>1</td>
1503 <td>Chris</td>
1504 </tr>
1505 <tr>
1506 <td>2</td>
1507 <td>Dennis</td>
1508 </tr>
1509 </tbody>
1510 </table>",
1511 )
1512 .await;
1513
// Every cell is expected to carry the same source range as the table, 0..366.
// NOTE(review): the expected header text is "Name " with a trailing space —
// confirm this matches the intended parser output for the input above.
1514 assert_eq!(
1515 ParsedMarkdown {
1516 children: vec![ParsedMarkdownElement::Table(table(
1517 0..366,
1518 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1519 vec![
1520 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1521 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1522 ],
1523 ))],
1524 },
1525 parsed
1526 );
1527 }
1528
// Parses an HTML `<table>` that has only a `<tbody>` and expects a table whose
// header row is empty and whose body has two rows.
// NOTE(review): the multi-line literal's exact bytes presumably determine the
// 0..240 range; do not reformat it.
1529 #[gpui::test]
1530 async fn test_html_table_without_headings() {
1531 let parsed = parse(
1532 "<table>
1533 <tbody>
1534 <tr>
1535 <td>1</td>
1536 <td>Chris</td>
1537 </tr>
1538 <tr>
1539 <td>2</td>
1540 <td>Dennis</td>
1541 </tr>
1542 </tbody>
1543 </table>",
1544 )
1545 .await;
1546
// The missing `<thead>` is represented by `row(vec![])`, i.e. a header with no cells.
1547 assert_eq!(
1548 ParsedMarkdown {
1549 children: vec![ParsedMarkdownElement::Table(table(
1550 0..240,
1551 row(vec![]),
1552 vec![
1553 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1554 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1555 ],
1556 ))],
1557 },
1558 parsed
1559 );
1560 }
1561
// Parses an HTML `<table>` that has only a `<thead>` and expects a table with
// a two-cell header row and an empty body.
// NOTE(review): the multi-line literal's exact bytes presumably determine the
// 0..150 range; do not reformat it.
1562 #[gpui::test]
1563 async fn test_html_table_without_body() {
1564 let parsed = parse(
1565 "<table>
1566 <thead>
1567 <tr>
1568 <th>Id</th>
1569 <th>Name</th>
1570 </tr>
1571 </thead>
1572 </table>",
1573 )
1574 .await;
1575
// The missing `<tbody>` is represented by an empty body vector.
1576 assert_eq!(
1577 ParsedMarkdown {
1578 children: vec![ParsedMarkdownElement::Table(table(
1579 0..150,
1580 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1581 vec![],
1582 ))],
1583 },
1584 parsed
1585 );
1586 }
1587
1588 #[gpui::test]
1589 async fn test_html_heading_tags() {
1590 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1591
1592 assert_eq!(
1593 ParsedMarkdown {
1594 children: vec![
1595 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1596 level: HeadingLevel::H1,
1597 source_range: 0..96,
1598 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1599 source_range: 0..96,
1600 contents: "Heading".into(),
1601 highlights: Vec::default(),
1602 region_ranges: Vec::default(),
1603 regions: Vec::default()
1604 })],
1605 }),
1606 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1607 level: HeadingLevel::H2,
1608 source_range: 0..96,
1609 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1610 source_range: 0..96,
1611 contents: "Heading".into(),
1612 highlights: Vec::default(),
1613 region_ranges: Vec::default(),
1614 regions: Vec::default()
1615 })],
1616 }),
1617 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1618 level: HeadingLevel::H3,
1619 source_range: 0..96,
1620 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1621 source_range: 0..96,
1622 contents: "Heading".into(),
1623 highlights: Vec::default(),
1624 region_ranges: Vec::default(),
1625 regions: Vec::default()
1626 })],
1627 }),
1628 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1629 level: HeadingLevel::H4,
1630 source_range: 0..96,
1631 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1632 source_range: 0..96,
1633 contents: "Heading".into(),
1634 highlights: Vec::default(),
1635 region_ranges: Vec::default(),
1636 regions: Vec::default()
1637 })],
1638 }),
1639 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1640 level: HeadingLevel::H5,
1641 source_range: 0..96,
1642 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1643 source_range: 0..96,
1644 contents: "Heading".into(),
1645 highlights: Vec::default(),
1646 region_ranges: Vec::default(),
1647 regions: Vec::default()
1648 })],
1649 }),
1650 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1651 level: HeadingLevel::H6,
1652 source_range: 0..96,
1653 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1654 source_range: 0..96,
1655 contents: "Heading".into(),
1656 highlights: Vec::default(),
1657 region_ranges: Vec::default(),
1658 regions: Vec::default()
1659 })],
1660 }),
1661 ],
1662 },
1663 parsed
1664 );
1665 }
1666
1667 #[gpui::test]
1668 async fn test_html_image_tag() {
1669 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1670
1671 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1672 panic!("Expected a image element");
1673 };
1674 assert_eq!(
1675 image.clone(),
1676 Image {
1677 source_range: 0..40,
1678 link: Link::Web {
1679 url: "http://example.com/foo.png".to_string(),
1680 },
1681 alt_text: None,
1682 height: None,
1683 width: None,
1684 },
1685 );
1686 }
1687
1688 #[gpui::test]
1689 async fn test_html_image_tag_with_alt_text() {
1690 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1691
1692 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1693 panic!("Expected a image element");
1694 };
1695 assert_eq!(
1696 image.clone(),
1697 Image {
1698 source_range: 0..50,
1699 link: Link::Web {
1700 url: "http://example.com/foo.png".to_string(),
1701 },
1702 alt_text: Some("Foo".into()),
1703 height: None,
1704 width: None,
1705 },
1706 );
1707 }
1708
1709 #[gpui::test]
1710 async fn test_html_image_tag_with_height_and_width() {
1711 let parsed =
1712 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1713
1714 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1715 panic!("Expected a image element");
1716 };
1717 assert_eq!(
1718 image.clone(),
1719 Image {
1720 source_range: 0..65,
1721 link: Link::Web {
1722 url: "http://example.com/foo.png".to_string(),
1723 },
1724 alt_text: None,
1725 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1726 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1727 },
1728 );
1729 }
1730
1731 #[gpui::test]
1732 async fn test_html_image_style_tag_with_height_and_width() {
1733 let parsed = parse(
1734 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1735 )
1736 .await;
1737
1738 let ParsedMarkdownElement::Image(image) = &parsed.children[0] else {
1739 panic!("Expected a image element");
1740 };
1741 assert_eq!(
1742 image.clone(),
1743 Image {
1744 source_range: 0..75,
1745 link: Link::Web {
1746 url: "http://example.com/foo.png".to_string(),
1747 },
1748 alt_text: None,
1749 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1750 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1751 },
1752 );
1753 }
1754
// A pipe table with a header and delimiter row but no body rows, followed by
// unrelated text: the first parsed child is expected to be a table with two
// header cells and an empty body.
// NOTE(review): the literal's exact bytes (cell padding included) presumably
// determine the expected ranges; do not reformat it.
1755 #[gpui::test]
1756 async fn test_header_only_table() {
1757 let markdown = "\
1758| Header 1 | Header 2 |
1759|----------|----------|
1760
1761Some other content
1762";
1763
1764 let expected_table = table(
1765 0..48,
1766 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1767 vec![],
1768 );
1769
// Only the first child is compared; the trailing paragraph is not checked here.
1770 assert_eq!(
1771 parse(markdown).await.children[0],
1772 ParsedMarkdownElement::Table(expected_table)
1773 );
1774 }
1775
// A pipe table with one header row and two body rows: expects a table element
// with matching header and body cells.
// NOTE(review): the literal's exact bytes (cell padding included) presumably
// determine the expected ranges; do not reformat it.
1776 #[gpui::test]
1777 async fn test_basic_table() {
1778 let markdown = "\
1779| Header 1 | Header 2 |
1780|----------|----------|
1781| Cell 1 | Cell 2 |
1782| Cell 3 | Cell 4 |";
1783
1784 let expected_table = table(
1785 0..95,
1786 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1787 vec![
1788 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1789 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1790 ],
1791 );
1792
// Only the first parsed child is compared against the expected table.
1793 assert_eq!(
1794 parse(markdown).await.children[0],
1795 ParsedMarkdownElement::Table(expected_table)
1796 );
1797 }
1798
// A flat bullet list of three items: each item is expected to become its own
// top-level list-item element at depth 1 with a single paragraph inside.
1799 #[gpui::test]
1800 async fn test_list_basic() {
1801 let parsed = parse(
1802 "\
1803* Item 1
1804* Item 2
1805* Item 3
1806",
1807 )
1808 .await;
1809
// Arguments to `list_item` are: source range, depth, item type, content.
1810 assert_eq!(
1811 parsed.children,
1812 vec![
1813 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1814 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1815 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1816 ],
1817 );
1818 }
1819
// Task-list items (`- [ ]` and `- [x]`): each is expected to become a list
// item of type `Task`, carrying a checked flag and a range (2..5 and 13..16,
// which match the positions of the `[ ]` / `[x]` markers in the input).
1820 #[gpui::test]
1821 async fn test_list_with_tasks() {
1822 let parsed = parse(
1823 "\
1824- [ ] TODO
1825- [x] Checked
1826",
1827 )
1828 .await;
1829
1830 assert_eq!(
1831 parsed.children,
1832 vec![
1833 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1834 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1835 ],
1836 );
1837 }
1838
// A task item with indented children of mixed kinds (task, bullet, ordered),
// followed by a top-level ordered item: nested items are expected at depth 2
// and the result is a flat sequence of list-item elements.
// NOTE(review): indentation inside the literal is significant for nesting and
// for the expected ranges; do not reformat it.
1839 #[gpui::test]
1840 async fn test_list_with_indented_task() {
1841 let parsed = parse(
1842 "\
1843- [ ] TODO
1844 - [x] Checked
1845 - Unordered
1846 1. Number 1
1847 1. Number 2
18481. Number A
1849",
1850 )
1851 .await;
1852
// The second nested "1." line is expected as `Ordered(2)`, i.e. the expected
// number is not the literal "1" written in the source.
1853 assert_eq!(
1854 parsed.children,
1855 vec![
1856 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1857 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1858 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1859 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1860 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1861 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1862 ],
1863 );
1864 }
1865
// Two task items separated by a blank line: both are still expected to parse
// as depth-1 task list items.
1866 #[gpui::test]
1867 async fn test_list_with_linebreak_is_handled_correctly() {
1868 let parsed = parse(
1869 "\
1870- [ ] Task 1
1871
1872- [x] Task 2
1873",
1874 )
1875 .await;
1876
1877 assert_eq!(
1878 parsed.children,
1879 vec![
1880 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1881 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1882 ],
1883 );
1884 }
1885
// Three lists in sequence (bullet, ordered, and a deeply nested mixed list):
// the expected output is a flat sequence of list-item elements whose `depth`
// (1 through 4) records the nesting level.
// NOTE(review): indentation inside the literal is significant for nesting and
// for the expected ranges; do not reformat it.
1886 #[gpui::test]
1887 async fn test_list_nested() {
1888 let parsed = parse(
1889 "\
1890* Item 1
1891* Item 2
1892* Item 3
1893
18941. Hello
18951. Two
1896 1. Three
18972. Four
18983. Five
1899
1900* First
1901 1. Hello
1902 1. Goodbyte
1903 - Inner
1904 - Inner
1905 2. Goodbyte
1906 - Next item empty
1907 -
1908* Last
1909",
1910 )
1911 .await;
1912
// Expected ordered numbers count up from the list's first number (1, 2, 3, 4)
// rather than repeating the literals written in the source, and the empty
// bullet near the end is expected as a list item with no content.
1913 assert_eq!(
1914 parsed.children,
1915 vec![
1916 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1917 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1918 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1919 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1920 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1921 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1922 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1923 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1924 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1925 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1926 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1927 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1928 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1929 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1930 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1931 list_item(186..190, 3, Unordered, vec![]),
1932 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1933 ]
1934 );
1935 }
1936
// A single bullet item that contains two paragraphs separated by a blank
// line: both paragraphs are expected inside the same list item.
// NOTE(review): indentation inside the literal is significant; do not reformat it.
1937 #[gpui::test]
1938 async fn test_list_with_nested_content() {
1939 let parsed = parse(
1940 "\
1941* This is a list item with two paragraphs.
1942
1943 This is the second paragraph in the list item.
1944",
1945 )
1946 .await;
1947
// NOTE(review): the second paragraph's expected range ends at 97 while the
// enclosing list item's range ends at 96 — confirm this is intended.
1948 assert_eq!(
1949 parsed.children,
1950 vec![list_item(
1951 0..96,
1952 1,
1953 Unordered,
1954 vec![
1955 p("This is a list item with two paragraphs.", 4..44),
1956 p("This is the second paragraph in the list item.", 50..97)
1957 ],
1958 ),],
1959 );
1960 }
1961
// A bullet item whose text contains inline HTML tags: the expected paragraph
// text keeps the tag's inner content ("tag") but not the `<sometag>` markup.
// NOTE(review): spacing inside the literal presumably affects the expected
// ranges; do not reformat it.
1962 #[gpui::test]
1963 async fn test_list_item_with_inline_html() {
1964 let parsed = parse(
1965 "\
1966* This is a list item with an inline HTML <sometag>tag</sometag>.
1967",
1968 )
1969 .await;
1970
1971 assert_eq!(
1972 parsed.children,
1973 vec![list_item(
1974 0..67,
1975 1,
1976 Unordered,
1977 vec![p("This is a list item with an inline HTML tag.", 4..44),],
1978 ),],
1979 );
1980 }
1981
// Ordered lists nested three deep with a free-standing paragraph ("text") in
// between: the paragraph is expected as its own element in the flat output,
// between the depth-3 item and the following depth-2 item.
// NOTE(review): indentation inside the literal is significant for nesting and
// for the expected ranges; do not reformat it.
1982 #[gpui::test]
1983 async fn test_nested_list_with_paragraph_inside() {
1984 let parsed = parse(
1985 "\
19861. a
1987 1. b
1988 1. c
1989
1990 text
1991
1992 1. d
1993",
1994 )
1995 .await;
1996
// The item "d" is expected as `Ordered(1)` at depth 2, i.e. numbering starts
// over after the interrupting paragraph.
1997 assert_eq!(
1998 parsed.children,
1999 vec![
2000 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2001 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2002 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2003 p("text", 32..37),
2004 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2005 ],
2006 );
2007 }
2008
// Bullet items whose content starts with inline code, bold text and a link:
// each is expected as a list item with one paragraph whose text is the plain
// content ("code", "bold", "link") and whose range covers the markup.
2009 #[gpui::test]
2010 async fn test_list_with_leading_text() {
2011 let parsed = parse(
2012 "\
2013* `code`
2014* **bold**
2015* [link](https://example.com)
2016",
2017 )
2018 .await;
2019
2020 assert_eq!(
2021 parsed.children,
2022 vec![
2023 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2024 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2025 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2026 ],
2027 );
2028 }
2029
2030 #[gpui::test]
2031 async fn test_simple_block_quote() {
2032 let parsed = parse("> Simple block quote with **styled text**").await;
2033
2034 assert_eq!(
2035 parsed.children,
2036 vec![block_quote(
2037 vec![p("Simple block quote with styled text", 2..41)],
2038 0..41
2039 )]
2040 );
2041 }
2042
// A block quote spanning several `>` lines with a heading, a two-line
// paragraph and, after an empty `>` line, a second paragraph.
// NOTE(review): the literal's exact bytes presumably determine the expected
// ranges; do not reformat it.
2043 #[gpui::test]
2044 async fn test_simple_block_quote_with_multiple_lines() {
2045 let parsed = parse(
2046 "\
2047> # Heading
2048> More
2049> text
2050>
2051> More text
2052",
2053 )
2054 .await;
2055
// "More" / "text" on consecutive quoted lines are expected to merge into one
// paragraph with the text "More text".
2056 assert_eq!(
2057 parsed.children,
2058 vec![block_quote(
2059 vec![
2060 h1(text("Heading", 4..11), 2..12),
2061 p("More text", 14..26),
2062 p("More text", 30..40)
2063 ],
2064 0..40
2065 )]
2066 );
2067 }
2068
// A block quote containing a paragraph, a nested block quote with a heading,
// and another paragraph, followed by an unquoted paragraph.
// NOTE(review): the literal's exact bytes presumably determine the expected
// ranges; do not reformat it.
2069 #[gpui::test]
2070 async fn test_nested_block_quote() {
2071 let parsed = parse(
2072 "\
2073> A
2074>
2075> > # B
2076>
2077> C
2078
2079More text
2080",
2081 )
2082 .await;
2083
// Unlike list items, the nested quote is expected as a child of the outer
// quote; the trailing "More text" is a sibling of the outer quote.
2084 assert_eq!(
2085 parsed.children,
2086 vec![
2087 block_quote(
2088 vec![
2089 p("A", 2..4),
2090 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2091 p("C", 18..20)
2092 ],
2093 0..20
2094 ),
2095 p("More text", 21..31)
2096 ]
2097 );
2098 }
2099
// A fenced code block without a language tag: expects a code block element
// with `language` and `highlights` both `None`, and contents without the
// fences or the final newline.
// NOTE(review): whitespace inside the code literal is significant for both the
// expected contents and the 0..35 range; do not reformat it.
2100 #[gpui::test]
2101 async fn test_code_block() {
2102 let parsed = parse(
2103 "\
2104```
2105fn main() {
2106 return 0;
2107}
2108```
2109",
2110 )
2111 .await;
2112
// Arguments to `code_block` are: language, contents, source range, highlights.
2113 assert_eq!(
2114 parsed.children,
2115 vec![code_block(
2116 None,
2117 "fn main() {\n return 0;\n}",
2118 0..35,
2119 None
2120 )]
2121 );
2122 }
2123
// A fenced code block tagged `rust`, parsed with a test language registry
// that has the Rust language registered: expects `language` to be
// `Some("rust")` and `highlights` to be `Some(vec![])`.
// NOTE(review): whitespace inside the code literal is significant for both the
// expected contents and the 0..39 range; do not reformat it.
2124 #[gpui::test]
2125 async fn test_code_block_with_language(executor: BackgroundExecutor) {
// Build a registry on the test executor and register the Rust test language.
2126 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2127 language_registry.add(rust_lang());
2128
// Call `parse_markdown` directly (not the `parse` helper) so the registry can be passed.
2129 let parsed = parse_markdown(
2130 "\
2131```rust
2132fn main() {
2133 return 0;
2134}
2135```
2136",
2137 None,
2138 Some(language_registry),
2139 )
2140 .await;
2141
2142 assert_eq!(
2143 parsed.children,
2144 vec![code_block(
2145 Some("rust".to_string()),
2146 "fn main() {\n return 0;\n}",
2147 0..39,
2148 Some(vec![])
2149 )]
2150 );
2151 }
2152
2153 fn rust_lang() -> Arc<Language> {
2154 Arc::new(Language::new(
2155 LanguageConfig {
2156 name: "Rust".into(),
2157 matcher: LanguageMatcher {
2158 path_suffixes: vec!["rs".into()],
2159 ..Default::default()
2160 },
2161 collapsed_placeholder: " /* ... */ ".to_string(),
2162 ..Default::default()
2163 },
2164 Some(tree_sitter_rust::LANGUAGE.into()),
2165 ))
2166 }
2167
2168 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2169 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2170 source_range,
2171 level: HeadingLevel::H1,
2172 contents,
2173 })
2174 }
2175
2176 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2177 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2178 source_range,
2179 level: HeadingLevel::H2,
2180 contents,
2181 })
2182 }
2183
2184 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2185 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2186 source_range,
2187 level: HeadingLevel::H3,
2188 contents,
2189 })
2190 }
2191
2192 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2193 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2194 }
2195
2196 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2197 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2198 highlights: Vec::new(),
2199 region_ranges: Vec::new(),
2200 regions: Vec::new(),
2201 source_range,
2202 contents: contents.to_string(),
2203 })]
2204 }
2205
2206 fn block_quote(
2207 children: Vec<ParsedMarkdownElement>,
2208 source_range: Range<usize>,
2209 ) -> ParsedMarkdownElement {
2210 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2211 source_range,
2212 children,
2213 })
2214 }
2215
2216 fn code_block(
2217 language: Option<String>,
2218 code: &str,
2219 source_range: Range<usize>,
2220 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2221 ) -> ParsedMarkdownElement {
2222 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2223 source_range,
2224 language,
2225 contents: code.to_string().into(),
2226 highlights,
2227 })
2228 }
2229
2230 fn list_item(
2231 source_range: Range<usize>,
2232 depth: u16,
2233 item_type: ParsedMarkdownListItemType,
2234 content: Vec<ParsedMarkdownElement>,
2235 ) -> ParsedMarkdownElement {
2236 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2237 source_range,
2238 item_type,
2239 depth,
2240 content,
2241 })
2242 }
2243
2244 fn table(
2245 source_range: Range<usize>,
2246 header: ParsedMarkdownTableRow,
2247 body: Vec<ParsedMarkdownTableRow>,
2248 ) -> ParsedMarkdownTable {
2249 ParsedMarkdownTable {
2250 column_alignments: Vec::new(),
2251 source_range,
2252 header,
2253 body,
2254 }
2255 }
2256
2257 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2258 ParsedMarkdownTableRow { children }
2259 }
2260
2261 impl PartialEq for ParsedMarkdownTable {
2262 fn eq(&self, other: &Self) -> bool {
2263 self.source_range == other.source_range
2264 && self.header == other.header
2265 && self.body == other.body
2266 }
2267 }
2268
2269 impl PartialEq for ParsedMarkdownText {
2270 fn eq(&self, other: &Self) -> bool {
2271 self.source_range == other.source_range && self.contents == other.contents
2272 }
2273 }
2274}