1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{cell::RefCell, collections::HashMap, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
13
14pub async fn parse_markdown(
15 markdown_input: &str,
16 file_location_directory: Option<PathBuf>,
17 language_registry: Option<Arc<LanguageRegistry>>,
18) -> ParsedMarkdown {
19 let mut options = Options::all();
20 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
21
22 let parser = Parser::new_ext(markdown_input, options);
23 let parser = MarkdownParser::new(
24 parser.into_offset_iter().collect(),
25 file_location_directory,
26 language_registry,
27 );
28 let renderer = parser.parse_document().await;
29 ParsedMarkdown {
30 children: renderer.parsed,
31 }
32}
33
34fn cleanup_html(source: &str) -> Vec<u8> {
35 let mut writer = std::io::Cursor::new(Vec::new());
36 let mut reader = std::io::Cursor::new(source);
37 let mut minify = Minifier::new(
38 &mut writer,
39 MinifierOptions {
40 omit_doctype: true,
41 collapse_whitespace: true,
42 ..Default::default()
43 },
44 );
45 if let Ok(()) = minify.minify(&mut reader) {
46 writer.into_inner()
47 } else {
48 source.bytes().collect()
49 }
50}
51
/// Cursor-based parser over a pre-collected stream of pulldown-cmark events.
struct MarkdownParser<'a> {
    /// Every event of the document paired with its range in the markdown source
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` and `Image::identify` when resolving destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks, when present
    language_registry: Option<Arc<LanguageRegistry>>,
}
61
/// A list item under construction while `parse_list` walks the event stream.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered, or a task
    item_type: ParsedMarkdownListItemType,
}
66
67impl Default for MarkdownListItem {
68 fn default() -> Self {
69 Self {
70 content: Vec::new(),
71 item_type: ParsedMarkdownListItemType::Unordered,
72 }
73 }
74}
75
76impl<'a> MarkdownParser<'a> {
77 fn new(
78 tokens: Vec<(Event<'a>, Range<usize>)>,
79 file_location_directory: Option<PathBuf>,
80 language_registry: Option<Arc<LanguageRegistry>>,
81 ) -> Self {
82 Self {
83 tokens,
84 file_location_directory,
85 language_registry,
86 cursor: 0,
87 parsed: vec![],
88 }
89 }
90
91 fn eof(&self) -> bool {
92 if self.tokens.is_empty() {
93 return true;
94 }
95 self.cursor >= self.tokens.len() - 1
96 }
97
98 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
99 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
100 return self.tokens.last();
101 }
102 self.tokens.get(self.cursor + steps)
103 }
104
105 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
106 if self.cursor == 0 || self.cursor > self.tokens.len() {
107 return None;
108 }
109 self.tokens.get(self.cursor - 1)
110 }
111
112 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
113 self.peek(0)
114 }
115
116 fn current_event(&self) -> Option<&Event<'_>> {
117 self.current().map(|(event, _)| event)
118 }
119
120 fn is_text_like(event: &Event) -> bool {
121 match event {
122 Event::Text(_)
123 // Represent an inline code block
124 | Event::Code(_)
125 | Event::Html(_)
126 | Event::InlineHtml(_)
127 | Event::FootnoteReference(_)
128 | Event::Start(Tag::Link { .. })
129 | Event::Start(Tag::Emphasis)
130 | Event::Start(Tag::Strong)
131 | Event::Start(Tag::Strikethrough)
132 | Event::Start(Tag::Image { .. }) => {
133 true
134 }
135 _ => false,
136 }
137 }
138
139 async fn parse_document(mut self) -> Self {
140 while !self.eof() {
141 if let Some(block) = self.parse_block().await {
142 self.parsed.extend(block);
143 } else {
144 self.cursor += 1;
145 }
146 }
147 self
148 }
149
150 #[async_recursion]
151 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
152 let (current, source_range) = self.current().unwrap();
153 let source_range = source_range.clone();
154 match current {
155 Event::Start(tag) => match tag {
156 Tag::Paragraph => {
157 self.cursor += 1;
158 let text = self.parse_text(false, Some(source_range));
159 Some(vec![ParsedMarkdownElement::Paragraph(text)])
160 }
161 Tag::Heading { level, .. } => {
162 let level = *level;
163 self.cursor += 1;
164 let heading = self.parse_heading(level);
165 Some(vec![ParsedMarkdownElement::Heading(heading)])
166 }
167 Tag::Table(alignment) => {
168 let alignment = alignment.clone();
169 self.cursor += 1;
170 let table = self.parse_table(alignment);
171 Some(vec![ParsedMarkdownElement::Table(table)])
172 }
173 Tag::List(order) => {
174 let order = *order;
175 self.cursor += 1;
176 let list = self.parse_list(order).await;
177 Some(list)
178 }
179 Tag::BlockQuote(_kind) => {
180 self.cursor += 1;
181 let block_quote = self.parse_block_quote().await;
182 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
183 }
184 Tag::CodeBlock(kind) => {
185 let language = match kind {
186 pulldown_cmark::CodeBlockKind::Indented => None,
187 pulldown_cmark::CodeBlockKind::Fenced(language) => {
188 if language.is_empty() {
189 None
190 } else {
191 Some(language.to_string())
192 }
193 }
194 };
195
196 self.cursor += 1;
197
198 let code_block = self.parse_code_block(language).await?;
199 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
200 }
201 Tag::HtmlBlock => {
202 self.cursor += 1;
203
204 Some(self.parse_html_block().await)
205 }
206 _ => None,
207 },
208 Event::Rule => {
209 self.cursor += 1;
210 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
211 }
212 _ => None,
213 }
214 }
215
216 fn parse_text(
217 &mut self,
218 should_complete_on_soft_break: bool,
219 source_range: Option<Range<usize>>,
220 ) -> MarkdownParagraph {
221 let source_range = source_range.unwrap_or_else(|| {
222 self.current()
223 .map(|(_, range)| range.clone())
224 .unwrap_or_default()
225 });
226
227 let mut markdown_text_like = Vec::new();
228 let mut text = String::new();
229 let mut bold_depth = 0;
230 let mut italic_depth = 0;
231 let mut strikethrough_depth = 0;
232 let mut link: Option<Link> = None;
233 let mut image: Option<Image> = None;
234 let mut region_ranges: Vec<Range<usize>> = vec![];
235 let mut regions: Vec<ParsedRegion> = vec![];
236 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
237 let mut link_urls: Vec<String> = vec![];
238 let mut link_ranges: Vec<Range<usize>> = vec![];
239
240 loop {
241 if self.eof() {
242 break;
243 }
244
245 let (current, _) = self.current().unwrap();
246 let prev_len = text.len();
247 match current {
248 Event::SoftBreak => {
249 if should_complete_on_soft_break {
250 break;
251 }
252 text.push(' ');
253 }
254
255 Event::HardBreak => {
256 text.push('\n');
257 }
258
259 // We want to ignore any inline HTML tags in the text but keep
260 // the text between them
261 Event::InlineHtml(_) => {}
262
263 Event::Text(t) => {
264 text.push_str(t.as_ref());
265 let mut style = MarkdownHighlightStyle::default();
266
267 if bold_depth > 0 {
268 style.weight = FontWeight::BOLD;
269 }
270
271 if italic_depth > 0 {
272 style.italic = true;
273 }
274
275 if strikethrough_depth > 0 {
276 style.strikethrough = true;
277 }
278
279 let last_run_len = if let Some(link) = link.clone() {
280 region_ranges.push(prev_len..text.len());
281 regions.push(ParsedRegion {
282 code: false,
283 link: Some(link),
284 });
285 style.link = true;
286 prev_len
287 } else {
288 // Manually scan for links
289 let mut finder = linkify::LinkFinder::new();
290 finder.kinds(&[linkify::LinkKind::Url]);
291 let mut last_link_len = prev_len;
292 for link in finder.links(t) {
293 let start = link.start();
294 let end = link.end();
295 let range = (prev_len + start)..(prev_len + end);
296 link_ranges.push(range.clone());
297 link_urls.push(link.as_str().to_string());
298
299 // If there is a style before we match a link, we have to add this to the highlighted ranges
300 if style != MarkdownHighlightStyle::default()
301 && last_link_len < link.start()
302 {
303 highlights.push((
304 last_link_len..link.start(),
305 MarkdownHighlight::Style(style.clone()),
306 ));
307 }
308
309 highlights.push((
310 range.clone(),
311 MarkdownHighlight::Style(MarkdownHighlightStyle {
312 underline: true,
313 ..style
314 }),
315 ));
316 region_ranges.push(range.clone());
317 regions.push(ParsedRegion {
318 code: false,
319 link: Some(Link::Web {
320 url: link.as_str().to_string(),
321 }),
322 });
323 last_link_len = end;
324 }
325 last_link_len
326 };
327
328 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
329 let mut new_highlight = true;
330 if let Some((last_range, last_style)) = highlights.last_mut()
331 && last_range.end == last_run_len
332 && last_style == &MarkdownHighlight::Style(style.clone())
333 {
334 last_range.end = text.len();
335 new_highlight = false;
336 }
337 if new_highlight {
338 highlights.push((
339 last_run_len..text.len(),
340 MarkdownHighlight::Style(style.clone()),
341 ));
342 }
343 }
344 }
345 Event::Code(t) => {
346 text.push_str(t.as_ref());
347 region_ranges.push(prev_len..text.len());
348
349 if link.is_some() {
350 highlights.push((
351 prev_len..text.len(),
352 MarkdownHighlight::Style(MarkdownHighlightStyle {
353 link: true,
354 ..Default::default()
355 }),
356 ));
357 }
358 regions.push(ParsedRegion {
359 code: true,
360 link: link.clone(),
361 });
362 }
363 Event::Start(tag) => match tag {
364 Tag::Emphasis => italic_depth += 1,
365 Tag::Strong => bold_depth += 1,
366 Tag::Strikethrough => strikethrough_depth += 1,
367 Tag::Link { dest_url, .. } => {
368 link = Link::identify(
369 self.file_location_directory.clone(),
370 dest_url.to_string(),
371 );
372 }
373 Tag::Image { dest_url, .. } => {
374 if !text.is_empty() {
375 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
376 source_range: source_range.clone(),
377 contents: text.into(),
378 highlights: highlights.clone(),
379 region_ranges: region_ranges.clone(),
380 regions: regions.clone(),
381 });
382 text = String::new();
383 highlights = vec![];
384 region_ranges = vec![];
385 regions = vec![];
386 markdown_text_like.push(parsed_regions);
387 }
388 image = Image::identify(
389 dest_url.to_string(),
390 source_range.clone(),
391 self.file_location_directory.clone(),
392 );
393 }
394 _ => {
395 break;
396 }
397 },
398
399 Event::End(tag) => match tag {
400 TagEnd::Emphasis => italic_depth -= 1,
401 TagEnd::Strong => bold_depth -= 1,
402 TagEnd::Strikethrough => strikethrough_depth -= 1,
403 TagEnd::Link => {
404 link = None;
405 }
406 TagEnd::Image => {
407 if let Some(mut image) = image.take() {
408 if !text.is_empty() {
409 image.set_alt_text(std::mem::take(&mut text).into());
410 }
411 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
412 }
413 }
414 TagEnd::Paragraph => {
415 self.cursor += 1;
416 break;
417 }
418 _ => {
419 break;
420 }
421 },
422 _ => {
423 break;
424 }
425 }
426
427 self.cursor += 1;
428 }
429 if !text.is_empty() {
430 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
431 source_range,
432 contents: text.into(),
433 highlights,
434 regions,
435 region_ranges,
436 }));
437 }
438 markdown_text_like
439 }
440
441 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
442 let (_event, source_range) = self.previous().unwrap();
443 let source_range = source_range.clone();
444 let text = self.parse_text(true, None);
445
446 // Advance past the heading end tag
447 self.cursor += 1;
448
449 ParsedMarkdownHeading {
450 source_range,
451 level: match level {
452 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
453 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
454 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
455 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
456 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
457 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
458 },
459 contents: text,
460 }
461 }
462
463 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
464 let (_event, source_range) = self.previous().unwrap();
465 let source_range = source_range.clone();
466 let mut header = ParsedMarkdownTableRow::new();
467 let mut body = vec![];
468 let mut current_row = vec![];
469 let mut in_header = true;
470 let column_alignments = alignment.iter().map(Self::convert_alignment).collect();
471
472 loop {
473 if self.eof() {
474 break;
475 }
476
477 let (current, source_range) = self.current().unwrap();
478 let source_range = source_range.clone();
479 match current {
480 Event::Start(Tag::TableHead)
481 | Event::Start(Tag::TableRow)
482 | Event::End(TagEnd::TableCell) => {
483 self.cursor += 1;
484 }
485 Event::Start(Tag::TableCell) => {
486 self.cursor += 1;
487 let cell_contents = self.parse_text(false, Some(source_range));
488 current_row.push(cell_contents);
489 }
490 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
491 self.cursor += 1;
492 let new_row = std::mem::take(&mut current_row);
493 if in_header {
494 header.children = new_row;
495 in_header = false;
496 } else {
497 let row = ParsedMarkdownTableRow::with_children(new_row);
498 body.push(row);
499 }
500 }
501 Event::End(TagEnd::Table) => {
502 self.cursor += 1;
503 break;
504 }
505 _ => {
506 break;
507 }
508 }
509 }
510
511 ParsedMarkdownTable {
512 source_range,
513 header,
514 body,
515 column_alignments,
516 }
517 }
518
519 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
520 match alignment {
521 Alignment::None => ParsedMarkdownTableAlignment::None,
522 Alignment::Left => ParsedMarkdownTableAlignment::Left,
523 Alignment::Center => ParsedMarkdownTableAlignment::Center,
524 Alignment::Right => ParsedMarkdownTableAlignment::Right,
525 }
526 }
527
    /// Parses a list whose start tag was just consumed, including every nested
    /// list, into a single flat vector of elements.
    ///
    /// Nesting is not represented structurally: each `ListItem` records its own
    /// `depth`, and a parent item is inserted in front of the nested items that
    /// were completed before it. `order` is the starting number of an ordered
    /// list, or `None` for an unordered one.
    ///
    /// Panics if there is no previous token.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        let mut items = Vec::new();
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends
        let mut order_stack = Vec::new();

        // Per depth: the index in `items` where the still-open item has to be
        // inserted once it completes, so that it precedes its nested items
        let mut insertion_indices = FxHashMap::default();
        // Per depth: the source range of the still-open item, cut off at the
        // start of its first nested list
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The list this call was started for has been closed
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over ordered numbering
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (or with something that is
                            // not a block at all, in which case nothing is added here)
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    // The next item of an ordered list gets the following number
                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                        });

                        // An item that contained a nested list goes in front of the
                        // nested items that were pushed while it was still open
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not the start of any block: skip the token so the loop makes progress
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
678
679 #[async_recursion]
680 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
681 let (_event, source_range) = self.previous().unwrap();
682 let source_range = source_range.clone();
683 let mut nested_depth = 1;
684
685 let mut children: Vec<ParsedMarkdownElement> = vec![];
686
687 while !self.eof() {
688 let block = self.parse_block().await;
689
690 if let Some(block) = block {
691 children.extend(block);
692 } else {
693 break;
694 }
695
696 if self.eof() {
697 break;
698 }
699
700 let (current, _source_range) = self.current().unwrap();
701 match current {
702 // This is a nested block quote.
703 // Record that we're in a nested block quote and continue parsing.
704 // We don't need to advance the cursor since the next
705 // call to `parse_block` will handle it.
706 Event::Start(Tag::BlockQuote(_kind)) => {
707 nested_depth += 1;
708 }
709 Event::End(TagEnd::BlockQuote(_kind)) => {
710 nested_depth -= 1;
711 if nested_depth == 0 {
712 self.cursor += 1;
713 break;
714 }
715 }
716 _ => {}
717 };
718 }
719
720 ParsedMarkdownBlockQuote {
721 source_range,
722 children,
723 }
724 }
725
726 async fn parse_code_block(
727 &mut self,
728 language: Option<String>,
729 ) -> Option<ParsedMarkdownCodeBlock> {
730 let Some((_event, source_range)) = self.previous() else {
731 return None;
732 };
733
734 let source_range = source_range.clone();
735 let mut code = String::new();
736
737 while !self.eof() {
738 let Some((current, _source_range)) = self.current() else {
739 break;
740 };
741
742 match current {
743 Event::Text(text) => {
744 code.push_str(text);
745 self.cursor += 1;
746 }
747 Event::End(TagEnd::CodeBlock) => {
748 self.cursor += 1;
749 break;
750 }
751 _ => {
752 break;
753 }
754 }
755 }
756
757 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
758
759 let highlights = if let Some(language) = &language {
760 if let Some(registry) = &self.language_registry {
761 let rope: language::Rope = code.as_str().into();
762 registry
763 .language_for_name_or_extension(language)
764 .await
765 .map(|l| l.highlight_text(&rope, 0..code.len()))
766 .ok()
767 } else {
768 None
769 }
770 } else {
771 None
772 };
773
774 Some(ParsedMarkdownCodeBlock {
775 source_range,
776 contents: code.into(),
777 language,
778 highlights,
779 })
780 }
781
782 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
783 let mut elements = Vec::new();
784 let Some((_event, _source_range)) = self.previous() else {
785 return elements;
786 };
787
788 let mut html_source_range_start = None;
789 let mut html_source_range_end = None;
790 let mut html_buffer = String::new();
791
792 while !self.eof() {
793 let Some((current, source_range)) = self.current() else {
794 break;
795 };
796 let source_range = source_range.clone();
797 match current {
798 Event::Html(html) => {
799 html_source_range_start.get_or_insert(source_range.start);
800 html_source_range_end = Some(source_range.end);
801 html_buffer.push_str(html);
802 self.cursor += 1;
803 }
804 Event::End(TagEnd::CodeBlock) => {
805 self.cursor += 1;
806 break;
807 }
808 _ => {
809 break;
810 }
811 }
812 }
813
814 let bytes = cleanup_html(&html_buffer);
815
816 let mut cursor = std::io::Cursor::new(bytes);
817 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
818 .from_utf8()
819 .read_from(&mut cursor)
820 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
821 {
822 self.parse_html_node(start..end, &dom.document, &mut elements);
823 }
824
825 elements
826 }
827
828 fn parse_html_node(
829 &self,
830 source_range: Range<usize>,
831 node: &Rc<markup5ever_rcdom::Node>,
832 elements: &mut Vec<ParsedMarkdownElement>,
833 ) {
834 match &node.data {
835 markup5ever_rcdom::NodeData::Document => {
836 self.consume_children(source_range, node, elements);
837 }
838 markup5ever_rcdom::NodeData::Text { contents } => {
839 elements.push(ParsedMarkdownElement::Paragraph(vec![
840 MarkdownParagraphChunk::Text(ParsedMarkdownText {
841 source_range,
842 regions: Vec::default(),
843 region_ranges: Vec::default(),
844 highlights: Vec::default(),
845 contents: contents.borrow().to_string().into(),
846 }),
847 ]));
848 }
849 markup5ever_rcdom::NodeData::Comment { .. } => {}
850 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
851 if local_name!("img") == name.local {
852 if let Some(image) = self.extract_image(source_range, attrs) {
853 elements.push(ParsedMarkdownElement::Image(image));
854 }
855 } else if local_name!("p") == name.local {
856 let mut paragraph = MarkdownParagraph::new();
857 self.parse_paragraph(source_range, node, &mut paragraph);
858
859 if !paragraph.is_empty() {
860 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
861 }
862 } else if matches!(
863 name.local,
864 local_name!("h1")
865 | local_name!("h2")
866 | local_name!("h3")
867 | local_name!("h4")
868 | local_name!("h5")
869 | local_name!("h6")
870 ) {
871 let mut paragraph = MarkdownParagraph::new();
872 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
873
874 if !paragraph.is_empty() {
875 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
876 source_range,
877 level: match name.local {
878 local_name!("h1") => HeadingLevel::H1,
879 local_name!("h2") => HeadingLevel::H2,
880 local_name!("h3") => HeadingLevel::H3,
881 local_name!("h4") => HeadingLevel::H4,
882 local_name!("h5") => HeadingLevel::H5,
883 local_name!("h6") => HeadingLevel::H6,
884 _ => unreachable!(),
885 },
886 contents: paragraph,
887 }));
888 }
889 } else if local_name!("blockquote") == name.local {
890 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
891 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
892 }
893 } else if local_name!("table") == name.local {
894 if let Some(table) = self.extract_html_table(node, source_range) {
895 elements.push(ParsedMarkdownElement::Table(table));
896 }
897 } else {
898 self.consume_children(source_range, node, elements);
899 }
900 }
901 _ => {}
902 }
903 }
904
905 fn parse_paragraph(
906 &self,
907 source_range: Range<usize>,
908 node: &Rc<markup5ever_rcdom::Node>,
909 paragraph: &mut MarkdownParagraph,
910 ) {
911 match &node.data {
912 markup5ever_rcdom::NodeData::Text { contents } => {
913 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
914 source_range,
915 regions: Vec::default(),
916 region_ranges: Vec::default(),
917 highlights: Vec::default(),
918 contents: contents.borrow().to_string().into(),
919 }));
920 }
921 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
922 if local_name!("img") == name.local {
923 if let Some(image) = self.extract_image(source_range, attrs) {
924 paragraph.push(MarkdownParagraphChunk::Image(image));
925 }
926 } else {
927 self.consume_paragraph(source_range, node, paragraph);
928 }
929 }
930 _ => {}
931 }
932 }
933
934 fn consume_paragraph(
935 &self,
936 source_range: Range<usize>,
937 node: &Rc<markup5ever_rcdom::Node>,
938 paragraph: &mut MarkdownParagraph,
939 ) {
940 for node in node.children.borrow().iter() {
941 self.parse_paragraph(source_range.clone(), node, paragraph);
942 }
943 }
944
945 fn consume_children(
946 &self,
947 source_range: Range<usize>,
948 node: &Rc<markup5ever_rcdom::Node>,
949 elements: &mut Vec<ParsedMarkdownElement>,
950 ) {
951 for node in node.children.borrow().iter() {
952 self.parse_html_node(source_range.clone(), node, elements);
953 }
954 }
955
956 fn attr_value(
957 attrs: &RefCell<Vec<html5ever::Attribute>>,
958 name: html5ever::LocalName,
959 ) -> Option<String> {
960 attrs.borrow().iter().find_map(|attr| {
961 if attr.name.local == name {
962 Some(attr.value.to_string())
963 } else {
964 None
965 }
966 })
967 }
968
969 fn extract_styles_from_attributes(
970 attrs: &RefCell<Vec<html5ever::Attribute>>,
971 ) -> HashMap<String, String> {
972 let mut styles = HashMap::new();
973
974 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
975 for decl in style.split(';') {
976 let mut parts = decl.splitn(2, ':');
977 if let Some((key, value)) = parts.next().zip(parts.next()) {
978 styles.insert(
979 key.trim().to_lowercase().to_string(),
980 value.trim().to_string(),
981 );
982 }
983 }
984 }
985
986 styles
987 }
988
989 fn extract_image(
990 &self,
991 source_range: Range<usize>,
992 attrs: &RefCell<Vec<html5ever::Attribute>>,
993 ) -> Option<Image> {
994 let src = Self::attr_value(attrs, local_name!("src"))?;
995
996 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
997
998 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
999 image.set_alt_text(alt.into());
1000 }
1001
1002 let styles = Self::extract_styles_from_attributes(attrs);
1003
1004 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1005 .or_else(|| styles.get("width").cloned())
1006 .and_then(|width| Self::parse_html_element_dimension(&width))
1007 {
1008 image.set_width(width);
1009 }
1010
1011 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1012 .or_else(|| styles.get("height").cloned())
1013 .and_then(|height| Self::parse_html_element_dimension(&height))
1014 {
1015 image.set_height(height);
1016 }
1017
1018 Some(image)
1019 }
1020
1021 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1022 if value.ends_with("%") {
1023 value
1024 .trim_end_matches("%")
1025 .parse::<f32>()
1026 .ok()
1027 .map(|value| relative(value / 100.))
1028 } else {
1029 value
1030 .trim_end_matches("px")
1031 .parse()
1032 .ok()
1033 .map(|value| px(value).into())
1034 }
1035 }
1036
1037 fn extract_html_blockquote(
1038 &self,
1039 node: &Rc<markup5ever_rcdom::Node>,
1040 source_range: Range<usize>,
1041 ) -> Option<ParsedMarkdownBlockQuote> {
1042 let mut children = Vec::new();
1043 self.consume_children(source_range.clone(), node, &mut children);
1044
1045 if children.is_empty() {
1046 None
1047 } else {
1048 Some(ParsedMarkdownBlockQuote {
1049 children,
1050 source_range,
1051 })
1052 }
1053 }
1054
1055 fn extract_html_table(
1056 &self,
1057 node: &Rc<markup5ever_rcdom::Node>,
1058 source_range: Range<usize>,
1059 ) -> Option<ParsedMarkdownTable> {
1060 let mut header_columns = Vec::new();
1061 let mut body_rows = Vec::new();
1062
1063 // node should be a thead or tbody element
1064 for node in node.children.borrow().iter() {
1065 match &node.data {
1066 markup5ever_rcdom::NodeData::Element { name, .. } => {
1067 if local_name!("thead") == name.local {
1068 // node should be a tr element
1069 for node in node.children.borrow().iter() {
1070 let mut paragraph = MarkdownParagraph::new();
1071 self.consume_paragraph(source_range.clone(), node, &mut paragraph);
1072
1073 for paragraph in paragraph.into_iter() {
1074 header_columns.push(vec![paragraph]);
1075 }
1076 }
1077 } else if local_name!("tbody") == name.local {
1078 // node should be a tr element
1079 for node in node.children.borrow().iter() {
1080 let mut row = MarkdownParagraph::new();
1081 self.consume_paragraph(source_range.clone(), node, &mut row);
1082 body_rows.push(ParsedMarkdownTableRow::with_children(
1083 row.into_iter().map(|column| vec![column]).collect(),
1084 ));
1085 }
1086 }
1087 }
1088 _ => {}
1089 }
1090 }
1091
1092 if !header_columns.is_empty() || !body_rows.is_empty() {
1093 Some(ParsedMarkdownTable {
1094 source_range,
1095 body: body_rows,
1096 column_alignments: Vec::default(),
1097 header: ParsedMarkdownTableRow::with_children(header_columns),
1098 })
1099 } else {
1100 None
1101 }
1102 }
1103}
1104
1105#[cfg(test)]
1106mod tests {
1107 use super::*;
1108 use ParsedMarkdownListItemType::*;
1109 use core::panic;
1110 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1111 use language::{
1112 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1113 };
1114 use pretty_assertions::assert_eq;
1115
1116 async fn parse(input: &str) -> ParsedMarkdown {
1117 parse_markdown(input, None, None).await
1118 }
1119
1120 #[gpui::test]
1121 async fn test_headings() {
1122 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1123
1124 assert_eq!(
1125 parsed.children,
1126 vec![
1127 h1(text("Heading one", 2..13), 0..14),
1128 h2(text("Heading two", 17..28), 14..29),
1129 h3(text("Heading three", 33..46), 29..46),
1130 ]
1131 );
1132 }
1133
1134 #[gpui::test]
1135 async fn test_newlines_dont_new_paragraphs() {
1136 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1137
1138 assert_eq!(
1139 parsed.children,
1140 vec![p("Some text that is bolded and italicized", 0..46)]
1141 );
1142 }
1143
1144 #[gpui::test]
1145 async fn test_heading_with_paragraph() {
1146 let parsed = parse("# Zed\nThe editor").await;
1147
1148 assert_eq!(
1149 parsed.children,
1150 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1151 );
1152 }
1153
1154 #[gpui::test]
1155 async fn test_double_newlines_do_new_paragraphs() {
1156 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1157
1158 assert_eq!(
1159 parsed.children,
1160 vec![
1161 p("Some text that is bolded", 0..29),
1162 p("and italicized", 31..47),
1163 ]
1164 );
1165 }
1166
1167 #[gpui::test]
1168 async fn test_bold_italic_text() {
1169 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1170
1171 assert_eq!(
1172 parsed.children,
1173 vec![p("Some text that is bolded and italicized", 0..45)]
1174 );
1175 }
1176
1177 #[gpui::test]
1178 async fn test_nested_bold_strikethrough_text() {
1179 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1180
1181 assert_eq!(parsed.children.len(), 1);
1182 assert_eq!(
1183 parsed.children[0],
1184 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1185 ParsedMarkdownText {
1186 source_range: 0..35,
1187 contents: "Some bostrikethroughld text".into(),
1188 highlights: Vec::new(),
1189 region_ranges: Vec::new(),
1190 regions: Vec::new(),
1191 }
1192 )])
1193 );
1194
1195 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1196 text
1197 } else {
1198 panic!("Expected a paragraph");
1199 };
1200
1201 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1202 text
1203 } else {
1204 panic!("Expected a text");
1205 };
1206
1207 assert_eq!(
1208 paragraph.highlights,
1209 vec![
1210 (
1211 5..7,
1212 MarkdownHighlight::Style(MarkdownHighlightStyle {
1213 weight: FontWeight::BOLD,
1214 ..Default::default()
1215 }),
1216 ),
1217 (
1218 7..20,
1219 MarkdownHighlight::Style(MarkdownHighlightStyle {
1220 weight: FontWeight::BOLD,
1221 strikethrough: true,
1222 ..Default::default()
1223 }),
1224 ),
1225 (
1226 20..22,
1227 MarkdownHighlight::Style(MarkdownHighlightStyle {
1228 weight: FontWeight::BOLD,
1229 ..Default::default()
1230 }),
1231 ),
1232 ]
1233 );
1234 }
1235
1236 #[gpui::test]
1237 async fn test_text_with_inline_html() {
1238 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1239
1240 assert_eq!(
1241 parsed.children,
1242 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1243 );
1244 }
1245
1246 #[gpui::test]
1247 async fn test_raw_links_detection() {
1248 let parsed = parse("Checkout this https://zed.dev link").await;
1249
1250 assert_eq!(
1251 parsed.children,
1252 vec![p("Checkout this https://zed.dev link", 0..34)]
1253 );
1254 }
1255
1256 #[gpui::test]
1257 async fn test_empty_image() {
1258 let parsed = parse("![]()").await;
1259
1260 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1261 text
1262 } else {
1263 panic!("Expected a paragraph");
1264 };
1265 assert_eq!(paragraph.len(), 0);
1266 }
1267
1268 #[gpui::test]
1269 async fn test_image_links_detection() {
1270 let parsed = parse("").await;
1271
1272 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1273 text
1274 } else {
1275 panic!("Expected a paragraph");
1276 };
1277 assert_eq!(
1278 paragraph[0],
1279 MarkdownParagraphChunk::Image(Image {
1280 source_range: 0..111,
1281 link: Link::Web {
1282 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1283 },
1284 alt_text: Some("test".into()),
1285 height: None,
1286 width: None,
1287 },)
1288 );
1289 }
1290
1291 #[gpui::test]
1292 async fn test_image_without_alt_text() {
1293 let parsed = parse("").await;
1294
1295 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1296 text
1297 } else {
1298 panic!("Expected a paragraph");
1299 };
1300 assert_eq!(
1301 paragraph[0],
1302 MarkdownParagraphChunk::Image(Image {
1303 source_range: 0..31,
1304 link: Link::Web {
1305 url: "http://example.com/foo.png".to_string(),
1306 },
1307 alt_text: None,
1308 height: None,
1309 width: None,
1310 },)
1311 );
1312 }
1313
1314 #[gpui::test]
1315 async fn test_image_with_alt_text_containing_formatting() {
1316 let parsed = parse("").await;
1317
1318 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1319 panic!("Expected a paragraph");
1320 };
1321 assert_eq!(
1322 chunks,
1323 &[MarkdownParagraphChunk::Image(Image {
1324 source_range: 0..44,
1325 link: Link::Web {
1326 url: "http://example.com/foo.png".to_string(),
1327 },
1328 alt_text: Some("foo bar baz".into()),
1329 height: None,
1330 width: None,
1331 }),],
1332 );
1333 }
1334
1335 #[gpui::test]
1336 async fn test_images_with_text_in_between() {
1337 let parsed = parse(
1338 "\nLorem Ipsum\n",
1339 )
1340 .await;
1341
1342 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1343 text
1344 } else {
1345 panic!("Expected a paragraph");
1346 };
1347 assert_eq!(
1348 chunks,
1349 &vec![
1350 MarkdownParagraphChunk::Image(Image {
1351 source_range: 0..81,
1352 link: Link::Web {
1353 url: "http://example.com/foo.png".to_string(),
1354 },
1355 alt_text: Some("foo".into()),
1356 height: None,
1357 width: None,
1358 }),
1359 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1360 source_range: 0..81,
1361 contents: " Lorem Ipsum ".into(),
1362 highlights: Vec::new(),
1363 region_ranges: Vec::new(),
1364 regions: Vec::new(),
1365 }),
1366 MarkdownParagraphChunk::Image(Image {
1367 source_range: 0..81,
1368 link: Link::Web {
1369 url: "http://example.com/bar.png".to_string(),
1370 },
1371 alt_text: Some("bar".into()),
1372 height: None,
1373 width: None,
1374 })
1375 ]
1376 );
1377 }
1378
// Table-driven check of `MarkdownParser::parse_html_element_dimension`:
// percentages map to fractions, `px` and unit-less numbers map to absolute
// pixels, and unparsable input maps to `None`.
#[test]
fn test_parse_html_element_dimension() {
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels =
        |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    let cases = [
        // Percentage values
        ("50%", fraction(0.5)),
        ("100%", fraction(1.0)),
        ("25%", fraction(0.25)),
        ("0%", fraction(0.0)),
        // Pixel values
        ("100px", pixels(100.0)),
        ("50px", pixels(50.0)),
        ("0px", pixels(0.0)),
        // Values without units are treated as pixels
        ("100", pixels(100.0)),
        ("42", pixels(42.0)),
        // Invalid values
        ("invalid", None),
        ("px", None),
        ("%", None),
        ("", None),
        ("abc%", None),
        ("abcpx", None),
        // Decimal values
        ("50.5%", fraction(0.505)),
        ("100.25px", pixels(100.25)),
        ("42.0", pixels(42.0)),
    ];

    for (input, expected) in cases {
        assert_eq!(MarkdownParser::parse_html_element_dimension(input), expected);
    }
}
1448
1449 #[gpui::test]
1450 async fn test_inline_html_image_tag() {
1451 let parsed =
1452 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
1453 .await;
1454
1455 assert_eq!(
1456 ParsedMarkdown {
1457 children: vec![ParsedMarkdownElement::Paragraph(vec![
1458 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1459 source_range: 0..71,
1460 contents: "Some text".into(),
1461 highlights: Default::default(),
1462 region_ranges: Default::default(),
1463 regions: Default::default()
1464 }),
1465 MarkdownParagraphChunk::Image(Image {
1466 source_range: 0..71,
1467 link: Link::Web {
1468 url: "http://example.com/foo.png".to_string(),
1469 },
1470 alt_text: None,
1471 height: None,
1472 width: None,
1473 }),
1474 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1475 source_range: 0..71,
1476 contents: " some more text".into(),
1477 highlights: Default::default(),
1478 region_ranges: Default::default(),
1479 regions: Default::default()
1480 }),
1481 ])]
1482 },
1483 parsed
1484 );
1485 }
1486
1487 #[gpui::test]
1488 async fn test_html_block_quote() {
1489 let parsed = parse(
1490 "<blockquote>
1491 <p>some description</p>
1492 </blockquote>",
1493 )
1494 .await;
1495
1496 assert_eq!(
1497 ParsedMarkdown {
1498 children: vec![block_quote(
1499 vec![ParsedMarkdownElement::Paragraph(text(
1500 "some description",
1501 0..76
1502 ))],
1503 0..76,
1504 )]
1505 },
1506 parsed
1507 );
1508 }
1509
1510 #[gpui::test]
1511 async fn test_html_nested_block_quote() {
1512 let parsed = parse(
1513 "<blockquote>
1514 <p>some description</p>
1515 <blockquote>
1516 <p>second description</p>
1517 </blockquote>
1518 </blockquote>",
1519 )
1520 .await;
1521
1522 assert_eq!(
1523 ParsedMarkdown {
1524 children: vec![block_quote(
1525 vec![
1526 ParsedMarkdownElement::Paragraph(text("some description", 0..173)),
1527 block_quote(
1528 vec![ParsedMarkdownElement::Paragraph(text(
1529 "second description",
1530 0..173
1531 ))],
1532 0..173,
1533 )
1534 ],
1535 0..173,
1536 )]
1537 },
1538 parsed
1539 );
1540 }
1541
1542 #[gpui::test]
1543 async fn test_html_table() {
1544 let parsed = parse(
1545 "<table>
1546 <thead>
1547 <tr>
1548 <th>Id</th>
1549 <th>Name</th>
1550 </tr>
1551 </thead>
1552 <tbody>
1553 <tr>
1554 <td>1</td>
1555 <td>Chris</td>
1556 </tr>
1557 <tr>
1558 <td>2</td>
1559 <td>Dennis</td>
1560 </tr>
1561 </tbody>
1562 </table>",
1563 )
1564 .await;
1565
1566 assert_eq!(
1567 ParsedMarkdown {
1568 children: vec![ParsedMarkdownElement::Table(table(
1569 0..366,
1570 row(vec![text("Id", 0..366), text("Name ", 0..366)]),
1571 vec![
1572 row(vec![text("1", 0..366), text("Chris", 0..366)]),
1573 row(vec![text("2", 0..366), text("Dennis", 0..366)]),
1574 ],
1575 ))],
1576 },
1577 parsed
1578 );
1579 }
1580
1581 #[gpui::test]
1582 async fn test_html_table_without_headings() {
1583 let parsed = parse(
1584 "<table>
1585 <tbody>
1586 <tr>
1587 <td>1</td>
1588 <td>Chris</td>
1589 </tr>
1590 <tr>
1591 <td>2</td>
1592 <td>Dennis</td>
1593 </tr>
1594 </tbody>
1595 </table>",
1596 )
1597 .await;
1598
1599 assert_eq!(
1600 ParsedMarkdown {
1601 children: vec![ParsedMarkdownElement::Table(table(
1602 0..240,
1603 row(vec![]),
1604 vec![
1605 row(vec![text("1", 0..240), text("Chris", 0..240)]),
1606 row(vec![text("2", 0..240), text("Dennis", 0..240)]),
1607 ],
1608 ))],
1609 },
1610 parsed
1611 );
1612 }
1613
1614 #[gpui::test]
1615 async fn test_html_table_without_body() {
1616 let parsed = parse(
1617 "<table>
1618 <thead>
1619 <tr>
1620 <th>Id</th>
1621 <th>Name</th>
1622 </tr>
1623 </thead>
1624 </table>",
1625 )
1626 .await;
1627
1628 assert_eq!(
1629 ParsedMarkdown {
1630 children: vec![ParsedMarkdownElement::Table(table(
1631 0..150,
1632 row(vec![text("Id", 0..150), text("Name", 0..150)]),
1633 vec![],
1634 ))],
1635 },
1636 parsed
1637 );
1638 }
1639
1640 #[gpui::test]
1641 async fn test_html_heading_tags() {
1642 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
1643
1644 assert_eq!(
1645 ParsedMarkdown {
1646 children: vec![
1647 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1648 level: HeadingLevel::H1,
1649 source_range: 0..96,
1650 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1651 source_range: 0..96,
1652 contents: "Heading".into(),
1653 highlights: Vec::default(),
1654 region_ranges: Vec::default(),
1655 regions: Vec::default()
1656 })],
1657 }),
1658 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1659 level: HeadingLevel::H2,
1660 source_range: 0..96,
1661 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1662 source_range: 0..96,
1663 contents: "Heading".into(),
1664 highlights: Vec::default(),
1665 region_ranges: Vec::default(),
1666 regions: Vec::default()
1667 })],
1668 }),
1669 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1670 level: HeadingLevel::H3,
1671 source_range: 0..96,
1672 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1673 source_range: 0..96,
1674 contents: "Heading".into(),
1675 highlights: Vec::default(),
1676 region_ranges: Vec::default(),
1677 regions: Vec::default()
1678 })],
1679 }),
1680 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1681 level: HeadingLevel::H4,
1682 source_range: 0..96,
1683 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1684 source_range: 0..96,
1685 contents: "Heading".into(),
1686 highlights: Vec::default(),
1687 region_ranges: Vec::default(),
1688 regions: Vec::default()
1689 })],
1690 }),
1691 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1692 level: HeadingLevel::H5,
1693 source_range: 0..96,
1694 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1695 source_range: 0..96,
1696 contents: "Heading".into(),
1697 highlights: Vec::default(),
1698 region_ranges: Vec::default(),
1699 regions: Vec::default()
1700 })],
1701 }),
1702 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
1703 level: HeadingLevel::H6,
1704 source_range: 0..96,
1705 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
1706 source_range: 0..96,
1707 contents: "Heading".into(),
1708 highlights: Vec::default(),
1709 region_ranges: Vec::default(),
1710 regions: Vec::default()
1711 })],
1712 }),
1713 ],
1714 },
1715 parsed
1716 );
1717 }
1718
1719 #[gpui::test]
1720 async fn test_html_image_tag() {
1721 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
1722
1723 assert_eq!(
1724 ParsedMarkdown {
1725 children: vec![ParsedMarkdownElement::Image(Image {
1726 source_range: 0..40,
1727 link: Link::Web {
1728 url: "http://example.com/foo.png".to_string(),
1729 },
1730 alt_text: None,
1731 height: None,
1732 width: None,
1733 })]
1734 },
1735 parsed
1736 );
1737 }
1738
1739 #[gpui::test]
1740 async fn test_html_image_tag_with_alt_text() {
1741 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
1742
1743 assert_eq!(
1744 ParsedMarkdown {
1745 children: vec![ParsedMarkdownElement::Image(Image {
1746 source_range: 0..50,
1747 link: Link::Web {
1748 url: "http://example.com/foo.png".to_string(),
1749 },
1750 alt_text: Some("Foo".into()),
1751 height: None,
1752 width: None,
1753 })]
1754 },
1755 parsed
1756 );
1757 }
1758
1759 #[gpui::test]
1760 async fn test_html_image_tag_with_height_and_width() {
1761 let parsed =
1762 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
1763
1764 assert_eq!(
1765 ParsedMarkdown {
1766 children: vec![ParsedMarkdownElement::Image(Image {
1767 source_range: 0..65,
1768 link: Link::Web {
1769 url: "http://example.com/foo.png".to_string(),
1770 },
1771 alt_text: None,
1772 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1773 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1774 })]
1775 },
1776 parsed
1777 );
1778 }
1779
1780 #[gpui::test]
1781 async fn test_html_image_style_tag_with_height_and_width() {
1782 let parsed = parse(
1783 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
1784 )
1785 .await;
1786
1787 assert_eq!(
1788 ParsedMarkdown {
1789 children: vec![ParsedMarkdownElement::Image(Image {
1790 source_range: 0..75,
1791 link: Link::Web {
1792 url: "http://example.com/foo.png".to_string(),
1793 },
1794 alt_text: None,
1795 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
1796 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
1797 })]
1798 },
1799 parsed
1800 );
1801 }
1802
1803 #[gpui::test]
1804 async fn test_header_only_table() {
1805 let markdown = "\
1806| Header 1 | Header 2 |
1807|----------|----------|
1808
1809Some other content
1810";
1811
1812 let expected_table = table(
1813 0..48,
1814 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1815 vec![],
1816 );
1817
1818 assert_eq!(
1819 parse(markdown).await.children[0],
1820 ParsedMarkdownElement::Table(expected_table)
1821 );
1822 }
1823
1824 #[gpui::test]
1825 async fn test_basic_table() {
1826 let markdown = "\
1827| Header 1 | Header 2 |
1828|----------|----------|
1829| Cell 1 | Cell 2 |
1830| Cell 3 | Cell 4 |";
1831
1832 let expected_table = table(
1833 0..95,
1834 row(vec![text("Header 1", 1..11), text("Header 2", 12..22)]),
1835 vec![
1836 row(vec![text("Cell 1", 49..59), text("Cell 2", 60..70)]),
1837 row(vec![text("Cell 3", 73..83), text("Cell 4", 84..94)]),
1838 ],
1839 );
1840
1841 assert_eq!(
1842 parse(markdown).await.children[0],
1843 ParsedMarkdownElement::Table(expected_table)
1844 );
1845 }
1846
1847 #[gpui::test]
1848 async fn test_list_basic() {
1849 let parsed = parse(
1850 "\
1851* Item 1
1852* Item 2
1853* Item 3
1854",
1855 )
1856 .await;
1857
1858 assert_eq!(
1859 parsed.children,
1860 vec![
1861 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1862 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1863 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
1864 ],
1865 );
1866 }
1867
1868 #[gpui::test]
1869 async fn test_list_with_tasks() {
1870 let parsed = parse(
1871 "\
1872- [ ] TODO
1873- [x] Checked
1874",
1875 )
1876 .await;
1877
1878 assert_eq!(
1879 parsed.children,
1880 vec![
1881 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1882 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
1883 ],
1884 );
1885 }
1886
1887 #[gpui::test]
1888 async fn test_list_with_indented_task() {
1889 let parsed = parse(
1890 "\
1891- [ ] TODO
1892 - [x] Checked
1893 - Unordered
1894 1. Number 1
1895 1. Number 2
18961. Number A
1897",
1898 )
1899 .await;
1900
1901 assert_eq!(
1902 parsed.children,
1903 vec![
1904 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
1905 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
1906 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
1907 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
1908 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
1909 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
1910 ],
1911 );
1912 }
1913
1914 #[gpui::test]
1915 async fn test_list_with_linebreak_is_handled_correctly() {
1916 let parsed = parse(
1917 "\
1918- [ ] Task 1
1919
1920- [x] Task 2
1921",
1922 )
1923 .await;
1924
1925 assert_eq!(
1926 parsed.children,
1927 vec![
1928 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
1929 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
1930 ],
1931 );
1932 }
1933
1934 #[gpui::test]
1935 async fn test_list_nested() {
1936 let parsed = parse(
1937 "\
1938* Item 1
1939* Item 2
1940* Item 3
1941
19421. Hello
19431. Two
1944 1. Three
19452. Four
19463. Five
1947
1948* First
1949 1. Hello
1950 1. Goodbyte
1951 - Inner
1952 - Inner
1953 2. Goodbyte
1954 - Next item empty
1955 -
1956* Last
1957",
1958 )
1959 .await;
1960
1961 assert_eq!(
1962 parsed.children,
1963 vec![
1964 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
1965 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
1966 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
1967 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
1968 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
1969 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
1970 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
1971 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
1972 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
1973 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
1974 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
1975 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
1976 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
1977 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
1978 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
1979 list_item(186..190, 3, Unordered, vec![]),
1980 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
1981 ]
1982 );
1983 }
1984
1985 #[gpui::test]
1986 async fn test_list_with_nested_content() {
1987 let parsed = parse(
1988 "\
1989* This is a list item with two paragraphs.
1990
1991 This is the second paragraph in the list item.
1992",
1993 )
1994 .await;
1995
1996 assert_eq!(
1997 parsed.children,
1998 vec![list_item(
1999 0..96,
2000 1,
2001 Unordered,
2002 vec![
2003 p("This is a list item with two paragraphs.", 4..44),
2004 p("This is the second paragraph in the list item.", 50..97)
2005 ],
2006 ),],
2007 );
2008 }
2009
2010 #[gpui::test]
2011 async fn test_list_item_with_inline_html() {
2012 let parsed = parse(
2013 "\
2014* This is a list item with an inline HTML <sometag>tag</sometag>.
2015",
2016 )
2017 .await;
2018
2019 assert_eq!(
2020 parsed.children,
2021 vec![list_item(
2022 0..67,
2023 1,
2024 Unordered,
2025 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2026 ),],
2027 );
2028 }
2029
2030 #[gpui::test]
2031 async fn test_nested_list_with_paragraph_inside() {
2032 let parsed = parse(
2033 "\
20341. a
2035 1. b
2036 1. c
2037
2038 text
2039
2040 1. d
2041",
2042 )
2043 .await;
2044
2045 assert_eq!(
2046 parsed.children,
2047 vec![
2048 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2049 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2050 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2051 p("text", 32..37),
2052 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2053 ],
2054 );
2055 }
2056
2057 #[gpui::test]
2058 async fn test_list_with_leading_text() {
2059 let parsed = parse(
2060 "\
2061* `code`
2062* **bold**
2063* [link](https://example.com)
2064",
2065 )
2066 .await;
2067
2068 assert_eq!(
2069 parsed.children,
2070 vec![
2071 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2072 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2073 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2074 ],
2075 );
2076 }
2077
2078 #[gpui::test]
2079 async fn test_simple_block_quote() {
2080 let parsed = parse("> Simple block quote with **styled text**").await;
2081
2082 assert_eq!(
2083 parsed.children,
2084 vec![block_quote(
2085 vec![p("Simple block quote with styled text", 2..41)],
2086 0..41
2087 )]
2088 );
2089 }
2090
2091 #[gpui::test]
2092 async fn test_simple_block_quote_with_multiple_lines() {
2093 let parsed = parse(
2094 "\
2095> # Heading
2096> More
2097> text
2098>
2099> More text
2100",
2101 )
2102 .await;
2103
2104 assert_eq!(
2105 parsed.children,
2106 vec![block_quote(
2107 vec![
2108 h1(text("Heading", 4..11), 2..12),
2109 p("More text", 14..26),
2110 p("More text", 30..40)
2111 ],
2112 0..40
2113 )]
2114 );
2115 }
2116
2117 #[gpui::test]
2118 async fn test_nested_block_quote() {
2119 let parsed = parse(
2120 "\
2121> A
2122>
2123> > # B
2124>
2125> C
2126
2127More text
2128",
2129 )
2130 .await;
2131
2132 assert_eq!(
2133 parsed.children,
2134 vec![
2135 block_quote(
2136 vec![
2137 p("A", 2..4),
2138 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
2139 p("C", 18..20)
2140 ],
2141 0..20
2142 ),
2143 p("More text", 21..31)
2144 ]
2145 );
2146 }
2147
2148 #[gpui::test]
2149 async fn test_code_block() {
2150 let parsed = parse(
2151 "\
2152```
2153fn main() {
2154 return 0;
2155}
2156```
2157",
2158 )
2159 .await;
2160
2161 assert_eq!(
2162 parsed.children,
2163 vec![code_block(
2164 None,
2165 "fn main() {\n return 0;\n}",
2166 0..35,
2167 None
2168 )]
2169 );
2170 }
2171
2172 #[gpui::test]
2173 async fn test_code_block_with_language(executor: BackgroundExecutor) {
2174 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
2175 language_registry.add(rust_lang());
2176
2177 let parsed = parse_markdown(
2178 "\
2179```rust
2180fn main() {
2181 return 0;
2182}
2183```
2184",
2185 None,
2186 Some(language_registry),
2187 )
2188 .await;
2189
2190 assert_eq!(
2191 parsed.children,
2192 vec![code_block(
2193 Some("rust".to_string()),
2194 "fn main() {\n return 0;\n}",
2195 0..39,
2196 Some(vec![])
2197 )]
2198 );
2199 }
2200
2201 fn rust_lang() -> Arc<Language> {
2202 Arc::new(Language::new(
2203 LanguageConfig {
2204 name: "Rust".into(),
2205 matcher: LanguageMatcher {
2206 path_suffixes: vec!["rs".into()],
2207 ..Default::default()
2208 },
2209 collapsed_placeholder: " /* ... */ ".to_string(),
2210 ..Default::default()
2211 },
2212 Some(tree_sitter_rust::LANGUAGE.into()),
2213 ))
2214 }
2215
2216 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2217 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2218 source_range,
2219 level: HeadingLevel::H1,
2220 contents,
2221 })
2222 }
2223
2224 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2225 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2226 source_range,
2227 level: HeadingLevel::H2,
2228 contents,
2229 })
2230 }
2231
2232 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
2233 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2234 source_range,
2235 level: HeadingLevel::H3,
2236 contents,
2237 })
2238 }
2239
2240 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
2241 ParsedMarkdownElement::Paragraph(text(contents, source_range))
2242 }
2243
2244 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
2245 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2246 highlights: Vec::new(),
2247 region_ranges: Vec::new(),
2248 regions: Vec::new(),
2249 source_range,
2250 contents: contents.to_string().into(),
2251 })]
2252 }
2253
2254 fn block_quote(
2255 children: Vec<ParsedMarkdownElement>,
2256 source_range: Range<usize>,
2257 ) -> ParsedMarkdownElement {
2258 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
2259 source_range,
2260 children,
2261 })
2262 }
2263
2264 fn code_block(
2265 language: Option<String>,
2266 code: &str,
2267 source_range: Range<usize>,
2268 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
2269 ) -> ParsedMarkdownElement {
2270 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
2271 source_range,
2272 language,
2273 contents: code.to_string().into(),
2274 highlights,
2275 })
2276 }
2277
2278 fn list_item(
2279 source_range: Range<usize>,
2280 depth: u16,
2281 item_type: ParsedMarkdownListItemType,
2282 content: Vec<ParsedMarkdownElement>,
2283 ) -> ParsedMarkdownElement {
2284 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
2285 source_range,
2286 item_type,
2287 depth,
2288 content,
2289 })
2290 }
2291
2292 fn table(
2293 source_range: Range<usize>,
2294 header: ParsedMarkdownTableRow,
2295 body: Vec<ParsedMarkdownTableRow>,
2296 ) -> ParsedMarkdownTable {
2297 ParsedMarkdownTable {
2298 column_alignments: Vec::new(),
2299 source_range,
2300 header,
2301 body,
2302 }
2303 }
2304
2305 fn row(children: Vec<MarkdownParagraph>) -> ParsedMarkdownTableRow {
2306 ParsedMarkdownTableRow { children }
2307 }
2308
2309 impl PartialEq for ParsedMarkdownTable {
2310 fn eq(&self, other: &Self) -> bool {
2311 self.source_range == other.source_range
2312 && self.header == other.header
2313 && self.body == other.body
2314 }
2315 }
2316
2317 impl PartialEq for ParsedMarkdownText {
2318 fn eq(&self, other: &Self) -> bool {
2319 self.source_range == other.source_range && self.contents == other.contents
2320 }
2321 }
2322}