1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15use ui::SharedString;
16
17pub async fn parse_markdown(
18 markdown_input: &str,
19 file_location_directory: Option<PathBuf>,
20 language_registry: Option<Arc<LanguageRegistry>>,
21) -> ParsedMarkdown {
22 let mut options = Options::all();
23 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
24
25 let parser = Parser::new_ext(markdown_input, options);
26 let parser = MarkdownParser::new(
27 parser.into_offset_iter().collect(),
28 file_location_directory,
29 language_registry,
30 );
31 let renderer = parser.parse_document().await;
32 ParsedMarkdown {
33 children: renderer.parsed,
34 }
35}
36
37fn cleanup_html(source: &str) -> Vec<u8> {
38 let mut writer = std::io::Cursor::new(Vec::new());
39 let mut reader = std::io::Cursor::new(source);
40 let mut minify = Minifier::new(
41 &mut writer,
42 MinifierOptions {
43 omit_doctype: true,
44 collapse_whitespace: true,
45 ..Default::default()
46 },
47 );
48 if let Ok(()) = minify.minify(&mut reader) {
49 writer.into_inner()
50 } else {
51 source.bytes().collect()
52 }
53}
54
/// Cursor-based parser over a pre-collected pulldown-cmark event stream.
///
/// The parser walks `tokens` by advancing `cursor` and accumulates finished
/// top-level elements in `parsed`.
struct MarkdownParser<'a> {
    /// Every event of the document, paired with its byte range in the source text
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Directory handed to `Link::identify` / `Image::identify` when resolving
    /// link and image destinations
    file_location_directory: Option<PathBuf>,
    /// Registry used to look up a language for fenced code block highlighting;
    /// when `None`, code blocks are left without highlights
    language_registry: Option<Arc<LanguageRegistry>>,
}
64
/// State threaded through the recursive HTML node walk.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth passed to `extract_html_list` when a `<ul>`/`<ol>` element is
    /// encountered; the `Default` impl starts it at 1
    list_item_depth: u16,
}
69
70impl Default for ParseHtmlNodeContext {
71 fn default() -> Self {
72 Self { list_item_depth: 1 }
73 }
74}
75
/// A list item that is still being assembled by `parse_list`.
struct MarkdownListItem {
    /// Block elements collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Marker kind of the item (unordered, ordered or task)
    item_type: ParsedMarkdownListItemType,
}
80
81impl Default for MarkdownListItem {
82 fn default() -> Self {
83 Self {
84 content: Vec::new(),
85 item_type: ParsedMarkdownListItemType::Unordered,
86 }
87 }
88}
89
90impl<'a> MarkdownParser<'a> {
91 fn new(
92 tokens: Vec<(Event<'a>, Range<usize>)>,
93 file_location_directory: Option<PathBuf>,
94 language_registry: Option<Arc<LanguageRegistry>>,
95 ) -> Self {
96 Self {
97 tokens,
98 file_location_directory,
99 language_registry,
100 cursor: 0,
101 parsed: vec![],
102 }
103 }
104
105 fn eof(&self) -> bool {
106 if self.tokens.is_empty() {
107 return true;
108 }
109 self.cursor >= self.tokens.len() - 1
110 }
111
112 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
113 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
114 return self.tokens.last();
115 }
116 self.tokens.get(self.cursor + steps)
117 }
118
119 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
120 if self.cursor == 0 || self.cursor > self.tokens.len() {
121 return None;
122 }
123 self.tokens.get(self.cursor - 1)
124 }
125
126 fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
127 self.peek(0)
128 }
129
130 fn current_event(&self) -> Option<&Event<'_>> {
131 self.current().map(|(event, _)| event)
132 }
133
134 fn is_text_like(event: &Event) -> bool {
135 match event {
136 Event::Text(_)
137 // Represent an inline code block
138 | Event::Code(_)
139 | Event::Html(_)
140 | Event::InlineHtml(_)
141 | Event::FootnoteReference(_)
142 | Event::Start(Tag::Link { .. })
143 | Event::Start(Tag::Emphasis)
144 | Event::Start(Tag::Strong)
145 | Event::Start(Tag::Strikethrough)
146 | Event::Start(Tag::Image { .. }) => {
147 true
148 }
149 _ => false,
150 }
151 }
152
153 async fn parse_document(mut self) -> Self {
154 while !self.eof() {
155 if let Some(block) = self.parse_block().await {
156 self.parsed.extend(block);
157 } else {
158 self.cursor += 1;
159 }
160 }
161 self
162 }
163
164 #[async_recursion]
165 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
166 let (current, source_range) = self.current().unwrap();
167 let source_range = source_range.clone();
168 match current {
169 Event::Start(tag) => match tag {
170 Tag::Paragraph => {
171 self.cursor += 1;
172 let text = self.parse_text(false, Some(source_range));
173 Some(vec![ParsedMarkdownElement::Paragraph(text)])
174 }
175 Tag::Heading { level, .. } => {
176 let level = *level;
177 self.cursor += 1;
178 let heading = self.parse_heading(level);
179 Some(vec![ParsedMarkdownElement::Heading(heading)])
180 }
181 Tag::Table(alignment) => {
182 let alignment = alignment.clone();
183 self.cursor += 1;
184 let table = self.parse_table(alignment);
185 Some(vec![ParsedMarkdownElement::Table(table)])
186 }
187 Tag::List(order) => {
188 let order = *order;
189 self.cursor += 1;
190 let list = self.parse_list(order).await;
191 Some(list)
192 }
193 Tag::BlockQuote(_kind) => {
194 self.cursor += 1;
195 let block_quote = self.parse_block_quote().await;
196 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
197 }
198 Tag::CodeBlock(kind) => {
199 let language = match kind {
200 pulldown_cmark::CodeBlockKind::Indented => None,
201 pulldown_cmark::CodeBlockKind::Fenced(language) => {
202 if language.is_empty() {
203 None
204 } else {
205 Some(language.to_string())
206 }
207 }
208 };
209
210 self.cursor += 1;
211
212 let code_block = self.parse_code_block(language).await?;
213 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
214 }
215 Tag::HtmlBlock => {
216 self.cursor += 1;
217
218 Some(self.parse_html_block().await)
219 }
220 _ => None,
221 },
222 Event::Rule => {
223 self.cursor += 1;
224 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
225 }
226 _ => None,
227 }
228 }
229
230 fn parse_text(
231 &mut self,
232 should_complete_on_soft_break: bool,
233 source_range: Option<Range<usize>>,
234 ) -> MarkdownParagraph {
235 let source_range = source_range.unwrap_or_else(|| {
236 self.current()
237 .map(|(_, range)| range.clone())
238 .unwrap_or_default()
239 });
240
241 let mut markdown_text_like = Vec::new();
242 let mut text = String::new();
243 let mut bold_depth = 0;
244 let mut italic_depth = 0;
245 let mut strikethrough_depth = 0;
246 let mut link: Option<Link> = None;
247 let mut image: Option<Image> = None;
248 let mut regions: Vec<(Range<usize>, ParsedRegion)> = vec![];
249 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
250 let mut link_urls: Vec<String> = vec![];
251 let mut link_ranges: Vec<Range<usize>> = vec![];
252
253 loop {
254 if self.eof() {
255 break;
256 }
257
258 let (current, _) = self.current().unwrap();
259 let prev_len = text.len();
260 match current {
261 Event::SoftBreak => {
262 if should_complete_on_soft_break {
263 break;
264 }
265 text.push(' ');
266 }
267
268 Event::HardBreak => {
269 text.push('\n');
270 }
271
272 // We want to ignore any inline HTML tags in the text but keep
273 // the text between them
274 Event::InlineHtml(_) => {}
275
276 Event::Text(t) => {
277 text.push_str(t.as_ref());
278 let mut style = MarkdownHighlightStyle::default();
279
280 if bold_depth > 0 {
281 style.weight = FontWeight::BOLD;
282 }
283
284 if italic_depth > 0 {
285 style.italic = true;
286 }
287
288 if strikethrough_depth > 0 {
289 style.strikethrough = true;
290 }
291
292 let last_run_len = if let Some(link) = link.clone() {
293 regions.push((
294 prev_len..text.len(),
295 ParsedRegion {
296 code: false,
297 link: Some(link),
298 },
299 ));
300 style.link = true;
301 prev_len
302 } else {
303 // Manually scan for links
304 let mut finder = linkify::LinkFinder::new();
305 finder.kinds(&[linkify::LinkKind::Url]);
306 let mut last_link_len = prev_len;
307 for link in finder.links(t) {
308 let start = prev_len + link.start();
309 let end = prev_len + link.end();
310 let range = start..end;
311 link_ranges.push(range.clone());
312 link_urls.push(link.as_str().to_string());
313
314 // If there is a style before we match a link, we have to add this to the highlighted ranges
315 if style != MarkdownHighlightStyle::default() && last_link_len < start {
316 highlights.push((
317 last_link_len..start,
318 MarkdownHighlight::Style(style.clone()),
319 ));
320 }
321
322 highlights.push((
323 range.clone(),
324 MarkdownHighlight::Style(MarkdownHighlightStyle {
325 underline: true,
326 ..style
327 }),
328 ));
329
330 regions.push((
331 range.clone(),
332 ParsedRegion {
333 code: false,
334 link: Some(Link::Web {
335 url: link.as_str().to_string(),
336 }),
337 },
338 ));
339 last_link_len = end;
340 }
341 last_link_len
342 };
343
344 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
345 let mut new_highlight = true;
346 if let Some((last_range, last_style)) = highlights.last_mut()
347 && last_range.end == last_run_len
348 && last_style == &MarkdownHighlight::Style(style.clone())
349 {
350 last_range.end = text.len();
351 new_highlight = false;
352 }
353 if new_highlight {
354 highlights.push((
355 last_run_len..text.len(),
356 MarkdownHighlight::Style(style.clone()),
357 ));
358 }
359 }
360 }
361 Event::Code(t) => {
362 text.push_str(t.as_ref());
363 let range = prev_len..text.len();
364
365 if link.is_some() {
366 highlights.push((
367 range.clone(),
368 MarkdownHighlight::Style(MarkdownHighlightStyle {
369 link: true,
370 ..Default::default()
371 }),
372 ));
373 }
374 regions.push((
375 range,
376 ParsedRegion {
377 code: true,
378 link: link.clone(),
379 },
380 ));
381 }
382 Event::Start(tag) => match tag {
383 Tag::Emphasis => italic_depth += 1,
384 Tag::Strong => bold_depth += 1,
385 Tag::Strikethrough => strikethrough_depth += 1,
386 Tag::Link { dest_url, .. } => {
387 link = Link::identify(
388 self.file_location_directory.clone(),
389 dest_url.to_string(),
390 );
391 }
392 Tag::Image { dest_url, .. } => {
393 if !text.is_empty() {
394 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
395 source_range: source_range.clone(),
396 contents: mem::take(&mut text).into(),
397 highlights: mem::take(&mut highlights),
398 regions: mem::take(&mut regions),
399 });
400 markdown_text_like.push(parsed_regions);
401 }
402 image = Image::identify(
403 dest_url.to_string(),
404 source_range.clone(),
405 self.file_location_directory.clone(),
406 );
407 }
408 _ => {
409 break;
410 }
411 },
412
413 Event::End(tag) => match tag {
414 TagEnd::Emphasis => italic_depth -= 1,
415 TagEnd::Strong => bold_depth -= 1,
416 TagEnd::Strikethrough => strikethrough_depth -= 1,
417 TagEnd::Link => {
418 link = None;
419 }
420 TagEnd::Image => {
421 if let Some(mut image) = image.take() {
422 if !text.is_empty() {
423 image.set_alt_text(std::mem::take(&mut text).into());
424 mem::take(&mut highlights);
425 mem::take(&mut regions);
426 }
427 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
428 }
429 }
430 TagEnd::Paragraph => {
431 self.cursor += 1;
432 break;
433 }
434 _ => {
435 break;
436 }
437 },
438 _ => {
439 break;
440 }
441 }
442
443 self.cursor += 1;
444 }
445 if !text.is_empty() {
446 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
447 source_range,
448 contents: text.into(),
449 highlights,
450 regions,
451 }));
452 }
453 markdown_text_like
454 }
455
456 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
457 let (_event, source_range) = self.previous().unwrap();
458 let source_range = source_range.clone();
459 let text = self.parse_text(true, None);
460
461 // Advance past the heading end tag
462 self.cursor += 1;
463
464 ParsedMarkdownHeading {
465 source_range,
466 level: match level {
467 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
468 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
469 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
470 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
471 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
472 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
473 },
474 contents: text,
475 }
476 }
477
478 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
479 let (_event, source_range) = self.previous().unwrap();
480 let source_range = source_range.clone();
481 let mut header = vec![];
482 let mut body = vec![];
483 let mut row_columns = vec![];
484 let mut in_header = true;
485 let column_alignments = alignment
486 .iter()
487 .map(Self::convert_alignment)
488 .collect::<Vec<_>>();
489
490 loop {
491 if self.eof() {
492 break;
493 }
494
495 let (current, source_range) = self.current().unwrap();
496 let source_range = source_range.clone();
497 match current {
498 Event::Start(Tag::TableHead)
499 | Event::Start(Tag::TableRow)
500 | Event::End(TagEnd::TableCell) => {
501 self.cursor += 1;
502 }
503 Event::Start(Tag::TableCell) => {
504 self.cursor += 1;
505 let cell_contents = self.parse_text(false, Some(source_range));
506 row_columns.push(ParsedMarkdownTableColumn {
507 col_span: 1,
508 row_span: 1,
509 is_header: in_header,
510 children: cell_contents,
511 alignment: column_alignments
512 .get(row_columns.len())
513 .copied()
514 .unwrap_or_default(),
515 });
516 }
517 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
518 self.cursor += 1;
519 let columns = std::mem::take(&mut row_columns);
520 if in_header {
521 header.push(ParsedMarkdownTableRow { columns: columns });
522 in_header = false;
523 } else {
524 body.push(ParsedMarkdownTableRow::with_columns(columns));
525 }
526 }
527 Event::End(TagEnd::Table) => {
528 self.cursor += 1;
529 break;
530 }
531 _ => {
532 break;
533 }
534 }
535 }
536
537 ParsedMarkdownTable {
538 source_range,
539 header,
540 body,
541 caption: None,
542 }
543 }
544
545 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
546 match alignment {
547 Alignment::None => ParsedMarkdownTableAlignment::None,
548 Alignment::Left => ParsedMarkdownTableAlignment::Left,
549 Alignment::Center => ParsedMarkdownTableAlignment::Center,
550 Alignment::Right => ParsedMarkdownTableAlignment::Right,
551 }
552 }
553
    /// Parses a (possibly nested) list whose start tag has just been consumed and
    /// returns its items as a flat sequence of elements.
    ///
    /// Nesting is not represented structurally: every item is emitted as a
    /// `ListItem` carrying its `depth`, and nested items follow their parent in
    /// the returned vector. `order` is the starting number for an ordered list
    /// (`None` for an unordered list).
    ///
    /// Panics if there is no previous token.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        // Finished items, in output order.
        let mut items = Vec::new();
        // Items currently being built; the innermost open item is last.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        // Next number for the list at the current depth (`None` when unordered).
        let mut order = order;
        // Saved `order` values of the enclosing lists.
        let mut order_stack = Vec::new();

        // depth -> index in `items` at which the item at that depth must be
        // inserted once it ends, so that it precedes its nested items.
        let mut insertion_indices = FxHashMap::default();
        // depth -> source range recorded for the item at that depth when a nested
        // list starts inside it.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    // Restore the numbering of the enclosing list, if any.
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The outermost list has been closed.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over ordered numbering.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block (e.g. a code block);
                            // its item type keeps the default set on creation.
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // Skip the end tag of the paragraph that was opened above when
                    // the item's content was wrapped in one.
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range recorded when a nested list started
                        // inside this item; otherwise use the item's own range.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                            nested: false,
                        });

                        // An item that contained nested lists goes in front of the
                        // nested items that were already emitted.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we understand: skip the token.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
705
706 #[async_recursion]
707 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
708 let (_event, source_range) = self.previous().unwrap();
709 let source_range = source_range.clone();
710 let mut nested_depth = 1;
711
712 let mut children: Vec<ParsedMarkdownElement> = vec![];
713
714 while !self.eof() {
715 let block = self.parse_block().await;
716
717 if let Some(block) = block {
718 children.extend(block);
719 } else {
720 break;
721 }
722
723 if self.eof() {
724 break;
725 }
726
727 let (current, _source_range) = self.current().unwrap();
728 match current {
729 // This is a nested block quote.
730 // Record that we're in a nested block quote and continue parsing.
731 // We don't need to advance the cursor since the next
732 // call to `parse_block` will handle it.
733 Event::Start(Tag::BlockQuote(_kind)) => {
734 nested_depth += 1;
735 }
736 Event::End(TagEnd::BlockQuote(_kind)) => {
737 nested_depth -= 1;
738 if nested_depth == 0 {
739 self.cursor += 1;
740 break;
741 }
742 }
743 _ => {}
744 };
745 }
746
747 ParsedMarkdownBlockQuote {
748 source_range,
749 children,
750 }
751 }
752
753 async fn parse_code_block(
754 &mut self,
755 language: Option<String>,
756 ) -> Option<ParsedMarkdownCodeBlock> {
757 let Some((_event, source_range)) = self.previous() else {
758 return None;
759 };
760
761 let source_range = source_range.clone();
762 let mut code = String::new();
763
764 while !self.eof() {
765 let Some((current, _source_range)) = self.current() else {
766 break;
767 };
768
769 match current {
770 Event::Text(text) => {
771 code.push_str(text);
772 self.cursor += 1;
773 }
774 Event::End(TagEnd::CodeBlock) => {
775 self.cursor += 1;
776 break;
777 }
778 _ => {
779 break;
780 }
781 }
782 }
783
784 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
785
786 let highlights = if let Some(language) = &language {
787 if let Some(registry) = &self.language_registry {
788 let rope: language::Rope = code.as_str().into();
789 registry
790 .language_for_name_or_extension(language)
791 .await
792 .map(|l| l.highlight_text(&rope, 0..code.len()))
793 .ok()
794 } else {
795 None
796 }
797 } else {
798 None
799 };
800
801 Some(ParsedMarkdownCodeBlock {
802 source_range,
803 contents: code.into(),
804 language,
805 highlights,
806 })
807 }
808
809 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
810 let mut elements = Vec::new();
811 let Some((_event, _source_range)) = self.previous() else {
812 return elements;
813 };
814
815 let mut html_source_range_start = None;
816 let mut html_source_range_end = None;
817 let mut html_buffer = String::new();
818
819 while !self.eof() {
820 let Some((current, source_range)) = self.current() else {
821 break;
822 };
823 let source_range = source_range.clone();
824 match current {
825 Event::Html(html) => {
826 html_source_range_start.get_or_insert(source_range.start);
827 html_source_range_end = Some(source_range.end);
828 html_buffer.push_str(html);
829 self.cursor += 1;
830 }
831 Event::End(TagEnd::CodeBlock) => {
832 self.cursor += 1;
833 break;
834 }
835 _ => {
836 break;
837 }
838 }
839 }
840
841 let bytes = cleanup_html(&html_buffer);
842
843 let mut cursor = std::io::Cursor::new(bytes);
844 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
845 .from_utf8()
846 .read_from(&mut cursor)
847 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
848 {
849 self.parse_html_node(
850 start..end,
851 &dom.document,
852 &mut elements,
853 &ParseHtmlNodeContext::default(),
854 );
855 }
856
857 elements
858 }
859
860 fn parse_html_node(
861 &self,
862 source_range: Range<usize>,
863 node: &Rc<markup5ever_rcdom::Node>,
864 elements: &mut Vec<ParsedMarkdownElement>,
865 context: &ParseHtmlNodeContext,
866 ) {
867 match &node.data {
868 markup5ever_rcdom::NodeData::Document => {
869 self.consume_children(source_range, node, elements, context);
870 }
871 markup5ever_rcdom::NodeData::Text { contents } => {
872 elements.push(ParsedMarkdownElement::Paragraph(vec![
873 MarkdownParagraphChunk::Text(ParsedMarkdownText {
874 source_range,
875 regions: Vec::default(),
876 highlights: Vec::default(),
877 contents: contents.borrow().to_string().into(),
878 }),
879 ]));
880 }
881 markup5ever_rcdom::NodeData::Comment { .. } => {}
882 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
883 let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles(
884 Self::extract_styles_from_attributes(attrs),
885 ) {
886 vec![MarkdownHighlight::Style(styles)]
887 } else {
888 Vec::default()
889 };
890
891 if local_name!("img") == name.local {
892 if let Some(image) = self.extract_image(source_range, attrs) {
893 elements.push(ParsedMarkdownElement::Image(image));
894 }
895 } else if local_name!("p") == name.local {
896 let mut paragraph = MarkdownParagraph::new();
897 self.parse_paragraph(
898 source_range,
899 node,
900 &mut paragraph,
901 &mut styles,
902 &mut Vec::new(),
903 );
904
905 if !paragraph.is_empty() {
906 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
907 }
908 } else if matches!(
909 name.local,
910 local_name!("h1")
911 | local_name!("h2")
912 | local_name!("h3")
913 | local_name!("h4")
914 | local_name!("h5")
915 | local_name!("h6")
916 ) {
917 let mut paragraph = MarkdownParagraph::new();
918 self.consume_paragraph(
919 source_range.clone(),
920 node,
921 &mut paragraph,
922 &mut styles,
923 &mut Vec::new(),
924 );
925
926 if !paragraph.is_empty() {
927 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
928 source_range,
929 level: match name.local {
930 local_name!("h1") => HeadingLevel::H1,
931 local_name!("h2") => HeadingLevel::H2,
932 local_name!("h3") => HeadingLevel::H3,
933 local_name!("h4") => HeadingLevel::H4,
934 local_name!("h5") => HeadingLevel::H5,
935 local_name!("h6") => HeadingLevel::H6,
936 _ => unreachable!(),
937 },
938 contents: paragraph,
939 }));
940 }
941 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
942 if let Some(list_items) = self.extract_html_list(
943 node,
944 local_name!("ol") == name.local,
945 context.list_item_depth,
946 source_range,
947 ) {
948 elements.extend(list_items);
949 }
950 } else if local_name!("blockquote") == name.local {
951 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
952 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
953 }
954 } else if local_name!("table") == name.local {
955 if let Some(table) = self.extract_html_table(node, source_range) {
956 elements.push(ParsedMarkdownElement::Table(table));
957 }
958 } else {
959 self.consume_children(source_range, node, elements, context);
960 }
961 }
962 _ => {}
963 }
964 }
965
966 fn parse_paragraph(
967 &self,
968 source_range: Range<usize>,
969 node: &Rc<markup5ever_rcdom::Node>,
970 paragraph: &mut MarkdownParagraph,
971 highlights: &mut Vec<MarkdownHighlight>,
972 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
973 ) {
974 fn items_with_range<T>(
975 range: Range<usize>,
976 items: impl IntoIterator<Item = T>,
977 ) -> Vec<(Range<usize>, T)> {
978 items
979 .into_iter()
980 .map(|item| (range.clone(), item))
981 .collect()
982 }
983
984 match &node.data {
985 markup5ever_rcdom::NodeData::Text { contents } => {
986 // append the text to the last chunk, so we can have a hacky version
987 // of inline text with highlighting
988 if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p {
989 MarkdownParagraphChunk::Text(text) => Some(text),
990 _ => None,
991 }) {
992 let mut new_text = text.contents.to_string();
993 new_text.push_str(&contents.borrow());
994
995 text.highlights.extend(items_with_range(
996 text.contents.len()..new_text.len(),
997 std::mem::take(highlights),
998 ));
999 text.regions.extend(items_with_range(
1000 text.contents.len()..new_text.len(),
1001 std::mem::take(regions)
1002 .into_iter()
1003 .map(|(_, region)| region),
1004 ));
1005 text.contents = SharedString::from(new_text);
1006 } else {
1007 let contents = contents.borrow().to_string();
1008 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
1009 source_range,
1010 highlights: items_with_range(0..contents.len(), std::mem::take(highlights)),
1011 regions: items_with_range(
1012 0..contents.len(),
1013 std::mem::take(regions)
1014 .into_iter()
1015 .map(|(_, region)| region),
1016 ),
1017 contents: contents.into(),
1018 }));
1019 }
1020 }
1021 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1022 if local_name!("img") == name.local {
1023 if let Some(image) = self.extract_image(source_range, attrs) {
1024 paragraph.push(MarkdownParagraphChunk::Image(image));
1025 }
1026 } else if local_name!("b") == name.local || local_name!("strong") == name.local {
1027 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1028 weight: FontWeight::BOLD,
1029 ..Default::default()
1030 }));
1031
1032 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1033 } else if local_name!("i") == name.local {
1034 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1035 italic: true,
1036 ..Default::default()
1037 }));
1038
1039 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1040 } else if local_name!("em") == name.local {
1041 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1042 oblique: true,
1043 ..Default::default()
1044 }));
1045
1046 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1047 } else if local_name!("del") == name.local {
1048 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1049 strikethrough: true,
1050 ..Default::default()
1051 }));
1052
1053 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1054 } else if local_name!("ins") == name.local {
1055 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1056 underline: true,
1057 ..Default::default()
1058 }));
1059
1060 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1061 } else if local_name!("a") == name.local {
1062 if let Some(url) = Self::attr_value(attrs, local_name!("href"))
1063 && let Some(link) =
1064 Link::identify(self.file_location_directory.clone(), url)
1065 {
1066 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1067 link: true,
1068 ..Default::default()
1069 }));
1070
1071 regions.push((
1072 source_range.clone(),
1073 ParsedRegion {
1074 code: false,
1075 link: Some(link),
1076 },
1077 ));
1078 }
1079
1080 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1081 } else {
1082 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1083 }
1084 }
1085 _ => {}
1086 }
1087 }
1088
1089 fn consume_paragraph(
1090 &self,
1091 source_range: Range<usize>,
1092 node: &Rc<markup5ever_rcdom::Node>,
1093 paragraph: &mut MarkdownParagraph,
1094 highlights: &mut Vec<MarkdownHighlight>,
1095 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
1096 ) {
1097 for node in node.children.borrow().iter() {
1098 self.parse_paragraph(source_range.clone(), node, paragraph, highlights, regions);
1099 }
1100 }
1101
1102 fn parse_table_row(
1103 &self,
1104 source_range: Range<usize>,
1105 node: &Rc<markup5ever_rcdom::Node>,
1106 ) -> Option<ParsedMarkdownTableRow> {
1107 let mut columns = Vec::new();
1108
1109 match &node.data {
1110 markup5ever_rcdom::NodeData::Element { name, .. } => {
1111 if local_name!("tr") != name.local {
1112 return None;
1113 }
1114
1115 for node in node.children.borrow().iter() {
1116 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
1117 columns.push(column);
1118 }
1119 }
1120 }
1121 _ => {}
1122 }
1123
1124 if columns.is_empty() {
1125 None
1126 } else {
1127 Some(ParsedMarkdownTableRow { columns })
1128 }
1129 }
1130
1131 fn parse_table_column(
1132 &self,
1133 source_range: Range<usize>,
1134 node: &Rc<markup5ever_rcdom::Node>,
1135 ) -> Option<ParsedMarkdownTableColumn> {
1136 match &node.data {
1137 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1138 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1139 return None;
1140 }
1141
1142 let mut children = MarkdownParagraph::new();
1143 self.consume_paragraph(
1144 source_range,
1145 node,
1146 &mut children,
1147 &mut Vec::new(),
1148 &mut Vec::new(),
1149 );
1150
1151 let is_header = matches!(name.local, local_name!("th"));
1152
1153 Some(ParsedMarkdownTableColumn {
1154 col_span: std::cmp::max(
1155 Self::attr_value(attrs, local_name!("colspan"))
1156 .and_then(|span| span.parse().ok())
1157 .unwrap_or(1),
1158 1,
1159 ),
1160 row_span: std::cmp::max(
1161 Self::attr_value(attrs, local_name!("rowspan"))
1162 .and_then(|span| span.parse().ok())
1163 .unwrap_or(1),
1164 1,
1165 ),
1166 is_header,
1167 children,
1168 alignment: Self::attr_value(attrs, local_name!("align"))
1169 .and_then(|align| match align.as_str() {
1170 "left" => Some(ParsedMarkdownTableAlignment::Left),
1171 "center" => Some(ParsedMarkdownTableAlignment::Center),
1172 "right" => Some(ParsedMarkdownTableAlignment::Right),
1173 _ => None,
1174 })
1175 .unwrap_or_else(|| {
1176 if is_header {
1177 ParsedMarkdownTableAlignment::Center
1178 } else {
1179 ParsedMarkdownTableAlignment::default()
1180 }
1181 }),
1182 })
1183 }
1184 _ => None,
1185 }
1186 }
1187
1188 fn consume_children(
1189 &self,
1190 source_range: Range<usize>,
1191 node: &Rc<markup5ever_rcdom::Node>,
1192 elements: &mut Vec<ParsedMarkdownElement>,
1193 context: &ParseHtmlNodeContext,
1194 ) {
1195 for node in node.children.borrow().iter() {
1196 self.parse_html_node(source_range.clone(), node, elements, context);
1197 }
1198 }
1199
1200 fn attr_value(
1201 attrs: &RefCell<Vec<html5ever::Attribute>>,
1202 name: html5ever::LocalName,
1203 ) -> Option<String> {
1204 attrs.borrow().iter().find_map(|attr| {
1205 if attr.name.local == name {
1206 Some(attr.value.to_string())
1207 } else {
1208 None
1209 }
1210 })
1211 }
1212
1213 fn markdown_style_from_html_styles(
1214 styles: HashMap<String, String>,
1215 ) -> Option<MarkdownHighlightStyle> {
1216 let mut markdown_style = MarkdownHighlightStyle::default();
1217
1218 if let Some(text_decoration) = styles.get("text-decoration") {
1219 match text_decoration.to_lowercase().as_str() {
1220 "underline" => {
1221 markdown_style.underline = true;
1222 }
1223 "line-through" => {
1224 markdown_style.strikethrough = true;
1225 }
1226 _ => {}
1227 }
1228 }
1229
1230 if let Some(font_style) = styles.get("font-style") {
1231 match font_style.to_lowercase().as_str() {
1232 "italic" => {
1233 markdown_style.italic = true;
1234 }
1235 "oblique" => {
1236 markdown_style.oblique = true;
1237 }
1238 _ => {}
1239 }
1240 }
1241
1242 if let Some(font_weight) = styles.get("font-weight") {
1243 match font_weight.to_lowercase().as_str() {
1244 "bold" => {
1245 markdown_style.weight = FontWeight::BOLD;
1246 }
1247 "lighter" => {
1248 markdown_style.weight = FontWeight::THIN;
1249 }
1250 _ => {
1251 if let Some(weight) = font_weight.parse::<f32>().ok() {
1252 markdown_style.weight = FontWeight(weight);
1253 }
1254 }
1255 }
1256 }
1257
1258 if markdown_style != MarkdownHighlightStyle::default() {
1259 Some(markdown_style)
1260 } else {
1261 None
1262 }
1263 }
1264
1265 fn extract_styles_from_attributes(
1266 attrs: &RefCell<Vec<html5ever::Attribute>>,
1267 ) -> HashMap<String, String> {
1268 let mut styles = HashMap::new();
1269
1270 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1271 for decl in style.split(';') {
1272 let mut parts = decl.splitn(2, ':');
1273 if let Some((key, value)) = parts.next().zip(parts.next()) {
1274 styles.insert(
1275 key.trim().to_lowercase().to_string(),
1276 value.trim().to_string(),
1277 );
1278 }
1279 }
1280 }
1281
1282 styles
1283 }
1284
1285 fn extract_image(
1286 &self,
1287 source_range: Range<usize>,
1288 attrs: &RefCell<Vec<html5ever::Attribute>>,
1289 ) -> Option<Image> {
1290 let src = Self::attr_value(attrs, local_name!("src"))?;
1291
1292 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1293
1294 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1295 image.set_alt_text(alt.into());
1296 }
1297
1298 let styles = Self::extract_styles_from_attributes(attrs);
1299
1300 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1301 .or_else(|| styles.get("width").cloned())
1302 .and_then(|width| Self::parse_html_element_dimension(&width))
1303 {
1304 image.set_width(width);
1305 }
1306
1307 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1308 .or_else(|| styles.get("height").cloned())
1309 .and_then(|height| Self::parse_html_element_dimension(&height))
1310 {
1311 image.set_height(height);
1312 }
1313
1314 Some(image)
1315 }
1316
1317 fn extract_html_list(
1318 &self,
1319 node: &Rc<markup5ever_rcdom::Node>,
1320 ordered: bool,
1321 depth: u16,
1322 source_range: Range<usize>,
1323 ) -> Option<Vec<ParsedMarkdownElement>> {
1324 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1325
1326 for (index, node) in node.children.borrow().iter().enumerate() {
1327 match &node.data {
1328 markup5ever_rcdom::NodeData::Element { name, .. } => {
1329 if local_name!("li") != name.local {
1330 continue;
1331 }
1332
1333 let mut content = Vec::new();
1334 self.consume_children(
1335 source_range.clone(),
1336 node,
1337 &mut content,
1338 &ParseHtmlNodeContext {
1339 list_item_depth: depth + 1,
1340 },
1341 );
1342
1343 if !content.is_empty() {
1344 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1345 depth,
1346 source_range: source_range.clone(),
1347 item_type: if ordered {
1348 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1349 } else {
1350 ParsedMarkdownListItemType::Unordered
1351 },
1352 content,
1353 nested: true,
1354 }));
1355 }
1356 }
1357 _ => {}
1358 }
1359 }
1360
1361 if list_items.is_empty() {
1362 None
1363 } else {
1364 Some(list_items)
1365 }
1366 }
1367
1368 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1369 if value.ends_with("%") {
1370 value
1371 .trim_end_matches("%")
1372 .parse::<f32>()
1373 .ok()
1374 .map(|value| relative(value / 100.))
1375 } else {
1376 value
1377 .trim_end_matches("px")
1378 .parse()
1379 .ok()
1380 .map(|value| px(value).into())
1381 }
1382 }
1383
1384 fn extract_html_blockquote(
1385 &self,
1386 node: &Rc<markup5ever_rcdom::Node>,
1387 source_range: Range<usize>,
1388 ) -> Option<ParsedMarkdownBlockQuote> {
1389 let mut children = Vec::new();
1390 self.consume_children(
1391 source_range.clone(),
1392 node,
1393 &mut children,
1394 &ParseHtmlNodeContext::default(),
1395 );
1396
1397 if children.is_empty() {
1398 None
1399 } else {
1400 Some(ParsedMarkdownBlockQuote {
1401 children,
1402 source_range,
1403 })
1404 }
1405 }
1406
1407 fn extract_html_table(
1408 &self,
1409 node: &Rc<markup5ever_rcdom::Node>,
1410 source_range: Range<usize>,
1411 ) -> Option<ParsedMarkdownTable> {
1412 let mut header_rows = Vec::new();
1413 let mut body_rows = Vec::new();
1414 let mut caption = None;
1415
1416 // node should be a thead, tbody or caption element
1417 for node in node.children.borrow().iter() {
1418 match &node.data {
1419 markup5ever_rcdom::NodeData::Element { name, .. } => {
1420 if local_name!("caption") == name.local {
1421 let mut paragraph = MarkdownParagraph::new();
1422 self.parse_paragraph(
1423 source_range.clone(),
1424 node,
1425 &mut paragraph,
1426 &mut Vec::new(),
1427 &mut Vec::new(),
1428 );
1429 caption = Some(paragraph);
1430 }
1431 if local_name!("thead") == name.local {
1432 // node should be a tr element
1433 for node in node.children.borrow().iter() {
1434 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1435 header_rows.push(row);
1436 }
1437 }
1438 } else if local_name!("tbody") == name.local {
1439 // node should be a tr element
1440 for node in node.children.borrow().iter() {
1441 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1442 body_rows.push(row);
1443 }
1444 }
1445 }
1446 }
1447 _ => {}
1448 }
1449 }
1450
1451 if !header_rows.is_empty() || !body_rows.is_empty() {
1452 Some(ParsedMarkdownTable {
1453 source_range,
1454 body: body_rows,
1455 header: header_rows,
1456 caption,
1457 })
1458 } else {
1459 None
1460 }
1461 }
1462}
1463
1464#[cfg(test)]
1465mod tests {
1466 use super::*;
1467 use ParsedMarkdownListItemType::*;
1468 use core::panic;
1469 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1470 use language::{HighlightId, LanguageRegistry};
1471 use pretty_assertions::assert_eq;
1472
1473 async fn parse(input: &str) -> ParsedMarkdown {
1474 parse_markdown(input, None, None).await
1475 }
1476
1477 #[gpui::test]
1478 async fn test_headings() {
1479 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1480
1481 assert_eq!(
1482 parsed.children,
1483 vec![
1484 h1(text("Heading one", 2..13), 0..14),
1485 h2(text("Heading two", 17..28), 14..29),
1486 h3(text("Heading three", 33..46), 29..46),
1487 ]
1488 );
1489 }
1490
1491 #[gpui::test]
1492 async fn test_newlines_dont_new_paragraphs() {
1493 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1494
1495 assert_eq!(
1496 parsed.children,
1497 vec![p("Some text that is bolded and italicized", 0..46)]
1498 );
1499 }
1500
1501 #[gpui::test]
1502 async fn test_heading_with_paragraph() {
1503 let parsed = parse("# Zed\nThe editor").await;
1504
1505 assert_eq!(
1506 parsed.children,
1507 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1508 );
1509 }
1510
1511 #[gpui::test]
1512 async fn test_double_newlines_do_new_paragraphs() {
1513 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1514
1515 assert_eq!(
1516 parsed.children,
1517 vec![
1518 p("Some text that is bolded", 0..29),
1519 p("and italicized", 31..47),
1520 ]
1521 );
1522 }
1523
1524 #[gpui::test]
1525 async fn test_bold_italic_text() {
1526 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1527
1528 assert_eq!(
1529 parsed.children,
1530 vec![p("Some text that is bolded and italicized", 0..45)]
1531 );
1532 }
1533
1534 #[gpui::test]
1535 async fn test_nested_bold_strikethrough_text() {
1536 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1537
1538 assert_eq!(parsed.children.len(), 1);
1539 assert_eq!(
1540 parsed.children[0],
1541 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1542 ParsedMarkdownText {
1543 source_range: 0..35,
1544 contents: "Some bostrikethroughld text".into(),
1545 highlights: Vec::new(),
1546 regions: Vec::new(),
1547 }
1548 )])
1549 );
1550
1551 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1552 text
1553 } else {
1554 panic!("Expected a paragraph");
1555 };
1556
1557 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1558 text
1559 } else {
1560 panic!("Expected a text");
1561 };
1562
1563 assert_eq!(
1564 paragraph.highlights,
1565 vec![
1566 (
1567 5..7,
1568 MarkdownHighlight::Style(MarkdownHighlightStyle {
1569 weight: FontWeight::BOLD,
1570 ..Default::default()
1571 }),
1572 ),
1573 (
1574 7..20,
1575 MarkdownHighlight::Style(MarkdownHighlightStyle {
1576 weight: FontWeight::BOLD,
1577 strikethrough: true,
1578 ..Default::default()
1579 }),
1580 ),
1581 (
1582 20..22,
1583 MarkdownHighlight::Style(MarkdownHighlightStyle {
1584 weight: FontWeight::BOLD,
1585 ..Default::default()
1586 }),
1587 ),
1588 ]
1589 );
1590 }
1591
1592 #[gpui::test]
1593 async fn test_html_inline_style_elements() {
1594 let parsed =
1595 parse("<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>").await;
1596
1597 assert_eq!(1, parsed.children.len());
1598 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1599 chunks
1600 } else {
1601 panic!("Expected a paragraph");
1602 };
1603
1604 assert_eq!(1, chunks.len());
1605 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1606 text
1607 } else {
1608 panic!("Expected a paragraph");
1609 };
1610
1611 assert_eq!(0..205, text.source_range);
1612 assert_eq!(
1613 "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text",
1614 text.contents.as_str(),
1615 );
1616 assert_eq!(
1617 vec![
1618 (
1619 10..21,
1620 MarkdownHighlight::Style(MarkdownHighlightStyle {
1621 weight: FontWeight(700.0),
1622 ..Default::default()
1623 },),
1624 ),
1625 (
1626 32..41,
1627 MarkdownHighlight::Style(MarkdownHighlightStyle {
1628 weight: FontWeight(700.0),
1629 ..Default::default()
1630 },),
1631 ),
1632 (
1633 52..63,
1634 MarkdownHighlight::Style(MarkdownHighlightStyle {
1635 italic: true,
1636 weight: FontWeight(400.0),
1637 ..Default::default()
1638 },),
1639 ),
1640 (
1641 74..89,
1642 MarkdownHighlight::Style(MarkdownHighlightStyle {
1643 weight: FontWeight(400.0),
1644 oblique: true,
1645 ..Default::default()
1646 },),
1647 ),
1648 (
1649 100..112,
1650 MarkdownHighlight::Style(MarkdownHighlightStyle {
1651 strikethrough: true,
1652 weight: FontWeight(400.0),
1653 ..Default::default()
1654 },),
1655 ),
1656 (
1657 123..136,
1658 MarkdownHighlight::Style(MarkdownHighlightStyle {
1659 underline: true,
1660 weight: FontWeight(400.0,),
1661 ..Default::default()
1662 },),
1663 ),
1664 ],
1665 text.highlights
1666 );
1667 }
1668
1669 #[gpui::test]
1670 async fn test_html_href_element() {
1671 let parsed =
1672 parse("<p>Some text <a href=\"https://example.com\">link</a> more text</p>").await;
1673
1674 assert_eq!(1, parsed.children.len());
1675 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1676 chunks
1677 } else {
1678 panic!("Expected a paragraph");
1679 };
1680
1681 assert_eq!(1, chunks.len());
1682 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1683 text
1684 } else {
1685 panic!("Expected a paragraph");
1686 };
1687
1688 assert_eq!(0..65, text.source_range);
1689 assert_eq!("Some text link more text", text.contents.as_str(),);
1690 assert_eq!(
1691 vec![(
1692 10..14,
1693 MarkdownHighlight::Style(MarkdownHighlightStyle {
1694 link: true,
1695 ..Default::default()
1696 },),
1697 )],
1698 text.highlights
1699 );
1700 assert_eq!(
1701 vec![(
1702 10..14,
1703 ParsedRegion {
1704 code: false,
1705 link: Some(Link::Web {
1706 url: "https://example.com".into()
1707 })
1708 }
1709 )],
1710 text.regions
1711 )
1712 }
1713
1714 #[gpui::test]
1715 async fn test_text_with_inline_html() {
1716 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1717
1718 assert_eq!(
1719 parsed.children,
1720 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1721 );
1722 }
1723
1724 #[gpui::test]
1725 async fn test_raw_links_detection() {
1726 let parsed = parse("Checkout this https://zed.dev link").await;
1727
1728 assert_eq!(
1729 parsed.children,
1730 vec![p("Checkout this https://zed.dev link", 0..34)]
1731 );
1732 }
1733
1734 #[gpui::test]
1735 async fn test_empty_image() {
1736 let parsed = parse("![]()").await;
1737
1738 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1739 text
1740 } else {
1741 panic!("Expected a paragraph");
1742 };
1743 assert_eq!(paragraph.len(), 0);
1744 }
1745
1746 #[gpui::test]
1747 async fn test_image_links_detection() {
1748 let parsed = parse("").await;
1749
1750 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1751 text
1752 } else {
1753 panic!("Expected a paragraph");
1754 };
1755 assert_eq!(
1756 paragraph[0],
1757 MarkdownParagraphChunk::Image(Image {
1758 source_range: 0..111,
1759 link: Link::Web {
1760 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1761 },
1762 alt_text: Some("test".into()),
1763 height: None,
1764 width: None,
1765 },)
1766 );
1767 }
1768
1769 #[gpui::test]
1770 async fn test_image_alt_text() {
1771 let parsed = parse("[](https://zed.dev)\n ").await;
1772
1773 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1774 text
1775 } else {
1776 panic!("Expected a paragraph");
1777 };
1778 assert_eq!(
1779 paragraph[0],
1780 MarkdownParagraphChunk::Image(Image {
1781 source_range: 0..142,
1782 link: Link::Web {
1783 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1784 },
1785 alt_text: Some("Zed".into()),
1786 height: None,
1787 width: None,
1788 },)
1789 );
1790 }
1791
1792 #[gpui::test]
1793 async fn test_image_without_alt_text() {
1794 let parsed = parse("").await;
1795
1796 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1797 text
1798 } else {
1799 panic!("Expected a paragraph");
1800 };
1801 assert_eq!(
1802 paragraph[0],
1803 MarkdownParagraphChunk::Image(Image {
1804 source_range: 0..31,
1805 link: Link::Web {
1806 url: "http://example.com/foo.png".to_string(),
1807 },
1808 alt_text: None,
1809 height: None,
1810 width: None,
1811 },)
1812 );
1813 }
1814
1815 #[gpui::test]
1816 async fn test_image_with_alt_text_containing_formatting() {
1817 let parsed = parse("").await;
1818
1819 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1820 panic!("Expected a paragraph");
1821 };
1822 assert_eq!(
1823 chunks,
1824 &[MarkdownParagraphChunk::Image(Image {
1825 source_range: 0..44,
1826 link: Link::Web {
1827 url: "http://example.com/foo.png".to_string(),
1828 },
1829 alt_text: Some("foo bar baz".into()),
1830 height: None,
1831 width: None,
1832 }),],
1833 );
1834 }
1835
1836 #[gpui::test]
1837 async fn test_images_with_text_in_between() {
1838 let parsed = parse(
1839 "\nLorem Ipsum\n",
1840 )
1841 .await;
1842
1843 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1844 text
1845 } else {
1846 panic!("Expected a paragraph");
1847 };
1848 assert_eq!(
1849 chunks,
1850 &vec![
1851 MarkdownParagraphChunk::Image(Image {
1852 source_range: 0..81,
1853 link: Link::Web {
1854 url: "http://example.com/foo.png".to_string(),
1855 },
1856 alt_text: Some("foo".into()),
1857 height: None,
1858 width: None,
1859 }),
1860 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1861 source_range: 0..81,
1862 contents: " Lorem Ipsum ".into(),
1863 highlights: Vec::new(),
1864 regions: Vec::new(),
1865 }),
1866 MarkdownParagraphChunk::Image(Image {
1867 source_range: 0..81,
1868 link: Link::Web {
1869 url: "http://example.com/bar.png".to_string(),
1870 },
1871 alt_text: Some("bar".into()),
1872 height: None,
1873 width: None,
1874 })
1875 ]
1876 );
1877 }
1878
/// Table-driven checks for `parse_html_element_dimension`: percentages,
/// pixel values, unit-less values, invalid inputs and decimals.
#[test]
fn test_parse_html_element_dimension() {
    let dimension = MarkdownParser::parse_html_element_dimension;
    let fraction = |value: f32| Some(DefiniteLength::Fraction(value));
    let pixels = |value: f32| Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(value))));

    // Percentage values
    for (input, expected) in [("50%", 0.5), ("100%", 1.0), ("25%", 0.25), ("0%", 0.0)] {
        assert_eq!(dimension(input), fraction(expected));
    }

    // Pixel values
    for (input, expected) in [("100px", 100.0), ("50px", 50.0), ("0px", 0.0)] {
        assert_eq!(dimension(input), pixels(expected));
    }

    // Values without units are treated as pixels
    for (input, expected) in [("100", 100.0), ("42", 42.0)] {
        assert_eq!(dimension(input), pixels(expected));
    }

    // Invalid values
    for input in ["invalid", "px", "%", "", "abc%", "abcpx"] {
        assert_eq!(dimension(input), None);
    }

    // Decimal values
    assert_eq!(dimension("50.5%"), fraction(0.505));
    assert_eq!(dimension("100.25px"), pixels(100.25));
    assert_eq!(dimension("42.0"), pixels(42.0));
}
1948
1949 #[gpui::test]
1950 async fn test_html_unordered_list() {
1951 let parsed = parse(
1952 "<ul>
1953 <li>Item 1</li>
1954 <li>Item 2</li>
1955 </ul>",
1956 )
1957 .await;
1958
1959 assert_eq!(
1960 ParsedMarkdown {
1961 children: vec![
1962 nested_list_item(
1963 0..82,
1964 1,
1965 ParsedMarkdownListItemType::Unordered,
1966 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1967 ),
1968 nested_list_item(
1969 0..82,
1970 1,
1971 ParsedMarkdownListItemType::Unordered,
1972 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1973 ),
1974 ]
1975 },
1976 parsed
1977 );
1978 }
1979
1980 #[gpui::test]
1981 async fn test_html_ordered_list() {
1982 let parsed = parse(
1983 "<ol>
1984 <li>Item 1</li>
1985 <li>Item 2</li>
1986 </ol>",
1987 )
1988 .await;
1989
1990 assert_eq!(
1991 ParsedMarkdown {
1992 children: vec![
1993 nested_list_item(
1994 0..82,
1995 1,
1996 ParsedMarkdownListItemType::Ordered(1),
1997 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1998 ),
1999 nested_list_item(
2000 0..82,
2001 1,
2002 ParsedMarkdownListItemType::Ordered(2),
2003 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
2004 ),
2005 ]
2006 },
2007 parsed
2008 );
2009 }
2010
2011 #[gpui::test]
2012 async fn test_html_nested_ordered_list() {
2013 let parsed = parse(
2014 "<ol>
2015 <li>Item 1</li>
2016 <li>Item 2
2017 <ol>
2018 <li>Sub-Item 1</li>
2019 <li>Sub-Item 2</li>
2020 </ol>
2021 </li>
2022 </ol>",
2023 )
2024 .await;
2025
2026 assert_eq!(
2027 ParsedMarkdown {
2028 children: vec![
2029 nested_list_item(
2030 0..216,
2031 1,
2032 ParsedMarkdownListItemType::Ordered(1),
2033 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2034 ),
2035 nested_list_item(
2036 0..216,
2037 1,
2038 ParsedMarkdownListItemType::Ordered(2),
2039 vec![
2040 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2041 nested_list_item(
2042 0..216,
2043 2,
2044 ParsedMarkdownListItemType::Ordered(1),
2045 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2046 ),
2047 nested_list_item(
2048 0..216,
2049 2,
2050 ParsedMarkdownListItemType::Ordered(2),
2051 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2052 ),
2053 ]
2054 ),
2055 ]
2056 },
2057 parsed
2058 );
2059 }
2060
2061 #[gpui::test]
2062 async fn test_html_nested_unordered_list() {
2063 let parsed = parse(
2064 "<ul>
2065 <li>Item 1</li>
2066 <li>Item 2
2067 <ul>
2068 <li>Sub-Item 1</li>
2069 <li>Sub-Item 2</li>
2070 </ul>
2071 </li>
2072 </ul>",
2073 )
2074 .await;
2075
2076 assert_eq!(
2077 ParsedMarkdown {
2078 children: vec![
2079 nested_list_item(
2080 0..216,
2081 1,
2082 ParsedMarkdownListItemType::Unordered,
2083 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2084 ),
2085 nested_list_item(
2086 0..216,
2087 1,
2088 ParsedMarkdownListItemType::Unordered,
2089 vec![
2090 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2091 nested_list_item(
2092 0..216,
2093 2,
2094 ParsedMarkdownListItemType::Unordered,
2095 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2096 ),
2097 nested_list_item(
2098 0..216,
2099 2,
2100 ParsedMarkdownListItemType::Unordered,
2101 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2102 ),
2103 ]
2104 ),
2105 ]
2106 },
2107 parsed
2108 );
2109 }
2110
2111 #[gpui::test]
2112 async fn test_inline_html_image_tag() {
2113 let parsed =
2114 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
2115 .await;
2116
2117 assert_eq!(
2118 ParsedMarkdown {
2119 children: vec![ParsedMarkdownElement::Paragraph(vec![
2120 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2121 source_range: 0..71,
2122 contents: "Some text".into(),
2123 highlights: Default::default(),
2124 regions: Default::default()
2125 }),
2126 MarkdownParagraphChunk::Image(Image {
2127 source_range: 0..71,
2128 link: Link::Web {
2129 url: "http://example.com/foo.png".to_string(),
2130 },
2131 alt_text: None,
2132 height: None,
2133 width: None,
2134 }),
2135 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2136 source_range: 0..71,
2137 contents: " some more text".into(),
2138 highlights: Default::default(),
2139 regions: Default::default()
2140 }),
2141 ])]
2142 },
2143 parsed
2144 );
2145 }
2146
2147 #[gpui::test]
2148 async fn test_html_block_quote() {
2149 let parsed = parse(
2150 "<blockquote>
2151 <p>some description</p>
2152 </blockquote>",
2153 )
2154 .await;
2155
2156 assert_eq!(
2157 ParsedMarkdown {
2158 children: vec![block_quote(
2159 vec![ParsedMarkdownElement::Paragraph(text(
2160 "some description",
2161 0..78
2162 ))],
2163 0..78,
2164 )]
2165 },
2166 parsed
2167 );
2168 }
2169
2170 #[gpui::test]
2171 async fn test_html_nested_block_quote() {
2172 let parsed = parse(
2173 "<blockquote>
2174 <p>some description</p>
2175 <blockquote>
2176 <p>second description</p>
2177 </blockquote>
2178 </blockquote>",
2179 )
2180 .await;
2181
2182 assert_eq!(
2183 ParsedMarkdown {
2184 children: vec![block_quote(
2185 vec![
2186 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
2187 block_quote(
2188 vec![ParsedMarkdownElement::Paragraph(text(
2189 "second description",
2190 0..179
2191 ))],
2192 0..179,
2193 )
2194 ],
2195 0..179,
2196 )]
2197 },
2198 parsed
2199 );
2200 }
2201
2202 #[gpui::test]
2203 async fn test_html_table() {
2204 let parsed = parse(
2205 "<table>
2206 <thead>
2207 <tr>
2208 <th>Id</th>
2209 <th>Name</th>
2210 </tr>
2211 </thead>
2212 <tbody>
2213 <tr>
2214 <td>1</td>
2215 <td>Chris</td>
2216 </tr>
2217 <tr>
2218 <td>2</td>
2219 <td>Dennis</td>
2220 </tr>
2221 </tbody>
2222 </table>",
2223 )
2224 .await;
2225
2226 assert_eq!(
2227 ParsedMarkdown {
2228 children: vec![ParsedMarkdownElement::Table(table(
2229 0..366,
2230 None,
2231 vec![row(vec![
2232 column(
2233 1,
2234 1,
2235 true,
2236 text("Id", 0..366),
2237 ParsedMarkdownTableAlignment::Center
2238 ),
2239 column(
2240 1,
2241 1,
2242 true,
2243 text("Name ", 0..366),
2244 ParsedMarkdownTableAlignment::Center
2245 )
2246 ])],
2247 vec![
2248 row(vec![
2249 column(
2250 1,
2251 1,
2252 false,
2253 text("1", 0..366),
2254 ParsedMarkdownTableAlignment::None
2255 ),
2256 column(
2257 1,
2258 1,
2259 false,
2260 text("Chris", 0..366),
2261 ParsedMarkdownTableAlignment::None
2262 )
2263 ]),
2264 row(vec![
2265 column(
2266 1,
2267 1,
2268 false,
2269 text("2", 0..366),
2270 ParsedMarkdownTableAlignment::None
2271 ),
2272 column(
2273 1,
2274 1,
2275 false,
2276 text("Dennis", 0..366),
2277 ParsedMarkdownTableAlignment::None
2278 )
2279 ]),
2280 ],
2281 ))],
2282 },
2283 parsed
2284 );
2285 }
2286
2287 #[gpui::test]
2288 async fn test_html_table_with_caption() {
2289 let parsed = parse(
2290 "<table>
2291 <caption>My Table</caption>
2292 <tbody>
2293 <tr>
2294 <td>1</td>
2295 <td>Chris</td>
2296 </tr>
2297 <tr>
2298 <td>2</td>
2299 <td>Dennis</td>
2300 </tr>
2301 </tbody>
2302 </table>",
2303 )
2304 .await;
2305
2306 assert_eq!(
2307 ParsedMarkdown {
2308 children: vec![ParsedMarkdownElement::Table(table(
2309 0..280,
2310 Some(vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2311 source_range: 0..280,
2312 contents: "My Table".into(),
2313 highlights: Default::default(),
2314 regions: Default::default()
2315 })]),
2316 vec![],
2317 vec![
2318 row(vec![
2319 column(
2320 1,
2321 1,
2322 false,
2323 text("1", 0..280),
2324 ParsedMarkdownTableAlignment::None
2325 ),
2326 column(
2327 1,
2328 1,
2329 false,
2330 text("Chris", 0..280),
2331 ParsedMarkdownTableAlignment::None
2332 )
2333 ]),
2334 row(vec![
2335 column(
2336 1,
2337 1,
2338 false,
2339 text("2", 0..280),
2340 ParsedMarkdownTableAlignment::None
2341 ),
2342 column(
2343 1,
2344 1,
2345 false,
2346 text("Dennis", 0..280),
2347 ParsedMarkdownTableAlignment::None
2348 )
2349 ]),
2350 ],
2351 ))],
2352 },
2353 parsed
2354 );
2355 }
2356
2357 #[gpui::test]
2358 async fn test_html_table_without_headings() {
2359 let parsed = parse(
2360 "<table>
2361 <tbody>
2362 <tr>
2363 <td>1</td>
2364 <td>Chris</td>
2365 </tr>
2366 <tr>
2367 <td>2</td>
2368 <td>Dennis</td>
2369 </tr>
2370 </tbody>
2371 </table>",
2372 )
2373 .await;
2374
2375 assert_eq!(
2376 ParsedMarkdown {
2377 children: vec![ParsedMarkdownElement::Table(table(
2378 0..240,
2379 None,
2380 vec![],
2381 vec![
2382 row(vec![
2383 column(
2384 1,
2385 1,
2386 false,
2387 text("1", 0..240),
2388 ParsedMarkdownTableAlignment::None
2389 ),
2390 column(
2391 1,
2392 1,
2393 false,
2394 text("Chris", 0..240),
2395 ParsedMarkdownTableAlignment::None
2396 )
2397 ]),
2398 row(vec![
2399 column(
2400 1,
2401 1,
2402 false,
2403 text("2", 0..240),
2404 ParsedMarkdownTableAlignment::None
2405 ),
2406 column(
2407 1,
2408 1,
2409 false,
2410 text("Dennis", 0..240),
2411 ParsedMarkdownTableAlignment::None
2412 )
2413 ]),
2414 ],
2415 ))],
2416 },
2417 parsed
2418 );
2419 }
2420
2421 #[gpui::test]
2422 async fn test_html_table_without_body() {
2423 let parsed = parse(
2424 "<table>
2425 <thead>
2426 <tr>
2427 <th>Id</th>
2428 <th>Name</th>
2429 </tr>
2430 </thead>
2431 </table>",
2432 )
2433 .await;
2434
2435 assert_eq!(
2436 ParsedMarkdown {
2437 children: vec![ParsedMarkdownElement::Table(table(
2438 0..150,
2439 None,
2440 vec![row(vec![
2441 column(
2442 1,
2443 1,
2444 true,
2445 text("Id", 0..150),
2446 ParsedMarkdownTableAlignment::Center
2447 ),
2448 column(
2449 1,
2450 1,
2451 true,
2452 text("Name", 0..150),
2453 ParsedMarkdownTableAlignment::Center
2454 )
2455 ])],
2456 vec![],
2457 ))],
2458 },
2459 parsed
2460 );
2461 }
2462
2463 #[gpui::test]
2464 async fn test_html_heading_tags() {
2465 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2466
2467 assert_eq!(
2468 ParsedMarkdown {
2469 children: vec![
2470 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2471 level: HeadingLevel::H1,
2472 source_range: 0..96,
2473 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2474 source_range: 0..96,
2475 contents: "Heading".into(),
2476 highlights: Vec::default(),
2477 regions: Vec::default()
2478 })],
2479 }),
2480 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2481 level: HeadingLevel::H2,
2482 source_range: 0..96,
2483 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2484 source_range: 0..96,
2485 contents: "Heading".into(),
2486 highlights: Vec::default(),
2487 regions: Vec::default()
2488 })],
2489 }),
2490 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2491 level: HeadingLevel::H3,
2492 source_range: 0..96,
2493 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2494 source_range: 0..96,
2495 contents: "Heading".into(),
2496 highlights: Vec::default(),
2497 regions: Vec::default()
2498 })],
2499 }),
2500 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2501 level: HeadingLevel::H4,
2502 source_range: 0..96,
2503 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2504 source_range: 0..96,
2505 contents: "Heading".into(),
2506 highlights: Vec::default(),
2507 regions: Vec::default()
2508 })],
2509 }),
2510 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2511 level: HeadingLevel::H5,
2512 source_range: 0..96,
2513 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2514 source_range: 0..96,
2515 contents: "Heading".into(),
2516 highlights: Vec::default(),
2517 regions: Vec::default()
2518 })],
2519 }),
2520 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2521 level: HeadingLevel::H6,
2522 source_range: 0..96,
2523 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2524 source_range: 0..96,
2525 contents: "Heading".into(),
2526 highlights: Vec::default(),
2527 regions: Vec::default()
2528 })],
2529 }),
2530 ],
2531 },
2532 parsed
2533 );
2534 }
2535
2536 #[gpui::test]
2537 async fn test_html_image_tag() {
2538 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2539
2540 assert_eq!(
2541 ParsedMarkdown {
2542 children: vec![ParsedMarkdownElement::Image(Image {
2543 source_range: 0..40,
2544 link: Link::Web {
2545 url: "http://example.com/foo.png".to_string(),
2546 },
2547 alt_text: None,
2548 height: None,
2549 width: None,
2550 })]
2551 },
2552 parsed
2553 );
2554 }
2555
2556 #[gpui::test]
2557 async fn test_html_image_tag_with_alt_text() {
2558 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2559
2560 assert_eq!(
2561 ParsedMarkdown {
2562 children: vec![ParsedMarkdownElement::Image(Image {
2563 source_range: 0..50,
2564 link: Link::Web {
2565 url: "http://example.com/foo.png".to_string(),
2566 },
2567 alt_text: Some("Foo".into()),
2568 height: None,
2569 width: None,
2570 })]
2571 },
2572 parsed
2573 );
2574 }
2575
2576 #[gpui::test]
2577 async fn test_html_image_tag_with_height_and_width() {
2578 let parsed =
2579 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2580
2581 assert_eq!(
2582 ParsedMarkdown {
2583 children: vec![ParsedMarkdownElement::Image(Image {
2584 source_range: 0..65,
2585 link: Link::Web {
2586 url: "http://example.com/foo.png".to_string(),
2587 },
2588 alt_text: None,
2589 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2590 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2591 })]
2592 },
2593 parsed
2594 );
2595 }
2596
2597 #[gpui::test]
2598 async fn test_html_image_style_tag_with_height_and_width() {
2599 let parsed = parse(
2600 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2601 )
2602 .await;
2603
2604 assert_eq!(
2605 ParsedMarkdown {
2606 children: vec![ParsedMarkdownElement::Image(Image {
2607 source_range: 0..75,
2608 link: Link::Web {
2609 url: "http://example.com/foo.png".to_string(),
2610 },
2611 alt_text: None,
2612 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2613 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2614 })]
2615 },
2616 parsed
2617 );
2618 }
2619
2620 #[gpui::test]
2621 async fn test_header_only_table() {
2622 let markdown = "\
2623| Header 1 | Header 2 |
2624|----------|----------|
2625
2626Some other content
2627";
2628
2629 let expected_table = table(
2630 0..48,
2631 None,
2632 vec![row(vec![
2633 column(
2634 1,
2635 1,
2636 true,
2637 text("Header 1", 1..11),
2638 ParsedMarkdownTableAlignment::None,
2639 ),
2640 column(
2641 1,
2642 1,
2643 true,
2644 text("Header 2", 12..22),
2645 ParsedMarkdownTableAlignment::None,
2646 ),
2647 ])],
2648 vec![],
2649 );
2650
2651 assert_eq!(
2652 parse(markdown).await.children[0],
2653 ParsedMarkdownElement::Table(expected_table)
2654 );
2655 }
2656
2657 #[gpui::test]
2658 async fn test_basic_table() {
2659 let markdown = "\
2660| Header 1 | Header 2 |
2661|----------|----------|
2662| Cell 1 | Cell 2 |
2663| Cell 3 | Cell 4 |";
2664
2665 let expected_table = table(
2666 0..95,
2667 None,
2668 vec![row(vec![
2669 column(
2670 1,
2671 1,
2672 true,
2673 text("Header 1", 1..11),
2674 ParsedMarkdownTableAlignment::None,
2675 ),
2676 column(
2677 1,
2678 1,
2679 true,
2680 text("Header 2", 12..22),
2681 ParsedMarkdownTableAlignment::None,
2682 ),
2683 ])],
2684 vec![
2685 row(vec![
2686 column(
2687 1,
2688 1,
2689 false,
2690 text("Cell 1", 49..59),
2691 ParsedMarkdownTableAlignment::None,
2692 ),
2693 column(
2694 1,
2695 1,
2696 false,
2697 text("Cell 2", 60..70),
2698 ParsedMarkdownTableAlignment::None,
2699 ),
2700 ]),
2701 row(vec![
2702 column(
2703 1,
2704 1,
2705 false,
2706 text("Cell 3", 73..83),
2707 ParsedMarkdownTableAlignment::None,
2708 ),
2709 column(
2710 1,
2711 1,
2712 false,
2713 text("Cell 4", 84..94),
2714 ParsedMarkdownTableAlignment::None,
2715 ),
2716 ]),
2717 ],
2718 );
2719
2720 assert_eq!(
2721 parse(markdown).await.children[0],
2722 ParsedMarkdownElement::Table(expected_table)
2723 );
2724 }
2725
2726 #[gpui::test]
2727 async fn test_list_basic() {
2728 let parsed = parse(
2729 "\
2730* Item 1
2731* Item 2
2732* Item 3
2733",
2734 )
2735 .await;
2736
2737 assert_eq!(
2738 parsed.children,
2739 vec![
2740 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2741 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2742 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2743 ],
2744 );
2745 }
2746
2747 #[gpui::test]
2748 async fn test_list_with_tasks() {
2749 let parsed = parse(
2750 "\
2751- [ ] TODO
2752- [x] Checked
2753",
2754 )
2755 .await;
2756
2757 assert_eq!(
2758 parsed.children,
2759 vec![
2760 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2761 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2762 ],
2763 );
2764 }
2765
2766 #[gpui::test]
2767 async fn test_list_with_indented_task() {
2768 let parsed = parse(
2769 "\
2770- [ ] TODO
2771 - [x] Checked
2772 - Unordered
2773 1. Number 1
2774 1. Number 2
27751. Number A
2776",
2777 )
2778 .await;
2779
2780 assert_eq!(
2781 parsed.children,
2782 vec![
2783 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2784 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2785 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2786 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2787 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2788 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2789 ],
2790 );
2791 }
2792
2793 #[gpui::test]
2794 async fn test_list_with_linebreak_is_handled_correctly() {
2795 let parsed = parse(
2796 "\
2797- [ ] Task 1
2798
2799- [x] Task 2
2800",
2801 )
2802 .await;
2803
2804 assert_eq!(
2805 parsed.children,
2806 vec![
2807 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2808 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2809 ],
2810 );
2811 }
2812
2813 #[gpui::test]
2814 async fn test_list_nested() {
2815 let parsed = parse(
2816 "\
2817* Item 1
2818* Item 2
2819* Item 3
2820
28211. Hello
28221. Two
2823 1. Three
28242. Four
28253. Five
2826
2827* First
2828 1. Hello
2829 1. Goodbyte
2830 - Inner
2831 - Inner
2832 2. Goodbyte
2833 - Next item empty
2834 -
2835* Last
2836",
2837 )
2838 .await;
2839
2840 assert_eq!(
2841 parsed.children,
2842 vec![
2843 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2844 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2845 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2846 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2847 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2848 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2849 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2850 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2851 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2852 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2853 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2854 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2855 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2856 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2857 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2858 list_item(186..190, 3, Unordered, vec![]),
2859 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2860 ]
2861 );
2862 }
2863
2864 #[gpui::test]
2865 async fn test_list_with_nested_content() {
2866 let parsed = parse(
2867 "\
2868* This is a list item with two paragraphs.
2869
2870 This is the second paragraph in the list item.
2871",
2872 )
2873 .await;
2874
2875 assert_eq!(
2876 parsed.children,
2877 vec![list_item(
2878 0..96,
2879 1,
2880 Unordered,
2881 vec![
2882 p("This is a list item with two paragraphs.", 4..44),
2883 p("This is the second paragraph in the list item.", 50..97)
2884 ],
2885 ),],
2886 );
2887 }
2888
2889 #[gpui::test]
2890 async fn test_list_item_with_inline_html() {
2891 let parsed = parse(
2892 "\
2893* This is a list item with an inline HTML <sometag>tag</sometag>.
2894",
2895 )
2896 .await;
2897
2898 assert_eq!(
2899 parsed.children,
2900 vec![list_item(
2901 0..67,
2902 1,
2903 Unordered,
2904 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2905 ),],
2906 );
2907 }
2908
2909 #[gpui::test]
2910 async fn test_nested_list_with_paragraph_inside() {
2911 let parsed = parse(
2912 "\
29131. a
2914 1. b
2915 1. c
2916
2917 text
2918
2919 1. d
2920",
2921 )
2922 .await;
2923
2924 assert_eq!(
2925 parsed.children,
2926 vec![
2927 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2928 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2929 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2930 p("text", 32..37),
2931 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2932 ],
2933 );
2934 }
2935
2936 #[gpui::test]
2937 async fn test_list_with_leading_text() {
2938 let parsed = parse(
2939 "\
2940* `code`
2941* **bold**
2942* [link](https://example.com)
2943",
2944 )
2945 .await;
2946
2947 assert_eq!(
2948 parsed.children,
2949 vec![
2950 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2951 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2952 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2953 ],
2954 );
2955 }
2956
2957 #[gpui::test]
2958 async fn test_simple_block_quote() {
2959 let parsed = parse("> Simple block quote with **styled text**").await;
2960
2961 assert_eq!(
2962 parsed.children,
2963 vec![block_quote(
2964 vec![p("Simple block quote with styled text", 2..41)],
2965 0..41
2966 )]
2967 );
2968 }
2969
2970 #[gpui::test]
2971 async fn test_simple_block_quote_with_multiple_lines() {
2972 let parsed = parse(
2973 "\
2974> # Heading
2975> More
2976> text
2977>
2978> More text
2979",
2980 )
2981 .await;
2982
2983 assert_eq!(
2984 parsed.children,
2985 vec![block_quote(
2986 vec![
2987 h1(text("Heading", 4..11), 2..12),
2988 p("More text", 14..26),
2989 p("More text", 30..40)
2990 ],
2991 0..40
2992 )]
2993 );
2994 }
2995
2996 #[gpui::test]
2997 async fn test_nested_block_quote() {
2998 let parsed = parse(
2999 "\
3000> A
3001>
3002> > # B
3003>
3004> C
3005
3006More text
3007",
3008 )
3009 .await;
3010
3011 assert_eq!(
3012 parsed.children,
3013 vec![
3014 block_quote(
3015 vec![
3016 p("A", 2..4),
3017 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
3018 p("C", 18..20)
3019 ],
3020 0..20
3021 ),
3022 p("More text", 21..31)
3023 ]
3024 );
3025 }
3026
3027 #[gpui::test]
3028 async fn test_code_block() {
3029 let parsed = parse(
3030 "\
3031```
3032fn main() {
3033 return 0;
3034}
3035```
3036",
3037 )
3038 .await;
3039
3040 assert_eq!(
3041 parsed.children,
3042 vec![code_block(
3043 None,
3044 "fn main() {\n return 0;\n}",
3045 0..35,
3046 None
3047 )]
3048 );
3049 }
3050
3051 #[gpui::test]
3052 async fn test_code_block_with_language(executor: BackgroundExecutor) {
3053 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
3054 language_registry.add(language::rust_lang());
3055
3056 let parsed = parse_markdown(
3057 "\
3058```rust
3059fn main() {
3060 return 0;
3061}
3062```
3063",
3064 None,
3065 Some(language_registry),
3066 )
3067 .await;
3068
3069 assert_eq!(
3070 parsed.children,
3071 vec![code_block(
3072 Some("rust".to_string()),
3073 "fn main() {\n return 0;\n}",
3074 0..39,
3075 Some(vec![])
3076 )]
3077 );
3078 }
3079
3080 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3081 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3082 source_range,
3083 level: HeadingLevel::H1,
3084 contents,
3085 })
3086 }
3087
3088 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3089 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3090 source_range,
3091 level: HeadingLevel::H2,
3092 contents,
3093 })
3094 }
3095
3096 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3097 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3098 source_range,
3099 level: HeadingLevel::H3,
3100 contents,
3101 })
3102 }
3103
3104 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
3105 ParsedMarkdownElement::Paragraph(text(contents, source_range))
3106 }
3107
3108 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
3109 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
3110 highlights: Vec::new(),
3111 regions: Vec::new(),
3112 source_range,
3113 contents: contents.to_string().into(),
3114 })]
3115 }
3116
3117 fn block_quote(
3118 children: Vec<ParsedMarkdownElement>,
3119 source_range: Range<usize>,
3120 ) -> ParsedMarkdownElement {
3121 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
3122 source_range,
3123 children,
3124 })
3125 }
3126
3127 fn code_block(
3128 language: Option<String>,
3129 code: &str,
3130 source_range: Range<usize>,
3131 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
3132 ) -> ParsedMarkdownElement {
3133 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
3134 source_range,
3135 language,
3136 contents: code.to_string().into(),
3137 highlights,
3138 })
3139 }
3140
3141 fn list_item(
3142 source_range: Range<usize>,
3143 depth: u16,
3144 item_type: ParsedMarkdownListItemType,
3145 content: Vec<ParsedMarkdownElement>,
3146 ) -> ParsedMarkdownElement {
3147 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3148 source_range,
3149 item_type,
3150 depth,
3151 content,
3152 nested: false,
3153 })
3154 }
3155
3156 fn nested_list_item(
3157 source_range: Range<usize>,
3158 depth: u16,
3159 item_type: ParsedMarkdownListItemType,
3160 content: Vec<ParsedMarkdownElement>,
3161 ) -> ParsedMarkdownElement {
3162 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3163 source_range,
3164 item_type,
3165 depth,
3166 content,
3167 nested: true,
3168 })
3169 }
3170
3171 fn table(
3172 source_range: Range<usize>,
3173 caption: Option<MarkdownParagraph>,
3174 header: Vec<ParsedMarkdownTableRow>,
3175 body: Vec<ParsedMarkdownTableRow>,
3176 ) -> ParsedMarkdownTable {
3177 ParsedMarkdownTable {
3178 source_range,
3179 header,
3180 body,
3181 caption,
3182 }
3183 }
3184
3185 fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
3186 ParsedMarkdownTableRow { columns }
3187 }
3188
3189 fn column(
3190 col_span: usize,
3191 row_span: usize,
3192 is_header: bool,
3193 children: MarkdownParagraph,
3194 alignment: ParsedMarkdownTableAlignment,
3195 ) -> ParsedMarkdownTableColumn {
3196 ParsedMarkdownTableColumn {
3197 col_span,
3198 row_span,
3199 is_header,
3200 children,
3201 alignment,
3202 }
3203 }
3204
3205 impl PartialEq for ParsedMarkdownTable {
3206 fn eq(&self, other: &Self) -> bool {
3207 self.source_range == other.source_range
3208 && self.header == other.header
3209 && self.body == other.body
3210 }
3211 }
3212
3213 impl PartialEq for ParsedMarkdownText {
3214 fn eq(&self, other: &Self) -> bool {
3215 self.source_range == other.source_range && self.contents == other.contents
3216 }
3217 }
3218}