1use crate::{
2 markdown_elements::*,
3 markdown_minifier::{Minifier, MinifierOptions},
4};
5use async_recursion::async_recursion;
6use collections::FxHashMap;
7use gpui::{DefiniteLength, FontWeight, px, relative};
8use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
9use language::LanguageRegistry;
10use markup5ever_rcdom::RcDom;
11use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
12use std::{
13 cell::RefCell, collections::HashMap, mem, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec,
14};
15use ui::SharedString;
16
17pub async fn parse_markdown(
18 markdown_input: &str,
19 file_location_directory: Option<PathBuf>,
20 language_registry: Option<Arc<LanguageRegistry>>,
21) -> ParsedMarkdown {
22 let mut options = Options::all();
23 options.remove(pulldown_cmark::Options::ENABLE_DEFINITION_LIST);
24
25 let parser = Parser::new_ext(markdown_input, options);
26 let parser = MarkdownParser::new(
27 parser.into_offset_iter().collect(),
28 file_location_directory,
29 language_registry,
30 );
31 let renderer = parser.parse_document().await;
32 ParsedMarkdown {
33 children: renderer.parsed,
34 }
35}
36
37fn cleanup_html(source: &str) -> Vec<u8> {
38 let mut writer = std::io::Cursor::new(Vec::new());
39 let mut reader = std::io::Cursor::new(source);
40 let mut minify = Minifier::new(
41 &mut writer,
42 MinifierOptions {
43 omit_doctype: true,
44 collapse_whitespace: true,
45 ..Default::default()
46 },
47 );
48 if let Ok(()) = minify.minify(&mut reader) {
49 writer.into_inner()
50 } else {
51 source.bytes().collect()
52 }
53}
54
/// Cursor-based parser over a pre-collected list of pulldown-cmark events.
struct MarkdownParser<'a> {
    /// Every event of the document paired with its byte range in the markdown source
    tokens: Vec<(Event<'a>, Range<usize>)>,
    /// The current index in the tokens array
    cursor: usize,
    /// The blocks that we have successfully parsed so far
    parsed: Vec<ParsedMarkdownElement>,
    /// Passed to `Link::identify` / `Image::identify` when resolving link and image destinations
    file_location_directory: Option<PathBuf>,
    /// Used to look up a language for highlighting fenced code blocks; `None` disables highlighting
    language_registry: Option<Arc<LanguageRegistry>>,
}
64
/// State threaded through the recursive walk over an HTML block's DOM.
#[derive(Debug)]
struct ParseHtmlNodeContext {
    /// Depth handed to `extract_html_list` when a `<ul>` / `<ol>` element is encountered
    list_item_depth: u16,
}

impl Default for ParseHtmlNodeContext {
    /// Starts at depth 1 (not 0), the same initial depth `parse_list` uses for markdown lists.
    fn default() -> Self {
        Self { list_item_depth: 1 }
    }
}
75
/// A list item that is still being assembled by `parse_list`.
struct MarkdownListItem {
    /// Blocks collected for this item so far
    content: Vec<ParsedMarkdownElement>,
    /// Whether the item is unordered, ordered or a task
    item_type: ParsedMarkdownListItemType,
}

impl Default for MarkdownListItem {
    /// An empty, unordered item; `parse_list` overwrites `item_type` once the item's kind is known.
    fn default() -> Self {
        Self {
            content: Vec::new(),
            item_type: ParsedMarkdownListItemType::Unordered,
        }
    }
}
89
90impl<'a> MarkdownParser<'a> {
91 fn new(
92 tokens: Vec<(Event<'a>, Range<usize>)>,
93 file_location_directory: Option<PathBuf>,
94 language_registry: Option<Arc<LanguageRegistry>>,
95 ) -> Self {
96 Self {
97 tokens,
98 file_location_directory,
99 language_registry,
100 cursor: 0,
101 parsed: vec![],
102 }
103 }
104
105 fn eof(&self) -> bool {
106 if self.tokens.is_empty() {
107 return true;
108 }
109 self.cursor >= self.tokens.len() - 1
110 }
111
112 fn peek(&self, steps: usize) -> Option<&(Event<'_>, Range<usize>)> {
113 if self.eof() || (steps + self.cursor) >= self.tokens.len() {
114 return self.tokens.last();
115 }
116 self.tokens.get(self.cursor + steps)
117 }
118
119 fn previous(&self) -> Option<&(Event<'_>, Range<usize>)> {
120 if self.cursor == 0 || self.cursor > self.tokens.len() {
121 return None;
122 }
123 self.tokens.get(self.cursor - 1)
124 }
125
    /// Returns the token at the cursor; this is `peek(0)`, so at end-of-input it yields the last
    /// token and it is `None` only when there are no tokens.
    fn current(&self) -> Option<&(Event<'_>, Range<usize>)> {
        self.peek(0)
    }
129
130 fn current_event(&self) -> Option<&Event<'_>> {
131 self.current().map(|(event, _)| event)
132 }
133
134 fn is_text_like(event: &Event) -> bool {
135 match event {
136 Event::Text(_)
137 // Represent an inline code block
138 | Event::Code(_)
139 | Event::Html(_)
140 | Event::InlineHtml(_)
141 | Event::FootnoteReference(_)
142 | Event::Start(Tag::Link { .. })
143 | Event::Start(Tag::Emphasis)
144 | Event::Start(Tag::Strong)
145 | Event::Start(Tag::Strikethrough)
146 | Event::Start(Tag::Image { .. }) => {
147 true
148 }
149 _ => false,
150 }
151 }
152
153 async fn parse_document(mut self) -> Self {
154 while !self.eof() {
155 if let Some(block) = self.parse_block().await {
156 self.parsed.extend(block);
157 } else {
158 self.cursor += 1;
159 }
160 }
161 self
162 }
163
164 #[async_recursion]
165 async fn parse_block(&mut self) -> Option<Vec<ParsedMarkdownElement>> {
166 let (current, source_range) = self.current().unwrap();
167 let source_range = source_range.clone();
168 match current {
169 Event::Start(tag) => match tag {
170 Tag::Paragraph => {
171 self.cursor += 1;
172 let text = self.parse_text(false, Some(source_range));
173 Some(vec![ParsedMarkdownElement::Paragraph(text)])
174 }
175 Tag::Heading { level, .. } => {
176 let level = *level;
177 self.cursor += 1;
178 let heading = self.parse_heading(level);
179 Some(vec![ParsedMarkdownElement::Heading(heading)])
180 }
181 Tag::Table(alignment) => {
182 let alignment = alignment.clone();
183 self.cursor += 1;
184 let table = self.parse_table(alignment);
185 Some(vec![ParsedMarkdownElement::Table(table)])
186 }
187 Tag::List(order) => {
188 let order = *order;
189 self.cursor += 1;
190 let list = self.parse_list(order).await;
191 Some(list)
192 }
193 Tag::BlockQuote(_kind) => {
194 self.cursor += 1;
195 let block_quote = self.parse_block_quote().await;
196 Some(vec![ParsedMarkdownElement::BlockQuote(block_quote)])
197 }
198 Tag::CodeBlock(kind) => {
199 let language = match kind {
200 pulldown_cmark::CodeBlockKind::Indented => None,
201 pulldown_cmark::CodeBlockKind::Fenced(language) => {
202 if language.is_empty() {
203 None
204 } else {
205 Some(language.to_string())
206 }
207 }
208 };
209
210 self.cursor += 1;
211
212 let code_block = self.parse_code_block(language).await?;
213 Some(vec![ParsedMarkdownElement::CodeBlock(code_block)])
214 }
215 Tag::HtmlBlock => {
216 self.cursor += 1;
217
218 Some(self.parse_html_block().await)
219 }
220 _ => None,
221 },
222 Event::Rule => {
223 self.cursor += 1;
224 Some(vec![ParsedMarkdownElement::HorizontalRule(source_range)])
225 }
226 _ => None,
227 }
228 }
229
230 fn parse_text(
231 &mut self,
232 should_complete_on_soft_break: bool,
233 source_range: Option<Range<usize>>,
234 ) -> MarkdownParagraph {
235 let source_range = source_range.unwrap_or_else(|| {
236 self.current()
237 .map(|(_, range)| range.clone())
238 .unwrap_or_default()
239 });
240
241 let mut markdown_text_like = Vec::new();
242 let mut text = String::new();
243 let mut bold_depth = 0;
244 let mut italic_depth = 0;
245 let mut strikethrough_depth = 0;
246 let mut link: Option<Link> = None;
247 let mut image: Option<Image> = None;
248 let mut regions: Vec<(Range<usize>, ParsedRegion)> = vec![];
249 let mut highlights: Vec<(Range<usize>, MarkdownHighlight)> = vec![];
250 let mut link_urls: Vec<String> = vec![];
251 let mut link_ranges: Vec<Range<usize>> = vec![];
252
253 loop {
254 if self.eof() {
255 break;
256 }
257
258 let (current, _) = self.current().unwrap();
259 let prev_len = text.len();
260 match current {
261 Event::SoftBreak => {
262 if should_complete_on_soft_break {
263 break;
264 }
265 text.push(' ');
266 }
267
268 Event::HardBreak => {
269 text.push('\n');
270 }
271
272 // We want to ignore any inline HTML tags in the text but keep
273 // the text between them
274 Event::InlineHtml(_) => {}
275
276 Event::Text(t) => {
277 text.push_str(t.as_ref());
278 let mut style = MarkdownHighlightStyle::default();
279
280 if bold_depth > 0 {
281 style.weight = FontWeight::BOLD;
282 }
283
284 if italic_depth > 0 {
285 style.italic = true;
286 }
287
288 if strikethrough_depth > 0 {
289 style.strikethrough = true;
290 }
291
292 let last_run_len = if let Some(link) = link.clone() {
293 regions.push((
294 prev_len..text.len(),
295 ParsedRegion {
296 code: false,
297 link: Some(link),
298 },
299 ));
300 style.link = true;
301 prev_len
302 } else {
303 // Manually scan for links
304 let mut finder = linkify::LinkFinder::new();
305 finder.kinds(&[linkify::LinkKind::Url]);
306 let mut last_link_len = prev_len;
307 for link in finder.links(t) {
308 let start = prev_len + link.start();
309 let end = prev_len + link.end();
310 let range = start..end;
311 link_ranges.push(range.clone());
312 link_urls.push(link.as_str().to_string());
313
314 // If there is a style before we match a link, we have to add this to the highlighted ranges
315 if style != MarkdownHighlightStyle::default() && last_link_len < start {
316 highlights.push((
317 last_link_len..start,
318 MarkdownHighlight::Style(style.clone()),
319 ));
320 }
321
322 highlights.push((
323 range.clone(),
324 MarkdownHighlight::Style(MarkdownHighlightStyle {
325 underline: true,
326 ..style
327 }),
328 ));
329
330 regions.push((
331 range.clone(),
332 ParsedRegion {
333 code: false,
334 link: Some(Link::Web {
335 url: link.as_str().to_string(),
336 }),
337 },
338 ));
339 last_link_len = end;
340 }
341 last_link_len
342 };
343
344 if style != MarkdownHighlightStyle::default() && last_run_len < text.len() {
345 let mut new_highlight = true;
346 if let Some((last_range, last_style)) = highlights.last_mut()
347 && last_range.end == last_run_len
348 && last_style == &MarkdownHighlight::Style(style.clone())
349 {
350 last_range.end = text.len();
351 new_highlight = false;
352 }
353 if new_highlight {
354 highlights.push((
355 last_run_len..text.len(),
356 MarkdownHighlight::Style(style.clone()),
357 ));
358 }
359 }
360 }
361 Event::Code(t) => {
362 text.push_str(t.as_ref());
363 let range = prev_len..text.len();
364
365 if link.is_some() {
366 highlights.push((
367 range.clone(),
368 MarkdownHighlight::Style(MarkdownHighlightStyle {
369 link: true,
370 ..Default::default()
371 }),
372 ));
373 }
374 regions.push((
375 range,
376 ParsedRegion {
377 code: true,
378 link: link.clone(),
379 },
380 ));
381 }
382 Event::Start(tag) => match tag {
383 Tag::Emphasis => italic_depth += 1,
384 Tag::Strong => bold_depth += 1,
385 Tag::Strikethrough => strikethrough_depth += 1,
386 Tag::Link { dest_url, .. } => {
387 link = Link::identify(
388 self.file_location_directory.clone(),
389 dest_url.to_string(),
390 );
391 }
392 Tag::Image { dest_url, .. } => {
393 if !text.is_empty() {
394 let parsed_regions = MarkdownParagraphChunk::Text(ParsedMarkdownText {
395 source_range: source_range.clone(),
396 contents: mem::take(&mut text).into(),
397 highlights: mem::take(&mut highlights),
398 regions: mem::take(&mut regions),
399 });
400 markdown_text_like.push(parsed_regions);
401 }
402 image = Image::identify(
403 dest_url.to_string(),
404 source_range.clone(),
405 self.file_location_directory.clone(),
406 );
407 }
408 _ => {
409 break;
410 }
411 },
412
413 Event::End(tag) => match tag {
414 TagEnd::Emphasis => italic_depth -= 1,
415 TagEnd::Strong => bold_depth -= 1,
416 TagEnd::Strikethrough => strikethrough_depth -= 1,
417 TagEnd::Link => {
418 link = None;
419 }
420 TagEnd::Image => {
421 if let Some(mut image) = image.take() {
422 if !text.is_empty() {
423 image.set_alt_text(std::mem::take(&mut text).into());
424 mem::take(&mut highlights);
425 mem::take(&mut regions);
426 }
427 markdown_text_like.push(MarkdownParagraphChunk::Image(image));
428 }
429 }
430 TagEnd::Paragraph => {
431 self.cursor += 1;
432 break;
433 }
434 _ => {
435 break;
436 }
437 },
438 _ => {
439 break;
440 }
441 }
442
443 self.cursor += 1;
444 }
445 if !text.is_empty() {
446 markdown_text_like.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
447 source_range,
448 contents: text.into(),
449 highlights,
450 regions,
451 }));
452 }
453 markdown_text_like
454 }
455
456 fn parse_heading(&mut self, level: pulldown_cmark::HeadingLevel) -> ParsedMarkdownHeading {
457 let (_event, source_range) = self.previous().unwrap();
458 let source_range = source_range.clone();
459 let text = self.parse_text(true, None);
460
461 // Advance past the heading end tag
462 self.cursor += 1;
463
464 ParsedMarkdownHeading {
465 source_range,
466 level: match level {
467 pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
468 pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
469 pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
470 pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
471 pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
472 pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
473 },
474 contents: text,
475 }
476 }
477
478 fn parse_table(&mut self, alignment: Vec<Alignment>) -> ParsedMarkdownTable {
479 let (_event, source_range) = self.previous().unwrap();
480 let source_range = source_range.clone();
481 let mut header = vec![];
482 let mut body = vec![];
483 let mut row_columns = vec![];
484 let mut in_header = true;
485 let column_alignments = alignment
486 .iter()
487 .map(Self::convert_alignment)
488 .collect::<Vec<_>>();
489
490 loop {
491 if self.eof() {
492 break;
493 }
494
495 let (current, source_range) = self.current().unwrap();
496 let source_range = source_range.clone();
497 match current {
498 Event::Start(Tag::TableHead)
499 | Event::Start(Tag::TableRow)
500 | Event::End(TagEnd::TableCell) => {
501 self.cursor += 1;
502 }
503 Event::Start(Tag::TableCell) => {
504 self.cursor += 1;
505 let cell_contents = self.parse_text(false, Some(source_range));
506 row_columns.push(ParsedMarkdownTableColumn {
507 col_span: 1,
508 row_span: 1,
509 is_header: in_header,
510 children: cell_contents,
511 alignment: column_alignments
512 .get(row_columns.len())
513 .copied()
514 .unwrap_or_default(),
515 });
516 }
517 Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
518 self.cursor += 1;
519 let columns = std::mem::take(&mut row_columns);
520 if in_header {
521 header.push(ParsedMarkdownTableRow { columns: columns });
522 in_header = false;
523 } else {
524 body.push(ParsedMarkdownTableRow::with_columns(columns));
525 }
526 }
527 Event::End(TagEnd::Table) => {
528 self.cursor += 1;
529 break;
530 }
531 _ => {
532 break;
533 }
534 }
535 }
536
537 ParsedMarkdownTable {
538 source_range,
539 header,
540 body,
541 caption: None,
542 }
543 }
544
545 fn convert_alignment(alignment: &Alignment) -> ParsedMarkdownTableAlignment {
546 match alignment {
547 Alignment::None => ParsedMarkdownTableAlignment::None,
548 Alignment::Left => ParsedMarkdownTableAlignment::Left,
549 Alignment::Center => ParsedMarkdownTableAlignment::Center,
550 Alignment::Right => ParsedMarkdownTableAlignment::Right,
551 }
552 }
553
    /// Parses a list whose start tag was just consumed, including any lists nested inside it.
    ///
    /// The result is a flat vector: items of nested lists are emitted as siblings that differ
    /// only in their `depth` (starting at 1), and blocks that follow a nested list inside an
    /// item are emitted as standalone elements after the nested items.
    ///
    /// `order` is the start number of an ordered list (`None` for an unordered one); it is
    /// incremented after every finished item and saved/restored around nested lists.
    ///
    /// # Panics
    ///
    /// Panics if there is no previous token.
    async fn parse_list(&mut self, order: Option<u64>) -> Vec<ParsedMarkdownElement> {
        let (_, list_source_range) = self.previous().unwrap();

        let mut items = Vec::new();
        // One entry per item currently being built; seeded with a placeholder so `last_mut`
        // always has something to return.
        let mut items_stack = vec![MarkdownListItem::default()];
        let mut depth = 1;
        let mut order = order;
        // Numbering of the enclosing lists, restored when a nested list ends.
        let mut order_stack = Vec::new();

        // depth -> index in `items` at which the parent item must be inserted so that it ends up
        // in front of the nested items that were finished before it.
        let mut insertion_indices = FxHashMap::default();
        // depth -> source range of the parent item, cut off where its first nested list starts.
        let mut source_ranges = FxHashMap::default();
        let mut start_item_range = list_source_range.clone();

        while !self.eof() {
            let (current, source_range) = self.current().unwrap();
            match current {
                Event::Start(Tag::List(new_order)) => {
                    if items_stack.last().is_some() && !insertion_indices.contains_key(&depth) {
                        insertion_indices.insert(depth, items.len());
                    }

                    // We will use the start of the nested list as the end for the current item's range,
                    // because we don't care about the hierarchy of list items
                    if let collections::hash_map::Entry::Vacant(e) = source_ranges.entry(depth) {
                        e.insert(start_item_range.start..source_range.start);
                    }

                    order_stack.push(order);
                    order = *new_order;
                    self.cursor += 1;
                    depth += 1;
                }
                Event::End(TagEnd::List(_)) => {
                    order = order_stack.pop().flatten();
                    self.cursor += 1;
                    depth -= 1;

                    // The outermost list has been closed.
                    if depth == 0 {
                        break;
                    }
                }
                Event::Start(Tag::Item) => {
                    start_item_range = source_range.clone();

                    self.cursor += 1;
                    items_stack.push(MarkdownListItem::default());

                    let mut task_list = None;
                    // Check for task list marker (`- [ ]` or `- [x]`)
                    if let Some(event) = self.current_event() {
                        // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                        if event == &Event::Start(Tag::Paragraph) {
                            self.cursor += 1;
                        }

                        if let Some((Event::TaskListMarker(checked), range)) = self.current() {
                            task_list = Some((*checked, range.clone()));
                            self.cursor += 1;
                        }
                    }

                    if let Some((event, range)) = self.current() {
                        // This is a plain list item.
                        // For example `- some text` or `1. [Docs](./docs.md)`
                        if MarkdownParser::is_text_like(event) {
                            let text = self.parse_text(false, Some(range.clone()));
                            let block = ParsedMarkdownElement::Paragraph(text);
                            if let Some(content) = items_stack.last_mut() {
                                // A task marker takes precedence over ordered numbering.
                                let item_type = if let Some((checked, range)) = task_list {
                                    ParsedMarkdownListItemType::Task(checked, range)
                                } else if let Some(order) = order {
                                    ParsedMarkdownListItemType::Ordered(order)
                                } else {
                                    ParsedMarkdownListItemType::Unordered
                                };
                                content.item_type = item_type;
                                content.content.push(block);
                            }
                        } else {
                            // The item starts with a block; its `item_type` keeps the default.
                            let block = self.parse_block().await;
                            if let Some(block) = block
                                && let Some(list_item) = items_stack.last_mut()
                            {
                                list_item.content.extend(block);
                            }
                        }
                    }

                    // If there is a linebreak in between two list items the task list marker will actually be the first element of the paragraph
                    if self.current_event() == Some(&Event::End(TagEnd::Paragraph)) {
                        self.cursor += 1;
                    }
                }
                Event::End(TagEnd::Item) => {
                    self.cursor += 1;

                    if let Some(current) = order {
                        order = Some(current + 1);
                    }

                    if let Some(list_item) = items_stack.pop() {
                        // Prefer the range recorded when a nested list started inside this item.
                        let source_range = source_ranges
                            .remove(&depth)
                            .unwrap_or(start_item_range.clone());

                        // We need to remove the last character of the source range, because it includes the newline character
                        let source_range = source_range.start..source_range.end - 1;
                        let item = ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
                            source_range,
                            content: list_item.content,
                            depth,
                            item_type: list_item.item_type,
                            nested: false,
                        });

                        // An item that contained a nested list is inserted in front of the
                        // nested items that were already emitted.
                        if let Some(index) = insertion_indices.get(&depth) {
                            items.insert(*index, item);
                            insertion_indices.remove(&depth);
                        } else {
                            items.push(item);
                        }
                    }
                }
                _ => {
                    if depth == 0 {
                        break;
                    }
                    // This can only happen if a list item starts with more than one paragraph,
                    // or the list item contains blocks that should be rendered after the nested list items
                    let block = self.parse_block().await;
                    if let Some(block) = block {
                        if let Some(list_item) = items_stack.last_mut() {
                            // If we did not insert any nested items yet (in this case insertion index is set), we can append the block to the current list item
                            if !insertion_indices.contains_key(&depth) {
                                list_item.content.extend(block);
                                continue;
                            }
                        }

                        // Otherwise we need to insert the block after all the nested items
                        // that have been parsed so far
                        items.extend(block);
                    } else {
                        // Not a block we understand; skip the token so the loop makes progress.
                        self.cursor += 1;
                    }
                }
            }
        }

        items
    }
705
706 #[async_recursion]
707 async fn parse_block_quote(&mut self) -> ParsedMarkdownBlockQuote {
708 let (_event, source_range) = self.previous().unwrap();
709 let source_range = source_range.clone();
710 let mut nested_depth = 1;
711
712 let mut children: Vec<ParsedMarkdownElement> = vec![];
713
714 while !self.eof() {
715 let block = self.parse_block().await;
716
717 if let Some(block) = block {
718 children.extend(block);
719 } else {
720 break;
721 }
722
723 if self.eof() {
724 break;
725 }
726
727 let (current, _source_range) = self.current().unwrap();
728 match current {
729 // This is a nested block quote.
730 // Record that we're in a nested block quote and continue parsing.
731 // We don't need to advance the cursor since the next
732 // call to `parse_block` will handle it.
733 Event::Start(Tag::BlockQuote(_kind)) => {
734 nested_depth += 1;
735 }
736 Event::End(TagEnd::BlockQuote(_kind)) => {
737 nested_depth -= 1;
738 if nested_depth == 0 {
739 self.cursor += 1;
740 break;
741 }
742 }
743 _ => {}
744 };
745 }
746
747 ParsedMarkdownBlockQuote {
748 source_range,
749 children,
750 }
751 }
752
753 async fn parse_code_block(
754 &mut self,
755 language: Option<String>,
756 ) -> Option<ParsedMarkdownCodeBlock> {
757 let Some((_event, source_range)) = self.previous() else {
758 return None;
759 };
760
761 let source_range = source_range.clone();
762 let mut code = String::new();
763
764 while !self.eof() {
765 let Some((current, _source_range)) = self.current() else {
766 break;
767 };
768
769 match current {
770 Event::Text(text) => {
771 code.push_str(text);
772 self.cursor += 1;
773 }
774 Event::End(TagEnd::CodeBlock) => {
775 self.cursor += 1;
776 break;
777 }
778 _ => {
779 break;
780 }
781 }
782 }
783
784 code = code.strip_suffix('\n').unwrap_or(&code).to_string();
785
786 let highlights = if let Some(language) = &language {
787 if let Some(registry) = &self.language_registry {
788 let rope: language::Rope = code.as_str().into();
789 registry
790 .language_for_name_or_extension(language)
791 .await
792 .map(|l| l.highlight_text(&rope, 0..code.len()))
793 .ok()
794 } else {
795 None
796 }
797 } else {
798 None
799 };
800
801 Some(ParsedMarkdownCodeBlock {
802 source_range,
803 contents: code.into(),
804 language,
805 highlights,
806 })
807 }
808
809 async fn parse_html_block(&mut self) -> Vec<ParsedMarkdownElement> {
810 let mut elements = Vec::new();
811 let Some((_event, _source_range)) = self.previous() else {
812 return elements;
813 };
814
815 let mut html_source_range_start = None;
816 let mut html_source_range_end = None;
817 let mut html_buffer = String::new();
818
819 while !self.eof() {
820 let Some((current, source_range)) = self.current() else {
821 break;
822 };
823 let source_range = source_range.clone();
824 match current {
825 Event::Html(html) => {
826 html_source_range_start.get_or_insert(source_range.start);
827 html_source_range_end = Some(source_range.end);
828 html_buffer.push_str(html);
829 self.cursor += 1;
830 }
831 Event::End(TagEnd::CodeBlock) => {
832 self.cursor += 1;
833 break;
834 }
835 _ => {
836 break;
837 }
838 }
839 }
840
841 let bytes = cleanup_html(&html_buffer);
842
843 let mut cursor = std::io::Cursor::new(bytes);
844 if let Ok(dom) = parse_document(RcDom::default(), ParseOpts::default())
845 .from_utf8()
846 .read_from(&mut cursor)
847 && let Some((start, end)) = html_source_range_start.zip(html_source_range_end)
848 {
849 self.parse_html_node(
850 start..end,
851 &dom.document,
852 &mut elements,
853 &ParseHtmlNodeContext::default(),
854 );
855 }
856
857 elements
858 }
859
860 fn parse_html_node(
861 &self,
862 source_range: Range<usize>,
863 node: &Rc<markup5ever_rcdom::Node>,
864 elements: &mut Vec<ParsedMarkdownElement>,
865 context: &ParseHtmlNodeContext,
866 ) {
867 match &node.data {
868 markup5ever_rcdom::NodeData::Document => {
869 self.consume_children(source_range, node, elements, context);
870 }
871 markup5ever_rcdom::NodeData::Text { contents } => {
872 elements.push(ParsedMarkdownElement::Paragraph(vec![
873 MarkdownParagraphChunk::Text(ParsedMarkdownText {
874 source_range,
875 regions: Vec::default(),
876 highlights: Vec::default(),
877 contents: contents.borrow().to_string().into(),
878 }),
879 ]));
880 }
881 markup5ever_rcdom::NodeData::Comment { .. } => {}
882 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
883 let mut styles = if let Some(styles) = Self::markdown_style_from_html_styles(
884 Self::extract_styles_from_attributes(attrs),
885 ) {
886 vec![MarkdownHighlight::Style(styles)]
887 } else {
888 Vec::default()
889 };
890
891 if local_name!("img") == name.local {
892 if let Some(image) = self.extract_image(source_range, attrs) {
893 elements.push(ParsedMarkdownElement::Image(image));
894 }
895 } else if local_name!("p") == name.local {
896 let mut paragraph = MarkdownParagraph::new();
897 self.parse_paragraph(
898 source_range,
899 node,
900 &mut paragraph,
901 &mut styles,
902 &mut Vec::new(),
903 );
904
905 if !paragraph.is_empty() {
906 elements.push(ParsedMarkdownElement::Paragraph(paragraph));
907 }
908 } else if matches!(
909 name.local,
910 local_name!("h1")
911 | local_name!("h2")
912 | local_name!("h3")
913 | local_name!("h4")
914 | local_name!("h5")
915 | local_name!("h6")
916 ) {
917 let mut paragraph = MarkdownParagraph::new();
918 self.consume_paragraph(
919 source_range.clone(),
920 node,
921 &mut paragraph,
922 &mut styles,
923 &mut Vec::new(),
924 );
925
926 if !paragraph.is_empty() {
927 elements.push(ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
928 source_range,
929 level: match name.local {
930 local_name!("h1") => HeadingLevel::H1,
931 local_name!("h2") => HeadingLevel::H2,
932 local_name!("h3") => HeadingLevel::H3,
933 local_name!("h4") => HeadingLevel::H4,
934 local_name!("h5") => HeadingLevel::H5,
935 local_name!("h6") => HeadingLevel::H6,
936 _ => unreachable!(),
937 },
938 contents: paragraph,
939 }));
940 }
941 } else if local_name!("ul") == name.local || local_name!("ol") == name.local {
942 if let Some(list_items) = self.extract_html_list(
943 node,
944 local_name!("ol") == name.local,
945 context.list_item_depth,
946 source_range,
947 ) {
948 elements.extend(list_items);
949 }
950 } else if local_name!("blockquote") == name.local {
951 if let Some(blockquote) = self.extract_html_blockquote(node, source_range) {
952 elements.push(ParsedMarkdownElement::BlockQuote(blockquote));
953 }
954 } else if local_name!("table") == name.local {
955 if let Some(table) = self.extract_html_table(node, source_range) {
956 elements.push(ParsedMarkdownElement::Table(table));
957 }
958 } else {
959 self.consume_children(source_range, node, elements, context);
960 }
961 }
962 _ => {}
963 }
964 }
965
966 fn parse_paragraph(
967 &self,
968 source_range: Range<usize>,
969 node: &Rc<markup5ever_rcdom::Node>,
970 paragraph: &mut MarkdownParagraph,
971 highlights: &mut Vec<MarkdownHighlight>,
972 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
973 ) {
974 fn items_with_range<T>(
975 range: Range<usize>,
976 items: impl IntoIterator<Item = T>,
977 ) -> Vec<(Range<usize>, T)> {
978 items
979 .into_iter()
980 .map(|item| (range.clone(), item))
981 .collect()
982 }
983
984 match &node.data {
985 markup5ever_rcdom::NodeData::Text { contents } => {
986 // append the text to the last chunk, so we can have a hacky version
987 // of inline text with highlighting
988 if let Some(text) = paragraph.iter_mut().last().and_then(|p| match p {
989 MarkdownParagraphChunk::Text(text) => Some(text),
990 _ => None,
991 }) {
992 let mut new_text = text.contents.to_string();
993 new_text.push_str(&contents.borrow());
994
995 text.highlights.extend(items_with_range(
996 text.contents.len()..new_text.len(),
997 std::mem::take(highlights),
998 ));
999 text.regions.extend(items_with_range(
1000 text.contents.len()..new_text.len(),
1001 std::mem::take(regions)
1002 .into_iter()
1003 .map(|(_, region)| region),
1004 ));
1005 text.contents = SharedString::from(new_text);
1006 } else {
1007 let contents = contents.borrow().to_string();
1008 paragraph.push(MarkdownParagraphChunk::Text(ParsedMarkdownText {
1009 source_range,
1010 highlights: items_with_range(0..contents.len(), std::mem::take(highlights)),
1011 regions: items_with_range(
1012 0..contents.len(),
1013 std::mem::take(regions)
1014 .into_iter()
1015 .map(|(_, region)| region),
1016 ),
1017 contents: contents.into(),
1018 }));
1019 }
1020 }
1021 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1022 if local_name!("img") == name.local {
1023 if let Some(image) = self.extract_image(source_range, attrs) {
1024 paragraph.push(MarkdownParagraphChunk::Image(image));
1025 }
1026 } else if local_name!("b") == name.local || local_name!("strong") == name.local {
1027 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1028 weight: FontWeight::BOLD,
1029 ..Default::default()
1030 }));
1031
1032 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1033 } else if local_name!("i") == name.local {
1034 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1035 italic: true,
1036 ..Default::default()
1037 }));
1038
1039 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1040 } else if local_name!("em") == name.local {
1041 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1042 oblique: true,
1043 ..Default::default()
1044 }));
1045
1046 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1047 } else if local_name!("del") == name.local {
1048 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1049 strikethrough: true,
1050 ..Default::default()
1051 }));
1052
1053 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1054 } else if local_name!("ins") == name.local {
1055 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1056 underline: true,
1057 ..Default::default()
1058 }));
1059
1060 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1061 } else if local_name!("a") == name.local {
1062 if let Some(url) = Self::attr_value(attrs, local_name!("href"))
1063 && let Some(link) =
1064 Link::identify(self.file_location_directory.clone(), url)
1065 {
1066 highlights.push(MarkdownHighlight::Style(MarkdownHighlightStyle {
1067 link: true,
1068 ..Default::default()
1069 }));
1070
1071 regions.push((
1072 source_range.clone(),
1073 ParsedRegion {
1074 code: false,
1075 link: Some(link),
1076 },
1077 ));
1078 }
1079
1080 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1081 } else {
1082 self.consume_paragraph(source_range, node, paragraph, highlights, regions);
1083 }
1084 }
1085 _ => {}
1086 }
1087 }
1088
1089 fn consume_paragraph(
1090 &self,
1091 source_range: Range<usize>,
1092 node: &Rc<markup5ever_rcdom::Node>,
1093 paragraph: &mut MarkdownParagraph,
1094 highlights: &mut Vec<MarkdownHighlight>,
1095 regions: &mut Vec<(Range<usize>, ParsedRegion)>,
1096 ) {
1097 for node in node.children.borrow().iter() {
1098 self.parse_paragraph(source_range.clone(), node, paragraph, highlights, regions);
1099 }
1100 }
1101
1102 fn parse_table_row(
1103 &self,
1104 source_range: Range<usize>,
1105 node: &Rc<markup5ever_rcdom::Node>,
1106 ) -> Option<ParsedMarkdownTableRow> {
1107 let mut columns = Vec::new();
1108
1109 match &node.data {
1110 markup5ever_rcdom::NodeData::Element { name, .. } => {
1111 if local_name!("tr") != name.local {
1112 return None;
1113 }
1114
1115 for node in node.children.borrow().iter() {
1116 if let Some(column) = self.parse_table_column(source_range.clone(), node) {
1117 columns.push(column);
1118 }
1119 }
1120 }
1121 _ => {}
1122 }
1123
1124 if columns.is_empty() {
1125 None
1126 } else {
1127 Some(ParsedMarkdownTableRow { columns })
1128 }
1129 }
1130
1131 fn parse_table_column(
1132 &self,
1133 source_range: Range<usize>,
1134 node: &Rc<markup5ever_rcdom::Node>,
1135 ) -> Option<ParsedMarkdownTableColumn> {
1136 match &node.data {
1137 markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
1138 if !matches!(name.local, local_name!("th") | local_name!("td")) {
1139 return None;
1140 }
1141
1142 let mut children = MarkdownParagraph::new();
1143 self.consume_paragraph(
1144 source_range,
1145 node,
1146 &mut children,
1147 &mut Vec::new(),
1148 &mut Vec::new(),
1149 );
1150
1151 let is_header = matches!(name.local, local_name!("th"));
1152
1153 Some(ParsedMarkdownTableColumn {
1154 col_span: std::cmp::max(
1155 Self::attr_value(attrs, local_name!("colspan"))
1156 .and_then(|span| span.parse().ok())
1157 .unwrap_or(1),
1158 1,
1159 ),
1160 row_span: std::cmp::max(
1161 Self::attr_value(attrs, local_name!("rowspan"))
1162 .and_then(|span| span.parse().ok())
1163 .unwrap_or(1),
1164 1,
1165 ),
1166 is_header,
1167 children,
1168 alignment: Self::attr_value(attrs, local_name!("align"))
1169 .and_then(|align| match align.as_str() {
1170 "left" => Some(ParsedMarkdownTableAlignment::Left),
1171 "center" => Some(ParsedMarkdownTableAlignment::Center),
1172 "right" => Some(ParsedMarkdownTableAlignment::Right),
1173 _ => None,
1174 })
1175 .unwrap_or_else(|| {
1176 if is_header {
1177 ParsedMarkdownTableAlignment::Center
1178 } else {
1179 ParsedMarkdownTableAlignment::default()
1180 }
1181 }),
1182 })
1183 }
1184 _ => None,
1185 }
1186 }
1187
1188 fn consume_children(
1189 &self,
1190 source_range: Range<usize>,
1191 node: &Rc<markup5ever_rcdom::Node>,
1192 elements: &mut Vec<ParsedMarkdownElement>,
1193 context: &ParseHtmlNodeContext,
1194 ) {
1195 for node in node.children.borrow().iter() {
1196 self.parse_html_node(source_range.clone(), node, elements, context);
1197 }
1198 }
1199
1200 fn attr_value(
1201 attrs: &RefCell<Vec<html5ever::Attribute>>,
1202 name: html5ever::LocalName,
1203 ) -> Option<String> {
1204 attrs.borrow().iter().find_map(|attr| {
1205 if attr.name.local == name {
1206 Some(attr.value.to_string())
1207 } else {
1208 None
1209 }
1210 })
1211 }
1212
1213 fn markdown_style_from_html_styles(
1214 styles: HashMap<String, String>,
1215 ) -> Option<MarkdownHighlightStyle> {
1216 let mut markdown_style = MarkdownHighlightStyle::default();
1217
1218 if let Some(text_decoration) = styles.get("text-decoration") {
1219 match text_decoration.to_lowercase().as_str() {
1220 "underline" => {
1221 markdown_style.underline = true;
1222 }
1223 "line-through" => {
1224 markdown_style.strikethrough = true;
1225 }
1226 _ => {}
1227 }
1228 }
1229
1230 if let Some(font_style) = styles.get("font-style") {
1231 match font_style.to_lowercase().as_str() {
1232 "italic" => {
1233 markdown_style.italic = true;
1234 }
1235 "oblique" => {
1236 markdown_style.oblique = true;
1237 }
1238 _ => {}
1239 }
1240 }
1241
1242 if let Some(font_weight) = styles.get("font-weight") {
1243 match font_weight.to_lowercase().as_str() {
1244 "bold" => {
1245 markdown_style.weight = FontWeight::BOLD;
1246 }
1247 "lighter" => {
1248 markdown_style.weight = FontWeight::THIN;
1249 }
1250 _ => {
1251 if let Some(weight) = font_weight.parse::<f32>().ok() {
1252 markdown_style.weight = FontWeight(weight);
1253 }
1254 }
1255 }
1256 }
1257
1258 if markdown_style != MarkdownHighlightStyle::default() {
1259 Some(markdown_style)
1260 } else {
1261 None
1262 }
1263 }
1264
1265 fn extract_styles_from_attributes(
1266 attrs: &RefCell<Vec<html5ever::Attribute>>,
1267 ) -> HashMap<String, String> {
1268 let mut styles = HashMap::new();
1269
1270 if let Some(style) = Self::attr_value(attrs, local_name!("style")) {
1271 for decl in style.split(';') {
1272 let mut parts = decl.splitn(2, ':');
1273 if let Some((key, value)) = parts.next().zip(parts.next()) {
1274 styles.insert(
1275 key.trim().to_lowercase().to_string(),
1276 value.trim().to_string(),
1277 );
1278 }
1279 }
1280 }
1281
1282 styles
1283 }
1284
1285 fn extract_image(
1286 &self,
1287 source_range: Range<usize>,
1288 attrs: &RefCell<Vec<html5ever::Attribute>>,
1289 ) -> Option<Image> {
1290 let src = Self::attr_value(attrs, local_name!("src"))?;
1291
1292 let mut image = Image::identify(src, source_range, self.file_location_directory.clone())?;
1293
1294 if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
1295 image.set_alt_text(alt.into());
1296 }
1297
1298 let styles = Self::extract_styles_from_attributes(attrs);
1299
1300 if let Some(width) = Self::attr_value(attrs, local_name!("width"))
1301 .or_else(|| styles.get("width").cloned())
1302 .and_then(|width| Self::parse_html_element_dimension(&width))
1303 {
1304 image.set_width(width);
1305 }
1306
1307 if let Some(height) = Self::attr_value(attrs, local_name!("height"))
1308 .or_else(|| styles.get("height").cloned())
1309 .and_then(|height| Self::parse_html_element_dimension(&height))
1310 {
1311 image.set_height(height);
1312 }
1313
1314 Some(image)
1315 }
1316
1317 fn extract_html_list(
1318 &self,
1319 node: &Rc<markup5ever_rcdom::Node>,
1320 ordered: bool,
1321 depth: u16,
1322 source_range: Range<usize>,
1323 ) -> Option<Vec<ParsedMarkdownElement>> {
1324 let mut list_items = Vec::with_capacity(node.children.borrow().len());
1325
1326 for (index, node) in node.children.borrow().iter().enumerate() {
1327 match &node.data {
1328 markup5ever_rcdom::NodeData::Element { name, .. } => {
1329 if local_name!("li") != name.local {
1330 continue;
1331 }
1332
1333 let mut content = Vec::new();
1334 self.consume_children(
1335 source_range.clone(),
1336 node,
1337 &mut content,
1338 &ParseHtmlNodeContext {
1339 list_item_depth: depth + 1,
1340 },
1341 );
1342
1343 if !content.is_empty() {
1344 list_items.push(ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
1345 depth,
1346 source_range: source_range.clone(),
1347 item_type: if ordered {
1348 ParsedMarkdownListItemType::Ordered(index as u64 + 1)
1349 } else {
1350 ParsedMarkdownListItemType::Unordered
1351 },
1352 content,
1353 nested: true,
1354 }));
1355 }
1356 }
1357 _ => {}
1358 }
1359 }
1360
1361 if list_items.is_empty() {
1362 None
1363 } else {
1364 Some(list_items)
1365 }
1366 }
1367
1368 fn parse_html_element_dimension(value: &str) -> Option<DefiniteLength> {
1369 if value.ends_with("%") {
1370 value
1371 .trim_end_matches("%")
1372 .parse::<f32>()
1373 .ok()
1374 .map(|value| relative(value / 100.))
1375 } else {
1376 value
1377 .trim_end_matches("px")
1378 .parse()
1379 .ok()
1380 .map(|value| px(value).into())
1381 }
1382 }
1383
1384 fn extract_html_blockquote(
1385 &self,
1386 node: &Rc<markup5ever_rcdom::Node>,
1387 source_range: Range<usize>,
1388 ) -> Option<ParsedMarkdownBlockQuote> {
1389 let mut children = Vec::new();
1390 self.consume_children(
1391 source_range.clone(),
1392 node,
1393 &mut children,
1394 &ParseHtmlNodeContext::default(),
1395 );
1396
1397 if children.is_empty() {
1398 None
1399 } else {
1400 Some(ParsedMarkdownBlockQuote {
1401 children,
1402 source_range,
1403 })
1404 }
1405 }
1406
1407 fn extract_html_table(
1408 &self,
1409 node: &Rc<markup5ever_rcdom::Node>,
1410 source_range: Range<usize>,
1411 ) -> Option<ParsedMarkdownTable> {
1412 let mut header_rows = Vec::new();
1413 let mut body_rows = Vec::new();
1414 let mut caption = None;
1415
1416 // node should be a thead, tbody or caption element
1417 for node in node.children.borrow().iter() {
1418 match &node.data {
1419 markup5ever_rcdom::NodeData::Element { name, .. } => {
1420 if local_name!("caption") == name.local {
1421 let mut paragraph = MarkdownParagraph::new();
1422 self.parse_paragraph(
1423 source_range.clone(),
1424 node,
1425 &mut paragraph,
1426 &mut Vec::new(),
1427 &mut Vec::new(),
1428 );
1429 caption = Some(paragraph);
1430 }
1431 if local_name!("thead") == name.local {
1432 // node should be a tr element
1433 for node in node.children.borrow().iter() {
1434 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1435 header_rows.push(row);
1436 }
1437 }
1438 } else if local_name!("tbody") == name.local {
1439 // node should be a tr element
1440 for node in node.children.borrow().iter() {
1441 if let Some(row) = self.parse_table_row(source_range.clone(), node) {
1442 body_rows.push(row);
1443 }
1444 }
1445 }
1446 }
1447 _ => {}
1448 }
1449 }
1450
1451 if !header_rows.is_empty() || !body_rows.is_empty() {
1452 Some(ParsedMarkdownTable {
1453 source_range,
1454 body: body_rows,
1455 header: header_rows,
1456 caption,
1457 })
1458 } else {
1459 None
1460 }
1461 }
1462}
1463
1464#[cfg(test)]
1465mod tests {
1466 use super::*;
1467 use ParsedMarkdownListItemType::*;
1468 use core::panic;
1469 use gpui::{AbsoluteLength, BackgroundExecutor, DefiniteLength};
1470 use language::{
1471 HighlightId, Language, LanguageConfig, LanguageMatcher, LanguageRegistry, tree_sitter_rust,
1472 };
1473 use pretty_assertions::assert_eq;
1474
1475 async fn parse(input: &str) -> ParsedMarkdown {
1476 parse_markdown(input, None, None).await
1477 }
1478
1479 #[gpui::test]
1480 async fn test_headings() {
1481 let parsed = parse("# Heading one\n## Heading two\n### Heading three").await;
1482
1483 assert_eq!(
1484 parsed.children,
1485 vec![
1486 h1(text("Heading one", 2..13), 0..14),
1487 h2(text("Heading two", 17..28), 14..29),
1488 h3(text("Heading three", 33..46), 29..46),
1489 ]
1490 );
1491 }
1492
1493 #[gpui::test]
1494 async fn test_newlines_dont_new_paragraphs() {
1495 let parsed = parse("Some text **that is bolded**\n and *italicized*").await;
1496
1497 assert_eq!(
1498 parsed.children,
1499 vec![p("Some text that is bolded and italicized", 0..46)]
1500 );
1501 }
1502
1503 #[gpui::test]
1504 async fn test_heading_with_paragraph() {
1505 let parsed = parse("# Zed\nThe editor").await;
1506
1507 assert_eq!(
1508 parsed.children,
1509 vec![h1(text("Zed", 2..5), 0..6), p("The editor", 6..16),]
1510 );
1511 }
1512
1513 #[gpui::test]
1514 async fn test_double_newlines_do_new_paragraphs() {
1515 let parsed = parse("Some text **that is bolded**\n\n and *italicized*").await;
1516
1517 assert_eq!(
1518 parsed.children,
1519 vec![
1520 p("Some text that is bolded", 0..29),
1521 p("and italicized", 31..47),
1522 ]
1523 );
1524 }
1525
1526 #[gpui::test]
1527 async fn test_bold_italic_text() {
1528 let parsed = parse("Some text **that is bolded** and *italicized*").await;
1529
1530 assert_eq!(
1531 parsed.children,
1532 vec![p("Some text that is bolded and italicized", 0..45)]
1533 );
1534 }
1535
1536 #[gpui::test]
1537 async fn test_nested_bold_strikethrough_text() {
1538 let parsed = parse("Some **bo~~strikethrough~~ld** text").await;
1539
1540 assert_eq!(parsed.children.len(), 1);
1541 assert_eq!(
1542 parsed.children[0],
1543 ParsedMarkdownElement::Paragraph(vec![MarkdownParagraphChunk::Text(
1544 ParsedMarkdownText {
1545 source_range: 0..35,
1546 contents: "Some bostrikethroughld text".into(),
1547 highlights: Vec::new(),
1548 regions: Vec::new(),
1549 }
1550 )])
1551 );
1552
1553 let new_text = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1554 text
1555 } else {
1556 panic!("Expected a paragraph");
1557 };
1558
1559 let paragraph = if let MarkdownParagraphChunk::Text(text) = &new_text[0] {
1560 text
1561 } else {
1562 panic!("Expected a text");
1563 };
1564
1565 assert_eq!(
1566 paragraph.highlights,
1567 vec![
1568 (
1569 5..7,
1570 MarkdownHighlight::Style(MarkdownHighlightStyle {
1571 weight: FontWeight::BOLD,
1572 ..Default::default()
1573 }),
1574 ),
1575 (
1576 7..20,
1577 MarkdownHighlight::Style(MarkdownHighlightStyle {
1578 weight: FontWeight::BOLD,
1579 strikethrough: true,
1580 ..Default::default()
1581 }),
1582 ),
1583 (
1584 20..22,
1585 MarkdownHighlight::Style(MarkdownHighlightStyle {
1586 weight: FontWeight::BOLD,
1587 ..Default::default()
1588 }),
1589 ),
1590 ]
1591 );
1592 }
1593
1594 #[gpui::test]
1595 async fn test_html_inline_style_elements() {
1596 let parsed =
1597 parse("<p>Some text <strong>strong text</strong> more text <b>bold text</b> more text <i>italic text</i> more text <em>emphasized text</em> more text <del>deleted text</del> more text <ins>inserted text</ins></p>").await;
1598
1599 assert_eq!(1, parsed.children.len());
1600 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1601 chunks
1602 } else {
1603 panic!("Expected a paragraph");
1604 };
1605
1606 assert_eq!(1, chunks.len());
1607 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1608 text
1609 } else {
1610 panic!("Expected a paragraph");
1611 };
1612
1613 assert_eq!(0..205, text.source_range);
1614 assert_eq!(
1615 "Some text strong text more text bold text more text italic text more text emphasized text more text deleted text more text inserted text",
1616 text.contents.as_str(),
1617 );
1618 assert_eq!(
1619 vec![
1620 (
1621 10..21,
1622 MarkdownHighlight::Style(MarkdownHighlightStyle {
1623 weight: FontWeight(700.0),
1624 ..Default::default()
1625 },),
1626 ),
1627 (
1628 32..41,
1629 MarkdownHighlight::Style(MarkdownHighlightStyle {
1630 weight: FontWeight(700.0),
1631 ..Default::default()
1632 },),
1633 ),
1634 (
1635 52..63,
1636 MarkdownHighlight::Style(MarkdownHighlightStyle {
1637 italic: true,
1638 weight: FontWeight(400.0),
1639 ..Default::default()
1640 },),
1641 ),
1642 (
1643 74..89,
1644 MarkdownHighlight::Style(MarkdownHighlightStyle {
1645 weight: FontWeight(400.0),
1646 oblique: true,
1647 ..Default::default()
1648 },),
1649 ),
1650 (
1651 100..112,
1652 MarkdownHighlight::Style(MarkdownHighlightStyle {
1653 strikethrough: true,
1654 weight: FontWeight(400.0),
1655 ..Default::default()
1656 },),
1657 ),
1658 (
1659 123..136,
1660 MarkdownHighlight::Style(MarkdownHighlightStyle {
1661 underline: true,
1662 weight: FontWeight(400.0,),
1663 ..Default::default()
1664 },),
1665 ),
1666 ],
1667 text.highlights
1668 );
1669 }
1670
1671 #[gpui::test]
1672 async fn test_html_href_element() {
1673 let parsed =
1674 parse("<p>Some text <a href=\"https://example.com\">link</a> more text</p>").await;
1675
1676 assert_eq!(1, parsed.children.len());
1677 let chunks = if let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] {
1678 chunks
1679 } else {
1680 panic!("Expected a paragraph");
1681 };
1682
1683 assert_eq!(1, chunks.len());
1684 let text = if let MarkdownParagraphChunk::Text(text) = &chunks[0] {
1685 text
1686 } else {
1687 panic!("Expected a paragraph");
1688 };
1689
1690 assert_eq!(0..65, text.source_range);
1691 assert_eq!("Some text link more text", text.contents.as_str(),);
1692 assert_eq!(
1693 vec![(
1694 10..14,
1695 MarkdownHighlight::Style(MarkdownHighlightStyle {
1696 link: true,
1697 ..Default::default()
1698 },),
1699 )],
1700 text.highlights
1701 );
1702 assert_eq!(
1703 vec![(
1704 10..14,
1705 ParsedRegion {
1706 code: false,
1707 link: Some(Link::Web {
1708 url: "https://example.com".into()
1709 })
1710 }
1711 )],
1712 text.regions
1713 )
1714 }
1715
1716 #[gpui::test]
1717 async fn test_text_with_inline_html() {
1718 let parsed = parse("This is a paragraph with an inline HTML <sometag>tag</sometag>.").await;
1719
1720 assert_eq!(
1721 parsed.children,
1722 vec![p("This is a paragraph with an inline HTML tag.", 0..63),],
1723 );
1724 }
1725
1726 #[gpui::test]
1727 async fn test_raw_links_detection() {
1728 let parsed = parse("Checkout this https://zed.dev link").await;
1729
1730 assert_eq!(
1731 parsed.children,
1732 vec![p("Checkout this https://zed.dev link", 0..34)]
1733 );
1734 }
1735
1736 #[gpui::test]
1737 async fn test_empty_image() {
1738 let parsed = parse("![]()").await;
1739
1740 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1741 text
1742 } else {
1743 panic!("Expected a paragraph");
1744 };
1745 assert_eq!(paragraph.len(), 0);
1746 }
1747
1748 #[gpui::test]
1749 async fn test_image_links_detection() {
1750 let parsed = parse("").await;
1751
1752 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1753 text
1754 } else {
1755 panic!("Expected a paragraph");
1756 };
1757 assert_eq!(
1758 paragraph[0],
1759 MarkdownParagraphChunk::Image(Image {
1760 source_range: 0..111,
1761 link: Link::Web {
1762 url: "https://blog.logrocket.com/wp-content/uploads/2024/04/exploring-zed-open-source-code-editor-rust-2.png".to_string(),
1763 },
1764 alt_text: Some("test".into()),
1765 height: None,
1766 width: None,
1767 },)
1768 );
1769 }
1770
1771 #[gpui::test]
1772 async fn test_image_alt_text() {
1773 let parsed = parse("[](https://zed.dev)\n ").await;
1774
1775 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1776 text
1777 } else {
1778 panic!("Expected a paragraph");
1779 };
1780 assert_eq!(
1781 paragraph[0],
1782 MarkdownParagraphChunk::Image(Image {
1783 source_range: 0..142,
1784 link: Link::Web {
1785 url: "https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/zed-industries/zed/main/assets/badge/v0.json".to_string(),
1786 },
1787 alt_text: Some("Zed".into()),
1788 height: None,
1789 width: None,
1790 },)
1791 );
1792 }
1793
1794 #[gpui::test]
1795 async fn test_image_without_alt_text() {
1796 let parsed = parse("").await;
1797
1798 let paragraph = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1799 text
1800 } else {
1801 panic!("Expected a paragraph");
1802 };
1803 assert_eq!(
1804 paragraph[0],
1805 MarkdownParagraphChunk::Image(Image {
1806 source_range: 0..31,
1807 link: Link::Web {
1808 url: "http://example.com/foo.png".to_string(),
1809 },
1810 alt_text: None,
1811 height: None,
1812 width: None,
1813 },)
1814 );
1815 }
1816
1817 #[gpui::test]
1818 async fn test_image_with_alt_text_containing_formatting() {
1819 let parsed = parse("").await;
1820
1821 let ParsedMarkdownElement::Paragraph(chunks) = &parsed.children[0] else {
1822 panic!("Expected a paragraph");
1823 };
1824 assert_eq!(
1825 chunks,
1826 &[MarkdownParagraphChunk::Image(Image {
1827 source_range: 0..44,
1828 link: Link::Web {
1829 url: "http://example.com/foo.png".to_string(),
1830 },
1831 alt_text: Some("foo bar baz".into()),
1832 height: None,
1833 width: None,
1834 }),],
1835 );
1836 }
1837
1838 #[gpui::test]
1839 async fn test_images_with_text_in_between() {
1840 let parsed = parse(
1841 "\nLorem Ipsum\n",
1842 )
1843 .await;
1844
1845 let chunks = if let ParsedMarkdownElement::Paragraph(text) = &parsed.children[0] {
1846 text
1847 } else {
1848 panic!("Expected a paragraph");
1849 };
1850 assert_eq!(
1851 chunks,
1852 &vec![
1853 MarkdownParagraphChunk::Image(Image {
1854 source_range: 0..81,
1855 link: Link::Web {
1856 url: "http://example.com/foo.png".to_string(),
1857 },
1858 alt_text: Some("foo".into()),
1859 height: None,
1860 width: None,
1861 }),
1862 MarkdownParagraphChunk::Text(ParsedMarkdownText {
1863 source_range: 0..81,
1864 contents: " Lorem Ipsum ".into(),
1865 highlights: Vec::new(),
1866 regions: Vec::new(),
1867 }),
1868 MarkdownParagraphChunk::Image(Image {
1869 source_range: 0..81,
1870 link: Link::Web {
1871 url: "http://example.com/bar.png".to_string(),
1872 },
1873 alt_text: Some("bar".into()),
1874 height: None,
1875 width: None,
1876 })
1877 ]
1878 );
1879 }
1880
1881 #[test]
1882 fn test_parse_html_element_dimension() {
1883 // Test percentage values
1884 assert_eq!(
1885 MarkdownParser::parse_html_element_dimension("50%"),
1886 Some(DefiniteLength::Fraction(0.5))
1887 );
1888 assert_eq!(
1889 MarkdownParser::parse_html_element_dimension("100%"),
1890 Some(DefiniteLength::Fraction(1.0))
1891 );
1892 assert_eq!(
1893 MarkdownParser::parse_html_element_dimension("25%"),
1894 Some(DefiniteLength::Fraction(0.25))
1895 );
1896 assert_eq!(
1897 MarkdownParser::parse_html_element_dimension("0%"),
1898 Some(DefiniteLength::Fraction(0.0))
1899 );
1900
1901 // Test pixel values
1902 assert_eq!(
1903 MarkdownParser::parse_html_element_dimension("100px"),
1904 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1905 );
1906 assert_eq!(
1907 MarkdownParser::parse_html_element_dimension("50px"),
1908 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(50.0))))
1909 );
1910 assert_eq!(
1911 MarkdownParser::parse_html_element_dimension("0px"),
1912 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(0.0))))
1913 );
1914
1915 // Test values without units (should be treated as pixels)
1916 assert_eq!(
1917 MarkdownParser::parse_html_element_dimension("100"),
1918 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.0))))
1919 );
1920 assert_eq!(
1921 MarkdownParser::parse_html_element_dimension("42"),
1922 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1923 );
1924
1925 // Test invalid values
1926 assert_eq!(
1927 MarkdownParser::parse_html_element_dimension("invalid"),
1928 None
1929 );
1930 assert_eq!(MarkdownParser::parse_html_element_dimension("px"), None);
1931 assert_eq!(MarkdownParser::parse_html_element_dimension("%"), None);
1932 assert_eq!(MarkdownParser::parse_html_element_dimension(""), None);
1933 assert_eq!(MarkdownParser::parse_html_element_dimension("abc%"), None);
1934 assert_eq!(MarkdownParser::parse_html_element_dimension("abcpx"), None);
1935
1936 // Test decimal values
1937 assert_eq!(
1938 MarkdownParser::parse_html_element_dimension("50.5%"),
1939 Some(DefiniteLength::Fraction(0.505))
1940 );
1941 assert_eq!(
1942 MarkdownParser::parse_html_element_dimension("100.25px"),
1943 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.25))))
1944 );
1945 assert_eq!(
1946 MarkdownParser::parse_html_element_dimension("42.0"),
1947 Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(42.0))))
1948 );
1949 }
1950
1951 #[gpui::test]
1952 async fn test_html_unordered_list() {
1953 let parsed = parse(
1954 "<ul>
1955 <li>Item 1</li>
1956 <li>Item 2</li>
1957 </ul>",
1958 )
1959 .await;
1960
1961 assert_eq!(
1962 ParsedMarkdown {
1963 children: vec![
1964 nested_list_item(
1965 0..82,
1966 1,
1967 ParsedMarkdownListItemType::Unordered,
1968 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
1969 ),
1970 nested_list_item(
1971 0..82,
1972 1,
1973 ParsedMarkdownListItemType::Unordered,
1974 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
1975 ),
1976 ]
1977 },
1978 parsed
1979 );
1980 }
1981
1982 #[gpui::test]
1983 async fn test_html_ordered_list() {
1984 let parsed = parse(
1985 "<ol>
1986 <li>Item 1</li>
1987 <li>Item 2</li>
1988 </ol>",
1989 )
1990 .await;
1991
1992 assert_eq!(
1993 ParsedMarkdown {
1994 children: vec![
1995 nested_list_item(
1996 0..82,
1997 1,
1998 ParsedMarkdownListItemType::Ordered(1),
1999 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..82))]
2000 ),
2001 nested_list_item(
2002 0..82,
2003 1,
2004 ParsedMarkdownListItemType::Ordered(2),
2005 vec![ParsedMarkdownElement::Paragraph(text("Item 2", 0..82))]
2006 ),
2007 ]
2008 },
2009 parsed
2010 );
2011 }
2012
2013 #[gpui::test]
2014 async fn test_html_nested_ordered_list() {
2015 let parsed = parse(
2016 "<ol>
2017 <li>Item 1</li>
2018 <li>Item 2
2019 <ol>
2020 <li>Sub-Item 1</li>
2021 <li>Sub-Item 2</li>
2022 </ol>
2023 </li>
2024 </ol>",
2025 )
2026 .await;
2027
2028 assert_eq!(
2029 ParsedMarkdown {
2030 children: vec![
2031 nested_list_item(
2032 0..216,
2033 1,
2034 ParsedMarkdownListItemType::Ordered(1),
2035 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2036 ),
2037 nested_list_item(
2038 0..216,
2039 1,
2040 ParsedMarkdownListItemType::Ordered(2),
2041 vec![
2042 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2043 nested_list_item(
2044 0..216,
2045 2,
2046 ParsedMarkdownListItemType::Ordered(1),
2047 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2048 ),
2049 nested_list_item(
2050 0..216,
2051 2,
2052 ParsedMarkdownListItemType::Ordered(2),
2053 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2054 ),
2055 ]
2056 ),
2057 ]
2058 },
2059 parsed
2060 );
2061 }
2062
2063 #[gpui::test]
2064 async fn test_html_nested_unordered_list() {
2065 let parsed = parse(
2066 "<ul>
2067 <li>Item 1</li>
2068 <li>Item 2
2069 <ul>
2070 <li>Sub-Item 1</li>
2071 <li>Sub-Item 2</li>
2072 </ul>
2073 </li>
2074 </ul>",
2075 )
2076 .await;
2077
2078 assert_eq!(
2079 ParsedMarkdown {
2080 children: vec![
2081 nested_list_item(
2082 0..216,
2083 1,
2084 ParsedMarkdownListItemType::Unordered,
2085 vec![ParsedMarkdownElement::Paragraph(text("Item 1", 0..216))]
2086 ),
2087 nested_list_item(
2088 0..216,
2089 1,
2090 ParsedMarkdownListItemType::Unordered,
2091 vec![
2092 ParsedMarkdownElement::Paragraph(text("Item 2", 0..216)),
2093 nested_list_item(
2094 0..216,
2095 2,
2096 ParsedMarkdownListItemType::Unordered,
2097 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 1", 0..216))]
2098 ),
2099 nested_list_item(
2100 0..216,
2101 2,
2102 ParsedMarkdownListItemType::Unordered,
2103 vec![ParsedMarkdownElement::Paragraph(text("Sub-Item 2", 0..216))]
2104 ),
2105 ]
2106 ),
2107 ]
2108 },
2109 parsed
2110 );
2111 }
2112
2113 #[gpui::test]
2114 async fn test_inline_html_image_tag() {
2115 let parsed =
2116 parse("<p>Some text<img src=\"http://example.com/foo.png\" /> some more text</p>")
2117 .await;
2118
2119 assert_eq!(
2120 ParsedMarkdown {
2121 children: vec![ParsedMarkdownElement::Paragraph(vec![
2122 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2123 source_range: 0..71,
2124 contents: "Some text".into(),
2125 highlights: Default::default(),
2126 regions: Default::default()
2127 }),
2128 MarkdownParagraphChunk::Image(Image {
2129 source_range: 0..71,
2130 link: Link::Web {
2131 url: "http://example.com/foo.png".to_string(),
2132 },
2133 alt_text: None,
2134 height: None,
2135 width: None,
2136 }),
2137 MarkdownParagraphChunk::Text(ParsedMarkdownText {
2138 source_range: 0..71,
2139 contents: " some more text".into(),
2140 highlights: Default::default(),
2141 regions: Default::default()
2142 }),
2143 ])]
2144 },
2145 parsed
2146 );
2147 }
2148
2149 #[gpui::test]
2150 async fn test_html_block_quote() {
2151 let parsed = parse(
2152 "<blockquote>
2153 <p>some description</p>
2154 </blockquote>",
2155 )
2156 .await;
2157
2158 assert_eq!(
2159 ParsedMarkdown {
2160 children: vec![block_quote(
2161 vec![ParsedMarkdownElement::Paragraph(text(
2162 "some description",
2163 0..78
2164 ))],
2165 0..78,
2166 )]
2167 },
2168 parsed
2169 );
2170 }
2171
2172 #[gpui::test]
2173 async fn test_html_nested_block_quote() {
2174 let parsed = parse(
2175 "<blockquote>
2176 <p>some description</p>
2177 <blockquote>
2178 <p>second description</p>
2179 </blockquote>
2180 </blockquote>",
2181 )
2182 .await;
2183
2184 assert_eq!(
2185 ParsedMarkdown {
2186 children: vec![block_quote(
2187 vec![
2188 ParsedMarkdownElement::Paragraph(text("some description", 0..179)),
2189 block_quote(
2190 vec![ParsedMarkdownElement::Paragraph(text(
2191 "second description",
2192 0..179
2193 ))],
2194 0..179,
2195 )
2196 ],
2197 0..179,
2198 )]
2199 },
2200 parsed
2201 );
2202 }
2203
2204 #[gpui::test]
2205 async fn test_html_table() {
2206 let parsed = parse(
2207 "<table>
2208 <thead>
2209 <tr>
2210 <th>Id</th>
2211 <th>Name</th>
2212 </tr>
2213 </thead>
2214 <tbody>
2215 <tr>
2216 <td>1</td>
2217 <td>Chris</td>
2218 </tr>
2219 <tr>
2220 <td>2</td>
2221 <td>Dennis</td>
2222 </tr>
2223 </tbody>
2224 </table>",
2225 )
2226 .await;
2227
2228 assert_eq!(
2229 ParsedMarkdown {
2230 children: vec![ParsedMarkdownElement::Table(table(
2231 0..366,
2232 None,
2233 vec![row(vec![
2234 column(
2235 1,
2236 1,
2237 true,
2238 text("Id", 0..366),
2239 ParsedMarkdownTableAlignment::Center
2240 ),
2241 column(
2242 1,
2243 1,
2244 true,
2245 text("Name ", 0..366),
2246 ParsedMarkdownTableAlignment::Center
2247 )
2248 ])],
2249 vec![
2250 row(vec![
2251 column(
2252 1,
2253 1,
2254 false,
2255 text("1", 0..366),
2256 ParsedMarkdownTableAlignment::None
2257 ),
2258 column(
2259 1,
2260 1,
2261 false,
2262 text("Chris", 0..366),
2263 ParsedMarkdownTableAlignment::None
2264 )
2265 ]),
2266 row(vec![
2267 column(
2268 1,
2269 1,
2270 false,
2271 text("2", 0..366),
2272 ParsedMarkdownTableAlignment::None
2273 ),
2274 column(
2275 1,
2276 1,
2277 false,
2278 text("Dennis", 0..366),
2279 ParsedMarkdownTableAlignment::None
2280 )
2281 ]),
2282 ],
2283 ))],
2284 },
2285 parsed
2286 );
2287 }
2288
2289 #[gpui::test]
2290 async fn test_html_table_with_caption() {
2291 let parsed = parse(
2292 "<table>
2293 <caption>My Table</caption>
2294 <tbody>
2295 <tr>
2296 <td>1</td>
2297 <td>Chris</td>
2298 </tr>
2299 <tr>
2300 <td>2</td>
2301 <td>Dennis</td>
2302 </tr>
2303 </tbody>
2304 </table>",
2305 )
2306 .await;
2307
2308 assert_eq!(
2309 ParsedMarkdown {
2310 children: vec![ParsedMarkdownElement::Table(table(
2311 0..280,
2312 Some(vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2313 source_range: 0..280,
2314 contents: "My Table".into(),
2315 highlights: Default::default(),
2316 regions: Default::default()
2317 })]),
2318 vec![],
2319 vec![
2320 row(vec![
2321 column(
2322 1,
2323 1,
2324 false,
2325 text("1", 0..280),
2326 ParsedMarkdownTableAlignment::None
2327 ),
2328 column(
2329 1,
2330 1,
2331 false,
2332 text("Chris", 0..280),
2333 ParsedMarkdownTableAlignment::None
2334 )
2335 ]),
2336 row(vec![
2337 column(
2338 1,
2339 1,
2340 false,
2341 text("2", 0..280),
2342 ParsedMarkdownTableAlignment::None
2343 ),
2344 column(
2345 1,
2346 1,
2347 false,
2348 text("Dennis", 0..280),
2349 ParsedMarkdownTableAlignment::None
2350 )
2351 ]),
2352 ],
2353 ))],
2354 },
2355 parsed
2356 );
2357 }
2358
2359 #[gpui::test]
2360 async fn test_html_table_without_headings() {
2361 let parsed = parse(
2362 "<table>
2363 <tbody>
2364 <tr>
2365 <td>1</td>
2366 <td>Chris</td>
2367 </tr>
2368 <tr>
2369 <td>2</td>
2370 <td>Dennis</td>
2371 </tr>
2372 </tbody>
2373 </table>",
2374 )
2375 .await;
2376
2377 assert_eq!(
2378 ParsedMarkdown {
2379 children: vec![ParsedMarkdownElement::Table(table(
2380 0..240,
2381 None,
2382 vec![],
2383 vec![
2384 row(vec![
2385 column(
2386 1,
2387 1,
2388 false,
2389 text("1", 0..240),
2390 ParsedMarkdownTableAlignment::None
2391 ),
2392 column(
2393 1,
2394 1,
2395 false,
2396 text("Chris", 0..240),
2397 ParsedMarkdownTableAlignment::None
2398 )
2399 ]),
2400 row(vec![
2401 column(
2402 1,
2403 1,
2404 false,
2405 text("2", 0..240),
2406 ParsedMarkdownTableAlignment::None
2407 ),
2408 column(
2409 1,
2410 1,
2411 false,
2412 text("Dennis", 0..240),
2413 ParsedMarkdownTableAlignment::None
2414 )
2415 ]),
2416 ],
2417 ))],
2418 },
2419 parsed
2420 );
2421 }
2422
2423 #[gpui::test]
2424 async fn test_html_table_without_body() {
2425 let parsed = parse(
2426 "<table>
2427 <thead>
2428 <tr>
2429 <th>Id</th>
2430 <th>Name</th>
2431 </tr>
2432 </thead>
2433 </table>",
2434 )
2435 .await;
2436
2437 assert_eq!(
2438 ParsedMarkdown {
2439 children: vec![ParsedMarkdownElement::Table(table(
2440 0..150,
2441 None,
2442 vec![row(vec![
2443 column(
2444 1,
2445 1,
2446 true,
2447 text("Id", 0..150),
2448 ParsedMarkdownTableAlignment::Center
2449 ),
2450 column(
2451 1,
2452 1,
2453 true,
2454 text("Name", 0..150),
2455 ParsedMarkdownTableAlignment::Center
2456 )
2457 ])],
2458 vec![],
2459 ))],
2460 },
2461 parsed
2462 );
2463 }
2464
2465 #[gpui::test]
2466 async fn test_html_heading_tags() {
2467 let parsed = parse("<h1>Heading</h1><h2>Heading</h2><h3>Heading</h3><h4>Heading</h4><h5>Heading</h5><h6>Heading</h6>").await;
2468
2469 assert_eq!(
2470 ParsedMarkdown {
2471 children: vec![
2472 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2473 level: HeadingLevel::H1,
2474 source_range: 0..96,
2475 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2476 source_range: 0..96,
2477 contents: "Heading".into(),
2478 highlights: Vec::default(),
2479 regions: Vec::default()
2480 })],
2481 }),
2482 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2483 level: HeadingLevel::H2,
2484 source_range: 0..96,
2485 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2486 source_range: 0..96,
2487 contents: "Heading".into(),
2488 highlights: Vec::default(),
2489 regions: Vec::default()
2490 })],
2491 }),
2492 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2493 level: HeadingLevel::H3,
2494 source_range: 0..96,
2495 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2496 source_range: 0..96,
2497 contents: "Heading".into(),
2498 highlights: Vec::default(),
2499 regions: Vec::default()
2500 })],
2501 }),
2502 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2503 level: HeadingLevel::H4,
2504 source_range: 0..96,
2505 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2506 source_range: 0..96,
2507 contents: "Heading".into(),
2508 highlights: Vec::default(),
2509 regions: Vec::default()
2510 })],
2511 }),
2512 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2513 level: HeadingLevel::H5,
2514 source_range: 0..96,
2515 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2516 source_range: 0..96,
2517 contents: "Heading".into(),
2518 highlights: Vec::default(),
2519 regions: Vec::default()
2520 })],
2521 }),
2522 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
2523 level: HeadingLevel::H6,
2524 source_range: 0..96,
2525 contents: vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
2526 source_range: 0..96,
2527 contents: "Heading".into(),
2528 highlights: Vec::default(),
2529 regions: Vec::default()
2530 })],
2531 }),
2532 ],
2533 },
2534 parsed
2535 );
2536 }
2537
2538 #[gpui::test]
2539 async fn test_html_image_tag() {
2540 let parsed = parse("<img src=\"http://example.com/foo.png\" />").await;
2541
2542 assert_eq!(
2543 ParsedMarkdown {
2544 children: vec![ParsedMarkdownElement::Image(Image {
2545 source_range: 0..40,
2546 link: Link::Web {
2547 url: "http://example.com/foo.png".to_string(),
2548 },
2549 alt_text: None,
2550 height: None,
2551 width: None,
2552 })]
2553 },
2554 parsed
2555 );
2556 }
2557
2558 #[gpui::test]
2559 async fn test_html_image_tag_with_alt_text() {
2560 let parsed = parse("<img src=\"http://example.com/foo.png\" alt=\"Foo\" />").await;
2561
2562 assert_eq!(
2563 ParsedMarkdown {
2564 children: vec![ParsedMarkdownElement::Image(Image {
2565 source_range: 0..50,
2566 link: Link::Web {
2567 url: "http://example.com/foo.png".to_string(),
2568 },
2569 alt_text: Some("Foo".into()),
2570 height: None,
2571 width: None,
2572 })]
2573 },
2574 parsed
2575 );
2576 }
2577
2578 #[gpui::test]
2579 async fn test_html_image_tag_with_height_and_width() {
2580 let parsed =
2581 parse("<img src=\"http://example.com/foo.png\" height=\"100\" width=\"200\" />").await;
2582
2583 assert_eq!(
2584 ParsedMarkdown {
2585 children: vec![ParsedMarkdownElement::Image(Image {
2586 source_range: 0..65,
2587 link: Link::Web {
2588 url: "http://example.com/foo.png".to_string(),
2589 },
2590 alt_text: None,
2591 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2592 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2593 })]
2594 },
2595 parsed
2596 );
2597 }
2598
2599 #[gpui::test]
2600 async fn test_html_image_style_tag_with_height_and_width() {
2601 let parsed = parse(
2602 "<img src=\"http://example.com/foo.png\" style=\"height:100px; width:200px;\" />",
2603 )
2604 .await;
2605
2606 assert_eq!(
2607 ParsedMarkdown {
2608 children: vec![ParsedMarkdownElement::Image(Image {
2609 source_range: 0..75,
2610 link: Link::Web {
2611 url: "http://example.com/foo.png".to_string(),
2612 },
2613 alt_text: None,
2614 height: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(100.)))),
2615 width: Some(DefiniteLength::Absolute(AbsoluteLength::Pixels(px(200.)))),
2616 })]
2617 },
2618 parsed
2619 );
2620 }
2621
2622 #[gpui::test]
2623 async fn test_header_only_table() {
2624 let markdown = "\
2625| Header 1 | Header 2 |
2626|----------|----------|
2627
2628Some other content
2629";
2630
2631 let expected_table = table(
2632 0..48,
2633 None,
2634 vec![row(vec![
2635 column(
2636 1,
2637 1,
2638 true,
2639 text("Header 1", 1..11),
2640 ParsedMarkdownTableAlignment::None,
2641 ),
2642 column(
2643 1,
2644 1,
2645 true,
2646 text("Header 2", 12..22),
2647 ParsedMarkdownTableAlignment::None,
2648 ),
2649 ])],
2650 vec![],
2651 );
2652
2653 assert_eq!(
2654 parse(markdown).await.children[0],
2655 ParsedMarkdownElement::Table(expected_table)
2656 );
2657 }
2658
2659 #[gpui::test]
2660 async fn test_basic_table() {
2661 let markdown = "\
2662| Header 1 | Header 2 |
2663|----------|----------|
2664| Cell 1 | Cell 2 |
2665| Cell 3 | Cell 4 |";
2666
2667 let expected_table = table(
2668 0..95,
2669 None,
2670 vec![row(vec![
2671 column(
2672 1,
2673 1,
2674 true,
2675 text("Header 1", 1..11),
2676 ParsedMarkdownTableAlignment::None,
2677 ),
2678 column(
2679 1,
2680 1,
2681 true,
2682 text("Header 2", 12..22),
2683 ParsedMarkdownTableAlignment::None,
2684 ),
2685 ])],
2686 vec![
2687 row(vec![
2688 column(
2689 1,
2690 1,
2691 false,
2692 text("Cell 1", 49..59),
2693 ParsedMarkdownTableAlignment::None,
2694 ),
2695 column(
2696 1,
2697 1,
2698 false,
2699 text("Cell 2", 60..70),
2700 ParsedMarkdownTableAlignment::None,
2701 ),
2702 ]),
2703 row(vec![
2704 column(
2705 1,
2706 1,
2707 false,
2708 text("Cell 3", 73..83),
2709 ParsedMarkdownTableAlignment::None,
2710 ),
2711 column(
2712 1,
2713 1,
2714 false,
2715 text("Cell 4", 84..94),
2716 ParsedMarkdownTableAlignment::None,
2717 ),
2718 ]),
2719 ],
2720 );
2721
2722 assert_eq!(
2723 parse(markdown).await.children[0],
2724 ParsedMarkdownElement::Table(expected_table)
2725 );
2726 }
2727
2728 #[gpui::test]
2729 async fn test_list_basic() {
2730 let parsed = parse(
2731 "\
2732* Item 1
2733* Item 2
2734* Item 3
2735",
2736 )
2737 .await;
2738
2739 assert_eq!(
2740 parsed.children,
2741 vec![
2742 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2743 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2744 list_item(18..26, 1, Unordered, vec![p("Item 3", 20..26)]),
2745 ],
2746 );
2747 }
2748
2749 #[gpui::test]
2750 async fn test_list_with_tasks() {
2751 let parsed = parse(
2752 "\
2753- [ ] TODO
2754- [x] Checked
2755",
2756 )
2757 .await;
2758
2759 assert_eq!(
2760 parsed.children,
2761 vec![
2762 list_item(0..10, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2763 list_item(11..24, 1, Task(true, 13..16), vec![p("Checked", 17..24)]),
2764 ],
2765 );
2766 }
2767
2768 #[gpui::test]
2769 async fn test_list_with_indented_task() {
2770 let parsed = parse(
2771 "\
2772- [ ] TODO
2773 - [x] Checked
2774 - Unordered
2775 1. Number 1
2776 1. Number 2
27771. Number A
2778",
2779 )
2780 .await;
2781
2782 assert_eq!(
2783 parsed.children,
2784 vec![
2785 list_item(0..12, 1, Task(false, 2..5), vec![p("TODO", 6..10)]),
2786 list_item(13..26, 2, Task(true, 15..18), vec![p("Checked", 19..26)]),
2787 list_item(29..40, 2, Unordered, vec![p("Unordered", 31..40)]),
2788 list_item(43..54, 2, Ordered(1), vec![p("Number 1", 46..54)]),
2789 list_item(57..68, 2, Ordered(2), vec![p("Number 2", 60..68)]),
2790 list_item(69..80, 1, Ordered(1), vec![p("Number A", 72..80)]),
2791 ],
2792 );
2793 }
2794
2795 #[gpui::test]
2796 async fn test_list_with_linebreak_is_handled_correctly() {
2797 let parsed = parse(
2798 "\
2799- [ ] Task 1
2800
2801- [x] Task 2
2802",
2803 )
2804 .await;
2805
2806 assert_eq!(
2807 parsed.children,
2808 vec![
2809 list_item(0..13, 1, Task(false, 2..5), vec![p("Task 1", 6..12)]),
2810 list_item(14..26, 1, Task(true, 16..19), vec![p("Task 2", 20..26)]),
2811 ],
2812 );
2813 }
2814
2815 #[gpui::test]
2816 async fn test_list_nested() {
2817 let parsed = parse(
2818 "\
2819* Item 1
2820* Item 2
2821* Item 3
2822
28231. Hello
28241. Two
2825 1. Three
28262. Four
28273. Five
2828
2829* First
2830 1. Hello
2831 1. Goodbyte
2832 - Inner
2833 - Inner
2834 2. Goodbyte
2835 - Next item empty
2836 -
2837* Last
2838",
2839 )
2840 .await;
2841
2842 assert_eq!(
2843 parsed.children,
2844 vec![
2845 list_item(0..8, 1, Unordered, vec![p("Item 1", 2..8)]),
2846 list_item(9..17, 1, Unordered, vec![p("Item 2", 11..17)]),
2847 list_item(18..27, 1, Unordered, vec![p("Item 3", 20..26)]),
2848 list_item(28..36, 1, Ordered(1), vec![p("Hello", 31..36)]),
2849 list_item(37..46, 1, Ordered(2), vec![p("Two", 40..43),]),
2850 list_item(47..55, 2, Ordered(1), vec![p("Three", 50..55)]),
2851 list_item(56..63, 1, Ordered(3), vec![p("Four", 59..63)]),
2852 list_item(64..72, 1, Ordered(4), vec![p("Five", 67..71)]),
2853 list_item(73..82, 1, Unordered, vec![p("First", 75..80)]),
2854 list_item(83..96, 2, Ordered(1), vec![p("Hello", 86..91)]),
2855 list_item(97..116, 3, Ordered(1), vec![p("Goodbyte", 100..108)]),
2856 list_item(117..124, 4, Unordered, vec![p("Inner", 119..124)]),
2857 list_item(133..140, 4, Unordered, vec![p("Inner", 135..140)]),
2858 list_item(143..159, 2, Ordered(2), vec![p("Goodbyte", 146..154)]),
2859 list_item(160..180, 3, Unordered, vec![p("Next item empty", 165..180)]),
2860 list_item(186..190, 3, Unordered, vec![]),
2861 list_item(191..197, 1, Unordered, vec![p("Last", 193..197)]),
2862 ]
2863 );
2864 }
2865
2866 #[gpui::test]
2867 async fn test_list_with_nested_content() {
2868 let parsed = parse(
2869 "\
2870* This is a list item with two paragraphs.
2871
2872 This is the second paragraph in the list item.
2873",
2874 )
2875 .await;
2876
2877 assert_eq!(
2878 parsed.children,
2879 vec![list_item(
2880 0..96,
2881 1,
2882 Unordered,
2883 vec![
2884 p("This is a list item with two paragraphs.", 4..44),
2885 p("This is the second paragraph in the list item.", 50..97)
2886 ],
2887 ),],
2888 );
2889 }
2890
2891 #[gpui::test]
2892 async fn test_list_item_with_inline_html() {
2893 let parsed = parse(
2894 "\
2895* This is a list item with an inline HTML <sometag>tag</sometag>.
2896",
2897 )
2898 .await;
2899
2900 assert_eq!(
2901 parsed.children,
2902 vec![list_item(
2903 0..67,
2904 1,
2905 Unordered,
2906 vec![p("This is a list item with an inline HTML tag.", 4..44),],
2907 ),],
2908 );
2909 }
2910
2911 #[gpui::test]
2912 async fn test_nested_list_with_paragraph_inside() {
2913 let parsed = parse(
2914 "\
29151. a
2916 1. b
2917 1. c
2918
2919 text
2920
2921 1. d
2922",
2923 )
2924 .await;
2925
2926 assert_eq!(
2927 parsed.children,
2928 vec![
2929 list_item(0..7, 1, Ordered(1), vec![p("a", 3..4)],),
2930 list_item(8..20, 2, Ordered(1), vec![p("b", 12..13),],),
2931 list_item(21..27, 3, Ordered(1), vec![p("c", 25..26),],),
2932 p("text", 32..37),
2933 list_item(41..46, 2, Ordered(1), vec![p("d", 45..46),],),
2934 ],
2935 );
2936 }
2937
2938 #[gpui::test]
2939 async fn test_list_with_leading_text() {
2940 let parsed = parse(
2941 "\
2942* `code`
2943* **bold**
2944* [link](https://example.com)
2945",
2946 )
2947 .await;
2948
2949 assert_eq!(
2950 parsed.children,
2951 vec![
2952 list_item(0..8, 1, Unordered, vec![p("code", 2..8)]),
2953 list_item(9..19, 1, Unordered, vec![p("bold", 11..19)]),
2954 list_item(20..49, 1, Unordered, vec![p("link", 22..49)],),
2955 ],
2956 );
2957 }
2958
2959 #[gpui::test]
2960 async fn test_simple_block_quote() {
2961 let parsed = parse("> Simple block quote with **styled text**").await;
2962
2963 assert_eq!(
2964 parsed.children,
2965 vec![block_quote(
2966 vec![p("Simple block quote with styled text", 2..41)],
2967 0..41
2968 )]
2969 );
2970 }
2971
2972 #[gpui::test]
2973 async fn test_simple_block_quote_with_multiple_lines() {
2974 let parsed = parse(
2975 "\
2976> # Heading
2977> More
2978> text
2979>
2980> More text
2981",
2982 )
2983 .await;
2984
2985 assert_eq!(
2986 parsed.children,
2987 vec![block_quote(
2988 vec![
2989 h1(text("Heading", 4..11), 2..12),
2990 p("More text", 14..26),
2991 p("More text", 30..40)
2992 ],
2993 0..40
2994 )]
2995 );
2996 }
2997
2998 #[gpui::test]
2999 async fn test_nested_block_quote() {
3000 let parsed = parse(
3001 "\
3002> A
3003>
3004> > # B
3005>
3006> C
3007
3008More text
3009",
3010 )
3011 .await;
3012
3013 assert_eq!(
3014 parsed.children,
3015 vec![
3016 block_quote(
3017 vec![
3018 p("A", 2..4),
3019 block_quote(vec![h1(text("B", 12..13), 10..14)], 8..14),
3020 p("C", 18..20)
3021 ],
3022 0..20
3023 ),
3024 p("More text", 21..31)
3025 ]
3026 );
3027 }
3028
3029 #[gpui::test]
3030 async fn test_code_block() {
3031 let parsed = parse(
3032 "\
3033```
3034fn main() {
3035 return 0;
3036}
3037```
3038",
3039 )
3040 .await;
3041
3042 assert_eq!(
3043 parsed.children,
3044 vec![code_block(
3045 None,
3046 "fn main() {\n return 0;\n}",
3047 0..35,
3048 None
3049 )]
3050 );
3051 }
3052
3053 #[gpui::test]
3054 async fn test_code_block_with_language(executor: BackgroundExecutor) {
3055 let language_registry = Arc::new(LanguageRegistry::test(executor.clone()));
3056 language_registry.add(rust_lang());
3057
3058 let parsed = parse_markdown(
3059 "\
3060```rust
3061fn main() {
3062 return 0;
3063}
3064```
3065",
3066 None,
3067 Some(language_registry),
3068 )
3069 .await;
3070
3071 assert_eq!(
3072 parsed.children,
3073 vec![code_block(
3074 Some("rust".to_string()),
3075 "fn main() {\n return 0;\n}",
3076 0..39,
3077 Some(vec![])
3078 )]
3079 );
3080 }
3081
3082 fn rust_lang() -> Arc<Language> {
3083 Arc::new(Language::new(
3084 LanguageConfig {
3085 name: "Rust".into(),
3086 matcher: LanguageMatcher {
3087 path_suffixes: vec!["rs".into()],
3088 ..Default::default()
3089 },
3090 collapsed_placeholder: " /* ... */ ".to_string(),
3091 ..Default::default()
3092 },
3093 Some(tree_sitter_rust::LANGUAGE.into()),
3094 ))
3095 }
3096
3097 fn h1(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3098 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3099 source_range,
3100 level: HeadingLevel::H1,
3101 contents,
3102 })
3103 }
3104
3105 fn h2(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3106 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3107 source_range,
3108 level: HeadingLevel::H2,
3109 contents,
3110 })
3111 }
3112
3113 fn h3(contents: MarkdownParagraph, source_range: Range<usize>) -> ParsedMarkdownElement {
3114 ParsedMarkdownElement::Heading(ParsedMarkdownHeading {
3115 source_range,
3116 level: HeadingLevel::H3,
3117 contents,
3118 })
3119 }
3120
3121 fn p(contents: &str, source_range: Range<usize>) -> ParsedMarkdownElement {
3122 ParsedMarkdownElement::Paragraph(text(contents, source_range))
3123 }
3124
3125 fn text(contents: &str, source_range: Range<usize>) -> MarkdownParagraph {
3126 vec![MarkdownParagraphChunk::Text(ParsedMarkdownText {
3127 highlights: Vec::new(),
3128 regions: Vec::new(),
3129 source_range,
3130 contents: contents.to_string().into(),
3131 })]
3132 }
3133
3134 fn block_quote(
3135 children: Vec<ParsedMarkdownElement>,
3136 source_range: Range<usize>,
3137 ) -> ParsedMarkdownElement {
3138 ParsedMarkdownElement::BlockQuote(ParsedMarkdownBlockQuote {
3139 source_range,
3140 children,
3141 })
3142 }
3143
3144 fn code_block(
3145 language: Option<String>,
3146 code: &str,
3147 source_range: Range<usize>,
3148 highlights: Option<Vec<(Range<usize>, HighlightId)>>,
3149 ) -> ParsedMarkdownElement {
3150 ParsedMarkdownElement::CodeBlock(ParsedMarkdownCodeBlock {
3151 source_range,
3152 language,
3153 contents: code.to_string().into(),
3154 highlights,
3155 })
3156 }
3157
3158 fn list_item(
3159 source_range: Range<usize>,
3160 depth: u16,
3161 item_type: ParsedMarkdownListItemType,
3162 content: Vec<ParsedMarkdownElement>,
3163 ) -> ParsedMarkdownElement {
3164 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3165 source_range,
3166 item_type,
3167 depth,
3168 content,
3169 nested: false,
3170 })
3171 }
3172
3173 fn nested_list_item(
3174 source_range: Range<usize>,
3175 depth: u16,
3176 item_type: ParsedMarkdownListItemType,
3177 content: Vec<ParsedMarkdownElement>,
3178 ) -> ParsedMarkdownElement {
3179 ParsedMarkdownElement::ListItem(ParsedMarkdownListItem {
3180 source_range,
3181 item_type,
3182 depth,
3183 content,
3184 nested: true,
3185 })
3186 }
3187
3188 fn table(
3189 source_range: Range<usize>,
3190 caption: Option<MarkdownParagraph>,
3191 header: Vec<ParsedMarkdownTableRow>,
3192 body: Vec<ParsedMarkdownTableRow>,
3193 ) -> ParsedMarkdownTable {
3194 ParsedMarkdownTable {
3195 source_range,
3196 header,
3197 body,
3198 caption,
3199 }
3200 }
3201
3202 fn row(columns: Vec<ParsedMarkdownTableColumn>) -> ParsedMarkdownTableRow {
3203 ParsedMarkdownTableRow { columns }
3204 }
3205
3206 fn column(
3207 col_span: usize,
3208 row_span: usize,
3209 is_header: bool,
3210 children: MarkdownParagraph,
3211 alignment: ParsedMarkdownTableAlignment,
3212 ) -> ParsedMarkdownTableColumn {
3213 ParsedMarkdownTableColumn {
3214 col_span,
3215 row_span,
3216 is_header,
3217 children,
3218 alignment,
3219 }
3220 }
3221
3222 impl PartialEq for ParsedMarkdownTable {
3223 fn eq(&self, other: &Self) -> bool {
3224 self.source_range == other.source_range
3225 && self.header == other.header
3226 && self.body == other.body
3227 }
3228 }
3229
3230 impl PartialEq for ParsedMarkdownText {
3231 fn eq(&self, other: &Self) -> bool {
3232 self.source_range == other.source_range && self.contents == other.contents
3233 }
3234 }
3235}